/*	$OpenBSD: bpf.c,v 1.221 2023/03/09 05:56:58 dlg Exp $	*/
/*	$NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/event.h>
#include <sys/mutex.h>
#include <sys/refcnt.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/sigio.h>
#include <sys/task.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#define BPF_BUFSIZE	32768

#define BPF_S_IDLE	0
#define BPF_S_WAIT	1
#define BPF_S_DONE	2

#define PRINET	26		/* interruptible */

/*
 * The default read buffer size is patchable.
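 * It can also be tuned at run time through sysctl; see bpf_sysctl_locked() below.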
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_wait_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);
int	filt_bpfreadmodify(struct kevent *, struct knote *);
int	filt_bpfreadprocess(struct knote *, struct kevent *);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);


struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen, alen, mlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EMSGSIZE);
	len = uio->uio_resid;
	if (len < hlen)
		return (EINVAL);

	/*
	 * Get the length of the payload so we can align it properly.
	 */
	alen = len - hlen;

	/*
	 * Allocate enough space for headers and the aligned payload.
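	 * max_linkhdr reserves room at the front in case a larger
	 * link-level header is prepended later.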
	 */
	mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long));
	if (mlen > MAXMCLBYTES)
		return (EMSGSIZE);

	MGETHDR(m, M_WAIT, MT_DATA);
	if (mlen > MHLEN) {
		MCLGETL(m, M_WAIT, mlen);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	m_align(m, alen);		/* Align the payload. */
	m->m_data -= hlen;

	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len;
	m->m_len = len;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);

		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen;
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	*mp = m;
	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
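 * bpf devices are cloned, so a fresh descriptor is created on demand for
 * each unit.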
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	timeout_set(&bd->bd_wait_tmo, bpf_wait_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);
	klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);

	bd->bd_rtout = 0;	/* no timeout by default */
	bd->bd_wtout = INFSLP;	/* wait for the buffer to fill by default */

	refcnt_init(&bd->bd_refcnt);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_state = BPF_S_IDLE; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;

/*
 * bpfread - read next chunk of packets from buffers
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)
			end = UINT64_MAX;
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_state == BPF_S_DONE) {
			/*
			 * One or more packets either arrived since the
			 * previous read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (ISSET(ioflag, IO_NDELAY)) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	if (d->bd_nreaders)
		wakeup(d);

	knote_locked(&d->bd_klist, 0);

	/*
	 * As long as pgsigio() needs to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
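	 * The task holds a descriptor reference which bpf_wakeup_cb()
	 * releases.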
	 */
	if (d->bd_async && d->bd_sig) {
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);
	}
}

void
bpf_wakeup_cb(void *xd)
{
	struct bpf_d *d = xd;

	if (d->bd_async && d->bd_sig)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	bpf_put(d);
}

void
bpf_wait_cb(void *xd)
{
	struct bpf_d *d = xd;

	mtx_enter(&d->bd_mtx);
	if (d->bd_state == BPF_S_WAIT) {
		d->bd_state = BPF_S_DONE;
		bpf_wakeup(d);
	}
	mtx_leave(&d->bd_mtx);

	bpf_put(d);
}

int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	if (timeout_del(&d->bd_wait_tmo))
		bpf_put(d);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_state = BPF_S_IDLE;
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}

static int
bpf_set_wtout(struct bpf_d *d, uint64_t wtout)
{
	mtx_enter(&d->bd_mtx);
	d->bd_wtout = wtout;
	mtx_leave(&d->bd_mtx);

	return (0);
}

static int
bpf_set_wtimeout(struct bpf_d *d, const struct timeval *tv)
{
	uint64_t nsec;

	if (tv->tv_sec < 0 || !timerisvalid(tv))
		return (EINVAL);

	nsec = TIMEVAL_TO_NSEC(tv);
	if (nsec > MAXTSLP)
		return (EOVERFLOW);

	return (bpf_set_wtout(d, nsec));
}

static int
bpf_get_wtimeout(struct bpf_d *d, struct timeval *tv)
{
	uint64_t nsec;

	mtx_enter(&d->bd_mtx);
	nsec = d->bd_wtout;
	mtx_leave(&d->bd_mtx);

	if (nsec == INFSLP)
		return (ENXIO);

	memset(tv, 0, sizeof(*tv));
	NSEC_TO_TIMEVAL(nsec, tv);

	return (0);
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLTLIST	Get supported link layer types.
 *  BIOCGDLT		Get link layer type.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCSWTIMEOUT	Set wait timeout.
 *  BIOCGWTIMEOUT	Get wait timeout.
 *  BIOCDWTIMEOUT	Del wait timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGWTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCSWTIMEOUT:
		case BIOCDWTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
	{
		int n;

		mtx_enter(&d->bd_mtx);
		n = d->bd_slen;
		if (d->bd_hbuf != NULL)
			n += d->bd_hlen;
		mtx_leave(&d->bd_mtx);

		*(int *)addr = n;
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;
		uint64_t rtout;

		if (tv->tv_sec < 0 || !timerisvalid(tv)) {
			error = EINVAL;
			break;
		}
		rtout = TIMEVAL_TO_NSEC(tv);
		if (rtout > MAXTSLP) {
			error = EOVERFLOW;
			break;
		}
		mtx_enter(&d->bd_mtx);
		d->bd_rtout = rtout;
		mtx_leave(&d->bd_mtx);
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;

		memset(tv, 0, sizeof(*tv));
		mtx_enter(&d->bd_mtx);
		NSEC_TO_TIMEVAL(d->bd_rtout, tv);
		mtx_leave(&d->bd_mtx);
		break;
	}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
	{
		struct bpf_stat *bs = (struct bpf_stat *)addr;

		bs->bs_recv = d->bd_rcount;
		bs->bs_drop = d->bd_dcount;
		break;
	}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		error = bpf_set_wtout(d, *(int *)addr ? 0 : INFSLP);
		break;

	/*
	 * Wait timeout.
	 */
	case BIOCSWTIMEOUT:
		error = bpf_set_wtimeout(d, (const struct timeval *)addr);
		break;
	case BIOCGWTIMEOUT:
		error = bpf_get_wtimeout(d, (struct timeval *)addr);
		break;
	case BIOCDWTIMEOUT:
		error = bpf_set_wtout(d, INFSLP);
		break;

	case BIOCVERSION:
	{
		struct bpf_version *bv = (struct bpf_version *)addr;

		bv->bv_major = BPF_MAJOR_VERSION;
		bv->bv_minor = BPF_MINOR_VERSION;
		break;
	}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/* let the vfs keep track of this */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
	{
		u_int sig;

		sig = *(u_int *)addr;

		if (sig >= NSIG)
			error = EINVAL;
		else
			d->bd_sig = sig;
		break;
	}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
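	 * If the name is registered with several DLTs, prefer the
	 * lowest-valued one.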
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
	.f_modify	= filt_bpfreadmodify,
	.f_process	= filt_bpfreadprocess,
};

int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d == NULL)
		return (ENXIO);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_klist;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	klist_insert(klist, kn);

	return (0);
}

void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	klist_remove(&d->bd_klist, kn);
	bpf_put(d);
}

int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	kn->kn_data = d->bd_hlen;
	if (d->bd_wtout == 0)
		kn->kn_data += d->bd_slen;

	return (kn->kn_data > 0);
}

int
filt_bpfreadmodify(struct kevent *kev, struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_modify_fn(kev, kn, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}

int
filt_bpfreadprocess(struct knote *kn, struct kevent *kev)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_process_fn(kn, kev, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
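 * The chain must cover at least len bytes; otherwise we panic.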
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, m, direction);
}

int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header.  In this situation we already have a
 * way of representing a chain of memory buffers, ie, mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
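 * This is safe as bpf only reads the fields we initialize and never frees
 * or keeps a pointer to them.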
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh, md;

	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return _bpf_mtap(arg, m, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag);

	mh.mh_flags = 0;
	mh.mh_data = (caddr_t)&evh;
	mh.mh_len = sizeof(evh);
	mh.mh_next = (struct mbuf *)&md;

	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
#endif
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up listeners if needed.
 * bpf_mcopy() is called to do the actual data transfer, so pkt is
 * really an mbuf chain.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}

		/* cancel pending wtime */
		if (timeout_del(&d->bd_wait_tmo))
			bpf_put(d);

		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;

	switch (d->bd_wtout) {
	case 0:
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		if (d->bd_state == BPF_S_IDLE)
			d->bd_state = BPF_S_DONE;
		do_wakeup = 1;
		break;
	case INFSLP:
		break;
	default:
		if (d->bd_state == BPF_S_IDLE) {
			d->bd_state = BPF_S_WAIT;

			bpf_get(d);
			if (!timeout_add_nsec(&d->bd_wait_tmo, d->bd_wtout))
				bpf_put(d);
		}
		break;
	}

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
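 * The store and free buffers are allocated here; the hold buffer starts
 * out empty.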
 */
int
bpf_allocbufs(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_fbuf == NULL)
		return (ENOMEM);

	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (d->bd_sbuf == NULL) {
		free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
		d->bd_fbuf = NULL;
		return (ENOMEM);
	}

	d->bd_slen = 0;
	d->bd_hlen = 0;

	return (0);
}

void
bpf_prog_smr(void *bps_arg)
{
	struct bpf_program_smr *bps = bps_arg;

	free(bps->bps_bf.bf_insns, M_DEVBUF,
	    bps->bps_bf.bf_len * sizeof(struct bpf_insn));
	free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
}

void
bpf_d_smr(void *smr)
{
	struct bpf_d *bd = smr;

	sigio_free(&bd->bd_sigio);
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	klist_free(&bd->bd_klist);
	free(bd, M_DEVBUF, sizeof(*bd));
}

void
bpf_get(struct bpf_d *bd)
{
	refcnt_take(&bd->bd_refcnt);
}

/*
 * Free buffers currently in use by a descriptor
 * when the reference count drops to zero.
 */
void
bpf_put(struct bpf_d *bd)
{
	if (refcnt_rele(&bd->bd_refcnt) == 0)
		return;

	smr_call(&bd->bd_smr, bpf_d_smr, bd);
}

void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}

void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
	bp->bif_ifp = ifp;
}

/* Detach an interface from its attached bpf device.  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	for (bp = bpf_iflist; bp; bp = nbp) {
		nbp = bp->bif_next;
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}

void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) {
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);
		klist_invalidate(&bd->bd_klist);
	}

	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}

int
bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize);
	case NET_BPF_MAXBUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX);
	default:
		return (EOPNOTSUPP);
	}
}

int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int flags = RW_INTR;
	int error;

	if (namelen != 1)
		return (ENOTDIR);

	flags |= (newp == NULL) ? RW_READ : RW_WRITE;

	error = rw_enter(&bpf_sysctl_lk, flags);
	if (error != 0)
		return (error);

	error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);

	rw_exit(&bpf_sysctl_lk);

	return (error);
}

struct bpf_d *
bpfilter_lookup(int unit)
{
	struct bpf_d *bd;

	KERNEL_ASSERT_LOCKED();

	LIST_FOREACH(bd, &bpf_d_list, bd_list)
		if (bd->bd_unit == unit)
			return (bd);
	return (NULL);
}

/*
 * Get a list of the available data link types of the interface.
 */
int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct bpf_if *bp;
	const char *name;

	name = d->bd_bif->bif_name;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return (ENOMEM);
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
			if (error)
				break;
		}
		n++;
	}

	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
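 * The new DLT must be one registered for the interface name the descriptor
 * is attached to.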
 */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}

u_int32_t	bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t	bpf_mbuf_ldb(const void *, u_int32_t, int *);

int		bpf_mbuf_copy(const struct mbuf *, u_int32_t,
		    void *, u_int32_t);

const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};

int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	return (-1);
}

u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohl(v);
}

u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t v;

	if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
		*err = 1;
		return (0);
	}

	*err = 0;
	return ntohs(v);
}

u_int32_t
bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
{
	const struct mbuf *m = m0;
	u_int8_t v;

	while (k >= m->m_len) {
		k -= m->m_len;

		m = m->m_next;
		if (m == NULL) {
			*err = 1;
			return (0);
		}
	}
	v = m->m_data[k];

	*err = 0;
	return v;
}

u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}