/*	$NetBSD: bpf.c,v 1.203 2016/07/19 02:47:45 pgoyette Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 * static char rcsid[] =
 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.203 2016/07/19 02:47:45 pgoyette Exp $");

#if defined(_KERNEL_OPT)
#include "opt_bpf.h"
#include "sl.h"
#include "strip.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/buf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/module.h>
#include <sys/atomic.h>

#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/uio.h>

#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/poll.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>

#include <net/if.h>
#include <net/slip.h>

#include <net/bpf.h>
#include <net/bpfdesc.h>
#include <net/bpfjit.h>

#include <net/if_arc.h>
#include <net/if_ether.h>

#include <netinet/in.h>
#include <netinet/if_inarp.h>

#include <compat/sys/sockio.h>

#ifndef BPF_BUFSIZE
/*
 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
 */
# define BPF_BUFSIZE 32768
#endif

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
 * XXX the default values should be computed dynamically based
 * on available memory size and available mbuf clusters.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_DFLTBUFSIZE;	/* XXX set dynamically, see above */
bool bpf_jit = false;

struct bpfjit_ops bpfjit_module_ops = {
	.bj_generate_code = NULL,
	.bj_free_code = NULL
};

/*
 * Global BPF statistics returned by net.bpf.stats sysctl.
 */
struct bpf_stat	bpf_gstats;

/*
 * Use a mutex to avoid a race condition between gathering the stats/peers
 * and opening/closing the device.
 */
static kmutex_t bpf_mtx;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 * bpf_dtab holds the descriptors, indexed by minor device #.
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_list;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_deliver(struct bpf_if *,
			    void *(*cpfn)(void *, const void *, size_t),
			    void *, u_int, u_int, const bool);
static void	bpf_freed(struct bpf_d *);
static void	bpf_ifname(struct ifnet *, struct ifreq *);
static void	*bpf_mcpy(void *, const void *, size_t);
static int	bpf_movein(struct uio *, int, uint64_t,
			   struct mbuf **, struct sockaddr *);
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static inline void
		bpf_wakeup(struct bpf_d *);
static int	bpf_hdrlen(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
    void *(*)(void *, const void *, size_t), struct timespec *);
static void	reset_d(struct bpf_d *);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);

static int	bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t,
    int);
static int	bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t,
    int);
static int	bpf_ioctl(struct file *, u_long, void *);
static int	bpf_poll(struct file *, int);
static int	bpf_stat(struct file *, struct stat *);
static int	bpf_close(struct file *);
static int	bpf_kqfilter(struct file *, struct knote *);
static void	bpf_softintr(void *);

static const struct fileops bpf_fileops = {
	.fo_read = bpf_read,
	.fo_write = bpf_write,
	.fo_ioctl = bpf_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = bpf_poll,
	.fo_stat = bpf_stat,
	.fo_close = bpf_close,
	.fo_kqfilter = bpf_kqfilter,
	.fo_restart = fnullop_restart,
};

dev_type_open(bpfopen);

const struct cdevsw bpf_cdevsw = {
	.d_open = bpfopen,
	.d_close = noclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = noioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER
};

bpfjit_func_t
bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size)
{

	membar_consumer();
	if (bpfjit_module_ops.bj_generate_code != NULL) {
		return bpfjit_module_ops.bj_generate_code(bc, code, size);
	}
	return NULL;
}

void
bpf_jit_freecode(bpfjit_func_t jcode)
{
	KASSERT(bpfjit_module_ops.bj_free_code != NULL);
	bpfjit_module_ops.bj_free_code(jcode);
}

static int
bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp,
	   struct sockaddr *sockp)
{
	struct mbuf *m;
	int error;
	size_t len;
	size_t hlen;
	size_t align;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		align = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		/* 6(dst)+6(src)+2(type) */
		hlen = sizeof(struct ether_header);
		align = 2;
		break;

	case DLT_ARCNET:
		sockp->sa_family = AF_UNSPEC;
		hlen = ARC_HDRLEN;
		align = 5;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_LINK;
		/* XXX 4(FORMAC)+6(dst)+6(src) */
		hlen = 16;
		align = 0;
		break;

	case DLT_ECONET:
		sockp->sa_family = AF_UNSPEC;
		hlen = 6;
		align = 2;
		break;

	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	/*
	 * If there aren't enough bytes for a link level header or the
	 * packet length exceeds the interface mtu, return an error.
	 */
	if (len - hlen > mtu)
		return (EMSGSIZE);

	/*
	 * XXX Avoid complicated buffer chaining ---
	 * bail if it won't fit in a single mbuf.
	 * (Take into account possible alignment bytes)
	 */
	if (len + align > MCLBYTES)
		return (EIO);

	m = m_gethdr(M_WAIT, MT_DATA);
	m_reset_rcvif(m);
	m->m_pkthdr.len = (int)(len - hlen);
	if (len + align > MHLEN) {
		m_clget(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	/* Ensure the data is properly aligned */
	if (align > 0) {
		m->m_data += align;
		m->m_len -= (int)align;
	}

	error = uiomove(mtod(m, void *), len, uio);
	if (error)
		goto bad;
	if (hlen != 0) {
		memcpy(sockp->sa_data, mtod(m, void *), hlen);
		m->m_data += hlen;	/* XXX */
		len -= hlen;
	}
	m->m_len = (int)len;
	*mp = m;
	return (0);

 bad:
	m_freem(m);
	return (error);
}
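
/*
 * Usage sketch (userland, illustrative only -- not part of the kernel):
 * a write(2) of a raw frame on a descriptor bound with BIOCSETIF ends up
 * here via bpf_write().  For a 60-byte Ethernet frame on DLT_EN10MB:
 *
 *	char frame[60];				(14-byte header + payload)
 *	(void)write(fd, frame, sizeof(frame));
 *
 * bpf_movein() copies the 14 header bytes into sockp->sa_data and trims
 * them from the mbuf, so if_output() sees a 46-byte payload mbuf plus
 * the link-level header carried in the destination sockaddr.
 */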

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Must be called at splnet.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	KASSERT(mutex_owned(&bpf_mtx));
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;

	KASSERT(mutex_owned(&bpf_mtx));

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error __diagused;

		d->bd_promisc = 0;
		/*
		 * Take device out of promiscuous mode.  Since we were
		 * able to enter promiscuous mode, we should be able
		 * to turn it off.  But we can get an error if
		 * the interface was configured down, so we only log
		 * the error instead of panicking.
		 */
		error = ifpromisc(bp->bif_ifp, 0);
#ifdef DIAGNOSTIC
		if (error)
			printf("%s: ifpromisc failed: %d\n", __func__, error);
#endif
	}
	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == NULL)
			panic("%s: descriptor not in list", __func__);
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == NULL)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = NULL;
	d->bd_bif = NULL;
}

static void
bpf_init(void)
{

	mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);

	LIST_INIT(&bpf_list);

	bpf_gstats.bs_recv = 0;
	bpf_gstats.bs_drop = 0;
	bpf_gstats.bs_capt = 0;

	return;
}

/*
 * bpfilterattach() is called at boot time.  We don't need to do anything
 * here, since any initialization will happen as part of module init code.
 */
/* ARGSUSED */
void
bpfilterattach(int n)
{

}

/*
 * Open ethernet device. Clones.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct bpf_d *d;
	struct file *fp;
	int error, fd;

	/* falloc() will fill in the descriptor for us. */
	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;

	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO);
	d->bd_bufsize = bpf_bufsize;
	d->bd_seesent = 1;
	d->bd_feedback = 0;
	d->bd_pid = l->l_proc->p_pid;
#ifdef _LP64
	if (curproc->p_flag & PK_32)
		d->bd_compat32 = 1;
#endif
	getnanotime(&d->bd_btime);
	d->bd_atime = d->bd_mtime = d->bd_btime;
	callout_init(&d->bd_callout, 0);
	selinit(&d->bd_sel);
	d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d);
	d->bd_jitcode = NULL;

	mutex_enter(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
	mutex_exit(&bpf_mtx);

	return fd_clone(fp, fd, flag, &bpf_fileops, d);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static int
bpf_close(struct file *fp)
{
	struct bpf_d *d;
	int s;

	KERNEL_LOCK(1, NULL);
	mutex_enter(&bpf_mtx);

	if ((d = fp->f_bpf) == NULL) {
		mutex_exit(&bpf_mtx);
		KERNEL_UNLOCK_ONE(NULL);
		return 0;
	}

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	d->bd_pid = curproc->p_pid;

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	if (d->bd_bif)
		bpf_detachd(d);
	splx(s);
	bpf_freed(d);
	LIST_REMOVE(d, bd_list);
	fp->f_bpf = NULL;

	mutex_exit(&bpf_mtx);
	KERNEL_UNLOCK_ONE(NULL);

	callout_destroy(&d->bd_callout);
	seldestroy(&d->bd_sel);
	softint_disestablish(d->bd_sih);
	free(d, M_DEVBUF);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;
/*
 * bpfread - read next chunk of packets from buffers
 */
static int
bpf_read(struct file *fp, off_t *offp, struct uio *uio,
	 kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_bpf;
	int timed_out;
	int error;
	int s;

	getnanotime(&d->bd_atime);
	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (fp->f_flag & FNONBLOCK) {
			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (EWOULDBLOCK);
			}
			ROTATE_BUFFERS(d);
			break;
		}

		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		error = tsleep(d, PRINET|PCATCH, "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			splx(s);
			KERNEL_UNLOCK_ONE(NULL);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (error != 0)
			goto done;
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	splx(s);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	s = splnet();
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
 done:
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}
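
/*
 * Usage sketch (userland, illustrative only): read(2) must be issued
 * with a buffer of exactly the size reported by BIOCGBLEN, and a single
 * read may return several packets.  The canonical way to walk the
 * returned records:
 *
 *	char *p = buf;
 *	while (p < buf + nread) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		... data is at p + bh->bh_hdrlen, bh->bh_caplen bytes ...
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */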

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static inline void
bpf_wakeup(struct bpf_d *d)
{
	wakeup(d);
	if (d->bd_async)
		softint_schedule(d->bd_sih);
	selnotify(&d->bd_sel, 0, 0);
}

static void
bpf_softintr(void *cookie)
{
	struct bpf_d *d;

	d = cookie;
	if (d->bd_async)
		fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = arg;
	int s;

	s = splnet();
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	splx(s);
}

static int
bpf_write(struct file *fp, off_t *offp, struct uio *uio,
	  kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_bpf;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	int error, s;
	static struct sockaddr_storage dst;

	m = NULL;	/* XXX gcc */

	KERNEL_LOCK(1, NULL);

	if (d->bd_bif == NULL) {
		KERNEL_UNLOCK_ONE(NULL);
		return (ENXIO);
	}
	getnanotime(&d->bd_mtime);

	ifp = d->bd_bif->bif_ifp;

	if (uio->uio_resid == 0) {
		KERNEL_UNLOCK_ONE(NULL);
		return (0);
	}

	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m,
		(struct sockaddr *) &dst);
	if (error) {
		KERNEL_UNLOCK_ONE(NULL);
		return (error);
	}

	if (m->m_pkthdr.len > ifp->if_mtu) {
		KERNEL_UNLOCK_ONE(NULL);
		m_freem(m);
		return (EMSGSIZE);
	}

	if (d->bd_hdrcmplt)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, 0, M_COPYALL, M_NOWAIT);
		if (mc != NULL)
			m_set_rcvif(mc, ifp);
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (1 /*d->bd_direction == BPF_D_INOUT*/)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	s = splsoftnet();
	error = if_output_lock(ifp, ifp, m, (struct sockaddr *) &dst, NULL);

	if (mc != NULL) {
		if (error == 0)
			ifp->_if_input(ifp, mc);
		else
			m_freem(mc);
	}
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Should be called at splnet.
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_ccount = 0;
}
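
/*
 * Usage sketch (userland, illustrative only): a typical capture setup
 * issues the ioctls handled below in roughly this order:
 *
 *	struct ifreq ifr;
 *	strlcpy(ifr.ifr_name, "bge0", sizeof(ifr.ifr_name));  (example name)
 *	ioctl(fd, BIOCSETIF, &ifr);         bind to an interface
 *	ioctl(fd, BIOCSETF, &prog);         install a filter program
 *	ioctl(fd, BIOCIMMEDIATE, &on);      deliver packets as they arrive
 *	ioctl(fd, BIOCGBLEN, &blen);        learn the required read size
 */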

/*
 * FIONREAD		Check for read packet available.
 * BIOCGBLEN		Get buffer len [for read()].
 * BIOCSETF		Set ethernet read filter.
 * BIOCFLUSH		Flush read packet buffer.
 * BIOCPROMISC		Put interface into promiscuous mode.
 * BIOCGDLT		Get link layer type.
 * BIOCGETIF		Get interface name.
 * BIOCSETIF		Set interface.
 * BIOCSRTIMEOUT	Set read timeout.
 * BIOCGRTIMEOUT	Get read timeout.
 * BIOCGSTATS		Get packet stats.
 * BIOCIMMEDIATE	Set immediate mode.
 * BIOCVERSION		Get filter language version.
 * BIOCGHDRCMPLT	Get "header already complete" flag.
 * BIOCSHDRCMPLT	Set "header already complete" flag.
 * BIOCSFEEDBACK	Set packet feedback mode.
 * BIOCGFEEDBACK	Get packet feedback mode.
 * BIOCGSEESENT		Get "see sent packets" mode.
 * BIOCSSEESENT		Set "see sent packets" mode.
 */
/* ARGSUSED */
static int
bpf_ioctl(struct file *fp, u_long cmd, void *addr)
{
	struct bpf_d *d = fp->f_bpf;
	int s, error = 0;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;
#ifdef _LP64
	if (curproc->p_flag & PK_32)
		d->bd_compat32 = 1;
	else
		d->bd_compat32 = 0;
#endif

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	splx(s);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			s = splnet();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splnet();
		reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, addr);
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		mutex_enter(&bpf_mtx);
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		mutex_exit(&bpf_mtx);
		break;

	/*
	 * Get interface name.
	 */
#ifdef OBIOCGETIF
	case OBIOCGETIF:
#endif
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, addr);
		break;

	/*
	 * Set interface.
	 */
#ifdef OBIOCSETIF
	case OBIOCSETIF:
#endif
	case BIOCSETIF:
		mutex_enter(&bpf_mtx);
		error = bpf_setif(d, addr);
		mutex_exit(&bpf_mtx);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}

#ifdef BIOCGORTIMEOUT
	/*
	 * Get read timeout.
	 */
	case BIOCGORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
#endif

#ifdef BIOCSORTIMEOUT
	/*
	 * Set read timeout.
	 */
	case BIOCSORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}
#endif

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			bs->bs_capt = d->bd_ccount;
			break;
		}

	case BIOCGSTATSOLD:
		{
			struct bpf_stat_old *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	/*
	 * Set "feed packets from bpf back to input" mode
	 */
	case BIOCSFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	/*
	 * Get "feed packets from bpf back to input" mode
	 */
	case BIOCGFEEDBACK:
		*(u_int *)addr = d->bd_feedback;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/*
		 * No need to do anything special here, since bpf_read()
		 * checks FNONBLOCK in the file flags to decide whether
		 * or not to block the read.
		 */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case TIOCSPGRP:		/* Process or group to send signals to */
	case FIOSETOWN:
		error = fsetown(&d->bd_pgid, cmd, addr);
		break;

	case TIOCGPGRP:
	case FIOGETOWN:
		error = fgetown(d->bd_pgid, cmd, addr);
		break;
	}
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}
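
/*
 * Illustrative example (userland, not compiled): the smallest program
 * accepted by bpf_setf() below is a single BPF_RET instruction whose
 * operand is the snapshot length; (u_int)-1 captures whole packets:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 */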

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp)
{
	struct bpf_insn *fcode, *old;
	bpfjit_func_t jcode, oldj;
	size_t flen, size;
	int s;

	jcode = NULL;
	flen = fp->bf_len;

	if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) {
		return EINVAL;
	}

	if (flen) {
		/*
		 * Allocate the buffer, copy the byte-code from
		 * userspace and validate it.
		 */
		size = flen * sizeof(*fp->bf_insns);
		fcode = malloc(size, M_DEVBUF, M_WAITOK);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    !bpf_validate(fcode, (int)flen)) {
			free(fcode, M_DEVBUF);
			return EINVAL;
		}
		membar_consumer();
		if (bpf_jit)
			jcode = bpf_jit_generate(NULL, fcode, flen);
	} else {
		fcode = NULL;
	}

	s = splnet();
	old = d->bd_filter;
	d->bd_filter = fcode;
	oldj = d->bd_jitcode;
	d->bd_jitcode = jcode;
	reset_d(d);
	splx(s);

	if (old) {
		free(old, M_DEVBUF);
	}
	if (oldj) {
		bpf_jit_freecode(oldj);
	}

	return 0;
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	char *cp;
	int unit_seen, i, s, error;

	KASSERT(mutex_owned(&bpf_mtx));
	/*
	 * Make sure the provided name has a unit number, and default
	 * it to '0' if not specified.
	 * XXX This is ugly ... do this differently?
	 */
	unit_seen = 0;
	cp = ifr->ifr_name;
	cp[sizeof(ifr->ifr_name) - 1] = '\0';	/* sanity */
	while (*cp++)
		if (*cp >= '0' && *cp <= '9')
			unit_seen = 1;
	if (!unit_seen) {
		/* Make sure to leave room for the '\0'. */
		for (i = 0; i < (IFNAMSIZ - 1); ++i) {
			if ((ifr->ifr_name[i] >= 'a' &&
			     ifr->ifr_name[i] <= 'z') ||
			    (ifr->ifr_name[i] >= 'A' &&
			     ifr->ifr_name[i] <= 'Z'))
				continue;
			ifr->ifr_name[i] = '0';
		}
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == NULL ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == NULL) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		s = splnet();
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		reset_d(d);
		splx(s);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}

/*
 * Copy the interface name to the ifreq.
 */
static void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
}

static int
bpf_stat(struct file *fp, struct stat *st)
{
	struct bpf_d *d = fp->f_bpf;

	(void)memset(st, 0, sizeof(*st));
	KERNEL_LOCK(1, NULL);
	st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
	st->st_atimespec = d->bd_atime;
	st->st_mtimespec = d->bd_mtime;
	st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	st->st_mode = S_IFCHR;
	KERNEL_UNLOCK_ONE(NULL);
	return 0;
}

/*
 * Support for poll() system call
 *
 * Return true iff the specific operation will not block indefinitely - with
 * the assumption that it is safe to positively acknowledge a request for the
 * ability to write to the BPF device.
 * Otherwise, return false but make a note that a selnotify() must be done.
 */
static int
bpf_poll(struct file *fp, int events)
{
	struct bpf_d *d = fp->f_bpf;
	int s = splnet();
	int revents;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	revents = events & (POLLOUT | POLLWRNORM);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		     d->bd_slen != 0)) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &d->bd_sel);
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	splx(s);
	return (revents);
}

static void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splnet();
	SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;
	int rv;

	KERNEL_LOCK(1, NULL);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	rv = (kn->kn_data > 0);
	KERNEL_UNLOCK_ONE(NULL);
	return rv;
}

static const struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

static int
bpf_kqfilter(struct file *fp, struct knote *kn)
{
	struct bpf_d *d = fp->f_bpf;
	struct klist *klist;
	int s;

	KERNEL_LOCK(1, NULL);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.sel_klist;
		kn->kn_fop = &bpfread_filtops;
		break;

	default:
		KERNEL_UNLOCK_ONE(NULL);
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcpy");
		count = min(m->m_len, len);
		memcpy(dst, mtod(m, const void *), count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
	return dst_arg;
}

/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * pkt		pointer to the packet, either a data buffer or an mbuf chain
 * buflen	buffer length, if pkt is a data buffer
 * cpfn		a function that can copy pkt into the listener's buffer
 * pktlen	length of the packet
 * rcv		true if packet came in
 */
static inline void
bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
	    void *pkt, u_int pktlen, u_int buflen, const bool rcv)
{
	uint32_t mem[BPF_MEMWORDS];
	bpf_args_t args = {
		.pkt = (const uint8_t *)pkt,
		.wirelen = pktlen,
		.buflen = buflen,
		.mem = mem,
		.arg = NULL
	};
	bool gottime = false;
	struct timespec ts;

	/*
	 * Note that the IPL does not have to be raised at this point.
	 * The only problem that could arise here is that if two different
	 * interfaces shared any data.  This is not the case.
	 */
	for (struct bpf_d *d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		u_int slen;

		if (!d->bd_seesent && !rcv) {
			continue;
		}
		d->bd_rcount++;
		bpf_gstats.bs_recv++;

		if (d->bd_jitcode)
			slen = d->bd_jitcode(NULL, &args);
		else
			slen = bpf_filter_ext(NULL, d->bd_filter, &args);

		if (!slen) {
			continue;
		}
		if (!gottime) {
			gottime = true;
			nanotime(&ts);
		}
		catchpacket(d, pkt, pktlen, slen, cpfn, &ts);
	}
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
static void
_bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{

	bpf_deliver(bp, memcpy, pkt, pktlen, pktlen, true);
}

/*
 * Incoming linkage from device drivers, when the head of the packet is in
 * a buffer, and the tail is in an mbuf chain.
 */
static void
_bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	u_int pktlen;
	struct mbuf mb;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m) + dlen;

	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	(void)memset(&mb, 0, sizeof(mb));
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;

	bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif_index != 0);
}
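
/*
 * Note on the buflen convention used above: when the packet reaches
 * bpf_deliver() as an mbuf chain (cpfn == bpf_mcpy), buflen is passed
 * as 0 so the filter knows the packet cannot be read as one contiguous
 * buffer; when it is a flat buffer (cpfn == memcpy), buflen == pktlen.
 */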

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
static void
_bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	void *(*cpfn)(void *, const void *, size_t);
	u_int pktlen, buflen;
	void *marg;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m);

	if (pktlen == m->m_len) {
		cpfn = (void *)memcpy;
		marg = mtod(m, void *);
		buflen = pktlen;
	} else {
		cpfn = bpf_mcpy;
		marg = m;
		buflen = 0;
	}

	bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif_index != 0);
}

/*
 * We need to prepend the address family as
 * a four byte field.  Cons up a dummy header
 * to pacify bpf.  This is safe because bpf
 * will only read from the mbuf (i.e., it won't
 * try to free it or keep a pointer to it).
 */
static void
_bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m)
{
	struct mbuf m0;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_len = 4;
	m0.m_data = (char *)&af;

	_bpf_mtap(bp, &m0);
}

/*
 * Put the SLIP pseudo-"link header" in place.
 * Note this M_PREPEND() should never fail,
 * since we know we always have enough space
 * in the input buffer.
 */
static void
_bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
{
	int s;
	u_char *hp;

	M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
	if (*m == NULL)
		return;

	hp = mtod(*m, u_char *);
	hp[SLX_DIR] = SLIPDIR_IN;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, *m);
	splx(s);

	m_adj(*m, SLIP_HDRLEN);
}

/*
 * Put the SLIP pseudo-"link header" in
 * place.  The compressed header is now
 * at the beginning of the mbuf.
 */
static void
_bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
{
	struct mbuf m0;
	u_char *hp;
	int s;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_data = m0.m_dat;
	m0.m_len = SLIP_HDRLEN;

	hp = mtod(&m0, u_char *);

	hp[SLX_DIR] = SLIPDIR_OUT;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, &m0);
	splx(s);
	m_freem(m);
}

static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen = d->bd_bif->bif_hdrlen;
	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
#ifdef _LP64
	if (d->bd_compat32)
		return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen);
	else
#endif
		return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen);
}
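
/*
 * Layout sketch of one record as produced by catchpacket() below, with
 * hdrlen = bpf_hdrlen(d); each record starts on a BPF_WORDALIGN boundary
 * so that the captured network-layer header lands on a longword boundary:
 *
 *	+----------------+---------+------------------------+
 *	| struct bpf_hdr | padding | caplen bytes of packet |
 *	+----------------+---------+------------------------+
 *	|<------- hdrlen -------->|
 *	next record at BPF_WORDALIGN(hdrlen + caplen) from here
 */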

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Wake up pending reads if it's time to wake up
 * a listener (buffer full).  "cpfn" is the routine called to do the
 * actual data transfer: memcpy is passed in to copy contiguous chunks,
 * while bpf_mcpy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
	    void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
{
	char *h;
	int totlen, curlen, caplen;
	int hdrlen = bpf_hdrlen(d);
	int do_wakeup = 0;

	++d->bd_ccount;
	++bpf_gstats.bs_capt;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;
	/*
	 * If we adjusted totlen to fit the bufsize, it could be that
	 * totlen is smaller than hdrlen because of the link layer header.
	 */
	caplen = totlen - hdrlen;
	if (caplen < 0)
		caplen = 0;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
#ifdef _LP64
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			++bpf_gstats.bs_drop;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	h = (char *)d->bd_sbuf + curlen;
#ifdef _LP64
	if (d->bd_compat32) {
		struct bpf_hdr32 *hp32;

		hp32 = (struct bpf_hdr32 *)h;
		hp32->bh_tstamp.tv_sec = ts->tv_sec;
		hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp32->bh_datalen = pktlen;
		hp32->bh_hdrlen = hdrlen;
		hp32->bh_caplen = caplen;
	} else
#endif
	{
		struct bpf_hdr *hp;

		hp = (struct bpf_hdr *)h;
		hp->bh_tstamp.tv_sec = ts->tv_sec;
		hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp->bh_datalen = pktlen;
		hp->bh_hdrlen = hdrlen;
		hp->bh_caplen = caplen;
	}

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(h + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
	 * will cause filt_bpfread() to be called with it adjusted.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (!d->bd_fbuf)
		return (ENOBUFS);
	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
	if (!d->bd_sbuf) {
		free(d->bd_fbuf, M_DEVBUF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_DEVBUF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_DEVBUF);
	}
	if (d->bd_filter)
		free(d->bd_filter, M_DEVBUF);

	if (d->bd_jitcode != NULL) {
		bpf_jit_freecode(d->bd_jitcode);
	}
}

/*
 * Attach an interface to bpf.  dlt is the link layer type;
 * hdrlen is the fixed size of the link header for the specified dlt
 * (variable length headers not yet supported).
 */
static void
_bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
	if (bp == NULL)
		panic("bpfattach");

	mutex_enter(&bpf_mtx);
	bp->bif_dlist = NULL;
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = NULL;

	bp->bif_hdrlen = hdrlen;
	mutex_exit(&bpf_mtx);
#if 0
	printf("bpf: %s attached\n", ifp->if_xname);
#endif
}

/*
 * Remove an interface from bpf.
 */
static void
_bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, **pbp;
	struct bpf_d *d;
	int s;

	mutex_enter(&bpf_mtx);
	/* Nuke the vnodes for any open instances */
	LIST_FOREACH(d, &bpf_list, bd_list) {
		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
			/*
			 * Detach the descriptor from an interface now.
			 * It will be freed later by the close routine.
			 */
			s = splnet();
			d->bd_promisc = 0;	/* we can't touch device. */
			bpf_detachd(d);
			splx(s);
		}
	}

 again:
	for (bp = bpf_iflist, pbp = &bpf_iflist;
	     bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
		if (bp->bif_ifp == ifp) {
			*pbp = bp->bif_next;
			free(bp, M_DEVBUF);
			goto again;
		}
	}
	mutex_exit(&bpf_mtx);
}

/*
 * Change the data link type of an interface.
 */
static void
_bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_driverp == &ifp->if_bpf)
			break;
	}
	if (bp == NULL)
		panic("bpf_change_type");

	bp->bif_dlt = dlt;

	bp->bif_hdrlen = hdrlen;
}

/*
 * Get a list of the available data link types of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return ENOMEM;
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	bfl->bfl_len = n;
	return error;
}
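
/*
 * Usage sketch (userland, illustrative only): BIOCGDLTLIST is typically
 * a two-pass ioctl.  With bfl_list NULL, the loop above only counts the
 * DLTs and returns the count in bfl_len; the caller then allocates the
 * array and repeats the call:
 *
 *	struct bpf_dltlist bfl = { 0, NULL };
 *	ioctl(fd, BIOCGDLTLIST, &bfl);                 (learn the count)
 *	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
 *	ioctl(fd, BIOCGDLTLIST, &bfl);                 (fetch the DLTs)
 */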

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int s, error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	KASSERT(mutex_owned(&bpf_mtx));

	if (d->bd_bif->bif_dlt == dlt)
		return 0;
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return EINVAL;
	s = splnet();
	opromisc = d->bd_promisc;
	bpf_detachd(d);
	bpf_attachd(d, bp);
	reset_d(d);
	if (opromisc) {
		error = ifpromisc(bp->bif_ifp, 1);
		if (error)
			printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
			    bp->bif_ifp->if_xname, error);
		else
			d->bd_promisc = 1;
	}
	splx(s);
	return 0;
}

static int
sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newsize;
	newsize = bpf_maxbufsize;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
		return (EINVAL);

	bpf_maxbufsize = newsize;

	return (0);
}

#if defined(MODULAR) || defined(BPFJIT)
static int
sysctl_net_bpf_jit(SYSCTLFN_ARGS)
{
	bool newval;
	int error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	newval = bpf_jit;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	bpf_jit = newval;

	/*
	 * Do a full sync to publish new bpf_jit value and
	 * update bpfjit_module_ops.bj_generate_code variable.
	 */
	membar_sync();

	if (newval && bpfjit_module_ops.bj_generate_code == NULL) {
		printf("JIT compilation is postponed "
		    "until after bpfjit module is loaded\n");
	}

	return 0;
}
#endif

static int
sysctl_net_bpf_peers(SYSCTLFN_ARGS)
{
	int error, elem_count;
	struct bpf_d *dp;
	struct bpf_d_ext dpe;
	size_t len, needed, elem_size, out_size;
	char *sp;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (namelen != 2)
		return (EINVAL);

	/* BPF peers is privileged information. */
	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
	    KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
	if (error)
		return (EPERM);

	len = (oldp != NULL) ? *oldlenp : 0;
	sp = oldp;
	elem_size = name[0];
	elem_count = name[1];
	out_size = MIN(sizeof(dpe), elem_size);
	needed = 0;

	if (elem_size < 1 || elem_count < 0)
		return (EINVAL);

	mutex_enter(&bpf_mtx);
	LIST_FOREACH(dp, &bpf_list, bd_list) {
		if (len >= elem_size && elem_count > 0) {
#define BPF_EXT(field)	dpe.bde_ ## field = dp->bd_ ## field
			BPF_EXT(bufsize);
			BPF_EXT(promisc);
			BPF_EXT(state);
			BPF_EXT(immediate);
			BPF_EXT(hdrcmplt);
			BPF_EXT(seesent);
			BPF_EXT(pid);
			BPF_EXT(rcount);
			BPF_EXT(dcount);
			BPF_EXT(ccount);
#undef BPF_EXT
			if (dp->bd_bif)
				(void)strlcpy(dpe.bde_ifname,
				    dp->bd_bif->bif_ifp->if_xname,
				    IFNAMSIZ - 1);
			else
				dpe.bde_ifname[0] = '\0';

			error = copyout(&dpe, sp, out_size);
			if (error)
				break;
			sp += elem_size;
			len -= elem_size;
		}
		needed += elem_size;
		if (elem_count > 0 && elem_count != INT_MAX)
			elem_count--;
	}
	mutex_exit(&bpf_mtx);

	*oldlenp = needed;

	return (error);
}

static struct sysctllog *bpf_sysctllog;
static void
sysctl_net_bpf_setup(void)
{
	const struct sysctlnode *node;

	node = NULL;
	sysctl_createv(&bpf_sysctllog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "bpf",
	    SYSCTL_DESCR("BPF options"),
	    NULL, 0, NULL, 0,
	    CTL_NET, CTL_CREATE, CTL_EOL);
	if (node != NULL) {
#if defined(MODULAR) || defined(BPFJIT)
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_BOOL, "jit",
		    SYSCTL_DESCR("Toggle Just-In-Time compilation"),
		    sysctl_net_bpf_jit, 0, &bpf_jit, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
#endif
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_INT, "maxbufsize",
		    SYSCTL_DESCR("Maximum size for data capture buffer"),
		    sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "stats",
		    SYSCTL_DESCR("BPF stats"),
		    NULL, 0, &bpf_gstats, sizeof(bpf_gstats),
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "peers",
		    SYSCTL_DESCR("BPF peers"),
		    sysctl_net_bpf_peers, 0, NULL, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}

struct bpf_ops bpf_ops_kernel = {
	.bpf_attach =		_bpfattach,
	.bpf_detach =		_bpfdetach,
	.bpf_change_type =	_bpf_change_type,

	.bpf_tap =		_bpf_tap,
	.bpf_mtap =		_bpf_mtap,
	.bpf_mtap2 =		_bpf_mtap2,
	.bpf_mtap_af =		_bpf_mtap_af,
	.bpf_mtap_sl_in =	_bpf_mtap_sl_in,
	.bpf_mtap_sl_out =	_bpf_mtap_sl_out,
};

MODULE(MODULE_CLASS_DRIVER, bpf, "bpf_filter");
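
/*
 * Module plumbing note: on MODULE_CMD_INIT below, the global bpf_ops
 * vector is switched from the stubs to bpf_ops_kernel inside the
 * handover enter/exit bracket, so drivers calling bpf_mtap() et al.
 * begin diverting packets here only once the device node and the
 * state initialized above are fully set up.
 */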

static int
bpf_modcmd(modcmd_t cmd, void *arg)
{
#ifdef _MODULE
	devmajor_t bmajor, cmajor;
#endif
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpf_init();
#ifdef _MODULE
		bmajor = cmajor = NODEVMAJOR;
		error = devsw_attach("bpf", NULL, &bmajor,
		    &bpf_cdevsw, &cmajor);
		if (error)
			break;
#endif

		bpf_ops_handover_enter(&bpf_ops_kernel);
		atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
		bpf_ops_handover_exit();
		sysctl_net_bpf_setup();
		break;

	case MODULE_CMD_FINI:
		/*
		 * While there is no reference counting for bpf callers,
		 * unload could at least in theory be done similarly to
		 * system call disestablishment.  This should even be
		 * a little simpler:
		 *
		 * 1) replace op vector with stubs
		 * 2) post update to all cpus with xc
		 * 3) check that nobody is in bpf anymore
		 *    (it's doubtful we'd want something like l_sysent,
		 *    but we could do something like *signed* percpu
		 *    counters.  if the sum is 0, we're good).
		 * 4) if fail, unroll changes
		 *
		 * NOTE: change won't be atomic to the outside.  some
		 * packets may not be captured even if unload is
		 * not successful.  I think packet capture not working
		 * is a perfectly logical consequence of trying to
		 * disable packet capture.
		 */
		error = EOPNOTSUPP;
		/* insert sysctl teardown */
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}