1 /* $NetBSD: bpf.c,v 1.183 2014/06/24 10:53:30 alnsn Exp $ */ 2 3 /* 4 * Copyright (c) 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from the Stanford/CMU enet packet filter, 8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 10 * Berkeley Laboratory. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 37 * static char rcsid[] = 38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp "; 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.183 2014/06/24 10:53:30 alnsn Exp $"); 43 44 #if defined(_KERNEL_OPT) 45 #include "opt_bpf.h" 46 #include "sl.h" 47 #include "strip.h" 48 #endif 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/mbuf.h> 53 #include <sys/buf.h> 54 #include <sys/time.h> 55 #include <sys/proc.h> 56 #include <sys/ioctl.h> 57 #include <sys/conf.h> 58 #include <sys/vnode.h> 59 #include <sys/queue.h> 60 #include <sys/stat.h> 61 #include <sys/module.h> 62 #include <sys/once.h> 63 #include <sys/atomic.h> 64 65 #include <sys/file.h> 66 #include <sys/filedesc.h> 67 #include <sys/tty.h> 68 #include <sys/uio.h> 69 70 #include <sys/protosw.h> 71 #include <sys/socket.h> 72 #include <sys/errno.h> 73 #include <sys/kernel.h> 74 #include <sys/poll.h> 75 #include <sys/sysctl.h> 76 #include <sys/kauth.h> 77 78 #include <net/if.h> 79 #include <net/slip.h> 80 81 #include <net/bpf.h> 82 #include <net/bpfdesc.h> 83 #include <net/bpfjit.h> 84 85 #include <net/if_arc.h> 86 #include <net/if_ether.h> 87 88 #include <netinet/in.h> 89 #include <netinet/if_inarp.h> 90 91 92 #include <compat/sys/sockio.h> 93 94 #ifndef BPF_BUFSIZE 95 /* 96 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet 97 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k). 98 */ 99 # define BPF_BUFSIZE 32768 100 #endif 101 102 #define PRINET 26 /* interruptible */ 103 104 /* 105 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able. 106 * XXX the default values should be computed dynamically based 107 * on available memory size and available mbuf clusters. 
108 */ 109 int bpf_bufsize = BPF_BUFSIZE; 110 int bpf_maxbufsize = BPF_DFLTBUFSIZE; /* XXX set dynamically, see above */ 111 bool bpf_jit = false; 112 113 struct bpfjit_ops bpfjit_module_ops = { 114 .bj_generate_code = NULL, 115 .bj_free_code = NULL 116 }; 117 118 /* 119 * Global BPF statistics returned by net.bpf.stats sysctl. 120 */ 121 struct bpf_stat bpf_gstats; 122 123 /* 124 * Use a mutex to avoid a race condition between gathering the stats/peers 125 * and opening/closing the device. 126 */ 127 static kmutex_t bpf_mtx; 128 129 /* 130 * bpf_iflist is the list of interfaces; each corresponds to an ifnet 131 * bpf_dtab holds the descriptors, indexed by minor device # 132 */ 133 struct bpf_if *bpf_iflist; 134 LIST_HEAD(, bpf_d) bpf_list; 135 136 static int bpf_allocbufs(struct bpf_d *); 137 static void bpf_deliver(struct bpf_if *, 138 void *(*cpfn)(void *, const void *, size_t), 139 void *, u_int, u_int, const bool); 140 static void bpf_freed(struct bpf_d *); 141 static void bpf_ifname(struct ifnet *, struct ifreq *); 142 static void *bpf_mcpy(void *, const void *, size_t); 143 static int bpf_movein(struct uio *, int, uint64_t, 144 struct mbuf **, struct sockaddr *); 145 static void bpf_attachd(struct bpf_d *, struct bpf_if *); 146 static void bpf_detachd(struct bpf_d *); 147 static int bpf_setif(struct bpf_d *, struct ifreq *); 148 static void bpf_timed_out(void *); 149 static inline void 150 bpf_wakeup(struct bpf_d *); 151 static int bpf_hdrlen(struct bpf_d *); 152 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 153 void *(*)(void *, const void *, size_t), struct timespec *); 154 static void reset_d(struct bpf_d *); 155 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 156 static int bpf_setdlt(struct bpf_d *, u_int); 157 158 static int bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t, 159 int); 160 static int bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t, 161 int); 162 static int bpf_ioctl(struct file 
*, u_long, void *); 163 static int bpf_poll(struct file *, int); 164 static int bpf_stat(struct file *, struct stat *); 165 static int bpf_close(struct file *); 166 static int bpf_kqfilter(struct file *, struct knote *); 167 static void bpf_softintr(void *); 168 169 static const struct fileops bpf_fileops = { 170 .fo_read = bpf_read, 171 .fo_write = bpf_write, 172 .fo_ioctl = bpf_ioctl, 173 .fo_fcntl = fnullop_fcntl, 174 .fo_poll = bpf_poll, 175 .fo_stat = bpf_stat, 176 .fo_close = bpf_close, 177 .fo_kqfilter = bpf_kqfilter, 178 .fo_restart = fnullop_restart, 179 }; 180 181 dev_type_open(bpfopen); 182 183 const struct cdevsw bpf_cdevsw = { 184 .d_open = bpfopen, 185 .d_close = noclose, 186 .d_read = noread, 187 .d_write = nowrite, 188 .d_ioctl = noioctl, 189 .d_stop = nostop, 190 .d_tty = notty, 191 .d_poll = nopoll, 192 .d_mmap = nommap, 193 .d_kqfilter = nokqfilter, 194 .d_flag = D_OTHER 195 }; 196 197 bpfjit_func_t 198 bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size) 199 { 200 201 membar_consumer(); 202 if (bpfjit_module_ops.bj_generate_code != NULL) { 203 return bpfjit_module_ops.bj_generate_code(bc, code, size); 204 } 205 return NULL; 206 } 207 208 void 209 bpf_jit_freecode(bpfjit_func_t jcode) 210 { 211 KASSERT(bpfjit_module_ops.bj_free_code != NULL); 212 bpfjit_module_ops.bj_free_code(jcode); 213 } 214 215 static int 216 bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp, 217 struct sockaddr *sockp) 218 { 219 struct mbuf *m; 220 int error; 221 size_t len; 222 size_t hlen; 223 size_t align; 224 225 /* 226 * Build a sockaddr based on the data link layer type. 227 * We do this at this level because the ethernet header 228 * is copied directly into the data field of the sockaddr. 229 * In the case of SLIP, there is no header and the packet 230 * is forwarded as is. 231 * Also, we are careful to leave room at the front of the mbuf 232 * for the link level header. 
	 */
	switch (linktype) {

	case DLT_SLIP:
		/* No link-level header; packet forwarded as-is. */
		sockp->sa_family = AF_INET;
		hlen = 0;
		align = 0;
		break;

	case DLT_PPP:
		/* No link-level header. */
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		/* 6(dst)+6(src)+2(type) */
		hlen = sizeof(struct ether_header);
		/* Pad 2 bytes so the IP payload lands 4-byte aligned. */
		align = 2;
		break;

	case DLT_ARCNET:
		sockp->sa_family = AF_UNSPEC;
		hlen = ARC_HDRLEN;
		align = 5;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_LINK;
		/* XXX 4(FORMAC)+6(dst)+6(src) */
		hlen = 16;
		align = 0;
		break;

	case DLT_ECONET:
		sockp->sa_family = AF_UNSPEC;
		hlen = 6;
		align = 2;
		break;

	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	default:
		/* Writing is not supported on this link type. */
		return (EIO);
	}

	len = uio->uio_resid;
	/*
	 * If there aren't enough bytes for a link level header or the
	 * packet length exceeds the interface mtu, return an error.
	 *
	 * NOTE(review): len and hlen are size_t, so when len < hlen the
	 * subtraction wraps to a huge value and the EMSGSIZE branch is
	 * taken -- apparently relied upon to reject short writes; confirm.
	 */
	if (len - hlen > mtu)
		return (EMSGSIZE);

	/*
	 * XXX Avoid complicated buffer chaining ---
	 * bail if it won't fit in a single mbuf.
296 * (Take into account possible alignment bytes) 297 */ 298 if (len + align > MCLBYTES) 299 return (EIO); 300 301 m = m_gethdr(M_WAIT, MT_DATA); 302 m->m_pkthdr.rcvif = 0; 303 m->m_pkthdr.len = (int)(len - hlen); 304 if (len + align > MHLEN) { 305 m_clget(m, M_WAIT); 306 if ((m->m_flags & M_EXT) == 0) { 307 error = ENOBUFS; 308 goto bad; 309 } 310 } 311 312 /* Insure the data is properly aligned */ 313 if (align > 0) { 314 m->m_data += align; 315 m->m_len -= (int)align; 316 } 317 318 error = uiomove(mtod(m, void *), len, uio); 319 if (error) 320 goto bad; 321 if (hlen != 0) { 322 memcpy(sockp->sa_data, mtod(m, void *), hlen); 323 m->m_data += hlen; /* XXX */ 324 len -= hlen; 325 } 326 m->m_len = (int)len; 327 *mp = m; 328 return (0); 329 330 bad: 331 m_freem(m); 332 return (error); 333 } 334 335 /* 336 * Attach file to the bpf interface, i.e. make d listen on bp. 337 * Must be called at splnet. 338 */ 339 static void 340 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 341 { 342 /* 343 * Point d at bp, and add d to the interface's list of listeners. 344 * Finally, point the driver's bpf cookie at the interface so 345 * it will divert packets to bpf. 346 */ 347 d->bd_bif = bp; 348 d->bd_next = bp->bif_dlist; 349 bp->bif_dlist = d; 350 351 *bp->bif_driverp = bp; 352 } 353 354 /* 355 * Detach a file from its interface. 356 */ 357 static void 358 bpf_detachd(struct bpf_d *d) 359 { 360 struct bpf_d **p; 361 struct bpf_if *bp; 362 363 bp = d->bd_bif; 364 /* 365 * Check if this descriptor had requested promiscuous mode. 366 * If so, turn it off. 367 */ 368 if (d->bd_promisc) { 369 int error __diagused; 370 371 d->bd_promisc = 0; 372 /* 373 * Take device out of promiscuous mode. Since we were 374 * able to enter promiscuous mode, we should be able 375 * to turn it off. But we can get an error if 376 * the interface was configured down, so only panic 377 * if we don't get an unexpected error. 
 */
		error = ifpromisc(bp->bif_ifp, 0);
#ifdef DIAGNOSTIC
		/*
		 * NOTE(review): message has no trailing '\n' -- confirm
		 * this is intended for the kernel console.
		 */
		if (error)
			printf("%s: ifpromisc failed: %d", __func__, error);
#endif
	}
	/* Remove d from the interface's descriptor list. */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0)
			panic("%s: descriptor not in list", __func__);
	}
	/* Unlink d; *p currently points at d. */
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = 0;
	d->bd_bif = 0;
}

/*
 * One-time initialization: set up the global mutex, the descriptor
 * list and zero the global statistics.  Run via RUN_ONCE() below.
 */
static int
doinit(void)
{

	mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);

	LIST_INIT(&bpf_list);

	bpf_gstats.bs_recv = 0;
	bpf_gstats.bs_drop = 0;
	bpf_gstats.bs_capt = 0;

	return 0;
}

/*
 * bpfilterattach() is called at boot time.
 */
/* ARGSUSED */
void
bpfilterattach(int n)
{
	static ONCE_DECL(control);

	/* Safe to call repeatedly; doinit() runs exactly once. */
	RUN_ONCE(&control, doinit);
}

/*
 * Open ethernet device.  Clones.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct bpf_d *d;
	struct file *fp;
	int error, fd;

	/* falloc() will use the descriptor for us.
*/ 440 if ((error = fd_allocfile(&fp, &fd)) != 0) 441 return error; 442 443 d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO); 444 d->bd_bufsize = bpf_bufsize; 445 d->bd_seesent = 1; 446 d->bd_feedback = 0; 447 d->bd_pid = l->l_proc->p_pid; 448 #ifdef _LP64 449 if (curproc->p_flag & PK_32) 450 d->bd_compat32 = 1; 451 #endif 452 getnanotime(&d->bd_btime); 453 d->bd_atime = d->bd_mtime = d->bd_btime; 454 callout_init(&d->bd_callout, 0); 455 selinit(&d->bd_sel); 456 d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d); 457 d->bd_jitcode = NULL; 458 459 mutex_enter(&bpf_mtx); 460 LIST_INSERT_HEAD(&bpf_list, d, bd_list); 461 mutex_exit(&bpf_mtx); 462 463 return fd_clone(fp, fd, flag, &bpf_fileops, d); 464 } 465 466 /* 467 * Close the descriptor by detaching it from its interface, 468 * deallocating its buffers, and marking it free. 469 */ 470 /* ARGSUSED */ 471 static int 472 bpf_close(struct file *fp) 473 { 474 struct bpf_d *d = fp->f_data; 475 int s; 476 477 KERNEL_LOCK(1, NULL); 478 479 /* 480 * Refresh the PID associated with this bpf file. 481 */ 482 d->bd_pid = curproc->p_pid; 483 484 s = splnet(); 485 if (d->bd_state == BPF_WAITING) 486 callout_stop(&d->bd_callout); 487 d->bd_state = BPF_IDLE; 488 if (d->bd_bif) 489 bpf_detachd(d); 490 splx(s); 491 bpf_freed(d); 492 mutex_enter(&bpf_mtx); 493 LIST_REMOVE(d, bd_list); 494 mutex_exit(&bpf_mtx); 495 callout_destroy(&d->bd_callout); 496 seldestroy(&d->bd_sel); 497 softint_disestablish(d->bd_sih); 498 free(d, M_DEVBUF); 499 fp->f_data = NULL; 500 501 KERNEL_UNLOCK_ONE(NULL); 502 503 return (0); 504 } 505 506 /* 507 * Rotate the packet buffers in descriptor d. Move the store buffer 508 * into the hold slot, and the free buffer into the store slot. 509 * Zero the length of the new store buffer. 
510 */ 511 #define ROTATE_BUFFERS(d) \ 512 (d)->bd_hbuf = (d)->bd_sbuf; \ 513 (d)->bd_hlen = (d)->bd_slen; \ 514 (d)->bd_sbuf = (d)->bd_fbuf; \ 515 (d)->bd_slen = 0; \ 516 (d)->bd_fbuf = 0; 517 /* 518 * bpfread - read next chunk of packets from buffers 519 */ 520 static int 521 bpf_read(struct file *fp, off_t *offp, struct uio *uio, 522 kauth_cred_t cred, int flags) 523 { 524 struct bpf_d *d = fp->f_data; 525 int timed_out; 526 int error; 527 int s; 528 529 getnanotime(&d->bd_atime); 530 /* 531 * Restrict application to use a buffer the same size as 532 * the kernel buffers. 533 */ 534 if (uio->uio_resid != d->bd_bufsize) 535 return (EINVAL); 536 537 KERNEL_LOCK(1, NULL); 538 s = splnet(); 539 if (d->bd_state == BPF_WAITING) 540 callout_stop(&d->bd_callout); 541 timed_out = (d->bd_state == BPF_TIMED_OUT); 542 d->bd_state = BPF_IDLE; 543 /* 544 * If the hold buffer is empty, then do a timed sleep, which 545 * ends when the timeout expires or when enough packets 546 * have arrived to fill the store buffer. 547 */ 548 while (d->bd_hbuf == 0) { 549 if (fp->f_flag & FNONBLOCK) { 550 if (d->bd_slen == 0) { 551 splx(s); 552 KERNEL_UNLOCK_ONE(NULL); 553 return (EWOULDBLOCK); 554 } 555 ROTATE_BUFFERS(d); 556 break; 557 } 558 559 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 560 /* 561 * A packet(s) either arrived since the previous 562 * read or arrived while we were asleep. 563 * Rotate the buffers and return what's here. 564 */ 565 ROTATE_BUFFERS(d); 566 break; 567 } 568 error = tsleep(d, PRINET|PCATCH, "bpf", 569 d->bd_rtout); 570 if (error == EINTR || error == ERESTART) { 571 splx(s); 572 KERNEL_UNLOCK_ONE(NULL); 573 return (error); 574 } 575 if (error == EWOULDBLOCK) { 576 /* 577 * On a timeout, return what's in the buffer, 578 * which may be nothing. If there is something 579 * in the store buffer, we can rotate the buffers. 
580 */ 581 if (d->bd_hbuf) 582 /* 583 * We filled up the buffer in between 584 * getting the timeout and arriving 585 * here, so we don't need to rotate. 586 */ 587 break; 588 589 if (d->bd_slen == 0) { 590 splx(s); 591 KERNEL_UNLOCK_ONE(NULL); 592 return (0); 593 } 594 ROTATE_BUFFERS(d); 595 break; 596 } 597 if (error != 0) 598 goto done; 599 } 600 /* 601 * At this point, we know we have something in the hold slot. 602 */ 603 splx(s); 604 605 /* 606 * Move data from hold buffer into user space. 607 * We know the entire buffer is transferred since 608 * we checked above that the read buffer is bpf_bufsize bytes. 609 */ 610 error = uiomove(d->bd_hbuf, d->bd_hlen, uio); 611 612 s = splnet(); 613 d->bd_fbuf = d->bd_hbuf; 614 d->bd_hbuf = 0; 615 d->bd_hlen = 0; 616 done: 617 splx(s); 618 KERNEL_UNLOCK_ONE(NULL); 619 return (error); 620 } 621 622 623 /* 624 * If there are processes sleeping on this descriptor, wake them up. 625 */ 626 static inline void 627 bpf_wakeup(struct bpf_d *d) 628 { 629 wakeup(d); 630 if (d->bd_async) 631 softint_schedule(d->bd_sih); 632 selnotify(&d->bd_sel, 0, 0); 633 } 634 635 static void 636 bpf_softintr(void *cookie) 637 { 638 struct bpf_d *d; 639 640 d = cookie; 641 if (d->bd_async) 642 fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL); 643 } 644 645 static void 646 bpf_timed_out(void *arg) 647 { 648 struct bpf_d *d = arg; 649 int s; 650 651 s = splnet(); 652 if (d->bd_state == BPF_WAITING) { 653 d->bd_state = BPF_TIMED_OUT; 654 if (d->bd_slen != 0) 655 bpf_wakeup(d); 656 } 657 splx(s); 658 } 659 660 661 static int 662 bpf_write(struct file *fp, off_t *offp, struct uio *uio, 663 kauth_cred_t cred, int flags) 664 { 665 struct bpf_d *d = fp->f_data; 666 struct ifnet *ifp; 667 struct mbuf *m, *mc; 668 int error, s; 669 static struct sockaddr_storage dst; 670 671 m = NULL; /* XXX gcc */ 672 673 KERNEL_LOCK(1, NULL); 674 675 if (d->bd_bif == 0) { 676 KERNEL_UNLOCK_ONE(NULL); 677 return (ENXIO); 678 } 679 getnanotime(&d->bd_mtime); 680 681 ifp = 
d->bd_bif->bif_ifp; 682 683 if (uio->uio_resid == 0) { 684 KERNEL_UNLOCK_ONE(NULL); 685 return (0); 686 } 687 688 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m, 689 (struct sockaddr *) &dst); 690 if (error) { 691 KERNEL_UNLOCK_ONE(NULL); 692 return (error); 693 } 694 695 if (m->m_pkthdr.len > ifp->if_mtu) { 696 KERNEL_UNLOCK_ONE(NULL); 697 m_freem(m); 698 return (EMSGSIZE); 699 } 700 701 if (d->bd_hdrcmplt) 702 dst.ss_family = pseudo_AF_HDRCMPLT; 703 704 if (d->bd_feedback) { 705 mc = m_dup(m, 0, M_COPYALL, M_NOWAIT); 706 if (mc != NULL) 707 mc->m_pkthdr.rcvif = ifp; 708 /* Set M_PROMISC for outgoing packets to be discarded. */ 709 if (1 /*d->bd_direction == BPF_D_INOUT*/) 710 m->m_flags |= M_PROMISC; 711 } else 712 mc = NULL; 713 714 s = splsoftnet(); 715 error = (*ifp->if_output)(ifp, m, (struct sockaddr *) &dst, NULL); 716 717 if (mc != NULL) { 718 if (error == 0) 719 (*ifp->if_input)(ifp, mc); 720 m_freem(mc); 721 } 722 splx(s); 723 KERNEL_UNLOCK_ONE(NULL); 724 /* 725 * The driver frees the mbuf. 726 */ 727 return (error); 728 } 729 730 /* 731 * Reset a descriptor by flushing its packet buffer and clearing the 732 * receive and drop counts. Should be called at splnet. 733 */ 734 static void 735 reset_d(struct bpf_d *d) 736 { 737 if (d->bd_hbuf) { 738 /* Free the hold buffer. */ 739 d->bd_fbuf = d->bd_hbuf; 740 d->bd_hbuf = 0; 741 } 742 d->bd_slen = 0; 743 d->bd_hlen = 0; 744 d->bd_rcount = 0; 745 d->bd_dcount = 0; 746 d->bd_ccount = 0; 747 } 748 749 /* 750 * FIONREAD Check for read packet available. 751 * BIOCGBLEN Get buffer len [for read()]. 752 * BIOCSETF Set ethernet read filter. 753 * BIOCFLUSH Flush read packet buffer. 754 * BIOCPROMISC Put interface into promiscuous mode. 755 * BIOCGDLT Get link layer type. 756 * BIOCGETIF Get interface name. 757 * BIOCSETIF Set interface. 758 * BIOCSRTIMEOUT Set read timeout. 759 * BIOCGRTIMEOUT Get read timeout. 760 * BIOCGSTATS Get packet stats. 761 * BIOCIMMEDIATE Set immediate mode. 
762 * BIOCVERSION Get filter language version. 763 * BIOCGHDRCMPLT Get "header already complete" flag. 764 * BIOCSHDRCMPLT Set "header already complete" flag. 765 * BIOCSFEEDBACK Set packet feedback mode. 766 * BIOCGFEEDBACK Get packet feedback mode. 767 * BIOCGSEESENT Get "see sent packets" mode. 768 * BIOCSSEESENT Set "see sent packets" mode. 769 */ 770 /* ARGSUSED */ 771 static int 772 bpf_ioctl(struct file *fp, u_long cmd, void *addr) 773 { 774 struct bpf_d *d = fp->f_data; 775 int s, error = 0; 776 777 /* 778 * Refresh the PID associated with this bpf file. 779 */ 780 KERNEL_LOCK(1, NULL); 781 d->bd_pid = curproc->p_pid; 782 #ifdef _LP64 783 if (curproc->p_flag & PK_32) 784 d->bd_compat32 = 1; 785 else 786 d->bd_compat32 = 0; 787 #endif 788 789 s = splnet(); 790 if (d->bd_state == BPF_WAITING) 791 callout_stop(&d->bd_callout); 792 d->bd_state = BPF_IDLE; 793 splx(s); 794 795 switch (cmd) { 796 797 default: 798 error = EINVAL; 799 break; 800 801 /* 802 * Check for read packet available. 803 */ 804 case FIONREAD: 805 { 806 int n; 807 808 s = splnet(); 809 n = d->bd_slen; 810 if (d->bd_hbuf) 811 n += d->bd_hlen; 812 splx(s); 813 814 *(int *)addr = n; 815 break; 816 } 817 818 /* 819 * Get buffer len [for read()]. 820 */ 821 case BIOCGBLEN: 822 *(u_int *)addr = d->bd_bufsize; 823 break; 824 825 /* 826 * Set buffer length. 827 */ 828 case BIOCSBLEN: 829 if (d->bd_bif != 0) 830 error = EINVAL; 831 else { 832 u_int size = *(u_int *)addr; 833 834 if (size > bpf_maxbufsize) 835 *(u_int *)addr = size = bpf_maxbufsize; 836 else if (size < BPF_MINBUFSIZE) 837 *(u_int *)addr = size = BPF_MINBUFSIZE; 838 d->bd_bufsize = size; 839 } 840 break; 841 842 /* 843 * Set link layer read filter. 844 */ 845 case BIOCSETF: 846 error = bpf_setf(d, addr); 847 break; 848 849 /* 850 * Flush read packet buffer. 851 */ 852 case BIOCFLUSH: 853 s = splnet(); 854 reset_d(d); 855 splx(s); 856 break; 857 858 /* 859 * Put interface into promiscuous mode. 
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		/* Only the first request actually toggles the interface. */
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, addr);
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Set interface name.
	 */
#ifdef OBIOCGETIF
	case OBIOCGETIF:
#endif
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, addr);
		break;

	/*
	 * Set interface.
	 */
#ifdef OBIOCSETIF
	case OBIOCSETIF:
#endif
	case BIOCSETIF:
		error = bpf_setif(d, addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
	{
		struct timeval *tv = addr;

		/*
		 * Compute number of ticks.
		 * NOTE(review): tv_sec * hz can overflow for very large
		 * timeouts and negative tv fields are not rejected here
		 * -- confirm whether validation is done by the caller.
		 */
		d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
		/* Round a nonzero sub-tick timeout up to one tick. */
		if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
			d->bd_rtout = 1;
		break;
	}

#ifdef BIOCGORTIMEOUT
	/*
	 * Get read timeout.  (Old struct timeval50 compat variant.)
	 */
	case BIOCGORTIMEOUT:
	{
		struct timeval50 *tv = addr;

		tv->tv_sec = d->bd_rtout / hz;
		tv->tv_usec = (d->bd_rtout % hz) * tick;
		break;
	}
#endif

#ifdef BIOCSORTIMEOUT
	/*
	 * Set read timeout.  (Old struct timeval50 compat variant.)
	 */
	case BIOCSORTIMEOUT:
	{
		struct timeval50 *tv = addr;

		/* Compute number of ticks.
*/ 968 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick; 969 if ((d->bd_rtout == 0) && (tv->tv_usec != 0)) 970 d->bd_rtout = 1; 971 break; 972 } 973 #endif 974 975 /* 976 * Get read timeout. 977 */ 978 case BIOCGRTIMEOUT: 979 { 980 struct timeval *tv = addr; 981 982 tv->tv_sec = d->bd_rtout / hz; 983 tv->tv_usec = (d->bd_rtout % hz) * tick; 984 break; 985 } 986 /* 987 * Get packet stats. 988 */ 989 case BIOCGSTATS: 990 { 991 struct bpf_stat *bs = addr; 992 993 bs->bs_recv = d->bd_rcount; 994 bs->bs_drop = d->bd_dcount; 995 bs->bs_capt = d->bd_ccount; 996 break; 997 } 998 999 case BIOCGSTATSOLD: 1000 { 1001 struct bpf_stat_old *bs = addr; 1002 1003 bs->bs_recv = d->bd_rcount; 1004 bs->bs_drop = d->bd_dcount; 1005 break; 1006 } 1007 1008 /* 1009 * Set immediate mode. 1010 */ 1011 case BIOCIMMEDIATE: 1012 d->bd_immediate = *(u_int *)addr; 1013 break; 1014 1015 case BIOCVERSION: 1016 { 1017 struct bpf_version *bv = addr; 1018 1019 bv->bv_major = BPF_MAJOR_VERSION; 1020 bv->bv_minor = BPF_MINOR_VERSION; 1021 break; 1022 } 1023 1024 case BIOCGHDRCMPLT: /* get "header already complete" flag */ 1025 *(u_int *)addr = d->bd_hdrcmplt; 1026 break; 1027 1028 case BIOCSHDRCMPLT: /* set "header already complete" flag */ 1029 d->bd_hdrcmplt = *(u_int *)addr ? 
1 : 0; 1030 break; 1031 1032 /* 1033 * Get "see sent packets" flag 1034 */ 1035 case BIOCGSEESENT: 1036 *(u_int *)addr = d->bd_seesent; 1037 break; 1038 1039 /* 1040 * Set "see sent" packets flag 1041 */ 1042 case BIOCSSEESENT: 1043 d->bd_seesent = *(u_int *)addr; 1044 break; 1045 1046 /* 1047 * Set "feed packets from bpf back to input" mode 1048 */ 1049 case BIOCSFEEDBACK: 1050 d->bd_feedback = *(u_int *)addr; 1051 break; 1052 1053 /* 1054 * Get "feed packets from bpf back to input" mode 1055 */ 1056 case BIOCGFEEDBACK: 1057 *(u_int *)addr = d->bd_feedback; 1058 break; 1059 1060 case FIONBIO: /* Non-blocking I/O */ 1061 /* 1062 * No need to do anything special as we use IO_NDELAY in 1063 * bpfread() as an indication of whether or not to block 1064 * the read. 1065 */ 1066 break; 1067 1068 case FIOASYNC: /* Send signal on receive packets */ 1069 d->bd_async = *(int *)addr; 1070 break; 1071 1072 case TIOCSPGRP: /* Process or group to send signals to */ 1073 case FIOSETOWN: 1074 error = fsetown(&d->bd_pgid, cmd, addr); 1075 break; 1076 1077 case TIOCGPGRP: 1078 case FIOGETOWN: 1079 error = fgetown(d->bd_pgid, cmd, addr); 1080 break; 1081 } 1082 KERNEL_UNLOCK_ONE(NULL); 1083 return (error); 1084 } 1085 1086 /* 1087 * Set d's packet filter program to fp. If this file already has a filter, 1088 * free it and replace it. Returns EINVAL for bogus requests. 1089 */ 1090 int 1091 bpf_setf(struct bpf_d *d, struct bpf_program *fp) 1092 { 1093 struct bpf_insn *fcode, *old; 1094 bpfjit_func_t jcode, oldj; 1095 size_t flen, size; 1096 int s; 1097 1098 jcode = NULL; 1099 flen = fp->bf_len; 1100 1101 if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) { 1102 return EINVAL; 1103 } 1104 1105 if (flen) { 1106 /* 1107 * Allocate the buffer, copy the byte-code from 1108 * userspace and validate it. 
1109 */ 1110 size = flen * sizeof(*fp->bf_insns); 1111 fcode = malloc(size, M_DEVBUF, M_WAITOK); 1112 if (copyin(fp->bf_insns, fcode, size) != 0 || 1113 !bpf_validate(fcode, (int)flen)) { 1114 free(fcode, M_DEVBUF); 1115 return EINVAL; 1116 } 1117 membar_consumer(); 1118 if (bpf_jit) 1119 jcode = bpf_jit_generate(NULL, fcode, flen); 1120 } else { 1121 fcode = NULL; 1122 } 1123 1124 s = splnet(); 1125 old = d->bd_filter; 1126 d->bd_filter = fcode; 1127 oldj = d->bd_jitcode; 1128 d->bd_jitcode = jcode; 1129 reset_d(d); 1130 splx(s); 1131 1132 if (old) { 1133 free(old, M_DEVBUF); 1134 } 1135 if (oldj) { 1136 bpf_jit_freecode(oldj); 1137 } 1138 1139 return 0; 1140 } 1141 1142 /* 1143 * Detach a file from its current interface (if attached at all) and attach 1144 * to the interface indicated by the name stored in ifr. 1145 * Return an errno or 0. 1146 */ 1147 static int 1148 bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1149 { 1150 struct bpf_if *bp; 1151 char *cp; 1152 int unit_seen, i, s, error; 1153 1154 /* 1155 * Make sure the provided name has a unit number, and default 1156 * it to '0' if not specified. 1157 * XXX This is ugly ... do this differently? 1158 */ 1159 unit_seen = 0; 1160 cp = ifr->ifr_name; 1161 cp[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */ 1162 while (*cp++) 1163 if (*cp >= '0' && *cp <= '9') 1164 unit_seen = 1; 1165 if (!unit_seen) { 1166 /* Make sure to leave room for the '\0'. */ 1167 for (i = 0; i < (IFNAMSIZ - 1); ++i) { 1168 if ((ifr->ifr_name[i] >= 'a' && 1169 ifr->ifr_name[i] <= 'z') || 1170 (ifr->ifr_name[i] >= 'A' && 1171 ifr->ifr_name[i] <= 'Z')) 1172 continue; 1173 ifr->ifr_name[i] = '0'; 1174 } 1175 } 1176 1177 /* 1178 * Look through attached interfaces for the named one. 
1179 */ 1180 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) { 1181 struct ifnet *ifp = bp->bif_ifp; 1182 1183 if (ifp == 0 || 1184 strcmp(ifp->if_xname, ifr->ifr_name) != 0) 1185 continue; 1186 /* skip additional entry */ 1187 if (bp->bif_driverp != &ifp->if_bpf) 1188 continue; 1189 /* 1190 * We found the requested interface. 1191 * Allocate the packet buffers if we need to. 1192 * If we're already attached to requested interface, 1193 * just flush the buffer. 1194 */ 1195 if (d->bd_sbuf == 0) { 1196 error = bpf_allocbufs(d); 1197 if (error != 0) 1198 return (error); 1199 } 1200 s = splnet(); 1201 if (bp != d->bd_bif) { 1202 if (d->bd_bif) 1203 /* 1204 * Detach if attached to something else. 1205 */ 1206 bpf_detachd(d); 1207 1208 bpf_attachd(d, bp); 1209 } 1210 reset_d(d); 1211 splx(s); 1212 return (0); 1213 } 1214 /* Not found. */ 1215 return (ENXIO); 1216 } 1217 1218 /* 1219 * Copy the interface name to the ifreq. 1220 */ 1221 static void 1222 bpf_ifname(struct ifnet *ifp, struct ifreq *ifr) 1223 { 1224 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 1225 } 1226 1227 static int 1228 bpf_stat(struct file *fp, struct stat *st) 1229 { 1230 struct bpf_d *d = fp->f_data; 1231 1232 (void)memset(st, 0, sizeof(*st)); 1233 KERNEL_LOCK(1, NULL); 1234 st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid); 1235 st->st_atimespec = d->bd_atime; 1236 st->st_mtimespec = d->bd_mtime; 1237 st->st_ctimespec = st->st_birthtimespec = d->bd_btime; 1238 st->st_uid = kauth_cred_geteuid(fp->f_cred); 1239 st->st_gid = kauth_cred_getegid(fp->f_cred); 1240 st->st_mode = S_IFCHR; 1241 KERNEL_UNLOCK_ONE(NULL); 1242 return 0; 1243 } 1244 1245 /* 1246 * Support for poll() system call 1247 * 1248 * Return true iff the specific operation will not block indefinitely - with 1249 * the assumption that it is safe to positively acknowledge a request for the 1250 * ability to write to the BPF device. 1251 * Otherwise, return false but make a note that a selnotify() must be done. 
 */
static int
bpf_poll(struct file *fp, int events)
{
	struct bpf_d *d = fp->f_data;
	int s = splnet();
	int revents;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	/* Writes are always reported ready. */
	revents = events & (POLLOUT | POLLWRNORM);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		     d->bd_slen != 0)) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &d->bd_sel);
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	splx(s);
	return (revents);
}

/*
 * Knote detach: unhook kn from the descriptor's selinfo klist.
 */
static void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splnet();
	SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
}

/*
 * Knote filter: report the number of bytes readable; in immediate
 * mode the store buffer counts as readable too.
 */
static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;
	int rv;

	KERNEL_LOCK(1, NULL);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	rv = (kn->kn_data > 0);
	KERNEL_UNLOCK_ONE(NULL);
	return rv;
}

static const struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

/*
 * kqueue(2) attach: only EVFILT_READ is supported on bpf descriptors.
 */
static int
bpf_kqfilter(struct file *fp, struct knote *kn)
{
	struct bpf_d *d = fp->f_data;
	struct klist *klist;
	int s;

	KERNEL_LOCK(1, NULL);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.sel_klist;
		kn->kn_fop = &bpfread_filtops;
		break;

	default:
		KERNEL_UNLOCK_ONE(NULL);
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		/* Running out of chain before len bytes is a caller bug. */
		if (m == NULL)
			panic("bpf_mcpy");
		count = min(m->m_len, len);
		memcpy(dst, mtod(m, const void *), count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
	return dst_arg;
}

/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * pkt       pointer to the packet, either a data buffer or an mbuf chain
 * buflen    buffer length, if pkt is a data buffer
 * cpfn      a function that can copy pkt into the listener's buffer
 * pktlen    length of the packet
 * rcv       true if packet came in
 */
static inline void
bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
    void *pkt, u_int pktlen, u_int buflen, const bool rcv)
{
	struct timespec ts;
	bpf_args_t args;
	struct bpf_d *d;

	const bpf_ctx_t *bc = NULL;
	bool gottime = false;

	args.pkt = (const uint8_t *)pkt;
	args.wirelen = pktlen;
	args.buflen = buflen;

	/*
	 * Note that the IPL does not have to be raised at this point.
	 * The only problem that could arise here is that if two different
	 * interfaces shared any data.  This is not the case.
	 */
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		u_int slen;

		/* Skip outbound packets if this listener only wants input. */
		if (!d->bd_seesent && !rcv) {
			continue;
		}
		d->bd_rcount++;
		bpf_gstats.bs_recv++;

		/* Prefer the JIT-compiled filter when one exists. */
		if (d->bd_jitcode)
			slen = d->bd_jitcode(bc, &args);
		else
			slen = bpf_filter_ext(bc, d->bd_filter, &args);

		/* Snap length of zero means the filter rejected the packet. */
		if (!slen) {
			continue;
		}
		/* Take the timestamp lazily, once for all matching listeners. */
		if (!gottime) {
			gottime = true;
			nanotime(&ts);
		}
		catchpacket(d, pkt, pktlen, slen, cpfn, &ts);
	}
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
static void
_bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{

	bpf_deliver(bp, memcpy, pkt, pktlen, pktlen, true);
}

/*
 * Incoming linkage from device drivers, when the head of the packet is in
 * a buffer, and the tail is in an mbuf chain.
 */
static void
_bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	u_int pktlen;
	struct mbuf mb;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m) + dlen;

	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	(void)memset(&mb, 0, sizeof(mb));
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;

	bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif != NULL);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
static void
_bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	void *(*cpfn)(void *, const void *, size_t);
	u_int pktlen, buflen;
	void *marg;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m);

	/*
	 * A single contiguous mbuf can be copied with plain memcpy;
	 * otherwise use the chain-walking copier on the mbuf itself.
	 */
	if (pktlen == m->m_len) {
		cpfn = (void *)memcpy;
		marg = mtod(m, void *);
		buflen = pktlen;
	} else {
		cpfn = bpf_mcpy;
		marg = m;
		buflen = 0;
	}

	bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif != NULL);
}

/*
 * We need to prepend the address family as
 * a four byte field.  Cons up a dummy header
 * to pacify bpf.  This is safe because bpf
 * will only read from the mbuf (i.e., it won't
 * try to free it or keep a pointer to it).
 */
static void
_bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m)
{
	struct mbuf m0;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_len = 4;
	m0.m_data = (char *)&af;

	_bpf_mtap(bp, &m0);
}

/*
 * Put the SLIP pseudo-"link header" in place.
 * Note this M_PREPEND() should never fail,
 * since we know we always have enough space
 * in the input buffer.
 */
static void
_bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
{
	int s;
	u_char *hp;

	M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
	if (*m == NULL)
		return;

	hp = mtod(*m, u_char *);
	hp[SLX_DIR] = SLIPDIR_IN;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, *m);
	splx(s);

	/* Strip the pseudo header again before the mbuf goes back. */
	m_adj(*m, SLIP_HDRLEN);
}

/*
 * Put the SLIP pseudo-"link header" in
 * place.  The compressed header is now
 * at the beginning of the mbuf.
 */
static void
_bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
{
	struct mbuf m0;
	u_char *hp;
	int s;

	/* Build the pseudo header in the on-stack mbuf's own data area. */
	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_data = m0.m_dat;
	m0.m_len = SLIP_HDRLEN;

	hp = mtod(&m0, u_char *);

	hp[SLX_DIR] = SLIPDIR_OUT;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, &m0);
	splx(s);
	m_freem(m);
}

/*
 * Compute the per-packet bpf header length for this descriptor,
 * including padding so the payload lands on a longword boundary.
 */
static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen = d->bd_bif->bif_hdrlen;
	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
#ifdef _LP64
	if (d->bd_compat32)
		return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen);
	else
#endif
	return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen);
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Call the wakeup functions if it's time to wakeup
 * a listener (buffer full), "cpfn" is the routine called to do the
 * actual data transfer.  memcpy is passed in to copy contiguous chunks,
 * while bpf_mcpy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
{
	char *h;
	int totlen, curlen, caplen;
	int hdrlen = bpf_hdrlen(d);
	int do_wakeup = 0;

	++d->bd_ccount;
	++bpf_gstats.bs_capt;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.
	 * Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;
	/*
	 * If we adjusted totlen to fit the bufsize, it could be that
	 * totlen is smaller than hdrlen because of the link layer header.
	 */
	caplen = totlen - hdrlen;
	if (caplen < 0)
		caplen = 0;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
#ifdef _LP64
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			++bpf_gstats.bs_drop;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
	h = (char *)d->bd_sbuf + curlen;
#ifdef _LP64
	if (d->bd_compat32) {
		/* 32-bit compat header: timespec narrowed to 32-bit fields. */
		struct bpf_hdr32 *hp32;

		hp32 = (struct bpf_hdr32 *)h;
		hp32->bh_tstamp.tv_sec = ts->tv_sec;
		hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp32->bh_datalen = pktlen;
		hp32->bh_hdrlen = hdrlen;
		hp32->bh_caplen = caplen;
	} else
#endif
	{
		struct bpf_hdr *hp;

		hp = (struct bpf_hdr *)h;
		hp->bh_tstamp.tv_sec = ts->tv_sec;
		hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp->bh_datalen = pktlen;
		hp->bh_hdrlen = hdrlen;
		hp->bh_caplen = caplen;
	}

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(h + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
	 * will cause filt_bpfread() to be called with it adjusted.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK | M_CANFAIL);
	if (!d->bd_fbuf)
		return (ENOBUFS);
	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK | M_CANFAIL);
	if (!d->bd_sbuf) {
		/* Unwind the first allocation on partial failure. */
		free(d->bd_fbuf, M_DEVBUF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it yet hasn't been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_DEVBUF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_DEVBUF);
	}
	if (d->bd_filter)
		free(d->bd_filter, M_DEVBUF);

	if (d->bd_jitcode != NULL) {
		bpf_jit_freecode(d->bd_jitcode);
	}
}

/*
 * Attach an interface to bpf.  dlt is the link layer type;
 * hdrlen is the fixed size of the link header for the specified dlt
 * (variable length headers not yet supported).
 */
static void
_bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;
	bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
	if (bp == 0)
		panic("bpfattach");

	bp->bif_dlist = 0;
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	/* Prepend to the global interface list. */
	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = 0;

	bp->bif_hdrlen = hdrlen;
#if 0
	printf("bpf: %s attached\n", ifp->if_xname);
#endif
}

/*
 * Remove an interface from bpf.
 */
static void
_bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, **pbp;
	struct bpf_d *d;
	int s;

	/* Nuke the vnodes for any open instances */
	LIST_FOREACH(d, &bpf_list, bd_list) {
		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
			/*
			 * Detach the descriptor from an interface now.
			 * It will be free'ed later by close routine.
			 */
			s = splnet();
			d->bd_promisc = 0;	/* we can't touch device. */
			bpf_detachd(d);
			splx(s);
		}
	}

	/* Unlink and free every bpf_if entry for this interface. */
 again:
	for (bp = bpf_iflist, pbp = &bpf_iflist;
	     bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
		if (bp->bif_ifp == ifp) {
			*pbp = bp->bif_next;
			free(bp, M_DEVBUF);
			goto again;
		}
	}
}

/*
 * Change the data link type of an interface.
 */
static void
_bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_driverp == &ifp->if_bpf)
			break;
	}
	/* The interface is expected to already be attached to bpf. */
	if (bp == NULL)
		panic("bpf_change_type");

	bp->bif_dlt = dlt;

	bp->bif_hdrlen = hdrlen;
}

/*
 * Get a list of available data link type of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		/* A NULL bfl_list means the caller only wants the count. */
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return ENOMEM;
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	bfl->bfl_len = n;
	return error;
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int s, error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	/* Nothing to do if the requested DLT is already active. */
	if (d->bd_bif->bif_dlt == dlt)
		return 0;
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return EINVAL;
	s = splnet();
	/* Re-attach to the new DLT entry, preserving promiscuous mode. */
	opromisc = d->bd_promisc;
	bpf_detachd(d);
	bpf_attachd(d, bp);
	reset_d(d);
	if (opromisc) {
		error = ifpromisc(bp->bif_ifp, 1);
		if (error)
			printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
			    bp->bif_ifp->if_xname, error);
		else
			d->bd_promisc = 1;
	}
	splx(s);
	return 0;
}

/*
 * Sysctl handler for net.bpf.maxbufsize: validate the new limit
 * against the compile-time bounds before publishing it.
 */
static int
sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newsize;
	newsize = bpf_maxbufsize;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	/* newp == NULL means a read-only query: nothing to validate. */
	if (error || newp == NULL)
		return (error);

	if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
		return (EINVAL);

	bpf_maxbufsize = newsize;

	return (0);
}

/*
 * Sysctl handler for net.bpf.jit: toggle JIT compilation of filters.
 */
static int
sysctl_net_bpf_jit(SYSCTLFN_ARGS)
{
	bool newval;
	int error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	newval = bpf_jit;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	bpf_jit = newval;

	/*
	 * Do a full sync to publish new bpf_jit value and
	 * update bpfjit_module_ops.bj_generate_code variable.
	 */
	membar_sync();

	if (newval && bpfjit_module_ops.bj_generate_code == NULL) {
		printf("WARNING jit activation is postponed "
		    "until after bpfjit module is loaded\n");
	}

	return 0;
}

/*
 * Sysctl handler for net.bpf.peers: export the state of every open
 * bpf descriptor.  Privileged, since it reveals peer PIDs.
 */
static int
sysctl_net_bpf_peers(SYSCTLFN_ARGS)
{
	int error, elem_count;
	struct bpf_d *dp;
	struct bpf_d_ext dpe;
	size_t len, needed, elem_size, out_size;
	char *sp;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (namelen != 2)
		return (EINVAL);

	/* BPF peers is privileged information. */
	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
	    KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
	if (error)
		return (EPERM);

	len = (oldp != NULL) ? *oldlenp : 0;
	sp = oldp;
	/* name[0] and name[1] carry the caller's element size and count. */
	elem_size = name[0];
	elem_count = name[1];
	out_size = MIN(sizeof(dpe), elem_size);
	needed = 0;

	if (elem_size < 1 || elem_count < 0)
		return (EINVAL);

	mutex_enter(&bpf_mtx);
	LIST_FOREACH(dp, &bpf_list, bd_list) {
		if (len >= elem_size && elem_count > 0) {
#define BPF_EXT(field)	dpe.bde_ ## field = dp->bd_ ## field
			BPF_EXT(bufsize);
			BPF_EXT(promisc);
			BPF_EXT(state);
			BPF_EXT(immediate);
			BPF_EXT(hdrcmplt);
			BPF_EXT(seesent);
			BPF_EXT(pid);
			BPF_EXT(rcount);
			BPF_EXT(dcount);
			BPF_EXT(ccount);
#undef BPF_EXT
			if (dp->bd_bif)
				/*
				 * NOTE(review): the IFNAMSIZ - 1 bound cuts
				 * the name one byte short -- strlcpy already
				 * reserves room for the NUL, so
				 * sizeof(dpe.bde_ifname) looks intended.
				 * Confirm before changing.
				 */
				(void)strlcpy(dpe.bde_ifname,
				    dp->bd_bif->bif_ifp->if_xname,
				    IFNAMSIZ - 1);
			else
				dpe.bde_ifname[0] = '\0';

			error = copyout(&dpe, sp, out_size);
			if (error)
				break;
			sp += elem_size;
			len -= elem_size;
		}
		/* Always account for the space needed, even when skipping. */
		needed += elem_size;
		if (elem_count > 0 && elem_count != INT_MAX)
			elem_count--;
	}
	mutex_exit(&bpf_mtx);

	*oldlenp = needed;

	return (error);
}

static struct sysctllog
*bpf_sysctllog;

/*
 * Create the net.bpf sysctl subtree: jit, maxbufsize, stats and peers.
 */
static void
sysctl_net_bpf_setup(void)
{
	const struct sysctlnode *node;

	node = NULL;
	sysctl_createv(&bpf_sysctllog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "bpf",
	    SYSCTL_DESCR("BPF options"),
	    NULL, 0, NULL, 0,
	    CTL_NET, CTL_CREATE, CTL_EOL);
	if (node != NULL) {
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_BOOL, "jit",
		    SYSCTL_DESCR("Toggle Just-In-Time compilation"),
		    sysctl_net_bpf_jit, 0, &bpf_jit, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_INT, "maxbufsize",
		    SYSCTL_DESCR("Maximum size for data capture buffer"),
		    sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "stats",
		    SYSCTL_DESCR("BPF stats"),
		    NULL, 0, &bpf_gstats, sizeof(bpf_gstats),
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "peers",
		    SYSCTL_DESCR("BPF peers"),
		    sysctl_net_bpf_peers, 0, NULL, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}

}

/* The real in-kernel bpf_ops vector, installed at module init. */
struct bpf_ops bpf_ops_kernel = {
	.bpf_attach = _bpfattach,
	.bpf_detach = _bpfdetach,
	.bpf_change_type = _bpf_change_type,

	.bpf_tap = _bpf_tap,
	.bpf_mtap = _bpf_mtap,
	.bpf_mtap2 = _bpf_mtap2,
	.bpf_mtap_af = _bpf_mtap_af,
	.bpf_mtap_sl_in = _bpf_mtap_sl_in,
	.bpf_mtap_sl_out = _bpf_mtap_sl_out,
};

MODULE(MODULE_CLASS_DRIVER, bpf, NULL);

/*
 * Module control entry point: on init, attach the device and swap in
 * the real bpf_ops vector; unload (FINI) is not currently supported.
 */
static int
bpf_modcmd(modcmd_t cmd, void *arg)
{
	devmajor_t bmajor, cmajor;
	int error;

	bmajor = cmajor = NODEVMAJOR;

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpfilterattach(0);
		error = devsw_attach("bpf", NULL, &bmajor,
		    &bpf_cdevsw, &cmajor);
		if (error == EEXIST)
			error = 0; /* maybe built-in ... improve eventually */
		if (error)
			break;

		/* Swap the stub bpf_ops for the real in-kernel vector. */
		bpf_ops_handover_enter(&bpf_ops_kernel);
		atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
		bpf_ops_handover_exit();
		sysctl_net_bpf_setup();
		break;

	case MODULE_CMD_FINI:
		/*
		 * While there is no reference counting for bpf callers,
		 * unload could at least in theory be done similarly to
		 * system call disestablishment.  This should even be
		 * a little simpler:
		 *
		 * 1) replace op vector with stubs
		 * 2) post update to all cpus with xc
		 * 3) check that nobody is in bpf anymore
		 *    (it's doubtful we'd want something like l_sysent,
		 *    but we could do something like *signed* percpu
		 *    counters.  if the sum is 0, we're good).
		 * 4) if fail, unroll changes
		 *
		 * NOTE: change won't be atomic to the outside.  some
		 * packets may be not captured even if unload is
		 * not successful.  I think packet capture not working
		 * is a perfectly logical consequence of trying to
		 * disable packet capture.
		 */
		error = EOPNOTSUPP;
		/* insert sysctl teardown */
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}