/*	$NetBSD: bpf.c,v 1.172 2012/09/27 18:28:56 alnsn Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 * static char rcsid[] =
 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.172 2012/09/27 18:28:56 alnsn Exp $");

#if defined(_KERNEL_OPT)
#include "opt_bpf.h"
#include "sl.h"
#include "strip.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/buf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/module.h>
#include <sys/once.h>
#include <sys/atomic.h>

#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/uio.h>

#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/poll.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>

#include <net/if.h>
#include <net/slip.h>

#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <net/if_arc.h>
#include <net/if_ether.h>

#include <netinet/in.h>
#include <netinet/if_inarp.h>

#include <compat/sys/sockio.h>

#ifndef BPF_BUFSIZE
/*
 * 4096 is too small for FDDI frames.  8192 is too small for gigabit Ethernet
 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
 */
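/*
 * Illustrative (editorial) note: a consumer that expects jumbo frames can
 * raise the per-descriptor size with BIOCSBLEN, but only before binding to
 * an interface; bpf_ioctl() below rejects BIOCSBLEN once bd_bif is set,
 * and clamps the request to bpf_maxbufsize.  A hypothetical sketch:
 *
 *	u_int blen = 65536;
 *	(void)ioctl(fd, BIOCSBLEN, &blen);	// blen returns clamped value
 *	(void)ioctl(fd, BIOCSETIF, &ifr);
 */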
# define BPF_BUFSIZE 32768
#endif

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
 * XXX the default values should be computed dynamically based
 * on available memory size and available mbuf clusters.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_DFLTBUFSIZE;	/* XXX set dynamically, see above */

/*
 * Global BPF statistics returned by net.bpf.stats sysctl.
 */
struct bpf_stat	bpf_gstats;

/*
 * Use a mutex to avoid a race condition between gathering the stats/peers
 * and opening/closing the device.
 */
static kmutex_t bpf_mtx;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 * bpf_dtab holds the descriptors, indexed by minor device #.
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_list;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_deliver(struct bpf_if *,
		    void *(*cpfn)(void *, const void *, size_t),
		    void *, u_int, u_int, const bool);
static void	bpf_freed(struct bpf_d *);
static void	bpf_ifname(struct ifnet *, struct ifreq *);
static void	*bpf_mcpy(void *, const void *, size_t);
static int	bpf_movein(struct uio *, int, uint64_t,
		    struct mbuf **, struct sockaddr *);
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static inline void
		bpf_wakeup(struct bpf_d *);
static int	bpf_hdrlen(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
    void *(*)(void *, const void *, size_t), struct timespec *);
static void	reset_d(struct bpf_d *);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);

static int	bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t,
    int);
static int	bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t,
    int);
static int	bpf_ioctl(struct file *, u_long, void *);
static int	bpf_poll(struct file *, int);
static int	bpf_stat(struct file *, struct stat *);
static int	bpf_close(struct file *);
static int	bpf_kqfilter(struct file *, struct knote *);
static void	bpf_softintr(void *);

static const struct fileops bpf_fileops = {
	.fo_read = bpf_read,
	.fo_write = bpf_write,
	.fo_ioctl = bpf_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = bpf_poll,
	.fo_stat = bpf_stat,
	.fo_close = bpf_close,
	.fo_kqfilter = bpf_kqfilter,
	.fo_restart = fnullop_restart,
};

dev_type_open(bpfopen);

const struct cdevsw bpf_cdevsw = {
	bpfopen, noclose, noread, nowrite, noioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_OTHER
};

static int
bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp,
	   struct sockaddr *sockp)
{
	struct mbuf *m;
	int error;
	size_t len;
	size_t hlen;
	size_t align;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
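	/*
	 * Editorial example of the "align" values below: for DLT_EN10MB
	 * the 14-byte Ethernet header is copied out into sockp and then
	 * stripped, so starting the data 2 bytes into the mbuf leaves the
	 * IP header that follows the link header on a 4-byte boundary.
	 */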
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		align = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		/* 6(dst)+6(src)+2(type) */
		hlen = sizeof(struct ether_header);
		align = 2;
		break;

	case DLT_ARCNET:
		sockp->sa_family = AF_UNSPEC;
		hlen = ARC_HDRLEN;
		align = 5;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_LINK;
		/* XXX 4(FORMAC)+6(dst)+6(src) */
		hlen = 16;
		align = 0;
		break;

	case DLT_ECONET:
		sockp->sa_family = AF_UNSPEC;
		hlen = 6;
		align = 2;
		break;

	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		align = 0;
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	/*
	 * If there aren't enough bytes for a link level header or the
	 * packet length exceeds the interface mtu, return an error.
	 */
	if (len < hlen || len - hlen > mtu)
		return (EMSGSIZE);

	/*
	 * XXX Avoid complicated buffer chaining ---
	 * bail if it won't fit in a single mbuf.
	 * (Take into account possible alignment bytes)
	 */
	if (len + align > MCLBYTES)
		return (EIO);

	m = m_gethdr(M_WAIT, MT_DATA);
	m->m_pkthdr.rcvif = 0;
	m->m_pkthdr.len = (int)(len - hlen);
	if (len + align > MHLEN) {
		m_clget(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	/* Ensure the data is properly aligned */
	if (align > 0) {
		m->m_data += align;
		m->m_len -= (int)align;
	}

	error = uiomove(mtod(m, void *), len, uio);
	if (error)
		goto bad;
	if (hlen != 0) {
		memcpy(sockp->sa_data, mtod(m, void *), hlen);
		m->m_data += hlen; /* XXX */
		len -= hlen;
	}
	m->m_len = (int)len;
	*mp = m;
	return (0);

bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Must be called at splnet.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		d->bd_promisc = 0;
		/*
		 * Take device out of promiscuous mode.  Since we were
		 * able to enter promiscuous mode, we should be able
		 * to turn it off.  But we can get an error if
		 * the interface was configured down, so only panic
		 * on an unexpected error.
		 */
		error = ifpromisc(bp->bif_ifp, 0);
		if (error && error != EINVAL)
			panic("%s: ifpromisc failed: %d", __func__, error);
	}
	/* Remove d from the interface's descriptor list. */
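	/*
	 * Editorial note: the walk below keeps a pointer to the previous
	 * "next" field rather than to the element itself, so unlinking d
	 * is a single store with no special case for the list head.
	 */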
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == 0)
			panic("%s: descriptor not in list", __func__);
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == 0)
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*d->bd_bif->bif_driverp = 0;
	d->bd_bif = 0;
}

static int
doinit(void)
{

	mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);

	LIST_INIT(&bpf_list);

	bpf_gstats.bs_recv = 0;
	bpf_gstats.bs_drop = 0;
	bpf_gstats.bs_capt = 0;

	return 0;
}

/*
 * bpfilterattach() is called at boot time.
 */
/* ARGSUSED */
void
bpfilterattach(int n)
{
	static ONCE_DECL(control);

	RUN_ONCE(&control, doinit);
}

/*
 * Open the bpf device.  Clones.
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct bpf_d *d;
	struct file *fp;
	int error, fd;

	/* fd_allocfile() will use the descriptor for us. */
	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;

	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO);
	d->bd_bufsize = bpf_bufsize;
	d->bd_seesent = 1;
	d->bd_feedback = 0;
	d->bd_pid = l->l_proc->p_pid;
#ifdef _LP64
	if (curproc->p_flag & PK_32)
		d->bd_compat32 = 1;
#endif
	getnanotime(&d->bd_btime);
	d->bd_atime = d->bd_mtime = d->bd_btime;
	callout_init(&d->bd_callout, 0);
	selinit(&d->bd_sel);
	d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d);

	mutex_enter(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
	mutex_exit(&bpf_mtx);

	return fd_clone(fp, fd, flag, &bpf_fileops, d);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static int
bpf_close(struct file *fp)
{
	struct bpf_d *d = fp->f_data;
	int s;

	KERNEL_LOCK(1, NULL);

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	d->bd_pid = curproc->p_pid;

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	if (d->bd_bif)
		bpf_detachd(d);
	splx(s);
	bpf_freed(d);
	mutex_enter(&bpf_mtx);
	LIST_REMOVE(d, bd_list);
	mutex_exit(&bpf_mtx);
	callout_destroy(&d->bd_callout);
	seldestroy(&d->bd_sel);
	softint_disestablish(d->bd_sih);
	free(d, M_DEVBUF);
	fp->f_data = NULL;

	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = 0;
/*
 * bpfread - read next chunk of packets from buffers
 */
static int
bpf_read(struct file *fp, off_t *offp, struct uio *uio,
	 kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_data;
	int timed_out;
	int error;
	int s;

	getnanotime(&d->bd_atime);
	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
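	/*
	 * A minimal (hypothetical) consumer honouring this restriction:
	 *
	 *	u_int blen;
	 *	(void)ioctl(fd, BIOCGBLEN, &blen);
	 *	buf = malloc(blen);
	 *	n = read(fd, buf, blen);	// any other size yields EINVAL
	 *
	 * Each successful read() then returns a sequence of
	 * BPF_WORDALIGN()ed bpf_hdr records, see catchpacket() below.
	 */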
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if (fp->f_flag & FNONBLOCK) {
			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (EWOULDBLOCK);
			}
			ROTATE_BUFFERS(d);
			break;
		}

		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		error = tsleep(d, PRINET|PCATCH, "bpf",
				d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			splx(s);
			KERNEL_UNLOCK_ONE(NULL);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				splx(s);
				KERNEL_UNLOCK_ONE(NULL);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (error != 0)
			goto done;
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	splx(s);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	s = splnet();
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = 0;
	d->bd_hlen = 0;
done:
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}


/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
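/*
 * Editorial note: three wakeup channels are serviced below: tsleep()ers
 * in bpf_read() via wakeup(), poll()/kqueue waiters via selnotify(), and
 * SIGIO consumers via the soft interrupt, which calls fownsignal().
 */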
static inline void
bpf_wakeup(struct bpf_d *d)
{
	wakeup(d);
	if (d->bd_async)
		softint_schedule(d->bd_sih);
	selnotify(&d->bd_sel, 0, 0);
}

static void
bpf_softintr(void *cookie)
{
	struct bpf_d *d;

	d = cookie;
	if (d->bd_async)
		fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = arg;
	int s;

	s = splnet();
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	splx(s);
}


static int
bpf_write(struct file *fp, off_t *offp, struct uio *uio,
	  kauth_cred_t cred, int flags)
{
	struct bpf_d *d = fp->f_data;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	int error, s;
	static struct sockaddr_storage dst;

	m = NULL;	/* XXX gcc */

	KERNEL_LOCK(1, NULL);

	if (d->bd_bif == 0) {
		KERNEL_UNLOCK_ONE(NULL);
		return (ENXIO);
	}
	getnanotime(&d->bd_mtime);

	ifp = d->bd_bif->bif_ifp;

	if (uio->uio_resid == 0) {
		KERNEL_UNLOCK_ONE(NULL);
		return (0);
	}

	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m,
		(struct sockaddr *) &dst);
	if (error) {
		KERNEL_UNLOCK_ONE(NULL);
		return (error);
	}

	if (m->m_pkthdr.len > ifp->if_mtu) {
		KERNEL_UNLOCK_ONE(NULL);
		m_freem(m);
		return (EMSGSIZE);
	}

	if (d->bd_hdrcmplt)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, 0, M_COPYALL, M_NOWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (1 /*d->bd_direction == BPF_D_INOUT*/)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	s = splsoftnet();
	error = (*ifp->if_output)(ifp, m, (struct sockaddr *) &dst, NULL);

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		m_freem(mc);
	}
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Should be called at splnet.
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = 0;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_ccount = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSBLEN		Set buffer len.
 *  BIOCSETF		Set link layer read filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGDLTLIST	Get list of supported link layer types.
 *  BIOCSDLT		Set link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCSFEEDBACK	Set packet feedback mode.
 *  BIOCGFEEDBACK	Get packet feedback mode.
 *  BIOCGSEESENT	Get "see sent packets" mode.
 *  BIOCSSEESENT	Set "see sent packets" mode.
 */
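/*
 * A sketch of the usual userland setup driven through these ioctls
 * (illustrative only; error handling omitted, interface name hypothetical):
 *
 *	struct ifreq ifr;
 *	u_int on = 1;
 *	int fd = open("/dev/bpf", O_RDWR);
 *	strlcpy(ifr.ifr_name, "wm0", sizeof(ifr.ifr_name));
 *	(void)ioctl(fd, BIOCSETIF, &ifr);	// bind to an interface
 *	(void)ioctl(fd, BIOCIMMEDIATE, &on);	// wake the reader per packet
 */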
/* ARGSUSED */
static int
bpf_ioctl(struct file *fp, u_long cmd, void *addr)
{
	struct bpf_d *d = fp->f_data;
	int s, error = 0;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;
#ifdef _LP64
	if (curproc->p_flag & PK_32)
		d->bd_compat32 = 1;
	else
		d->bd_compat32 = 0;
#endif

	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	splx(s);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			s = splnet();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splnet();
		reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, addr);
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
#ifdef OBIOCGETIF
	case OBIOCGETIF:
#endif
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, addr);
		break;

	/*
	 * Set interface.
	 */
#ifdef OBIOCSETIF
	case OBIOCSETIF:
#endif
	case BIOCSETIF:
		error = bpf_setif(d, addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}

#ifdef BIOCGORTIMEOUT
	/*
	 * Get read timeout.
	 */
	case BIOCGORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
#endif

#ifdef BIOCSORTIMEOUT
	/*
	 * Set read timeout.
	 */
	case BIOCSORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			/* Compute number of ticks. */
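			/*
			 * Worked example (editorial): with hz = 100, tick
			 * is 10000 microseconds, so tv = { 1, 500000 }
			 * yields bd_rtout = 1*100 + 500000/10000 = 150.
			 */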
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}
#endif

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			bs->bs_capt = d->bd_ccount;
			break;
		}

	case BIOCGSTATSOLD:
		{
			struct bpf_stat_old *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	/*
	 * Set "feed packets from bpf back to input" mode
	 */
	case BIOCSFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	/*
	 * Get "feed packets from bpf back to input" mode
	 */
	case BIOCGFEEDBACK:
		*(u_int *)addr = d->bd_feedback;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/*
		 * No need to do anything special as we check FNONBLOCK in
		 * bpf_read() to decide whether or not to block the read.
		 */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case TIOCSPGRP:		/* Process or group to send signals to */
	case FIOSETOWN:
		error = fsetown(&d->bd_pgid, cmd, addr);
		break;

	case TIOCGPGRP:
	case FIOGETOWN:
		error = fgetown(d->bd_pgid, cmd, addr);
		break;
	}
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp)
{
	struct bpf_insn *fcode, *old;
	size_t flen, size;
	int s;

	flen = fp->bf_len;

	if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) {
		return EINVAL;
	}

	if (flen) {
		/*
		 * Allocate the buffer, copy the byte-code from
		 * userspace and validate it.
		 */
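		/*
		 * For reference (editorial): the smallest valid program
		 * is a single return instruction, e.g. the
		 * accept-everything filter
		 *
		 *	struct bpf_insn insns[] = {
		 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
		 *	};
		 *	struct bpf_program prog = { 1, insns };
		 *
		 * which passes bpf_validate() and matches every packet.
		 */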
		size = flen * sizeof(*fp->bf_insns);
		fcode = malloc(size, M_DEVBUF, M_WAITOK);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    !bpf_validate(fcode, (int)flen)) {
			free(fcode, M_DEVBUF);
			return EINVAL;
		}
	} else {
		fcode = NULL;
	}

	s = splnet();
	old = d->bd_filter;
	d->bd_filter = fcode;
	reset_d(d);
	splx(s);

	if (old) {
		free(old, M_DEVBUF);
	}

	return 0;
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	char *cp;
	int unit_seen, i, s, error;

	/*
	 * Make sure the provided name has a unit number, and default
	 * it to '0' if not specified.
	 * XXX This is ugly ... do this differently?
	 */
	unit_seen = 0;
	cp = ifr->ifr_name;
	cp[sizeof(ifr->ifr_name) - 1] = '\0';	/* sanity */
	while (*cp++)
		if (*cp >= '0' && *cp <= '9')
			unit_seen = 1;
	if (!unit_seen) {
		/* Make sure to leave room for the '\0'. */
		for (i = 0; i < (IFNAMSIZ - 1); ++i) {
			if ((ifr->ifr_name[i] >= 'a' &&
			     ifr->ifr_name[i] <= 'z') ||
			    (ifr->ifr_name[i] >= 'A' &&
			     ifr->ifr_name[i] <= 'Z'))
				continue;
			ifr->ifr_name[i] = '0';
		}
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == 0) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		s = splnet();
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		reset_d(d);
		splx(s);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}

/*
 * Copy the interface name to the ifreq.
 */
static void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
}

static int
bpf_stat(struct file *fp, struct stat *st)
{
	struct bpf_d *d = fp->f_data;

	(void)memset(st, 0, sizeof(*st));
	KERNEL_LOCK(1, NULL);
	st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
	st->st_atimespec = d->bd_atime;
	st->st_mtimespec = d->bd_mtime;
	st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	st->st_mode = S_IFCHR;
	KERNEL_UNLOCK_ONE(NULL);
	return 0;
}

/*
 * Support for poll() system call
 *
 * Return true iff the specific operation will not block indefinitely - with
 * the assumption that it is safe to positively acknowledge a request for the
 * ability to write to the BPF device.
 * Otherwise, return false but make a note that a selnotify() must be done.
 */
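/*
 * Editorial note: POLLOUT/POLLWRNORM are thus always reported ready, while
 * POLLIN mirrors the FIONREAD logic; when nothing is pending and a read
 * timeout is set, bd_callout is armed so that a sleeping poll() is
 * eventually woken via bpf_timed_out() -> bpf_wakeup() -> selnotify().
 */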
static int
bpf_poll(struct file *fp, int events)
{
	struct bpf_d *d = fp->f_data;
	int s = splnet();
	int revents;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	revents = events & (POLLOUT | POLLWRNORM);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		     d->bd_slen != 0)) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &d->bd_sel);
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
					      bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	splx(s);
	return (revents);
}

static void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splnet();
	SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;
	int rv;

	KERNEL_LOCK(1, NULL);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	rv = (kn->kn_data > 0);
	KERNEL_UNLOCK_ONE(NULL);
	return rv;
}

static const struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

static int
bpf_kqfilter(struct file *fp, struct knote *kn)
{
	struct bpf_d *d = fp->f_data;
	struct klist *klist;
	int s;

	KERNEL_LOCK(1, NULL);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.sel_klist;
		kn->kn_fop = &bpfread_filtops;
		break;

	default:
		KERNEL_UNLOCK_ONE(NULL);
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcpy");
		count = min(m->m_len, len);
		memcpy(dst, mtod(m, const void *), count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
	return dst_arg;
}

/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * pkt       pointer to the packet, either a data buffer or an mbuf chain
 * buflen    buffer length, if pkt is a data buffer
 * cpfn      a function that can copy pkt into the listener's buffer
 * pktlen    length of the packet
 * rcv       true if packet came in
 */
static inline void
bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
	    void *pkt, u_int pktlen, u_int buflen, const bool rcv)
{
	struct bpf_d *d;
	struct timespec ts;
	bool gottime = false;

	/*
	 * Note that the IPL does not have to be raised at this point.
	 * The only problem that could arise here is that if two different
	 * interfaces shared any data.  This is not the case.
	 */
	for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
		u_int slen;

		if (!d->bd_seesent && !rcv) {
			continue;
		}
		d->bd_rcount++;
		bpf_gstats.bs_recv++;

		slen = bpf_filter(d->bd_filter, pkt, pktlen, buflen);
		if (!slen) {
			continue;
		}
		if (!gottime) {
			gottime = true;
			nanotime(&ts);
		}
		catchpacket(d, pkt, pktlen, slen, cpfn, &ts);
	}
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
static void
_bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{

	bpf_deliver(bp, memcpy, pkt, pktlen, pktlen, true);
}

/*
 * Incoming linkage from device drivers, when the head of the packet is in
 * a buffer, and the tail is in an mbuf chain.
 */
static void
_bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	u_int pktlen;
	struct mbuf mb;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m) + dlen;

	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	(void)memset(&mb, 0, sizeof(mb));
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;

	bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif != NULL);
}

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
static void
_bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	void *(*cpfn)(void *, const void *, size_t);
	u_int pktlen, buflen;
	void *marg;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m);

	if (pktlen == m->m_len) {
		cpfn = (void *)memcpy;
		marg = mtod(m, void *);
		buflen = pktlen;
	} else {
		cpfn = bpf_mcpy;
		marg = m;
		buflen = 0;
	}

	bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif != NULL);
}

/*
 * We need to prepend the address family as
 * a four byte field.  Cons up a dummy header
 * to pacify bpf.  This is safe because bpf
 * will only read from the mbuf (i.e., it won't
 * try to free it or keep a pointer to it).
 */
static void
_bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m)
{
	struct mbuf m0;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_len = 4;
	m0.m_data = (char *)&af;

	_bpf_mtap(bp, &m0);
}

/*
 * Put the SLIP pseudo-"link header" in place.
 * Note this M_PREPEND() should never fail,
 * since we know we always have enough space
 * in the input buffer.
 */
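/*
 * Editorial sketch of the prepended pseudo-header seen by readers: the
 * byte at SLX_DIR holds the direction (SLIPDIR_IN or SLIPDIR_OUT) and the
 * CHDR_LEN bytes at SLX_CHDR hold a copy of the compressed TCP/IP header,
 * as filled in by the two functions below.
 */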
static void
_bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
{
	int s;
	u_char *hp;

	M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
	if (*m == NULL)
		return;

	hp = mtod(*m, u_char *);
	hp[SLX_DIR] = SLIPDIR_IN;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, *m);
	splx(s);

	m_adj(*m, SLIP_HDRLEN);
}

/*
 * Put the SLIP pseudo-"link header" in
 * place.  The compressed header is now
 * at the beginning of the mbuf.
 */
static void
_bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
{
	struct mbuf m0;
	u_char *hp;
	int s;

	m0.m_flags = 0;
	m0.m_next = m;
	m0.m_data = m0.m_dat;
	m0.m_len = SLIP_HDRLEN;

	hp = mtod(&m0, u_char *);

	hp[SLX_DIR] = SLIPDIR_OUT;
	(void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);

	s = splnet();
	_bpf_mtap(bp, &m0);
	splx(s);
	m_freem(m);
}

static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen = d->bd_bif->bif_hdrlen;
	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
#ifdef _LP64
	if (d->bd_compat32)
		return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen);
	else
#endif
		return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen);
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  Call the wakeup functions if it's time to wake up
 * a listener (buffer full); "cpfn" is the routine called to do the
 * actual data transfer.  memcpy is passed in to copy contiguous chunks,
 * while bpf_mcpy is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
	    void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
{
	struct bpf_hdr *hp;
#ifdef _LP64
	struct bpf_hdr32 *hp32;
#endif
	int totlen, curlen;
	int hdrlen = bpf_hdrlen(d);
	int do_wakeup = 0;

	++d->bd_ccount;
	++bpf_gstats.bs_capt;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater than or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
#ifdef _LP64
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			++bpf_gstats.bs_drop;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header.
	 */
#ifdef _LP64
	if (d->bd_compat32) {
		hp32 = (struct bpf_hdr32 *)((char *)d->bd_sbuf + curlen);
		hp32->bh_tstamp.tv_sec = ts->tv_sec;
		hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp32->bh_datalen = pktlen;
		hp32->bh_hdrlen = hdrlen;
		/*
		 * Copy the packet data into the store buffer and update
		 * its length.
		 */
		(*cpfn)((u_char *)hp32 + hdrlen, pkt,
		    (hp32->bh_caplen = totlen - hdrlen));
	} else
#endif
	{
		hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen);
		hp->bh_tstamp.tv_sec = ts->tv_sec;
		hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
		hp->bh_datalen = pktlen;
		hp->bh_hdrlen = hdrlen;
		/*
		 * Copy the packet data into the store buffer and update
		 * its length.
		 */
		(*cpfn)((u_char *)hp + hdrlen, pkt,
		    (hp->bh_caplen = totlen - hdrlen));
	}
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
	 * will cause filt_bpfread() to be called with it adjusted.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{

	d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK | M_CANFAIL);
	if (!d->bd_fbuf)
		return (ENOBUFS);
	d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK | M_CANFAIL);
	if (!d->bd_sbuf) {
		free(d->bd_fbuf, M_DEVBUF);
		return (ENOBUFS);
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	return (0);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_sbuf != NULL) {
		free(d->bd_sbuf, M_DEVBUF);
		if (d->bd_hbuf != NULL)
			free(d->bd_hbuf, M_DEVBUF);
		if (d->bd_fbuf != NULL)
			free(d->bd_fbuf, M_DEVBUF);
	}
	if (d->bd_filter)
		free(d->bd_filter, M_DEVBUF);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type;
 * hdrlen is the fixed size of the link header for the specified dlt
 * (variable length headers not yet supported).
 */
static void
_bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
	if (bp == 0)
		panic("bpfattach");

	bp->bif_dlist = 0;
	bp->bif_driverp = driverp;
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;

	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	*bp->bif_driverp = 0;

	bp->bif_hdrlen = hdrlen;
#if 0
	printf("bpf: %s attached\n", ifp->if_xname);
#endif
}

/*
 * Remove an interface from bpf.
 */
static void
_bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, **pbp;
	struct bpf_d *d;
	int s;

	/* Nuke the vnodes for any open instances */
	LIST_FOREACH(d, &bpf_list, bd_list) {
		if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
			/*
			 * Detach the descriptor from an interface now.
			 * It will be freed later by the close routine.
			 */
			s = splnet();
			d->bd_promisc = 0;	/* we can't touch device. */
			bpf_detachd(d);
			splx(s);
		}
	}

again:
	for (bp = bpf_iflist, pbp = &bpf_iflist;
	     bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) {
		if (bp->bif_ifp == ifp) {
			*pbp = bp->bif_next;
			free(bp, M_DEVBUF);
			goto again;
		}
	}
}

/*
 * Change the data link type of an interface.
 */
static void
_bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_driverp == &ifp->if_bpf)
			break;
	}
	if (bp == NULL)
		panic("bpf_change_type");

	bp->bif_dlt = dlt;

	bp->bif_hdrlen = hdrlen;
}

/*
 * Get a list of the data link types available on the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len)
				return ENOMEM;
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	bfl->bfl_len = n;
	return error;
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int s, error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return 0;
	ifp = d->bd_bif->bif_ifp;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return EINVAL;
	s = splnet();
	opromisc = d->bd_promisc;
	bpf_detachd(d);
	bpf_attachd(d, bp);
	reset_d(d);
	if (opromisc) {
		error = ifpromisc(bp->bif_ifp, 1);
		if (error)
			printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
			    bp->bif_ifp->if_xname, error);
		else
			d->bd_promisc = 1;
	}
	splx(s);
	return 0;
}

static int
sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newsize;
	newsize = bpf_maxbufsize;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
		return (EINVAL);

	bpf_maxbufsize = newsize;

	return (0);
}

static int
sysctl_net_bpf_peers(SYSCTLFN_ARGS)
{
	int error, elem_count;
	struct bpf_d *dp;
	struct bpf_d_ext dpe;
	size_t len, needed, elem_size, out_size;
	char *sp;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (namelen != 2)
		return (EINVAL);

	/* BPF peers is privileged information. */
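	/*
	 * Editorial note: the two name components supplied by the caller
	 * are name[0] = userland's idea of sizeof(struct bpf_d_ext) and
	 * name[1] = the maximum number of records to return; copying out
	 * only MIN(elem_size, sizeof(dpe)) bytes keeps older userland safe.
	 */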
	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
	    KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
	if (error)
		return (EPERM);

	len = (oldp != NULL) ? *oldlenp : 0;
	sp = oldp;
	elem_size = name[0];
	elem_count = name[1];
	out_size = MIN(sizeof(dpe), elem_size);
	needed = 0;

	if (elem_size < 1 || elem_count < 0)
		return (EINVAL);

	mutex_enter(&bpf_mtx);
	LIST_FOREACH(dp, &bpf_list, bd_list) {
		if (len >= elem_size && elem_count > 0) {
#define BPF_EXT(field)	dpe.bde_ ## field = dp->bd_ ## field
			BPF_EXT(bufsize);
			BPF_EXT(promisc);
			BPF_EXT(state);
			BPF_EXT(immediate);
			BPF_EXT(hdrcmplt);
			BPF_EXT(seesent);
			BPF_EXT(pid);
			BPF_EXT(rcount);
			BPF_EXT(dcount);
			BPF_EXT(ccount);
#undef BPF_EXT
			if (dp->bd_bif)
				(void)strlcpy(dpe.bde_ifname,
				    dp->bd_bif->bif_ifp->if_xname,
				    IFNAMSIZ - 1);
			else
				dpe.bde_ifname[0] = '\0';

			error = copyout(&dpe, sp, out_size);
			if (error)
				break;
			sp += elem_size;
			len -= elem_size;
		}
		needed += elem_size;
		if (elem_count > 0 && elem_count != INT_MAX)
			elem_count--;
	}
	mutex_exit(&bpf_mtx);

	*oldlenp = needed;

	return (error);
}

static struct sysctllog *bpf_sysctllog;
static void
sysctl_net_bpf_setup(void)
{
	const struct sysctlnode *node;

	sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "net", NULL,
		       NULL, 0, NULL, 0,
		       CTL_NET, CTL_EOL);

	node = NULL;
	sysctl_createv(&bpf_sysctllog, 0, NULL, &node,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "bpf",
		       SYSCTL_DESCR("BPF options"),
		       NULL, 0, NULL, 0,
		       CTL_NET, CTL_CREATE, CTL_EOL);
	if (node != NULL) {
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
			CTLTYPE_INT, "maxbufsize",
			SYSCTL_DESCR("Maximum size for data capture buffer"),
			sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
			CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
			CTLFLAG_PERMANENT,
			CTLTYPE_STRUCT, "stats",
			SYSCTL_DESCR("BPF stats"),
			NULL, 0, &bpf_gstats, sizeof(bpf_gstats),
			CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
			CTLFLAG_PERMANENT,
			CTLTYPE_STRUCT, "peers",
			SYSCTL_DESCR("BPF peers"),
			sysctl_net_bpf_peers, 0, NULL, 0,
			CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}

struct bpf_ops bpf_ops_kernel = {
	.bpf_attach =		_bpfattach,
	.bpf_detach =		_bpfdetach,
	.bpf_change_type =	_bpf_change_type,

	.bpf_tap =		_bpf_tap,
	.bpf_mtap =		_bpf_mtap,
	.bpf_mtap2 =		_bpf_mtap2,
	.bpf_mtap_af =		_bpf_mtap_af,
	.bpf_mtap_sl_in =	_bpf_mtap_sl_in,
	.bpf_mtap_sl_out =	_bpf_mtap_sl_out,
};

MODULE(MODULE_CLASS_DRIVER, bpf, NULL);

static int
bpf_modcmd(modcmd_t cmd, void *arg)
{
	devmajor_t bmajor, cmajor;
	int error;

	bmajor = cmajor = NODEVMAJOR;

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpfilterattach(0);
		error = devsw_attach("bpf", NULL, &bmajor,
		    &bpf_cdevsw, &cmajor);
		if (error == EEXIST)
			error = 0; /* maybe built-in ... improve eventually */
		if (error)
			break;

		bpf_ops_handover_enter(&bpf_ops_kernel);
		atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
		bpf_ops_handover_exit();
		sysctl_net_bpf_setup();
		break;

	case MODULE_CMD_FINI:
		/*
		 * While there is no reference counting for bpf callers,
		 * unload could at least in theory be done similarly to
		 * system call disestablishment.  This should even be
		 * a little simpler:
		 *
		 * 1) replace op vector with stubs
		 * 2) post update to all cpus with xc
		 * 3) check that nobody is in bpf anymore
		 *    (it's doubtful we'd want something like l_sysent,
		 *    but we could do something like *signed* percpu
		 *    counters.  if the sum is 0, we're good).
		 * 4) if fail, unroll changes
		 *
		 * NOTE: change won't be atomic to the outside.  some
		 * packets may be not captured even if unload is
		 * not successful.  I think packet capture not working
		 * is a perfectly logical consequence of trying to
		 * disable packet capture.
		 */
		error = EOPNOTSUPP;
		/* insert sysctl teardown */
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}