1 /* $NetBSD: bpf.c,v 1.155 2010/01/26 01:06:23 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from the Stanford/CMU enet packet filter, 8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 10 * Berkeley Laboratory. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 37 * static char rcsid[] = 38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp "; 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.155 2010/01/26 01:06:23 pooka Exp $"); 43 44 #if defined(_KERNEL_OPT) 45 #include "opt_bpf.h" 46 #include "sl.h" 47 #include "strip.h" 48 #endif 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/mbuf.h> 53 #include <sys/buf.h> 54 #include <sys/time.h> 55 #include <sys/proc.h> 56 #include <sys/ioctl.h> 57 #include <sys/conf.h> 58 #include <sys/vnode.h> 59 #include <sys/queue.h> 60 #include <sys/stat.h> 61 #include <sys/module.h> 62 #include <sys/once.h> 63 #include <sys/atomic.h> 64 65 #include <sys/file.h> 66 #include <sys/filedesc.h> 67 #include <sys/tty.h> 68 #include <sys/uio.h> 69 70 #include <sys/protosw.h> 71 #include <sys/socket.h> 72 #include <sys/errno.h> 73 #include <sys/kernel.h> 74 #include <sys/poll.h> 75 #include <sys/sysctl.h> 76 #include <sys/kauth.h> 77 78 #include <net/if.h> 79 #include <net/slip.h> 80 81 #include <net/bpf.h> 82 #include <net/bpfdesc.h> 83 84 #include <net/if_arc.h> 85 #include <net/if_ether.h> 86 87 #include <netinet/in.h> 88 #include <netinet/if_inarp.h> 89 90 91 #include <compat/sys/sockio.h> 92 93 #ifndef BPF_BUFSIZE 94 /* 95 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet 96 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k). 97 */ 98 # define BPF_BUFSIZE 32768 99 #endif 100 101 #define PRINET 26 /* interruptible */ 102 103 /* 104 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able. 105 * XXX the default values should be computed dynamically based 106 * on available memory size and available mbuf clusters. 107 */ 108 int bpf_bufsize = BPF_BUFSIZE; 109 int bpf_maxbufsize = BPF_DFLTBUFSIZE; /* XXX set dynamically, see above */ 110 111 112 /* 113 * Global BPF statistics returned by net.bpf.stats sysctl. 114 */ 115 struct bpf_stat bpf_gstats; 116 117 /* 118 * Use a mutex to avoid a race condition between gathering the stats/peers 119 * and opening/closing the device. 120 */ 121 static kmutex_t bpf_mtx; 122 123 /* 124 * bpf_iflist is the list of interfaces; each corresponds to an ifnet 125 * bpf_dtab holds the descriptors, indexed by minor device # 126 */ 127 struct bpf_if *bpf_iflist; 128 LIST_HEAD(, bpf_d) bpf_list; 129 130 static int bpf_allocbufs(struct bpf_d *); 131 static void bpf_deliver(struct bpf_if *, 132 void *(*cpfn)(void *, const void *, size_t), 133 void *, u_int, u_int, struct ifnet *); 134 static void bpf_freed(struct bpf_d *); 135 static void bpf_ifname(struct ifnet *, struct ifreq *); 136 static void *bpf_mcpy(void *, const void *, size_t); 137 static int bpf_movein(struct uio *, int, int, 138 struct mbuf **, struct sockaddr *); 139 static void bpf_attachd(struct bpf_d *, struct bpf_if *); 140 static void bpf_detachd(struct bpf_d *); 141 static int bpf_setif(struct bpf_d *, struct ifreq *); 142 static void bpf_timed_out(void *); 143 static inline void 144 bpf_wakeup(struct bpf_d *); 145 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 146 void *(*)(void *, const void *, size_t), struct timespec *); 147 static void reset_d(struct bpf_d *); 148 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 149 static int bpf_setdlt(struct bpf_d *, u_int); 150 151 static int bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t, 152 int); 153 static int bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t, 154 int); 155 static int bpf_ioctl(struct file *, u_long, void *); 156 static int bpf_poll(struct file *, int); 157 static int bpf_stat(struct file *, struct stat *); 158 static int bpf_close(struct file *); 159 static int bpf_kqfilter(struct file *, struct knote *); 160 static void bpf_softintr(void *); 161 162 static const struct fileops bpf_fileops = { 163 .fo_read = bpf_read, 164 .fo_write = bpf_write, 165 .fo_ioctl = bpf_ioctl, 166 .fo_fcntl = fnullop_fcntl, 167 .fo_poll = bpf_poll, 168 .fo_stat = bpf_stat, 169 .fo_close = bpf_close, 170 .fo_kqfilter = bpf_kqfilter, 171 .fo_restart = fnullop_restart, 172 }; 173 174 dev_type_open(bpfopen); 175 176 const struct cdevsw bpf_cdevsw = { 177 bpfopen, noclose, noread, nowrite, noioctl, 178 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER 179 }; 180 181 static int 182 bpf_movein(struct uio *uio, int linktype, int mtu, struct mbuf **mp, 183 struct sockaddr *sockp) 184 { 185 struct mbuf *m; 186 int error; 187 int len; 188 int hlen; 189 int align; 190 191 /* 192 * Build a sockaddr based on the data link layer type. 193 * We do this at this level because the ethernet header 194 * is copied directly into the data field of the sockaddr. 195 * In the case of SLIP, there is no header and the packet 196 * is forwarded as is. 197 * Also, we are careful to leave room at the front of the mbuf 198 * for the link level header. 199 */ 200 switch (linktype) { 201 202 case DLT_SLIP: 203 sockp->sa_family = AF_INET; 204 hlen = 0; 205 align = 0; 206 break; 207 208 case DLT_PPP: 209 sockp->sa_family = AF_UNSPEC; 210 hlen = 0; 211 align = 0; 212 break; 213 214 case DLT_EN10MB: 215 sockp->sa_family = AF_UNSPEC; 216 /* XXX Would MAXLINKHDR be better? */ 217 /* 6(dst)+6(src)+2(type) */ 218 hlen = sizeof(struct ether_header); 219 align = 2; 220 break; 221 222 case DLT_ARCNET: 223 sockp->sa_family = AF_UNSPEC; 224 hlen = ARC_HDRLEN; 225 align = 5; 226 break; 227 228 case DLT_FDDI: 229 sockp->sa_family = AF_LINK; 230 /* XXX 4(FORMAC)+6(dst)+6(src) */ 231 hlen = 16; 232 align = 0; 233 break; 234 235 case DLT_ECONET: 236 sockp->sa_family = AF_UNSPEC; 237 hlen = 6; 238 align = 2; 239 break; 240 241 case DLT_NULL: 242 sockp->sa_family = AF_UNSPEC; 243 hlen = 0; 244 align = 0; 245 break; 246 247 default: 248 return (EIO); 249 } 250 251 len = uio->uio_resid; 252 /* 253 * If there aren't enough bytes for a link level header or the 254 * packet length exceeds the interface mtu, return an error. 255 */ 256 if (len < hlen || len - hlen > mtu) 257 return (EMSGSIZE); 258 259 /* 260 * XXX Avoid complicated buffer chaining --- 261 * bail if it won't fit in a single mbuf. 262 * (Take into account possible alignment bytes) 263 */ 264 if ((unsigned)len > MCLBYTES - align) 265 return (EIO); 266 267 m = m_gethdr(M_WAIT, MT_DATA); 268 m->m_pkthdr.rcvif = 0; 269 m->m_pkthdr.len = len - hlen; 270 if (len > MHLEN - align) { 271 m_clget(m, M_WAIT); 272 if ((m->m_flags & M_EXT) == 0) { 273 error = ENOBUFS; 274 goto bad; 275 } 276 } 277 278 /* Insure the data is properly aligned */ 279 if (align > 0) { 280 m->m_data += align; 281 m->m_len -= align; 282 } 283 284 error = uiomove(mtod(m, void *), len, uio); 285 if (error) 286 goto bad; 287 if (hlen != 0) { 288 memcpy(sockp->sa_data, mtod(m, void *), hlen); 289 m->m_data += hlen; /* XXX */ 290 len -= hlen; 291 } 292 m->m_len = len; 293 *mp = m; 294 return (0); 295 296 bad: 297 m_freem(m); 298 return (error); 299 } 300 301 /* 302 * Attach file to the bpf interface, i.e. make d listen on bp. 303 * Must be called at splnet. 304 */ 305 static void 306 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 307 { 308 /* 309 * Point d at bp, and add d to the interface's list of listeners. 310 * Finally, point the driver's bpf cookie at the interface so 311 * it will divert packets to bpf. 312 */ 313 d->bd_bif = bp; 314 d->bd_next = bp->bif_dlist; 315 bp->bif_dlist = d; 316 317 *bp->bif_driverp = bp; 318 } 319 320 /* 321 * Detach a file from its interface. 322 */ 323 static void 324 bpf_detachd(struct bpf_d *d) 325 { 326 struct bpf_d **p; 327 struct bpf_if *bp; 328 329 bp = d->bd_bif; 330 /* 331 * Check if this descriptor had requested promiscuous mode. 332 * If so, turn it off. 333 */ 334 if (d->bd_promisc) { 335 int error; 336 337 d->bd_promisc = 0; 338 /* 339 * Take device out of promiscuous mode. Since we were 340 * able to enter promiscuous mode, we should be able 341 * to turn it off. But we can get an error if 342 * the interface was configured down, so only panic 343 * if we don't get an unexpected error. 344 */ 345 error = ifpromisc(bp->bif_ifp, 0); 346 if (error && error != EINVAL) 347 panic("%s: ifpromisc failed: %d", __func__, error); 348 } 349 /* Remove d from the interface's descriptor list. */ 350 p = &bp->bif_dlist; 351 while (*p != d) { 352 p = &(*p)->bd_next; 353 if (*p == 0) 354 panic("%s: descriptor not in list", __func__); 355 } 356 *p = (*p)->bd_next; 357 if (bp->bif_dlist == 0) 358 /* 359 * Let the driver know that there are no more listeners. 360 */ 361 *d->bd_bif->bif_driverp = 0; 362 d->bd_bif = 0; 363 } 364 365 static int 366 doinit(void) 367 { 368 369 mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE); 370 371 LIST_INIT(&bpf_list); 372 373 bpf_gstats.bs_recv = 0; 374 bpf_gstats.bs_drop = 0; 375 bpf_gstats.bs_capt = 0; 376 377 return 0; 378 } 379 380 /* 381 * bpfilterattach() is called at boot time. 382 */ 383 /* ARGSUSED */ 384 void 385 bpfilterattach(int n) 386 { 387 static ONCE_DECL(control); 388 389 RUN_ONCE(&control, doinit); 390 } 391 392 /* 393 * Open ethernet device. Clones. 394 */ 395 /* ARGSUSED */ 396 int 397 bpfopen(dev_t dev, int flag, int mode, struct lwp *l) 398 { 399 struct bpf_d *d; 400 struct file *fp; 401 int error, fd; 402 403 /* falloc() will use the descriptor for us. */ 404 if ((error = fd_allocfile(&fp, &fd)) != 0) 405 return error; 406 407 d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO); 408 d->bd_bufsize = bpf_bufsize; 409 d->bd_seesent = 1; 410 d->bd_pid = l->l_proc->p_pid; 411 getnanotime(&d->bd_btime); 412 d->bd_atime = d->bd_mtime = d->bd_btime; 413 callout_init(&d->bd_callout, 0); 414 selinit(&d->bd_sel); 415 d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d); 416 417 mutex_enter(&bpf_mtx); 418 LIST_INSERT_HEAD(&bpf_list, d, bd_list); 419 mutex_exit(&bpf_mtx); 420 421 return fd_clone(fp, fd, flag, &bpf_fileops, d); 422 } 423 424 /* 425 * Close the descriptor by detaching it from its interface, 426 * deallocating its buffers, and marking it free. 427 */ 428 /* ARGSUSED */ 429 static int 430 bpf_close(struct file *fp) 431 { 432 struct bpf_d *d = fp->f_data; 433 int s; 434 435 KERNEL_LOCK(1, NULL); 436 437 /* 438 * Refresh the PID associated with this bpf file. 439 */ 440 d->bd_pid = curproc->p_pid; 441 442 s = splnet(); 443 if (d->bd_state == BPF_WAITING) 444 callout_stop(&d->bd_callout); 445 d->bd_state = BPF_IDLE; 446 if (d->bd_bif) 447 bpf_detachd(d); 448 splx(s); 449 bpf_freed(d); 450 mutex_enter(&bpf_mtx); 451 LIST_REMOVE(d, bd_list); 452 mutex_exit(&bpf_mtx); 453 callout_destroy(&d->bd_callout); 454 seldestroy(&d->bd_sel); 455 softint_disestablish(d->bd_sih); 456 free(d, M_DEVBUF); 457 fp->f_data = NULL; 458 459 KERNEL_UNLOCK_ONE(NULL); 460 461 return (0); 462 } 463 464 /* 465 * Rotate the packet buffers in descriptor d. Move the store buffer 466 * into the hold slot, and the free buffer into the store slot. 467 * Zero the length of the new store buffer. 468 */ 469 #define ROTATE_BUFFERS(d) \ 470 (d)->bd_hbuf = (d)->bd_sbuf; \ 471 (d)->bd_hlen = (d)->bd_slen; \ 472 (d)->bd_sbuf = (d)->bd_fbuf; \ 473 (d)->bd_slen = 0; \ 474 (d)->bd_fbuf = 0; 475 /* 476 * bpfread - read next chunk of packets from buffers 477 */ 478 static int 479 bpf_read(struct file *fp, off_t *offp, struct uio *uio, 480 kauth_cred_t cred, int flags) 481 { 482 struct bpf_d *d = fp->f_data; 483 int timed_out; 484 int error; 485 int s; 486 487 getnanotime(&d->bd_atime); 488 /* 489 * Restrict application to use a buffer the same size as 490 * the kernel buffers. 491 */ 492 if (uio->uio_resid != d->bd_bufsize) 493 return (EINVAL); 494 495 KERNEL_LOCK(1, NULL); 496 s = splnet(); 497 if (d->bd_state == BPF_WAITING) 498 callout_stop(&d->bd_callout); 499 timed_out = (d->bd_state == BPF_TIMED_OUT); 500 d->bd_state = BPF_IDLE; 501 /* 502 * If the hold buffer is empty, then do a timed sleep, which 503 * ends when the timeout expires or when enough packets 504 * have arrived to fill the store buffer. 505 */ 506 while (d->bd_hbuf == 0) { 507 if (fp->f_flag & FNONBLOCK) { 508 if (d->bd_slen == 0) { 509 splx(s); 510 KERNEL_UNLOCK_ONE(NULL); 511 return (EWOULDBLOCK); 512 } 513 ROTATE_BUFFERS(d); 514 break; 515 } 516 517 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 518 /* 519 * A packet(s) either arrived since the previous 520 * read or arrived while we were asleep. 521 * Rotate the buffers and return what's here. 522 */ 523 ROTATE_BUFFERS(d); 524 break; 525 } 526 error = tsleep(d, PRINET|PCATCH, "bpf", 527 d->bd_rtout); 528 if (error == EINTR || error == ERESTART) { 529 splx(s); 530 KERNEL_UNLOCK_ONE(NULL); 531 return (error); 532 } 533 if (error == EWOULDBLOCK) { 534 /* 535 * On a timeout, return what's in the buffer, 536 * which may be nothing. If there is something 537 * in the store buffer, we can rotate the buffers. 538 */ 539 if (d->bd_hbuf) 540 /* 541 * We filled up the buffer in between 542 * getting the timeout and arriving 543 * here, so we don't need to rotate. 544 */ 545 break; 546 547 if (d->bd_slen == 0) { 548 splx(s); 549 KERNEL_UNLOCK_ONE(NULL); 550 return (0); 551 } 552 ROTATE_BUFFERS(d); 553 break; 554 } 555 if (error != 0) 556 goto done; 557 } 558 /* 559 * At this point, we know we have something in the hold slot. 560 */ 561 splx(s); 562 563 /* 564 * Move data from hold buffer into user space. 565 * We know the entire buffer is transferred since 566 * we checked above that the read buffer is bpf_bufsize bytes. 567 */ 568 error = uiomove(d->bd_hbuf, d->bd_hlen, uio); 569 570 s = splnet(); 571 d->bd_fbuf = d->bd_hbuf; 572 d->bd_hbuf = 0; 573 d->bd_hlen = 0; 574 done: 575 splx(s); 576 KERNEL_UNLOCK_ONE(NULL); 577 return (error); 578 } 579 580 581 /* 582 * If there are processes sleeping on this descriptor, wake them up. 583 */ 584 static inline void 585 bpf_wakeup(struct bpf_d *d) 586 { 587 wakeup(d); 588 if (d->bd_async) 589 softint_schedule(d->bd_sih); 590 selnotify(&d->bd_sel, 0, 0); 591 } 592 593 static void 594 bpf_softintr(void *cookie) 595 { 596 struct bpf_d *d; 597 598 d = cookie; 599 if (d->bd_async) 600 fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL); 601 } 602 603 static void 604 bpf_timed_out(void *arg) 605 { 606 struct bpf_d *d = arg; 607 int s; 608 609 s = splnet(); 610 if (d->bd_state == BPF_WAITING) { 611 d->bd_state = BPF_TIMED_OUT; 612 if (d->bd_slen != 0) 613 bpf_wakeup(d); 614 } 615 splx(s); 616 } 617 618 619 static int 620 bpf_write(struct file *fp, off_t *offp, struct uio *uio, 621 kauth_cred_t cred, int flags) 622 { 623 struct bpf_d *d = fp->f_data; 624 struct ifnet *ifp; 625 struct mbuf *m; 626 int error, s; 627 static struct sockaddr_storage dst; 628 629 m = NULL; /* XXX gcc */ 630 631 KERNEL_LOCK(1, NULL); 632 633 if (d->bd_bif == 0) { 634 KERNEL_UNLOCK_ONE(NULL); 635 return (ENXIO); 636 } 637 getnanotime(&d->bd_mtime); 638 639 ifp = d->bd_bif->bif_ifp; 640 641 if (uio->uio_resid == 0) { 642 KERNEL_UNLOCK_ONE(NULL); 643 return (0); 644 } 645 646 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m, 647 (struct sockaddr *) &dst); 648 if (error) { 649 KERNEL_UNLOCK_ONE(NULL); 650 return (error); 651 } 652 653 if (m->m_pkthdr.len > ifp->if_mtu) { 654 KERNEL_UNLOCK_ONE(NULL); 655 m_freem(m); 656 return (EMSGSIZE); 657 } 658 659 if (d->bd_hdrcmplt) 660 dst.ss_family = pseudo_AF_HDRCMPLT; 661 662 s = splsoftnet(); 663 error = (*ifp->if_output)(ifp, m, (struct sockaddr *) &dst, NULL); 664 splx(s); 665 KERNEL_UNLOCK_ONE(NULL); 666 /* 667 * The driver frees the mbuf. 668 */ 669 return (error); 670 } 671 672 /* 673 * Reset a descriptor by flushing its packet buffer and clearing the 674 * receive and drop counts. Should be called at splnet. 675 */ 676 static void 677 reset_d(struct bpf_d *d) 678 { 679 if (d->bd_hbuf) { 680 /* Free the hold buffer. */ 681 d->bd_fbuf = d->bd_hbuf; 682 d->bd_hbuf = 0; 683 } 684 d->bd_slen = 0; 685 d->bd_hlen = 0; 686 d->bd_rcount = 0; 687 d->bd_dcount = 0; 688 d->bd_ccount = 0; 689 } 690 691 /* 692 * FIONREAD Check for read packet available. 693 * BIOCGBLEN Get buffer len [for read()]. 694 * BIOCSETF Set ethernet read filter. 695 * BIOCFLUSH Flush read packet buffer. 696 * BIOCPROMISC Put interface into promiscuous mode. 697 * BIOCGDLT Get link layer type. 698 * BIOCGETIF Get interface name. 699 * BIOCSETIF Set interface. 700 * BIOCSRTIMEOUT Set read timeout. 701 * BIOCGRTIMEOUT Get read timeout. 702 * BIOCGSTATS Get packet stats. 703 * BIOCIMMEDIATE Set immediate mode. 704 * BIOCVERSION Get filter language version. 705 * BIOCGHDRCMPLT Get "header already complete" flag. 706 * BIOCSHDRCMPLT Set "header already complete" flag. 707 */ 708 /* ARGSUSED */ 709 static int 710 bpf_ioctl(struct file *fp, u_long cmd, void *addr) 711 { 712 struct bpf_d *d = fp->f_data; 713 int s, error = 0; 714 715 /* 716 * Refresh the PID associated with this bpf file. 717 */ 718 KERNEL_LOCK(1, NULL); 719 d->bd_pid = curproc->p_pid; 720 721 s = splnet(); 722 if (d->bd_state == BPF_WAITING) 723 callout_stop(&d->bd_callout); 724 d->bd_state = BPF_IDLE; 725 splx(s); 726 727 switch (cmd) { 728 729 default: 730 error = EINVAL; 731 break; 732 733 /* 734 * Check for read packet available. 735 */ 736 case FIONREAD: 737 { 738 int n; 739 740 s = splnet(); 741 n = d->bd_slen; 742 if (d->bd_hbuf) 743 n += d->bd_hlen; 744 splx(s); 745 746 *(int *)addr = n; 747 break; 748 } 749 750 /* 751 * Get buffer len [for read()]. 752 */ 753 case BIOCGBLEN: 754 *(u_int *)addr = d->bd_bufsize; 755 break; 756 757 /* 758 * Set buffer length. 759 */ 760 case BIOCSBLEN: 761 if (d->bd_bif != 0) 762 error = EINVAL; 763 else { 764 u_int size = *(u_int *)addr; 765 766 if (size > bpf_maxbufsize) 767 *(u_int *)addr = size = bpf_maxbufsize; 768 else if (size < BPF_MINBUFSIZE) 769 *(u_int *)addr = size = BPF_MINBUFSIZE; 770 d->bd_bufsize = size; 771 } 772 break; 773 774 /* 775 * Set link layer read filter. 776 */ 777 case BIOCSETF: 778 error = bpf_setf(d, addr); 779 break; 780 781 /* 782 * Flush read packet buffer. 783 */ 784 case BIOCFLUSH: 785 s = splnet(); 786 reset_d(d); 787 splx(s); 788 break; 789 790 /* 791 * Put interface into promiscuous mode. 792 */ 793 case BIOCPROMISC: 794 if (d->bd_bif == 0) { 795 /* 796 * No interface attached yet. 797 */ 798 error = EINVAL; 799 break; 800 } 801 s = splnet(); 802 if (d->bd_promisc == 0) { 803 error = ifpromisc(d->bd_bif->bif_ifp, 1); 804 if (error == 0) 805 d->bd_promisc = 1; 806 } 807 splx(s); 808 break; 809 810 /* 811 * Get device parameters. 812 */ 813 case BIOCGDLT: 814 if (d->bd_bif == 0) 815 error = EINVAL; 816 else 817 *(u_int *)addr = d->bd_bif->bif_dlt; 818 break; 819 820 /* 821 * Get a list of supported device parameters. 822 */ 823 case BIOCGDLTLIST: 824 if (d->bd_bif == 0) 825 error = EINVAL; 826 else 827 error = bpf_getdltlist(d, addr); 828 break; 829 830 /* 831 * Set device parameters. 832 */ 833 case BIOCSDLT: 834 if (d->bd_bif == 0) 835 error = EINVAL; 836 else 837 error = bpf_setdlt(d, *(u_int *)addr); 838 break; 839 840 /* 841 * Set interface name. 842 */ 843 #ifdef OBIOCGETIF 844 case OBIOCGETIF: 845 #endif 846 case BIOCGETIF: 847 if (d->bd_bif == 0) 848 error = EINVAL; 849 else 850 bpf_ifname(d->bd_bif->bif_ifp, addr); 851 break; 852 853 /* 854 * Set interface. 855 */ 856 #ifdef OBIOCSETIF 857 case OBIOCSETIF: 858 #endif 859 case BIOCSETIF: 860 error = bpf_setif(d, addr); 861 break; 862 863 /* 864 * Set read timeout. 865 */ 866 case BIOCSRTIMEOUT: 867 { 868 struct timeval *tv = addr; 869 870 /* Compute number of ticks. */ 871 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick; 872 if ((d->bd_rtout == 0) && (tv->tv_usec != 0)) 873 d->bd_rtout = 1; 874 break; 875 } 876 877 #ifdef BIOCGORTIMEOUT 878 /* 879 * Get read timeout. 880 */ 881 case BIOCGORTIMEOUT: 882 { 883 struct timeval50 *tv = addr; 884 885 tv->tv_sec = d->bd_rtout / hz; 886 tv->tv_usec = (d->bd_rtout % hz) * tick; 887 break; 888 } 889 #endif 890 891 #ifdef BIOCSORTIMEOUT 892 /* 893 * Set read timeout. 894 */ 895 case BIOCSORTIMEOUT: 896 { 897 struct timeval50 *tv = addr; 898 899 /* Compute number of ticks. */ 900 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick; 901 if ((d->bd_rtout == 0) && (tv->tv_usec != 0)) 902 d->bd_rtout = 1; 903 break; 904 } 905 #endif 906 907 /* 908 * Get read timeout. 909 */ 910 case BIOCGRTIMEOUT: 911 { 912 struct timeval *tv = addr; 913 914 tv->tv_sec = d->bd_rtout / hz; 915 tv->tv_usec = (d->bd_rtout % hz) * tick; 916 break; 917 } 918 /* 919 * Get packet stats. 920 */ 921 case BIOCGSTATS: 922 { 923 struct bpf_stat *bs = addr; 924 925 bs->bs_recv = d->bd_rcount; 926 bs->bs_drop = d->bd_dcount; 927 bs->bs_capt = d->bd_ccount; 928 break; 929 } 930 931 case BIOCGSTATSOLD: 932 { 933 struct bpf_stat_old *bs = addr; 934 935 bs->bs_recv = d->bd_rcount; 936 bs->bs_drop = d->bd_dcount; 937 break; 938 } 939 940 /* 941 * Set immediate mode. 942 */ 943 case BIOCIMMEDIATE: 944 d->bd_immediate = *(u_int *)addr; 945 break; 946 947 case BIOCVERSION: 948 { 949 struct bpf_version *bv = addr; 950 951 bv->bv_major = BPF_MAJOR_VERSION; 952 bv->bv_minor = BPF_MINOR_VERSION; 953 break; 954 } 955 956 case BIOCGHDRCMPLT: /* get "header already complete" flag */ 957 *(u_int *)addr = d->bd_hdrcmplt; 958 break; 959 960 case BIOCSHDRCMPLT: /* set "header already complete" flag */ 961 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0; 962 break; 963 964 /* 965 * Get "see sent packets" flag 966 */ 967 case BIOCGSEESENT: 968 *(u_int *)addr = d->bd_seesent; 969 break; 970 971 /* 972 * Set "see sent" packets flag 973 */ 974 case BIOCSSEESENT: 975 d->bd_seesent = *(u_int *)addr; 976 break; 977 978 case FIONBIO: /* Non-blocking I/O */ 979 /* 980 * No need to do anything special as we use IO_NDELAY in 981 * bpfread() as an indication of whether or not to block 982 * the read. 983 */ 984 break; 985 986 case FIOASYNC: /* Send signal on receive packets */ 987 d->bd_async = *(int *)addr; 988 break; 989 990 case TIOCSPGRP: /* Process or group to send signals to */ 991 case FIOSETOWN: 992 error = fsetown(&d->bd_pgid, cmd, addr); 993 break; 994 995 case TIOCGPGRP: 996 case FIOGETOWN: 997 error = fgetown(d->bd_pgid, cmd, addr); 998 break; 999 } 1000 KERNEL_UNLOCK_ONE(NULL); 1001 return (error); 1002 } 1003 1004 /* 1005 * Set d's packet filter program to fp. If this file already has a filter, 1006 * free it and replace it. Returns EINVAL for bogus requests. 1007 */ 1008 int 1009 bpf_setf(struct bpf_d *d, struct bpf_program *fp) 1010 { 1011 struct bpf_insn *fcode, *old; 1012 u_int flen, size; 1013 int s; 1014 1015 old = d->bd_filter; 1016 if (fp->bf_insns == 0) { 1017 if (fp->bf_len != 0) 1018 return (EINVAL); 1019 s = splnet(); 1020 d->bd_filter = 0; 1021 reset_d(d); 1022 splx(s); 1023 if (old != 0) 1024 free(old, M_DEVBUF); 1025 return (0); 1026 } 1027 flen = fp->bf_len; 1028 if (flen > BPF_MAXINSNS) 1029 return (EINVAL); 1030 1031 size = flen * sizeof(*fp->bf_insns); 1032 fcode = malloc(size, M_DEVBUF, M_WAITOK); 1033 if (copyin(fp->bf_insns, fcode, size) == 0 && 1034 bpf_validate(fcode, (int)flen)) { 1035 s = splnet(); 1036 d->bd_filter = fcode; 1037 reset_d(d); 1038 splx(s); 1039 if (old != 0) 1040 free(old, M_DEVBUF); 1041 1042 return (0); 1043 } 1044 free(fcode, M_DEVBUF); 1045 return (EINVAL); 1046 } 1047 1048 /* 1049 * Detach a file from its current interface (if attached at all) and attach 1050 * to the interface indicated by the name stored in ifr. 1051 * Return an errno or 0. 1052 */ 1053 static int 1054 bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1055 { 1056 struct bpf_if *bp; 1057 char *cp; 1058 int unit_seen, i, s, error; 1059 1060 /* 1061 * Make sure the provided name has a unit number, and default 1062 * it to '0' if not specified. 1063 * XXX This is ugly ... do this differently? 1064 */ 1065 unit_seen = 0; 1066 cp = ifr->ifr_name; 1067 cp[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */ 1068 while (*cp++) 1069 if (*cp >= '0' && *cp <= '9') 1070 unit_seen = 1; 1071 if (!unit_seen) { 1072 /* Make sure to leave room for the '\0'. */ 1073 for (i = 0; i < (IFNAMSIZ - 1); ++i) { 1074 if ((ifr->ifr_name[i] >= 'a' && 1075 ifr->ifr_name[i] <= 'z') || 1076 (ifr->ifr_name[i] >= 'A' && 1077 ifr->ifr_name[i] <= 'Z')) 1078 continue; 1079 ifr->ifr_name[i] = '0'; 1080 } 1081 } 1082 1083 /* 1084 * Look through attached interfaces for the named one. 1085 */ 1086 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) { 1087 struct ifnet *ifp = bp->bif_ifp; 1088 1089 if (ifp == 0 || 1090 strcmp(ifp->if_xname, ifr->ifr_name) != 0) 1091 continue; 1092 /* skip additional entry */ 1093 if (bp->bif_driverp != &ifp->if_bpf) 1094 continue; 1095 /* 1096 * We found the requested interface. 1097 * Allocate the packet buffers if we need to. 1098 * If we're already attached to requested interface, 1099 * just flush the buffer. 1100 */ 1101 if (d->bd_sbuf == 0) { 1102 error = bpf_allocbufs(d); 1103 if (error != 0) 1104 return (error); 1105 } 1106 s = splnet(); 1107 if (bp != d->bd_bif) { 1108 if (d->bd_bif) 1109 /* 1110 * Detach if attached to something else. 1111 */ 1112 bpf_detachd(d); 1113 1114 bpf_attachd(d, bp); 1115 } 1116 reset_d(d); 1117 splx(s); 1118 return (0); 1119 } 1120 /* Not found. */ 1121 return (ENXIO); 1122 } 1123 1124 /* 1125 * Copy the interface name to the ifreq. 1126 */ 1127 static void 1128 bpf_ifname(struct ifnet *ifp, struct ifreq *ifr) 1129 { 1130 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 1131 } 1132 1133 static int 1134 bpf_stat(struct file *fp, struct stat *st) 1135 { 1136 struct bpf_d *d = fp->f_data; 1137 1138 (void)memset(st, 0, sizeof(*st)); 1139 KERNEL_LOCK(1, NULL); 1140 st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid); 1141 st->st_atimespec = d->bd_atime; 1142 st->st_mtimespec = d->bd_mtime; 1143 st->st_ctimespec = st->st_birthtimespec = d->bd_btime; 1144 st->st_uid = kauth_cred_geteuid(fp->f_cred); 1145 st->st_gid = kauth_cred_getegid(fp->f_cred); 1146 KERNEL_UNLOCK_ONE(NULL); 1147 return 0; 1148 } 1149 1150 /* 1151 * Support for poll() system call 1152 * 1153 * Return true iff the specific operation will not block indefinitely - with 1154 * the assumption that it is safe to positively acknowledge a request for the 1155 * ability to write to the BPF device. 1156 * Otherwise, return false but make a note that a selnotify() must be done. 1157 */ 1158 static int 1159 bpf_poll(struct file *fp, int events) 1160 { 1161 struct bpf_d *d = fp->f_data; 1162 int s = splnet(); 1163 int revents; 1164 1165 /* 1166 * Refresh the PID associated with this bpf file. 1167 */ 1168 KERNEL_LOCK(1, NULL); 1169 d->bd_pid = curproc->p_pid; 1170 1171 revents = events & (POLLOUT | POLLWRNORM); 1172 if (events & (POLLIN | POLLRDNORM)) { 1173 /* 1174 * An imitation of the FIONREAD ioctl code. 1175 */ 1176 if (d->bd_hlen != 0 || 1177 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 1178 d->bd_slen != 0)) { 1179 revents |= events & (POLLIN | POLLRDNORM); 1180 } else { 1181 selrecord(curlwp, &d->bd_sel); 1182 /* Start the read timeout if necessary */ 1183 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1184 callout_reset(&d->bd_callout, d->bd_rtout, 1185 bpf_timed_out, d); 1186 d->bd_state = BPF_WAITING; 1187 } 1188 } 1189 } 1190 1191 KERNEL_UNLOCK_ONE(NULL); 1192 splx(s); 1193 return (revents); 1194 } 1195 1196 static void 1197 filt_bpfrdetach(struct knote *kn) 1198 { 1199 struct bpf_d *d = kn->kn_hook; 1200 int s; 1201 1202 KERNEL_LOCK(1, NULL); 1203 s = splnet(); 1204 SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext); 1205 splx(s); 1206 KERNEL_UNLOCK_ONE(NULL); 1207 } 1208 1209 static int 1210 filt_bpfread(struct knote *kn, long hint) 1211 { 1212 struct bpf_d *d = kn->kn_hook; 1213 int rv; 1214 1215 KERNEL_LOCK(1, NULL); 1216 kn->kn_data = d->bd_hlen; 1217 if (d->bd_immediate) 1218 kn->kn_data += d->bd_slen; 1219 rv = (kn->kn_data > 0); 1220 KERNEL_UNLOCK_ONE(NULL); 1221 return rv; 1222 } 1223 1224 static const struct filterops bpfread_filtops = 1225 { 1, NULL, filt_bpfrdetach, filt_bpfread }; 1226 1227 static int 1228 bpf_kqfilter(struct file *fp, struct knote *kn) 1229 { 1230 struct bpf_d *d = fp->f_data; 1231 struct klist *klist; 1232 int s; 1233 1234 KERNEL_LOCK(1, NULL); 1235 1236 switch (kn->kn_filter) { 1237 case EVFILT_READ: 1238 klist = &d->bd_sel.sel_klist; 1239 kn->kn_fop = &bpfread_filtops; 1240 break; 1241 1242 default: 1243 KERNEL_UNLOCK_ONE(NULL); 1244 return (EINVAL); 1245 } 1246 1247 kn->kn_hook = d; 1248 1249 s = splnet(); 1250 SLIST_INSERT_HEAD(klist, kn, kn_selnext); 1251 splx(s); 1252 KERNEL_UNLOCK_ONE(NULL); 1253 1254 return (0); 1255 } 1256 1257 /* 1258 * Incoming linkage from device drivers. Process the packet pkt, of length 1259 * pktlen, which is stored in a contiguous buffer. The packet is parsed 1260 * by each process' filter, and if accepted, stashed into the corresponding 1261 * buffer. 1262 */ 1263 static void 1264 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 1265 { 1266 struct bpf_d *d; 1267 u_int slen; 1268 struct timespec ts; 1269 int gottime=0; 1270 1271 /* 1272 * Note that the ipl does not have to be raised at this point. 1273 * The only problem that could arise here is that if two different 1274 * interfaces shared any data. This is not the case. 1275 */ 1276 for (d = bp->bif_dlist; d != 0; d = d->bd_next) { 1277 ++d->bd_rcount; 1278 ++bpf_gstats.bs_recv; 1279 slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen); 1280 if (slen != 0) { 1281 if (!gottime) { 1282 nanotime(&ts); 1283 gottime = 1; 1284 } 1285 catchpacket(d, pkt, pktlen, slen, memcpy, &ts); 1286 } 1287 } 1288 } 1289 1290 /* 1291 * Copy data from an mbuf chain into a buffer. This code is derived 1292 * from m_copydata in sys/uipc_mbuf.c. 1293 */ 1294 static void * 1295 bpf_mcpy(void *dst_arg, const void *src_arg, size_t len) 1296 { 1297 const struct mbuf *m; 1298 u_int count; 1299 u_char *dst; 1300 1301 m = src_arg; 1302 dst = dst_arg; 1303 while (len > 0) { 1304 if (m == NULL) 1305 panic("bpf_mcpy"); 1306 count = min(m->m_len, len); 1307 memcpy(dst, mtod(m, const void *), count); 1308 m = m->m_next; 1309 dst += count; 1310 len -= count; 1311 } 1312 return dst_arg; 1313 } 1314 1315 /* 1316 * Dispatch a packet to all the listeners on interface bp. 1317 * 1318 * marg pointer to the packet, either a data buffer or an mbuf chain 1319 * buflen buffer length, if marg is a data buffer 1320 * cpfn a function that can copy marg into the listener's buffer 1321 * pktlen length of the packet 1322 * rcvif either NULL or the interface the packet came in on. 1323 */ 1324 static inline void 1325 bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t), 1326 void *marg, u_int pktlen, u_int buflen, struct ifnet *rcvif) 1327 { 1328 u_int slen; 1329 struct bpf_d *d; 1330 struct timespec ts; 1331 int gottime = 0; 1332 1333 for (d = bp->bif_dlist; d != 0; d = d->bd_next) { 1334 if (!d->bd_seesent && (rcvif == NULL)) 1335 continue; 1336 ++d->bd_rcount; 1337 ++bpf_gstats.bs_recv; 1338 slen = bpf_filter(d->bd_filter, marg, pktlen, buflen); 1339 if (slen != 0) { 1340 if(!gottime) { 1341 nanotime(&ts); 1342 gottime = 1; 1343 } 1344 catchpacket(d, marg, pktlen, slen, cpfn, &ts); 1345 } 1346 } 1347 } 1348 1349 /* 1350 * Incoming linkage from device drivers, when the head of the packet is in 1351 * a buffer, and the tail is in an mbuf chain. 1352 */ 1353 static void 1354 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 1355 { 1356 u_int pktlen; 1357 struct mbuf mb; 1358 1359 pktlen = m_length(m) + dlen; 1360 1361 /* 1362 * Craft on-stack mbuf suitable for passing to bpf_filter. 1363 * Note that we cut corners here; we only setup what's 1364 * absolutely needed--this mbuf should never go anywhere else. 1365 */ 1366 (void)memset(&mb, 0, sizeof(mb)); 1367 mb.m_next = m; 1368 mb.m_data = data; 1369 mb.m_len = dlen; 1370 1371 bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif); 1372 } 1373 1374 /* 1375 * Incoming linkage from device drivers, when packet is in an mbuf chain. 1376 */ 1377 static void 1378 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 1379 { 1380 void *(*cpfn)(void *, const void *, size_t); 1381 u_int pktlen, buflen; 1382 void *marg; 1383 1384 pktlen = m_length(m); 1385 1386 if (pktlen == m->m_len) { 1387 cpfn = (void *)memcpy; 1388 marg = mtod(m, void *); 1389 buflen = pktlen; 1390 } else { 1391 /*###1299 [cc] warning: assignment from incompatible pointer type%%%*/ 1392 cpfn = bpf_mcpy; 1393 marg = m; 1394 buflen = 0; 1395 } 1396 1397 bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif); 1398 } 1399 1400 /* 1401 * We need to prepend the address family as 1402 * a four byte field. Cons up a dummy header 1403 * to pacify bpf. This is safe because bpf 1404 * will only read from the mbuf (i.e., it won't 1405 * try to free it or keep a pointer a to it). 1406 */ 1407 static void 1408 bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m) 1409 { 1410 struct mbuf m0; 1411 1412 m0.m_flags = 0; 1413 m0.m_next = m; 1414 m0.m_len = 4; 1415 m0.m_data = (char *)⁡ 1416 1417 bpf_mtap(bp, &m0); 1418 } 1419 1420 static void 1421 bpf_mtap_et(struct bpf_if *bp, uint16_t et, struct mbuf *m) 1422 { 1423 struct mbuf m0; 1424 1425 m0.m_flags = 0; 1426 m0.m_next = m; 1427 m0.m_len = 14; 1428 m0.m_data = m0.m_dat; 1429 1430 ((uint32_t *)m0.m_data)[0] = 0; 1431 ((uint32_t *)m0.m_data)[1] = 0; 1432 ((uint32_t *)m0.m_data)[2] = 0; 1433 ((uint16_t *)m0.m_data)[6] = et; 1434 1435 bpf_mtap(bp, &m0); 1436 } 1437 1438 /* 1439 * Put the SLIP pseudo-"link header" in place. 1440 * Note this M_PREPEND() should never fail, 1441 * swince we know we always have enough space 1442 * in the input buffer. 1443 */ 1444 static void 1445 bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m) 1446 { 1447 int s; 1448 u_char *hp; 1449 1450 M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT); 1451 if (*m == NULL) 1452 return; 1453 1454 hp = mtod(*m, u_char *); 1455 hp[SLX_DIR] = SLIPDIR_IN; 1456 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN); 1457 1458 s = splnet(); 1459 bpf_mtap(bp, *m); 1460 splx(s); 1461 1462 m_adj(*m, SLIP_HDRLEN); 1463 } 1464 1465 /* 1466 * Put the SLIP pseudo-"link header" in 1467 * place. The compressed header is now 1468 * at the beginning of the mbuf. 1469 */ 1470 static void 1471 bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m) 1472 { 1473 struct mbuf m0; 1474 u_char *hp; 1475 int s; 1476 1477 m0.m_flags = 0; 1478 m0.m_next = m; 1479 m0.m_data = m0.m_dat; 1480 m0.m_len = SLIP_HDRLEN; 1481 1482 hp = mtod(&m0, u_char *); 1483 1484 hp[SLX_DIR] = SLIPDIR_OUT; 1485 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN); 1486 1487 s = splnet(); 1488 bpf_mtap(bp, &m0); 1489 splx(s); 1490 m_freem(m); 1491 } 1492 1493 /* 1494 * Move the packet data from interface memory (pkt) into the 1495 * store buffer. Return 1 if it's time to wakeup a listener (buffer full), 1496 * otherwise 0. "copy" is the routine called to do the actual data 1497 * transfer. memcpy is passed in to copy contiguous chunks, while 1498 * bpf_mcpy is passed in to copy mbuf chains. In the latter case, 1499 * pkt is really an mbuf. 1500 */ 1501 static void 1502 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 1503 void *(*cpfn)(void *, const void *, size_t), struct timespec *ts) 1504 { 1505 struct bpf_hdr *hp; 1506 int totlen, curlen; 1507 int hdrlen = d->bd_bif->bif_hdrlen; 1508 int do_wakeup = 0; 1509 1510 ++d->bd_ccount; 1511 ++bpf_gstats.bs_capt; 1512 /* 1513 * Figure out how many bytes to move. If the packet is 1514 * greater or equal to the snapshot length, transfer that 1515 * much. Otherwise, transfer the whole packet (unless 1516 * we hit the buffer size limit). 1517 */ 1518 totlen = hdrlen + min(snaplen, pktlen); 1519 if (totlen > d->bd_bufsize) 1520 totlen = d->bd_bufsize; 1521 1522 /* 1523 * Round up the end of the previous packet to the next longword. 1524 */ 1525 curlen = BPF_WORDALIGN(d->bd_slen); 1526 if (curlen + totlen > d->bd_bufsize) { 1527 /* 1528 * This packet will overflow the storage buffer. 1529 * Rotate the buffers if we can, then wakeup any 1530 * pending reads. 1531 */ 1532 if (d->bd_fbuf == 0) { 1533 /* 1534 * We haven't completed the previous read yet, 1535 * so drop the packet. 1536 */ 1537 ++d->bd_dcount; 1538 ++bpf_gstats.bs_drop; 1539 return; 1540 } 1541 ROTATE_BUFFERS(d); 1542 do_wakeup = 1; 1543 curlen = 0; 1544 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) { 1545 /* 1546 * Immediate mode is set, or the read timeout has 1547 * already expired during a select call. A packet 1548 * arrived, so the reader should be woken up. 1549 */ 1550 do_wakeup = 1; 1551 } 1552 1553 /* 1554 * Append the bpf header. 1555 */ 1556 hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen); 1557 hp->bh_tstamp.tv_sec = ts->tv_sec; 1558 hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000; 1559 hp->bh_datalen = pktlen; 1560 hp->bh_hdrlen = hdrlen; 1561 /* 1562 * Copy the packet data into the store buffer and update its length. 1563 */ 1564 (*cpfn)((u_char *)hp + hdrlen, pkt, (hp->bh_caplen = totlen - hdrlen)); 1565 d->bd_slen = curlen + totlen; 1566 1567 /* 1568 * Call bpf_wakeup after bd_slen has been updated so that kevent(2) 1569 * will cause filt_bpfread() to be called with it adjusted. 1570 */ 1571 if (do_wakeup) 1572 bpf_wakeup(d); 1573 } 1574 1575 /* 1576 * Initialize all nonzero fields of a descriptor. 1577 */ 1578 static int 1579 bpf_allocbufs(struct bpf_d *d) 1580 { 1581 1582 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1583 if (!d->bd_fbuf) 1584 return (ENOBUFS); 1585 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1586 if (!d->bd_sbuf) { 1587 free(d->bd_fbuf, M_DEVBUF); 1588 return (ENOBUFS); 1589 } 1590 d->bd_slen = 0; 1591 d->bd_hlen = 0; 1592 return (0); 1593 } 1594 1595 /* 1596 * Free buffers currently in use by a descriptor. 1597 * Called on close. 1598 */ 1599 static void 1600 bpf_freed(struct bpf_d *d) 1601 { 1602 /* 1603 * We don't need to lock out interrupts since this descriptor has 1604 * been detached from its interface and it yet hasn't been marked 1605 * free. 1606 */ 1607 if (d->bd_sbuf != 0) { 1608 free(d->bd_sbuf, M_DEVBUF); 1609 if (d->bd_hbuf != 0) 1610 free(d->bd_hbuf, M_DEVBUF); 1611 if (d->bd_fbuf != 0) 1612 free(d->bd_fbuf, M_DEVBUF); 1613 } 1614 if (d->bd_filter) 1615 free(d->bd_filter, M_DEVBUF); 1616 } 1617 1618 /* 1619 * Attach an interface to bpf. dlt is the link layer type; 1620 * hdrlen is the fixed size of the link header for the specified dlt 1621 * (variable length headers not yet supported). 1622 */ 1623 static void 1624 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 1625 { 1626 struct bpf_if *bp; 1627 bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT); 1628 if (bp == 0) 1629 panic("bpfattach"); 1630 1631 bp->bif_dlist = 0; 1632 bp->bif_driverp = driverp; 1633 bp->bif_ifp = ifp; 1634 bp->bif_dlt = dlt; 1635 1636 bp->bif_next = bpf_iflist; 1637 bpf_iflist = bp; 1638 1639 *bp->bif_driverp = 0; 1640 1641 /* 1642 * Compute the length of the bpf header. This is not necessarily 1643 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1644 * that the network layer header begins on a longword boundary (for 1645 * performance reasons and to alleviate alignment restrictions). 1646 */ 1647 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1648 1649 #if 0 1650 printf("bpf: %s attached\n", ifp->if_xname); 1651 #endif 1652 } 1653 1654 /* 1655 * Remove an interface from bpf. 1656 */ 1657 static void 1658 bpfdetach(struct ifnet *ifp) 1659 { 1660 struct bpf_if *bp, **pbp; 1661 struct bpf_d *d; 1662 int s; 1663 1664 /* Nuke the vnodes for any open instances */ 1665 LIST_FOREACH(d, &bpf_list, bd_list) { 1666 if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) { 1667 /* 1668 * Detach the descriptor from an interface now. 1669 * It will be free'ed later by close routine. 1670 */ 1671 s = splnet(); 1672 d->bd_promisc = 0; /* we can't touch device. */ 1673 bpf_detachd(d); 1674 splx(s); 1675 } 1676 } 1677 1678 again: 1679 for (bp = bpf_iflist, pbp = &bpf_iflist; 1680 bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) { 1681 if (bp->bif_ifp == ifp) { 1682 *pbp = bp->bif_next; 1683 free(bp, M_DEVBUF); 1684 goto again; 1685 } 1686 } 1687 } 1688 1689 /* 1690 * Change the data link type of a interface. 1691 */ 1692 static void 1693 bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen) 1694 { 1695 struct bpf_if *bp; 1696 1697 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1698 if (bp->bif_driverp == &ifp->if_bpf) 1699 break; 1700 } 1701 if (bp == NULL) 1702 panic("bpf_change_type"); 1703 1704 bp->bif_dlt = dlt; 1705 1706 /* 1707 * Compute the length of the bpf header. This is not necessarily 1708 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1709 * that the network layer header begins on a longword boundary (for 1710 * performance reasons and to alleviate alignment restrictions). 1711 */ 1712 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1713 } 1714 1715 /* 1716 * Get a list of available data link type of the interface. 1717 */ 1718 static int 1719 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 1720 { 1721 int n, error; 1722 struct ifnet *ifp; 1723 struct bpf_if *bp; 1724 1725 ifp = d->bd_bif->bif_ifp; 1726 n = 0; 1727 error = 0; 1728 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1729 if (bp->bif_ifp != ifp) 1730 continue; 1731 if (bfl->bfl_list != NULL) { 1732 if (n >= bfl->bfl_len) 1733 return ENOMEM; 1734 error = copyout(&bp->bif_dlt, 1735 bfl->bfl_list + n, sizeof(u_int)); 1736 } 1737 n++; 1738 } 1739 bfl->bfl_len = n; 1740 return error; 1741 } 1742 1743 /* 1744 * Set the data link type of a BPF instance. 1745 */ 1746 static int 1747 bpf_setdlt(struct bpf_d *d, u_int dlt) 1748 { 1749 int s, error, opromisc; 1750 struct ifnet *ifp; 1751 struct bpf_if *bp; 1752 1753 if (d->bd_bif->bif_dlt == dlt) 1754 return 0; 1755 ifp = d->bd_bif->bif_ifp; 1756 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1757 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 1758 break; 1759 } 1760 if (bp == NULL) 1761 return EINVAL; 1762 s = splnet(); 1763 opromisc = d->bd_promisc; 1764 bpf_detachd(d); 1765 bpf_attachd(d, bp); 1766 reset_d(d); 1767 if (opromisc) { 1768 error = ifpromisc(bp->bif_ifp, 1); 1769 if (error) 1770 printf("%s: bpf_setdlt: ifpromisc failed (%d)\n", 1771 bp->bif_ifp->if_xname, error); 1772 else 1773 d->bd_promisc = 1; 1774 } 1775 splx(s); 1776 return 0; 1777 } 1778 1779 static int 1780 sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS) 1781 { 1782 int newsize, error; 1783 struct sysctlnode node; 1784 1785 node = *rnode; 1786 node.sysctl_data = &newsize; 1787 newsize = bpf_maxbufsize; 1788 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1789 if (error || newp == NULL) 1790 return (error); 1791 1792 if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE) 1793 return (EINVAL); 1794 1795 bpf_maxbufsize = newsize; 1796 1797 return (0); 1798 } 1799 1800 static int 1801 sysctl_net_bpf_peers(SYSCTLFN_ARGS) 1802 { 1803 int error, elem_count; 1804 struct bpf_d *dp; 1805 struct bpf_d_ext dpe; 1806 size_t len, needed, elem_size, out_size; 1807 char *sp; 1808 1809 if (namelen == 1 && name[0] == CTL_QUERY) 1810 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1811 1812 if (namelen != 2) 1813 return (EINVAL); 1814 1815 /* BPF peers is privileged information. */ 1816 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE, 1817 KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL); 1818 if (error) 1819 return (EPERM); 1820 1821 len = (oldp != NULL) ? *oldlenp : 0; 1822 sp = oldp; 1823 elem_size = name[0]; 1824 elem_count = name[1]; 1825 out_size = MIN(sizeof(dpe), elem_size); 1826 needed = 0; 1827 1828 if (elem_size < 1 || elem_count < 0) 1829 return (EINVAL); 1830 1831 mutex_enter(&bpf_mtx); 1832 LIST_FOREACH(dp, &bpf_list, bd_list) { 1833 if (len >= elem_size && elem_count > 0) { 1834 #define BPF_EXT(field) dpe.bde_ ## field = dp->bd_ ## field 1835 BPF_EXT(bufsize); 1836 BPF_EXT(promisc); 1837 BPF_EXT(promisc); 1838 BPF_EXT(state); 1839 BPF_EXT(immediate); 1840 BPF_EXT(hdrcmplt); 1841 BPF_EXT(seesent); 1842 BPF_EXT(pid); 1843 BPF_EXT(rcount); 1844 BPF_EXT(dcount); 1845 BPF_EXT(ccount); 1846 #undef BPF_EXT 1847 if (dp->bd_bif) 1848 (void)strlcpy(dpe.bde_ifname, 1849 dp->bd_bif->bif_ifp->if_xname, 1850 IFNAMSIZ - 1); 1851 else 1852 dpe.bde_ifname[0] = '\0'; 1853 1854 error = copyout(&dpe, sp, out_size); 1855 if (error) 1856 break; 1857 sp += elem_size; 1858 len -= elem_size; 1859 } 1860 needed += elem_size; 1861 if (elem_count > 0 && elem_count != INT_MAX) 1862 elem_count--; 1863 } 1864 mutex_exit(&bpf_mtx); 1865 1866 *oldlenp = needed; 1867 1868 return (error); 1869 } 1870 1871 SYSCTL_SETUP(sysctl_net_bpf_setup, "sysctl net.bpf subtree setup") 1872 { 1873 const struct sysctlnode *node; 1874 1875 sysctl_createv(clog, 0, NULL, NULL, 1876 CTLFLAG_PERMANENT, 1877 CTLTYPE_NODE, "net", NULL, 1878 NULL, 0, NULL, 0, 1879 CTL_NET, CTL_EOL); 1880 1881 node = NULL; 1882 sysctl_createv(clog, 0, NULL, &node, 1883 CTLFLAG_PERMANENT, 1884 CTLTYPE_NODE, "bpf", 1885 SYSCTL_DESCR("BPF options"), 1886 NULL, 0, NULL, 0, 1887 CTL_NET, CTL_CREATE, CTL_EOL); 1888 if (node != NULL) { 1889 sysctl_createv(clog, 0, NULL, NULL, 1890 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1891 CTLTYPE_INT, "maxbufsize", 1892 SYSCTL_DESCR("Maximum size for data capture buffer"), 1893 sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0, 1894 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL); 1895 sysctl_createv(clog, 0, NULL, NULL, 1896 CTLFLAG_PERMANENT, 1897 CTLTYPE_STRUCT, "stats", 1898 SYSCTL_DESCR("BPF stats"), 1899 NULL, 0, &bpf_gstats, sizeof(bpf_gstats), 1900 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL); 1901 sysctl_createv(clog, 0, NULL, NULL, 1902 CTLFLAG_PERMANENT, 1903 CTLTYPE_STRUCT, "peers", 1904 SYSCTL_DESCR("BPF peers"), 1905 sysctl_net_bpf_peers, 0, NULL, 0, 1906 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL); 1907 } 1908 1909 } 1910 1911 struct bpf_ops bpf_ops_kernel = { 1912 .bpf_attach = bpfattach, 1913 .bpf_detach = bpfdetach, 1914 .bpf_change_type = bpf_change_type, 1915 1916 .bpf_tap = bpf_tap, 1917 .bpf_mtap = bpf_mtap, 1918 .bpf_mtap2 = bpf_mtap2, 1919 .bpf_mtap_af = bpf_mtap_af, 1920 .bpf_mtap_et = bpf_mtap_et, 1921 .bpf_mtap_sl_in = bpf_mtap_sl_in, 1922 .bpf_mtap_sl_out = bpf_mtap_sl_out, 1923 }; 1924 1925 MODULE(MODULE_CLASS_DRIVER, bpf, NULL); 1926 1927 static int 1928 bpf_modcmd(modcmd_t cmd, void *arg) 1929 { 1930 devmajor_t bmajor, cmajor; 1931 int error; 1932 1933 bmajor = cmajor = NODEVMAJOR; 1934 1935 switch (cmd) { 1936 case MODULE_CMD_INIT: 1937 bpfilterattach(0); 1938 error = devsw_attach("bpf", NULL, &bmajor, 1939 &bpf_cdevsw, &cmajor); 1940 if (error == EEXIST) 1941 error = 0; /* maybe built-in ... improve eventually */ 1942 if (error) 1943 break; 1944 1945 bpf_ops_handover_enter(&bpf_ops_kernel); 1946 atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel); 1947 bpf_ops_handover_exit(); 1948 break; 1949 1950 case MODULE_CMD_FINI: 1951 /* 1952 * bpf_ops is not (yet) referenced in the callers before 1953 * attach. maybe other issues too. "safety first". 1954 */ 1955 error = EOPNOTSUPP; 1956 break; 1957 1958 default: 1959 error = ENOTTY; 1960 break; 1961 } 1962 1963 return error; 1964 } 1965