1 /* $NetBSD: bpf.c,v 1.223 2018/01/25 02:45:02 ozaki-r Exp $ */ 2 3 /* 4 * Copyright (c) 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from the Stanford/CMU enet packet filter, 8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 10 * Berkeley Laboratory. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 37 * static char rcsid[] = 38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp "; 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.223 2018/01/25 02:45:02 ozaki-r Exp $"); 43 44 #if defined(_KERNEL_OPT) 45 #include "opt_bpf.h" 46 #include "sl.h" 47 #include "strip.h" 48 #include "opt_net_mpsafe.h" 49 #endif 50 51 #include <sys/param.h> 52 #include <sys/systm.h> 53 #include <sys/mbuf.h> 54 #include <sys/buf.h> 55 #include <sys/time.h> 56 #include <sys/proc.h> 57 #include <sys/ioctl.h> 58 #include <sys/conf.h> 59 #include <sys/vnode.h> 60 #include <sys/queue.h> 61 #include <sys/stat.h> 62 #include <sys/module.h> 63 #include <sys/atomic.h> 64 #include <sys/cpu.h> 65 66 #include <sys/file.h> 67 #include <sys/filedesc.h> 68 #include <sys/tty.h> 69 #include <sys/uio.h> 70 71 #include <sys/protosw.h> 72 #include <sys/socket.h> 73 #include <sys/errno.h> 74 #include <sys/kernel.h> 75 #include <sys/poll.h> 76 #include <sys/sysctl.h> 77 #include <sys/kauth.h> 78 #include <sys/syslog.h> 79 #include <sys/percpu.h> 80 #include <sys/pserialize.h> 81 #include <sys/lwp.h> 82 83 #include <net/if.h> 84 #include <net/slip.h> 85 86 #include <net/bpf.h> 87 #include <net/bpfdesc.h> 88 #include <net/bpfjit.h> 89 90 #include <net/if_arc.h> 91 #include <net/if_ether.h> 92 93 #include <netinet/in.h> 94 #include <netinet/if_inarp.h> 95 96 97 #include <compat/sys/sockio.h> 98 99 #ifndef BPF_BUFSIZE 100 /* 101 * 4096 is too small for FDDI frames. 
 * 8192 is too small for gigabit Ethernet jumbos (circa 9k), ATM, or Intel
 * gig/10gig ethernet jumbos (16k).
 */
# define BPF_BUFSIZE 32768
#endif

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
 * XXX the default values should be computed dynamically based
 * on available memory size and available mbuf clusters.
 */
static int bpf_bufsize = BPF_BUFSIZE;
static int bpf_maxbufsize = BPF_DFLTBUFSIZE;	/* XXX set dynamically, see above */
static bool bpf_jit = false;

struct bpfjit_ops bpfjit_module_ops = {
	.bj_generate_code = NULL,
	.bj_free_code = NULL
};

/*
 * Global BPF statistics returned by net.bpf.stats sysctl.
 */
static struct percpu *bpf_gstats_percpu; /* struct bpf_stat */

#define BPF_STATINC(id)					\
	{						\
		struct bpf_stat *__stats =		\
		    percpu_getref(bpf_gstats_percpu);	\
		__stats->bs_##id++;			\
		percpu_putref(bpf_gstats_percpu);	\
	}

/*
 * Locking notes:
 * - bpf_mtx (adaptive mutex) protects:
 *   - Global lists: bpf_iflist and bpf_dlist
 *   - struct bpf_if
 *   - bpf_close
 *   - bpf_psz (pserialize)
 * - struct bpf_d has two mutexes:
 *   - bd_buf_mtx (spin mutex) protects the buffers that can be accessed
 *     on packet tapping
 *   - bd_mtx (adaptive mutex) protects member variables other than the buffers
 * - Locking order: bpf_mtx => bpf_d#bd_mtx => bpf_d#bd_buf_mtx
 * - struct bpf_d obtained via fp->f_bpf in bpf_read and bpf_write is
 *   never freed because struct bpf_d is only freed in bpf_close and
 *   bpf_close is never called while executing bpf_read and bpf_write
 * - A filter that is assigned to bpf_d can be replaced with another filter
 *   while tapping packets, so the replacement needs to be done atomically
 * - struct bpf_d is iterated on bpf_dlist with psz
 * - struct bpf_if is iterated on bpf_iflist with psz or psref
 */
/*
 * Use a mutex to avoid a race condition between gathering the stats/peers
 * and opening/closing the device.
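 *
 * (Illustrative addition, not part of the original source: the per-cpu
 *  counters bumped by BPF_STATINC() are summed and exported through the
 *  net.bpf.stats sysctl set up at the bottom of this file.  A minimal
 *  userland sketch, assuming <sys/sysctl.h> and NetBSD libc:
 *
 *	struct bpf_stat bs;
 *	size_t len = sizeof(bs);
 *
 *	if (sysctlbyname("net.bpf.stats", &bs, &len, NULL, 0) == 0)
 *		printf("recv %llu drop %llu capt %llu\n",
 *		    (unsigned long long)bs.bs_recv,
 *		    (unsigned long long)bs.bs_drop,
 *		    (unsigned long long)bs.bs_capt);
 * )
 *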
159 */ 160 static kmutex_t bpf_mtx; 161 162 static struct psref_class *bpf_psref_class __read_mostly; 163 static pserialize_t bpf_psz; 164 165 static inline void 166 bpf_if_acquire(struct bpf_if *bp, struct psref *psref) 167 { 168 169 psref_acquire(psref, &bp->bif_psref, bpf_psref_class); 170 } 171 172 static inline void 173 bpf_if_release(struct bpf_if *bp, struct psref *psref) 174 { 175 176 psref_release(psref, &bp->bif_psref, bpf_psref_class); 177 } 178 179 /* 180 * bpf_iflist is the list of interfaces; each corresponds to an ifnet 181 * bpf_dtab holds the descriptors, indexed by minor device # 182 */ 183 static struct pslist_head bpf_iflist; 184 static struct pslist_head bpf_dlist; 185 186 /* Macros for bpf_d on bpf_dlist */ 187 #define BPF_DLIST_WRITER_INSERT_HEAD(__d) \ 188 PSLIST_WRITER_INSERT_HEAD(&bpf_dlist, (__d), bd_bpf_dlist_entry) 189 #define BPF_DLIST_READER_FOREACH(__d) \ 190 PSLIST_READER_FOREACH((__d), &bpf_dlist, struct bpf_d, \ 191 bd_bpf_dlist_entry) 192 #define BPF_DLIST_WRITER_FOREACH(__d) \ 193 PSLIST_WRITER_FOREACH((__d), &bpf_dlist, struct bpf_d, \ 194 bd_bpf_dlist_entry) 195 #define BPF_DLIST_ENTRY_INIT(__d) \ 196 PSLIST_ENTRY_INIT((__d), bd_bpf_dlist_entry) 197 #define BPF_DLIST_WRITER_REMOVE(__d) \ 198 PSLIST_WRITER_REMOVE((__d), bd_bpf_dlist_entry) 199 #define BPF_DLIST_ENTRY_DESTROY(__d) \ 200 PSLIST_ENTRY_DESTROY((__d), bd_bpf_dlist_entry) 201 202 /* Macros for bpf_if on bpf_iflist */ 203 #define BPF_IFLIST_WRITER_INSERT_HEAD(__bp) \ 204 PSLIST_WRITER_INSERT_HEAD(&bpf_iflist, (__bp), bif_iflist_entry) 205 #define BPF_IFLIST_READER_FOREACH(__bp) \ 206 PSLIST_READER_FOREACH((__bp), &bpf_iflist, struct bpf_if, \ 207 bif_iflist_entry) 208 #define BPF_IFLIST_WRITER_FOREACH(__bp) \ 209 PSLIST_WRITER_FOREACH((__bp), &bpf_iflist, struct bpf_if, \ 210 bif_iflist_entry) 211 #define BPF_IFLIST_WRITER_REMOVE(__bp) \ 212 PSLIST_WRITER_REMOVE((__bp), bif_iflist_entry) 213 #define BPF_IFLIST_ENTRY_INIT(__bp) \ 214 PSLIST_ENTRY_INIT((__bp), bif_iflist_entry) 215 #define BPF_IFLIST_ENTRY_DESTROY(__bp) \ 216 PSLIST_ENTRY_DESTROY((__bp), bif_iflist_entry) 217 218 /* Macros for bpf_d on bpf_if#bif_dlist_pslist */ 219 #define BPFIF_DLIST_READER_FOREACH(__d, __bp) \ 220 PSLIST_READER_FOREACH((__d), &(__bp)->bif_dlist_head, struct bpf_d, \ 221 bd_bif_dlist_entry) 222 #define BPFIF_DLIST_WRITER_INSERT_HEAD(__bp, __d) \ 223 PSLIST_WRITER_INSERT_HEAD(&(__bp)->bif_dlist_head, (__d), \ 224 bd_bif_dlist_entry) 225 #define BPFIF_DLIST_WRITER_REMOVE(__d) \ 226 PSLIST_WRITER_REMOVE((__d), bd_bif_dlist_entry) 227 #define BPFIF_DLIST_ENTRY_INIT(__d) \ 228 PSLIST_ENTRY_INIT((__d), bd_bif_dlist_entry) 229 #define BPFIF_DLIST_READER_EMPTY(__bp) \ 230 (PSLIST_READER_FIRST(&(__bp)->bif_dlist_head, struct bpf_d, \ 231 bd_bif_dlist_entry) == NULL) 232 #define BPFIF_DLIST_WRITER_EMPTY(__bp) \ 233 (PSLIST_WRITER_FIRST(&(__bp)->bif_dlist_head, struct bpf_d, \ 234 bd_bif_dlist_entry) == NULL) 235 #define BPFIF_DLIST_ENTRY_DESTROY(__d) \ 236 PSLIST_ENTRY_DESTROY((__d), bd_bif_dlist_entry) 237 238 static int bpf_allocbufs(struct bpf_d *); 239 static void bpf_deliver(struct bpf_if *, 240 void *(*cpfn)(void *, const void *, size_t), 241 void *, u_int, u_int, const bool); 242 static void bpf_freed(struct bpf_d *); 243 static void bpf_free_filter(struct bpf_filter *); 244 static void bpf_ifname(struct ifnet *, struct ifreq *); 245 static void *bpf_mcpy(void *, const void *, size_t); 246 static int bpf_movein(struct uio *, int, uint64_t, 247 struct mbuf **, struct sockaddr *); 248 static void 
bpf_attachd(struct bpf_d *, struct bpf_if *); 249 static void bpf_detachd(struct bpf_d *); 250 static int bpf_setif(struct bpf_d *, struct ifreq *); 251 static int bpf_setf(struct bpf_d *, struct bpf_program *); 252 static void bpf_timed_out(void *); 253 static inline void 254 bpf_wakeup(struct bpf_d *); 255 static int bpf_hdrlen(struct bpf_d *); 256 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 257 void *(*)(void *, const void *, size_t), struct timespec *); 258 static void reset_d(struct bpf_d *); 259 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 260 static int bpf_setdlt(struct bpf_d *, u_int); 261 262 static int bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t, 263 int); 264 static int bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t, 265 int); 266 static int bpf_ioctl(struct file *, u_long, void *); 267 static int bpf_poll(struct file *, int); 268 static int bpf_stat(struct file *, struct stat *); 269 static int bpf_close(struct file *); 270 static int bpf_kqfilter(struct file *, struct knote *); 271 272 static const struct fileops bpf_fileops = { 273 .fo_name = "bpf", 274 .fo_read = bpf_read, 275 .fo_write = bpf_write, 276 .fo_ioctl = bpf_ioctl, 277 .fo_fcntl = fnullop_fcntl, 278 .fo_poll = bpf_poll, 279 .fo_stat = bpf_stat, 280 .fo_close = bpf_close, 281 .fo_kqfilter = bpf_kqfilter, 282 .fo_restart = fnullop_restart, 283 }; 284 285 dev_type_open(bpfopen); 286 287 const struct cdevsw bpf_cdevsw = { 288 .d_open = bpfopen, 289 .d_close = noclose, 290 .d_read = noread, 291 .d_write = nowrite, 292 .d_ioctl = noioctl, 293 .d_stop = nostop, 294 .d_tty = notty, 295 .d_poll = nopoll, 296 .d_mmap = nommap, 297 .d_kqfilter = nokqfilter, 298 .d_discard = nodiscard, 299 .d_flag = D_OTHER | D_MPSAFE 300 }; 301 302 bpfjit_func_t 303 bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size) 304 { 305 306 membar_consumer(); 307 if (bpfjit_module_ops.bj_generate_code != NULL) { 308 return bpfjit_module_ops.bj_generate_code(bc, code, size); 309 } 310 return NULL; 311 } 312 313 void 314 bpf_jit_freecode(bpfjit_func_t jcode) 315 { 316 KASSERT(bpfjit_module_ops.bj_free_code != NULL); 317 bpfjit_module_ops.bj_free_code(jcode); 318 } 319 320 static int 321 bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp, 322 struct sockaddr *sockp) 323 { 324 struct mbuf *m; 325 int error; 326 size_t len; 327 size_t hlen; 328 size_t align; 329 330 /* 331 * Build a sockaddr based on the data link layer type. 332 * We do this at this level because the ethernet header 333 * is copied directly into the data field of the sockaddr. 334 * In the case of SLIP, there is no header and the packet 335 * is forwarded as is. 336 * Also, we are careful to leave room at the front of the mbuf 337 * for the link level header. 338 */ 339 switch (linktype) { 340 341 case DLT_SLIP: 342 sockp->sa_family = AF_INET; 343 hlen = 0; 344 align = 0; 345 break; 346 347 case DLT_PPP: 348 sockp->sa_family = AF_UNSPEC; 349 hlen = 0; 350 align = 0; 351 break; 352 353 case DLT_EN10MB: 354 sockp->sa_family = AF_UNSPEC; 355 /* XXX Would MAXLINKHDR be better? 
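 *
 * (Illustrative addition, not part of the original source: for DLT_EN10MB
 *  a write(2) on the descriptor must supply the full 14-byte Ethernet
 *  header; it is copied into sockp->sa_data below and stripped from the
 *  mbuf before the frame is handed to the interface.  Sketch, where
 *  bpf_fd, dst_mac and src_mac are assumed to be set up by the caller:
 *
 *	struct ether_header eh;
 *	unsigned char pkt[sizeof(eh) + 64];
 *
 *	memcpy(eh.ether_dhost, dst_mac, ETHER_ADDR_LEN);
 *	memcpy(eh.ether_shost, src_mac, ETHER_ADDR_LEN);
 *	eh.ether_type = htons(ETHERTYPE_IP);
 *	memcpy(pkt, &eh, sizeof(eh));
 *	memset(pkt + sizeof(eh), 0, sizeof(pkt) - sizeof(eh));
 *	(void)write(bpf_fd, pkt, sizeof(pkt));
 * )
 *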
*/ 356 /* 6(dst)+6(src)+2(type) */ 357 hlen = sizeof(struct ether_header); 358 align = 2; 359 break; 360 361 case DLT_ARCNET: 362 sockp->sa_family = AF_UNSPEC; 363 hlen = ARC_HDRLEN; 364 align = 5; 365 break; 366 367 case DLT_FDDI: 368 sockp->sa_family = AF_LINK; 369 /* XXX 4(FORMAC)+6(dst)+6(src) */ 370 hlen = 16; 371 align = 0; 372 break; 373 374 case DLT_ECONET: 375 sockp->sa_family = AF_UNSPEC; 376 hlen = 6; 377 align = 2; 378 break; 379 380 case DLT_NULL: 381 sockp->sa_family = AF_UNSPEC; 382 hlen = 0; 383 align = 0; 384 break; 385 386 default: 387 return (EIO); 388 } 389 390 len = uio->uio_resid; 391 /* 392 * If there aren't enough bytes for a link level header or the 393 * packet length exceeds the interface mtu, return an error. 394 */ 395 if (len - hlen > mtu) 396 return (EMSGSIZE); 397 398 /* 399 * XXX Avoid complicated buffer chaining --- 400 * bail if it won't fit in a single mbuf. 401 * (Take into account possible alignment bytes) 402 */ 403 if (len + align > MCLBYTES) 404 return (EIO); 405 406 m = m_gethdr(M_WAIT, MT_DATA); 407 m_reset_rcvif(m); 408 m->m_pkthdr.len = (int)(len - hlen); 409 if (len + align > MHLEN) { 410 m_clget(m, M_WAIT); 411 if ((m->m_flags & M_EXT) == 0) { 412 error = ENOBUFS; 413 goto bad; 414 } 415 } 416 417 /* Insure the data is properly aligned */ 418 if (align > 0) { 419 m->m_data += align; 420 m->m_len -= (int)align; 421 } 422 423 error = uiomove(mtod(m, void *), len, uio); 424 if (error) 425 goto bad; 426 if (hlen != 0) { 427 memcpy(sockp->sa_data, mtod(m, void *), hlen); 428 m->m_data += hlen; /* XXX */ 429 len -= hlen; 430 } 431 m->m_len = (int)len; 432 *mp = m; 433 return (0); 434 435 bad: 436 m_freem(m); 437 return (error); 438 } 439 440 /* 441 * Attach file to the bpf interface, i.e. make d listen on bp. 442 */ 443 static void 444 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 445 { 446 447 KASSERT(mutex_owned(&bpf_mtx)); 448 KASSERT(mutex_owned(d->bd_mtx)); 449 /* 450 * Point d at bp, and add d to the interface's list of listeners. 451 * Finally, point the driver's bpf cookie at the interface so 452 * it will divert packets to bpf. 453 */ 454 d->bd_bif = bp; 455 BPFIF_DLIST_WRITER_INSERT_HEAD(bp, d); 456 457 *bp->bif_driverp = bp; 458 } 459 460 /* 461 * Detach a file from its interface. 462 */ 463 static void 464 bpf_detachd(struct bpf_d *d) 465 { 466 struct bpf_if *bp; 467 468 KASSERT(mutex_owned(&bpf_mtx)); 469 KASSERT(mutex_owned(d->bd_mtx)); 470 471 bp = d->bd_bif; 472 /* 473 * Check if this descriptor had requested promiscuous mode. 474 * If so, turn it off. 475 */ 476 if (d->bd_promisc) { 477 int error __diagused; 478 479 d->bd_promisc = 0; 480 /* 481 * Take device out of promiscuous mode. Since we were 482 * able to enter promiscuous mode, we should be able 483 * to turn it off. But we can get an error if 484 * the interface was configured down, so only panic 485 * if we don't get an unexpected error. 486 */ 487 KERNEL_LOCK_UNLESS_NET_MPSAFE(); 488 error = ifpromisc(bp->bif_ifp, 0); 489 KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 490 #ifdef DIAGNOSTIC 491 if (error) 492 printf("%s: ifpromisc failed: %d", __func__, error); 493 #endif 494 } 495 496 /* Remove d from the interface's descriptor list. */ 497 BPFIF_DLIST_WRITER_REMOVE(d); 498 499 pserialize_perform(bpf_psz); 500 501 if (BPFIF_DLIST_WRITER_EMPTY(bp)) { 502 /* 503 * Let the driver know that there are no more listeners. 
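 *
 * (Illustrative addition, not part of the original source: bif_driverp
 *  points at the interface's if_bpf member, so clearing it lets the
 *  driver-side tap wrappers skip BPF with a cheap NULL check, roughly
 *
 *	if (ifp->if_bpf != NULL)
 *		bpf_ops->bpf_mtap(ifp->if_bpf, m);
 *
 *  on the transmit and receive paths.)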
504 */ 505 *d->bd_bif->bif_driverp = NULL; 506 } 507 d->bd_bif = NULL; 508 } 509 510 static void 511 bpf_init(void) 512 { 513 514 mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE); 515 bpf_psz = pserialize_create(); 516 bpf_psref_class = psref_class_create("bpf", IPL_SOFTNET); 517 518 PSLIST_INIT(&bpf_iflist); 519 PSLIST_INIT(&bpf_dlist); 520 521 bpf_gstats_percpu = percpu_alloc(sizeof(struct bpf_stat)); 522 523 return; 524 } 525 526 /* 527 * bpfilterattach() is called at boot time. We don't need to do anything 528 * here, since any initialization will happen as part of module init code. 529 */ 530 /* ARGSUSED */ 531 void 532 bpfilterattach(int n) 533 { 534 535 } 536 537 /* 538 * Open ethernet device. Clones. 539 */ 540 /* ARGSUSED */ 541 int 542 bpfopen(dev_t dev, int flag, int mode, struct lwp *l) 543 { 544 struct bpf_d *d; 545 struct file *fp; 546 int error, fd; 547 548 /* falloc() will fill in the descriptor for us. */ 549 if ((error = fd_allocfile(&fp, &fd)) != 0) 550 return error; 551 552 d = kmem_zalloc(sizeof(*d), KM_SLEEP); 553 d->bd_bufsize = bpf_bufsize; 554 d->bd_seesent = 1; 555 d->bd_feedback = 0; 556 d->bd_pid = l->l_proc->p_pid; 557 #ifdef _LP64 558 if (curproc->p_flag & PK_32) 559 d->bd_compat32 = 1; 560 #endif 561 getnanotime(&d->bd_btime); 562 d->bd_atime = d->bd_mtime = d->bd_btime; 563 callout_init(&d->bd_callout, CALLOUT_MPSAFE); 564 selinit(&d->bd_sel); 565 d->bd_jitcode = NULL; 566 d->bd_filter = NULL; 567 BPF_DLIST_ENTRY_INIT(d); 568 BPFIF_DLIST_ENTRY_INIT(d); 569 d->bd_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET); 570 d->bd_buf_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET); 571 cv_init(&d->bd_cv, "bpf"); 572 573 mutex_enter(&bpf_mtx); 574 BPF_DLIST_WRITER_INSERT_HEAD(d); 575 mutex_exit(&bpf_mtx); 576 577 return fd_clone(fp, fd, flag, &bpf_fileops, d); 578 } 579 580 /* 581 * Close the descriptor by detaching it from its interface, 582 * deallocating its buffers, and marking it free. 583 */ 584 /* ARGSUSED */ 585 static int 586 bpf_close(struct file *fp) 587 { 588 struct bpf_d *d; 589 590 mutex_enter(&bpf_mtx); 591 592 if ((d = fp->f_bpf) == NULL) { 593 mutex_exit(&bpf_mtx); 594 return 0; 595 } 596 597 /* 598 * Refresh the PID associated with this bpf file. 599 */ 600 d->bd_pid = curproc->p_pid; 601 602 mutex_enter(d->bd_mtx); 603 if (d->bd_state == BPF_WAITING) 604 callout_halt(&d->bd_callout, d->bd_mtx); 605 d->bd_state = BPF_IDLE; 606 if (d->bd_bif) 607 bpf_detachd(d); 608 mutex_exit(d->bd_mtx); 609 610 BPF_DLIST_WRITER_REMOVE(d); 611 612 pserialize_perform(bpf_psz); 613 mutex_exit(&bpf_mtx); 614 615 BPFIF_DLIST_ENTRY_DESTROY(d); 616 BPF_DLIST_ENTRY_DESTROY(d); 617 fp->f_bpf = NULL; 618 bpf_freed(d); 619 callout_destroy(&d->bd_callout); 620 seldestroy(&d->bd_sel); 621 mutex_obj_free(d->bd_mtx); 622 mutex_obj_free(d->bd_buf_mtx); 623 cv_destroy(&d->bd_cv); 624 625 kmem_free(d, sizeof(*d)); 626 627 return (0); 628 } 629 630 /* 631 * Rotate the packet buffers in descriptor d. Move the store buffer 632 * into the hold slot, and the free buffer into the store slot. 633 * Zero the length of the new store buffer. 
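 *
 * (Illustrative addition, not part of the original source: bpf_read()
 *  below insists that the read(2) buffer match the kernel buffer size,
 *  so a reader typically sizes its buffer with BIOCGBLEN first.  Sketch,
 *  assuming fd is an open /dev/bpf descriptor:
 *
 *	u_int blen;
 *	char *buf;
 *	ssize_t nread;
 *
 *	if (ioctl(fd, BIOCGBLEN, &blen) == -1)
 *		err(1, "BIOCGBLEN");
 *	buf = malloc(blen);
 *	nread = read(fd, buf, blen);
 * )
 *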
634 */ 635 #define ROTATE_BUFFERS(d) \ 636 (d)->bd_hbuf = (d)->bd_sbuf; \ 637 (d)->bd_hlen = (d)->bd_slen; \ 638 (d)->bd_sbuf = (d)->bd_fbuf; \ 639 (d)->bd_slen = 0; \ 640 (d)->bd_fbuf = NULL; 641 /* 642 * bpfread - read next chunk of packets from buffers 643 */ 644 static int 645 bpf_read(struct file *fp, off_t *offp, struct uio *uio, 646 kauth_cred_t cred, int flags) 647 { 648 struct bpf_d *d = fp->f_bpf; 649 int timed_out; 650 int error; 651 652 getnanotime(&d->bd_atime); 653 /* 654 * Restrict application to use a buffer the same size as 655 * the kernel buffers. 656 */ 657 if (uio->uio_resid != d->bd_bufsize) 658 return (EINVAL); 659 660 mutex_enter(d->bd_mtx); 661 if (d->bd_state == BPF_WAITING) 662 callout_halt(&d->bd_callout, d->bd_mtx); 663 timed_out = (d->bd_state == BPF_TIMED_OUT); 664 d->bd_state = BPF_IDLE; 665 mutex_exit(d->bd_mtx); 666 /* 667 * If the hold buffer is empty, then do a timed sleep, which 668 * ends when the timeout expires or when enough packets 669 * have arrived to fill the store buffer. 670 */ 671 mutex_enter(d->bd_buf_mtx); 672 while (d->bd_hbuf == NULL) { 673 if (fp->f_flag & FNONBLOCK) { 674 if (d->bd_slen == 0) { 675 error = EWOULDBLOCK; 676 goto out; 677 } 678 ROTATE_BUFFERS(d); 679 break; 680 } 681 682 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 683 /* 684 * A packet(s) either arrived since the previous 685 * read or arrived while we were asleep. 686 * Rotate the buffers and return what's here. 687 */ 688 ROTATE_BUFFERS(d); 689 break; 690 } 691 692 error = cv_timedwait_sig(&d->bd_cv, d->bd_buf_mtx, d->bd_rtout); 693 694 if (error == EINTR || error == ERESTART) 695 goto out; 696 697 if (error == EWOULDBLOCK) { 698 /* 699 * On a timeout, return what's in the buffer, 700 * which may be nothing. If there is something 701 * in the store buffer, we can rotate the buffers. 702 */ 703 if (d->bd_hbuf) 704 /* 705 * We filled up the buffer in between 706 * getting the timeout and arriving 707 * here, so we don't need to rotate. 708 */ 709 break; 710 711 if (d->bd_slen == 0) { 712 error = 0; 713 goto out; 714 } 715 ROTATE_BUFFERS(d); 716 break; 717 } 718 if (error != 0) 719 goto out; 720 } 721 /* 722 * At this point, we know we have something in the hold slot. 723 */ 724 mutex_exit(d->bd_buf_mtx); 725 726 /* 727 * Move data from hold buffer into user space. 728 * We know the entire buffer is transferred since 729 * we checked above that the read buffer is bpf_bufsize bytes. 730 */ 731 error = uiomove(d->bd_hbuf, d->bd_hlen, uio); 732 733 mutex_enter(d->bd_buf_mtx); 734 d->bd_fbuf = d->bd_hbuf; 735 d->bd_hbuf = NULL; 736 d->bd_hlen = 0; 737 out: 738 mutex_exit(d->bd_buf_mtx); 739 return (error); 740 } 741 742 743 /* 744 * If there are processes sleeping on this descriptor, wake them up. 
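 *
 * (Illustrative addition, not part of the original source: besides the
 *  cv_broadcast()/selnotify() below, a descriptor put in async mode gets
 *  a SIGIO through fownsignal().  Userland sketch, where fd is the bpf
 *  descriptor and handle_sigio is a hypothetical signal handler:
 *
 *	int on = 1, pid = getpid();
 *
 *	signal(SIGIO, handle_sigio);
 *	ioctl(fd, FIOSETOWN, &pid);
 *	ioctl(fd, FIOASYNC, &on);
 * )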
745 */ 746 static inline void 747 bpf_wakeup(struct bpf_d *d) 748 { 749 750 mutex_enter(d->bd_buf_mtx); 751 cv_broadcast(&d->bd_cv); 752 mutex_exit(d->bd_buf_mtx); 753 754 if (d->bd_async) 755 fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL); 756 selnotify(&d->bd_sel, 0, 0); 757 } 758 759 static void 760 bpf_timed_out(void *arg) 761 { 762 struct bpf_d *d = arg; 763 764 mutex_enter(d->bd_mtx); 765 if (d->bd_state == BPF_WAITING) { 766 d->bd_state = BPF_TIMED_OUT; 767 if (d->bd_slen != 0) 768 bpf_wakeup(d); 769 } 770 mutex_exit(d->bd_mtx); 771 } 772 773 774 static int 775 bpf_write(struct file *fp, off_t *offp, struct uio *uio, 776 kauth_cred_t cred, int flags) 777 { 778 struct bpf_d *d = fp->f_bpf; 779 struct bpf_if *bp; 780 struct ifnet *ifp; 781 struct mbuf *m, *mc; 782 int error; 783 static struct sockaddr_storage dst; 784 struct psref psref; 785 int bound; 786 787 m = NULL; /* XXX gcc */ 788 789 bound = curlwp_bind(); 790 mutex_enter(d->bd_mtx); 791 bp = d->bd_bif; 792 if (bp == NULL) { 793 mutex_exit(d->bd_mtx); 794 error = ENXIO; 795 goto out_bindx; 796 } 797 bpf_if_acquire(bp, &psref); 798 mutex_exit(d->bd_mtx); 799 800 getnanotime(&d->bd_mtime); 801 802 ifp = bp->bif_ifp; 803 if (if_is_deactivated(ifp)) { 804 error = ENXIO; 805 goto out; 806 } 807 808 if (uio->uio_resid == 0) { 809 error = 0; 810 goto out; 811 } 812 813 error = bpf_movein(uio, (int)bp->bif_dlt, ifp->if_mtu, &m, 814 (struct sockaddr *) &dst); 815 if (error) 816 goto out; 817 818 if (m->m_pkthdr.len > ifp->if_mtu) { 819 m_freem(m); 820 error = EMSGSIZE; 821 goto out; 822 } 823 824 if (d->bd_hdrcmplt) 825 dst.ss_family = pseudo_AF_HDRCMPLT; 826 827 if (d->bd_feedback) { 828 mc = m_dup(m, 0, M_COPYALL, M_NOWAIT); 829 if (mc != NULL) 830 m_set_rcvif(mc, ifp); 831 /* Set M_PROMISC for outgoing packets to be discarded. */ 832 if (1 /*d->bd_direction == BPF_D_INOUT*/) 833 m->m_flags |= M_PROMISC; 834 } else 835 mc = NULL; 836 837 error = if_output_lock(ifp, ifp, m, (struct sockaddr *) &dst, NULL); 838 839 if (mc != NULL) { 840 if (error == 0) 841 ifp->_if_input(ifp, mc); 842 else 843 m_freem(mc); 844 } 845 /* 846 * The driver frees the mbuf. 847 */ 848 out: 849 bpf_if_release(bp, &psref); 850 out_bindx: 851 curlwp_bindx(bound); 852 return error; 853 } 854 855 /* 856 * Reset a descriptor by flushing its packet buffer and clearing the 857 * receive and drop counts. 858 */ 859 static void 860 reset_d(struct bpf_d *d) 861 { 862 863 KASSERT(mutex_owned(d->bd_mtx)); 864 865 mutex_enter(d->bd_buf_mtx); 866 if (d->bd_hbuf) { 867 /* Free the hold buffer. */ 868 d->bd_fbuf = d->bd_hbuf; 869 d->bd_hbuf = NULL; 870 } 871 d->bd_slen = 0; 872 d->bd_hlen = 0; 873 d->bd_rcount = 0; 874 d->bd_dcount = 0; 875 d->bd_ccount = 0; 876 mutex_exit(d->bd_buf_mtx); 877 } 878 879 /* 880 * FIONREAD Check for read packet available. 881 * BIOCGBLEN Get buffer len [for read()]. 882 * BIOCSETF Set ethernet read filter. 883 * BIOCFLUSH Flush read packet buffer. 884 * BIOCPROMISC Put interface into promiscuous mode. 885 * BIOCGDLT Get link layer type. 886 * BIOCGETIF Get interface name. 887 * BIOCSETIF Set interface. 888 * BIOCSRTIMEOUT Set read timeout. 889 * BIOCGRTIMEOUT Get read timeout. 890 * BIOCGSTATS Get packet stats. 891 * BIOCIMMEDIATE Set immediate mode. 892 * BIOCVERSION Get filter language version. 893 * BIOCGHDRCMPLT Get "header already complete" flag. 894 * BIOCSHDRCMPLT Set "header already complete" flag. 895 * BIOCSFEEDBACK Set packet feedback mode. 896 * BIOCGFEEDBACK Get packet feedback mode. 897 * BIOCGSEESENT Get "see sent packets" mode. 
898 * BIOCSSEESENT Set "see sent packets" mode. 899 */ 900 /* ARGSUSED */ 901 static int 902 bpf_ioctl(struct file *fp, u_long cmd, void *addr) 903 { 904 struct bpf_d *d = fp->f_bpf; 905 int error = 0; 906 907 /* 908 * Refresh the PID associated with this bpf file. 909 */ 910 d->bd_pid = curproc->p_pid; 911 #ifdef _LP64 912 if (curproc->p_flag & PK_32) 913 d->bd_compat32 = 1; 914 else 915 d->bd_compat32 = 0; 916 #endif 917 918 mutex_enter(d->bd_mtx); 919 if (d->bd_state == BPF_WAITING) 920 callout_halt(&d->bd_callout, d->bd_mtx); 921 d->bd_state = BPF_IDLE; 922 mutex_exit(d->bd_mtx); 923 924 switch (cmd) { 925 926 default: 927 error = EINVAL; 928 break; 929 930 /* 931 * Check for read packet available. 932 */ 933 case FIONREAD: 934 { 935 int n; 936 937 mutex_enter(d->bd_buf_mtx); 938 n = d->bd_slen; 939 if (d->bd_hbuf) 940 n += d->bd_hlen; 941 mutex_exit(d->bd_buf_mtx); 942 943 *(int *)addr = n; 944 break; 945 } 946 947 /* 948 * Get buffer len [for read()]. 949 */ 950 case BIOCGBLEN: 951 *(u_int *)addr = d->bd_bufsize; 952 break; 953 954 /* 955 * Set buffer length. 956 */ 957 case BIOCSBLEN: 958 /* 959 * Forbid to change the buffer length if buffers are already 960 * allocated. 961 */ 962 mutex_enter(d->bd_mtx); 963 mutex_enter(d->bd_buf_mtx); 964 if (d->bd_bif != NULL || d->bd_sbuf != NULL) 965 error = EINVAL; 966 else { 967 u_int size = *(u_int *)addr; 968 969 if (size > bpf_maxbufsize) 970 *(u_int *)addr = size = bpf_maxbufsize; 971 else if (size < BPF_MINBUFSIZE) 972 *(u_int *)addr = size = BPF_MINBUFSIZE; 973 d->bd_bufsize = size; 974 } 975 mutex_exit(d->bd_buf_mtx); 976 mutex_exit(d->bd_mtx); 977 break; 978 979 /* 980 * Set link layer read filter. 981 */ 982 case BIOCSETF: 983 error = bpf_setf(d, addr); 984 break; 985 986 /* 987 * Flush read packet buffer. 988 */ 989 case BIOCFLUSH: 990 mutex_enter(d->bd_mtx); 991 reset_d(d); 992 mutex_exit(d->bd_mtx); 993 break; 994 995 /* 996 * Put interface into promiscuous mode. 997 */ 998 case BIOCPROMISC: 999 mutex_enter(d->bd_mtx); 1000 if (d->bd_bif == NULL) { 1001 mutex_exit(d->bd_mtx); 1002 /* 1003 * No interface attached yet. 1004 */ 1005 error = EINVAL; 1006 break; 1007 } 1008 if (d->bd_promisc == 0) { 1009 KERNEL_LOCK_UNLESS_NET_MPSAFE(); 1010 error = ifpromisc(d->bd_bif->bif_ifp, 1); 1011 KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 1012 if (error == 0) 1013 d->bd_promisc = 1; 1014 } 1015 mutex_exit(d->bd_mtx); 1016 break; 1017 1018 /* 1019 * Get device parameters. 1020 */ 1021 case BIOCGDLT: 1022 mutex_enter(d->bd_mtx); 1023 if (d->bd_bif == NULL) 1024 error = EINVAL; 1025 else 1026 *(u_int *)addr = d->bd_bif->bif_dlt; 1027 mutex_exit(d->bd_mtx); 1028 break; 1029 1030 /* 1031 * Get a list of supported device parameters. 1032 */ 1033 case BIOCGDLTLIST: 1034 mutex_enter(d->bd_mtx); 1035 if (d->bd_bif == NULL) 1036 error = EINVAL; 1037 else 1038 error = bpf_getdltlist(d, addr); 1039 mutex_exit(d->bd_mtx); 1040 break; 1041 1042 /* 1043 * Set device parameters. 1044 */ 1045 case BIOCSDLT: 1046 mutex_enter(&bpf_mtx); 1047 mutex_enter(d->bd_mtx); 1048 if (d->bd_bif == NULL) 1049 error = EINVAL; 1050 else 1051 error = bpf_setdlt(d, *(u_int *)addr); 1052 mutex_exit(d->bd_mtx); 1053 mutex_exit(&bpf_mtx); 1054 break; 1055 1056 /* 1057 * Set interface name. 1058 */ 1059 #ifdef OBIOCGETIF 1060 case OBIOCGETIF: 1061 #endif 1062 case BIOCGETIF: 1063 mutex_enter(d->bd_mtx); 1064 if (d->bd_bif == NULL) 1065 error = EINVAL; 1066 else 1067 bpf_ifname(d->bd_bif->bif_ifp, addr); 1068 mutex_exit(d->bd_mtx); 1069 break; 1070 1071 /* 1072 * Set interface. 
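 *
 * (Illustrative addition, not part of the original source: BIOCSETIF
 *  takes a struct ifreq naming the interface.  Userland sketch, with
 *  "wm0" as a placeholder interface name and fd the bpf descriptor:
 *
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "wm0", sizeof(ifr.ifr_name));
 *	if (ioctl(fd, BIOCSETIF, &ifr) == -1)
 *		err(1, "BIOCSETIF");
 * )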
1073 */ 1074 #ifdef OBIOCSETIF 1075 case OBIOCSETIF: 1076 #endif 1077 case BIOCSETIF: 1078 mutex_enter(&bpf_mtx); 1079 error = bpf_setif(d, addr); 1080 mutex_exit(&bpf_mtx); 1081 break; 1082 1083 /* 1084 * Set read timeout. 1085 */ 1086 case BIOCSRTIMEOUT: 1087 { 1088 struct timeval *tv = addr; 1089 1090 /* Compute number of ticks. */ 1091 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick; 1092 if ((d->bd_rtout == 0) && (tv->tv_usec != 0)) 1093 d->bd_rtout = 1; 1094 break; 1095 } 1096 1097 #ifdef BIOCGORTIMEOUT 1098 /* 1099 * Get read timeout. 1100 */ 1101 case BIOCGORTIMEOUT: 1102 { 1103 struct timeval50 *tv = addr; 1104 1105 tv->tv_sec = d->bd_rtout / hz; 1106 tv->tv_usec = (d->bd_rtout % hz) * tick; 1107 break; 1108 } 1109 #endif 1110 1111 #ifdef BIOCSORTIMEOUT 1112 /* 1113 * Set read timeout. 1114 */ 1115 case BIOCSORTIMEOUT: 1116 { 1117 struct timeval50 *tv = addr; 1118 1119 /* Compute number of ticks. */ 1120 d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick; 1121 if ((d->bd_rtout == 0) && (tv->tv_usec != 0)) 1122 d->bd_rtout = 1; 1123 break; 1124 } 1125 #endif 1126 1127 /* 1128 * Get read timeout. 1129 */ 1130 case BIOCGRTIMEOUT: 1131 { 1132 struct timeval *tv = addr; 1133 1134 tv->tv_sec = d->bd_rtout / hz; 1135 tv->tv_usec = (d->bd_rtout % hz) * tick; 1136 break; 1137 } 1138 /* 1139 * Get packet stats. 1140 */ 1141 case BIOCGSTATS: 1142 { 1143 struct bpf_stat *bs = addr; 1144 1145 bs->bs_recv = d->bd_rcount; 1146 bs->bs_drop = d->bd_dcount; 1147 bs->bs_capt = d->bd_ccount; 1148 break; 1149 } 1150 1151 case BIOCGSTATSOLD: 1152 { 1153 struct bpf_stat_old *bs = addr; 1154 1155 bs->bs_recv = d->bd_rcount; 1156 bs->bs_drop = d->bd_dcount; 1157 break; 1158 } 1159 1160 /* 1161 * Set immediate mode. 1162 */ 1163 case BIOCIMMEDIATE: 1164 d->bd_immediate = *(u_int *)addr; 1165 break; 1166 1167 case BIOCVERSION: 1168 { 1169 struct bpf_version *bv = addr; 1170 1171 bv->bv_major = BPF_MAJOR_VERSION; 1172 bv->bv_minor = BPF_MINOR_VERSION; 1173 break; 1174 } 1175 1176 case BIOCGHDRCMPLT: /* get "header already complete" flag */ 1177 *(u_int *)addr = d->bd_hdrcmplt; 1178 break; 1179 1180 case BIOCSHDRCMPLT: /* set "header already complete" flag */ 1181 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0; 1182 break; 1183 1184 /* 1185 * Get "see sent packets" flag 1186 */ 1187 case BIOCGSEESENT: 1188 *(u_int *)addr = d->bd_seesent; 1189 break; 1190 1191 /* 1192 * Set "see sent" packets flag 1193 */ 1194 case BIOCSSEESENT: 1195 d->bd_seesent = *(u_int *)addr; 1196 break; 1197 1198 /* 1199 * Set "feed packets from bpf back to input" mode 1200 */ 1201 case BIOCSFEEDBACK: 1202 d->bd_feedback = *(u_int *)addr; 1203 break; 1204 1205 /* 1206 * Get "feed packets from bpf back to input" mode 1207 */ 1208 case BIOCGFEEDBACK: 1209 *(u_int *)addr = d->bd_feedback; 1210 break; 1211 1212 case FIONBIO: /* Non-blocking I/O */ 1213 /* 1214 * No need to do anything special as we use IO_NDELAY in 1215 * bpfread() as an indication of whether or not to block 1216 * the read. 1217 */ 1218 break; 1219 1220 case FIOASYNC: /* Send signal on receive packets */ 1221 mutex_enter(d->bd_mtx); 1222 d->bd_async = *(int *)addr; 1223 mutex_exit(d->bd_mtx); 1224 break; 1225 1226 case TIOCSPGRP: /* Process or group to send signals to */ 1227 case FIOSETOWN: 1228 error = fsetown(&d->bd_pgid, cmd, addr); 1229 break; 1230 1231 case TIOCGPGRP: 1232 case FIOGETOWN: 1233 error = fgetown(d->bd_pgid, cmd, addr); 1234 break; 1235 } 1236 return (error); 1237 } 1238 1239 /* 1240 * Set d's packet filter program to fp. 
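 *
 * (Illustrative addition, not part of the original source: a userland
 *  program installs such a filter with BIOCSETF; the classic
 *  "accept ARP, drop everything else" example, assuming an open
 *  descriptor fd, looks like:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ETHERTYPE_ARP, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *		BPF_STMT(BPF_RET+BPF_K, 0),
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = __arraycount(insns),
 *		.bf_insns = insns,
 *	};
 *
 *	if (ioctl(fd, BIOCSETF, &prog) == -1)
 *		err(1, "BIOCSETF");
 * )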
If this file already has a filter, 1241 * free it and replace it. Returns EINVAL for bogus requests. 1242 */ 1243 static int 1244 bpf_setf(struct bpf_d *d, struct bpf_program *fp) 1245 { 1246 struct bpf_insn *fcode; 1247 bpfjit_func_t jcode; 1248 size_t flen, size = 0; 1249 struct bpf_filter *oldf, *newf; 1250 1251 jcode = NULL; 1252 flen = fp->bf_len; 1253 1254 if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) { 1255 return EINVAL; 1256 } 1257 1258 if (flen) { 1259 /* 1260 * Allocate the buffer, copy the byte-code from 1261 * userspace and validate it. 1262 */ 1263 size = flen * sizeof(*fp->bf_insns); 1264 fcode = kmem_alloc(size, KM_SLEEP); 1265 if (copyin(fp->bf_insns, fcode, size) != 0 || 1266 !bpf_validate(fcode, (int)flen)) { 1267 kmem_free(fcode, size); 1268 return EINVAL; 1269 } 1270 membar_consumer(); 1271 if (bpf_jit) 1272 jcode = bpf_jit_generate(NULL, fcode, flen); 1273 } else { 1274 fcode = NULL; 1275 } 1276 1277 newf = kmem_alloc(sizeof(*newf), KM_SLEEP); 1278 newf->bf_insn = fcode; 1279 newf->bf_size = size; 1280 newf->bf_jitcode = jcode; 1281 d->bd_jitcode = jcode; /* XXX just for kvm(3) users */ 1282 1283 /* Need to hold bpf_mtx for pserialize_perform */ 1284 mutex_enter(&bpf_mtx); 1285 mutex_enter(d->bd_mtx); 1286 oldf = d->bd_filter; 1287 d->bd_filter = newf; 1288 membar_producer(); 1289 reset_d(d); 1290 pserialize_perform(bpf_psz); 1291 mutex_exit(d->bd_mtx); 1292 mutex_exit(&bpf_mtx); 1293 1294 if (oldf != NULL) 1295 bpf_free_filter(oldf); 1296 1297 return 0; 1298 } 1299 1300 /* 1301 * Detach a file from its current interface (if attached at all) and attach 1302 * to the interface indicated by the name stored in ifr. 1303 * Return an errno or 0. 1304 */ 1305 static int 1306 bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1307 { 1308 struct bpf_if *bp; 1309 char *cp; 1310 int unit_seen, i, error; 1311 1312 KASSERT(mutex_owned(&bpf_mtx)); 1313 /* 1314 * Make sure the provided name has a unit number, and default 1315 * it to '0' if not specified. 1316 * XXX This is ugly ... do this differently? 1317 */ 1318 unit_seen = 0; 1319 cp = ifr->ifr_name; 1320 cp[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */ 1321 while (*cp++) 1322 if (*cp >= '0' && *cp <= '9') 1323 unit_seen = 1; 1324 if (!unit_seen) { 1325 /* Make sure to leave room for the '\0'. */ 1326 for (i = 0; i < (IFNAMSIZ - 1); ++i) { 1327 if ((ifr->ifr_name[i] >= 'a' && 1328 ifr->ifr_name[i] <= 'z') || 1329 (ifr->ifr_name[i] >= 'A' && 1330 ifr->ifr_name[i] <= 'Z')) 1331 continue; 1332 ifr->ifr_name[i] = '0'; 1333 } 1334 } 1335 1336 /* 1337 * Look through attached interfaces for the named one. 1338 */ 1339 BPF_IFLIST_WRITER_FOREACH(bp) { 1340 struct ifnet *ifp = bp->bif_ifp; 1341 1342 if (ifp == NULL || 1343 strcmp(ifp->if_xname, ifr->ifr_name) != 0) 1344 continue; 1345 /* skip additional entry */ 1346 if (bp->bif_driverp != &ifp->if_bpf) 1347 continue; 1348 /* 1349 * We found the requested interface. 1350 * Allocate the packet buffers if we need to. 1351 * If we're already attached to requested interface, 1352 * just flush the buffer. 1353 */ 1354 /* 1355 * bpf_allocbufs is called only here. bpf_mtx ensures that 1356 * no race condition happen on d->bd_sbuf. 1357 */ 1358 if (d->bd_sbuf == NULL) { 1359 error = bpf_allocbufs(d); 1360 if (error != 0) 1361 return (error); 1362 } 1363 mutex_enter(d->bd_mtx); 1364 if (bp != d->bd_bif) { 1365 if (d->bd_bif) { 1366 /* 1367 * Detach if attached to something else. 
1368 */ 1369 bpf_detachd(d); 1370 BPFIF_DLIST_ENTRY_INIT(d); 1371 } 1372 1373 bpf_attachd(d, bp); 1374 } 1375 reset_d(d); 1376 mutex_exit(d->bd_mtx); 1377 return (0); 1378 } 1379 /* Not found. */ 1380 return (ENXIO); 1381 } 1382 1383 /* 1384 * Copy the interface name to the ifreq. 1385 */ 1386 static void 1387 bpf_ifname(struct ifnet *ifp, struct ifreq *ifr) 1388 { 1389 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 1390 } 1391 1392 static int 1393 bpf_stat(struct file *fp, struct stat *st) 1394 { 1395 struct bpf_d *d = fp->f_bpf; 1396 1397 (void)memset(st, 0, sizeof(*st)); 1398 mutex_enter(d->bd_mtx); 1399 st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid); 1400 st->st_atimespec = d->bd_atime; 1401 st->st_mtimespec = d->bd_mtime; 1402 st->st_ctimespec = st->st_birthtimespec = d->bd_btime; 1403 st->st_uid = kauth_cred_geteuid(fp->f_cred); 1404 st->st_gid = kauth_cred_getegid(fp->f_cred); 1405 st->st_mode = S_IFCHR; 1406 mutex_exit(d->bd_mtx); 1407 return 0; 1408 } 1409 1410 /* 1411 * Support for poll() system call 1412 * 1413 * Return true iff the specific operation will not block indefinitely - with 1414 * the assumption that it is safe to positively acknowledge a request for the 1415 * ability to write to the BPF device. 1416 * Otherwise, return false but make a note that a selnotify() must be done. 1417 */ 1418 static int 1419 bpf_poll(struct file *fp, int events) 1420 { 1421 struct bpf_d *d = fp->f_bpf; 1422 int revents; 1423 1424 /* 1425 * Refresh the PID associated with this bpf file. 1426 */ 1427 mutex_enter(&bpf_mtx); 1428 d->bd_pid = curproc->p_pid; 1429 1430 revents = events & (POLLOUT | POLLWRNORM); 1431 if (events & (POLLIN | POLLRDNORM)) { 1432 /* 1433 * An imitation of the FIONREAD ioctl code. 1434 */ 1435 mutex_enter(d->bd_mtx); 1436 if (d->bd_hlen != 0 || 1437 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 1438 d->bd_slen != 0)) { 1439 revents |= events & (POLLIN | POLLRDNORM); 1440 } else { 1441 selrecord(curlwp, &d->bd_sel); 1442 /* Start the read timeout if necessary */ 1443 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1444 callout_reset(&d->bd_callout, d->bd_rtout, 1445 bpf_timed_out, d); 1446 d->bd_state = BPF_WAITING; 1447 } 1448 } 1449 mutex_exit(d->bd_mtx); 1450 } 1451 1452 mutex_exit(&bpf_mtx); 1453 return (revents); 1454 } 1455 1456 static void 1457 filt_bpfrdetach(struct knote *kn) 1458 { 1459 struct bpf_d *d = kn->kn_hook; 1460 1461 mutex_enter(d->bd_buf_mtx); 1462 SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext); 1463 mutex_exit(d->bd_buf_mtx); 1464 } 1465 1466 static int 1467 filt_bpfread(struct knote *kn, long hint) 1468 { 1469 struct bpf_d *d = kn->kn_hook; 1470 int rv; 1471 1472 mutex_enter(d->bd_buf_mtx); 1473 kn->kn_data = d->bd_hlen; 1474 if (d->bd_immediate) 1475 kn->kn_data += d->bd_slen; 1476 rv = (kn->kn_data > 0); 1477 mutex_exit(d->bd_buf_mtx); 1478 return rv; 1479 } 1480 1481 static const struct filterops bpfread_filtops = { 1482 .f_isfd = 1, 1483 .f_attach = NULL, 1484 .f_detach = filt_bpfrdetach, 1485 .f_event = filt_bpfread, 1486 }; 1487 1488 static int 1489 bpf_kqfilter(struct file *fp, struct knote *kn) 1490 { 1491 struct bpf_d *d = fp->f_bpf; 1492 struct klist *klist; 1493 1494 mutex_enter(d->bd_buf_mtx); 1495 switch (kn->kn_filter) { 1496 case EVFILT_READ: 1497 klist = &d->bd_sel.sel_klist; 1498 kn->kn_fop = &bpfread_filtops; 1499 break; 1500 1501 default: 1502 mutex_exit(d->bd_buf_mtx); 1503 return (EINVAL); 1504 } 1505 1506 kn->kn_hook = d; 1507 1508 SLIST_INSERT_HEAD(klist, kn, kn_selnext); 1509 
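	/*
	 * (Illustrative addition, not part of the original source: once the
	 * knote is registered, a userland consumer can wait for captured
	 * data with kqueue(2); kn_data is filled in by filt_bpfread() above.
	 * Sketch, assuming fd is the bpf descriptor:
	 *
	 *	struct kevent kev;
	 *	int kq = kqueue();
	 *
	 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	 *	kevent(kq, &kev, 1, NULL, 0, NULL);
	 *	kevent(kq, NULL, 0, &kev, 1, NULL);	then read(fd, ...)
	 * )
	 */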
mutex_exit(d->bd_buf_mtx); 1510 1511 return (0); 1512 } 1513 1514 /* 1515 * Copy data from an mbuf chain into a buffer. This code is derived 1516 * from m_copydata in sys/uipc_mbuf.c. 1517 */ 1518 static void * 1519 bpf_mcpy(void *dst_arg, const void *src_arg, size_t len) 1520 { 1521 const struct mbuf *m; 1522 u_int count; 1523 u_char *dst; 1524 1525 m = src_arg; 1526 dst = dst_arg; 1527 while (len > 0) { 1528 if (m == NULL) 1529 panic("bpf_mcpy"); 1530 count = min(m->m_len, len); 1531 memcpy(dst, mtod(m, const void *), count); 1532 m = m->m_next; 1533 dst += count; 1534 len -= count; 1535 } 1536 return dst_arg; 1537 } 1538 1539 /* 1540 * Dispatch a packet to all the listeners on interface bp. 1541 * 1542 * pkt pointer to the packet, either a data buffer or an mbuf chain 1543 * buflen buffer length, if pkt is a data buffer 1544 * cpfn a function that can copy pkt into the listener's buffer 1545 * pktlen length of the packet 1546 * rcv true if packet came in 1547 */ 1548 static inline void 1549 bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t), 1550 void *pkt, u_int pktlen, u_int buflen, const bool rcv) 1551 { 1552 uint32_t mem[BPF_MEMWORDS]; 1553 bpf_args_t args = { 1554 .pkt = (const uint8_t *)pkt, 1555 .wirelen = pktlen, 1556 .buflen = buflen, 1557 .mem = mem, 1558 .arg = NULL 1559 }; 1560 bool gottime = false; 1561 struct timespec ts; 1562 struct bpf_d *d; 1563 int s; 1564 1565 KASSERT(!cpu_intr_p()); 1566 1567 /* 1568 * Note that the IPL does not have to be raised at this point. 1569 * The only problem that could arise here is that if two different 1570 * interfaces shared any data. This is not the case. 1571 */ 1572 s = pserialize_read_enter(); 1573 BPFIF_DLIST_READER_FOREACH(d, bp) { 1574 u_int slen = 0; 1575 struct bpf_filter *filter; 1576 1577 if (!d->bd_seesent && !rcv) { 1578 continue; 1579 } 1580 atomic_inc_ulong(&d->bd_rcount); 1581 BPF_STATINC(recv); 1582 1583 filter = d->bd_filter; 1584 membar_datadep_consumer(); 1585 if (filter != NULL) { 1586 if (filter->bf_jitcode != NULL) 1587 slen = filter->bf_jitcode(NULL, &args); 1588 else 1589 slen = bpf_filter_ext(NULL, filter->bf_insn, 1590 &args); 1591 } 1592 1593 if (!slen) { 1594 continue; 1595 } 1596 if (!gottime) { 1597 gottime = true; 1598 nanotime(&ts); 1599 } 1600 /* Assume catchpacket doesn't sleep */ 1601 catchpacket(d, pkt, pktlen, slen, cpfn, &ts); 1602 } 1603 pserialize_read_exit(s); 1604 } 1605 1606 /* 1607 * Incoming linkage from device drivers. Process the packet pkt, of length 1608 * pktlen, which is stored in a contiguous buffer. The packet is parsed 1609 * by each process' filter, and if accepted, stashed into the corresponding 1610 * buffer. 1611 */ 1612 static void 1613 _bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 1614 { 1615 1616 bpf_deliver(bp, memcpy, pkt, pktlen, pktlen, true); 1617 } 1618 1619 /* 1620 * Incoming linkage from device drivers, when the head of the packet is in 1621 * a buffer, and the tail is in an mbuf chain. 1622 */ 1623 static void 1624 _bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 1625 { 1626 u_int pktlen; 1627 struct mbuf mb; 1628 1629 /* Skip outgoing duplicate packets. */ 1630 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) { 1631 m->m_flags &= ~M_PROMISC; 1632 return; 1633 } 1634 1635 pktlen = m_length(m) + dlen; 1636 1637 /* 1638 * Craft on-stack mbuf suitable for passing to bpf_filter. 
1639 * Note that we cut corners here; we only setup what's 1640 * absolutely needed--this mbuf should never go anywhere else. 1641 */ 1642 (void)memset(&mb, 0, sizeof(mb)); 1643 mb.m_next = m; 1644 mb.m_data = data; 1645 mb.m_len = dlen; 1646 1647 bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif_index != 0); 1648 } 1649 1650 /* 1651 * Incoming linkage from device drivers, when packet is in an mbuf chain. 1652 */ 1653 static void 1654 _bpf_mtap(struct bpf_if *bp, struct mbuf *m) 1655 { 1656 void *(*cpfn)(void *, const void *, size_t); 1657 u_int pktlen, buflen; 1658 void *marg; 1659 1660 /* Skip outgoing duplicate packets. */ 1661 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) { 1662 m->m_flags &= ~M_PROMISC; 1663 return; 1664 } 1665 1666 pktlen = m_length(m); 1667 1668 if (pktlen == m->m_len) { 1669 cpfn = (void *)memcpy; 1670 marg = mtod(m, void *); 1671 buflen = pktlen; 1672 } else { 1673 cpfn = bpf_mcpy; 1674 marg = m; 1675 buflen = 0; 1676 } 1677 1678 bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif_index != 0); 1679 } 1680 1681 /* 1682 * We need to prepend the address family as 1683 * a four byte field. Cons up a dummy header 1684 * to pacify bpf. This is safe because bpf 1685 * will only read from the mbuf (i.e., it won't 1686 * try to free it or keep a pointer a to it). 1687 */ 1688 static void 1689 _bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m) 1690 { 1691 struct mbuf m0; 1692 1693 m0.m_flags = 0; 1694 m0.m_next = m; 1695 m0.m_len = 4; 1696 m0.m_data = (char *)⁡ 1697 1698 _bpf_mtap(bp, &m0); 1699 } 1700 1701 /* 1702 * Put the SLIP pseudo-"link header" in place. 1703 * Note this M_PREPEND() should never fail, 1704 * swince we know we always have enough space 1705 * in the input buffer. 1706 */ 1707 static void 1708 _bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m) 1709 { 1710 u_char *hp; 1711 1712 M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT); 1713 if (*m == NULL) 1714 return; 1715 1716 hp = mtod(*m, u_char *); 1717 hp[SLX_DIR] = SLIPDIR_IN; 1718 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN); 1719 1720 _bpf_mtap(bp, *m); 1721 1722 m_adj(*m, SLIP_HDRLEN); 1723 } 1724 1725 /* 1726 * Put the SLIP pseudo-"link header" in 1727 * place. The compressed header is now 1728 * at the beginning of the mbuf. 
1729 */ 1730 static void 1731 _bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m) 1732 { 1733 struct mbuf m0; 1734 u_char *hp; 1735 1736 m0.m_flags = 0; 1737 m0.m_next = m; 1738 m0.m_data = m0.m_dat; 1739 m0.m_len = SLIP_HDRLEN; 1740 1741 hp = mtod(&m0, u_char *); 1742 1743 hp[SLX_DIR] = SLIPDIR_OUT; 1744 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN); 1745 1746 _bpf_mtap(bp, &m0); 1747 m_freem(m); 1748 } 1749 1750 static struct mbuf * 1751 bpf_mbuf_enqueue(struct bpf_if *bp, struct mbuf *m) 1752 { 1753 struct mbuf *dup; 1754 1755 dup = m_dup(m, 0, M_COPYALL, M_NOWAIT); 1756 if (dup == NULL) 1757 return NULL; 1758 1759 if (bp->bif_mbuf_tail != NULL) { 1760 bp->bif_mbuf_tail->m_nextpkt = dup; 1761 } else { 1762 bp->bif_mbuf_head = dup; 1763 } 1764 bp->bif_mbuf_tail = dup; 1765 #ifdef BPF_MTAP_SOFTINT_DEBUG 1766 log(LOG_DEBUG, "%s: enqueued mbuf=%p to %s\n", 1767 __func__, dup, bp->bif_ifp->if_xname); 1768 #endif 1769 1770 return dup; 1771 } 1772 1773 static struct mbuf * 1774 bpf_mbuf_dequeue(struct bpf_if *bp) 1775 { 1776 struct mbuf *m; 1777 int s; 1778 1779 /* XXX NOMPSAFE: assumed running on one CPU */ 1780 s = splnet(); 1781 m = bp->bif_mbuf_head; 1782 if (m != NULL) { 1783 bp->bif_mbuf_head = m->m_nextpkt; 1784 m->m_nextpkt = NULL; 1785 1786 if (bp->bif_mbuf_head == NULL) 1787 bp->bif_mbuf_tail = NULL; 1788 #ifdef BPF_MTAP_SOFTINT_DEBUG 1789 log(LOG_DEBUG, "%s: dequeued mbuf=%p from %s\n", 1790 __func__, m, bp->bif_ifp->if_xname); 1791 #endif 1792 } 1793 splx(s); 1794 1795 return m; 1796 } 1797 1798 static void 1799 bpf_mtap_si(void *arg) 1800 { 1801 struct bpf_if *bp = arg; 1802 struct mbuf *m; 1803 1804 while ((m = bpf_mbuf_dequeue(bp)) != NULL) { 1805 #ifdef BPF_MTAP_SOFTINT_DEBUG 1806 log(LOG_DEBUG, "%s: tapping mbuf=%p on %s\n", 1807 __func__, m, bp->bif_ifp->if_xname); 1808 #endif 1809 bpf_ops->bpf_mtap(bp, m); 1810 m_freem(m); 1811 } 1812 } 1813 1814 static void 1815 _bpf_mtap_softint(struct ifnet *ifp, struct mbuf *m) 1816 { 1817 struct bpf_if *bp = ifp->if_bpf; 1818 struct mbuf *dup; 1819 1820 KASSERT(cpu_intr_p()); 1821 1822 /* To avoid extra invocations of the softint */ 1823 if (BPFIF_DLIST_READER_EMPTY(bp)) 1824 return; 1825 KASSERT(bp->bif_si != NULL); 1826 1827 dup = bpf_mbuf_enqueue(bp, m); 1828 if (dup != NULL) 1829 softint_schedule(bp->bif_si); 1830 } 1831 1832 static int 1833 bpf_hdrlen(struct bpf_d *d) 1834 { 1835 int hdrlen = d->bd_bif->bif_hdrlen; 1836 /* 1837 * Compute the length of the bpf header. This is not necessarily 1838 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1839 * that the network layer header begins on a longword boundary (for 1840 * performance reasons and to alleviate alignment restrictions). 1841 */ 1842 #ifdef _LP64 1843 if (d->bd_compat32) 1844 return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen); 1845 else 1846 #endif 1847 return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen); 1848 } 1849 1850 /* 1851 * Move the packet data from interface memory (pkt) into the 1852 * store buffer. Call the wakeup functions if it's time to wakeup 1853 * a listener (buffer full), "cpfn" is the routine called to do the 1854 * actual data transfer. memcpy is passed in to copy contiguous chunks, 1855 * while bpf_mcpy is passed in to copy mbuf chains. In the latter case, 1856 * pkt is really an mbuf. 
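 *
 * (Illustrative addition, not part of the original source: each captured
 *  packet is laid down as a struct bpf_hdr followed by bh_caplen bytes of
 *  data, padded to a BPF_WORDALIGN boundary, so a reader walks the buffer
 *  filled by read(2) like this, with buf/nread from the read and
 *  process() a placeholder:
 *
 *	char *p = buf;
 *
 *	while (p < buf + nread) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *
 *		process(p + bh->bh_hdrlen, bh->bh_caplen);
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 * )
 *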
1857 */ 1858 static void 1859 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 1860 void *(*cpfn)(void *, const void *, size_t), struct timespec *ts) 1861 { 1862 char *h; 1863 int totlen, curlen, caplen; 1864 int hdrlen = bpf_hdrlen(d); 1865 int do_wakeup = 0; 1866 1867 atomic_inc_ulong(&d->bd_ccount); 1868 BPF_STATINC(capt); 1869 /* 1870 * Figure out how many bytes to move. If the packet is 1871 * greater or equal to the snapshot length, transfer that 1872 * much. Otherwise, transfer the whole packet (unless 1873 * we hit the buffer size limit). 1874 */ 1875 totlen = hdrlen + min(snaplen, pktlen); 1876 if (totlen > d->bd_bufsize) 1877 totlen = d->bd_bufsize; 1878 /* 1879 * If we adjusted totlen to fit the bufsize, it could be that 1880 * totlen is smaller than hdrlen because of the link layer header. 1881 */ 1882 caplen = totlen - hdrlen; 1883 if (caplen < 0) 1884 caplen = 0; 1885 1886 mutex_enter(d->bd_buf_mtx); 1887 /* 1888 * Round up the end of the previous packet to the next longword. 1889 */ 1890 #ifdef _LP64 1891 if (d->bd_compat32) 1892 curlen = BPF_WORDALIGN32(d->bd_slen); 1893 else 1894 #endif 1895 curlen = BPF_WORDALIGN(d->bd_slen); 1896 if (curlen + totlen > d->bd_bufsize) { 1897 /* 1898 * This packet will overflow the storage buffer. 1899 * Rotate the buffers if we can, then wakeup any 1900 * pending reads. 1901 */ 1902 if (d->bd_fbuf == NULL) { 1903 mutex_exit(d->bd_buf_mtx); 1904 /* 1905 * We haven't completed the previous read yet, 1906 * so drop the packet. 1907 */ 1908 atomic_inc_ulong(&d->bd_dcount); 1909 BPF_STATINC(drop); 1910 return; 1911 } 1912 ROTATE_BUFFERS(d); 1913 do_wakeup = 1; 1914 curlen = 0; 1915 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) { 1916 /* 1917 * Immediate mode is set, or the read timeout has 1918 * already expired during a select call. A packet 1919 * arrived, so the reader should be woken up. 1920 */ 1921 do_wakeup = 1; 1922 } 1923 1924 /* 1925 * Append the bpf header. 1926 */ 1927 h = (char *)d->bd_sbuf + curlen; 1928 #ifdef _LP64 1929 if (d->bd_compat32) { 1930 struct bpf_hdr32 *hp32; 1931 1932 hp32 = (struct bpf_hdr32 *)h; 1933 hp32->bh_tstamp.tv_sec = ts->tv_sec; 1934 hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000; 1935 hp32->bh_datalen = pktlen; 1936 hp32->bh_hdrlen = hdrlen; 1937 hp32->bh_caplen = caplen; 1938 } else 1939 #endif 1940 { 1941 struct bpf_hdr *hp; 1942 1943 hp = (struct bpf_hdr *)h; 1944 hp->bh_tstamp.tv_sec = ts->tv_sec; 1945 hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000; 1946 hp->bh_datalen = pktlen; 1947 hp->bh_hdrlen = hdrlen; 1948 hp->bh_caplen = caplen; 1949 } 1950 1951 /* 1952 * Copy the packet data into the store buffer and update its length. 1953 */ 1954 (*cpfn)(h + hdrlen, pkt, caplen); 1955 d->bd_slen = curlen + totlen; 1956 mutex_exit(d->bd_buf_mtx); 1957 1958 /* 1959 * Call bpf_wakeup after bd_slen has been updated so that kevent(2) 1960 * will cause filt_bpfread() to be called with it adjusted. 1961 */ 1962 if (do_wakeup) 1963 bpf_wakeup(d); 1964 } 1965 1966 /* 1967 * Initialize all nonzero fields of a descriptor. 
1968 */ 1969 static int 1970 bpf_allocbufs(struct bpf_d *d) 1971 { 1972 1973 d->bd_fbuf = kmem_alloc(d->bd_bufsize, KM_NOSLEEP); 1974 if (!d->bd_fbuf) 1975 return (ENOBUFS); 1976 d->bd_sbuf = kmem_alloc(d->bd_bufsize, KM_NOSLEEP); 1977 if (!d->bd_sbuf) { 1978 kmem_free(d->bd_fbuf, d->bd_bufsize); 1979 return (ENOBUFS); 1980 } 1981 d->bd_slen = 0; 1982 d->bd_hlen = 0; 1983 return (0); 1984 } 1985 1986 static void 1987 bpf_free_filter(struct bpf_filter *filter) 1988 { 1989 1990 KASSERT(filter != NULL); 1991 KASSERT(filter->bf_insn != NULL); 1992 1993 kmem_free(filter->bf_insn, filter->bf_size); 1994 if (filter->bf_jitcode != NULL) 1995 bpf_jit_freecode(filter->bf_jitcode); 1996 kmem_free(filter, sizeof(*filter)); 1997 } 1998 1999 /* 2000 * Free buffers currently in use by a descriptor. 2001 * Called on close. 2002 */ 2003 static void 2004 bpf_freed(struct bpf_d *d) 2005 { 2006 /* 2007 * We don't need to lock out interrupts since this descriptor has 2008 * been detached from its interface and it yet hasn't been marked 2009 * free. 2010 */ 2011 if (d->bd_sbuf != NULL) { 2012 kmem_free(d->bd_sbuf, d->bd_bufsize); 2013 if (d->bd_hbuf != NULL) 2014 kmem_free(d->bd_hbuf, d->bd_bufsize); 2015 if (d->bd_fbuf != NULL) 2016 kmem_free(d->bd_fbuf, d->bd_bufsize); 2017 } 2018 if (d->bd_filter != NULL) { 2019 bpf_free_filter(d->bd_filter); 2020 d->bd_filter = NULL; 2021 } 2022 d->bd_jitcode = NULL; 2023 } 2024 2025 /* 2026 * Attach an interface to bpf. dlt is the link layer type; 2027 * hdrlen is the fixed size of the link header for the specified dlt 2028 * (variable length headers not yet supported). 2029 */ 2030 static void 2031 _bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 2032 { 2033 struct bpf_if *bp; 2034 bp = kmem_alloc(sizeof(*bp), KM_NOSLEEP); 2035 if (bp == NULL) 2036 panic("bpfattach"); 2037 2038 mutex_enter(&bpf_mtx); 2039 bp->bif_driverp = driverp; 2040 bp->bif_ifp = ifp; 2041 bp->bif_dlt = dlt; 2042 bp->bif_si = NULL; 2043 BPF_IFLIST_ENTRY_INIT(bp); 2044 PSLIST_INIT(&bp->bif_dlist_head); 2045 psref_target_init(&bp->bif_psref, bpf_psref_class); 2046 2047 BPF_IFLIST_WRITER_INSERT_HEAD(bp); 2048 2049 *bp->bif_driverp = NULL; 2050 2051 bp->bif_hdrlen = hdrlen; 2052 mutex_exit(&bpf_mtx); 2053 #if 0 2054 printf("bpf: %s attached\n", ifp->if_xname); 2055 #endif 2056 } 2057 2058 static void 2059 _bpf_mtap_softint_init(struct ifnet *ifp) 2060 { 2061 struct bpf_if *bp; 2062 2063 mutex_enter(&bpf_mtx); 2064 BPF_IFLIST_WRITER_FOREACH(bp) { 2065 if (bp->bif_ifp != ifp) 2066 continue; 2067 2068 bp->bif_mbuf_head = NULL; 2069 bp->bif_mbuf_tail = NULL; 2070 bp->bif_si = softint_establish(SOFTINT_NET, bpf_mtap_si, bp); 2071 if (bp->bif_si == NULL) 2072 panic("%s: softint_establish() failed", __func__); 2073 break; 2074 } 2075 mutex_exit(&bpf_mtx); 2076 2077 if (bp == NULL) 2078 panic("%s: no bpf_if found for %s", __func__, ifp->if_xname); 2079 } 2080 2081 /* 2082 * Remove an interface from bpf. 2083 */ 2084 static void 2085 _bpfdetach(struct ifnet *ifp) 2086 { 2087 struct bpf_if *bp; 2088 struct bpf_d *d; 2089 int s; 2090 2091 mutex_enter(&bpf_mtx); 2092 /* Nuke the vnodes for any open instances */ 2093 again_d: 2094 BPF_DLIST_WRITER_FOREACH(d) { 2095 mutex_enter(d->bd_mtx); 2096 if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) { 2097 /* 2098 * Detach the descriptor from an interface now. 2099 * It will be free'ed later by close routine. 2100 */ 2101 d->bd_promisc = 0; /* we can't touch device. 
*/ 2102 bpf_detachd(d); 2103 mutex_exit(d->bd_mtx); 2104 goto again_d; 2105 } 2106 mutex_exit(d->bd_mtx); 2107 } 2108 2109 again: 2110 BPF_IFLIST_WRITER_FOREACH(bp) { 2111 if (bp->bif_ifp == ifp) { 2112 BPF_IFLIST_WRITER_REMOVE(bp); 2113 2114 pserialize_perform(bpf_psz); 2115 psref_target_destroy(&bp->bif_psref, bpf_psref_class); 2116 2117 BPF_IFLIST_ENTRY_DESTROY(bp); 2118 if (bp->bif_si != NULL) { 2119 /* XXX NOMPSAFE: assumed running on one CPU */ 2120 s = splnet(); 2121 while (bp->bif_mbuf_head != NULL) { 2122 struct mbuf *m = bp->bif_mbuf_head; 2123 bp->bif_mbuf_head = m->m_nextpkt; 2124 m_freem(m); 2125 } 2126 splx(s); 2127 softint_disestablish(bp->bif_si); 2128 } 2129 kmem_free(bp, sizeof(*bp)); 2130 goto again; 2131 } 2132 } 2133 mutex_exit(&bpf_mtx); 2134 } 2135 2136 /* 2137 * Change the data link type of a interface. 2138 */ 2139 static void 2140 _bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2141 { 2142 struct bpf_if *bp; 2143 2144 mutex_enter(&bpf_mtx); 2145 BPF_IFLIST_WRITER_FOREACH(bp) { 2146 if (bp->bif_driverp == &ifp->if_bpf) 2147 break; 2148 } 2149 if (bp == NULL) 2150 panic("bpf_change_type"); 2151 2152 bp->bif_dlt = dlt; 2153 2154 bp->bif_hdrlen = hdrlen; 2155 mutex_exit(&bpf_mtx); 2156 } 2157 2158 /* 2159 * Get a list of available data link type of the interface. 2160 */ 2161 static int 2162 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 2163 { 2164 int n, error; 2165 struct ifnet *ifp; 2166 struct bpf_if *bp; 2167 int s, bound; 2168 2169 KASSERT(mutex_owned(d->bd_mtx)); 2170 2171 ifp = d->bd_bif->bif_ifp; 2172 n = 0; 2173 error = 0; 2174 2175 bound = curlwp_bind(); 2176 s = pserialize_read_enter(); 2177 BPF_IFLIST_READER_FOREACH(bp) { 2178 if (bp->bif_ifp != ifp) 2179 continue; 2180 if (bfl->bfl_list != NULL) { 2181 struct psref psref; 2182 2183 if (n >= bfl->bfl_len) { 2184 pserialize_read_exit(s); 2185 return ENOMEM; 2186 } 2187 2188 bpf_if_acquire(bp, &psref); 2189 pserialize_read_exit(s); 2190 2191 error = copyout(&bp->bif_dlt, 2192 bfl->bfl_list + n, sizeof(u_int)); 2193 2194 s = pserialize_read_enter(); 2195 bpf_if_release(bp, &psref); 2196 } 2197 n++; 2198 } 2199 pserialize_read_exit(s); 2200 curlwp_bindx(bound); 2201 2202 bfl->bfl_len = n; 2203 return error; 2204 } 2205 2206 /* 2207 * Set the data link type of a BPF instance. 
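 *
 * (Illustrative addition, not part of the original source: userland
 *  usually pairs BIOCGDLTLIST above with BIOCSDLT, e.g. with fd the
 *  bpf descriptor:
 *
 *	struct bpf_dltlist dl;
 *	u_int dlts[16], dlt = DLT_EN10MB;
 *
 *	dl.bfl_len = 16;
 *	dl.bfl_list = dlts;
 *	if (ioctl(fd, BIOCGDLTLIST, &dl) == 0 && dl.bfl_len > 0)
 *		dlt = dlts[0];
 *	ioctl(fd, BIOCSDLT, &dlt);
 * )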
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	KASSERT(mutex_owned(&bpf_mtx));
	KASSERT(mutex_owned(d->bd_mtx));

	if (d->bd_bif->bif_dlt == dlt)
		return 0;
	ifp = d->bd_bif->bif_ifp;
	BPF_IFLIST_WRITER_FOREACH(bp) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return EINVAL;
	opromisc = d->bd_promisc;
	bpf_detachd(d);
	BPFIF_DLIST_ENTRY_INIT(d);
	bpf_attachd(d, bp);
	reset_d(d);
	if (opromisc) {
		KERNEL_LOCK_UNLESS_NET_MPSAFE();
		error = ifpromisc(bp->bif_ifp, 1);
		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
		if (error)
			printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
			    bp->bif_ifp->if_xname, error);
		else
			d->bd_promisc = 1;
	}
	return 0;
}

static int
sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newsize;
	newsize = bpf_maxbufsize;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
		return (EINVAL);

	bpf_maxbufsize = newsize;

	return (0);
}

#if defined(MODULAR) || defined(BPFJIT)
static int
sysctl_net_bpf_jit(SYSCTLFN_ARGS)
{
	bool newval;
	int error;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	newval = bpf_jit;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	bpf_jit = newval;

	/*
	 * Do a full sync to publish new bpf_jit value and
	 * update bpfjit_module_ops.bj_generate_code variable.
	 */
	membar_sync();

	if (newval && bpfjit_module_ops.bj_generate_code == NULL) {
		printf("JIT compilation is postponed "
		    "until after bpfjit module is loaded\n");
	}

	return 0;
}
#endif

static int
sysctl_net_bpf_peers(SYSCTLFN_ARGS)
{
	int error, elem_count;
	struct bpf_d *dp;
	struct bpf_d_ext dpe;
	size_t len, needed, elem_size, out_size;
	char *sp;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (namelen != 2)
		return (EINVAL);

	/* BPF peers is privileged information. */
	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
	    KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
	if (error)
		return (EPERM);

	len = (oldp != NULL) ? *oldlenp : 0;
	sp = oldp;
	elem_size = name[0];
	elem_count = name[1];
	out_size = MIN(sizeof(dpe), elem_size);
	needed = 0;

	if (elem_size < 1 || elem_count < 0)
		return (EINVAL);

	mutex_enter(&bpf_mtx);
	BPF_DLIST_WRITER_FOREACH(dp) {
		if (len >= elem_size && elem_count > 0) {
#define BPF_EXT(field) dpe.bde_ ## field = dp->bd_ ## field
			BPF_EXT(bufsize);
			BPF_EXT(promisc);
			BPF_EXT(state);
			BPF_EXT(immediate);
			BPF_EXT(hdrcmplt);
			BPF_EXT(seesent);
			BPF_EXT(pid);
			BPF_EXT(rcount);
			BPF_EXT(dcount);
			BPF_EXT(ccount);
#undef BPF_EXT
			mutex_enter(dp->bd_mtx);
			if (dp->bd_bif)
				(void)strlcpy(dpe.bde_ifname,
				    dp->bd_bif->bif_ifp->if_xname,
				    IFNAMSIZ - 1);
			else
				dpe.bde_ifname[0] = '\0';
			mutex_exit(dp->bd_mtx);

			error = copyout(&dpe, sp, out_size);
			if (error)
				break;
			sp += elem_size;
			len -= elem_size;
		}
		needed += elem_size;
		if (elem_count > 0 && elem_count != INT_MAX)
			elem_count--;
	}
	mutex_exit(&bpf_mtx);

	*oldlenp = needed;

	return (error);
}

static void
bpf_stats(void *p, void *arg, struct cpu_info *ci __unused)
{
	struct bpf_stat *const stats = p;
	struct bpf_stat *sum = arg;

	sum->bs_recv += stats->bs_recv;
	sum->bs_drop += stats->bs_drop;
	sum->bs_capt += stats->bs_capt;
}

static int
bpf_sysctl_gstats_handler(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error;
	struct bpf_stat sum;

	memset(&sum, 0, sizeof(sum));
	node = *rnode;

	percpu_foreach(bpf_gstats_percpu, bpf_stats, &sum);

	node.sysctl_data = &sum;
	node.sysctl_size = sizeof(sum);
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	return 0;
}

static struct sysctllog *bpf_sysctllog;
static void
sysctl_net_bpf_setup(void)
{
	const struct sysctlnode *node;

	node = NULL;
	sysctl_createv(&bpf_sysctllog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "bpf",
	    SYSCTL_DESCR("BPF options"),
	    NULL, 0, NULL, 0,
	    CTL_NET, CTL_CREATE, CTL_EOL);
	if (node != NULL) {
#if defined(MODULAR) || defined(BPFJIT)
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_BOOL, "jit",
		    SYSCTL_DESCR("Toggle Just-In-Time compilation"),
		    sysctl_net_bpf_jit, 0, &bpf_jit, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
#endif
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		    CTLTYPE_INT, "maxbufsize",
		    SYSCTL_DESCR("Maximum size for data capture buffer"),
		    sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "stats",
		    SYSCTL_DESCR("BPF stats"),
		    bpf_sysctl_gstats_handler, 0, NULL, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
		sysctl_createv(&bpf_sysctllog, 0, NULL, NULL,
		    CTLFLAG_PERMANENT,
		    CTLTYPE_STRUCT, "peers",
		    SYSCTL_DESCR("BPF peers"),
		    sysctl_net_bpf_peers, 0, NULL, 0,
		    CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}

struct bpf_ops bpf_ops_kernel = {
	.bpf_attach =		_bpfattach,
	.bpf_detach =		_bpfdetach,
	.bpf_change_type =	_bpf_change_type,

	.bpf_tap =		_bpf_tap,
	.bpf_mtap =		_bpf_mtap,
	.bpf_mtap2 =		_bpf_mtap2,
	.bpf_mtap_af =		_bpf_mtap_af,
	.bpf_mtap_sl_in =	_bpf_mtap_sl_in,
	.bpf_mtap_sl_out =	_bpf_mtap_sl_out,

	.bpf_mtap_softint =		_bpf_mtap_softint,
	.bpf_mtap_softint_init =	_bpf_mtap_softint_init,
};

MODULE(MODULE_CLASS_DRIVER, bpf, "bpf_filter");

static int
bpf_modcmd(modcmd_t cmd, void *arg)
{
#ifdef _MODULE
	devmajor_t bmajor, cmajor;
#endif
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpf_init();
#ifdef _MODULE
		bmajor = cmajor = NODEVMAJOR;
		error = devsw_attach("bpf", NULL, &bmajor,
		    &bpf_cdevsw, &cmajor);
		if (error)
			break;
#endif

		bpf_ops_handover_enter(&bpf_ops_kernel);
		atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
		bpf_ops_handover_exit();
		sysctl_net_bpf_setup();
		break;

	case MODULE_CMD_FINI:
		/*
		 * While there is no reference counting for bpf callers,
		 * unload could at least in theory be done similarly to
		 * system call disestablishment.  This should even be
		 * a little simpler:
		 *
		 * 1) replace op vector with stubs
		 * 2) post update to all cpus with xc
		 * 3) check that nobody is in bpf anymore
		 *    (it's doubtful we'd want something like l_sysent,
		 *    but we could do something like *signed* percpu
		 *    counters.  If the sum is 0, we're good.)
		 * 4) if that fails, unroll the changes
		 *
		 * NOTE: the change won't be atomic to the outside.  Some
		 * packets may not be captured even if the unload is not
		 * successful.  I think packet capture not working is a
		 * perfectly logical consequence of trying to disable
		 * packet capture.
		 */
		error = EOPNOTSUPP;
		/* insert sysctl teardown */
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}
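
/*
 * Illustrative sketch only: one possible shape for the "signed percpu
 * counters" idea described in the MODULE_CMD_FINI comment above.  The
 * names bpf_ref_percpu, bpf_ref_enter(), bpf_ref_exit(), bpf_ref_sum_cb()
 * and bpf_ref_sum() are hypothetical and are not used anywhere else in
 * this file; the block is kept under #if 0 and is never compiled.
 */
#if 0
static struct percpu *bpf_ref_percpu;	/* int64_t per CPU */

/* Count a caller entering bpf; called on the way into the op vector. */
static void
bpf_ref_enter(void)
{
	int64_t *p = percpu_getref(bpf_ref_percpu);

	(*p)++;
	percpu_putref(bpf_ref_percpu);
}

/*
 * Count a caller leaving bpf.  The caller may have migrated, so an
 * individual CPU's counter can legitimately go negative; only the sum
 * over all CPUs is meaningful.
 */
static void
bpf_ref_exit(void)
{
	int64_t *p = percpu_getref(bpf_ref_percpu);

	(*p)--;
	percpu_putref(bpf_ref_percpu);
}

/* percpu_foreach callback: add one CPU's counter into the total. */
static void
bpf_ref_sum_cb(void *p, void *arg, struct cpu_info *ci __unused)
{
	int64_t *sump = arg;

	*sump += *(int64_t *)p;
}

/* Sum over all CPUs; a total of zero means nobody is inside bpf. */
static int64_t
bpf_ref_sum(void)
{
	int64_t sum = 0;

	percpu_foreach(bpf_ref_percpu, bpf_ref_sum_cb, &sum);
	return sum;
}
#endif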