1 /* $OpenBSD: kern_event.c,v 1.183 2022/02/22 01:15:01 guenther Exp $ */ 2 3 /*- 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/syscallargs.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/vnode.h>
#include <sys/wait.h>

#ifdef DIAGNOSTIC
#define KLIST_ASSERT_LOCKED(kl) do {					\
	if ((kl)->kl_ops != NULL)					\
		(kl)->kl_ops->klo_assertlk((kl)->kl_arg);		\
	else								\
		KERNEL_ASSERT_LOCKED();					\
} while (0)
#else
#define KLIST_ASSERT_LOCKED(kl)	((void)(kl))
#endif

struct	kqueue *kqueue_alloc(struct filedesc *);
void	kqueue_terminate(struct proc *p, struct kqueue *);
void	KQREF(struct kqueue *);
void	KQRELE(struct kqueue *);

void	kqueue_purge(struct proc *, struct kqueue *);
int	kqueue_sleep(struct kqueue *, struct timespec *);

int	kqueue_read(struct file *, struct uio *, int);
int	kqueue_write(struct file *, struct uio *, int);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_poll(struct file *fp, int events, struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

#ifdef KQUEUE_DEBUG
void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif

static int	filter_attach(struct knote *kn);
static void	filter_detach(struct knote *kn);
static int	filter_event(struct knote *kn, long hint);
static int	filter_modify(struct kevent *kev, struct knote *kn);
static int	filter_process(struct knote *kn, struct kevent *kev);
static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);

const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_poll	= kqueue_poll,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_detach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
int	knote_acquire(struct knote *kn, struct klist *, int);
void	knote_release(struct knote *kn);
void	knote_activate(struct knote *kn);
void	knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list,
	    int purge);

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_kqueuemodify(struct kevent *kev, struct knote *kn);
int	filt_kqueueprocess(struct knote *kn, struct kevent *kev);
int	filt_kqueue_common(struct knote *kn, struct kqueue *kq);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timermodify(struct kevent *kev, struct knote *kn);
int	filt_timerprocess(struct knote *kn, struct kevent *kev);
void	filt_seltruedetach(struct knote *kn);

const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
	.f_modify	= filt_kqueuemodify,
	.f_process	= filt_kqueueprocess,
};

const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= NULL,
	.f_modify	= filt_timermodify,
	.f_process	= filt_timerprocess,
};

struct	pool knote_pool;
struct	pool kqueue_pool;
struct	mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/*
 * Table for all system-defined filters.
 */
const struct filterops *const sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_DEVICE */
	&file_filtops,			/* EVFILT_EXCEPT */
};

/* Acquire a reference on a kqueue. */
void
KQREF(struct kqueue *kq)
{
	atomic_inc_int(&kq->kq_refs);
}

/*
 * Release a reference on a kqueue.  Dropping the last reference unlinks
 * the kqueue from its file descriptor table's list and frees it.
 */
void
KQRELE(struct kqueue *kq)
{
	struct filedesc *fdp;

	if (atomic_dec_int_nv(&kq->kq_refs) > 0)
		return;

	fdp = kq->kq_fdp;
	/* Take fd_lock only if the caller does not already hold it. */
	if (rw_status(&fdp->fd_lock) == RW_WRITE) {
		LIST_REMOVE(kq, kq_next);
	} else {
		fdplock(fdp);
		LIST_REMOVE(kq, kq_next);
		fdpunlock(fdp);
	}

	KASSERT(TAILQ_EMPTY(&kq->kq_head));
	KASSERT(kq->kq_nknotes == 0);

	free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
	    sizeof(struct knlist));
	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
	klist_free(&kq->kq_sel.si_note);
	pool_put(&kqueue_pool, kq);
}

void
kqueue_init(void)
{
	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR,
	    PR_WAITOK, "kqueuepl", NULL);
	pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR,
	    PR_WAITOK, "knotepl", NULL);
}

void
kqueue_init_percpu(void)
{
	pool_cache_init(&knote_pool);
}

/* Dispatch attach to the backing file's own kqfilter routine. */
int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return fp->f_ops->fo_kqfilter(fp, kn);
}

int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &kqread_filtops;
	klist_insert(&kq->kq_sel.si_note, kn);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	klist_remove(&kq->kq_sel.si_note, kn);
}

/*
 * Common check for the kqueue read filter: report the number of queued
 * events.  The caller must hold kq_lock.
 */
int
filt_kqueue_common(struct knote *kn, struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	kn->kn_data = kq->kq_count;

	return (kn->kn_data > 0);
}

int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueuemodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	knote_assign(kev, kn);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueueprocess(struct knote *kn, struct kevent *kev)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_kqueue_common(kn, kq);
	if (active)
		knote_submit(kn, kev);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_procattach(struct knote *kn)
{
	struct process *pr;
	int s;

	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
		return pledge_fail(curproc, EPERM, PLEDGE_PROC);

	if (kn->kn_id > PID_MAX)
		return ESRCH;

	pr = prfind(kn->kn_id);
	if (pr == NULL)
		return (ESRCH);

	/* exiting processes can't be specified */
	if (pr->ps_flags & PS_EXITING)
		return (ESRCH);

	kn->kn_ptr.p_process = pr;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	s = splhigh();
	klist_insert_locked(&pr->ps_klist, kn);
	splx(s);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct process *pr = kn->kn_ptr.p_process;
	int s, status;

	mtx_enter(&kq->kq_lock);
	status = kn->kn_status;
	mtx_leave(&kq->kq_lock);

	if (status & KN_DETACHED)
		return;

	s = splhigh();
	klist_remove_locked(&pr->ps_klist, kn);
	splx(s);
}

int
filt_proc(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_kq;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished and remove it
	 * from the process's klist
	 */
	if (event == NOTE_EXIT) {
		struct process *pr = kn->kn_ptr.p_process;
		int s;

		mtx_enter(&kq->kq_lock);
		kn->kn_status |= KN_DETACHED;
		mtx_leave(&kq->kq_lock);

		s = splhigh();
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
		klist_remove_locked(&pr->ps_klist, kn);
		splx(s);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		memset(&kev, 0, sizeof(kev));
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_udata;		/* preserve udata */
		error = kqueue_register(kq, &kev, 0, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

/* (Re)arm the knote's timeout; kn_sdata is the period in milliseconds. */
static void
filt_timer_timeout_add(struct knote *kn)
{
	struct timeval tv;
	struct timeout *to = kn->kn_hook;
	int tticks;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);
	/* Remove extra tick from tvtohz() if timeout has fired before. */
	if (timeout_triggered(to))
		tticks--;
	timeout_add(to, (tticks > 0) ?
	    tticks : 1);
}

void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct kqueue *kq = kn->kn_kq;

	kn->kn_data++;
	mtx_enter(&kq->kq_lock);
	knote_activate(kn);
	mtx_leave(&kq->kq_lock);

	/* Periodic timers rearm themselves. */
	if ((kn->kn_flags & EV_ONESHOT) == 0)
		filt_timer_timeout_add(kn);
}


/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;

	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	kn->kn_hook = to;
	filt_timer_timeout_add(kn);

	return (0);
}

void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del_barrier(to);
	free(to, M_KEVENT, sizeof(*to));
	kq_ntimeouts--;
}

int
filt_timermodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct timeout *to = kn->kn_hook;

	/* Reset the timer. Any pending events are discarded. */

	timeout_del_barrier(to);

	mtx_enter(&kq->kq_lock);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	kn->kn_status &= ~KN_ACTIVE;
	mtx_leave(&kq->kq_lock);

	kn->kn_data = 0;
	knote_assign(kev, kn);
	/* Reinit timeout to invoke tick adjustment again. */
	timeout_set(to, filt_timerexpire, kn);
	filt_timer_timeout_add(kn);

	return (0);
}

int
filt_timerprocess(struct knote *kn, struct kevent *kev)
{
	int active, s;

	s = splsoftclock();
	active = (kn->kn_data != 0);
	if (active)
		knote_submit(kn, kev);
	splx(s);

	return (active);
}


/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does as well.
	 */
	kn->kn_data = 0;
	return (1);
}

int
filt_seltruemodify(struct kevent *kev, struct knote *kn)
{
	knote_assign(kev, kn);
	return (kn->kn_fop->f_event(kn, 0));
}

int
filt_seltrueprocess(struct knote *kn, struct kevent *kev)
{
	int active;

	active = kn->kn_fop->f_event(kn, 0);
	if (active)
		knote_submit(kn, kev);
	return (active);
}

/*
 * This provides full kqfilter entry for device switch tables, which
 * has same effect as filter using filt_seltrue() as filter method.
 */
void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_seltruedetach,
	.f_event	= filt_seltrue,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

static int
filt_dead(struct knote *kn, long hint)
{
	if (kn->kn_filter == EVFILT_EXCEPT) {
		/*
		 * Do not deliver event because there is no out-of-band data.
		 * However, let HUP condition pass for poll(2).
		 */
		if ((kn->kn_flags & __EV_POLL) == 0) {
			kn->kn_flags |= EV_DISABLE;
			return (0);
		}
	}

	kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	if (kn->kn_flags & __EV_POLL)
		kn->kn_flags |= __EV_HUP;
	kn->kn_data = 0;
	return (1);
}

static void
filt_deaddetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops dead_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_dead,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

static int
filt_badfd(struct knote *kn, long hint)
{
	kn->kn_flags |= (EV_ERROR | EV_ONESHOT);
	kn->kn_data = EBADF;
	return (1);
}

/* For use with kqpoll. */
const struct filterops badfd_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_badfd,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

/* Invoke f_attach, taking the kernel lock for non-MPSAFE filters. */
static int
filter_attach(struct knote *kn)
{
	int error;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		error = kn->kn_fop->f_attach(kn);
	} else {
		KERNEL_LOCK();
		error = kn->kn_fop->f_attach(kn);
		KERNEL_UNLOCK();
	}
	return (error);
}

/* Invoke f_detach, taking the kernel lock for non-MPSAFE filters. */
static void
filter_detach(struct knote *kn)
{
	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		kn->kn_fop->f_detach(kn);
	} else {
		KERNEL_LOCK();
		kn->kn_fop->f_detach(kn);
		KERNEL_UNLOCK();
	}
}

static int
filter_event(struct knote *kn, long hint)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	return (kn->kn_fop->f_event(kn, hint));
}

/*
 * Invoke f_modify, falling back to the generic knote_modify() for
 * filters that do not provide one.
 */
static int
filter_modify(struct kevent *kev, struct knote *kn)
{
	int active, s;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		active = kn->kn_fop->f_modify(kev, kn);
	} else {
		KERNEL_LOCK();
		if (kn->kn_fop->f_modify != NULL) {
			active = kn->kn_fop->f_modify(kev, kn);
		} else {
			s = splhigh();
			active = knote_modify(kev, kn);
			splx(s);
		}
		KERNEL_UNLOCK();
	}
	return (active);
}

/*
 * Invoke f_process, falling back to the generic knote_process() for
 * filters that do not provide one.
 */
static int
filter_process(struct knote *kn, struct kevent *kev)
{
	int active, s;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		active = kn->kn_fop->f_process(kn, kev);
	} else {
		KERNEL_LOCK();
		if (kn->kn_fop->f_process != NULL) {
			active = kn->kn_fop->f_process(kn, kev);
		} else {
			s = splhigh();
			active = knote_process(kn, kev);
			splx(s);
		}
		KERNEL_UNLOCK();
	}
	return (active);
}

/*
 * Initialize the current thread for poll/select system call.
 * num indicates the number of serials that the system call may utilize.
 * After this function, the valid range of serials is
 * p_kq_serial <= x < p_kq_serial + num.
 */
void
kqpoll_init(unsigned int num)
{
	struct proc *p = curproc;
	struct filedesc *fdp;

	if (p->p_kq == NULL) {
		p->p_kq = kqueue_alloc(p->p_fd);
		p->p_kq_serial = arc4random();
		fdp = p->p_fd;
		fdplock(fdp);
		LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next);
		fdpunlock(fdp);
	}

	if (p->p_kq_serial + num < p->p_kq_serial) {
		/* Serial is about to wrap. Clear all attached knotes. */
		kqueue_purge(p, p->p_kq);
		p->p_kq_serial = 0;
	}
}

/*
 * Finish poll/select system call.
 * num must have the same value that was used with kqpoll_init().
 */
void
kqpoll_done(unsigned int num)
{
	struct proc *p = curproc;
	struct kqueue *kq = p->p_kq;

	KASSERT(p->p_kq != NULL);
	KASSERT(p->p_kq_serial + num >= p->p_kq_serial);

	p->p_kq_serial += num;

	/*
	 * Because of kn_pollid key, a thread can in principle allocate
	 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly
	 * with suitably varying pollfd arrays.
	 * Prevent such a large allocation by clearing knotes eagerly
	 * if there are too many of them.
	 *
	 * A small multiple of kq_knlistsize should give enough margin
	 * that eager clearing is infrequent, or does not happen at all,
	 * with normal programs.
	 * A single pollfd entry can use up to three knotes.
	 * Typically there is no significant overlap of fd and events
	 * between different entries in the pollfd array.
	 */
	if (kq->kq_nknotes > 4 * kq->kq_knlistsize)
		kqueue_purge(p, kq);
}

void
kqpoll_exit(void)
{
	struct proc *p = curproc;

	if (p->p_kq == NULL)
		return;

	kqueue_purge(p, p->p_kq);
	kqueue_terminate(p, p->p_kq);
	KASSERT(p->p_kq->kq_refs == 1);
	KQRELE(p->p_kq);
	p->p_kq = NULL;
}

/* Allocate and initialize a kqueue with a single reference. */
struct kqueue *
kqueue_alloc(struct filedesc *fdp)
{
	struct kqueue *kq;

	kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO);
	kq->kq_refs = 1;
	kq->kq_fdp = fdp;
	TAILQ_INIT(&kq->kq_head);
	mtx_init(&kq->kq_lock, IPL_HIGH);
	task_set(&kq->kq_task, kqueue_task, kq);
	klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock);

	return (kq);
}

int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	kq = kqueue_alloc(fdp);

	fdplock(fdp);
	error = falloc(p, &fp, &fd);
	if (error)
		goto out;
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	*retval = fd;
	LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next);
	kq = NULL;
	fdinsert(fdp, fd, 0, fp);
	FRELE(fp, p);
out:
	fdpunlock(fdp);
	if (kq != NULL)
		pool_put(&kqueue_pool, kq);
	return (error);
}

int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct kqueue_scan_state scan;
	struct filedesc *fdp = p->p_fd;
	struct sys_kevent_args /* {
		syscallarg(int)	fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int)	nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int)	nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	struct timespec *tsp = NULL;
	int i, n, nerrors, error;
	int ready, total;
	struct kevent kev[KQ_NEVENTS];

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		error = EBADF;
		goto done;
	}

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts)) {
			error = EINVAL;
			goto done;
		}
		tsp = &ts;
	}

	kq = fp->f_data;
	nerrors = 0;

	/* Apply the changelist in KQ_NEVENTS-sized batches. */
	while ((n = SCARG(uap, nchanges)) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		error = copyin(SCARG(uap, changelist), kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, n);
#endif
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, 0, p);
			if (error || (kevp->flags & EV_RECEIPT)) {
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					copyout(kevp, SCARG(uap, eventlist),
					    sizeof(*kevp));
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	kqueue_scan_setup(&scan, kq);
	FRELE(fp, p);
	/*
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
	 */
	total = 0;
	error = 0;
	while ((n = SCARG(uap, nevents) - total) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
		if (ready == 0)
			break;
		error = copyout(kev, SCARG(uap, eventlist) + total,
		    sizeof(struct kevent) * ready);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, ready);
#endif
		total += ready;
		if (error || ready < n)
			break;
	}
	kqueue_scan_finish(&scan);
	*retval = total;
	return (error);

done:
	FRELE(fp, p);
	return (error);
}

#ifdef KQUEUE_DEBUG
/* Consistency check of the kqueue's knote queue; caller holds kq_lock. */
void
kqueue_do_check(struct kqueue *kq, const char *func, int line)
{
	struct knote *kn;
	int count = 0, nmarker = 0;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if (kn->kn_filter == EVFILT_MARKER) {
			if ((kn->kn_status & KN_QUEUED) != 0)
				panic("%s:%d: kq=%p kn=%p marker QUEUED",
				    func, line, kq, kn);
			nmarker++;
		} else {
			if ((kn->kn_status & KN_ACTIVE) == 0)
				panic("%s:%d: kq=%p kn=%p knote !ACTIVE",
				    func, line, kq, kn);
			if ((kn->kn_status & KN_QUEUED) == 0)
				panic("%s:%d: kq=%p kn=%p knote !QUEUED",
				    func, line, kq, kn);
			if (kn->kn_kq != kq)
				panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq",
				    func, line, kq, kn, kn->kn_kq);
			count++;
			if (count > kq->kq_count)
				goto bad;
		}
	}
	if (count != kq->kq_count) {
bad:
		panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d",
		    func, line, kq, kq->kq_count, count, nmarker);
	}
}
#endif

/*
 * Add, modify, enable, disable or delete a knote on kq as described by
 * kev->flags.  pollid distinguishes knotes of the kqpoll machinery.
 */
int
kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid,
    struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	const struct filterops *fops = NULL;
	struct file *fp = NULL;
	struct knote *kn = NULL, *newkn = NULL;
	struct knlist *list = NULL;
	int active, error = 0;

	KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq));

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	}

	if (fops == NULL) {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		return (EINVAL);
	}

	if (fops->f_flags & FILTEROP_ISFD) {
		/* validate descriptor */
		if (kev->ident > INT_MAX)
			return (EBADF);
	}

	if (kev->flags & EV_ADD)
		newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);

again:
	if (fops->f_flags & FILTEROP_ISFD) {
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
			error = EBADF;
			goto done;
		}
		mtx_enter(&kq->kq_lock);
		if (kev->flags & EV_ADD)
			kqueue_expand_list(kq, kev->ident);
		if (kev->ident < kq->kq_knlistsize)
			list = &kq->kq_knlist[kev->ident];
	} else {
		mtx_enter(&kq->kq_lock);
		if (kev->flags & EV_ADD)
			kqueue_expand_hash(kq);
		if (kq->kq_knhashmask != 0) {
			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
		}
	}
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kev->filter == kn->kn_filter &&
			    kev->ident == kn->kn_id &&
			    pollid == kn->kn_pollid) {
				if (!knote_acquire(kn, NULL, 0)) {
					/* knote_acquire() has released
					 * kq_lock. */
					if (fp != NULL) {
						FRELE(fp, p);
						fp = NULL;
					}
					goto again;
				}
				break;
			}
		}
	}
	KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		mtx_leave(&kq->kq_lock);
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match.
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = newkn;
			newkn = NULL;
			kn->kn_status = KN_PROCESSING;
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;
			kn->kn_pollid = pollid;

			knote_attach(kn);
			mtx_leave(&kq->kq_lock);

			error = filter_attach(kn);
			if (error != 0) {
				knote_drop(kn, p);
				goto done;
			}

			/*
			 * If this is a file descriptor filter, check if
			 * fd was closed while the knote was being added.
			 * knote_fdclose() has missed kn if the function
			 * ran before kn appeared in kq_knlist.
			 */
			if ((fops->f_flags & FILTEROP_ISFD) &&
			    fd_checkclosed(fdp, kev->ident, kn->kn_fp)) {
				/*
				 * Drop the knote silently without error
				 * because another thread might already have
				 * seen it. This corresponds to the insert
				 * happening in full before the close.
				 */
				filter_detach(kn);
				knote_drop(kn, p);
				goto done;
			}

			/* Check if there is a pending event. */
			active = filter_process(kn, NULL);
			mtx_enter(&kq->kq_lock);
			if (active)
				knote_activate(kn);
		} else if (kn->kn_fop == &badfd_filtops) {
			/*
			 * Nothing expects this badfd knote any longer.
			 * Drop it to make room for the new knote and retry.
			 */
			KASSERT(kq == p->p_kq);
			mtx_leave(&kq->kq_lock);
			filter_detach(kn);
			knote_drop(kn, p);

			KASSERT(fp != NULL);
			FRELE(fp, p);
			fp = NULL;

			goto again;
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			mtx_leave(&kq->kq_lock);
			active = filter_modify(kev, kn);
			mtx_enter(&kq->kq_lock);
			if (active)
				knote_activate(kn);
			if (kev->flags & EV_ERROR) {
				error = kev->data;
				goto release;
			}
		}
	} else if (kev->flags & EV_DELETE) {
		mtx_leave(&kq->kq_lock);
		filter_detach(kn);
		knote_drop(kn, p);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0))
		kn->kn_status |= KN_DISABLED;

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		kn->kn_status &= ~KN_DISABLED;
		mtx_leave(&kq->kq_lock);
		/* Check if there is a pending event. */
		active = filter_process(kn, NULL);
		mtx_enter(&kq->kq_lock);
		if (active)
			knote_activate(kn);
	}

release:
	knote_release(kn);
	mtx_leave(&kq->kq_lock);
done:
	if (fp != NULL)
		FRELE(fp, p);
	if (newkn != NULL)
		pool_put(&knote_pool, newkn);
	return (error);
}

/* Sleep on kq, bounding the sleep by *tsp and deducting the elapsed time. */
int
kqueue_sleep(struct kqueue *kq, struct timespec *tsp)
{
	struct timespec elapsed, start, stop;
	uint64_t nsecs;
	int error;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (tsp != NULL) {
		getnanouptime(&start);
		nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP);
	} else
		nsecs = INFSLP;
	error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK,
	    "kqread", nsecs);
	if (tsp != NULL) {
		getnanouptime(&stop);
		timespecsub(&stop, &start, &elapsed);
		timespecsub(tsp, &elapsed, tsp);
		if (tsp->tv_sec < 0)
			timespecclear(tsp);
	}

	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->ts_secs/nsecs are both
 * 0 we do not block at all.
 */
int
kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
    struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
{
	struct kqueue *kq = scan->kqs_kq;
	struct knote *kn;
	int error = 0, nkev = 0;

	if (maxevents == 0)
		goto done;
retry:
	KASSERT(nkev == 0);

	error = 0;

	/* msleep() with PCATCH requires kernel lock. */
	KERNEL_LOCK();

	mtx_enter(&kq->kq_lock);

	if (kq->kq_state & KQ_DYING) {
		mtx_leave(&kq->kq_lock);
		KERNEL_UNLOCK();
		error = EBADF;
		goto done;
	}

	if (kq->kq_count == 0) {
		/*
		 * Successive loops are only necessary if there are more
		 * ready events to gather, so they don't need to block.
1314 */ 1315 if ((tsp != NULL && !timespecisset(tsp)) || 1316 scan->kqs_nevent != 0) { 1317 mtx_leave(&kq->kq_lock); 1318 KERNEL_UNLOCK(); 1319 error = 0; 1320 goto done; 1321 } 1322 kq->kq_state |= KQ_SLEEP; 1323 error = kqueue_sleep(kq, tsp); 1324 /* kqueue_sleep() has released kq_lock. */ 1325 KERNEL_UNLOCK(); 1326 if (error == 0 || error == EWOULDBLOCK) 1327 goto retry; 1328 /* don't restart after signals... */ 1329 if (error == ERESTART) 1330 error = EINTR; 1331 goto done; 1332 } 1333 1334 /* The actual scan does not sleep on kq, so unlock the kernel. */ 1335 KERNEL_UNLOCK(); 1336 1337 /* 1338 * Put the end marker in the queue to limit the scan to the events 1339 * that are currently active. This prevents events from being 1340 * recollected if they reactivate during scan. 1341 * 1342 * If a partial scan has been performed already but no events have 1343 * been collected, reposition the end marker to make any new events 1344 * reachable. 1345 */ 1346 if (!scan->kqs_queued) { 1347 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1348 scan->kqs_queued = 1; 1349 } else if (scan->kqs_nevent == 0) { 1350 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1351 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1352 } 1353 1354 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1355 while (nkev < maxevents) { 1356 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1357 if (kn->kn_filter == EVFILT_MARKER) { 1358 if (kn == &scan->kqs_end) 1359 break; 1360 1361 /* Move start marker past another thread's marker. */ 1362 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1363 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1364 kn_tqe); 1365 continue; 1366 } 1367 1368 if (!knote_acquire(kn, NULL, 0)) { 1369 /* knote_acquire() has released kq_lock. 
*/ 1370 mtx_enter(&kq->kq_lock); 1371 continue; 1372 } 1373 1374 kqueue_check(kq); 1375 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1376 kn->kn_status &= ~KN_QUEUED; 1377 kq->kq_count--; 1378 kqueue_check(kq); 1379 1380 if (kn->kn_status & KN_DISABLED) { 1381 knote_release(kn); 1382 continue; 1383 } 1384 1385 mtx_leave(&kq->kq_lock); 1386 1387 /* Drop expired kqpoll knotes. */ 1388 if (p->p_kq == kq && 1389 p->p_kq_serial > (unsigned long)kn->kn_udata) { 1390 filter_detach(kn); 1391 knote_drop(kn, p); 1392 mtx_enter(&kq->kq_lock); 1393 continue; 1394 } 1395 1396 /* 1397 * Invalidate knotes whose vnodes have been revoked. 1398 * This is a workaround; it is tricky to clear existing 1399 * knotes and prevent new ones from being registered 1400 * with the current revocation mechanism. 1401 */ 1402 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && 1403 kn->kn_fp != NULL && 1404 kn->kn_fp->f_type == DTYPE_VNODE) { 1405 struct vnode *vp = kn->kn_fp->f_data; 1406 1407 if (__predict_false(vp->v_op == &dead_vops && 1408 kn->kn_fop != &dead_filtops)) { 1409 filter_detach(kn); 1410 kn->kn_fop = &dead_filtops; 1411 1412 /* 1413 * Check if the event should be delivered. 1414 * Use f_event directly because this is 1415 * a special situation. 
1416 */ 1417 if (kn->kn_fop->f_event(kn, 0) == 0) { 1418 filter_detach(kn); 1419 knote_drop(kn, p); 1420 mtx_enter(&kq->kq_lock); 1421 continue; 1422 } 1423 } 1424 } 1425 1426 memset(kevp, 0, sizeof(*kevp)); 1427 if (filter_process(kn, kevp) == 0) { 1428 mtx_enter(&kq->kq_lock); 1429 if ((kn->kn_status & KN_QUEUED) == 0) 1430 kn->kn_status &= ~KN_ACTIVE; 1431 knote_release(kn); 1432 kqueue_check(kq); 1433 continue; 1434 } 1435 1436 /* 1437 * Post-event action on the note 1438 */ 1439 if (kevp->flags & EV_ONESHOT) { 1440 filter_detach(kn); 1441 knote_drop(kn, p); 1442 mtx_enter(&kq->kq_lock); 1443 } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { 1444 mtx_enter(&kq->kq_lock); 1445 if (kevp->flags & EV_DISPATCH) 1446 kn->kn_status |= KN_DISABLED; 1447 if ((kn->kn_status & KN_QUEUED) == 0) 1448 kn->kn_status &= ~KN_ACTIVE; 1449 knote_release(kn); 1450 } else { 1451 mtx_enter(&kq->kq_lock); 1452 if ((kn->kn_status & KN_QUEUED) == 0) { 1453 kqueue_check(kq); 1454 kq->kq_count++; 1455 kn->kn_status |= KN_QUEUED; 1456 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1457 } 1458 knote_release(kn); 1459 } 1460 kqueue_check(kq); 1461 1462 kevp++; 1463 nkev++; 1464 scan->kqs_nevent++; 1465 } 1466 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1467 mtx_leave(&kq->kq_lock); 1468 if (scan->kqs_nevent == 0) 1469 goto retry; 1470 done: 1471 *errorp = error; 1472 return (nkev); 1473 } 1474 1475 void 1476 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1477 { 1478 memset(scan, 0, sizeof(*scan)); 1479 1480 KQREF(kq); 1481 scan->kqs_kq = kq; 1482 scan->kqs_start.kn_filter = EVFILT_MARKER; 1483 scan->kqs_start.kn_status = KN_PROCESSING; 1484 scan->kqs_end.kn_filter = EVFILT_MARKER; 1485 scan->kqs_end.kn_status = KN_PROCESSING; 1486 } 1487 1488 void 1489 kqueue_scan_finish(struct kqueue_scan_state *scan) 1490 { 1491 struct kqueue *kq = scan->kqs_kq; 1492 1493 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1494 KASSERT(scan->kqs_start.kn_status == 
KN_PROCESSING); 1495 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1496 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1497 1498 if (scan->kqs_queued) { 1499 scan->kqs_queued = 0; 1500 mtx_enter(&kq->kq_lock); 1501 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1502 mtx_leave(&kq->kq_lock); 1503 } 1504 KQRELE(kq); 1505 } 1506 1507 /* 1508 * XXX 1509 * This could be expanded to call kqueue_scan, if desired. 1510 */ 1511 int 1512 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1513 { 1514 return (ENXIO); 1515 } 1516 1517 int 1518 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1519 { 1520 return (ENXIO); 1521 } 1522 1523 int 1524 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1525 { 1526 return (ENOTTY); 1527 } 1528 1529 int 1530 kqueue_poll(struct file *fp, int events, struct proc *p) 1531 { 1532 struct kqueue *kq = (struct kqueue *)fp->f_data; 1533 int revents = 0; 1534 1535 if (events & (POLLIN | POLLRDNORM)) { 1536 mtx_enter(&kq->kq_lock); 1537 if (kq->kq_count) { 1538 revents |= events & (POLLIN | POLLRDNORM); 1539 } else { 1540 selrecord(p, &kq->kq_sel); 1541 kq->kq_state |= KQ_SEL; 1542 } 1543 mtx_leave(&kq->kq_lock); 1544 } 1545 return (revents); 1546 } 1547 1548 int 1549 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1550 { 1551 struct kqueue *kq = fp->f_data; 1552 1553 memset(st, 0, sizeof(*st)); 1554 st->st_size = kq->kq_count; /* unlocked read */ 1555 st->st_blksize = sizeof(struct kevent); 1556 st->st_mode = S_IFIFO; 1557 return (0); 1558 } 1559 1560 void 1561 kqueue_purge(struct proc *p, struct kqueue *kq) 1562 { 1563 int i; 1564 1565 mtx_enter(&kq->kq_lock); 1566 for (i = 0; i < kq->kq_knlistsize; i++) 1567 knote_remove(p, kq, &kq->kq_knlist[i], 1); 1568 if (kq->kq_knhashmask != 0) { 1569 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1570 knote_remove(p, kq, &kq->kq_knhash[i], 1); 1571 } 1572 mtx_leave(&kq->kq_lock); 1573 } 1574 1575 void 1576 kqueue_terminate(struct proc *p, 
struct kqueue *kq) 1577 { 1578 struct knote *kn; 1579 1580 mtx_enter(&kq->kq_lock); 1581 1582 /* 1583 * Any remaining entries should be scan markers. 1584 * They are removed when the ongoing scans finish. 1585 */ 1586 KASSERT(kq->kq_count == 0); 1587 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) 1588 KASSERT(kn->kn_filter == EVFILT_MARKER); 1589 1590 kq->kq_state |= KQ_DYING; 1591 kqueue_wakeup(kq); 1592 mtx_leave(&kq->kq_lock); 1593 1594 KASSERT(klist_empty(&kq->kq_sel.si_note)); 1595 task_del(systq, &kq->kq_task); 1596 1597 } 1598 1599 int 1600 kqueue_close(struct file *fp, struct proc *p) 1601 { 1602 struct kqueue *kq = fp->f_data; 1603 1604 fp->f_data = NULL; 1605 1606 kqueue_purge(p, kq); 1607 kqueue_terminate(p, kq); 1608 1609 KQRELE(kq); 1610 1611 return (0); 1612 } 1613 1614 static void 1615 kqueue_task(void *arg) 1616 { 1617 struct kqueue *kq = arg; 1618 1619 /* Kernel lock is needed inside selwakeup(). */ 1620 KERNEL_ASSERT_LOCKED(); 1621 1622 mtx_enter(&kqueue_klist_lock); 1623 mtx_enter(&kq->kq_lock); 1624 if (kq->kq_state & KQ_SEL) { 1625 kq->kq_state &= ~KQ_SEL; 1626 mtx_leave(&kq->kq_lock); 1627 selwakeup(&kq->kq_sel); 1628 } else { 1629 mtx_leave(&kq->kq_lock); 1630 KNOTE(&kq->kq_sel.si_note, 0); 1631 } 1632 mtx_leave(&kqueue_klist_lock); 1633 KQRELE(kq); 1634 } 1635 1636 void 1637 kqueue_wakeup(struct kqueue *kq) 1638 { 1639 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1640 1641 if (kq->kq_state & KQ_SLEEP) { 1642 kq->kq_state &= ~KQ_SLEEP; 1643 wakeup(kq); 1644 } 1645 if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) { 1646 /* Defer activation to avoid recursion. 
*/ 1647 KQREF(kq); 1648 if (!task_add(systq, &kq->kq_task)) 1649 KQRELE(kq); 1650 } 1651 } 1652 1653 static void 1654 kqueue_expand_hash(struct kqueue *kq) 1655 { 1656 struct knlist *hash; 1657 u_long hashmask; 1658 1659 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1660 1661 if (kq->kq_knhashmask == 0) { 1662 mtx_leave(&kq->kq_lock); 1663 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1664 mtx_enter(&kq->kq_lock); 1665 if (kq->kq_knhashmask == 0) { 1666 kq->kq_knhash = hash; 1667 kq->kq_knhashmask = hashmask; 1668 } else { 1669 /* Another thread has allocated the hash. */ 1670 mtx_leave(&kq->kq_lock); 1671 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1672 mtx_enter(&kq->kq_lock); 1673 } 1674 } 1675 } 1676 1677 static void 1678 kqueue_expand_list(struct kqueue *kq, int fd) 1679 { 1680 struct knlist *list, *olist; 1681 int size, osize; 1682 1683 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1684 1685 if (kq->kq_knlistsize <= fd) { 1686 size = kq->kq_knlistsize; 1687 mtx_leave(&kq->kq_lock); 1688 while (size <= fd) 1689 size += KQEXTENT; 1690 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1691 mtx_enter(&kq->kq_lock); 1692 if (kq->kq_knlistsize <= fd) { 1693 memcpy(list, kq->kq_knlist, 1694 kq->kq_knlistsize * sizeof(*list)); 1695 memset(&list[kq->kq_knlistsize], 0, 1696 (size - kq->kq_knlistsize) * sizeof(*list)); 1697 olist = kq->kq_knlist; 1698 osize = kq->kq_knlistsize; 1699 kq->kq_knlist = list; 1700 kq->kq_knlistsize = size; 1701 mtx_leave(&kq->kq_lock); 1702 free(olist, M_KEVENT, osize * sizeof(*list)); 1703 mtx_enter(&kq->kq_lock); 1704 } else { 1705 /* Another thread has expanded the list. */ 1706 mtx_leave(&kq->kq_lock); 1707 free(list, M_KEVENT, size * sizeof(*list)); 1708 mtx_enter(&kq->kq_lock); 1709 } 1710 } 1711 } 1712 1713 /* 1714 * Acquire a knote, return non-zero on success, 0 on failure. 1715 * 1716 * If we cannot acquire the knote we sleep and return 0. 
The knote 1717 * may be stale on return in this case and the caller must restart 1718 * whatever loop they are in. 1719 * 1720 * If we are about to sleep and klist is non-NULL, the list is unlocked 1721 * before sleep and remains unlocked on return. 1722 */ 1723 int 1724 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1725 { 1726 struct kqueue *kq = kn->kn_kq; 1727 1728 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1729 KASSERT(kn->kn_filter != EVFILT_MARKER); 1730 1731 if (kn->kn_status & KN_PROCESSING) { 1732 kn->kn_status |= KN_WAITING; 1733 if (klist != NULL) { 1734 mtx_leave(&kq->kq_lock); 1735 klist_unlock(klist, ls); 1736 /* XXX Timeout resolves potential loss of wakeup. */ 1737 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1738 } else { 1739 msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts", 1740 SEC_TO_NSEC(1)); 1741 } 1742 /* knote may be stale now */ 1743 return (0); 1744 } 1745 kn->kn_status |= KN_PROCESSING; 1746 return (1); 1747 } 1748 1749 /* 1750 * Release an acquired knote, clearing KN_PROCESSING. 1751 */ 1752 void 1753 knote_release(struct knote *kn) 1754 { 1755 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1756 KASSERT(kn->kn_filter != EVFILT_MARKER); 1757 KASSERT(kn->kn_status & KN_PROCESSING); 1758 1759 if (kn->kn_status & KN_WAITING) { 1760 kn->kn_status &= ~KN_WAITING; 1761 wakeup(kn); 1762 } 1763 kn->kn_status &= ~KN_PROCESSING; 1764 /* kn should not be accessed anymore */ 1765 } 1766 1767 /* 1768 * activate one knote. 1769 */ 1770 void 1771 knote_activate(struct knote *kn) 1772 { 1773 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1774 1775 kn->kn_status |= KN_ACTIVE; 1776 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1777 knote_enqueue(kn); 1778 } 1779 1780 /* 1781 * walk down a list of knotes, activating them if their event has triggered. 
1782 */ 1783 void 1784 knote(struct klist *list, long hint) 1785 { 1786 struct knote *kn, *kn0; 1787 struct kqueue *kq; 1788 1789 KLIST_ASSERT_LOCKED(list); 1790 1791 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) { 1792 if (filter_event(kn, hint)) { 1793 kq = kn->kn_kq; 1794 mtx_enter(&kq->kq_lock); 1795 knote_activate(kn); 1796 mtx_leave(&kq->kq_lock); 1797 } 1798 } 1799 } 1800 1801 /* 1802 * remove all knotes from a specified knlist 1803 */ 1804 void 1805 knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list, int purge) 1806 { 1807 struct knote *kn; 1808 1809 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1810 1811 while ((kn = SLIST_FIRST(list)) != NULL) { 1812 KASSERT(kn->kn_kq == kq); 1813 1814 if (!purge) { 1815 /* Skip pending badfd knotes. */ 1816 while (kn->kn_fop == &badfd_filtops) { 1817 kn = SLIST_NEXT(kn, kn_link); 1818 if (kn == NULL) 1819 return; 1820 KASSERT(kn->kn_kq == kq); 1821 } 1822 } 1823 1824 if (!knote_acquire(kn, NULL, 0)) { 1825 /* knote_acquire() has released kq_lock. */ 1826 mtx_enter(&kq->kq_lock); 1827 continue; 1828 } 1829 mtx_leave(&kq->kq_lock); 1830 filter_detach(kn); 1831 1832 /* 1833 * Notify poll(2) and select(2) when a monitored 1834 * file descriptor is closed. 1835 * 1836 * This reuses the original knote for delivering the 1837 * notification so as to avoid allocating memory. 
1838 */ 1839 if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) && 1840 !(p->p_kq == kq && 1841 p->p_kq_serial > (unsigned long)kn->kn_udata) && 1842 kn->kn_fop != &badfd_filtops) { 1843 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1844 FRELE(kn->kn_fp, p); 1845 kn->kn_fp = NULL; 1846 1847 kn->kn_fop = &badfd_filtops; 1848 filter_event(kn, 0); 1849 mtx_enter(&kq->kq_lock); 1850 knote_activate(kn); 1851 knote_release(kn); 1852 continue; 1853 } 1854 1855 knote_drop(kn, p); 1856 mtx_enter(&kq->kq_lock); 1857 } 1858 } 1859 1860 /* 1861 * remove all knotes referencing a specified fd 1862 */ 1863 void 1864 knote_fdclose(struct proc *p, int fd) 1865 { 1866 struct filedesc *fdp = p->p_p->ps_fd; 1867 struct kqueue *kq; 1868 1869 /* 1870 * fdplock can be ignored if the file descriptor table is being freed 1871 * because no other thread can access the fdp. 1872 */ 1873 if (fdp->fd_refcnt != 0) 1874 fdpassertlocked(fdp); 1875 1876 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1877 mtx_enter(&kq->kq_lock); 1878 if (fd < kq->kq_knlistsize) 1879 knote_remove(p, kq, &kq->kq_knlist[fd], 0); 1880 mtx_leave(&kq->kq_lock); 1881 } 1882 } 1883 1884 /* 1885 * handle a process exiting, including the triggering of NOTE_EXIT notes 1886 * XXX this could be more efficient, doing a single pass down the klist 1887 */ 1888 void 1889 knote_processexit(struct proc *p) 1890 { 1891 struct process *pr = p->p_p; 1892 1893 KERNEL_ASSERT_LOCKED(); 1894 KASSERT(p == curproc); 1895 1896 KNOTE(&pr->ps_klist, NOTE_EXIT); 1897 1898 /* remove other knotes hanging off the process */ 1899 klist_invalidate(&pr->ps_klist); 1900 } 1901 1902 void 1903 knote_attach(struct knote *kn) 1904 { 1905 struct kqueue *kq = kn->kn_kq; 1906 struct knlist *list; 1907 1908 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1909 KASSERT(kn->kn_status & KN_PROCESSING); 1910 1911 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1912 KASSERT(kq->kq_knlistsize > kn->kn_id); 1913 list = &kq->kq_knlist[kn->kn_id]; 1914 } else { 1915 
KASSERT(kq->kq_knhashmask != 0); 1916 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1917 } 1918 SLIST_INSERT_HEAD(list, kn, kn_link); 1919 kq->kq_nknotes++; 1920 } 1921 1922 void 1923 knote_detach(struct knote *kn) 1924 { 1925 struct kqueue *kq = kn->kn_kq; 1926 struct knlist *list; 1927 1928 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1929 KASSERT(kn->kn_status & KN_PROCESSING); 1930 1931 kq->kq_nknotes--; 1932 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1933 list = &kq->kq_knlist[kn->kn_id]; 1934 else 1935 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1936 SLIST_REMOVE(list, kn, knote, kn_link); 1937 } 1938 1939 /* 1940 * should be called at spl == 0, since we don't want to hold spl 1941 * while calling FRELE and pool_put. 1942 */ 1943 void 1944 knote_drop(struct knote *kn, struct proc *p) 1945 { 1946 struct kqueue *kq = kn->kn_kq; 1947 1948 KASSERT(kn->kn_filter != EVFILT_MARKER); 1949 1950 mtx_enter(&kq->kq_lock); 1951 knote_detach(kn); 1952 if (kn->kn_status & KN_QUEUED) 1953 knote_dequeue(kn); 1954 if (kn->kn_status & KN_WAITING) { 1955 kn->kn_status &= ~KN_WAITING; 1956 wakeup(kn); 1957 } 1958 mtx_leave(&kq->kq_lock); 1959 1960 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 1961 FRELE(kn->kn_fp, p); 1962 pool_put(&knote_pool, kn); 1963 } 1964 1965 1966 void 1967 knote_enqueue(struct knote *kn) 1968 { 1969 struct kqueue *kq = kn->kn_kq; 1970 1971 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1972 KASSERT(kn->kn_filter != EVFILT_MARKER); 1973 KASSERT((kn->kn_status & KN_QUEUED) == 0); 1974 1975 kqueue_check(kq); 1976 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1977 kn->kn_status |= KN_QUEUED; 1978 kq->kq_count++; 1979 kqueue_check(kq); 1980 kqueue_wakeup(kq); 1981 } 1982 1983 void 1984 knote_dequeue(struct knote *kn) 1985 { 1986 struct kqueue *kq = kn->kn_kq; 1987 1988 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1989 KASSERT(kn->kn_filter != EVFILT_MARKER); 1990 KASSERT(kn->kn_status & KN_QUEUED); 1991 1992 kqueue_check(kq); 1993 
TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1994 kn->kn_status &= ~KN_QUEUED; 1995 kq->kq_count--; 1996 kqueue_check(kq); 1997 } 1998 1999 /* 2000 * Assign parameters to the knote. 2001 * 2002 * The knote's object lock must be held. 2003 */ 2004 void 2005 knote_assign(const struct kevent *kev, struct knote *kn) 2006 { 2007 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2008 KERNEL_ASSERT_LOCKED(); 2009 2010 kn->kn_sfflags = kev->fflags; 2011 kn->kn_sdata = kev->data; 2012 kn->kn_udata = kev->udata; 2013 } 2014 2015 /* 2016 * Submit the knote's event for delivery. 2017 * 2018 * The knote's object lock must be held. 2019 */ 2020 void 2021 knote_submit(struct knote *kn, struct kevent *kev) 2022 { 2023 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2024 KERNEL_ASSERT_LOCKED(); 2025 2026 if (kev != NULL) { 2027 *kev = kn->kn_kevent; 2028 if (kn->kn_flags & EV_CLEAR) { 2029 kn->kn_fflags = 0; 2030 kn->kn_data = 0; 2031 } 2032 } 2033 } 2034 2035 void 2036 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 2037 { 2038 SLIST_INIT(&klist->kl_list); 2039 klist->kl_ops = ops; 2040 klist->kl_arg = arg; 2041 } 2042 2043 void 2044 klist_free(struct klist *klist) 2045 { 2046 KASSERT(SLIST_EMPTY(&klist->kl_list)); 2047 } 2048 2049 void 2050 klist_insert(struct klist *klist, struct knote *kn) 2051 { 2052 int ls; 2053 2054 ls = klist_lock(klist); 2055 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2056 klist_unlock(klist, ls); 2057 } 2058 2059 void 2060 klist_insert_locked(struct klist *klist, struct knote *kn) 2061 { 2062 KLIST_ASSERT_LOCKED(klist); 2063 2064 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2065 } 2066 2067 void 2068 klist_remove(struct klist *klist, struct knote *kn) 2069 { 2070 int ls; 2071 2072 ls = klist_lock(klist); 2073 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2074 klist_unlock(klist, ls); 2075 } 2076 2077 void 2078 klist_remove_locked(struct klist *klist, struct knote *kn) 2079 { 2080 KLIST_ASSERT_LOCKED(klist); 2081 
2082 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2083 } 2084 2085 /* 2086 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 2087 * 2088 * The caller of this function must not hold any locks that can block 2089 * filterops callbacks that run with KN_PROCESSING. 2090 * Otherwise this function might deadlock. 2091 */ 2092 void 2093 klist_invalidate(struct klist *list) 2094 { 2095 struct knote *kn; 2096 struct kqueue *kq; 2097 struct proc *p = curproc; 2098 int ls; 2099 2100 NET_ASSERT_UNLOCKED(); 2101 2102 ls = klist_lock(list); 2103 while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { 2104 kq = kn->kn_kq; 2105 mtx_enter(&kq->kq_lock); 2106 if (!knote_acquire(kn, list, ls)) { 2107 /* knote_acquire() has released kq_lock 2108 * and klist lock. */ 2109 ls = klist_lock(list); 2110 continue; 2111 } 2112 mtx_leave(&kq->kq_lock); 2113 klist_unlock(list, ls); 2114 filter_detach(kn); 2115 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 2116 kn->kn_fop = &dead_filtops; 2117 filter_event(kn, 0); 2118 mtx_enter(&kq->kq_lock); 2119 knote_activate(kn); 2120 knote_release(kn); 2121 mtx_leave(&kq->kq_lock); 2122 } else { 2123 knote_drop(kn, p); 2124 } 2125 ls = klist_lock(list); 2126 } 2127 klist_unlock(list, ls); 2128 } 2129 2130 static int 2131 klist_lock(struct klist *list) 2132 { 2133 int ls = 0; 2134 2135 if (list->kl_ops != NULL) { 2136 ls = list->kl_ops->klo_lock(list->kl_arg); 2137 } else { 2138 KERNEL_LOCK(); 2139 ls = splhigh(); 2140 } 2141 return ls; 2142 } 2143 2144 static void 2145 klist_unlock(struct klist *list, int ls) 2146 { 2147 if (list->kl_ops != NULL) { 2148 list->kl_ops->klo_unlock(list->kl_arg, ls); 2149 } else { 2150 splx(ls); 2151 KERNEL_UNLOCK(); 2152 } 2153 } 2154 2155 static void 2156 klist_mutex_assertlk(void *arg) 2157 { 2158 struct mutex *mtx = arg; 2159 2160 (void)mtx; 2161 2162 MUTEX_ASSERT_LOCKED(mtx); 2163 } 2164 2165 static int 2166 klist_mutex_lock(void *arg) 2167 { 2168 struct mutex *mtx = arg; 2169 2170 
mtx_enter(mtx); 2171 return 0; 2172 } 2173 2174 static void 2175 klist_mutex_unlock(void *arg, int s) 2176 { 2177 struct mutex *mtx = arg; 2178 2179 mtx_leave(mtx); 2180 } 2181 2182 static const struct klistops mutex_klistops = { 2183 .klo_assertlk = klist_mutex_assertlk, 2184 .klo_lock = klist_mutex_lock, 2185 .klo_unlock = klist_mutex_unlock, 2186 }; 2187 2188 void 2189 klist_init_mutex(struct klist *klist, struct mutex *mtx) 2190 { 2191 klist_init(klist, &mutex_klistops, mtx); 2192 } 2193 2194 static void 2195 klist_rwlock_assertlk(void *arg) 2196 { 2197 struct rwlock *rwl = arg; 2198 2199 (void)rwl; 2200 2201 rw_assert_wrlock(rwl); 2202 } 2203 2204 static int 2205 klist_rwlock_lock(void *arg) 2206 { 2207 struct rwlock *rwl = arg; 2208 2209 rw_enter_write(rwl); 2210 return 0; 2211 } 2212 2213 static void 2214 klist_rwlock_unlock(void *arg, int s) 2215 { 2216 struct rwlock *rwl = arg; 2217 2218 rw_exit_write(rwl); 2219 } 2220 2221 static const struct klistops rwlock_klistops = { 2222 .klo_assertlk = klist_rwlock_assertlk, 2223 .klo_lock = klist_rwlock_lock, 2224 .klo_unlock = klist_rwlock_unlock, 2225 }; 2226 2227 void 2228 klist_init_rwlock(struct klist *klist, struct rwlock *rwl) 2229 { 2230 klist_init(klist, &rwlock_klistops, rwl); 2231 } 2232