1 /* $OpenBSD: kern_event.c,v 1.182 2022/02/13 13:05:51 visa Exp $ */ 2 3 /*- 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/atomic.h> 34 #include <sys/kernel.h> 35 #include <sys/proc.h> 36 #include <sys/pledge.h> 37 #include <sys/malloc.h> 38 #include <sys/unistd.h> 39 #include <sys/file.h> 40 #include <sys/filedesc.h> 41 #include <sys/fcntl.h> 42 #include <sys/selinfo.h> 43 #include <sys/queue.h> 44 #include <sys/event.h> 45 #include <sys/eventvar.h> 46 #include <sys/ktrace.h> 47 #include <sys/pool.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/stat.h> 52 #include <sys/uio.h> 53 #include <sys/mount.h> 54 #include <sys/poll.h> 55 #include <sys/syscallargs.h> 56 #include <sys/time.h> 57 #include <sys/timeout.h> 58 #include <sys/vnode.h> 59 #include <sys/wait.h> 60 61 #ifdef DIAGNOSTIC 62 #define KLIST_ASSERT_LOCKED(kl) do { \ 63 if ((kl)->kl_ops != NULL) \ 64 (kl)->kl_ops->klo_assertlk((kl)->kl_arg); \ 65 else \ 66 KERNEL_ASSERT_LOCKED(); \ 67 } while (0) 68 #else 69 #define KLIST_ASSERT_LOCKED(kl) ((void)(kl)) 70 #endif 71 72 struct kqueue *kqueue_alloc(struct filedesc *); 73 void kqueue_terminate(struct proc *p, struct kqueue *); 74 void KQREF(struct kqueue *); 75 void KQRELE(struct kqueue *); 76 77 void kqueue_purge(struct proc *, struct kqueue *); 78 int kqueue_sleep(struct kqueue *, struct timespec *); 79 80 int kqueue_read(struct file *, struct uio *, int); 81 int kqueue_write(struct file *, struct uio *, int); 82 int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 83 struct proc *p); 84 int kqueue_poll(struct file *fp, int events, struct proc *p); 85 int kqueue_kqfilter(struct file *fp, struct knote *kn); 86 int kqueue_stat(struct file *fp, struct stat *st, struct proc *p); 87 int kqueue_close(struct file *fp, struct proc *p); 88 void kqueue_wakeup(struct kqueue *kq); 89 90 #ifdef KQUEUE_DEBUG 91 void kqueue_do_check(struct kqueue *kq, const char *func, int line); 92 #define kqueue_check(kq) kqueue_do_check((kq), 
__func__, __LINE__)
93 #else
94 #define kqueue_check(kq) do {} while (0)
95 #endif
96
97 static int filter_attach(struct knote *kn);
98 static void filter_detach(struct knote *kn);
99 static int filter_event(struct knote *kn, long hint);
100 static int filter_modify(struct kevent *kev, struct knote *kn);
101 static int filter_process(struct knote *kn, struct kevent *kev);
102 static void kqueue_expand_hash(struct kqueue *kq);
103 static void kqueue_expand_list(struct kqueue *kq, int fd);
104 static void kqueue_task(void *);
105 static int klist_lock(struct klist *);
106 static void klist_unlock(struct klist *, int);
107
108 const struct fileops kqueueops = {
109 	.fo_read = kqueue_read,
110 	.fo_write = kqueue_write,
111 	.fo_ioctl = kqueue_ioctl,
112 	.fo_poll = kqueue_poll,
113 	.fo_kqfilter = kqueue_kqfilter,
114 	.fo_stat = kqueue_stat,
115 	.fo_close = kqueue_close
116 };
117
118 void knote_attach(struct knote *kn);
119 void knote_detach(struct knote *kn);
120 void knote_drop(struct knote *kn, struct proc *p);
121 void knote_enqueue(struct knote *kn);
122 void knote_dequeue(struct knote *kn);
123 int knote_acquire(struct knote *kn, struct klist *, int);
124 void knote_release(struct knote *kn);
125 void knote_activate(struct knote *kn);
126 void knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list,
127 	    int purge);
128
129 void filt_kqdetach(struct knote *kn);
130 int filt_kqueue(struct knote *kn, long hint);
131 int filt_kqueuemodify(struct kevent *kev, struct knote *kn);
132 int filt_kqueueprocess(struct knote *kn, struct kevent *kev);
133 int filt_kqueue_common(struct knote *kn, struct kqueue *kq);
134 int filt_procattach(struct knote *kn);
135 void filt_procdetach(struct knote *kn);
136 int filt_proc(struct knote *kn, long hint);
137 int filt_fileattach(struct knote *kn);
138 void filt_timerexpire(void *knx);
139 int filt_timerattach(struct knote *kn);
140 void filt_timerdetach(struct knote *kn);
141 int filt_timermodify(struct kevent *kev, struct knote *kn);
142 int filt_timerprocess(struct knote *kn, struct kevent *kev);
143 void filt_seltruedetach(struct knote *kn);
144
145 const struct filterops kqread_filtops = {
146 	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
147 	.f_attach = NULL,
148 	.f_detach = filt_kqdetach,
149 	.f_event = filt_kqueue,
150 	.f_modify = filt_kqueuemodify,
151 	.f_process = filt_kqueueprocess,
152 };
153
154 const struct filterops proc_filtops = {
155 	.f_flags = 0,
156 	.f_attach = filt_procattach,
157 	.f_detach = filt_procdetach,
158 	.f_event = filt_proc,
159 };
160
161 const struct filterops file_filtops = {
162 	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
163 	.f_attach = filt_fileattach,
164 	.f_detach = NULL,
165 	.f_event = NULL,
166 };
167
168 const struct filterops timer_filtops = {
169 	.f_flags = 0,
170 	.f_attach = filt_timerattach,
171 	.f_detach = filt_timerdetach,
172 	.f_event = NULL,
173 	.f_modify = filt_timermodify,
174 	.f_process = filt_timerprocess,
175 };
176
177 struct pool knote_pool;
178 struct pool kqueue_pool;
179 struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
180 int kq_ntimeouts = 0;
181 int kq_timeoutmax = (4 * 1024);
182
183 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
184
185 /*
186 * Table for all system-defined filters.
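 *
 * The EVFILT_* constants are small negative integers, so kqueue_register()
 * below maps a user-supplied filter onto this table with a one's
 * complement. A worked example (constant values from <sys/event.h>):
 *
 *	EVFILT_READ == -1  ->  sysfilt_ops[~(-1)] == sysfilt_ops[0]
 *	EVFILT_PROC == -5  ->  sysfilt_ops[~(-5)] == sysfilt_ops[4]
 *
 * Filters below -EVFILT_SYSCOUNT are rejected with EINVAL before the
 * table is consulted.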
187 */ 188 const struct filterops *const sysfilt_ops[] = { 189 &file_filtops, /* EVFILT_READ */ 190 &file_filtops, /* EVFILT_WRITE */ 191 NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ 192 &file_filtops, /* EVFILT_VNODE */ 193 &proc_filtops, /* EVFILT_PROC */ 194 &sig_filtops, /* EVFILT_SIGNAL */ 195 &timer_filtops, /* EVFILT_TIMER */ 196 &file_filtops, /* EVFILT_DEVICE */ 197 &file_filtops, /* EVFILT_EXCEPT */ 198 }; 199 200 void 201 KQREF(struct kqueue *kq) 202 { 203 atomic_inc_int(&kq->kq_refs); 204 } 205 206 void 207 KQRELE(struct kqueue *kq) 208 { 209 struct filedesc *fdp; 210 211 if (atomic_dec_int_nv(&kq->kq_refs) > 0) 212 return; 213 214 fdp = kq->kq_fdp; 215 if (rw_status(&fdp->fd_lock) == RW_WRITE) { 216 LIST_REMOVE(kq, kq_next); 217 } else { 218 fdplock(fdp); 219 LIST_REMOVE(kq, kq_next); 220 fdpunlock(fdp); 221 } 222 223 KASSERT(TAILQ_EMPTY(&kq->kq_head)); 224 KASSERT(kq->kq_nknotes == 0); 225 226 free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * 227 sizeof(struct knlist)); 228 hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); 229 klist_free(&kq->kq_sel.si_note); 230 pool_put(&kqueue_pool, kq); 231 } 232 233 void 234 kqueue_init(void) 235 { 236 pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, 237 PR_WAITOK, "kqueuepl", NULL); 238 pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, 239 PR_WAITOK, "knotepl", NULL); 240 } 241 242 void 243 kqueue_init_percpu(void) 244 { 245 pool_cache_init(&knote_pool); 246 } 247 248 int 249 filt_fileattach(struct knote *kn) 250 { 251 struct file *fp = kn->kn_fp; 252 253 return fp->f_ops->fo_kqfilter(fp, kn); 254 } 255 256 int 257 kqueue_kqfilter(struct file *fp, struct knote *kn) 258 { 259 struct kqueue *kq = kn->kn_fp->f_data; 260 261 if (kn->kn_filter != EVFILT_READ) 262 return (EINVAL); 263 264 kn->kn_fop = &kqread_filtops; 265 klist_insert(&kq->kq_sel.si_note, kn); 266 return (0); 267 } 268 269 void 270 filt_kqdetach(struct knote *kn) 271 { 272 struct kqueue *kq = kn->kn_fp->f_data; 273 274 klist_remove(&kq->kq_sel.si_note, kn); 275 } 276 277 int 278 filt_kqueue_common(struct knote *kn, struct kqueue *kq) 279 { 280 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 281 282 kn->kn_data = kq->kq_count; 283 284 return (kn->kn_data > 0); 285 } 286 287 int 288 filt_kqueue(struct knote *kn, long hint) 289 { 290 struct kqueue *kq = kn->kn_fp->f_data; 291 int active; 292 293 mtx_enter(&kq->kq_lock); 294 active = filt_kqueue_common(kn, kq); 295 mtx_leave(&kq->kq_lock); 296 297 return (active); 298 } 299 300 int 301 filt_kqueuemodify(struct kevent *kev, struct knote *kn) 302 { 303 struct kqueue *kq = kn->kn_fp->f_data; 304 int active; 305 306 mtx_enter(&kq->kq_lock); 307 knote_assign(kev, kn); 308 active = filt_kqueue_common(kn, kq); 309 mtx_leave(&kq->kq_lock); 310 311 return (active); 312 } 313 314 int 315 filt_kqueueprocess(struct knote *kn, struct kevent *kev) 316 { 317 struct kqueue *kq = kn->kn_fp->f_data; 318 int active; 319 320 mtx_enter(&kq->kq_lock); 321 if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) 322 active = 1; 323 else 324 active = filt_kqueue_common(kn, kq); 325 if (active) 326 knote_submit(kn, kev); 327 mtx_leave(&kq->kq_lock); 328 329 return (active); 330 } 331 332 int 333 filt_procattach(struct knote *kn) 334 { 335 struct process *pr; 336 int s; 337 338 if ((curproc->p_p->ps_flags & PS_PLEDGE) && 339 (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) 340 return pledge_fail(curproc, EPERM, PLEDGE_PROC); 341 342 if (kn->kn_id > PID_MAX) 343 return ESRCH; 344 345 pr = prfind(kn->kn_id); 346 if (pr == NULL) 347 return (ESRCH); 348 349 /* 
exiting processes can't be specified */ 350 if (pr->ps_flags & PS_EXITING) 351 return (ESRCH); 352 353 kn->kn_ptr.p_process = pr; 354 kn->kn_flags |= EV_CLEAR; /* automatically set */ 355 356 /* 357 * internal flag indicating registration done by kernel 358 */ 359 if (kn->kn_flags & EV_FLAG1) { 360 kn->kn_data = kn->kn_sdata; /* ppid */ 361 kn->kn_fflags = NOTE_CHILD; 362 kn->kn_flags &= ~EV_FLAG1; 363 } 364 365 s = splhigh(); 366 klist_insert_locked(&pr->ps_klist, kn); 367 splx(s); 368 369 return (0); 370 } 371 372 /* 373 * The knote may be attached to a different process, which may exit, 374 * leaving nothing for the knote to be attached to. So when the process 375 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 376 * it will be deleted when read out. However, as part of the knote deletion, 377 * this routine is called, so a check is needed to avoid actually performing 378 * a detach, because the original process does not exist any more. 379 */ 380 void 381 filt_procdetach(struct knote *kn) 382 { 383 struct kqueue *kq = kn->kn_kq; 384 struct process *pr = kn->kn_ptr.p_process; 385 int s, status; 386 387 mtx_enter(&kq->kq_lock); 388 status = kn->kn_status; 389 mtx_leave(&kq->kq_lock); 390 391 if (status & KN_DETACHED) 392 return; 393 394 s = splhigh(); 395 klist_remove_locked(&pr->ps_klist, kn); 396 splx(s); 397 } 398 399 int 400 filt_proc(struct knote *kn, long hint) 401 { 402 struct kqueue *kq = kn->kn_kq; 403 u_int event; 404 405 /* 406 * mask off extra data 407 */ 408 event = (u_int)hint & NOTE_PCTRLMASK; 409 410 /* 411 * if the user is interested in this event, record it. 412 */ 413 if (kn->kn_sfflags & event) 414 kn->kn_fflags |= event; 415 416 /* 417 * process is gone, so flag the event as finished and remove it 418 * from the process's klist 419 */ 420 if (event == NOTE_EXIT) { 421 struct process *pr = kn->kn_ptr.p_process; 422 int s; 423 424 mtx_enter(&kq->kq_lock); 425 kn->kn_status |= KN_DETACHED; 426 mtx_leave(&kq->kq_lock); 427 428 s = splhigh(); 429 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 430 kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); 431 klist_remove_locked(&pr->ps_klist, kn); 432 splx(s); 433 return (1); 434 } 435 436 /* 437 * process forked, and user wants to track the new process, 438 * so attach a new knote to it, and immediately report an 439 * event with the parent's pid. 440 */ 441 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 442 struct kevent kev; 443 int error; 444 445 /* 446 * register knote with new process. 447 */ 448 memset(&kev, 0, sizeof(kev)); 449 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 450 kev.filter = kn->kn_filter; 451 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 452 kev.fflags = kn->kn_sfflags; 453 kev.data = kn->kn_id; /* parent */ 454 kev.udata = kn->kn_udata; /* preserve udata */ 455 error = kqueue_register(kq, &kev, 0, NULL); 456 if (error) 457 kn->kn_fflags |= NOTE_TRACKERR; 458 } 459 460 return (kn->kn_fflags != 0); 461 } 462 463 static void 464 filt_timer_timeout_add(struct knote *kn) 465 { 466 struct timeval tv; 467 struct timeout *to = kn->kn_hook; 468 int tticks; 469 470 tv.tv_sec = kn->kn_sdata / 1000; 471 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 472 tticks = tvtohz(&tv); 473 /* Remove extra tick from tvtohz() if timeout has fired before. */ 474 if (timeout_triggered(to)) 475 tticks--; 476 timeout_add(to, (tticks > 0) ? 
tticks : 1); 477 } 478 479 void 480 filt_timerexpire(void *knx) 481 { 482 struct knote *kn = knx; 483 struct kqueue *kq = kn->kn_kq; 484 485 kn->kn_data++; 486 mtx_enter(&kq->kq_lock); 487 knote_activate(kn); 488 mtx_leave(&kq->kq_lock); 489 490 if ((kn->kn_flags & EV_ONESHOT) == 0) 491 filt_timer_timeout_add(kn); 492 } 493 494 495 /* 496 * data contains amount of time to sleep, in milliseconds 497 */ 498 int 499 filt_timerattach(struct knote *kn) 500 { 501 struct timeout *to; 502 503 if (kq_ntimeouts > kq_timeoutmax) 504 return (ENOMEM); 505 kq_ntimeouts++; 506 507 kn->kn_flags |= EV_CLEAR; /* automatically set */ 508 to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); 509 timeout_set(to, filt_timerexpire, kn); 510 kn->kn_hook = to; 511 filt_timer_timeout_add(kn); 512 513 return (0); 514 } 515 516 void 517 filt_timerdetach(struct knote *kn) 518 { 519 struct timeout *to; 520 521 to = (struct timeout *)kn->kn_hook; 522 timeout_del_barrier(to); 523 free(to, M_KEVENT, sizeof(*to)); 524 kq_ntimeouts--; 525 } 526 527 int 528 filt_timermodify(struct kevent *kev, struct knote *kn) 529 { 530 struct kqueue *kq = kn->kn_kq; 531 struct timeout *to = kn->kn_hook; 532 533 /* Reset the timer. Any pending events are discarded. */ 534 535 timeout_del_barrier(to); 536 537 mtx_enter(&kq->kq_lock); 538 if (kn->kn_status & KN_QUEUED) 539 knote_dequeue(kn); 540 kn->kn_status &= ~KN_ACTIVE; 541 mtx_leave(&kq->kq_lock); 542 543 kn->kn_data = 0; 544 knote_assign(kev, kn); 545 /* Reinit timeout to invoke tick adjustment again. */ 546 timeout_set(to, filt_timerexpire, kn); 547 filt_timer_timeout_add(kn); 548 549 return (0); 550 } 551 552 int 553 filt_timerprocess(struct knote *kn, struct kevent *kev) 554 { 555 int active, s; 556 557 s = splsoftclock(); 558 active = (kn->kn_data != 0); 559 if (active) 560 knote_submit(kn, kev); 561 splx(s); 562 563 return (active); 564 } 565 566 567 /* 568 * filt_seltrue: 569 * 570 * This filter "event" routine simulates seltrue(). 571 */ 572 int 573 filt_seltrue(struct knote *kn, long hint) 574 { 575 576 /* 577 * We don't know how much data can be read/written, 578 * but we know that it *can* be. This is about as 579 * good as select/poll does as well. 580 */ 581 kn->kn_data = 0; 582 return (1); 583 } 584 585 int 586 filt_seltruemodify(struct kevent *kev, struct knote *kn) 587 { 588 knote_assign(kev, kn); 589 return (kn->kn_fop->f_event(kn, 0)); 590 } 591 592 int 593 filt_seltrueprocess(struct knote *kn, struct kevent *kev) 594 { 595 int active; 596 597 active = kn->kn_fop->f_event(kn, 0); 598 if (active) 599 knote_submit(kn, kev); 600 return (active); 601 } 602 603 /* 604 * This provides full kqfilter entry for device switch tables, which 605 * has same effect as filter using filt_seltrue() as filter method. 
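 *
 * A minimal sketch (hypothetical "xyz" driver, not an existing one) of
 * a device that is always ready and simply forwards its kqfilter entry
 * point here:
 *
 *	int
 *	xyzkqfilter(dev_t dev, struct knote *kn)
 *	{
 *		return (seltrue_kqfilter(dev, kn));
 *	}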
606 */ 607 void 608 filt_seltruedetach(struct knote *kn) 609 { 610 /* Nothing to do */ 611 } 612 613 const struct filterops seltrue_filtops = { 614 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 615 .f_attach = NULL, 616 .f_detach = filt_seltruedetach, 617 .f_event = filt_seltrue, 618 .f_modify = filt_seltruemodify, 619 .f_process = filt_seltrueprocess, 620 }; 621 622 int 623 seltrue_kqfilter(dev_t dev, struct knote *kn) 624 { 625 switch (kn->kn_filter) { 626 case EVFILT_READ: 627 case EVFILT_WRITE: 628 kn->kn_fop = &seltrue_filtops; 629 break; 630 default: 631 return (EINVAL); 632 } 633 634 /* Nothing more to do */ 635 return (0); 636 } 637 638 static int 639 filt_dead(struct knote *kn, long hint) 640 { 641 if (kn->kn_filter == EVFILT_EXCEPT) { 642 /* 643 * Do not deliver event because there is no out-of-band data. 644 * However, let HUP condition pass for poll(2). 645 */ 646 if ((kn->kn_flags & __EV_POLL) == 0) { 647 kn->kn_flags |= EV_DISABLE; 648 return (0); 649 } 650 } 651 652 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 653 if (kn->kn_flags & __EV_POLL) 654 kn->kn_flags |= __EV_HUP; 655 kn->kn_data = 0; 656 return (1); 657 } 658 659 static void 660 filt_deaddetach(struct knote *kn) 661 { 662 /* Nothing to do */ 663 } 664 665 const struct filterops dead_filtops = { 666 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 667 .f_attach = NULL, 668 .f_detach = filt_deaddetach, 669 .f_event = filt_dead, 670 .f_modify = filt_seltruemodify, 671 .f_process = filt_seltrueprocess, 672 }; 673 674 static int 675 filt_badfd(struct knote *kn, long hint) 676 { 677 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 678 kn->kn_data = EBADF; 679 return (1); 680 } 681 682 /* For use with kqpoll. */ 683 const struct filterops badfd_filtops = { 684 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 685 .f_attach = NULL, 686 .f_detach = filt_deaddetach, 687 .f_event = filt_badfd, 688 .f_modify = filt_seltruemodify, 689 .f_process = filt_seltrueprocess, 690 }; 691 692 static int 693 filter_attach(struct knote *kn) 694 { 695 int error; 696 697 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 698 error = kn->kn_fop->f_attach(kn); 699 } else { 700 KERNEL_LOCK(); 701 error = kn->kn_fop->f_attach(kn); 702 KERNEL_UNLOCK(); 703 } 704 return (error); 705 } 706 707 static void 708 filter_detach(struct knote *kn) 709 { 710 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 711 kn->kn_fop->f_detach(kn); 712 } else { 713 KERNEL_LOCK(); 714 kn->kn_fop->f_detach(kn); 715 KERNEL_UNLOCK(); 716 } 717 } 718 719 static int 720 filter_event(struct knote *kn, long hint) 721 { 722 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 723 KERNEL_ASSERT_LOCKED(); 724 725 return (kn->kn_fop->f_event(kn, hint)); 726 } 727 728 static int 729 filter_modify(struct kevent *kev, struct knote *kn) 730 { 731 int active, s; 732 733 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 734 active = kn->kn_fop->f_modify(kev, kn); 735 } else { 736 KERNEL_LOCK(); 737 if (kn->kn_fop->f_modify != NULL) { 738 active = kn->kn_fop->f_modify(kev, kn); 739 } else { 740 s = splhigh(); 741 active = knote_modify(kev, kn); 742 splx(s); 743 } 744 KERNEL_UNLOCK(); 745 } 746 return (active); 747 } 748 749 static int 750 filter_process(struct knote *kn, struct kevent *kev) 751 { 752 int active, s; 753 754 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 755 active = kn->kn_fop->f_process(kn, kev); 756 } else { 757 KERNEL_LOCK(); 758 if (kn->kn_fop->f_process != NULL) { 759 active = kn->kn_fop->f_process(kn, kev); 760 } else { 761 s = splhigh(); 762 active = knote_process(kn, kev); 763 splx(s); 764 } 765 
KERNEL_UNLOCK(); 766 } 767 return (active); 768 } 769 770 /* 771 * Initialize the current thread for poll/select system call. 772 * num indicates the number of serials that the system call may utilize. 773 * After this function, the valid range of serials is 774 * p_kq_serial <= x < p_kq_serial + num. 775 */ 776 void 777 kqpoll_init(unsigned int num) 778 { 779 struct proc *p = curproc; 780 struct filedesc *fdp; 781 782 if (p->p_kq == NULL) { 783 p->p_kq = kqueue_alloc(p->p_fd); 784 p->p_kq_serial = arc4random(); 785 fdp = p->p_fd; 786 fdplock(fdp); 787 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 788 fdpunlock(fdp); 789 } 790 791 if (p->p_kq_serial + num < p->p_kq_serial) { 792 /* Serial is about to wrap. Clear all attached knotes. */ 793 kqueue_purge(p, p->p_kq); 794 p->p_kq_serial = 0; 795 } 796 } 797 798 /* 799 * Finish poll/select system call. 800 * num must have the same value that was used with kqpoll_init(). 801 */ 802 void 803 kqpoll_done(unsigned int num) 804 { 805 struct proc *p = curproc; 806 struct kqueue *kq = p->p_kq; 807 808 KASSERT(p->p_kq != NULL); 809 KASSERT(p->p_kq_serial + num >= p->p_kq_serial); 810 811 p->p_kq_serial += num; 812 813 /* 814 * Because of kn_pollid key, a thread can in principle allocate 815 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly 816 * with suitably varying pollfd arrays. 817 * Prevent such a large allocation by clearing knotes eagerly 818 * if there are too many of them. 819 * 820 * A small multiple of kq_knlistsize should give enough margin 821 * that eager clearing is infrequent, or does not happen at all, 822 * with normal programs. 823 * A single pollfd entry can use up to three knotes. 824 * Typically there is no significant overlap of fd and events 825 * between different entries in the pollfd array. 
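 *
 * As a rough worked example under the assumptions above: a poll(2)
 * call on 100 distinct descriptors can leave up to 300 knotes attached
 * to p_kq, and the eager purge below only runs once kq_nknotes exceeds
 * 4 * kq_knlistsize.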
826 */ 827 if (kq->kq_nknotes > 4 * kq->kq_knlistsize) 828 kqueue_purge(p, kq); 829 } 830 831 void 832 kqpoll_exit(void) 833 { 834 struct proc *p = curproc; 835 836 if (p->p_kq == NULL) 837 return; 838 839 kqueue_purge(p, p->p_kq); 840 kqueue_terminate(p, p->p_kq); 841 KASSERT(p->p_kq->kq_refs == 1); 842 KQRELE(p->p_kq); 843 p->p_kq = NULL; 844 } 845 846 struct kqueue * 847 kqueue_alloc(struct filedesc *fdp) 848 { 849 struct kqueue *kq; 850 851 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 852 kq->kq_refs = 1; 853 kq->kq_fdp = fdp; 854 TAILQ_INIT(&kq->kq_head); 855 mtx_init(&kq->kq_lock, IPL_HIGH); 856 task_set(&kq->kq_task, kqueue_task, kq); 857 klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock); 858 859 return (kq); 860 } 861 862 int 863 sys_kqueue(struct proc *p, void *v, register_t *retval) 864 { 865 struct filedesc *fdp = p->p_fd; 866 struct kqueue *kq; 867 struct file *fp; 868 int fd, error; 869 870 kq = kqueue_alloc(fdp); 871 872 fdplock(fdp); 873 error = falloc(p, &fp, &fd); 874 if (error) 875 goto out; 876 fp->f_flag = FREAD | FWRITE; 877 fp->f_type = DTYPE_KQUEUE; 878 fp->f_ops = &kqueueops; 879 fp->f_data = kq; 880 *retval = fd; 881 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 882 kq = NULL; 883 fdinsert(fdp, fd, 0, fp); 884 FRELE(fp, p); 885 out: 886 fdpunlock(fdp); 887 if (kq != NULL) 888 pool_put(&kqueue_pool, kq); 889 return (error); 890 } 891 892 int 893 sys_kevent(struct proc *p, void *v, register_t *retval) 894 { 895 struct kqueue_scan_state scan; 896 struct filedesc* fdp = p->p_fd; 897 struct sys_kevent_args /* { 898 syscallarg(int) fd; 899 syscallarg(const struct kevent *) changelist; 900 syscallarg(int) nchanges; 901 syscallarg(struct kevent *) eventlist; 902 syscallarg(int) nevents; 903 syscallarg(const struct timespec *) timeout; 904 } */ *uap = v; 905 struct kevent *kevp; 906 struct kqueue *kq; 907 struct file *fp; 908 struct timespec ts; 909 struct timespec *tsp = NULL; 910 int i, n, nerrors, error; 911 int ready, total; 912 struct kevent kev[KQ_NEVENTS]; 913 914 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 915 return (EBADF); 916 917 if (fp->f_type != DTYPE_KQUEUE) { 918 error = EBADF; 919 goto done; 920 } 921 922 if (SCARG(uap, timeout) != NULL) { 923 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 924 if (error) 925 goto done; 926 #ifdef KTRACE 927 if (KTRPOINT(p, KTR_STRUCT)) 928 ktrreltimespec(p, &ts); 929 #endif 930 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 931 error = EINVAL; 932 goto done; 933 } 934 tsp = &ts; 935 } 936 937 kq = fp->f_data; 938 nerrors = 0; 939 940 while ((n = SCARG(uap, nchanges)) > 0) { 941 if (n > nitems(kev)) 942 n = nitems(kev); 943 error = copyin(SCARG(uap, changelist), kev, 944 n * sizeof(struct kevent)); 945 if (error) 946 goto done; 947 #ifdef KTRACE 948 if (KTRPOINT(p, KTR_STRUCT)) 949 ktrevent(p, kev, n); 950 #endif 951 for (i = 0; i < n; i++) { 952 kevp = &kev[i]; 953 kevp->flags &= ~EV_SYSFLAGS; 954 error = kqueue_register(kq, kevp, 0, p); 955 if (error || (kevp->flags & EV_RECEIPT)) { 956 if (SCARG(uap, nevents) != 0) { 957 kevp->flags = EV_ERROR; 958 kevp->data = error; 959 copyout(kevp, SCARG(uap, eventlist), 960 sizeof(*kevp)); 961 SCARG(uap, eventlist)++; 962 SCARG(uap, nevents)--; 963 nerrors++; 964 } else { 965 goto done; 966 } 967 } 968 } 969 SCARG(uap, nchanges) -= n; 970 SCARG(uap, changelist) += n; 971 } 972 if (nerrors) { 973 *retval = nerrors; 974 error = 0; 975 goto done; 976 } 977 978 kqueue_scan_setup(&scan, kq); 979 FRELE(fp, p); 980 /* 981 * Collect as many events as we can. 
The timeout on successive 982 * loops is disabled (kqueue_scan() becomes non-blocking). 983 */ 984 total = 0; 985 error = 0; 986 while ((n = SCARG(uap, nevents) - total) > 0) { 987 if (n > nitems(kev)) 988 n = nitems(kev); 989 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 990 if (ready == 0) 991 break; 992 error = copyout(kev, SCARG(uap, eventlist) + total, 993 sizeof(struct kevent) * ready); 994 #ifdef KTRACE 995 if (KTRPOINT(p, KTR_STRUCT)) 996 ktrevent(p, kev, ready); 997 #endif 998 total += ready; 999 if (error || ready < n) 1000 break; 1001 } 1002 kqueue_scan_finish(&scan); 1003 *retval = total; 1004 return (error); 1005 1006 done: 1007 FRELE(fp, p); 1008 return (error); 1009 } 1010 1011 #ifdef KQUEUE_DEBUG 1012 void 1013 kqueue_do_check(struct kqueue *kq, const char *func, int line) 1014 { 1015 struct knote *kn; 1016 int count = 0, nmarker = 0; 1017 1018 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1019 1020 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1021 if (kn->kn_filter == EVFILT_MARKER) { 1022 if ((kn->kn_status & KN_QUEUED) != 0) 1023 panic("%s:%d: kq=%p kn=%p marker QUEUED", 1024 func, line, kq, kn); 1025 nmarker++; 1026 } else { 1027 if ((kn->kn_status & KN_ACTIVE) == 0) 1028 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 1029 func, line, kq, kn); 1030 if ((kn->kn_status & KN_QUEUED) == 0) 1031 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 1032 func, line, kq, kn); 1033 if (kn->kn_kq != kq) 1034 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 1035 func, line, kq, kn, kn->kn_kq); 1036 count++; 1037 if (count > kq->kq_count) 1038 goto bad; 1039 } 1040 } 1041 if (count != kq->kq_count) { 1042 bad: 1043 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 1044 func, line, kq, kq->kq_count, count, nmarker); 1045 } 1046 } 1047 #endif 1048 1049 int 1050 kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid, 1051 struct proc *p) 1052 { 1053 struct filedesc *fdp = kq->kq_fdp; 1054 const struct filterops *fops = NULL; 1055 struct file *fp = NULL; 1056 struct knote *kn = NULL, *newkn = NULL; 1057 struct knlist *list = NULL; 1058 int active, error = 0; 1059 1060 KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq)); 1061 1062 if (kev->filter < 0) { 1063 if (kev->filter + EVFILT_SYSCOUNT < 0) 1064 return (EINVAL); 1065 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 1066 } 1067 1068 if (fops == NULL) { 1069 /* 1070 * XXX 1071 * filter attach routine is responsible for ensuring that 1072 * the identifier can be attached to it. 
1073 */ 1074 return (EINVAL); 1075 } 1076 1077 if (fops->f_flags & FILTEROP_ISFD) { 1078 /* validate descriptor */ 1079 if (kev->ident > INT_MAX) 1080 return (EBADF); 1081 } 1082 1083 if (kev->flags & EV_ADD) 1084 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 1085 1086 again: 1087 if (fops->f_flags & FILTEROP_ISFD) { 1088 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 1089 error = EBADF; 1090 goto done; 1091 } 1092 mtx_enter(&kq->kq_lock); 1093 if (kev->flags & EV_ADD) 1094 kqueue_expand_list(kq, kev->ident); 1095 if (kev->ident < kq->kq_knlistsize) 1096 list = &kq->kq_knlist[kev->ident]; 1097 } else { 1098 mtx_enter(&kq->kq_lock); 1099 if (kev->flags & EV_ADD) 1100 kqueue_expand_hash(kq); 1101 if (kq->kq_knhashmask != 0) { 1102 list = &kq->kq_knhash[ 1103 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1104 } 1105 } 1106 if (list != NULL) { 1107 SLIST_FOREACH(kn, list, kn_link) { 1108 if (kev->filter == kn->kn_filter && 1109 kev->ident == kn->kn_id && 1110 pollid == kn->kn_pollid) { 1111 if (!knote_acquire(kn, NULL, 0)) { 1112 /* knote_acquire() has released 1113 * kq_lock. */ 1114 if (fp != NULL) { 1115 FRELE(fp, p); 1116 fp = NULL; 1117 } 1118 goto again; 1119 } 1120 break; 1121 } 1122 } 1123 } 1124 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 1125 1126 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 1127 mtx_leave(&kq->kq_lock); 1128 error = ENOENT; 1129 goto done; 1130 } 1131 1132 /* 1133 * kn now contains the matching knote, or NULL if no match. 1134 */ 1135 if (kev->flags & EV_ADD) { 1136 if (kn == NULL) { 1137 kn = newkn; 1138 newkn = NULL; 1139 kn->kn_status = KN_PROCESSING; 1140 kn->kn_fp = fp; 1141 kn->kn_kq = kq; 1142 kn->kn_fop = fops; 1143 1144 /* 1145 * apply reference count to knote structure, and 1146 * do not release it at the end of this routine. 1147 */ 1148 fp = NULL; 1149 1150 kn->kn_sfflags = kev->fflags; 1151 kn->kn_sdata = kev->data; 1152 kev->fflags = 0; 1153 kev->data = 0; 1154 kn->kn_kevent = *kev; 1155 kn->kn_pollid = pollid; 1156 1157 knote_attach(kn); 1158 mtx_leave(&kq->kq_lock); 1159 1160 error = filter_attach(kn); 1161 if (error != 0) { 1162 knote_drop(kn, p); 1163 goto done; 1164 } 1165 1166 /* 1167 * If this is a file descriptor filter, check if 1168 * fd was closed while the knote was being added. 1169 * knote_fdclose() has missed kn if the function 1170 * ran before kn appeared in kq_knlist. 1171 */ 1172 if ((fops->f_flags & FILTEROP_ISFD) && 1173 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 1174 /* 1175 * Drop the knote silently without error 1176 * because another thread might already have 1177 * seen it. This corresponds to the insert 1178 * happening in full before the close. 1179 */ 1180 filter_detach(kn); 1181 knote_drop(kn, p); 1182 goto done; 1183 } 1184 1185 /* Check if there is a pending event. */ 1186 active = filter_process(kn, NULL); 1187 mtx_enter(&kq->kq_lock); 1188 if (active) 1189 knote_activate(kn); 1190 } else if (kn->kn_fop == &badfd_filtops) { 1191 /* 1192 * Nothing expects this badfd knote any longer. 1193 * Drop it to make room for the new knote and retry. 1194 */ 1195 KASSERT(kq == p->p_kq); 1196 mtx_leave(&kq->kq_lock); 1197 filter_detach(kn); 1198 knote_drop(kn, p); 1199 1200 KASSERT(fp != NULL); 1201 FRELE(fp, p); 1202 fp = NULL; 1203 1204 goto again; 1205 } else { 1206 /* 1207 * The user may change some filter values after the 1208 * initial EV_ADD, but doing so will not reset any 1209 * filters which have already been triggered. 
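 *
 * For instance (illustrative only), re-registering an existing
 * EVFILT_VNODE knote with
 *
 *	EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD, NOTE_WRITE, 0, udata);
 *
 * merely refreshes kn_sfflags/kn_sdata/kn_udata via filter_modify();
 * an event that has already fired stays queued for delivery.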
1210 */ 1211 mtx_leave(&kq->kq_lock); 1212 active = filter_modify(kev, kn); 1213 mtx_enter(&kq->kq_lock); 1214 if (active) 1215 knote_activate(kn); 1216 if (kev->flags & EV_ERROR) { 1217 error = kev->data; 1218 goto release; 1219 } 1220 } 1221 } else if (kev->flags & EV_DELETE) { 1222 mtx_leave(&kq->kq_lock); 1223 filter_detach(kn); 1224 knote_drop(kn, p); 1225 goto done; 1226 } 1227 1228 if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) 1229 kn->kn_status |= KN_DISABLED; 1230 1231 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 1232 kn->kn_status &= ~KN_DISABLED; 1233 mtx_leave(&kq->kq_lock); 1234 /* Check if there is a pending event. */ 1235 active = filter_process(kn, NULL); 1236 mtx_enter(&kq->kq_lock); 1237 if (active) 1238 knote_activate(kn); 1239 } 1240 1241 release: 1242 knote_release(kn); 1243 mtx_leave(&kq->kq_lock); 1244 done: 1245 if (fp != NULL) 1246 FRELE(fp, p); 1247 if (newkn != NULL) 1248 pool_put(&knote_pool, newkn); 1249 return (error); 1250 } 1251 1252 int 1253 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1254 { 1255 struct timespec elapsed, start, stop; 1256 uint64_t nsecs; 1257 int error; 1258 1259 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1260 1261 if (tsp != NULL) { 1262 getnanouptime(&start); 1263 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1264 } else 1265 nsecs = INFSLP; 1266 error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK, 1267 "kqread", nsecs); 1268 if (tsp != NULL) { 1269 getnanouptime(&stop); 1270 timespecsub(&stop, &start, &elapsed); 1271 timespecsub(tsp, &elapsed, tsp); 1272 if (tsp->tv_sec < 0) 1273 timespecclear(tsp); 1274 } 1275 1276 return (error); 1277 } 1278 1279 /* 1280 * Scan the kqueue, blocking if necessary until the target time is reached. 1281 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 1282 * 0 we do not block at all. 1283 */ 1284 int 1285 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1286 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1287 { 1288 struct kqueue *kq = scan->kqs_kq; 1289 struct knote *kn; 1290 int error = 0, nkev = 0; 1291 1292 if (maxevents == 0) 1293 goto done; 1294 retry: 1295 KASSERT(nkev == 0); 1296 1297 error = 0; 1298 1299 /* msleep() with PCATCH requires kernel lock. */ 1300 KERNEL_LOCK(); 1301 1302 mtx_enter(&kq->kq_lock); 1303 1304 if (kq->kq_state & KQ_DYING) { 1305 mtx_leave(&kq->kq_lock); 1306 KERNEL_UNLOCK(); 1307 error = EBADF; 1308 goto done; 1309 } 1310 1311 if (kq->kq_count == 0) { 1312 /* 1313 * Successive loops are only necessary if there are more 1314 * ready events to gather, so they don't need to block. 1315 */ 1316 if ((tsp != NULL && !timespecisset(tsp)) || 1317 scan->kqs_nevent != 0) { 1318 mtx_leave(&kq->kq_lock); 1319 KERNEL_UNLOCK(); 1320 error = 0; 1321 goto done; 1322 } 1323 kq->kq_state |= KQ_SLEEP; 1324 error = kqueue_sleep(kq, tsp); 1325 /* kqueue_sleep() has released kq_lock. */ 1326 KERNEL_UNLOCK(); 1327 if (error == 0 || error == EWOULDBLOCK) 1328 goto retry; 1329 /* don't restart after signals... */ 1330 if (error == ERESTART) 1331 error = EINTR; 1332 goto done; 1333 } 1334 1335 /* The actual scan does not sleep on kq, so unlock the kernel. */ 1336 KERNEL_UNLOCK(); 1337 1338 /* 1339 * Put the end marker in the queue to limit the scan to the events 1340 * that are currently active. This prevents events from being 1341 * recollected if they reactivate during scan. 
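 *
 * Roughly, the queue then looks like this (markers in brackets):
 *
 *	[kqs_start] -> kn_a -> kn_b -> ... -> [kqs_end] -> knotes queued during scan
 *
 * and the collection loop below stops as soon as the start marker runs
 * into the end marker.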
1342 * 1343 * If a partial scan has been performed already but no events have 1344 * been collected, reposition the end marker to make any new events 1345 * reachable. 1346 */ 1347 if (!scan->kqs_queued) { 1348 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1349 scan->kqs_queued = 1; 1350 } else if (scan->kqs_nevent == 0) { 1351 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1352 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1353 } 1354 1355 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1356 while (nkev < maxevents) { 1357 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1358 if (kn->kn_filter == EVFILT_MARKER) { 1359 if (kn == &scan->kqs_end) 1360 break; 1361 1362 /* Move start marker past another thread's marker. */ 1363 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1364 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1365 kn_tqe); 1366 continue; 1367 } 1368 1369 if (!knote_acquire(kn, NULL, 0)) { 1370 /* knote_acquire() has released kq_lock. */ 1371 mtx_enter(&kq->kq_lock); 1372 continue; 1373 } 1374 1375 kqueue_check(kq); 1376 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1377 kn->kn_status &= ~KN_QUEUED; 1378 kq->kq_count--; 1379 kqueue_check(kq); 1380 1381 if (kn->kn_status & KN_DISABLED) { 1382 knote_release(kn); 1383 continue; 1384 } 1385 1386 mtx_leave(&kq->kq_lock); 1387 1388 /* Drop expired kqpoll knotes. */ 1389 if (p->p_kq == kq && 1390 p->p_kq_serial > (unsigned long)kn->kn_udata) { 1391 filter_detach(kn); 1392 knote_drop(kn, p); 1393 mtx_enter(&kq->kq_lock); 1394 continue; 1395 } 1396 1397 /* 1398 * Invalidate knotes whose vnodes have been revoked. 1399 * This is a workaround; it is tricky to clear existing 1400 * knotes and prevent new ones from being registered 1401 * with the current revocation mechanism. 1402 */ 1403 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && 1404 kn->kn_fp != NULL && 1405 kn->kn_fp->f_type == DTYPE_VNODE) { 1406 struct vnode *vp = kn->kn_fp->f_data; 1407 1408 if (__predict_false(vp->v_op == &dead_vops && 1409 kn->kn_fop != &dead_filtops)) { 1410 filter_detach(kn); 1411 kn->kn_fop = &dead_filtops; 1412 1413 /* 1414 * Check if the event should be delivered. 1415 * Use f_event directly because this is 1416 * a special situation. 
1417 */ 1418 if (kn->kn_fop->f_event(kn, 0) == 0) { 1419 filter_detach(kn); 1420 knote_drop(kn, p); 1421 mtx_enter(&kq->kq_lock); 1422 continue; 1423 } 1424 } 1425 } 1426 1427 memset(kevp, 0, sizeof(*kevp)); 1428 if (filter_process(kn, kevp) == 0) { 1429 mtx_enter(&kq->kq_lock); 1430 if ((kn->kn_status & KN_QUEUED) == 0) 1431 kn->kn_status &= ~KN_ACTIVE; 1432 knote_release(kn); 1433 kqueue_check(kq); 1434 continue; 1435 } 1436 1437 /* 1438 * Post-event action on the note 1439 */ 1440 if (kevp->flags & EV_ONESHOT) { 1441 filter_detach(kn); 1442 knote_drop(kn, p); 1443 mtx_enter(&kq->kq_lock); 1444 } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { 1445 mtx_enter(&kq->kq_lock); 1446 if (kevp->flags & EV_DISPATCH) 1447 kn->kn_status |= KN_DISABLED; 1448 if ((kn->kn_status & KN_QUEUED) == 0) 1449 kn->kn_status &= ~KN_ACTIVE; 1450 knote_release(kn); 1451 } else { 1452 mtx_enter(&kq->kq_lock); 1453 if ((kn->kn_status & KN_QUEUED) == 0) { 1454 kqueue_check(kq); 1455 kq->kq_count++; 1456 kn->kn_status |= KN_QUEUED; 1457 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1458 } 1459 knote_release(kn); 1460 } 1461 kqueue_check(kq); 1462 1463 kevp++; 1464 nkev++; 1465 scan->kqs_nevent++; 1466 } 1467 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1468 mtx_leave(&kq->kq_lock); 1469 if (scan->kqs_nevent == 0) 1470 goto retry; 1471 done: 1472 *errorp = error; 1473 return (nkev); 1474 } 1475 1476 void 1477 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1478 { 1479 memset(scan, 0, sizeof(*scan)); 1480 1481 KQREF(kq); 1482 scan->kqs_kq = kq; 1483 scan->kqs_start.kn_filter = EVFILT_MARKER; 1484 scan->kqs_start.kn_status = KN_PROCESSING; 1485 scan->kqs_end.kn_filter = EVFILT_MARKER; 1486 scan->kqs_end.kn_status = KN_PROCESSING; 1487 } 1488 1489 void 1490 kqueue_scan_finish(struct kqueue_scan_state *scan) 1491 { 1492 struct kqueue *kq = scan->kqs_kq; 1493 1494 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1495 KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); 1496 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1497 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1498 1499 if (scan->kqs_queued) { 1500 scan->kqs_queued = 0; 1501 mtx_enter(&kq->kq_lock); 1502 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1503 mtx_leave(&kq->kq_lock); 1504 } 1505 KQRELE(kq); 1506 } 1507 1508 /* 1509 * XXX 1510 * This could be expanded to call kqueue_scan, if desired. 
1511 */ 1512 int 1513 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1514 { 1515 return (ENXIO); 1516 } 1517 1518 int 1519 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1520 { 1521 return (ENXIO); 1522 } 1523 1524 int 1525 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1526 { 1527 return (ENOTTY); 1528 } 1529 1530 int 1531 kqueue_poll(struct file *fp, int events, struct proc *p) 1532 { 1533 struct kqueue *kq = (struct kqueue *)fp->f_data; 1534 int revents = 0; 1535 1536 if (events & (POLLIN | POLLRDNORM)) { 1537 mtx_enter(&kq->kq_lock); 1538 if (kq->kq_count) { 1539 revents |= events & (POLLIN | POLLRDNORM); 1540 } else { 1541 selrecord(p, &kq->kq_sel); 1542 kq->kq_state |= KQ_SEL; 1543 } 1544 mtx_leave(&kq->kq_lock); 1545 } 1546 return (revents); 1547 } 1548 1549 int 1550 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1551 { 1552 struct kqueue *kq = fp->f_data; 1553 1554 memset(st, 0, sizeof(*st)); 1555 st->st_size = kq->kq_count; /* unlocked read */ 1556 st->st_blksize = sizeof(struct kevent); 1557 st->st_mode = S_IFIFO; 1558 return (0); 1559 } 1560 1561 void 1562 kqueue_purge(struct proc *p, struct kqueue *kq) 1563 { 1564 int i; 1565 1566 mtx_enter(&kq->kq_lock); 1567 for (i = 0; i < kq->kq_knlistsize; i++) 1568 knote_remove(p, kq, &kq->kq_knlist[i], 1); 1569 if (kq->kq_knhashmask != 0) { 1570 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1571 knote_remove(p, kq, &kq->kq_knhash[i], 1); 1572 } 1573 mtx_leave(&kq->kq_lock); 1574 } 1575 1576 void 1577 kqueue_terminate(struct proc *p, struct kqueue *kq) 1578 { 1579 struct knote *kn; 1580 1581 mtx_enter(&kq->kq_lock); 1582 1583 /* 1584 * Any remaining entries should be scan markers. 1585 * They are removed when the ongoing scans finish. 1586 */ 1587 KASSERT(kq->kq_count == 0); 1588 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) 1589 KASSERT(kn->kn_filter == EVFILT_MARKER); 1590 1591 kq->kq_state |= KQ_DYING; 1592 kqueue_wakeup(kq); 1593 mtx_leave(&kq->kq_lock); 1594 1595 KASSERT(klist_empty(&kq->kq_sel.si_note)); 1596 task_del(systq, &kq->kq_task); 1597 1598 } 1599 1600 int 1601 kqueue_close(struct file *fp, struct proc *p) 1602 { 1603 struct kqueue *kq = fp->f_data; 1604 1605 fp->f_data = NULL; 1606 1607 kqueue_purge(p, kq); 1608 kqueue_terminate(p, kq); 1609 1610 KQRELE(kq); 1611 1612 return (0); 1613 } 1614 1615 static void 1616 kqueue_task(void *arg) 1617 { 1618 struct kqueue *kq = arg; 1619 1620 /* Kernel lock is needed inside selwakeup(). */ 1621 KERNEL_ASSERT_LOCKED(); 1622 1623 mtx_enter(&kqueue_klist_lock); 1624 mtx_enter(&kq->kq_lock); 1625 if (kq->kq_state & KQ_SEL) { 1626 kq->kq_state &= ~KQ_SEL; 1627 mtx_leave(&kq->kq_lock); 1628 selwakeup(&kq->kq_sel); 1629 } else { 1630 mtx_leave(&kq->kq_lock); 1631 KNOTE(&kq->kq_sel.si_note, 0); 1632 } 1633 mtx_leave(&kqueue_klist_lock); 1634 KQRELE(kq); 1635 } 1636 1637 void 1638 kqueue_wakeup(struct kqueue *kq) 1639 { 1640 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1641 1642 if (kq->kq_state & KQ_SLEEP) { 1643 kq->kq_state &= ~KQ_SLEEP; 1644 wakeup(kq); 1645 } 1646 if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) { 1647 /* Defer activation to avoid recursion. 
*/ 1648 KQREF(kq); 1649 if (!task_add(systq, &kq->kq_task)) 1650 KQRELE(kq); 1651 } 1652 } 1653 1654 static void 1655 kqueue_expand_hash(struct kqueue *kq) 1656 { 1657 struct knlist *hash; 1658 u_long hashmask; 1659 1660 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1661 1662 if (kq->kq_knhashmask == 0) { 1663 mtx_leave(&kq->kq_lock); 1664 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1665 mtx_enter(&kq->kq_lock); 1666 if (kq->kq_knhashmask == 0) { 1667 kq->kq_knhash = hash; 1668 kq->kq_knhashmask = hashmask; 1669 } else { 1670 /* Another thread has allocated the hash. */ 1671 mtx_leave(&kq->kq_lock); 1672 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1673 mtx_enter(&kq->kq_lock); 1674 } 1675 } 1676 } 1677 1678 static void 1679 kqueue_expand_list(struct kqueue *kq, int fd) 1680 { 1681 struct knlist *list, *olist; 1682 int size, osize; 1683 1684 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1685 1686 if (kq->kq_knlistsize <= fd) { 1687 size = kq->kq_knlistsize; 1688 mtx_leave(&kq->kq_lock); 1689 while (size <= fd) 1690 size += KQEXTENT; 1691 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1692 mtx_enter(&kq->kq_lock); 1693 if (kq->kq_knlistsize <= fd) { 1694 memcpy(list, kq->kq_knlist, 1695 kq->kq_knlistsize * sizeof(*list)); 1696 memset(&list[kq->kq_knlistsize], 0, 1697 (size - kq->kq_knlistsize) * sizeof(*list)); 1698 olist = kq->kq_knlist; 1699 osize = kq->kq_knlistsize; 1700 kq->kq_knlist = list; 1701 kq->kq_knlistsize = size; 1702 mtx_leave(&kq->kq_lock); 1703 free(olist, M_KEVENT, osize * sizeof(*list)); 1704 mtx_enter(&kq->kq_lock); 1705 } else { 1706 /* Another thread has expanded the list. */ 1707 mtx_leave(&kq->kq_lock); 1708 free(list, M_KEVENT, size * sizeof(*list)); 1709 mtx_enter(&kq->kq_lock); 1710 } 1711 } 1712 } 1713 1714 /* 1715 * Acquire a knote, return non-zero on success, 0 on failure. 1716 * 1717 * If we cannot acquire the knote we sleep and return 0. The knote 1718 * may be stale on return in this case and the caller must restart 1719 * whatever loop they are in. 1720 * 1721 * If we are about to sleep and klist is non-NULL, the list is unlocked 1722 * before sleep and remains unlocked on return. 1723 */ 1724 int 1725 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1726 { 1727 struct kqueue *kq = kn->kn_kq; 1728 1729 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1730 KASSERT(kn->kn_filter != EVFILT_MARKER); 1731 1732 if (kn->kn_status & KN_PROCESSING) { 1733 kn->kn_status |= KN_WAITING; 1734 if (klist != NULL) { 1735 mtx_leave(&kq->kq_lock); 1736 klist_unlock(klist, ls); 1737 /* XXX Timeout resolves potential loss of wakeup. */ 1738 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1739 } else { 1740 msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts", 1741 SEC_TO_NSEC(1)); 1742 } 1743 /* knote may be stale now */ 1744 return (0); 1745 } 1746 kn->kn_status |= KN_PROCESSING; 1747 return (1); 1748 } 1749 1750 /* 1751 * Release an acquired knote, clearing KN_PROCESSING. 1752 */ 1753 void 1754 knote_release(struct knote *kn) 1755 { 1756 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1757 KASSERT(kn->kn_filter != EVFILT_MARKER); 1758 KASSERT(kn->kn_status & KN_PROCESSING); 1759 1760 if (kn->kn_status & KN_WAITING) { 1761 kn->kn_status &= ~KN_WAITING; 1762 wakeup(kn); 1763 } 1764 kn->kn_status &= ~KN_PROCESSING; 1765 /* kn should not be accessed anymore */ 1766 } 1767 1768 /* 1769 * activate one knote. 
1770 */ 1771 void 1772 knote_activate(struct knote *kn) 1773 { 1774 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1775 1776 kn->kn_status |= KN_ACTIVE; 1777 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1778 knote_enqueue(kn); 1779 } 1780 1781 /* 1782 * walk down a list of knotes, activating them if their event has triggered. 1783 */ 1784 void 1785 knote(struct klist *list, long hint) 1786 { 1787 struct knote *kn, *kn0; 1788 struct kqueue *kq; 1789 1790 KLIST_ASSERT_LOCKED(list); 1791 1792 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) { 1793 if (filter_event(kn, hint)) { 1794 kq = kn->kn_kq; 1795 mtx_enter(&kq->kq_lock); 1796 knote_activate(kn); 1797 mtx_leave(&kq->kq_lock); 1798 } 1799 } 1800 } 1801 1802 /* 1803 * remove all knotes from a specified knlist 1804 */ 1805 void 1806 knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list, int purge) 1807 { 1808 struct knote *kn; 1809 1810 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1811 1812 while ((kn = SLIST_FIRST(list)) != NULL) { 1813 KASSERT(kn->kn_kq == kq); 1814 1815 if (!purge) { 1816 /* Skip pending badfd knotes. */ 1817 while (kn->kn_fop == &badfd_filtops) { 1818 kn = SLIST_NEXT(kn, kn_link); 1819 if (kn == NULL) 1820 return; 1821 KASSERT(kn->kn_kq == kq); 1822 } 1823 } 1824 1825 if (!knote_acquire(kn, NULL, 0)) { 1826 /* knote_acquire() has released kq_lock. */ 1827 mtx_enter(&kq->kq_lock); 1828 continue; 1829 } 1830 mtx_leave(&kq->kq_lock); 1831 filter_detach(kn); 1832 1833 /* 1834 * Notify poll(2) and select(2) when a monitored 1835 * file descriptor is closed. 1836 * 1837 * This reuses the original knote for delivering the 1838 * notification so as to avoid allocating memory. 1839 */ 1840 if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) && 1841 !(p->p_kq == kq && 1842 p->p_kq_serial > (unsigned long)kn->kn_udata) && 1843 kn->kn_fop != &badfd_filtops) { 1844 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1845 FRELE(kn->kn_fp, p); 1846 kn->kn_fp = NULL; 1847 1848 kn->kn_fop = &badfd_filtops; 1849 filter_event(kn, 0); 1850 mtx_enter(&kq->kq_lock); 1851 knote_activate(kn); 1852 knote_release(kn); 1853 continue; 1854 } 1855 1856 knote_drop(kn, p); 1857 mtx_enter(&kq->kq_lock); 1858 } 1859 } 1860 1861 /* 1862 * remove all knotes referencing a specified fd 1863 */ 1864 void 1865 knote_fdclose(struct proc *p, int fd) 1866 { 1867 struct filedesc *fdp = p->p_p->ps_fd; 1868 struct kqueue *kq; 1869 1870 /* 1871 * fdplock can be ignored if the file descriptor table is being freed 1872 * because no other thread can access the fdp. 
1873 */ 1874 if (fdp->fd_refcnt != 0) 1875 fdpassertlocked(fdp); 1876 1877 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1878 mtx_enter(&kq->kq_lock); 1879 if (fd < kq->kq_knlistsize) 1880 knote_remove(p, kq, &kq->kq_knlist[fd], 0); 1881 mtx_leave(&kq->kq_lock); 1882 } 1883 } 1884 1885 /* 1886 * handle a process exiting, including the triggering of NOTE_EXIT notes 1887 * XXX this could be more efficient, doing a single pass down the klist 1888 */ 1889 void 1890 knote_processexit(struct proc *p) 1891 { 1892 struct process *pr = p->p_p; 1893 1894 KERNEL_ASSERT_LOCKED(); 1895 KASSERT(p == curproc); 1896 1897 KNOTE(&pr->ps_klist, NOTE_EXIT); 1898 1899 /* remove other knotes hanging off the process */ 1900 klist_invalidate(&pr->ps_klist); 1901 } 1902 1903 void 1904 knote_attach(struct knote *kn) 1905 { 1906 struct kqueue *kq = kn->kn_kq; 1907 struct knlist *list; 1908 1909 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1910 KASSERT(kn->kn_status & KN_PROCESSING); 1911 1912 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1913 KASSERT(kq->kq_knlistsize > kn->kn_id); 1914 list = &kq->kq_knlist[kn->kn_id]; 1915 } else { 1916 KASSERT(kq->kq_knhashmask != 0); 1917 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1918 } 1919 SLIST_INSERT_HEAD(list, kn, kn_link); 1920 kq->kq_nknotes++; 1921 } 1922 1923 void 1924 knote_detach(struct knote *kn) 1925 { 1926 struct kqueue *kq = kn->kn_kq; 1927 struct knlist *list; 1928 1929 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1930 KASSERT(kn->kn_status & KN_PROCESSING); 1931 1932 kq->kq_nknotes--; 1933 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1934 list = &kq->kq_knlist[kn->kn_id]; 1935 else 1936 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1937 SLIST_REMOVE(list, kn, knote, kn_link); 1938 } 1939 1940 /* 1941 * should be called at spl == 0, since we don't want to hold spl 1942 * while calling FRELE and pool_put. 1943 */ 1944 void 1945 knote_drop(struct knote *kn, struct proc *p) 1946 { 1947 struct kqueue *kq = kn->kn_kq; 1948 1949 KASSERT(kn->kn_filter != EVFILT_MARKER); 1950 1951 mtx_enter(&kq->kq_lock); 1952 knote_detach(kn); 1953 if (kn->kn_status & KN_QUEUED) 1954 knote_dequeue(kn); 1955 if (kn->kn_status & KN_WAITING) { 1956 kn->kn_status &= ~KN_WAITING; 1957 wakeup(kn); 1958 } 1959 mtx_leave(&kq->kq_lock); 1960 1961 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 1962 FRELE(kn->kn_fp, p); 1963 pool_put(&knote_pool, kn); 1964 } 1965 1966 1967 void 1968 knote_enqueue(struct knote *kn) 1969 { 1970 struct kqueue *kq = kn->kn_kq; 1971 1972 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1973 KASSERT(kn->kn_filter != EVFILT_MARKER); 1974 KASSERT((kn->kn_status & KN_QUEUED) == 0); 1975 1976 kqueue_check(kq); 1977 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1978 kn->kn_status |= KN_QUEUED; 1979 kq->kq_count++; 1980 kqueue_check(kq); 1981 kqueue_wakeup(kq); 1982 } 1983 1984 void 1985 knote_dequeue(struct knote *kn) 1986 { 1987 struct kqueue *kq = kn->kn_kq; 1988 1989 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1990 KASSERT(kn->kn_filter != EVFILT_MARKER); 1991 KASSERT(kn->kn_status & KN_QUEUED); 1992 1993 kqueue_check(kq); 1994 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1995 kn->kn_status &= ~KN_QUEUED; 1996 kq->kq_count--; 1997 kqueue_check(kq); 1998 } 1999 2000 /* 2001 * Assign parameters to the knote. 2002 * 2003 * The knote's object lock must be held. 
2004 */ 2005 void 2006 knote_assign(const struct kevent *kev, struct knote *kn) 2007 { 2008 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2009 KERNEL_ASSERT_LOCKED(); 2010 2011 kn->kn_sfflags = kev->fflags; 2012 kn->kn_sdata = kev->data; 2013 kn->kn_udata = kev->udata; 2014 } 2015 2016 /* 2017 * Submit the knote's event for delivery. 2018 * 2019 * The knote's object lock must be held. 2020 */ 2021 void 2022 knote_submit(struct knote *kn, struct kevent *kev) 2023 { 2024 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2025 KERNEL_ASSERT_LOCKED(); 2026 2027 if (kev != NULL) { 2028 *kev = kn->kn_kevent; 2029 if (kn->kn_flags & EV_CLEAR) { 2030 kn->kn_fflags = 0; 2031 kn->kn_data = 0; 2032 } 2033 } 2034 } 2035 2036 void 2037 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 2038 { 2039 SLIST_INIT(&klist->kl_list); 2040 klist->kl_ops = ops; 2041 klist->kl_arg = arg; 2042 } 2043 2044 void 2045 klist_free(struct klist *klist) 2046 { 2047 KASSERT(SLIST_EMPTY(&klist->kl_list)); 2048 } 2049 2050 void 2051 klist_insert(struct klist *klist, struct knote *kn) 2052 { 2053 int ls; 2054 2055 ls = klist_lock(klist); 2056 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2057 klist_unlock(klist, ls); 2058 } 2059 2060 void 2061 klist_insert_locked(struct klist *klist, struct knote *kn) 2062 { 2063 KLIST_ASSERT_LOCKED(klist); 2064 2065 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2066 } 2067 2068 void 2069 klist_remove(struct klist *klist, struct knote *kn) 2070 { 2071 int ls; 2072 2073 ls = klist_lock(klist); 2074 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2075 klist_unlock(klist, ls); 2076 } 2077 2078 void 2079 klist_remove_locked(struct klist *klist, struct knote *kn) 2080 { 2081 KLIST_ASSERT_LOCKED(klist); 2082 2083 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2084 } 2085 2086 /* 2087 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 2088 * 2089 * The caller of this function must not hold any locks that can block 2090 * filterops callbacks that run with KN_PROCESSING. 2091 * Otherwise this function might deadlock. 2092 */ 2093 void 2094 klist_invalidate(struct klist *list) 2095 { 2096 struct knote *kn; 2097 struct kqueue *kq; 2098 struct proc *p = curproc; 2099 int ls; 2100 2101 NET_ASSERT_UNLOCKED(); 2102 2103 ls = klist_lock(list); 2104 while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { 2105 kq = kn->kn_kq; 2106 mtx_enter(&kq->kq_lock); 2107 if (!knote_acquire(kn, list, ls)) { 2108 /* knote_acquire() has released kq_lock 2109 * and klist lock. 
*/ 2110 ls = klist_lock(list); 2111 continue; 2112 } 2113 mtx_leave(&kq->kq_lock); 2114 klist_unlock(list, ls); 2115 filter_detach(kn); 2116 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 2117 kn->kn_fop = &dead_filtops; 2118 filter_event(kn, 0); 2119 mtx_enter(&kq->kq_lock); 2120 knote_activate(kn); 2121 knote_release(kn); 2122 mtx_leave(&kq->kq_lock); 2123 } else { 2124 knote_drop(kn, p); 2125 } 2126 ls = klist_lock(list); 2127 } 2128 klist_unlock(list, ls); 2129 } 2130 2131 static int 2132 klist_lock(struct klist *list) 2133 { 2134 int ls = 0; 2135 2136 if (list->kl_ops != NULL) { 2137 ls = list->kl_ops->klo_lock(list->kl_arg); 2138 } else { 2139 KERNEL_LOCK(); 2140 ls = splhigh(); 2141 } 2142 return ls; 2143 } 2144 2145 static void 2146 klist_unlock(struct klist *list, int ls) 2147 { 2148 if (list->kl_ops != NULL) { 2149 list->kl_ops->klo_unlock(list->kl_arg, ls); 2150 } else { 2151 splx(ls); 2152 KERNEL_UNLOCK(); 2153 } 2154 } 2155 2156 static void 2157 klist_mutex_assertlk(void *arg) 2158 { 2159 struct mutex *mtx = arg; 2160 2161 (void)mtx; 2162 2163 MUTEX_ASSERT_LOCKED(mtx); 2164 } 2165 2166 static int 2167 klist_mutex_lock(void *arg) 2168 { 2169 struct mutex *mtx = arg; 2170 2171 mtx_enter(mtx); 2172 return 0; 2173 } 2174 2175 static void 2176 klist_mutex_unlock(void *arg, int s) 2177 { 2178 struct mutex *mtx = arg; 2179 2180 mtx_leave(mtx); 2181 } 2182 2183 static const struct klistops mutex_klistops = { 2184 .klo_assertlk = klist_mutex_assertlk, 2185 .klo_lock = klist_mutex_lock, 2186 .klo_unlock = klist_mutex_unlock, 2187 }; 2188 2189 void 2190 klist_init_mutex(struct klist *klist, struct mutex *mtx) 2191 { 2192 klist_init(klist, &mutex_klistops, mtx); 2193 } 2194 2195 static void 2196 klist_rwlock_assertlk(void *arg) 2197 { 2198 struct rwlock *rwl = arg; 2199 2200 (void)rwl; 2201 2202 rw_assert_wrlock(rwl); 2203 } 2204 2205 static int 2206 klist_rwlock_lock(void *arg) 2207 { 2208 struct rwlock *rwl = arg; 2209 2210 rw_enter_write(rwl); 2211 return 0; 2212 } 2213 2214 static void 2215 klist_rwlock_unlock(void *arg, int s) 2216 { 2217 struct rwlock *rwl = arg; 2218 2219 rw_exit_write(rwl); 2220 } 2221 2222 static const struct klistops rwlock_klistops = { 2223 .klo_assertlk = klist_rwlock_assertlk, 2224 .klo_lock = klist_rwlock_lock, 2225 .klo_unlock = klist_rwlock_unlock, 2226 }; 2227 2228 void 2229 klist_init_rwlock(struct klist *klist, struct rwlock *rwl) 2230 { 2231 klist_init(klist, &rwlock_klistops, rwl); 2232 } 2233
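
/*
 * Example usage of the klist API exported above (a sketch only; the
 * "xyz" names are made up and not part of the kernel). A subsystem that
 * protects its event source with a mutex would typically do:
 *
 *	struct klist xyz_klist;
 *
 *	klist_init_mutex(&xyz_klist, &xyz_mtx);		(at attach time)
 *	klist_insert(&xyz_klist, kn);			(from the filter's f_attach)
 *	KNOTE(&xyz_klist, 0);				(on each event, with xyz_mtx held)
 *	klist_remove(&xyz_klist, kn);			(from the filter's f_detach)
 *	klist_invalidate(&xyz_klist);			(at detach; remaining knotes see EOF)
 *	klist_free(&xyz_klist);
 */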