1 /* $OpenBSD: kern_event.c,v 1.181 2022/02/13 12:58:46 visa Exp $ */ 2 3 /*- 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/atomic.h> 34 #include <sys/kernel.h> 35 #include <sys/proc.h> 36 #include <sys/pledge.h> 37 #include <sys/malloc.h> 38 #include <sys/unistd.h> 39 #include <sys/file.h> 40 #include <sys/filedesc.h> 41 #include <sys/fcntl.h> 42 #include <sys/selinfo.h> 43 #include <sys/queue.h> 44 #include <sys/event.h> 45 #include <sys/eventvar.h> 46 #include <sys/ktrace.h> 47 #include <sys/pool.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/stat.h> 52 #include <sys/uio.h> 53 #include <sys/mount.h> 54 #include <sys/poll.h> 55 #include <sys/syscallargs.h> 56 #include <sys/time.h> 57 #include <sys/timeout.h> 58 #include <sys/vnode.h> 59 #include <sys/wait.h> 60 61 #ifdef DIAGNOSTIC 62 #define KLIST_ASSERT_LOCKED(kl) do { \ 63 if ((kl)->kl_ops != NULL) \ 64 (kl)->kl_ops->klo_assertlk((kl)->kl_arg); \ 65 else \ 66 KERNEL_ASSERT_LOCKED(); \ 67 } while (0) 68 #else 69 #define KLIST_ASSERT_LOCKED(kl) ((void)(kl)) 70 #endif 71 72 struct kqueue *kqueue_alloc(struct filedesc *); 73 void kqueue_terminate(struct proc *p, struct kqueue *); 74 void KQREF(struct kqueue *); 75 void KQRELE(struct kqueue *); 76 77 void kqueue_purge(struct proc *, struct kqueue *); 78 int kqueue_sleep(struct kqueue *, struct timespec *); 79 80 int kqueue_read(struct file *, struct uio *, int); 81 int kqueue_write(struct file *, struct uio *, int); 82 int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 83 struct proc *p); 84 int kqueue_poll(struct file *fp, int events, struct proc *p); 85 int kqueue_kqfilter(struct file *fp, struct knote *kn); 86 int kqueue_stat(struct file *fp, struct stat *st, struct proc *p); 87 int kqueue_close(struct file *fp, struct proc *p); 88 void kqueue_wakeup(struct kqueue *kq); 89 90 #ifdef KQUEUE_DEBUG 91 void kqueue_do_check(struct kqueue *kq, const char *func, int line); 92 #define kqueue_check(kq) kqueue_do_check((kq), 
__func__, __LINE__) 93 #else 94 #define kqueue_check(kq) do {} while (0) 95 #endif 96 97 static int filter_attach(struct knote *kn); 98 static void filter_detach(struct knote *kn); 99 static int filter_event(struct knote *kn, long hint); 100 static int filter_modify(struct kevent *kev, struct knote *kn); 101 static int filter_process(struct knote *kn, struct kevent *kev); 102 static void kqueue_expand_hash(struct kqueue *kq); 103 static void kqueue_expand_list(struct kqueue *kq, int fd); 104 static void kqueue_task(void *); 105 static int klist_lock(struct klist *); 106 static void klist_unlock(struct klist *, int); 107 108 const struct fileops kqueueops = { 109 .fo_read = kqueue_read, 110 .fo_write = kqueue_write, 111 .fo_ioctl = kqueue_ioctl, 112 .fo_poll = kqueue_poll, 113 .fo_kqfilter = kqueue_kqfilter, 114 .fo_stat = kqueue_stat, 115 .fo_close = kqueue_close 116 }; 117 118 void knote_attach(struct knote *kn); 119 void knote_detach(struct knote *kn); 120 void knote_drop(struct knote *kn, struct proc *p); 121 void knote_enqueue(struct knote *kn); 122 void knote_dequeue(struct knote *kn); 123 int knote_acquire(struct knote *kn, struct klist *, int); 124 void knote_release(struct knote *kn); 125 void knote_activate(struct knote *kn); 126 void knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list, 127 int purge); 128 129 void filt_kqdetach(struct knote *kn); 130 int filt_kqueue(struct knote *kn, long hint); 131 int filt_kqueuemodify(struct kevent *kev, struct knote *kn); 132 int filt_kqueueprocess(struct knote *kn, struct kevent *kev); 133 int filt_kqueue_common(struct knote *kn, struct kqueue *kq); 134 int filt_procattach(struct knote *kn); 135 void filt_procdetach(struct knote *kn); 136 int filt_proc(struct knote *kn, long hint); 137 int filt_fileattach(struct knote *kn); 138 void filt_timerexpire(void *knx); 139 int filt_timerattach(struct knote *kn); 140 void filt_timerdetach(struct knote *kn); 141 int filt_timermodify(struct kevent *kev, struct knote *kn); 142 int filt_timerprocess(struct knote *kn, struct kevent *kev); 143 void filt_seltruedetach(struct knote *kn); 144 145 const struct filterops kqread_filtops = { 146 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 147 .f_attach = NULL, 148 .f_detach = filt_kqdetach, 149 .f_event = filt_kqueue, 150 .f_modify = filt_kqueuemodify, 151 .f_process = filt_kqueueprocess, 152 }; 153 154 const struct filterops proc_filtops = { 155 .f_flags = 0, 156 .f_attach = filt_procattach, 157 .f_detach = filt_procdetach, 158 .f_event = filt_proc, 159 }; 160 161 const struct filterops file_filtops = { 162 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 163 .f_attach = filt_fileattach, 164 .f_detach = NULL, 165 .f_event = NULL, 166 }; 167 168 const struct filterops timer_filtops = { 169 .f_flags = 0, 170 .f_attach = filt_timerattach, 171 .f_detach = filt_timerdetach, 172 .f_event = NULL, 173 .f_modify = filt_timermodify, 174 .f_process = filt_timerprocess, 175 }; 176 177 struct pool knote_pool; 178 struct pool kqueue_pool; 179 struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); 180 int kq_ntimeouts = 0; 181 int kq_timeoutmax = (4 * 1024); 182 183 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 184 185 /* 186 * Table for all system-defined filters.
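 * Indexed by the one's complement of the (negative) EVFILT_* value,
 * matching the sysfilt_ops[~kev->filter] lookup in kqueue_register().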
187 */ 188 const struct filterops *const sysfilt_ops[] = { 189 &file_filtops, /* EVFILT_READ */ 190 &file_filtops, /* EVFILT_WRITE */ 191 NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ 192 &file_filtops, /* EVFILT_VNODE */ 193 &proc_filtops, /* EVFILT_PROC */ 194 &sig_filtops, /* EVFILT_SIGNAL */ 195 &timer_filtops, /* EVFILT_TIMER */ 196 &file_filtops, /* EVFILT_DEVICE */ 197 &file_filtops, /* EVFILT_EXCEPT */ 198 }; 199 200 void 201 KQREF(struct kqueue *kq) 202 { 203 atomic_inc_int(&kq->kq_refs); 204 } 205 206 void 207 KQRELE(struct kqueue *kq) 208 { 209 struct filedesc *fdp; 210 211 if (atomic_dec_int_nv(&kq->kq_refs) > 0) 212 return; 213 214 fdp = kq->kq_fdp; 215 if (rw_status(&fdp->fd_lock) == RW_WRITE) { 216 LIST_REMOVE(kq, kq_next); 217 } else { 218 fdplock(fdp); 219 LIST_REMOVE(kq, kq_next); 220 fdpunlock(fdp); 221 } 222 223 KASSERT(TAILQ_EMPTY(&kq->kq_head)); 224 KASSERT(kq->kq_nknotes == 0); 225 226 free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * 227 sizeof(struct knlist)); 228 hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); 229 klist_free(&kq->kq_sel.si_note); 230 pool_put(&kqueue_pool, kq); 231 } 232 233 void 234 kqueue_init(void) 235 { 236 pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, 237 PR_WAITOK, "kqueuepl", NULL); 238 pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, 239 PR_WAITOK, "knotepl", NULL); 240 } 241 242 void 243 kqueue_init_percpu(void) 244 { 245 pool_cache_init(&knote_pool); 246 } 247 248 int 249 filt_fileattach(struct knote *kn) 250 { 251 struct file *fp = kn->kn_fp; 252 253 return fp->f_ops->fo_kqfilter(fp, kn); 254 } 255 256 int 257 kqueue_kqfilter(struct file *fp, struct knote *kn) 258 { 259 struct kqueue *kq = kn->kn_fp->f_data; 260 261 if (kn->kn_filter != EVFILT_READ) 262 return (EINVAL); 263 264 kn->kn_fop = &kqread_filtops; 265 klist_insert(&kq->kq_sel.si_note, kn); 266 return (0); 267 } 268 269 void 270 filt_kqdetach(struct knote *kn) 271 { 272 struct kqueue *kq = kn->kn_fp->f_data; 273 274 klist_remove(&kq->kq_sel.si_note, kn); 275 } 276 277 int 278 filt_kqueue_common(struct knote *kn, struct kqueue *kq) 279 { 280 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 281 282 kn->kn_data = kq->kq_count; 283 284 return (kn->kn_data > 0); 285 } 286 287 int 288 filt_kqueue(struct knote *kn, long hint) 289 { 290 struct kqueue *kq = kn->kn_fp->f_data; 291 int active; 292 293 mtx_enter(&kq->kq_lock); 294 active = filt_kqueue_common(kn, kq); 295 mtx_leave(&kq->kq_lock); 296 297 return (active); 298 } 299 300 int 301 filt_kqueuemodify(struct kevent *kev, struct knote *kn) 302 { 303 struct kqueue *kq = kn->kn_fp->f_data; 304 int active; 305 306 mtx_enter(&kq->kq_lock); 307 knote_assign(kev, kn); 308 active = filt_kqueue_common(kn, kq); 309 mtx_leave(&kq->kq_lock); 310 311 return (active); 312 } 313 314 int 315 filt_kqueueprocess(struct knote *kn, struct kevent *kev) 316 { 317 struct kqueue *kq = kn->kn_fp->f_data; 318 int active; 319 320 mtx_enter(&kq->kq_lock); 321 if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) 322 active = 1; 323 else 324 active = filt_kqueue_common(kn, kq); 325 if (active) 326 knote_submit(kn, kev); 327 mtx_leave(&kq->kq_lock); 328 329 return (active); 330 } 331 332 int 333 filt_procattach(struct knote *kn) 334 { 335 struct process *pr; 336 int s; 337 338 if ((curproc->p_p->ps_flags & PS_PLEDGE) && 339 (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) 340 return pledge_fail(curproc, EPERM, PLEDGE_PROC); 341 342 if (kn->kn_id > PID_MAX) 343 return ESRCH; 344 345 pr = prfind(kn->kn_id); 346 if (pr == NULL) 347 return (ESRCH); 348 349 /* 
exiting processes can't be specified */ 350 if (pr->ps_flags & PS_EXITING) 351 return (ESRCH); 352 353 kn->kn_ptr.p_process = pr; 354 kn->kn_flags |= EV_CLEAR; /* automatically set */ 355 356 /* 357 * internal flag indicating registration done by kernel 358 */ 359 if (kn->kn_flags & EV_FLAG1) { 360 kn->kn_data = kn->kn_sdata; /* ppid */ 361 kn->kn_fflags = NOTE_CHILD; 362 kn->kn_flags &= ~EV_FLAG1; 363 } 364 365 s = splhigh(); 366 klist_insert_locked(&pr->ps_klist, kn); 367 splx(s); 368 369 return (0); 370 } 371 372 /* 373 * The knote may be attached to a different process, which may exit, 374 * leaving nothing for the knote to be attached to. So when the process 375 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 376 * it will be deleted when read out. However, as part of the knote deletion, 377 * this routine is called, so a check is needed to avoid actually performing 378 * a detach, because the original process does not exist any more. 379 */ 380 void 381 filt_procdetach(struct knote *kn) 382 { 383 struct kqueue *kq = kn->kn_kq; 384 struct process *pr = kn->kn_ptr.p_process; 385 int s, status; 386 387 mtx_enter(&kq->kq_lock); 388 status = kn->kn_status; 389 mtx_leave(&kq->kq_lock); 390 391 if (status & KN_DETACHED) 392 return; 393 394 s = splhigh(); 395 klist_remove_locked(&pr->ps_klist, kn); 396 splx(s); 397 } 398 399 int 400 filt_proc(struct knote *kn, long hint) 401 { 402 struct kqueue *kq = kn->kn_kq; 403 u_int event; 404 405 /* 406 * mask off extra data 407 */ 408 event = (u_int)hint & NOTE_PCTRLMASK; 409 410 /* 411 * if the user is interested in this event, record it. 412 */ 413 if (kn->kn_sfflags & event) 414 kn->kn_fflags |= event; 415 416 /* 417 * process is gone, so flag the event as finished and remove it 418 * from the process's klist 419 */ 420 if (event == NOTE_EXIT) { 421 struct process *pr = kn->kn_ptr.p_process; 422 int s; 423 424 mtx_enter(&kq->kq_lock); 425 kn->kn_status |= KN_DETACHED; 426 mtx_leave(&kq->kq_lock); 427 428 s = splhigh(); 429 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 430 kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); 431 klist_remove_locked(&pr->ps_klist, kn); 432 splx(s); 433 return (1); 434 } 435 436 /* 437 * process forked, and user wants to track the new process, 438 * so attach a new knote to it, and immediately report an 439 * event with the parent's pid. 440 */ 441 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 442 struct kevent kev; 443 int error; 444 445 /* 446 * register knote with new process. 447 */ 448 memset(&kev, 0, sizeof(kev)); 449 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 450 kev.filter = kn->kn_filter; 451 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 452 kev.fflags = kn->kn_sfflags; 453 kev.data = kn->kn_id; /* parent */ 454 kev.udata = kn->kn_udata; /* preserve udata */ 455 error = kqueue_register(kq, &kev, 0, NULL); 456 if (error) 457 kn->kn_fflags |= NOTE_TRACKERR; 458 } 459 460 return (kn->kn_fflags != 0); 461 } 462 463 static void 464 filt_timer_timeout_add(struct knote *kn) 465 { 466 struct timeval tv; 467 struct timeout *to = kn->kn_hook; 468 int tticks; 469 470 tv.tv_sec = kn->kn_sdata / 1000; 471 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 472 tticks = tvtohz(&tv); 473 /* Remove extra tick from tvtohz() if timeout has fired before. */ 474 if (timeout_triggered(to)) 475 tticks--; 476 timeout_add(to, (tticks > 0) ? 
tticks : 1); 477 } 478 479 void 480 filt_timerexpire(void *knx) 481 { 482 struct knote *kn = knx; 483 struct kqueue *kq = kn->kn_kq; 484 485 kn->kn_data++; 486 mtx_enter(&kq->kq_lock); 487 knote_activate(kn); 488 mtx_leave(&kq->kq_lock); 489 490 if ((kn->kn_flags & EV_ONESHOT) == 0) 491 filt_timer_timeout_add(kn); 492 } 493 494 495 /* 496 * data contains amount of time to sleep, in milliseconds 497 */ 498 int 499 filt_timerattach(struct knote *kn) 500 { 501 struct timeout *to; 502 503 if (kq_ntimeouts > kq_timeoutmax) 504 return (ENOMEM); 505 kq_ntimeouts++; 506 507 kn->kn_flags |= EV_CLEAR; /* automatically set */ 508 to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); 509 timeout_set(to, filt_timerexpire, kn); 510 kn->kn_hook = to; 511 filt_timer_timeout_add(kn); 512 513 return (0); 514 } 515 516 void 517 filt_timerdetach(struct knote *kn) 518 { 519 struct timeout *to; 520 521 to = (struct timeout *)kn->kn_hook; 522 timeout_del_barrier(to); 523 free(to, M_KEVENT, sizeof(*to)); 524 kq_ntimeouts--; 525 } 526 527 int 528 filt_timermodify(struct kevent *kev, struct knote *kn) 529 { 530 struct kqueue *kq = kn->kn_kq; 531 struct timeout *to = kn->kn_hook; 532 533 /* Reset the timer. Any pending events are discarded. */ 534 535 timeout_del_barrier(to); 536 537 mtx_enter(&kq->kq_lock); 538 if (kn->kn_status & KN_QUEUED) 539 knote_dequeue(kn); 540 kn->kn_status &= ~KN_ACTIVE; 541 mtx_leave(&kq->kq_lock); 542 543 kn->kn_data = 0; 544 knote_assign(kev, kn); 545 /* Reinit timeout to invoke tick adjustment again. */ 546 timeout_set(to, filt_timerexpire, kn); 547 filt_timer_timeout_add(kn); 548 549 return (0); 550 } 551 552 int 553 filt_timerprocess(struct knote *kn, struct kevent *kev) 554 { 555 int active, s; 556 557 s = splsoftclock(); 558 active = (kn->kn_data != 0); 559 if (active) 560 knote_submit(kn, kev); 561 splx(s); 562 563 return (active); 564 } 565 566 567 /* 568 * filt_seltrue: 569 * 570 * This filter "event" routine simulates seltrue(). 571 */ 572 int 573 filt_seltrue(struct knote *kn, long hint) 574 { 575 576 /* 577 * We don't know how much data can be read/written, 578 * but we know that it *can* be. This is about as 579 * good as select/poll does as well. 580 */ 581 kn->kn_data = 0; 582 return (1); 583 } 584 585 int 586 filt_seltruemodify(struct kevent *kev, struct knote *kn) 587 { 588 knote_assign(kev, kn); 589 return (kn->kn_fop->f_event(kn, 0)); 590 } 591 592 int 593 filt_seltrueprocess(struct knote *kn, struct kevent *kev) 594 { 595 int active; 596 597 active = kn->kn_fop->f_event(kn, 0); 598 if (active) 599 knote_submit(kn, kev); 600 return (active); 601 } 602 603 /* 604 * This provides full kqfilter entry for device switch tables, which 605 * has same effect as filter using filt_seltrue() as filter method. 
606 */ 607 void 608 filt_seltruedetach(struct knote *kn) 609 { 610 /* Nothing to do */ 611 } 612 613 const struct filterops seltrue_filtops = { 614 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 615 .f_attach = NULL, 616 .f_detach = filt_seltruedetach, 617 .f_event = filt_seltrue, 618 .f_modify = filt_seltruemodify, 619 .f_process = filt_seltrueprocess, 620 }; 621 622 int 623 seltrue_kqfilter(dev_t dev, struct knote *kn) 624 { 625 switch (kn->kn_filter) { 626 case EVFILT_READ: 627 case EVFILT_WRITE: 628 kn->kn_fop = &seltrue_filtops; 629 break; 630 default: 631 return (EINVAL); 632 } 633 634 /* Nothing more to do */ 635 return (0); 636 } 637 638 static int 639 filt_dead(struct knote *kn, long hint) 640 { 641 if (kn->kn_filter == EVFILT_EXCEPT) { 642 /* 643 * Do not deliver event because there is no out-of-band data. 644 * However, let HUP condition pass for poll(2). 645 */ 646 if ((kn->kn_flags & __EV_POLL) == 0) { 647 kn->kn_flags |= EV_DISABLE; 648 return (0); 649 } 650 } 651 652 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 653 if (kn->kn_flags & __EV_POLL) 654 kn->kn_flags |= __EV_HUP; 655 kn->kn_data = 0; 656 return (1); 657 } 658 659 static void 660 filt_deaddetach(struct knote *kn) 661 { 662 /* Nothing to do */ 663 } 664 665 const struct filterops dead_filtops = { 666 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 667 .f_attach = NULL, 668 .f_detach = filt_deaddetach, 669 .f_event = filt_dead, 670 .f_modify = filt_seltruemodify, 671 .f_process = filt_seltrueprocess, 672 }; 673 674 static int 675 filt_badfd(struct knote *kn, long hint) 676 { 677 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 678 kn->kn_data = EBADF; 679 return (1); 680 } 681 682 /* For use with kqpoll. */ 683 const struct filterops badfd_filtops = { 684 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 685 .f_attach = NULL, 686 .f_detach = filt_deaddetach, 687 .f_event = filt_badfd, 688 .f_modify = filt_seltruemodify, 689 .f_process = filt_seltrueprocess, 690 }; 691 692 static int 693 filter_attach(struct knote *kn) 694 { 695 int error; 696 697 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 698 error = kn->kn_fop->f_attach(kn); 699 } else { 700 KERNEL_LOCK(); 701 error = kn->kn_fop->f_attach(kn); 702 KERNEL_UNLOCK(); 703 } 704 return (error); 705 } 706 707 static void 708 filter_detach(struct knote *kn) 709 { 710 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 711 kn->kn_fop->f_detach(kn); 712 } else { 713 KERNEL_LOCK(); 714 kn->kn_fop->f_detach(kn); 715 KERNEL_UNLOCK(); 716 } 717 } 718 719 static int 720 filter_event(struct knote *kn, long hint) 721 { 722 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 723 KERNEL_ASSERT_LOCKED(); 724 725 return (kn->kn_fop->f_event(kn, hint)); 726 } 727 728 static int 729 filter_modify(struct kevent *kev, struct knote *kn) 730 { 731 int active, s; 732 733 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 734 active = kn->kn_fop->f_modify(kev, kn); 735 } else { 736 KERNEL_LOCK(); 737 if (kn->kn_fop->f_modify != NULL) { 738 active = kn->kn_fop->f_modify(kev, kn); 739 } else { 740 /* Emulate f_modify using f_event. 
*/ 741 s = splhigh(); 742 knote_assign(kev, kn); 743 active = kn->kn_fop->f_event(kn, 0); 744 splx(s); 745 } 746 KERNEL_UNLOCK(); 747 } 748 return (active); 749 } 750 751 static int 752 filter_process(struct knote *kn, struct kevent *kev) 753 { 754 int active, s; 755 756 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 757 active = kn->kn_fop->f_process(kn, kev); 758 } else { 759 KERNEL_LOCK(); 760 if (kn->kn_fop->f_process != NULL) { 761 active = kn->kn_fop->f_process(kn, kev); 762 } else { 763 /* Emulate f_process using f_event. */ 764 s = splhigh(); 765 /* 766 * If called from kqueue_scan(), skip f_event 767 * when EV_ONESHOT is set, to preserve old behaviour. 768 */ 769 if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) 770 active = 1; 771 else 772 active = kn->kn_fop->f_event(kn, 0); 773 if (active) 774 knote_submit(kn, kev); 775 splx(s); 776 } 777 KERNEL_UNLOCK(); 778 } 779 return (active); 780 } 781 782 /* 783 * Initialize the current thread for poll/select system call. 784 * num indicates the number of serials that the system call may utilize. 785 * After this function, the valid range of serials is 786 * p_kq_serial <= x < p_kq_serial + num. 787 */ 788 void 789 kqpoll_init(unsigned int num) 790 { 791 struct proc *p = curproc; 792 struct filedesc *fdp; 793 794 if (p->p_kq == NULL) { 795 p->p_kq = kqueue_alloc(p->p_fd); 796 p->p_kq_serial = arc4random(); 797 fdp = p->p_fd; 798 fdplock(fdp); 799 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 800 fdpunlock(fdp); 801 } 802 803 if (p->p_kq_serial + num < p->p_kq_serial) { 804 /* Serial is about to wrap. Clear all attached knotes. */ 805 kqueue_purge(p, p->p_kq); 806 p->p_kq_serial = 0; 807 } 808 } 809 810 /* 811 * Finish poll/select system call. 812 * num must have the same value that was used with kqpoll_init(). 813 */ 814 void 815 kqpoll_done(unsigned int num) 816 { 817 struct proc *p = curproc; 818 struct kqueue *kq = p->p_kq; 819 820 KASSERT(p->p_kq != NULL); 821 KASSERT(p->p_kq_serial + num >= p->p_kq_serial); 822 823 p->p_kq_serial += num; 824 825 /* 826 * Because of kn_pollid key, a thread can in principle allocate 827 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly 828 * with suitably varying pollfd arrays. 829 * Prevent such a large allocation by clearing knotes eagerly 830 * if there are too many of them. 831 * 832 * A small multiple of kq_knlistsize should give enough margin 833 * that eager clearing is infrequent, or does not happen at all, 834 * with normal programs. 835 * A single pollfd entry can use up to three knotes. 836 * Typically there is no significant overlap of fd and events 837 * between different entries in the pollfd array. 
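 * (For example, a pollfd entry that asks for POLLIN, POLLOUT and POLLPRI
 * typically ends up with one EVFILT_READ, one EVFILT_WRITE and one
 * EVFILT_EXCEPT knote for that descriptor.)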
838 */ 839 if (kq->kq_nknotes > 4 * kq->kq_knlistsize) 840 kqueue_purge(p, kq); 841 } 842 843 void 844 kqpoll_exit(void) 845 { 846 struct proc *p = curproc; 847 848 if (p->p_kq == NULL) 849 return; 850 851 kqueue_purge(p, p->p_kq); 852 kqueue_terminate(p, p->p_kq); 853 KASSERT(p->p_kq->kq_refs == 1); 854 KQRELE(p->p_kq); 855 p->p_kq = NULL; 856 } 857 858 struct kqueue * 859 kqueue_alloc(struct filedesc *fdp) 860 { 861 struct kqueue *kq; 862 863 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 864 kq->kq_refs = 1; 865 kq->kq_fdp = fdp; 866 TAILQ_INIT(&kq->kq_head); 867 mtx_init(&kq->kq_lock, IPL_HIGH); 868 task_set(&kq->kq_task, kqueue_task, kq); 869 klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock); 870 871 return (kq); 872 } 873 874 int 875 sys_kqueue(struct proc *p, void *v, register_t *retval) 876 { 877 struct filedesc *fdp = p->p_fd; 878 struct kqueue *kq; 879 struct file *fp; 880 int fd, error; 881 882 kq = kqueue_alloc(fdp); 883 884 fdplock(fdp); 885 error = falloc(p, &fp, &fd); 886 if (error) 887 goto out; 888 fp->f_flag = FREAD | FWRITE; 889 fp->f_type = DTYPE_KQUEUE; 890 fp->f_ops = &kqueueops; 891 fp->f_data = kq; 892 *retval = fd; 893 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 894 kq = NULL; 895 fdinsert(fdp, fd, 0, fp); 896 FRELE(fp, p); 897 out: 898 fdpunlock(fdp); 899 if (kq != NULL) 900 pool_put(&kqueue_pool, kq); 901 return (error); 902 } 903 904 int 905 sys_kevent(struct proc *p, void *v, register_t *retval) 906 { 907 struct kqueue_scan_state scan; 908 struct filedesc* fdp = p->p_fd; 909 struct sys_kevent_args /* { 910 syscallarg(int) fd; 911 syscallarg(const struct kevent *) changelist; 912 syscallarg(int) nchanges; 913 syscallarg(struct kevent *) eventlist; 914 syscallarg(int) nevents; 915 syscallarg(const struct timespec *) timeout; 916 } */ *uap = v; 917 struct kevent *kevp; 918 struct kqueue *kq; 919 struct file *fp; 920 struct timespec ts; 921 struct timespec *tsp = NULL; 922 int i, n, nerrors, error; 923 int ready, total; 924 struct kevent kev[KQ_NEVENTS]; 925 926 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 927 return (EBADF); 928 929 if (fp->f_type != DTYPE_KQUEUE) { 930 error = EBADF; 931 goto done; 932 } 933 934 if (SCARG(uap, timeout) != NULL) { 935 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 936 if (error) 937 goto done; 938 #ifdef KTRACE 939 if (KTRPOINT(p, KTR_STRUCT)) 940 ktrreltimespec(p, &ts); 941 #endif 942 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 943 error = EINVAL; 944 goto done; 945 } 946 tsp = &ts; 947 } 948 949 kq = fp->f_data; 950 nerrors = 0; 951 952 while ((n = SCARG(uap, nchanges)) > 0) { 953 if (n > nitems(kev)) 954 n = nitems(kev); 955 error = copyin(SCARG(uap, changelist), kev, 956 n * sizeof(struct kevent)); 957 if (error) 958 goto done; 959 #ifdef KTRACE 960 if (KTRPOINT(p, KTR_STRUCT)) 961 ktrevent(p, kev, n); 962 #endif 963 for (i = 0; i < n; i++) { 964 kevp = &kev[i]; 965 kevp->flags &= ~EV_SYSFLAGS; 966 error = kqueue_register(kq, kevp, 0, p); 967 if (error || (kevp->flags & EV_RECEIPT)) { 968 if (SCARG(uap, nevents) != 0) { 969 kevp->flags = EV_ERROR; 970 kevp->data = error; 971 copyout(kevp, SCARG(uap, eventlist), 972 sizeof(*kevp)); 973 SCARG(uap, eventlist)++; 974 SCARG(uap, nevents)--; 975 nerrors++; 976 } else { 977 goto done; 978 } 979 } 980 } 981 SCARG(uap, nchanges) -= n; 982 SCARG(uap, changelist) += n; 983 } 984 if (nerrors) { 985 *retval = nerrors; 986 error = 0; 987 goto done; 988 } 989 990 kqueue_scan_setup(&scan, kq); 991 FRELE(fp, p); 992 /* 993 * Collect as many events as we can. 
The timeout on successive 994 * loops is disabled (kqueue_scan() becomes non-blocking). 995 */ 996 total = 0; 997 error = 0; 998 while ((n = SCARG(uap, nevents) - total) > 0) { 999 if (n > nitems(kev)) 1000 n = nitems(kev); 1001 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 1002 if (ready == 0) 1003 break; 1004 error = copyout(kev, SCARG(uap, eventlist) + total, 1005 sizeof(struct kevent) * ready); 1006 #ifdef KTRACE 1007 if (KTRPOINT(p, KTR_STRUCT)) 1008 ktrevent(p, kev, ready); 1009 #endif 1010 total += ready; 1011 if (error || ready < n) 1012 break; 1013 } 1014 kqueue_scan_finish(&scan); 1015 *retval = total; 1016 return (error); 1017 1018 done: 1019 FRELE(fp, p); 1020 return (error); 1021 } 1022 1023 #ifdef KQUEUE_DEBUG 1024 void 1025 kqueue_do_check(struct kqueue *kq, const char *func, int line) 1026 { 1027 struct knote *kn; 1028 int count = 0, nmarker = 0; 1029 1030 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1031 1032 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1033 if (kn->kn_filter == EVFILT_MARKER) { 1034 if ((kn->kn_status & KN_QUEUED) != 0) 1035 panic("%s:%d: kq=%p kn=%p marker QUEUED", 1036 func, line, kq, kn); 1037 nmarker++; 1038 } else { 1039 if ((kn->kn_status & KN_ACTIVE) == 0) 1040 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 1041 func, line, kq, kn); 1042 if ((kn->kn_status & KN_QUEUED) == 0) 1043 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 1044 func, line, kq, kn); 1045 if (kn->kn_kq != kq) 1046 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 1047 func, line, kq, kn, kn->kn_kq); 1048 count++; 1049 if (count > kq->kq_count) 1050 goto bad; 1051 } 1052 } 1053 if (count != kq->kq_count) { 1054 bad: 1055 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 1056 func, line, kq, kq->kq_count, count, nmarker); 1057 } 1058 } 1059 #endif 1060 1061 int 1062 kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid, 1063 struct proc *p) 1064 { 1065 struct filedesc *fdp = kq->kq_fdp; 1066 const struct filterops *fops = NULL; 1067 struct file *fp = NULL; 1068 struct knote *kn = NULL, *newkn = NULL; 1069 struct knlist *list = NULL; 1070 int active, error = 0; 1071 1072 KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq)); 1073 1074 if (kev->filter < 0) { 1075 if (kev->filter + EVFILT_SYSCOUNT < 0) 1076 return (EINVAL); 1077 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 1078 } 1079 1080 if (fops == NULL) { 1081 /* 1082 * XXX 1083 * filter attach routine is responsible for ensuring that 1084 * the identifier can be attached to it. 
1085 */ 1086 return (EINVAL); 1087 } 1088 1089 if (fops->f_flags & FILTEROP_ISFD) { 1090 /* validate descriptor */ 1091 if (kev->ident > INT_MAX) 1092 return (EBADF); 1093 } 1094 1095 if (kev->flags & EV_ADD) 1096 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 1097 1098 again: 1099 if (fops->f_flags & FILTEROP_ISFD) { 1100 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 1101 error = EBADF; 1102 goto done; 1103 } 1104 mtx_enter(&kq->kq_lock); 1105 if (kev->flags & EV_ADD) 1106 kqueue_expand_list(kq, kev->ident); 1107 if (kev->ident < kq->kq_knlistsize) 1108 list = &kq->kq_knlist[kev->ident]; 1109 } else { 1110 mtx_enter(&kq->kq_lock); 1111 if (kev->flags & EV_ADD) 1112 kqueue_expand_hash(kq); 1113 if (kq->kq_knhashmask != 0) { 1114 list = &kq->kq_knhash[ 1115 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1116 } 1117 } 1118 if (list != NULL) { 1119 SLIST_FOREACH(kn, list, kn_link) { 1120 if (kev->filter == kn->kn_filter && 1121 kev->ident == kn->kn_id && 1122 pollid == kn->kn_pollid) { 1123 if (!knote_acquire(kn, NULL, 0)) { 1124 /* knote_acquire() has released 1125 * kq_lock. */ 1126 if (fp != NULL) { 1127 FRELE(fp, p); 1128 fp = NULL; 1129 } 1130 goto again; 1131 } 1132 break; 1133 } 1134 } 1135 } 1136 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 1137 1138 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 1139 mtx_leave(&kq->kq_lock); 1140 error = ENOENT; 1141 goto done; 1142 } 1143 1144 /* 1145 * kn now contains the matching knote, or NULL if no match. 1146 */ 1147 if (kev->flags & EV_ADD) { 1148 if (kn == NULL) { 1149 kn = newkn; 1150 newkn = NULL; 1151 kn->kn_status = KN_PROCESSING; 1152 kn->kn_fp = fp; 1153 kn->kn_kq = kq; 1154 kn->kn_fop = fops; 1155 1156 /* 1157 * apply reference count to knote structure, and 1158 * do not release it at the end of this routine. 1159 */ 1160 fp = NULL; 1161 1162 kn->kn_sfflags = kev->fflags; 1163 kn->kn_sdata = kev->data; 1164 kev->fflags = 0; 1165 kev->data = 0; 1166 kn->kn_kevent = *kev; 1167 kn->kn_pollid = pollid; 1168 1169 knote_attach(kn); 1170 mtx_leave(&kq->kq_lock); 1171 1172 error = filter_attach(kn); 1173 if (error != 0) { 1174 knote_drop(kn, p); 1175 goto done; 1176 } 1177 1178 /* 1179 * If this is a file descriptor filter, check if 1180 * fd was closed while the knote was being added. 1181 * knote_fdclose() has missed kn if the function 1182 * ran before kn appeared in kq_knlist. 1183 */ 1184 if ((fops->f_flags & FILTEROP_ISFD) && 1185 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 1186 /* 1187 * Drop the knote silently without error 1188 * because another thread might already have 1189 * seen it. This corresponds to the insert 1190 * happening in full before the close. 1191 */ 1192 filter_detach(kn); 1193 knote_drop(kn, p); 1194 goto done; 1195 } 1196 1197 /* Check if there is a pending event. */ 1198 active = filter_process(kn, NULL); 1199 mtx_enter(&kq->kq_lock); 1200 if (active) 1201 knote_activate(kn); 1202 } else if (kn->kn_fop == &badfd_filtops) { 1203 /* 1204 * Nothing expects this badfd knote any longer. 1205 * Drop it to make room for the new knote and retry. 1206 */ 1207 KASSERT(kq == p->p_kq); 1208 mtx_leave(&kq->kq_lock); 1209 filter_detach(kn); 1210 knote_drop(kn, p); 1211 1212 KASSERT(fp != NULL); 1213 FRELE(fp, p); 1214 fp = NULL; 1215 1216 goto again; 1217 } else { 1218 /* 1219 * The user may change some filter values after the 1220 * initial EV_ADD, but doing so will not reset any 1221 * filters which have already been triggered. 
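 * The updated fflags, data and udata values are applied through
 * filter_modify(), i.e. the filter's f_modify callback or its
 * f_event-based emulation.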
1222 */ 1223 mtx_leave(&kq->kq_lock); 1224 active = filter_modify(kev, kn); 1225 mtx_enter(&kq->kq_lock); 1226 if (active) 1227 knote_activate(kn); 1228 if (kev->flags & EV_ERROR) { 1229 error = kev->data; 1230 goto release; 1231 } 1232 } 1233 } else if (kev->flags & EV_DELETE) { 1234 mtx_leave(&kq->kq_lock); 1235 filter_detach(kn); 1236 knote_drop(kn, p); 1237 goto done; 1238 } 1239 1240 if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) 1241 kn->kn_status |= KN_DISABLED; 1242 1243 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 1244 kn->kn_status &= ~KN_DISABLED; 1245 mtx_leave(&kq->kq_lock); 1246 /* Check if there is a pending event. */ 1247 active = filter_process(kn, NULL); 1248 mtx_enter(&kq->kq_lock); 1249 if (active) 1250 knote_activate(kn); 1251 } 1252 1253 release: 1254 knote_release(kn); 1255 mtx_leave(&kq->kq_lock); 1256 done: 1257 if (fp != NULL) 1258 FRELE(fp, p); 1259 if (newkn != NULL) 1260 pool_put(&knote_pool, newkn); 1261 return (error); 1262 } 1263 1264 int 1265 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1266 { 1267 struct timespec elapsed, start, stop; 1268 uint64_t nsecs; 1269 int error; 1270 1271 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1272 1273 if (tsp != NULL) { 1274 getnanouptime(&start); 1275 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1276 } else 1277 nsecs = INFSLP; 1278 error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK, 1279 "kqread", nsecs); 1280 if (tsp != NULL) { 1281 getnanouptime(&stop); 1282 timespecsub(&stop, &start, &elapsed); 1283 timespecsub(tsp, &elapsed, tsp); 1284 if (tsp->tv_sec < 0) 1285 timespecclear(tsp); 1286 } 1287 1288 return (error); 1289 } 1290 1291 /* 1292 * Scan the kqueue, blocking if necessary until the target time is reached. 1293 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 1294 * 0 we do not block at all. 1295 */ 1296 int 1297 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1298 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1299 { 1300 struct kqueue *kq = scan->kqs_kq; 1301 struct knote *kn; 1302 int error = 0, nkev = 0; 1303 1304 if (maxevents == 0) 1305 goto done; 1306 retry: 1307 KASSERT(nkev == 0); 1308 1309 error = 0; 1310 1311 /* msleep() with PCATCH requires kernel lock. */ 1312 KERNEL_LOCK(); 1313 1314 mtx_enter(&kq->kq_lock); 1315 1316 if (kq->kq_state & KQ_DYING) { 1317 mtx_leave(&kq->kq_lock); 1318 KERNEL_UNLOCK(); 1319 error = EBADF; 1320 goto done; 1321 } 1322 1323 if (kq->kq_count == 0) { 1324 /* 1325 * Successive loops are only necessary if there are more 1326 * ready events to gather, so they don't need to block. 1327 */ 1328 if ((tsp != NULL && !timespecisset(tsp)) || 1329 scan->kqs_nevent != 0) { 1330 mtx_leave(&kq->kq_lock); 1331 KERNEL_UNLOCK(); 1332 error = 0; 1333 goto done; 1334 } 1335 kq->kq_state |= KQ_SLEEP; 1336 error = kqueue_sleep(kq, tsp); 1337 /* kqueue_sleep() has released kq_lock. */ 1338 KERNEL_UNLOCK(); 1339 if (error == 0 || error == EWOULDBLOCK) 1340 goto retry; 1341 /* don't restart after signals... */ 1342 if (error == ERESTART) 1343 error = EINTR; 1344 goto done; 1345 } 1346 1347 /* The actual scan does not sleep on kq, so unlock the kernel. */ 1348 KERNEL_UNLOCK(); 1349 1350 /* 1351 * Put the end marker in the queue to limit the scan to the events 1352 * that are currently active. This prevents events from being 1353 * recollected if they reactivate during scan. 
1354 * 1355 * If a partial scan has been performed already but no events have 1356 * been collected, reposition the end marker to make any new events 1357 * reachable. 1358 */ 1359 if (!scan->kqs_queued) { 1360 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1361 scan->kqs_queued = 1; 1362 } else if (scan->kqs_nevent == 0) { 1363 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1364 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1365 } 1366 1367 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1368 while (nkev < maxevents) { 1369 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1370 if (kn->kn_filter == EVFILT_MARKER) { 1371 if (kn == &scan->kqs_end) 1372 break; 1373 1374 /* Move start marker past another thread's marker. */ 1375 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1376 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1377 kn_tqe); 1378 continue; 1379 } 1380 1381 if (!knote_acquire(kn, NULL, 0)) { 1382 /* knote_acquire() has released kq_lock. */ 1383 mtx_enter(&kq->kq_lock); 1384 continue; 1385 } 1386 1387 kqueue_check(kq); 1388 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1389 kn->kn_status &= ~KN_QUEUED; 1390 kq->kq_count--; 1391 kqueue_check(kq); 1392 1393 if (kn->kn_status & KN_DISABLED) { 1394 knote_release(kn); 1395 continue; 1396 } 1397 1398 mtx_leave(&kq->kq_lock); 1399 1400 /* Drop expired kqpoll knotes. */ 1401 if (p->p_kq == kq && 1402 p->p_kq_serial > (unsigned long)kn->kn_udata) { 1403 filter_detach(kn); 1404 knote_drop(kn, p); 1405 mtx_enter(&kq->kq_lock); 1406 continue; 1407 } 1408 1409 /* 1410 * Invalidate knotes whose vnodes have been revoked. 1411 * This is a workaround; it is tricky to clear existing 1412 * knotes and prevent new ones from being registered 1413 * with the current revocation mechanism. 1414 */ 1415 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && 1416 kn->kn_fp != NULL && 1417 kn->kn_fp->f_type == DTYPE_VNODE) { 1418 struct vnode *vp = kn->kn_fp->f_data; 1419 1420 if (__predict_false(vp->v_op == &dead_vops && 1421 kn->kn_fop != &dead_filtops)) { 1422 filter_detach(kn); 1423 kn->kn_fop = &dead_filtops; 1424 1425 /* 1426 * Check if the event should be delivered. 1427 * Use f_event directly because this is 1428 * a special situation. 
1429 */ 1430 if (kn->kn_fop->f_event(kn, 0) == 0) { 1431 filter_detach(kn); 1432 knote_drop(kn, p); 1433 mtx_enter(&kq->kq_lock); 1434 continue; 1435 } 1436 } 1437 } 1438 1439 memset(kevp, 0, sizeof(*kevp)); 1440 if (filter_process(kn, kevp) == 0) { 1441 mtx_enter(&kq->kq_lock); 1442 if ((kn->kn_status & KN_QUEUED) == 0) 1443 kn->kn_status &= ~KN_ACTIVE; 1444 knote_release(kn); 1445 kqueue_check(kq); 1446 continue; 1447 } 1448 1449 /* 1450 * Post-event action on the note 1451 */ 1452 if (kevp->flags & EV_ONESHOT) { 1453 filter_detach(kn); 1454 knote_drop(kn, p); 1455 mtx_enter(&kq->kq_lock); 1456 } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { 1457 mtx_enter(&kq->kq_lock); 1458 if (kevp->flags & EV_DISPATCH) 1459 kn->kn_status |= KN_DISABLED; 1460 if ((kn->kn_status & KN_QUEUED) == 0) 1461 kn->kn_status &= ~KN_ACTIVE; 1462 knote_release(kn); 1463 } else { 1464 mtx_enter(&kq->kq_lock); 1465 if ((kn->kn_status & KN_QUEUED) == 0) { 1466 kqueue_check(kq); 1467 kq->kq_count++; 1468 kn->kn_status |= KN_QUEUED; 1469 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1470 } 1471 knote_release(kn); 1472 } 1473 kqueue_check(kq); 1474 1475 kevp++; 1476 nkev++; 1477 scan->kqs_nevent++; 1478 } 1479 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1480 mtx_leave(&kq->kq_lock); 1481 if (scan->kqs_nevent == 0) 1482 goto retry; 1483 done: 1484 *errorp = error; 1485 return (nkev); 1486 } 1487 1488 void 1489 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1490 { 1491 memset(scan, 0, sizeof(*scan)); 1492 1493 KQREF(kq); 1494 scan->kqs_kq = kq; 1495 scan->kqs_start.kn_filter = EVFILT_MARKER; 1496 scan->kqs_start.kn_status = KN_PROCESSING; 1497 scan->kqs_end.kn_filter = EVFILT_MARKER; 1498 scan->kqs_end.kn_status = KN_PROCESSING; 1499 } 1500 1501 void 1502 kqueue_scan_finish(struct kqueue_scan_state *scan) 1503 { 1504 struct kqueue *kq = scan->kqs_kq; 1505 1506 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1507 KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); 1508 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1509 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1510 1511 if (scan->kqs_queued) { 1512 scan->kqs_queued = 0; 1513 mtx_enter(&kq->kq_lock); 1514 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1515 mtx_leave(&kq->kq_lock); 1516 } 1517 KQRELE(kq); 1518 } 1519 1520 /* 1521 * XXX 1522 * This could be expanded to call kqueue_scan, if desired. 
1523 */ 1524 int 1525 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1526 { 1527 return (ENXIO); 1528 } 1529 1530 int 1531 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1532 { 1533 return (ENXIO); 1534 } 1535 1536 int 1537 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1538 { 1539 return (ENOTTY); 1540 } 1541 1542 int 1543 kqueue_poll(struct file *fp, int events, struct proc *p) 1544 { 1545 struct kqueue *kq = (struct kqueue *)fp->f_data; 1546 int revents = 0; 1547 1548 if (events & (POLLIN | POLLRDNORM)) { 1549 mtx_enter(&kq->kq_lock); 1550 if (kq->kq_count) { 1551 revents |= events & (POLLIN | POLLRDNORM); 1552 } else { 1553 selrecord(p, &kq->kq_sel); 1554 kq->kq_state |= KQ_SEL; 1555 } 1556 mtx_leave(&kq->kq_lock); 1557 } 1558 return (revents); 1559 } 1560 1561 int 1562 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1563 { 1564 struct kqueue *kq = fp->f_data; 1565 1566 memset(st, 0, sizeof(*st)); 1567 st->st_size = kq->kq_count; /* unlocked read */ 1568 st->st_blksize = sizeof(struct kevent); 1569 st->st_mode = S_IFIFO; 1570 return (0); 1571 } 1572 1573 void 1574 kqueue_purge(struct proc *p, struct kqueue *kq) 1575 { 1576 int i; 1577 1578 mtx_enter(&kq->kq_lock); 1579 for (i = 0; i < kq->kq_knlistsize; i++) 1580 knote_remove(p, kq, &kq->kq_knlist[i], 1); 1581 if (kq->kq_knhashmask != 0) { 1582 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1583 knote_remove(p, kq, &kq->kq_knhash[i], 1); 1584 } 1585 mtx_leave(&kq->kq_lock); 1586 } 1587 1588 void 1589 kqueue_terminate(struct proc *p, struct kqueue *kq) 1590 { 1591 struct knote *kn; 1592 1593 mtx_enter(&kq->kq_lock); 1594 1595 /* 1596 * Any remaining entries should be scan markers. 1597 * They are removed when the ongoing scans finish. 1598 */ 1599 KASSERT(kq->kq_count == 0); 1600 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) 1601 KASSERT(kn->kn_filter == EVFILT_MARKER); 1602 1603 kq->kq_state |= KQ_DYING; 1604 kqueue_wakeup(kq); 1605 mtx_leave(&kq->kq_lock); 1606 1607 KASSERT(klist_empty(&kq->kq_sel.si_note)); 1608 task_del(systq, &kq->kq_task); 1609 1610 } 1611 1612 int 1613 kqueue_close(struct file *fp, struct proc *p) 1614 { 1615 struct kqueue *kq = fp->f_data; 1616 1617 fp->f_data = NULL; 1618 1619 kqueue_purge(p, kq); 1620 kqueue_terminate(p, kq); 1621 1622 KQRELE(kq); 1623 1624 return (0); 1625 } 1626 1627 static void 1628 kqueue_task(void *arg) 1629 { 1630 struct kqueue *kq = arg; 1631 1632 /* Kernel lock is needed inside selwakeup(). */ 1633 KERNEL_ASSERT_LOCKED(); 1634 1635 mtx_enter(&kqueue_klist_lock); 1636 mtx_enter(&kq->kq_lock); 1637 if (kq->kq_state & KQ_SEL) { 1638 kq->kq_state &= ~KQ_SEL; 1639 mtx_leave(&kq->kq_lock); 1640 selwakeup(&kq->kq_sel); 1641 } else { 1642 mtx_leave(&kq->kq_lock); 1643 KNOTE(&kq->kq_sel.si_note, 0); 1644 } 1645 mtx_leave(&kqueue_klist_lock); 1646 KQRELE(kq); 1647 } 1648 1649 void 1650 kqueue_wakeup(struct kqueue *kq) 1651 { 1652 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1653 1654 if (kq->kq_state & KQ_SLEEP) { 1655 kq->kq_state &= ~KQ_SLEEP; 1656 wakeup(kq); 1657 } 1658 if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) { 1659 /* Defer activation to avoid recursion. 
*/ 1660 KQREF(kq); 1661 if (!task_add(systq, &kq->kq_task)) 1662 KQRELE(kq); 1663 } 1664 } 1665 1666 static void 1667 kqueue_expand_hash(struct kqueue *kq) 1668 { 1669 struct knlist *hash; 1670 u_long hashmask; 1671 1672 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1673 1674 if (kq->kq_knhashmask == 0) { 1675 mtx_leave(&kq->kq_lock); 1676 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1677 mtx_enter(&kq->kq_lock); 1678 if (kq->kq_knhashmask == 0) { 1679 kq->kq_knhash = hash; 1680 kq->kq_knhashmask = hashmask; 1681 } else { 1682 /* Another thread has allocated the hash. */ 1683 mtx_leave(&kq->kq_lock); 1684 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1685 mtx_enter(&kq->kq_lock); 1686 } 1687 } 1688 } 1689 1690 static void 1691 kqueue_expand_list(struct kqueue *kq, int fd) 1692 { 1693 struct knlist *list, *olist; 1694 int size, osize; 1695 1696 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1697 1698 if (kq->kq_knlistsize <= fd) { 1699 size = kq->kq_knlistsize; 1700 mtx_leave(&kq->kq_lock); 1701 while (size <= fd) 1702 size += KQEXTENT; 1703 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1704 mtx_enter(&kq->kq_lock); 1705 if (kq->kq_knlistsize <= fd) { 1706 memcpy(list, kq->kq_knlist, 1707 kq->kq_knlistsize * sizeof(*list)); 1708 memset(&list[kq->kq_knlistsize], 0, 1709 (size - kq->kq_knlistsize) * sizeof(*list)); 1710 olist = kq->kq_knlist; 1711 osize = kq->kq_knlistsize; 1712 kq->kq_knlist = list; 1713 kq->kq_knlistsize = size; 1714 mtx_leave(&kq->kq_lock); 1715 free(olist, M_KEVENT, osize * sizeof(*list)); 1716 mtx_enter(&kq->kq_lock); 1717 } else { 1718 /* Another thread has expanded the list. */ 1719 mtx_leave(&kq->kq_lock); 1720 free(list, M_KEVENT, size * sizeof(*list)); 1721 mtx_enter(&kq->kq_lock); 1722 } 1723 } 1724 } 1725 1726 /* 1727 * Acquire a knote, return non-zero on success, 0 on failure. 1728 * 1729 * If we cannot acquire the knote we sleep and return 0. The knote 1730 * may be stale on return in this case and the caller must restart 1731 * whatever loop they are in. 1732 * 1733 * If we are about to sleep and klist is non-NULL, the list is unlocked 1734 * before sleep and remains unlocked on return. 1735 */ 1736 int 1737 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1738 { 1739 struct kqueue *kq = kn->kn_kq; 1740 1741 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1742 KASSERT(kn->kn_filter != EVFILT_MARKER); 1743 1744 if (kn->kn_status & KN_PROCESSING) { 1745 kn->kn_status |= KN_WAITING; 1746 if (klist != NULL) { 1747 mtx_leave(&kq->kq_lock); 1748 klist_unlock(klist, ls); 1749 /* XXX Timeout resolves potential loss of wakeup. */ 1750 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1751 } else { 1752 msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts", 1753 SEC_TO_NSEC(1)); 1754 } 1755 /* knote may be stale now */ 1756 return (0); 1757 } 1758 kn->kn_status |= KN_PROCESSING; 1759 return (1); 1760 } 1761 1762 /* 1763 * Release an acquired knote, clearing KN_PROCESSING. 1764 */ 1765 void 1766 knote_release(struct knote *kn) 1767 { 1768 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1769 KASSERT(kn->kn_filter != EVFILT_MARKER); 1770 KASSERT(kn->kn_status & KN_PROCESSING); 1771 1772 if (kn->kn_status & KN_WAITING) { 1773 kn->kn_status &= ~KN_WAITING; 1774 wakeup(kn); 1775 } 1776 kn->kn_status &= ~KN_PROCESSING; 1777 /* kn should not be accessed anymore */ 1778 } 1779 1780 /* 1781 * activate one knote. 
1782 */ 1783 void 1784 knote_activate(struct knote *kn) 1785 { 1786 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1787 1788 kn->kn_status |= KN_ACTIVE; 1789 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1790 knote_enqueue(kn); 1791 } 1792 1793 /* 1794 * walk down a list of knotes, activating them if their event has triggered. 1795 */ 1796 void 1797 knote(struct klist *list, long hint) 1798 { 1799 struct knote *kn, *kn0; 1800 struct kqueue *kq; 1801 1802 KLIST_ASSERT_LOCKED(list); 1803 1804 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) { 1805 if (filter_event(kn, hint)) { 1806 kq = kn->kn_kq; 1807 mtx_enter(&kq->kq_lock); 1808 knote_activate(kn); 1809 mtx_leave(&kq->kq_lock); 1810 } 1811 } 1812 } 1813 1814 /* 1815 * remove all knotes from a specified knlist 1816 */ 1817 void 1818 knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list, int purge) 1819 { 1820 struct knote *kn; 1821 1822 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1823 1824 while ((kn = SLIST_FIRST(list)) != NULL) { 1825 KASSERT(kn->kn_kq == kq); 1826 1827 if (!purge) { 1828 /* Skip pending badfd knotes. */ 1829 while (kn->kn_fop == &badfd_filtops) { 1830 kn = SLIST_NEXT(kn, kn_link); 1831 if (kn == NULL) 1832 return; 1833 KASSERT(kn->kn_kq == kq); 1834 } 1835 } 1836 1837 if (!knote_acquire(kn, NULL, 0)) { 1838 /* knote_acquire() has released kq_lock. */ 1839 mtx_enter(&kq->kq_lock); 1840 continue; 1841 } 1842 mtx_leave(&kq->kq_lock); 1843 filter_detach(kn); 1844 1845 /* 1846 * Notify poll(2) and select(2) when a monitored 1847 * file descriptor is closed. 1848 * 1849 * This reuses the original knote for delivering the 1850 * notification so as to avoid allocating memory. 1851 */ 1852 if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) && 1853 !(p->p_kq == kq && 1854 p->p_kq_serial > (unsigned long)kn->kn_udata) && 1855 kn->kn_fop != &badfd_filtops) { 1856 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1857 FRELE(kn->kn_fp, p); 1858 kn->kn_fp = NULL; 1859 1860 kn->kn_fop = &badfd_filtops; 1861 filter_event(kn, 0); 1862 mtx_enter(&kq->kq_lock); 1863 knote_activate(kn); 1864 knote_release(kn); 1865 continue; 1866 } 1867 1868 knote_drop(kn, p); 1869 mtx_enter(&kq->kq_lock); 1870 } 1871 } 1872 1873 /* 1874 * remove all knotes referencing a specified fd 1875 */ 1876 void 1877 knote_fdclose(struct proc *p, int fd) 1878 { 1879 struct filedesc *fdp = p->p_p->ps_fd; 1880 struct kqueue *kq; 1881 1882 /* 1883 * fdplock can be ignored if the file descriptor table is being freed 1884 * because no other thread can access the fdp. 
1885 */ 1886 if (fdp->fd_refcnt != 0) 1887 fdpassertlocked(fdp); 1888 1889 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1890 mtx_enter(&kq->kq_lock); 1891 if (fd < kq->kq_knlistsize) 1892 knote_remove(p, kq, &kq->kq_knlist[fd], 0); 1893 mtx_leave(&kq->kq_lock); 1894 } 1895 } 1896 1897 /* 1898 * handle a process exiting, including the triggering of NOTE_EXIT notes 1899 * XXX this could be more efficient, doing a single pass down the klist 1900 */ 1901 void 1902 knote_processexit(struct proc *p) 1903 { 1904 struct process *pr = p->p_p; 1905 1906 KERNEL_ASSERT_LOCKED(); 1907 KASSERT(p == curproc); 1908 1909 KNOTE(&pr->ps_klist, NOTE_EXIT); 1910 1911 /* remove other knotes hanging off the process */ 1912 klist_invalidate(&pr->ps_klist); 1913 } 1914 1915 void 1916 knote_attach(struct knote *kn) 1917 { 1918 struct kqueue *kq = kn->kn_kq; 1919 struct knlist *list; 1920 1921 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1922 KASSERT(kn->kn_status & KN_PROCESSING); 1923 1924 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1925 KASSERT(kq->kq_knlistsize > kn->kn_id); 1926 list = &kq->kq_knlist[kn->kn_id]; 1927 } else { 1928 KASSERT(kq->kq_knhashmask != 0); 1929 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1930 } 1931 SLIST_INSERT_HEAD(list, kn, kn_link); 1932 kq->kq_nknotes++; 1933 } 1934 1935 void 1936 knote_detach(struct knote *kn) 1937 { 1938 struct kqueue *kq = kn->kn_kq; 1939 struct knlist *list; 1940 1941 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1942 KASSERT(kn->kn_status & KN_PROCESSING); 1943 1944 kq->kq_nknotes--; 1945 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1946 list = &kq->kq_knlist[kn->kn_id]; 1947 else 1948 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1949 SLIST_REMOVE(list, kn, knote, kn_link); 1950 } 1951 1952 /* 1953 * should be called at spl == 0, since we don't want to hold spl 1954 * while calling FRELE and pool_put. 1955 */ 1956 void 1957 knote_drop(struct knote *kn, struct proc *p) 1958 { 1959 struct kqueue *kq = kn->kn_kq; 1960 1961 KASSERT(kn->kn_filter != EVFILT_MARKER); 1962 1963 mtx_enter(&kq->kq_lock); 1964 knote_detach(kn); 1965 if (kn->kn_status & KN_QUEUED) 1966 knote_dequeue(kn); 1967 if (kn->kn_status & KN_WAITING) { 1968 kn->kn_status &= ~KN_WAITING; 1969 wakeup(kn); 1970 } 1971 mtx_leave(&kq->kq_lock); 1972 1973 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 1974 FRELE(kn->kn_fp, p); 1975 pool_put(&knote_pool, kn); 1976 } 1977 1978 1979 void 1980 knote_enqueue(struct knote *kn) 1981 { 1982 struct kqueue *kq = kn->kn_kq; 1983 1984 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1985 KASSERT(kn->kn_filter != EVFILT_MARKER); 1986 KASSERT((kn->kn_status & KN_QUEUED) == 0); 1987 1988 kqueue_check(kq); 1989 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1990 kn->kn_status |= KN_QUEUED; 1991 kq->kq_count++; 1992 kqueue_check(kq); 1993 kqueue_wakeup(kq); 1994 } 1995 1996 void 1997 knote_dequeue(struct knote *kn) 1998 { 1999 struct kqueue *kq = kn->kn_kq; 2000 2001 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 2002 KASSERT(kn->kn_filter != EVFILT_MARKER); 2003 KASSERT(kn->kn_status & KN_QUEUED); 2004 2005 kqueue_check(kq); 2006 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 2007 kn->kn_status &= ~KN_QUEUED; 2008 kq->kq_count--; 2009 kqueue_check(kq); 2010 } 2011 2012 /* 2013 * Assign parameters to the knote. 2014 * 2015 * The knote's object lock must be held. 
2016 */ 2017 void 2018 knote_assign(const struct kevent *kev, struct knote *kn) 2019 { 2020 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2021 KERNEL_ASSERT_LOCKED(); 2022 2023 kn->kn_sfflags = kev->fflags; 2024 kn->kn_sdata = kev->data; 2025 kn->kn_udata = kev->udata; 2026 } 2027 2028 /* 2029 * Submit the knote's event for delivery. 2030 * 2031 * The knote's object lock must be held. 2032 */ 2033 void 2034 knote_submit(struct knote *kn, struct kevent *kev) 2035 { 2036 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2037 KERNEL_ASSERT_LOCKED(); 2038 2039 if (kev != NULL) { 2040 *kev = kn->kn_kevent; 2041 if (kn->kn_flags & EV_CLEAR) { 2042 kn->kn_fflags = 0; 2043 kn->kn_data = 0; 2044 } 2045 } 2046 } 2047 2048 void 2049 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 2050 { 2051 SLIST_INIT(&klist->kl_list); 2052 klist->kl_ops = ops; 2053 klist->kl_arg = arg; 2054 } 2055 2056 void 2057 klist_free(struct klist *klist) 2058 { 2059 KASSERT(SLIST_EMPTY(&klist->kl_list)); 2060 } 2061 2062 void 2063 klist_insert(struct klist *klist, struct knote *kn) 2064 { 2065 int ls; 2066 2067 ls = klist_lock(klist); 2068 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2069 klist_unlock(klist, ls); 2070 } 2071 2072 void 2073 klist_insert_locked(struct klist *klist, struct knote *kn) 2074 { 2075 KLIST_ASSERT_LOCKED(klist); 2076 2077 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2078 } 2079 2080 void 2081 klist_remove(struct klist *klist, struct knote *kn) 2082 { 2083 int ls; 2084 2085 ls = klist_lock(klist); 2086 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2087 klist_unlock(klist, ls); 2088 } 2089 2090 void 2091 klist_remove_locked(struct klist *klist, struct knote *kn) 2092 { 2093 KLIST_ASSERT_LOCKED(klist); 2094 2095 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2096 } 2097 2098 /* 2099 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 2100 * 2101 * The caller of this function must not hold any locks that can block 2102 * filterops callbacks that run with KN_PROCESSING. 2103 * Otherwise this function might deadlock. 2104 */ 2105 void 2106 klist_invalidate(struct klist *list) 2107 { 2108 struct knote *kn; 2109 struct kqueue *kq; 2110 struct proc *p = curproc; 2111 int ls; 2112 2113 NET_ASSERT_UNLOCKED(); 2114 2115 ls = klist_lock(list); 2116 while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { 2117 kq = kn->kn_kq; 2118 mtx_enter(&kq->kq_lock); 2119 if (!knote_acquire(kn, list, ls)) { 2120 /* knote_acquire() has released kq_lock 2121 * and klist lock. 
*/ 2122 ls = klist_lock(list); 2123 continue; 2124 } 2125 mtx_leave(&kq->kq_lock); 2126 klist_unlock(list, ls); 2127 filter_detach(kn); 2128 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 2129 kn->kn_fop = &dead_filtops; 2130 filter_event(kn, 0); 2131 mtx_enter(&kq->kq_lock); 2132 knote_activate(kn); 2133 knote_release(kn); 2134 mtx_leave(&kq->kq_lock); 2135 } else { 2136 knote_drop(kn, p); 2137 } 2138 ls = klist_lock(list); 2139 } 2140 klist_unlock(list, ls); 2141 } 2142 2143 static int 2144 klist_lock(struct klist *list) 2145 { 2146 int ls = 0; 2147 2148 if (list->kl_ops != NULL) { 2149 ls = list->kl_ops->klo_lock(list->kl_arg); 2150 } else { 2151 KERNEL_LOCK(); 2152 ls = splhigh(); 2153 } 2154 return ls; 2155 } 2156 2157 static void 2158 klist_unlock(struct klist *list, int ls) 2159 { 2160 if (list->kl_ops != NULL) { 2161 list->kl_ops->klo_unlock(list->kl_arg, ls); 2162 } else { 2163 splx(ls); 2164 KERNEL_UNLOCK(); 2165 } 2166 } 2167 2168 static void 2169 klist_mutex_assertlk(void *arg) 2170 { 2171 struct mutex *mtx = arg; 2172 2173 (void)mtx; 2174 2175 MUTEX_ASSERT_LOCKED(mtx); 2176 } 2177 2178 static int 2179 klist_mutex_lock(void *arg) 2180 { 2181 struct mutex *mtx = arg; 2182 2183 mtx_enter(mtx); 2184 return 0; 2185 } 2186 2187 static void 2188 klist_mutex_unlock(void *arg, int s) 2189 { 2190 struct mutex *mtx = arg; 2191 2192 mtx_leave(mtx); 2193 } 2194 2195 static const struct klistops mutex_klistops = { 2196 .klo_assertlk = klist_mutex_assertlk, 2197 .klo_lock = klist_mutex_lock, 2198 .klo_unlock = klist_mutex_unlock, 2199 }; 2200 2201 void 2202 klist_init_mutex(struct klist *klist, struct mutex *mtx) 2203 { 2204 klist_init(klist, &mutex_klistops, mtx); 2205 } 2206 2207 static void 2208 klist_rwlock_assertlk(void *arg) 2209 { 2210 struct rwlock *rwl = arg; 2211 2212 (void)rwl; 2213 2214 rw_assert_wrlock(rwl); 2215 } 2216 2217 static int 2218 klist_rwlock_lock(void *arg) 2219 { 2220 struct rwlock *rwl = arg; 2221 2222 rw_enter_write(rwl); 2223 return 0; 2224 } 2225 2226 static void 2227 klist_rwlock_unlock(void *arg, int s) 2228 { 2229 struct rwlock *rwl = arg; 2230 2231 rw_exit_write(rwl); 2232 } 2233 2234 static const struct klistops rwlock_klistops = { 2235 .klo_assertlk = klist_rwlock_assertlk, 2236 .klo_lock = klist_rwlock_lock, 2237 .klo_unlock = klist_rwlock_unlock, 2238 }; 2239 2240 void 2241 klist_init_rwlock(struct klist *klist, struct rwlock *rwl) 2242 { 2243 klist_init(klist, &rwlock_klistops, rwl); 2244 } 2245
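
/*
 * Illustrative sketch (not part of the kernel build, kept under #if 0):
 * how the kqueue(2)/kevent(2) interface implemented above is typically
 * driven from userland, here registering an EVFILT_TIMER whose data
 * field is a period in milliseconds (see filt_timerattach() above).
 * The identifier and error handling are arbitrary.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct kevent change, result;
	int kq, n;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/* Arm a 500 ms periodic timer; ident 1 is an arbitrary identifier. */
	EV_SET(&change, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, NULL);
	if (kevent(kq, &change, 1, NULL, 0, NULL) == -1)
		err(1, "kevent: register");

	/* Block until the timer fires; data counts expirations (EV_CLEAR). */
	if ((n = kevent(kq, NULL, 0, &result, 1, NULL)) == -1)
		err(1, "kevent: wait");
	if (n > 0)
		printf("timer %lu fired %lld time(s)\n",
		    (unsigned long)result.ident, (long long)result.data);

	return 0;
}
#endif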