/*	$OpenBSD: kern_event.c,v 1.199 2024/07/29 12:42:53 claudio Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/vnode.h>
#include <sys/wait.h>

#ifdef DIAGNOSTIC
#define KLIST_ASSERT_LOCKED(kl) do {					\
	if ((kl)->kl_ops != NULL)					\
		(kl)->kl_ops->klo_assertlk((kl)->kl_arg);		\
	else								\
		KERNEL_ASSERT_LOCKED();					\
} while (0)
#else
#define KLIST_ASSERT_LOCKED(kl)	((void)(kl))
#endif

int	dokqueue(struct proc *, int, register_t *);
struct	kqueue *kqueue_alloc(struct filedesc *);
void	kqueue_terminate(struct proc *p, struct kqueue *);
void	KQREF(struct kqueue *);
void	KQRELE(struct kqueue *);

void	kqueue_purge(struct proc *, struct kqueue *);
int	kqueue_sleep(struct kqueue *, struct timespec *);

int	kqueue_read(struct file *, struct uio *, int);
int	kqueue_write(struct file *, struct uio *, int);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

#ifdef KQUEUE_DEBUG
void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif

static int	filter_attach(struct knote *kn);
static void	filter_detach(struct knote *kn);
static int	filter_event(struct knote *kn, long hint);
static int	filter_modify(struct kevent *kev, struct knote *kn);
static int	filter_process(struct knote *kn, struct kevent *kev);
static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);

const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_detach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
int	knote_acquire(struct knote *kn, struct klist *, int);
void	knote_release(struct knote *kn);
void	knote_activate(struct knote *kn);
void	knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist,
	    int idx, int purge);

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_kqueuemodify(struct kevent *kev, struct knote *kn);
int	filt_kqueueprocess(struct knote *kn, struct kevent *kev);
int	filt_kqueue_common(struct knote *kn, struct kqueue *kq);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_sigattach(struct knote *kn);
void	filt_sigdetach(struct knote *kn);
int	filt_signal(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timermodify(struct kevent *kev, struct knote *kn);
int	filt_timerprocess(struct knote *kn, struct kevent *kev);
void	filt_seltruedetach(struct knote *kn);

const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
	.f_modify	= filt_kqueuemodify,
	.f_process	= filt_kqueueprocess,
};

const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

const struct filterops sig_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_sigattach,
	.f_detach	= filt_sigdetach,
	.f_event	= filt_signal,
};

const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= NULL,
	.f_modify	= filt_timermodify,
	.f_process	= filt_timerprocess,
};

struct	pool knote_pool;
struct	pool kqueue_pool;
struct	mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
int	kq_ntimeouts = 0;
int	kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/*
 * Table for all system-defined filters.
 */
const struct filterops *const sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_DEVICE */
	&file_filtops,			/* EVFILT_EXCEPT */
};

void
KQREF(struct kqueue *kq)
{
	refcnt_take(&kq->kq_refcnt);
}

void
KQRELE(struct kqueue *kq)
{
	struct filedesc *fdp;

	if (refcnt_rele(&kq->kq_refcnt) == 0)
		return;

	fdp = kq->kq_fdp;
	if (rw_status(&fdp->fd_lock) == RW_WRITE) {
		LIST_REMOVE(kq, kq_next);
	} else {
		fdplock(fdp);
		LIST_REMOVE(kq, kq_next);
		fdpunlock(fdp);
	}

	KASSERT(TAILQ_EMPTY(&kq->kq_head));
	KASSERT(kq->kq_nknotes == 0);

	free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
	    sizeof(struct knlist));
	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
	klist_free(&kq->kq_klist);
	pool_put(&kqueue_pool, kq);
}

void
kqueue_init(void)
{
	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR,
	    PR_WAITOK, "kqueuepl", NULL);
	pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR,
	    PR_WAITOK, "knotepl", NULL);
}

void
kqueue_init_percpu(void)
{
	pool_cache_init(&knote_pool);
}

int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return fp->f_ops->fo_kqfilter(fp, kn);
}

int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &kqread_filtops;
	klist_insert(&kq->kq_klist, kn);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	klist_remove(&kq->kq_klist, kn);
}

int
filt_kqueue_common(struct knote *kn, struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	kn->kn_data = kq->kq_count;

	return (kn->kn_data > 0);
}

int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueuemodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	knote_assign(kev, kn);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueueprocess(struct knote *kn, struct kevent *kev)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_kqueue_common(kn, kq);
	if (active)
		knote_submit(kn, kev);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_procattach(struct knote *kn)
{
	struct process *pr;
	int s;

	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
		return pledge_fail(curproc, EPERM, PLEDGE_PROC);

	if (kn->kn_id > PID_MAX)
		return ESRCH;

	pr = prfind(kn->kn_id);
	if (pr == NULL)
		return (ESRCH);

	/* exiting processes can't be specified */
	if (pr->ps_flags & PS_EXITING)
		return (ESRCH);

	kn->kn_ptr.p_process = pr;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;		/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	s = splhigh();
	klist_insert_locked(&pr->ps_klist, kn);
	splx(s);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct process *pr = kn->kn_ptr.p_process;
	int s, status;

	mtx_enter(&kq->kq_lock);
	status = kn->kn_status;
	mtx_leave(&kq->kq_lock);

	if (status & KN_DETACHED)
		return;

	s = splhigh();
	klist_remove_locked(&pr->ps_klist, kn);
	splx(s);
}

int
filt_proc(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_kq;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished and remove it
	 * from the process's klist
	 */
	if (event == NOTE_EXIT) {
		struct process *pr = kn->kn_ptr.p_process;
		int s;

		mtx_enter(&kq->kq_lock);
		kn->kn_status |= KN_DETACHED;
		mtx_leave(&kq->kq_lock);

		s = splhigh();
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
		klist_remove_locked(&pr->ps_klist, kn);
		splx(s);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		memset(&kev, 0, sizeof(kev));
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_udata;		/* preserve udata */
		error = kqueue_register(kq, &kev, 0, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

/*
 * signal knotes are shared with proc knotes, so we apply a mask to
 * the hint in order to differentiate them from process hints.  This
 * could be avoided by using a signal-specific knote list, but probably
 * isn't worth the trouble.
468 */ 469 int 470 filt_sigattach(struct knote *kn) 471 { 472 struct process *pr = curproc->p_p; 473 int s; 474 475 if (kn->kn_id >= NSIG) 476 return EINVAL; 477 478 kn->kn_ptr.p_process = pr; 479 kn->kn_flags |= EV_CLEAR; /* automatically set */ 480 481 s = splhigh(); 482 klist_insert_locked(&pr->ps_klist, kn); 483 splx(s); 484 485 return (0); 486 } 487 488 void 489 filt_sigdetach(struct knote *kn) 490 { 491 struct process *pr = kn->kn_ptr.p_process; 492 int s; 493 494 s = splhigh(); 495 klist_remove_locked(&pr->ps_klist, kn); 496 splx(s); 497 } 498 499 int 500 filt_signal(struct knote *kn, long hint) 501 { 502 503 if (hint & NOTE_SIGNAL) { 504 hint &= ~NOTE_SIGNAL; 505 506 if (kn->kn_id == hint) 507 kn->kn_data++; 508 } 509 return (kn->kn_data != 0); 510 } 511 512 #define NOTE_TIMER_UNITMASK \ 513 (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS|NOTE_NSECONDS) 514 515 static int 516 filt_timervalidate(int sfflags, int64_t sdata, struct timespec *ts) 517 { 518 if (sfflags & ~(NOTE_TIMER_UNITMASK | NOTE_ABSTIME)) 519 return (EINVAL); 520 521 switch (sfflags & NOTE_TIMER_UNITMASK) { 522 case NOTE_SECONDS: 523 ts->tv_sec = sdata; 524 ts->tv_nsec = 0; 525 break; 526 case NOTE_MSECONDS: 527 ts->tv_sec = sdata / 1000; 528 ts->tv_nsec = (sdata % 1000) * 1000000; 529 break; 530 case NOTE_USECONDS: 531 ts->tv_sec = sdata / 1000000; 532 ts->tv_nsec = (sdata % 1000000) * 1000; 533 break; 534 case NOTE_NSECONDS: 535 ts->tv_sec = sdata / 1000000000; 536 ts->tv_nsec = sdata % 1000000000; 537 break; 538 default: 539 return (EINVAL); 540 } 541 542 return (0); 543 } 544 545 static void 546 filt_timeradd(struct knote *kn, struct timespec *ts) 547 { 548 struct timespec expiry, now; 549 struct timeout *to = kn->kn_hook; 550 int tticks; 551 552 if (kn->kn_sfflags & NOTE_ABSTIME) { 553 nanotime(&now); 554 if (timespeccmp(ts, &now, >)) { 555 timespecsub(ts, &now, &expiry); 556 /* XXX timeout_abs_ts with CLOCK_REALTIME */ 557 timeout_add(to, tstohz(&expiry)); 558 } else { 559 /* Expire immediately. */ 560 filt_timerexpire(kn); 561 } 562 return; 563 } 564 565 tticks = tstohz(ts); 566 /* Remove extra tick from tstohz() if timeout has fired before. */ 567 if (timeout_triggered(to)) 568 tticks--; 569 timeout_add(to, (tticks > 0) ? 
tticks : 1); 570 } 571 572 void 573 filt_timerexpire(void *knx) 574 { 575 struct timespec ts; 576 struct knote *kn = knx; 577 struct kqueue *kq = kn->kn_kq; 578 579 kn->kn_data++; 580 mtx_enter(&kq->kq_lock); 581 knote_activate(kn); 582 mtx_leave(&kq->kq_lock); 583 584 if ((kn->kn_flags & EV_ONESHOT) == 0 && 585 (kn->kn_sfflags & NOTE_ABSTIME) == 0) { 586 (void)filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); 587 filt_timeradd(kn, &ts); 588 } 589 } 590 591 /* 592 * data contains amount of time to sleep 593 */ 594 int 595 filt_timerattach(struct knote *kn) 596 { 597 struct timespec ts; 598 struct timeout *to; 599 int error; 600 601 error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); 602 if (error != 0) 603 return (error); 604 605 if (kq_ntimeouts > kq_timeoutmax) 606 return (ENOMEM); 607 kq_ntimeouts++; 608 609 if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) 610 kn->kn_flags |= EV_CLEAR; /* automatically set */ 611 to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); 612 timeout_set(to, filt_timerexpire, kn); 613 kn->kn_hook = to; 614 filt_timeradd(kn, &ts); 615 616 return (0); 617 } 618 619 void 620 filt_timerdetach(struct knote *kn) 621 { 622 struct timeout *to; 623 624 to = (struct timeout *)kn->kn_hook; 625 timeout_del_barrier(to); 626 free(to, M_KEVENT, sizeof(*to)); 627 kq_ntimeouts--; 628 } 629 630 int 631 filt_timermodify(struct kevent *kev, struct knote *kn) 632 { 633 struct timespec ts; 634 struct kqueue *kq = kn->kn_kq; 635 struct timeout *to = kn->kn_hook; 636 int error; 637 638 error = filt_timervalidate(kev->fflags, kev->data, &ts); 639 if (error != 0) { 640 kev->flags |= EV_ERROR; 641 kev->data = error; 642 return (0); 643 } 644 645 /* Reset the timer. Any pending events are discarded. */ 646 647 timeout_del_barrier(to); 648 649 mtx_enter(&kq->kq_lock); 650 if (kn->kn_status & KN_QUEUED) 651 knote_dequeue(kn); 652 kn->kn_status &= ~KN_ACTIVE; 653 mtx_leave(&kq->kq_lock); 654 655 kn->kn_data = 0; 656 knote_assign(kev, kn); 657 /* Reinit timeout to invoke tick adjustment again. */ 658 timeout_set(to, filt_timerexpire, kn); 659 filt_timeradd(kn, &ts); 660 661 return (0); 662 } 663 664 int 665 filt_timerprocess(struct knote *kn, struct kevent *kev) 666 { 667 int active, s; 668 669 s = splsoftclock(); 670 active = (kn->kn_data != 0); 671 if (active) 672 knote_submit(kn, kev); 673 splx(s); 674 675 return (active); 676 } 677 678 679 /* 680 * filt_seltrue: 681 * 682 * This filter "event" routine simulates seltrue(). 683 */ 684 int 685 filt_seltrue(struct knote *kn, long hint) 686 { 687 688 /* 689 * We don't know how much data can be read/written, 690 * but we know that it *can* be. This is about as 691 * good as select/poll does as well. 692 */ 693 kn->kn_data = 0; 694 return (1); 695 } 696 697 int 698 filt_seltruemodify(struct kevent *kev, struct knote *kn) 699 { 700 knote_assign(kev, kn); 701 return (kn->kn_fop->f_event(kn, 0)); 702 } 703 704 int 705 filt_seltrueprocess(struct knote *kn, struct kevent *kev) 706 { 707 int active; 708 709 active = kn->kn_fop->f_event(kn, 0); 710 if (active) 711 knote_submit(kn, kev); 712 return (active); 713 } 714 715 /* 716 * This provides full kqfilter entry for device switch tables, which 717 * has same effect as filter using filt_seltrue() as filter method. 
718 */ 719 void 720 filt_seltruedetach(struct knote *kn) 721 { 722 /* Nothing to do */ 723 } 724 725 const struct filterops seltrue_filtops = { 726 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 727 .f_attach = NULL, 728 .f_detach = filt_seltruedetach, 729 .f_event = filt_seltrue, 730 .f_modify = filt_seltruemodify, 731 .f_process = filt_seltrueprocess, 732 }; 733 734 int 735 seltrue_kqfilter(dev_t dev, struct knote *kn) 736 { 737 switch (kn->kn_filter) { 738 case EVFILT_READ: 739 case EVFILT_WRITE: 740 kn->kn_fop = &seltrue_filtops; 741 break; 742 default: 743 return (EINVAL); 744 } 745 746 /* Nothing more to do */ 747 return (0); 748 } 749 750 static int 751 filt_dead(struct knote *kn, long hint) 752 { 753 if (kn->kn_filter == EVFILT_EXCEPT) { 754 /* 755 * Do not deliver event because there is no out-of-band data. 756 * However, let HUP condition pass for poll(2). 757 */ 758 if ((kn->kn_flags & __EV_POLL) == 0) { 759 kn->kn_flags |= EV_DISABLE; 760 return (0); 761 } 762 } 763 764 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 765 if (kn->kn_flags & __EV_POLL) 766 kn->kn_flags |= __EV_HUP; 767 kn->kn_data = 0; 768 return (1); 769 } 770 771 static void 772 filt_deaddetach(struct knote *kn) 773 { 774 /* Nothing to do */ 775 } 776 777 const struct filterops dead_filtops = { 778 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 779 .f_attach = NULL, 780 .f_detach = filt_deaddetach, 781 .f_event = filt_dead, 782 .f_modify = filt_seltruemodify, 783 .f_process = filt_seltrueprocess, 784 }; 785 786 static int 787 filt_badfd(struct knote *kn, long hint) 788 { 789 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 790 kn->kn_data = EBADF; 791 return (1); 792 } 793 794 /* For use with kqpoll. */ 795 const struct filterops badfd_filtops = { 796 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 797 .f_attach = NULL, 798 .f_detach = filt_deaddetach, 799 .f_event = filt_badfd, 800 .f_modify = filt_seltruemodify, 801 .f_process = filt_seltrueprocess, 802 }; 803 804 static int 805 filter_attach(struct knote *kn) 806 { 807 int error; 808 809 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 810 error = kn->kn_fop->f_attach(kn); 811 } else { 812 KERNEL_LOCK(); 813 error = kn->kn_fop->f_attach(kn); 814 KERNEL_UNLOCK(); 815 } 816 return (error); 817 } 818 819 static void 820 filter_detach(struct knote *kn) 821 { 822 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 823 kn->kn_fop->f_detach(kn); 824 } else { 825 KERNEL_LOCK(); 826 kn->kn_fop->f_detach(kn); 827 KERNEL_UNLOCK(); 828 } 829 } 830 831 static int 832 filter_event(struct knote *kn, long hint) 833 { 834 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 835 KERNEL_ASSERT_LOCKED(); 836 837 return (kn->kn_fop->f_event(kn, hint)); 838 } 839 840 static int 841 filter_modify(struct kevent *kev, struct knote *kn) 842 { 843 int active, s; 844 845 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 846 active = kn->kn_fop->f_modify(kev, kn); 847 } else { 848 KERNEL_LOCK(); 849 if (kn->kn_fop->f_modify != NULL) { 850 active = kn->kn_fop->f_modify(kev, kn); 851 } else { 852 s = splhigh(); 853 active = knote_modify(kev, kn); 854 splx(s); 855 } 856 KERNEL_UNLOCK(); 857 } 858 return (active); 859 } 860 861 static int 862 filter_process(struct knote *kn, struct kevent *kev) 863 { 864 int active, s; 865 866 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 867 active = kn->kn_fop->f_process(kn, kev); 868 } else { 869 KERNEL_LOCK(); 870 if (kn->kn_fop->f_process != NULL) { 871 active = kn->kn_fop->f_process(kn, kev); 872 } else { 873 s = splhigh(); 874 active = knote_process(kn, kev); 875 splx(s); 876 } 877 
KERNEL_UNLOCK(); 878 } 879 return (active); 880 } 881 882 /* 883 * Initialize the current thread for poll/select system call. 884 * num indicates the number of serials that the system call may utilize. 885 * After this function, the valid range of serials is 886 * p_kq_serial <= x < p_kq_serial + num. 887 */ 888 void 889 kqpoll_init(unsigned int num) 890 { 891 struct proc *p = curproc; 892 struct filedesc *fdp; 893 894 if (p->p_kq == NULL) { 895 p->p_kq = kqueue_alloc(p->p_fd); 896 p->p_kq_serial = arc4random(); 897 fdp = p->p_fd; 898 fdplock(fdp); 899 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 900 fdpunlock(fdp); 901 } 902 903 if (p->p_kq_serial + num < p->p_kq_serial) { 904 /* Serial is about to wrap. Clear all attached knotes. */ 905 kqueue_purge(p, p->p_kq); 906 p->p_kq_serial = 0; 907 } 908 } 909 910 /* 911 * Finish poll/select system call. 912 * num must have the same value that was used with kqpoll_init(). 913 */ 914 void 915 kqpoll_done(unsigned int num) 916 { 917 struct proc *p = curproc; 918 struct kqueue *kq = p->p_kq; 919 920 KASSERT(p->p_kq != NULL); 921 KASSERT(p->p_kq_serial + num >= p->p_kq_serial); 922 923 p->p_kq_serial += num; 924 925 /* 926 * Because of kn_pollid key, a thread can in principle allocate 927 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly 928 * with suitably varying pollfd arrays. 929 * Prevent such a large allocation by clearing knotes eagerly 930 * if there are too many of them. 931 * 932 * A small multiple of kq_knlistsize should give enough margin 933 * that eager clearing is infrequent, or does not happen at all, 934 * with normal programs. 935 * A single pollfd entry can use up to three knotes. 936 * Typically there is no significant overlap of fd and events 937 * between different entries in the pollfd array. 938 */ 939 if (kq->kq_nknotes > 4 * kq->kq_knlistsize) 940 kqueue_purge(p, kq); 941 } 942 943 void 944 kqpoll_exit(void) 945 { 946 struct proc *p = curproc; 947 948 if (p->p_kq == NULL) 949 return; 950 951 kqueue_purge(p, p->p_kq); 952 kqueue_terminate(p, p->p_kq); 953 KASSERT(p->p_kq->kq_refcnt.r_refs == 1); 954 KQRELE(p->p_kq); 955 p->p_kq = NULL; 956 } 957 958 struct kqueue * 959 kqueue_alloc(struct filedesc *fdp) 960 { 961 struct kqueue *kq; 962 963 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 964 refcnt_init(&kq->kq_refcnt); 965 kq->kq_fdp = fdp; 966 TAILQ_INIT(&kq->kq_head); 967 mtx_init(&kq->kq_lock, IPL_HIGH); 968 task_set(&kq->kq_task, kqueue_task, kq); 969 klist_init_mutex(&kq->kq_klist, &kqueue_klist_lock); 970 971 return (kq); 972 } 973 974 int 975 dokqueue(struct proc *p, int flags, register_t *retval) 976 { 977 struct filedesc *fdp = p->p_fd; 978 struct kqueue *kq; 979 struct file *fp; 980 int cloexec, error, fd; 981 982 cloexec = (flags & O_CLOEXEC) ? 
UF_EXCLOSE : 0; 983 984 kq = kqueue_alloc(fdp); 985 986 fdplock(fdp); 987 error = falloc(p, &fp, &fd); 988 if (error) 989 goto out; 990 fp->f_flag = FREAD | FWRITE | (flags & FNONBLOCK); 991 fp->f_type = DTYPE_KQUEUE; 992 fp->f_ops = &kqueueops; 993 fp->f_data = kq; 994 *retval = fd; 995 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 996 kq = NULL; 997 fdinsert(fdp, fd, cloexec, fp); 998 FRELE(fp, p); 999 out: 1000 fdpunlock(fdp); 1001 if (kq != NULL) 1002 pool_put(&kqueue_pool, kq); 1003 return (error); 1004 } 1005 1006 int 1007 sys_kqueue(struct proc *p, void *v, register_t *retval) 1008 { 1009 return (dokqueue(p, 0, retval)); 1010 } 1011 1012 int 1013 sys_kqueue1(struct proc *p, void *v, register_t *retval) 1014 { 1015 struct sys_kqueue1_args /* { 1016 syscallarg(int) flags; 1017 } */ *uap = v; 1018 1019 if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK)) 1020 return (EINVAL); 1021 return (dokqueue(p, SCARG(uap, flags), retval)); 1022 } 1023 1024 int 1025 sys_kevent(struct proc *p, void *v, register_t *retval) 1026 { 1027 struct kqueue_scan_state scan; 1028 struct filedesc* fdp = p->p_fd; 1029 struct sys_kevent_args /* { 1030 syscallarg(int) fd; 1031 syscallarg(const struct kevent *) changelist; 1032 syscallarg(int) nchanges; 1033 syscallarg(struct kevent *) eventlist; 1034 syscallarg(int) nevents; 1035 syscallarg(const struct timespec *) timeout; 1036 } */ *uap = v; 1037 struct kevent *kevp; 1038 struct kqueue *kq; 1039 struct file *fp; 1040 struct timespec ts; 1041 struct timespec *tsp = NULL; 1042 int i, n, nerrors, error; 1043 int ready, total; 1044 struct kevent kev[KQ_NEVENTS]; 1045 1046 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 1047 return (EBADF); 1048 1049 if (fp->f_type != DTYPE_KQUEUE) { 1050 error = EBADF; 1051 goto done; 1052 } 1053 1054 if (SCARG(uap, timeout) != NULL) { 1055 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 1056 if (error) 1057 goto done; 1058 #ifdef KTRACE 1059 if (KTRPOINT(p, KTR_STRUCT)) 1060 ktrreltimespec(p, &ts); 1061 #endif 1062 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 1063 error = EINVAL; 1064 goto done; 1065 } 1066 tsp = &ts; 1067 } 1068 1069 kq = fp->f_data; 1070 nerrors = 0; 1071 1072 while ((n = SCARG(uap, nchanges)) > 0) { 1073 if (n > nitems(kev)) 1074 n = nitems(kev); 1075 error = copyin(SCARG(uap, changelist), kev, 1076 n * sizeof(struct kevent)); 1077 if (error) 1078 goto done; 1079 #ifdef KTRACE 1080 if (KTRPOINT(p, KTR_STRUCT)) 1081 ktrevent(p, kev, n); 1082 #endif 1083 for (i = 0; i < n; i++) { 1084 kevp = &kev[i]; 1085 kevp->flags &= ~EV_SYSFLAGS; 1086 error = kqueue_register(kq, kevp, 0, p); 1087 if (error || (kevp->flags & EV_RECEIPT)) { 1088 if (SCARG(uap, nevents) != 0) { 1089 kevp->flags = EV_ERROR; 1090 kevp->data = error; 1091 copyout(kevp, SCARG(uap, eventlist), 1092 sizeof(*kevp)); 1093 SCARG(uap, eventlist)++; 1094 SCARG(uap, nevents)--; 1095 nerrors++; 1096 } else { 1097 goto done; 1098 } 1099 } 1100 } 1101 SCARG(uap, nchanges) -= n; 1102 SCARG(uap, changelist) += n; 1103 } 1104 if (nerrors) { 1105 *retval = nerrors; 1106 error = 0; 1107 goto done; 1108 } 1109 1110 kqueue_scan_setup(&scan, kq); 1111 FRELE(fp, p); 1112 /* 1113 * Collect as many events as we can. The timeout on successive 1114 * loops is disabled (kqueue_scan() becomes non-blocking). 
1115 */ 1116 total = 0; 1117 error = 0; 1118 while ((n = SCARG(uap, nevents) - total) > 0) { 1119 if (n > nitems(kev)) 1120 n = nitems(kev); 1121 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 1122 if (ready == 0) 1123 break; 1124 error = copyout(kev, SCARG(uap, eventlist) + total, 1125 sizeof(struct kevent) * ready); 1126 #ifdef KTRACE 1127 if (KTRPOINT(p, KTR_STRUCT)) 1128 ktrevent(p, kev, ready); 1129 #endif 1130 total += ready; 1131 if (error || ready < n) 1132 break; 1133 } 1134 kqueue_scan_finish(&scan); 1135 *retval = total; 1136 return (error); 1137 1138 done: 1139 FRELE(fp, p); 1140 return (error); 1141 } 1142 1143 #ifdef KQUEUE_DEBUG 1144 void 1145 kqueue_do_check(struct kqueue *kq, const char *func, int line) 1146 { 1147 struct knote *kn; 1148 int count = 0, nmarker = 0; 1149 1150 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1151 1152 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1153 if (kn->kn_filter == EVFILT_MARKER) { 1154 if ((kn->kn_status & KN_QUEUED) != 0) 1155 panic("%s:%d: kq=%p kn=%p marker QUEUED", 1156 func, line, kq, kn); 1157 nmarker++; 1158 } else { 1159 if ((kn->kn_status & KN_ACTIVE) == 0) 1160 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 1161 func, line, kq, kn); 1162 if ((kn->kn_status & KN_QUEUED) == 0) 1163 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 1164 func, line, kq, kn); 1165 if (kn->kn_kq != kq) 1166 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 1167 func, line, kq, kn, kn->kn_kq); 1168 count++; 1169 if (count > kq->kq_count) 1170 goto bad; 1171 } 1172 } 1173 if (count != kq->kq_count) { 1174 bad: 1175 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 1176 func, line, kq, kq->kq_count, count, nmarker); 1177 } 1178 } 1179 #endif 1180 1181 int 1182 kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid, 1183 struct proc *p) 1184 { 1185 struct filedesc *fdp = kq->kq_fdp; 1186 const struct filterops *fops = NULL; 1187 struct file *fp = NULL; 1188 struct knote *kn = NULL, *newkn = NULL; 1189 struct knlist *list = NULL; 1190 int active, error = 0; 1191 1192 KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq)); 1193 1194 if (kev->filter < 0) { 1195 if (kev->filter + EVFILT_SYSCOUNT < 0) 1196 return (EINVAL); 1197 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 1198 } 1199 1200 if (fops == NULL) { 1201 /* 1202 * XXX 1203 * filter attach routine is responsible for ensuring that 1204 * the identifier can be attached to it. 1205 */ 1206 return (EINVAL); 1207 } 1208 1209 if (fops->f_flags & FILTEROP_ISFD) { 1210 /* validate descriptor */ 1211 if (kev->ident > INT_MAX) 1212 return (EBADF); 1213 } 1214 1215 if (kev->flags & EV_ADD) 1216 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 1217 1218 again: 1219 if (fops->f_flags & FILTEROP_ISFD) { 1220 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 1221 error = EBADF; 1222 goto done; 1223 } 1224 mtx_enter(&kq->kq_lock); 1225 if (kev->flags & EV_ADD) 1226 kqueue_expand_list(kq, kev->ident); 1227 if (kev->ident < kq->kq_knlistsize) 1228 list = &kq->kq_knlist[kev->ident]; 1229 } else { 1230 mtx_enter(&kq->kq_lock); 1231 if (kev->flags & EV_ADD) 1232 kqueue_expand_hash(kq); 1233 if (kq->kq_knhashmask != 0) { 1234 list = &kq->kq_knhash[ 1235 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1236 } 1237 } 1238 if (list != NULL) { 1239 SLIST_FOREACH(kn, list, kn_link) { 1240 if (kev->filter == kn->kn_filter && 1241 kev->ident == kn->kn_id && 1242 pollid == kn->kn_pollid) { 1243 if (!knote_acquire(kn, NULL, 0)) { 1244 /* knote_acquire() has released 1245 * kq_lock. 
*/ 1246 if (fp != NULL) { 1247 FRELE(fp, p); 1248 fp = NULL; 1249 } 1250 goto again; 1251 } 1252 break; 1253 } 1254 } 1255 } 1256 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 1257 1258 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 1259 mtx_leave(&kq->kq_lock); 1260 error = ENOENT; 1261 goto done; 1262 } 1263 1264 /* 1265 * kn now contains the matching knote, or NULL if no match. 1266 */ 1267 if (kev->flags & EV_ADD) { 1268 if (kn == NULL) { 1269 kn = newkn; 1270 newkn = NULL; 1271 kn->kn_status = KN_PROCESSING; 1272 kn->kn_fp = fp; 1273 kn->kn_kq = kq; 1274 kn->kn_fop = fops; 1275 1276 /* 1277 * apply reference count to knote structure, and 1278 * do not release it at the end of this routine. 1279 */ 1280 fp = NULL; 1281 1282 kn->kn_sfflags = kev->fflags; 1283 kn->kn_sdata = kev->data; 1284 kev->fflags = 0; 1285 kev->data = 0; 1286 kn->kn_kevent = *kev; 1287 kn->kn_pollid = pollid; 1288 1289 knote_attach(kn); 1290 mtx_leave(&kq->kq_lock); 1291 1292 error = filter_attach(kn); 1293 if (error != 0) { 1294 knote_drop(kn, p); 1295 goto done; 1296 } 1297 1298 /* 1299 * If this is a file descriptor filter, check if 1300 * fd was closed while the knote was being added. 1301 * knote_fdclose() has missed kn if the function 1302 * ran before kn appeared in kq_knlist. 1303 */ 1304 if ((fops->f_flags & FILTEROP_ISFD) && 1305 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 1306 /* 1307 * Drop the knote silently without error 1308 * because another thread might already have 1309 * seen it. This corresponds to the insert 1310 * happening in full before the close. 1311 */ 1312 filter_detach(kn); 1313 knote_drop(kn, p); 1314 goto done; 1315 } 1316 1317 /* Check if there is a pending event. */ 1318 active = filter_process(kn, NULL); 1319 mtx_enter(&kq->kq_lock); 1320 if (active) 1321 knote_activate(kn); 1322 } else if (kn->kn_fop == &badfd_filtops) { 1323 /* 1324 * Nothing expects this badfd knote any longer. 1325 * Drop it to make room for the new knote and retry. 1326 */ 1327 KASSERT(kq == p->p_kq); 1328 mtx_leave(&kq->kq_lock); 1329 filter_detach(kn); 1330 knote_drop(kn, p); 1331 1332 KASSERT(fp != NULL); 1333 FRELE(fp, p); 1334 fp = NULL; 1335 1336 goto again; 1337 } else { 1338 /* 1339 * The user may change some filter values after the 1340 * initial EV_ADD, but doing so will not reset any 1341 * filters which have already been triggered. 1342 */ 1343 mtx_leave(&kq->kq_lock); 1344 active = filter_modify(kev, kn); 1345 mtx_enter(&kq->kq_lock); 1346 if (active) 1347 knote_activate(kn); 1348 if (kev->flags & EV_ERROR) { 1349 error = kev->data; 1350 goto release; 1351 } 1352 } 1353 } else if (kev->flags & EV_DELETE) { 1354 mtx_leave(&kq->kq_lock); 1355 filter_detach(kn); 1356 knote_drop(kn, p); 1357 goto done; 1358 } 1359 1360 if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) 1361 kn->kn_status |= KN_DISABLED; 1362 1363 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 1364 kn->kn_status &= ~KN_DISABLED; 1365 mtx_leave(&kq->kq_lock); 1366 /* Check if there is a pending event. 
*/ 1367 active = filter_process(kn, NULL); 1368 mtx_enter(&kq->kq_lock); 1369 if (active) 1370 knote_activate(kn); 1371 } 1372 1373 release: 1374 knote_release(kn); 1375 mtx_leave(&kq->kq_lock); 1376 done: 1377 if (fp != NULL) 1378 FRELE(fp, p); 1379 if (newkn != NULL) 1380 pool_put(&knote_pool, newkn); 1381 return (error); 1382 } 1383 1384 int 1385 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1386 { 1387 struct timespec elapsed, start, stop; 1388 uint64_t nsecs; 1389 int error; 1390 1391 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1392 1393 if (tsp != NULL) { 1394 getnanouptime(&start); 1395 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1396 } else 1397 nsecs = INFSLP; 1398 error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK, 1399 "kqread", nsecs); 1400 if (tsp != NULL) { 1401 getnanouptime(&stop); 1402 timespecsub(&stop, &start, &elapsed); 1403 timespecsub(tsp, &elapsed, tsp); 1404 if (tsp->tv_sec < 0) 1405 timespecclear(tsp); 1406 } 1407 1408 return (error); 1409 } 1410 1411 /* 1412 * Scan the kqueue, blocking if necessary until the target time is reached. 1413 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 1414 * 0 we do not block at all. 1415 */ 1416 int 1417 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1418 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1419 { 1420 struct kqueue *kq = scan->kqs_kq; 1421 struct knote *kn; 1422 int error = 0, nkev = 0; 1423 int reinserted; 1424 1425 if (maxevents == 0) 1426 goto done; 1427 retry: 1428 KASSERT(nkev == 0); 1429 1430 error = 0; 1431 reinserted = 0; 1432 1433 mtx_enter(&kq->kq_lock); 1434 1435 if (kq->kq_state & KQ_DYING) { 1436 mtx_leave(&kq->kq_lock); 1437 error = EBADF; 1438 goto done; 1439 } 1440 1441 if (kq->kq_count == 0) { 1442 /* 1443 * Successive loops are only necessary if there are more 1444 * ready events to gather, so they don't need to block. 1445 */ 1446 if ((tsp != NULL && !timespecisset(tsp)) || 1447 scan->kqs_nevent != 0) { 1448 mtx_leave(&kq->kq_lock); 1449 error = 0; 1450 goto done; 1451 } 1452 kq->kq_state |= KQ_SLEEP; 1453 error = kqueue_sleep(kq, tsp); 1454 /* kqueue_sleep() has released kq_lock. */ 1455 if (error == 0 || error == EWOULDBLOCK) 1456 goto retry; 1457 /* don't restart after signals... */ 1458 if (error == ERESTART) 1459 error = EINTR; 1460 goto done; 1461 } 1462 1463 /* 1464 * Put the end marker in the queue to limit the scan to the events 1465 * that are currently active. This prevents events from being 1466 * recollected if they reactivate during scan. 1467 * 1468 * If a partial scan has been performed already but no events have 1469 * been collected, reposition the end marker to make any new events 1470 * reachable. 1471 */ 1472 if (!scan->kqs_queued) { 1473 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1474 scan->kqs_queued = 1; 1475 } else if (scan->kqs_nevent == 0) { 1476 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1477 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1478 } 1479 1480 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1481 while (nkev < maxevents) { 1482 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1483 if (kn->kn_filter == EVFILT_MARKER) { 1484 if (kn == &scan->kqs_end) 1485 break; 1486 1487 /* Move start marker past another thread's marker. 
*/ 1488 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1489 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1490 kn_tqe); 1491 continue; 1492 } 1493 1494 if (!knote_acquire(kn, NULL, 0)) { 1495 /* knote_acquire() has released kq_lock. */ 1496 mtx_enter(&kq->kq_lock); 1497 continue; 1498 } 1499 1500 kqueue_check(kq); 1501 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1502 kn->kn_status &= ~KN_QUEUED; 1503 kq->kq_count--; 1504 kqueue_check(kq); 1505 1506 if (kn->kn_status & KN_DISABLED) { 1507 knote_release(kn); 1508 continue; 1509 } 1510 1511 mtx_leave(&kq->kq_lock); 1512 1513 /* Drop expired kqpoll knotes. */ 1514 if (p->p_kq == kq && 1515 p->p_kq_serial > (unsigned long)kn->kn_udata) { 1516 filter_detach(kn); 1517 knote_drop(kn, p); 1518 mtx_enter(&kq->kq_lock); 1519 continue; 1520 } 1521 1522 /* 1523 * Invalidate knotes whose vnodes have been revoked. 1524 * This is a workaround; it is tricky to clear existing 1525 * knotes and prevent new ones from being registered 1526 * with the current revocation mechanism. 1527 */ 1528 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && 1529 kn->kn_fp != NULL && 1530 kn->kn_fp->f_type == DTYPE_VNODE) { 1531 struct vnode *vp = kn->kn_fp->f_data; 1532 1533 if (__predict_false(vp->v_op == &dead_vops && 1534 kn->kn_fop != &dead_filtops)) { 1535 filter_detach(kn); 1536 kn->kn_fop = &dead_filtops; 1537 1538 /* 1539 * Check if the event should be delivered. 1540 * Use f_event directly because this is 1541 * a special situation. 1542 */ 1543 if (kn->kn_fop->f_event(kn, 0) == 0) { 1544 filter_detach(kn); 1545 knote_drop(kn, p); 1546 mtx_enter(&kq->kq_lock); 1547 continue; 1548 } 1549 } 1550 } 1551 1552 memset(kevp, 0, sizeof(*kevp)); 1553 if (filter_process(kn, kevp) == 0) { 1554 mtx_enter(&kq->kq_lock); 1555 if ((kn->kn_status & KN_QUEUED) == 0) 1556 kn->kn_status &= ~KN_ACTIVE; 1557 knote_release(kn); 1558 kqueue_check(kq); 1559 continue; 1560 } 1561 1562 /* 1563 * Post-event action on the note 1564 */ 1565 if (kevp->flags & EV_ONESHOT) { 1566 filter_detach(kn); 1567 knote_drop(kn, p); 1568 mtx_enter(&kq->kq_lock); 1569 } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { 1570 mtx_enter(&kq->kq_lock); 1571 if (kevp->flags & EV_DISPATCH) 1572 kn->kn_status |= KN_DISABLED; 1573 if ((kn->kn_status & KN_QUEUED) == 0) 1574 kn->kn_status &= ~KN_ACTIVE; 1575 knote_release(kn); 1576 } else { 1577 mtx_enter(&kq->kq_lock); 1578 if ((kn->kn_status & KN_QUEUED) == 0) { 1579 kqueue_check(kq); 1580 kq->kq_count++; 1581 kn->kn_status |= KN_QUEUED; 1582 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1583 /* Wakeup is done after loop. 
*/ 1584 reinserted = 1; 1585 } 1586 knote_release(kn); 1587 } 1588 kqueue_check(kq); 1589 1590 kevp++; 1591 nkev++; 1592 scan->kqs_nevent++; 1593 } 1594 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1595 if (reinserted && kq->kq_count != 0) 1596 kqueue_wakeup(kq); 1597 mtx_leave(&kq->kq_lock); 1598 if (scan->kqs_nevent == 0) 1599 goto retry; 1600 done: 1601 *errorp = error; 1602 return (nkev); 1603 } 1604 1605 void 1606 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1607 { 1608 memset(scan, 0, sizeof(*scan)); 1609 1610 KQREF(kq); 1611 scan->kqs_kq = kq; 1612 scan->kqs_start.kn_filter = EVFILT_MARKER; 1613 scan->kqs_start.kn_status = KN_PROCESSING; 1614 scan->kqs_end.kn_filter = EVFILT_MARKER; 1615 scan->kqs_end.kn_status = KN_PROCESSING; 1616 } 1617 1618 void 1619 kqueue_scan_finish(struct kqueue_scan_state *scan) 1620 { 1621 struct kqueue *kq = scan->kqs_kq; 1622 1623 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1624 KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); 1625 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1626 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1627 1628 if (scan->kqs_queued) { 1629 scan->kqs_queued = 0; 1630 mtx_enter(&kq->kq_lock); 1631 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1632 mtx_leave(&kq->kq_lock); 1633 } 1634 KQRELE(kq); 1635 } 1636 1637 /* 1638 * XXX 1639 * This could be expanded to call kqueue_scan, if desired. 1640 */ 1641 int 1642 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1643 { 1644 return (ENXIO); 1645 } 1646 1647 int 1648 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1649 { 1650 return (ENXIO); 1651 } 1652 1653 int 1654 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1655 { 1656 return (ENOTTY); 1657 } 1658 1659 int 1660 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1661 { 1662 struct kqueue *kq = fp->f_data; 1663 1664 memset(st, 0, sizeof(*st)); 1665 st->st_size = kq->kq_count; /* unlocked read */ 1666 st->st_blksize = sizeof(struct kevent); 1667 st->st_mode = S_IFIFO; 1668 return (0); 1669 } 1670 1671 void 1672 kqueue_purge(struct proc *p, struct kqueue *kq) 1673 { 1674 int i; 1675 1676 mtx_enter(&kq->kq_lock); 1677 for (i = 0; i < kq->kq_knlistsize; i++) 1678 knote_remove(p, kq, &kq->kq_knlist, i, 1); 1679 if (kq->kq_knhashmask != 0) { 1680 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1681 knote_remove(p, kq, &kq->kq_knhash, i, 1); 1682 } 1683 mtx_leave(&kq->kq_lock); 1684 } 1685 1686 void 1687 kqueue_terminate(struct proc *p, struct kqueue *kq) 1688 { 1689 struct knote *kn; 1690 int state; 1691 1692 mtx_enter(&kq->kq_lock); 1693 1694 /* 1695 * Any remaining entries should be scan markers. 1696 * They are removed when the ongoing scans finish. 1697 */ 1698 KASSERT(kq->kq_count == 0); 1699 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) 1700 KASSERT(kn->kn_filter == EVFILT_MARKER); 1701 1702 kq->kq_state |= KQ_DYING; 1703 state = kq->kq_state; 1704 kqueue_wakeup(kq); 1705 mtx_leave(&kq->kq_lock); 1706 1707 /* 1708 * Any knotes that were attached to this kqueue were deleted 1709 * by knote_fdclose() when this kqueue's file descriptor was closed. 
1710 */ 1711 KASSERT(klist_empty(&kq->kq_klist)); 1712 if (state & KQ_TASK) 1713 taskq_del_barrier(systqmp, &kq->kq_task); 1714 } 1715 1716 int 1717 kqueue_close(struct file *fp, struct proc *p) 1718 { 1719 struct kqueue *kq = fp->f_data; 1720 1721 fp->f_data = NULL; 1722 1723 kqueue_purge(p, kq); 1724 kqueue_terminate(p, kq); 1725 1726 KQRELE(kq); 1727 1728 return (0); 1729 } 1730 1731 static void 1732 kqueue_task(void *arg) 1733 { 1734 struct kqueue *kq = arg; 1735 1736 knote(&kq->kq_klist, 0); 1737 } 1738 1739 void 1740 kqueue_wakeup(struct kqueue *kq) 1741 { 1742 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1743 1744 if (kq->kq_state & KQ_SLEEP) { 1745 kq->kq_state &= ~KQ_SLEEP; 1746 wakeup(kq); 1747 } 1748 if (!klist_empty(&kq->kq_klist)) { 1749 /* Defer activation to avoid recursion. */ 1750 kq->kq_state |= KQ_TASK; 1751 task_add(systqmp, &kq->kq_task); 1752 } 1753 } 1754 1755 static void 1756 kqueue_expand_hash(struct kqueue *kq) 1757 { 1758 struct knlist *hash; 1759 u_long hashmask; 1760 1761 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1762 1763 if (kq->kq_knhashmask == 0) { 1764 mtx_leave(&kq->kq_lock); 1765 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1766 mtx_enter(&kq->kq_lock); 1767 if (kq->kq_knhashmask == 0) { 1768 kq->kq_knhash = hash; 1769 kq->kq_knhashmask = hashmask; 1770 } else { 1771 /* Another thread has allocated the hash. */ 1772 mtx_leave(&kq->kq_lock); 1773 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1774 mtx_enter(&kq->kq_lock); 1775 } 1776 } 1777 } 1778 1779 static void 1780 kqueue_expand_list(struct kqueue *kq, int fd) 1781 { 1782 struct knlist *list, *olist; 1783 int size, osize; 1784 1785 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1786 1787 if (kq->kq_knlistsize <= fd) { 1788 size = kq->kq_knlistsize; 1789 mtx_leave(&kq->kq_lock); 1790 while (size <= fd) 1791 size += KQEXTENT; 1792 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1793 mtx_enter(&kq->kq_lock); 1794 if (kq->kq_knlistsize <= fd) { 1795 memcpy(list, kq->kq_knlist, 1796 kq->kq_knlistsize * sizeof(*list)); 1797 memset(&list[kq->kq_knlistsize], 0, 1798 (size - kq->kq_knlistsize) * sizeof(*list)); 1799 olist = kq->kq_knlist; 1800 osize = kq->kq_knlistsize; 1801 kq->kq_knlist = list; 1802 kq->kq_knlistsize = size; 1803 mtx_leave(&kq->kq_lock); 1804 free(olist, M_KEVENT, osize * sizeof(*list)); 1805 mtx_enter(&kq->kq_lock); 1806 } else { 1807 /* Another thread has expanded the list. */ 1808 mtx_leave(&kq->kq_lock); 1809 free(list, M_KEVENT, size * sizeof(*list)); 1810 mtx_enter(&kq->kq_lock); 1811 } 1812 } 1813 } 1814 1815 /* 1816 * Acquire a knote, return non-zero on success, 0 on failure. 1817 * 1818 * If we cannot acquire the knote we sleep and return 0. The knote 1819 * may be stale on return in this case and the caller must restart 1820 * whatever loop they are in. 1821 * 1822 * If we are about to sleep and klist is non-NULL, the list is unlocked 1823 * before sleep and remains unlocked on return. 1824 */ 1825 int 1826 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1827 { 1828 struct kqueue *kq = kn->kn_kq; 1829 1830 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1831 KASSERT(kn->kn_filter != EVFILT_MARKER); 1832 1833 if (kn->kn_status & KN_PROCESSING) { 1834 kn->kn_status |= KN_WAITING; 1835 if (klist != NULL) { 1836 mtx_leave(&kq->kq_lock); 1837 klist_unlock(klist, ls); 1838 /* XXX Timeout resolves potential loss of wakeup. 
*/ 1839 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1840 } else { 1841 msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts", 1842 SEC_TO_NSEC(1)); 1843 } 1844 /* knote may be stale now */ 1845 return (0); 1846 } 1847 kn->kn_status |= KN_PROCESSING; 1848 return (1); 1849 } 1850 1851 /* 1852 * Release an acquired knote, clearing KN_PROCESSING. 1853 */ 1854 void 1855 knote_release(struct knote *kn) 1856 { 1857 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1858 KASSERT(kn->kn_filter != EVFILT_MARKER); 1859 KASSERT(kn->kn_status & KN_PROCESSING); 1860 1861 if (kn->kn_status & KN_WAITING) { 1862 kn->kn_status &= ~KN_WAITING; 1863 wakeup(kn); 1864 } 1865 kn->kn_status &= ~KN_PROCESSING; 1866 /* kn should not be accessed anymore */ 1867 } 1868 1869 /* 1870 * activate one knote. 1871 */ 1872 void 1873 knote_activate(struct knote *kn) 1874 { 1875 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1876 1877 kn->kn_status |= KN_ACTIVE; 1878 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1879 knote_enqueue(kn); 1880 } 1881 1882 /* 1883 * walk down a list of knotes, activating them if their event has triggered. 1884 */ 1885 void 1886 knote(struct klist *list, long hint) 1887 { 1888 int ls; 1889 1890 ls = klist_lock(list); 1891 knote_locked(list, hint); 1892 klist_unlock(list, ls); 1893 } 1894 1895 void 1896 knote_locked(struct klist *list, long hint) 1897 { 1898 struct knote *kn, *kn0; 1899 struct kqueue *kq; 1900 1901 KLIST_ASSERT_LOCKED(list); 1902 1903 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) { 1904 if (filter_event(kn, hint)) { 1905 kq = kn->kn_kq; 1906 mtx_enter(&kq->kq_lock); 1907 knote_activate(kn); 1908 mtx_leave(&kq->kq_lock); 1909 } 1910 } 1911 } 1912 1913 /* 1914 * remove all knotes from a specified knlist 1915 */ 1916 void 1917 knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, int idx, 1918 int purge) 1919 { 1920 struct knote *kn; 1921 1922 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1923 1924 /* Always fetch array pointer as another thread can resize kq_knlist. */ 1925 while ((kn = SLIST_FIRST(*plist + idx)) != NULL) { 1926 KASSERT(kn->kn_kq == kq); 1927 1928 if (!purge) { 1929 /* Skip pending badfd knotes. */ 1930 while (kn->kn_fop == &badfd_filtops) { 1931 kn = SLIST_NEXT(kn, kn_link); 1932 if (kn == NULL) 1933 return; 1934 KASSERT(kn->kn_kq == kq); 1935 } 1936 } 1937 1938 if (!knote_acquire(kn, NULL, 0)) { 1939 /* knote_acquire() has released kq_lock. */ 1940 mtx_enter(&kq->kq_lock); 1941 continue; 1942 } 1943 mtx_leave(&kq->kq_lock); 1944 filter_detach(kn); 1945 1946 /* 1947 * Notify poll(2) and select(2) when a monitored 1948 * file descriptor is closed. 1949 * 1950 * This reuses the original knote for delivering the 1951 * notification so as to avoid allocating memory. 
1952 */ 1953 if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) && 1954 !(p->p_kq == kq && 1955 p->p_kq_serial > (unsigned long)kn->kn_udata) && 1956 kn->kn_fop != &badfd_filtops) { 1957 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1958 FRELE(kn->kn_fp, p); 1959 kn->kn_fp = NULL; 1960 1961 kn->kn_fop = &badfd_filtops; 1962 filter_event(kn, 0); 1963 mtx_enter(&kq->kq_lock); 1964 knote_activate(kn); 1965 knote_release(kn); 1966 continue; 1967 } 1968 1969 knote_drop(kn, p); 1970 mtx_enter(&kq->kq_lock); 1971 } 1972 } 1973 1974 /* 1975 * remove all knotes referencing a specified fd 1976 */ 1977 void 1978 knote_fdclose(struct proc *p, int fd) 1979 { 1980 struct filedesc *fdp = p->p_p->ps_fd; 1981 struct kqueue *kq; 1982 1983 /* 1984 * fdplock can be ignored if the file descriptor table is being freed 1985 * because no other thread can access the fdp. 1986 */ 1987 if (fdp->fd_refcnt != 0) 1988 fdpassertlocked(fdp); 1989 1990 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1991 mtx_enter(&kq->kq_lock); 1992 if (fd < kq->kq_knlistsize) 1993 knote_remove(p, kq, &kq->kq_knlist, fd, 0); 1994 mtx_leave(&kq->kq_lock); 1995 } 1996 } 1997 1998 /* 1999 * handle a process exiting, including the triggering of NOTE_EXIT notes 2000 * XXX this could be more efficient, doing a single pass down the klist 2001 */ 2002 void 2003 knote_processexit(struct process *pr) 2004 { 2005 KERNEL_ASSERT_LOCKED(); 2006 2007 knote_locked(&pr->ps_klist, NOTE_EXIT); 2008 2009 /* remove other knotes hanging off the process */ 2010 klist_invalidate(&pr->ps_klist); 2011 } 2012 2013 void 2014 knote_attach(struct knote *kn) 2015 { 2016 struct kqueue *kq = kn->kn_kq; 2017 struct knlist *list; 2018 2019 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 2020 KASSERT(kn->kn_status & KN_PROCESSING); 2021 2022 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 2023 KASSERT(kq->kq_knlistsize > kn->kn_id); 2024 list = &kq->kq_knlist[kn->kn_id]; 2025 } else { 2026 KASSERT(kq->kq_knhashmask != 0); 2027 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 2028 } 2029 SLIST_INSERT_HEAD(list, kn, kn_link); 2030 kq->kq_nknotes++; 2031 } 2032 2033 void 2034 knote_detach(struct knote *kn) 2035 { 2036 struct kqueue *kq = kn->kn_kq; 2037 struct knlist *list; 2038 2039 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 2040 KASSERT(kn->kn_status & KN_PROCESSING); 2041 2042 kq->kq_nknotes--; 2043 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 2044 list = &kq->kq_knlist[kn->kn_id]; 2045 else 2046 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 2047 SLIST_REMOVE(list, kn, knote, kn_link); 2048 } 2049 2050 /* 2051 * should be called at spl == 0, since we don't want to hold spl 2052 * while calling FRELE and pool_put. 
2053 */ 2054 void 2055 knote_drop(struct knote *kn, struct proc *p) 2056 { 2057 struct kqueue *kq = kn->kn_kq; 2058 2059 KASSERT(kn->kn_filter != EVFILT_MARKER); 2060 2061 mtx_enter(&kq->kq_lock); 2062 knote_detach(kn); 2063 if (kn->kn_status & KN_QUEUED) 2064 knote_dequeue(kn); 2065 if (kn->kn_status & KN_WAITING) { 2066 kn->kn_status &= ~KN_WAITING; 2067 wakeup(kn); 2068 } 2069 mtx_leave(&kq->kq_lock); 2070 2071 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 2072 FRELE(kn->kn_fp, p); 2073 pool_put(&knote_pool, kn); 2074 } 2075 2076 2077 void 2078 knote_enqueue(struct knote *kn) 2079 { 2080 struct kqueue *kq = kn->kn_kq; 2081 2082 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 2083 KASSERT(kn->kn_filter != EVFILT_MARKER); 2084 KASSERT((kn->kn_status & KN_QUEUED) == 0); 2085 2086 kqueue_check(kq); 2087 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 2088 kn->kn_status |= KN_QUEUED; 2089 kq->kq_count++; 2090 kqueue_check(kq); 2091 kqueue_wakeup(kq); 2092 } 2093 2094 void 2095 knote_dequeue(struct knote *kn) 2096 { 2097 struct kqueue *kq = kn->kn_kq; 2098 2099 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 2100 KASSERT(kn->kn_filter != EVFILT_MARKER); 2101 KASSERT(kn->kn_status & KN_QUEUED); 2102 2103 kqueue_check(kq); 2104 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 2105 kn->kn_status &= ~KN_QUEUED; 2106 kq->kq_count--; 2107 kqueue_check(kq); 2108 } 2109 2110 /* 2111 * Assign parameters to the knote. 2112 * 2113 * The knote's object lock must be held. 2114 */ 2115 void 2116 knote_assign(const struct kevent *kev, struct knote *kn) 2117 { 2118 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2119 KERNEL_ASSERT_LOCKED(); 2120 2121 kn->kn_sfflags = kev->fflags; 2122 kn->kn_sdata = kev->data; 2123 kn->kn_udata = kev->udata; 2124 } 2125 2126 /* 2127 * Submit the knote's event for delivery. 2128 * 2129 * The knote's object lock must be held. 2130 */ 2131 void 2132 knote_submit(struct knote *kn, struct kevent *kev) 2133 { 2134 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2135 KERNEL_ASSERT_LOCKED(); 2136 2137 if (kev != NULL) { 2138 *kev = kn->kn_kevent; 2139 if (kn->kn_flags & EV_CLEAR) { 2140 kn->kn_fflags = 0; 2141 kn->kn_data = 0; 2142 } 2143 } 2144 } 2145 2146 void 2147 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 2148 { 2149 SLIST_INIT(&klist->kl_list); 2150 klist->kl_ops = ops; 2151 klist->kl_arg = arg; 2152 } 2153 2154 void 2155 klist_free(struct klist *klist) 2156 { 2157 KASSERT(SLIST_EMPTY(&klist->kl_list)); 2158 } 2159 2160 void 2161 klist_insert(struct klist *klist, struct knote *kn) 2162 { 2163 int ls; 2164 2165 ls = klist_lock(klist); 2166 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2167 klist_unlock(klist, ls); 2168 } 2169 2170 void 2171 klist_insert_locked(struct klist *klist, struct knote *kn) 2172 { 2173 KLIST_ASSERT_LOCKED(klist); 2174 2175 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2176 } 2177 2178 void 2179 klist_remove(struct klist *klist, struct knote *kn) 2180 { 2181 int ls; 2182 2183 ls = klist_lock(klist); 2184 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2185 klist_unlock(klist, ls); 2186 } 2187 2188 void 2189 klist_remove_locked(struct klist *klist, struct knote *kn) 2190 { 2191 KLIST_ASSERT_LOCKED(klist); 2192 2193 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2194 } 2195 2196 /* 2197 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 2198 * 2199 * The caller of this function must not hold any locks that can block 2200 * filterops callbacks that run with KN_PROCESSING. 
 * Otherwise this function might deadlock.
 */
void
klist_invalidate(struct klist *list)
{
	struct knote *kn;
	struct kqueue *kq;
	struct proc *p = curproc;
	int ls;

	NET_ASSERT_UNLOCKED();

	ls = klist_lock(list);
	while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) {
		kq = kn->kn_kq;
		mtx_enter(&kq->kq_lock);
		if (!knote_acquire(kn, list, ls)) {
			/* knote_acquire() has released kq_lock
			 * and klist lock. */
			ls = klist_lock(list);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		klist_unlock(list, ls);
		filter_detach(kn);
		if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
			kn->kn_fop = &dead_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			mtx_leave(&kq->kq_lock);
		} else {
			knote_drop(kn, p);
		}
		ls = klist_lock(list);
	}
	klist_unlock(list, ls);
}

static int
klist_lock(struct klist *list)
{
	int ls = 0;

	if (list->kl_ops != NULL) {
		ls = list->kl_ops->klo_lock(list->kl_arg);
	} else {
		KERNEL_LOCK();
		ls = splhigh();
	}
	return ls;
}

static void
klist_unlock(struct klist *list, int ls)
{
	if (list->kl_ops != NULL) {
		list->kl_ops->klo_unlock(list->kl_arg, ls);
	} else {
		splx(ls);
		KERNEL_UNLOCK();
	}
}

static void
klist_mutex_assertlk(void *arg)
{
	struct mutex *mtx = arg;

	(void)mtx;

	MUTEX_ASSERT_LOCKED(mtx);
}

static int
klist_mutex_lock(void *arg)
{
	struct mutex *mtx = arg;

	mtx_enter(mtx);
	return 0;
}

static void
klist_mutex_unlock(void *arg, int s)
{
	struct mutex *mtx = arg;

	mtx_leave(mtx);
}

static const struct klistops mutex_klistops = {
	.klo_assertlk	= klist_mutex_assertlk,
	.klo_lock	= klist_mutex_lock,
	.klo_unlock	= klist_mutex_unlock,
};

void
klist_init_mutex(struct klist *klist, struct mutex *mtx)
{
	klist_init(klist, &mutex_klistops, mtx);
}

static void
klist_rwlock_assertlk(void *arg)
{
	struct rwlock *rwl = arg;

	(void)rwl;

	rw_assert_wrlock(rwl);
}

static int
klist_rwlock_lock(void *arg)
{
	struct rwlock *rwl = arg;

	rw_enter_write(rwl);
	return 0;
}

static void
klist_rwlock_unlock(void *arg, int s)
{
	struct rwlock *rwl = arg;

	rw_exit_write(rwl);
}

static const struct klistops rwlock_klistops = {
	.klo_assertlk	= klist_rwlock_assertlk,
	.klo_lock	= klist_rwlock_lock,
	.klo_unlock	= klist_rwlock_unlock,
};

void
klist_init_rwlock(struct klist *klist, struct rwlock *rwl)
{
	klist_init(klist, &rwlock_klistops, rwl);
}
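
/*
 * Illustrative sketch only (kept under #if 0, not part of this file or of
 * any build): how a driver typically consumes the klist/knote interface
 * implemented above.  All "mydev" names, the softc layout, and
 * mydev_lookup() are hypothetical; the kqueue-side entry points used here
 * (klist_init_mutex(), klist_insert(), klist_remove(), knote_locked())
 * are the ones defined in this file.  The filter is not marked
 * FILTEROP_MPSAFE, so in this sketch the paths that reach filter_event()
 * are assumed to run under the kernel lock.
 */
#if 0
struct mydev_softc {
	struct mutex	sc_mtx;		/* backs sc_klist, protects sc_ready */
	struct klist	sc_klist;	/* knotes waiting for readable data */
	int		sc_ready;	/* hypothetical "data available" count */
};

void	filt_mydevrdetach(struct knote *);
int	filt_mydevread(struct knote *, long);

const struct filterops mydevread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_mydevrdetach,
	.f_event	= filt_mydevread,
};

/* Attach-time setup: back the klist with the softc mutex. */
void
mydev_attach_klist(struct mydev_softc *sc)
{
	mtx_init(&sc->sc_mtx, IPL_MPFLOOR);	/* or the caller's IPL */
	klist_init_mutex(&sc->sc_klist, &sc->sc_mtx);
}

/* d_kqfilter entry point: hook the knote onto the device's klist. */
int
mydevkqfilter(dev_t dev, struct knote *kn)
{
	struct mydev_softc *sc = mydev_lookup(dev);	/* hypothetical */

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &mydevread_filtops;
		break;
	default:
		return (EINVAL);
	}

	kn->kn_hook = sc;
	klist_insert(&sc->sc_klist, kn);
	return (0);
}

void
filt_mydevrdetach(struct knote *kn)
{
	struct mydev_softc *sc = kn->kn_hook;

	klist_remove(&sc->sc_klist, kn);
}

/* Called with the klist (softc) mutex held via knote_locked(). */
int
filt_mydevread(struct knote *kn, long hint)
{
	struct mydev_softc *sc = kn->kn_hook;

	kn->kn_data = sc->sc_ready;
	return (kn->kn_data > 0);
}

/*
 * Completion path: activate waiting knotes while holding the klist's
 * mutex, as knote_locked()/KLIST_ASSERT_LOCKED() require.
 */
void
mydev_wakeup(struct mydev_softc *sc)
{
	mtx_enter(&sc->sc_mtx);
	sc->sc_ready++;
	knote_locked(&sc->sc_klist, 0);
	mtx_leave(&sc->sc_mtx);
}
#endif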