1 /* $OpenBSD: kern_event.c,v 1.198 2023/08/20 15:13:43 visa Exp $ */ 2 3 /*- 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/proc.h> 34 #include <sys/pledge.h> 35 #include <sys/malloc.h> 36 #include <sys/file.h> 37 #include <sys/filedesc.h> 38 #include <sys/fcntl.h> 39 #include <sys/queue.h> 40 #include <sys/event.h> 41 #include <sys/eventvar.h> 42 #include <sys/ktrace.h> 43 #include <sys/pool.h> 44 #include <sys/stat.h> 45 #include <sys/mount.h> 46 #include <sys/syscallargs.h> 47 #include <sys/time.h> 48 #include <sys/timeout.h> 49 #include <sys/vnode.h> 50 #include <sys/wait.h> 51 52 #ifdef DIAGNOSTIC 53 #define KLIST_ASSERT_LOCKED(kl) do { \ 54 if ((kl)->kl_ops != NULL) \ 55 (kl)->kl_ops->klo_assertlk((kl)->kl_arg); \ 56 else \ 57 KERNEL_ASSERT_LOCKED(); \ 58 } while (0) 59 #else 60 #define KLIST_ASSERT_LOCKED(kl) ((void)(kl)) 61 #endif 62 63 int dokqueue(struct proc *, int, register_t *); 64 struct kqueue *kqueue_alloc(struct filedesc *); 65 void kqueue_terminate(struct proc *p, struct kqueue *); 66 void KQREF(struct kqueue *); 67 void KQRELE(struct kqueue *); 68 69 void kqueue_purge(struct proc *, struct kqueue *); 70 int kqueue_sleep(struct kqueue *, struct timespec *); 71 72 int kqueue_read(struct file *, struct uio *, int); 73 int kqueue_write(struct file *, struct uio *, int); 74 int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 75 struct proc *p); 76 int kqueue_kqfilter(struct file *fp, struct knote *kn); 77 int kqueue_stat(struct file *fp, struct stat *st, struct proc *p); 78 int kqueue_close(struct file *fp, struct proc *p); 79 void kqueue_wakeup(struct kqueue *kq); 80 81 #ifdef KQUEUE_DEBUG 82 void kqueue_do_check(struct kqueue *kq, const char *func, int line); 83 #define kqueue_check(kq) kqueue_do_check((kq), __func__, __LINE__) 84 #else 85 #define kqueue_check(kq) do {} while (0) 86 #endif 87 88 static int filter_attach(struct knote *kn); 89 static void filter_detach(struct knote *kn); 90 static int filter_event(struct knote *kn, long hint); 91 static int 
filter_modify(struct kevent *kev, struct knote *kn); 92 static int filter_process(struct knote *kn, struct kevent *kev); 93 static void kqueue_expand_hash(struct kqueue *kq); 94 static void kqueue_expand_list(struct kqueue *kq, int fd); 95 static void kqueue_task(void *); 96 static int klist_lock(struct klist *); 97 static void klist_unlock(struct klist *, int); 98 99 const struct fileops kqueueops = { 100 .fo_read = kqueue_read, 101 .fo_write = kqueue_write, 102 .fo_ioctl = kqueue_ioctl, 103 .fo_kqfilter = kqueue_kqfilter, 104 .fo_stat = kqueue_stat, 105 .fo_close = kqueue_close 106 }; 107 108 void knote_attach(struct knote *kn); 109 void knote_detach(struct knote *kn); 110 void knote_drop(struct knote *kn, struct proc *p); 111 void knote_enqueue(struct knote *kn); 112 void knote_dequeue(struct knote *kn); 113 int knote_acquire(struct knote *kn, struct klist *, int); 114 void knote_release(struct knote *kn); 115 void knote_activate(struct knote *kn); 116 void knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, 117 int idx, int purge); 118 119 void filt_kqdetach(struct knote *kn); 120 int filt_kqueue(struct knote *kn, long hint); 121 int filt_kqueuemodify(struct kevent *kev, struct knote *kn); 122 int filt_kqueueprocess(struct knote *kn, struct kevent *kev); 123 int filt_kqueue_common(struct knote *kn, struct kqueue *kq); 124 int filt_procattach(struct knote *kn); 125 void filt_procdetach(struct knote *kn); 126 int filt_proc(struct knote *kn, long hint); 127 int filt_fileattach(struct knote *kn); 128 void filt_timerexpire(void *knx); 129 int filt_timerattach(struct knote *kn); 130 void filt_timerdetach(struct knote *kn); 131 int filt_timermodify(struct kevent *kev, struct knote *kn); 132 int filt_timerprocess(struct knote *kn, struct kevent *kev); 133 void filt_seltruedetach(struct knote *kn); 134 135 const struct filterops kqread_filtops = { 136 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 137 .f_attach = NULL, 138 .f_detach = filt_kqdetach, 139 .f_event = filt_kqueue, 140 .f_modify = filt_kqueuemodify, 141 .f_process = filt_kqueueprocess, 142 }; 143 144 const struct filterops proc_filtops = { 145 .f_flags = 0, 146 .f_attach = filt_procattach, 147 .f_detach = filt_procdetach, 148 .f_event = filt_proc, 149 }; 150 151 const struct filterops file_filtops = { 152 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 153 .f_attach = filt_fileattach, 154 .f_detach = NULL, 155 .f_event = NULL, 156 }; 157 158 const struct filterops timer_filtops = { 159 .f_flags = 0, 160 .f_attach = filt_timerattach, 161 .f_detach = filt_timerdetach, 162 .f_event = NULL, 163 .f_modify = filt_timermodify, 164 .f_process = filt_timerprocess, 165 }; 166 167 struct pool knote_pool; 168 struct pool kqueue_pool; 169 struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); 170 int kq_ntimeouts = 0; 171 int kq_timeoutmax = (4 * 1024); 172 173 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 174 175 /* 176 * Table for all system-defined filters. 
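 * A kevent filter is identified by a small negative number; kqueue_register()
 * maps it to this table with sysfilt_ops[~kev->filter].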
177 */ 178 const struct filterops *const sysfilt_ops[] = { 179 &file_filtops, /* EVFILT_READ */ 180 &file_filtops, /* EVFILT_WRITE */ 181 NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ 182 &file_filtops, /* EVFILT_VNODE */ 183 &proc_filtops, /* EVFILT_PROC */ 184 &sig_filtops, /* EVFILT_SIGNAL */ 185 &timer_filtops, /* EVFILT_TIMER */ 186 &file_filtops, /* EVFILT_DEVICE */ 187 &file_filtops, /* EVFILT_EXCEPT */ 188 }; 189 190 void 191 KQREF(struct kqueue *kq) 192 { 193 refcnt_take(&kq->kq_refcnt); 194 } 195 196 void 197 KQRELE(struct kqueue *kq) 198 { 199 struct filedesc *fdp; 200 201 if (refcnt_rele(&kq->kq_refcnt) == 0) 202 return; 203 204 fdp = kq->kq_fdp; 205 if (rw_status(&fdp->fd_lock) == RW_WRITE) { 206 LIST_REMOVE(kq, kq_next); 207 } else { 208 fdplock(fdp); 209 LIST_REMOVE(kq, kq_next); 210 fdpunlock(fdp); 211 } 212 213 KASSERT(TAILQ_EMPTY(&kq->kq_head)); 214 KASSERT(kq->kq_nknotes == 0); 215 216 free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * 217 sizeof(struct knlist)); 218 hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); 219 klist_free(&kq->kq_klist); 220 pool_put(&kqueue_pool, kq); 221 } 222 223 void 224 kqueue_init(void) 225 { 226 pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, 227 PR_WAITOK, "kqueuepl", NULL); 228 pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, 229 PR_WAITOK, "knotepl", NULL); 230 } 231 232 void 233 kqueue_init_percpu(void) 234 { 235 pool_cache_init(&knote_pool); 236 } 237 238 int 239 filt_fileattach(struct knote *kn) 240 { 241 struct file *fp = kn->kn_fp; 242 243 return fp->f_ops->fo_kqfilter(fp, kn); 244 } 245 246 int 247 kqueue_kqfilter(struct file *fp, struct knote *kn) 248 { 249 struct kqueue *kq = kn->kn_fp->f_data; 250 251 if (kn->kn_filter != EVFILT_READ) 252 return (EINVAL); 253 254 kn->kn_fop = &kqread_filtops; 255 klist_insert(&kq->kq_klist, kn); 256 return (0); 257 } 258 259 void 260 filt_kqdetach(struct knote *kn) 261 { 262 struct kqueue *kq = kn->kn_fp->f_data; 263 264 klist_remove(&kq->kq_klist, kn); 265 } 266 267 int 268 filt_kqueue_common(struct knote *kn, struct kqueue *kq) 269 { 270 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 271 272 kn->kn_data = kq->kq_count; 273 274 return (kn->kn_data > 0); 275 } 276 277 int 278 filt_kqueue(struct knote *kn, long hint) 279 { 280 struct kqueue *kq = kn->kn_fp->f_data; 281 int active; 282 283 mtx_enter(&kq->kq_lock); 284 active = filt_kqueue_common(kn, kq); 285 mtx_leave(&kq->kq_lock); 286 287 return (active); 288 } 289 290 int 291 filt_kqueuemodify(struct kevent *kev, struct knote *kn) 292 { 293 struct kqueue *kq = kn->kn_fp->f_data; 294 int active; 295 296 mtx_enter(&kq->kq_lock); 297 knote_assign(kev, kn); 298 active = filt_kqueue_common(kn, kq); 299 mtx_leave(&kq->kq_lock); 300 301 return (active); 302 } 303 304 int 305 filt_kqueueprocess(struct knote *kn, struct kevent *kev) 306 { 307 struct kqueue *kq = kn->kn_fp->f_data; 308 int active; 309 310 mtx_enter(&kq->kq_lock); 311 if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) 312 active = 1; 313 else 314 active = filt_kqueue_common(kn, kq); 315 if (active) 316 knote_submit(kn, kev); 317 mtx_leave(&kq->kq_lock); 318 319 return (active); 320 } 321 322 int 323 filt_procattach(struct knote *kn) 324 { 325 struct process *pr; 326 int s; 327 328 if ((curproc->p_p->ps_flags & PS_PLEDGE) && 329 (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) 330 return pledge_fail(curproc, EPERM, PLEDGE_PROC); 331 332 if (kn->kn_id > PID_MAX) 333 return ESRCH; 334 335 pr = prfind(kn->kn_id); 336 if (pr == NULL) 337 return (ESRCH); 338 339 /* exiting processes can't be 
specified */ 340 if (pr->ps_flags & PS_EXITING) 341 return (ESRCH); 342 343 kn->kn_ptr.p_process = pr; 344 kn->kn_flags |= EV_CLEAR; /* automatically set */ 345 346 /* 347 * internal flag indicating registration done by kernel 348 */ 349 if (kn->kn_flags & EV_FLAG1) { 350 kn->kn_data = kn->kn_sdata; /* ppid */ 351 kn->kn_fflags = NOTE_CHILD; 352 kn->kn_flags &= ~EV_FLAG1; 353 } 354 355 s = splhigh(); 356 klist_insert_locked(&pr->ps_klist, kn); 357 splx(s); 358 359 return (0); 360 } 361 362 /* 363 * The knote may be attached to a different process, which may exit, 364 * leaving nothing for the knote to be attached to. So when the process 365 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 366 * it will be deleted when read out. However, as part of the knote deletion, 367 * this routine is called, so a check is needed to avoid actually performing 368 * a detach, because the original process does not exist any more. 369 */ 370 void 371 filt_procdetach(struct knote *kn) 372 { 373 struct kqueue *kq = kn->kn_kq; 374 struct process *pr = kn->kn_ptr.p_process; 375 int s, status; 376 377 mtx_enter(&kq->kq_lock); 378 status = kn->kn_status; 379 mtx_leave(&kq->kq_lock); 380 381 if (status & KN_DETACHED) 382 return; 383 384 s = splhigh(); 385 klist_remove_locked(&pr->ps_klist, kn); 386 splx(s); 387 } 388 389 int 390 filt_proc(struct knote *kn, long hint) 391 { 392 struct kqueue *kq = kn->kn_kq; 393 u_int event; 394 395 /* 396 * mask off extra data 397 */ 398 event = (u_int)hint & NOTE_PCTRLMASK; 399 400 /* 401 * if the user is interested in this event, record it. 402 */ 403 if (kn->kn_sfflags & event) 404 kn->kn_fflags |= event; 405 406 /* 407 * process is gone, so flag the event as finished and remove it 408 * from the process's klist 409 */ 410 if (event == NOTE_EXIT) { 411 struct process *pr = kn->kn_ptr.p_process; 412 int s; 413 414 mtx_enter(&kq->kq_lock); 415 kn->kn_status |= KN_DETACHED; 416 mtx_leave(&kq->kq_lock); 417 418 s = splhigh(); 419 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 420 kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); 421 klist_remove_locked(&pr->ps_klist, kn); 422 splx(s); 423 return (1); 424 } 425 426 /* 427 * process forked, and user wants to track the new process, 428 * so attach a new knote to it, and immediately report an 429 * event with the parent's pid. 430 */ 431 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 432 struct kevent kev; 433 int error; 434 435 /* 436 * register knote with new process. 
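 * EV_FLAG1 marks the registration as kernel-initiated; filt_procattach()
 * turns it into a NOTE_CHILD event that carries the parent pid in kn_data.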
437 */ 438 memset(&kev, 0, sizeof(kev)); 439 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 440 kev.filter = kn->kn_filter; 441 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 442 kev.fflags = kn->kn_sfflags; 443 kev.data = kn->kn_id; /* parent */ 444 kev.udata = kn->kn_udata; /* preserve udata */ 445 error = kqueue_register(kq, &kev, 0, NULL); 446 if (error) 447 kn->kn_fflags |= NOTE_TRACKERR; 448 } 449 450 return (kn->kn_fflags != 0); 451 } 452 453 #define NOTE_TIMER_UNITMASK \ 454 (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS|NOTE_NSECONDS) 455 456 static int 457 filt_timervalidate(int sfflags, int64_t sdata, struct timespec *ts) 458 { 459 if (sfflags & ~(NOTE_TIMER_UNITMASK | NOTE_ABSTIME)) 460 return (EINVAL); 461 462 switch (sfflags & NOTE_TIMER_UNITMASK) { 463 case NOTE_SECONDS: 464 ts->tv_sec = sdata; 465 ts->tv_nsec = 0; 466 break; 467 case NOTE_MSECONDS: 468 ts->tv_sec = sdata / 1000; 469 ts->tv_nsec = (sdata % 1000) * 1000000; 470 break; 471 case NOTE_USECONDS: 472 ts->tv_sec = sdata / 1000000; 473 ts->tv_nsec = (sdata % 1000000) * 1000; 474 break; 475 case NOTE_NSECONDS: 476 ts->tv_sec = sdata / 1000000000; 477 ts->tv_nsec = sdata % 1000000000; 478 break; 479 default: 480 return (EINVAL); 481 } 482 483 return (0); 484 } 485 486 static void 487 filt_timeradd(struct knote *kn, struct timespec *ts) 488 { 489 struct timespec expiry, now; 490 struct timeout *to = kn->kn_hook; 491 int tticks; 492 493 if (kn->kn_sfflags & NOTE_ABSTIME) { 494 nanotime(&now); 495 if (timespeccmp(ts, &now, >)) { 496 timespecsub(ts, &now, &expiry); 497 /* XXX timeout_abs_ts with CLOCK_REALTIME */ 498 timeout_add(to, tstohz(&expiry)); 499 } else { 500 /* Expire immediately. */ 501 filt_timerexpire(kn); 502 } 503 return; 504 } 505 506 tticks = tstohz(ts); 507 /* Remove extra tick from tstohz() if timeout has fired before. */ 508 if (timeout_triggered(to)) 509 tticks--; 510 timeout_add(to, (tticks > 0) ? 
tticks : 1); 511 } 512 513 void 514 filt_timerexpire(void *knx) 515 { 516 struct timespec ts; 517 struct knote *kn = knx; 518 struct kqueue *kq = kn->kn_kq; 519 520 kn->kn_data++; 521 mtx_enter(&kq->kq_lock); 522 knote_activate(kn); 523 mtx_leave(&kq->kq_lock); 524 525 if ((kn->kn_flags & EV_ONESHOT) == 0 && 526 (kn->kn_sfflags & NOTE_ABSTIME) == 0) { 527 (void)filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); 528 filt_timeradd(kn, &ts); 529 } 530 } 531 532 /* 533 * data contains amount of time to sleep 534 */ 535 int 536 filt_timerattach(struct knote *kn) 537 { 538 struct timespec ts; 539 struct timeout *to; 540 int error; 541 542 error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); 543 if (error != 0) 544 return (error); 545 546 if (kq_ntimeouts > kq_timeoutmax) 547 return (ENOMEM); 548 kq_ntimeouts++; 549 550 if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) 551 kn->kn_flags |= EV_CLEAR; /* automatically set */ 552 to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); 553 timeout_set(to, filt_timerexpire, kn); 554 kn->kn_hook = to; 555 filt_timeradd(kn, &ts); 556 557 return (0); 558 } 559 560 void 561 filt_timerdetach(struct knote *kn) 562 { 563 struct timeout *to; 564 565 to = (struct timeout *)kn->kn_hook; 566 timeout_del_barrier(to); 567 free(to, M_KEVENT, sizeof(*to)); 568 kq_ntimeouts--; 569 } 570 571 int 572 filt_timermodify(struct kevent *kev, struct knote *kn) 573 { 574 struct timespec ts; 575 struct kqueue *kq = kn->kn_kq; 576 struct timeout *to = kn->kn_hook; 577 int error; 578 579 error = filt_timervalidate(kev->fflags, kev->data, &ts); 580 if (error != 0) { 581 kev->flags |= EV_ERROR; 582 kev->data = error; 583 return (0); 584 } 585 586 /* Reset the timer. Any pending events are discarded. */ 587 588 timeout_del_barrier(to); 589 590 mtx_enter(&kq->kq_lock); 591 if (kn->kn_status & KN_QUEUED) 592 knote_dequeue(kn); 593 kn->kn_status &= ~KN_ACTIVE; 594 mtx_leave(&kq->kq_lock); 595 596 kn->kn_data = 0; 597 knote_assign(kev, kn); 598 /* Reinit timeout to invoke tick adjustment again. */ 599 timeout_set(to, filt_timerexpire, kn); 600 filt_timeradd(kn, &ts); 601 602 return (0); 603 } 604 605 int 606 filt_timerprocess(struct knote *kn, struct kevent *kev) 607 { 608 int active, s; 609 610 s = splsoftclock(); 611 active = (kn->kn_data != 0); 612 if (active) 613 knote_submit(kn, kev); 614 splx(s); 615 616 return (active); 617 } 618 619 620 /* 621 * filt_seltrue: 622 * 623 * This filter "event" routine simulates seltrue(). 624 */ 625 int 626 filt_seltrue(struct knote *kn, long hint) 627 { 628 629 /* 630 * We don't know how much data can be read/written, 631 * but we know that it *can* be. This is about as 632 * good as select/poll does as well. 633 */ 634 kn->kn_data = 0; 635 return (1); 636 } 637 638 int 639 filt_seltruemodify(struct kevent *kev, struct knote *kn) 640 { 641 knote_assign(kev, kn); 642 return (kn->kn_fop->f_event(kn, 0)); 643 } 644 645 int 646 filt_seltrueprocess(struct knote *kn, struct kevent *kev) 647 { 648 int active; 649 650 active = kn->kn_fop->f_event(kn, 0); 651 if (active) 652 knote_submit(kn, kev); 653 return (active); 654 } 655 656 /* 657 * This provides full kqfilter entry for device switch tables, which 658 * has same effect as filter using filt_seltrue() as filter method. 
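 * Knotes attached this way always report readiness, for both EVFILT_READ
 * and EVFILT_WRITE.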
659 */ 660 void 661 filt_seltruedetach(struct knote *kn) 662 { 663 /* Nothing to do */ 664 } 665 666 const struct filterops seltrue_filtops = { 667 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 668 .f_attach = NULL, 669 .f_detach = filt_seltruedetach, 670 .f_event = filt_seltrue, 671 .f_modify = filt_seltruemodify, 672 .f_process = filt_seltrueprocess, 673 }; 674 675 int 676 seltrue_kqfilter(dev_t dev, struct knote *kn) 677 { 678 switch (kn->kn_filter) { 679 case EVFILT_READ: 680 case EVFILT_WRITE: 681 kn->kn_fop = &seltrue_filtops; 682 break; 683 default: 684 return (EINVAL); 685 } 686 687 /* Nothing more to do */ 688 return (0); 689 } 690 691 static int 692 filt_dead(struct knote *kn, long hint) 693 { 694 if (kn->kn_filter == EVFILT_EXCEPT) { 695 /* 696 * Do not deliver event because there is no out-of-band data. 697 * However, let HUP condition pass for poll(2). 698 */ 699 if ((kn->kn_flags & __EV_POLL) == 0) { 700 kn->kn_flags |= EV_DISABLE; 701 return (0); 702 } 703 } 704 705 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 706 if (kn->kn_flags & __EV_POLL) 707 kn->kn_flags |= __EV_HUP; 708 kn->kn_data = 0; 709 return (1); 710 } 711 712 static void 713 filt_deaddetach(struct knote *kn) 714 { 715 /* Nothing to do */ 716 } 717 718 const struct filterops dead_filtops = { 719 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 720 .f_attach = NULL, 721 .f_detach = filt_deaddetach, 722 .f_event = filt_dead, 723 .f_modify = filt_seltruemodify, 724 .f_process = filt_seltrueprocess, 725 }; 726 727 static int 728 filt_badfd(struct knote *kn, long hint) 729 { 730 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 731 kn->kn_data = EBADF; 732 return (1); 733 } 734 735 /* For use with kqpoll. */ 736 const struct filterops badfd_filtops = { 737 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 738 .f_attach = NULL, 739 .f_detach = filt_deaddetach, 740 .f_event = filt_badfd, 741 .f_modify = filt_seltruemodify, 742 .f_process = filt_seltrueprocess, 743 }; 744 745 static int 746 filter_attach(struct knote *kn) 747 { 748 int error; 749 750 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 751 error = kn->kn_fop->f_attach(kn); 752 } else { 753 KERNEL_LOCK(); 754 error = kn->kn_fop->f_attach(kn); 755 KERNEL_UNLOCK(); 756 } 757 return (error); 758 } 759 760 static void 761 filter_detach(struct knote *kn) 762 { 763 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 764 kn->kn_fop->f_detach(kn); 765 } else { 766 KERNEL_LOCK(); 767 kn->kn_fop->f_detach(kn); 768 KERNEL_UNLOCK(); 769 } 770 } 771 772 static int 773 filter_event(struct knote *kn, long hint) 774 { 775 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 776 KERNEL_ASSERT_LOCKED(); 777 778 return (kn->kn_fop->f_event(kn, hint)); 779 } 780 781 static int 782 filter_modify(struct kevent *kev, struct knote *kn) 783 { 784 int active, s; 785 786 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 787 active = kn->kn_fop->f_modify(kev, kn); 788 } else { 789 KERNEL_LOCK(); 790 if (kn->kn_fop->f_modify != NULL) { 791 active = kn->kn_fop->f_modify(kev, kn); 792 } else { 793 s = splhigh(); 794 active = knote_modify(kev, kn); 795 splx(s); 796 } 797 KERNEL_UNLOCK(); 798 } 799 return (active); 800 } 801 802 static int 803 filter_process(struct knote *kn, struct kevent *kev) 804 { 805 int active, s; 806 807 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 808 active = kn->kn_fop->f_process(kn, kev); 809 } else { 810 KERNEL_LOCK(); 811 if (kn->kn_fop->f_process != NULL) { 812 active = kn->kn_fop->f_process(kn, kev); 813 } else { 814 s = splhigh(); 815 active = knote_process(kn, kev); 816 splx(s); 817 } 818 
KERNEL_UNLOCK(); 819 } 820 return (active); 821 } 822 823 /* 824 * Initialize the current thread for poll/select system call. 825 * num indicates the number of serials that the system call may utilize. 826 * After this function, the valid range of serials is 827 * p_kq_serial <= x < p_kq_serial + num. 828 */ 829 void 830 kqpoll_init(unsigned int num) 831 { 832 struct proc *p = curproc; 833 struct filedesc *fdp; 834 835 if (p->p_kq == NULL) { 836 p->p_kq = kqueue_alloc(p->p_fd); 837 p->p_kq_serial = arc4random(); 838 fdp = p->p_fd; 839 fdplock(fdp); 840 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 841 fdpunlock(fdp); 842 } 843 844 if (p->p_kq_serial + num < p->p_kq_serial) { 845 /* Serial is about to wrap. Clear all attached knotes. */ 846 kqueue_purge(p, p->p_kq); 847 p->p_kq_serial = 0; 848 } 849 } 850 851 /* 852 * Finish poll/select system call. 853 * num must have the same value that was used with kqpoll_init(). 854 */ 855 void 856 kqpoll_done(unsigned int num) 857 { 858 struct proc *p = curproc; 859 struct kqueue *kq = p->p_kq; 860 861 KASSERT(p->p_kq != NULL); 862 KASSERT(p->p_kq_serial + num >= p->p_kq_serial); 863 864 p->p_kq_serial += num; 865 866 /* 867 * Because of kn_pollid key, a thread can in principle allocate 868 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly 869 * with suitably varying pollfd arrays. 870 * Prevent such a large allocation by clearing knotes eagerly 871 * if there are too many of them. 872 * 873 * A small multiple of kq_knlistsize should give enough margin 874 * that eager clearing is infrequent, or does not happen at all, 875 * with normal programs. 876 * A single pollfd entry can use up to three knotes. 877 * Typically there is no significant overlap of fd and events 878 * between different entries in the pollfd array. 879 */ 880 if (kq->kq_nknotes > 4 * kq->kq_knlistsize) 881 kqueue_purge(p, kq); 882 } 883 884 void 885 kqpoll_exit(void) 886 { 887 struct proc *p = curproc; 888 889 if (p->p_kq == NULL) 890 return; 891 892 kqueue_purge(p, p->p_kq); 893 kqueue_terminate(p, p->p_kq); 894 KASSERT(p->p_kq->kq_refcnt.r_refs == 1); 895 KQRELE(p->p_kq); 896 p->p_kq = NULL; 897 } 898 899 struct kqueue * 900 kqueue_alloc(struct filedesc *fdp) 901 { 902 struct kqueue *kq; 903 904 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 905 refcnt_init(&kq->kq_refcnt); 906 kq->kq_fdp = fdp; 907 TAILQ_INIT(&kq->kq_head); 908 mtx_init(&kq->kq_lock, IPL_HIGH); 909 task_set(&kq->kq_task, kqueue_task, kq); 910 klist_init_mutex(&kq->kq_klist, &kqueue_klist_lock); 911 912 return (kq); 913 } 914 915 int 916 dokqueue(struct proc *p, int flags, register_t *retval) 917 { 918 struct filedesc *fdp = p->p_fd; 919 struct kqueue *kq; 920 struct file *fp; 921 int cloexec, error, fd; 922 923 cloexec = (flags & O_CLOEXEC) ? 
UF_EXCLOSE : 0; 924 925 kq = kqueue_alloc(fdp); 926 927 fdplock(fdp); 928 error = falloc(p, &fp, &fd); 929 if (error) 930 goto out; 931 fp->f_flag = FREAD | FWRITE | (flags & FNONBLOCK); 932 fp->f_type = DTYPE_KQUEUE; 933 fp->f_ops = &kqueueops; 934 fp->f_data = kq; 935 *retval = fd; 936 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 937 kq = NULL; 938 fdinsert(fdp, fd, cloexec, fp); 939 FRELE(fp, p); 940 out: 941 fdpunlock(fdp); 942 if (kq != NULL) 943 pool_put(&kqueue_pool, kq); 944 return (error); 945 } 946 947 int 948 sys_kqueue(struct proc *p, void *v, register_t *retval) 949 { 950 return (dokqueue(p, 0, retval)); 951 } 952 953 int 954 sys_kqueue1(struct proc *p, void *v, register_t *retval) 955 { 956 struct sys_kqueue1_args /* { 957 syscallarg(int) flags; 958 } */ *uap = v; 959 960 if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK)) 961 return (EINVAL); 962 return (dokqueue(p, SCARG(uap, flags), retval)); 963 } 964 965 int 966 sys_kevent(struct proc *p, void *v, register_t *retval) 967 { 968 struct kqueue_scan_state scan; 969 struct filedesc* fdp = p->p_fd; 970 struct sys_kevent_args /* { 971 syscallarg(int) fd; 972 syscallarg(const struct kevent *) changelist; 973 syscallarg(int) nchanges; 974 syscallarg(struct kevent *) eventlist; 975 syscallarg(int) nevents; 976 syscallarg(const struct timespec *) timeout; 977 } */ *uap = v; 978 struct kevent *kevp; 979 struct kqueue *kq; 980 struct file *fp; 981 struct timespec ts; 982 struct timespec *tsp = NULL; 983 int i, n, nerrors, error; 984 int ready, total; 985 struct kevent kev[KQ_NEVENTS]; 986 987 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 988 return (EBADF); 989 990 if (fp->f_type != DTYPE_KQUEUE) { 991 error = EBADF; 992 goto done; 993 } 994 995 if (SCARG(uap, timeout) != NULL) { 996 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 997 if (error) 998 goto done; 999 #ifdef KTRACE 1000 if (KTRPOINT(p, KTR_STRUCT)) 1001 ktrreltimespec(p, &ts); 1002 #endif 1003 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 1004 error = EINVAL; 1005 goto done; 1006 } 1007 tsp = &ts; 1008 } 1009 1010 kq = fp->f_data; 1011 nerrors = 0; 1012 1013 while ((n = SCARG(uap, nchanges)) > 0) { 1014 if (n > nitems(kev)) 1015 n = nitems(kev); 1016 error = copyin(SCARG(uap, changelist), kev, 1017 n * sizeof(struct kevent)); 1018 if (error) 1019 goto done; 1020 #ifdef KTRACE 1021 if (KTRPOINT(p, KTR_STRUCT)) 1022 ktrevent(p, kev, n); 1023 #endif 1024 for (i = 0; i < n; i++) { 1025 kevp = &kev[i]; 1026 kevp->flags &= ~EV_SYSFLAGS; 1027 error = kqueue_register(kq, kevp, 0, p); 1028 if (error || (kevp->flags & EV_RECEIPT)) { 1029 if (SCARG(uap, nevents) != 0) { 1030 kevp->flags = EV_ERROR; 1031 kevp->data = error; 1032 copyout(kevp, SCARG(uap, eventlist), 1033 sizeof(*kevp)); 1034 SCARG(uap, eventlist)++; 1035 SCARG(uap, nevents)--; 1036 nerrors++; 1037 } else { 1038 goto done; 1039 } 1040 } 1041 } 1042 SCARG(uap, nchanges) -= n; 1043 SCARG(uap, changelist) += n; 1044 } 1045 if (nerrors) { 1046 *retval = nerrors; 1047 error = 0; 1048 goto done; 1049 } 1050 1051 kqueue_scan_setup(&scan, kq); 1052 FRELE(fp, p); 1053 /* 1054 * Collect as many events as we can. The timeout on successive 1055 * loops is disabled (kqueue_scan() becomes non-blocking). 
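 * Once at least one event has been collected, kqueue_scan() returns
 * without sleeping, so later iterations only drain already-pending events.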
1056 */ 1057 total = 0; 1058 error = 0; 1059 while ((n = SCARG(uap, nevents) - total) > 0) { 1060 if (n > nitems(kev)) 1061 n = nitems(kev); 1062 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 1063 if (ready == 0) 1064 break; 1065 error = copyout(kev, SCARG(uap, eventlist) + total, 1066 sizeof(struct kevent) * ready); 1067 #ifdef KTRACE 1068 if (KTRPOINT(p, KTR_STRUCT)) 1069 ktrevent(p, kev, ready); 1070 #endif 1071 total += ready; 1072 if (error || ready < n) 1073 break; 1074 } 1075 kqueue_scan_finish(&scan); 1076 *retval = total; 1077 return (error); 1078 1079 done: 1080 FRELE(fp, p); 1081 return (error); 1082 } 1083 1084 #ifdef KQUEUE_DEBUG 1085 void 1086 kqueue_do_check(struct kqueue *kq, const char *func, int line) 1087 { 1088 struct knote *kn; 1089 int count = 0, nmarker = 0; 1090 1091 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1092 1093 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1094 if (kn->kn_filter == EVFILT_MARKER) { 1095 if ((kn->kn_status & KN_QUEUED) != 0) 1096 panic("%s:%d: kq=%p kn=%p marker QUEUED", 1097 func, line, kq, kn); 1098 nmarker++; 1099 } else { 1100 if ((kn->kn_status & KN_ACTIVE) == 0) 1101 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 1102 func, line, kq, kn); 1103 if ((kn->kn_status & KN_QUEUED) == 0) 1104 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 1105 func, line, kq, kn); 1106 if (kn->kn_kq != kq) 1107 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 1108 func, line, kq, kn, kn->kn_kq); 1109 count++; 1110 if (count > kq->kq_count) 1111 goto bad; 1112 } 1113 } 1114 if (count != kq->kq_count) { 1115 bad: 1116 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 1117 func, line, kq, kq->kq_count, count, nmarker); 1118 } 1119 } 1120 #endif 1121 1122 int 1123 kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid, 1124 struct proc *p) 1125 { 1126 struct filedesc *fdp = kq->kq_fdp; 1127 const struct filterops *fops = NULL; 1128 struct file *fp = NULL; 1129 struct knote *kn = NULL, *newkn = NULL; 1130 struct knlist *list = NULL; 1131 int active, error = 0; 1132 1133 KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq)); 1134 1135 if (kev->filter < 0) { 1136 if (kev->filter + EVFILT_SYSCOUNT < 0) 1137 return (EINVAL); 1138 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 1139 } 1140 1141 if (fops == NULL) { 1142 /* 1143 * XXX 1144 * filter attach routine is responsible for ensuring that 1145 * the identifier can be attached to it. 1146 */ 1147 return (EINVAL); 1148 } 1149 1150 if (fops->f_flags & FILTEROP_ISFD) { 1151 /* validate descriptor */ 1152 if (kev->ident > INT_MAX) 1153 return (EBADF); 1154 } 1155 1156 if (kev->flags & EV_ADD) 1157 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 1158 1159 again: 1160 if (fops->f_flags & FILTEROP_ISFD) { 1161 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 1162 error = EBADF; 1163 goto done; 1164 } 1165 mtx_enter(&kq->kq_lock); 1166 if (kev->flags & EV_ADD) 1167 kqueue_expand_list(kq, kev->ident); 1168 if (kev->ident < kq->kq_knlistsize) 1169 list = &kq->kq_knlist[kev->ident]; 1170 } else { 1171 mtx_enter(&kq->kq_lock); 1172 if (kev->flags & EV_ADD) 1173 kqueue_expand_hash(kq); 1174 if (kq->kq_knhashmask != 0) { 1175 list = &kq->kq_knhash[ 1176 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1177 } 1178 } 1179 if (list != NULL) { 1180 SLIST_FOREACH(kn, list, kn_link) { 1181 if (kev->filter == kn->kn_filter && 1182 kev->ident == kn->kn_id && 1183 pollid == kn->kn_pollid) { 1184 if (!knote_acquire(kn, NULL, 0)) { 1185 /* knote_acquire() has released 1186 * kq_lock. 
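 * The knote may be stale; drop the file reference and retry the lookup.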
*/ 1187 if (fp != NULL) { 1188 FRELE(fp, p); 1189 fp = NULL; 1190 } 1191 goto again; 1192 } 1193 break; 1194 } 1195 } 1196 } 1197 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 1198 1199 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 1200 mtx_leave(&kq->kq_lock); 1201 error = ENOENT; 1202 goto done; 1203 } 1204 1205 /* 1206 * kn now contains the matching knote, or NULL if no match. 1207 */ 1208 if (kev->flags & EV_ADD) { 1209 if (kn == NULL) { 1210 kn = newkn; 1211 newkn = NULL; 1212 kn->kn_status = KN_PROCESSING; 1213 kn->kn_fp = fp; 1214 kn->kn_kq = kq; 1215 kn->kn_fop = fops; 1216 1217 /* 1218 * apply reference count to knote structure, and 1219 * do not release it at the end of this routine. 1220 */ 1221 fp = NULL; 1222 1223 kn->kn_sfflags = kev->fflags; 1224 kn->kn_sdata = kev->data; 1225 kev->fflags = 0; 1226 kev->data = 0; 1227 kn->kn_kevent = *kev; 1228 kn->kn_pollid = pollid; 1229 1230 knote_attach(kn); 1231 mtx_leave(&kq->kq_lock); 1232 1233 error = filter_attach(kn); 1234 if (error != 0) { 1235 knote_drop(kn, p); 1236 goto done; 1237 } 1238 1239 /* 1240 * If this is a file descriptor filter, check if 1241 * fd was closed while the knote was being added. 1242 * knote_fdclose() has missed kn if the function 1243 * ran before kn appeared in kq_knlist. 1244 */ 1245 if ((fops->f_flags & FILTEROP_ISFD) && 1246 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 1247 /* 1248 * Drop the knote silently without error 1249 * because another thread might already have 1250 * seen it. This corresponds to the insert 1251 * happening in full before the close. 1252 */ 1253 filter_detach(kn); 1254 knote_drop(kn, p); 1255 goto done; 1256 } 1257 1258 /* Check if there is a pending event. */ 1259 active = filter_process(kn, NULL); 1260 mtx_enter(&kq->kq_lock); 1261 if (active) 1262 knote_activate(kn); 1263 } else if (kn->kn_fop == &badfd_filtops) { 1264 /* 1265 * Nothing expects this badfd knote any longer. 1266 * Drop it to make room for the new knote and retry. 1267 */ 1268 KASSERT(kq == p->p_kq); 1269 mtx_leave(&kq->kq_lock); 1270 filter_detach(kn); 1271 knote_drop(kn, p); 1272 1273 KASSERT(fp != NULL); 1274 FRELE(fp, p); 1275 fp = NULL; 1276 1277 goto again; 1278 } else { 1279 /* 1280 * The user may change some filter values after the 1281 * initial EV_ADD, but doing so will not reset any 1282 * filters which have already been triggered. 1283 */ 1284 mtx_leave(&kq->kq_lock); 1285 active = filter_modify(kev, kn); 1286 mtx_enter(&kq->kq_lock); 1287 if (active) 1288 knote_activate(kn); 1289 if (kev->flags & EV_ERROR) { 1290 error = kev->data; 1291 goto release; 1292 } 1293 } 1294 } else if (kev->flags & EV_DELETE) { 1295 mtx_leave(&kq->kq_lock); 1296 filter_detach(kn); 1297 knote_drop(kn, p); 1298 goto done; 1299 } 1300 1301 if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) 1302 kn->kn_status |= KN_DISABLED; 1303 1304 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 1305 kn->kn_status &= ~KN_DISABLED; 1306 mtx_leave(&kq->kq_lock); 1307 /* Check if there is a pending event. 
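 * filter_process() with a NULL kevent only polls the filter state
 * without consuming the event.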
*/ 1308 active = filter_process(kn, NULL); 1309 mtx_enter(&kq->kq_lock); 1310 if (active) 1311 knote_activate(kn); 1312 } 1313 1314 release: 1315 knote_release(kn); 1316 mtx_leave(&kq->kq_lock); 1317 done: 1318 if (fp != NULL) 1319 FRELE(fp, p); 1320 if (newkn != NULL) 1321 pool_put(&knote_pool, newkn); 1322 return (error); 1323 } 1324 1325 int 1326 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1327 { 1328 struct timespec elapsed, start, stop; 1329 uint64_t nsecs; 1330 int error; 1331 1332 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1333 1334 if (tsp != NULL) { 1335 getnanouptime(&start); 1336 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1337 } else 1338 nsecs = INFSLP; 1339 error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK, 1340 "kqread", nsecs); 1341 if (tsp != NULL) { 1342 getnanouptime(&stop); 1343 timespecsub(&stop, &start, &elapsed); 1344 timespecsub(tsp, &elapsed, tsp); 1345 if (tsp->tv_sec < 0) 1346 timespecclear(tsp); 1347 } 1348 1349 return (error); 1350 } 1351 1352 /* 1353 * Scan the kqueue, blocking if necessary until the target time is reached. 1354 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 1355 * 0 we do not block at all. 1356 */ 1357 int 1358 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1359 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1360 { 1361 struct kqueue *kq = scan->kqs_kq; 1362 struct knote *kn; 1363 int error = 0, nkev = 0; 1364 int reinserted; 1365 1366 if (maxevents == 0) 1367 goto done; 1368 retry: 1369 KASSERT(nkev == 0); 1370 1371 error = 0; 1372 reinserted = 0; 1373 1374 mtx_enter(&kq->kq_lock); 1375 1376 if (kq->kq_state & KQ_DYING) { 1377 mtx_leave(&kq->kq_lock); 1378 error = EBADF; 1379 goto done; 1380 } 1381 1382 if (kq->kq_count == 0) { 1383 /* 1384 * Successive loops are only necessary if there are more 1385 * ready events to gather, so they don't need to block. 1386 */ 1387 if ((tsp != NULL && !timespecisset(tsp)) || 1388 scan->kqs_nevent != 0) { 1389 mtx_leave(&kq->kq_lock); 1390 error = 0; 1391 goto done; 1392 } 1393 kq->kq_state |= KQ_SLEEP; 1394 error = kqueue_sleep(kq, tsp); 1395 /* kqueue_sleep() has released kq_lock. */ 1396 if (error == 0 || error == EWOULDBLOCK) 1397 goto retry; 1398 /* don't restart after signals... */ 1399 if (error == ERESTART) 1400 error = EINTR; 1401 goto done; 1402 } 1403 1404 /* 1405 * Put the end marker in the queue to limit the scan to the events 1406 * that are currently active. This prevents events from being 1407 * recollected if they reactivate during scan. 1408 * 1409 * If a partial scan has been performed already but no events have 1410 * been collected, reposition the end marker to make any new events 1411 * reachable. 1412 */ 1413 if (!scan->kqs_queued) { 1414 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1415 scan->kqs_queued = 1; 1416 } else if (scan->kqs_nevent == 0) { 1417 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1418 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1419 } 1420 1421 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1422 while (nkev < maxevents) { 1423 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1424 if (kn->kn_filter == EVFILT_MARKER) { 1425 if (kn == &scan->kqs_end) 1426 break; 1427 1428 /* Move start marker past another thread's marker. 
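 * Concurrent scans each keep their own start and end markers on the queue.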
*/ 1429 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1430 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1431 kn_tqe); 1432 continue; 1433 } 1434 1435 if (!knote_acquire(kn, NULL, 0)) { 1436 /* knote_acquire() has released kq_lock. */ 1437 mtx_enter(&kq->kq_lock); 1438 continue; 1439 } 1440 1441 kqueue_check(kq); 1442 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1443 kn->kn_status &= ~KN_QUEUED; 1444 kq->kq_count--; 1445 kqueue_check(kq); 1446 1447 if (kn->kn_status & KN_DISABLED) { 1448 knote_release(kn); 1449 continue; 1450 } 1451 1452 mtx_leave(&kq->kq_lock); 1453 1454 /* Drop expired kqpoll knotes. */ 1455 if (p->p_kq == kq && 1456 p->p_kq_serial > (unsigned long)kn->kn_udata) { 1457 filter_detach(kn); 1458 knote_drop(kn, p); 1459 mtx_enter(&kq->kq_lock); 1460 continue; 1461 } 1462 1463 /* 1464 * Invalidate knotes whose vnodes have been revoked. 1465 * This is a workaround; it is tricky to clear existing 1466 * knotes and prevent new ones from being registered 1467 * with the current revocation mechanism. 1468 */ 1469 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && 1470 kn->kn_fp != NULL && 1471 kn->kn_fp->f_type == DTYPE_VNODE) { 1472 struct vnode *vp = kn->kn_fp->f_data; 1473 1474 if (__predict_false(vp->v_op == &dead_vops && 1475 kn->kn_fop != &dead_filtops)) { 1476 filter_detach(kn); 1477 kn->kn_fop = &dead_filtops; 1478 1479 /* 1480 * Check if the event should be delivered. 1481 * Use f_event directly because this is 1482 * a special situation. 1483 */ 1484 if (kn->kn_fop->f_event(kn, 0) == 0) { 1485 filter_detach(kn); 1486 knote_drop(kn, p); 1487 mtx_enter(&kq->kq_lock); 1488 continue; 1489 } 1490 } 1491 } 1492 1493 memset(kevp, 0, sizeof(*kevp)); 1494 if (filter_process(kn, kevp) == 0) { 1495 mtx_enter(&kq->kq_lock); 1496 if ((kn->kn_status & KN_QUEUED) == 0) 1497 kn->kn_status &= ~KN_ACTIVE; 1498 knote_release(kn); 1499 kqueue_check(kq); 1500 continue; 1501 } 1502 1503 /* 1504 * Post-event action on the note 1505 */ 1506 if (kevp->flags & EV_ONESHOT) { 1507 filter_detach(kn); 1508 knote_drop(kn, p); 1509 mtx_enter(&kq->kq_lock); 1510 } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { 1511 mtx_enter(&kq->kq_lock); 1512 if (kevp->flags & EV_DISPATCH) 1513 kn->kn_status |= KN_DISABLED; 1514 if ((kn->kn_status & KN_QUEUED) == 0) 1515 kn->kn_status &= ~KN_ACTIVE; 1516 knote_release(kn); 1517 } else { 1518 mtx_enter(&kq->kq_lock); 1519 if ((kn->kn_status & KN_QUEUED) == 0) { 1520 kqueue_check(kq); 1521 kq->kq_count++; 1522 kn->kn_status |= KN_QUEUED; 1523 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1524 /* Wakeup is done after loop. 
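 * The reinserted flag causes kqueue_wakeup() to be called once after
 * the loop.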
*/ 1525 reinserted = 1; 1526 } 1527 knote_release(kn); 1528 } 1529 kqueue_check(kq); 1530 1531 kevp++; 1532 nkev++; 1533 scan->kqs_nevent++; 1534 } 1535 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1536 if (reinserted && kq->kq_count != 0) 1537 kqueue_wakeup(kq); 1538 mtx_leave(&kq->kq_lock); 1539 if (scan->kqs_nevent == 0) 1540 goto retry; 1541 done: 1542 *errorp = error; 1543 return (nkev); 1544 } 1545 1546 void 1547 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1548 { 1549 memset(scan, 0, sizeof(*scan)); 1550 1551 KQREF(kq); 1552 scan->kqs_kq = kq; 1553 scan->kqs_start.kn_filter = EVFILT_MARKER; 1554 scan->kqs_start.kn_status = KN_PROCESSING; 1555 scan->kqs_end.kn_filter = EVFILT_MARKER; 1556 scan->kqs_end.kn_status = KN_PROCESSING; 1557 } 1558 1559 void 1560 kqueue_scan_finish(struct kqueue_scan_state *scan) 1561 { 1562 struct kqueue *kq = scan->kqs_kq; 1563 1564 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1565 KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); 1566 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1567 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1568 1569 if (scan->kqs_queued) { 1570 scan->kqs_queued = 0; 1571 mtx_enter(&kq->kq_lock); 1572 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1573 mtx_leave(&kq->kq_lock); 1574 } 1575 KQRELE(kq); 1576 } 1577 1578 /* 1579 * XXX 1580 * This could be expanded to call kqueue_scan, if desired. 1581 */ 1582 int 1583 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1584 { 1585 return (ENXIO); 1586 } 1587 1588 int 1589 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1590 { 1591 return (ENXIO); 1592 } 1593 1594 int 1595 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1596 { 1597 return (ENOTTY); 1598 } 1599 1600 int 1601 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1602 { 1603 struct kqueue *kq = fp->f_data; 1604 1605 memset(st, 0, sizeof(*st)); 1606 st->st_size = kq->kq_count; /* unlocked read */ 1607 st->st_blksize = sizeof(struct kevent); 1608 st->st_mode = S_IFIFO; 1609 return (0); 1610 } 1611 1612 void 1613 kqueue_purge(struct proc *p, struct kqueue *kq) 1614 { 1615 int i; 1616 1617 mtx_enter(&kq->kq_lock); 1618 for (i = 0; i < kq->kq_knlistsize; i++) 1619 knote_remove(p, kq, &kq->kq_knlist, i, 1); 1620 if (kq->kq_knhashmask != 0) { 1621 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1622 knote_remove(p, kq, &kq->kq_knhash, i, 1); 1623 } 1624 mtx_leave(&kq->kq_lock); 1625 } 1626 1627 void 1628 kqueue_terminate(struct proc *p, struct kqueue *kq) 1629 { 1630 struct knote *kn; 1631 int state; 1632 1633 mtx_enter(&kq->kq_lock); 1634 1635 /* 1636 * Any remaining entries should be scan markers. 1637 * They are removed when the ongoing scans finish. 1638 */ 1639 KASSERT(kq->kq_count == 0); 1640 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) 1641 KASSERT(kn->kn_filter == EVFILT_MARKER); 1642 1643 kq->kq_state |= KQ_DYING; 1644 state = kq->kq_state; 1645 kqueue_wakeup(kq); 1646 mtx_leave(&kq->kq_lock); 1647 1648 /* 1649 * Any knotes that were attached to this kqueue were deleted 1650 * by knote_fdclose() when this kqueue's file descriptor was closed. 
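 * Hence kq_klist must be empty by now; only the deferred wakeup task
 * may still be pending and is cancelled below.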
1651 */ 1652 KASSERT(klist_empty(&kq->kq_klist)); 1653 if (state & KQ_TASK) 1654 taskq_del_barrier(systqmp, &kq->kq_task); 1655 } 1656 1657 int 1658 kqueue_close(struct file *fp, struct proc *p) 1659 { 1660 struct kqueue *kq = fp->f_data; 1661 1662 fp->f_data = NULL; 1663 1664 kqueue_purge(p, kq); 1665 kqueue_terminate(p, kq); 1666 1667 KQRELE(kq); 1668 1669 return (0); 1670 } 1671 1672 static void 1673 kqueue_task(void *arg) 1674 { 1675 struct kqueue *kq = arg; 1676 1677 knote(&kq->kq_klist, 0); 1678 } 1679 1680 void 1681 kqueue_wakeup(struct kqueue *kq) 1682 { 1683 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1684 1685 if (kq->kq_state & KQ_SLEEP) { 1686 kq->kq_state &= ~KQ_SLEEP; 1687 wakeup(kq); 1688 } 1689 if (!klist_empty(&kq->kq_klist)) { 1690 /* Defer activation to avoid recursion. */ 1691 kq->kq_state |= KQ_TASK; 1692 task_add(systqmp, &kq->kq_task); 1693 } 1694 } 1695 1696 static void 1697 kqueue_expand_hash(struct kqueue *kq) 1698 { 1699 struct knlist *hash; 1700 u_long hashmask; 1701 1702 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1703 1704 if (kq->kq_knhashmask == 0) { 1705 mtx_leave(&kq->kq_lock); 1706 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1707 mtx_enter(&kq->kq_lock); 1708 if (kq->kq_knhashmask == 0) { 1709 kq->kq_knhash = hash; 1710 kq->kq_knhashmask = hashmask; 1711 } else { 1712 /* Another thread has allocated the hash. */ 1713 mtx_leave(&kq->kq_lock); 1714 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1715 mtx_enter(&kq->kq_lock); 1716 } 1717 } 1718 } 1719 1720 static void 1721 kqueue_expand_list(struct kqueue *kq, int fd) 1722 { 1723 struct knlist *list, *olist; 1724 int size, osize; 1725 1726 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1727 1728 if (kq->kq_knlistsize <= fd) { 1729 size = kq->kq_knlistsize; 1730 mtx_leave(&kq->kq_lock); 1731 while (size <= fd) 1732 size += KQEXTENT; 1733 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1734 mtx_enter(&kq->kq_lock); 1735 if (kq->kq_knlistsize <= fd) { 1736 memcpy(list, kq->kq_knlist, 1737 kq->kq_knlistsize * sizeof(*list)); 1738 memset(&list[kq->kq_knlistsize], 0, 1739 (size - kq->kq_knlistsize) * sizeof(*list)); 1740 olist = kq->kq_knlist; 1741 osize = kq->kq_knlistsize; 1742 kq->kq_knlist = list; 1743 kq->kq_knlistsize = size; 1744 mtx_leave(&kq->kq_lock); 1745 free(olist, M_KEVENT, osize * sizeof(*list)); 1746 mtx_enter(&kq->kq_lock); 1747 } else { 1748 /* Another thread has expanded the list. */ 1749 mtx_leave(&kq->kq_lock); 1750 free(list, M_KEVENT, size * sizeof(*list)); 1751 mtx_enter(&kq->kq_lock); 1752 } 1753 } 1754 } 1755 1756 /* 1757 * Acquire a knote, return non-zero on success, 0 on failure. 1758 * 1759 * If we cannot acquire the knote we sleep and return 0. The knote 1760 * may be stale on return in this case and the caller must restart 1761 * whatever loop they are in. 1762 * 1763 * If we are about to sleep and klist is non-NULL, the list is unlocked 1764 * before sleep and remains unlocked on return. 1765 */ 1766 int 1767 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1768 { 1769 struct kqueue *kq = kn->kn_kq; 1770 1771 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1772 KASSERT(kn->kn_filter != EVFILT_MARKER); 1773 1774 if (kn->kn_status & KN_PROCESSING) { 1775 kn->kn_status |= KN_WAITING; 1776 if (klist != NULL) { 1777 mtx_leave(&kq->kq_lock); 1778 klist_unlock(klist, ls); 1779 /* XXX Timeout resolves potential loss of wakeup. 
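 * With kq_lock and the klist both released, the wakeup from
 * knote_release() can fire before this thread goes to sleep.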
*/ 1780 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1781 } else { 1782 msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts", 1783 SEC_TO_NSEC(1)); 1784 } 1785 /* knote may be stale now */ 1786 return (0); 1787 } 1788 kn->kn_status |= KN_PROCESSING; 1789 return (1); 1790 } 1791 1792 /* 1793 * Release an acquired knote, clearing KN_PROCESSING. 1794 */ 1795 void 1796 knote_release(struct knote *kn) 1797 { 1798 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1799 KASSERT(kn->kn_filter != EVFILT_MARKER); 1800 KASSERT(kn->kn_status & KN_PROCESSING); 1801 1802 if (kn->kn_status & KN_WAITING) { 1803 kn->kn_status &= ~KN_WAITING; 1804 wakeup(kn); 1805 } 1806 kn->kn_status &= ~KN_PROCESSING; 1807 /* kn should not be accessed anymore */ 1808 } 1809 1810 /* 1811 * activate one knote. 1812 */ 1813 void 1814 knote_activate(struct knote *kn) 1815 { 1816 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1817 1818 kn->kn_status |= KN_ACTIVE; 1819 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1820 knote_enqueue(kn); 1821 } 1822 1823 /* 1824 * walk down a list of knotes, activating them if their event has triggered. 1825 */ 1826 void 1827 knote(struct klist *list, long hint) 1828 { 1829 int ls; 1830 1831 ls = klist_lock(list); 1832 knote_locked(list, hint); 1833 klist_unlock(list, ls); 1834 } 1835 1836 void 1837 knote_locked(struct klist *list, long hint) 1838 { 1839 struct knote *kn, *kn0; 1840 struct kqueue *kq; 1841 1842 KLIST_ASSERT_LOCKED(list); 1843 1844 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) { 1845 if (filter_event(kn, hint)) { 1846 kq = kn->kn_kq; 1847 mtx_enter(&kq->kq_lock); 1848 knote_activate(kn); 1849 mtx_leave(&kq->kq_lock); 1850 } 1851 } 1852 } 1853 1854 /* 1855 * remove all knotes from a specified knlist 1856 */ 1857 void 1858 knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, int idx, 1859 int purge) 1860 { 1861 struct knote *kn; 1862 1863 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1864 1865 /* Always fetch array pointer as another thread can resize kq_knlist. */ 1866 while ((kn = SLIST_FIRST(*plist + idx)) != NULL) { 1867 KASSERT(kn->kn_kq == kq); 1868 1869 if (!purge) { 1870 /* Skip pending badfd knotes. */ 1871 while (kn->kn_fop == &badfd_filtops) { 1872 kn = SLIST_NEXT(kn, kn_link); 1873 if (kn == NULL) 1874 return; 1875 KASSERT(kn->kn_kq == kq); 1876 } 1877 } 1878 1879 if (!knote_acquire(kn, NULL, 0)) { 1880 /* knote_acquire() has released kq_lock. */ 1881 mtx_enter(&kq->kq_lock); 1882 continue; 1883 } 1884 mtx_leave(&kq->kq_lock); 1885 filter_detach(kn); 1886 1887 /* 1888 * Notify poll(2) and select(2) when a monitored 1889 * file descriptor is closed. 1890 * 1891 * This reuses the original knote for delivering the 1892 * notification so as to avoid allocating memory. 
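 * The knote is switched to badfd_filtops, which reports an EV_ERROR
 * oneshot event with EBADF in the data field.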
1893 */ 1894 if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) && 1895 !(p->p_kq == kq && 1896 p->p_kq_serial > (unsigned long)kn->kn_udata) && 1897 kn->kn_fop != &badfd_filtops) { 1898 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1899 FRELE(kn->kn_fp, p); 1900 kn->kn_fp = NULL; 1901 1902 kn->kn_fop = &badfd_filtops; 1903 filter_event(kn, 0); 1904 mtx_enter(&kq->kq_lock); 1905 knote_activate(kn); 1906 knote_release(kn); 1907 continue; 1908 } 1909 1910 knote_drop(kn, p); 1911 mtx_enter(&kq->kq_lock); 1912 } 1913 } 1914 1915 /* 1916 * remove all knotes referencing a specified fd 1917 */ 1918 void 1919 knote_fdclose(struct proc *p, int fd) 1920 { 1921 struct filedesc *fdp = p->p_p->ps_fd; 1922 struct kqueue *kq; 1923 1924 /* 1925 * fdplock can be ignored if the file descriptor table is being freed 1926 * because no other thread can access the fdp. 1927 */ 1928 if (fdp->fd_refcnt != 0) 1929 fdpassertlocked(fdp); 1930 1931 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1932 mtx_enter(&kq->kq_lock); 1933 if (fd < kq->kq_knlistsize) 1934 knote_remove(p, kq, &kq->kq_knlist, fd, 0); 1935 mtx_leave(&kq->kq_lock); 1936 } 1937 } 1938 1939 /* 1940 * handle a process exiting, including the triggering of NOTE_EXIT notes 1941 * XXX this could be more efficient, doing a single pass down the klist 1942 */ 1943 void 1944 knote_processexit(struct process *pr) 1945 { 1946 KERNEL_ASSERT_LOCKED(); 1947 1948 knote_locked(&pr->ps_klist, NOTE_EXIT); 1949 1950 /* remove other knotes hanging off the process */ 1951 klist_invalidate(&pr->ps_klist); 1952 } 1953 1954 void 1955 knote_attach(struct knote *kn) 1956 { 1957 struct kqueue *kq = kn->kn_kq; 1958 struct knlist *list; 1959 1960 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1961 KASSERT(kn->kn_status & KN_PROCESSING); 1962 1963 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1964 KASSERT(kq->kq_knlistsize > kn->kn_id); 1965 list = &kq->kq_knlist[kn->kn_id]; 1966 } else { 1967 KASSERT(kq->kq_knhashmask != 0); 1968 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1969 } 1970 SLIST_INSERT_HEAD(list, kn, kn_link); 1971 kq->kq_nknotes++; 1972 } 1973 1974 void 1975 knote_detach(struct knote *kn) 1976 { 1977 struct kqueue *kq = kn->kn_kq; 1978 struct knlist *list; 1979 1980 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1981 KASSERT(kn->kn_status & KN_PROCESSING); 1982 1983 kq->kq_nknotes--; 1984 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1985 list = &kq->kq_knlist[kn->kn_id]; 1986 else 1987 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1988 SLIST_REMOVE(list, kn, knote, kn_link); 1989 } 1990 1991 /* 1992 * should be called at spl == 0, since we don't want to hold spl 1993 * while calling FRELE and pool_put. 
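 * knote_drop() detaches the knote from the kqueue, releases its file
 * reference if it holds one, and returns the knote to the pool.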
1994 */ 1995 void 1996 knote_drop(struct knote *kn, struct proc *p) 1997 { 1998 struct kqueue *kq = kn->kn_kq; 1999 2000 KASSERT(kn->kn_filter != EVFILT_MARKER); 2001 2002 mtx_enter(&kq->kq_lock); 2003 knote_detach(kn); 2004 if (kn->kn_status & KN_QUEUED) 2005 knote_dequeue(kn); 2006 if (kn->kn_status & KN_WAITING) { 2007 kn->kn_status &= ~KN_WAITING; 2008 wakeup(kn); 2009 } 2010 mtx_leave(&kq->kq_lock); 2011 2012 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 2013 FRELE(kn->kn_fp, p); 2014 pool_put(&knote_pool, kn); 2015 } 2016 2017 2018 void 2019 knote_enqueue(struct knote *kn) 2020 { 2021 struct kqueue *kq = kn->kn_kq; 2022 2023 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 2024 KASSERT(kn->kn_filter != EVFILT_MARKER); 2025 KASSERT((kn->kn_status & KN_QUEUED) == 0); 2026 2027 kqueue_check(kq); 2028 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 2029 kn->kn_status |= KN_QUEUED; 2030 kq->kq_count++; 2031 kqueue_check(kq); 2032 kqueue_wakeup(kq); 2033 } 2034 2035 void 2036 knote_dequeue(struct knote *kn) 2037 { 2038 struct kqueue *kq = kn->kn_kq; 2039 2040 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 2041 KASSERT(kn->kn_filter != EVFILT_MARKER); 2042 KASSERT(kn->kn_status & KN_QUEUED); 2043 2044 kqueue_check(kq); 2045 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 2046 kn->kn_status &= ~KN_QUEUED; 2047 kq->kq_count--; 2048 kqueue_check(kq); 2049 } 2050 2051 /* 2052 * Assign parameters to the knote. 2053 * 2054 * The knote's object lock must be held. 2055 */ 2056 void 2057 knote_assign(const struct kevent *kev, struct knote *kn) 2058 { 2059 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2060 KERNEL_ASSERT_LOCKED(); 2061 2062 kn->kn_sfflags = kev->fflags; 2063 kn->kn_sdata = kev->data; 2064 kn->kn_udata = kev->udata; 2065 } 2066 2067 /* 2068 * Submit the knote's event for delivery. 2069 * 2070 * The knote's object lock must be held. 2071 */ 2072 void 2073 knote_submit(struct knote *kn, struct kevent *kev) 2074 { 2075 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2076 KERNEL_ASSERT_LOCKED(); 2077 2078 if (kev != NULL) { 2079 *kev = kn->kn_kevent; 2080 if (kn->kn_flags & EV_CLEAR) { 2081 kn->kn_fflags = 0; 2082 kn->kn_data = 0; 2083 } 2084 } 2085 } 2086 2087 void 2088 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 2089 { 2090 SLIST_INIT(&klist->kl_list); 2091 klist->kl_ops = ops; 2092 klist->kl_arg = arg; 2093 } 2094 2095 void 2096 klist_free(struct klist *klist) 2097 { 2098 KASSERT(SLIST_EMPTY(&klist->kl_list)); 2099 } 2100 2101 void 2102 klist_insert(struct klist *klist, struct knote *kn) 2103 { 2104 int ls; 2105 2106 ls = klist_lock(klist); 2107 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2108 klist_unlock(klist, ls); 2109 } 2110 2111 void 2112 klist_insert_locked(struct klist *klist, struct knote *kn) 2113 { 2114 KLIST_ASSERT_LOCKED(klist); 2115 2116 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2117 } 2118 2119 void 2120 klist_remove(struct klist *klist, struct knote *kn) 2121 { 2122 int ls; 2123 2124 ls = klist_lock(klist); 2125 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2126 klist_unlock(klist, ls); 2127 } 2128 2129 void 2130 klist_remove_locked(struct klist *klist, struct knote *kn) 2131 { 2132 KLIST_ASSERT_LOCKED(klist); 2133 2134 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2135 } 2136 2137 /* 2138 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 2139 * 2140 * The caller of this function must not hold any locks that can block 2141 * filterops callbacks that run with KN_PROCESSING. 
2142 * Otherwise this function might deadlock. 2143 */ 2144 void 2145 klist_invalidate(struct klist *list) 2146 { 2147 struct knote *kn; 2148 struct kqueue *kq; 2149 struct proc *p = curproc; 2150 int ls; 2151 2152 NET_ASSERT_UNLOCKED(); 2153 2154 ls = klist_lock(list); 2155 while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { 2156 kq = kn->kn_kq; 2157 mtx_enter(&kq->kq_lock); 2158 if (!knote_acquire(kn, list, ls)) { 2159 /* knote_acquire() has released kq_lock 2160 * and klist lock. */ 2161 ls = klist_lock(list); 2162 continue; 2163 } 2164 mtx_leave(&kq->kq_lock); 2165 klist_unlock(list, ls); 2166 filter_detach(kn); 2167 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 2168 kn->kn_fop = &dead_filtops; 2169 filter_event(kn, 0); 2170 mtx_enter(&kq->kq_lock); 2171 knote_activate(kn); 2172 knote_release(kn); 2173 mtx_leave(&kq->kq_lock); 2174 } else { 2175 knote_drop(kn, p); 2176 } 2177 ls = klist_lock(list); 2178 } 2179 klist_unlock(list, ls); 2180 } 2181 2182 static int 2183 klist_lock(struct klist *list) 2184 { 2185 int ls = 0; 2186 2187 if (list->kl_ops != NULL) { 2188 ls = list->kl_ops->klo_lock(list->kl_arg); 2189 } else { 2190 KERNEL_LOCK(); 2191 ls = splhigh(); 2192 } 2193 return ls; 2194 } 2195 2196 static void 2197 klist_unlock(struct klist *list, int ls) 2198 { 2199 if (list->kl_ops != NULL) { 2200 list->kl_ops->klo_unlock(list->kl_arg, ls); 2201 } else { 2202 splx(ls); 2203 KERNEL_UNLOCK(); 2204 } 2205 } 2206 2207 static void 2208 klist_mutex_assertlk(void *arg) 2209 { 2210 struct mutex *mtx = arg; 2211 2212 (void)mtx; 2213 2214 MUTEX_ASSERT_LOCKED(mtx); 2215 } 2216 2217 static int 2218 klist_mutex_lock(void *arg) 2219 { 2220 struct mutex *mtx = arg; 2221 2222 mtx_enter(mtx); 2223 return 0; 2224 } 2225 2226 static void 2227 klist_mutex_unlock(void *arg, int s) 2228 { 2229 struct mutex *mtx = arg; 2230 2231 mtx_leave(mtx); 2232 } 2233 2234 static const struct klistops mutex_klistops = { 2235 .klo_assertlk = klist_mutex_assertlk, 2236 .klo_lock = klist_mutex_lock, 2237 .klo_unlock = klist_mutex_unlock, 2238 }; 2239 2240 void 2241 klist_init_mutex(struct klist *klist, struct mutex *mtx) 2242 { 2243 klist_init(klist, &mutex_klistops, mtx); 2244 } 2245 2246 static void 2247 klist_rwlock_assertlk(void *arg) 2248 { 2249 struct rwlock *rwl = arg; 2250 2251 (void)rwl; 2252 2253 rw_assert_wrlock(rwl); 2254 } 2255 2256 static int 2257 klist_rwlock_lock(void *arg) 2258 { 2259 struct rwlock *rwl = arg; 2260 2261 rw_enter_write(rwl); 2262 return 0; 2263 } 2264 2265 static void 2266 klist_rwlock_unlock(void *arg, int s) 2267 { 2268 struct rwlock *rwl = arg; 2269 2270 rw_exit_write(rwl); 2271 } 2272 2273 static const struct klistops rwlock_klistops = { 2274 .klo_assertlk = klist_rwlock_assertlk, 2275 .klo_lock = klist_rwlock_lock, 2276 .klo_unlock = klist_rwlock_unlock, 2277 }; 2278 2279 void 2280 klist_init_rwlock(struct klist *klist, struct rwlock *rwl) 2281 { 2282 klist_init(klist, &rwlock_klistops, rwl); 2283 } 2284