/*	$OpenBSD: kern_event.c,v 1.160 2021/01/27 02:58:03 visa Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/syscallargs.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/wait.h>

#ifdef DIAGNOSTIC
#define KLIST_ASSERT_LOCKED(kl) do {					\
	if ((kl)->kl_ops != NULL)					\
		(kl)->kl_ops->klo_assertlk((kl)->kl_arg);		\
	else								\
		KERNEL_ASSERT_LOCKED();					\
} while (0)
#else
#define KLIST_ASSERT_LOCKED(kl)	((void)(kl))
#endif

struct	kqueue *kqueue_alloc(struct filedesc *);
void	kqueue_terminate(struct proc *p, struct kqueue *);
void	kqueue_init(void);
void	KQREF(struct kqueue *);
void	KQRELE(struct kqueue *);

int	kqueue_sleep(struct kqueue *, struct timespec *);

int	kqueue_read(struct file *, struct uio *, int);
int	kqueue_write(struct file *, struct uio *, int);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_poll(struct file *fp, int events, struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

#ifdef KQUEUE_DEBUG
void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif

void	kqpoll_dequeue(struct proc *p);

static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);

const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_poll	= kqueue_poll,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_detach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
int	knote_acquire(struct knote *kn, struct klist *, int);
void	knote_release(struct knote *kn);
void	knote_activate(struct knote *kn);
void	knote_remove(struct proc *p, struct knlist *list, int purge);

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timer(struct knote *kn, long hint);
void	filt_seltruedetach(struct knote *kn);

const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
};

const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= filt_timer,
};

struct	pool knote_pool;
struct	pool kqueue_pool;
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/*
 * Table for all system-defined filters.
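 * The array is indexed with the one's complement of the (negative)
 * EVFILT_* value; see the sysfilt_ops lookup in kqueue_register().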
 */
const struct filterops *const sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_DEVICE */
	&file_filtops,			/* EVFILT_EXCEPT */
};

void
KQREF(struct kqueue *kq)
{
	atomic_inc_int(&kq->kq_refs);
}

void
KQRELE(struct kqueue *kq)
{
	struct filedesc *fdp;

	if (atomic_dec_int_nv(&kq->kq_refs) > 0)
		return;

	fdp = kq->kq_fdp;
	if (rw_status(&fdp->fd_lock) == RW_WRITE) {
		LIST_REMOVE(kq, kq_next);
	} else {
		fdplock(fdp);
		LIST_REMOVE(kq, kq_next);
		fdpunlock(fdp);
	}

	KASSERT(TAILQ_EMPTY(&kq->kq_head));

	free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
	    sizeof(struct knlist));
	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
	pool_put(&kqueue_pool, kq);
}

void
kqueue_init(void)
{
	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR,
	    PR_WAITOK, "kqueuepl", NULL);
	pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR,
	    PR_WAITOK, "knotepl", NULL);
}

int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return fp->f_ops->fo_kqfilter(fp, kn);
}

int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &kqread_filtops;
	klist_insert_locked(&kq->kq_sel.si_note, kn);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	klist_remove_locked(&kq->kq_sel.si_note, kn);
}

int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

int
filt_procattach(struct knote *kn)
{
	struct process *pr;
	int s;

	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
		return pledge_fail(curproc, EPERM, PLEDGE_PROC);

	if (kn->kn_id > PID_MAX)
		return ESRCH;

	pr = prfind(kn->kn_id);
	if (pr == NULL)
		return (ESRCH);

	/* exiting processes can't be specified */
	if (pr->ps_flags & PS_EXITING)
		return (ESRCH);

	kn->kn_ptr.p_process = pr;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	s = splhigh();
	klist_insert_locked(&pr->ps_klist, kn);
	splx(s);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
	struct process *pr = kn->kn_ptr.p_process;
	int s;

	if (kn->kn_status & KN_DETACHED)
		return;

	s = splhigh();
	klist_remove_locked(&pr->ps_klist, kn);
	splx(s);
}

int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished and remove it
	 * from the process's klist
	 */
	if (event == NOTE_EXIT) {
		struct process *pr = kn->kn_ptr.p_process;
		int s;

		s = splhigh();
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
		klist_remove_locked(&pr->ps_klist, kn);
		splx(s);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		memset(&kev, 0, sizeof(kev));
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timer_timeout_add(struct knote *kn)
{
	struct timeval tv;
	struct timeout *to = kn->kn_hook;
	int tticks;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);
	/* Remove extra tick from tvtohz() if timeout has fired before. */
	if (timeout_triggered(to))
		tticks--;
	timeout_add(to, (tticks > 0) ? tticks : 1);
}

void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;

	kn->kn_data++;
	knote_activate(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0)
		filt_timer_timeout_add(kn);
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;

	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	kn->kn_hook = to;
	filt_timer_timeout_add(kn);

	return (0);
}

void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del(to);
	free(to, M_KEVENT, sizeof(*to));
	kq_ntimeouts--;
}

int
filt_timer(struct knote *kn, long hint)
{
	return (kn->kn_data != 0);
}

/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{
	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does.
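	 * Callers that need an always-ready kqfilter entry point can use
	 * seltrue_kqfilter() below, which installs this routine as the
	 * filter's event method.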
	 */
	kn->kn_data = 0;
	return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, which
 * has the same effect as a filter using filt_seltrue() as its filter
 * method.
 */
void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_seltruedetach,
	.f_event	= filt_seltrue,
};

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

static int
filt_dead(struct knote *kn, long hint)
{
	kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	if (kn->kn_flags & __EV_POLL)
		kn->kn_flags |= __EV_HUP;
	kn->kn_data = 0;
	return (1);
}

static void
filt_deaddetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops dead_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_dead,
};

static int
filt_badfd(struct knote *kn, long hint)
{
	kn->kn_flags |= (EV_ERROR | EV_ONESHOT);
	kn->kn_data = EBADF;
	return (1);
}

/* For use with kqpoll. */
const struct filterops badfd_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_badfd,
};

void
kqpoll_init(void)
{
	struct proc *p = curproc;
	struct filedesc *fdp;

	if (p->p_kq != NULL) {
		/*
		 * Discard any knotes that have been enqueued after
		 * the previous scan.
		 * This prevents accumulation of enqueued badfd knotes
		 * in case the scan does not make progress for some reason.
		 */
		kqpoll_dequeue(p);
		return;
	}

	p->p_kq = kqueue_alloc(p->p_fd);
	p->p_kq_serial = arc4random();
	fdp = p->p_fd;
	fdplock(fdp);
	LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next);
	fdpunlock(fdp);
}

void
kqpoll_exit(void)
{
	struct proc *p = curproc;

	if (p->p_kq == NULL)
		return;

	kqueue_purge(p, p->p_kq);
	/* Clear any detached knotes that remain in the queue. */
	kqpoll_dequeue(p);
	kqueue_terminate(p, p->p_kq);
	KASSERT(p->p_kq->kq_refs == 1);
	KQRELE(p->p_kq);
	p->p_kq = NULL;
}

void
kqpoll_dequeue(struct proc *p)
{
	struct knote *kn;
	struct kqueue *kq = p->p_kq;
	int s;

	s = splhigh();
	while ((kn = TAILQ_FIRST(&kq->kq_head)) != NULL) {
		/* This kqueue should not be scanned by other threads. */
		KASSERT(kn->kn_filter != EVFILT_MARKER);

		if (!knote_acquire(kn, NULL, 0))
			continue;

		kqueue_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;

		splx(s);
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p);
		s = splhigh();
		kqueue_check(kq);
	}
	splx(s);
}

struct kqueue *
kqueue_alloc(struct filedesc *fdp)
{
	struct kqueue *kq;

	kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO);
	kq->kq_refs = 1;
	kq->kq_fdp = fdp;
	TAILQ_INIT(&kq->kq_head);
	task_set(&kq->kq_task, kqueue_task, kq);

	return (kq);
}

int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	kq = kqueue_alloc(fdp);

	fdplock(fdp);
	error = falloc(p, &fp, &fd);
	if (error)
		goto out;
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	*retval = fd;
	LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next);
	kq = NULL;
	fdinsert(fdp, fd, 0, fp);
	FRELE(fp, p);
out:
	fdpunlock(fdp);
	if (kq != NULL)
		pool_put(&kqueue_pool, kq);
	return (error);
}

int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct kqueue_scan_state scan;
	struct filedesc *fdp = p->p_fd;
	struct sys_kevent_args /* {
		syscallarg(int)	fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int)	nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int)	nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	struct timespec *tsp = NULL;
	int i, n, nerrors, error;
	int ready, total;
	struct kevent kev[KQ_NEVENTS];

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		error = EBADF;
		goto done;
	}

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts)) {
			error = EINVAL;
			goto done;
		}
		tsp = &ts;
	}

	kq = fp->f_data;
	nerrors = 0;

	while ((n = SCARG(uap, nchanges)) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		error = copyin(SCARG(uap, changelist), kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, n);
#endif
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, p);
			if (error || (kevp->flags & EV_RECEIPT)) {
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					copyout(kevp, SCARG(uap, eventlist),
					    sizeof(*kevp));
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	kqueue_scan_setup(&scan, kq);
	FRELE(fp, p);
	/*
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
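	 * Events are copied out in batches of at most KQ_NEVENTS, the
	 * size of the on-stack kev array.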
	 */
	total = 0;
	error = 0;
	while ((n = SCARG(uap, nevents) - total) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
		if (ready == 0)
			break;
		error = copyout(kev, SCARG(uap, eventlist) + total,
		    sizeof(struct kevent) * ready);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, ready);
#endif
		total += ready;
		if (error || ready < n)
			break;
	}
	kqueue_scan_finish(&scan);
	*retval = total;
	return (error);

done:
	FRELE(fp, p);
	return (error);
}

#ifdef KQUEUE_DEBUG
void
kqueue_do_check(struct kqueue *kq, const char *func, int line)
{
	struct knote *kn;
	int count = 0, nmarker = 0;

	KERNEL_ASSERT_LOCKED();
	splassert(IPL_HIGH);

	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if (kn->kn_filter == EVFILT_MARKER) {
			if ((kn->kn_status & KN_QUEUED) != 0)
				panic("%s:%d: kq=%p kn=%p marker QUEUED",
				    func, line, kq, kn);
			nmarker++;
		} else {
			if ((kn->kn_status & KN_ACTIVE) == 0)
				panic("%s:%d: kq=%p kn=%p knote !ACTIVE",
				    func, line, kq, kn);
			if ((kn->kn_status & KN_QUEUED) == 0)
				panic("%s:%d: kq=%p kn=%p knote !QUEUED",
				    func, line, kq, kn);
			if (kn->kn_kq != kq)
				panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq",
				    func, line, kq, kn, kn->kn_kq);
			count++;
			if (count > kq->kq_count)
				goto bad;
		}
	}
	if (count != kq->kq_count) {
bad:
		panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d",
		    func, line, kq, kq->kq_count, count, nmarker);
	}
}
#endif

int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	const struct filterops *fops = NULL;
	struct file *fp = NULL;
	struct knote *kn = NULL, *newkn = NULL;
	struct knlist *list = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	}

	if (fops == NULL) {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		return (EINVAL);
	}

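	/*
	 * File descriptor identifiers must fit in an int because they
	 * are later passed to fd_getfile().
	 */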
	if (fops->f_flags & FILTEROP_ISFD) {
		/* validate descriptor */
		if (kev->ident > INT_MAX)
			return (EBADF);
	}

	if (kev->flags & EV_ADD)
		newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);

again:
	if (fops->f_flags & FILTEROP_ISFD) {
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
			error = EBADF;
			goto done;
		}
		if (kev->flags & EV_ADD)
			kqueue_expand_list(kq, kev->ident);
		if (kev->ident < kq->kq_knlistsize)
			list = &kq->kq_knlist[kev->ident];
	} else {
		if (kev->flags & EV_ADD)
			kqueue_expand_hash(kq);
		if (kq->kq_knhashmask != 0) {
			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
		}
	}
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kev->filter == kn->kn_filter &&
			    kev->ident == kn->kn_id) {
				s = splhigh();
				if (!knote_acquire(kn, NULL, 0)) {
					splx(s);
					if (fp != NULL) {
						FRELE(fp, p);
						fp = NULL;
					}
					goto again;
				}
				splx(s);
				break;
			}
		}
	}
	KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match.
	 * If adding a new knote, sleeping is not allowed until the knote
	 * has been inserted.
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = newkn;
			newkn = NULL;
			kn->kn_status = KN_PROCESSING;
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, p);
				goto done;
			}

			/*
			 * If this is a file descriptor filter, check if
			 * fd was closed while the knote was being added.
			 * knote_fdclose() has missed kn if the function
			 * ran before kn appeared in kq_knlist.
			 */
			if ((fops->f_flags & FILTEROP_ISFD) &&
			    fd_checkclosed(fdp, kev->ident, kn->kn_fp)) {
				/*
				 * Drop the knote silently without error
				 * because another thread might already have
				 * seen it. This corresponds to the insert
				 * happening in full before the close.
				 */
				kn->kn_fop->f_detach(kn);
				knote_drop(kn, p);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			knote_activate(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if (kn->kn_fop->f_event(kn, 0))
			kn->kn_status |= KN_ACTIVE;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

	s = splhigh();
	knote_release(kn);
	splx(s);
done:
	if (fp != NULL)
		FRELE(fp, p);
	if (newkn != NULL)
		pool_put(&knote_pool, newkn);
	return (error);
}

int
kqueue_sleep(struct kqueue *kq, struct timespec *tsp)
{
	struct timespec elapsed, start, stop;
	uint64_t nsecs;
	int error;

	splassert(IPL_HIGH);

	if (tsp != NULL) {
		getnanouptime(&start);
		nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP);
	} else
		nsecs = INFSLP;
	error = tsleep_nsec(kq, PSOCK | PCATCH, "kqread", nsecs);
	if (tsp != NULL) {
		getnanouptime(&stop);
		timespecsub(&stop, &start, &elapsed);
		timespecsub(tsp, &elapsed, tsp);
		if (tsp->tv_sec < 0)
			timespecclear(tsp);
	}

	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->tv_sec/tv_nsec are both
 * 0 we do not block at all.
 */
int
kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
    struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
{
	struct kqueue *kq = scan->kqs_kq;
	struct knote *kn;
	int s, error = 0, nkev = 0;

	if (maxevents == 0)
		goto done;
retry:
	KASSERT(nkev == 0);

	error = 0;

	if (kq->kq_state & KQ_DYING) {
		error = EBADF;
		goto done;
	}

	s = splhigh();
	if (kq->kq_count == 0) {
		/*
		 * Successive loops are only necessary if there are more
		 * ready events to gather, so they don't need to block.
		 */
		if ((tsp != NULL && !timespecisset(tsp)) ||
		    scan->kqs_nevent != 0) {
			splx(s);
			error = 0;
			goto done;
		}
		kq->kq_state |= KQ_SLEEP;
		error = kqueue_sleep(kq, tsp);
		splx(s);
		if (error == 0 || error == EWOULDBLOCK)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		goto done;
	}

	/*
	 * Put the end marker in the queue to limit the scan to the events
	 * that are currently active.  This prevents events from being
	 * recollected if they reactivate during scan.
	 *
	 * If a partial scan has been performed already but no events have
	 * been collected, reposition the end marker to make any new events
	 * reachable.
	 */
	if (!scan->kqs_queued) {
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
		scan->kqs_queued = 1;
	} else if (scan->kqs_nevent == 0) {
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
	}

	TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe);
	while (nkev < maxevents) {
		kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
		if (kn->kn_filter == EVFILT_MARKER) {
			if (kn == &scan->kqs_end)
				break;

			/* Move start marker past another thread's marker. */
			TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
			TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start,
			    kn_tqe);
			continue;
		}

		if (!knote_acquire(kn, NULL, 0))
			continue;

		kqueue_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kqueue_check(kq);

		if (kn->kn_status & KN_DISABLED) {
			knote_release(kn);
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
			kqueue_check(kq);
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		scan->kqs_nevent++;

		/*
		 * Post-event action on the note
		 */
		if (kn->kn_flags & EV_ONESHOT) {
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, p);
			s = splhigh();
		} else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
			if (kn->kn_flags & EV_CLEAR) {
				kn->kn_data = 0;
				kn->kn_fflags = 0;
			}
			if (kn->kn_flags & EV_DISPATCH)
				kn->kn_status |= KN_DISABLED;
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			KASSERT(kn->kn_status & KN_ATTACHED);
			knote_release(kn);
		} else {
			if ((kn->kn_status & KN_QUEUED) == 0) {
				kqueue_check(kq);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
			}
			KASSERT(kn->kn_status & KN_ATTACHED);
			knote_release(kn);
		}
		kqueue_check(kq);
	}
	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
	splx(s);
	if (scan->kqs_nevent == 0)
		goto retry;
done:
	*errorp = error;
	return (nkev);
}

void
kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
{
	memset(scan, 0, sizeof(*scan));

	KQREF(kq);
	scan->kqs_kq = kq;
	scan->kqs_start.kn_filter = EVFILT_MARKER;
	scan->kqs_start.kn_status = KN_PROCESSING;
	scan->kqs_end.kn_filter = EVFILT_MARKER;
	scan->kqs_end.kn_status = KN_PROCESSING;
}

void
kqueue_scan_finish(struct kqueue_scan_state *scan)
{
	struct kqueue *kq = scan->kqs_kq;
	int s;

	KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_start.kn_status == KN_PROCESSING);
	KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_end.kn_status == KN_PROCESSING);

	if (scan->kqs_queued) {
		scan->kqs_queued = 0;
		s = splhigh();
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		splx(s);
	}
	KQRELE(kq);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
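 * At present both read(2) and write(2) on a kqueue descriptor simply
 * fail with ENXIO.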
 */
int
kqueue_read(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_write(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	return (ENOTTY);
}

int
kqueue_poll(struct file *fp, int events, struct proc *p)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;
	int s = splhigh();

	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(p, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

void
kqueue_purge(struct proc *p, struct kqueue *kq)
{
	int i;

	KERNEL_ASSERT_LOCKED();

	for (i = 0; i < kq->kq_knlistsize; i++)
		knote_remove(p, &kq->kq_knlist[i], 1);
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i < kq->kq_knhashmask + 1; i++)
			knote_remove(p, &kq->kq_knhash[i], 1);
	}
}

void
kqueue_terminate(struct proc *p, struct kqueue *kq)
{
	struct knote *kn;

	/*
	 * Any remaining entries should be scan markers.
	 * They are removed when the ongoing scans finish.
	 */
	KASSERT(kq->kq_count == 0);
	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe)
		KASSERT(kn->kn_filter == EVFILT_MARKER);

	kq->kq_state |= KQ_DYING;
	kqueue_wakeup(kq);

	KASSERT(klist_empty(&kq->kq_sel.si_note));
	task_del(systq, &kq->kq_task);
}

int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	KERNEL_LOCK();
	kqueue_purge(p, kq);
	kqueue_terminate(p, kq);
	fp->f_data = NULL;

	KQRELE(kq);

	KERNEL_UNLOCK();

	return (0);
}

static void
kqueue_task(void *arg)
{
	struct kqueue *kq = arg;

	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	} else {
		KNOTE(&kq->kq_sel.si_note, 0);
	}
	KQRELE(kq);
}

void
kqueue_wakeup(struct kqueue *kq)
{
	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) {
		/* Defer activation to avoid recursion. */
		KQREF(kq);
		if (!task_add(systq, &kq->kq_task))
			KQRELE(kq);
	}
}

static void
kqueue_expand_hash(struct kqueue *kq)
{
	struct knlist *hash;
	u_long hashmask;

	if (kq->kq_knhashmask == 0) {
		hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask);
		if (kq->kq_knhashmask == 0) {
			kq->kq_knhash = hash;
			kq->kq_knhashmask = hashmask;
		} else {
			/* Another thread has allocated the hash. */
			hashfree(hash, KN_HASHSIZE, M_KEVENT);
		}
	}
}

static void
kqueue_expand_list(struct kqueue *kq, int fd)
{
	struct knlist *list;
	int size;

	if (kq->kq_knlistsize <= fd) {
		size = kq->kq_knlistsize;
		while (size <= fd)
			size += KQEXTENT;
		list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK);
		if (kq->kq_knlistsize <= fd) {
			memcpy(list, kq->kq_knlist,
			    kq->kq_knlistsize * sizeof(*list));
			memset(&list[kq->kq_knlistsize], 0,
			    (size - kq->kq_knlistsize) * sizeof(*list));
			free(kq->kq_knlist, M_KEVENT,
			    kq->kq_knlistsize * sizeof(*list));
			kq->kq_knlist = list;
			kq->kq_knlistsize = size;
		} else {
			/* Another thread has expanded the list. */
			free(list, M_KEVENT, size * sizeof(*list));
		}
	}
}

/*
 * Acquire a knote, return non-zero on success, 0 on failure.
 *
 * If we cannot acquire the knote we sleep and return 0.  The knote
 * may be stale on return in this case and the caller must restart
 * whatever loop they are in.
 *
 * If we are about to sleep and klist is non-NULL, the list is unlocked
 * before sleep and remains unlocked on return.
 */
int
knote_acquire(struct knote *kn, struct klist *klist, int ls)
{
	splassert(IPL_HIGH);
	KASSERT(kn->kn_filter != EVFILT_MARKER);

	if (kn->kn_status & KN_PROCESSING) {
		kn->kn_status |= KN_WAITING;
		if (klist != NULL)
			klist_unlock(klist, ls);
		tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1));
		/* knote may be stale now */
		return (0);
	}
	kn->kn_status |= KN_PROCESSING;
	return (1);
}

/*
 * Release an acquired knote, clearing KN_PROCESSING.
 */
void
knote_release(struct knote *kn)
{
	splassert(IPL_HIGH);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_status & KN_WAITING) {
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	kn->kn_status &= ~KN_PROCESSING;
	/* kn should not be accessed anymore */
}

/*
 * activate one knote.
 */
void
knote_activate(struct knote *kn)
{
	int s;

	s = splhigh();
	kn->kn_status |= KN_ACTIVE;
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)
		knote_enqueue(kn);
	splx(s);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn, *kn0;

	KLIST_ASSERT_LOCKED(list);

	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0)
		if (kn->kn_fop->f_event(kn, hint))
			knote_activate(kn);
}

/*
 * remove all knotes from a specified knlist
 */
void
knote_remove(struct proc *p, struct knlist *list, int purge)
{
	struct knote *kn;
	int s;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		s = splhigh();
		if (!knote_acquire(kn, NULL, 0)) {
			splx(s);
			continue;
		}
		splx(s);
		kn->kn_fop->f_detach(kn);

		/*
		 * Notify poll(2) and select(2) when a monitored
		 * file descriptor is closed.
		 *
		 * This reuses the original knote for delivering the
		 * notification so as to avoid allocating memory.
		 * The knote will be reachable only through the queue
		 * of active knotes and is freed either by kqueue_scan()
		 * or kqpoll_dequeue().
		 */
		if (!purge && (kn->kn_flags & __EV_POLL) != 0) {
			KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD);
			knote_detach(kn);
			FRELE(kn->kn_fp, p);
			kn->kn_fp = NULL;

			kn->kn_fop = &badfd_filtops;
			kn->kn_fop->f_event(kn, 0);
			knote_activate(kn);
			s = splhigh();
			knote_release(kn);
			splx(s);
			continue;
		}

		knote_drop(kn, p);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_p->ps_fd;
	struct kqueue *kq;
	struct knlist *list;

	/*
	 * fdplock can be ignored if the file descriptor table is being freed
	 * because no other thread can access the fdp.
	 */
	if (fdp->fd_refcnt != 0)
		fdpassertlocked(fdp);

	if (LIST_EMPTY(&fdp->fd_kqlist))
		return;

	KERNEL_LOCK();
	LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) {
		if (fd >= kq->kq_knlistsize)
			continue;

		list = &kq->kq_knlist[fd];
		knote_remove(p, list, 0);
	}
	KERNEL_UNLOCK();
}

/*
 * handle a process exiting, including the triggering of NOTE_EXIT notes
 * XXX this could be more efficient, doing a single pass down the klist
 */
void
knote_processexit(struct proc *p)
{
	struct process *pr = p->p_p;

	KASSERT(p == curproc);

	KNOTE(&pr->ps_klist, NOTE_EXIT);

	/* remove other knotes hanging off the process */
	klist_invalidate(&pr->ps_klist);
}

void
knote_attach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;
	int s;

	KASSERT(kn->kn_status & KN_PROCESSING);
	KASSERT((kn->kn_status & KN_ATTACHED) == 0);

	s = splhigh();
	kn->kn_status |= KN_ATTACHED;
	splx(s);

	if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
		KASSERT(kq->kq_knlistsize > kn->kn_id);
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		KASSERT(kq->kq_knhashmask != 0);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
}

void
knote_detach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;
	int s;

	KASSERT(kn->kn_status & KN_PROCESSING);

	if ((kn->kn_status & KN_ATTACHED) == 0)
		return;

	if (kn->kn_fop->f_flags & FILTEROP_ISFD)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	SLIST_REMOVE(list, kn, knote, kn_link);

	s = splhigh();
	kn->kn_status &= ~KN_ATTACHED;
	splx(s);
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling FRELE and pool_put.
 */
void
knote_drop(struct knote *kn, struct proc *p)
{
	int s;

	KASSERT(kn->kn_filter != EVFILT_MARKER);

	knote_detach(kn);

	s = splhigh();
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_status & KN_WAITING) {
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	splx(s);
	if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL)
		FRELE(kn->kn_fp, p);
	pool_put(&knote_pool, kn);
}

void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	splassert(IPL_HIGH);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	kqueue_check(kq);
	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_check(kq);
	kqueue_wakeup(kq);
}

void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	splassert(IPL_HIGH);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_QUEUED);

	kqueue_check(kq);
	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	kqueue_check(kq);
}

void
klist_init(struct klist *klist, const struct klistops *ops, void *arg)
{
	SLIST_INIT(&klist->kl_list);
	klist->kl_ops = ops;
	klist->kl_arg = arg;
}

void
klist_free(struct klist *klist)
{
	KASSERT(SLIST_EMPTY(&klist->kl_list));
}

void
klist_insert(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_insert_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
}

void
klist_remove(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_remove_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
}

int
klist_empty(struct klist *klist)
{
	return (SLIST_EMPTY(&klist->kl_list));
}

/*
 * Detach all knotes from klist. The knotes are rewired to indicate EOF.
 *
 * The caller of this function must not hold any locks that can block
 * filterops callbacks that run with KN_PROCESSING.
 * Otherwise this function might deadlock.
 */
void
klist_invalidate(struct klist *list)
{
	struct knote *kn;
	struct proc *p = curproc;
	int ls, s;

	NET_ASSERT_UNLOCKED();

	s = splhigh();
	ls = klist_lock(list);
	while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) {
		if (!knote_acquire(kn, list, ls)) {
			/* knote_acquire() has unlocked list. */
			ls = klist_lock(list);
			continue;
		}
		klist_unlock(list, ls);
		splx(s);
		kn->kn_fop->f_detach(kn);
		if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
			kn->kn_fop = &dead_filtops;
			kn->kn_fop->f_event(kn, 0);
			knote_activate(kn);
			s = splhigh();
			knote_release(kn);
		} else {
			knote_drop(kn, p);
			s = splhigh();
		}
		ls = klist_lock(list);
	}
	klist_unlock(list, ls);
	splx(s);
}

static int
klist_lock(struct klist *list)
{
	int ls = 0;

	if (list->kl_ops != NULL) {
		ls = list->kl_ops->klo_lock(list->kl_arg);
	} else {
		KERNEL_LOCK();
		ls = splhigh();
	}
	return ls;
}

static void
klist_unlock(struct klist *list, int ls)
{
	if (list->kl_ops != NULL) {
		list->kl_ops->klo_unlock(list->kl_arg, ls);
	} else {
		splx(ls);
		KERNEL_UNLOCK();
	}
}

static void
klist_mutex_assertlk(void *arg)
{
	struct mutex *mtx = arg;

	(void)mtx;

	MUTEX_ASSERT_LOCKED(mtx);
}

static int
klist_mutex_lock(void *arg)
{
	struct mutex *mtx = arg;

	mtx_enter(mtx);
	return 0;
}

static void
klist_mutex_unlock(void *arg, int s)
{
	struct mutex *mtx = arg;

	mtx_leave(mtx);
}

static const struct klistops mutex_klistops = {
	.klo_assertlk	= klist_mutex_assertlk,
	.klo_lock	= klist_mutex_lock,
	.klo_unlock	= klist_mutex_unlock,
};

void
klist_init_mutex(struct klist *klist, struct mutex *mtx)
{
	klist_init(klist, &mutex_klistops, mtx);
}

static void
klist_rwlock_assertlk(void *arg)
{
	struct rwlock *rwl = arg;

	(void)rwl;

	rw_assert_wrlock(rwl);
}

static int
klist_rwlock_lock(void *arg)
{
	struct rwlock *rwl = arg;

	rw_enter_write(rwl);
	return 0;
}

static void
klist_rwlock_unlock(void *arg, int s)
{
	struct rwlock *rwl = arg;

	rw_exit_write(rwl);
}

static const struct klistops rwlock_klistops = {
	.klo_assertlk	= klist_rwlock_assertlk,
	.klo_lock	= klist_rwlock_lock,
	.klo_unlock	= klist_rwlock_unlock,
};

void
klist_init_rwlock(struct klist *klist, struct rwlock *rwl)
{
	klist_init(klist, &rwlock_klistops, rwl);
}
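
/*
 * One way a subsystem can use the mutex-backed klist operations above
 * (the softc and its fields are hypothetical):
 *
 *	klist_init_mutex(&sc->sc_klist, &sc->sc_mtx);
 *	...
 *	mtx_enter(&sc->sc_mtx);
 *	KNOTE(&sc->sc_klist, 0);
 *	mtx_leave(&sc->sc_mtx);
 *
 * Holding sc_mtx around KNOTE() satisfies KLIST_ASSERT_LOCKED() in
 * knote(), since the klist was initialized with that mutex.
 */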