1 /* $OpenBSD: kern_event.c,v 1.159 2021/01/17 05:56:32 visa Exp $ */ 2 3 /*- 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/atomic.h> 34 #include <sys/kernel.h> 35 #include <sys/proc.h> 36 #include <sys/pledge.h> 37 #include <sys/malloc.h> 38 #include <sys/unistd.h> 39 #include <sys/file.h> 40 #include <sys/filedesc.h> 41 #include <sys/fcntl.h> 42 #include <sys/selinfo.h> 43 #include <sys/queue.h> 44 #include <sys/event.h> 45 #include <sys/eventvar.h> 46 #include <sys/ktrace.h> 47 #include <sys/pool.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/stat.h> 52 #include <sys/uio.h> 53 #include <sys/mount.h> 54 #include <sys/poll.h> 55 #include <sys/syscallargs.h> 56 #include <sys/time.h> 57 #include <sys/timeout.h> 58 #include <sys/wait.h> 59 60 #ifdef DIAGNOSTIC 61 #define KLIST_ASSERT_LOCKED(kl) do { \ 62 if ((kl)->kl_ops != NULL) \ 63 (kl)->kl_ops->klo_assertlk((kl)->kl_arg); \ 64 else \ 65 KERNEL_ASSERT_LOCKED(); \ 66 } while (0) 67 #else 68 #define KLIST_ASSERT_LOCKED(kl) ((void)(kl)) 69 #endif 70 71 struct kqueue *kqueue_alloc(struct filedesc *); 72 void kqueue_terminate(struct proc *p, struct kqueue *); 73 void kqueue_init(void); 74 void KQREF(struct kqueue *); 75 void KQRELE(struct kqueue *); 76 77 int kqueue_sleep(struct kqueue *, struct timespec *); 78 79 int kqueue_read(struct file *, struct uio *, int); 80 int kqueue_write(struct file *, struct uio *, int); 81 int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 82 struct proc *p); 83 int kqueue_poll(struct file *fp, int events, struct proc *p); 84 int kqueue_kqfilter(struct file *fp, struct knote *kn); 85 int kqueue_stat(struct file *fp, struct stat *st, struct proc *p); 86 int kqueue_close(struct file *fp, struct proc *p); 87 void kqueue_wakeup(struct kqueue *kq); 88 89 #ifdef KQUEUE_DEBUG 90 void kqueue_do_check(struct kqueue *kq, const char *func, int line); 91 #define kqueue_check(kq) kqueue_do_check((kq), __func__, __LINE__) 92 #else 93 #define kqueue_check(kq) do 
{} while (0)
#endif

void kqpoll_dequeue(struct proc *p);

static void kqueue_expand_hash(struct kqueue *kq);
static void kqueue_expand_list(struct kqueue *kq, int fd);
static void kqueue_task(void *);
static int klist_lock(struct klist *);
static void klist_unlock(struct klist *, int);

const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_poll	= kqueue_poll,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void knote_attach(struct knote *kn);
void knote_detach(struct knote *kn);
void knote_drop(struct knote *kn, struct proc *p);
void knote_enqueue(struct knote *kn);
void knote_dequeue(struct knote *kn);
int knote_acquire(struct knote *kn, struct klist *, int);
void knote_release(struct knote *kn);
void knote_activate(struct knote *kn);
void knote_remove(struct proc *p, struct knlist *list, int purge);

void filt_kqdetach(struct knote *kn);
int filt_kqueue(struct knote *kn, long hint);
int filt_procattach(struct knote *kn);
void filt_procdetach(struct knote *kn);
int filt_proc(struct knote *kn, long hint);
int filt_fileattach(struct knote *kn);
void filt_timerexpire(void *knx);
int filt_timerattach(struct knote *kn);
void filt_timerdetach(struct knote *kn);
int filt_timer(struct knote *kn, long hint);
void filt_seltruedetach(struct knote *kn);

const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
};

const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= filt_timer,
};

struct pool knote_pool;
struct pool kqueue_pool;
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/*
 * Table for all system-defined filters.
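 *
 * The EVFILT_* identifiers are negative; kqueue_register() converts
 * them into 0-based indices into this array with ~kev->filter, so
 * e.g. EVFILT_READ (-1) selects slot 0, file_filtops.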
173 */ 174 const struct filterops *const sysfilt_ops[] = { 175 &file_filtops, /* EVFILT_READ */ 176 &file_filtops, /* EVFILT_WRITE */ 177 NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ 178 &file_filtops, /* EVFILT_VNODE */ 179 &proc_filtops, /* EVFILT_PROC */ 180 &sig_filtops, /* EVFILT_SIGNAL */ 181 &timer_filtops, /* EVFILT_TIMER */ 182 &file_filtops, /* EVFILT_DEVICE */ 183 &file_filtops, /* EVFILT_EXCEPT */ 184 }; 185 186 void 187 KQREF(struct kqueue *kq) 188 { 189 atomic_inc_int(&kq->kq_refs); 190 } 191 192 void 193 KQRELE(struct kqueue *kq) 194 { 195 struct filedesc *fdp; 196 197 if (atomic_dec_int_nv(&kq->kq_refs) > 0) 198 return; 199 200 fdp = kq->kq_fdp; 201 if (rw_status(&fdp->fd_lock) == RW_WRITE) { 202 LIST_REMOVE(kq, kq_next); 203 } else { 204 fdplock(fdp); 205 LIST_REMOVE(kq, kq_next); 206 fdpunlock(fdp); 207 } 208 209 free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * 210 sizeof(struct knlist)); 211 hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); 212 pool_put(&kqueue_pool, kq); 213 } 214 215 void 216 kqueue_init(void) 217 { 218 pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, 219 PR_WAITOK, "kqueuepl", NULL); 220 pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, 221 PR_WAITOK, "knotepl", NULL); 222 } 223 224 int 225 filt_fileattach(struct knote *kn) 226 { 227 struct file *fp = kn->kn_fp; 228 229 return fp->f_ops->fo_kqfilter(fp, kn); 230 } 231 232 int 233 kqueue_kqfilter(struct file *fp, struct knote *kn) 234 { 235 struct kqueue *kq = kn->kn_fp->f_data; 236 237 if (kn->kn_filter != EVFILT_READ) 238 return (EINVAL); 239 240 kn->kn_fop = &kqread_filtops; 241 klist_insert_locked(&kq->kq_sel.si_note, kn); 242 return (0); 243 } 244 245 void 246 filt_kqdetach(struct knote *kn) 247 { 248 struct kqueue *kq = kn->kn_fp->f_data; 249 250 klist_remove_locked(&kq->kq_sel.si_note, kn); 251 } 252 253 int 254 filt_kqueue(struct knote *kn, long hint) 255 { 256 struct kqueue *kq = kn->kn_fp->f_data; 257 258 kn->kn_data = kq->kq_count; 259 return (kn->kn_data > 0); 260 } 261 262 int 263 filt_procattach(struct knote *kn) 264 { 265 struct process *pr; 266 int s; 267 268 if ((curproc->p_p->ps_flags & PS_PLEDGE) && 269 (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) 270 return pledge_fail(curproc, EPERM, PLEDGE_PROC); 271 272 if (kn->kn_id > PID_MAX) 273 return ESRCH; 274 275 pr = prfind(kn->kn_id); 276 if (pr == NULL) 277 return (ESRCH); 278 279 /* exiting processes can't be specified */ 280 if (pr->ps_flags & PS_EXITING) 281 return (ESRCH); 282 283 kn->kn_ptr.p_process = pr; 284 kn->kn_flags |= EV_CLEAR; /* automatically set */ 285 286 /* 287 * internal flag indicating registration done by kernel 288 */ 289 if (kn->kn_flags & EV_FLAG1) { 290 kn->kn_data = kn->kn_sdata; /* ppid */ 291 kn->kn_fflags = NOTE_CHILD; 292 kn->kn_flags &= ~EV_FLAG1; 293 } 294 295 s = splhigh(); 296 klist_insert_locked(&pr->ps_klist, kn); 297 splx(s); 298 299 return (0); 300 } 301 302 /* 303 * The knote may be attached to a different process, which may exit, 304 * leaving nothing for the knote to be attached to. So when the process 305 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 306 * it will be deleted when read out. However, as part of the knote deletion, 307 * this routine is called, so a check is needed to avoid actually performing 308 * a detach, because the original process does not exist any more. 
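 *
 * The marking itself is done by filt_proc(): on NOTE_EXIT it sets
 * KN_DETACHED, adds EV_EOF | EV_ONESHOT and removes the knote from the
 * process klist, which is what turns this detach into a no-op later.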
309 */ 310 void 311 filt_procdetach(struct knote *kn) 312 { 313 struct process *pr = kn->kn_ptr.p_process; 314 int s; 315 316 if (kn->kn_status & KN_DETACHED) 317 return; 318 319 s = splhigh(); 320 klist_remove_locked(&pr->ps_klist, kn); 321 splx(s); 322 } 323 324 int 325 filt_proc(struct knote *kn, long hint) 326 { 327 u_int event; 328 329 /* 330 * mask off extra data 331 */ 332 event = (u_int)hint & NOTE_PCTRLMASK; 333 334 /* 335 * if the user is interested in this event, record it. 336 */ 337 if (kn->kn_sfflags & event) 338 kn->kn_fflags |= event; 339 340 /* 341 * process is gone, so flag the event as finished and remove it 342 * from the process's klist 343 */ 344 if (event == NOTE_EXIT) { 345 struct process *pr = kn->kn_ptr.p_process; 346 int s; 347 348 s = splhigh(); 349 kn->kn_status |= KN_DETACHED; 350 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 351 kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); 352 klist_remove_locked(&pr->ps_klist, kn); 353 splx(s); 354 return (1); 355 } 356 357 /* 358 * process forked, and user wants to track the new process, 359 * so attach a new knote to it, and immediately report an 360 * event with the parent's pid. 361 */ 362 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 363 struct kevent kev; 364 int error; 365 366 /* 367 * register knote with new process. 368 */ 369 memset(&kev, 0, sizeof(kev)); 370 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 371 kev.filter = kn->kn_filter; 372 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 373 kev.fflags = kn->kn_sfflags; 374 kev.data = kn->kn_id; /* parent */ 375 kev.udata = kn->kn_kevent.udata; /* preserve udata */ 376 error = kqueue_register(kn->kn_kq, &kev, NULL); 377 if (error) 378 kn->kn_fflags |= NOTE_TRACKERR; 379 } 380 381 return (kn->kn_fflags != 0); 382 } 383 384 static void 385 filt_timer_timeout_add(struct knote *kn) 386 { 387 struct timeval tv; 388 struct timeout *to = kn->kn_hook; 389 int tticks; 390 391 tv.tv_sec = kn->kn_sdata / 1000; 392 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 393 tticks = tvtohz(&tv); 394 /* Remove extra tick from tvtohz() if timeout has fired before. */ 395 if (timeout_triggered(to)) 396 tticks--; 397 timeout_add(to, (tticks > 0) ? tticks : 1); 398 } 399 400 void 401 filt_timerexpire(void *knx) 402 { 403 struct knote *kn = knx; 404 405 kn->kn_data++; 406 knote_activate(kn); 407 408 if ((kn->kn_flags & EV_ONESHOT) == 0) 409 filt_timer_timeout_add(kn); 410 } 411 412 413 /* 414 * data contains amount of time to sleep, in milliseconds 415 */ 416 int 417 filt_timerattach(struct knote *kn) 418 { 419 struct timeout *to; 420 421 if (kq_ntimeouts > kq_timeoutmax) 422 return (ENOMEM); 423 kq_ntimeouts++; 424 425 kn->kn_flags |= EV_CLEAR; /* automatically set */ 426 to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); 427 timeout_set(to, filt_timerexpire, kn); 428 kn->kn_hook = to; 429 filt_timer_timeout_add(kn); 430 431 return (0); 432 } 433 434 void 435 filt_timerdetach(struct knote *kn) 436 { 437 struct timeout *to; 438 439 to = (struct timeout *)kn->kn_hook; 440 timeout_del(to); 441 free(to, M_KEVENT, sizeof(*to)); 442 kq_ntimeouts--; 443 } 444 445 int 446 filt_timer(struct knote *kn, long hint) 447 { 448 return (kn->kn_data != 0); 449 } 450 451 452 /* 453 * filt_seltrue: 454 * 455 * This filter "event" routine simulates seltrue(). 456 */ 457 int 458 filt_seltrue(struct knote *kn, long hint) 459 { 460 461 /* 462 * We don't know how much data can be read/written, 463 * but we know that it *can* be. This is about as 464 * good as select/poll does as well. 
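 *
 * Drivers that cannot account for pending bytes can point their
 * d_kqfilter entry at seltrue_kqfilter() below.  A minimal sketch,
 * with a hypothetical driver name, would be:
 *
 *	int
 *	examplekqfilter(dev_t dev, struct knote *kn)
 *	{
 *		return (seltrue_kqfilter(dev, kn));
 *	}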
465 */ 466 kn->kn_data = 0; 467 return (1); 468 } 469 470 /* 471 * This provides full kqfilter entry for device switch tables, which 472 * has same effect as filter using filt_seltrue() as filter method. 473 */ 474 void 475 filt_seltruedetach(struct knote *kn) 476 { 477 /* Nothing to do */ 478 } 479 480 const struct filterops seltrue_filtops = { 481 .f_flags = FILTEROP_ISFD, 482 .f_attach = NULL, 483 .f_detach = filt_seltruedetach, 484 .f_event = filt_seltrue, 485 }; 486 487 int 488 seltrue_kqfilter(dev_t dev, struct knote *kn) 489 { 490 switch (kn->kn_filter) { 491 case EVFILT_READ: 492 case EVFILT_WRITE: 493 kn->kn_fop = &seltrue_filtops; 494 break; 495 default: 496 return (EINVAL); 497 } 498 499 /* Nothing more to do */ 500 return (0); 501 } 502 503 static int 504 filt_dead(struct knote *kn, long hint) 505 { 506 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 507 if (kn->kn_flags & __EV_POLL) 508 kn->kn_flags |= __EV_HUP; 509 kn->kn_data = 0; 510 return (1); 511 } 512 513 static void 514 filt_deaddetach(struct knote *kn) 515 { 516 /* Nothing to do */ 517 } 518 519 const struct filterops dead_filtops = { 520 .f_flags = FILTEROP_ISFD, 521 .f_attach = NULL, 522 .f_detach = filt_deaddetach, 523 .f_event = filt_dead, 524 }; 525 526 static int 527 filt_badfd(struct knote *kn, long hint) 528 { 529 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 530 kn->kn_data = EBADF; 531 return (1); 532 } 533 534 /* For use with kqpoll. */ 535 const struct filterops badfd_filtops = { 536 .f_flags = FILTEROP_ISFD, 537 .f_attach = NULL, 538 .f_detach = filt_deaddetach, 539 .f_event = filt_badfd, 540 }; 541 542 void 543 kqpoll_init(void) 544 { 545 struct proc *p = curproc; 546 struct filedesc *fdp; 547 548 if (p->p_kq != NULL) { 549 /* 550 * Discard any knotes that have been enqueued after 551 * previous scan. 552 * This prevents accumulation of enqueued badfd knotes 553 * in case scan does not make progress for some reason. 554 */ 555 kqpoll_dequeue(p); 556 return; 557 } 558 559 p->p_kq = kqueue_alloc(p->p_fd); 560 p->p_kq_serial = arc4random(); 561 fdp = p->p_fd; 562 fdplock(fdp); 563 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 564 fdpunlock(fdp); 565 } 566 567 void 568 kqpoll_exit(void) 569 { 570 struct proc *p = curproc; 571 572 if (p->p_kq == NULL) 573 return; 574 575 kqueue_purge(p, p->p_kq); 576 /* Clear any detached knotes that remain in the queue. */ 577 kqpoll_dequeue(p); 578 kqueue_terminate(p, p->p_kq); 579 KASSERT(p->p_kq->kq_refs == 1); 580 KQRELE(p->p_kq); 581 p->p_kq = NULL; 582 } 583 584 void 585 kqpoll_dequeue(struct proc *p) 586 { 587 struct knote *kn; 588 struct kqueue *kq = p->p_kq; 589 int s; 590 591 s = splhigh(); 592 while ((kn = TAILQ_FIRST(&kq->kq_head)) != NULL) { 593 /* This kqueue should not be scanned by other threads. 
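 * p->p_kq is only ever scanned by its owning thread, so no foreign
 * EVFILT_MARKER knote should be queued here; the assertion below
 * checks exactly that.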
*/ 594 KASSERT(kn->kn_filter != EVFILT_MARKER); 595 596 if (!knote_acquire(kn, NULL, 0)) 597 continue; 598 599 kqueue_check(kq); 600 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 601 kn->kn_status &= ~KN_QUEUED; 602 kq->kq_count--; 603 604 splx(s); 605 kn->kn_fop->f_detach(kn); 606 knote_drop(kn, p); 607 s = splhigh(); 608 kqueue_check(kq); 609 } 610 splx(s); 611 } 612 613 struct kqueue * 614 kqueue_alloc(struct filedesc *fdp) 615 { 616 struct kqueue *kq; 617 618 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 619 kq->kq_refs = 1; 620 kq->kq_fdp = fdp; 621 TAILQ_INIT(&kq->kq_head); 622 task_set(&kq->kq_task, kqueue_task, kq); 623 624 return (kq); 625 } 626 627 int 628 sys_kqueue(struct proc *p, void *v, register_t *retval) 629 { 630 struct filedesc *fdp = p->p_fd; 631 struct kqueue *kq; 632 struct file *fp; 633 int fd, error; 634 635 kq = kqueue_alloc(fdp); 636 637 fdplock(fdp); 638 error = falloc(p, &fp, &fd); 639 if (error) 640 goto out; 641 fp->f_flag = FREAD | FWRITE; 642 fp->f_type = DTYPE_KQUEUE; 643 fp->f_ops = &kqueueops; 644 fp->f_data = kq; 645 *retval = fd; 646 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 647 kq = NULL; 648 fdinsert(fdp, fd, 0, fp); 649 FRELE(fp, p); 650 out: 651 fdpunlock(fdp); 652 if (kq != NULL) 653 pool_put(&kqueue_pool, kq); 654 return (error); 655 } 656 657 int 658 sys_kevent(struct proc *p, void *v, register_t *retval) 659 { 660 struct kqueue_scan_state scan; 661 struct filedesc* fdp = p->p_fd; 662 struct sys_kevent_args /* { 663 syscallarg(int) fd; 664 syscallarg(const struct kevent *) changelist; 665 syscallarg(int) nchanges; 666 syscallarg(struct kevent *) eventlist; 667 syscallarg(int) nevents; 668 syscallarg(const struct timespec *) timeout; 669 } */ *uap = v; 670 struct kevent *kevp; 671 struct kqueue *kq; 672 struct file *fp; 673 struct timespec ts; 674 struct timespec *tsp = NULL; 675 int i, n, nerrors, error; 676 int ready, total; 677 struct kevent kev[KQ_NEVENTS]; 678 679 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 680 return (EBADF); 681 682 if (fp->f_type != DTYPE_KQUEUE) { 683 error = EBADF; 684 goto done; 685 } 686 687 if (SCARG(uap, timeout) != NULL) { 688 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 689 if (error) 690 goto done; 691 #ifdef KTRACE 692 if (KTRPOINT(p, KTR_STRUCT)) 693 ktrreltimespec(p, &ts); 694 #endif 695 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 696 error = EINVAL; 697 goto done; 698 } 699 tsp = &ts; 700 } 701 702 kq = fp->f_data; 703 nerrors = 0; 704 705 while ((n = SCARG(uap, nchanges)) > 0) { 706 if (n > nitems(kev)) 707 n = nitems(kev); 708 error = copyin(SCARG(uap, changelist), kev, 709 n * sizeof(struct kevent)); 710 if (error) 711 goto done; 712 #ifdef KTRACE 713 if (KTRPOINT(p, KTR_STRUCT)) 714 ktrevent(p, kev, n); 715 #endif 716 for (i = 0; i < n; i++) { 717 kevp = &kev[i]; 718 kevp->flags &= ~EV_SYSFLAGS; 719 error = kqueue_register(kq, kevp, p); 720 if (error || (kevp->flags & EV_RECEIPT)) { 721 if (SCARG(uap, nevents) != 0) { 722 kevp->flags = EV_ERROR; 723 kevp->data = error; 724 copyout(kevp, SCARG(uap, eventlist), 725 sizeof(*kevp)); 726 SCARG(uap, eventlist)++; 727 SCARG(uap, nevents)--; 728 nerrors++; 729 } else { 730 goto done; 731 } 732 } 733 } 734 SCARG(uap, nchanges) -= n; 735 SCARG(uap, changelist) += n; 736 } 737 if (nerrors) { 738 *retval = nerrors; 739 error = 0; 740 goto done; 741 } 742 743 kqueue_scan_setup(&scan, kq); 744 FRELE(fp, p); 745 /* 746 * Collect as many events as we can. The timeout on successive 747 * loops is disabled (kqueue_scan() becomes non-blocking). 
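 * kqueue_scan() refuses to sleep once kqs_nevent is non-zero, that is,
 * once this scan has already collected at least one event.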
748 */ 749 total = 0; 750 error = 0; 751 while ((n = SCARG(uap, nevents) - total) > 0) { 752 if (n > nitems(kev)) 753 n = nitems(kev); 754 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 755 if (ready == 0) 756 break; 757 error = copyout(kev, SCARG(uap, eventlist) + total, 758 sizeof(struct kevent) * ready); 759 #ifdef KTRACE 760 if (KTRPOINT(p, KTR_STRUCT)) 761 ktrevent(p, kev, ready); 762 #endif 763 total += ready; 764 if (error || ready < n) 765 break; 766 } 767 kqueue_scan_finish(&scan); 768 *retval = total; 769 return (error); 770 771 done: 772 FRELE(fp, p); 773 return (error); 774 } 775 776 #ifdef KQUEUE_DEBUG 777 void 778 kqueue_do_check(struct kqueue *kq, const char *func, int line) 779 { 780 struct knote *kn; 781 int count = 0, nmarker = 0; 782 783 KERNEL_ASSERT_LOCKED(); 784 splassert(IPL_HIGH); 785 786 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 787 if (kn->kn_filter == EVFILT_MARKER) { 788 if ((kn->kn_status & KN_QUEUED) != 0) 789 panic("%s:%d: kq=%p kn=%p marker QUEUED", 790 func, line, kq, kn); 791 nmarker++; 792 } else { 793 if ((kn->kn_status & KN_ACTIVE) == 0) 794 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 795 func, line, kq, kn); 796 if ((kn->kn_status & KN_QUEUED) == 0) 797 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 798 func, line, kq, kn); 799 if (kn->kn_kq != kq) 800 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 801 func, line, kq, kn, kn->kn_kq); 802 count++; 803 if (count > kq->kq_count) 804 goto bad; 805 } 806 } 807 if (count != kq->kq_count) { 808 bad: 809 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 810 func, line, kq, kq->kq_count, count, nmarker); 811 } 812 } 813 #endif 814 815 int 816 kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) 817 { 818 struct filedesc *fdp = kq->kq_fdp; 819 const struct filterops *fops = NULL; 820 struct file *fp = NULL; 821 struct knote *kn = NULL, *newkn = NULL; 822 struct knlist *list = NULL; 823 int s, error = 0; 824 825 if (kev->filter < 0) { 826 if (kev->filter + EVFILT_SYSCOUNT < 0) 827 return (EINVAL); 828 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 829 } 830 831 if (fops == NULL) { 832 /* 833 * XXX 834 * filter attach routine is responsible for ensuring that 835 * the identifier can be attached to it. 
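 * Getting here with fops == NULL covers non-negative filter values as
 * well as system filters with no implementation (the EVFILT_AIO slot
 * above is NULL); both are rejected with EINVAL.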
836 */ 837 return (EINVAL); 838 } 839 840 if (fops->f_flags & FILTEROP_ISFD) { 841 /* validate descriptor */ 842 if (kev->ident > INT_MAX) 843 return (EBADF); 844 } 845 846 if (kev->flags & EV_ADD) 847 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 848 849 again: 850 if (fops->f_flags & FILTEROP_ISFD) { 851 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 852 error = EBADF; 853 goto done; 854 } 855 if (kev->flags & EV_ADD) 856 kqueue_expand_list(kq, kev->ident); 857 if (kev->ident < kq->kq_knlistsize) 858 list = &kq->kq_knlist[kev->ident]; 859 } else { 860 if (kev->flags & EV_ADD) 861 kqueue_expand_hash(kq); 862 if (kq->kq_knhashmask != 0) { 863 list = &kq->kq_knhash[ 864 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 865 } 866 } 867 if (list != NULL) { 868 SLIST_FOREACH(kn, list, kn_link) { 869 if (kev->filter == kn->kn_filter && 870 kev->ident == kn->kn_id) { 871 s = splhigh(); 872 if (!knote_acquire(kn, NULL, 0)) { 873 splx(s); 874 if (fp != NULL) { 875 FRELE(fp, p); 876 fp = NULL; 877 } 878 goto again; 879 } 880 splx(s); 881 break; 882 } 883 } 884 } 885 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 886 887 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 888 error = ENOENT; 889 goto done; 890 } 891 892 /* 893 * kn now contains the matching knote, or NULL if no match. 894 * If adding a new knote, sleeping is not allowed until the knote 895 * has been inserted. 896 */ 897 if (kev->flags & EV_ADD) { 898 if (kn == NULL) { 899 kn = newkn; 900 newkn = NULL; 901 kn->kn_status = KN_PROCESSING; 902 kn->kn_fp = fp; 903 kn->kn_kq = kq; 904 kn->kn_fop = fops; 905 906 /* 907 * apply reference count to knote structure, and 908 * do not release it at the end of this routine. 909 */ 910 fp = NULL; 911 912 kn->kn_sfflags = kev->fflags; 913 kn->kn_sdata = kev->data; 914 kev->fflags = 0; 915 kev->data = 0; 916 kn->kn_kevent = *kev; 917 918 knote_attach(kn); 919 if ((error = fops->f_attach(kn)) != 0) { 920 knote_drop(kn, p); 921 goto done; 922 } 923 924 /* 925 * If this is a file descriptor filter, check if 926 * fd was closed while the knote was being added. 927 * knote_fdclose() has missed kn if the function 928 * ran before kn appeared in kq_knlist. 929 */ 930 if ((fops->f_flags & FILTEROP_ISFD) && 931 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 932 /* 933 * Drop the knote silently without error 934 * because another thread might already have 935 * seen it. This corresponds to the insert 936 * happening in full before the close. 937 */ 938 kn->kn_fop->f_detach(kn); 939 knote_drop(kn, p); 940 goto done; 941 } 942 } else { 943 /* 944 * The user may change some filter values after the 945 * initial EV_ADD, but doing so will not reset any 946 * filters which have already been triggered. 
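 * Only kn_sfflags, kn_sdata and the saved udata are refreshed below;
 * kn_status, including any pending activation, is left untouched.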
947 */ 948 kn->kn_sfflags = kev->fflags; 949 kn->kn_sdata = kev->data; 950 kn->kn_kevent.udata = kev->udata; 951 } 952 953 s = splhigh(); 954 if (kn->kn_fop->f_event(kn, 0)) 955 knote_activate(kn); 956 splx(s); 957 958 } else if (kev->flags & EV_DELETE) { 959 kn->kn_fop->f_detach(kn); 960 knote_drop(kn, p); 961 goto done; 962 } 963 964 if ((kev->flags & EV_DISABLE) && 965 ((kn->kn_status & KN_DISABLED) == 0)) { 966 s = splhigh(); 967 kn->kn_status |= KN_DISABLED; 968 splx(s); 969 } 970 971 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 972 s = splhigh(); 973 kn->kn_status &= ~KN_DISABLED; 974 if (kn->kn_fop->f_event(kn, 0)) 975 kn->kn_status |= KN_ACTIVE; 976 if ((kn->kn_status & KN_ACTIVE) && 977 ((kn->kn_status & KN_QUEUED) == 0)) 978 knote_enqueue(kn); 979 splx(s); 980 } 981 982 s = splhigh(); 983 knote_release(kn); 984 splx(s); 985 done: 986 if (fp != NULL) 987 FRELE(fp, p); 988 if (newkn != NULL) 989 pool_put(&knote_pool, newkn); 990 return (error); 991 } 992 993 int 994 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 995 { 996 struct timespec elapsed, start, stop; 997 uint64_t nsecs; 998 int error; 999 1000 splassert(IPL_HIGH); 1001 1002 if (tsp != NULL) { 1003 getnanouptime(&start); 1004 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1005 } else 1006 nsecs = INFSLP; 1007 error = tsleep_nsec(kq, PSOCK | PCATCH, "kqread", nsecs); 1008 if (tsp != NULL) { 1009 getnanouptime(&stop); 1010 timespecsub(&stop, &start, &elapsed); 1011 timespecsub(tsp, &elapsed, tsp); 1012 if (tsp->tv_sec < 0) 1013 timespecclear(tsp); 1014 } 1015 1016 return (error); 1017 } 1018 1019 /* 1020 * Scan the kqueue, blocking if necessary until the target time is reached. 1021 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 1022 * 0 we do not block at all. 1023 */ 1024 int 1025 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1026 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1027 { 1028 struct kqueue *kq = scan->kqs_kq; 1029 struct knote *kn; 1030 int s, error = 0, nkev = 0; 1031 1032 if (maxevents == 0) 1033 goto done; 1034 retry: 1035 KASSERT(nkev == 0); 1036 1037 error = 0; 1038 1039 if (kq->kq_state & KQ_DYING) { 1040 error = EBADF; 1041 goto done; 1042 } 1043 1044 s = splhigh(); 1045 if (kq->kq_count == 0) { 1046 /* 1047 * Successive loops are only necessary if there are more 1048 * ready events to gather, so they don't need to block. 1049 */ 1050 if ((tsp != NULL && !timespecisset(tsp)) || 1051 scan->kqs_nevent != 0) { 1052 splx(s); 1053 error = 0; 1054 goto done; 1055 } 1056 kq->kq_state |= KQ_SLEEP; 1057 error = kqueue_sleep(kq, tsp); 1058 splx(s); 1059 if (error == 0 || error == EWOULDBLOCK) 1060 goto retry; 1061 /* don't restart after signals... */ 1062 if (error == ERESTART) 1063 error = EINTR; 1064 goto done; 1065 } 1066 1067 /* 1068 * Put the end marker in the queue to limit the scan to the events 1069 * that are currently active. This prevents events from being 1070 * recollected if they reactivate during scan. 1071 * 1072 * If a partial scan has been performed already but no events have 1073 * been collected, reposition the end marker to make any new events 1074 * reachable. 
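 * Both kqs_start and kqs_end are EVFILT_MARKER knotes, so concurrent
 * scans step over them instead of collecting them.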
1075 */ 1076 if (!scan->kqs_queued) { 1077 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1078 scan->kqs_queued = 1; 1079 } else if (scan->kqs_nevent == 0) { 1080 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1081 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1082 } 1083 1084 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1085 while (nkev < maxevents) { 1086 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1087 if (kn->kn_filter == EVFILT_MARKER) { 1088 if (kn == &scan->kqs_end) 1089 break; 1090 1091 /* Move start marker past another thread's marker. */ 1092 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1093 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1094 kn_tqe); 1095 continue; 1096 } 1097 1098 if (!knote_acquire(kn, NULL, 0)) 1099 continue; 1100 1101 kqueue_check(kq); 1102 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1103 kn->kn_status &= ~KN_QUEUED; 1104 kq->kq_count--; 1105 kqueue_check(kq); 1106 1107 if (kn->kn_status & KN_DISABLED) { 1108 knote_release(kn); 1109 continue; 1110 } 1111 if ((kn->kn_flags & EV_ONESHOT) == 0 && 1112 kn->kn_fop->f_event(kn, 0) == 0) { 1113 if ((kn->kn_status & KN_QUEUED) == 0) 1114 kn->kn_status &= ~KN_ACTIVE; 1115 knote_release(kn); 1116 kqueue_check(kq); 1117 continue; 1118 } 1119 *kevp = kn->kn_kevent; 1120 kevp++; 1121 nkev++; 1122 scan->kqs_nevent++; 1123 1124 /* 1125 * Post-event action on the note 1126 */ 1127 if (kn->kn_flags & EV_ONESHOT) { 1128 splx(s); 1129 kn->kn_fop->f_detach(kn); 1130 knote_drop(kn, p); 1131 s = splhigh(); 1132 } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { 1133 if (kn->kn_flags & EV_CLEAR) { 1134 kn->kn_data = 0; 1135 kn->kn_fflags = 0; 1136 } 1137 if (kn->kn_flags & EV_DISPATCH) 1138 kn->kn_status |= KN_DISABLED; 1139 if ((kn->kn_status & KN_QUEUED) == 0) 1140 kn->kn_status &= ~KN_ACTIVE; 1141 KASSERT(kn->kn_status & KN_ATTACHED); 1142 knote_release(kn); 1143 } else { 1144 if ((kn->kn_status & KN_QUEUED) == 0) { 1145 kqueue_check(kq); 1146 kq->kq_count++; 1147 kn->kn_status |= KN_QUEUED; 1148 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1149 } 1150 KASSERT(kn->kn_status & KN_ATTACHED); 1151 knote_release(kn); 1152 } 1153 kqueue_check(kq); 1154 } 1155 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1156 splx(s); 1157 if (scan->kqs_nevent == 0) 1158 goto retry; 1159 done: 1160 *errorp = error; 1161 return (nkev); 1162 } 1163 1164 void 1165 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1166 { 1167 memset(scan, 0, sizeof(*scan)); 1168 1169 KQREF(kq); 1170 scan->kqs_kq = kq; 1171 scan->kqs_start.kn_filter = EVFILT_MARKER; 1172 scan->kqs_start.kn_status = KN_PROCESSING; 1173 scan->kqs_end.kn_filter = EVFILT_MARKER; 1174 scan->kqs_end.kn_status = KN_PROCESSING; 1175 } 1176 1177 void 1178 kqueue_scan_finish(struct kqueue_scan_state *scan) 1179 { 1180 struct kqueue *kq = scan->kqs_kq; 1181 int s; 1182 1183 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1184 KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); 1185 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1186 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1187 1188 if (scan->kqs_queued) { 1189 scan->kqs_queued = 0; 1190 s = splhigh(); 1191 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1192 splx(s); 1193 } 1194 KQRELE(kq); 1195 } 1196 1197 /* 1198 * XXX 1199 * This could be expanded to call kqueue_scan, if desired. 
1200 */ 1201 int 1202 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1203 { 1204 return (ENXIO); 1205 } 1206 1207 int 1208 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1209 { 1210 return (ENXIO); 1211 } 1212 1213 int 1214 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1215 { 1216 return (ENOTTY); 1217 } 1218 1219 int 1220 kqueue_poll(struct file *fp, int events, struct proc *p) 1221 { 1222 struct kqueue *kq = (struct kqueue *)fp->f_data; 1223 int revents = 0; 1224 int s = splhigh(); 1225 1226 if (events & (POLLIN | POLLRDNORM)) { 1227 if (kq->kq_count) { 1228 revents |= events & (POLLIN | POLLRDNORM); 1229 } else { 1230 selrecord(p, &kq->kq_sel); 1231 kq->kq_state |= KQ_SEL; 1232 } 1233 } 1234 splx(s); 1235 return (revents); 1236 } 1237 1238 int 1239 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1240 { 1241 struct kqueue *kq = fp->f_data; 1242 1243 memset(st, 0, sizeof(*st)); 1244 st->st_size = kq->kq_count; 1245 st->st_blksize = sizeof(struct kevent); 1246 st->st_mode = S_IFIFO; 1247 return (0); 1248 } 1249 1250 void 1251 kqueue_purge(struct proc *p, struct kqueue *kq) 1252 { 1253 int i; 1254 1255 KERNEL_ASSERT_LOCKED(); 1256 1257 for (i = 0; i < kq->kq_knlistsize; i++) 1258 knote_remove(p, &kq->kq_knlist[i], 1); 1259 if (kq->kq_knhashmask != 0) { 1260 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1261 knote_remove(p, &kq->kq_knhash[i], 1); 1262 } 1263 } 1264 1265 void 1266 kqueue_terminate(struct proc *p, struct kqueue *kq) 1267 { 1268 KASSERT(TAILQ_EMPTY(&kq->kq_head)); 1269 1270 kq->kq_state |= KQ_DYING; 1271 kqueue_wakeup(kq); 1272 1273 KASSERT(klist_empty(&kq->kq_sel.si_note)); 1274 task_del(systq, &kq->kq_task); 1275 1276 } 1277 1278 int 1279 kqueue_close(struct file *fp, struct proc *p) 1280 { 1281 struct kqueue *kq = fp->f_data; 1282 1283 KERNEL_LOCK(); 1284 kqueue_purge(p, kq); 1285 kqueue_terminate(p, kq); 1286 fp->f_data = NULL; 1287 1288 KQRELE(kq); 1289 1290 KERNEL_UNLOCK(); 1291 1292 return (0); 1293 } 1294 1295 static void 1296 kqueue_task(void *arg) 1297 { 1298 struct kqueue *kq = arg; 1299 1300 if (kq->kq_state & KQ_SEL) { 1301 kq->kq_state &= ~KQ_SEL; 1302 selwakeup(&kq->kq_sel); 1303 } else { 1304 KNOTE(&kq->kq_sel.si_note, 0); 1305 } 1306 KQRELE(kq); 1307 } 1308 1309 void 1310 kqueue_wakeup(struct kqueue *kq) 1311 { 1312 1313 if (kq->kq_state & KQ_SLEEP) { 1314 kq->kq_state &= ~KQ_SLEEP; 1315 wakeup(kq); 1316 } 1317 if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) { 1318 /* Defer activation to avoid recursion. */ 1319 KQREF(kq); 1320 if (!task_add(systq, &kq->kq_task)) 1321 KQRELE(kq); 1322 } 1323 } 1324 1325 static void 1326 kqueue_expand_hash(struct kqueue *kq) 1327 { 1328 struct knlist *hash; 1329 u_long hashmask; 1330 1331 if (kq->kq_knhashmask == 0) { 1332 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1333 if (kq->kq_knhashmask == 0) { 1334 kq->kq_knhash = hash; 1335 kq->kq_knhashmask = hashmask; 1336 } else { 1337 /* Another thread has allocated the hash. 
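 * hashinit() may have slept (M_WAITOK), which is why kq_knhashmask is
 * re-checked above; the losing allocation is freed here.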
*/ 1338 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1339 } 1340 } 1341 } 1342 1343 static void 1344 kqueue_expand_list(struct kqueue *kq, int fd) 1345 { 1346 struct knlist *list; 1347 int size; 1348 1349 if (kq->kq_knlistsize <= fd) { 1350 size = kq->kq_knlistsize; 1351 while (size <= fd) 1352 size += KQEXTENT; 1353 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1354 if (kq->kq_knlistsize <= fd) { 1355 memcpy(list, kq->kq_knlist, 1356 kq->kq_knlistsize * sizeof(*list)); 1357 memset(&list[kq->kq_knlistsize], 0, 1358 (size - kq->kq_knlistsize) * sizeof(*list)); 1359 free(kq->kq_knlist, M_KEVENT, 1360 kq->kq_knlistsize * sizeof(*list)); 1361 kq->kq_knlist = list; 1362 kq->kq_knlistsize = size; 1363 } else { 1364 /* Another thread has expanded the list. */ 1365 free(list, M_KEVENT, size * sizeof(*list)); 1366 } 1367 } 1368 } 1369 1370 /* 1371 * Acquire a knote, return non-zero on success, 0 on failure. 1372 * 1373 * If we cannot acquire the knote we sleep and return 0. The knote 1374 * may be stale on return in this case and the caller must restart 1375 * whatever loop they are in. 1376 * 1377 * If we are about to sleep and klist is non-NULL, the list is unlocked 1378 * before sleep and remains unlocked on return. 1379 */ 1380 int 1381 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1382 { 1383 splassert(IPL_HIGH); 1384 KASSERT(kn->kn_filter != EVFILT_MARKER); 1385 1386 if (kn->kn_status & KN_PROCESSING) { 1387 kn->kn_status |= KN_WAITING; 1388 if (klist != NULL) 1389 klist_unlock(klist, ls); 1390 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1391 /* knote may be stale now */ 1392 return (0); 1393 } 1394 kn->kn_status |= KN_PROCESSING; 1395 return (1); 1396 } 1397 1398 /* 1399 * Release an acquired knote, clearing KN_PROCESSING. 1400 */ 1401 void 1402 knote_release(struct knote *kn) 1403 { 1404 splassert(IPL_HIGH); 1405 KASSERT(kn->kn_filter != EVFILT_MARKER); 1406 KASSERT(kn->kn_status & KN_PROCESSING); 1407 1408 if (kn->kn_status & KN_WAITING) { 1409 kn->kn_status &= ~KN_WAITING; 1410 wakeup(kn); 1411 } 1412 kn->kn_status &= ~KN_PROCESSING; 1413 /* kn should not be accessed anymore */ 1414 } 1415 1416 /* 1417 * activate one knote. 1418 */ 1419 void 1420 knote_activate(struct knote *kn) 1421 { 1422 int s; 1423 1424 s = splhigh(); 1425 kn->kn_status |= KN_ACTIVE; 1426 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1427 knote_enqueue(kn); 1428 splx(s); 1429 } 1430 1431 /* 1432 * walk down a list of knotes, activating them if their event has triggered. 1433 */ 1434 void 1435 knote(struct klist *list, long hint) 1436 { 1437 struct knote *kn, *kn0; 1438 1439 KLIST_ASSERT_LOCKED(list); 1440 1441 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) 1442 if (kn->kn_fop->f_event(kn, hint)) 1443 knote_activate(kn); 1444 } 1445 1446 /* 1447 * remove all knotes from a specified knlist 1448 */ 1449 void 1450 knote_remove(struct proc *p, struct knlist *list, int purge) 1451 { 1452 struct knote *kn; 1453 int s; 1454 1455 while ((kn = SLIST_FIRST(list)) != NULL) { 1456 s = splhigh(); 1457 if (!knote_acquire(kn, NULL, 0)) { 1458 splx(s); 1459 continue; 1460 } 1461 splx(s); 1462 kn->kn_fop->f_detach(kn); 1463 1464 /* 1465 * Notify poll(2) and select(2) when a monitored 1466 * file descriptor is closed. 1467 * 1468 * This reuses the original knote for delivering the 1469 * notification so as to avoid allocating memory. 1470 * The knote will be reachable only through the queue 1471 * of active knotes and is freed either by kqueue_scan() 1472 * or kqpoll_dequeue(). 
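 * The reused knote is switched to badfd_filtops below, so the poller
 * sees an EV_ERROR event carrying EBADF in its data field.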
1473 */ 1474 if (!purge && (kn->kn_flags & __EV_POLL) != 0) { 1475 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1476 knote_detach(kn); 1477 FRELE(kn->kn_fp, p); 1478 kn->kn_fp = NULL; 1479 1480 kn->kn_fop = &badfd_filtops; 1481 kn->kn_fop->f_event(kn, 0); 1482 knote_activate(kn); 1483 s = splhigh(); 1484 knote_release(kn); 1485 splx(s); 1486 continue; 1487 } 1488 1489 knote_drop(kn, p); 1490 } 1491 } 1492 1493 /* 1494 * remove all knotes referencing a specified fd 1495 */ 1496 void 1497 knote_fdclose(struct proc *p, int fd) 1498 { 1499 struct filedesc *fdp = p->p_p->ps_fd; 1500 struct kqueue *kq; 1501 struct knlist *list; 1502 1503 /* 1504 * fdplock can be ignored if the file descriptor table is being freed 1505 * because no other thread can access the fdp. 1506 */ 1507 if (fdp->fd_refcnt != 0) 1508 fdpassertlocked(fdp); 1509 1510 if (LIST_EMPTY(&fdp->fd_kqlist)) 1511 return; 1512 1513 KERNEL_LOCK(); 1514 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1515 if (fd >= kq->kq_knlistsize) 1516 continue; 1517 1518 list = &kq->kq_knlist[fd]; 1519 knote_remove(p, list, 0); 1520 } 1521 KERNEL_UNLOCK(); 1522 } 1523 1524 /* 1525 * handle a process exiting, including the triggering of NOTE_EXIT notes 1526 * XXX this could be more efficient, doing a single pass down the klist 1527 */ 1528 void 1529 knote_processexit(struct proc *p) 1530 { 1531 struct process *pr = p->p_p; 1532 1533 KASSERT(p == curproc); 1534 1535 KNOTE(&pr->ps_klist, NOTE_EXIT); 1536 1537 /* remove other knotes hanging off the process */ 1538 klist_invalidate(&pr->ps_klist); 1539 } 1540 1541 void 1542 knote_attach(struct knote *kn) 1543 { 1544 struct kqueue *kq = kn->kn_kq; 1545 struct knlist *list; 1546 int s; 1547 1548 KASSERT(kn->kn_status & KN_PROCESSING); 1549 KASSERT((kn->kn_status & KN_ATTACHED) == 0); 1550 1551 s = splhigh(); 1552 kn->kn_status |= KN_ATTACHED; 1553 splx(s); 1554 1555 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1556 KASSERT(kq->kq_knlistsize > kn->kn_id); 1557 list = &kq->kq_knlist[kn->kn_id]; 1558 } else { 1559 KASSERT(kq->kq_knhashmask != 0); 1560 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1561 } 1562 SLIST_INSERT_HEAD(list, kn, kn_link); 1563 } 1564 1565 void 1566 knote_detach(struct knote *kn) 1567 { 1568 struct kqueue *kq = kn->kn_kq; 1569 struct knlist *list; 1570 int s; 1571 1572 KASSERT(kn->kn_status & KN_PROCESSING); 1573 1574 if ((kn->kn_status & KN_ATTACHED) == 0) 1575 return; 1576 1577 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1578 list = &kq->kq_knlist[kn->kn_id]; 1579 else 1580 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1581 SLIST_REMOVE(list, kn, knote, kn_link); 1582 1583 s = splhigh(); 1584 kn->kn_status &= ~KN_ATTACHED; 1585 splx(s); 1586 } 1587 1588 /* 1589 * should be called at spl == 0, since we don't want to hold spl 1590 * while calling FRELE and pool_put. 
1591 */ 1592 void 1593 knote_drop(struct knote *kn, struct proc *p) 1594 { 1595 int s; 1596 1597 KASSERT(kn->kn_filter != EVFILT_MARKER); 1598 1599 knote_detach(kn); 1600 1601 s = splhigh(); 1602 if (kn->kn_status & KN_QUEUED) 1603 knote_dequeue(kn); 1604 if (kn->kn_status & KN_WAITING) { 1605 kn->kn_status &= ~KN_WAITING; 1606 wakeup(kn); 1607 } 1608 splx(s); 1609 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 1610 FRELE(kn->kn_fp, p); 1611 pool_put(&knote_pool, kn); 1612 } 1613 1614 1615 void 1616 knote_enqueue(struct knote *kn) 1617 { 1618 struct kqueue *kq = kn->kn_kq; 1619 1620 splassert(IPL_HIGH); 1621 KASSERT(kn->kn_filter != EVFILT_MARKER); 1622 KASSERT((kn->kn_status & KN_QUEUED) == 0); 1623 1624 kqueue_check(kq); 1625 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1626 kn->kn_status |= KN_QUEUED; 1627 kq->kq_count++; 1628 kqueue_check(kq); 1629 kqueue_wakeup(kq); 1630 } 1631 1632 void 1633 knote_dequeue(struct knote *kn) 1634 { 1635 struct kqueue *kq = kn->kn_kq; 1636 1637 splassert(IPL_HIGH); 1638 KASSERT(kn->kn_filter != EVFILT_MARKER); 1639 KASSERT(kn->kn_status & KN_QUEUED); 1640 1641 kqueue_check(kq); 1642 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1643 kn->kn_status &= ~KN_QUEUED; 1644 kq->kq_count--; 1645 kqueue_check(kq); 1646 } 1647 1648 void 1649 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 1650 { 1651 SLIST_INIT(&klist->kl_list); 1652 klist->kl_ops = ops; 1653 klist->kl_arg = arg; 1654 } 1655 1656 void 1657 klist_free(struct klist *klist) 1658 { 1659 KASSERT(SLIST_EMPTY(&klist->kl_list)); 1660 } 1661 1662 void 1663 klist_insert(struct klist *klist, struct knote *kn) 1664 { 1665 int ls; 1666 1667 ls = klist_lock(klist); 1668 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 1669 klist_unlock(klist, ls); 1670 } 1671 1672 void 1673 klist_insert_locked(struct klist *klist, struct knote *kn) 1674 { 1675 KLIST_ASSERT_LOCKED(klist); 1676 1677 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 1678 } 1679 1680 void 1681 klist_remove(struct klist *klist, struct knote *kn) 1682 { 1683 int ls; 1684 1685 ls = klist_lock(klist); 1686 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 1687 klist_unlock(klist, ls); 1688 } 1689 1690 void 1691 klist_remove_locked(struct klist *klist, struct knote *kn) 1692 { 1693 KLIST_ASSERT_LOCKED(klist); 1694 1695 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 1696 } 1697 1698 int 1699 klist_empty(struct klist *klist) 1700 { 1701 return (SLIST_EMPTY(&klist->kl_list)); 1702 } 1703 1704 /* 1705 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 1706 * 1707 * The caller of this function must not hold any locks that can block 1708 * filterops callbacks that run with KN_PROCESSING. 1709 * Otherwise this function might deadlock. 1710 */ 1711 void 1712 klist_invalidate(struct klist *list) 1713 { 1714 struct knote *kn; 1715 struct proc *p = curproc; 1716 int ls, s; 1717 1718 NET_ASSERT_UNLOCKED(); 1719 1720 s = splhigh(); 1721 ls = klist_lock(list); 1722 while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { 1723 if (!knote_acquire(kn, list, ls)) { 1724 /* knote_acquire() has unlocked list. 
*/ 1725 ls = klist_lock(list); 1726 continue; 1727 } 1728 klist_unlock(list, ls); 1729 splx(s); 1730 kn->kn_fop->f_detach(kn); 1731 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1732 kn->kn_fop = &dead_filtops; 1733 kn->kn_fop->f_event(kn, 0); 1734 knote_activate(kn); 1735 s = splhigh(); 1736 knote_release(kn); 1737 } else { 1738 knote_drop(kn, p); 1739 s = splhigh(); 1740 } 1741 ls = klist_lock(list); 1742 } 1743 klist_unlock(list, ls); 1744 splx(s); 1745 } 1746 1747 static int 1748 klist_lock(struct klist *list) 1749 { 1750 int ls = 0; 1751 1752 if (list->kl_ops != NULL) { 1753 ls = list->kl_ops->klo_lock(list->kl_arg); 1754 } else { 1755 KERNEL_LOCK(); 1756 ls = splhigh(); 1757 } 1758 return ls; 1759 } 1760 1761 static void 1762 klist_unlock(struct klist *list, int ls) 1763 { 1764 if (list->kl_ops != NULL) { 1765 list->kl_ops->klo_unlock(list->kl_arg, ls); 1766 } else { 1767 splx(ls); 1768 KERNEL_UNLOCK(); 1769 } 1770 } 1771 1772 static void 1773 klist_mutex_assertlk(void *arg) 1774 { 1775 struct mutex *mtx = arg; 1776 1777 (void)mtx; 1778 1779 MUTEX_ASSERT_LOCKED(mtx); 1780 } 1781 1782 static int 1783 klist_mutex_lock(void *arg) 1784 { 1785 struct mutex *mtx = arg; 1786 1787 mtx_enter(mtx); 1788 return 0; 1789 } 1790 1791 static void 1792 klist_mutex_unlock(void *arg, int s) 1793 { 1794 struct mutex *mtx = arg; 1795 1796 mtx_leave(mtx); 1797 } 1798 1799 static const struct klistops mutex_klistops = { 1800 .klo_assertlk = klist_mutex_assertlk, 1801 .klo_lock = klist_mutex_lock, 1802 .klo_unlock = klist_mutex_unlock, 1803 }; 1804 1805 void 1806 klist_init_mutex(struct klist *klist, struct mutex *mtx) 1807 { 1808 klist_init(klist, &mutex_klistops, mtx); 1809 } 1810 1811 static void 1812 klist_rwlock_assertlk(void *arg) 1813 { 1814 struct rwlock *rwl = arg; 1815 1816 (void)rwl; 1817 1818 rw_assert_wrlock(rwl); 1819 } 1820 1821 static int 1822 klist_rwlock_lock(void *arg) 1823 { 1824 struct rwlock *rwl = arg; 1825 1826 rw_enter_write(rwl); 1827 return 0; 1828 } 1829 1830 static void 1831 klist_rwlock_unlock(void *arg, int s) 1832 { 1833 struct rwlock *rwl = arg; 1834 1835 rw_exit_write(rwl); 1836 } 1837 1838 static const struct klistops rwlock_klistops = { 1839 .klo_assertlk = klist_rwlock_assertlk, 1840 .klo_lock = klist_rwlock_lock, 1841 .klo_unlock = klist_rwlock_unlock, 1842 }; 1843 1844 void 1845 klist_init_rwlock(struct klist *klist, struct rwlock *rwl) 1846 { 1847 klist_init(klist, &rwlock_klistops, rwl); 1848 } 1849
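
/*
 * Illustrative sketch, not part of the original file: how a subsystem
 * that serializes its klist with a mutex might use the interface
 * above.  The names ex_mtx, ex_klist, ex_init(), ex_event() and
 * ex_destroy() are hypothetical.
 *
 *	struct mutex ex_mtx;
 *	struct klist ex_klist;
 *
 *	void
 *	ex_init(void)
 *	{
 *		mtx_init(&ex_mtx, IPL_MPFLOOR);
 *		klist_init_mutex(&ex_klist, &ex_mtx);
 *	}
 *
 *	void
 *	ex_event(void)
 *	{
 *		mtx_enter(&ex_mtx);
 *		knote(&ex_klist, 0);		(klist lock held, as knote() asserts)
 *		mtx_leave(&ex_mtx);
 *	}
 *
 *	void
 *	ex_destroy(void)
 *	{
 *		klist_invalidate(&ex_klist);	(no blocking locks held here)
 *		klist_free(&ex_klist);
 *	}
 */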