/*	$OpenBSD: kern_event.c,v 1.156 2020/12/25 12:59:52 visa Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/syscallargs.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/wait.h>

#ifdef DIAGNOSTIC
#define KLIST_ASSERT_LOCKED(kl) do {					\
	if ((kl)->kl_ops != NULL)					\
		(kl)->kl_ops->klo_assertlk((kl)->kl_arg);		\
	else								\
		KERNEL_ASSERT_LOCKED();					\
} while (0)
#else
#define KLIST_ASSERT_LOCKED(kl)	((void)(kl))
#endif

struct	kqueue *kqueue_alloc(struct filedesc *);
void	kqueue_terminate(struct proc *p, struct kqueue *);
void	kqueue_init(void);
void	KQREF(struct kqueue *);
void	KQRELE(struct kqueue *);

int	kqueue_sleep(struct kqueue *, struct timespec *);

int	kqueue_read(struct file *, struct uio *, int);
int	kqueue_write(struct file *, struct uio *, int);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_poll(struct file *fp, int events, struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);
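
/*
 * File operations for descriptors returned by kqueue(2).  read(2),
 * write(2) and ioctl(2) on a kqueue descriptor are rejected by the
 * stub handlers further below; only poll, kqfilter, stat and close
 * do real work.
 */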

const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_poll	= kqueue_poll,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
int	knote_acquire(struct knote *kn, struct klist *, int);
void	knote_release(struct knote *kn);
void	knote_activate(struct knote *kn);
void	knote_remove(struct proc *p, struct knlist *list, int purge);

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timer(struct knote *kn, long hint);
void	filt_seltruedetach(struct knote *kn);

const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
};

const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= filt_timer,
};

struct	pool knote_pool;
struct	pool kqueue_pool;
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/*
 * Table for all system-defined filters.
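 * kqueue_register() indexes this array with the one's complement of the
 * (negative) EVFILT_* value, so EVFILT_READ (-1) maps to slot 0.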
 */
const struct filterops *const sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_DEVICE */
	&file_filtops,			/* EVFILT_EXCEPT */
};

void
KQREF(struct kqueue *kq)
{
	atomic_inc_int(&kq->kq_refs);
}

void
KQRELE(struct kqueue *kq)
{
	struct filedesc *fdp;

	if (atomic_dec_int_nv(&kq->kq_refs) > 0)
		return;

	fdp = kq->kq_fdp;
	if (rw_status(&fdp->fd_lock) == RW_WRITE) {
		LIST_REMOVE(kq, kq_next);
	} else {
		fdplock(fdp);
		LIST_REMOVE(kq, kq_next);
		fdpunlock(fdp);
	}

	free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
	    sizeof(struct knlist));
	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
	pool_put(&kqueue_pool, kq);
}

void
kqueue_init(void)
{
	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR,
	    PR_WAITOK, "kqueuepl", NULL);
	pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR,
	    PR_WAITOK, "knotepl", NULL);
}

int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return fp->f_ops->fo_kqfilter(fp, kn);
}

int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &kqread_filtops;
	klist_insert_locked(&kq->kq_sel.si_note, kn);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	klist_remove_locked(&kq->kq_sel.si_note, kn);
}

int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

int
filt_procattach(struct knote *kn)
{
	struct process *pr;
	int s;

	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
		return pledge_fail(curproc, EPERM, PLEDGE_PROC);

	if (kn->kn_id > PID_MAX)
		return ESRCH;

	pr = prfind(kn->kn_id);
	if (pr == NULL)
		return (ESRCH);

	/* exiting processes can't be specified */
	if (pr->ps_flags & PS_EXITING)
		return (ESRCH);

	kn->kn_ptr.p_process = pr;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	s = splhigh();
	klist_insert_locked(&pr->ps_klist, kn);
	splx(s);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
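 * (Such a knote is marked KN_DETACHED in filt_proc() and the check
 * below simply returns early.)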
 */
void
filt_procdetach(struct knote *kn)
{
	struct process *pr = kn->kn_ptr.p_process;
	int s;

	if (kn->kn_status & KN_DETACHED)
		return;

	s = splhigh();
	klist_remove_locked(&pr->ps_klist, kn);
	splx(s);
}

int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished and remove it
	 * from the process's klist
	 */
	if (event == NOTE_EXIT) {
		struct process *pr = kn->kn_ptr.p_process;
		int s;

		s = splhigh();
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
		klist_remove_locked(&pr->ps_klist, kn);
		splx(s);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		memset(&kev, 0, sizeof(kev));
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timer_timeout_add(struct knote *kn)
{
	struct timeval tv;
	struct timeout *to = kn->kn_hook;
	int tticks;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);
	/* Remove extra tick from tvtohz() if timeout has fired before. */
	if (timeout_triggered(to))
		tticks--;
	timeout_add(to, (tticks > 0) ? tticks : 1);
}

void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;

	kn->kn_data++;
	knote_activate(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0)
		filt_timer_timeout_add(kn);
}


/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;

	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	kn->kn_hook = to;
	filt_timer_timeout_add(kn);

	return (0);
}

void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del(to);
	free(to, M_KEVENT, sizeof(*to));
	kq_ntimeouts--;
}

int
filt_timer(struct knote *kn, long hint)
{
	return (kn->kn_data != 0);
}


/*
 * filt_seltrue:
 *
 * This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does as well.
	 */
	kn->kn_data = 0;
	return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, with
 * the same effect as a filter using filt_seltrue() as its filter method.
 */
void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_seltruedetach,
	.f_event	= filt_seltrue,
};

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

static int
filt_dead(struct knote *kn, long hint)
{
	kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	if (kn->kn_flags & __EV_POLL)
		kn->kn_flags |= __EV_HUP;
	kn->kn_data = 0;
	return (1);
}

static void
filt_deaddetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops dead_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_dead,
};

void
kqpoll_init(void)
{
	struct proc *p = curproc;
	struct filedesc *fdp;

	if (p->p_kq != NULL) {
		/*
		 * Clear any pending error that was raised after
		 * previous scan.
		 */
		p->p_kq->kq_error = 0;
		return;
	}

	p->p_kq = kqueue_alloc(p->p_fd);
	p->p_kq_serial = arc4random();
	fdp = p->p_fd;
	fdplock(fdp);
	LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next);
	fdpunlock(fdp);
}

void
kqpoll_exit(void)
{
	struct proc *p = curproc;

	if (p->p_kq == NULL)
		return;

	kqueue_terminate(p, p->p_kq);
	KASSERT(p->p_kq->kq_refs == 1);
	KQRELE(p->p_kq);
	p->p_kq = NULL;
}

struct kqueue *
kqueue_alloc(struct filedesc *fdp)
{
	struct kqueue *kq;

	kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO);
	kq->kq_refs = 1;
	kq->kq_fdp = fdp;
	TAILQ_INIT(&kq->kq_head);
	task_set(&kq->kq_task, kqueue_task, kq);

	return (kq);
}

int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	kq = kqueue_alloc(fdp);

	fdplock(fdp);
	error = falloc(p, &fp, &fd);
	if (error)
		goto out;
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	*retval = fd;
	LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next);
	kq = NULL;
	fdinsert(fdp, fd, 0, fp);
	FRELE(fp, p);
out:
	fdpunlock(fdp);
	if (kq != NULL)
		pool_put(&kqueue_pool, kq);
	return (error);
}

int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct kqueue_scan_state scan;
	struct filedesc *fdp = p->p_fd;
	struct sys_kevent_args /* {
		syscallarg(int)	fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int)	nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int)	nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	struct timespec *tsp = NULL;
	int i, n, nerrors, error;
	int ready, total;
	struct kevent kev[KQ_NEVENTS];

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		error = EBADF;
		goto done;
	}

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts)) {
			error = EINVAL;
			goto done;
		}
		tsp = &ts;
	}

	kq = fp->f_data;
	nerrors = 0;

	while ((n = SCARG(uap, nchanges)) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		error = copyin(SCARG(uap, changelist), kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, n);
#endif
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, p);
			if (error || (kevp->flags & EV_RECEIPT)) {
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					copyout(kevp, SCARG(uap, eventlist),
					    sizeof(*kevp));
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	kqueue_scan_setup(&scan, kq);
	FRELE(fp, p);
	/*
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
	 */
	total = 0;
	error = 0;
	while ((n = SCARG(uap, nevents) - total) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
		if (ready == 0)
			break;
		error = copyout(kev, SCARG(uap, eventlist) + total,
		    sizeof(struct kevent) * ready);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, ready);
#endif
		total += ready;
		if (error || ready < n)
			break;
	}
	kqueue_scan_finish(&scan);
	*retval = total;
	return (error);

 done:
	FRELE(fp, p);
	return (error);
}

#ifdef KQUEUE_DEBUG
void
kqueue_do_check(struct kqueue *kq, const char *func, int line)
{
	struct knote *kn;
	int count = 0, nmarker = 0;

	KERNEL_ASSERT_LOCKED();
	splassert(IPL_HIGH);

	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if (kn->kn_filter == EVFILT_MARKER) {
			if ((kn->kn_status & KN_QUEUED) != 0)
				panic("%s:%d: kq=%p kn=%p marker QUEUED",
				    func, line, kq, kn);
			nmarker++;
		} else {
			if ((kn->kn_status & KN_ACTIVE) == 0)
				panic("%s:%d: kq=%p kn=%p knote !ACTIVE",
				    func, line, kq, kn);
			if ((kn->kn_status & KN_QUEUED) == 0)
				panic("%s:%d: kq=%p kn=%p knote !QUEUED",
				    func, line, kq, kn);
			if (kn->kn_kq != kq)
				panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq",
				    func, line, kq, kn, kn->kn_kq);
			count++;
			if (count > kq->kq_count)
				goto bad;
		}
	}
	if (count != kq->kq_count) {
bad:
		panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d",
		    func, line, kq, kq->kq_count, count, nmarker);
	}
}
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif

int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	const struct filterops *fops = NULL;
	struct file *fp = NULL;
	struct knote *kn = NULL, *newkn = NULL;
	struct knlist *list = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	}

	if (fops == NULL) {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		return (EINVAL);
	}

	if (fops->f_flags & FILTEROP_ISFD) {
		/* validate descriptor */
		if (kev->ident > INT_MAX)
			return (EBADF);
	}

	if (kev->flags & EV_ADD)
		newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);

again:
	if (fops->f_flags & FILTEROP_ISFD) {
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
			error = EBADF;
			goto done;
		}
		if (kev->flags & EV_ADD)
			kqueue_expand_list(kq, kev->ident);
		if (kev->ident < kq->kq_knlistsize)
			list = &kq->kq_knlist[kev->ident];
	} else {
		if (kev->flags & EV_ADD)
			kqueue_expand_hash(kq);
		if (kq->kq_knhashmask != 0) {
			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
		}
	}
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kev->filter == kn->kn_filter &&
			    kev->ident == kn->kn_id) {
				s = splhigh();
				if (!knote_acquire(kn, NULL, 0)) {
					splx(s);
					if (fp != NULL) {
						FRELE(fp, p);
						fp = NULL;
					}
					goto again;
				}
				splx(s);
				break;
			}
		}
	}
	KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match.
	 * If adding a new knote, sleeping is not allowed until the knote
	 * has been inserted.
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = newkn;
			newkn = NULL;
			kn->kn_status = KN_PROCESSING;
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, p);
				goto done;
			}

			/*
			 * If this is a file descriptor filter, check if
			 * fd was closed while the knote was being added.
			 * knote_fdclose() has missed kn if the function
			 * ran before kn appeared in kq_knlist.
			 */
			if ((fops->f_flags & FILTEROP_ISFD) &&
			    fd_checkclosed(fdp, kev->ident, kn->kn_fp)) {
				/*
				 * Drop the knote silently without error
				 * because another thread might already have
				 * seen it. This corresponds to the insert
				 * happening in full before the close.
				 */
				kn->kn_fop->f_detach(kn);
				knote_drop(kn, p);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
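			 * Only kn_sfflags, kn_sdata and the udata field
			 * are refreshed below.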
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			knote_activate(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if (kn->kn_fop->f_event(kn, 0))
			kn->kn_status |= KN_ACTIVE;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

	s = splhigh();
	knote_release(kn);
	splx(s);
done:
	if (fp != NULL)
		FRELE(fp, p);
	if (newkn != NULL)
		pool_put(&knote_pool, newkn);
	return (error);
}

int
kqueue_sleep(struct kqueue *kq, struct timespec *tsp)
{
	struct timespec elapsed, start, stop;
	uint64_t nsecs;
	int error;

	splassert(IPL_HIGH);

	if (tsp != NULL) {
		getnanouptime(&start);
		nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP);
	} else
		nsecs = INFSLP;
	error = tsleep_nsec(kq, PSOCK | PCATCH, "kqread", nsecs);
	if (tsp != NULL) {
		getnanouptime(&stop);
		timespecsub(&stop, &start, &elapsed);
		timespecsub(tsp, &elapsed, tsp);
		if (tsp->tv_sec < 0)
			timespecclear(tsp);
	}

	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->tv_sec/tv_nsec are both
 * 0 we do not block at all.
 */
int
kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
    struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
{
	struct kqueue *kq = scan->kqs_kq;
	struct knote *kn;
	int s, error = 0, nkev = 0;

	if (maxevents == 0)
		goto done;
retry:
	KASSERT(nkev == 0);

	error = 0;

	if (kq->kq_state & KQ_DYING) {
		error = EBADF;
		goto done;
	}

	s = splhigh();

	if (kq->kq_error != 0) {
		/* Deliver the pending error. */
		error = kq->kq_error;
		kq->kq_error = 0;
		splx(s);
		goto done;
	}

	if (kq->kq_count == 0) {
		/*
		 * Successive loops are only necessary if there are more
		 * ready events to gather, so they don't need to block.
		 */
		if ((tsp != NULL && !timespecisset(tsp)) ||
		    scan->kqs_nevent != 0) {
			splx(s);
			error = 0;
			goto done;
		}
		kq->kq_state |= KQ_SLEEP;
		error = kqueue_sleep(kq, tsp);
		splx(s);
		if (error == 0 || error == EWOULDBLOCK)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		goto done;
	}

	/*
	 * Put the end marker in the queue to limit the scan to the events
	 * that are currently active.  This prevents events from being
	 * recollected if they reactivate during scan.
	 *
	 * If a partial scan has been performed already but no events have
	 * been collected, reposition the end marker to make any new events
	 * reachable.
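	 *
	 * The start marker is inserted at the head of the queue below and
	 * is moved past other threads' markers as the scan proceeds.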
	 */
	if (!scan->kqs_queued) {
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
		scan->kqs_queued = 1;
	} else if (scan->kqs_nevent == 0) {
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
	}

	TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe);
	while (nkev < maxevents) {
		kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
		if (kn->kn_filter == EVFILT_MARKER) {
			if (kn == &scan->kqs_end)
				break;

			/* Move start marker past another thread's marker. */
			TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
			TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start,
			    kn_tqe);
			continue;
		}

		if (!knote_acquire(kn, NULL, 0))
			continue;

		kqueue_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kqueue_check(kq);

		if (kn->kn_status & KN_DISABLED) {
			knote_release(kn);
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
			kqueue_check(kq);
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		scan->kqs_nevent++;

		/*
		 * Post-event action on the note
		 */
		if (kn->kn_flags & EV_ONESHOT) {
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, p);
			s = splhigh();
		} else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
			if (kn->kn_flags & EV_CLEAR) {
				kn->kn_data = 0;
				kn->kn_fflags = 0;
			}
			if (kn->kn_flags & EV_DISPATCH)
				kn->kn_status |= KN_DISABLED;
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
		} else {
			if ((kn->kn_status & KN_QUEUED) == 0) {
				kqueue_check(kq);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
			}
			knote_release(kn);
		}
		kqueue_check(kq);
	}
	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
	splx(s);
	if (scan->kqs_nevent == 0)
		goto retry;
done:
	*errorp = error;
	return (nkev);
}

void
kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
{
	memset(scan, 0, sizeof(*scan));

	KQREF(kq);
	scan->kqs_kq = kq;
	scan->kqs_start.kn_filter = EVFILT_MARKER;
	scan->kqs_start.kn_status = KN_PROCESSING;
	scan->kqs_end.kn_filter = EVFILT_MARKER;
	scan->kqs_end.kn_status = KN_PROCESSING;
}

void
kqueue_scan_finish(struct kqueue_scan_state *scan)
{
	struct kqueue *kq = scan->kqs_kq;
	int s;

	KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_start.kn_status == KN_PROCESSING);
	KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_end.kn_status == KN_PROCESSING);

	if (scan->kqs_queued) {
		scan->kqs_queued = 0;
		s = splhigh();
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		splx(s);
	}
	KQRELE(kq);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
int
kqueue_read(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_write(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	return (ENOTTY);
}

int
kqueue_poll(struct file *fp, int events, struct proc *p)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;
	int s = splhigh();

	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(p, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

void
kqueue_purge(struct proc *p, struct kqueue *kq)
{
	int i;

	KERNEL_ASSERT_LOCKED();

	for (i = 0; i < kq->kq_knlistsize; i++)
		knote_remove(p, &kq->kq_knlist[i], 1);
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i < kq->kq_knhashmask + 1; i++)
			knote_remove(p, &kq->kq_knhash[i], 1);
	}
}

void
kqueue_terminate(struct proc *p, struct kqueue *kq)
{
	kqueue_purge(p, kq);
	kq->kq_state |= KQ_DYING;
	kqueue_wakeup(kq);

	KASSERT(klist_empty(&kq->kq_sel.si_note));
	task_del(systq, &kq->kq_task);

}

int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	KERNEL_LOCK();
	kqueue_terminate(p, kq);
	fp->f_data = NULL;

	KQRELE(kq);

	KERNEL_UNLOCK();

	return (0);
}

static void
kqueue_task(void *arg)
{
	struct kqueue *kq = arg;

	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	} else {
		KNOTE(&kq->kq_sel.si_note, 0);
	}
	KQRELE(kq);
}

void
kqueue_wakeup(struct kqueue *kq)
{

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) {
		/* Defer activation to avoid recursion. */
		KQREF(kq);
		if (!task_add(systq, &kq->kq_task))
			KQRELE(kq);
	}
}

static void
kqueue_expand_hash(struct kqueue *kq)
{
	struct knlist *hash;
	u_long hashmask;

	if (kq->kq_knhashmask == 0) {
		hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask);
		if (kq->kq_knhashmask == 0) {
			kq->kq_knhash = hash;
			kq->kq_knhashmask = hashmask;
		} else {
			/* Another thread has allocated the hash. */
			hashfree(hash, KN_HASHSIZE, M_KEVENT);
		}
	}
}

static void
kqueue_expand_list(struct kqueue *kq, int fd)
{
	struct knlist *list;
	int size;

	if (kq->kq_knlistsize <= fd) {
		size = kq->kq_knlistsize;
		while (size <= fd)
			size += KQEXTENT;
		list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK);
		if (kq->kq_knlistsize <= fd) {
			memcpy(list, kq->kq_knlist,
			    kq->kq_knlistsize * sizeof(*list));
			memset(&list[kq->kq_knlistsize], 0,
			    (size - kq->kq_knlistsize) * sizeof(*list));
			free(kq->kq_knlist, M_KEVENT,
			    kq->kq_knlistsize * sizeof(*list));
			kq->kq_knlist = list;
			kq->kq_knlistsize = size;
		} else {
			/* Another thread has expanded the list. */
			free(list, M_KEVENT, size * sizeof(*list));
		}
	}
}

/*
 * Acquire a knote, return non-zero on success, 0 on failure.
 *
 * If we cannot acquire the knote we sleep and return 0.  The knote
 * may be stale on return in this case and the caller must restart
 * whatever loop they are in.
 *
 * If we are about to sleep and klist is non-NULL, the list is unlocked
 * before sleep and remains unlocked on return.
 */
int
knote_acquire(struct knote *kn, struct klist *klist, int ls)
{
	splassert(IPL_HIGH);
	KASSERT(kn->kn_filter != EVFILT_MARKER);

	if (kn->kn_status & KN_PROCESSING) {
		kn->kn_status |= KN_WAITING;
		if (klist != NULL)
			klist_unlock(klist, ls);
		tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1));
		/* knote may be stale now */
		return (0);
	}
	kn->kn_status |= KN_PROCESSING;
	return (1);
}

/*
 * Release an acquired knote, clearing KN_PROCESSING.
 */
void
knote_release(struct knote *kn)
{
	splassert(IPL_HIGH);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_status & KN_WAITING) {
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	kn->kn_status &= ~KN_PROCESSING;
	/* kn should not be accessed anymore */
}

/*
 * activate one knote.
 */
void
knote_activate(struct knote *kn)
{
	int s;

	s = splhigh();
	kn->kn_status |= KN_ACTIVE;
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)
		knote_enqueue(kn);
	splx(s);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn, *kn0;

	KLIST_ASSERT_LOCKED(list);

	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0)
		if (kn->kn_fop->f_event(kn, hint))
			knote_activate(kn);
}

/*
 * remove all knotes from a specified knlist
 */
void
knote_remove(struct proc *p, struct knlist *list, int purge)
{
	struct knote *kn;
	struct kqueue *kq;
	int s;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		s = splhigh();
		if (!knote_acquire(kn, NULL, 0)) {
			splx(s);
			continue;
		}
		splx(s);
		kn->kn_fop->f_detach(kn);

		/*
		 * Notify poll(2) and select(2) when a monitored
		 * file descriptor is closed.
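		 * The error is recorded in kq_error and delivered to the
		 * poller on its next kqueue_scan() pass.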
		 */
		if (!purge && (kn->kn_flags & __EV_POLL) != 0) {
			kq = kn->kn_kq;
			s = splhigh();
			if (kq->kq_error == 0) {
				kq->kq_error = EBADF;
				kqueue_wakeup(kq);
			}
			splx(s);
		}

		knote_drop(kn, p);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_p->ps_fd;
	struct kqueue *kq;
	struct knlist *list;

	/*
	 * fdplock can be ignored if the file descriptor table is being freed
	 * because no other thread can access the fdp.
	 */
	if (fdp->fd_refcnt != 0)
		fdpassertlocked(fdp);

	if (LIST_EMPTY(&fdp->fd_kqlist))
		return;

	KERNEL_LOCK();
	LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) {
		if (fd >= kq->kq_knlistsize)
			continue;

		list = &kq->kq_knlist[fd];
		knote_remove(p, list, 0);
	}
	KERNEL_UNLOCK();
}

/*
 * handle a process exiting, including the triggering of NOTE_EXIT notes
 * XXX this could be more efficient, doing a single pass down the klist
 */
void
knote_processexit(struct proc *p)
{
	struct process *pr = p->p_p;

	KASSERT(p == curproc);

	KNOTE(&pr->ps_klist, NOTE_EXIT);

	/* remove other knotes hanging off the process */
	klist_invalidate(&pr->ps_klist);
}

void
knote_attach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
		KASSERT(kq->kq_knlistsize > kn->kn_id);
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		KASSERT(kq->kq_knhashmask != 0);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling FRELE and pool_put.
 */
void
knote_drop(struct knote *kn, struct proc *p)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;
	int s;

	KASSERT(kn->kn_filter != EVFILT_MARKER);

	if (kn->kn_fop->f_flags & FILTEROP_ISFD)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	s = splhigh();
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_status & KN_WAITING) {
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	splx(s);
	if (kn->kn_fop->f_flags & FILTEROP_ISFD)
		FRELE(kn->kn_fp, p);
	pool_put(&knote_pool, kn);
}


void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	splassert(IPL_HIGH);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	kqueue_check(kq);
	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_check(kq);
	kqueue_wakeup(kq);
}

void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	splassert(IPL_HIGH);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_QUEUED);

	kqueue_check(kq);
	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	kqueue_check(kq);
}

void
klist_init(struct klist *klist, const struct klistops *ops, void *arg)
{
	SLIST_INIT(&klist->kl_list);
	klist->kl_ops = ops;
	klist->kl_arg = arg;
}

void
klist_free(struct klist *klist)
{
	KASSERT(SLIST_EMPTY(&klist->kl_list));
}

void
klist_insert(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_insert_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
}

void
klist_remove(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_remove_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
}

int
klist_empty(struct klist *klist)
{
	return (SLIST_EMPTY(&klist->kl_list));
}

void
klist_invalidate(struct klist *list)
{
	struct knote *kn;
	struct proc *p = curproc;
	int ls, s;

	/*
	 * NET_LOCK() must not be held because it can block another thread
	 * in f_event with a knote acquired.
	 */
	NET_ASSERT_UNLOCKED();

	s = splhigh();
	ls = klist_lock(list);
	while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) {
		if (!knote_acquire(kn, list, ls)) {
			/* knote_acquire() has unlocked list. */
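			/* Relock and retry from the head of the list. */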
			ls = klist_lock(list);
			continue;
		}
		klist_unlock(list, ls);
		splx(s);
		kn->kn_fop->f_detach(kn);
		if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
			kn->kn_fop = &dead_filtops;
			kn->kn_fop->f_event(kn, 0);
			knote_activate(kn);
			s = splhigh();
			knote_release(kn);
		} else {
			knote_drop(kn, p);
			s = splhigh();
		}
		ls = klist_lock(list);
	}
	klist_unlock(list, ls);
	splx(s);
}

static int
klist_lock(struct klist *list)
{
	int ls = 0;

	if (list->kl_ops != NULL) {
		ls = list->kl_ops->klo_lock(list->kl_arg);
	} else {
		ls = splhigh();
		KERNEL_LOCK();
	}
	return ls;
}

static void
klist_unlock(struct klist *list, int ls)
{
	if (list->kl_ops != NULL) {
		list->kl_ops->klo_unlock(list->kl_arg, ls);
	} else {
		KERNEL_UNLOCK();
		splx(ls);
	}
}

static void
klist_mutex_assertlk(void *arg)
{
	struct mutex *mtx = arg;

	(void)mtx;

	MUTEX_ASSERT_LOCKED(mtx);
}

static int
klist_mutex_lock(void *arg)
{
	struct mutex *mtx = arg;

	mtx_enter(mtx);
	return 0;
}

static void
klist_mutex_unlock(void *arg, int s)
{
	struct mutex *mtx = arg;

	mtx_leave(mtx);
}

static const struct klistops mutex_klistops = {
	.klo_assertlk	= klist_mutex_assertlk,
	.klo_lock	= klist_mutex_lock,
	.klo_unlock	= klist_mutex_unlock,
};

void
klist_init_mutex(struct klist *klist, struct mutex *mtx)
{
	klist_init(klist, &mutex_klistops, mtx);
}

static void
klist_rwlock_assertlk(void *arg)
{
	struct rwlock *rwl = arg;

	(void)rwl;

	rw_assert_wrlock(rwl);
}

static int
klist_rwlock_lock(void *arg)
{
	struct rwlock *rwl = arg;

	rw_enter_write(rwl);
	return 0;
}

static void
klist_rwlock_unlock(void *arg, int s)
{
	struct rwlock *rwl = arg;

	rw_exit_write(rwl);
}

static const struct klistops rwlock_klistops = {
	.klo_assertlk	= klist_rwlock_assertlk,
	.klo_lock	= klist_rwlock_lock,
	.klo_unlock	= klist_rwlock_unlock,
};

void
klist_init_rwlock(struct klist *klist, struct rwlock *rwl)
{
	klist_init(klist, &rwlock_klistops, rwl);
}