1 /* $OpenBSD: kern_event.c,v 1.189 2022/06/12 10:34:36 visa Exp $ */ 2 3 /*- 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/kernel.h> 34 #include <sys/proc.h> 35 #include <sys/pledge.h> 36 #include <sys/malloc.h> 37 #include <sys/unistd.h> 38 #include <sys/file.h> 39 #include <sys/filedesc.h> 40 #include <sys/fcntl.h> 41 #include <sys/selinfo.h> 42 #include <sys/queue.h> 43 #include <sys/event.h> 44 #include <sys/eventvar.h> 45 #include <sys/ktrace.h> 46 #include <sys/pool.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/stat.h> 50 #include <sys/uio.h> 51 #include <sys/mount.h> 52 #include <sys/poll.h> 53 #include <sys/syscallargs.h> 54 #include <sys/time.h> 55 #include <sys/timeout.h> 56 #include <sys/vnode.h> 57 #include <sys/wait.h> 58 59 #ifdef DIAGNOSTIC 60 #define KLIST_ASSERT_LOCKED(kl) do { \ 61 if ((kl)->kl_ops != NULL) \ 62 (kl)->kl_ops->klo_assertlk((kl)->kl_arg); \ 63 else \ 64 KERNEL_ASSERT_LOCKED(); \ 65 } while (0) 66 #else 67 #define KLIST_ASSERT_LOCKED(kl) ((void)(kl)) 68 #endif 69 70 struct kqueue *kqueue_alloc(struct filedesc *); 71 void kqueue_terminate(struct proc *p, struct kqueue *); 72 void KQREF(struct kqueue *); 73 void KQRELE(struct kqueue *); 74 75 void kqueue_purge(struct proc *, struct kqueue *); 76 int kqueue_sleep(struct kqueue *, struct timespec *); 77 78 int kqueue_read(struct file *, struct uio *, int); 79 int kqueue_write(struct file *, struct uio *, int); 80 int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 81 struct proc *p); 82 int kqueue_poll(struct file *fp, int events, struct proc *p); 83 int kqueue_kqfilter(struct file *fp, struct knote *kn); 84 int kqueue_stat(struct file *fp, struct stat *st, struct proc *p); 85 int kqueue_close(struct file *fp, struct proc *p); 86 void kqueue_wakeup(struct kqueue *kq); 87 88 #ifdef KQUEUE_DEBUG 89 void kqueue_do_check(struct kqueue *kq, const char *func, int line); 90 #define kqueue_check(kq) kqueue_do_check((kq), __func__, __LINE__) 91 #else 92 #define kqueue_check(kq) do {} 
while (0) 93 #endif 94 95 static int filter_attach(struct knote *kn); 96 static void filter_detach(struct knote *kn); 97 static int filter_event(struct knote *kn, long hint); 98 static int filter_modify(struct kevent *kev, struct knote *kn); 99 static int filter_process(struct knote *kn, struct kevent *kev); 100 static void kqueue_expand_hash(struct kqueue *kq); 101 static void kqueue_expand_list(struct kqueue *kq, int fd); 102 static void kqueue_task(void *); 103 static int klist_lock(struct klist *); 104 static void klist_unlock(struct klist *, int); 105 106 const struct fileops kqueueops = { 107 .fo_read = kqueue_read, 108 .fo_write = kqueue_write, 109 .fo_ioctl = kqueue_ioctl, 110 .fo_poll = kqueue_poll, 111 .fo_kqfilter = kqueue_kqfilter, 112 .fo_stat = kqueue_stat, 113 .fo_close = kqueue_close 114 }; 115 116 void knote_attach(struct knote *kn); 117 void knote_detach(struct knote *kn); 118 void knote_drop(struct knote *kn, struct proc *p); 119 void knote_enqueue(struct knote *kn); 120 void knote_dequeue(struct knote *kn); 121 int knote_acquire(struct knote *kn, struct klist *, int); 122 void knote_release(struct knote *kn); 123 void knote_activate(struct knote *kn); 124 void knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, 125 int idx, int purge); 126 127 void filt_kqdetach(struct knote *kn); 128 int filt_kqueue(struct knote *kn, long hint); 129 int filt_kqueuemodify(struct kevent *kev, struct knote *kn); 130 int filt_kqueueprocess(struct knote *kn, struct kevent *kev); 131 int filt_kqueue_common(struct knote *kn, struct kqueue *kq); 132 int filt_procattach(struct knote *kn); 133 void filt_procdetach(struct knote *kn); 134 int filt_proc(struct knote *kn, long hint); 135 int filt_fileattach(struct knote *kn); 136 void filt_timerexpire(void *knx); 137 int filt_timerattach(struct knote *kn); 138 void filt_timerdetach(struct knote *kn); 139 int filt_timermodify(struct kevent *kev, struct knote *kn); 140 int filt_timerprocess(struct knote *kn, struct kevent *kev); 141 void filt_seltruedetach(struct knote *kn); 142 143 const struct filterops kqread_filtops = { 144 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 145 .f_attach = NULL, 146 .f_detach = filt_kqdetach, 147 .f_event = filt_kqueue, 148 .f_modify = filt_kqueuemodify, 149 .f_process = filt_kqueueprocess, 150 }; 151 152 const struct filterops proc_filtops = { 153 .f_flags = 0, 154 .f_attach = filt_procattach, 155 .f_detach = filt_procdetach, 156 .f_event = filt_proc, 157 }; 158 159 const struct filterops file_filtops = { 160 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 161 .f_attach = filt_fileattach, 162 .f_detach = NULL, 163 .f_event = NULL, 164 }; 165 166 const struct filterops timer_filtops = { 167 .f_flags = 0, 168 .f_attach = filt_timerattach, 169 .f_detach = filt_timerdetach, 170 .f_event = NULL, 171 .f_modify = filt_timermodify, 172 .f_process = filt_timerprocess, 173 }; 174 175 struct pool knote_pool; 176 struct pool kqueue_pool; 177 struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); 178 int kq_ntimeouts = 0; 179 int kq_timeoutmax = (4 * 1024); 180 181 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 182 183 /* 184 * Table for all system-defined filters.
185 */ 186 const struct filterops *const sysfilt_ops[] = { 187 &file_filtops, /* EVFILT_READ */ 188 &file_filtops, /* EVFILT_WRITE */ 189 NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ 190 &file_filtops, /* EVFILT_VNODE */ 191 &proc_filtops, /* EVFILT_PROC */ 192 &sig_filtops, /* EVFILT_SIGNAL */ 193 &timer_filtops, /* EVFILT_TIMER */ 194 &file_filtops, /* EVFILT_DEVICE */ 195 &file_filtops, /* EVFILT_EXCEPT */ 196 }; 197 198 void 199 KQREF(struct kqueue *kq) 200 { 201 refcnt_take(&kq->kq_refcnt); 202 } 203 204 void 205 KQRELE(struct kqueue *kq) 206 { 207 struct filedesc *fdp; 208 209 if (refcnt_rele(&kq->kq_refcnt) == 0) 210 return; 211 212 fdp = kq->kq_fdp; 213 if (rw_status(&fdp->fd_lock) == RW_WRITE) { 214 LIST_REMOVE(kq, kq_next); 215 } else { 216 fdplock(fdp); 217 LIST_REMOVE(kq, kq_next); 218 fdpunlock(fdp); 219 } 220 221 KASSERT(TAILQ_EMPTY(&kq->kq_head)); 222 KASSERT(kq->kq_nknotes == 0); 223 224 free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * 225 sizeof(struct knlist)); 226 hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); 227 klist_free(&kq->kq_sel.si_note); 228 pool_put(&kqueue_pool, kq); 229 } 230 231 void 232 kqueue_init(void) 233 { 234 pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, 235 PR_WAITOK, "kqueuepl", NULL); 236 pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, 237 PR_WAITOK, "knotepl", NULL); 238 } 239 240 void 241 kqueue_init_percpu(void) 242 { 243 pool_cache_init(&knote_pool); 244 } 245 246 int 247 filt_fileattach(struct knote *kn) 248 { 249 struct file *fp = kn->kn_fp; 250 251 return fp->f_ops->fo_kqfilter(fp, kn); 252 } 253 254 int 255 kqueue_kqfilter(struct file *fp, struct knote *kn) 256 { 257 struct kqueue *kq = kn->kn_fp->f_data; 258 259 if (kn->kn_filter != EVFILT_READ) 260 return (EINVAL); 261 262 kn->kn_fop = &kqread_filtops; 263 klist_insert(&kq->kq_sel.si_note, kn); 264 return (0); 265 } 266 267 void 268 filt_kqdetach(struct knote *kn) 269 { 270 struct kqueue *kq = kn->kn_fp->f_data; 271 272 klist_remove(&kq->kq_sel.si_note, kn); 273 } 274 275 int 276 filt_kqueue_common(struct knote *kn, struct kqueue *kq) 277 { 278 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 279 280 kn->kn_data = kq->kq_count; 281 282 return (kn->kn_data > 0); 283 } 284 285 int 286 filt_kqueue(struct knote *kn, long hint) 287 { 288 struct kqueue *kq = kn->kn_fp->f_data; 289 int active; 290 291 mtx_enter(&kq->kq_lock); 292 active = filt_kqueue_common(kn, kq); 293 mtx_leave(&kq->kq_lock); 294 295 return (active); 296 } 297 298 int 299 filt_kqueuemodify(struct kevent *kev, struct knote *kn) 300 { 301 struct kqueue *kq = kn->kn_fp->f_data; 302 int active; 303 304 mtx_enter(&kq->kq_lock); 305 knote_assign(kev, kn); 306 active = filt_kqueue_common(kn, kq); 307 mtx_leave(&kq->kq_lock); 308 309 return (active); 310 } 311 312 int 313 filt_kqueueprocess(struct knote *kn, struct kevent *kev) 314 { 315 struct kqueue *kq = kn->kn_fp->f_data; 316 int active; 317 318 mtx_enter(&kq->kq_lock); 319 if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) 320 active = 1; 321 else 322 active = filt_kqueue_common(kn, kq); 323 if (active) 324 knote_submit(kn, kev); 325 mtx_leave(&kq->kq_lock); 326 327 return (active); 328 } 329 330 int 331 filt_procattach(struct knote *kn) 332 { 333 struct process *pr; 334 int s; 335 336 if ((curproc->p_p->ps_flags & PS_PLEDGE) && 337 (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) 338 return pledge_fail(curproc, EPERM, PLEDGE_PROC); 339 340 if (kn->kn_id > PID_MAX) 341 return ESRCH; 342 343 pr = prfind(kn->kn_id); 344 if (pr == NULL) 345 return (ESRCH); 346 347 /* exiting 
processes can't be specified */ 348 if (pr->ps_flags & PS_EXITING) 349 return (ESRCH); 350 351 kn->kn_ptr.p_process = pr; 352 kn->kn_flags |= EV_CLEAR; /* automatically set */ 353 354 /* 355 * internal flag indicating registration done by kernel 356 */ 357 if (kn->kn_flags & EV_FLAG1) { 358 kn->kn_data = kn->kn_sdata; /* ppid */ 359 kn->kn_fflags = NOTE_CHILD; 360 kn->kn_flags &= ~EV_FLAG1; 361 } 362 363 s = splhigh(); 364 klist_insert_locked(&pr->ps_klist, kn); 365 splx(s); 366 367 return (0); 368 } 369 370 /* 371 * The knote may be attached to a different process, which may exit, 372 * leaving nothing for the knote to be attached to. So when the process 373 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 374 * it will be deleted when read out. However, as part of the knote deletion, 375 * this routine is called, so a check is needed to avoid actually performing 376 * a detach, because the original process does not exist any more. 377 */ 378 void 379 filt_procdetach(struct knote *kn) 380 { 381 struct kqueue *kq = kn->kn_kq; 382 struct process *pr = kn->kn_ptr.p_process; 383 int s, status; 384 385 mtx_enter(&kq->kq_lock); 386 status = kn->kn_status; 387 mtx_leave(&kq->kq_lock); 388 389 if (status & KN_DETACHED) 390 return; 391 392 s = splhigh(); 393 klist_remove_locked(&pr->ps_klist, kn); 394 splx(s); 395 } 396 397 int 398 filt_proc(struct knote *kn, long hint) 399 { 400 struct kqueue *kq = kn->kn_kq; 401 u_int event; 402 403 /* 404 * mask off extra data 405 */ 406 event = (u_int)hint & NOTE_PCTRLMASK; 407 408 /* 409 * if the user is interested in this event, record it. 410 */ 411 if (kn->kn_sfflags & event) 412 kn->kn_fflags |= event; 413 414 /* 415 * process is gone, so flag the event as finished and remove it 416 * from the process's klist 417 */ 418 if (event == NOTE_EXIT) { 419 struct process *pr = kn->kn_ptr.p_process; 420 int s; 421 422 mtx_enter(&kq->kq_lock); 423 kn->kn_status |= KN_DETACHED; 424 mtx_leave(&kq->kq_lock); 425 426 s = splhigh(); 427 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 428 kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); 429 klist_remove_locked(&pr->ps_klist, kn); 430 splx(s); 431 return (1); 432 } 433 434 /* 435 * process forked, and user wants to track the new process, 436 * so attach a new knote to it, and immediately report an 437 * event with the parent's pid. 438 */ 439 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 440 struct kevent kev; 441 int error; 442 443 /* 444 * register knote with new process. 445 */ 446 memset(&kev, 0, sizeof(kev)); 447 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 448 kev.filter = kn->kn_filter; 449 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 450 kev.fflags = kn->kn_sfflags; 451 kev.data = kn->kn_id; /* parent */ 452 kev.udata = kn->kn_udata; /* preserve udata */ 453 error = kqueue_register(kq, &kev, 0, NULL); 454 if (error) 455 kn->kn_fflags |= NOTE_TRACKERR; 456 } 457 458 return (kn->kn_fflags != 0); 459 } 460 461 static void 462 filt_timer_timeout_add(struct knote *kn) 463 { 464 struct timeval tv; 465 struct timeout *to = kn->kn_hook; 466 int tticks; 467 468 tv.tv_sec = kn->kn_sdata / 1000; 469 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 470 tticks = tvtohz(&tv); 471 /* Remove extra tick from tvtohz() if timeout has fired before. */ 472 if (timeout_triggered(to)) 473 tticks--; 474 timeout_add(to, (tticks > 0) ? 
tticks : 1); 475 } 476 477 void 478 filt_timerexpire(void *knx) 479 { 480 struct knote *kn = knx; 481 struct kqueue *kq = kn->kn_kq; 482 483 kn->kn_data++; 484 mtx_enter(&kq->kq_lock); 485 knote_activate(kn); 486 mtx_leave(&kq->kq_lock); 487 488 if ((kn->kn_flags & EV_ONESHOT) == 0) 489 filt_timer_timeout_add(kn); 490 } 491 492 493 /* 494 * data contains amount of time to sleep, in milliseconds 495 */ 496 int 497 filt_timerattach(struct knote *kn) 498 { 499 struct timeout *to; 500 501 if (kq_ntimeouts > kq_timeoutmax) 502 return (ENOMEM); 503 kq_ntimeouts++; 504 505 kn->kn_flags |= EV_CLEAR; /* automatically set */ 506 to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); 507 timeout_set(to, filt_timerexpire, kn); 508 kn->kn_hook = to; 509 filt_timer_timeout_add(kn); 510 511 return (0); 512 } 513 514 void 515 filt_timerdetach(struct knote *kn) 516 { 517 struct timeout *to; 518 519 to = (struct timeout *)kn->kn_hook; 520 timeout_del_barrier(to); 521 free(to, M_KEVENT, sizeof(*to)); 522 kq_ntimeouts--; 523 } 524 525 int 526 filt_timermodify(struct kevent *kev, struct knote *kn) 527 { 528 struct kqueue *kq = kn->kn_kq; 529 struct timeout *to = kn->kn_hook; 530 531 /* Reset the timer. Any pending events are discarded. */ 532 533 timeout_del_barrier(to); 534 535 mtx_enter(&kq->kq_lock); 536 if (kn->kn_status & KN_QUEUED) 537 knote_dequeue(kn); 538 kn->kn_status &= ~KN_ACTIVE; 539 mtx_leave(&kq->kq_lock); 540 541 kn->kn_data = 0; 542 knote_assign(kev, kn); 543 /* Reinit timeout to invoke tick adjustment again. */ 544 timeout_set(to, filt_timerexpire, kn); 545 filt_timer_timeout_add(kn); 546 547 return (0); 548 } 549 550 int 551 filt_timerprocess(struct knote *kn, struct kevent *kev) 552 { 553 int active, s; 554 555 s = splsoftclock(); 556 active = (kn->kn_data != 0); 557 if (active) 558 knote_submit(kn, kev); 559 splx(s); 560 561 return (active); 562 } 563 564 565 /* 566 * filt_seltrue: 567 * 568 * This filter "event" routine simulates seltrue(). 569 */ 570 int 571 filt_seltrue(struct knote *kn, long hint) 572 { 573 574 /* 575 * We don't know how much data can be read/written, 576 * but we know that it *can* be. This is about as 577 * good as select/poll does as well. 578 */ 579 kn->kn_data = 0; 580 return (1); 581 } 582 583 int 584 filt_seltruemodify(struct kevent *kev, struct knote *kn) 585 { 586 knote_assign(kev, kn); 587 return (kn->kn_fop->f_event(kn, 0)); 588 } 589 590 int 591 filt_seltrueprocess(struct knote *kn, struct kevent *kev) 592 { 593 int active; 594 595 active = kn->kn_fop->f_event(kn, 0); 596 if (active) 597 knote_submit(kn, kev); 598 return (active); 599 } 600 601 /* 602 * This provides full kqfilter entry for device switch tables, which 603 * has same effect as filter using filt_seltrue() as filter method. 
604 */ 605 void 606 filt_seltruedetach(struct knote *kn) 607 { 608 /* Nothing to do */ 609 } 610 611 const struct filterops seltrue_filtops = { 612 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 613 .f_attach = NULL, 614 .f_detach = filt_seltruedetach, 615 .f_event = filt_seltrue, 616 .f_modify = filt_seltruemodify, 617 .f_process = filt_seltrueprocess, 618 }; 619 620 int 621 seltrue_kqfilter(dev_t dev, struct knote *kn) 622 { 623 switch (kn->kn_filter) { 624 case EVFILT_READ: 625 case EVFILT_WRITE: 626 kn->kn_fop = &seltrue_filtops; 627 break; 628 default: 629 return (EINVAL); 630 } 631 632 /* Nothing more to do */ 633 return (0); 634 } 635 636 static int 637 filt_dead(struct knote *kn, long hint) 638 { 639 if (kn->kn_filter == EVFILT_EXCEPT) { 640 /* 641 * Do not deliver event because there is no out-of-band data. 642 * However, let HUP condition pass for poll(2). 643 */ 644 if ((kn->kn_flags & __EV_POLL) == 0) { 645 kn->kn_flags |= EV_DISABLE; 646 return (0); 647 } 648 } 649 650 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 651 if (kn->kn_flags & __EV_POLL) 652 kn->kn_flags |= __EV_HUP; 653 kn->kn_data = 0; 654 return (1); 655 } 656 657 static void 658 filt_deaddetach(struct knote *kn) 659 { 660 /* Nothing to do */ 661 } 662 663 const struct filterops dead_filtops = { 664 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 665 .f_attach = NULL, 666 .f_detach = filt_deaddetach, 667 .f_event = filt_dead, 668 .f_modify = filt_seltruemodify, 669 .f_process = filt_seltrueprocess, 670 }; 671 672 static int 673 filt_badfd(struct knote *kn, long hint) 674 { 675 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 676 kn->kn_data = EBADF; 677 return (1); 678 } 679 680 /* For use with kqpoll. */ 681 const struct filterops badfd_filtops = { 682 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 683 .f_attach = NULL, 684 .f_detach = filt_deaddetach, 685 .f_event = filt_badfd, 686 .f_modify = filt_seltruemodify, 687 .f_process = filt_seltrueprocess, 688 }; 689 690 static int 691 filter_attach(struct knote *kn) 692 { 693 int error; 694 695 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 696 error = kn->kn_fop->f_attach(kn); 697 } else { 698 KERNEL_LOCK(); 699 error = kn->kn_fop->f_attach(kn); 700 KERNEL_UNLOCK(); 701 } 702 return (error); 703 } 704 705 static void 706 filter_detach(struct knote *kn) 707 { 708 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 709 kn->kn_fop->f_detach(kn); 710 } else { 711 KERNEL_LOCK(); 712 kn->kn_fop->f_detach(kn); 713 KERNEL_UNLOCK(); 714 } 715 } 716 717 static int 718 filter_event(struct knote *kn, long hint) 719 { 720 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 721 KERNEL_ASSERT_LOCKED(); 722 723 return (kn->kn_fop->f_event(kn, hint)); 724 } 725 726 static int 727 filter_modify(struct kevent *kev, struct knote *kn) 728 { 729 int active, s; 730 731 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 732 active = kn->kn_fop->f_modify(kev, kn); 733 } else { 734 KERNEL_LOCK(); 735 if (kn->kn_fop->f_modify != NULL) { 736 active = kn->kn_fop->f_modify(kev, kn); 737 } else { 738 s = splhigh(); 739 active = knote_modify(kev, kn); 740 splx(s); 741 } 742 KERNEL_UNLOCK(); 743 } 744 return (active); 745 } 746 747 static int 748 filter_process(struct knote *kn, struct kevent *kev) 749 { 750 int active, s; 751 752 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 753 active = kn->kn_fop->f_process(kn, kev); 754 } else { 755 KERNEL_LOCK(); 756 if (kn->kn_fop->f_process != NULL) { 757 active = kn->kn_fop->f_process(kn, kev); 758 } else { 759 s = splhigh(); 760 active = knote_process(kn, kev); 761 splx(s); 762 } 763 
KERNEL_UNLOCK(); 764 } 765 return (active); 766 } 767 768 /* 769 * Initialize the current thread for poll/select system call. 770 * num indicates the number of serials that the system call may utilize. 771 * After this function, the valid range of serials is 772 * p_kq_serial <= x < p_kq_serial + num. 773 */ 774 void 775 kqpoll_init(unsigned int num) 776 { 777 struct proc *p = curproc; 778 struct filedesc *fdp; 779 780 if (p->p_kq == NULL) { 781 p->p_kq = kqueue_alloc(p->p_fd); 782 p->p_kq_serial = arc4random(); 783 fdp = p->p_fd; 784 fdplock(fdp); 785 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 786 fdpunlock(fdp); 787 } 788 789 if (p->p_kq_serial + num < p->p_kq_serial) { 790 /* Serial is about to wrap. Clear all attached knotes. */ 791 kqueue_purge(p, p->p_kq); 792 p->p_kq_serial = 0; 793 } 794 } 795 796 /* 797 * Finish poll/select system call. 798 * num must have the same value that was used with kqpoll_init(). 799 */ 800 void 801 kqpoll_done(unsigned int num) 802 { 803 struct proc *p = curproc; 804 struct kqueue *kq = p->p_kq; 805 806 KASSERT(p->p_kq != NULL); 807 KASSERT(p->p_kq_serial + num >= p->p_kq_serial); 808 809 p->p_kq_serial += num; 810 811 /* 812 * Because of kn_pollid key, a thread can in principle allocate 813 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly 814 * with suitably varying pollfd arrays. 815 * Prevent such a large allocation by clearing knotes eagerly 816 * if there are too many of them. 817 * 818 * A small multiple of kq_knlistsize should give enough margin 819 * that eager clearing is infrequent, or does not happen at all, 820 * with normal programs. 821 * A single pollfd entry can use up to three knotes. 822 * Typically there is no significant overlap of fd and events 823 * between different entries in the pollfd array. 
824 */ 825 if (kq->kq_nknotes > 4 * kq->kq_knlistsize) 826 kqueue_purge(p, kq); 827 } 828 829 void 830 kqpoll_exit(void) 831 { 832 struct proc *p = curproc; 833 834 if (p->p_kq == NULL) 835 return; 836 837 kqueue_purge(p, p->p_kq); 838 kqueue_terminate(p, p->p_kq); 839 KASSERT(p->p_kq->kq_refcnt.r_refs == 1); 840 KQRELE(p->p_kq); 841 p->p_kq = NULL; 842 } 843 844 struct kqueue * 845 kqueue_alloc(struct filedesc *fdp) 846 { 847 struct kqueue *kq; 848 849 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 850 refcnt_init(&kq->kq_refcnt); 851 kq->kq_fdp = fdp; 852 TAILQ_INIT(&kq->kq_head); 853 mtx_init(&kq->kq_lock, IPL_HIGH); 854 task_set(&kq->kq_task, kqueue_task, kq); 855 klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock); 856 857 return (kq); 858 } 859 860 int 861 sys_kqueue(struct proc *p, void *v, register_t *retval) 862 { 863 struct filedesc *fdp = p->p_fd; 864 struct kqueue *kq; 865 struct file *fp; 866 int fd, error; 867 868 kq = kqueue_alloc(fdp); 869 870 fdplock(fdp); 871 error = falloc(p, &fp, &fd); 872 if (error) 873 goto out; 874 fp->f_flag = FREAD | FWRITE; 875 fp->f_type = DTYPE_KQUEUE; 876 fp->f_ops = &kqueueops; 877 fp->f_data = kq; 878 *retval = fd; 879 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 880 kq = NULL; 881 fdinsert(fdp, fd, 0, fp); 882 FRELE(fp, p); 883 out: 884 fdpunlock(fdp); 885 if (kq != NULL) 886 pool_put(&kqueue_pool, kq); 887 return (error); 888 } 889 890 int 891 sys_kevent(struct proc *p, void *v, register_t *retval) 892 { 893 struct kqueue_scan_state scan; 894 struct filedesc* fdp = p->p_fd; 895 struct sys_kevent_args /* { 896 syscallarg(int) fd; 897 syscallarg(const struct kevent *) changelist; 898 syscallarg(int) nchanges; 899 syscallarg(struct kevent *) eventlist; 900 syscallarg(int) nevents; 901 syscallarg(const struct timespec *) timeout; 902 } */ *uap = v; 903 struct kevent *kevp; 904 struct kqueue *kq; 905 struct file *fp; 906 struct timespec ts; 907 struct timespec *tsp = NULL; 908 int i, n, nerrors, error; 909 int ready, total; 910 struct kevent kev[KQ_NEVENTS]; 911 912 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 913 return (EBADF); 914 915 if (fp->f_type != DTYPE_KQUEUE) { 916 error = EBADF; 917 goto done; 918 } 919 920 if (SCARG(uap, timeout) != NULL) { 921 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 922 if (error) 923 goto done; 924 #ifdef KTRACE 925 if (KTRPOINT(p, KTR_STRUCT)) 926 ktrreltimespec(p, &ts); 927 #endif 928 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 929 error = EINVAL; 930 goto done; 931 } 932 tsp = &ts; 933 } 934 935 kq = fp->f_data; 936 nerrors = 0; 937 938 while ((n = SCARG(uap, nchanges)) > 0) { 939 if (n > nitems(kev)) 940 n = nitems(kev); 941 error = copyin(SCARG(uap, changelist), kev, 942 n * sizeof(struct kevent)); 943 if (error) 944 goto done; 945 #ifdef KTRACE 946 if (KTRPOINT(p, KTR_STRUCT)) 947 ktrevent(p, kev, n); 948 #endif 949 for (i = 0; i < n; i++) { 950 kevp = &kev[i]; 951 kevp->flags &= ~EV_SYSFLAGS; 952 error = kqueue_register(kq, kevp, 0, p); 953 if (error || (kevp->flags & EV_RECEIPT)) { 954 if (SCARG(uap, nevents) != 0) { 955 kevp->flags = EV_ERROR; 956 kevp->data = error; 957 copyout(kevp, SCARG(uap, eventlist), 958 sizeof(*kevp)); 959 SCARG(uap, eventlist)++; 960 SCARG(uap, nevents)--; 961 nerrors++; 962 } else { 963 goto done; 964 } 965 } 966 } 967 SCARG(uap, nchanges) -= n; 968 SCARG(uap, changelist) += n; 969 } 970 if (nerrors) { 971 *retval = nerrors; 972 error = 0; 973 goto done; 974 } 975 976 kqueue_scan_setup(&scan, kq); 977 FRELE(fp, p); 978 /* 979 * Collect as many 
events as we can. The timeout on successive 980 * loops is disabled (kqueue_scan() becomes non-blocking). 981 */ 982 total = 0; 983 error = 0; 984 while ((n = SCARG(uap, nevents) - total) > 0) { 985 if (n > nitems(kev)) 986 n = nitems(kev); 987 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 988 if (ready == 0) 989 break; 990 error = copyout(kev, SCARG(uap, eventlist) + total, 991 sizeof(struct kevent) * ready); 992 #ifdef KTRACE 993 if (KTRPOINT(p, KTR_STRUCT)) 994 ktrevent(p, kev, ready); 995 #endif 996 total += ready; 997 if (error || ready < n) 998 break; 999 } 1000 kqueue_scan_finish(&scan); 1001 *retval = total; 1002 return (error); 1003 1004 done: 1005 FRELE(fp, p); 1006 return (error); 1007 } 1008 1009 #ifdef KQUEUE_DEBUG 1010 void 1011 kqueue_do_check(struct kqueue *kq, const char *func, int line) 1012 { 1013 struct knote *kn; 1014 int count = 0, nmarker = 0; 1015 1016 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1017 1018 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1019 if (kn->kn_filter == EVFILT_MARKER) { 1020 if ((kn->kn_status & KN_QUEUED) != 0) 1021 panic("%s:%d: kq=%p kn=%p marker QUEUED", 1022 func, line, kq, kn); 1023 nmarker++; 1024 } else { 1025 if ((kn->kn_status & KN_ACTIVE) == 0) 1026 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 1027 func, line, kq, kn); 1028 if ((kn->kn_status & KN_QUEUED) == 0) 1029 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 1030 func, line, kq, kn); 1031 if (kn->kn_kq != kq) 1032 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 1033 func, line, kq, kn, kn->kn_kq); 1034 count++; 1035 if (count > kq->kq_count) 1036 goto bad; 1037 } 1038 } 1039 if (count != kq->kq_count) { 1040 bad: 1041 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 1042 func, line, kq, kq->kq_count, count, nmarker); 1043 } 1044 } 1045 #endif 1046 1047 int 1048 kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid, 1049 struct proc *p) 1050 { 1051 struct filedesc *fdp = kq->kq_fdp; 1052 const struct filterops *fops = NULL; 1053 struct file *fp = NULL; 1054 struct knote *kn = NULL, *newkn = NULL; 1055 struct knlist *list = NULL; 1056 int active, error = 0; 1057 1058 KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq)); 1059 1060 if (kev->filter < 0) { 1061 if (kev->filter + EVFILT_SYSCOUNT < 0) 1062 return (EINVAL); 1063 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 1064 } 1065 1066 if (fops == NULL) { 1067 /* 1068 * XXX 1069 * filter attach routine is responsible for ensuring that 1070 * the identifier can be attached to it. 
1071 */ 1072 return (EINVAL); 1073 } 1074 1075 if (fops->f_flags & FILTEROP_ISFD) { 1076 /* validate descriptor */ 1077 if (kev->ident > INT_MAX) 1078 return (EBADF); 1079 } 1080 1081 if (kev->flags & EV_ADD) 1082 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 1083 1084 again: 1085 if (fops->f_flags & FILTEROP_ISFD) { 1086 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 1087 error = EBADF; 1088 goto done; 1089 } 1090 mtx_enter(&kq->kq_lock); 1091 if (kev->flags & EV_ADD) 1092 kqueue_expand_list(kq, kev->ident); 1093 if (kev->ident < kq->kq_knlistsize) 1094 list = &kq->kq_knlist[kev->ident]; 1095 } else { 1096 mtx_enter(&kq->kq_lock); 1097 if (kev->flags & EV_ADD) 1098 kqueue_expand_hash(kq); 1099 if (kq->kq_knhashmask != 0) { 1100 list = &kq->kq_knhash[ 1101 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1102 } 1103 } 1104 if (list != NULL) { 1105 SLIST_FOREACH(kn, list, kn_link) { 1106 if (kev->filter == kn->kn_filter && 1107 kev->ident == kn->kn_id && 1108 pollid == kn->kn_pollid) { 1109 if (!knote_acquire(kn, NULL, 0)) { 1110 /* knote_acquire() has released 1111 * kq_lock. */ 1112 if (fp != NULL) { 1113 FRELE(fp, p); 1114 fp = NULL; 1115 } 1116 goto again; 1117 } 1118 break; 1119 } 1120 } 1121 } 1122 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 1123 1124 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 1125 mtx_leave(&kq->kq_lock); 1126 error = ENOENT; 1127 goto done; 1128 } 1129 1130 /* 1131 * kn now contains the matching knote, or NULL if no match. 1132 */ 1133 if (kev->flags & EV_ADD) { 1134 if (kn == NULL) { 1135 kn = newkn; 1136 newkn = NULL; 1137 kn->kn_status = KN_PROCESSING; 1138 kn->kn_fp = fp; 1139 kn->kn_kq = kq; 1140 kn->kn_fop = fops; 1141 1142 /* 1143 * apply reference count to knote structure, and 1144 * do not release it at the end of this routine. 1145 */ 1146 fp = NULL; 1147 1148 kn->kn_sfflags = kev->fflags; 1149 kn->kn_sdata = kev->data; 1150 kev->fflags = 0; 1151 kev->data = 0; 1152 kn->kn_kevent = *kev; 1153 kn->kn_pollid = pollid; 1154 1155 knote_attach(kn); 1156 mtx_leave(&kq->kq_lock); 1157 1158 error = filter_attach(kn); 1159 if (error != 0) { 1160 knote_drop(kn, p); 1161 goto done; 1162 } 1163 1164 /* 1165 * If this is a file descriptor filter, check if 1166 * fd was closed while the knote was being added. 1167 * knote_fdclose() has missed kn if the function 1168 * ran before kn appeared in kq_knlist. 1169 */ 1170 if ((fops->f_flags & FILTEROP_ISFD) && 1171 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 1172 /* 1173 * Drop the knote silently without error 1174 * because another thread might already have 1175 * seen it. This corresponds to the insert 1176 * happening in full before the close. 1177 */ 1178 filter_detach(kn); 1179 knote_drop(kn, p); 1180 goto done; 1181 } 1182 1183 /* Check if there is a pending event. */ 1184 active = filter_process(kn, NULL); 1185 mtx_enter(&kq->kq_lock); 1186 if (active) 1187 knote_activate(kn); 1188 } else if (kn->kn_fop == &badfd_filtops) { 1189 /* 1190 * Nothing expects this badfd knote any longer. 1191 * Drop it to make room for the new knote and retry. 1192 */ 1193 KASSERT(kq == p->p_kq); 1194 mtx_leave(&kq->kq_lock); 1195 filter_detach(kn); 1196 knote_drop(kn, p); 1197 1198 KASSERT(fp != NULL); 1199 FRELE(fp, p); 1200 fp = NULL; 1201 1202 goto again; 1203 } else { 1204 /* 1205 * The user may change some filter values after the 1206 * initial EV_ADD, but doing so will not reset any 1207 * filters which have already been triggered. 
1208 */ 1209 mtx_leave(&kq->kq_lock); 1210 active = filter_modify(kev, kn); 1211 mtx_enter(&kq->kq_lock); 1212 if (active) 1213 knote_activate(kn); 1214 if (kev->flags & EV_ERROR) { 1215 error = kev->data; 1216 goto release; 1217 } 1218 } 1219 } else if (kev->flags & EV_DELETE) { 1220 mtx_leave(&kq->kq_lock); 1221 filter_detach(kn); 1222 knote_drop(kn, p); 1223 goto done; 1224 } 1225 1226 if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) 1227 kn->kn_status |= KN_DISABLED; 1228 1229 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 1230 kn->kn_status &= ~KN_DISABLED; 1231 mtx_leave(&kq->kq_lock); 1232 /* Check if there is a pending event. */ 1233 active = filter_process(kn, NULL); 1234 mtx_enter(&kq->kq_lock); 1235 if (active) 1236 knote_activate(kn); 1237 } 1238 1239 release: 1240 knote_release(kn); 1241 mtx_leave(&kq->kq_lock); 1242 done: 1243 if (fp != NULL) 1244 FRELE(fp, p); 1245 if (newkn != NULL) 1246 pool_put(&knote_pool, newkn); 1247 return (error); 1248 } 1249 1250 int 1251 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1252 { 1253 struct timespec elapsed, start, stop; 1254 uint64_t nsecs; 1255 int error; 1256 1257 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1258 1259 if (tsp != NULL) { 1260 getnanouptime(&start); 1261 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1262 } else 1263 nsecs = INFSLP; 1264 error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK, 1265 "kqread", nsecs); 1266 if (tsp != NULL) { 1267 getnanouptime(&stop); 1268 timespecsub(&stop, &start, &elapsed); 1269 timespecsub(tsp, &elapsed, tsp); 1270 if (tsp->tv_sec < 0) 1271 timespecclear(tsp); 1272 } 1273 1274 return (error); 1275 } 1276 1277 /* 1278 * Scan the kqueue, blocking if necessary until the target time is reached. 1279 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 1280 * 0 we do not block at all. 1281 */ 1282 int 1283 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1284 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1285 { 1286 struct kqueue *kq = scan->kqs_kq; 1287 struct knote *kn; 1288 int error = 0, nkev = 0; 1289 int reinserted; 1290 1291 if (maxevents == 0) 1292 goto done; 1293 retry: 1294 KASSERT(nkev == 0); 1295 1296 error = 0; 1297 reinserted = 0; 1298 1299 /* msleep() with PCATCH requires kernel lock. */ 1300 KERNEL_LOCK(); 1301 1302 mtx_enter(&kq->kq_lock); 1303 1304 if (kq->kq_state & KQ_DYING) { 1305 mtx_leave(&kq->kq_lock); 1306 KERNEL_UNLOCK(); 1307 error = EBADF; 1308 goto done; 1309 } 1310 1311 if (kq->kq_count == 0) { 1312 /* 1313 * Successive loops are only necessary if there are more 1314 * ready events to gather, so they don't need to block. 1315 */ 1316 if ((tsp != NULL && !timespecisset(tsp)) || 1317 scan->kqs_nevent != 0) { 1318 mtx_leave(&kq->kq_lock); 1319 KERNEL_UNLOCK(); 1320 error = 0; 1321 goto done; 1322 } 1323 kq->kq_state |= KQ_SLEEP; 1324 error = kqueue_sleep(kq, tsp); 1325 /* kqueue_sleep() has released kq_lock. */ 1326 KERNEL_UNLOCK(); 1327 if (error == 0 || error == EWOULDBLOCK) 1328 goto retry; 1329 /* don't restart after signals... */ 1330 if (error == ERESTART) 1331 error = EINTR; 1332 goto done; 1333 } 1334 1335 /* The actual scan does not sleep on kq, so unlock the kernel. */ 1336 KERNEL_UNLOCK(); 1337 1338 /* 1339 * Put the end marker in the queue to limit the scan to the events 1340 * that are currently active. This prevents events from being 1341 * recollected if they reactivate during scan. 
1342 * 1343 * If a partial scan has been performed already but no events have 1344 * been collected, reposition the end marker to make any new events 1345 * reachable. 1346 */ 1347 if (!scan->kqs_queued) { 1348 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1349 scan->kqs_queued = 1; 1350 } else if (scan->kqs_nevent == 0) { 1351 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1352 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1353 } 1354 1355 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1356 while (nkev < maxevents) { 1357 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1358 if (kn->kn_filter == EVFILT_MARKER) { 1359 if (kn == &scan->kqs_end) 1360 break; 1361 1362 /* Move start marker past another thread's marker. */ 1363 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1364 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1365 kn_tqe); 1366 continue; 1367 } 1368 1369 if (!knote_acquire(kn, NULL, 0)) { 1370 /* knote_acquire() has released kq_lock. */ 1371 mtx_enter(&kq->kq_lock); 1372 continue; 1373 } 1374 1375 kqueue_check(kq); 1376 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1377 kn->kn_status &= ~KN_QUEUED; 1378 kq->kq_count--; 1379 kqueue_check(kq); 1380 1381 if (kn->kn_status & KN_DISABLED) { 1382 knote_release(kn); 1383 continue; 1384 } 1385 1386 mtx_leave(&kq->kq_lock); 1387 1388 /* Drop expired kqpoll knotes. */ 1389 if (p->p_kq == kq && 1390 p->p_kq_serial > (unsigned long)kn->kn_udata) { 1391 filter_detach(kn); 1392 knote_drop(kn, p); 1393 mtx_enter(&kq->kq_lock); 1394 continue; 1395 } 1396 1397 /* 1398 * Invalidate knotes whose vnodes have been revoked. 1399 * This is a workaround; it is tricky to clear existing 1400 * knotes and prevent new ones from being registered 1401 * with the current revocation mechanism. 1402 */ 1403 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && 1404 kn->kn_fp != NULL && 1405 kn->kn_fp->f_type == DTYPE_VNODE) { 1406 struct vnode *vp = kn->kn_fp->f_data; 1407 1408 if (__predict_false(vp->v_op == &dead_vops && 1409 kn->kn_fop != &dead_filtops)) { 1410 filter_detach(kn); 1411 kn->kn_fop = &dead_filtops; 1412 1413 /* 1414 * Check if the event should be delivered. 1415 * Use f_event directly because this is 1416 * a special situation. 1417 */ 1418 if (kn->kn_fop->f_event(kn, 0) == 0) { 1419 filter_detach(kn); 1420 knote_drop(kn, p); 1421 mtx_enter(&kq->kq_lock); 1422 continue; 1423 } 1424 } 1425 } 1426 1427 memset(kevp, 0, sizeof(*kevp)); 1428 if (filter_process(kn, kevp) == 0) { 1429 mtx_enter(&kq->kq_lock); 1430 if ((kn->kn_status & KN_QUEUED) == 0) 1431 kn->kn_status &= ~KN_ACTIVE; 1432 knote_release(kn); 1433 kqueue_check(kq); 1434 continue; 1435 } 1436 1437 /* 1438 * Post-event action on the note 1439 */ 1440 if (kevp->flags & EV_ONESHOT) { 1441 filter_detach(kn); 1442 knote_drop(kn, p); 1443 mtx_enter(&kq->kq_lock); 1444 } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { 1445 mtx_enter(&kq->kq_lock); 1446 if (kevp->flags & EV_DISPATCH) 1447 kn->kn_status |= KN_DISABLED; 1448 if ((kn->kn_status & KN_QUEUED) == 0) 1449 kn->kn_status &= ~KN_ACTIVE; 1450 knote_release(kn); 1451 } else { 1452 mtx_enter(&kq->kq_lock); 1453 if ((kn->kn_status & KN_QUEUED) == 0) { 1454 kqueue_check(kq); 1455 kq->kq_count++; 1456 kn->kn_status |= KN_QUEUED; 1457 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1458 /* Wakeup is done after loop. 
*/ 1459 reinserted = 1; 1460 } 1461 knote_release(kn); 1462 } 1463 kqueue_check(kq); 1464 1465 kevp++; 1466 nkev++; 1467 scan->kqs_nevent++; 1468 } 1469 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1470 if (reinserted && kq->kq_count != 0) 1471 kqueue_wakeup(kq); 1472 mtx_leave(&kq->kq_lock); 1473 if (scan->kqs_nevent == 0) 1474 goto retry; 1475 done: 1476 *errorp = error; 1477 return (nkev); 1478 } 1479 1480 void 1481 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1482 { 1483 memset(scan, 0, sizeof(*scan)); 1484 1485 KQREF(kq); 1486 scan->kqs_kq = kq; 1487 scan->kqs_start.kn_filter = EVFILT_MARKER; 1488 scan->kqs_start.kn_status = KN_PROCESSING; 1489 scan->kqs_end.kn_filter = EVFILT_MARKER; 1490 scan->kqs_end.kn_status = KN_PROCESSING; 1491 } 1492 1493 void 1494 kqueue_scan_finish(struct kqueue_scan_state *scan) 1495 { 1496 struct kqueue *kq = scan->kqs_kq; 1497 1498 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1499 KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); 1500 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1501 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1502 1503 if (scan->kqs_queued) { 1504 scan->kqs_queued = 0; 1505 mtx_enter(&kq->kq_lock); 1506 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1507 mtx_leave(&kq->kq_lock); 1508 } 1509 KQRELE(kq); 1510 } 1511 1512 /* 1513 * XXX 1514 * This could be expanded to call kqueue_scan, if desired. 1515 */ 1516 int 1517 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1518 { 1519 return (ENXIO); 1520 } 1521 1522 int 1523 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1524 { 1525 return (ENXIO); 1526 } 1527 1528 int 1529 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1530 { 1531 return (ENOTTY); 1532 } 1533 1534 int 1535 kqueue_poll(struct file *fp, int events, struct proc *p) 1536 { 1537 struct kqueue *kq = (struct kqueue *)fp->f_data; 1538 int revents = 0; 1539 1540 if (events & (POLLIN | POLLRDNORM)) { 1541 mtx_enter(&kq->kq_lock); 1542 if (kq->kq_count) { 1543 revents |= events & (POLLIN | POLLRDNORM); 1544 } else { 1545 selrecord(p, &kq->kq_sel); 1546 kq->kq_state |= KQ_SEL; 1547 } 1548 mtx_leave(&kq->kq_lock); 1549 } 1550 return (revents); 1551 } 1552 1553 int 1554 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1555 { 1556 struct kqueue *kq = fp->f_data; 1557 1558 memset(st, 0, sizeof(*st)); 1559 st->st_size = kq->kq_count; /* unlocked read */ 1560 st->st_blksize = sizeof(struct kevent); 1561 st->st_mode = S_IFIFO; 1562 return (0); 1563 } 1564 1565 void 1566 kqueue_purge(struct proc *p, struct kqueue *kq) 1567 { 1568 int i; 1569 1570 mtx_enter(&kq->kq_lock); 1571 for (i = 0; i < kq->kq_knlistsize; i++) 1572 knote_remove(p, kq, &kq->kq_knlist, i, 1); 1573 if (kq->kq_knhashmask != 0) { 1574 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1575 knote_remove(p, kq, &kq->kq_knhash, i, 1); 1576 } 1577 mtx_leave(&kq->kq_lock); 1578 } 1579 1580 void 1581 kqueue_terminate(struct proc *p, struct kqueue *kq) 1582 { 1583 struct knote *kn; 1584 1585 mtx_enter(&kq->kq_lock); 1586 1587 /* 1588 * Any remaining entries should be scan markers. 1589 * They are removed when the ongoing scans finish. 
1590 */ 1591 KASSERT(kq->kq_count == 0); 1592 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) 1593 KASSERT(kn->kn_filter == EVFILT_MARKER); 1594 1595 kq->kq_state |= KQ_DYING; 1596 kqueue_wakeup(kq); 1597 mtx_leave(&kq->kq_lock); 1598 1599 KASSERT(klist_empty(&kq->kq_sel.si_note)); 1600 task_del(systqmp, &kq->kq_task); 1601 } 1602 1603 int 1604 kqueue_close(struct file *fp, struct proc *p) 1605 { 1606 struct kqueue *kq = fp->f_data; 1607 1608 fp->f_data = NULL; 1609 1610 kqueue_purge(p, kq); 1611 kqueue_terminate(p, kq); 1612 1613 KQRELE(kq); 1614 1615 return (0); 1616 } 1617 1618 static void 1619 kqueue_task(void *arg) 1620 { 1621 struct kqueue *kq = arg; 1622 1623 mtx_enter(&kqueue_klist_lock); 1624 KNOTE(&kq->kq_sel.si_note, 0); 1625 mtx_leave(&kqueue_klist_lock); 1626 KQRELE(kq); 1627 } 1628 1629 void 1630 kqueue_wakeup(struct kqueue *kq) 1631 { 1632 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1633 1634 if (kq->kq_state & KQ_SLEEP) { 1635 kq->kq_state &= ~KQ_SLEEP; 1636 wakeup(kq); 1637 } 1638 if (!klist_empty(&kq->kq_sel.si_note)) { 1639 /* Defer activation to avoid recursion. */ 1640 KQREF(kq); 1641 if (!task_add(systqmp, &kq->kq_task)) 1642 KQRELE(kq); 1643 } 1644 } 1645 1646 static void 1647 kqueue_expand_hash(struct kqueue *kq) 1648 { 1649 struct knlist *hash; 1650 u_long hashmask; 1651 1652 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1653 1654 if (kq->kq_knhashmask == 0) { 1655 mtx_leave(&kq->kq_lock); 1656 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1657 mtx_enter(&kq->kq_lock); 1658 if (kq->kq_knhashmask == 0) { 1659 kq->kq_knhash = hash; 1660 kq->kq_knhashmask = hashmask; 1661 } else { 1662 /* Another thread has allocated the hash. */ 1663 mtx_leave(&kq->kq_lock); 1664 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1665 mtx_enter(&kq->kq_lock); 1666 } 1667 } 1668 } 1669 1670 static void 1671 kqueue_expand_list(struct kqueue *kq, int fd) 1672 { 1673 struct knlist *list, *olist; 1674 int size, osize; 1675 1676 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1677 1678 if (kq->kq_knlistsize <= fd) { 1679 size = kq->kq_knlistsize; 1680 mtx_leave(&kq->kq_lock); 1681 while (size <= fd) 1682 size += KQEXTENT; 1683 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1684 mtx_enter(&kq->kq_lock); 1685 if (kq->kq_knlistsize <= fd) { 1686 memcpy(list, kq->kq_knlist, 1687 kq->kq_knlistsize * sizeof(*list)); 1688 memset(&list[kq->kq_knlistsize], 0, 1689 (size - kq->kq_knlistsize) * sizeof(*list)); 1690 olist = kq->kq_knlist; 1691 osize = kq->kq_knlistsize; 1692 kq->kq_knlist = list; 1693 kq->kq_knlistsize = size; 1694 mtx_leave(&kq->kq_lock); 1695 free(olist, M_KEVENT, osize * sizeof(*list)); 1696 mtx_enter(&kq->kq_lock); 1697 } else { 1698 /* Another thread has expanded the list. */ 1699 mtx_leave(&kq->kq_lock); 1700 free(list, M_KEVENT, size * sizeof(*list)); 1701 mtx_enter(&kq->kq_lock); 1702 } 1703 } 1704 } 1705 1706 /* 1707 * Acquire a knote, return non-zero on success, 0 on failure. 1708 * 1709 * If we cannot acquire the knote we sleep and return 0. The knote 1710 * may be stale on return in this case and the caller must restart 1711 * whatever loop they are in. 1712 * 1713 * If we are about to sleep and klist is non-NULL, the list is unlocked 1714 * before sleep and remains unlocked on return. 
1715 */ 1716 int 1717 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1718 { 1719 struct kqueue *kq = kn->kn_kq; 1720 1721 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1722 KASSERT(kn->kn_filter != EVFILT_MARKER); 1723 1724 if (kn->kn_status & KN_PROCESSING) { 1725 kn->kn_status |= KN_WAITING; 1726 if (klist != NULL) { 1727 mtx_leave(&kq->kq_lock); 1728 klist_unlock(klist, ls); 1729 /* XXX Timeout resolves potential loss of wakeup. */ 1730 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1731 } else { 1732 msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts", 1733 SEC_TO_NSEC(1)); 1734 } 1735 /* knote may be stale now */ 1736 return (0); 1737 } 1738 kn->kn_status |= KN_PROCESSING; 1739 return (1); 1740 } 1741 1742 /* 1743 * Release an acquired knote, clearing KN_PROCESSING. 1744 */ 1745 void 1746 knote_release(struct knote *kn) 1747 { 1748 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1749 KASSERT(kn->kn_filter != EVFILT_MARKER); 1750 KASSERT(kn->kn_status & KN_PROCESSING); 1751 1752 if (kn->kn_status & KN_WAITING) { 1753 kn->kn_status &= ~KN_WAITING; 1754 wakeup(kn); 1755 } 1756 kn->kn_status &= ~KN_PROCESSING; 1757 /* kn should not be accessed anymore */ 1758 } 1759 1760 /* 1761 * activate one knote. 1762 */ 1763 void 1764 knote_activate(struct knote *kn) 1765 { 1766 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1767 1768 kn->kn_status |= KN_ACTIVE; 1769 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1770 knote_enqueue(kn); 1771 } 1772 1773 /* 1774 * walk down a list of knotes, activating them if their event has triggered. 1775 */ 1776 void 1777 knote(struct klist *list, long hint) 1778 { 1779 struct knote *kn, *kn0; 1780 struct kqueue *kq; 1781 1782 KLIST_ASSERT_LOCKED(list); 1783 1784 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) { 1785 if (filter_event(kn, hint)) { 1786 kq = kn->kn_kq; 1787 mtx_enter(&kq->kq_lock); 1788 knote_activate(kn); 1789 mtx_leave(&kq->kq_lock); 1790 } 1791 } 1792 } 1793 1794 /* 1795 * remove all knotes from a specified knlist 1796 */ 1797 void 1798 knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, int idx, 1799 int purge) 1800 { 1801 struct knote *kn; 1802 1803 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1804 1805 /* Always fetch array pointer as another thread can resize kq_knlist. */ 1806 while ((kn = SLIST_FIRST(*plist + idx)) != NULL) { 1807 KASSERT(kn->kn_kq == kq); 1808 1809 if (!purge) { 1810 /* Skip pending badfd knotes. */ 1811 while (kn->kn_fop == &badfd_filtops) { 1812 kn = SLIST_NEXT(kn, kn_link); 1813 if (kn == NULL) 1814 return; 1815 KASSERT(kn->kn_kq == kq); 1816 } 1817 } 1818 1819 if (!knote_acquire(kn, NULL, 0)) { 1820 /* knote_acquire() has released kq_lock. */ 1821 mtx_enter(&kq->kq_lock); 1822 continue; 1823 } 1824 mtx_leave(&kq->kq_lock); 1825 filter_detach(kn); 1826 1827 /* 1828 * Notify poll(2) and select(2) when a monitored 1829 * file descriptor is closed. 1830 * 1831 * This reuses the original knote for delivering the 1832 * notification so as to avoid allocating memory. 
1833 */ 1834 if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) && 1835 !(p->p_kq == kq && 1836 p->p_kq_serial > (unsigned long)kn->kn_udata) && 1837 kn->kn_fop != &badfd_filtops) { 1838 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1839 FRELE(kn->kn_fp, p); 1840 kn->kn_fp = NULL; 1841 1842 kn->kn_fop = &badfd_filtops; 1843 filter_event(kn, 0); 1844 mtx_enter(&kq->kq_lock); 1845 knote_activate(kn); 1846 knote_release(kn); 1847 continue; 1848 } 1849 1850 knote_drop(kn, p); 1851 mtx_enter(&kq->kq_lock); 1852 } 1853 } 1854 1855 /* 1856 * remove all knotes referencing a specified fd 1857 */ 1858 void 1859 knote_fdclose(struct proc *p, int fd) 1860 { 1861 struct filedesc *fdp = p->p_p->ps_fd; 1862 struct kqueue *kq; 1863 1864 /* 1865 * fdplock can be ignored if the file descriptor table is being freed 1866 * because no other thread can access the fdp. 1867 */ 1868 if (fdp->fd_refcnt != 0) 1869 fdpassertlocked(fdp); 1870 1871 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1872 mtx_enter(&kq->kq_lock); 1873 if (fd < kq->kq_knlistsize) 1874 knote_remove(p, kq, &kq->kq_knlist, fd, 0); 1875 mtx_leave(&kq->kq_lock); 1876 } 1877 } 1878 1879 /* 1880 * handle a process exiting, including the triggering of NOTE_EXIT notes 1881 * XXX this could be more efficient, doing a single pass down the klist 1882 */ 1883 void 1884 knote_processexit(struct process *pr) 1885 { 1886 KERNEL_ASSERT_LOCKED(); 1887 1888 KNOTE(&pr->ps_klist, NOTE_EXIT); 1889 1890 /* remove other knotes hanging off the process */ 1891 klist_invalidate(&pr->ps_klist); 1892 } 1893 1894 void 1895 knote_attach(struct knote *kn) 1896 { 1897 struct kqueue *kq = kn->kn_kq; 1898 struct knlist *list; 1899 1900 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1901 KASSERT(kn->kn_status & KN_PROCESSING); 1902 1903 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1904 KASSERT(kq->kq_knlistsize > kn->kn_id); 1905 list = &kq->kq_knlist[kn->kn_id]; 1906 } else { 1907 KASSERT(kq->kq_knhashmask != 0); 1908 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1909 } 1910 SLIST_INSERT_HEAD(list, kn, kn_link); 1911 kq->kq_nknotes++; 1912 } 1913 1914 void 1915 knote_detach(struct knote *kn) 1916 { 1917 struct kqueue *kq = kn->kn_kq; 1918 struct knlist *list; 1919 1920 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1921 KASSERT(kn->kn_status & KN_PROCESSING); 1922 1923 kq->kq_nknotes--; 1924 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1925 list = &kq->kq_knlist[kn->kn_id]; 1926 else 1927 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1928 SLIST_REMOVE(list, kn, knote, kn_link); 1929 } 1930 1931 /* 1932 * should be called at spl == 0, since we don't want to hold spl 1933 * while calling FRELE and pool_put. 
1934 */ 1935 void 1936 knote_drop(struct knote *kn, struct proc *p) 1937 { 1938 struct kqueue *kq = kn->kn_kq; 1939 1940 KASSERT(kn->kn_filter != EVFILT_MARKER); 1941 1942 mtx_enter(&kq->kq_lock); 1943 knote_detach(kn); 1944 if (kn->kn_status & KN_QUEUED) 1945 knote_dequeue(kn); 1946 if (kn->kn_status & KN_WAITING) { 1947 kn->kn_status &= ~KN_WAITING; 1948 wakeup(kn); 1949 } 1950 mtx_leave(&kq->kq_lock); 1951 1952 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 1953 FRELE(kn->kn_fp, p); 1954 pool_put(&knote_pool, kn); 1955 } 1956 1957 1958 void 1959 knote_enqueue(struct knote *kn) 1960 { 1961 struct kqueue *kq = kn->kn_kq; 1962 1963 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1964 KASSERT(kn->kn_filter != EVFILT_MARKER); 1965 KASSERT((kn->kn_status & KN_QUEUED) == 0); 1966 1967 kqueue_check(kq); 1968 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1969 kn->kn_status |= KN_QUEUED; 1970 kq->kq_count++; 1971 kqueue_check(kq); 1972 kqueue_wakeup(kq); 1973 } 1974 1975 void 1976 knote_dequeue(struct knote *kn) 1977 { 1978 struct kqueue *kq = kn->kn_kq; 1979 1980 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1981 KASSERT(kn->kn_filter != EVFILT_MARKER); 1982 KASSERT(kn->kn_status & KN_QUEUED); 1983 1984 kqueue_check(kq); 1985 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1986 kn->kn_status &= ~KN_QUEUED; 1987 kq->kq_count--; 1988 kqueue_check(kq); 1989 } 1990 1991 /* 1992 * Assign parameters to the knote. 1993 * 1994 * The knote's object lock must be held. 1995 */ 1996 void 1997 knote_assign(const struct kevent *kev, struct knote *kn) 1998 { 1999 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2000 KERNEL_ASSERT_LOCKED(); 2001 2002 kn->kn_sfflags = kev->fflags; 2003 kn->kn_sdata = kev->data; 2004 kn->kn_udata = kev->udata; 2005 } 2006 2007 /* 2008 * Submit the knote's event for delivery. 2009 * 2010 * The knote's object lock must be held. 2011 */ 2012 void 2013 knote_submit(struct knote *kn, struct kevent *kev) 2014 { 2015 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 2016 KERNEL_ASSERT_LOCKED(); 2017 2018 if (kev != NULL) { 2019 *kev = kn->kn_kevent; 2020 if (kn->kn_flags & EV_CLEAR) { 2021 kn->kn_fflags = 0; 2022 kn->kn_data = 0; 2023 } 2024 } 2025 } 2026 2027 void 2028 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 2029 { 2030 SLIST_INIT(&klist->kl_list); 2031 klist->kl_ops = ops; 2032 klist->kl_arg = arg; 2033 } 2034 2035 void 2036 klist_free(struct klist *klist) 2037 { 2038 KASSERT(SLIST_EMPTY(&klist->kl_list)); 2039 } 2040 2041 void 2042 klist_insert(struct klist *klist, struct knote *kn) 2043 { 2044 int ls; 2045 2046 ls = klist_lock(klist); 2047 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2048 klist_unlock(klist, ls); 2049 } 2050 2051 void 2052 klist_insert_locked(struct klist *klist, struct knote *kn) 2053 { 2054 KLIST_ASSERT_LOCKED(klist); 2055 2056 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2057 } 2058 2059 void 2060 klist_remove(struct klist *klist, struct knote *kn) 2061 { 2062 int ls; 2063 2064 ls = klist_lock(klist); 2065 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2066 klist_unlock(klist, ls); 2067 } 2068 2069 void 2070 klist_remove_locked(struct klist *klist, struct knote *kn) 2071 { 2072 KLIST_ASSERT_LOCKED(klist); 2073 2074 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2075 } 2076 2077 /* 2078 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 2079 * 2080 * The caller of this function must not hold any locks that can block 2081 * filterops callbacks that run with KN_PROCESSING. 
2082 * Otherwise this function might deadlock. 2083 */ 2084 void 2085 klist_invalidate(struct klist *list) 2086 { 2087 struct knote *kn; 2088 struct kqueue *kq; 2089 struct proc *p = curproc; 2090 int ls; 2091 2092 NET_ASSERT_UNLOCKED(); 2093 2094 ls = klist_lock(list); 2095 while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { 2096 kq = kn->kn_kq; 2097 mtx_enter(&kq->kq_lock); 2098 if (!knote_acquire(kn, list, ls)) { 2099 /* knote_acquire() has released kq_lock 2100 * and klist lock. */ 2101 ls = klist_lock(list); 2102 continue; 2103 } 2104 mtx_leave(&kq->kq_lock); 2105 klist_unlock(list, ls); 2106 filter_detach(kn); 2107 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 2108 kn->kn_fop = &dead_filtops; 2109 filter_event(kn, 0); 2110 mtx_enter(&kq->kq_lock); 2111 knote_activate(kn); 2112 knote_release(kn); 2113 mtx_leave(&kq->kq_lock); 2114 } else { 2115 knote_drop(kn, p); 2116 } 2117 ls = klist_lock(list); 2118 } 2119 klist_unlock(list, ls); 2120 } 2121 2122 static int 2123 klist_lock(struct klist *list) 2124 { 2125 int ls = 0; 2126 2127 if (list->kl_ops != NULL) { 2128 ls = list->kl_ops->klo_lock(list->kl_arg); 2129 } else { 2130 KERNEL_LOCK(); 2131 ls = splhigh(); 2132 } 2133 return ls; 2134 } 2135 2136 static void 2137 klist_unlock(struct klist *list, int ls) 2138 { 2139 if (list->kl_ops != NULL) { 2140 list->kl_ops->klo_unlock(list->kl_arg, ls); 2141 } else { 2142 splx(ls); 2143 KERNEL_UNLOCK(); 2144 } 2145 } 2146 2147 static void 2148 klist_mutex_assertlk(void *arg) 2149 { 2150 struct mutex *mtx = arg; 2151 2152 (void)mtx; 2153 2154 MUTEX_ASSERT_LOCKED(mtx); 2155 } 2156 2157 static int 2158 klist_mutex_lock(void *arg) 2159 { 2160 struct mutex *mtx = arg; 2161 2162 mtx_enter(mtx); 2163 return 0; 2164 } 2165 2166 static void 2167 klist_mutex_unlock(void *arg, int s) 2168 { 2169 struct mutex *mtx = arg; 2170 2171 mtx_leave(mtx); 2172 } 2173 2174 static const struct klistops mutex_klistops = { 2175 .klo_assertlk = klist_mutex_assertlk, 2176 .klo_lock = klist_mutex_lock, 2177 .klo_unlock = klist_mutex_unlock, 2178 }; 2179 2180 void 2181 klist_init_mutex(struct klist *klist, struct mutex *mtx) 2182 { 2183 klist_init(klist, &mutex_klistops, mtx); 2184 } 2185 2186 static void 2187 klist_rwlock_assertlk(void *arg) 2188 { 2189 struct rwlock *rwl = arg; 2190 2191 (void)rwl; 2192 2193 rw_assert_wrlock(rwl); 2194 } 2195 2196 static int 2197 klist_rwlock_lock(void *arg) 2198 { 2199 struct rwlock *rwl = arg; 2200 2201 rw_enter_write(rwl); 2202 return 0; 2203 } 2204 2205 static void 2206 klist_rwlock_unlock(void *arg, int s) 2207 { 2208 struct rwlock *rwl = arg; 2209 2210 rw_exit_write(rwl); 2211 } 2212 2213 static const struct klistops rwlock_klistops = { 2214 .klo_assertlk = klist_rwlock_assertlk, 2215 .klo_lock = klist_rwlock_lock, 2216 .klo_unlock = klist_rwlock_unlock, 2217 }; 2218 2219 void 2220 klist_init_rwlock(struct klist *klist, struct rwlock *rwl) 2221 { 2222 klist_init(klist, &rwlock_klistops, rwl); 2223 } 2224
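
/*
 * Illustrative userland sketch (not part of this file): the changelist/
 * eventlist flow that sys_kevent() above services, as seen from user space.
 * It registers a level-triggered EVFILT_READ knote on stdin and a periodic
 * EVFILT_TIMER whose data is interpreted as milliseconds, matching
 * filt_timerattach().  Kept under #if 0 because it is a user program, not
 * kernel code.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent chg[2], ev[2];
	int i, kq, n;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/* Register both knotes with a single changelist. */
	EV_SET(&chg[0], STDIN_FILENO, EVFILT_READ, EV_ADD | EV_ENABLE,
	    0, 0, NULL);
	EV_SET(&chg[1], 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, NULL);
	if (kevent(kq, chg, 2, NULL, 0, NULL) == -1)
		err(1, "kevent: register");

	/* Collect events; a NULL timeout blocks inside kqueue_scan(). */
	for (;;) {
		n = kevent(kq, NULL, 0, ev, 2, NULL);
		if (n == -1)
			err(1, "kevent: wait");
		for (i = 0; i < n; i++) {
			if (ev[i].filter == EVFILT_TIMER)
				printf("timer %lu fired %lld time(s)\n",
				    (unsigned long)ev[i].ident,
				    (long long)ev[i].data);
			else
				printf("fd %lu has %lld byte(s) to read\n",
				    (unsigned long)ev[i].ident,
				    (long long)ev[i].data);
		}
	}
	/* NOTREACHED */
	return (0);
}
#endif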
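
/*
 * A second userland sketch (also illustrative, kept under #if 0): waiting
 * for a child with EVFILT_PROC/NOTE_EXIT.  As filt_proc() above shows, the
 * exit event carries W_EXITCODE(ps_xexit, ps_xsig) in kn_data, so the usual
 * <sys/wait.h> macros apply to the returned kevent data field.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/wait.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent kev;
	pid_t pid;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	switch ((pid = fork())) {
	case -1:
		err(1, "fork");
	case 0:
		sleep(1);	/* give the parent time to register */
		_exit(7);
	}

	/* filt_procattach() fails with ESRCH if the child already exited. */
	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent: register");

	if (kevent(kq, NULL, 0, &kev, 1, NULL) == -1)
		err(1, "kevent: wait");
	if (kev.fflags & NOTE_EXIT)
		printf("pid %ld exited with status %d\n",
		    (long)kev.ident, WEXITSTATUS((int)kev.data));

	/* The knote does not reap the child; wait(2) is still needed. */
	waitpid(pid, NULL, 0);
	return (0);
}
#endif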
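
/*
 * Kernel-side sketch (hypothetical "foo" driver, kept under #if 0): how a
 * subsystem plugs into the klist/knote API implemented above.  The klist is
 * backed by the driver mutex via klist_init_mutex(), so knote()/KNOTE() runs
 * f_event with that mutex held, while f_modify/f_process follow the same
 * pattern as filt_kqueuemodify()/filt_kqueueprocess().  foo_softc,
 * foo_lookup() and the foo_* entry points are assumptions made for the
 * example, not existing kernel interfaces.
 */
#if 0
/* Assumed headers: <sys/param.h>, <sys/systm.h>, <sys/event.h>, <sys/mutex.h>. */
struct foo_softc {
	struct mutex	sc_mtx;		/* serializes sc_klist and sc_count */
	struct klist	sc_klist;	/* knotes waiting for read readiness */
	int		sc_count;	/* records ready to read (hypothetical) */
};

int
filt_fooread_common(struct knote *kn, struct foo_softc *sc)
{
	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
	kn->kn_data = sc->sc_count;
	return (kn->kn_data > 0);
}

int
filt_fooread(struct knote *kn, long hint)
{
	/* Reached via KNOTE()/knote() with the klist mutex already held. */
	return (filt_fooread_common(kn, kn->kn_hook));
}

int
filt_foomodify(struct kevent *kev, struct knote *kn)
{
	struct foo_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	knote_assign(kev, kn);
	active = filt_fooread_common(kn, sc);
	mtx_leave(&sc->sc_mtx);
	return (active);
}

int
filt_fooprocess(struct knote *kn, struct kevent *kev)
{
	struct foo_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_fooread_common(kn, sc);
	if (active)
		knote_submit(kn, kev);
	mtx_leave(&sc->sc_mtx);
	return (active);
}

void
filt_foordetach(struct knote *kn)
{
	struct foo_softc *sc = kn->kn_hook;

	klist_remove(&sc->sc_klist, kn);	/* takes sc_mtx internally */
}

const struct filterops fooread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_foordetach,
	.f_event	= filt_fooread,
	.f_modify	= filt_foomodify,
	.f_process	= filt_fooprocess,
};

int
fookqfilter(dev_t dev, struct knote *kn)
{
	struct foo_softc *sc = foo_lookup(dev);	/* hypothetical lookup */

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);
	kn->kn_fop = &fooread_filtops;
	kn->kn_hook = sc;
	klist_insert(&sc->sc_klist, kn);	/* takes sc_mtx internally */
	return (0);
}

void
foo_attach(struct foo_softc *sc)
{
	mtx_init(&sc->sc_mtx, IPL_MPFLOOR);
	klist_init_mutex(&sc->sc_klist, &sc->sc_mtx);
}

void
foo_data_ready(struct foo_softc *sc)
{
	mtx_enter(&sc->sc_mtx);
	sc->sc_count++;
	KNOTE(&sc->sc_klist, 0);	/* activates waiting knotes via knote() */
	mtx_leave(&sc->sc_mtx);
}
#endif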