1 /* $OpenBSD: kern_event.c,v 1.193 2022/08/14 01:58:27 jsg Exp $ */ 2 3 /*- 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/vnode.h>
#include <sys/wait.h>

/*
 * Assert that a klist is locked: use the klist's own lock operations when
 * present, otherwise the klist is protected by the kernel lock.
 */
#ifdef DIAGNOSTIC
#define KLIST_ASSERT_LOCKED(kl) do {					\
	if ((kl)->kl_ops != NULL)					\
		(kl)->kl_ops->klo_assertlk((kl)->kl_arg);		\
	else								\
		KERNEL_ASSERT_LOCKED();					\
} while (0)
#else
#define KLIST_ASSERT_LOCKED(kl)	((void)(kl))
#endif

struct	kqueue *kqueue_alloc(struct filedesc *);
void	kqueue_terminate(struct proc *p, struct kqueue *);
void	KQREF(struct kqueue *);
void	KQRELE(struct kqueue *);

void	kqueue_purge(struct proc *, struct kqueue *);
int	kqueue_sleep(struct kqueue *, struct timespec *);

int	kqueue_read(struct file *, struct uio *, int);
int	kqueue_write(struct file *, struct uio *, int);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

/* Consistency check of a kqueue's queue; compiled in only for debugging. */
#ifdef KQUEUE_DEBUG
void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif

static int	filter_attach(struct knote *kn);
static void	filter_detach(struct knote *kn);
static int	filter_event(struct knote *kn, long hint);
static int	filter_modify(struct kevent *kev, struct knote *kn);
static int	filter_process(struct knote *kn, struct kevent *kev);
static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);

/* File operations vector for kqueue descriptors. */
const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_detach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
int	knote_acquire(struct knote *kn, struct klist *, int);
void	knote_release(struct knote *kn);
void	knote_activate(struct knote *kn);
void	knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist,
	    int idx, int purge);

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_kqueuemodify(struct kevent *kev, struct knote *kn);
int	filt_kqueueprocess(struct knote *kn, struct kevent *kev);
int	filt_kqueue_common(struct knote *kn, struct kqueue *kq);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timermodify(struct kevent *kev, struct knote *kn);
int	filt_timerprocess(struct knote *kn, struct kevent *kev);
void	filt_seltruedetach(struct knote *kn);

/* Filter for reading readiness of a kqueue itself (kqueue-in-kqueue). */
const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
	.f_modify	= filt_kqueuemodify,
	.f_process	= filt_kqueueprocess,
};

/* EVFILT_PROC: process status events; not marked MPSAFE. */
const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

/* Dispatch-by-fd filter: attach forwards to the file's fo_kqfilter. */
const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

/* EVFILT_TIMER: periodic timers backed by timeout(9); not marked MPSAFE. */
const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= NULL,
	.f_modify	= filt_timermodify,
	.f_process	= filt_timerprocess,
};

struct	pool knote_pool;
struct	pool kqueue_pool;
struct	mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
/* Current and maximum number of EVFILT_TIMER timeouts in the system. */
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/*
 * Table for all system-defined filters, indexed by ~filter (0-based).
 */
const struct filterops *const sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_DEVICE */
	&file_filtops,			/* EVFILT_EXCEPT */
};

/* Take a reference on a kqueue. */
void
KQREF(struct kqueue *kq)
{
	refcnt_take(&kq->kq_refcnt);
}

/*
 * Release a reference on a kqueue.  On release of the last reference,
 * unlink the kqueue from its file descriptor table and free all of its
 * storage.  The fdp lock may already be held by the caller.
 */
void
KQRELE(struct kqueue *kq)
{
	struct filedesc *fdp;

	if (refcnt_rele(&kq->kq_refcnt) == 0)
		return;

	fdp = kq->kq_fdp;
	if (rw_status(&fdp->fd_lock) == RW_WRITE) {
		LIST_REMOVE(kq, kq_next);
	} else {
		fdplock(fdp);
		LIST_REMOVE(kq, kq_next);
		fdpunlock(fdp);
	}

	/* No knotes may remain queued or attached at this point. */
	KASSERT(TAILQ_EMPTY(&kq->kq_head));
	KASSERT(kq->kq_nknotes == 0);

	free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
	    sizeof(struct knlist));
	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
	klist_free(&kq->kq_klist);
	pool_put(&kqueue_pool, kq);
}

/* One-time initialization of the kqueue and knote pools. */
void
kqueue_init(void)
{
	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR,
	    PR_WAITOK, "kqueuepl", NULL);
	pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR,
	    PR_WAITOK, "knotepl", NULL);
}

/* Enable per-CPU caching for the knote pool. */
void
kqueue_init_percpu(void)
{
	pool_cache_init(&knote_pool);
}

/* Generic fd filter attach: delegate to the file's own kqfilter method. */
int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return fp->f_ops->fo_kqfilter(fp, kn);
}

/* kqfilter method of kqueue files: only EVFILT_READ is supported. */
int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &kqread_filtops;
	klist_insert(&kq->kq_klist, kn);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	klist_remove(&kq->kq_klist, kn);
}

/*
 * Report the number of queued events of the watched kqueue.
 * Caller must hold the watched kqueue's kq_lock.
 */
int
filt_kqueue_common(struct knote *kn, struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	kn->kn_data = kq->kq_count;

	return (kn->kn_data > 0);
}

int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueuemodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	knote_assign(kev, kn);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueueprocess(struct knote *kn, struct kevent *kev)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	/* An EV_ONESHOT event is always delivered once it has triggered. */
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_kqueue_common(kn, kq);
	if (active)
		knote_submit(kn, kev);
	mtx_leave(&kq->kq_lock);

	return (active);
}

/*
 * Attach an EVFILT_PROC knote to the process identified by kn_id.
 * Requires the "proc" pledge promise when pledged.
 */
int
filt_procattach(struct knote *kn)
{
	struct process *pr;
	int s;

	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
		return pledge_fail(curproc, EPERM, PLEDGE_PROC);

	if (kn->kn_id > PID_MAX)
		return ESRCH;

	pr = prfind(kn->kn_id);
	if (pr == NULL)
		return (ESRCH);

	/* exiting processes can't be specified */
	if (pr->ps_flags & PS_EXITING)
		return (ESRCH);

	kn->kn_ptr.p_process = pr;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 * (NOTE_TRACK child registration from filt_proc())
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	s = splhigh();
	klist_insert_locked(&pr->ps_klist, kn);
	splx(s);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct process *pr = kn->kn_ptr.p_process;
	int s, status;

	mtx_enter(&kq->kq_lock);
	status = kn->kn_status;
	mtx_leave(&kq->kq_lock);

	if (status & KN_DETACHED)
		return;

	s = splhigh();
	klist_remove_locked(&pr->ps_klist, kn);
	splx(s);
}

/*
 * Event callback for EVFILT_PROC; hint carries the NOTE_* event and,
 * for NOTE_FORK, the child pid in the NOTE_PDATAMASK bits.
 */
int
filt_proc(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_kq;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished and remove it
	 * from the process's klist
	 */
	if (event == NOTE_EXIT) {
		struct process *pr = kn->kn_ptr.p_process;
		int s;

		mtx_enter(&kq->kq_lock);
		kn->kn_status |= KN_DETACHED;
		mtx_leave(&kq->kq_lock);

		s = splhigh();
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
		klist_remove_locked(&pr->ps_klist, kn);
		splx(s);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		memset(&kev, 0, sizeof(kev));
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_udata;		/* preserve udata */
		error = kqueue_register(kq, &kev, 0, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

/*
 * (Re)arm the timeout of an EVFILT_TIMER knote.  kn_sdata is the period
 * in milliseconds; always arm for at least one tick.
 */
static void
filt_timer_timeout_add(struct knote *kn)
{
	struct timeval tv;
	struct timeout *to = kn->kn_hook;
	int tticks;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);
	/* Remove extra tick from tvtohz() if timeout has fired before. */
	if (timeout_triggered(to))
		tticks--;
	timeout_add(to, (tticks > 0) ? tticks : 1);
}

/* Timeout callback: count the expiry, activate the knote, and re-arm. */
void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct kqueue *kq = kn->kn_kq;

	kn->kn_data++;
	mtx_enter(&kq->kq_lock);
	knote_activate(kn);
	mtx_leave(&kq->kq_lock);

	if ((kn->kn_flags & EV_ONESHOT) == 0)
		filt_timer_timeout_add(kn);
}


/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;

	/* Bound the total number of timer knotes in the system. */
	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	kn->kn_hook = to;
	filt_timer_timeout_add(kn);

	return (0);
}

/* Cancel the timeout (waiting for a running handler) and free it. */
void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del_barrier(to);
	free(to, M_KEVENT, sizeof(*to));
	kq_ntimeouts--;
}

int
filt_timermodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct timeout *to = kn->kn_hook;

	/* Reset the timer. Any pending events are discarded. */

	timeout_del_barrier(to);

	mtx_enter(&kq->kq_lock);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	kn->kn_status &= ~KN_ACTIVE;
	mtx_leave(&kq->kq_lock);

	kn->kn_data = 0;
	knote_assign(kev, kn);
	/* Reinit timeout to invoke tick adjustment again. */
	timeout_set(to, filt_timerexpire, kn);
	filt_timer_timeout_add(kn);

	return (0);
}

/* Deliver a timer event if any expirations are pending (kn_data != 0). */
int
filt_timerprocess(struct knote *kn, struct kevent *kev)
{
	int active, s;

	s = splsoftclock();
	active = (kn->kn_data != 0);
	if (active)
		knote_submit(kn, kev);
	splx(s);

	return (active);
}


/*
 * filt_seltrue:
 *
 * This filter "event" routine simulates seltrue().
560 */ 561 int 562 filt_seltrue(struct knote *kn, long hint) 563 { 564 565 /* 566 * We don't know how much data can be read/written, 567 * but we know that it *can* be. This is about as 568 * good as select/poll does as well. 569 */ 570 kn->kn_data = 0; 571 return (1); 572 } 573 574 int 575 filt_seltruemodify(struct kevent *kev, struct knote *kn) 576 { 577 knote_assign(kev, kn); 578 return (kn->kn_fop->f_event(kn, 0)); 579 } 580 581 int 582 filt_seltrueprocess(struct knote *kn, struct kevent *kev) 583 { 584 int active; 585 586 active = kn->kn_fop->f_event(kn, 0); 587 if (active) 588 knote_submit(kn, kev); 589 return (active); 590 } 591 592 /* 593 * This provides full kqfilter entry for device switch tables, which 594 * has same effect as filter using filt_seltrue() as filter method. 595 */ 596 void 597 filt_seltruedetach(struct knote *kn) 598 { 599 /* Nothing to do */ 600 } 601 602 const struct filterops seltrue_filtops = { 603 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 604 .f_attach = NULL, 605 .f_detach = filt_seltruedetach, 606 .f_event = filt_seltrue, 607 .f_modify = filt_seltruemodify, 608 .f_process = filt_seltrueprocess, 609 }; 610 611 int 612 seltrue_kqfilter(dev_t dev, struct knote *kn) 613 { 614 switch (kn->kn_filter) { 615 case EVFILT_READ: 616 case EVFILT_WRITE: 617 kn->kn_fop = &seltrue_filtops; 618 break; 619 default: 620 return (EINVAL); 621 } 622 623 /* Nothing more to do */ 624 return (0); 625 } 626 627 static int 628 filt_dead(struct knote *kn, long hint) 629 { 630 if (kn->kn_filter == EVFILT_EXCEPT) { 631 /* 632 * Do not deliver event because there is no out-of-band data. 633 * However, let HUP condition pass for poll(2). 
634 */ 635 if ((kn->kn_flags & __EV_POLL) == 0) { 636 kn->kn_flags |= EV_DISABLE; 637 return (0); 638 } 639 } 640 641 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 642 if (kn->kn_flags & __EV_POLL) 643 kn->kn_flags |= __EV_HUP; 644 kn->kn_data = 0; 645 return (1); 646 } 647 648 static void 649 filt_deaddetach(struct knote *kn) 650 { 651 /* Nothing to do */ 652 } 653 654 const struct filterops dead_filtops = { 655 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 656 .f_attach = NULL, 657 .f_detach = filt_deaddetach, 658 .f_event = filt_dead, 659 .f_modify = filt_seltruemodify, 660 .f_process = filt_seltrueprocess, 661 }; 662 663 static int 664 filt_badfd(struct knote *kn, long hint) 665 { 666 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 667 kn->kn_data = EBADF; 668 return (1); 669 } 670 671 /* For use with kqpoll. */ 672 const struct filterops badfd_filtops = { 673 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 674 .f_attach = NULL, 675 .f_detach = filt_deaddetach, 676 .f_event = filt_badfd, 677 .f_modify = filt_seltruemodify, 678 .f_process = filt_seltrueprocess, 679 }; 680 681 static int 682 filter_attach(struct knote *kn) 683 { 684 int error; 685 686 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 687 error = kn->kn_fop->f_attach(kn); 688 } else { 689 KERNEL_LOCK(); 690 error = kn->kn_fop->f_attach(kn); 691 KERNEL_UNLOCK(); 692 } 693 return (error); 694 } 695 696 static void 697 filter_detach(struct knote *kn) 698 { 699 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 700 kn->kn_fop->f_detach(kn); 701 } else { 702 KERNEL_LOCK(); 703 kn->kn_fop->f_detach(kn); 704 KERNEL_UNLOCK(); 705 } 706 } 707 708 static int 709 filter_event(struct knote *kn, long hint) 710 { 711 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 712 KERNEL_ASSERT_LOCKED(); 713 714 return (kn->kn_fop->f_event(kn, hint)); 715 } 716 717 static int 718 filter_modify(struct kevent *kev, struct knote *kn) 719 { 720 int active, s; 721 722 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 723 active = kn->kn_fop->f_modify(kev, kn); 
724 } else { 725 KERNEL_LOCK(); 726 if (kn->kn_fop->f_modify != NULL) { 727 active = kn->kn_fop->f_modify(kev, kn); 728 } else { 729 s = splhigh(); 730 active = knote_modify(kev, kn); 731 splx(s); 732 } 733 KERNEL_UNLOCK(); 734 } 735 return (active); 736 } 737 738 static int 739 filter_process(struct knote *kn, struct kevent *kev) 740 { 741 int active, s; 742 743 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 744 active = kn->kn_fop->f_process(kn, kev); 745 } else { 746 KERNEL_LOCK(); 747 if (kn->kn_fop->f_process != NULL) { 748 active = kn->kn_fop->f_process(kn, kev); 749 } else { 750 s = splhigh(); 751 active = knote_process(kn, kev); 752 splx(s); 753 } 754 KERNEL_UNLOCK(); 755 } 756 return (active); 757 } 758 759 /* 760 * Initialize the current thread for poll/select system call. 761 * num indicates the number of serials that the system call may utilize. 762 * After this function, the valid range of serials is 763 * p_kq_serial <= x < p_kq_serial + num. 764 */ 765 void 766 kqpoll_init(unsigned int num) 767 { 768 struct proc *p = curproc; 769 struct filedesc *fdp; 770 771 if (p->p_kq == NULL) { 772 p->p_kq = kqueue_alloc(p->p_fd); 773 p->p_kq_serial = arc4random(); 774 fdp = p->p_fd; 775 fdplock(fdp); 776 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 777 fdpunlock(fdp); 778 } 779 780 if (p->p_kq_serial + num < p->p_kq_serial) { 781 /* Serial is about to wrap. Clear all attached knotes. */ 782 kqueue_purge(p, p->p_kq); 783 p->p_kq_serial = 0; 784 } 785 } 786 787 /* 788 * Finish poll/select system call. 789 * num must have the same value that was used with kqpoll_init(). 
790 */ 791 void 792 kqpoll_done(unsigned int num) 793 { 794 struct proc *p = curproc; 795 struct kqueue *kq = p->p_kq; 796 797 KASSERT(p->p_kq != NULL); 798 KASSERT(p->p_kq_serial + num >= p->p_kq_serial); 799 800 p->p_kq_serial += num; 801 802 /* 803 * Because of kn_pollid key, a thread can in principle allocate 804 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly 805 * with suitably varying pollfd arrays. 806 * Prevent such a large allocation by clearing knotes eagerly 807 * if there are too many of them. 808 * 809 * A small multiple of kq_knlistsize should give enough margin 810 * that eager clearing is infrequent, or does not happen at all, 811 * with normal programs. 812 * A single pollfd entry can use up to three knotes. 813 * Typically there is no significant overlap of fd and events 814 * between different entries in the pollfd array. 815 */ 816 if (kq->kq_nknotes > 4 * kq->kq_knlistsize) 817 kqueue_purge(p, kq); 818 } 819 820 void 821 kqpoll_exit(void) 822 { 823 struct proc *p = curproc; 824 825 if (p->p_kq == NULL) 826 return; 827 828 kqueue_purge(p, p->p_kq); 829 kqueue_terminate(p, p->p_kq); 830 KASSERT(p->p_kq->kq_refcnt.r_refs == 1); 831 KQRELE(p->p_kq); 832 p->p_kq = NULL; 833 } 834 835 struct kqueue * 836 kqueue_alloc(struct filedesc *fdp) 837 { 838 struct kqueue *kq; 839 840 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 841 refcnt_init(&kq->kq_refcnt); 842 kq->kq_fdp = fdp; 843 TAILQ_INIT(&kq->kq_head); 844 mtx_init(&kq->kq_lock, IPL_HIGH); 845 task_set(&kq->kq_task, kqueue_task, kq); 846 klist_init_mutex(&kq->kq_klist, &kqueue_klist_lock); 847 848 return (kq); 849 } 850 851 int 852 sys_kqueue(struct proc *p, void *v, register_t *retval) 853 { 854 struct filedesc *fdp = p->p_fd; 855 struct kqueue *kq; 856 struct file *fp; 857 int fd, error; 858 859 kq = kqueue_alloc(fdp); 860 861 fdplock(fdp); 862 error = falloc(p, &fp, &fd); 863 if (error) 864 goto out; 865 fp->f_flag = FREAD | FWRITE; 866 fp->f_type = DTYPE_KQUEUE; 867 fp->f_ops = 
&kqueueops; 868 fp->f_data = kq; 869 *retval = fd; 870 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 871 kq = NULL; 872 fdinsert(fdp, fd, 0, fp); 873 FRELE(fp, p); 874 out: 875 fdpunlock(fdp); 876 if (kq != NULL) 877 pool_put(&kqueue_pool, kq); 878 return (error); 879 } 880 881 int 882 sys_kevent(struct proc *p, void *v, register_t *retval) 883 { 884 struct kqueue_scan_state scan; 885 struct filedesc* fdp = p->p_fd; 886 struct sys_kevent_args /* { 887 syscallarg(int) fd; 888 syscallarg(const struct kevent *) changelist; 889 syscallarg(int) nchanges; 890 syscallarg(struct kevent *) eventlist; 891 syscallarg(int) nevents; 892 syscallarg(const struct timespec *) timeout; 893 } */ *uap = v; 894 struct kevent *kevp; 895 struct kqueue *kq; 896 struct file *fp; 897 struct timespec ts; 898 struct timespec *tsp = NULL; 899 int i, n, nerrors, error; 900 int ready, total; 901 struct kevent kev[KQ_NEVENTS]; 902 903 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 904 return (EBADF); 905 906 if (fp->f_type != DTYPE_KQUEUE) { 907 error = EBADF; 908 goto done; 909 } 910 911 if (SCARG(uap, timeout) != NULL) { 912 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 913 if (error) 914 goto done; 915 #ifdef KTRACE 916 if (KTRPOINT(p, KTR_STRUCT)) 917 ktrreltimespec(p, &ts); 918 #endif 919 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 920 error = EINVAL; 921 goto done; 922 } 923 tsp = &ts; 924 } 925 926 kq = fp->f_data; 927 nerrors = 0; 928 929 while ((n = SCARG(uap, nchanges)) > 0) { 930 if (n > nitems(kev)) 931 n = nitems(kev); 932 error = copyin(SCARG(uap, changelist), kev, 933 n * sizeof(struct kevent)); 934 if (error) 935 goto done; 936 #ifdef KTRACE 937 if (KTRPOINT(p, KTR_STRUCT)) 938 ktrevent(p, kev, n); 939 #endif 940 for (i = 0; i < n; i++) { 941 kevp = &kev[i]; 942 kevp->flags &= ~EV_SYSFLAGS; 943 error = kqueue_register(kq, kevp, 0, p); 944 if (error || (kevp->flags & EV_RECEIPT)) { 945 if (SCARG(uap, nevents) != 0) { 946 kevp->flags = EV_ERROR; 947 kevp->data = 
error; 948 copyout(kevp, SCARG(uap, eventlist), 949 sizeof(*kevp)); 950 SCARG(uap, eventlist)++; 951 SCARG(uap, nevents)--; 952 nerrors++; 953 } else { 954 goto done; 955 } 956 } 957 } 958 SCARG(uap, nchanges) -= n; 959 SCARG(uap, changelist) += n; 960 } 961 if (nerrors) { 962 *retval = nerrors; 963 error = 0; 964 goto done; 965 } 966 967 kqueue_scan_setup(&scan, kq); 968 FRELE(fp, p); 969 /* 970 * Collect as many events as we can. The timeout on successive 971 * loops is disabled (kqueue_scan() becomes non-blocking). 972 */ 973 total = 0; 974 error = 0; 975 while ((n = SCARG(uap, nevents) - total) > 0) { 976 if (n > nitems(kev)) 977 n = nitems(kev); 978 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 979 if (ready == 0) 980 break; 981 error = copyout(kev, SCARG(uap, eventlist) + total, 982 sizeof(struct kevent) * ready); 983 #ifdef KTRACE 984 if (KTRPOINT(p, KTR_STRUCT)) 985 ktrevent(p, kev, ready); 986 #endif 987 total += ready; 988 if (error || ready < n) 989 break; 990 } 991 kqueue_scan_finish(&scan); 992 *retval = total; 993 return (error); 994 995 done: 996 FRELE(fp, p); 997 return (error); 998 } 999 1000 #ifdef KQUEUE_DEBUG 1001 void 1002 kqueue_do_check(struct kqueue *kq, const char *func, int line) 1003 { 1004 struct knote *kn; 1005 int count = 0, nmarker = 0; 1006 1007 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1008 1009 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1010 if (kn->kn_filter == EVFILT_MARKER) { 1011 if ((kn->kn_status & KN_QUEUED) != 0) 1012 panic("%s:%d: kq=%p kn=%p marker QUEUED", 1013 func, line, kq, kn); 1014 nmarker++; 1015 } else { 1016 if ((kn->kn_status & KN_ACTIVE) == 0) 1017 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 1018 func, line, kq, kn); 1019 if ((kn->kn_status & KN_QUEUED) == 0) 1020 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 1021 func, line, kq, kn); 1022 if (kn->kn_kq != kq) 1023 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 1024 func, line, kq, kn, kn->kn_kq); 1025 count++; 1026 if (count > kq->kq_count) 1027 goto bad; 1028 } 1029 } 
1030 if (count != kq->kq_count) { 1031 bad: 1032 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 1033 func, line, kq, kq->kq_count, count, nmarker); 1034 } 1035 } 1036 #endif 1037 1038 int 1039 kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid, 1040 struct proc *p) 1041 { 1042 struct filedesc *fdp = kq->kq_fdp; 1043 const struct filterops *fops = NULL; 1044 struct file *fp = NULL; 1045 struct knote *kn = NULL, *newkn = NULL; 1046 struct knlist *list = NULL; 1047 int active, error = 0; 1048 1049 KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq)); 1050 1051 if (kev->filter < 0) { 1052 if (kev->filter + EVFILT_SYSCOUNT < 0) 1053 return (EINVAL); 1054 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 1055 } 1056 1057 if (fops == NULL) { 1058 /* 1059 * XXX 1060 * filter attach routine is responsible for ensuring that 1061 * the identifier can be attached to it. 1062 */ 1063 return (EINVAL); 1064 } 1065 1066 if (fops->f_flags & FILTEROP_ISFD) { 1067 /* validate descriptor */ 1068 if (kev->ident > INT_MAX) 1069 return (EBADF); 1070 } 1071 1072 if (kev->flags & EV_ADD) 1073 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 1074 1075 again: 1076 if (fops->f_flags & FILTEROP_ISFD) { 1077 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 1078 error = EBADF; 1079 goto done; 1080 } 1081 mtx_enter(&kq->kq_lock); 1082 if (kev->flags & EV_ADD) 1083 kqueue_expand_list(kq, kev->ident); 1084 if (kev->ident < kq->kq_knlistsize) 1085 list = &kq->kq_knlist[kev->ident]; 1086 } else { 1087 mtx_enter(&kq->kq_lock); 1088 if (kev->flags & EV_ADD) 1089 kqueue_expand_hash(kq); 1090 if (kq->kq_knhashmask != 0) { 1091 list = &kq->kq_knhash[ 1092 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1093 } 1094 } 1095 if (list != NULL) { 1096 SLIST_FOREACH(kn, list, kn_link) { 1097 if (kev->filter == kn->kn_filter && 1098 kev->ident == kn->kn_id && 1099 pollid == kn->kn_pollid) { 1100 if (!knote_acquire(kn, NULL, 0)) { 1101 /* knote_acquire() has released 1102 
* kq_lock. */ 1103 if (fp != NULL) { 1104 FRELE(fp, p); 1105 fp = NULL; 1106 } 1107 goto again; 1108 } 1109 break; 1110 } 1111 } 1112 } 1113 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 1114 1115 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 1116 mtx_leave(&kq->kq_lock); 1117 error = ENOENT; 1118 goto done; 1119 } 1120 1121 /* 1122 * kn now contains the matching knote, or NULL if no match. 1123 */ 1124 if (kev->flags & EV_ADD) { 1125 if (kn == NULL) { 1126 kn = newkn; 1127 newkn = NULL; 1128 kn->kn_status = KN_PROCESSING; 1129 kn->kn_fp = fp; 1130 kn->kn_kq = kq; 1131 kn->kn_fop = fops; 1132 1133 /* 1134 * apply reference count to knote structure, and 1135 * do not release it at the end of this routine. 1136 */ 1137 fp = NULL; 1138 1139 kn->kn_sfflags = kev->fflags; 1140 kn->kn_sdata = kev->data; 1141 kev->fflags = 0; 1142 kev->data = 0; 1143 kn->kn_kevent = *kev; 1144 kn->kn_pollid = pollid; 1145 1146 knote_attach(kn); 1147 mtx_leave(&kq->kq_lock); 1148 1149 error = filter_attach(kn); 1150 if (error != 0) { 1151 knote_drop(kn, p); 1152 goto done; 1153 } 1154 1155 /* 1156 * If this is a file descriptor filter, check if 1157 * fd was closed while the knote was being added. 1158 * knote_fdclose() has missed kn if the function 1159 * ran before kn appeared in kq_knlist. 1160 */ 1161 if ((fops->f_flags & FILTEROP_ISFD) && 1162 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 1163 /* 1164 * Drop the knote silently without error 1165 * because another thread might already have 1166 * seen it. This corresponds to the insert 1167 * happening in full before the close. 1168 */ 1169 filter_detach(kn); 1170 knote_drop(kn, p); 1171 goto done; 1172 } 1173 1174 /* Check if there is a pending event. */ 1175 active = filter_process(kn, NULL); 1176 mtx_enter(&kq->kq_lock); 1177 if (active) 1178 knote_activate(kn); 1179 } else if (kn->kn_fop == &badfd_filtops) { 1180 /* 1181 * Nothing expects this badfd knote any longer. 
1182 * Drop it to make room for the new knote and retry. 1183 */ 1184 KASSERT(kq == p->p_kq); 1185 mtx_leave(&kq->kq_lock); 1186 filter_detach(kn); 1187 knote_drop(kn, p); 1188 1189 KASSERT(fp != NULL); 1190 FRELE(fp, p); 1191 fp = NULL; 1192 1193 goto again; 1194 } else { 1195 /* 1196 * The user may change some filter values after the 1197 * initial EV_ADD, but doing so will not reset any 1198 * filters which have already been triggered. 1199 */ 1200 mtx_leave(&kq->kq_lock); 1201 active = filter_modify(kev, kn); 1202 mtx_enter(&kq->kq_lock); 1203 if (active) 1204 knote_activate(kn); 1205 if (kev->flags & EV_ERROR) { 1206 error = kev->data; 1207 goto release; 1208 } 1209 } 1210 } else if (kev->flags & EV_DELETE) { 1211 mtx_leave(&kq->kq_lock); 1212 filter_detach(kn); 1213 knote_drop(kn, p); 1214 goto done; 1215 } 1216 1217 if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) 1218 kn->kn_status |= KN_DISABLED; 1219 1220 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 1221 kn->kn_status &= ~KN_DISABLED; 1222 mtx_leave(&kq->kq_lock); 1223 /* Check if there is a pending event. 
*/ 1224 active = filter_process(kn, NULL); 1225 mtx_enter(&kq->kq_lock); 1226 if (active) 1227 knote_activate(kn); 1228 } 1229 1230 release: 1231 knote_release(kn); 1232 mtx_leave(&kq->kq_lock); 1233 done: 1234 if (fp != NULL) 1235 FRELE(fp, p); 1236 if (newkn != NULL) 1237 pool_put(&knote_pool, newkn); 1238 return (error); 1239 } 1240 1241 int 1242 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1243 { 1244 struct timespec elapsed, start, stop; 1245 uint64_t nsecs; 1246 int error; 1247 1248 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1249 1250 if (tsp != NULL) { 1251 getnanouptime(&start); 1252 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1253 } else 1254 nsecs = INFSLP; 1255 error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK, 1256 "kqread", nsecs); 1257 if (tsp != NULL) { 1258 getnanouptime(&stop); 1259 timespecsub(&stop, &start, &elapsed); 1260 timespecsub(tsp, &elapsed, tsp); 1261 if (tsp->tv_sec < 0) 1262 timespecclear(tsp); 1263 } 1264 1265 return (error); 1266 } 1267 1268 /* 1269 * Scan the kqueue, blocking if necessary until the target time is reached. 1270 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 1271 * 0 we do not block at all. 1272 */ 1273 int 1274 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1275 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1276 { 1277 struct kqueue *kq = scan->kqs_kq; 1278 struct knote *kn; 1279 int error = 0, nkev = 0; 1280 int reinserted; 1281 1282 if (maxevents == 0) 1283 goto done; 1284 retry: 1285 KASSERT(nkev == 0); 1286 1287 error = 0; 1288 reinserted = 0; 1289 1290 /* msleep() with PCATCH requires kernel lock. 
	 */
	KERNEL_LOCK();

	mtx_enter(&kq->kq_lock);

	if (kq->kq_state & KQ_DYING) {
		/* The kqueue is being torn down; see kqueue_terminate(). */
		mtx_leave(&kq->kq_lock);
		KERNEL_UNLOCK();
		error = EBADF;
		goto done;
	}

	if (kq->kq_count == 0) {
		/*
		 * Successive loops are only necessary if there are more
		 * ready events to gather, so they don't need to block.
		 */
		if ((tsp != NULL && !timespecisset(tsp)) ||
		    scan->kqs_nevent != 0) {
			mtx_leave(&kq->kq_lock);
			KERNEL_UNLOCK();
			error = 0;
			goto done;
		}
		kq->kq_state |= KQ_SLEEP;
		error = kqueue_sleep(kq, tsp);
		/* kqueue_sleep() has released kq_lock. */
		KERNEL_UNLOCK();
		if (error == 0 || error == EWOULDBLOCK)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		goto done;
	}

	/* The actual scan does not sleep on kq, so unlock the kernel. */
	KERNEL_UNLOCK();

	/*
	 * Put the end marker in the queue to limit the scan to the events
	 * that are currently active.  This prevents events from being
	 * recollected if they reactivate during scan.
	 *
	 * If a partial scan has been performed already but no events have
	 * been collected, reposition the end marker to make any new events
	 * reachable.
	 */
	if (!scan->kqs_queued) {
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
		scan->kqs_queued = 1;
	} else if (scan->kqs_nevent == 0) {
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
	}

	/* Walk from the start marker towards the end marker. */
	TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe);
	while (nkev < maxevents) {
		kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
		if (kn->kn_filter == EVFILT_MARKER) {
			if (kn == &scan->kqs_end)
				break;

			/* Move start marker past another thread's marker. */
			TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
			TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start,
			    kn_tqe);
			continue;
		}

		if (!knote_acquire(kn, NULL, 0)) {
			/* knote_acquire() has released kq_lock. */
			mtx_enter(&kq->kq_lock);
			continue;
		}

		/* Dequeue the knote while still holding kq_lock. */
		kqueue_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kqueue_check(kq);

		if (kn->kn_status & KN_DISABLED) {
			knote_release(kn);
			continue;
		}

		mtx_leave(&kq->kq_lock);

		/* Drop expired kqpoll knotes. */
		if (p->p_kq == kq &&
		    p->p_kq_serial > (unsigned long)kn->kn_udata) {
			filter_detach(kn);
			knote_drop(kn, p);
			mtx_enter(&kq->kq_lock);
			continue;
		}

		/*
		 * Invalidate knotes whose vnodes have been revoked.
		 * This is a workaround; it is tricky to clear existing
		 * knotes and prevent new ones from being registered
		 * with the current revocation mechanism.
		 */
		if ((kn->kn_fop->f_flags & FILTEROP_ISFD) &&
		    kn->kn_fp != NULL &&
		    kn->kn_fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = kn->kn_fp->f_data;

			if (__predict_false(vp->v_op == &dead_vops &&
			    kn->kn_fop != &dead_filtops)) {
				filter_detach(kn);
				kn->kn_fop = &dead_filtops;

				/*
				 * Check if the event should be delivered.
				 * Use f_event directly because this is
				 * a special situation.
				 */
				if (kn->kn_fop->f_event(kn, 0) == 0) {
					filter_detach(kn);
					knote_drop(kn, p);
					mtx_enter(&kq->kq_lock);
					continue;
				}
			}
		}

		memset(kevp, 0, sizeof(*kevp));
		if (filter_process(kn, kevp) == 0) {
			/* The event was not delivered; do not collect it. */
			mtx_enter(&kq->kq_lock);
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
			kqueue_check(kq);
			continue;
		}

		/*
		 * Post-event action on the note
		 */
		if (kevp->flags & EV_ONESHOT) {
			filter_detach(kn);
			knote_drop(kn, p);
			mtx_enter(&kq->kq_lock);
		} else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) {
			mtx_enter(&kq->kq_lock);
			if (kevp->flags & EV_DISPATCH)
				kn->kn_status |= KN_DISABLED;
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
		} else {
			/* Requeue the knote so it can fire again. */
			mtx_enter(&kq->kq_lock);
			if ((kn->kn_status & KN_QUEUED) == 0) {
				kqueue_check(kq);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
				/* Wakeup is done after loop. */
				reinserted = 1;
			}
			knote_release(kn);
		}
		kqueue_check(kq);

		kevp++;
		nkev++;
		scan->kqs_nevent++;
	}
	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
	if (reinserted && kq->kq_count != 0)
		kqueue_wakeup(kq);
	mtx_leave(&kq->kq_lock);
	if (scan->kqs_nevent == 0)
		goto retry;
done:
	*errorp = error;
	return (nkev);
}

/*
 * Initialize a scan state for repeated kqueue_scan() calls.
 * Takes a kqueue reference that kqueue_scan_finish() releases.
 */
void
kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
{
	memset(scan, 0, sizeof(*scan));

	KQREF(kq);
	scan->kqs_kq = kq;
	/* Markers are fake knotes that delimit this thread's scan window. */
	scan->kqs_start.kn_filter = EVFILT_MARKER;
	scan->kqs_start.kn_status = KN_PROCESSING;
	scan->kqs_end.kn_filter = EVFILT_MARKER;
	scan->kqs_end.kn_status = KN_PROCESSING;
}

/*
 * Tear down a scan state: remove the end marker if it is still queued
 * and drop the kqueue reference taken by kqueue_scan_setup().
 */
void
kqueue_scan_finish(struct kqueue_scan_state *scan)
{
	struct kqueue *kq = scan->kqs_kq;

	KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_start.kn_status == KN_PROCESSING);
	KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_end.kn_status == KN_PROCESSING);

	if (scan->kqs_queued) {
		scan->kqs_queued = 0;
		mtx_enter(&kq->kq_lock);
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		mtx_leave(&kq->kq_lock);
	}
	KQRELE(kq);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
int
kqueue_read(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_write(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	return (ENOTTY);
}

/*
 * fstat(2) on a kqueue: report the pending event count as the size.
 */
int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;	/* unlocked read */
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * Remove every knote from the kqueue's fd list and hash table.
 */
void
kqueue_purge(struct proc *p, struct kqueue *kq)
{
	int i;

	mtx_enter(&kq->kq_lock);
	for (i = 0; i < kq->kq_knlistsize; i++)
		knote_remove(p, kq, &kq->kq_knlist, i, 1);
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i < kq->kq_knhashmask + 1; i++)
			knote_remove(p, kq, &kq->kq_knhash, i, 1);
	}
	mtx_leave(&kq->kq_lock);
}

/*
 * Mark the kqueue as dying, wake any sleepers, and wait for the
 * deferred notification task to finish.  Called after kqueue_purge().
 */
void
kqueue_terminate(struct proc *p, struct kqueue *kq)
{
	struct knote *kn;
	int state;

	mtx_enter(&kq->kq_lock);

	/*
	 * Any remaining entries should be scan markers.
	 * They are removed when the ongoing scans finish.
	 */
	KASSERT(kq->kq_count == 0);
	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe)
		KASSERT(kn->kn_filter == EVFILT_MARKER);

	kq->kq_state |= KQ_DYING;
	state = kq->kq_state;
	kqueue_wakeup(kq);
	mtx_leave(&kq->kq_lock);

	/*
	 * Any knotes that were attached to this kqueue were deleted
	 * by knote_fdclose() when this kqueue's file descriptor was closed.
	 */
	KASSERT(klist_empty(&kq->kq_klist));
	if (state & KQ_TASK)
		taskq_del_barrier(systqmp, &kq->kq_task);
}

/*
 * Last close of the kqueue's file: purge knotes, tear down, and drop
 * the file's reference on the kqueue.
 */
int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	fp->f_data = NULL;

	kqueue_purge(p, kq);
	kqueue_terminate(p, kq);

	KQRELE(kq);

	return (0);
}

/*
 * Task callback: notify knotes that monitor this kqueue itself.
 * Activation is deferred here from kqueue_wakeup() (see KQ_TASK).
 */
static void
kqueue_task(void *arg)
{
	struct kqueue *kq = arg;

	mtx_enter(&kqueue_klist_lock);
	KNOTE(&kq->kq_klist, 0);
	mtx_leave(&kqueue_klist_lock);
}

/*
 * Wake threads sleeping on the kqueue and schedule notification of
 * any knotes attached to the kqueue.  Called with kq_lock held.
 */
void
kqueue_wakeup(struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (!klist_empty(&kq->kq_klist)) {
		/* Defer activation to avoid recursion. */
		kq->kq_state |= KQ_TASK;
		task_add(systqmp, &kq->kq_task);
	}
}

/*
 * Allocate the knote hash table on first use.  hashinit() can sleep,
 * so kq_lock is dropped around it and the state is re-checked after
 * relocking in case another thread won the race.
 */
static void
kqueue_expand_hash(struct kqueue *kq)
{
	struct knlist *hash;
	u_long hashmask;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_knhashmask == 0) {
		mtx_leave(&kq->kq_lock);
		hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask);
		mtx_enter(&kq->kq_lock);
		if (kq->kq_knhashmask == 0) {
			kq->kq_knhash = hash;
			kq->kq_knhashmask = hashmask;
		} else {
			/* Another thread has allocated the hash. */
			mtx_leave(&kq->kq_lock);
			hashfree(hash, KN_HASHSIZE, M_KEVENT);
			mtx_enter(&kq->kq_lock);
		}
	}
}

/*
 * Grow the per-fd knote list so that it can index fd.  As above, the
 * blocking allocation is done without kq_lock and the size is
 * re-checked after relocking.
 */
static void
kqueue_expand_list(struct kqueue *kq, int fd)
{
	struct knlist *list, *olist;
	int size, osize;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_knlistsize <= fd) {
		size = kq->kq_knlistsize;
		mtx_leave(&kq->kq_lock);
		/* Round the new size up in KQEXTENT-sized steps. */
		while (size <= fd)
			size += KQEXTENT;
		list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK);
		mtx_enter(&kq->kq_lock);
		if (kq->kq_knlistsize <= fd) {
			memcpy(list, kq->kq_knlist,
			    kq->kq_knlistsize * sizeof(*list));
			memset(&list[kq->kq_knlistsize], 0,
			    (size - kq->kq_knlistsize) * sizeof(*list));
			olist = kq->kq_knlist;
			osize = kq->kq_knlistsize;
			kq->kq_knlist = list;
			kq->kq_knlistsize = size;
			mtx_leave(&kq->kq_lock);
			free(olist, M_KEVENT, osize * sizeof(*list));
			mtx_enter(&kq->kq_lock);
		} else {
			/* Another thread has expanded the list. */
			mtx_leave(&kq->kq_lock);
			free(list, M_KEVENT, size * sizeof(*list));
			mtx_enter(&kq->kq_lock);
		}
	}
}

/*
 * Acquire a knote, return non-zero on success, 0 on failure.
 *
 * If we cannot acquire the knote we sleep and return 0.  The knote
 * may be stale on return in this case and the caller must restart
 * whatever loop they are in.
 *
 * If we are about to sleep and klist is non-NULL, the list is unlocked
 * before sleep and remains unlocked on return.
 */
int
knote_acquire(struct knote *kn, struct klist *klist, int ls)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);

	if (kn->kn_status & KN_PROCESSING) {
		/* Another thread owns the knote; wait for it. */
		kn->kn_status |= KN_WAITING;
		if (klist != NULL) {
			mtx_leave(&kq->kq_lock);
			klist_unlock(klist, ls);
			/* XXX Timeout resolves potential loss of wakeup. */
			tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1));
		} else {
			msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts",
			    SEC_TO_NSEC(1));
		}
		/* knote may be stale now */
		return (0);
	}
	kn->kn_status |= KN_PROCESSING;
	return (1);
}

/*
 * Release an acquired knote, clearing KN_PROCESSING.
 */
void
knote_release(struct knote *kn)
{
	MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_status & KN_WAITING) {
		/* Wake threads sleeping in knote_acquire(). */
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	kn->kn_status &= ~KN_PROCESSING;
	/* kn should not be accessed anymore */
}

/*
 * activate one knote.
 */
void
knote_activate(struct knote *kn)
{
	MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock);

	kn->kn_status |= KN_ACTIVE;
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)
		knote_enqueue(kn);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn, *kn0;
	struct kqueue *kq;

	KLIST_ASSERT_LOCKED(list);

	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) {
		if (filter_event(kn, hint)) {
			kq = kn->kn_kq;
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			mtx_leave(&kq->kq_lock);
		}
	}
}

/*
 * remove all knotes from a specified knlist
 */
void
knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, int idx,
    int purge)
{
	struct knote *kn;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	/* Always fetch array pointer as another thread can resize kq_knlist. */
	while ((kn = SLIST_FIRST(*plist + idx)) != NULL) {
		KASSERT(kn->kn_kq == kq);

		if (!purge) {
			/* Skip pending badfd knotes. */
			while (kn->kn_fop == &badfd_filtops) {
				kn = SLIST_NEXT(kn, kn_link);
				if (kn == NULL)
					return;
				KASSERT(kn->kn_kq == kq);
			}
		}

		if (!knote_acquire(kn, NULL, 0)) {
			/* knote_acquire() has released kq_lock. */
			mtx_enter(&kq->kq_lock);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		filter_detach(kn);

		/*
		 * Notify poll(2) and select(2) when a monitored
		 * file descriptor is closed.
		 *
		 * This reuses the original knote for delivering the
		 * notification so as to avoid allocating memory.
		 */
		if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) &&
		    !(p->p_kq == kq &&
		      p->p_kq_serial > (unsigned long)kn->kn_udata) &&
		    kn->kn_fop != &badfd_filtops) {
			KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD);
			FRELE(kn->kn_fp, p);
			kn->kn_fp = NULL;

			kn->kn_fop = &badfd_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			continue;
		}

		knote_drop(kn, p);
		mtx_enter(&kq->kq_lock);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_p->ps_fd;
	struct kqueue *kq;

	/*
	 * fdplock can be ignored if the file descriptor table is being freed
	 * because no other thread can access the fdp.
	 */
	if (fdp->fd_refcnt != 0)
		fdpassertlocked(fdp);

	LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) {
		mtx_enter(&kq->kq_lock);
		if (fd < kq->kq_knlistsize)
			knote_remove(p, kq, &kq->kq_knlist, fd, 0);
		mtx_leave(&kq->kq_lock);
	}
}

/*
 * handle a process exiting, including the triggering of NOTE_EXIT notes
 * XXX this could be more efficient, doing a single pass down the klist
 */
void
knote_processexit(struct process *pr)
{
	KERNEL_ASSERT_LOCKED();

	KNOTE(&pr->ps_klist, NOTE_EXIT);

	/* remove other knotes hanging off the process */
	klist_invalidate(&pr->ps_klist);
}

/*
 * Link an acquired knote into its kqueue's per-fd list or id hash.
 */
void
knote_attach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
		KASSERT(kq->kq_knlistsize > kn->kn_id);
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		KASSERT(kq->kq_knhashmask != 0);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id,
		    kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kq->kq_nknotes++;
}

/*
 * Unlink the knote from its kqueue's per-fd list or id hash.
 */
void
knote_detach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_status & KN_PROCESSING);

	kq->kq_nknotes--;
	if (kn->kn_fop->f_flags & FILTEROP_ISFD)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	SLIST_REMOVE(list, kn, knote, kn_link);
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling FRELE and pool_put.
 */
void
knote_drop(struct knote *kn, struct proc *p)
{
	struct kqueue *kq = kn->kn_kq;

	KASSERT(kn->kn_filter != EVFILT_MARKER);

	mtx_enter(&kq->kq_lock);
	knote_detach(kn);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_status & KN_WAITING) {
		/* Wake threads sleeping in knote_acquire(). */
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	mtx_leave(&kq->kq_lock);

	/* Drop the file reference and free the knote outside kq_lock. */
	if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL)
		FRELE(kn->kn_fp, p);
	pool_put(&knote_pool, kn);
}


/*
 * Append the knote to the kqueue's active queue and wake sleepers.
 */
void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	kqueue_check(kq);
	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_check(kq);
	kqueue_wakeup(kq);
}

/*
 * Remove the knote from the kqueue's active queue.
 */
void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_QUEUED);

	kqueue_check(kq);
	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	kqueue_check(kq);
}

/*
 * Assign parameters to the knote.
 *
 * The knote's object lock must be held.
 */
void
knote_assign(const struct kevent *kev, struct knote *kn)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;
	kn->kn_udata = kev->udata;
}

/*
 * Submit the knote's event for delivery.
 *
 * The knote's object lock must be held.
 */
void
knote_submit(struct knote *kn, struct kevent *kev)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	if (kev != NULL) {
		*kev = kn->kn_kevent;
		/* EV_CLEAR resets the state after each delivery. */
		if (kn->kn_flags & EV_CLEAR) {
			kn->kn_fflags = 0;
			kn->kn_data = 0;
		}
	}
}

/*
 * Initialize a klist with the given locking backend (ops may be NULL
 * for the kernel-lock fallback; see klist_lock()).
 */
void
klist_init(struct klist *klist, const struct klistops *ops, void *arg)
{
	SLIST_INIT(&klist->kl_list);
	klist->kl_ops = ops;
	klist->kl_arg = arg;
}

void
klist_free(struct klist *klist)
{
	KASSERT(SLIST_EMPTY(&klist->kl_list));
}

void
klist_insert(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_insert_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
}

void
klist_remove(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_remove_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
}

/*
 * Detach all knotes from klist.  The knotes are rewired to indicate EOF.
 *
 * The caller of this function must not hold any locks that can block
 * filterops callbacks that run with KN_PROCESSING.
 * Otherwise this function might deadlock.
 */
void
klist_invalidate(struct klist *list)
{
	struct knote *kn;
	struct kqueue *kq;
	struct proc *p = curproc;
	int ls;

	NET_ASSERT_UNLOCKED();

	ls = klist_lock(list);
	while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) {
		kq = kn->kn_kq;
		mtx_enter(&kq->kq_lock);
		if (!knote_acquire(kn, list, ls)) {
			/* knote_acquire() has released kq_lock
			 * and klist lock. */
			ls = klist_lock(list);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		klist_unlock(list, ls);
		filter_detach(kn);
		if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
			/* Rewire to the dead filter and deliver EOF. */
			kn->kn_fop = &dead_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			mtx_leave(&kq->kq_lock);
		} else {
			knote_drop(kn, p);
		}
		ls = klist_lock(list);
	}
	klist_unlock(list, ls);
}

/*
 * Lock the klist through its backend ops; without ops, fall back to
 * the kernel lock at splhigh().  Returns the state to pass back to
 * klist_unlock().
 */
static int
klist_lock(struct klist *list)
{
	int ls = 0;

	if (list->kl_ops != NULL) {
		ls = list->kl_ops->klo_lock(list->kl_arg);
	} else {
		KERNEL_LOCK();
		ls = splhigh();
	}
	return ls;
}

static void
klist_unlock(struct klist *list, int ls)
{
	if (list->kl_ops != NULL) {
		list->kl_ops->klo_unlock(list->kl_arg, ls);
	} else {
		splx(ls);
		KERNEL_UNLOCK();
	}
}

/*
 * klistops backed by a mutex(9).
 */
static void
klist_mutex_assertlk(void *arg)
{
	struct mutex *mtx = arg;

	(void)mtx;

	MUTEX_ASSERT_LOCKED(mtx);
}

static int
klist_mutex_lock(void *arg)
{
	struct mutex *mtx = arg;

	mtx_enter(mtx);
	return 0;
}

static void
klist_mutex_unlock(void *arg, int s)
{
	struct mutex *mtx = arg;

	mtx_leave(mtx);
}

static const struct klistops mutex_klistops = {
	.klo_assertlk = klist_mutex_assertlk,
	.klo_lock = klist_mutex_lock,
	.klo_unlock = klist_mutex_unlock,
};

void
klist_init_mutex(struct klist *klist, struct mutex *mtx)
{
	klist_init(klist, &mutex_klistops, mtx);
}

/*
 * klistops backed by a write-locked rwlock(9).
 */
static void
klist_rwlock_assertlk(void *arg)
{
	struct rwlock *rwl = arg;

	(void)rwl;

	rw_assert_wrlock(rwl);
}

static int
klist_rwlock_lock(void *arg)
{
	struct rwlock *rwl = arg;

	rw_enter_write(rwl);
	return 0;
}

static void
klist_rwlock_unlock(void *arg, int s)
{
	struct rwlock *rwl = arg;

	rw_exit_write(rwl);
}

static const struct klistops rwlock_klistops = {
	.klo_assertlk = klist_rwlock_assertlk,
	.klo_lock = klist_rwlock_lock,
	.klo_unlock = klist_rwlock_unlock,
};

void
klist_init_rwlock(struct klist *klist, struct rwlock *rwl)
{
	klist_init(klist, &rwlock_klistops, rwl);
}