/*	$OpenBSD: kern_event.c,v 1.190 2022/06/20 01:39:44 visa Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/vnode.h>
#include <sys/wait.h>

#ifdef DIAGNOSTIC
#define KLIST_ASSERT_LOCKED(kl) do {					\
	if ((kl)->kl_ops != NULL)					\
		(kl)->kl_ops->klo_assertlk((kl)->kl_arg);		\
	else								\
		KERNEL_ASSERT_LOCKED();					\
} while (0)
#else
#define KLIST_ASSERT_LOCKED(kl)	((void)(kl))
#endif

struct	kqueue *kqueue_alloc(struct filedesc *);
void	kqueue_terminate(struct proc *p, struct kqueue *);
void	KQREF(struct kqueue *);
void	KQRELE(struct kqueue *);

void	kqueue_purge(struct proc *, struct kqueue *);
int	kqueue_sleep(struct kqueue *, struct timespec *);

int	kqueue_read(struct file *, struct uio *, int);
int	kqueue_write(struct file *, struct uio *, int);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

#ifdef KQUEUE_DEBUG
void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif

static int	filter_attach(struct knote *kn);
static void	filter_detach(struct knote *kn);
static int	filter_event(struct knote *kn, long hint);
static int	filter_modify(struct kevent *kev, struct knote *kn);
static int	filter_process(struct knote *kn, struct kevent *kev);
static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);

const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_detach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
int	knote_acquire(struct knote *kn, struct klist *, int);
void	knote_release(struct knote *kn);
void	knote_activate(struct knote *kn);
void	knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist,
	    int idx, int purge);

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_kqueuemodify(struct kevent *kev, struct knote *kn);
int	filt_kqueueprocess(struct knote *kn, struct kevent *kev);
int	filt_kqueue_common(struct knote *kn, struct kqueue *kq);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timermodify(struct kevent *kev, struct knote *kn);
int	filt_timerprocess(struct knote *kn, struct kevent *kev);
void	filt_seltruedetach(struct knote *kn);

const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
	.f_modify	= filt_kqueuemodify,
	.f_process	= filt_kqueueprocess,
};

const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= NULL,
	.f_modify	= filt_timermodify,
	.f_process	= filt_timerprocess,
};

struct	pool knote_pool;
struct	pool kqueue_pool;
struct	mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/*
 * Table for all system-defined filters.
 */
const struct filterops *const sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_DEVICE */
	&file_filtops,			/* EVFILT_EXCEPT */
};
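/*
 * Illustrative note (not from the original source): kqueue_register()
 * indexes this table with ~kev->filter, turning the negative filter
 * numbers from <sys/event.h> into 0-based indices.  For example,
 * EVFILT_READ is -1, so ~EVFILT_READ == 0 selects file_filtops, and
 * EVFILT_PROC is -5, so sysfilt_ops[~(-5)] == sysfilt_ops[4] selects
 * proc_filtops.
 */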
void
KQREF(struct kqueue *kq)
{
	refcnt_take(&kq->kq_refcnt);
}

void
KQRELE(struct kqueue *kq)
{
	struct filedesc *fdp;

	if (refcnt_rele(&kq->kq_refcnt) == 0)
		return;

	fdp = kq->kq_fdp;
	if (rw_status(&fdp->fd_lock) == RW_WRITE) {
		LIST_REMOVE(kq, kq_next);
	} else {
		fdplock(fdp);
		LIST_REMOVE(kq, kq_next);
		fdpunlock(fdp);
	}

	KASSERT(TAILQ_EMPTY(&kq->kq_head));
	KASSERT(kq->kq_nknotes == 0);

	free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
	    sizeof(struct knlist));
	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
	klist_free(&kq->kq_sel.si_note);
	pool_put(&kqueue_pool, kq);
}

void
kqueue_init(void)
{
	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR,
	    PR_WAITOK, "kqueuepl", NULL);
	pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR,
	    PR_WAITOK, "knotepl", NULL);
}

void
kqueue_init_percpu(void)
{
	pool_cache_init(&knote_pool);
}

int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return fp->f_ops->fo_kqfilter(fp, kn);
}

int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &kqread_filtops;
	klist_insert(&kq->kq_sel.si_note, kn);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	klist_remove(&kq->kq_sel.si_note, kn);
}

int
filt_kqueue_common(struct knote *kn, struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	kn->kn_data = kq->kq_count;

	return (kn->kn_data > 0);
}

int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueuemodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	knote_assign(kev, kn);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueueprocess(struct knote *kn, struct kevent *kev)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_kqueue_common(kn, kq);
	if (active)
		knote_submit(kn, kev);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_procattach(struct knote *kn)
{
	struct process *pr;
	int s;

	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
		return pledge_fail(curproc, EPERM, PLEDGE_PROC);

	if (kn->kn_id > PID_MAX)
		return ESRCH;

	pr = prfind(kn->kn_id);
	if (pr == NULL)
		return (ESRCH);
	/* exiting processes can't be specified */
	if (pr->ps_flags & PS_EXITING)
		return (ESRCH);

	kn->kn_ptr.p_process = pr;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	s = splhigh();
	klist_insert_locked(&pr->ps_klist, kn);
	splx(s);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct process *pr = kn->kn_ptr.p_process;
	int s, status;

	mtx_enter(&kq->kq_lock);
	status = kn->kn_status;
	mtx_leave(&kq->kq_lock);

	if (status & KN_DETACHED)
		return;

	s = splhigh();
	klist_remove_locked(&pr->ps_klist, kn);
	splx(s);
}

int
filt_proc(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_kq;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished and remove it
	 * from the process's klist
	 */
	if (event == NOTE_EXIT) {
		struct process *pr = kn->kn_ptr.p_process;
		int s;

		mtx_enter(&kq->kq_lock);
		kn->kn_status |= KN_DETACHED;
		mtx_leave(&kq->kq_lock);

		s = splhigh();
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
		klist_remove_locked(&pr->ps_klist, kn);
		splx(s);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		memset(&kev, 0, sizeof(kev));
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_udata;		/* preserve udata */
		error = kqueue_register(kq, &kev, 0, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timer_timeout_add(struct knote *kn)
{
	struct timeval tv;
	struct timeout *to = kn->kn_hook;
	int tticks;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);
	/* Remove extra tick from tvtohz() if timeout has fired before. */
	if (timeout_triggered(to))
		tticks--;
	timeout_add(to, (tticks > 0) ? tticks : 1);
}
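/*
 * Worked example (illustrative only): a timer knote registered with
 * kev.data == 1500 has kn_sdata == 1500 milliseconds, which the code
 * above splits into tv_sec == 1 and tv_usec == 500000 before tvtohz()
 * converts the interval into clock ticks for timeout_add().
 */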
void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct kqueue *kq = kn->kn_kq;

	kn->kn_data++;
	mtx_enter(&kq->kq_lock);
	knote_activate(kn);
	mtx_leave(&kq->kq_lock);

	if ((kn->kn_flags & EV_ONESHOT) == 0)
		filt_timer_timeout_add(kn);
}


/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;

	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	kn->kn_hook = to;
	filt_timer_timeout_add(kn);

	return (0);
}

void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del_barrier(to);
	free(to, M_KEVENT, sizeof(*to));
	kq_ntimeouts--;
}

int
filt_timermodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct timeout *to = kn->kn_hook;

	/* Reset the timer. Any pending events are discarded. */

	timeout_del_barrier(to);

	mtx_enter(&kq->kq_lock);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	kn->kn_status &= ~KN_ACTIVE;
	mtx_leave(&kq->kq_lock);

	kn->kn_data = 0;
	knote_assign(kev, kn);
	/* Reinit timeout to invoke tick adjustment again. */
	timeout_set(to, filt_timerexpire, kn);
	filt_timer_timeout_add(kn);

	return (0);
}

int
filt_timerprocess(struct knote *kn, struct kevent *kev)
{
	int active, s;

	s = splsoftclock();
	active = (kn->kn_data != 0);
	if (active)
		knote_submit(kn, kev);
	splx(s);

	return (active);
}


/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does as well.
	 */
	kn->kn_data = 0;
	return (1);
}

int
filt_seltruemodify(struct kevent *kev, struct knote *kn)
{
	knote_assign(kev, kn);
	return (kn->kn_fop->f_event(kn, 0));
}

int
filt_seltrueprocess(struct knote *kn, struct kevent *kev)
{
	int active;

	active = kn->kn_fop->f_event(kn, 0);
	if (active)
		knote_submit(kn, kev);
	return (active);
}

/*
 * This provides a full kqfilter entry for device switch tables, which
 * has the same effect as a filter using filt_seltrue() as its filter
 * method.
 */
void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_seltruedetach,
	.f_event	= filt_seltrue,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}
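/*
 * Usage sketch (hypothetical driver code, not part of this file):
 * a device with no real event source of its own can route its
 * kqfilter entry through seltrue_kqfilter() so that EVFILT_READ and
 * EVFILT_WRITE always report ready, mirroring what seltrue() does
 * for poll/select:
 *
 *	int
 *	mydevkqfilter(dev_t dev, struct knote *kn)
 *	{
 *		return (seltrue_kqfilter(dev, kn));
 *	}
 */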
static int
filt_dead(struct knote *kn, long hint)
{
	if (kn->kn_filter == EVFILT_EXCEPT) {
		/*
		 * Do not deliver event because there is no out-of-band data.
		 * However, let HUP condition pass for poll(2).
		 */
		if ((kn->kn_flags & __EV_POLL) == 0) {
			kn->kn_flags |= EV_DISABLE;
			return (0);
		}
	}

	kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	if (kn->kn_flags & __EV_POLL)
		kn->kn_flags |= __EV_HUP;
	kn->kn_data = 0;
	return (1);
}

static void
filt_deaddetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops dead_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_dead,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

static int
filt_badfd(struct knote *kn, long hint)
{
	kn->kn_flags |= (EV_ERROR | EV_ONESHOT);
	kn->kn_data = EBADF;
	return (1);
}

/* For use with kqpoll. */
const struct filterops badfd_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_badfd,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};
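/*
 * The filter_*() wrappers below dispatch to the knote's filterops.
 * Filters marked FILTEROP_MPSAFE are called directly; all others are
 * bracketed with KERNEL_LOCK()/KERNEL_UNLOCK(), falling back to the
 * generic knote_modify()/knote_process() helpers at splhigh() when a
 * filter provides no f_modify or f_process callback.
 */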
static int
filter_attach(struct knote *kn)
{
	int error;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		error = kn->kn_fop->f_attach(kn);
	} else {
		KERNEL_LOCK();
		error = kn->kn_fop->f_attach(kn);
		KERNEL_UNLOCK();
	}
	return (error);
}

static void
filter_detach(struct knote *kn)
{
	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		kn->kn_fop->f_detach(kn);
	} else {
		KERNEL_LOCK();
		kn->kn_fop->f_detach(kn);
		KERNEL_UNLOCK();
	}
}

static int
filter_event(struct knote *kn, long hint)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	return (kn->kn_fop->f_event(kn, hint));
}

static int
filter_modify(struct kevent *kev, struct knote *kn)
{
	int active, s;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		active = kn->kn_fop->f_modify(kev, kn);
	} else {
		KERNEL_LOCK();
		if (kn->kn_fop->f_modify != NULL) {
			active = kn->kn_fop->f_modify(kev, kn);
		} else {
			s = splhigh();
			active = knote_modify(kev, kn);
			splx(s);
		}
		KERNEL_UNLOCK();
	}
	return (active);
}

static int
filter_process(struct knote *kn, struct kevent *kev)
{
	int active, s;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		active = kn->kn_fop->f_process(kn, kev);
	} else {
		KERNEL_LOCK();
		if (kn->kn_fop->f_process != NULL) {
			active = kn->kn_fop->f_process(kn, kev);
		} else {
			s = splhigh();
			active = knote_process(kn, kev);
			splx(s);
		}
		KERNEL_UNLOCK();
	}
	return (active);
}

/*
 * Initialize the current thread for poll/select system call.
 * num indicates the number of serials that the system call may utilize.
 * After this function, the valid range of serials is
 * p_kq_serial <= x < p_kq_serial + num.
 */
void
kqpoll_init(unsigned int num)
{
	struct proc *p = curproc;
	struct filedesc *fdp;

	if (p->p_kq == NULL) {
		p->p_kq = kqueue_alloc(p->p_fd);
		p->p_kq_serial = arc4random();
		fdp = p->p_fd;
		fdplock(fdp);
		LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next);
		fdpunlock(fdp);
	}

	if (p->p_kq_serial + num < p->p_kq_serial) {
		/* Serial is about to wrap. Clear all attached knotes. */
		kqueue_purge(p, p->p_kq);
		p->p_kq_serial = 0;
	}
}
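/*
 * Illustrative example (an assumption about the poll(2) path, not
 * stated in this file): a poll(2) call with nfds == 3 could call
 * kqpoll_init(3), reserving serials p_kq_serial, p_kq_serial + 1 and
 * p_kq_serial + 2 for its pollfd entries; kqpoll_done(3) afterwards
 * advances p_kq_serial past that range so knotes left behind by the
 * call can later be recognized as stale and dropped.
 */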
/*
 * Finish poll/select system call.
 * num must have the same value that was used with kqpoll_init().
 */
void
kqpoll_done(unsigned int num)
{
	struct proc *p = curproc;
	struct kqueue *kq = p->p_kq;

	KASSERT(p->p_kq != NULL);
	KASSERT(p->p_kq_serial + num >= p->p_kq_serial);

	p->p_kq_serial += num;

	/*
	 * Because of the kn_pollid key, a thread can in principle allocate
	 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly
	 * with suitably varying pollfd arrays.
	 * Prevent such a large allocation by clearing knotes eagerly
	 * if there are too many of them.
	 *
	 * A small multiple of kq_knlistsize should give enough margin
	 * that eager clearing is infrequent, or does not happen at all,
	 * with normal programs.
	 * A single pollfd entry can use up to three knotes.
	 * Typically there is no significant overlap of fd and events
	 * between different entries in the pollfd array.
	 */
	if (kq->kq_nknotes > 4 * kq->kq_knlistsize)
		kqueue_purge(p, kq);
}

void
kqpoll_exit(void)
{
	struct proc *p = curproc;

	if (p->p_kq == NULL)
		return;

	kqueue_purge(p, p->p_kq);
	kqueue_terminate(p, p->p_kq);
	KASSERT(p->p_kq->kq_refcnt.r_refs == 1);
	KQRELE(p->p_kq);
	p->p_kq = NULL;
}

struct kqueue *
kqueue_alloc(struct filedesc *fdp)
{
	struct kqueue *kq;

	kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO);
	refcnt_init(&kq->kq_refcnt);
	kq->kq_fdp = fdp;
	TAILQ_INIT(&kq->kq_head);
	mtx_init(&kq->kq_lock, IPL_HIGH);
	task_set(&kq->kq_task, kqueue_task, kq);
	klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock);

	return (kq);
}

int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	kq = kqueue_alloc(fdp);

	fdplock(fdp);
	error = falloc(p, &fp, &fd);
	if (error)
		goto out;
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	*retval = fd;
	LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next);
	kq = NULL;
	fdinsert(fdp, fd, 0, fp);
	FRELE(fp, p);
out:
	fdpunlock(fdp);
	if (kq != NULL)
		pool_put(&kqueue_pool, kq);
	return (error);
}

int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct kqueue_scan_state scan;
	struct filedesc *fdp = p->p_fd;
	struct sys_kevent_args /* {
		syscallarg(int)	fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int)	nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int)	nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	struct timespec *tsp = NULL;
	int i, n, nerrors, error;
	int ready, total;
	struct kevent kev[KQ_NEVENTS];

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		error = EBADF;
		goto done;
	}

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts)) {
			error = EINVAL;
			goto done;
		}
		tsp = &ts;
	}

	kq = fp->f_data;
	nerrors = 0;

	while ((n = SCARG(uap, nchanges)) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		error = copyin(SCARG(uap, changelist), kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, n);
#endif
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, 0, p);
			if (error || (kevp->flags & EV_RECEIPT)) {
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					copyout(kevp, SCARG(uap, eventlist),
					    sizeof(*kevp));
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	kqueue_scan_setup(&scan, kq);
	FRELE(fp, p);
	/*
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
	 */
	total = 0;
	error = 0;
	while ((n = SCARG(uap, nevents) - total) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
		if (ready == 0)
			break;
		error = copyout(kev, SCARG(uap, eventlist) + total,
		    sizeof(struct kevent) * ready);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, ready);
#endif
		total += ready;
		if (error || ready < n)
			break;
	}
	kqueue_scan_finish(&scan);
	*retval = total;
	return (error);

done:
	FRELE(fp, p);
	return (error);
}

#ifdef KQUEUE_DEBUG
void
kqueue_do_check(struct kqueue *kq, const char *func, int line)
{
	struct knote *kn;
	int count = 0, nmarker = 0;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if (kn->kn_filter == EVFILT_MARKER) {
			if ((kn->kn_status & KN_QUEUED) != 0)
				panic("%s:%d: kq=%p kn=%p marker QUEUED",
				    func, line, kq, kn);
			nmarker++;
		} else {
			if ((kn->kn_status & KN_ACTIVE) == 0)
				panic("%s:%d: kq=%p kn=%p knote !ACTIVE",
				    func, line, kq, kn);
			if ((kn->kn_status & KN_QUEUED) == 0)
				panic("%s:%d: kq=%p kn=%p knote !QUEUED",
				    func, line, kq, kn);
			if (kn->kn_kq != kq)
				panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq",
				    func, line, kq, kn, kn->kn_kq);
			count++;
			if (count > kq->kq_count)
				goto bad;
		}
	}
	if (count != kq->kq_count) {
bad:
		panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d",
		    func, line, kq, kq->kq_count, count, nmarker);
	}
}
#endif
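/*
 * Userland view (illustrative only, not part of this file): each
 * change record handed to kevent(2) reaches kqueue_register() via
 * sys_kevent().  For example,
 *
 *	struct kevent kev;
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, udata);
 *	kevent(kqfd, &kev, 1, NULL, 0, NULL);
 *
 * registers a read filter on fd; kqueue_register() then resolves the
 * filterops and attaches or updates the matching knote.
 */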
int
kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid,
    struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	const struct filterops *fops = NULL;
	struct file *fp = NULL;
	struct knote *kn = NULL, *newkn = NULL;
	struct knlist *list = NULL;
	int active, error = 0;

	KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq));

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	}

	if (fops == NULL) {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		return (EINVAL);
	}

	if (fops->f_flags & FILTEROP_ISFD) {
		/* validate descriptor */
		if (kev->ident > INT_MAX)
			return (EBADF);
	}

	if (kev->flags & EV_ADD)
		newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);

again:
	if (fops->f_flags & FILTEROP_ISFD) {
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
			error = EBADF;
			goto done;
		}
		mtx_enter(&kq->kq_lock);
		if (kev->flags & EV_ADD)
			kqueue_expand_list(kq, kev->ident);
		if (kev->ident < kq->kq_knlistsize)
			list = &kq->kq_knlist[kev->ident];
	} else {
		mtx_enter(&kq->kq_lock);
		if (kev->flags & EV_ADD)
			kqueue_expand_hash(kq);
		if (kq->kq_knhashmask != 0) {
			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
		}
	}
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kev->filter == kn->kn_filter &&
			    kev->ident == kn->kn_id &&
			    pollid == kn->kn_pollid) {
				if (!knote_acquire(kn, NULL, 0)) {
					/* knote_acquire() has released
					 * kq_lock. */
					if (fp != NULL) {
						FRELE(fp, p);
						fp = NULL;
					}
					goto again;
				}
				break;
			}
		}
	}
	KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		mtx_leave(&kq->kq_lock);
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match.
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = newkn;
			newkn = NULL;
			kn->kn_status = KN_PROCESSING;
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;
			kn->kn_pollid = pollid;

			knote_attach(kn);
			mtx_leave(&kq->kq_lock);

			error = filter_attach(kn);
			if (error != 0) {
				knote_drop(kn, p);
				goto done;
			}

			/*
			 * If this is a file descriptor filter, check if
			 * fd was closed while the knote was being added.
			 * knote_fdclose() has missed kn if the function
			 * ran before kn appeared in kq_knlist.
			 */
			if ((fops->f_flags & FILTEROP_ISFD) &&
			    fd_checkclosed(fdp, kev->ident, kn->kn_fp)) {
				/*
				 * Drop the knote silently without error
				 * because another thread might already have
				 * seen it. This corresponds to the insert
				 * happening in full before the close.
				 */
				filter_detach(kn);
				knote_drop(kn, p);
				goto done;
			}

			/* Check if there is a pending event. */
			active = filter_process(kn, NULL);
			mtx_enter(&kq->kq_lock);
			if (active)
				knote_activate(kn);
		} else if (kn->kn_fop == &badfd_filtops) {
			/*
			 * Nothing expects this badfd knote any longer.
			 * Drop it to make room for the new knote and retry.
			 */
			KASSERT(kq == p->p_kq);
			mtx_leave(&kq->kq_lock);
			filter_detach(kn);
			knote_drop(kn, p);

			KASSERT(fp != NULL);
			FRELE(fp, p);
			fp = NULL;

			goto again;
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			mtx_leave(&kq->kq_lock);
			active = filter_modify(kev, kn);
			mtx_enter(&kq->kq_lock);
			if (active)
				knote_activate(kn);
			if (kev->flags & EV_ERROR) {
				error = kev->data;
				goto release;
			}
		}
	} else if (kev->flags & EV_DELETE) {
		mtx_leave(&kq->kq_lock);
		filter_detach(kn);
		knote_drop(kn, p);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0))
		kn->kn_status |= KN_DISABLED;

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		kn->kn_status &= ~KN_DISABLED;
		mtx_leave(&kq->kq_lock);
		/* Check if there is a pending event. */
		active = filter_process(kn, NULL);
		mtx_enter(&kq->kq_lock);
		if (active)
			knote_activate(kn);
	}

release:
	knote_release(kn);
	mtx_leave(&kq->kq_lock);
done:
	if (fp != NULL)
		FRELE(fp, p);
	if (newkn != NULL)
		pool_put(&knote_pool, newkn);
	return (error);
}

int
kqueue_sleep(struct kqueue *kq, struct timespec *tsp)
{
	struct timespec elapsed, start, stop;
	uint64_t nsecs;
	int error;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (tsp != NULL) {
		getnanouptime(&start);
		nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP);
	} else
		nsecs = INFSLP;
	error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK,
	    "kqread", nsecs);
	if (tsp != NULL) {
		getnanouptime(&stop);
		timespecsub(&stop, &start, &elapsed);
		timespecsub(tsp, &elapsed, tsp);
		if (tsp->tv_sec < 0)
			timespecclear(tsp);
	}

	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->tv_sec/tv_nsec are both
 * 0 we do not block at all.
 */
int
kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
    struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
{
	struct kqueue *kq = scan->kqs_kq;
	struct knote *kn;
	int error = 0, nkev = 0;
	int reinserted;

	if (maxevents == 0)
		goto done;
retry:
	KASSERT(nkev == 0);

	error = 0;
	reinserted = 0;

	/* msleep() with PCATCH requires kernel lock. */
	KERNEL_LOCK();

	mtx_enter(&kq->kq_lock);

	if (kq->kq_state & KQ_DYING) {
		mtx_leave(&kq->kq_lock);
		KERNEL_UNLOCK();
		error = EBADF;
		goto done;
	}

	if (kq->kq_count == 0) {
		/*
		 * Successive loops are only necessary if there are more
		 * ready events to gather, so they don't need to block.
		 */
		if ((tsp != NULL && !timespecisset(tsp)) ||
		    scan->kqs_nevent != 0) {
			mtx_leave(&kq->kq_lock);
			KERNEL_UNLOCK();
			error = 0;
			goto done;
		}
		kq->kq_state |= KQ_SLEEP;
		error = kqueue_sleep(kq, tsp);
		/* kqueue_sleep() has released kq_lock. */
		KERNEL_UNLOCK();
		if (error == 0 || error == EWOULDBLOCK)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		goto done;
	}

	/* The actual scan does not sleep on kq, so unlock the kernel. */
	KERNEL_UNLOCK();

	/*
	 * Put the end marker in the queue to limit the scan to the events
	 * that are currently active.  This prevents events from being
	 * recollected if they reactivate during scan.
	 *
	 * If a partial scan has been performed already but no events have
	 * been collected, reposition the end marker to make any new events
	 * reachable.
	 */
	if (!scan->kqs_queued) {
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
		scan->kqs_queued = 1;
	} else if (scan->kqs_nevent == 0) {
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
	}

	TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe);
	while (nkev < maxevents) {
		kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
		if (kn->kn_filter == EVFILT_MARKER) {
			if (kn == &scan->kqs_end)
				break;

			/* Move start marker past another thread's marker. */
			TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
			TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start,
			    kn_tqe);
			continue;
		}

		if (!knote_acquire(kn, NULL, 0)) {
			/* knote_acquire() has released kq_lock. */
			mtx_enter(&kq->kq_lock);
			continue;
		}

		kqueue_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kqueue_check(kq);

		if (kn->kn_status & KN_DISABLED) {
			knote_release(kn);
			continue;
		}

		mtx_leave(&kq->kq_lock);

		/* Drop expired kqpoll knotes. */
		if (p->p_kq == kq &&
		    p->p_kq_serial > (unsigned long)kn->kn_udata) {
			filter_detach(kn);
			knote_drop(kn, p);
			mtx_enter(&kq->kq_lock);
			continue;
		}

		/*
		 * Invalidate knotes whose vnodes have been revoked.
		 * This is a workaround; it is tricky to clear existing
		 * knotes and prevent new ones from being registered
		 * with the current revocation mechanism.
		 */
		if ((kn->kn_fop->f_flags & FILTEROP_ISFD) &&
		    kn->kn_fp != NULL &&
		    kn->kn_fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = kn->kn_fp->f_data;

			if (__predict_false(vp->v_op == &dead_vops &&
			    kn->kn_fop != &dead_filtops)) {
				filter_detach(kn);
				kn->kn_fop = &dead_filtops;

				/*
				 * Check if the event should be delivered.
				 * Use f_event directly because this is
				 * a special situation.
				 */
				if (kn->kn_fop->f_event(kn, 0) == 0) {
					filter_detach(kn);
					knote_drop(kn, p);
					mtx_enter(&kq->kq_lock);
					continue;
				}
			}
		}

		memset(kevp, 0, sizeof(*kevp));
		if (filter_process(kn, kevp) == 0) {
			mtx_enter(&kq->kq_lock);
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
			kqueue_check(kq);
			continue;
		}

		/*
		 * Post-event action on the note
		 */
		if (kevp->flags & EV_ONESHOT) {
			filter_detach(kn);
			knote_drop(kn, p);
			mtx_enter(&kq->kq_lock);
		} else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) {
			mtx_enter(&kq->kq_lock);
			if (kevp->flags & EV_DISPATCH)
				kn->kn_status |= KN_DISABLED;
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
		} else {
			mtx_enter(&kq->kq_lock);
			if ((kn->kn_status & KN_QUEUED) == 0) {
				kqueue_check(kq);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
				/* Wakeup is done after loop. */
				reinserted = 1;
			}
			knote_release(kn);
		}
		kqueue_check(kq);

		kevp++;
		nkev++;
		scan->kqs_nevent++;
	}
	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
	if (reinserted && kq->kq_count != 0)
		kqueue_wakeup(kq);
	mtx_leave(&kq->kq_lock);
	if (scan->kqs_nevent == 0)
		goto retry;
done:
	*errorp = error;
	return (nkev);
}

void
kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
{
	memset(scan, 0, sizeof(*scan));

	KQREF(kq);
	scan->kqs_kq = kq;
	scan->kqs_start.kn_filter = EVFILT_MARKER;
	scan->kqs_start.kn_status = KN_PROCESSING;
	scan->kqs_end.kn_filter = EVFILT_MARKER;
	scan->kqs_end.kn_status = KN_PROCESSING;
}

void
kqueue_scan_finish(struct kqueue_scan_state *scan)
{
	struct kqueue *kq = scan->kqs_kq;

	KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_start.kn_status == KN_PROCESSING);
	KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_end.kn_status == KN_PROCESSING);

	if (scan->kqs_queued) {
		scan->kqs_queued = 0;
		mtx_enter(&kq->kq_lock);
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		mtx_leave(&kq->kq_lock);
	}
	KQRELE(kq);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
int
kqueue_read(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_write(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	return (ENOTTY);
}

int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;	/* unlocked read */
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

void
kqueue_purge(struct proc *p, struct kqueue *kq)
{
	int i;

	mtx_enter(&kq->kq_lock);
	for (i = 0; i < kq->kq_knlistsize; i++)
		knote_remove(p, kq, &kq->kq_knlist, i, 1);
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i < kq->kq_knhashmask + 1; i++)
			knote_remove(p, kq, &kq->kq_knhash, i, 1);
	}
	mtx_leave(&kq->kq_lock);
}

void
kqueue_terminate(struct proc *p, struct kqueue *kq)
{
	struct knote *kn;

	mtx_enter(&kq->kq_lock);

	/*
	 * Any remaining entries should be scan markers.
	 * They are removed when the ongoing scans finish.
	 */
	KASSERT(kq->kq_count == 0);
	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe)
		KASSERT(kn->kn_filter == EVFILT_MARKER);

	kq->kq_state |= KQ_DYING;
	kqueue_wakeup(kq);
	mtx_leave(&kq->kq_lock);

	KASSERT(klist_empty(&kq->kq_sel.si_note));
	task_del(systqmp, &kq->kq_task);
}

int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	fp->f_data = NULL;

	kqueue_purge(p, kq);
	kqueue_terminate(p, kq);

	KQRELE(kq);

	return (0);
}

static void
kqueue_task(void *arg)
{
	struct kqueue *kq = arg;

	mtx_enter(&kqueue_klist_lock);
	KNOTE(&kq->kq_sel.si_note, 0);
	mtx_leave(&kqueue_klist_lock);
	KQRELE(kq);
}

void
kqueue_wakeup(struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (!klist_empty(&kq->kq_sel.si_note)) {
		/* Defer activation to avoid recursion. */
		KQREF(kq);
		if (!task_add(systqmp, &kq->kq_task))
			KQRELE(kq);
	}
}

static void
kqueue_expand_hash(struct kqueue *kq)
{
	struct knlist *hash;
	u_long hashmask;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_knhashmask == 0) {
		mtx_leave(&kq->kq_lock);
		hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask);
		mtx_enter(&kq->kq_lock);
		if (kq->kq_knhashmask == 0) {
			kq->kq_knhash = hash;
			kq->kq_knhashmask = hashmask;
		} else {
			/* Another thread has allocated the hash. */
			mtx_leave(&kq->kq_lock);
			hashfree(hash, KN_HASHSIZE, M_KEVENT);
			mtx_enter(&kq->kq_lock);
		}
	}
}

static void
kqueue_expand_list(struct kqueue *kq, int fd)
{
	struct knlist *list, *olist;
	int size, osize;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_knlistsize <= fd) {
		size = kq->kq_knlistsize;
		mtx_leave(&kq->kq_lock);
		while (size <= fd)
			size += KQEXTENT;
		list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK);
		mtx_enter(&kq->kq_lock);
		if (kq->kq_knlistsize <= fd) {
			memcpy(list, kq->kq_knlist,
			    kq->kq_knlistsize * sizeof(*list));
			memset(&list[kq->kq_knlistsize], 0,
			    (size - kq->kq_knlistsize) * sizeof(*list));
			olist = kq->kq_knlist;
			osize = kq->kq_knlistsize;
			kq->kq_knlist = list;
			kq->kq_knlistsize = size;
			mtx_leave(&kq->kq_lock);
			free(olist, M_KEVENT, osize * sizeof(*list));
			mtx_enter(&kq->kq_lock);
		} else {
			/* Another thread has expanded the list. */
			mtx_leave(&kq->kq_lock);
			free(list, M_KEVENT, size * sizeof(*list));
			mtx_enter(&kq->kq_lock);
		}
	}
}
/*
 * Acquire a knote, return non-zero on success, 0 on failure.
 *
 * If we cannot acquire the knote we sleep and return 0.  The knote
 * may be stale on return in this case and the caller must restart
 * whatever loop they are in.
 *
 * If we are about to sleep and klist is non-NULL, the list is unlocked
 * before sleep and remains unlocked on return.
 */
int
knote_acquire(struct knote *kn, struct klist *klist, int ls)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);

	if (kn->kn_status & KN_PROCESSING) {
		kn->kn_status |= KN_WAITING;
		if (klist != NULL) {
			mtx_leave(&kq->kq_lock);
			klist_unlock(klist, ls);
			/* XXX Timeout resolves potential loss of wakeup. */
			tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1));
		} else {
			msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts",
			    SEC_TO_NSEC(1));
		}
		/* knote may be stale now */
		return (0);
	}
	kn->kn_status |= KN_PROCESSING;
	return (1);
}

/*
 * Release an acquired knote, clearing KN_PROCESSING.
 */
void
knote_release(struct knote *kn)
{
	MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_status & KN_WAITING) {
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	kn->kn_status &= ~KN_PROCESSING;
	/* kn should not be accessed anymore */
}
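/*
 * Typical caller pattern (sketch for reference only; it mirrors what
 * kqueue_scan() and knote_remove() do):
 *
 *	mtx_enter(&kq->kq_lock);
 *	if (!knote_acquire(kn, NULL, 0)) {
 *		(knote_acquire() released kq_lock; kn may be stale)
 *		mtx_enter(&kq->kq_lock);
 *		continue;
 *	}
 *	... work on the acquired knote, possibly dropping and
 *	    re-entering kq_lock around filter callbacks ...
 *	knote_release(kn);
 *	mtx_leave(&kq->kq_lock);
 */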
/*
 * activate one knote.
 */
void
knote_activate(struct knote *kn)
{
	MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock);

	kn->kn_status |= KN_ACTIVE;
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)
		knote_enqueue(kn);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn, *kn0;
	struct kqueue *kq;

	KLIST_ASSERT_LOCKED(list);

	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) {
		if (filter_event(kn, hint)) {
			kq = kn->kn_kq;
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			mtx_leave(&kq->kq_lock);
		}
	}
}

/*
 * remove all knotes from a specified knlist
 */
void
knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, int idx,
    int purge)
{
	struct knote *kn;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	/* Always fetch array pointer as another thread can resize kq_knlist. */
	while ((kn = SLIST_FIRST(*plist + idx)) != NULL) {
		KASSERT(kn->kn_kq == kq);

		if (!purge) {
			/* Skip pending badfd knotes. */
			while (kn->kn_fop == &badfd_filtops) {
				kn = SLIST_NEXT(kn, kn_link);
				if (kn == NULL)
					return;
				KASSERT(kn->kn_kq == kq);
			}
		}

		if (!knote_acquire(kn, NULL, 0)) {
			/* knote_acquire() has released kq_lock. */
			mtx_enter(&kq->kq_lock);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		filter_detach(kn);

		/*
		 * Notify poll(2) and select(2) when a monitored
		 * file descriptor is closed.
		 *
		 * This reuses the original knote for delivering the
		 * notification so as to avoid allocating memory.
		 */
		if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) &&
		    !(p->p_kq == kq &&
		      p->p_kq_serial > (unsigned long)kn->kn_udata) &&
		    kn->kn_fop != &badfd_filtops) {
			KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD);
			FRELE(kn->kn_fp, p);
			kn->kn_fp = NULL;

			kn->kn_fop = &badfd_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			continue;
		}

		knote_drop(kn, p);
		mtx_enter(&kq->kq_lock);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_p->ps_fd;
	struct kqueue *kq;

	/*
	 * fdplock can be ignored if the file descriptor table is being freed
	 * because no other thread can access the fdp.
	 */
	if (fdp->fd_refcnt != 0)
		fdpassertlocked(fdp);

	LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) {
		mtx_enter(&kq->kq_lock);
		if (fd < kq->kq_knlistsize)
			knote_remove(p, kq, &kq->kq_knlist, fd, 0);
		mtx_leave(&kq->kq_lock);
	}
}

/*
 * handle a process exiting, including the triggering of NOTE_EXIT notes
 * XXX this could be more efficient, doing a single pass down the klist
 */
void
knote_processexit(struct process *pr)
{
	KERNEL_ASSERT_LOCKED();

	KNOTE(&pr->ps_klist, NOTE_EXIT);

	/* remove other knotes hanging off the process */
	klist_invalidate(&pr->ps_klist);
}

void
knote_attach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
		KASSERT(kq->kq_knlistsize > kn->kn_id);
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		KASSERT(kq->kq_knhashmask != 0);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kq->kq_nknotes++;
}

void
knote_detach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_status & KN_PROCESSING);

	kq->kq_nknotes--;
	if (kn->kn_fop->f_flags & FILTEROP_ISFD)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	SLIST_REMOVE(list, kn, knote, kn_link);
}
/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling FRELE and pool_put.
 */
void
knote_drop(struct knote *kn, struct proc *p)
{
	struct kqueue *kq = kn->kn_kq;

	KASSERT(kn->kn_filter != EVFILT_MARKER);

	mtx_enter(&kq->kq_lock);
	knote_detach(kn);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_status & KN_WAITING) {
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	mtx_leave(&kq->kq_lock);

	if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL)
		FRELE(kn->kn_fp, p);
	pool_put(&knote_pool, kn);
}


void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	kqueue_check(kq);
	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_check(kq);
	kqueue_wakeup(kq);
}

void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_QUEUED);

	kqueue_check(kq);
	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	kqueue_check(kq);
}

/*
 * Assign parameters to the knote.
 *
 * The knote's object lock must be held.
 */
void
knote_assign(const struct kevent *kev, struct knote *kn)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;
	kn->kn_udata = kev->udata;
}

/*
 * Submit the knote's event for delivery.
 *
 * The knote's object lock must be held.
 */
void
knote_submit(struct knote *kn, struct kevent *kev)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	if (kev != NULL) {
		*kev = kn->kn_kevent;
		if (kn->kn_flags & EV_CLEAR) {
			kn->kn_fflags = 0;
			kn->kn_data = 0;
		}
	}
}

void
klist_init(struct klist *klist, const struct klistops *ops, void *arg)
{
	SLIST_INIT(&klist->kl_list);
	klist->kl_ops = ops;
	klist->kl_arg = arg;
}

void
klist_free(struct klist *klist)
{
	KASSERT(SLIST_EMPTY(&klist->kl_list));
}

void
klist_insert(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_insert_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
}

void
klist_remove(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_remove_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
}
/*
 * Detach all knotes from klist. The knotes are rewired to indicate EOF.
 *
 * The caller of this function must not hold any locks that can block
 * filterops callbacks that run with KN_PROCESSING.
 * Otherwise this function might deadlock.
 */
void
klist_invalidate(struct klist *list)
{
	struct knote *kn;
	struct kqueue *kq;
	struct proc *p = curproc;
	int ls;

	NET_ASSERT_UNLOCKED();

	ls = klist_lock(list);
	while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) {
		kq = kn->kn_kq;
		mtx_enter(&kq->kq_lock);
		if (!knote_acquire(kn, list, ls)) {
			/* knote_acquire() has released kq_lock
			 * and klist lock. */
			ls = klist_lock(list);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		klist_unlock(list, ls);
		filter_detach(kn);
		if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
			kn->kn_fop = &dead_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			mtx_leave(&kq->kq_lock);
		} else {
			knote_drop(kn, p);
		}
		ls = klist_lock(list);
	}
	klist_unlock(list, ls);
}

static int
klist_lock(struct klist *list)
{
	int ls = 0;

	if (list->kl_ops != NULL) {
		ls = list->kl_ops->klo_lock(list->kl_arg);
	} else {
		KERNEL_LOCK();
		ls = splhigh();
	}
	return ls;
}

static void
klist_unlock(struct klist *list, int ls)
{
	if (list->kl_ops != NULL) {
		list->kl_ops->klo_unlock(list->kl_arg, ls);
	} else {
		splx(ls);
		KERNEL_UNLOCK();
	}
}

static void
klist_mutex_assertlk(void *arg)
{
	struct mutex *mtx = arg;

	(void)mtx;

	MUTEX_ASSERT_LOCKED(mtx);
}

static int
klist_mutex_lock(void *arg)
{
	struct mutex *mtx = arg;

	mtx_enter(mtx);
	return 0;
}

static void
klist_mutex_unlock(void *arg, int s)
{
	struct mutex *mtx = arg;

	mtx_leave(mtx);
}

static const struct klistops mutex_klistops = {
	.klo_assertlk	= klist_mutex_assertlk,
	.klo_lock	= klist_mutex_lock,
	.klo_unlock	= klist_mutex_unlock,
};

void
klist_init_mutex(struct klist *klist, struct mutex *mtx)
{
	klist_init(klist, &mutex_klistops, mtx);
}

static void
klist_rwlock_assertlk(void *arg)
{
	struct rwlock *rwl = arg;

	(void)rwl;

	rw_assert_wrlock(rwl);
}

static int
klist_rwlock_lock(void *arg)
{
	struct rwlock *rwl = arg;

	rw_enter_write(rwl);
	return 0;
}

static void
klist_rwlock_unlock(void *arg, int s)
{
	struct rwlock *rwl = arg;

	rw_exit_write(rwl);
}

static const struct klistops rwlock_klistops = {
	.klo_assertlk	= klist_rwlock_assertlk,
	.klo_lock	= klist_rwlock_lock,
	.klo_unlock	= klist_rwlock_unlock,
};

void
klist_init_rwlock(struct klist *klist, struct rwlock *rwl)
{
	klist_init(klist, &rwlock_klistops, rwl);
}
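/*
 * Usage sketch (hypothetical subsystem code, not part of this file):
 * a driver that serializes its note list with a mutex can wire that
 * mutex into the klist once and then use klist_insert()/klist_remove()
 * from its filterops attach and detach paths:
 *
 *	struct mutex	sc_mtx = MUTEX_INITIALIZER(IPL_MPFLOOR);
 *	struct klist	sc_klist;
 *
 *	klist_init_mutex(&sc_klist, &sc_mtx);
 *	...
 *	klist_insert(&sc_klist, kn);	(from the kqfilter/f_attach path)
 *	klist_remove(&sc_klist, kn);	(from the f_detach path)
 */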