1 /* $OpenBSD: kern_event.c,v 1.192 2022/07/09 12:48:21 visa Exp $ */ 2 3 /*- 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/kernel.h> 34 #include <sys/proc.h> 35 #include <sys/pledge.h> 36 #include <sys/malloc.h> 37 #include <sys/unistd.h> 38 #include <sys/file.h> 39 #include <sys/filedesc.h> 40 #include <sys/fcntl.h> 41 #include <sys/queue.h> 42 #include <sys/event.h> 43 #include <sys/eventvar.h> 44 #include <sys/ktrace.h> 45 #include <sys/pool.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/stat.h> 49 #include <sys/uio.h> 50 #include <sys/mount.h> 51 #include <sys/syscallargs.h> 52 #include <sys/time.h> 53 #include <sys/timeout.h> 54 #include <sys/vnode.h> 55 #include <sys/wait.h> 56 57 #ifdef DIAGNOSTIC 58 #define KLIST_ASSERT_LOCKED(kl) do { \ 59 if ((kl)->kl_ops != NULL) \ 60 (kl)->kl_ops->klo_assertlk((kl)->kl_arg); \ 61 else \ 62 KERNEL_ASSERT_LOCKED(); \ 63 } while (0) 64 #else 65 #define KLIST_ASSERT_LOCKED(kl) ((void)(kl)) 66 #endif 67 68 struct kqueue *kqueue_alloc(struct filedesc *); 69 void kqueue_terminate(struct proc *p, struct kqueue *); 70 void KQREF(struct kqueue *); 71 void KQRELE(struct kqueue *); 72 73 void kqueue_purge(struct proc *, struct kqueue *); 74 int kqueue_sleep(struct kqueue *, struct timespec *); 75 76 int kqueue_read(struct file *, struct uio *, int); 77 int kqueue_write(struct file *, struct uio *, int); 78 int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 79 struct proc *p); 80 int kqueue_kqfilter(struct file *fp, struct knote *kn); 81 int kqueue_stat(struct file *fp, struct stat *st, struct proc *p); 82 int kqueue_close(struct file *fp, struct proc *p); 83 void kqueue_wakeup(struct kqueue *kq); 84 85 #ifdef KQUEUE_DEBUG 86 void kqueue_do_check(struct kqueue *kq, const char *func, int line); 87 #define kqueue_check(kq) kqueue_do_check((kq), __func__, __LINE__) 88 #else 89 #define kqueue_check(kq) do {} while (0) 90 #endif 91 92 static int filter_attach(struct knote *kn); 93 static void filter_detach(struct knote *kn); 
94 static int filter_event(struct knote *kn, long hint); 95 static int filter_modify(struct kevent *kev, struct knote *kn); 96 static int filter_process(struct knote *kn, struct kevent *kev); 97 static void kqueue_expand_hash(struct kqueue *kq); 98 static void kqueue_expand_list(struct kqueue *kq, int fd); 99 static void kqueue_task(void *); 100 static int klist_lock(struct klist *); 101 static void klist_unlock(struct klist *, int); 102 103 const struct fileops kqueueops = { 104 .fo_read = kqueue_read, 105 .fo_write = kqueue_write, 106 .fo_ioctl = kqueue_ioctl, 107 .fo_kqfilter = kqueue_kqfilter, 108 .fo_stat = kqueue_stat, 109 .fo_close = kqueue_close 110 }; 111 112 void knote_attach(struct knote *kn); 113 void knote_detach(struct knote *kn); 114 void knote_drop(struct knote *kn, struct proc *p); 115 void knote_enqueue(struct knote *kn); 116 void knote_dequeue(struct knote *kn); 117 int knote_acquire(struct knote *kn, struct klist *, int); 118 void knote_release(struct knote *kn); 119 void knote_activate(struct knote *kn); 120 void knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, 121 int idx, int purge); 122 123 void filt_kqdetach(struct knote *kn); 124 int filt_kqueue(struct knote *kn, long hint); 125 int filt_kqueuemodify(struct kevent *kev, struct knote *kn); 126 int filt_kqueueprocess(struct knote *kn, struct kevent *kev); 127 int filt_kqueue_common(struct knote *kn, struct kqueue *kq); 128 int filt_procattach(struct knote *kn); 129 void filt_procdetach(struct knote *kn); 130 int filt_proc(struct knote *kn, long hint); 131 int filt_fileattach(struct knote *kn); 132 void filt_timerexpire(void *knx); 133 int filt_timerattach(struct knote *kn); 134 void filt_timerdetach(struct knote *kn); 135 int filt_timermodify(struct kevent *kev, struct knote *kn); 136 int filt_timerprocess(struct knote *kn, struct kevent *kev); 137 void filt_seltruedetach(struct knote *kn); 138 139 const struct filterops kqread_filtops = { 140 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 141 .f_attach = NULL, 142 .f_detach = filt_kqdetach, 143 .f_event = filt_kqueue, 144 .f_modify = filt_kqueuemodify, 145 .f_process = filt_kqueueprocess, 146 }; 147 148 const struct filterops proc_filtops = { 149 .f_flags = 0, 150 .f_attach = filt_procattach, 151 .f_detach = filt_procdetach, 152 .f_event = filt_proc, 153 }; 154 155 const struct filterops file_filtops = { 156 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 157 .f_attach = filt_fileattach, 158 .f_detach = NULL, 159 .f_event = NULL, 160 }; 161 162 const struct filterops timer_filtops = { 163 .f_flags = 0, 164 .f_attach = filt_timerattach, 165 .f_detach = filt_timerdetach, 166 .f_event = NULL, 167 .f_modify = filt_timermodify, 168 .f_process = filt_timerprocess, 169 }; 170 171 struct pool knote_pool; 172 struct pool kqueue_pool; 173 struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); 174 int kq_ntimeouts = 0; 175 int kq_timeoutmax = (4 * 1024); 176 177 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 178 179 /* 180 * Table for all system-defined filters.
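 * Filter identifiers are negative (EVFILT_READ is -1 in <sys/event.h>);
 * kqueue_register() indexes this table with ~kev->filter, so EVFILT_READ
 * maps to slot 0, EVFILT_WRITE (-2) to slot 1, and so on.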
181 */ 182 const struct filterops *const sysfilt_ops[] = { 183 &file_filtops, /* EVFILT_READ */ 184 &file_filtops, /* EVFILT_WRITE */ 185 NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ 186 &file_filtops, /* EVFILT_VNODE */ 187 &proc_filtops, /* EVFILT_PROC */ 188 &sig_filtops, /* EVFILT_SIGNAL */ 189 &timer_filtops, /* EVFILT_TIMER */ 190 &file_filtops, /* EVFILT_DEVICE */ 191 &file_filtops, /* EVFILT_EXCEPT */ 192 }; 193 194 void 195 KQREF(struct kqueue *kq) 196 { 197 refcnt_take(&kq->kq_refcnt); 198 } 199 200 void 201 KQRELE(struct kqueue *kq) 202 { 203 struct filedesc *fdp; 204 205 if (refcnt_rele(&kq->kq_refcnt) == 0) 206 return; 207 208 fdp = kq->kq_fdp; 209 if (rw_status(&fdp->fd_lock) == RW_WRITE) { 210 LIST_REMOVE(kq, kq_next); 211 } else { 212 fdplock(fdp); 213 LIST_REMOVE(kq, kq_next); 214 fdpunlock(fdp); 215 } 216 217 KASSERT(TAILQ_EMPTY(&kq->kq_head)); 218 KASSERT(kq->kq_nknotes == 0); 219 220 free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * 221 sizeof(struct knlist)); 222 hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); 223 klist_free(&kq->kq_klist); 224 pool_put(&kqueue_pool, kq); 225 } 226 227 void 228 kqueue_init(void) 229 { 230 pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, 231 PR_WAITOK, "kqueuepl", NULL); 232 pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, 233 PR_WAITOK, "knotepl", NULL); 234 } 235 236 void 237 kqueue_init_percpu(void) 238 { 239 pool_cache_init(&knote_pool); 240 } 241 242 int 243 filt_fileattach(struct knote *kn) 244 { 245 struct file *fp = kn->kn_fp; 246 247 return fp->f_ops->fo_kqfilter(fp, kn); 248 } 249 250 int 251 kqueue_kqfilter(struct file *fp, struct knote *kn) 252 { 253 struct kqueue *kq = kn->kn_fp->f_data; 254 255 if (kn->kn_filter != EVFILT_READ) 256 return (EINVAL); 257 258 kn->kn_fop = &kqread_filtops; 259 klist_insert(&kq->kq_klist, kn); 260 return (0); 261 } 262 263 void 264 filt_kqdetach(struct knote *kn) 265 { 266 struct kqueue *kq = kn->kn_fp->f_data; 267 268 klist_remove(&kq->kq_klist, kn); 269 } 270 271 int 272 filt_kqueue_common(struct knote *kn, struct kqueue *kq) 273 { 274 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 275 276 kn->kn_data = kq->kq_count; 277 278 return (kn->kn_data > 0); 279 } 280 281 int 282 filt_kqueue(struct knote *kn, long hint) 283 { 284 struct kqueue *kq = kn->kn_fp->f_data; 285 int active; 286 287 mtx_enter(&kq->kq_lock); 288 active = filt_kqueue_common(kn, kq); 289 mtx_leave(&kq->kq_lock); 290 291 return (active); 292 } 293 294 int 295 filt_kqueuemodify(struct kevent *kev, struct knote *kn) 296 { 297 struct kqueue *kq = kn->kn_fp->f_data; 298 int active; 299 300 mtx_enter(&kq->kq_lock); 301 knote_assign(kev, kn); 302 active = filt_kqueue_common(kn, kq); 303 mtx_leave(&kq->kq_lock); 304 305 return (active); 306 } 307 308 int 309 filt_kqueueprocess(struct knote *kn, struct kevent *kev) 310 { 311 struct kqueue *kq = kn->kn_fp->f_data; 312 int active; 313 314 mtx_enter(&kq->kq_lock); 315 if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) 316 active = 1; 317 else 318 active = filt_kqueue_common(kn, kq); 319 if (active) 320 knote_submit(kn, kev); 321 mtx_leave(&kq->kq_lock); 322 323 return (active); 324 } 325 326 int 327 filt_procattach(struct knote *kn) 328 { 329 struct process *pr; 330 int s; 331 332 if ((curproc->p_p->ps_flags & PS_PLEDGE) && 333 (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) 334 return pledge_fail(curproc, EPERM, PLEDGE_PROC); 335 336 if (kn->kn_id > PID_MAX) 337 return ESRCH; 338 339 pr = prfind(kn->kn_id); 340 if (pr == NULL) 341 return (ESRCH); 342 343 /* exiting processes can't be 
specified */ 344 if (pr->ps_flags & PS_EXITING) 345 return (ESRCH); 346 347 kn->kn_ptr.p_process = pr; 348 kn->kn_flags |= EV_CLEAR; /* automatically set */ 349 350 /* 351 * internal flag indicating registration done by kernel 352 */ 353 if (kn->kn_flags & EV_FLAG1) { 354 kn->kn_data = kn->kn_sdata; /* ppid */ 355 kn->kn_fflags = NOTE_CHILD; 356 kn->kn_flags &= ~EV_FLAG1; 357 } 358 359 s = splhigh(); 360 klist_insert_locked(&pr->ps_klist, kn); 361 splx(s); 362 363 return (0); 364 } 365 366 /* 367 * The knote may be attached to a different process, which may exit, 368 * leaving nothing for the knote to be attached to. So when the process 369 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 370 * it will be deleted when read out. However, as part of the knote deletion, 371 * this routine is called, so a check is needed to avoid actually performing 372 * a detach, because the original process does not exist any more. 373 */ 374 void 375 filt_procdetach(struct knote *kn) 376 { 377 struct kqueue *kq = kn->kn_kq; 378 struct process *pr = kn->kn_ptr.p_process; 379 int s, status; 380 381 mtx_enter(&kq->kq_lock); 382 status = kn->kn_status; 383 mtx_leave(&kq->kq_lock); 384 385 if (status & KN_DETACHED) 386 return; 387 388 s = splhigh(); 389 klist_remove_locked(&pr->ps_klist, kn); 390 splx(s); 391 } 392 393 int 394 filt_proc(struct knote *kn, long hint) 395 { 396 struct kqueue *kq = kn->kn_kq; 397 u_int event; 398 399 /* 400 * mask off extra data 401 */ 402 event = (u_int)hint & NOTE_PCTRLMASK; 403 404 /* 405 * if the user is interested in this event, record it. 406 */ 407 if (kn->kn_sfflags & event) 408 kn->kn_fflags |= event; 409 410 /* 411 * process is gone, so flag the event as finished and remove it 412 * from the process's klist 413 */ 414 if (event == NOTE_EXIT) { 415 struct process *pr = kn->kn_ptr.p_process; 416 int s; 417 418 mtx_enter(&kq->kq_lock); 419 kn->kn_status |= KN_DETACHED; 420 mtx_leave(&kq->kq_lock); 421 422 s = splhigh(); 423 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 424 kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); 425 klist_remove_locked(&pr->ps_klist, kn); 426 splx(s); 427 return (1); 428 } 429 430 /* 431 * process forked, and user wants to track the new process, 432 * so attach a new knote to it, and immediately report an 433 * event with the parent's pid. 434 */ 435 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 436 struct kevent kev; 437 int error; 438 439 /* 440 * register knote with new process. 441 */ 442 memset(&kev, 0, sizeof(kev)); 443 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 444 kev.filter = kn->kn_filter; 445 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 446 kev.fflags = kn->kn_sfflags; 447 kev.data = kn->kn_id; /* parent */ 448 kev.udata = kn->kn_udata; /* preserve udata */ 449 error = kqueue_register(kq, &kev, 0, NULL); 450 if (error) 451 kn->kn_fflags |= NOTE_TRACKERR; 452 } 453 454 return (kn->kn_fflags != 0); 455 } 456 457 static void 458 filt_timer_timeout_add(struct knote *kn) 459 { 460 struct timeval tv; 461 struct timeout *to = kn->kn_hook; 462 int tticks; 463 464 tv.tv_sec = kn->kn_sdata / 1000; 465 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 466 tticks = tvtohz(&tv); 467 /* Remove extra tick from tvtohz() if timeout has fired before. */ 468 if (timeout_triggered(to)) 469 tticks--; 470 timeout_add(to, (tticks > 0) ? 
tticks : 1); 471 } 472 473 void 474 filt_timerexpire(void *knx) 475 { 476 struct knote *kn = knx; 477 struct kqueue *kq = kn->kn_kq; 478 479 kn->kn_data++; 480 mtx_enter(&kq->kq_lock); 481 knote_activate(kn); 482 mtx_leave(&kq->kq_lock); 483 484 if ((kn->kn_flags & EV_ONESHOT) == 0) 485 filt_timer_timeout_add(kn); 486 } 487 488 489 /* 490 * data contains amount of time to sleep, in milliseconds 491 */ 492 int 493 filt_timerattach(struct knote *kn) 494 { 495 struct timeout *to; 496 497 if (kq_ntimeouts > kq_timeoutmax) 498 return (ENOMEM); 499 kq_ntimeouts++; 500 501 kn->kn_flags |= EV_CLEAR; /* automatically set */ 502 to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); 503 timeout_set(to, filt_timerexpire, kn); 504 kn->kn_hook = to; 505 filt_timer_timeout_add(kn); 506 507 return (0); 508 } 509 510 void 511 filt_timerdetach(struct knote *kn) 512 { 513 struct timeout *to; 514 515 to = (struct timeout *)kn->kn_hook; 516 timeout_del_barrier(to); 517 free(to, M_KEVENT, sizeof(*to)); 518 kq_ntimeouts--; 519 } 520 521 int 522 filt_timermodify(struct kevent *kev, struct knote *kn) 523 { 524 struct kqueue *kq = kn->kn_kq; 525 struct timeout *to = kn->kn_hook; 526 527 /* Reset the timer. Any pending events are discarded. */ 528 529 timeout_del_barrier(to); 530 531 mtx_enter(&kq->kq_lock); 532 if (kn->kn_status & KN_QUEUED) 533 knote_dequeue(kn); 534 kn->kn_status &= ~KN_ACTIVE; 535 mtx_leave(&kq->kq_lock); 536 537 kn->kn_data = 0; 538 knote_assign(kev, kn); 539 /* Reinit timeout to invoke tick adjustment again. */ 540 timeout_set(to, filt_timerexpire, kn); 541 filt_timer_timeout_add(kn); 542 543 return (0); 544 } 545 546 int 547 filt_timerprocess(struct knote *kn, struct kevent *kev) 548 { 549 int active, s; 550 551 s = splsoftclock(); 552 active = (kn->kn_data != 0); 553 if (active) 554 knote_submit(kn, kev); 555 splx(s); 556 557 return (active); 558 } 559 560 561 /* 562 * filt_seltrue: 563 * 564 * This filter "event" routine simulates seltrue(). 565 */ 566 int 567 filt_seltrue(struct knote *kn, long hint) 568 { 569 570 /* 571 * We don't know how much data can be read/written, 572 * but we know that it *can* be. This is about as 573 * good as select/poll does as well. 574 */ 575 kn->kn_data = 0; 576 return (1); 577 } 578 579 int 580 filt_seltruemodify(struct kevent *kev, struct knote *kn) 581 { 582 knote_assign(kev, kn); 583 return (kn->kn_fop->f_event(kn, 0)); 584 } 585 586 int 587 filt_seltrueprocess(struct knote *kn, struct kevent *kev) 588 { 589 int active; 590 591 active = kn->kn_fop->f_event(kn, 0); 592 if (active) 593 knote_submit(kn, kev); 594 return (active); 595 } 596 597 /* 598 * This provides full kqfilter entry for device switch tables, which 599 * has same effect as filter using filt_seltrue() as filter method. 
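 * For example, a character device driver could point the d_kqfilter
 * entry of its cdevsw at seltrue_kqfilter(), assuming the usual OpenBSD
 * device switch layout.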
600 */ 601 void 602 filt_seltruedetach(struct knote *kn) 603 { 604 /* Nothing to do */ 605 } 606 607 const struct filterops seltrue_filtops = { 608 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 609 .f_attach = NULL, 610 .f_detach = filt_seltruedetach, 611 .f_event = filt_seltrue, 612 .f_modify = filt_seltruemodify, 613 .f_process = filt_seltrueprocess, 614 }; 615 616 int 617 seltrue_kqfilter(dev_t dev, struct knote *kn) 618 { 619 switch (kn->kn_filter) { 620 case EVFILT_READ: 621 case EVFILT_WRITE: 622 kn->kn_fop = &seltrue_filtops; 623 break; 624 default: 625 return (EINVAL); 626 } 627 628 /* Nothing more to do */ 629 return (0); 630 } 631 632 static int 633 filt_dead(struct knote *kn, long hint) 634 { 635 if (kn->kn_filter == EVFILT_EXCEPT) { 636 /* 637 * Do not deliver event because there is no out-of-band data. 638 * However, let HUP condition pass for poll(2). 639 */ 640 if ((kn->kn_flags & __EV_POLL) == 0) { 641 kn->kn_flags |= EV_DISABLE; 642 return (0); 643 } 644 } 645 646 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 647 if (kn->kn_flags & __EV_POLL) 648 kn->kn_flags |= __EV_HUP; 649 kn->kn_data = 0; 650 return (1); 651 } 652 653 static void 654 filt_deaddetach(struct knote *kn) 655 { 656 /* Nothing to do */ 657 } 658 659 const struct filterops dead_filtops = { 660 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 661 .f_attach = NULL, 662 .f_detach = filt_deaddetach, 663 .f_event = filt_dead, 664 .f_modify = filt_seltruemodify, 665 .f_process = filt_seltrueprocess, 666 }; 667 668 static int 669 filt_badfd(struct knote *kn, long hint) 670 { 671 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 672 kn->kn_data = EBADF; 673 return (1); 674 } 675 676 /* For use with kqpoll. */ 677 const struct filterops badfd_filtops = { 678 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 679 .f_attach = NULL, 680 .f_detach = filt_deaddetach, 681 .f_event = filt_badfd, 682 .f_modify = filt_seltruemodify, 683 .f_process = filt_seltrueprocess, 684 }; 685 686 static int 687 filter_attach(struct knote *kn) 688 { 689 int error; 690 691 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 692 error = kn->kn_fop->f_attach(kn); 693 } else { 694 KERNEL_LOCK(); 695 error = kn->kn_fop->f_attach(kn); 696 KERNEL_UNLOCK(); 697 } 698 return (error); 699 } 700 701 static void 702 filter_detach(struct knote *kn) 703 { 704 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 705 kn->kn_fop->f_detach(kn); 706 } else { 707 KERNEL_LOCK(); 708 kn->kn_fop->f_detach(kn); 709 KERNEL_UNLOCK(); 710 } 711 } 712 713 static int 714 filter_event(struct knote *kn, long hint) 715 { 716 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 717 KERNEL_ASSERT_LOCKED(); 718 719 return (kn->kn_fop->f_event(kn, hint)); 720 } 721 722 static int 723 filter_modify(struct kevent *kev, struct knote *kn) 724 { 725 int active, s; 726 727 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 728 active = kn->kn_fop->f_modify(kev, kn); 729 } else { 730 KERNEL_LOCK(); 731 if (kn->kn_fop->f_modify != NULL) { 732 active = kn->kn_fop->f_modify(kev, kn); 733 } else { 734 s = splhigh(); 735 active = knote_modify(kev, kn); 736 splx(s); 737 } 738 KERNEL_UNLOCK(); 739 } 740 return (active); 741 } 742 743 static int 744 filter_process(struct knote *kn, struct kevent *kev) 745 { 746 int active, s; 747 748 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 749 active = kn->kn_fop->f_process(kn, kev); 750 } else { 751 KERNEL_LOCK(); 752 if (kn->kn_fop->f_process != NULL) { 753 active = kn->kn_fop->f_process(kn, kev); 754 } else { 755 s = splhigh(); 756 active = knote_process(kn, kev); 757 splx(s); 758 } 759 
KERNEL_UNLOCK(); 760 } 761 return (active); 762 } 763 764 /* 765 * Initialize the current thread for poll/select system call. 766 * num indicates the number of serials that the system call may utilize. 767 * After this function, the valid range of serials is 768 * p_kq_serial <= x < p_kq_serial + num. 769 */ 770 void 771 kqpoll_init(unsigned int num) 772 { 773 struct proc *p = curproc; 774 struct filedesc *fdp; 775 776 if (p->p_kq == NULL) { 777 p->p_kq = kqueue_alloc(p->p_fd); 778 p->p_kq_serial = arc4random(); 779 fdp = p->p_fd; 780 fdplock(fdp); 781 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 782 fdpunlock(fdp); 783 } 784 785 if (p->p_kq_serial + num < p->p_kq_serial) { 786 /* Serial is about to wrap. Clear all attached knotes. */ 787 kqueue_purge(p, p->p_kq); 788 p->p_kq_serial = 0; 789 } 790 } 791 792 /* 793 * Finish poll/select system call. 794 * num must have the same value that was used with kqpoll_init(). 795 */ 796 void 797 kqpoll_done(unsigned int num) 798 { 799 struct proc *p = curproc; 800 struct kqueue *kq = p->p_kq; 801 802 KASSERT(p->p_kq != NULL); 803 KASSERT(p->p_kq_serial + num >= p->p_kq_serial); 804 805 p->p_kq_serial += num; 806 807 /* 808 * Because of kn_pollid key, a thread can in principle allocate 809 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly 810 * with suitably varying pollfd arrays. 811 * Prevent such a large allocation by clearing knotes eagerly 812 * if there are too many of them. 813 * 814 * A small multiple of kq_knlistsize should give enough margin 815 * that eager clearing is infrequent, or does not happen at all, 816 * with normal programs. 817 * A single pollfd entry can use up to three knotes. 818 * Typically there is no significant overlap of fd and events 819 * between different entries in the pollfd array. 
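 * For example, with a kq_knlistsize of 64, the check below lets up to
 * 4 * 64 = 256 knotes accumulate before they are cleared eagerly.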
820 */ 821 if (kq->kq_nknotes > 4 * kq->kq_knlistsize) 822 kqueue_purge(p, kq); 823 } 824 825 void 826 kqpoll_exit(void) 827 { 828 struct proc *p = curproc; 829 830 if (p->p_kq == NULL) 831 return; 832 833 kqueue_purge(p, p->p_kq); 834 kqueue_terminate(p, p->p_kq); 835 KASSERT(p->p_kq->kq_refcnt.r_refs == 1); 836 KQRELE(p->p_kq); 837 p->p_kq = NULL; 838 } 839 840 struct kqueue * 841 kqueue_alloc(struct filedesc *fdp) 842 { 843 struct kqueue *kq; 844 845 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 846 refcnt_init(&kq->kq_refcnt); 847 kq->kq_fdp = fdp; 848 TAILQ_INIT(&kq->kq_head); 849 mtx_init(&kq->kq_lock, IPL_HIGH); 850 task_set(&kq->kq_task, kqueue_task, kq); 851 klist_init_mutex(&kq->kq_klist, &kqueue_klist_lock); 852 853 return (kq); 854 } 855 856 int 857 sys_kqueue(struct proc *p, void *v, register_t *retval) 858 { 859 struct filedesc *fdp = p->p_fd; 860 struct kqueue *kq; 861 struct file *fp; 862 int fd, error; 863 864 kq = kqueue_alloc(fdp); 865 866 fdplock(fdp); 867 error = falloc(p, &fp, &fd); 868 if (error) 869 goto out; 870 fp->f_flag = FREAD | FWRITE; 871 fp->f_type = DTYPE_KQUEUE; 872 fp->f_ops = &kqueueops; 873 fp->f_data = kq; 874 *retval = fd; 875 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 876 kq = NULL; 877 fdinsert(fdp, fd, 0, fp); 878 FRELE(fp, p); 879 out: 880 fdpunlock(fdp); 881 if (kq != NULL) 882 pool_put(&kqueue_pool, kq); 883 return (error); 884 } 885 886 int 887 sys_kevent(struct proc *p, void *v, register_t *retval) 888 { 889 struct kqueue_scan_state scan; 890 struct filedesc* fdp = p->p_fd; 891 struct sys_kevent_args /* { 892 syscallarg(int) fd; 893 syscallarg(const struct kevent *) changelist; 894 syscallarg(int) nchanges; 895 syscallarg(struct kevent *) eventlist; 896 syscallarg(int) nevents; 897 syscallarg(const struct timespec *) timeout; 898 } */ *uap = v; 899 struct kevent *kevp; 900 struct kqueue *kq; 901 struct file *fp; 902 struct timespec ts; 903 struct timespec *tsp = NULL; 904 int i, n, nerrors, error; 905 int ready, total; 906 struct kevent kev[KQ_NEVENTS]; 907 908 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 909 return (EBADF); 910 911 if (fp->f_type != DTYPE_KQUEUE) { 912 error = EBADF; 913 goto done; 914 } 915 916 if (SCARG(uap, timeout) != NULL) { 917 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 918 if (error) 919 goto done; 920 #ifdef KTRACE 921 if (KTRPOINT(p, KTR_STRUCT)) 922 ktrreltimespec(p, &ts); 923 #endif 924 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 925 error = EINVAL; 926 goto done; 927 } 928 tsp = &ts; 929 } 930 931 kq = fp->f_data; 932 nerrors = 0; 933 934 while ((n = SCARG(uap, nchanges)) > 0) { 935 if (n > nitems(kev)) 936 n = nitems(kev); 937 error = copyin(SCARG(uap, changelist), kev, 938 n * sizeof(struct kevent)); 939 if (error) 940 goto done; 941 #ifdef KTRACE 942 if (KTRPOINT(p, KTR_STRUCT)) 943 ktrevent(p, kev, n); 944 #endif 945 for (i = 0; i < n; i++) { 946 kevp = &kev[i]; 947 kevp->flags &= ~EV_SYSFLAGS; 948 error = kqueue_register(kq, kevp, 0, p); 949 if (error || (kevp->flags & EV_RECEIPT)) { 950 if (SCARG(uap, nevents) != 0) { 951 kevp->flags = EV_ERROR; 952 kevp->data = error; 953 copyout(kevp, SCARG(uap, eventlist), 954 sizeof(*kevp)); 955 SCARG(uap, eventlist)++; 956 SCARG(uap, nevents)--; 957 nerrors++; 958 } else { 959 goto done; 960 } 961 } 962 } 963 SCARG(uap, nchanges) -= n; 964 SCARG(uap, changelist) += n; 965 } 966 if (nerrors) { 967 *retval = nerrors; 968 error = 0; 969 goto done; 970 } 971 972 kqueue_scan_setup(&scan, kq); 973 FRELE(fp, p); 974 /* 975 * Collect as many events 
as we can. The timeout on successive 976 * loops is disabled (kqueue_scan() becomes non-blocking). 977 */ 978 total = 0; 979 error = 0; 980 while ((n = SCARG(uap, nevents) - total) > 0) { 981 if (n > nitems(kev)) 982 n = nitems(kev); 983 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 984 if (ready == 0) 985 break; 986 error = copyout(kev, SCARG(uap, eventlist) + total, 987 sizeof(struct kevent) * ready); 988 #ifdef KTRACE 989 if (KTRPOINT(p, KTR_STRUCT)) 990 ktrevent(p, kev, ready); 991 #endif 992 total += ready; 993 if (error || ready < n) 994 break; 995 } 996 kqueue_scan_finish(&scan); 997 *retval = total; 998 return (error); 999 1000 done: 1001 FRELE(fp, p); 1002 return (error); 1003 } 1004 1005 #ifdef KQUEUE_DEBUG 1006 void 1007 kqueue_do_check(struct kqueue *kq, const char *func, int line) 1008 { 1009 struct knote *kn; 1010 int count = 0, nmarker = 0; 1011 1012 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1013 1014 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 1015 if (kn->kn_filter == EVFILT_MARKER) { 1016 if ((kn->kn_status & KN_QUEUED) != 0) 1017 panic("%s:%d: kq=%p kn=%p marker QUEUED", 1018 func, line, kq, kn); 1019 nmarker++; 1020 } else { 1021 if ((kn->kn_status & KN_ACTIVE) == 0) 1022 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 1023 func, line, kq, kn); 1024 if ((kn->kn_status & KN_QUEUED) == 0) 1025 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 1026 func, line, kq, kn); 1027 if (kn->kn_kq != kq) 1028 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 1029 func, line, kq, kn, kn->kn_kq); 1030 count++; 1031 if (count > kq->kq_count) 1032 goto bad; 1033 } 1034 } 1035 if (count != kq->kq_count) { 1036 bad: 1037 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 1038 func, line, kq, kq->kq_count, count, nmarker); 1039 } 1040 } 1041 #endif 1042 1043 int 1044 kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid, 1045 struct proc *p) 1046 { 1047 struct filedesc *fdp = kq->kq_fdp; 1048 const struct filterops *fops = NULL; 1049 struct file *fp = NULL; 1050 struct knote *kn = NULL, *newkn = NULL; 1051 struct knlist *list = NULL; 1052 int active, error = 0; 1053 1054 KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq)); 1055 1056 if (kev->filter < 0) { 1057 if (kev->filter + EVFILT_SYSCOUNT < 0) 1058 return (EINVAL); 1059 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 1060 } 1061 1062 if (fops == NULL) { 1063 /* 1064 * XXX 1065 * filter attach routine is responsible for ensuring that 1066 * the identifier can be attached to it. 
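 * (fops is NULL here when the filter value is non-negative or names a
 * slot left NULL in sysfilt_ops, such as the unimplemented EVFILT_AIO.)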
1067 */ 1068 return (EINVAL); 1069 } 1070 1071 if (fops->f_flags & FILTEROP_ISFD) { 1072 /* validate descriptor */ 1073 if (kev->ident > INT_MAX) 1074 return (EBADF); 1075 } 1076 1077 if (kev->flags & EV_ADD) 1078 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 1079 1080 again: 1081 if (fops->f_flags & FILTEROP_ISFD) { 1082 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 1083 error = EBADF; 1084 goto done; 1085 } 1086 mtx_enter(&kq->kq_lock); 1087 if (kev->flags & EV_ADD) 1088 kqueue_expand_list(kq, kev->ident); 1089 if (kev->ident < kq->kq_knlistsize) 1090 list = &kq->kq_knlist[kev->ident]; 1091 } else { 1092 mtx_enter(&kq->kq_lock); 1093 if (kev->flags & EV_ADD) 1094 kqueue_expand_hash(kq); 1095 if (kq->kq_knhashmask != 0) { 1096 list = &kq->kq_knhash[ 1097 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1098 } 1099 } 1100 if (list != NULL) { 1101 SLIST_FOREACH(kn, list, kn_link) { 1102 if (kev->filter == kn->kn_filter && 1103 kev->ident == kn->kn_id && 1104 pollid == kn->kn_pollid) { 1105 if (!knote_acquire(kn, NULL, 0)) { 1106 /* knote_acquire() has released 1107 * kq_lock. */ 1108 if (fp != NULL) { 1109 FRELE(fp, p); 1110 fp = NULL; 1111 } 1112 goto again; 1113 } 1114 break; 1115 } 1116 } 1117 } 1118 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 1119 1120 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 1121 mtx_leave(&kq->kq_lock); 1122 error = ENOENT; 1123 goto done; 1124 } 1125 1126 /* 1127 * kn now contains the matching knote, or NULL if no match. 1128 */ 1129 if (kev->flags & EV_ADD) { 1130 if (kn == NULL) { 1131 kn = newkn; 1132 newkn = NULL; 1133 kn->kn_status = KN_PROCESSING; 1134 kn->kn_fp = fp; 1135 kn->kn_kq = kq; 1136 kn->kn_fop = fops; 1137 1138 /* 1139 * apply reference count to knote structure, and 1140 * do not release it at the end of this routine. 1141 */ 1142 fp = NULL; 1143 1144 kn->kn_sfflags = kev->fflags; 1145 kn->kn_sdata = kev->data; 1146 kev->fflags = 0; 1147 kev->data = 0; 1148 kn->kn_kevent = *kev; 1149 kn->kn_pollid = pollid; 1150 1151 knote_attach(kn); 1152 mtx_leave(&kq->kq_lock); 1153 1154 error = filter_attach(kn); 1155 if (error != 0) { 1156 knote_drop(kn, p); 1157 goto done; 1158 } 1159 1160 /* 1161 * If this is a file descriptor filter, check if 1162 * fd was closed while the knote was being added. 1163 * knote_fdclose() has missed kn if the function 1164 * ran before kn appeared in kq_knlist. 1165 */ 1166 if ((fops->f_flags & FILTEROP_ISFD) && 1167 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 1168 /* 1169 * Drop the knote silently without error 1170 * because another thread might already have 1171 * seen it. This corresponds to the insert 1172 * happening in full before the close. 1173 */ 1174 filter_detach(kn); 1175 knote_drop(kn, p); 1176 goto done; 1177 } 1178 1179 /* Check if there is a pending event. */ 1180 active = filter_process(kn, NULL); 1181 mtx_enter(&kq->kq_lock); 1182 if (active) 1183 knote_activate(kn); 1184 } else if (kn->kn_fop == &badfd_filtops) { 1185 /* 1186 * Nothing expects this badfd knote any longer. 1187 * Drop it to make room for the new knote and retry. 1188 */ 1189 KASSERT(kq == p->p_kq); 1190 mtx_leave(&kq->kq_lock); 1191 filter_detach(kn); 1192 knote_drop(kn, p); 1193 1194 KASSERT(fp != NULL); 1195 FRELE(fp, p); 1196 fp = NULL; 1197 1198 goto again; 1199 } else { 1200 /* 1201 * The user may change some filter values after the 1202 * initial EV_ADD, but doing so will not reset any 1203 * filters which have already been triggered. 
1204 */ 1205 mtx_leave(&kq->kq_lock); 1206 active = filter_modify(kev, kn); 1207 mtx_enter(&kq->kq_lock); 1208 if (active) 1209 knote_activate(kn); 1210 if (kev->flags & EV_ERROR) { 1211 error = kev->data; 1212 goto release; 1213 } 1214 } 1215 } else if (kev->flags & EV_DELETE) { 1216 mtx_leave(&kq->kq_lock); 1217 filter_detach(kn); 1218 knote_drop(kn, p); 1219 goto done; 1220 } 1221 1222 if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) 1223 kn->kn_status |= KN_DISABLED; 1224 1225 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 1226 kn->kn_status &= ~KN_DISABLED; 1227 mtx_leave(&kq->kq_lock); 1228 /* Check if there is a pending event. */ 1229 active = filter_process(kn, NULL); 1230 mtx_enter(&kq->kq_lock); 1231 if (active) 1232 knote_activate(kn); 1233 } 1234 1235 release: 1236 knote_release(kn); 1237 mtx_leave(&kq->kq_lock); 1238 done: 1239 if (fp != NULL) 1240 FRELE(fp, p); 1241 if (newkn != NULL) 1242 pool_put(&knote_pool, newkn); 1243 return (error); 1244 } 1245 1246 int 1247 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1248 { 1249 struct timespec elapsed, start, stop; 1250 uint64_t nsecs; 1251 int error; 1252 1253 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1254 1255 if (tsp != NULL) { 1256 getnanouptime(&start); 1257 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1258 } else 1259 nsecs = INFSLP; 1260 error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK, 1261 "kqread", nsecs); 1262 if (tsp != NULL) { 1263 getnanouptime(&stop); 1264 timespecsub(&stop, &start, &elapsed); 1265 timespecsub(tsp, &elapsed, tsp); 1266 if (tsp->tv_sec < 0) 1267 timespecclear(tsp); 1268 } 1269 1270 return (error); 1271 } 1272 1273 /* 1274 * Scan the kqueue, blocking if necessary until the target time is reached. 1275 * If tsp is NULL we block indefinitely. If tsp->tv_sec/tv_nsec are both 1276 * 0 we do not block at all. 1277 */ 1278 int 1279 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1280 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1281 { 1282 struct kqueue *kq = scan->kqs_kq; 1283 struct knote *kn; 1284 int error = 0, nkev = 0; 1285 int reinserted; 1286 1287 if (maxevents == 0) 1288 goto done; 1289 retry: 1290 KASSERT(nkev == 0); 1291 1292 error = 0; 1293 reinserted = 0; 1294 1295 /* msleep() with PCATCH requires kernel lock. */ 1296 KERNEL_LOCK(); 1297 1298 mtx_enter(&kq->kq_lock); 1299 1300 if (kq->kq_state & KQ_DYING) { 1301 mtx_leave(&kq->kq_lock); 1302 KERNEL_UNLOCK(); 1303 error = EBADF; 1304 goto done; 1305 } 1306 1307 if (kq->kq_count == 0) { 1308 /* 1309 * Successive loops are only necessary if there are more 1310 * ready events to gather, so they don't need to block. 1311 */ 1312 if ((tsp != NULL && !timespecisset(tsp)) || 1313 scan->kqs_nevent != 0) { 1314 mtx_leave(&kq->kq_lock); 1315 KERNEL_UNLOCK(); 1316 error = 0; 1317 goto done; 1318 } 1319 kq->kq_state |= KQ_SLEEP; 1320 error = kqueue_sleep(kq, tsp); 1321 /* kqueue_sleep() has released kq_lock. */ 1322 KERNEL_UNLOCK(); 1323 if (error == 0 || error == EWOULDBLOCK) 1324 goto retry; 1325 /* don't restart after signals... */ 1326 if (error == ERESTART) 1327 error = EINTR; 1328 goto done; 1329 } 1330 1331 /* The actual scan does not sleep on kq, so unlock the kernel. */ 1332 KERNEL_UNLOCK(); 1333 1334 /* 1335 * Put the end marker in the queue to limit the scan to the events 1336 * that are currently active. This prevents events from being 1337 * recollected if they reactivate during scan.
1338 * 1339 * If a partial scan has been performed already but no events have 1340 * been collected, reposition the end marker to make any new events 1341 * reachable. 1342 */ 1343 if (!scan->kqs_queued) { 1344 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1345 scan->kqs_queued = 1; 1346 } else if (scan->kqs_nevent == 0) { 1347 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1348 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1349 } 1350 1351 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1352 while (nkev < maxevents) { 1353 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1354 if (kn->kn_filter == EVFILT_MARKER) { 1355 if (kn == &scan->kqs_end) 1356 break; 1357 1358 /* Move start marker past another thread's marker. */ 1359 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1360 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1361 kn_tqe); 1362 continue; 1363 } 1364 1365 if (!knote_acquire(kn, NULL, 0)) { 1366 /* knote_acquire() has released kq_lock. */ 1367 mtx_enter(&kq->kq_lock); 1368 continue; 1369 } 1370 1371 kqueue_check(kq); 1372 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1373 kn->kn_status &= ~KN_QUEUED; 1374 kq->kq_count--; 1375 kqueue_check(kq); 1376 1377 if (kn->kn_status & KN_DISABLED) { 1378 knote_release(kn); 1379 continue; 1380 } 1381 1382 mtx_leave(&kq->kq_lock); 1383 1384 /* Drop expired kqpoll knotes. */ 1385 if (p->p_kq == kq && 1386 p->p_kq_serial > (unsigned long)kn->kn_udata) { 1387 filter_detach(kn); 1388 knote_drop(kn, p); 1389 mtx_enter(&kq->kq_lock); 1390 continue; 1391 } 1392 1393 /* 1394 * Invalidate knotes whose vnodes have been revoked. 1395 * This is a workaround; it is tricky to clear existing 1396 * knotes and prevent new ones from being registered 1397 * with the current revocation mechanism. 1398 */ 1399 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && 1400 kn->kn_fp != NULL && 1401 kn->kn_fp->f_type == DTYPE_VNODE) { 1402 struct vnode *vp = kn->kn_fp->f_data; 1403 1404 if (__predict_false(vp->v_op == &dead_vops && 1405 kn->kn_fop != &dead_filtops)) { 1406 filter_detach(kn); 1407 kn->kn_fop = &dead_filtops; 1408 1409 /* 1410 * Check if the event should be delivered. 1411 * Use f_event directly because this is 1412 * a special situation. 1413 */ 1414 if (kn->kn_fop->f_event(kn, 0) == 0) { 1415 filter_detach(kn); 1416 knote_drop(kn, p); 1417 mtx_enter(&kq->kq_lock); 1418 continue; 1419 } 1420 } 1421 } 1422 1423 memset(kevp, 0, sizeof(*kevp)); 1424 if (filter_process(kn, kevp) == 0) { 1425 mtx_enter(&kq->kq_lock); 1426 if ((kn->kn_status & KN_QUEUED) == 0) 1427 kn->kn_status &= ~KN_ACTIVE; 1428 knote_release(kn); 1429 kqueue_check(kq); 1430 continue; 1431 } 1432 1433 /* 1434 * Post-event action on the note 1435 */ 1436 if (kevp->flags & EV_ONESHOT) { 1437 filter_detach(kn); 1438 knote_drop(kn, p); 1439 mtx_enter(&kq->kq_lock); 1440 } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { 1441 mtx_enter(&kq->kq_lock); 1442 if (kevp->flags & EV_DISPATCH) 1443 kn->kn_status |= KN_DISABLED; 1444 if ((kn->kn_status & KN_QUEUED) == 0) 1445 kn->kn_status &= ~KN_ACTIVE; 1446 knote_release(kn); 1447 } else { 1448 mtx_enter(&kq->kq_lock); 1449 if ((kn->kn_status & KN_QUEUED) == 0) { 1450 kqueue_check(kq); 1451 kq->kq_count++; 1452 kn->kn_status |= KN_QUEUED; 1453 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1454 /* Wakeup is done after loop. 
*/ 1455 reinserted = 1; 1456 } 1457 knote_release(kn); 1458 } 1459 kqueue_check(kq); 1460 1461 kevp++; 1462 nkev++; 1463 scan->kqs_nevent++; 1464 } 1465 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1466 if (reinserted && kq->kq_count != 0) 1467 kqueue_wakeup(kq); 1468 mtx_leave(&kq->kq_lock); 1469 if (scan->kqs_nevent == 0) 1470 goto retry; 1471 done: 1472 *errorp = error; 1473 return (nkev); 1474 } 1475 1476 void 1477 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1478 { 1479 memset(scan, 0, sizeof(*scan)); 1480 1481 KQREF(kq); 1482 scan->kqs_kq = kq; 1483 scan->kqs_start.kn_filter = EVFILT_MARKER; 1484 scan->kqs_start.kn_status = KN_PROCESSING; 1485 scan->kqs_end.kn_filter = EVFILT_MARKER; 1486 scan->kqs_end.kn_status = KN_PROCESSING; 1487 } 1488 1489 void 1490 kqueue_scan_finish(struct kqueue_scan_state *scan) 1491 { 1492 struct kqueue *kq = scan->kqs_kq; 1493 1494 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1495 KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); 1496 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1497 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1498 1499 if (scan->kqs_queued) { 1500 scan->kqs_queued = 0; 1501 mtx_enter(&kq->kq_lock); 1502 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1503 mtx_leave(&kq->kq_lock); 1504 } 1505 KQRELE(kq); 1506 } 1507 1508 /* 1509 * XXX 1510 * This could be expanded to call kqueue_scan, if desired. 1511 */ 1512 int 1513 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1514 { 1515 return (ENXIO); 1516 } 1517 1518 int 1519 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1520 { 1521 return (ENXIO); 1522 } 1523 1524 int 1525 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1526 { 1527 return (ENOTTY); 1528 } 1529 1530 int 1531 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1532 { 1533 struct kqueue *kq = fp->f_data; 1534 1535 memset(st, 0, sizeof(*st)); 1536 st->st_size = kq->kq_count; /* unlocked read */ 1537 st->st_blksize = sizeof(struct kevent); 1538 st->st_mode = S_IFIFO; 1539 return (0); 1540 } 1541 1542 void 1543 kqueue_purge(struct proc *p, struct kqueue *kq) 1544 { 1545 int i; 1546 1547 mtx_enter(&kq->kq_lock); 1548 for (i = 0; i < kq->kq_knlistsize; i++) 1549 knote_remove(p, kq, &kq->kq_knlist, i, 1); 1550 if (kq->kq_knhashmask != 0) { 1551 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1552 knote_remove(p, kq, &kq->kq_knhash, i, 1); 1553 } 1554 mtx_leave(&kq->kq_lock); 1555 } 1556 1557 void 1558 kqueue_terminate(struct proc *p, struct kqueue *kq) 1559 { 1560 struct knote *kn; 1561 int state; 1562 1563 mtx_enter(&kq->kq_lock); 1564 1565 /* 1566 * Any remaining entries should be scan markers. 1567 * They are removed when the ongoing scans finish. 1568 */ 1569 KASSERT(kq->kq_count == 0); 1570 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) 1571 KASSERT(kn->kn_filter == EVFILT_MARKER); 1572 1573 kq->kq_state |= KQ_DYING; 1574 state = kq->kq_state; 1575 kqueue_wakeup(kq); 1576 mtx_leave(&kq->kq_lock); 1577 1578 /* 1579 * Any knotes that were attached to this kqueue were deleted 1580 * by knote_fdclose() when this kqueue's file descriptor was closed. 
1581 */ 1582 KASSERT(klist_empty(&kq->kq_klist)); 1583 if (state & KQ_TASK) 1584 taskq_del_barrier(systqmp, &kq->kq_task); 1585 } 1586 1587 int 1588 kqueue_close(struct file *fp, struct proc *p) 1589 { 1590 struct kqueue *kq = fp->f_data; 1591 1592 fp->f_data = NULL; 1593 1594 kqueue_purge(p, kq); 1595 kqueue_terminate(p, kq); 1596 1597 KQRELE(kq); 1598 1599 return (0); 1600 } 1601 1602 static void 1603 kqueue_task(void *arg) 1604 { 1605 struct kqueue *kq = arg; 1606 1607 mtx_enter(&kqueue_klist_lock); 1608 KNOTE(&kq->kq_klist, 0); 1609 mtx_leave(&kqueue_klist_lock); 1610 } 1611 1612 void 1613 kqueue_wakeup(struct kqueue *kq) 1614 { 1615 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1616 1617 if (kq->kq_state & KQ_SLEEP) { 1618 kq->kq_state &= ~KQ_SLEEP; 1619 wakeup(kq); 1620 } 1621 if (!klist_empty(&kq->kq_klist)) { 1622 /* Defer activation to avoid recursion. */ 1623 kq->kq_state |= KQ_TASK; 1624 task_add(systqmp, &kq->kq_task); 1625 } 1626 } 1627 1628 static void 1629 kqueue_expand_hash(struct kqueue *kq) 1630 { 1631 struct knlist *hash; 1632 u_long hashmask; 1633 1634 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1635 1636 if (kq->kq_knhashmask == 0) { 1637 mtx_leave(&kq->kq_lock); 1638 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1639 mtx_enter(&kq->kq_lock); 1640 if (kq->kq_knhashmask == 0) { 1641 kq->kq_knhash = hash; 1642 kq->kq_knhashmask = hashmask; 1643 } else { 1644 /* Another thread has allocated the hash. */ 1645 mtx_leave(&kq->kq_lock); 1646 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1647 mtx_enter(&kq->kq_lock); 1648 } 1649 } 1650 } 1651 1652 static void 1653 kqueue_expand_list(struct kqueue *kq, int fd) 1654 { 1655 struct knlist *list, *olist; 1656 int size, osize; 1657 1658 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1659 1660 if (kq->kq_knlistsize <= fd) { 1661 size = kq->kq_knlistsize; 1662 mtx_leave(&kq->kq_lock); 1663 while (size <= fd) 1664 size += KQEXTENT; 1665 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1666 mtx_enter(&kq->kq_lock); 1667 if (kq->kq_knlistsize <= fd) { 1668 memcpy(list, kq->kq_knlist, 1669 kq->kq_knlistsize * sizeof(*list)); 1670 memset(&list[kq->kq_knlistsize], 0, 1671 (size - kq->kq_knlistsize) * sizeof(*list)); 1672 olist = kq->kq_knlist; 1673 osize = kq->kq_knlistsize; 1674 kq->kq_knlist = list; 1675 kq->kq_knlistsize = size; 1676 mtx_leave(&kq->kq_lock); 1677 free(olist, M_KEVENT, osize * sizeof(*list)); 1678 mtx_enter(&kq->kq_lock); 1679 } else { 1680 /* Another thread has expanded the list. */ 1681 mtx_leave(&kq->kq_lock); 1682 free(list, M_KEVENT, size * sizeof(*list)); 1683 mtx_enter(&kq->kq_lock); 1684 } 1685 } 1686 } 1687 1688 /* 1689 * Acquire a knote, return non-zero on success, 0 on failure. 1690 * 1691 * If we cannot acquire the knote we sleep and return 0. The knote 1692 * may be stale on return in this case and the caller must restart 1693 * whatever loop they are in. 1694 * 1695 * If we are about to sleep and klist is non-NULL, the list is unlocked 1696 * before sleep and remains unlocked on return. 1697 */ 1698 int 1699 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1700 { 1701 struct kqueue *kq = kn->kn_kq; 1702 1703 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1704 KASSERT(kn->kn_filter != EVFILT_MARKER); 1705 1706 if (kn->kn_status & KN_PROCESSING) { 1707 kn->kn_status |= KN_WAITING; 1708 if (klist != NULL) { 1709 mtx_leave(&kq->kq_lock); 1710 klist_unlock(klist, ls); 1711 /* XXX Timeout resolves potential loss of wakeup. 
*/ 1712 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1713 } else { 1714 msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts", 1715 SEC_TO_NSEC(1)); 1716 } 1717 /* knote may be stale now */ 1718 return (0); 1719 } 1720 kn->kn_status |= KN_PROCESSING; 1721 return (1); 1722 } 1723 1724 /* 1725 * Release an acquired knote, clearing KN_PROCESSING. 1726 */ 1727 void 1728 knote_release(struct knote *kn) 1729 { 1730 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1731 KASSERT(kn->kn_filter != EVFILT_MARKER); 1732 KASSERT(kn->kn_status & KN_PROCESSING); 1733 1734 if (kn->kn_status & KN_WAITING) { 1735 kn->kn_status &= ~KN_WAITING; 1736 wakeup(kn); 1737 } 1738 kn->kn_status &= ~KN_PROCESSING; 1739 /* kn should not be accessed anymore */ 1740 } 1741 1742 /* 1743 * activate one knote. 1744 */ 1745 void 1746 knote_activate(struct knote *kn) 1747 { 1748 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1749 1750 kn->kn_status |= KN_ACTIVE; 1751 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1752 knote_enqueue(kn); 1753 } 1754 1755 /* 1756 * walk down a list of knotes, activating them if their event has triggered. 1757 */ 1758 void 1759 knote(struct klist *list, long hint) 1760 { 1761 struct knote *kn, *kn0; 1762 struct kqueue *kq; 1763 1764 KLIST_ASSERT_LOCKED(list); 1765 1766 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) { 1767 if (filter_event(kn, hint)) { 1768 kq = kn->kn_kq; 1769 mtx_enter(&kq->kq_lock); 1770 knote_activate(kn); 1771 mtx_leave(&kq->kq_lock); 1772 } 1773 } 1774 } 1775 1776 /* 1777 * remove all knotes from a specified knlist 1778 */ 1779 void 1780 knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, int idx, 1781 int purge) 1782 { 1783 struct knote *kn; 1784 1785 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1786 1787 /* Always fetch array pointer as another thread can resize kq_knlist. */ 1788 while ((kn = SLIST_FIRST(*plist + idx)) != NULL) { 1789 KASSERT(kn->kn_kq == kq); 1790 1791 if (!purge) { 1792 /* Skip pending badfd knotes. */ 1793 while (kn->kn_fop == &badfd_filtops) { 1794 kn = SLIST_NEXT(kn, kn_link); 1795 if (kn == NULL) 1796 return; 1797 KASSERT(kn->kn_kq == kq); 1798 } 1799 } 1800 1801 if (!knote_acquire(kn, NULL, 0)) { 1802 /* knote_acquire() has released kq_lock. */ 1803 mtx_enter(&kq->kq_lock); 1804 continue; 1805 } 1806 mtx_leave(&kq->kq_lock); 1807 filter_detach(kn); 1808 1809 /* 1810 * Notify poll(2) and select(2) when a monitored 1811 * file descriptor is closed. 1812 * 1813 * This reuses the original knote for delivering the 1814 * notification so as to avoid allocating memory. 1815 */ 1816 if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) && 1817 !(p->p_kq == kq && 1818 p->p_kq_serial > (unsigned long)kn->kn_udata) && 1819 kn->kn_fop != &badfd_filtops) { 1820 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1821 FRELE(kn->kn_fp, p); 1822 kn->kn_fp = NULL; 1823 1824 kn->kn_fop = &badfd_filtops; 1825 filter_event(kn, 0); 1826 mtx_enter(&kq->kq_lock); 1827 knote_activate(kn); 1828 knote_release(kn); 1829 continue; 1830 } 1831 1832 knote_drop(kn, p); 1833 mtx_enter(&kq->kq_lock); 1834 } 1835 } 1836 1837 /* 1838 * remove all knotes referencing a specified fd 1839 */ 1840 void 1841 knote_fdclose(struct proc *p, int fd) 1842 { 1843 struct filedesc *fdp = p->p_p->ps_fd; 1844 struct kqueue *kq; 1845 1846 /* 1847 * fdplock can be ignored if the file descriptor table is being freed 1848 * because no other thread can access the fdp. 
1849 */ 1850 if (fdp->fd_refcnt != 0) 1851 fdpassertlocked(fdp); 1852 1853 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1854 mtx_enter(&kq->kq_lock); 1855 if (fd < kq->kq_knlistsize) 1856 knote_remove(p, kq, &kq->kq_knlist, fd, 0); 1857 mtx_leave(&kq->kq_lock); 1858 } 1859 } 1860 1861 /* 1862 * handle a process exiting, including the triggering of NOTE_EXIT notes 1863 * XXX this could be more efficient, doing a single pass down the klist 1864 */ 1865 void 1866 knote_processexit(struct process *pr) 1867 { 1868 KERNEL_ASSERT_LOCKED(); 1869 1870 KNOTE(&pr->ps_klist, NOTE_EXIT); 1871 1872 /* remove other knotes hanging off the process */ 1873 klist_invalidate(&pr->ps_klist); 1874 } 1875 1876 void 1877 knote_attach(struct knote *kn) 1878 { 1879 struct kqueue *kq = kn->kn_kq; 1880 struct knlist *list; 1881 1882 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1883 KASSERT(kn->kn_status & KN_PROCESSING); 1884 1885 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1886 KASSERT(kq->kq_knlistsize > kn->kn_id); 1887 list = &kq->kq_knlist[kn->kn_id]; 1888 } else { 1889 KASSERT(kq->kq_knhashmask != 0); 1890 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1891 } 1892 SLIST_INSERT_HEAD(list, kn, kn_link); 1893 kq->kq_nknotes++; 1894 } 1895 1896 void 1897 knote_detach(struct knote *kn) 1898 { 1899 struct kqueue *kq = kn->kn_kq; 1900 struct knlist *list; 1901 1902 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1903 KASSERT(kn->kn_status & KN_PROCESSING); 1904 1905 kq->kq_nknotes--; 1906 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1907 list = &kq->kq_knlist[kn->kn_id]; 1908 else 1909 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1910 SLIST_REMOVE(list, kn, knote, kn_link); 1911 } 1912 1913 /* 1914 * should be called at spl == 0, since we don't want to hold spl 1915 * while calling FRELE and pool_put. 1916 */ 1917 void 1918 knote_drop(struct knote *kn, struct proc *p) 1919 { 1920 struct kqueue *kq = kn->kn_kq; 1921 1922 KASSERT(kn->kn_filter != EVFILT_MARKER); 1923 1924 mtx_enter(&kq->kq_lock); 1925 knote_detach(kn); 1926 if (kn->kn_status & KN_QUEUED) 1927 knote_dequeue(kn); 1928 if (kn->kn_status & KN_WAITING) { 1929 kn->kn_status &= ~KN_WAITING; 1930 wakeup(kn); 1931 } 1932 mtx_leave(&kq->kq_lock); 1933 1934 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 1935 FRELE(kn->kn_fp, p); 1936 pool_put(&knote_pool, kn); 1937 } 1938 1939 1940 void 1941 knote_enqueue(struct knote *kn) 1942 { 1943 struct kqueue *kq = kn->kn_kq; 1944 1945 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1946 KASSERT(kn->kn_filter != EVFILT_MARKER); 1947 KASSERT((kn->kn_status & KN_QUEUED) == 0); 1948 1949 kqueue_check(kq); 1950 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1951 kn->kn_status |= KN_QUEUED; 1952 kq->kq_count++; 1953 kqueue_check(kq); 1954 kqueue_wakeup(kq); 1955 } 1956 1957 void 1958 knote_dequeue(struct knote *kn) 1959 { 1960 struct kqueue *kq = kn->kn_kq; 1961 1962 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1963 KASSERT(kn->kn_filter != EVFILT_MARKER); 1964 KASSERT(kn->kn_status & KN_QUEUED); 1965 1966 kqueue_check(kq); 1967 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1968 kn->kn_status &= ~KN_QUEUED; 1969 kq->kq_count--; 1970 kqueue_check(kq); 1971 } 1972 1973 /* 1974 * Assign parameters to the knote. 1975 * 1976 * The knote's object lock must be held. 
1977 */ 1978 void 1979 knote_assign(const struct kevent *kev, struct knote *kn) 1980 { 1981 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 1982 KERNEL_ASSERT_LOCKED(); 1983 1984 kn->kn_sfflags = kev->fflags; 1985 kn->kn_sdata = kev->data; 1986 kn->kn_udata = kev->udata; 1987 } 1988 1989 /* 1990 * Submit the knote's event for delivery. 1991 * 1992 * The knote's object lock must be held. 1993 */ 1994 void 1995 knote_submit(struct knote *kn, struct kevent *kev) 1996 { 1997 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 1998 KERNEL_ASSERT_LOCKED(); 1999 2000 if (kev != NULL) { 2001 *kev = kn->kn_kevent; 2002 if (kn->kn_flags & EV_CLEAR) { 2003 kn->kn_fflags = 0; 2004 kn->kn_data = 0; 2005 } 2006 } 2007 } 2008 2009 void 2010 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 2011 { 2012 SLIST_INIT(&klist->kl_list); 2013 klist->kl_ops = ops; 2014 klist->kl_arg = arg; 2015 } 2016 2017 void 2018 klist_free(struct klist *klist) 2019 { 2020 KASSERT(SLIST_EMPTY(&klist->kl_list)); 2021 } 2022 2023 void 2024 klist_insert(struct klist *klist, struct knote *kn) 2025 { 2026 int ls; 2027 2028 ls = klist_lock(klist); 2029 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2030 klist_unlock(klist, ls); 2031 } 2032 2033 void 2034 klist_insert_locked(struct klist *klist, struct knote *kn) 2035 { 2036 KLIST_ASSERT_LOCKED(klist); 2037 2038 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 2039 } 2040 2041 void 2042 klist_remove(struct klist *klist, struct knote *kn) 2043 { 2044 int ls; 2045 2046 ls = klist_lock(klist); 2047 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2048 klist_unlock(klist, ls); 2049 } 2050 2051 void 2052 klist_remove_locked(struct klist *klist, struct knote *kn) 2053 { 2054 KLIST_ASSERT_LOCKED(klist); 2055 2056 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 2057 } 2058 2059 /* 2060 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 2061 * 2062 * The caller of this function must not hold any locks that can block 2063 * filterops callbacks that run with KN_PROCESSING. 2064 * Otherwise this function might deadlock. 2065 */ 2066 void 2067 klist_invalidate(struct klist *list) 2068 { 2069 struct knote *kn; 2070 struct kqueue *kq; 2071 struct proc *p = curproc; 2072 int ls; 2073 2074 NET_ASSERT_UNLOCKED(); 2075 2076 ls = klist_lock(list); 2077 while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { 2078 kq = kn->kn_kq; 2079 mtx_enter(&kq->kq_lock); 2080 if (!knote_acquire(kn, list, ls)) { 2081 /* knote_acquire() has released kq_lock 2082 * and klist lock. 
*/ 2083 ls = klist_lock(list); 2084 continue; 2085 } 2086 mtx_leave(&kq->kq_lock); 2087 klist_unlock(list, ls); 2088 filter_detach(kn); 2089 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 2090 kn->kn_fop = &dead_filtops; 2091 filter_event(kn, 0); 2092 mtx_enter(&kq->kq_lock); 2093 knote_activate(kn); 2094 knote_release(kn); 2095 mtx_leave(&kq->kq_lock); 2096 } else { 2097 knote_drop(kn, p); 2098 } 2099 ls = klist_lock(list); 2100 } 2101 klist_unlock(list, ls); 2102 } 2103 2104 static int 2105 klist_lock(struct klist *list) 2106 { 2107 int ls = 0; 2108 2109 if (list->kl_ops != NULL) { 2110 ls = list->kl_ops->klo_lock(list->kl_arg); 2111 } else { 2112 KERNEL_LOCK(); 2113 ls = splhigh(); 2114 } 2115 return ls; 2116 } 2117 2118 static void 2119 klist_unlock(struct klist *list, int ls) 2120 { 2121 if (list->kl_ops != NULL) { 2122 list->kl_ops->klo_unlock(list->kl_arg, ls); 2123 } else { 2124 splx(ls); 2125 KERNEL_UNLOCK(); 2126 } 2127 } 2128 2129 static void 2130 klist_mutex_assertlk(void *arg) 2131 { 2132 struct mutex *mtx = arg; 2133 2134 (void)mtx; 2135 2136 MUTEX_ASSERT_LOCKED(mtx); 2137 } 2138 2139 static int 2140 klist_mutex_lock(void *arg) 2141 { 2142 struct mutex *mtx = arg; 2143 2144 mtx_enter(mtx); 2145 return 0; 2146 } 2147 2148 static void 2149 klist_mutex_unlock(void *arg, int s) 2150 { 2151 struct mutex *mtx = arg; 2152 2153 mtx_leave(mtx); 2154 } 2155 2156 static const struct klistops mutex_klistops = { 2157 .klo_assertlk = klist_mutex_assertlk, 2158 .klo_lock = klist_mutex_lock, 2159 .klo_unlock = klist_mutex_unlock, 2160 }; 2161 2162 void 2163 klist_init_mutex(struct klist *klist, struct mutex *mtx) 2164 { 2165 klist_init(klist, &mutex_klistops, mtx); 2166 } 2167 2168 static void 2169 klist_rwlock_assertlk(void *arg) 2170 { 2171 struct rwlock *rwl = arg; 2172 2173 (void)rwl; 2174 2175 rw_assert_wrlock(rwl); 2176 } 2177 2178 static int 2179 klist_rwlock_lock(void *arg) 2180 { 2181 struct rwlock *rwl = arg; 2182 2183 rw_enter_write(rwl); 2184 return 0; 2185 } 2186 2187 static void 2188 klist_rwlock_unlock(void *arg, int s) 2189 { 2190 struct rwlock *rwl = arg; 2191 2192 rw_exit_write(rwl); 2193 } 2194 2195 static const struct klistops rwlock_klistops = { 2196 .klo_assertlk = klist_rwlock_assertlk, 2197 .klo_lock = klist_rwlock_lock, 2198 .klo_unlock = klist_rwlock_unlock, 2199 }; 2200 2201 void 2202 klist_init_rwlock(struct klist *klist, struct rwlock *rwl) 2203 { 2204 klist_init(klist, &rwlock_klistops, rwl); 2205 } 2206
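/*
 * Illustrative userland sketch (not part of the kernel proper): one way the
 * kqueue(2)/kevent(2) interface implemented above is typically driven.  The
 * watched descriptor, timer ident and timer period are arbitrary example
 * values.
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct kevent chg[2], ev[2];
 *		int i, kq, n;
 *
 *		if ((kq = kqueue()) == -1)
 *			err(1, "kqueue");
 *
 *		// Watch stdin for readability and arm a 500 ms periodic timer.
 *		EV_SET(&chg[0], STDIN_FILENO, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *		EV_SET(&chg[1], 1, EVFILT_TIMER, EV_ADD, 0, 500, NULL);
 *		if (kevent(kq, chg, 2, NULL, 0, NULL) == -1)
 *			err(1, "kevent: register");
 *
 *		// Block until at least one event is ready, then report it.
 *		if ((n = kevent(kq, NULL, 0, ev, 2, NULL)) == -1)
 *			err(1, "kevent: wait");
 *		for (i = 0; i < n; i++)
 *			printf("filter %d ident %lu data %lld\n",
 *			    ev[i].filter, (unsigned long)ev[i].ident,
 *			    (long long)ev[i].data);
 *
 *		close(kq);
 *		return 0;
 *	}
 */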