/*	$OpenBSD: kern_event.c,v 1.169 2021/07/24 09:16:51 mpi Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/syscallargs.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/wait.h>

#ifdef DIAGNOSTIC
#define KLIST_ASSERT_LOCKED(kl) do {					\
	if ((kl)->kl_ops != NULL)					\
		(kl)->kl_ops->klo_assertlk((kl)->kl_arg);		\
	else								\
		KERNEL_ASSERT_LOCKED();					\
} while (0)
#else
#define KLIST_ASSERT_LOCKED(kl)	((void)(kl))
#endif

struct	kqueue *kqueue_alloc(struct filedesc *);
void	kqueue_terminate(struct proc *p, struct kqueue *);
void	KQREF(struct kqueue *);
void	KQRELE(struct kqueue *);

int	kqueue_sleep(struct kqueue *, struct timespec *);

int	kqueue_read(struct file *, struct uio *, int);
int	kqueue_write(struct file *, struct uio *, int);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_poll(struct file *fp, int events, struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

#ifdef KQUEUE_DEBUG
void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif
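
/*
 * A kqueue is reference counted with KQREF()/KQRELE().  Its queue of
 * active knotes and its per-fd and hashed knote lists are protected by
 * kq_lock.  Filters that do not set FILTEROP_MPSAFE are invoked with
 * the kernel lock held; see the filter_attach(), filter_detach() and
 * filter_event() wrappers below.
 */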
void	kqpoll_dequeue(struct proc *p, int all);

static int	filter_attach(struct knote *kn);
static void	filter_detach(struct knote *kn);
static int	filter_event(struct knote *kn, long hint);
static int	filter_modify(struct kevent *kev, struct knote *kn);
static int	filter_process(struct knote *kn, struct kevent *kev);
static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);

const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_poll	= kqueue_poll,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_detach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
int	knote_acquire(struct knote *kn, struct klist *, int);
void	knote_release(struct knote *kn);
void	knote_activate(struct knote *kn);
void	knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list,
	    int purge);

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timermodify(struct kevent *kev, struct knote *kn);
int	filt_timerprocess(struct knote *kn, struct kevent *kev);
void	filt_seltruedetach(struct knote *kn);

const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
};

const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= NULL,
	.f_modify	= filt_timermodify,
	.f_process	= filt_timerprocess,
};

struct	pool knote_pool;
struct	pool kqueue_pool;
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/*
 * Table for all system-defined filters.
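 * Userland passes negative filter values; kqueue_register() converts
 * them to an index into this table with ~kev->filter, so EVFILT_READ
 * (-1) selects entry 0, EVFILT_WRITE (-2) entry 1, and so on.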
181 */ 182 const struct filterops *const sysfilt_ops[] = { 183 &file_filtops, /* EVFILT_READ */ 184 &file_filtops, /* EVFILT_WRITE */ 185 NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ 186 &file_filtops, /* EVFILT_VNODE */ 187 &proc_filtops, /* EVFILT_PROC */ 188 &sig_filtops, /* EVFILT_SIGNAL */ 189 &timer_filtops, /* EVFILT_TIMER */ 190 &file_filtops, /* EVFILT_DEVICE */ 191 &file_filtops, /* EVFILT_EXCEPT */ 192 }; 193 194 void 195 KQREF(struct kqueue *kq) 196 { 197 atomic_inc_int(&kq->kq_refs); 198 } 199 200 void 201 KQRELE(struct kqueue *kq) 202 { 203 struct filedesc *fdp; 204 205 if (atomic_dec_int_nv(&kq->kq_refs) > 0) 206 return; 207 208 fdp = kq->kq_fdp; 209 if (rw_status(&fdp->fd_lock) == RW_WRITE) { 210 LIST_REMOVE(kq, kq_next); 211 } else { 212 fdplock(fdp); 213 LIST_REMOVE(kq, kq_next); 214 fdpunlock(fdp); 215 } 216 217 KASSERT(TAILQ_EMPTY(&kq->kq_head)); 218 219 free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * 220 sizeof(struct knlist)); 221 hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); 222 pool_put(&kqueue_pool, kq); 223 } 224 225 void 226 kqueue_init(void) 227 { 228 pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, 229 PR_WAITOK, "kqueuepl", NULL); 230 pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, 231 PR_WAITOK, "knotepl", NULL); 232 } 233 234 void 235 kqueue_init_percpu(void) 236 { 237 pool_cache_init(&knote_pool); 238 } 239 240 int 241 filt_fileattach(struct knote *kn) 242 { 243 struct file *fp = kn->kn_fp; 244 245 return fp->f_ops->fo_kqfilter(fp, kn); 246 } 247 248 int 249 kqueue_kqfilter(struct file *fp, struct knote *kn) 250 { 251 struct kqueue *kq = kn->kn_fp->f_data; 252 253 if (kn->kn_filter != EVFILT_READ) 254 return (EINVAL); 255 256 kn->kn_fop = &kqread_filtops; 257 klist_insert_locked(&kq->kq_sel.si_note, kn); 258 return (0); 259 } 260 261 void 262 filt_kqdetach(struct knote *kn) 263 { 264 struct kqueue *kq = kn->kn_fp->f_data; 265 266 klist_remove_locked(&kq->kq_sel.si_note, kn); 267 } 268 269 int 270 filt_kqueue(struct knote *kn, long hint) 271 { 272 struct kqueue *kq = kn->kn_fp->f_data; 273 274 mtx_enter(&kq->kq_lock); 275 kn->kn_data = kq->kq_count; 276 mtx_leave(&kq->kq_lock); 277 return (kn->kn_data > 0); 278 } 279 280 int 281 filt_procattach(struct knote *kn) 282 { 283 struct process *pr; 284 int s; 285 286 if ((curproc->p_p->ps_flags & PS_PLEDGE) && 287 (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) 288 return pledge_fail(curproc, EPERM, PLEDGE_PROC); 289 290 if (kn->kn_id > PID_MAX) 291 return ESRCH; 292 293 pr = prfind(kn->kn_id); 294 if (pr == NULL) 295 return (ESRCH); 296 297 /* exiting processes can't be specified */ 298 if (pr->ps_flags & PS_EXITING) 299 return (ESRCH); 300 301 kn->kn_ptr.p_process = pr; 302 kn->kn_flags |= EV_CLEAR; /* automatically set */ 303 304 /* 305 * internal flag indicating registration done by kernel 306 */ 307 if (kn->kn_flags & EV_FLAG1) { 308 kn->kn_data = kn->kn_sdata; /* ppid */ 309 kn->kn_fflags = NOTE_CHILD; 310 kn->kn_flags &= ~EV_FLAG1; 311 } 312 313 s = splhigh(); 314 klist_insert_locked(&pr->ps_klist, kn); 315 splx(s); 316 317 return (0); 318 } 319 320 /* 321 * The knote may be attached to a different process, which may exit, 322 * leaving nothing for the knote to be attached to. So when the process 323 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 324 * it will be deleted when read out. 
However, as part of the knote deletion, 325 * this routine is called, so a check is needed to avoid actually performing 326 * a detach, because the original process does not exist any more. 327 */ 328 void 329 filt_procdetach(struct knote *kn) 330 { 331 struct kqueue *kq = kn->kn_kq; 332 struct process *pr = kn->kn_ptr.p_process; 333 int s, status; 334 335 mtx_enter(&kq->kq_lock); 336 status = kn->kn_status; 337 mtx_leave(&kq->kq_lock); 338 339 if (status & KN_DETACHED) 340 return; 341 342 s = splhigh(); 343 klist_remove_locked(&pr->ps_klist, kn); 344 splx(s); 345 } 346 347 int 348 filt_proc(struct knote *kn, long hint) 349 { 350 struct kqueue *kq = kn->kn_kq; 351 u_int event; 352 353 /* 354 * mask off extra data 355 */ 356 event = (u_int)hint & NOTE_PCTRLMASK; 357 358 /* 359 * if the user is interested in this event, record it. 360 */ 361 if (kn->kn_sfflags & event) 362 kn->kn_fflags |= event; 363 364 /* 365 * process is gone, so flag the event as finished and remove it 366 * from the process's klist 367 */ 368 if (event == NOTE_EXIT) { 369 struct process *pr = kn->kn_ptr.p_process; 370 int s; 371 372 mtx_enter(&kq->kq_lock); 373 kn->kn_status |= KN_DETACHED; 374 mtx_leave(&kq->kq_lock); 375 376 s = splhigh(); 377 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 378 kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); 379 klist_remove_locked(&pr->ps_klist, kn); 380 splx(s); 381 return (1); 382 } 383 384 /* 385 * process forked, and user wants to track the new process, 386 * so attach a new knote to it, and immediately report an 387 * event with the parent's pid. 388 */ 389 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 390 struct kevent kev; 391 int error; 392 393 /* 394 * register knote with new process. 395 */ 396 memset(&kev, 0, sizeof(kev)); 397 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 398 kev.filter = kn->kn_filter; 399 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 400 kev.fflags = kn->kn_sfflags; 401 kev.data = kn->kn_id; /* parent */ 402 kev.udata = kn->kn_udata; /* preserve udata */ 403 error = kqueue_register(kq, &kev, NULL); 404 if (error) 405 kn->kn_fflags |= NOTE_TRACKERR; 406 } 407 408 return (kn->kn_fflags != 0); 409 } 410 411 static void 412 filt_timer_timeout_add(struct knote *kn) 413 { 414 struct timeval tv; 415 struct timeout *to = kn->kn_hook; 416 int tticks; 417 418 tv.tv_sec = kn->kn_sdata / 1000; 419 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 420 tticks = tvtohz(&tv); 421 /* Remove extra tick from tvtohz() if timeout has fired before. */ 422 if (timeout_triggered(to)) 423 tticks--; 424 timeout_add(to, (tticks > 0) ? 
	    tticks : 1);
}

void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct kqueue *kq = kn->kn_kq;

	kn->kn_data++;
	mtx_enter(&kq->kq_lock);
	knote_activate(kn);
	mtx_leave(&kq->kq_lock);

	if ((kn->kn_flags & EV_ONESHOT) == 0)
		filt_timer_timeout_add(kn);
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;

	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	kn->kn_hook = to;
	filt_timer_timeout_add(kn);

	return (0);
}

void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del_barrier(to);
	free(to, M_KEVENT, sizeof(*to));
	kq_ntimeouts--;
}

int
filt_timermodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct timeout *to = kn->kn_hook;

	/* Reset the timer.  Any pending events are discarded. */

	timeout_del_barrier(to);

	mtx_enter(&kq->kq_lock);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	kn->kn_status &= ~KN_ACTIVE;
	mtx_leave(&kq->kq_lock);

	kn->kn_data = 0;
	knote_modify(kev, kn);
	/* Reinit timeout to invoke tick adjustment again. */
	timeout_set(to, filt_timerexpire, kn);
	filt_timer_timeout_add(kn);

	return (0);
}

int
filt_timerprocess(struct knote *kn, struct kevent *kev)
{
	int active, s;

	s = splsoftclock();
	active = (kn->kn_data != 0);
	if (active)
		knote_submit(kn, kev);
	splx(s);

	return (active);
}

/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{
	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does as well.
	 */
	kn->kn_data = 0;
	return (1);
}

int
filt_seltruemodify(struct kevent *kev, struct knote *kn)
{
	knote_modify(kev, kn);
	return (1);
}

int
filt_seltrueprocess(struct knote *kn, struct kevent *kev)
{
	knote_submit(kn, kev);
	return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, which
 * has the same effect as a filter that uses filt_seltrue() as its
 * filter method.
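 * Drivers hook it up via seltrue_kqfilter() below, which accepts
 * EVFILT_READ and EVFILT_WRITE and always reports the knote as ready.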
550 */ 551 void 552 filt_seltruedetach(struct knote *kn) 553 { 554 /* Nothing to do */ 555 } 556 557 const struct filterops seltrue_filtops = { 558 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 559 .f_attach = NULL, 560 .f_detach = filt_seltruedetach, 561 .f_event = filt_seltrue, 562 .f_modify = filt_seltruemodify, 563 .f_process = filt_seltrueprocess, 564 }; 565 566 int 567 seltrue_kqfilter(dev_t dev, struct knote *kn) 568 { 569 switch (kn->kn_filter) { 570 case EVFILT_READ: 571 case EVFILT_WRITE: 572 kn->kn_fop = &seltrue_filtops; 573 break; 574 default: 575 return (EINVAL); 576 } 577 578 /* Nothing more to do */ 579 return (0); 580 } 581 582 static int 583 filt_dead(struct knote *kn, long hint) 584 { 585 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 586 if (kn->kn_flags & __EV_POLL) 587 kn->kn_flags |= __EV_HUP; 588 kn->kn_data = 0; 589 return (1); 590 } 591 592 static void 593 filt_deaddetach(struct knote *kn) 594 { 595 /* Nothing to do */ 596 } 597 598 const struct filterops dead_filtops = { 599 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 600 .f_attach = NULL, 601 .f_detach = filt_deaddetach, 602 .f_event = filt_dead, 603 .f_modify = filt_seltruemodify, 604 .f_process = filt_seltrueprocess, 605 }; 606 607 static int 608 filt_badfd(struct knote *kn, long hint) 609 { 610 kn->kn_flags |= (EV_ERROR | EV_ONESHOT); 611 kn->kn_data = EBADF; 612 return (1); 613 } 614 615 /* For use with kqpoll. */ 616 const struct filterops badfd_filtops = { 617 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 618 .f_attach = NULL, 619 .f_detach = filt_deaddetach, 620 .f_event = filt_badfd, 621 .f_modify = filt_seltruemodify, 622 .f_process = filt_seltrueprocess, 623 }; 624 625 static int 626 filter_attach(struct knote *kn) 627 { 628 int error; 629 630 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 631 error = kn->kn_fop->f_attach(kn); 632 } else { 633 KERNEL_LOCK(); 634 error = kn->kn_fop->f_attach(kn); 635 KERNEL_UNLOCK(); 636 } 637 return (error); 638 } 639 640 static void 641 filter_detach(struct knote *kn) 642 { 643 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 644 kn->kn_fop->f_detach(kn); 645 } else { 646 KERNEL_LOCK(); 647 kn->kn_fop->f_detach(kn); 648 KERNEL_UNLOCK(); 649 } 650 } 651 652 static int 653 filter_event(struct knote *kn, long hint) 654 { 655 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 656 KERNEL_ASSERT_LOCKED(); 657 658 return (kn->kn_fop->f_event(kn, hint)); 659 } 660 661 static int 662 filter_modify(struct kevent *kev, struct knote *kn) 663 { 664 int active, s; 665 666 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 667 active = kn->kn_fop->f_modify(kev, kn); 668 } else { 669 KERNEL_LOCK(); 670 if (kn->kn_fop->f_modify != NULL) { 671 active = kn->kn_fop->f_modify(kev, kn); 672 } else { 673 /* Emulate f_modify using f_event. */ 674 s = splhigh(); 675 knote_modify(kev, kn); 676 active = kn->kn_fop->f_event(kn, 0); 677 splx(s); 678 } 679 KERNEL_UNLOCK(); 680 } 681 return (active); 682 } 683 684 static int 685 filter_process(struct knote *kn, struct kevent *kev) 686 { 687 int active, s; 688 689 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 690 active = kn->kn_fop->f_process(kn, kev); 691 } else { 692 KERNEL_LOCK(); 693 if (kn->kn_fop->f_process != NULL) { 694 active = kn->kn_fop->f_process(kn, kev); 695 } else { 696 /* Emulate f_process using f_event. */ 697 s = splhigh(); 698 /* 699 * If called from kqueue_scan(), skip f_event 700 * when EV_ONESHOT is set, to preserve old behaviour. 
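 * A NULL kev means the caller only polls for a pending event;
 * knote_submit() then leaves the knote's state untouched.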
701 */ 702 if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) 703 active = 1; 704 else 705 active = kn->kn_fop->f_event(kn, 0); 706 if (active) 707 knote_submit(kn, kev); 708 splx(s); 709 } 710 KERNEL_UNLOCK(); 711 } 712 return (active); 713 } 714 715 void 716 kqpoll_init(void) 717 { 718 struct proc *p = curproc; 719 struct filedesc *fdp; 720 721 if (p->p_kq != NULL) { 722 /* 723 * Discard any badfd knotes that have been enqueued after 724 * previous scan. 725 * This prevents them from accumulating in case 726 * scan does not make progress for some reason. 727 */ 728 kqpoll_dequeue(p, 0); 729 return; 730 } 731 732 p->p_kq = kqueue_alloc(p->p_fd); 733 p->p_kq_serial = arc4random(); 734 fdp = p->p_fd; 735 fdplock(fdp); 736 LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); 737 fdpunlock(fdp); 738 } 739 740 void 741 kqpoll_exit(void) 742 { 743 struct proc *p = curproc; 744 745 if (p->p_kq == NULL) 746 return; 747 748 kqueue_purge(p, p->p_kq); 749 /* Clear any detached knotes that remain in the queue. */ 750 kqpoll_dequeue(p, 1); 751 kqueue_terminate(p, p->p_kq); 752 KASSERT(p->p_kq->kq_refs == 1); 753 KQRELE(p->p_kq); 754 p->p_kq = NULL; 755 } 756 757 void 758 kqpoll_dequeue(struct proc *p, int all) 759 { 760 struct knote marker; 761 struct knote *kn; 762 struct kqueue *kq = p->p_kq; 763 764 /* 765 * Bail out early without locking if the queue appears empty. 766 * 767 * This thread might not see the latest value of kq_count yet. 768 * However, if there is any sustained increase in the queue size, 769 * this thread will eventually observe that kq_count has become 770 * non-zero. 771 */ 772 if (all == 0 && kq->kq_count == 0) 773 return; 774 775 memset(&marker, 0, sizeof(marker)); 776 marker.kn_filter = EVFILT_MARKER; 777 marker.kn_status = KN_PROCESSING; 778 779 mtx_enter(&kq->kq_lock); 780 kn = TAILQ_FIRST(&kq->kq_head); 781 while (kn != NULL) { 782 /* This kqueue should not be scanned by other threads. */ 783 KASSERT(kn->kn_filter != EVFILT_MARKER); 784 785 if (all == 0 && (kn->kn_status & KN_ATTACHED)) { 786 kn = TAILQ_NEXT(kn, kn_tqe); 787 continue; 788 } 789 790 TAILQ_INSERT_BEFORE(kn, &marker, kn_tqe); 791 792 if (!knote_acquire(kn, NULL, 0)) { 793 /* knote_acquire() has released kq_lock. 
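 * Re-lock below and resume the walk from the marker.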
*/ 794 } else { 795 kqueue_check(kq); 796 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 797 kn->kn_status &= ~KN_QUEUED; 798 kq->kq_count--; 799 mtx_leave(&kq->kq_lock); 800 801 filter_detach(kn); 802 knote_drop(kn, p); 803 } 804 805 mtx_enter(&kq->kq_lock); 806 kqueue_check(kq); 807 kn = TAILQ_NEXT(&marker, kn_tqe); 808 TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe); 809 } 810 mtx_leave(&kq->kq_lock); 811 } 812 813 struct kqueue * 814 kqueue_alloc(struct filedesc *fdp) 815 { 816 struct kqueue *kq; 817 818 kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); 819 kq->kq_refs = 1; 820 kq->kq_fdp = fdp; 821 TAILQ_INIT(&kq->kq_head); 822 mtx_init(&kq->kq_lock, IPL_HIGH); 823 task_set(&kq->kq_task, kqueue_task, kq); 824 825 return (kq); 826 } 827 828 int 829 sys_kqueue(struct proc *p, void *v, register_t *retval) 830 { 831 struct filedesc *fdp = p->p_fd; 832 struct kqueue *kq; 833 struct file *fp; 834 int fd, error; 835 836 kq = kqueue_alloc(fdp); 837 838 fdplock(fdp); 839 error = falloc(p, &fp, &fd); 840 if (error) 841 goto out; 842 fp->f_flag = FREAD | FWRITE; 843 fp->f_type = DTYPE_KQUEUE; 844 fp->f_ops = &kqueueops; 845 fp->f_data = kq; 846 *retval = fd; 847 LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); 848 kq = NULL; 849 fdinsert(fdp, fd, 0, fp); 850 FRELE(fp, p); 851 out: 852 fdpunlock(fdp); 853 if (kq != NULL) 854 pool_put(&kqueue_pool, kq); 855 return (error); 856 } 857 858 int 859 sys_kevent(struct proc *p, void *v, register_t *retval) 860 { 861 struct kqueue_scan_state scan; 862 struct filedesc* fdp = p->p_fd; 863 struct sys_kevent_args /* { 864 syscallarg(int) fd; 865 syscallarg(const struct kevent *) changelist; 866 syscallarg(int) nchanges; 867 syscallarg(struct kevent *) eventlist; 868 syscallarg(int) nevents; 869 syscallarg(const struct timespec *) timeout; 870 } */ *uap = v; 871 struct kevent *kevp; 872 struct kqueue *kq; 873 struct file *fp; 874 struct timespec ts; 875 struct timespec *tsp = NULL; 876 int i, n, nerrors, error; 877 int ready, total; 878 struct kevent kev[KQ_NEVENTS]; 879 880 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 881 return (EBADF); 882 883 if (fp->f_type != DTYPE_KQUEUE) { 884 error = EBADF; 885 goto done; 886 } 887 888 if (SCARG(uap, timeout) != NULL) { 889 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 890 if (error) 891 goto done; 892 #ifdef KTRACE 893 if (KTRPOINT(p, KTR_STRUCT)) 894 ktrreltimespec(p, &ts); 895 #endif 896 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { 897 error = EINVAL; 898 goto done; 899 } 900 tsp = &ts; 901 } 902 903 kq = fp->f_data; 904 nerrors = 0; 905 906 while ((n = SCARG(uap, nchanges)) > 0) { 907 if (n > nitems(kev)) 908 n = nitems(kev); 909 error = copyin(SCARG(uap, changelist), kev, 910 n * sizeof(struct kevent)); 911 if (error) 912 goto done; 913 #ifdef KTRACE 914 if (KTRPOINT(p, KTR_STRUCT)) 915 ktrevent(p, kev, n); 916 #endif 917 for (i = 0; i < n; i++) { 918 kevp = &kev[i]; 919 kevp->flags &= ~EV_SYSFLAGS; 920 error = kqueue_register(kq, kevp, p); 921 if (error || (kevp->flags & EV_RECEIPT)) { 922 if (SCARG(uap, nevents) != 0) { 923 kevp->flags = EV_ERROR; 924 kevp->data = error; 925 copyout(kevp, SCARG(uap, eventlist), 926 sizeof(*kevp)); 927 SCARG(uap, eventlist)++; 928 SCARG(uap, nevents)--; 929 nerrors++; 930 } else { 931 goto done; 932 } 933 } 934 } 935 SCARG(uap, nchanges) -= n; 936 SCARG(uap, changelist) += n; 937 } 938 if (nerrors) { 939 *retval = nerrors; 940 error = 0; 941 goto done; 942 } 943 944 kqueue_scan_setup(&scan, kq); 945 FRELE(fp, p); 946 /* 947 * Collect as many events as we can. 
The timeout on successive 948 * loops is disabled (kqueue_scan() becomes non-blocking). 949 */ 950 total = 0; 951 error = 0; 952 while ((n = SCARG(uap, nevents) - total) > 0) { 953 if (n > nitems(kev)) 954 n = nitems(kev); 955 ready = kqueue_scan(&scan, n, kev, tsp, p, &error); 956 if (ready == 0) 957 break; 958 error = copyout(kev, SCARG(uap, eventlist) + total, 959 sizeof(struct kevent) * ready); 960 #ifdef KTRACE 961 if (KTRPOINT(p, KTR_STRUCT)) 962 ktrevent(p, kev, ready); 963 #endif 964 total += ready; 965 if (error || ready < n) 966 break; 967 } 968 kqueue_scan_finish(&scan); 969 *retval = total; 970 return (error); 971 972 done: 973 FRELE(fp, p); 974 return (error); 975 } 976 977 #ifdef KQUEUE_DEBUG 978 void 979 kqueue_do_check(struct kqueue *kq, const char *func, int line) 980 { 981 struct knote *kn; 982 int count = 0, nmarker = 0; 983 984 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 985 986 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { 987 if (kn->kn_filter == EVFILT_MARKER) { 988 if ((kn->kn_status & KN_QUEUED) != 0) 989 panic("%s:%d: kq=%p kn=%p marker QUEUED", 990 func, line, kq, kn); 991 nmarker++; 992 } else { 993 if ((kn->kn_status & KN_ACTIVE) == 0) 994 panic("%s:%d: kq=%p kn=%p knote !ACTIVE", 995 func, line, kq, kn); 996 if ((kn->kn_status & KN_QUEUED) == 0) 997 panic("%s:%d: kq=%p kn=%p knote !QUEUED", 998 func, line, kq, kn); 999 if (kn->kn_kq != kq) 1000 panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", 1001 func, line, kq, kn, kn->kn_kq); 1002 count++; 1003 if (count > kq->kq_count) 1004 goto bad; 1005 } 1006 } 1007 if (count != kq->kq_count) { 1008 bad: 1009 panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", 1010 func, line, kq, kq->kq_count, count, nmarker); 1011 } 1012 } 1013 #endif 1014 1015 int 1016 kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) 1017 { 1018 struct filedesc *fdp = kq->kq_fdp; 1019 const struct filterops *fops = NULL; 1020 struct file *fp = NULL; 1021 struct knote *kn = NULL, *newkn = NULL; 1022 struct knlist *list = NULL; 1023 int active, error = 0; 1024 1025 if (kev->filter < 0) { 1026 if (kev->filter + EVFILT_SYSCOUNT < 0) 1027 return (EINVAL); 1028 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 1029 } 1030 1031 if (fops == NULL) { 1032 /* 1033 * XXX 1034 * filter attach routine is responsible for ensuring that 1035 * the identifier can be attached to it. 1036 */ 1037 return (EINVAL); 1038 } 1039 1040 if (fops->f_flags & FILTEROP_ISFD) { 1041 /* validate descriptor */ 1042 if (kev->ident > INT_MAX) 1043 return (EBADF); 1044 } 1045 1046 if (kev->flags & EV_ADD) 1047 newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); 1048 1049 again: 1050 if (fops->f_flags & FILTEROP_ISFD) { 1051 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { 1052 error = EBADF; 1053 goto done; 1054 } 1055 mtx_enter(&kq->kq_lock); 1056 if (kev->flags & EV_ADD) 1057 kqueue_expand_list(kq, kev->ident); 1058 if (kev->ident < kq->kq_knlistsize) 1059 list = &kq->kq_knlist[kev->ident]; 1060 } else { 1061 mtx_enter(&kq->kq_lock); 1062 if (kev->flags & EV_ADD) 1063 kqueue_expand_hash(kq); 1064 if (kq->kq_knhashmask != 0) { 1065 list = &kq->kq_knhash[ 1066 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1067 } 1068 } 1069 if (list != NULL) { 1070 SLIST_FOREACH(kn, list, kn_link) { 1071 if (kev->filter == kn->kn_filter && 1072 kev->ident == kn->kn_id) { 1073 if (!knote_acquire(kn, NULL, 0)) { 1074 /* knote_acquire() has released 1075 * kq_lock. 
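 * Drop the file reference, if any, and retry the
 * lookup from the start.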
*/ 1076 if (fp != NULL) { 1077 FRELE(fp, p); 1078 fp = NULL; 1079 } 1080 goto again; 1081 } 1082 break; 1083 } 1084 } 1085 } 1086 KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); 1087 1088 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 1089 mtx_leave(&kq->kq_lock); 1090 error = ENOENT; 1091 goto done; 1092 } 1093 1094 /* 1095 * kn now contains the matching knote, or NULL if no match. 1096 */ 1097 if (kev->flags & EV_ADD) { 1098 if (kn == NULL) { 1099 kn = newkn; 1100 newkn = NULL; 1101 kn->kn_status = KN_PROCESSING; 1102 kn->kn_fp = fp; 1103 kn->kn_kq = kq; 1104 kn->kn_fop = fops; 1105 1106 /* 1107 * apply reference count to knote structure, and 1108 * do not release it at the end of this routine. 1109 */ 1110 fp = NULL; 1111 1112 kn->kn_sfflags = kev->fflags; 1113 kn->kn_sdata = kev->data; 1114 kev->fflags = 0; 1115 kev->data = 0; 1116 kn->kn_kevent = *kev; 1117 1118 knote_attach(kn); 1119 mtx_leave(&kq->kq_lock); 1120 1121 error = filter_attach(kn); 1122 if (error != 0) { 1123 knote_drop(kn, p); 1124 goto done; 1125 } 1126 1127 /* 1128 * If this is a file descriptor filter, check if 1129 * fd was closed while the knote was being added. 1130 * knote_fdclose() has missed kn if the function 1131 * ran before kn appeared in kq_knlist. 1132 */ 1133 if ((fops->f_flags & FILTEROP_ISFD) && 1134 fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { 1135 /* 1136 * Drop the knote silently without error 1137 * because another thread might already have 1138 * seen it. This corresponds to the insert 1139 * happening in full before the close. 1140 */ 1141 filter_detach(kn); 1142 knote_drop(kn, p); 1143 goto done; 1144 } 1145 1146 /* Check if there is a pending event. */ 1147 active = filter_process(kn, NULL); 1148 mtx_enter(&kq->kq_lock); 1149 if (active) 1150 knote_activate(kn); 1151 } else { 1152 /* 1153 * The user may change some filter values after the 1154 * initial EV_ADD, but doing so will not reset any 1155 * filters which have already been triggered. 1156 */ 1157 mtx_leave(&kq->kq_lock); 1158 active = filter_modify(kev, kn); 1159 mtx_enter(&kq->kq_lock); 1160 if (active) 1161 knote_activate(kn); 1162 if (kev->flags & EV_ERROR) { 1163 error = kev->data; 1164 goto release; 1165 } 1166 } 1167 } else if (kev->flags & EV_DELETE) { 1168 mtx_leave(&kq->kq_lock); 1169 filter_detach(kn); 1170 knote_drop(kn, p); 1171 goto done; 1172 } 1173 1174 if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) 1175 kn->kn_status |= KN_DISABLED; 1176 1177 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 1178 kn->kn_status &= ~KN_DISABLED; 1179 mtx_leave(&kq->kq_lock); 1180 /* Check if there is a pending event. 
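 * Passing a NULL kevent makes filter_process() check the
 * filter without consuming the event.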
*/ 1181 active = filter_process(kn, NULL); 1182 mtx_enter(&kq->kq_lock); 1183 if (active) 1184 knote_activate(kn); 1185 } 1186 1187 release: 1188 knote_release(kn); 1189 mtx_leave(&kq->kq_lock); 1190 done: 1191 if (fp != NULL) 1192 FRELE(fp, p); 1193 if (newkn != NULL) 1194 pool_put(&knote_pool, newkn); 1195 return (error); 1196 } 1197 1198 int 1199 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1200 { 1201 struct timespec elapsed, start, stop; 1202 uint64_t nsecs; 1203 int error; 1204 1205 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1206 1207 if (tsp != NULL) { 1208 getnanouptime(&start); 1209 nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); 1210 } else 1211 nsecs = INFSLP; 1212 error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK, 1213 "kqread", nsecs); 1214 if (tsp != NULL) { 1215 getnanouptime(&stop); 1216 timespecsub(&stop, &start, &elapsed); 1217 timespecsub(tsp, &elapsed, tsp); 1218 if (tsp->tv_sec < 0) 1219 timespecclear(tsp); 1220 } 1221 1222 return (error); 1223 } 1224 1225 /* 1226 * Scan the kqueue, blocking if necessary until the target time is reached. 1227 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 1228 * 0 we do not block at all. 1229 */ 1230 int 1231 kqueue_scan(struct kqueue_scan_state *scan, int maxevents, 1232 struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) 1233 { 1234 struct kqueue *kq = scan->kqs_kq; 1235 struct knote *kn; 1236 int error = 0, nkev = 0; 1237 1238 if (maxevents == 0) 1239 goto done; 1240 retry: 1241 KASSERT(nkev == 0); 1242 1243 error = 0; 1244 1245 /* msleep() with PCATCH requires kernel lock. */ 1246 KERNEL_LOCK(); 1247 1248 mtx_enter(&kq->kq_lock); 1249 1250 if (kq->kq_state & KQ_DYING) { 1251 mtx_leave(&kq->kq_lock); 1252 KERNEL_UNLOCK(); 1253 error = EBADF; 1254 goto done; 1255 } 1256 1257 if (kq->kq_count == 0) { 1258 /* 1259 * Successive loops are only necessary if there are more 1260 * ready events to gather, so they don't need to block. 1261 */ 1262 if ((tsp != NULL && !timespecisset(tsp)) || 1263 scan->kqs_nevent != 0) { 1264 mtx_leave(&kq->kq_lock); 1265 KERNEL_UNLOCK(); 1266 error = 0; 1267 goto done; 1268 } 1269 kq->kq_state |= KQ_SLEEP; 1270 error = kqueue_sleep(kq, tsp); 1271 /* kqueue_sleep() has released kq_lock. */ 1272 KERNEL_UNLOCK(); 1273 if (error == 0 || error == EWOULDBLOCK) 1274 goto retry; 1275 /* don't restart after signals... */ 1276 if (error == ERESTART) 1277 error = EINTR; 1278 goto done; 1279 } 1280 1281 /* The actual scan does not sleep on kq, so unlock the kernel. */ 1282 KERNEL_UNLOCK(); 1283 1284 /* 1285 * Put the end marker in the queue to limit the scan to the events 1286 * that are currently active. This prevents events from being 1287 * recollected if they reactivate during scan. 1288 * 1289 * If a partial scan has been performed already but no events have 1290 * been collected, reposition the end marker to make any new events 1291 * reachable. 1292 */ 1293 if (!scan->kqs_queued) { 1294 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1295 scan->kqs_queued = 1; 1296 } else if (scan->kqs_nevent == 0) { 1297 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1298 TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); 1299 } 1300 1301 TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); 1302 while (nkev < maxevents) { 1303 kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); 1304 if (kn->kn_filter == EVFILT_MARKER) { 1305 if (kn == &scan->kqs_end) 1306 break; 1307 1308 /* Move start marker past another thread's marker. 
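 * Markers of concurrent scans may be interleaved in the
 * queue; only this scan's end marker terminates the loop.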
*/ 1309 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1310 TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, 1311 kn_tqe); 1312 continue; 1313 } 1314 1315 if (!knote_acquire(kn, NULL, 0)) { 1316 /* knote_acquire() has released kq_lock. */ 1317 mtx_enter(&kq->kq_lock); 1318 continue; 1319 } 1320 1321 kqueue_check(kq); 1322 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1323 kn->kn_status &= ~KN_QUEUED; 1324 kq->kq_count--; 1325 kqueue_check(kq); 1326 1327 if (kn->kn_status & KN_DISABLED) { 1328 knote_release(kn); 1329 continue; 1330 } 1331 1332 mtx_leave(&kq->kq_lock); 1333 1334 memset(kevp, 0, sizeof(*kevp)); 1335 if (filter_process(kn, kevp) == 0) { 1336 mtx_enter(&kq->kq_lock); 1337 if ((kn->kn_status & KN_QUEUED) == 0) 1338 kn->kn_status &= ~KN_ACTIVE; 1339 knote_release(kn); 1340 kqueue_check(kq); 1341 continue; 1342 } 1343 1344 /* 1345 * Post-event action on the note 1346 */ 1347 if (kevp->flags & EV_ONESHOT) { 1348 filter_detach(kn); 1349 knote_drop(kn, p); 1350 mtx_enter(&kq->kq_lock); 1351 } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { 1352 mtx_enter(&kq->kq_lock); 1353 if (kevp->flags & EV_DISPATCH) 1354 kn->kn_status |= KN_DISABLED; 1355 if ((kn->kn_status & KN_QUEUED) == 0) 1356 kn->kn_status &= ~KN_ACTIVE; 1357 KASSERT(kn->kn_status & KN_ATTACHED); 1358 knote_release(kn); 1359 } else { 1360 mtx_enter(&kq->kq_lock); 1361 if ((kn->kn_status & KN_QUEUED) == 0) { 1362 kqueue_check(kq); 1363 kq->kq_count++; 1364 kn->kn_status |= KN_QUEUED; 1365 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1366 } 1367 KASSERT(kn->kn_status & KN_ATTACHED); 1368 knote_release(kn); 1369 } 1370 kqueue_check(kq); 1371 1372 kevp++; 1373 nkev++; 1374 scan->kqs_nevent++; 1375 } 1376 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); 1377 mtx_leave(&kq->kq_lock); 1378 if (scan->kqs_nevent == 0) 1379 goto retry; 1380 done: 1381 *errorp = error; 1382 return (nkev); 1383 } 1384 1385 void 1386 kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) 1387 { 1388 memset(scan, 0, sizeof(*scan)); 1389 1390 KQREF(kq); 1391 scan->kqs_kq = kq; 1392 scan->kqs_start.kn_filter = EVFILT_MARKER; 1393 scan->kqs_start.kn_status = KN_PROCESSING; 1394 scan->kqs_end.kn_filter = EVFILT_MARKER; 1395 scan->kqs_end.kn_status = KN_PROCESSING; 1396 } 1397 1398 void 1399 kqueue_scan_finish(struct kqueue_scan_state *scan) 1400 { 1401 struct kqueue *kq = scan->kqs_kq; 1402 1403 KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); 1404 KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); 1405 KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); 1406 KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); 1407 1408 if (scan->kqs_queued) { 1409 scan->kqs_queued = 0; 1410 mtx_enter(&kq->kq_lock); 1411 TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); 1412 mtx_leave(&kq->kq_lock); 1413 } 1414 KQRELE(kq); 1415 } 1416 1417 /* 1418 * XXX 1419 * This could be expanded to call kqueue_scan, if desired. 
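 * For now, read(2) and write(2) on a kqueue simply fail with ENXIO.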
1420 */ 1421 int 1422 kqueue_read(struct file *fp, struct uio *uio, int fflags) 1423 { 1424 return (ENXIO); 1425 } 1426 1427 int 1428 kqueue_write(struct file *fp, struct uio *uio, int fflags) 1429 { 1430 return (ENXIO); 1431 } 1432 1433 int 1434 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 1435 { 1436 return (ENOTTY); 1437 } 1438 1439 int 1440 kqueue_poll(struct file *fp, int events, struct proc *p) 1441 { 1442 struct kqueue *kq = (struct kqueue *)fp->f_data; 1443 int revents = 0; 1444 1445 if (events & (POLLIN | POLLRDNORM)) { 1446 mtx_enter(&kq->kq_lock); 1447 if (kq->kq_count) { 1448 revents |= events & (POLLIN | POLLRDNORM); 1449 } else { 1450 selrecord(p, &kq->kq_sel); 1451 kq->kq_state |= KQ_SEL; 1452 } 1453 mtx_leave(&kq->kq_lock); 1454 } 1455 return (revents); 1456 } 1457 1458 int 1459 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1460 { 1461 struct kqueue *kq = fp->f_data; 1462 1463 memset(st, 0, sizeof(*st)); 1464 st->st_size = kq->kq_count; /* unlocked read */ 1465 st->st_blksize = sizeof(struct kevent); 1466 st->st_mode = S_IFIFO; 1467 return (0); 1468 } 1469 1470 void 1471 kqueue_purge(struct proc *p, struct kqueue *kq) 1472 { 1473 int i; 1474 1475 mtx_enter(&kq->kq_lock); 1476 for (i = 0; i < kq->kq_knlistsize; i++) 1477 knote_remove(p, kq, &kq->kq_knlist[i], 1); 1478 if (kq->kq_knhashmask != 0) { 1479 for (i = 0; i < kq->kq_knhashmask + 1; i++) 1480 knote_remove(p, kq, &kq->kq_knhash[i], 1); 1481 } 1482 mtx_leave(&kq->kq_lock); 1483 } 1484 1485 void 1486 kqueue_terminate(struct proc *p, struct kqueue *kq) 1487 { 1488 struct knote *kn; 1489 1490 mtx_enter(&kq->kq_lock); 1491 1492 /* 1493 * Any remaining entries should be scan markers. 1494 * They are removed when the ongoing scans finish. 1495 */ 1496 KASSERT(kq->kq_count == 0); 1497 TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) 1498 KASSERT(kn->kn_filter == EVFILT_MARKER); 1499 1500 kq->kq_state |= KQ_DYING; 1501 kqueue_wakeup(kq); 1502 mtx_leave(&kq->kq_lock); 1503 1504 KASSERT(klist_empty(&kq->kq_sel.si_note)); 1505 task_del(systq, &kq->kq_task); 1506 1507 } 1508 1509 int 1510 kqueue_close(struct file *fp, struct proc *p) 1511 { 1512 struct kqueue *kq = fp->f_data; 1513 1514 fp->f_data = NULL; 1515 1516 kqueue_purge(p, kq); 1517 kqueue_terminate(p, kq); 1518 1519 KQRELE(kq); 1520 1521 return (0); 1522 } 1523 1524 static void 1525 kqueue_task(void *arg) 1526 { 1527 struct kqueue *kq = arg; 1528 1529 /* Kernel lock is needed inside selwakeup(). */ 1530 KERNEL_ASSERT_LOCKED(); 1531 1532 mtx_enter(&kq->kq_lock); 1533 if (kq->kq_state & KQ_SEL) { 1534 kq->kq_state &= ~KQ_SEL; 1535 mtx_leave(&kq->kq_lock); 1536 selwakeup(&kq->kq_sel); 1537 } else { 1538 mtx_leave(&kq->kq_lock); 1539 KNOTE(&kq->kq_sel.si_note, 0); 1540 } 1541 KQRELE(kq); 1542 } 1543 1544 void 1545 kqueue_wakeup(struct kqueue *kq) 1546 { 1547 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1548 1549 if (kq->kq_state & KQ_SLEEP) { 1550 kq->kq_state &= ~KQ_SLEEP; 1551 wakeup(kq); 1552 } 1553 if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) { 1554 /* Defer activation to avoid recursion. 
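 * kqueue_task() runs from the systq task queue with the kernel
 * lock held and performs the actual selwakeup() or KNOTE().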
*/ 1555 KQREF(kq); 1556 if (!task_add(systq, &kq->kq_task)) 1557 KQRELE(kq); 1558 } 1559 } 1560 1561 static void 1562 kqueue_expand_hash(struct kqueue *kq) 1563 { 1564 struct knlist *hash; 1565 u_long hashmask; 1566 1567 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1568 1569 if (kq->kq_knhashmask == 0) { 1570 mtx_leave(&kq->kq_lock); 1571 hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); 1572 mtx_enter(&kq->kq_lock); 1573 if (kq->kq_knhashmask == 0) { 1574 kq->kq_knhash = hash; 1575 kq->kq_knhashmask = hashmask; 1576 } else { 1577 /* Another thread has allocated the hash. */ 1578 mtx_leave(&kq->kq_lock); 1579 hashfree(hash, KN_HASHSIZE, M_KEVENT); 1580 mtx_enter(&kq->kq_lock); 1581 } 1582 } 1583 } 1584 1585 static void 1586 kqueue_expand_list(struct kqueue *kq, int fd) 1587 { 1588 struct knlist *list, *olist; 1589 int size, osize; 1590 1591 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1592 1593 if (kq->kq_knlistsize <= fd) { 1594 size = kq->kq_knlistsize; 1595 mtx_leave(&kq->kq_lock); 1596 while (size <= fd) 1597 size += KQEXTENT; 1598 list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); 1599 mtx_enter(&kq->kq_lock); 1600 if (kq->kq_knlistsize <= fd) { 1601 memcpy(list, kq->kq_knlist, 1602 kq->kq_knlistsize * sizeof(*list)); 1603 memset(&list[kq->kq_knlistsize], 0, 1604 (size - kq->kq_knlistsize) * sizeof(*list)); 1605 olist = kq->kq_knlist; 1606 osize = kq->kq_knlistsize; 1607 kq->kq_knlist = list; 1608 kq->kq_knlistsize = size; 1609 mtx_leave(&kq->kq_lock); 1610 free(olist, M_KEVENT, osize * sizeof(*list)); 1611 mtx_enter(&kq->kq_lock); 1612 } else { 1613 /* Another thread has expanded the list. */ 1614 mtx_leave(&kq->kq_lock); 1615 free(list, M_KEVENT, size * sizeof(*list)); 1616 mtx_enter(&kq->kq_lock); 1617 } 1618 } 1619 } 1620 1621 /* 1622 * Acquire a knote, return non-zero on success, 0 on failure. 1623 * 1624 * If we cannot acquire the knote we sleep and return 0. The knote 1625 * may be stale on return in this case and the caller must restart 1626 * whatever loop they are in. 1627 * 1628 * If we are about to sleep and klist is non-NULL, the list is unlocked 1629 * before sleep and remains unlocked on return. 1630 */ 1631 int 1632 knote_acquire(struct knote *kn, struct klist *klist, int ls) 1633 { 1634 struct kqueue *kq = kn->kn_kq; 1635 1636 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1637 KASSERT(kn->kn_filter != EVFILT_MARKER); 1638 1639 if (kn->kn_status & KN_PROCESSING) { 1640 kn->kn_status |= KN_WAITING; 1641 if (klist != NULL) { 1642 mtx_leave(&kq->kq_lock); 1643 klist_unlock(klist, ls); 1644 /* XXX Timeout resolves potential loss of wakeup. */ 1645 tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); 1646 } else { 1647 msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts", 1648 SEC_TO_NSEC(1)); 1649 } 1650 /* knote may be stale now */ 1651 return (0); 1652 } 1653 kn->kn_status |= KN_PROCESSING; 1654 return (1); 1655 } 1656 1657 /* 1658 * Release an acquired knote, clearing KN_PROCESSING. 1659 */ 1660 void 1661 knote_release(struct knote *kn) 1662 { 1663 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1664 KASSERT(kn->kn_filter != EVFILT_MARKER); 1665 KASSERT(kn->kn_status & KN_PROCESSING); 1666 1667 if (kn->kn_status & KN_WAITING) { 1668 kn->kn_status &= ~KN_WAITING; 1669 wakeup(kn); 1670 } 1671 kn->kn_status &= ~KN_PROCESSING; 1672 /* kn should not be accessed anymore */ 1673 } 1674 1675 /* 1676 * activate one knote. 
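 * The knote is put on its kqueue's active queue unless it is
 * disabled or already queued.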
1677 */ 1678 void 1679 knote_activate(struct knote *kn) 1680 { 1681 MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock); 1682 1683 kn->kn_status |= KN_ACTIVE; 1684 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) 1685 knote_enqueue(kn); 1686 } 1687 1688 /* 1689 * walk down a list of knotes, activating them if their event has triggered. 1690 */ 1691 void 1692 knote(struct klist *list, long hint) 1693 { 1694 struct knote *kn, *kn0; 1695 struct kqueue *kq; 1696 1697 KLIST_ASSERT_LOCKED(list); 1698 1699 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) { 1700 if (filter_event(kn, hint)) { 1701 kq = kn->kn_kq; 1702 mtx_enter(&kq->kq_lock); 1703 knote_activate(kn); 1704 mtx_leave(&kq->kq_lock); 1705 } 1706 } 1707 } 1708 1709 /* 1710 * remove all knotes from a specified knlist 1711 */ 1712 void 1713 knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list, int purge) 1714 { 1715 struct knote *kn; 1716 1717 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1718 1719 while ((kn = SLIST_FIRST(list)) != NULL) { 1720 KASSERT(kn->kn_kq == kq); 1721 if (!knote_acquire(kn, NULL, 0)) { 1722 /* knote_acquire() has released kq_lock. */ 1723 mtx_enter(&kq->kq_lock); 1724 continue; 1725 } 1726 mtx_leave(&kq->kq_lock); 1727 filter_detach(kn); 1728 1729 /* 1730 * Notify poll(2) and select(2) when a monitored 1731 * file descriptor is closed. 1732 * 1733 * This reuses the original knote for delivering the 1734 * notification so as to avoid allocating memory. 1735 * The knote will be reachable only through the queue 1736 * of active knotes and is freed either by kqueue_scan() 1737 * or kqpoll_dequeue(). 1738 */ 1739 if (!purge && (kn->kn_flags & __EV_POLL) != 0) { 1740 KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); 1741 mtx_enter(&kq->kq_lock); 1742 knote_detach(kn); 1743 mtx_leave(&kq->kq_lock); 1744 FRELE(kn->kn_fp, p); 1745 kn->kn_fp = NULL; 1746 1747 kn->kn_fop = &badfd_filtops; 1748 filter_event(kn, 0); 1749 mtx_enter(&kq->kq_lock); 1750 knote_activate(kn); 1751 knote_release(kn); 1752 continue; 1753 } 1754 1755 knote_drop(kn, p); 1756 mtx_enter(&kq->kq_lock); 1757 } 1758 } 1759 1760 /* 1761 * remove all knotes referencing a specified fd 1762 */ 1763 void 1764 knote_fdclose(struct proc *p, int fd) 1765 { 1766 struct filedesc *fdp = p->p_p->ps_fd; 1767 struct kqueue *kq; 1768 1769 /* 1770 * fdplock can be ignored if the file descriptor table is being freed 1771 * because no other thread can access the fdp. 
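 * Otherwise the caller must hold fdplock, as asserted below.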
1772 */ 1773 if (fdp->fd_refcnt != 0) 1774 fdpassertlocked(fdp); 1775 1776 LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { 1777 mtx_enter(&kq->kq_lock); 1778 if (fd < kq->kq_knlistsize) 1779 knote_remove(p, kq, &kq->kq_knlist[fd], 0); 1780 mtx_leave(&kq->kq_lock); 1781 } 1782 } 1783 1784 /* 1785 * handle a process exiting, including the triggering of NOTE_EXIT notes 1786 * XXX this could be more efficient, doing a single pass down the klist 1787 */ 1788 void 1789 knote_processexit(struct proc *p) 1790 { 1791 struct process *pr = p->p_p; 1792 1793 KERNEL_ASSERT_LOCKED(); 1794 KASSERT(p == curproc); 1795 1796 KNOTE(&pr->ps_klist, NOTE_EXIT); 1797 1798 /* remove other knotes hanging off the process */ 1799 klist_invalidate(&pr->ps_klist); 1800 } 1801 1802 void 1803 knote_attach(struct knote *kn) 1804 { 1805 struct kqueue *kq = kn->kn_kq; 1806 struct knlist *list; 1807 1808 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1809 KASSERT(kn->kn_status & KN_PROCESSING); 1810 KASSERT((kn->kn_status & KN_ATTACHED) == 0); 1811 1812 kn->kn_status |= KN_ATTACHED; 1813 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1814 KASSERT(kq->kq_knlistsize > kn->kn_id); 1815 list = &kq->kq_knlist[kn->kn_id]; 1816 } else { 1817 KASSERT(kq->kq_knhashmask != 0); 1818 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1819 } 1820 SLIST_INSERT_HEAD(list, kn, kn_link); 1821 } 1822 1823 void 1824 knote_detach(struct knote *kn) 1825 { 1826 struct kqueue *kq = kn->kn_kq; 1827 struct knlist *list; 1828 1829 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1830 KASSERT(kn->kn_status & KN_PROCESSING); 1831 1832 if ((kn->kn_status & KN_ATTACHED) == 0) 1833 return; 1834 1835 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1836 list = &kq->kq_knlist[kn->kn_id]; 1837 else 1838 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1839 SLIST_REMOVE(list, kn, knote, kn_link); 1840 kn->kn_status &= ~KN_ATTACHED; 1841 } 1842 1843 /* 1844 * should be called at spl == 0, since we don't want to hold spl 1845 * while calling FRELE and pool_put. 1846 */ 1847 void 1848 knote_drop(struct knote *kn, struct proc *p) 1849 { 1850 struct kqueue *kq = kn->kn_kq; 1851 1852 KASSERT(kn->kn_filter != EVFILT_MARKER); 1853 1854 mtx_enter(&kq->kq_lock); 1855 knote_detach(kn); 1856 if (kn->kn_status & KN_QUEUED) 1857 knote_dequeue(kn); 1858 if (kn->kn_status & KN_WAITING) { 1859 kn->kn_status &= ~KN_WAITING; 1860 wakeup(kn); 1861 } 1862 mtx_leave(&kq->kq_lock); 1863 1864 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) 1865 FRELE(kn->kn_fp, p); 1866 pool_put(&knote_pool, kn); 1867 } 1868 1869 1870 void 1871 knote_enqueue(struct knote *kn) 1872 { 1873 struct kqueue *kq = kn->kn_kq; 1874 1875 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1876 KASSERT(kn->kn_filter != EVFILT_MARKER); 1877 KASSERT((kn->kn_status & KN_QUEUED) == 0); 1878 1879 kqueue_check(kq); 1880 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1881 kn->kn_status |= KN_QUEUED; 1882 kq->kq_count++; 1883 kqueue_check(kq); 1884 kqueue_wakeup(kq); 1885 } 1886 1887 void 1888 knote_dequeue(struct knote *kn) 1889 { 1890 struct kqueue *kq = kn->kn_kq; 1891 1892 MUTEX_ASSERT_LOCKED(&kq->kq_lock); 1893 KASSERT(kn->kn_filter != EVFILT_MARKER); 1894 KASSERT(kn->kn_status & KN_QUEUED); 1895 1896 kqueue_check(kq); 1897 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1898 kn->kn_status &= ~KN_QUEUED; 1899 kq->kq_count--; 1900 kqueue_check(kq); 1901 } 1902 1903 /* 1904 * Modify the knote's parameters. 1905 * 1906 * The knote's object lock must be held. 
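 * For filters without FILTEROP_MPSAFE this is the kernel lock,
 * as asserted below.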
1907 */ 1908 void 1909 knote_modify(const struct kevent *kev, struct knote *kn) 1910 { 1911 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 1912 KERNEL_ASSERT_LOCKED(); 1913 1914 kn->kn_sfflags = kev->fflags; 1915 kn->kn_sdata = kev->data; 1916 kn->kn_udata = kev->udata; 1917 } 1918 1919 /* 1920 * Submit the knote's event for delivery. 1921 * 1922 * The knote's object lock must be held. 1923 */ 1924 void 1925 knote_submit(struct knote *kn, struct kevent *kev) 1926 { 1927 if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) 1928 KERNEL_ASSERT_LOCKED(); 1929 1930 if (kev != NULL) { 1931 *kev = kn->kn_kevent; 1932 if (kn->kn_flags & EV_CLEAR) { 1933 kn->kn_fflags = 0; 1934 kn->kn_data = 0; 1935 } 1936 } 1937 } 1938 1939 void 1940 klist_init(struct klist *klist, const struct klistops *ops, void *arg) 1941 { 1942 SLIST_INIT(&klist->kl_list); 1943 klist->kl_ops = ops; 1944 klist->kl_arg = arg; 1945 } 1946 1947 void 1948 klist_free(struct klist *klist) 1949 { 1950 KASSERT(SLIST_EMPTY(&klist->kl_list)); 1951 } 1952 1953 void 1954 klist_insert(struct klist *klist, struct knote *kn) 1955 { 1956 int ls; 1957 1958 ls = klist_lock(klist); 1959 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 1960 klist_unlock(klist, ls); 1961 } 1962 1963 void 1964 klist_insert_locked(struct klist *klist, struct knote *kn) 1965 { 1966 KLIST_ASSERT_LOCKED(klist); 1967 1968 SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); 1969 } 1970 1971 void 1972 klist_remove(struct klist *klist, struct knote *kn) 1973 { 1974 int ls; 1975 1976 ls = klist_lock(klist); 1977 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 1978 klist_unlock(klist, ls); 1979 } 1980 1981 void 1982 klist_remove_locked(struct klist *klist, struct knote *kn) 1983 { 1984 KLIST_ASSERT_LOCKED(klist); 1985 1986 SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); 1987 } 1988 1989 int 1990 klist_empty(struct klist *klist) 1991 { 1992 return (SLIST_EMPTY(&klist->kl_list)); 1993 } 1994 1995 /* 1996 * Detach all knotes from klist. The knotes are rewired to indicate EOF. 1997 * 1998 * The caller of this function must not hold any locks that can block 1999 * filterops callbacks that run with KN_PROCESSING. 2000 * Otherwise this function might deadlock. 2001 */ 2002 void 2003 klist_invalidate(struct klist *list) 2004 { 2005 struct knote *kn; 2006 struct kqueue *kq; 2007 struct proc *p = curproc; 2008 int ls; 2009 2010 NET_ASSERT_UNLOCKED(); 2011 2012 ls = klist_lock(list); 2013 while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { 2014 kq = kn->kn_kq; 2015 mtx_enter(&kq->kq_lock); 2016 if (!knote_acquire(kn, list, ls)) { 2017 /* knote_acquire() has released kq_lock 2018 * and klist lock. 
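 * Reacquire the klist lock and restart from the head of
 * the list.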
*/ 2019 ls = klist_lock(list); 2020 continue; 2021 } 2022 mtx_leave(&kq->kq_lock); 2023 klist_unlock(list, ls); 2024 filter_detach(kn); 2025 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 2026 kn->kn_fop = &dead_filtops; 2027 filter_event(kn, 0); 2028 mtx_enter(&kq->kq_lock); 2029 knote_activate(kn); 2030 knote_release(kn); 2031 mtx_leave(&kq->kq_lock); 2032 } else { 2033 knote_drop(kn, p); 2034 } 2035 ls = klist_lock(list); 2036 } 2037 klist_unlock(list, ls); 2038 } 2039 2040 static int 2041 klist_lock(struct klist *list) 2042 { 2043 int ls = 0; 2044 2045 if (list->kl_ops != NULL) { 2046 ls = list->kl_ops->klo_lock(list->kl_arg); 2047 } else { 2048 KERNEL_LOCK(); 2049 ls = splhigh(); 2050 } 2051 return ls; 2052 } 2053 2054 static void 2055 klist_unlock(struct klist *list, int ls) 2056 { 2057 if (list->kl_ops != NULL) { 2058 list->kl_ops->klo_unlock(list->kl_arg, ls); 2059 } else { 2060 splx(ls); 2061 KERNEL_UNLOCK(); 2062 } 2063 } 2064 2065 static void 2066 klist_mutex_assertlk(void *arg) 2067 { 2068 struct mutex *mtx = arg; 2069 2070 (void)mtx; 2071 2072 MUTEX_ASSERT_LOCKED(mtx); 2073 } 2074 2075 static int 2076 klist_mutex_lock(void *arg) 2077 { 2078 struct mutex *mtx = arg; 2079 2080 mtx_enter(mtx); 2081 return 0; 2082 } 2083 2084 static void 2085 klist_mutex_unlock(void *arg, int s) 2086 { 2087 struct mutex *mtx = arg; 2088 2089 mtx_leave(mtx); 2090 } 2091 2092 static const struct klistops mutex_klistops = { 2093 .klo_assertlk = klist_mutex_assertlk, 2094 .klo_lock = klist_mutex_lock, 2095 .klo_unlock = klist_mutex_unlock, 2096 }; 2097 2098 void 2099 klist_init_mutex(struct klist *klist, struct mutex *mtx) 2100 { 2101 klist_init(klist, &mutex_klistops, mtx); 2102 } 2103 2104 static void 2105 klist_rwlock_assertlk(void *arg) 2106 { 2107 struct rwlock *rwl = arg; 2108 2109 (void)rwl; 2110 2111 rw_assert_wrlock(rwl); 2112 } 2113 2114 static int 2115 klist_rwlock_lock(void *arg) 2116 { 2117 struct rwlock *rwl = arg; 2118 2119 rw_enter_write(rwl); 2120 return 0; 2121 } 2122 2123 static void 2124 klist_rwlock_unlock(void *arg, int s) 2125 { 2126 struct rwlock *rwl = arg; 2127 2128 rw_exit_write(rwl); 2129 } 2130 2131 static const struct klistops rwlock_klistops = { 2132 .klo_assertlk = klist_rwlock_assertlk, 2133 .klo_lock = klist_rwlock_lock, 2134 .klo_unlock = klist_rwlock_unlock, 2135 }; 2136 2137 void 2138 klist_init_rwlock(struct klist *klist, struct rwlock *rwl) 2139 { 2140 klist_init(klist, &rwlock_klistops, rwl); 2141 } 2142