/*	$NetBSD: kern_event.c,v 1.61 2009/01/11 02:45:52 christos Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.61 2009/01/11 02:45:52 christos Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/kmem.h>
#include <sys/stat.h>
#include <sys/filedesc.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/conf.h>
#include <sys/atomic.h>

static int	kqueue_scan(file_t *, size_t, struct kevent *,
			    const struct timespec *, register_t *,
			    const struct kevent_ops *, struct kevent *,
			    size_t);
static int	kqueue_ioctl(file_t *, u_long, void *);
static int	kqueue_fcntl(file_t *, u_int, void *);
static int	kqueue_poll(file_t *, int);
static int	kqueue_kqfilter(file_t *, struct knote *);
static int	kqueue_stat(file_t *, struct stat *);
static int	kqueue_close(file_t *);
static int	kqueue_register(struct kqueue *, struct kevent *);
static void	kqueue_doclose(struct kqueue *, struct klist *, int);

static void	knote_detach(struct knote *, filedesc_t *fdp, bool);
static void	knote_enqueue(struct knote *);
static void	knote_activate(struct knote *);

static void	filt_kqdetach(struct knote *);
static int	filt_kqueue(struct knote *, long hint);
static int	filt_procattach(struct knote *);
static void	filt_procdetach(struct knote *);
static int	filt_proc(struct knote *, long hint);
static int	filt_fileattach(struct knote *);
static void	filt_timerexpire(void *x);
static int	filt_timerattach(struct knote *);
static void	filt_timerdetach(struct knote *);
static int	filt_timer(struct knote *, long hint);

static const struct fileops kqueueops = {
	(void *)enxio, (void *)enxio, kqueue_ioctl, kqueue_fcntl, kqueue_poll,
	kqueue_stat, kqueue_close, kqueue_kqfilter
};

static const struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static const struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static const struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static const struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static u_int	kq_ncallouts = 0;
static int	kq_calloutmax = (4 * 1024);

#define	KN_HASHSIZE		64	/* XXX should be tunable */
#define	KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern const struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 * These should be listed in the numeric order of the EVFILT_* defines.
 * If filtops is NULL, the filter isn't implemented in NetBSD.
 * End of list is when name is NULL.
 *
 * Note that 'refcnt' is meaningless for built-in filters.
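 *
 * A kernel module can extend this set at run time with kfilter_register().
 * A minimal, illustrative sketch (the "EVFILT_EXAMPLE" name and the
 * filt_example* functions are hypothetical, not part of this file):
 *
 *	static const struct filterops example_filtops =
 *		{ 0, filt_exampleattach, filt_exampledetach, filt_example };
 *	int filter;
 *
 *	error = kfilter_register("EVFILT_EXAMPLE", &example_filtops, &filter);
 *
 * On success the new filter id is returned via 'filter' and may then be
 * used by userland in kevent(2) changelists; kfilter_unregister() undoes
 * the registration.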
 */
struct kfilter {
	const char	*name;		/* name of filter */
	uint32_t	filter;		/* id of filter */
	unsigned	refcnt;		/* reference count */
	const struct filterops *filtops;/* operations for filter */
	size_t		namelen;	/* length of name string */
};

/* System defined filters */
static struct kfilter sys_kfilters[] = {
	{ "EVFILT_READ",	EVFILT_READ,	0, &file_filtops, 0 },
	{ "EVFILT_WRITE",	EVFILT_WRITE,	0, &file_filtops, 0 },
	{ "EVFILT_AIO",		EVFILT_AIO,	0, NULL, 0 },
	{ "EVFILT_VNODE",	EVFILT_VNODE,	0, &file_filtops, 0 },
	{ "EVFILT_PROC",	EVFILT_PROC,	0, &proc_filtops, 0 },
	{ "EVFILT_SIGNAL",	EVFILT_SIGNAL,	0, &sig_filtops, 0 },
	{ "EVFILT_TIMER",	EVFILT_TIMER,	0, &timer_filtops, 0 },
	{ NULL,			0,		0, NULL, 0 },
};

/* User defined kfilters */
static struct kfilter	*user_kfilters;		/* array */
static int		user_kfilterc;		/* current offset */
static int		user_kfiltermaxc;	/* max size so far */
static size_t		user_kfiltersz;		/* size of allocated memory */

/* Locks */
static krwlock_t	kqueue_filter_lock;	/* lock on filter lists */
static kmutex_t		kqueue_misc_lock;	/* miscellaneous */

/*
 * Initialize the kqueue subsystem.
 */
void
kqueue_init(void)
{

	rw_init(&kqueue_filter_lock);
	mutex_init(&kqueue_misc_lock, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Find kfilter entry by name, or NULL if not found.
 */
static struct kfilter *
kfilter_byname_sys(const char *name)
{
	int i;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	for (i = 0; sys_kfilters[i].name != NULL; i++) {
		if (strcmp(name, sys_kfilters[i].name) == 0)
			return &sys_kfilters[i];
	}
	return NULL;
}

static struct kfilter *
kfilter_byname_user(const char *name)
{
	int i;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	/* user filter slots have a NULL name if previously deregistered */
	for (i = 0; i < user_kfilterc; i++) {
		if (user_kfilters[i].name != NULL &&
		    strcmp(name, user_kfilters[i].name) == 0)
			return &user_kfilters[i];
	}
	return NULL;
}

static struct kfilter *
kfilter_byname(const char *name)
{
	struct kfilter *kfilter;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	if ((kfilter = kfilter_byname_sys(name)) != NULL)
		return kfilter;

	return kfilter_byname_user(name);
}

/*
 * Find kfilter entry by filter id, or NULL if not found.
 * Assumes entries are indexed in filter id order, for speed.
 */
static struct kfilter *
kfilter_byfilter(uint32_t filter)
{
	struct kfilter *kfilter;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	if (filter < EVFILT_SYSCOUNT)	/* it's a system filter */
		kfilter = &sys_kfilters[filter];
	else if (user_kfilters != NULL &&
	    filter < EVFILT_SYSCOUNT + user_kfilterc)
		/* it's a user filter */
		kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
	else
		return (NULL);		/* out of range */
	KASSERT(kfilter->filter == filter);	/* sanity check! */
	return (kfilter);
}

/*
 * Register a new kfilter. Stores the entry in user_kfilters.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 * If retfilter != NULL, the new filterid is returned in it.
 */
int
kfilter_register(const char *name, const struct filterops *filtops,
		 int *retfilter)
{
	struct kfilter *kfilter;
	size_t len;
	int i;

	if (name == NULL || name[0] == '\0' || filtops == NULL)
		return (EINVAL);	/* invalid args */

	rw_enter(&kqueue_filter_lock, RW_WRITER);
	if (kfilter_byname(name) != NULL) {
		rw_exit(&kqueue_filter_lock);
		return (EEXIST);	/* already exists */
	}
	if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) {
		rw_exit(&kqueue_filter_lock);
		return (EINVAL);	/* too many */
	}

	for (i = 0; i < user_kfilterc; i++) {
		kfilter = &user_kfilters[i];
		if (kfilter->name == NULL) {
			/* Previously deregistered slot.  Reuse. */
			goto reuse;
		}
	}

	/* check if need to grow user_kfilters */
	if (user_kfilterc + 1 > user_kfiltermaxc) {
		/* Grow in KFILTER_EXTENT chunks. */
		user_kfiltermaxc += KFILTER_EXTENT;
		len = user_kfiltermaxc * sizeof(*kfilter);
		kfilter = kmem_alloc(len, KM_SLEEP);
		memset((char *)kfilter + user_kfiltersz, 0, len - user_kfiltersz);
		if (user_kfilters != NULL) {
			memcpy(kfilter, user_kfilters, user_kfiltersz);
			kmem_free(user_kfilters, user_kfiltersz);
		}
		user_kfiltersz = len;
		user_kfilters = kfilter;
	}
	/* Adding new slot */
	kfilter = &user_kfilters[user_kfilterc++];
reuse:
	kfilter->namelen = strlen(name) + 1;
	kfilter->name = kmem_alloc(kfilter->namelen, KM_SLEEP);
	memcpy(__UNCONST(kfilter->name), name, kfilter->namelen);

	kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT;

	kfilter->filtops = kmem_alloc(sizeof(*filtops), KM_SLEEP);
	memcpy(__UNCONST(kfilter->filtops), filtops, sizeof(*filtops));

	if (retfilter != NULL)
		*retfilter = kfilter->filter;
	rw_exit(&kqueue_filter_lock);

	return (0);
}

/*
 * Unregister a kfilter previously registered with kfilter_register.
 * This retains the filter id, but clears the name and frees filtops (filter
 * operations), so that the number isn't reused during a boot.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 */
int
kfilter_unregister(const char *name)
{
	struct kfilter *kfilter;

	if (name == NULL || name[0] == '\0')
		return (EINVAL);	/* invalid name */

	rw_enter(&kqueue_filter_lock, RW_WRITER);
	if (kfilter_byname_sys(name) != NULL) {
		rw_exit(&kqueue_filter_lock);
		return (EINVAL);	/* can't detach system filters */
	}

	kfilter = kfilter_byname_user(name);
	if (kfilter == NULL) {
		rw_exit(&kqueue_filter_lock);
		return (ENOENT);
	}
	if (kfilter->refcnt != 0) {
		rw_exit(&kqueue_filter_lock);
		return (EBUSY);
	}

	/* Cast away const (but we know it's safe). */
	kmem_free(__UNCONST(kfilter->name), kfilter->namelen);
	kfilter->name = NULL;	/* mark as `not implemented' */

	if (kfilter->filtops != NULL) {
		/* Cast away const (but we know it's safe). */
		kmem_free(__UNCONST(kfilter->filtops),
		    sizeof(*kfilter->filtops));
		kfilter->filtops = NULL;	/* mark as `not implemented' */
	}
	rw_exit(&kqueue_filter_lock);

	return (0);
}

/*
 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
 * descriptors.  Calls fileops kqfilter method for given file descriptor.
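 *
 * A per-object kqfilter method typically sets kn->kn_fop and records the
 * knote on the object's klist so that later knote() calls can find it;
 * seltrue_kqfilter() below is the minimal "always ready" case of such a
 * method.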
 */
static int
filt_fileattach(struct knote *kn)
{
	file_t *fp;

	fp = kn->kn_obj;

	return (*fp->f_ops->fo_kqfilter)(fp, kn);
}

/*
 * Filter detach method for EVFILT_READ on kqueue descriptor.
 */
static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq;

	kq = ((file_t *)kn->kn_obj)->f_data;

	mutex_spin_enter(&kq->kq_lock);
	SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext);
	mutex_spin_exit(&kq->kq_lock);
}

/*
 * Filter event method for EVFILT_READ on kqueue descriptor.
 */
/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq;
	int rv;

	kq = ((file_t *)kn->kn_obj)->f_data;

	if (hint != NOTE_SUBMIT)
		mutex_spin_enter(&kq->kq_lock);
	kn->kn_data = kq->kq_count;
	rv = (kn->kn_data > 0);
	if (hint != NOTE_SUBMIT)
		mutex_spin_exit(&kq->kq_lock);

	return rv;
}

/*
 * Filter attach method for EVFILT_PROC.
 */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p, *curp;
	struct lwp *curl;

	curl = curlwp;
	curp = curl->l_proc;

	mutex_enter(proc_lock);
	p = p_find(kn->kn_id, PFIND_LOCKED);
	if (p == NULL) {
		mutex_exit(proc_lock);
		return ESRCH;
	}

	/*
	 * Fail if it's not owned by you, or the last exec gave us
	 * setuid/setgid privs (unless you're root).
	 */
	mutex_enter(p->p_lock);
	mutex_exit(proc_lock);
	if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KEVENT_FILTER,
	    p, NULL, NULL, NULL) != 0) {
		mutex_exit(p->p_lock);
		return EACCES;
	}

	kn->kn_obj = p;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
	mutex_exit(p->p_lock);

	return 0;
}

/*
 * Filter detach method for EVFILT_PROC.
 *
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process might not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	if (kn->kn_status & KN_DETACHED)
		return;

	p = kn->kn_obj;

	mutex_enter(p->p_lock);
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
	mutex_exit(p->p_lock);
}

/*
 * Filter event method for EVFILT_PROC.
 */
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event, fflag;
	struct kevent kev;
	struct kqueue *kq;
	int error;

	event = (u_int)hint & NOTE_PCTRLMASK;
	kq = kn->kn_kq;
	fflag = 0;

	/* If the user is interested in this event, record it. */
	if (kn->kn_sfflags & event)
		fflag |= event;

	if (event == NOTE_EXIT) {
		/*
		 * Process is gone, so flag the event as finished.
		 *
		 * Detach the knote from watched process and mark
		 * it as such.  We can't leave this to kqueue_scan(),
		 * since the process might not exist by then.  And we
		 * have to do this now, since psignal KNOTE() is called
		 * also for zombies and we might end up reading freed
		 * memory if the kevent would already be picked up
		 * and knote g/c'ed.
		 */
		filt_procdetach(kn);

		mutex_spin_enter(&kq->kq_lock);
		kn->kn_status |= KN_DETACHED;
		/* Mark as ONESHOT, so that the knote is g/c'ed when read */
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_fflags |= fflag;
		mutex_spin_exit(&kq->kq_lock);

		return 1;
	}

	mutex_spin_enter(&kq->kq_lock);
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		/*
		 * Process forked, and user wants to track the new process,
		 * so attach a new knote to it, and immediately report an
		 * event with the parent's pid.  Register knote with new
		 * process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		mutex_spin_exit(&kq->kq_lock);
		error = kqueue_register(kq, &kev);
		mutex_spin_enter(&kq->kq_lock);
		if (error != 0)
			kn->kn_fflags |= NOTE_TRACKERR;
	}
	kn->kn_fflags |= fflag;
	fflag = kn->kn_fflags;
	mutex_spin_exit(&kq->kq_lock);

	return fflag != 0;
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	int tticks;

	mutex_enter(&kqueue_misc_lock);
	kn->kn_data++;
	knote_activate(kn);
	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tticks = mstohz(kn->kn_sdata);
		callout_schedule((callout_t *)kn->kn_hook, tticks);
	}
	mutex_exit(&kqueue_misc_lock);
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	callout_t *calloutp;
	struct kqueue *kq;
	int tticks;

	tticks = mstohz(kn->kn_sdata);

	/* if the supplied value is under our resolution, use 1 tick */
	if (tticks == 0) {
		if (kn->kn_sdata == 0)
			return EINVAL;
		tticks = 1;
	}

	if (atomic_inc_uint_nv(&kq_ncallouts) >= kq_calloutmax ||
	    (calloutp = kmem_alloc(sizeof(*calloutp), KM_NOSLEEP)) == NULL) {
		atomic_dec_uint(&kq_ncallouts);
		return ENOMEM;
	}
	callout_init(calloutp, CALLOUT_MPSAFE);

	kq = kn->kn_kq;
	mutex_spin_enter(&kq->kq_lock);
	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	kn->kn_hook = calloutp;
	mutex_spin_exit(&kq->kq_lock);

	callout_reset(calloutp, tticks, filt_timerexpire, kn);

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	callout_t *calloutp;

	calloutp = (callout_t *)kn->kn_hook;
	callout_halt(calloutp, NULL);
	callout_destroy(calloutp);
	kmem_free(calloutp, sizeof(*calloutp));
	atomic_dec_uint(&kq_ncallouts);
}

static int
filt_timer(struct knote *kn, long hint)
{
	int rv;

	mutex_enter(&kqueue_misc_lock);
	rv = (kn->kn_data != 0);
	mutex_exit(&kqueue_misc_lock);

	return rv;
}

/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does as well.
	 */
	kn->kn_data = 0;
	return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, which
 * has the same effect as a filter using filt_seltrue() as the filter
 * method.
 */
static void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops =
	{ 1, NULL, filt_seltruedetach, filt_seltrue };

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

/*
 * kqueue(2) system call.
 */
int
sys_kqueue(struct lwp *l, const void *v, register_t *retval)
{
	struct kqueue *kq;
	file_t *fp;
	int fd, error;

	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	kq = kmem_zalloc(sizeof(*kq), KM_SLEEP);
	mutex_init(&kq->kq_lock, MUTEX_DEFAULT, IPL_SCHED);
	cv_init(&kq->kq_cv, "kqueue");
	selinit(&kq->kq_sel);
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = kq;
	*retval = fd;
	kq->kq_fdp = curlwp->l_fd;
	fd_affix(curproc, fp, fd);
	return error;
}

/*
 * kevent(2) system call.
 */
int
kevent_fetch_changes(void *private, const struct kevent *changelist,
    struct kevent *changes, size_t index, int n)
{

	return copyin(changelist + index, changes, n * sizeof(*changes));
}

int
kevent_put_events(void *private, struct kevent *events,
    struct kevent *eventlist, size_t index, int n)
{

	return copyout(events, eventlist + index, n * sizeof(*events));
}

static const struct kevent_ops kevent_native_ops = {
	.keo_private = NULL,
	.keo_fetch_timeout = copyin,
	.keo_fetch_changes = kevent_fetch_changes,
	.keo_put_events = kevent_put_events,
};

int
sys___kevent50(struct lwp *l, const struct sys___kevent50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)				fd;
		syscallarg(const struct kevent *)	changelist;
		syscallarg(size_t)			nchanges;
		syscallarg(struct kevent *)		eventlist;
		syscallarg(size_t)			nevents;
		syscallarg(const struct timespec *)	timeout;
	} */

	return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist),
	    SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
	    SCARG(uap, timeout), &kevent_native_ops);
}

int
kevent1(register_t *retval, int fd,
	const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout,
	const struct kevent_ops *keops)
{
	struct kevent *kevp;
	struct kqueue *kq;
	struct timespec	ts;
	size_t i, n, ichange;
	int nerrors, error;
	struct kevent kevbuf[8];	/* approx 300 bytes on 64-bit */
	file_t *fp;

	/* check that we're dealing with a kq */
	fp = fd_getfile(fd);
	if (fp == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		fd_putfile(fd);
		return (EBADF);
	}

	if (timeout != NULL) {
		error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
		if (error)
			goto done;
		timeout = &ts;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;
	ichange = 0;

	/* traverse list of events to register */
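	/*
	 * Changes are pulled in __arraycount(kevbuf)-sized chunks, so the
	 * on-stack buffer above bounds the size of each keo_fetch_changes
	 * copy regardless of how large nchanges is.
	 */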
	while (nchanges > 0) {
		n = MIN(nchanges, __arraycount(kevbuf));
		error = (*keops->keo_fetch_changes)(keops->keo_private,
		    changelist, kevbuf, ichange, n);
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kevbuf[i];
			kevp->flags &= ~EV_SYSFLAGS;
			/* register each knote */
			error = kqueue_register(kq, kevp);
			if (error) {
				if (nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					error = (*keops->keo_put_events)
					    (keops->keo_private, kevp,
					    eventlist, nerrors, 1);
					if (error)
						goto done;
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;	/* update the results */
		ichange += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	/* actually scan through the events */
	error = kqueue_scan(fp, nevents, eventlist, timeout, retval, keops,
	    kevbuf, __arraycount(kevbuf));
 done:
	fd_putfile(fd);
	return (error);
}

/*
 * Register a given kevent kev onto the kqueue
 */
static int
kqueue_register(struct kqueue *kq, struct kevent *kev)
{
	struct kfilter *kfilter;
	filedesc_t *fdp;
	file_t *fp;
	fdfile_t *ff;
	struct knote *kn, *newkn;
	struct klist *list;
	int error, fd, rv;

	fdp = kq->kq_fdp;
	fp = NULL;
	kn = NULL;
	error = 0;
	fd = 0;

	newkn = kmem_zalloc(sizeof(*newkn), KM_SLEEP);

	rw_enter(&kqueue_filter_lock, RW_READER);
	kfilter = kfilter_byfilter(kev->filter);
	if (kfilter == NULL || kfilter->filtops == NULL) {
		/* filter not found nor implemented */
		rw_exit(&kqueue_filter_lock);
		kmem_free(newkn, sizeof(*newkn));
		return (EINVAL);
	}

	mutex_enter(&fdp->fd_lock);

	/* search if knote already exists */
	if (kfilter->filtops->f_isfd) {
		/* monitoring a file descriptor */
		fd = kev->ident;
		if ((fp = fd_getfile(fd)) == NULL) {
			mutex_exit(&fdp->fd_lock);
			rw_exit(&kqueue_filter_lock);
			kmem_free(newkn, sizeof(*newkn));
			return EBADF;
		}
		ff = fdp->fd_ofiles[fd];
		if (fd <= fdp->fd_lastkqfile) {
			SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) {
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
			}
		}
	} else {
		/*
		 * not monitoring a file descriptor, so
		 * lookup knotes in internal hash table
		 */
		if (fdp->fd_knhashmask != 0) {
			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link) {
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
			}
		}
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			/* create new knote */
			kn = newkn;
			newkn = NULL;
			kn->kn_obj = fp;
			kn->kn_kq = kq;
			kn->kn_fop = kfilter->filtops;
			kn->kn_kfilter = kfilter;
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			if (!kn->kn_fop->f_isfd) {
				/*
				 * If knote is not on an fd, store on
				 * internal hash table.
				 */
				if (fdp->fd_knhashmask == 0) {
					/* XXXAD can block with fd_lock held */
					fdp->fd_knhash = hashinit(KN_HASHSIZE,
					    HASH_LIST, true,
					    &fdp->fd_knhashmask);
				}
				list = &fdp->fd_knhash[KN_HASH(kn->kn_id,
				    fdp->fd_knhashmask)];
			} else {
				/* Otherwise, knote is on an fd. */
				list = (struct klist *)
				    &fdp->fd_ofiles[kn->kn_id]->ff_knlist;
				if ((int)kn->kn_id > fdp->fd_lastkqfile)
					fdp->fd_lastkqfile = kn->kn_id;
			}
			SLIST_INSERT_HEAD(list, kn, kn_link);

			KERNEL_LOCK(1, NULL);		/* XXXSMP */
			error = (*kfilter->filtops->f_attach)(kn);
			KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
			if (error != 0) {
				/* knote_detach() drops fdp->fd_lock */
				knote_detach(kn, fdp, false);
				goto done;
			}
			atomic_inc_uint(&kfilter->refcnt);
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}
		KERNEL_LOCK(1, NULL);			/* XXXSMP */
		rv = (*kn->kn_fop->f_event)(kn, 0);
		KERNEL_UNLOCK_ONE(NULL);		/* XXXSMP */
		if (rv)
			knote_activate(kn);
	} else {
		if (kn == NULL) {
			error = ENOENT;
			mutex_exit(&fdp->fd_lock);
			goto done;
		}
		if (kev->flags & EV_DELETE) {
			/* knote_detach() drops fdp->fd_lock */
			knote_detach(kn, fdp, true);
			goto done;
		}
	}

	/* disable knote */
	if ((kev->flags & EV_DISABLE)) {
		mutex_spin_enter(&kq->kq_lock);
		if ((kn->kn_status & KN_DISABLED) == 0)
			kn->kn_status |= KN_DISABLED;
		mutex_spin_exit(&kq->kq_lock);
	}

	/* enable knote */
	if ((kev->flags & EV_ENABLE)) {
		knote_enqueue(kn);
	}
	mutex_exit(&fdp->fd_lock);
 done:
	rw_exit(&kqueue_filter_lock);
	if (newkn != NULL)
		kmem_free(newkn, sizeof(*newkn));
	if (fp != NULL)
		fd_putfile(fd);
	return (error);
}

#if defined(DEBUG)
static void
kq_check(struct kqueue *kq)
{
	const struct knote *kn;
	int count;
	int nmarker;

	KASSERT(mutex_owned(&kq->kq_lock));
	KASSERT(kq->kq_count >= 0);

	count = 0;
	nmarker = 0;
	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if ((kn->kn_status & (KN_MARKER | KN_QUEUED)) == 0) {
			panic("%s: kq=%p kn=%p inconsist 1", __func__, kq, kn);
		}
		if ((kn->kn_status & KN_MARKER) == 0) {
			if (kn->kn_kq != kq) {
				panic("%s: kq=%p kn=%p inconsist 2",
				    __func__, kq, kn);
			}
			if ((kn->kn_status & KN_ACTIVE) == 0) {
				panic("%s: kq=%p kn=%p: not active",
				    __func__, kq, kn);
			}
			count++;
			if (count > kq->kq_count) {
				goto bad;
			}
		} else {
			nmarker++;
#if 0
			if (nmarker > 10000) {
				panic("%s: kq=%p too many markers: %d != %d, "
				    "nmarker=%d",
				    __func__, kq, kq->kq_count, count, nmarker);
			}
#endif
		}
	}
	if (kq->kq_count != count) {
 bad:
		panic("%s: kq=%p inconsist 3: %d != %d, nmarker=%d",
		    __func__, kq, kq->kq_count, count, nmarker);
	}
}
#else /* defined(DEBUG) */
#define	kq_check(a)	/* nothing */
#endif /* defined(DEBUG) */

/*
 * Scan through the list of events on fp (for a maximum of maxevents),
 * returning the results into ulistp.  Timeout is determined by tsp; if
 * NULL, wait indefinitely, if 0 valued, perform a poll, otherwise wait
 * as appropriate.
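 *
 * For example (userland view, illustrative only), a non-blocking check of
 * the queue passes a zeroed timespec:
 *
 *	struct timespec ts = { 0, 0 };
 *	int n = kevent(kq_fd, NULL, 0, events, maxevents, &ts);
 *
 * which reaches this function with tsp != NULL and tvtohz() <= 0, i.e. the
 * "perform a poll" case below.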
 */
static int
kqueue_scan(file_t *fp, size_t maxevents, struct kevent *ulistp,
	    const struct timespec *tsp, register_t *retval,
	    const struct kevent_ops *keops, struct kevent *kevbuf,
	    size_t kevcnt)
{
	struct kqueue	*kq;
	struct kevent	*kevp;
	struct timeval	atv, sleeptv;
	struct knote	*kn, *marker;
	size_t		count, nkev, nevents;
	int		timeout, error, rv;
	filedesc_t	*fdp;

	fdp = curlwp->l_fd;
	kq = fp->f_data;
	count = maxevents;
	nkev = nevents = error = 0;
	if (count == 0) {
		*retval = 0;
		return 0;
	}

	if (tsp) {				/* timeout supplied */
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (inittimeleft(&atv, &sleeptv) == -1) {
			*retval = maxevents;
			return EINVAL;
		}
		timeout = tvtohz(&atv);
		if (timeout <= 0)
			timeout = -1;		/* do poll */
	} else {
		/* no timeout, wait forever */
		timeout = 0;
	}

	marker = kmem_zalloc(sizeof(*marker), KM_SLEEP);
	marker->kn_status = KN_MARKER;
	mutex_spin_enter(&kq->kq_lock);
 retry:
	kevp = kevbuf;
	if (kq->kq_count == 0) {
		if (timeout >= 0) {
			error = cv_timedwait_sig(&kq->kq_cv,
			    &kq->kq_lock, timeout);
			if (error == 0) {
				if (tsp == NULL || (timeout =
				    gettimeleft(&atv, &sleeptv)) > 0)
					goto retry;
			} else {
				/* don't restart after signals... */
				if (error == ERESTART)
					error = EINTR;
				if (error == EWOULDBLOCK)
					error = 0;
			}
		}
	} else {
		/* mark end of knote list */
		TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);

		while (count != 0) {
			kn = TAILQ_FIRST(&kq->kq_head);	/* get next knote */
			while ((kn->kn_status & KN_MARKER) != 0) {
				if (kn == marker) {
					/* it's our marker, stop */
					TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
					if (count < maxevents || (tsp != NULL &&
					    (timeout = gettimeleft(&atv,
					    &sleeptv)) <= 0))
						goto done;
					goto retry;
				}
				/* someone else's marker. */
				kn = TAILQ_NEXT(kn, kn_tqe);
			}
			kq_check(kq);
			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
			kq->kq_count--;
			kn->kn_status &= ~KN_QUEUED;
			kq_check(kq);
			if (kn->kn_status & KN_DISABLED) {
				/* don't want disabled events */
				continue;
			}
			if ((kn->kn_flags & EV_ONESHOT) == 0) {
				mutex_spin_exit(&kq->kq_lock);
				KERNEL_LOCK(1, NULL);		/* XXXSMP */
				rv = (*kn->kn_fop->f_event)(kn, 0);
				KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
				mutex_spin_enter(&kq->kq_lock);
				/* Re-poll if note was re-enqueued. */
				if ((kn->kn_status & KN_QUEUED) != 0)
					continue;
				if (rv == 0) {
					/*
					 * non-ONESHOT event that hasn't
					 * triggered again, so de-queue.
					 */
					kn->kn_status &= ~KN_ACTIVE;
					continue;
				}
			}
			/* XXXAD should be got from f_event if !oneshot. */
			*kevp++ = kn->kn_kevent;
			nkev++;
			if (kn->kn_flags & EV_ONESHOT) {
				/* delete ONESHOT events after retrieval */
				mutex_spin_exit(&kq->kq_lock);
				mutex_enter(&fdp->fd_lock);
				knote_detach(kn, fdp, true);
				mutex_spin_enter(&kq->kq_lock);
			} else if (kn->kn_flags & EV_CLEAR) {
				/* clear state after retrieval */
				kn->kn_data = 0;
				kn->kn_fflags = 0;
				kn->kn_status &= ~KN_ACTIVE;
			} else {
				/* add event back on list */
				kq_check(kq);
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				kq_check(kq);
			}
			if (nkev == kevcnt) {
				/* do copyouts in kevcnt chunks */
				mutex_spin_exit(&kq->kq_lock);
				error = (*keops->keo_put_events)
				    (keops->keo_private,
				    kevbuf, ulistp, nevents, nkev);
				mutex_spin_enter(&kq->kq_lock);
				nevents += nkev;
				nkev = 0;
				kevp = kevbuf;
			}
			count--;
			if (error != 0 || count == 0) {
				/* remove marker */
				TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
				break;
			}
		}
	}
 done:
	mutex_spin_exit(&kq->kq_lock);
	if (marker != NULL)
		kmem_free(marker, sizeof(*marker));
	if (nkev != 0) {
		/* copyout remaining events */
		error = (*keops->keo_put_events)(keops->keo_private,
		    kevbuf, ulistp, nevents, nkev);
	}
	*retval = maxevents - count;

	return error;
}

/*
 * fileops ioctl method for a kqueue descriptor.
 *
 * Two ioctls are currently supported. They both use struct kfilter_mapping:
 *	KFILTER_BYNAME		find filter for name. len is ignored.
 *	KFILTER_BYFILTER	find name for filter, and return result in
 *				name, which is of size len.
 */
/*ARGSUSED*/
static int
kqueue_ioctl(file_t *fp, u_long com, void *data)
{
	struct kfilter_mapping	*km;
	const struct kfilter	*kfilter;
	char			*name;
	int			error;

	km = data;
	error = 0;
	name = kmem_alloc(KFILTER_MAXNAME, KM_SLEEP);

	switch (com) {
	case KFILTER_BYFILTER:	/* convert filter -> name */
		rw_enter(&kqueue_filter_lock, RW_READER);
		kfilter = kfilter_byfilter(km->filter);
		if (kfilter != NULL) {
			strlcpy(name, kfilter->name, KFILTER_MAXNAME);
			rw_exit(&kqueue_filter_lock);
			error = copyoutstr(name, km->name, km->len, NULL);
		} else {
			rw_exit(&kqueue_filter_lock);
			error = ENOENT;
		}
		break;

	case KFILTER_BYNAME:	/* convert name -> filter */
		error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
		if (error) {
			break;
		}
		rw_enter(&kqueue_filter_lock, RW_READER);
		kfilter = kfilter_byname(name);
		if (kfilter != NULL)
			km->filter = kfilter->filter;
		else
			error = ENOENT;
		rw_exit(&kqueue_filter_lock);
		break;

	default:
		error = ENOTTY;
		break;

	}
	kmem_free(name, KFILTER_MAXNAME);
	return (error);
}

/*
 * fileops fcntl method for a kqueue descriptor.
 */
static int
kqueue_fcntl(file_t *fp, u_int com, void *data)
{

	return (ENOTTY);
}

/*
 * fileops poll method for a kqueue descriptor.
 * Determine if kqueue has events pending.
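 *
 * This is what lets a kqueue descriptor itself be watched with poll(2) or
 * select(2).  For example (userland, illustrative only):
 *
 *	struct pollfd pfd = { .fd = kq_fd, .events = POLLIN };
 *	int n = poll(&pfd, 1, 0);
 *
 * A positive return indicates that events are queued and a kevent(2) call
 * on kq_fd would find them.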
 */
static int
kqueue_poll(file_t *fp, int events)
{
	struct kqueue	*kq;
	int		revents;

	kq = fp->f_data;

	revents = 0;
	if (events & (POLLIN | POLLRDNORM)) {
		mutex_spin_enter(&kq->kq_lock);
		if (kq->kq_count != 0) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &kq->kq_sel);
		}
		kq_check(kq);
		mutex_spin_exit(&kq->kq_lock);
	}

	return revents;
}

/*
 * fileops stat method for a kqueue descriptor.
 * Returns dummy info, with st_size being number of events pending.
 */
static int
kqueue_stat(file_t *fp, struct stat *st)
{
	struct kqueue *kq;

	kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;

	return 0;
}

static void
kqueue_doclose(struct kqueue *kq, struct klist *list, int fd)
{
	struct knote *kn;
	filedesc_t *fdp;

	fdp = kq->kq_fdp;

	KASSERT(mutex_owned(&fdp->fd_lock));

	for (kn = SLIST_FIRST(list); kn != NULL;) {
		if (kq != kn->kn_kq) {
			kn = SLIST_NEXT(kn, kn_link);
			continue;
		}
		knote_detach(kn, fdp, true);
		mutex_enter(&fdp->fd_lock);
		kn = SLIST_FIRST(list);
	}
}

/*
 * fileops close method for a kqueue descriptor.
 */
static int
kqueue_close(file_t *fp)
{
	struct kqueue	*kq;
	filedesc_t	*fdp;
	fdfile_t	*ff;
	int		i;

	kq = fp->f_data;
	fdp = curlwp->l_fd;

	mutex_enter(&fdp->fd_lock);
	for (i = 0; i <= fdp->fd_lastkqfile; i++) {
		if ((ff = fdp->fd_ofiles[i]) == NULL)
			continue;
		kqueue_doclose(kq, (struct klist *)&ff->ff_knlist, i);
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			kqueue_doclose(kq, &fdp->fd_knhash[i], -1);
		}
	}
	mutex_exit(&fdp->fd_lock);

	KASSERT(kq->kq_count == 0);
	mutex_destroy(&kq->kq_lock);
	cv_destroy(&kq->kq_cv);
	seldestroy(&kq->kq_sel);
	kmem_free(kq, sizeof(*kq));
	fp->f_data = NULL;

	return (0);
}

/*
 * struct fileops kqfilter method for a kqueue descriptor.
 * Event triggered when monitored kqueue changes.
 */
static int
kqueue_kqfilter(file_t *fp, struct knote *kn)
{
	struct kqueue	*kq;
	filedesc_t	*fdp;

	kq = ((file_t *)kn->kn_obj)->f_data;

	KASSERT(fp == kn->kn_obj);

	if (kn->kn_filter != EVFILT_READ)
		return 1;

	kn->kn_fop = &kqread_filtops;
	fdp = curlwp->l_fd;
	mutex_enter(&kq->kq_lock);
	SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext);
	mutex_exit(&kq->kq_lock);

	return 0;
}

/*
 * Walk down a list of knotes, activating them if their event has
 * triggered.  The caller's object lock (e.g. device driver lock)
 * must be held.
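 *
 * A typical caller is a driver that has just raised an event under its
 * own lock.  A hedged, hypothetical sketch (the softc layout is invented
 * for illustration only):
 *
 *	mutex_enter(&sc->sc_lock);
 *	sc->sc_bytes_ready += n;
 *	knote(&sc->sc_rsel.sel_klist, 0);
 *	mutex_exit(&sc->sc_lock);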
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		if ((*kn->kn_fop->f_event)(kn, hint))
			knote_activate(kn);
	}
}

/*
 * Remove all knotes referencing a specified fd
 */
void
knote_fdclose(int fd)
{
	struct klist *list;
	struct knote *kn;
	filedesc_t *fdp;

	fdp = curlwp->l_fd;
	list = (struct klist *)&fdp->fd_ofiles[fd]->ff_knlist;
	mutex_enter(&fdp->fd_lock);
	while ((kn = SLIST_FIRST(list)) != NULL) {
		knote_detach(kn, fdp, true);
		mutex_enter(&fdp->fd_lock);
	}
	mutex_exit(&fdp->fd_lock);
}

/*
 * Drop knote.  Called with fdp->fd_lock held, and will drop before
 * returning.
 */
static void
knote_detach(struct knote *kn, filedesc_t *fdp, bool dofop)
{
	struct klist *list;
	struct kqueue *kq;

	kq = kn->kn_kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);
	KASSERT(mutex_owned(&fdp->fd_lock));

	/* Remove from monitored object. */
	if (dofop) {
		KERNEL_LOCK(1, NULL);		/* XXXSMP */
		(*kn->kn_fop->f_detach)(kn);
		KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
	}

	/* Remove from descriptor table. */
	if (kn->kn_fop->f_isfd)
		list = (struct klist *)&fdp->fd_ofiles[kn->kn_id]->ff_knlist;
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);

	/* Remove from kqueue. */
	/* XXXAD should verify not in use by kqueue_scan. */
	mutex_spin_enter(&kq->kq_lock);
	if ((kn->kn_status & KN_QUEUED) != 0) {
		kq_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kq_check(kq);
	}
	mutex_spin_exit(&kq->kq_lock);

	mutex_exit(&fdp->fd_lock);
	if (kn->kn_fop->f_isfd)
		fd_putfile(kn->kn_id);
	atomic_dec_uint(&kn->kn_kfilter->refcnt);
	kmem_free(kn, sizeof(*kn));
}

/*
 * Queue new event for knote.
 */
static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);

	kq = kn->kn_kq;

	mutex_spin_enter(&kq->kq_lock);
	if ((kn->kn_status & KN_DISABLED) != 0) {
		kn->kn_status &= ~KN_DISABLED;
	}
	if ((kn->kn_status & (KN_ACTIVE | KN_QUEUED)) == KN_ACTIVE) {
		kq_check(kq);
		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		kq_check(kq);
		cv_broadcast(&kq->kq_cv);
		selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
	}
	mutex_spin_exit(&kq->kq_lock);
}

/*
 * Mark knote active and, if it is neither disabled nor already queued,
 * queue it on its kqueue and wake up any waiters.
 */
static void
knote_activate(struct knote *kn)
{
	struct kqueue *kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);

	kq = kn->kn_kq;

	mutex_spin_enter(&kq->kq_lock);
	kn->kn_status |= KN_ACTIVE;
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
		kq_check(kq);
		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		kq_check(kq);
		cv_broadcast(&kq->kq_cv);
		selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
	}
	mutex_spin_exit(&kq->kq_lock);
}