/*	$NetBSD: kern_event.c,v 1.58 2008/04/28 20:24:03 martin Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.58 2008/04/28 20:24:03 martin Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/malloc.h>		/* for hashinit */
#include <sys/kmem.h>
#include <sys/stat.h>
#include <sys/filedesc.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/conf.h>
#include <sys/atomic.h>

static int	kqueue_scan(file_t *, size_t, struct kevent *,
			    const struct timespec *, register_t *,
			    const struct kevent_ops *, struct kevent *,
			    size_t);
static int	kqueue_ioctl(file_t *, u_long, void *);
static int	kqueue_fcntl(file_t *, u_int, void *);
static int	kqueue_poll(file_t *, int);
static int	kqueue_kqfilter(file_t *, struct knote *);
static int	kqueue_stat(file_t *, struct stat *);
static int	kqueue_close(file_t *);
static int	kqueue_register(struct kqueue *, struct kevent *);
static void	kqueue_doclose(struct kqueue *, struct klist *, int);

static void	knote_detach(struct knote *, filedesc_t *fdp, bool);
static void	knote_enqueue(struct knote *);
static void	knote_activate(struct knote *);

static void	filt_kqdetach(struct knote *);
static int	filt_kqueue(struct knote *, long hint);
static int	filt_procattach(struct knote *);
static void	filt_procdetach(struct knote *);
static int	filt_proc(struct knote *, long hint);
static int	filt_fileattach(struct knote *);
static void	filt_timerexpire(void *x);
static int	filt_timerattach(struct knote *);
static void	filt_timerdetach(struct knote *);
static int	filt_timer(struct knote *, long hint);

static const struct fileops kqueueops = {
	(void *)enxio, (void *)enxio, kqueue_ioctl, kqueue_fcntl, kqueue_poll,
	kqueue_stat, kqueue_close, kqueue_kqfilter
};

static const struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static const struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static const struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static const struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static u_int	kq_ncallouts = 0;
static int	kq_calloutmax = (4 * 1024);

MALLOC_DEFINE(M_KEVENT, "kevent", "kevents/knotes");	/* for hashinit */

#define	KN_HASHSIZE		64	/* XXX should be tunable */
#define	KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern const struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 * These should be listed in the numeric order of the EVFILT_* defines.
 * If filtops is NULL, the filter isn't implemented in NetBSD.
 * End of list is when name is NULL.
 *
 * Note that 'refcnt' is meaningless for built-in filters.
 */
struct kfilter {
	const char	*name;		/* name of filter */
	uint32_t	filter;		/* id of filter */
	unsigned	refcnt;		/* reference count */
	const struct filterops *filtops;/* operations for filter */
	size_t		namelen;	/* length of name string */
};

/* System defined filters */
static struct kfilter sys_kfilters[] = {
	{ "EVFILT_READ",	EVFILT_READ,	0, &file_filtops, 0 },
	{ "EVFILT_WRITE",	EVFILT_WRITE,	0, &file_filtops, 0, },
	{ "EVFILT_AIO",		EVFILT_AIO,	0, NULL, 0 },
	{ "EVFILT_VNODE",	EVFILT_VNODE,	0, &file_filtops, 0 },
	{ "EVFILT_PROC",	EVFILT_PROC,	0, &proc_filtops, 0 },
	{ "EVFILT_SIGNAL",	EVFILT_SIGNAL,	0, &sig_filtops, 0 },
	{ "EVFILT_TIMER",	EVFILT_TIMER,	0, &timer_filtops, 0 },
	{ NULL,			0,		0, NULL, 0 },
};

/* User defined kfilters */
static struct kfilter	*user_kfilters;		/* array */
static int		user_kfilterc;		/* current offset */
static int		user_kfiltermaxc;	/* max size so far */
static size_t		user_kfiltersz;		/* size of allocated memory */

/* Locks */
static krwlock_t	kqueue_filter_lock;	/* lock on filter lists */
static kmutex_t		kqueue_misc_lock;	/* miscellaneous */

/*
 * Initialize the kqueue subsystem.
 */
void
kqueue_init(void)
{

	rw_init(&kqueue_filter_lock);
	mutex_init(&kqueue_misc_lock, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Find kfilter entry by name, or NULL if not found.
 */
static struct kfilter *
kfilter_byname_sys(const char *name)
{
	int i;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	for (i = 0; sys_kfilters[i].name != NULL; i++) {
		if (strcmp(name, sys_kfilters[i].name) == 0)
			return &sys_kfilters[i];
	}
	return NULL;
}

static struct kfilter *
kfilter_byname_user(const char *name)
{
	int i;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	/* user filter slots have a NULL name if previously deregistered */
	for (i = 0; i < user_kfilterc ; i++) {
		if (user_kfilters[i].name != NULL &&
		    strcmp(name, user_kfilters[i].name) == 0)
			return &user_kfilters[i];
	}
	return NULL;
}

static struct kfilter *
kfilter_byname(const char *name)
{
	struct kfilter *kfilter;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	if ((kfilter = kfilter_byname_sys(name)) != NULL)
		return kfilter;

	return kfilter_byname_user(name);
}

/*
 * Find kfilter entry by filter id, or NULL if not found.
 * Assumes entries are indexed in filter id order, for speed.
 */
static struct kfilter *
kfilter_byfilter(uint32_t filter)
{
	struct kfilter *kfilter;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	if (filter < EVFILT_SYSCOUNT)	/* it's a system filter */
		kfilter = &sys_kfilters[filter];
	else if (user_kfilters != NULL &&
	    filter < EVFILT_SYSCOUNT + user_kfilterc)
		/* it's a user filter */
		kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
	else
		return (NULL);		/* out of range */
	KASSERT(kfilter->filter == filter);	/* sanity check! */
	return (kfilter);
}

/*
 * Register a new kfilter. Stores the entry in user_kfilters.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 * If retfilter != NULL, the new filterid is returned in it.
 */
int
kfilter_register(const char *name, const struct filterops *filtops,
		 int *retfilter)
{
	struct kfilter *kfilter;
	size_t len;
	int i;

	if (name == NULL || name[0] == '\0' || filtops == NULL)
		return (EINVAL);	/* invalid args */

	rw_enter(&kqueue_filter_lock, RW_WRITER);
	if (kfilter_byname(name) != NULL) {
		rw_exit(&kqueue_filter_lock);
		return (EEXIST);	/* already exists */
	}
	if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) {
		rw_exit(&kqueue_filter_lock);
		return (EINVAL);	/* too many */
	}

	for (i = 0; i < user_kfilterc; i++) {
		kfilter = &user_kfilters[i];
		if (kfilter->name == NULL) {
			/* Previously deregistered slot.  Reuse. */
			goto reuse;
		}
	}

	/* check if need to grow user_kfilters */
	if (user_kfilterc + 1 > user_kfiltermaxc) {
		/* Grow in KFILTER_EXTENT chunks. */
		user_kfiltermaxc += KFILTER_EXTENT;
		len = user_kfiltermaxc * sizeof(struct kfilter);
		kfilter = kmem_alloc(len, KM_SLEEP);
		memset((char *)kfilter + user_kfiltersz, 0, len - user_kfiltersz);
		if (user_kfilters != NULL) {
			memcpy(kfilter, user_kfilters, user_kfiltersz);
			kmem_free(user_kfilters, user_kfiltersz);
		}
		user_kfiltersz = len;
		user_kfilters = kfilter;
	}
	/* Adding new slot */
	kfilter = &user_kfilters[user_kfilterc++];
reuse:
	kfilter->namelen = strlen(name) + 1;
	kfilter->name = kmem_alloc(kfilter->namelen, KM_SLEEP);
	memcpy(__UNCONST(kfilter->name), name, kfilter->namelen);

	kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT;

	kfilter->filtops = kmem_alloc(sizeof(*filtops), KM_SLEEP);
	memcpy(__UNCONST(kfilter->filtops), filtops, sizeof(*filtops));

	if (retfilter != NULL)
		*retfilter = kfilter->filter;
	rw_exit(&kqueue_filter_lock);

	return (0);
}

/*
 * Unregister a kfilter previously registered with kfilter_register.
 * This retains the filter id, but clears the name and frees filtops (filter
 * operations), so that the number isn't reused during a boot.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 */
int
kfilter_unregister(const char *name)
{
	struct kfilter *kfilter;

	if (name == NULL || name[0] == '\0')
		return (EINVAL);	/* invalid name */

	rw_enter(&kqueue_filter_lock, RW_WRITER);
	if (kfilter_byname_sys(name) != NULL) {
		rw_exit(&kqueue_filter_lock);
		return (EINVAL);	/* can't detach system filters */
	}

	kfilter = kfilter_byname_user(name);
	if (kfilter == NULL) {
		rw_exit(&kqueue_filter_lock);
		return (ENOENT);
	}
	if (kfilter->refcnt != 0) {
		rw_exit(&kqueue_filter_lock);
		return (EBUSY);
	}

	/* Cast away const (but we know it's safe). */
	kmem_free(__UNCONST(kfilter->name), kfilter->namelen);
	kfilter->name = NULL;	/* mark as `not implemented' */

	if (kfilter->filtops != NULL) {
		/* Cast away const (but we know it's safe). */
		kmem_free(__UNCONST(kfilter->filtops),
		    sizeof(*kfilter->filtops));
		kfilter->filtops = NULL;	/* mark as `not implemented' */
	}
	rw_exit(&kqueue_filter_lock);

	return (0);
}


/*
 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
 * descriptors.  Calls fileops kqfilter method for given file descriptor.
 */
static int
filt_fileattach(struct knote *kn)
{
	file_t *fp;

	fp = kn->kn_obj;

	return (*fp->f_ops->fo_kqfilter)(fp, kn);
}

/*
 * Filter detach method for EVFILT_READ on kqueue descriptor.
 */
static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq;

	kq = ((file_t *)kn->kn_obj)->f_data;

	mutex_spin_enter(&kq->kq_lock);
	SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext);
	mutex_spin_exit(&kq->kq_lock);
}

/*
 * Filter event method for EVFILT_READ on kqueue descriptor.
 */
/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq;
	int rv;

	kq = ((file_t *)kn->kn_obj)->f_data;

	if (hint != NOTE_SUBMIT)
		mutex_spin_enter(&kq->kq_lock);
	kn->kn_data = kq->kq_count;
	rv = (kn->kn_data > 0);
	if (hint != NOTE_SUBMIT)
		mutex_spin_exit(&kq->kq_lock);

	return rv;
}

/*
 * Filter attach method for EVFILT_PROC.
 */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p, *curp;
	struct lwp *curl;

	curl = curlwp;
	curp = curl->l_proc;

	mutex_enter(proc_lock);
	p = p_find(kn->kn_id, PFIND_LOCKED);
	if (p == NULL) {
		mutex_exit(proc_lock);
		return ESRCH;
	}

	/*
	 * Fail if it's not owned by you, or the last exec gave us
	 * setuid/setgid privs (unless you're root).
	 */
	mutex_enter(p->p_lock);
	mutex_exit(proc_lock);
	if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KEVENT_FILTER,
	    p, NULL, NULL, NULL) != 0) {
		mutex_exit(p->p_lock);
		return EACCES;
	}

	kn->kn_obj = p;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
	mutex_exit(p->p_lock);

	return 0;
}

/*
 * Filter detach method for EVFILT_PROC.
 *
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process might not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	if (kn->kn_status & KN_DETACHED)
		return;

	p = kn->kn_obj;

	mutex_enter(p->p_lock);
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
	mutex_exit(p->p_lock);
}

/*
 * Filter event method for EVFILT_PROC.
 */
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event, fflag;
	struct kevent kev;
	struct kqueue *kq;
	int error;

	event = (u_int)hint & NOTE_PCTRLMASK;
	kq = kn->kn_kq;
	fflag = 0;

	/* If the user is interested in this event, record it. */
	if (kn->kn_sfflags & event)
		fflag |= event;

	if (event == NOTE_EXIT) {
		/*
		 * Process is gone, so flag the event as finished.
		 *
		 * Detach the knote from watched process and mark
		 * it as such.  We can't leave this to kqueue_scan(),
		 * since the process might not exist by then.  And we
		 * have to do this now, since psignal's KNOTE() is also
		 * called for zombies and we might end up reading freed
		 * memory if the kevent had already been picked up and
		 * the knote g/c'ed.
		 */
		filt_procdetach(kn);

		mutex_spin_enter(&kq->kq_lock);
		kn->kn_status |= KN_DETACHED;
		/* Mark as ONESHOT, so that the knote is g/c'ed when read */
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_fflags |= fflag;
		mutex_spin_exit(&kq->kq_lock);

		return 1;
	}

	mutex_spin_enter(&kq->kq_lock);
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		/*
		 * Process forked, and user wants to track the new process,
		 * so attach a new knote to it, and immediately report an
		 * event with the parent's pid.  Register knote with new
		 * process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		mutex_spin_exit(&kq->kq_lock);
		error = kqueue_register(kq, &kev);
		mutex_spin_enter(&kq->kq_lock);
		if (error != 0)
			kn->kn_fflags |= NOTE_TRACKERR;
	}
	kn->kn_fflags |= fflag;
	fflag = kn->kn_fflags;
	mutex_spin_exit(&kq->kq_lock);

	return fflag != 0;
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	int tticks;

	mutex_enter(&kqueue_misc_lock);
	kn->kn_data++;
	knote_activate(kn);
	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tticks = mstohz(kn->kn_sdata);
		callout_schedule((callout_t *)kn->kn_hook, tticks);
	}
	mutex_exit(&kqueue_misc_lock);
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	callout_t *calloutp;
	struct kqueue *kq;
	int tticks;

	tticks = mstohz(kn->kn_sdata);

	/* if the supplied value is under our resolution, use 1 tick */
	if (tticks == 0) {
		if (kn->kn_sdata == 0)
			return EINVAL;
		tticks = 1;
	}

	if (atomic_inc_uint_nv(&kq_ncallouts) >= kq_calloutmax ||
	    (calloutp = kmem_alloc(sizeof(*calloutp), KM_NOSLEEP)) == NULL) {
		atomic_dec_uint(&kq_ncallouts);
		return ENOMEM;
	}
	callout_init(calloutp, CALLOUT_MPSAFE);

	kq = kn->kn_kq;
	mutex_spin_enter(&kq->kq_lock);
	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	kn->kn_hook = calloutp;
	mutex_spin_exit(&kq->kq_lock);

	callout_reset(calloutp, tticks, filt_timerexpire, kn);

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	callout_t *calloutp;

	calloutp = (callout_t *)kn->kn_hook;
	callout_halt(calloutp, NULL);
	callout_destroy(calloutp);
	kmem_free(calloutp, sizeof(*calloutp));
	atomic_dec_uint(&kq_ncallouts);
}

static int
filt_timer(struct knote *kn, long hint)
{
	int rv;

	mutex_enter(&kqueue_misc_lock);
	rv = (kn->kn_data != 0);
	mutex_exit(&kqueue_misc_lock);

	return rv;
}

/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does as well.
	 */
	kn->kn_data = 0;
	return (1);
}

/*
 * This provides full kqfilter entry for device switch tables, which
 * has same effect as filter using filt_seltrue() as filter method.
 */
static void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops =
	{ 1, NULL, filt_seltruedetach, filt_seltrue };

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

/*
 * kqueue(2) system call.
 */
int
sys_kqueue(struct lwp *l, const void *v, register_t *retval)
{
	struct kqueue *kq;
	file_t *fp;
	int fd, error;

	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	kq = kmem_zalloc(sizeof(*kq), KM_SLEEP);
	mutex_init(&kq->kq_lock, MUTEX_DEFAULT, IPL_SCHED);
	cv_init(&kq->kq_cv, "kqueue");
	selinit(&kq->kq_sel);
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = kq;
	*retval = fd;
	kq->kq_fdp = curlwp->l_fd;
	fd_affix(curproc, fp, fd);
	return error;
}

/*
 * kevent(2) system call.
 */
static int
kevent_fetch_changes(void *private, const struct kevent *changelist,
    struct kevent *changes, size_t index, int n)
{

	return copyin(changelist + index, changes, n * sizeof(*changes));
}

static int
kevent_put_events(void *private, struct kevent *events,
    struct kevent *eventlist, size_t index, int n)
{

	return copyout(events, eventlist + index, n * sizeof(*events));
}

static const struct kevent_ops kevent_native_ops = {
	keo_private: NULL,
	keo_fetch_timeout: copyin,
	keo_fetch_changes: kevent_fetch_changes,
	keo_put_events: kevent_put_events,
};

int
sys_kevent(struct lwp *l, const struct sys_kevent_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)				fd;
		syscallarg(const struct kevent *)	changelist;
		syscallarg(size_t)			nchanges;
		syscallarg(struct kevent *)		eventlist;
		syscallarg(size_t)			nevents;
		syscallarg(const struct timespec *)	timeout;
	} */

	return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist),
	    SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
	    SCARG(uap, timeout), &kevent_native_ops);
}

int
kevent1(register_t *retval, int fd,
	const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout,
	const struct kevent_ops *keops)
{
	struct kevent *kevp;
	struct kqueue *kq;
	struct timespec	ts;
	size_t i, n, ichange;
	int nerrors, error;
	struct kevent kevbuf[8];	/* approx 300 bytes on 64-bit */
	file_t *fp;

	/* check that we're dealing with a kq */
	fp = fd_getfile(fd);
	if (fp == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		fd_putfile(fd);
		return (EBADF);
	}

	if (timeout != NULL) {
		error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
		if (error)
			goto done;
		timeout = &ts;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;
	ichange = 0;

	/* traverse list of events to register */
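	/*
	 * Changes are fetched from the caller in chunks of up to
	 * __arraycount(kevbuf) entries via keo_fetch_changes and
	 * registered one at a time.  If kqueue_register() fails for an
	 * entry and there is still room in the caller's eventlist, the
	 * failure is reported back as an EV_ERROR event rather than
	 * aborting the whole call.
	 */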
	while (nchanges > 0) {
		n = MIN(nchanges, __arraycount(kevbuf));
		error = (*keops->keo_fetch_changes)(keops->keo_private,
		    changelist, kevbuf, ichange, n);
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kevbuf[i];
			kevp->flags &= ~EV_SYSFLAGS;
			/* register each knote */
			error = kqueue_register(kq, kevp);
			if (error) {
				if (nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					error = (*keops->keo_put_events)
					    (keops->keo_private, kevp,
					    eventlist, nerrors, 1);
					if (error)
						goto done;
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;	/* update the results */
		ichange += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	/* actually scan through the events */
	error = kqueue_scan(fp, nevents, eventlist, timeout, retval, keops,
	    kevbuf, __arraycount(kevbuf));
 done:
	fd_putfile(fd);
	return (error);
}

/*
 * Register a given kevent kev onto the kqueue
 */
static int
kqueue_register(struct kqueue *kq, struct kevent *kev)
{
	struct kfilter *kfilter;
	filedesc_t *fdp;
	file_t *fp;
	fdfile_t *ff;
	struct knote *kn, *newkn;
	struct klist *list;
	int error, fd, rv;

	fdp = kq->kq_fdp;
	fp = NULL;
	kn = NULL;
	error = 0;
	fd = 0;

	newkn = kmem_zalloc(sizeof(*newkn), KM_SLEEP);

	rw_enter(&kqueue_filter_lock, RW_READER);
	kfilter = kfilter_byfilter(kev->filter);
	if (kfilter == NULL || kfilter->filtops == NULL) {
		/* filter not found nor implemented */
		rw_exit(&kqueue_filter_lock);
		kmem_free(newkn, sizeof(*newkn));
		return (EINVAL);
	}

	mutex_enter(&fdp->fd_lock);

	/* search if knote already exists */
	if (kfilter->filtops->f_isfd) {
		/* monitoring a file descriptor */
		fd = kev->ident;
		if ((fp = fd_getfile(fd)) == NULL) {
			mutex_exit(&fdp->fd_lock);
			rw_exit(&kqueue_filter_lock);
			kmem_free(newkn, sizeof(*newkn));
			return EBADF;
		}
		ff = fdp->fd_ofiles[fd];
		if (fd <= fdp->fd_lastkqfile) {
			SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) {
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
			}
		}
	} else {
		/*
		 * not monitoring a file descriptor, so
		 * lookup knotes in internal hash table
		 */
		if (fdp->fd_knhashmask != 0) {
			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link) {
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
			}
		}
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			/* create new knote */
			kn = newkn;
			newkn = NULL;
			kn->kn_obj = fp;
			kn->kn_kq = kq;
			kn->kn_fop = kfilter->filtops;
			kn->kn_kfilter = kfilter;
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			if (!kn->kn_fop->f_isfd) {
				/*
				 * If knote is not on an fd, store on
				 * internal hash table.
				 */
				if (fdp->fd_knhashmask == 0) {
					/* XXXAD can block with fd_lock held */
					fdp->fd_knhash = hashinit(KN_HASHSIZE,
					    HASH_LIST, M_KEVENT, M_WAITOK,
					    &fdp->fd_knhashmask);
				}
				list = &fdp->fd_knhash[KN_HASH(kn->kn_id,
				    fdp->fd_knhashmask)];
			} else {
				/* Otherwise, knote is on an fd. */
				list = (struct klist *)
				    &fdp->fd_ofiles[kn->kn_id]->ff_knlist;
				if ((int)kn->kn_id > fdp->fd_lastkqfile)
					fdp->fd_lastkqfile = kn->kn_id;
			}
			SLIST_INSERT_HEAD(list, kn, kn_link);

			KERNEL_LOCK(1, NULL);		/* XXXSMP */
			error = (*kfilter->filtops->f_attach)(kn);
			KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
			if (error != 0) {
				/* knote_detach() drops fdp->fd_lock */
				knote_detach(kn, fdp, false);
				goto done;
			}
			atomic_inc_uint(&kfilter->refcnt);
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters that have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}
		KERNEL_LOCK(1, NULL);			/* XXXSMP */
		rv = (*kn->kn_fop->f_event)(kn, 0);
		KERNEL_UNLOCK_ONE(NULL);		/* XXXSMP */
		if (rv)
			knote_activate(kn);
	} else {
		if (kn == NULL) {
			error = ENOENT;
			mutex_exit(&fdp->fd_lock);
			goto done;
		}
		if (kev->flags & EV_DELETE) {
			/* knote_detach() drops fdp->fd_lock */
			knote_detach(kn, fdp, true);
			goto done;
		}
	}

	/* disable knote */
	if ((kev->flags & EV_DISABLE)) {
		mutex_spin_enter(&kq->kq_lock);
		if ((kn->kn_status & KN_DISABLED) == 0)
			kn->kn_status |= KN_DISABLED;
		mutex_spin_exit(&kq->kq_lock);
	}

	/* enable knote */
	if ((kev->flags & EV_ENABLE)) {
		knote_enqueue(kn);
	}
	mutex_exit(&fdp->fd_lock);
 done:
	rw_exit(&kqueue_filter_lock);
	if (newkn != NULL)
		kmem_free(newkn, sizeof(*newkn));
	if (fp != NULL)
		fd_putfile(fd);
	return (error);
}

#if defined(DEBUG)
static void
kq_check(struct kqueue *kq)
{
	const struct knote *kn;
	int count;
	int nmarker;

	KASSERT(mutex_owned(&kq->kq_lock));
	KASSERT(kq->kq_count >= 0);

	count = 0;
	nmarker = 0;
	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if ((kn->kn_status & (KN_MARKER | KN_QUEUED)) == 0) {
			panic("%s: kq=%p kn=%p inconsist 1", __func__, kq, kn);
		}
		if ((kn->kn_status & KN_MARKER) == 0) {
			if (kn->kn_kq != kq) {
				panic("%s: kq=%p kn=%p inconsist 2",
				    __func__, kq, kn);
			}
			if ((kn->kn_status & KN_ACTIVE) == 0) {
				panic("%s: kq=%p kn=%p: not active",
				    __func__, kq, kn);
			}
			count++;
			if (count > kq->kq_count) {
				goto bad;
			}
		} else {
			nmarker++;
#if 0
			if (nmarker > 10000) {
				panic("%s: kq=%p too many markers: %d != %d, "
				    "nmarker=%d",
				    __func__, kq, kq->kq_count, count, nmarker);
			}
#endif
		}
	}
	if (kq->kq_count != count) {
bad:
		panic("%s: kq=%p inconsist 3: %d != %d, nmarker=%d",
		    __func__, kq, kq->kq_count, count, nmarker);
	}
}
#else /* defined(DEBUG) */
#define	kq_check(a)	/* nothing */
#endif /* defined(DEBUG) */

/*
 * Scan through the list of events on fp (for a maximum of maxevents),
 * returning the results into ulistp.  Timeout is determined by tsp; if
 * NULL, wait indefinitely, if 0 valued, perform a poll, otherwise wait
 * as appropriate.
 */
static int
kqueue_scan(file_t *fp, size_t maxevents, struct kevent *ulistp,
	    const struct timespec *tsp, register_t *retval,
	    const struct kevent_ops *keops, struct kevent *kevbuf,
	    size_t kevcnt)
{
	struct kqueue	*kq;
	struct kevent	*kevp;
	struct timeval	atv, sleeptv;
	struct knote	*kn, *marker;
	size_t		count, nkev, nevents;
	int		timeout, error, rv;
	filedesc_t	*fdp;

	fdp = curlwp->l_fd;
	kq = fp->f_data;
	count = maxevents;
	nkev = nevents = error = 0;
	if (count == 0) {
		*retval = 0;
		return 0;
	}

	if (tsp) {				/* timeout supplied */
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (inittimeleft(&atv, &sleeptv) == -1) {
			*retval = maxevents;
			return EINVAL;
		}
		timeout = tvtohz(&atv);
		if (timeout <= 0)
			timeout = -1;		/* do poll */
	} else {
		/* no timeout, wait forever */
		timeout = 0;
	}

	marker = kmem_zalloc(sizeof(*marker), KM_SLEEP);
	marker->kn_status = KN_MARKER;
	mutex_spin_enter(&kq->kq_lock);
 retry:
	kevp = kevbuf;
	if (kq->kq_count == 0) {
		if (timeout >= 0) {
			error = cv_timedwait_sig(&kq->kq_cv,
			    &kq->kq_lock, timeout);
			if (error == 0) {
				if (tsp == NULL || (timeout =
				    gettimeleft(&atv, &sleeptv)) > 0)
					goto retry;
			} else {
				/* don't restart after signals... */
				if (error == ERESTART)
					error = EINTR;
				if (error == EWOULDBLOCK)
					error = 0;
			}
		}
	} else {
		/* mark end of knote list */
		TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);

		while (count != 0) {
			kn = TAILQ_FIRST(&kq->kq_head);	/* get next knote */
			while ((kn->kn_status & KN_MARKER) != 0) {
				if (kn == marker) {
					/* it's our marker, stop */
					TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
					if (count < maxevents || (tsp != NULL &&
					    (timeout = gettimeleft(&atv,
					    &sleeptv)) <= 0))
						goto done;
					goto retry;
				}
				/* someone else's marker. */
				kn = TAILQ_NEXT(kn, kn_tqe);
			}
			kq_check(kq);
			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
			kq->kq_count--;
			kn->kn_status &= ~KN_QUEUED;
			kq_check(kq);
			if (kn->kn_status & KN_DISABLED) {
				/* don't want disabled events */
				continue;
			}
			if ((kn->kn_flags & EV_ONESHOT) == 0) {
				mutex_spin_exit(&kq->kq_lock);
				KERNEL_LOCK(1, NULL);		/* XXXSMP */
				rv = (*kn->kn_fop->f_event)(kn, 0);
				KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
				mutex_spin_enter(&kq->kq_lock);
				/* Re-poll if note was re-enqueued. */
				if ((kn->kn_status & KN_QUEUED) != 0)
					continue;
				if (rv == 0) {
					/*
					 * non-ONESHOT event that hasn't
					 * triggered again, so de-queue.
					 */
					kn->kn_status &= ~KN_ACTIVE;
					continue;
				}
			}
			/* XXXAD should be got from f_event if !oneshot. */
			*kevp++ = kn->kn_kevent;
			nkev++;
			if (kn->kn_flags & EV_ONESHOT) {
				/* delete ONESHOT events after retrieval */
				mutex_spin_exit(&kq->kq_lock);
				mutex_enter(&fdp->fd_lock);
				knote_detach(kn, fdp, true);
				mutex_spin_enter(&kq->kq_lock);
			} else if (kn->kn_flags & EV_CLEAR) {
				/* clear state after retrieval */
				kn->kn_data = 0;
				kn->kn_fflags = 0;
				kn->kn_status &= ~KN_ACTIVE;
			} else {
				/* add event back on list */
				kq_check(kq);
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				kq_check(kq);
			}
			if (nkev == kevcnt) {
				/* do copyouts in kevcnt chunks */
				mutex_spin_exit(&kq->kq_lock);
				error = (*keops->keo_put_events)
				    (keops->keo_private,
				    kevbuf, ulistp, nevents, nkev);
				mutex_spin_enter(&kq->kq_lock);
				nevents += nkev;
				nkev = 0;
				kevp = kevbuf;
			}
			count--;
			if (error != 0 || count == 0) {
				/* remove marker */
				TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
				break;
			}
		}
	}
 done:
	mutex_spin_exit(&kq->kq_lock);
	if (marker != NULL)
		kmem_free(marker, sizeof(*marker));
	if (nkev != 0) {
		/* copyout remaining events */
		error = (*keops->keo_put_events)(keops->keo_private,
		    kevbuf, ulistp, nevents, nkev);
	}
	*retval = maxevents - count;

	return error;
}

/*
 * fileops ioctl method for a kqueue descriptor.
 *
 * Two ioctls are currently supported. They both use struct kfilter_mapping:
 *	KFILTER_BYNAME		find filter id for the given name.
 *				len is ignored.
 *	KFILTER_BYFILTER	find name for the given filter id, and return
 *				the result in name, which is of size len.
 */
/*ARGSUSED*/
static int
kqueue_ioctl(file_t *fp, u_long com, void *data)
{
	struct kfilter_mapping	*km;
	const struct kfilter	*kfilter;
	char			*name;
	int			error;

	km = data;
	error = 0;
	name = kmem_alloc(KFILTER_MAXNAME, KM_SLEEP);

	switch (com) {
	case KFILTER_BYFILTER:	/* convert filter -> name */
		rw_enter(&kqueue_filter_lock, RW_READER);
		kfilter = kfilter_byfilter(km->filter);
		if (kfilter != NULL) {
			strlcpy(name, kfilter->name, KFILTER_MAXNAME);
			rw_exit(&kqueue_filter_lock);
			error = copyoutstr(name, km->name, km->len, NULL);
		} else {
			rw_exit(&kqueue_filter_lock);
			error = ENOENT;
		}
		break;

	case KFILTER_BYNAME:	/* convert name -> filter */
		error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
		if (error) {
			break;
		}
		rw_enter(&kqueue_filter_lock, RW_READER);
		kfilter = kfilter_byname(name);
		if (kfilter != NULL)
			km->filter = kfilter->filter;
		else
			error = ENOENT;
		rw_exit(&kqueue_filter_lock);
		break;

	default:
		error = ENOTTY;
		break;

	}
	kmem_free(name, KFILTER_MAXNAME);
	return (error);
}

/*
 * fileops fcntl method for a kqueue descriptor.
 */
static int
kqueue_fcntl(file_t *fp, u_int com, void *data)
{

	return (ENOTTY);
}

/*
 * fileops poll method for a kqueue descriptor.
 * Determine if kqueue has events pending.
 */
static int
kqueue_poll(file_t *fp, int events)
{
	struct kqueue	*kq;
	int		revents;

	kq = fp->f_data;

	revents = 0;
	if (events & (POLLIN | POLLRDNORM)) {
		mutex_spin_enter(&kq->kq_lock);
		if (kq->kq_count != 0) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &kq->kq_sel);
		}
		kq_check(kq);
		mutex_spin_exit(&kq->kq_lock);
	}

	return revents;
}

/*
 * fileops stat method for a kqueue descriptor.
 * Returns dummy info, with st_size being number of events pending.
 */
static int
kqueue_stat(file_t *fp, struct stat *st)
{
	struct kqueue *kq;

	kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;

	return 0;
}

static void
kqueue_doclose(struct kqueue *kq, struct klist *list, int fd)
{
	struct knote *kn;
	filedesc_t *fdp;

	fdp = kq->kq_fdp;

	KASSERT(mutex_owned(&fdp->fd_lock));

	for (kn = SLIST_FIRST(list); kn != NULL;) {
		if (kq != kn->kn_kq) {
			kn = SLIST_NEXT(kn, kn_link);
			continue;
		}
		knote_detach(kn, fdp, true);
		mutex_enter(&fdp->fd_lock);
		kn = SLIST_FIRST(list);
	}
}


/*
 * fileops close method for a kqueue descriptor.
 */
static int
kqueue_close(file_t *fp)
{
	struct kqueue *kq;
	filedesc_t *fdp;
	fdfile_t *ff;
	int i;

	kq = fp->f_data;
	fdp = curlwp->l_fd;

	mutex_enter(&fdp->fd_lock);
	for (i = 0; i <= fdp->fd_lastkqfile; i++) {
		if ((ff = fdp->fd_ofiles[i]) == NULL)
			continue;
		kqueue_doclose(kq, (struct klist *)&ff->ff_knlist, i);
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			kqueue_doclose(kq, &fdp->fd_knhash[i], -1);
		}
	}
	mutex_exit(&fdp->fd_lock);

	KASSERT(kq->kq_count == 0);
	mutex_destroy(&kq->kq_lock);
	cv_destroy(&kq->kq_cv);
	seldestroy(&kq->kq_sel);
	kmem_free(kq, sizeof(*kq));
	fp->f_data = NULL;

	return (0);
}

/*
 * struct fileops kqfilter method for a kqueue descriptor.
 * Event triggered when monitored kqueue changes.
 */
static int
kqueue_kqfilter(file_t *fp, struct knote *kn)
{
	struct kqueue *kq;
	filedesc_t *fdp;

	kq = ((file_t *)kn->kn_obj)->f_data;

	KASSERT(fp == kn->kn_obj);

	if (kn->kn_filter != EVFILT_READ)
		return 1;

	kn->kn_fop = &kqread_filtops;
	fdp = curlwp->l_fd;
	mutex_enter(&kq->kq_lock);
	SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext);
	mutex_exit(&kq->kq_lock);

	return 0;
}


/*
 * Walk down a list of knotes, activating them if their event has
 * triggered.  The caller's object lock (e.g. device driver lock)
 * must be held.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		if ((*kn->kn_fop->f_event)(kn, hint))
			knote_activate(kn);
	}
}

/*
 * Remove all knotes referencing a specified fd
 */
void
knote_fdclose(int fd)
{
	struct klist *list;
	struct knote *kn;
	filedesc_t *fdp;

	fdp = curlwp->l_fd;
	list = (struct klist *)&fdp->fd_ofiles[fd]->ff_knlist;
	mutex_enter(&fdp->fd_lock);
	while ((kn = SLIST_FIRST(list)) != NULL) {
		knote_detach(kn, fdp, true);
		mutex_enter(&fdp->fd_lock);
	}
	mutex_exit(&fdp->fd_lock);
}

/*
 * Drop knote.  Called with fdp->fd_lock held, and will drop before
 * returning.
 */
static void
knote_detach(struct knote *kn, filedesc_t *fdp, bool dofop)
{
	struct klist *list;
	struct kqueue *kq;

	kq = kn->kn_kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);
	KASSERT(mutex_owned(&fdp->fd_lock));

	/* Remove from monitored object. */
	if (dofop) {
		KERNEL_LOCK(1, NULL);		/* XXXSMP */
		(*kn->kn_fop->f_detach)(kn);
		KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
	}

	/* Remove from descriptor table. */
	if (kn->kn_fop->f_isfd)
		list = (struct klist *)&fdp->fd_ofiles[kn->kn_id]->ff_knlist;
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);

	/* Remove from kqueue. */
	/* XXXAD should verify not in use by kqueue_scan. */
	mutex_spin_enter(&kq->kq_lock);
	if ((kn->kn_status & KN_QUEUED) != 0) {
		kq_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kq_check(kq);
	}
	mutex_spin_exit(&kq->kq_lock);

	mutex_exit(&fdp->fd_lock);
	if (kn->kn_fop->f_isfd)
		fd_putfile(kn->kn_id);
	atomic_dec_uint(&kn->kn_kfilter->refcnt);
	kmem_free(kn, sizeof(*kn));
}

/*
 * Queue new event for knote.
 */
static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);

	kq = kn->kn_kq;

	mutex_spin_enter(&kq->kq_lock);
	if ((kn->kn_status & KN_DISABLED) != 0) {
		kn->kn_status &= ~KN_DISABLED;
	}
	if ((kn->kn_status & (KN_ACTIVE | KN_QUEUED)) == KN_ACTIVE) {
		kq_check(kq);
		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		kq_check(kq);
		cv_broadcast(&kq->kq_cv);
		selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
	}
	mutex_spin_exit(&kq->kq_lock);
}

/*
 * Mark knote as active and, if it is neither disabled nor already
 * queued, queue it on its kqueue for delivery.
 */
static void
knote_activate(struct knote *kn)
{
	struct kqueue *kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);

	kq = kn->kn_kq;

	mutex_spin_enter(&kq->kq_lock);
	kn->kn_status |= KN_ACTIVE;
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
		kq_check(kq);
		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		kq_check(kq);
		cv_broadcast(&kq->kq_cv);
		selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
	}
	mutex_spin_exit(&kq->kq_lock);
}
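
/*
 * Illustrative userland usage sketch (a comment only, not part of the
 * kernel build): a minimal consumer of the kqueue(2)/kevent(2) interface
 * implemented above, watching one already-open descriptor `fd' (an
 * assumed example variable) for readability.
 *
 *	#include <sys/event.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *
 *	int kq = kqueue();
 *	struct kevent chg, ev;
 *
 *	if (kq == -1)
 *		err(1, "kqueue");
 *	EV_SET(&chg, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
 *	if (kevent(kq, &chg, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent: register");
 *	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
 *		printf("fd %ld readable, %ld bytes pending\n",
 *		    (long)ev.ident, (long)ev.data);
 */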