/*	$NetBSD: kern_event.c,v 1.60 2008/06/24 10:27:35 gmcgarry Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.60 2008/06/24 10:27:35 gmcgarry Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/kmem.h>
#include <sys/stat.h>
#include <sys/filedesc.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/conf.h>
#include <sys/atomic.h>

static int	kqueue_scan(file_t *, size_t, struct kevent *,
			    const struct timespec *, register_t *,
			    const struct kevent_ops *, struct kevent *,
			    size_t);
static int	kqueue_ioctl(file_t *, u_long, void *);
static int	kqueue_fcntl(file_t *, u_int, void *);
static int	kqueue_poll(file_t *, int);
static int	kqueue_kqfilter(file_t *, struct knote *);
static int	kqueue_stat(file_t *, struct stat *);
static int	kqueue_close(file_t *);
static int	kqueue_register(struct kqueue *, struct kevent *);
static void	kqueue_doclose(struct kqueue *, struct klist *, int);

static void	knote_detach(struct knote *, filedesc_t *fdp, bool);
static void	knote_enqueue(struct knote *);
static void	knote_activate(struct knote *);

static void	filt_kqdetach(struct knote *);
static int	filt_kqueue(struct knote *, long hint);
static int	filt_procattach(struct knote *);
static void	filt_procdetach(struct knote *);
static int	filt_proc(struct knote *, long hint);
static int	filt_fileattach(struct knote *);
static void	filt_timerexpire(void *x);
static int	filt_timerattach(struct knote *);
static void	filt_timerdetach(struct knote *);
static int	filt_timer(struct knote *, long hint);

static const struct fileops kqueueops = {
	(void *)enxio, (void *)enxio, kqueue_ioctl, kqueue_fcntl, kqueue_poll,
	kqueue_stat, kqueue_close, kqueue_kqfilter
};

static const struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static const struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static const struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static const struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };
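
/*
 * For reference, the four filterops members initialized above are, in
 * order: f_isfd (non-zero when the knote's ident is a file descriptor),
 * f_attach, f_detach and f_event.  A driver exporting its own read filter
 * would typically supply something like the following; the attach slot is
 * NULL because the driver's fileops/devsw kqfilter entry point (reached
 * via filt_fileattach() below) hooks the knote onto the driver's klist and
 * fills in kn_hook.  Illustrative sketch only; the mydev names are
 * hypothetical and this code is not part of this file:
 *
 *	static int
 *	filt_mydevread(struct knote *kn, long hint)
 *	{
 *		struct mydev_softc *sc = kn->kn_hook;
 *
 *		kn->kn_data = sc->sc_bytes_ready;
 *		return (kn->kn_data > 0);
 *	}
 *
 *	static const struct filterops mydevread_filtops =
 *		{ 1, NULL, filt_mydevdetach, filt_mydevread };
 */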

static u_int	kq_ncallouts = 0;
static int	kq_calloutmax = (4 * 1024);

#define	KN_HASHSIZE		64	/* XXX should be tunable */
#define	KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern const struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 * These should be listed in the numeric order of the EVFILT_* defines.
 * If filtops is NULL, the filter isn't implemented in NetBSD.
 * End of list is when name is NULL.
 *
 * Note that 'refcnt' is meaningless for built-in filters.
 */
struct kfilter {
	const char	*name;		/* name of filter */
	uint32_t	filter;		/* id of filter */
	unsigned	refcnt;		/* reference count */
	const struct filterops *filtops;/* operations for filter */
	size_t		namelen;	/* length of name string */
};

/* System defined filters */
static struct kfilter sys_kfilters[] = {
	{ "EVFILT_READ",	EVFILT_READ,	0, &file_filtops, 0 },
	{ "EVFILT_WRITE",	EVFILT_WRITE,	0, &file_filtops, 0 },
	{ "EVFILT_AIO",		EVFILT_AIO,	0, NULL, 0 },
	{ "EVFILT_VNODE",	EVFILT_VNODE,	0, &file_filtops, 0 },
	{ "EVFILT_PROC",	EVFILT_PROC,	0, &proc_filtops, 0 },
	{ "EVFILT_SIGNAL",	EVFILT_SIGNAL,	0, &sig_filtops, 0 },
	{ "EVFILT_TIMER",	EVFILT_TIMER,	0, &timer_filtops, 0 },
	{ NULL,			0,		0, NULL, 0 },
};

/* User defined kfilters */
static struct kfilter	*user_kfilters;		/* array */
static int		user_kfilterc;		/* current offset */
static int		user_kfiltermaxc;	/* max size so far */
static size_t		user_kfiltersz;		/* size of allocated memory */

/* Locks */
static krwlock_t	kqueue_filter_lock;	/* lock on filter lists */
static kmutex_t		kqueue_misc_lock;	/* miscellaneous */

/*
 * Initialize the kqueue subsystem.
 */
void
kqueue_init(void)
{

	rw_init(&kqueue_filter_lock);
	mutex_init(&kqueue_misc_lock, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Find kfilter entry by name, or NULL if not found.
 */
static struct kfilter *
kfilter_byname_sys(const char *name)
{
	int i;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	for (i = 0; sys_kfilters[i].name != NULL; i++) {
		if (strcmp(name, sys_kfilters[i].name) == 0)
			return &sys_kfilters[i];
	}
	return NULL;
}

static struct kfilter *
kfilter_byname_user(const char *name)
{
	int i;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	/* user filter slots have a NULL name if previously deregistered */
	for (i = 0; i < user_kfilterc; i++) {
		if (user_kfilters[i].name != NULL &&
		    strcmp(name, user_kfilters[i].name) == 0)
			return &user_kfilters[i];
	}
	return NULL;
}

static struct kfilter *
kfilter_byname(const char *name)
{
	struct kfilter *kfilter;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	if ((kfilter = kfilter_byname_sys(name)) != NULL)
		return kfilter;

	return kfilter_byname_user(name);
}

/*
 * Find kfilter entry by filter id, or NULL if not found.
 * Assumes entries are indexed in filter id order, for speed.
 */
static struct kfilter *
kfilter_byfilter(uint32_t filter)
{
	struct kfilter *kfilter;

	KASSERT(rw_lock_held(&kqueue_filter_lock));

	if (filter < EVFILT_SYSCOUNT)	/* it's a system filter */
		kfilter = &sys_kfilters[filter];
	else if (user_kfilters != NULL &&
	    filter < EVFILT_SYSCOUNT + user_kfilterc)
		/* it's a user filter */
		kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
	else
		return (NULL);		/* out of range */
	KASSERT(kfilter->filter == filter);	/* sanity check! */
	return (kfilter);
}

/*
 * Register a new kfilter.  Stores the entry in user_kfilters.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 * If retfilter != NULL, the new filterid is returned in it.
 */
int
kfilter_register(const char *name, const struct filterops *filtops,
    int *retfilter)
{
	struct kfilter *kfilter;
	size_t len;
	int i;

	if (name == NULL || name[0] == '\0' || filtops == NULL)
		return (EINVAL);	/* invalid args */

	rw_enter(&kqueue_filter_lock, RW_WRITER);
	if (kfilter_byname(name) != NULL) {
		rw_exit(&kqueue_filter_lock);
		return (EEXIST);	/* already exists */
	}
	if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) {
		rw_exit(&kqueue_filter_lock);
		return (EINVAL);	/* too many */
	}

	for (i = 0; i < user_kfilterc; i++) {
		kfilter = &user_kfilters[i];
		if (kfilter->name == NULL) {
			/* Previously deregistered slot.  Reuse. */
			goto reuse;
		}
	}

	/* check if need to grow user_kfilters */
	if (user_kfilterc + 1 > user_kfiltermaxc) {
		/* Grow in KFILTER_EXTENT chunks. */
		user_kfiltermaxc += KFILTER_EXTENT;
		len = user_kfiltermaxc * sizeof(*kfilter);
		kfilter = kmem_alloc(len, KM_SLEEP);
		memset((char *)kfilter + user_kfiltersz, 0,
		    len - user_kfiltersz);
		if (user_kfilters != NULL) {
			memcpy(kfilter, user_kfilters, user_kfiltersz);
			kmem_free(user_kfilters, user_kfiltersz);
		}
		user_kfiltersz = len;
		user_kfilters = kfilter;
	}
	/* Adding new slot */
	kfilter = &user_kfilters[user_kfilterc++];
reuse:
	kfilter->namelen = strlen(name) + 1;
	kfilter->name = kmem_alloc(kfilter->namelen, KM_SLEEP);
	memcpy(__UNCONST(kfilter->name), name, kfilter->namelen);

	kfilter->filter = (kfilter - user_kfilters) + EVFILT_SYSCOUNT;

	kfilter->filtops = kmem_alloc(sizeof(*filtops), KM_SLEEP);
	memcpy(__UNCONST(kfilter->filtops), filtops, sizeof(*filtops));

	if (retfilter != NULL)
		*retfilter = kfilter->filter;
	rw_exit(&kqueue_filter_lock);

	return (0);
}

/*
 * Unregister a kfilter previously registered with kfilter_register.
 * This retains the filter id, but clears the name and frees filtops (filter
 * operations), so that the id isn't reused for the remainder of this boot.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 */
int
kfilter_unregister(const char *name)
{
	struct kfilter *kfilter;

	if (name == NULL || name[0] == '\0')
		return (EINVAL);	/* invalid name */

	rw_enter(&kqueue_filter_lock, RW_WRITER);
	if (kfilter_byname_sys(name) != NULL) {
		rw_exit(&kqueue_filter_lock);
		return (EINVAL);	/* can't detach system filters */
	}

	kfilter = kfilter_byname_user(name);
	if (kfilter == NULL) {
		rw_exit(&kqueue_filter_lock);
		return (ENOENT);
	}
	if (kfilter->refcnt != 0) {
		rw_exit(&kqueue_filter_lock);
		return (EBUSY);
	}

	/* Cast away const (but we know it's safe). */
	kmem_free(__UNCONST(kfilter->name), kfilter->namelen);
	kfilter->name = NULL;	/* mark as `not implemented' */

	if (kfilter->filtops != NULL) {
		/* Cast away const (but we know it's safe). */
		kmem_free(__UNCONST(kfilter->filtops),
		    sizeof(*kfilter->filtops));
		kfilter->filtops = NULL;	/* mark as `not implemented' */
	}
	rw_exit(&kqueue_filter_lock);

	return (0);
}
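
/*
 * Usage sketch: a kernel module could register a private filter at load
 * time and remove it again on unload.  The filter name and filterops
 * below are hypothetical; only kfilter_register()/kfilter_unregister()
 * are interfaces defined in this file.
 *
 *	static int myfilter_id;
 *
 *	error = kfilter_register("EVFILT_MYDEV", &mydev_filtops,
 *	    &myfilter_id);
 *	if (error != 0)
 *		return error;		EEXIST, EINVAL, ...
 *	...
 *	error = kfilter_unregister("EVFILT_MYDEV");
 *	EBUSY is returned while any knote still references the filter.
 */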

/*
 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
 * descriptors.  Calls fileops kqfilter method for given file descriptor.
 */
static int
filt_fileattach(struct knote *kn)
{
	file_t *fp;

	fp = kn->kn_obj;

	return (*fp->f_ops->fo_kqfilter)(fp, kn);
}

/*
 * Filter detach method for EVFILT_READ on kqueue descriptor.
 */
static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq;

	kq = ((file_t *)kn->kn_obj)->f_data;

	mutex_spin_enter(&kq->kq_lock);
	SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext);
	mutex_spin_exit(&kq->kq_lock);
}

/*
 * Filter event method for EVFILT_READ on kqueue descriptor.
 */
/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq;
	int rv;

	kq = ((file_t *)kn->kn_obj)->f_data;

	if (hint != NOTE_SUBMIT)
		mutex_spin_enter(&kq->kq_lock);
	kn->kn_data = kq->kq_count;
	rv = (kn->kn_data > 0);
	if (hint != NOTE_SUBMIT)
		mutex_spin_exit(&kq->kq_lock);

	return rv;
}

/*
 * Filter attach method for EVFILT_PROC.
 */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p, *curp;
	struct lwp *curl;

	curl = curlwp;
	curp = curl->l_proc;

	mutex_enter(proc_lock);
	p = p_find(kn->kn_id, PFIND_LOCKED);
	if (p == NULL) {
		mutex_exit(proc_lock);
		return ESRCH;
	}

	/*
	 * Fail if it's not owned by you, or the last exec gave us
	 * setuid/setgid privs (unless you're root).
	 */
	mutex_enter(p->p_lock);
	mutex_exit(proc_lock);
	if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KEVENT_FILTER,
	    p, NULL, NULL, NULL) != 0) {
		mutex_exit(p->p_lock);
		return EACCES;
	}

	kn->kn_obj = p;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
	mutex_exit(p->p_lock);

	return 0;
}
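
/*
 * Userland usage sketch for EVFILT_PROC (illustrative only): watch a
 * child process for exit and fork, letting NOTE_TRACK attach a new knote
 * to each forked child automatically.  "kq" and "child" are assumed to
 * come from kqueue(2) and fork(2) respectively.
 *
 *	struct kevent ev;
 *
 *	EV_SET(&ev, child, EVFILT_PROC, EV_ADD | EV_ENABLE,
 *	    NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, NULL);
 *	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 *
 * When the child forks, the new process reports a NOTE_CHILD event whose
 * data field holds the parent's pid (see the EV_FLAG1 handling above);
 * on exit a NOTE_EXIT event is reported and the knote is removed.
 */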

/*
 * Filter detach method for EVFILT_PROC.
 *
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process might not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	if (kn->kn_status & KN_DETACHED)
		return;

	p = kn->kn_obj;

	mutex_enter(p->p_lock);
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
	mutex_exit(p->p_lock);
}

/*
 * Filter event method for EVFILT_PROC.
 */
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event, fflag;
	struct kevent kev;
	struct kqueue *kq;
	int error;

	event = (u_int)hint & NOTE_PCTRLMASK;
	kq = kn->kn_kq;
	fflag = 0;

	/* If the user is interested in this event, record it. */
	if (kn->kn_sfflags & event)
		fflag |= event;

	if (event == NOTE_EXIT) {
		/*
		 * Process is gone, so flag the event as finished.
		 *
		 * Detach the knote from the watched process and mark
		 * it as such.  We can't leave this to kqueue_scan(),
		 * since the process might not exist by then.  And we
		 * have to do this now, since the KNOTE() from psignal
		 * is also called for zombies, and we might end up
		 * reading freed memory if the kevent had already been
		 * picked up and the knote g/c'ed.
		 */
		filt_procdetach(kn);

		mutex_spin_enter(&kq->kq_lock);
		kn->kn_status |= KN_DETACHED;
		/* Mark as ONESHOT, so that the knote is g/c'ed when read */
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_fflags |= fflag;
		mutex_spin_exit(&kq->kq_lock);

		return 1;
	}

	mutex_spin_enter(&kq->kq_lock);
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		/*
		 * Process forked, and user wants to track the new process,
		 * so attach a new knote to it, and immediately report an
		 * event with the parent's pid.  Register knote with new
		 * process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		mutex_spin_exit(&kq->kq_lock);
		error = kqueue_register(kq, &kev);
		mutex_spin_enter(&kq->kq_lock);
		if (error != 0)
			kn->kn_fflags |= NOTE_TRACKERR;
	}
	kn->kn_fflags |= fflag;
	fflag = kn->kn_fflags;
	mutex_spin_exit(&kq->kq_lock);

	return fflag != 0;
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	int tticks;

	mutex_enter(&kqueue_misc_lock);
	kn->kn_data++;
	knote_activate(kn);
	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tticks = mstohz(kn->kn_sdata);
		callout_schedule((callout_t *)kn->kn_hook, tticks);
	}
	mutex_exit(&kqueue_misc_lock);
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	callout_t *calloutp;
	struct kqueue *kq;
	int tticks;

	tticks = mstohz(kn->kn_sdata);

	/* if the supplied value is under our resolution, use 1 tick */
	if (tticks == 0) {
		if (kn->kn_sdata == 0)
			return EINVAL;
		tticks = 1;
	}

	if (atomic_inc_uint_nv(&kq_ncallouts) >= kq_calloutmax ||
	    (calloutp = kmem_alloc(sizeof(*calloutp), KM_NOSLEEP)) == NULL) {
		atomic_dec_uint(&kq_ncallouts);
		return ENOMEM;
	}
	callout_init(calloutp, CALLOUT_MPSAFE);

	kq = kn->kn_kq;
	mutex_spin_enter(&kq->kq_lock);
	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	kn->kn_hook = calloutp;
	mutex_spin_exit(&kq->kq_lock);

	callout_reset(calloutp, tticks, filt_timerexpire, kn);

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	callout_t *calloutp;

	calloutp = (callout_t *)kn->kn_hook;
	callout_halt(calloutp, NULL);
	callout_destroy(calloutp);
	kmem_free(calloutp, sizeof(*calloutp));
	atomic_dec_uint(&kq_ncallouts);
}

static int
filt_timer(struct knote *kn, long hint)
{
	int rv;

	mutex_enter(&kqueue_misc_lock);
	rv = (kn->kn_data != 0);
	mutex_exit(&kqueue_misc_lock);

	return rv;
}
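
/*
 * Userland usage sketch for EVFILT_TIMER (illustrative only): request a
 * periodic event roughly every 500 ms.  The data field is in
 * milliseconds, as handled by filt_timerattach() above; values below the
 * clock resolution are rounded up to one tick.  "kq" is assumed to come
 * from kqueue(2).
 *
 *	struct kevent ev;
 *
 *	EV_SET(&ev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, NULL);
 *	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 *
 * When the timer fires, the returned event's data field holds the number
 * of expirations since the last retrieval (kn_data is incremented in
 * filt_timerexpire() and cleared on read because EV_CLEAR is set).
 * Adding EV_ONESHOT instead makes the timer fire only once.
 */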

/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll gets.
	 */
	kn->kn_data = 0;
	return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, with the
 * same effect as a filter using filt_seltrue() as its event method.
 */
static void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops =
	{ 1, NULL, filt_seltruedetach, filt_seltrue };

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

/*
 * kqueue(2) system call.
 */
int
sys_kqueue(struct lwp *l, const void *v, register_t *retval)
{
	struct kqueue *kq;
	file_t *fp;
	int fd, error;

	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	kq = kmem_zalloc(sizeof(*kq), KM_SLEEP);
	mutex_init(&kq->kq_lock, MUTEX_DEFAULT, IPL_SCHED);
	cv_init(&kq->kq_cv, "kqueue");
	selinit(&kq->kq_sel);
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = kq;
	*retval = fd;
	kq->kq_fdp = curlwp->l_fd;
	fd_affix(curproc, fp, fd);
	return error;
}

/*
 * kevent(2) system call.
 */
static int
kevent_fetch_changes(void *private, const struct kevent *changelist,
    struct kevent *changes, size_t index, int n)
{

	return copyin(changelist + index, changes, n * sizeof(*changes));
}

static int
kevent_put_events(void *private, struct kevent *events,
    struct kevent *eventlist, size_t index, int n)
{

	return copyout(events, eventlist + index, n * sizeof(*events));
}

static const struct kevent_ops kevent_native_ops = {
	.keo_private = NULL,
	.keo_fetch_timeout = copyin,
	.keo_fetch_changes = kevent_fetch_changes,
	.keo_put_events = kevent_put_events,
};

int
sys_kevent(struct lwp *l, const struct sys_kevent_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)				fd;
		syscallarg(const struct kevent *)	changelist;
		syscallarg(size_t)			nchanges;
		syscallarg(struct kevent *)		eventlist;
		syscallarg(size_t)			nevents;
		syscallarg(const struct timespec *)	timeout;
	} */

	return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist),
	    SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents),
	    SCARG(uap, timeout), &kevent_native_ops);
}
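
/*
 * Userland usage sketch (illustrative only): the typical kevent(2) loop,
 * here waiting for a descriptor "fd" to become readable.  Error handling
 * is abbreviated; "kq" comes from kqueue(2).
 *
 *	#include <sys/event.h>
 *
 *	struct kevent change, result;
 *	int kq, n;
 *
 *	kq = kqueue();
 *	EV_SET(&change, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
 *	n = kevent(kq, &change, 1, NULL, 0, NULL);	register only
 *	...
 *	n = kevent(kq, NULL, 0, &result, 1, NULL);	block for one event
 *	if (n == 1)
 *		...	result.ident == fd, result.data == bytes available
 *
 * Both the change registration and the event retrieval funnel through
 * kevent1() below, which fetches changes and copies results back out via
 * the kevent_native_ops callbacks defined above.
 */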

int
kevent1(register_t *retval, int fd,
    const struct kevent *changelist, size_t nchanges,
    struct kevent *eventlist, size_t nevents,
    const struct timespec *timeout,
    const struct kevent_ops *keops)
{
	struct kevent *kevp;
	struct kqueue *kq;
	struct timespec	ts;
	size_t i, n, ichange;
	int nerrors, error;
	struct kevent kevbuf[8];	/* approx 300 bytes on 64-bit */
	file_t *fp;

	/* check that we're dealing with a kq */
	fp = fd_getfile(fd);
	if (fp == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		fd_putfile(fd);
		return (EBADF);
	}

	if (timeout != NULL) {
		error = (*keops->keo_fetch_timeout)(timeout, &ts, sizeof(ts));
		if (error)
			goto done;
		timeout = &ts;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;
	ichange = 0;

	/* traverse list of events to register */
	while (nchanges > 0) {
		n = MIN(nchanges, __arraycount(kevbuf));
		error = (*keops->keo_fetch_changes)(keops->keo_private,
		    changelist, kevbuf, ichange, n);
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kevbuf[i];
			kevp->flags &= ~EV_SYSFLAGS;
			/* register each knote */
			error = kqueue_register(kq, kevp);
			if (error) {
				if (nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					error = (*keops->keo_put_events)
					    (keops->keo_private, kevp,
					    eventlist, nerrors, 1);
					if (error)
						goto done;
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;	/* update the results */
		ichange += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	/* actually scan through the events */
	error = kqueue_scan(fp, nevents, eventlist, timeout, retval, keops,
	    kevbuf, __arraycount(kevbuf));
 done:
	fd_putfile(fd);
	return (error);
}
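
/*
 * As the loop above shows, a failure to register an individual change is
 * not necessarily fatal: if the caller supplied room in the eventlist,
 * the failing change is copied back with EV_ERROR set and the errno value
 * in its data field, and kevent(2) still returns successfully.  A
 * userland caller therefore has to check for this (illustrative sketch):
 *
 *	n = kevent(kq, changes, nchanges, events, nevents, NULL);
 *	for (i = 0; i < n; i++) {
 *		if (events[i].flags & EV_ERROR)
 *			warnx("change %d failed: %s", i,
 *			    strerror((int)events[i].data));
 *	}
 */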

/*
 * Register a given kevent kev onto the kqueue
 */
static int
kqueue_register(struct kqueue *kq, struct kevent *kev)
{
	struct kfilter *kfilter;
	filedesc_t *fdp;
	file_t *fp;
	fdfile_t *ff;
	struct knote *kn, *newkn;
	struct klist *list;
	int error, fd, rv;

	fdp = kq->kq_fdp;
	fp = NULL;
	kn = NULL;
	error = 0;
	fd = 0;

	newkn = kmem_zalloc(sizeof(*newkn), KM_SLEEP);

	rw_enter(&kqueue_filter_lock, RW_READER);
	kfilter = kfilter_byfilter(kev->filter);
	if (kfilter == NULL || kfilter->filtops == NULL) {
		/* filter not found or not implemented */
		rw_exit(&kqueue_filter_lock);
		kmem_free(newkn, sizeof(*newkn));
		return (EINVAL);
	}

	mutex_enter(&fdp->fd_lock);

	/* search if knote already exists */
	if (kfilter->filtops->f_isfd) {
		/* monitoring a file descriptor */
		fd = kev->ident;
		if ((fp = fd_getfile(fd)) == NULL) {
			mutex_exit(&fdp->fd_lock);
			rw_exit(&kqueue_filter_lock);
			kmem_free(newkn, sizeof(*newkn));
			return EBADF;
		}
		ff = fdp->fd_ofiles[fd];
		if (fd <= fdp->fd_lastkqfile) {
			SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) {
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
			}
		}
	} else {
		/*
		 * not monitoring a file descriptor, so
		 * lookup knotes in internal hash table
		 */
		if (fdp->fd_knhashmask != 0) {
			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link) {
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
			}
		}
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			/* create new knote */
			kn = newkn;
			newkn = NULL;
			kn->kn_obj = fp;
			kn->kn_kq = kq;
			kn->kn_fop = kfilter->filtops;
			kn->kn_kfilter = kfilter;
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			if (!kn->kn_fop->f_isfd) {
				/*
				 * If knote is not on an fd, store on
				 * internal hash table.
				 */
				if (fdp->fd_knhashmask == 0) {
					/* XXXAD can block with fd_lock held */
					fdp->fd_knhash = hashinit(KN_HASHSIZE,
					    HASH_LIST, true,
					    &fdp->fd_knhashmask);
				}
				list = &fdp->fd_knhash[KN_HASH(kn->kn_id,
				    fdp->fd_knhashmask)];
			} else {
				/* Otherwise, knote is on an fd. */
				list = (struct klist *)
				    &fdp->fd_ofiles[kn->kn_id]->ff_knlist;
				if ((int)kn->kn_id > fdp->fd_lastkqfile)
					fdp->fd_lastkqfile = kn->kn_id;
			}
			SLIST_INSERT_HEAD(list, kn, kn_link);

			KERNEL_LOCK(1, NULL);			/* XXXSMP */
			error = (*kfilter->filtops->f_attach)(kn);
			KERNEL_UNLOCK_ONE(NULL);		/* XXXSMP */
			if (error != 0) {
				/* knote_detach() drops fdp->fd_lock */
				knote_detach(kn, fdp, false);
				goto done;
			}
			atomic_inc_uint(&kfilter->refcnt);
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters that have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}
		KERNEL_LOCK(1, NULL);				/* XXXSMP */
		rv = (*kn->kn_fop->f_event)(kn, 0);
		KERNEL_UNLOCK_ONE(NULL);			/* XXXSMP */
		if (rv)
			knote_activate(kn);
	} else {
		if (kn == NULL) {
			error = ENOENT;
			mutex_exit(&fdp->fd_lock);
			goto done;
		}
		if (kev->flags & EV_DELETE) {
			/* knote_detach() drops fdp->fd_lock */
			knote_detach(kn, fdp, true);
			goto done;
		}
	}

	/* disable knote */
	if ((kev->flags & EV_DISABLE)) {
		mutex_spin_enter(&kq->kq_lock);
		if ((kn->kn_status & KN_DISABLED) == 0)
			kn->kn_status |= KN_DISABLED;
		mutex_spin_exit(&kq->kq_lock);
	}

	/* enable knote */
	if ((kev->flags & EV_ENABLE)) {
		knote_enqueue(kn);
	}
	mutex_exit(&fdp->fd_lock);
 done:
	rw_exit(&kqueue_filter_lock);
	if (newkn != NULL)
		kmem_free(newkn, sizeof(*newkn));
	if (fp != NULL)
		fd_putfile(fd);
	return (error);
}

#if defined(DEBUG)
static void
kq_check(struct kqueue *kq)
{
	const struct knote *kn;
	int count;
	int nmarker;

	KASSERT(mutex_owned(&kq->kq_lock));
	KASSERT(kq->kq_count >= 0);

	count = 0;
	nmarker = 0;
	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if ((kn->kn_status & (KN_MARKER | KN_QUEUED)) == 0) {
			panic("%s: kq=%p kn=%p inconsist 1", __func__, kq, kn);
		}
		if ((kn->kn_status & KN_MARKER) == 0) {
			if (kn->kn_kq != kq) {
				panic("%s: kq=%p kn=%p inconsist 2",
				    __func__, kq, kn);
			}
			if ((kn->kn_status & KN_ACTIVE) == 0) {
				panic("%s: kq=%p kn=%p: not active",
				    __func__, kq, kn);
			}
			count++;
			if (count > kq->kq_count) {
				goto bad;
			}
		} else {
			nmarker++;
#if 0
			if (nmarker > 10000) {
				panic("%s: kq=%p too many markers: %d != %d, "
				    "nmarker=%d",
				    __func__, kq, kq->kq_count, count, nmarker);
			}
#endif
		}
	}
	if (kq->kq_count != count) {
bad:
		panic("%s: kq=%p inconsist 3: %d != %d, nmarker=%d",
		    __func__, kq, kq->kq_count, count, nmarker);
	}
}
#else /* defined(DEBUG) */
#define	kq_check(a)	/* nothing */
#endif /* defined(DEBUG) */

/*
 * Scan through the list of events on fp (for a maximum of maxevents),
 * returning the results in ulistp.  Timeout is determined by tsp; if
 * NULL, wait indefinitely, if 0 valued, perform a poll, otherwise wait
 * as appropriate.
 */
static int
kqueue_scan(file_t *fp, size_t maxevents, struct kevent *ulistp,
    const struct timespec *tsp, register_t *retval,
    const struct kevent_ops *keops, struct kevent *kevbuf,
    size_t kevcnt)
{
	struct kqueue	*kq;
	struct kevent	*kevp;
	struct timeval	atv, sleeptv;
	struct knote	*kn, *marker;
	size_t		count, nkev, nevents;
	int		timeout, error, rv;
	filedesc_t	*fdp;

	fdp = curlwp->l_fd;
	kq = fp->f_data;
	count = maxevents;
	nkev = nevents = error = 0;
	if (count == 0) {
		*retval = 0;
		return 0;
	}

	if (tsp) {				/* timeout supplied */
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (inittimeleft(&atv, &sleeptv) == -1) {
			*retval = maxevents;
			return EINVAL;
		}
		timeout = tvtohz(&atv);
		if (timeout <= 0)
			timeout = -1;		/* do poll */
	} else {
		/* no timeout, wait forever */
		timeout = 0;
	}

	marker = kmem_zalloc(sizeof(*marker), KM_SLEEP);
	marker->kn_status = KN_MARKER;
	mutex_spin_enter(&kq->kq_lock);
 retry:
	kevp = kevbuf;
	if (kq->kq_count == 0) {
		if (timeout >= 0) {
			error = cv_timedwait_sig(&kq->kq_cv,
			    &kq->kq_lock, timeout);
			if (error == 0) {
				if (tsp == NULL || (timeout =
				    gettimeleft(&atv, &sleeptv)) > 0)
					goto retry;
			} else {
				/* don't restart after signals... */
				if (error == ERESTART)
					error = EINTR;
				if (error == EWOULDBLOCK)
					error = 0;
			}
		}
	} else {
		/* mark end of knote list */
		TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);

		while (count != 0) {
			kn = TAILQ_FIRST(&kq->kq_head);	/* get next knote */
			while ((kn->kn_status & KN_MARKER) != 0) {
				if (kn == marker) {
					/* it's our marker, stop */
					TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
					if (count < maxevents || (tsp != NULL &&
					    (timeout = gettimeleft(&atv,
					    &sleeptv)) <= 0))
						goto done;
					goto retry;
				}
				/* someone else's marker. */
				kn = TAILQ_NEXT(kn, kn_tqe);
			}
			kq_check(kq);
			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
			kq->kq_count--;
			kn->kn_status &= ~KN_QUEUED;
			kq_check(kq);
			if (kn->kn_status & KN_DISABLED) {
				/* don't want disabled events */
				continue;
			}
			if ((kn->kn_flags & EV_ONESHOT) == 0) {
				mutex_spin_exit(&kq->kq_lock);
				KERNEL_LOCK(1, NULL);		/* XXXSMP */
				rv = (*kn->kn_fop->f_event)(kn, 0);
				KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
				mutex_spin_enter(&kq->kq_lock);
				/* Re-poll if note was re-enqueued. */
				if ((kn->kn_status & KN_QUEUED) != 0)
					continue;
				if (rv == 0) {
					/*
					 * non-ONESHOT event that hasn't
					 * triggered again, so de-queue.
					 */
					kn->kn_status &= ~KN_ACTIVE;
					continue;
				}
			}
			/* XXXAD should be got from f_event if !oneshot. */
			*kevp++ = kn->kn_kevent;
			nkev++;
			if (kn->kn_flags & EV_ONESHOT) {
				/* delete ONESHOT events after retrieval */
				mutex_spin_exit(&kq->kq_lock);
				mutex_enter(&fdp->fd_lock);
				knote_detach(kn, fdp, true);
				mutex_spin_enter(&kq->kq_lock);
			} else if (kn->kn_flags & EV_CLEAR) {
				/* clear state after retrieval */
				kn->kn_data = 0;
				kn->kn_fflags = 0;
				kn->kn_status &= ~KN_ACTIVE;
			} else {
				/* add event back on list */
				kq_check(kq);
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				kq_check(kq);
			}
			if (nkev == kevcnt) {
				/* do copyouts in kevcnt chunks */
				mutex_spin_exit(&kq->kq_lock);
				error = (*keops->keo_put_events)
				    (keops->keo_private,
				    kevbuf, ulistp, nevents, nkev);
				mutex_spin_enter(&kq->kq_lock);
				nevents += nkev;
				nkev = 0;
				kevp = kevbuf;
			}
			count--;
			if (error != 0 || count == 0) {
				/* remove marker */
				TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
				break;
			}
		}
	}
 done:
	mutex_spin_exit(&kq->kq_lock);
	if (marker != NULL)
		kmem_free(marker, sizeof(*marker));
	if (nkev != 0) {
		/* copyout remaining events */
		error = (*keops->keo_put_events)(keops->keo_private,
		    kevbuf, ulistp, nevents, nkev);
	}
	*retval = maxevents - count;

	return error;
}
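
/*
 * The timeout convention above is the same one kevent(2) exposes to
 * userland (illustrative sketch):
 *
 *	struct timespec zero = { 0, 0 };
 *
 *	kevent(kq, NULL, 0, evs, nevs, NULL);	block until an event fires
 *	kevent(kq, NULL, 0, evs, nevs, &zero);	poll: return immediately
 *
 * A non-zero timespec bounds the wait; kqueue_scan() converts it to ticks
 * with tvtohz() and rechecks the remaining time with gettimeleft() each
 * time it wakes up without having filled the caller's list.
 */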

/*
 * fileops ioctl method for a kqueue descriptor.
 *
 * Two ioctls are currently supported.  They both use struct kfilter_mapping:
 *	KFILTER_BYFILTER	find the name for a given filter id, and
 *				return the result in name, which is of
 *				size len.
 *	KFILTER_BYNAME		find the filter id for a given name.
 *				len is ignored.
 */
/*ARGSUSED*/
static int
kqueue_ioctl(file_t *fp, u_long com, void *data)
{
	struct kfilter_mapping	*km;
	const struct kfilter	*kfilter;
	char			*name;
	int			error;

	km = data;
	error = 0;
	name = kmem_alloc(KFILTER_MAXNAME, KM_SLEEP);

	switch (com) {
	case KFILTER_BYFILTER:	/* convert filter -> name */
		rw_enter(&kqueue_filter_lock, RW_READER);
		kfilter = kfilter_byfilter(km->filter);
		if (kfilter != NULL) {
			strlcpy(name, kfilter->name, KFILTER_MAXNAME);
			rw_exit(&kqueue_filter_lock);
			error = copyoutstr(name, km->name, km->len, NULL);
		} else {
			rw_exit(&kqueue_filter_lock);
			error = ENOENT;
		}
		break;

	case KFILTER_BYNAME:	/* convert name -> filter */
		error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
		if (error) {
			break;
		}
		rw_enter(&kqueue_filter_lock, RW_READER);
		kfilter = kfilter_byname(name);
		if (kfilter != NULL)
			km->filter = kfilter->filter;
		else
			error = ENOENT;
		rw_exit(&kqueue_filter_lock);
		break;

	default:
		error = ENOTTY;
		break;

	}
	kmem_free(name, KFILTER_MAXNAME);
	return (error);
}
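
/*
 * Userland usage sketch for KFILTER_BYNAME (illustrative only): look up
 * the numeric id of a named filter on an open kqueue descriptor.  The
 * struct kfilter_mapping field names used here are assumed to match the
 * definition in <sys/event.h>.
 *
 *	struct kfilter_mapping km;
 *
 *	km.name = "EVFILT_TIMER";
 *	km.len = 0;			ignored for KFILTER_BYNAME
 *	if (ioctl(kq, KFILTER_BYNAME, &km) == -1)
 *		err(1, "KFILTER_BYNAME");
 *	printf("filter id %u\n", km.filter);
 *
 * KFILTER_BYFILTER goes the other way: set km.filter, point km.name at a
 * buffer of km.len bytes, and the kernel copies the name back out.
 */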

/*
 * fileops fcntl method for a kqueue descriptor.
 */
static int
kqueue_fcntl(file_t *fp, u_int com, void *data)
{

	return (ENOTTY);
}

/*
 * fileops poll method for a kqueue descriptor.
 * Determine if kqueue has events pending.
 */
static int
kqueue_poll(file_t *fp, int events)
{
	struct kqueue	*kq;
	int		revents;

	kq = fp->f_data;

	revents = 0;
	if (events & (POLLIN | POLLRDNORM)) {
		mutex_spin_enter(&kq->kq_lock);
		if (kq->kq_count != 0) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &kq->kq_sel);
		}
		kq_check(kq);
		mutex_spin_exit(&kq->kq_lock);
	}

	return revents;
}

/*
 * fileops stat method for a kqueue descriptor.
 * Returns dummy info, with st_size being number of events pending.
 */
static int
kqueue_stat(file_t *fp, struct stat *st)
{
	struct kqueue *kq;

	kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;

	return 0;
}

static void
kqueue_doclose(struct kqueue *kq, struct klist *list, int fd)
{
	struct knote *kn;
	filedesc_t *fdp;

	fdp = kq->kq_fdp;

	KASSERT(mutex_owned(&fdp->fd_lock));

	for (kn = SLIST_FIRST(list); kn != NULL;) {
		if (kq != kn->kn_kq) {
			kn = SLIST_NEXT(kn, kn_link);
			continue;
		}
		knote_detach(kn, fdp, true);
		mutex_enter(&fdp->fd_lock);
		kn = SLIST_FIRST(list);
	}
}


/*
 * fileops close method for a kqueue descriptor.
 */
static int
kqueue_close(file_t *fp)
{
	struct kqueue *kq;
	filedesc_t *fdp;
	fdfile_t *ff;
	int i;

	kq = fp->f_data;
	fdp = curlwp->l_fd;

	mutex_enter(&fdp->fd_lock);
	for (i = 0; i <= fdp->fd_lastkqfile; i++) {
		if ((ff = fdp->fd_ofiles[i]) == NULL)
			continue;
		kqueue_doclose(kq, (struct klist *)&ff->ff_knlist, i);
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			kqueue_doclose(kq, &fdp->fd_knhash[i], -1);
		}
	}
	mutex_exit(&fdp->fd_lock);

	KASSERT(kq->kq_count == 0);
	mutex_destroy(&kq->kq_lock);
	cv_destroy(&kq->kq_cv);
	seldestroy(&kq->kq_sel);
	kmem_free(kq, sizeof(*kq));
	fp->f_data = NULL;

	return (0);
}

/*
 * struct fileops kqfilter method for a kqueue descriptor.
 * Event triggered when monitored kqueue changes.
 */
static int
kqueue_kqfilter(file_t *fp, struct knote *kn)
{
	struct kqueue *kq;
	filedesc_t *fdp;

	kq = ((file_t *)kn->kn_obj)->f_data;

	KASSERT(fp == kn->kn_obj);

	if (kn->kn_filter != EVFILT_READ)
		return 1;

	kn->kn_fop = &kqread_filtops;
	fdp = curlwp->l_fd;
	mutex_enter(&kq->kq_lock);
	SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext);
	mutex_exit(&kq->kq_lock);

	return 0;
}
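
/*
 * Because a kqueue descriptor has its own kqfilter method (above), one
 * kqueue can monitor another for readability.  Userland sketch
 * (illustrative only):
 *
 *	int inner = kqueue();
 *	int outer = kqueue();
 *	struct kevent ev;
 *
 *	EV_SET(&ev, inner, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(outer, &ev, 1, NULL, 0, NULL);
 *
 * The outer kqueue reports EVFILT_READ when the inner one has pending
 * events; filt_kqueue() sets the returned event's data field to that
 * pending count.
 */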


/*
 * Walk down a list of knotes, activating them if their event has
 * triggered.  The caller's object lock (e.g. device driver lock)
 * must be held.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		if ((*kn->kn_fop->f_event)(kn, hint))
			knote_activate(kn);
	}
}

/*
 * Remove all knotes referencing a specified fd
 */
void
knote_fdclose(int fd)
{
	struct klist *list;
	struct knote *kn;
	filedesc_t *fdp;

	fdp = curlwp->l_fd;
	list = (struct klist *)&fdp->fd_ofiles[fd]->ff_knlist;
	mutex_enter(&fdp->fd_lock);
	while ((kn = SLIST_FIRST(list)) != NULL) {
		knote_detach(kn, fdp, true);
		mutex_enter(&fdp->fd_lock);
	}
	mutex_exit(&fdp->fd_lock);
}

/*
 * Drop knote.  Called with fdp->fd_lock held, and will drop before
 * returning.
 */
static void
knote_detach(struct knote *kn, filedesc_t *fdp, bool dofop)
{
	struct klist *list;
	struct kqueue *kq;

	kq = kn->kn_kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);
	KASSERT(mutex_owned(&fdp->fd_lock));

	/* Remove from monitored object. */
	if (dofop) {
		KERNEL_LOCK(1, NULL);		/* XXXSMP */
		(*kn->kn_fop->f_detach)(kn);
		KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
	}

	/* Remove from descriptor table. */
	if (kn->kn_fop->f_isfd)
		list = (struct klist *)&fdp->fd_ofiles[kn->kn_id]->ff_knlist;
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);

	/* Remove from kqueue. */
	/* XXXAD should verify not in use by kqueue_scan. */
	mutex_spin_enter(&kq->kq_lock);
	if ((kn->kn_status & KN_QUEUED) != 0) {
		kq_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kq_check(kq);
	}
	mutex_spin_exit(&kq->kq_lock);

	mutex_exit(&fdp->fd_lock);
	if (kn->kn_fop->f_isfd)
		fd_putfile(kn->kn_id);
	atomic_dec_uint(&kn->kn_kfilter->refcnt);
	kmem_free(kn, sizeof(*kn));
}

/*
 * Queue new event for knote.
 */
static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);

	kq = kn->kn_kq;

	mutex_spin_enter(&kq->kq_lock);
	if ((kn->kn_status & KN_DISABLED) != 0) {
		kn->kn_status &= ~KN_DISABLED;
	}
	if ((kn->kn_status & (KN_ACTIVE | KN_QUEUED)) == KN_ACTIVE) {
		kq_check(kq);
		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		kq_check(kq);
		cv_broadcast(&kq->kq_cv);
		selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
	}
	mutex_spin_exit(&kq->kq_lock);
}

/*
 * Mark a knote as active and, if it is not disabled or already queued,
 * queue it to its kqueue.
 */
static void
knote_activate(struct knote *kn)
{
	struct kqueue *kq;

	KASSERT((kn->kn_status & KN_MARKER) == 0);

	kq = kn->kn_kq;

	mutex_spin_enter(&kq->kq_lock);
	kn->kn_status |= KN_ACTIVE;
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) {
		kq_check(kq);
		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		kn->kn_status |= KN_QUEUED;
		kq->kq_count++;
		kq_check(kq);
		cv_broadcast(&kq->kq_cv);
		selnotify(&kq->kq_sel, 0, NOTE_SUBMIT);
	}
	mutex_spin_exit(&kq->kq_lock);
}
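
/*
 * Driver-side usage sketch (illustrative only, mydev names hypothetical):
 * a character device typically attaches knotes to the klist embedded in
 * its struct selinfo from its kqfilter entry point, using a filterops
 * like the mydevread_filtops sketch near the top of this file, and kicks
 * both poll/select and kqueue waiters from its data-arrival path with
 * selnotify(), which walks the klist via knote() above.
 *
 *	static int
 *	mydev_kqfilter(dev_t dev, struct knote *kn)
 *	{
 *		struct mydev_softc *sc = mydev_lookup(dev);
 *
 *		if (kn->kn_filter != EVFILT_READ)
 *			return EINVAL;
 *		kn->kn_fop = &mydevread_filtops;
 *		kn->kn_hook = sc;
 *		mutex_enter(&sc->sc_lock);
 *		SLIST_INSERT_HEAD(&sc->sc_rsel.sel_klist, kn, kn_selnext);
 *		mutex_exit(&sc->sc_lock);
 *		return 0;
 *	}
 *
 * and, when data arrives (with the driver lock held):
 *
 *	selnotify(&sc->sc_rsel, 0, 0);
 */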