1 /*- 2 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.10 2004/04/04 07:03:14 cperciva Exp $ 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/kernel.h> 32 #include <sys/proc.h> 33 #include <sys/malloc.h> 34 #include <sys/unistd.h> 35 #include <sys/file.h> 36 #include <sys/lock.h> 37 #include <sys/fcntl.h> 38 #include <sys/queue.h> 39 #include <sys/event.h> 40 #include <sys/eventvar.h> 41 #include <sys/protosw.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/stat.h> 45 #include <sys/sysctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/thread.h> 48 #include <sys/uio.h> 49 #include <sys/signalvar.h> 50 #include <sys/filio.h> 51 #include <sys/ktr.h> 52 53 #include <sys/thread2.h> 54 #include <sys/file2.h> 55 #include <sys/mplock2.h> 56 57 /* 58 * Global token for kqueue subsystem 59 */ 60 #if 0 61 struct lwkt_token kq_token = LWKT_TOKEN_INITIALIZER(kq_token); 62 SYSCTL_LONG(_lwkt, OID_AUTO, kq_collisions, 63 CTLFLAG_RW, &kq_token.t_collisions, 0, 64 "Collision counter of kq_token"); 65 #endif 66 67 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); 68 69 struct kevent_copyin_args { 70 struct kevent_args *ka; 71 int pchanges; 72 }; 73 74 static int kqueue_sleep(struct kqueue *kq, struct timespec *tsp); 75 static int kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count, 76 struct knote *marker); 77 static int kqueue_read(struct file *fp, struct uio *uio, 78 struct ucred *cred, int flags); 79 static int kqueue_write(struct file *fp, struct uio *uio, 80 struct ucred *cred, int flags); 81 static int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 82 struct ucred *cred, struct sysmsg *msg); 83 static int kqueue_kqfilter(struct file *fp, struct knote *kn); 84 static int kqueue_stat(struct file *fp, struct stat *st, 85 struct ucred *cred); 86 static int kqueue_close(struct file *fp); 87 static void kqueue_wakeup(struct kqueue *kq); 88 static int filter_attach(struct knote *kn); 89 static 
int filter_event(struct knote *kn, long hint); 90 91 /* 92 * MPSAFE 93 */ 94 static struct fileops kqueueops = { 95 .fo_read = kqueue_read, 96 .fo_write = kqueue_write, 97 .fo_ioctl = kqueue_ioctl, 98 .fo_kqfilter = kqueue_kqfilter, 99 .fo_stat = kqueue_stat, 100 .fo_close = kqueue_close, 101 .fo_shutdown = nofo_shutdown 102 }; 103 104 static void knote_attach(struct knote *kn); 105 static void knote_drop(struct knote *kn); 106 static void knote_detach_and_drop(struct knote *kn); 107 static void knote_enqueue(struct knote *kn); 108 static void knote_dequeue(struct knote *kn); 109 static struct knote *knote_alloc(void); 110 static void knote_free(struct knote *kn); 111 112 static void filt_kqdetach(struct knote *kn); 113 static int filt_kqueue(struct knote *kn, long hint); 114 static int filt_procattach(struct knote *kn); 115 static void filt_procdetach(struct knote *kn); 116 static int filt_proc(struct knote *kn, long hint); 117 static int filt_fileattach(struct knote *kn); 118 static void filt_timerexpire(void *knx); 119 static int filt_timerattach(struct knote *kn); 120 static void filt_timerdetach(struct knote *kn); 121 static int filt_timer(struct knote *kn, long hint); 122 123 static struct filterops file_filtops = 124 { FILTEROP_ISFD, filt_fileattach, NULL, NULL }; 125 static struct filterops kqread_filtops = 126 { FILTEROP_ISFD, NULL, filt_kqdetach, filt_kqueue }; 127 static struct filterops proc_filtops = 128 { 0, filt_procattach, filt_procdetach, filt_proc }; 129 static struct filterops timer_filtops = 130 { 0, filt_timerattach, filt_timerdetach, filt_timer }; 131 132 static int kq_ncallouts = 0; 133 static int kq_calloutmax = (4 * 1024); 134 SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, 135 &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); 136 static int kq_checkloop = 1000000; 137 SYSCTL_INT(_kern, OID_AUTO, kq_checkloop, CTLFLAG_RW, 138 &kq_checkloop, 0, "Maximum number of callouts allocated for kqueue"); 139 140 #define 
KNOTE_ACTIVATE(kn) do { \ 141 kn->kn_status |= KN_ACTIVE; \ 142 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ 143 knote_enqueue(kn); \ 144 } while(0) 145 146 #define KN_HASHSIZE 64 /* XXX should be tunable */ 147 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 148 149 extern struct filterops aio_filtops; 150 extern struct filterops sig_filtops; 151 152 /* 153 * Table for for all system-defined filters. 154 */ 155 static struct filterops *sysfilt_ops[] = { 156 &file_filtops, /* EVFILT_READ */ 157 &file_filtops, /* EVFILT_WRITE */ 158 &aio_filtops, /* EVFILT_AIO */ 159 &file_filtops, /* EVFILT_VNODE */ 160 &proc_filtops, /* EVFILT_PROC */ 161 &sig_filtops, /* EVFILT_SIGNAL */ 162 &timer_filtops, /* EVFILT_TIMER */ 163 &file_filtops, /* EVFILT_EXCEPT */ 164 }; 165 166 static int 167 filt_fileattach(struct knote *kn) 168 { 169 return (fo_kqfilter(kn->kn_fp, kn)); 170 } 171 172 /* 173 * MPSAFE 174 */ 175 static int 176 kqueue_kqfilter(struct file *fp, struct knote *kn) 177 { 178 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; 179 180 if (kn->kn_filter != EVFILT_READ) 181 return (EOPNOTSUPP); 182 183 kn->kn_fop = &kqread_filtops; 184 knote_insert(&kq->kq_kqinfo.ki_note, kn); 185 return (0); 186 } 187 188 static void 189 filt_kqdetach(struct knote *kn) 190 { 191 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; 192 193 knote_remove(&kq->kq_kqinfo.ki_note, kn); 194 } 195 196 /*ARGSUSED*/ 197 static int 198 filt_kqueue(struct knote *kn, long hint) 199 { 200 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; 201 202 kn->kn_data = kq->kq_count; 203 return (kn->kn_data > 0); 204 } 205 206 static int 207 filt_procattach(struct knote *kn) 208 { 209 struct proc *p; 210 int immediate; 211 212 immediate = 0; 213 p = pfind(kn->kn_id); 214 if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) { 215 p = zpfind(kn->kn_id); 216 immediate = 1; 217 } 218 if (p == NULL) { 219 return (ESRCH); 220 } 221 if (!PRISON_CHECK(curthread->td_ucred, p->p_ucred)) { 
222 if (p) 223 PRELE(p); 224 return (EACCES); 225 } 226 227 lwkt_gettoken(&p->p_token); 228 kn->kn_ptr.p_proc = p; 229 kn->kn_flags |= EV_CLEAR; /* automatically set */ 230 231 /* 232 * internal flag indicating registration done by kernel 233 */ 234 if (kn->kn_flags & EV_FLAG1) { 235 kn->kn_data = kn->kn_sdata; /* ppid */ 236 kn->kn_fflags = NOTE_CHILD; 237 kn->kn_flags &= ~EV_FLAG1; 238 } 239 240 knote_insert(&p->p_klist, kn); 241 242 /* 243 * Immediately activate any exit notes if the target process is a 244 * zombie. This is necessary to handle the case where the target 245 * process, e.g. a child, dies before the kevent is negistered. 246 */ 247 if (immediate && filt_proc(kn, NOTE_EXIT)) 248 KNOTE_ACTIVATE(kn); 249 lwkt_reltoken(&p->p_token); 250 PRELE(p); 251 252 return (0); 253 } 254 255 /* 256 * The knote may be attached to a different process, which may exit, 257 * leaving nothing for the knote to be attached to. So when the process 258 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 259 * it will be deleted when read out. However, as part of the knote deletion, 260 * this routine is called, so a check is needed to avoid actually performing 261 * a detach, because the original process does not exist any more. 262 */ 263 static void 264 filt_procdetach(struct knote *kn) 265 { 266 struct proc *p; 267 268 if (kn->kn_status & KN_DETACHED) 269 return; 270 /* XXX locking? take proc_token here? */ 271 p = kn->kn_ptr.p_proc; 272 knote_remove(&p->p_klist, kn); 273 } 274 275 static int 276 filt_proc(struct knote *kn, long hint) 277 { 278 u_int event; 279 280 /* 281 * mask off extra data 282 */ 283 event = (u_int)hint & NOTE_PCTRLMASK; 284 285 /* 286 * if the user is interested in this event, record it. 287 */ 288 if (kn->kn_sfflags & event) 289 kn->kn_fflags |= event; 290 291 /* 292 * Process is gone, so flag the event as finished. Detach the 293 * knote from the process now because the process will be poof, 294 * gone later on. 
295 */ 296 if (event == NOTE_EXIT) { 297 struct proc *p = kn->kn_ptr.p_proc; 298 if ((kn->kn_status & KN_DETACHED) == 0) { 299 knote_remove(&p->p_klist, kn); 300 kn->kn_status |= KN_DETACHED; 301 kn->kn_data = p->p_xstat; 302 kn->kn_ptr.p_proc = NULL; 303 } 304 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 305 return (1); 306 } 307 308 /* 309 * process forked, and user wants to track the new process, 310 * so attach a new knote to it, and immediately report an 311 * event with the parent's pid. 312 */ 313 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 314 struct kevent kev; 315 int error; 316 317 /* 318 * register knote with new process. 319 */ 320 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 321 kev.filter = kn->kn_filter; 322 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 323 kev.fflags = kn->kn_sfflags; 324 kev.data = kn->kn_id; /* parent */ 325 kev.udata = kn->kn_kevent.udata; /* preserve udata */ 326 error = kqueue_register(kn->kn_kq, &kev); 327 if (error) 328 kn->kn_fflags |= NOTE_TRACKERR; 329 } 330 331 return (kn->kn_fflags != 0); 332 } 333 334 /* 335 * The callout interlocks with callout_terminate() but can still 336 * race a deletion so if KN_DELETING is set we just don't touch 337 * the knote. 
338 */ 339 static void 340 filt_timerexpire(void *knx) 341 { 342 struct lwkt_token *tok; 343 struct knote *kn = knx; 344 struct callout *calloutp; 345 struct timeval tv; 346 int tticks; 347 348 tok = lwkt_token_pool_lookup(kn->kn_kq); 349 lwkt_gettoken(tok); 350 if ((kn->kn_status & KN_DELETING) == 0) { 351 kn->kn_data++; 352 KNOTE_ACTIVATE(kn); 353 354 if ((kn->kn_flags & EV_ONESHOT) == 0) { 355 tv.tv_sec = kn->kn_sdata / 1000; 356 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 357 tticks = tvtohz_high(&tv); 358 calloutp = (struct callout *)kn->kn_hook; 359 callout_reset(calloutp, tticks, filt_timerexpire, kn); 360 } 361 } 362 lwkt_reltoken(tok); 363 } 364 365 /* 366 * data contains amount of time to sleep, in milliseconds 367 */ 368 static int 369 filt_timerattach(struct knote *kn) 370 { 371 struct callout *calloutp; 372 struct timeval tv; 373 int tticks; 374 375 if (kq_ncallouts >= kq_calloutmax) { 376 kn->kn_hook = NULL; 377 return (ENOMEM); 378 } 379 kq_ncallouts++; 380 381 tv.tv_sec = kn->kn_sdata / 1000; 382 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 383 tticks = tvtohz_high(&tv); 384 385 kn->kn_flags |= EV_CLEAR; /* automatically set */ 386 calloutp = kmalloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK); 387 callout_init(calloutp); 388 kn->kn_hook = (caddr_t)calloutp; 389 callout_reset(calloutp, tticks, filt_timerexpire, kn); 390 391 return (0); 392 } 393 394 /* 395 * This function is called with the knote flagged locked but it is 396 * still possible to race a callout event due to the callback blocking. 397 * We must call callout_terminate() instead of callout_stop() to deal 398 * with the race. 
399 */ 400 static void 401 filt_timerdetach(struct knote *kn) 402 { 403 struct callout *calloutp; 404 405 calloutp = (struct callout *)kn->kn_hook; 406 callout_terminate(calloutp); 407 kfree(calloutp, M_KQUEUE); 408 kq_ncallouts--; 409 } 410 411 static int 412 filt_timer(struct knote *kn, long hint) 413 { 414 415 return (kn->kn_data != 0); 416 } 417 418 /* 419 * Acquire a knote, return non-zero on success, 0 on failure. 420 * 421 * If we cannot acquire the knote we sleep and return 0. The knote 422 * may be stale on return in this case and the caller must restart 423 * whatever loop they are in. 424 * 425 * Related kq token must be held. 426 */ 427 static __inline 428 int 429 knote_acquire(struct knote *kn) 430 { 431 if (kn->kn_status & KN_PROCESSING) { 432 kn->kn_status |= KN_WAITING | KN_REPROCESS; 433 tsleep(kn, 0, "kqepts", hz); 434 /* knote may be stale now */ 435 return(0); 436 } 437 kn->kn_status |= KN_PROCESSING; 438 return(1); 439 } 440 441 /* 442 * Release an acquired knote, clearing KN_PROCESSING and handling any 443 * KN_REPROCESS events. 444 * 445 * Caller must be holding the related kq token 446 * 447 * Non-zero is returned if the knote is destroyed. 448 */ 449 static __inline 450 int 451 knote_release(struct knote *kn) 452 { 453 while (kn->kn_status & KN_REPROCESS) { 454 kn->kn_status &= ~KN_REPROCESS; 455 if (kn->kn_status & KN_WAITING) { 456 kn->kn_status &= ~KN_WAITING; 457 wakeup(kn); 458 } 459 if (kn->kn_status & KN_DELETING) { 460 knote_detach_and_drop(kn); 461 return(1); 462 /* NOT REACHED */ 463 } 464 if (filter_event(kn, 0)) 465 KNOTE_ACTIVATE(kn); 466 } 467 kn->kn_status &= ~KN_PROCESSING; 468 return(0); 469 } 470 471 /* 472 * Initialize a kqueue. 473 * 474 * NOTE: The lwp/proc code initializes a kqueue for select/poll ops. 
475 * 476 * MPSAFE 477 */ 478 void 479 kqueue_init(struct kqueue *kq, struct filedesc *fdp) 480 { 481 TAILQ_INIT(&kq->kq_knpend); 482 TAILQ_INIT(&kq->kq_knlist); 483 kq->kq_count = 0; 484 kq->kq_fdp = fdp; 485 SLIST_INIT(&kq->kq_kqinfo.ki_note); 486 } 487 488 /* 489 * Terminate a kqueue. Freeing the actual kq itself is left up to the 490 * caller (it might be embedded in a lwp so we don't do it here). 491 * 492 * The kq's knlist must be completely eradicated so block on any 493 * processing races. 494 */ 495 void 496 kqueue_terminate(struct kqueue *kq) 497 { 498 struct lwkt_token *tok; 499 struct knote *kn; 500 501 tok = lwkt_token_pool_lookup(kq); 502 lwkt_gettoken(tok); 503 while ((kn = TAILQ_FIRST(&kq->kq_knlist)) != NULL) { 504 if (knote_acquire(kn)) 505 knote_detach_and_drop(kn); 506 } 507 if (kq->kq_knhash) { 508 kfree(kq->kq_knhash, M_KQUEUE); 509 kq->kq_knhash = NULL; 510 kq->kq_knhashmask = 0; 511 } 512 lwkt_reltoken(tok); 513 } 514 515 /* 516 * MPSAFE 517 */ 518 int 519 sys_kqueue(struct kqueue_args *uap) 520 { 521 struct thread *td = curthread; 522 struct kqueue *kq; 523 struct file *fp; 524 int fd, error; 525 526 error = falloc(td->td_lwp, &fp, &fd); 527 if (error) 528 return (error); 529 fp->f_flag = FREAD | FWRITE; 530 fp->f_type = DTYPE_KQUEUE; 531 fp->f_ops = &kqueueops; 532 533 kq = kmalloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO); 534 kqueue_init(kq, td->td_proc->p_fd); 535 fp->f_data = kq; 536 537 fsetfd(kq->kq_fdp, fp, fd); 538 uap->sysmsg_result = fd; 539 fdrop(fp); 540 return (error); 541 } 542 543 /* 544 * Copy 'count' items into the destination list pointed to by uap->eventlist. 
545 */ 546 static int 547 kevent_copyout(void *arg, struct kevent *kevp, int count, int *res) 548 { 549 struct kevent_copyin_args *kap; 550 int error; 551 552 kap = (struct kevent_copyin_args *)arg; 553 554 error = copyout(kevp, kap->ka->eventlist, count * sizeof(*kevp)); 555 if (error == 0) { 556 kap->ka->eventlist += count; 557 *res += count; 558 } else { 559 *res = -1; 560 } 561 562 return (error); 563 } 564 565 /* 566 * Copy at most 'max' items from the list pointed to by kap->changelist, 567 * return number of items in 'events'. 568 */ 569 static int 570 kevent_copyin(void *arg, struct kevent *kevp, int max, int *events) 571 { 572 struct kevent_copyin_args *kap; 573 int error, count; 574 575 kap = (struct kevent_copyin_args *)arg; 576 577 count = min(kap->ka->nchanges - kap->pchanges, max); 578 error = copyin(kap->ka->changelist, kevp, count * sizeof *kevp); 579 if (error == 0) { 580 kap->ka->changelist += count; 581 kap->pchanges += count; 582 *events = count; 583 } 584 585 return (error); 586 } 587 588 /* 589 * MPSAFE 590 */ 591 int 592 kern_kevent(struct kqueue *kq, int nevents, int *res, void *uap, 593 k_copyin_fn kevent_copyinfn, k_copyout_fn kevent_copyoutfn, 594 struct timespec *tsp_in) 595 { 596 struct kevent *kevp; 597 struct timespec *tsp; 598 int i, n, total, error, nerrors = 0; 599 int lres; 600 int limit = kq_checkloop; 601 struct kevent kev[KQ_NEVENTS]; 602 struct knote marker; 603 struct lwkt_token *tok; 604 605 tsp = tsp_in; 606 *res = 0; 607 608 tok = lwkt_token_pool_lookup(kq); 609 lwkt_gettoken(tok); 610 for ( ;; ) { 611 n = 0; 612 error = kevent_copyinfn(uap, kev, KQ_NEVENTS, &n); 613 if (error) 614 goto done; 615 if (n == 0) 616 break; 617 for (i = 0; i < n; i++) { 618 kevp = &kev[i]; 619 kevp->flags &= ~EV_SYSFLAGS; 620 error = kqueue_register(kq, kevp); 621 622 /* 623 * If a registration returns an error we 624 * immediately post the error. 
The kevent() 625 * call itself will fail with the error if 626 * no space is available for posting. 627 * 628 * Such errors normally bypass the timeout/blocking 629 * code. However, if the copyoutfn function refuses 630 * to post the error (see sys_poll()), then we 631 * ignore it too. 632 */ 633 if (error) { 634 kevp->flags = EV_ERROR; 635 kevp->data = error; 636 lres = *res; 637 kevent_copyoutfn(uap, kevp, 1, res); 638 if (*res < 0) { 639 goto done; 640 } else if (lres != *res) { 641 nevents--; 642 nerrors++; 643 } 644 } 645 } 646 } 647 if (nerrors) { 648 error = 0; 649 goto done; 650 } 651 652 /* 653 * Acquire/wait for events - setup timeout 654 */ 655 if (tsp != NULL) { 656 struct timespec ats; 657 658 if (tsp->tv_sec || tsp->tv_nsec) { 659 nanouptime(&ats); 660 timespecadd(tsp, &ats); /* tsp = target time */ 661 } 662 } 663 664 /* 665 * Loop as required. 666 * 667 * Collect as many events as we can. Sleeping on successive 668 * loops is disabled if copyoutfn has incremented (*res). 669 * 670 * The loop stops if an error occurs, all events have been 671 * scanned (the marker has been reached), or fewer than the 672 * maximum number of events is found. 673 * 674 * The copyoutfn function does not have to increment (*res) in 675 * order for the loop to continue. 676 * 677 * NOTE: doselect() usually passes 0x7FFFFFFF for nevents. 678 */ 679 total = 0; 680 error = 0; 681 marker.kn_filter = EVFILT_MARKER; 682 marker.kn_status = KN_PROCESSING; 683 TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe); 684 while ((n = nevents - total) > 0) { 685 if (n > KQ_NEVENTS) 686 n = KQ_NEVENTS; 687 688 /* 689 * If no events are pending sleep until timeout (if any) 690 * or an event occurs. 691 * 692 * After the sleep completes the marker is moved to the 693 * end of the list, making any received events available 694 * to our scan. 
695 */ 696 if (kq->kq_count == 0 && *res == 0) { 697 error = kqueue_sleep(kq, tsp); 698 if (error) 699 break; 700 701 TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe); 702 TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe); 703 } 704 705 /* 706 * Process all received events 707 * Account for all non-spurious events in our total 708 */ 709 i = kqueue_scan(kq, kev, n, &marker); 710 if (i) { 711 lres = *res; 712 error = kevent_copyoutfn(uap, kev, i, res); 713 total += *res - lres; 714 if (error) 715 break; 716 } 717 if (limit && --limit == 0) 718 panic("kqueue: checkloop failed i=%d", i); 719 720 /* 721 * Normally when fewer events are returned than requested 722 * we can stop. However, if only spurious events were 723 * collected the copyout will not bump (*res) and we have 724 * to continue. 725 */ 726 if (i < n && *res) 727 break; 728 729 /* 730 * Deal with an edge case where spurious events can cause 731 * a loop to occur without moving the marker. This can 732 * prevent kqueue_scan() from picking up new events which 733 * race us. We must be sure to move the marker for this 734 * case. 735 * 736 * NOTE: We do not want to move the marker if events 737 * were scanned because normal kqueue operations 738 * may reactivate events. Moving the marker in 739 * that case could result in duplicates for the 740 * same event. 741 */ 742 if (i == 0) { 743 TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe); 744 TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe); 745 } 746 } 747 TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe); 748 749 /* Timeouts do not return EWOULDBLOCK. 
*/ 750 if (error == EWOULDBLOCK) 751 error = 0; 752 753 done: 754 lwkt_reltoken(tok); 755 return (error); 756 } 757 758 /* 759 * MPALMOSTSAFE 760 */ 761 int 762 sys_kevent(struct kevent_args *uap) 763 { 764 struct thread *td = curthread; 765 struct proc *p = td->td_proc; 766 struct timespec ts, *tsp; 767 struct kqueue *kq; 768 struct file *fp = NULL; 769 struct kevent_copyin_args *kap, ka; 770 int error; 771 772 if (uap->timeout) { 773 error = copyin(uap->timeout, &ts, sizeof(ts)); 774 if (error) 775 return (error); 776 tsp = &ts; 777 } else { 778 tsp = NULL; 779 } 780 781 fp = holdfp(p->p_fd, uap->fd, -1); 782 if (fp == NULL) 783 return (EBADF); 784 if (fp->f_type != DTYPE_KQUEUE) { 785 fdrop(fp); 786 return (EBADF); 787 } 788 789 kq = (struct kqueue *)fp->f_data; 790 791 kap = &ka; 792 kap->ka = uap; 793 kap->pchanges = 0; 794 795 error = kern_kevent(kq, uap->nevents, &uap->sysmsg_result, kap, 796 kevent_copyin, kevent_copyout, tsp); 797 798 fdrop(fp); 799 800 return (error); 801 } 802 803 /* 804 * Caller must be holding the kq token 805 */ 806 int 807 kqueue_register(struct kqueue *kq, struct kevent *kev) 808 { 809 struct lwkt_token *tok; 810 struct filedesc *fdp = kq->kq_fdp; 811 struct filterops *fops; 812 struct file *fp = NULL; 813 struct knote *kn = NULL; 814 int error = 0; 815 816 if (kev->filter < 0) { 817 if (kev->filter + EVFILT_SYSCOUNT < 0) 818 return (EINVAL); 819 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 820 } else { 821 /* 822 * XXX 823 * filter attach routine is responsible for insuring that 824 * the identifier can be attached to it. 
825 */ 826 kprintf("unknown filter: %d\n", kev->filter); 827 return (EINVAL); 828 } 829 830 tok = lwkt_token_pool_lookup(kq); 831 lwkt_gettoken(tok); 832 if (fops->f_flags & FILTEROP_ISFD) { 833 /* validate descriptor */ 834 fp = holdfp(fdp, kev->ident, -1); 835 if (fp == NULL) { 836 lwkt_reltoken(tok); 837 return (EBADF); 838 } 839 lwkt_getpooltoken(&fp->f_klist); 840 again1: 841 SLIST_FOREACH(kn, &fp->f_klist, kn_link) { 842 if (kn->kn_kq == kq && 843 kn->kn_filter == kev->filter && 844 kn->kn_id == kev->ident) { 845 if (knote_acquire(kn) == 0) 846 goto again1; 847 break; 848 } 849 } 850 lwkt_relpooltoken(&fp->f_klist); 851 } else { 852 if (kq->kq_knhashmask) { 853 struct klist *list; 854 855 list = &kq->kq_knhash[ 856 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 857 lwkt_getpooltoken(list); 858 again2: 859 SLIST_FOREACH(kn, list, kn_link) { 860 if (kn->kn_id == kev->ident && 861 kn->kn_filter == kev->filter) { 862 if (knote_acquire(kn) == 0) 863 goto again2; 864 break; 865 } 866 } 867 lwkt_relpooltoken(list); 868 } 869 } 870 871 /* 872 * NOTE: At this point if kn is non-NULL we will have acquired 873 * it and set KN_PROCESSING. 874 */ 875 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 876 error = ENOENT; 877 goto done; 878 } 879 880 /* 881 * kn now contains the matching knote, or NULL if no match 882 */ 883 if (kev->flags & EV_ADD) { 884 if (kn == NULL) { 885 kn = knote_alloc(); 886 if (kn == NULL) { 887 error = ENOMEM; 888 goto done; 889 } 890 kn->kn_fp = fp; 891 kn->kn_kq = kq; 892 kn->kn_fop = fops; 893 894 /* 895 * apply reference count to knote structure, and 896 * do not release it at the end of this routine. 897 */ 898 fp = NULL; 899 900 kn->kn_sfflags = kev->fflags; 901 kn->kn_sdata = kev->data; 902 kev->fflags = 0; 903 kev->data = 0; 904 kn->kn_kevent = *kev; 905 906 /* 907 * KN_PROCESSING prevents the knote from getting 908 * ripped out from under us while we are trying 909 * to attach it, in case the attach blocks. 
910 */ 911 kn->kn_status = KN_PROCESSING; 912 knote_attach(kn); 913 if ((error = filter_attach(kn)) != 0) { 914 kn->kn_status |= KN_DELETING | KN_REPROCESS; 915 knote_drop(kn); 916 goto done; 917 } 918 919 /* 920 * Interlock against close races which either tried 921 * to remove our knote while we were blocked or missed 922 * it entirely prior to our attachment. We do not 923 * want to end up with a knote on a closed descriptor. 924 */ 925 if ((fops->f_flags & FILTEROP_ISFD) && 926 checkfdclosed(fdp, kev->ident, kn->kn_fp)) { 927 kn->kn_status |= KN_DELETING | KN_REPROCESS; 928 } 929 } else { 930 /* 931 * The user may change some filter values after the 932 * initial EV_ADD, but doing so will not reset any 933 * filter which have already been triggered. 934 */ 935 KKASSERT(kn->kn_status & KN_PROCESSING); 936 kn->kn_sfflags = kev->fflags; 937 kn->kn_sdata = kev->data; 938 kn->kn_kevent.udata = kev->udata; 939 } 940 941 /* 942 * Execute the filter event to immediately activate the 943 * knote if necessary. If reprocessing events are pending 944 * due to blocking above we do not run the filter here 945 * but instead let knote_release() do it. Otherwise we 946 * might run the filter on a deleted event. 947 */ 948 if ((kn->kn_status & KN_REPROCESS) == 0) { 949 if (filter_event(kn, 0)) 950 KNOTE_ACTIVATE(kn); 951 } 952 } else if (kev->flags & EV_DELETE) { 953 /* 954 * Delete the existing knote 955 */ 956 knote_detach_and_drop(kn); 957 goto done; 958 } 959 960 /* 961 * Disablement does not deactivate a knote here. 962 */ 963 if ((kev->flags & EV_DISABLE) && 964 ((kn->kn_status & KN_DISABLED) == 0)) { 965 kn->kn_status |= KN_DISABLED; 966 } 967 968 /* 969 * Re-enablement may have to immediately enqueue an active knote. 
970 */ 971 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 972 kn->kn_status &= ~KN_DISABLED; 973 if ((kn->kn_status & KN_ACTIVE) && 974 ((kn->kn_status & KN_QUEUED) == 0)) { 975 knote_enqueue(kn); 976 } 977 } 978 979 /* 980 * Handle any required reprocessing 981 */ 982 knote_release(kn); 983 /* kn may be invalid now */ 984 985 done: 986 lwkt_reltoken(tok); 987 if (fp != NULL) 988 fdrop(fp); 989 return (error); 990 } 991 992 /* 993 * Block as necessary until the target time is reached. 994 * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both 995 * 0 we do not block at all. 996 * 997 * Caller must be holding the kq token. 998 */ 999 static int 1000 kqueue_sleep(struct kqueue *kq, struct timespec *tsp) 1001 { 1002 int error = 0; 1003 1004 if (tsp == NULL) { 1005 kq->kq_state |= KQ_SLEEP; 1006 error = tsleep(kq, PCATCH, "kqread", 0); 1007 } else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) { 1008 error = EWOULDBLOCK; 1009 } else { 1010 struct timespec ats; 1011 struct timespec atx = *tsp; 1012 int timeout; 1013 1014 nanouptime(&ats); 1015 timespecsub(&atx, &ats); 1016 if (ats.tv_sec < 0) { 1017 error = EWOULDBLOCK; 1018 } else { 1019 timeout = atx.tv_sec > 24 * 60 * 60 ? 1020 24 * 60 * 60 * hz : tstohz_high(&atx); 1021 kq->kq_state |= KQ_SLEEP; 1022 error = tsleep(kq, PCATCH, "kqread", timeout); 1023 } 1024 } 1025 1026 /* don't restart after signals... */ 1027 if (error == ERESTART) 1028 return (EINTR); 1029 1030 return (error); 1031 } 1032 1033 /* 1034 * Scan the kqueue, return the number of active events placed in kevp up 1035 * to count. 1036 * 1037 * Continuous mode events may get recycled, do not continue scanning past 1038 * marker unless no events have been collected. 
1039 * 1040 * Caller must be holding the kq token 1041 */ 1042 static int 1043 kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count, 1044 struct knote *marker) 1045 { 1046 struct knote *kn, local_marker; 1047 int total; 1048 1049 total = 0; 1050 local_marker.kn_filter = EVFILT_MARKER; 1051 local_marker.kn_status = KN_PROCESSING; 1052 1053 /* 1054 * Collect events. 1055 */ 1056 TAILQ_INSERT_HEAD(&kq->kq_knpend, &local_marker, kn_tqe); 1057 while (count) { 1058 kn = TAILQ_NEXT(&local_marker, kn_tqe); 1059 if (kn->kn_filter == EVFILT_MARKER) { 1060 /* Marker reached, we are done */ 1061 if (kn == marker) 1062 break; 1063 1064 /* Move local marker past some other threads marker */ 1065 kn = TAILQ_NEXT(kn, kn_tqe); 1066 TAILQ_REMOVE(&kq->kq_knpend, &local_marker, kn_tqe); 1067 TAILQ_INSERT_BEFORE(kn, &local_marker, kn_tqe); 1068 continue; 1069 } 1070 1071 /* 1072 * We can't skip a knote undergoing processing, otherwise 1073 * we risk not returning it when the user process expects 1074 * it should be returned. Sleep and retry. 1075 */ 1076 if (knote_acquire(kn) == 0) 1077 continue; 1078 1079 /* 1080 * Remove the event for processing. 1081 * 1082 * WARNING! We must leave KN_QUEUED set to prevent the 1083 * event from being KNOTE_ACTIVATE()d while 1084 * the queue state is in limbo, in case we 1085 * block. 1086 * 1087 * WARNING! We must set KN_PROCESSING to avoid races 1088 * against deletion or another thread's 1089 * processing. 1090 */ 1091 TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe); 1092 kq->kq_count--; 1093 1094 /* 1095 * We have to deal with an extremely important race against 1096 * file descriptor close()s here. The file descriptor can 1097 * disappear MPSAFE, and there is a small window of 1098 * opportunity between that and the call to knote_fdclose(). 1099 * 1100 * If we hit that window here while doselect or dopoll is 1101 * trying to delete a spurious event they will not be able 1102 * to match up the event against a knote and will go haywire. 
1103 */ 1104 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && 1105 checkfdclosed(kq->kq_fdp, kn->kn_kevent.ident, kn->kn_fp)) { 1106 kn->kn_status |= KN_DELETING | KN_REPROCESS; 1107 } 1108 1109 if (kn->kn_status & KN_DISABLED) { 1110 /* 1111 * If disabled we ensure the event is not queued 1112 * but leave its active bit set. On re-enablement 1113 * the event may be immediately triggered. 1114 */ 1115 kn->kn_status &= ~KN_QUEUED; 1116 } else if ((kn->kn_flags & EV_ONESHOT) == 0 && 1117 (kn->kn_status & KN_DELETING) == 0 && 1118 filter_event(kn, 0) == 0) { 1119 /* 1120 * If not running in one-shot mode and the event 1121 * is no longer present we ensure it is removed 1122 * from the queue and ignore it. 1123 */ 1124 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); 1125 } else { 1126 /* 1127 * Post the event 1128 */ 1129 *kevp++ = kn->kn_kevent; 1130 ++total; 1131 --count; 1132 1133 if (kn->kn_flags & EV_ONESHOT) { 1134 kn->kn_status &= ~KN_QUEUED; 1135 kn->kn_status |= KN_DELETING | KN_REPROCESS; 1136 } else if (kn->kn_flags & EV_CLEAR) { 1137 kn->kn_data = 0; 1138 kn->kn_fflags = 0; 1139 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); 1140 } else { 1141 TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe); 1142 kq->kq_count++; 1143 } 1144 } 1145 1146 /* 1147 * Handle any post-processing states 1148 */ 1149 knote_release(kn); 1150 } 1151 TAILQ_REMOVE(&kq->kq_knpend, &local_marker, kn_tqe); 1152 1153 return (total); 1154 } 1155 1156 /* 1157 * XXX 1158 * This could be expanded to call kqueue_scan, if desired. 
1159 * 1160 * MPSAFE 1161 */ 1162 static int 1163 kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags) 1164 { 1165 return (ENXIO); 1166 } 1167 1168 /* 1169 * MPSAFE 1170 */ 1171 static int 1172 kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags) 1173 { 1174 return (ENXIO); 1175 } 1176 1177 /* 1178 * MPALMOSTSAFE 1179 */ 1180 static int 1181 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 1182 struct ucred *cred, struct sysmsg *msg) 1183 { 1184 struct lwkt_token *tok; 1185 struct kqueue *kq; 1186 int error; 1187 1188 kq = (struct kqueue *)fp->f_data; 1189 tok = lwkt_token_pool_lookup(kq); 1190 lwkt_gettoken(tok); 1191 1192 switch(com) { 1193 case FIOASYNC: 1194 if (*(int *)data) 1195 kq->kq_state |= KQ_ASYNC; 1196 else 1197 kq->kq_state &= ~KQ_ASYNC; 1198 error = 0; 1199 break; 1200 case FIOSETOWN: 1201 error = fsetown(*(int *)data, &kq->kq_sigio); 1202 break; 1203 default: 1204 error = ENOTTY; 1205 break; 1206 } 1207 lwkt_reltoken(tok); 1208 return (error); 1209 } 1210 1211 /* 1212 * MPSAFE 1213 */ 1214 static int 1215 kqueue_stat(struct file *fp, struct stat *st, struct ucred *cred) 1216 { 1217 struct kqueue *kq = (struct kqueue *)fp->f_data; 1218 1219 bzero((void *)st, sizeof(*st)); 1220 st->st_size = kq->kq_count; 1221 st->st_blksize = sizeof(struct kevent); 1222 st->st_mode = S_IFIFO; 1223 return (0); 1224 } 1225 1226 /* 1227 * MPSAFE 1228 */ 1229 static int 1230 kqueue_close(struct file *fp) 1231 { 1232 struct kqueue *kq = (struct kqueue *)fp->f_data; 1233 1234 kqueue_terminate(kq); 1235 1236 fp->f_data = NULL; 1237 funsetown(&kq->kq_sigio); 1238 1239 kfree(kq, M_KQUEUE); 1240 return (0); 1241 } 1242 1243 static void 1244 kqueue_wakeup(struct kqueue *kq) 1245 { 1246 if (kq->kq_state & KQ_SLEEP) { 1247 kq->kq_state &= ~KQ_SLEEP; 1248 wakeup(kq); 1249 } 1250 KNOTE(&kq->kq_kqinfo.ki_note, 0); 1251 } 1252 1253 /* 1254 * Calls filterops f_attach function, acquiring mplock if filter is not 1255 * marked as 
FILTEROP_MPSAFE. 1256 * 1257 * Caller must be holding the related kq token 1258 */ 1259 static int 1260 filter_attach(struct knote *kn) 1261 { 1262 int ret; 1263 1264 if (!(kn->kn_fop->f_flags & FILTEROP_MPSAFE)) { 1265 get_mplock(); 1266 ret = kn->kn_fop->f_attach(kn); 1267 rel_mplock(); 1268 } else { 1269 ret = kn->kn_fop->f_attach(kn); 1270 } 1271 1272 return (ret); 1273 } 1274 1275 /* 1276 * Detach the knote and drop it, destroying the knote. 1277 * 1278 * Calls filterops f_detach function, acquiring mplock if filter is not 1279 * marked as FILTEROP_MPSAFE. 1280 * 1281 * Caller must be holding the related kq token 1282 */ 1283 static void 1284 knote_detach_and_drop(struct knote *kn) 1285 { 1286 kn->kn_status |= KN_DELETING | KN_REPROCESS; 1287 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 1288 kn->kn_fop->f_detach(kn); 1289 } else { 1290 get_mplock(); 1291 kn->kn_fop->f_detach(kn); 1292 rel_mplock(); 1293 } 1294 knote_drop(kn); 1295 } 1296 1297 /* 1298 * Calls filterops f_event function, acquiring mplock if filter is not 1299 * marked as FILTEROP_MPSAFE. 1300 * 1301 * If the knote is in the middle of being created or deleted we cannot 1302 * safely call the filter op. 1303 * 1304 * Caller must be holding the related kq token 1305 */ 1306 static int 1307 filter_event(struct knote *kn, long hint) 1308 { 1309 int ret; 1310 1311 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { 1312 ret = kn->kn_fop->f_event(kn, hint); 1313 } else { 1314 get_mplock(); 1315 ret = kn->kn_fop->f_event(kn, hint); 1316 rel_mplock(); 1317 } 1318 return (ret); 1319 } 1320 1321 /* 1322 * Walk down a list of knotes, activating them if their event has triggered. 1323 * 1324 * If we encounter any knotes which are undergoing processing we just mark 1325 * them for reprocessing and do not try to [re]activate the knote. However, 1326 * if a hint is being passed we have to wait and that makes things a bit 1327 * sticky. 
*
 * Locking: the pool token for the klist is held across the whole walk.
 * For each knote the pool token of its owning kqueue is additionally
 * taken and released again before moving on (or before restarting).
 *
 * list - klist to walk
 * hint - value handed unchanged to the filter's event function
 */
void
knote(struct klist *list, long hint)
{
	struct kqueue *kq;
	struct knote *kn;
	struct knote *kntmp;

	lwkt_getpooltoken(list);
restart:
	SLIST_FOREACH(kn, list, kn_next) {
		kq = kn->kn_kq;
		lwkt_getpooltoken(kq);

		/*
		 * temporary verification hack
		 *
		 * Re-scan the list now that the kq token is held and make
		 * sure kn is still on it and still belongs to the same kq.
		 * If not, drop the kq token and start over from the head.
		 * (Presumably acquiring the token can block and let the
		 * list change underneath us -- confirm.)
		 */
		SLIST_FOREACH(kntmp, list, kn_next) {
			if (kn == kntmp)
				break;
		}
		if (kn != kntmp || kn->kn_kq != kq) {
			lwkt_relpooltoken(kq);
			goto restart;
		}

		if (kn->kn_status & KN_PROCESSING) {
			/*
			 * Someone else is processing the knote, ask the
			 * other thread to reprocess it and don't mess
			 * with it otherwise.
			 */
			if (hint == 0) {
				kn->kn_status |= KN_REPROCESS;
				lwkt_relpooltoken(kq);
				continue;
			}

			/*
			 * If the hint is non-zero we have to wait or risk
			 * losing the state the caller is trying to update.
			 *
			 * XXX This is a real problem, certain process
			 *     and signal filters will bump kn_data for
			 *     already-processed notes more than once if
			 *     we restart the list scan.  FIXME.
			 */
			kn->kn_status |= KN_WAITING | KN_REPROCESS;
			/* Sleep on the knote, with a timeout of hz ticks. */
			tsleep(kn, 0, "knotec", hz);
			lwkt_relpooltoken(kq);
			goto restart;
		}

		/*
		 * Become the reprocessing master ourselves.
		 *
		 * If hint is non-zero, running the event is mandatory
		 * when not deleting, so do it whether reprocessing is
		 * set or not.
		 */
		kn->kn_status |= KN_PROCESSING;
		if ((kn->kn_status & KN_DELETING) == 0) {
			if (filter_event(kn, hint))
				KNOTE_ACTIVATE(kn);
		}
		/*
		 * A non-zero return from knote_release() forces a rescan
		 * from the list head.  NOTE(review): presumably it means
		 * the knote was destroyed and kn must not be used to
		 * continue the iteration -- confirm against knote_release().
		 */
		if (knote_release(kn)) {
			lwkt_relpooltoken(kq);
			goto restart;
		}
		lwkt_relpooltoken(kq);
	}
	lwkt_relpooltoken(list);
}

/*
 * Insert knote at head of klist.
 *
 * This function may only be called via a filter function and thus
 * kq_token should already be held and marked for processing.
*
 * The knote is asserted to be KN_PROCESSING and the insertion itself is
 * done under the klist's pool token.
 *
 * NOTE(review): the global kq_token is compiled out in this file, so the
 * token referred to here is presumably the per-kqueue pool token -- confirm.
 */
void
knote_insert(struct klist *klist, struct knote *kn)
{
	lwkt_getpooltoken(klist);
	KKASSERT(kn->kn_status & KN_PROCESSING);
	SLIST_INSERT_HEAD(klist, kn, kn_next);
	lwkt_relpooltoken(klist);
}

/*
 * Remove knote from a klist
 *
 * This function may only be called via a filter function and thus
 * kq_token should already be held and marked for processing.
 *
 * The knote is asserted to be KN_PROCESSING and the removal itself is
 * done under the klist's pool token.
 *
 * NOTE(review): the global kq_token is compiled out in this file, so the
 * token referred to here is presumably the per-kqueue pool token -- confirm.
 */
void
knote_remove(struct klist *klist, struct knote *kn)
{
	lwkt_getpooltoken(klist);
	KKASSERT(kn->kn_status & KN_PROCESSING);
	SLIST_REMOVE(klist, kn, knote, kn_next);
	lwkt_relpooltoken(klist);
}

#if 0
/*
 * Remove all knotes from a specified klist
 *
 * Only called from aio.
 *
 * (Compiled out; still written against the global kq_token.)
 */
void
knote_empty(struct klist *list)
{
	struct knote *kn;

	lwkt_gettoken(&kq_token);
	while ((kn = SLIST_FIRST(list)) != NULL) {
		if (knote_acquire(kn))
			knote_detach_and_drop(kn);
	}
	lwkt_reltoken(&kq_token);
}
#endif

/*
 * Move every knote on src->ki_note over to dst->ki_note, replacing each
 * knote's filterops with ops and its kn_hook with hook on the way.
 *
 * Both klists' pool tokens are held for the duration (src taken first,
 * then dst).  The loop keeps taking the first knote of the source list
 * until that list is empty.
 */
void
knote_assume_knotes(struct kqinfo *src, struct kqinfo *dst,
		    struct filterops *ops, void *hook)
{
	struct kqueue *kq;
	struct knote *kn;

	lwkt_getpooltoken(&src->ki_note);
	lwkt_getpooltoken(&dst->ki_note);
	while ((kn = SLIST_FIRST(&src->ki_note)) != NULL) {
		kq = kn->kn_kq;
		lwkt_getpooltoken(kq);
		/*
		 * Revalidate now that the kq token is held: kn must still
		 * be the head of the source list and still belong to kq,
		 * otherwise retry with whatever is at the head now.
		 */
		if (SLIST_FIRST(&src->ki_note) != kn || kn->kn_kq != kq) {
			lwkt_relpooltoken(kq);
			continue;
		}
		/*
		 * If knote_acquire() fails nothing is moved and the loop
		 * simply looks at the list head again.
		 */
		if (knote_acquire(kn)) {
			knote_remove(&src->ki_note, kn);
			kn->kn_fop = ops;
			kn->kn_hook = hook;
			knote_insert(&dst->ki_note, kn);
			knote_release(kn);
			/* kn may be invalid now */
		}
		lwkt_relpooltoken(kq);
	}
	lwkt_relpooltoken(&dst->ki_note);
	lwkt_relpooltoken(&src->ki_note);
}

/*
 * Remove all knotes referencing a specified fd
 *
 * Walks fp->f_klist (under its pool token) looking for knotes whose
 * kqueue belongs to fdp and whose ident equals fd.  Each match is
 * detached and dropped under its kqueue's pool token, and the walk is
 * then restarted from the head of the list.
 */
void
knote_fdclose(struct file *fp, struct filedesc *fdp, int fd)
{
	struct kqueue *kq;
	struct knote *kn;
	struct knote *kntmp;

	lwkt_getpooltoken(&fp->f_klist);
restart:
	SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
		if (kn->kn_kq->kq_fdp == fdp && kn->kn_id == fd) {
			kq = kn->kn_kq;
			lwkt_getpooltoken(kq);

			/*
			 * temporary verification hack
			 *
			 * With the kq token held, confirm kn is still on
			 * the list and still matches (same kq, same fdp,
			 * same fd).  If not, restart the walk.
			 */
			SLIST_FOREACH(kntmp, &fp->f_klist, kn_link) {
				if (kn == kntmp)
					break;
			}
			if (kn != kntmp || kn->kn_kq->kq_fdp != fdp ||
			    kn->kn_id != fd || kn->kn_kq != kq) {
				lwkt_relpooltoken(kq);
				goto restart;
			}
			if (knote_acquire(kn))
				knote_detach_and_drop(kn);
			/*
			 * Always rescan from the head after handling a
			 * match, whether or not the acquire succeeded.
			 */
			lwkt_relpooltoken(kq);
			goto restart;
		}
	}
	lwkt_relpooltoken(&fp->f_klist);
}

/*
 * Low level attach function.
 *
 * Links the knote onto its lookup list and onto the kqueue's kq_knlist.
 * For FILTEROP_ISFD filters the lookup list is the file's f_klist
 * (kn_fp must be set); otherwise it is a bucket of the kqueue's knote
 * hash, which is created on first use.
 *
 * The knote should already be marked for processing.
 * Caller must hold the related kq token.
 */
static void
knote_attach(struct knote *kn)
{
	struct klist *list;
	struct kqueue *kq = kn->kn_kq;

	if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
		KKASSERT(kn->kn_fp);
		list = &kn->kn_fp->f_klist;
	} else {
		/* A zero hash mask is taken to mean "no hash table yet". */
		if (kq->kq_knhashmask == 0)
			kq->kq_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
						 &kq->kq_knhashmask);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	lwkt_getpooltoken(list);
	SLIST_INSERT_HEAD(list, kn, kn_link);
	TAILQ_INSERT_HEAD(&kq->kq_knlist, kn, kn_kqlink);
	lwkt_relpooltoken(list);
}

/*
 * Low level drop function.
 *
 * The knote should already be marked for processing.
 * Caller must hold the related kq token.
1548 */ 1549 static void 1550 knote_drop(struct knote *kn) 1551 { 1552 struct kqueue *kq; 1553 struct klist *list; 1554 1555 kq = kn->kn_kq; 1556 1557 if (kn->kn_fop->f_flags & FILTEROP_ISFD) 1558 list = &kn->kn_fp->f_klist; 1559 else 1560 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 1561 1562 lwkt_getpooltoken(list); 1563 SLIST_REMOVE(list, kn, knote, kn_link); 1564 TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink); 1565 if (kn->kn_status & KN_QUEUED) 1566 knote_dequeue(kn); 1567 if (kn->kn_fop->f_flags & FILTEROP_ISFD) { 1568 fdrop(kn->kn_fp); 1569 kn->kn_fp = NULL; 1570 } 1571 knote_free(kn); 1572 lwkt_relpooltoken(list); 1573 } 1574 1575 /* 1576 * Low level enqueue function. 1577 * 1578 * The knote should already be marked for processing. 1579 * Caller must be holding the kq token 1580 */ 1581 static void 1582 knote_enqueue(struct knote *kn) 1583 { 1584 struct kqueue *kq = kn->kn_kq; 1585 1586 KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); 1587 TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe); 1588 kn->kn_status |= KN_QUEUED; 1589 ++kq->kq_count; 1590 1591 /* 1592 * Send SIGIO on request (typically set up as a mailbox signal) 1593 */ 1594 if (kq->kq_sigio && (kq->kq_state & KQ_ASYNC) && kq->kq_count == 1) 1595 pgsigio(kq->kq_sigio, SIGIO, 0); 1596 1597 kqueue_wakeup(kq); 1598 } 1599 1600 /* 1601 * Low level dequeue function. 1602 * 1603 * The knote should already be marked for processing. 
1604 * Caller must be holding the kq token 1605 */ 1606 static void 1607 knote_dequeue(struct knote *kn) 1608 { 1609 struct kqueue *kq = kn->kn_kq; 1610 1611 KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); 1612 TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe); 1613 kn->kn_status &= ~KN_QUEUED; 1614 kq->kq_count--; 1615 } 1616 1617 static struct knote * 1618 knote_alloc(void) 1619 { 1620 return kmalloc(sizeof(struct knote), M_KQUEUE, M_WAITOK); 1621 } 1622 1623 static void 1624 knote_free(struct knote *kn) 1625 { 1626 kfree(kn, M_KQUEUE); 1627 } 1628