/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.10 2004/04/04 07:03:14 cperciva Exp $
 * $DragonFly: src/sys/kern/kern_event.c,v 1.33 2007/02/03 17:05:57 corecode Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/lock.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>
#include <sys/signalvar.h>
#include <sys/filio.h>

#include <sys/thread2.h>
#include <sys/file2.h>
#include <sys/mplock2.h>

#include <vm/vm_zone.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static int	kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
		    struct timespec *tsp, int *errorp);
static int	kqueue_read(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_write(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
		    struct ucred *cred, struct sysmsg *msg);
static int	kqueue_poll(struct file *fp, int events, struct ucred *cred);
static int	kqueue_kqfilter(struct file *fp, struct knote *kn);
static int	kqueue_stat(struct file *fp, struct stat *st,
		    struct ucred *cred);
static int	kqueue_close(struct file *fp);
static void	kqueue_wakeup(struct kqueue *kq);

/*
 * MPSAFE
 */
static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
	.fo_shutdown = nofo_shutdown
};

static void	knote_attach(struct knote *kn);
static void	knote_drop(struct knote *kn);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct	knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static vm_zone_t	knote_zone;
static int		kq_ncallouts = 0;
static int		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

#define KNOTE_ACTIVATE(kn) do { 					\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern struct filterops aio_filtops;
extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&aio_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
};
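
/*
 * Filter numbers from <sys/event.h> are small negative integers
 * (EVFILT_READ is -1, down through EVFILT_TIMER at -7), so
 * kqueue_register() turns one into a 0-based index into the table
 * above with ~kev->filter.
 */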

static int
filt_fileattach(struct knote *kn)
{
	return (fo_kqfilter(kn->kn_fp, kn));
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	get_mplock();
	if (kn->kn_filter != EVFILT_READ) {
		rel_mplock();
		return (1);
	}

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	rel_mplock();
	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;

	immediate = 0;
	lwkt_gettoken(&proc_token);
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	}
	if (p == NULL) {
		lwkt_reltoken(&proc_token);
		return (ESRCH);
	}
	if (!PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
		lwkt_reltoken(&proc_token);
		return (EACCES);
	}

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXX lock the proc here while adding to the list? */
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn);
	lwkt_reltoken(&proc_token);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	if (kn->kn_status & KN_DETACHED)
		return;
	/* XXX locking?  this might modify another process. */
	p = kn->kn_ptr.p_proc;
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
}
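
/*
 * Note on fork tracking: when NOTE_TRACK is requested, filt_proc() below
 * registers a second knote on the child pid with the kernel-internal
 * EV_FLAG1 flag set; filt_procattach() recognizes that flag and reports
 * the child as a NOTE_CHILD event carrying the parent's pid in kn_data.
 */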
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * Process is gone, so flag the event as finished.  Detach the
	 * knote from the process now because the process will be poof,
	 * gone later on.
	 */
	if (event == NOTE_EXIT) {
		struct proc *p = kn->kn_ptr.p_proc;
		if ((kn->kn_status & KN_DETACHED) == 0) {
			SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
			kn->kn_status |= KN_DETACHED;
			kn->kn_data = p->p_xstat;
			kn->kn_ptr.p_proc = NULL;
		}
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz_high(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz_high(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp);
	kn->kn_hook = (caddr_t)calloutp;
	callout_reset(calloutp, tticks, filt_timerexpire, kn);

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_stop(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}
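
/*
 * Summary of the timer filter above: kn_sdata is the period in
 * milliseconds, filt_timerexpire() counts expirations in kn_data and
 * re-arms the callout unless EV_ONESHOT was requested, and EV_CLEAR is
 * forced so the count is reset once the event has been collected.
 */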

/*
 * Initialize a kqueue.
 *
 * NOTE: The lwp/proc code initializes a kqueue for select/poll ops.
 *
 * MPSAFE
 */
void
kqueue_init(struct kqueue *kq, struct filedesc *fdp)
{
	TAILQ_INIT(&kq->kq_knpend);
	TAILQ_INIT(&kq->kq_knlist);
	kq->kq_fdp = fdp;
}

/*
 * Terminate a kqueue.  Freeing the actual kq itself is left up to the
 * caller (it might be embedded in a lwp so we don't do it here).
 */
void
kqueue_terminate(struct kqueue *kq)
{
	struct knote *kn;
	struct klist *list;
	int hv;

	while ((kn = TAILQ_FIRST(&kq->kq_knlist)) != NULL) {
		kn->kn_fop->f_detach(kn);
		if (kn->kn_fop->f_isfd) {
			list = &kn->kn_fp->f_klist;
			SLIST_REMOVE(list, kn, knote, kn_link);
			fdrop(kn->kn_fp);
			kn->kn_fp = NULL;
		} else {
			hv = KN_HASH(kn->kn_id, kq->kq_knhashmask);
			list = &kq->kq_knhash[hv];
			SLIST_REMOVE(list, kn, knote, kn_link);
		}
		TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink);
		if (kn->kn_status & KN_QUEUED)
			knote_dequeue(kn);
		knote_free(kn);
	}

	if (kq->kq_knhash) {
		kfree(kq->kq_knhash, M_KQUEUE);
		kq->kq_knhash = NULL;
		kq->kq_knhashmask = 0;
	}
}

/*
 * MPSAFE
 */
int
sys_kqueue(struct kqueue_args *uap)
{
	struct thread *td = curthread;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	error = falloc(td->td_lwp, &fp, &fd);
	if (error)
		return (error);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;

	kq = kmalloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
	kqueue_init(kq, td->td_proc->p_fd);
	fp->f_data = kq;

	fsetfd(kq->kq_fdp, fp, fd);
	uap->sysmsg_result = fd;
	fdrop(fp);
	return (error);
}

/*
 * Copy 'count' items into the destination list pointed to by uap->eventlist.
 */
static int
kevent_copyout(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	uap = (struct kevent_args *)arg;

	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
	if (error == 0)
		uap->eventlist += count;
	return (error);
}

/*
 * Copy 'count' items from the list pointed to by uap->changelist.
 */
static int
kevent_copyin(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	uap = (struct kevent_args *)arg;

	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
	if (error == 0)
		uap->changelist += count;
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
kern_kevent(int fd, int nchanges, int nevents, struct kevent_args *uap,
    k_copyin_fn kevent_copyinfn, k_copyout_fn kevent_copyoutfn,
    struct timespec *tsp_in)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp = NULL;
	struct timespec ts;
	struct timespec *tsp;
	int i, n, total, nerrors, error;
	struct kevent kev[KQ_NEVENTS];

	tsp = tsp_in;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_KQUEUE) {
		fdrop(fp);
		return (EBADF);
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;

	get_mplock();
	while (nchanges > 0) {
		n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
		error = kevent_copyinfn(uap, kev, n);
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp);
			if (error) {
				if (nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					kevent_copyoutfn(uap, kevp, 1);
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;
	}
	if (nerrors) {
		uap->sysmsg_result = nerrors;
		error = 0;
		goto done;
	}

	/*
	 * Acquire/wait for events - setup timeout
	 */
	if (tsp != NULL) {
		struct timespec ats;

		if (tsp->tv_sec || tsp->tv_nsec) {
			nanouptime(&ats);
			timespecadd(tsp, &ats);		/* tsp = target time */
		}
	}

	/*
	 * Loop as required.
	 *
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
	 */
	total = 0;
	error = 0;
	while ((n = nevents - total) > 0) {
		if (n > KQ_NEVENTS)
			n = KQ_NEVENTS;
		i = kqueue_scan(kq, kev, n, tsp, &error);
		if (i == 0)
			break;
		error = kevent_copyoutfn(uap, kev, i);
		total += i;
		if (error || i != n)
			break;
		tsp = &ts;		/* successive loops non-blocking */
		tsp->tv_sec = 0;
		tsp->tv_nsec = 0;
	}
	uap->sysmsg_result = total;
done:
	rel_mplock();
	if (fp != NULL)
		fdrop(fp);
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_kevent(struct kevent_args *uap)
{
	struct timespec ts, *tsp;
	int error;

	if (uap->timeout) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	error = kern_kevent(uap->fd, uap->nchanges, uap->nevents,
	    uap, kevent_copyin, kevent_copyout, tsp);

	return (error);
}
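
/*
 * Illustrative userland usage of the syscall above (a sketch only, not
 * part of this file's kernel build; assumes an open descriptor fd and
 * the usual <sys/types.h>, <sys/event.h>, <sys/time.h>, <stdio.h> and
 * <err.h> headers): register a read filter and wait for one event.
 *
 *	struct kevent change, event;
 *	int kq, n;
 *
 *	if ((kq = kqueue()) == -1)
 *		err(1, "kqueue");
 *	EV_SET(&change, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
 *	if (kevent(kq, &change, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent register");
 *	if ((n = kevent(kq, NULL, 0, &event, 1, NULL)) == -1)
 *		err(1, "kevent wait");
 *	if (n > 0)
 *		printf("%ld bytes readable on fd %d\n",
 *		    (long)event.data, (int)event.ident);
 */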

int
kqueue_register(struct kqueue *kq, struct kevent *kev)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		kprintf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	if (fops->f_isfd) {
		/* validate descriptor */
		fp = holdfp(fdp, kev->ident, -1);
		if (fp == NULL)
			return (EBADF);

		SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
			if (kn->kn_kq == kq &&
			    kn->kn_filter == kev->filter &&
			    kn->kn_id == kev->ident) {
				break;
			}
		}
	} else {
		if (kq->kq_knhashmask) {
			struct klist *list;

			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link) {
				if (kn->kn_id == kev->ident &&
				    kn->kn_filter == kev->filter)
					break;
			}
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter that has already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		crit_enter();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		crit_exit();
	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		crit_enter();
		kn->kn_status |= KN_DISABLED;
		crit_exit();
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		crit_enter();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		crit_exit();
	}

done:
	if (fp != NULL)
		fdrop(fp);
	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->tv_sec/tv_nsec are both
 * 0 we do not block at all.
 */
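/*
 * Once an event has been copied out, the note is disposed of according to
 * its flags: EV_ONESHOT notes are detached and freed, EV_CLEAR notes have
 * kn_data/kn_fflags reset and stay off the pending queue until their
 * filter fires again, and all other notes are re-queued behind the marker.
 */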
static int
kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
    struct timespec *tsp, int *errorp)
{
	struct knote *kn, marker;
	int total;

	total = 0;
again:
	crit_enter();
	if (kq->kq_count == 0) {
		if (tsp == NULL) {
			kq->kq_state |= KQ_SLEEP;
			*errorp = tsleep(kq, PCATCH, "kqread", 0);
		} else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
			*errorp = EWOULDBLOCK;
		} else {
			struct timespec ats;
			struct timespec atx = *tsp;
			int timeout;

			nanouptime(&ats);
			timespecsub(&atx, &ats);
			if (atx.tv_sec < 0) {
				*errorp = EWOULDBLOCK;
			} else {
				timeout = atx.tv_sec > 24 * 60 * 60 ?
				    24 * 60 * 60 * hz : tstohz_high(&atx);
				kq->kq_state |= KQ_SLEEP;
				*errorp = tsleep(kq, PCATCH, "kqread", timeout);
			}
		}
		crit_exit();
		if (*errorp == 0)
			goto again;
		/* don't restart after signals... */
		if (*errorp == ERESTART)
			*errorp = EINTR;
		else if (*errorp == EWOULDBLOCK)
			*errorp = 0;
		goto done;
	}

	/*
	 * Collect events.  Continuous mode events may get recycled
	 * past the marker so we stop when we hit it unless no events
	 * have been collected.
	 */
	TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_knpend);
		if (kn == &marker)
			break;
		TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp++ = kn->kn_kevent;
		++total;
		--count;

		/*
		 * Post-event action on the note
		 */
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			crit_exit();
			kn->kn_fop->f_detach(kn);
			knote_drop(kn);
			crit_enter();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
		}
	}
	TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);
	crit_exit();
	if (total == 0)
		goto again;
done:
	return (total);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 *
 * MPSAFE
 */
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	return (ENXIO);
}

/*
 * MPSAFE
 */
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	return (ENXIO);
}

/*
 * MPALMOSTSAFE
 */
static int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
    struct ucred *cred, struct sysmsg *msg)
{
	struct kqueue *kq;
	int error;

	get_mplock();
	kq = (struct kqueue *)fp->f_data;

	switch(com) {
	case FIOASYNC:
		if (*(int *)data)
			kq->kq_state |= KQ_ASYNC;
		else
			kq->kq_state &= ~KQ_ASYNC;
		error = 0;
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &kq->kq_sigio);
		break;
	default:
		error = ENOTTY;
		break;
	}
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_poll(struct file *fp, int events, struct ucred *cred)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;

	get_mplock();
	crit_enter();
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curthread, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	crit_exit();
	rel_mplock();
	return (revents);
}

/*
 * MPSAFE
 */
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *cred)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;

	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_close(struct file *fp)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;

	get_mplock();

	kqueue_terminate(kq);

	fp->f_data = NULL;
	funsetown(kq->kq_sigio);
	rel_mplock();

	kfree(kq, M_KQUEUE);
	return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{
	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct file *fp, struct filedesc *fdp, int fd)
{
	struct knote *kn;

restart:
	SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
		if (kn->kn_kq->kq_fdp == fdp && kn->kn_id == fd) {
			kn->kn_fop->f_detach(kn);
			knote_drop(kn);
			goto restart;
		}
	}
}

static void
knote_attach(struct knote *kn)
{
	struct klist *list;
	struct kqueue *kq = kn->kn_kq;

	if (kn->kn_fop->f_isfd) {
		KKASSERT(kn->kn_fp);
		list = &kn->kn_fp->f_klist;
	} else {
		if (kq->kq_knhashmask == 0)
			kq->kq_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &kq->kq_knhashmask);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
	TAILQ_INSERT_HEAD(&kq->kq_knlist, kn, kn_kqlink);
	kn->kn_status = 0;
}
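
/*
 * knote_drop() below is the inverse of knote_attach(): it unlinks the
 * knote from the per-fd klist or hash bucket and from kq_knlist, dequeues
 * it if still pending, drops the file reference for fd-based filters, and
 * frees the knote.
 */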
/*
 * should be called outside of a critical section, since we don't want to
 * hold a critical section while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn)
{
	struct kqueue *kq;
	struct klist *list;

	kq = kn->kn_kq;

	if (kn->kn_fop->f_isfd)
		list = &kn->kn_fp->f_klist;
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop(kn->kn_fp);
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	crit_enter();
	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	++kq->kq_count;

	/*
	 * Send SIGIO on request (typically set up as a mailbox signal)
	 */
	if (kq->kq_sigio && (kq->kq_state & KQ_ASYNC) && kq->kq_count == 1)
		pgsigio(kq->kq_sigio, SIGIO, 0);
	crit_exit();
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
	crit_enter();

	TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	crit_exit();
}

static void
knote_init(void)
{
	knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)zalloc(knote_zone));
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}