/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.10 2004/04/04 07:03:14 cperciva Exp $
 * $DragonFly: src/sys/kern/kern_event.c,v 1.33 2007/02/03 17:05:57 corecode Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/lock.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>
#include <sys/signalvar.h>
#include <sys/filio.h>

#include <sys/thread2.h>
#include <sys/file2.h>
#include <sys/mplock2.h>

#include <vm/vm_zone.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static int	kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
		    struct timespec *tsp, int *errorp);
static int	kqueue_read(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_write(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
		    struct ucred *cred, struct sysmsg *msg);
static int	kqueue_poll(struct file *fp, int events, struct ucred *cred);
static int	kqueue_kqfilter(struct file *fp, struct knote *kn);
static int	kqueue_stat(struct file *fp, struct stat *st,
		    struct ucred *cred);
static int	kqueue_close(struct file *fp);
static void	kqueue_wakeup(struct kqueue *kq);

/*
 * MPSAFE
 */
static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
	.fo_shutdown = nofo_shutdown
};

static void	knote_attach(struct knote *kn);
static void	knote_drop(struct knote *kn);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct	knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static vm_zone_t	knote_zone;
static int		kq_ncallouts = 0;
static int		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

#define KNOTE_ACTIVATE(kn) do {					\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern struct filterops aio_filtops;
extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&aio_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
};

static int
filt_fileattach(struct knote *kn)
{
	return (fo_kqfilter(kn->kn_fp, kn));
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	get_mplock();
	if (kn->kn_filter != EVFILT_READ) {
		rel_mplock();
		return (1);
	}

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	rel_mplock();
	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	}
	if (p == NULL)
		return (ESRCH);
	if (!PRISON_CHECK(curthread->td_ucred, p->p_ucred))
		return (EACCES);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXX lock the proc here while adding to the list? */
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	if (kn->kn_status & KN_DETACHED)
		return;
	/* XXX locking?  this might modify another process. */
	p = kn->kn_ptr.p_proc;
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
}

static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * Process is gone, so flag the event as finished.  Detach the
	 * knote from the process now because the process will be poof,
	 * gone later on.
	 */
	if (event == NOTE_EXIT) {
		struct proc *p = kn->kn_ptr.p_proc;
		if ((kn->kn_status & KN_DETACHED) == 0) {
			SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
			kn->kn_status |= KN_DETACHED;
			kn->kn_data = p->p_xstat;
			kn->kn_ptr.p_proc = NULL;
		}
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz_high(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz_high(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp);
	kn->kn_hook = (caddr_t)calloutp;
	callout_reset(calloutp, tticks, filt_timerexpire, kn);

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_stop(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}

/*
 * Initialize a kqueue.
 *
 * NOTE: The lwp/proc code initializes a kqueue for select/poll ops.
 *
 * MPSAFE
 */
void
kqueue_init(struct kqueue *kq, struct filedesc *fdp)
{
	TAILQ_INIT(&kq->kq_knpend);
	TAILQ_INIT(&kq->kq_knlist);
	kq->kq_fdp = fdp;
}

/*
 * Terminate a kqueue.  Freeing the actual kq itself is left up to the
 * caller (it might be embedded in a lwp so we don't do it here).
 */
void
kqueue_terminate(struct kqueue *kq)
{
	struct knote *kn;
	struct klist *list;
	int hv;

	while ((kn = TAILQ_FIRST(&kq->kq_knlist)) != NULL) {
		kn->kn_fop->f_detach(kn);
		if (kn->kn_fop->f_isfd) {
			list = &kn->kn_fp->f_klist;
			SLIST_REMOVE(list, kn, knote, kn_link);
			fdrop(kn->kn_fp);
			kn->kn_fp = NULL;
		} else {
			hv = KN_HASH(kn->kn_id, kq->kq_knhashmask);
			list = &kq->kq_knhash[hv];
			SLIST_REMOVE(list, kn, knote, kn_link);
		}
		TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink);
		if (kn->kn_status & KN_QUEUED)
			knote_dequeue(kn);
		knote_free(kn);
	}

	if (kq->kq_knhash) {
		kfree(kq->kq_knhash, M_KQUEUE);
		kq->kq_knhash = NULL;
		kq->kq_knhashmask = 0;
	}
}

/*
 * MPSAFE
 */
int
sys_kqueue(struct kqueue_args *uap)
{
	struct thread *td = curthread;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	error = falloc(td->td_lwp, &fp, &fd);
	if (error)
		return (error);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;

	kq = kmalloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
	kqueue_init(kq, td->td_proc->p_fd);
	fp->f_data = kq;

	fsetfd(kq->kq_fdp, fp, fd);
	uap->sysmsg_result = fd;
	fdrop(fp);
	return (error);
}

/*
 * Copy 'count' items into the destination list pointed to by uap->eventlist.
 */
static int
kevent_copyout(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	uap = (struct kevent_args *)arg;

	error = copyout(kevp, uap->eventlist, count * sizeof *kevp);
	if (error == 0)
		uap->eventlist += count;
	return (error);
}

/*
 * Copy 'count' items from the list pointed to by uap->changelist.
 */
static int
kevent_copyin(void *arg, struct kevent *kevp, int count)
{
	struct kevent_args *uap;
	int error;

	uap = (struct kevent_args *)arg;

	error = copyin(uap->changelist, kevp, count * sizeof *kevp);
	if (error == 0)
		uap->changelist += count;
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
kern_kevent(int fd, int nchanges, int nevents, struct kevent_args *uap,
    k_copyin_fn kevent_copyinfn, k_copyout_fn kevent_copyoutfn,
    struct timespec *tsp_in)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp = NULL;
	struct timespec ts;
	struct timespec *tsp;
	int i, n, total, nerrors, error;
	struct kevent kev[KQ_NEVENTS];

	tsp = tsp_in;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_KQUEUE) {
		fdrop(fp);
		return (EBADF);
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;

	get_mplock();
	while (nchanges > 0) {
		n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges;
		error = kevent_copyinfn(uap, kev, n);
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp);
			if (error) {
				if (nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					kevent_copyoutfn(uap, kevp, 1);
					nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		nchanges -= n;
	}
	if (nerrors) {
		uap->sysmsg_result = nerrors;
		error = 0;
		goto done;
	}

	/*
	 * Acquire/wait for events - setup timeout
	 */
	if (tsp != NULL) {
		struct timespec ats;

		if (tsp->tv_sec || tsp->tv_nsec) {
			nanouptime(&ats);
			timespecadd(tsp, &ats);		/* tsp = target time */
		}
	}

	/*
	 * Loop as required.
	 *
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
	 */
	total = 0;
	error = 0;
	while ((n = nevents - total) > 0) {
		if (n > KQ_NEVENTS)
			n = KQ_NEVENTS;
		i = kqueue_scan(kq, kev, n, tsp, &error);
		if (i == 0)
			break;
		error = kevent_copyoutfn(uap, kev, i);
		total += i;
		if (error || i != n)
			break;
		tsp = &ts;		/* successive loops non-blocking */
		tsp->tv_sec = 0;
		tsp->tv_nsec = 0;
	}
	uap->sysmsg_result = total;
done:
	rel_mplock();
	if (fp != NULL)
		fdrop(fp);
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_kevent(struct kevent_args *uap)
{
	struct timespec ts, *tsp;
	int error;

	if (uap->timeout) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			return (error);
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	error = kern_kevent(uap->fd, uap->nchanges, uap->nevents,
	    uap, kevent_copyin, kevent_copyout, tsp);

	return (error);
}
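
/*
 * (Descriptive comment added for clarity.)  Register a single change
 * record against a kqueue: locate any existing knote for (ident, filter),
 * allocate and attach a new knote for EV_ADD, and then apply EV_DELETE,
 * EV_DISABLE and EV_ENABLE as requested.
 */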
int
kqueue_register(struct kqueue *kq, struct kevent *kev)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		kprintf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	if (fops->f_isfd) {
		/* validate descriptor */
		fp = holdfp(fdp, kev->ident, -1);
		if (fp == NULL)
			return (EBADF);

		SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
			if (kn->kn_kq == kq &&
			    kn->kn_filter == kev->filter &&
			    kn->kn_id == kev->ident) {
				break;
			}
		}
	} else {
		if (kq->kq_knhashmask) {
			struct klist *list;

			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link) {
				if (kn->kn_id == kev->ident &&
				    kn->kn_filter == kev->filter)
					break;
			}
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		crit_enter();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		crit_exit();
	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		crit_enter();
		kn->kn_status |= KN_DISABLED;
		crit_exit();
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		crit_enter();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		crit_exit();
	}

done:
	if (fp != NULL)
		fdrop(fp);
	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->tv_sec/tv_nsec are both
 * 0 we do not block at all.
 */
static int
kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
    struct timespec *tsp, int *errorp)
{
	struct knote *kn, marker;
	int total;

	total = 0;
again:
	crit_enter();
	if (kq->kq_count == 0) {
		if (tsp == NULL) {
			kq->kq_state |= KQ_SLEEP;
			*errorp = tsleep(kq, PCATCH, "kqread", 0);
		} else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
			*errorp = EWOULDBLOCK;
		} else {
			struct timespec ats;
			struct timespec atx = *tsp;
			int timeout;

			nanouptime(&ats);
			timespecsub(&atx, &ats);
			if (atx.tv_sec < 0) {
				/* target time already passed */
				*errorp = EWOULDBLOCK;
			} else {
				timeout = atx.tv_sec > 24 * 60 * 60 ?
				    24 * 60 * 60 * hz : tstohz_high(&atx);
				kq->kq_state |= KQ_SLEEP;
				*errorp = tsleep(kq, PCATCH, "kqread", timeout);
			}
		}
		crit_exit();
		if (*errorp == 0)
			goto again;
		/* don't restart after signals... */
		if (*errorp == ERESTART)
			*errorp = EINTR;
		else if (*errorp == EWOULDBLOCK)
			*errorp = 0;
		goto done;
	}

	/*
	 * Collect events.  Continuous mode events may get recycled
	 * past the marker so we stop when we hit it unless no events
	 * have been collected.
	 */
	TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_knpend);
		if (kn == &marker)
			break;
		TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp++ = kn->kn_kevent;
		++total;
		--count;

		/*
		 * Post-event action on the note
		 */
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			crit_exit();
			kn->kn_fop->f_detach(kn);
			knote_drop(kn);
			crit_enter();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
		}
	}
	TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);
	crit_exit();
	if (total == 0)
		goto again;
done:
	return (total);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 *
 * MPSAFE
 */
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	return (ENXIO);
}

/*
 * MPSAFE
 */
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	return (ENXIO);
}

/*
 * MPALMOSTSAFE
 */
static int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
    struct ucred *cred, struct sysmsg *msg)
{
	struct kqueue *kq;
	int error;

	get_mplock();
	kq = (struct kqueue *)fp->f_data;

	switch(com) {
	case FIOASYNC:
		if (*(int *)data)
			kq->kq_state |= KQ_ASYNC;
		else
			kq->kq_state &= ~KQ_ASYNC;
		error = 0;
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &kq->kq_sigio);
		break;
	default:
		error = ENOTTY;
		break;
	}
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_poll(struct file *fp, int events, struct ucred *cred)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;

	get_mplock();
	crit_enter();
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curthread, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	crit_exit();
	rel_mplock();
	return (revents);
}

/*
 * MPSAFE
 */
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *cred)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;

	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_close(struct file *fp)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;

	get_mplock();

	kqueue_terminate(kq);

	fp->f_data = NULL;
	funsetown(kq->kq_sigio);
	rel_mplock();

	kfree(kq, M_KQUEUE);
	return (0);
}
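
/*
 * (Descriptive comment added for clarity.)  Wake up anyone blocked in
 * kqueue_scan(), any select/poll waiters, and any kqueue filtering on
 * this kqueue (EVFILT_READ via kqread_filtops).
 */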
static void
kqueue_wakeup(struct kqueue *kq)
{
	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct file *fp, struct filedesc *fdp, int fd)
{
	struct knote *kn;

restart:
	SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
		if (kn->kn_kq->kq_fdp == fdp && kn->kn_id == fd) {
			kn->kn_fop->f_detach(kn);
			knote_drop(kn);
			goto restart;
		}
	}
}
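
/*
 * (Descriptive comment added for clarity.)  Link a new knote onto the
 * per-file klist, or onto the kqueue's hash table for non-descriptor
 * identifiers (the hash table is created lazily here), and onto the
 * owning kqueue's knote list.
 */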
static void
knote_attach(struct knote *kn)
{
	struct klist *list;
	struct kqueue *kq = kn->kn_kq;

	if (kn->kn_fop->f_isfd) {
		KKASSERT(kn->kn_fp);
		list = &kn->kn_fp->f_klist;
	} else {
		if (kq->kq_knhashmask == 0)
			kq->kq_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &kq->kq_knhashmask);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
	TAILQ_INSERT_HEAD(&kq->kq_knlist, kn, kn_kqlink);
	kn->kn_status = 0;
}

/*
 * should be called outside of a critical section, since we don't want to
 * hold a critical section while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn)
{
	struct kqueue *kq;
	struct klist *list;

	kq = kn->kn_kq;

	if (kn->kn_fop->f_isfd)
		list = &kn->kn_fp->f_klist;
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop(kn->kn_fp);
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	crit_enter();
	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	++kq->kq_count;

	/*
	 * Send SIGIO on request (typically set up as a mailbox signal)
	 */
	if (kq->kq_sigio && (kq->kq_state & KQ_ASYNC) && kq->kq_count == 1)
		pgsigio(kq->kq_sigio, SIGIO, 0);
	crit_exit();
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
	crit_enter();

	TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	crit_exit();
}

static void
knote_init(void)
{
	knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)zalloc(knote_zone));
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}
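
/*
 * Illustrative sketch only (not part of the kernel build): roughly how a
 * userland program could drive the syscalls implemented above.  The
 * descriptor "sock" and the 500 ms timer period are placeholder values.
 *
 *	struct kevent chg[2], ev[2];
 *	int kq, n, i;
 *
 *	kq = kqueue();
 *	EV_SET(&chg[0], sock, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	EV_SET(&chg[1], 1, EVFILT_TIMER, EV_ADD, 0, 500, NULL);
 *	n = kevent(kq, chg, 2, ev, 2, NULL);	// register changes, block
 *	for (i = 0; i < n; i++) {
 *		if (ev[i].filter == EVFILT_READ)
 *			;	// ev[i].data bytes are readable
 *		else if (ev[i].filter == EVFILT_TIMER)
 *			;	// ev[i].data expirations since last report
 *	}
 *
 * Error checking is omitted; see kqueue(2)/kevent(2) for details.
 */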