/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.10 2004/04/04 07:03:14 cperciva Exp $
 * $DragonFly: src/sys/kern/kern_event.c,v 1.33 2007/02/03 17:05:57 corecode Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/lock.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>
#include <sys/signalvar.h>
#include <sys/filio.h>

#include <sys/thread2.h>
#include <sys/file2.h>
#include <sys/mplock2.h>

#include <vm/vm_zone.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static int	kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
		    struct timespec *tsp, int *errorp);
static int	kqueue_read(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_write(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
		    struct ucred *cred, struct sysmsg *msg);
static int	kqueue_poll(struct file *fp, int events, struct ucred *cred);
static int	kqueue_kqfilter(struct file *fp, struct knote *kn);
static int	kqueue_stat(struct file *fp, struct stat *st,
		    struct ucred *cred);
static int	kqueue_close(struct file *fp);
static void	kqueue_wakeup(struct kqueue *kq);

/*
 * MPSAFE
 */
static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
	.fo_shutdown = nofo_shutdown
};

static void	knote_attach(struct knote *kn);
static void	knote_drop(struct knote *kn);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct	knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static vm_zone_t	knote_zone;
static int		kq_ncallouts = 0;
static int		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

#define KNOTE_ACTIVATE(kn) do { 					\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern struct filterops aio_filtops;
extern struct filterops sig_filtops;
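
/*
 * NOTE: the user-visible EVFILT_* filter numbers are negative.
 * kqueue_register() converts them to a 0-based index into the
 * sysfilt_ops[] table below via ~kev->filter.
 */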

/*
 * Table for all system-defined filters.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&aio_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
};

static int
filt_fileattach(struct knote *kn)
{
	return (fo_kqfilter(kn->kn_fp, kn));
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	get_mplock();
	if (kn->kn_filter != EVFILT_READ) {
		rel_mplock();
		return (1);
	}

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	rel_mplock();
	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	}
	if (p == NULL)
		return (ESRCH);
	if (!PRISON_CHECK(curthread->td_ucred, p->p_ucred))
		return (EACCES);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXX lock the proc here while adding to the list? */
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	if (kn->kn_status & KN_DETACHED)
		return;
	/* XXX locking?  this might modify another process. */
	p = kn->kn_ptr.p_proc;
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
}
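
/*
 * Filter event routine for EVFILT_PROC knotes.  Called via knote() with a
 * NOTE_* hint describing what happened to the process (exit, fork, etc).
 * Returns non-zero if the knote should be activated.
 */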

static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * Process is gone, so flag the event as finished.  Detach the
	 * knote from the process now because the process will be poof,
	 * gone later on.
	 */
	if (event == NOTE_EXIT) {
		struct proc *p = kn->kn_ptr.p_proc;
		if ((kn->kn_status & KN_DETACHED) == 0) {
			SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
			kn->kn_status |= KN_DETACHED;
			kn->kn_data = p->p_xstat;
			kn->kn_ptr.p_proc = NULL;
		}
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz_high(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz_high(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp);
	kn->kn_hook = (caddr_t)calloutp;
	callout_reset(calloutp, tticks, filt_timerexpire, kn);

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_stop(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{
	return (kn->kn_data != 0);
}
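
/*
 * Usage sketch (userland, illustrative only): a one-shot 500ms timer
 * event might be registered as
 *
 *	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 500, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * kev.data (500) ends up in kn_sdata, which filt_timerattach() above
 * interprets as the period in milliseconds.
 */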

/*
 * Initialize a kqueue.
 *
 * NOTE: The lwp/proc code initializes a kqueue for select/poll ops.
 *
 * MPSAFE
 */
void
kqueue_init(struct kqueue *kq, struct filedesc *fdp)
{
	TAILQ_INIT(&kq->kq_knpend);
	TAILQ_INIT(&kq->kq_knlist);
	kq->kq_fdp = fdp;
}

/*
 * Terminate a kqueue.  Freeing the actual kq itself is left up to the
 * caller (it might be embedded in a lwp so we don't do it here).
 */
void
kqueue_terminate(struct kqueue *kq)
{
	struct knote *kn;
	struct klist *list;
	int hv;

	while ((kn = TAILQ_FIRST(&kq->kq_knlist)) != NULL) {
		kn->kn_fop->f_detach(kn);
		if (kn->kn_fop->f_isfd) {
			list = &kn->kn_fp->f_klist;
			SLIST_REMOVE(list, kn, knote, kn_link);
			fdrop(kn->kn_fp);
			kn->kn_fp = NULL;
		} else {
			hv = KN_HASH(kn->kn_id, kq->kq_knhashmask);
			list = &kq->kq_knhash[hv];
			SLIST_REMOVE(list, kn, knote, kn_link);
		}
		TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink);
		if (kn->kn_status & KN_QUEUED)
			knote_dequeue(kn);
		knote_free(kn);
	}

	if (kq->kq_knhash) {
		kfree(kq->kq_knhash, M_KQUEUE);
		kq->kq_knhash = NULL;
		kq->kq_knhashmask = 0;
	}
}

/*
 * MPSAFE
 */
int
sys_kqueue(struct kqueue_args *uap)
{
	struct thread *td = curthread;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	error = falloc(td->td_lwp, &fp, &fd);
	if (error)
		return (error);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;

	kq = kmalloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
	kqueue_init(kq, td->td_proc->p_fd);
	fp->f_data = kq;

	fsetfd(kq->kq_fdp, fp, fd);
	uap->sysmsg_result = fd;
	fdrop(fp);
	return (error);
}
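
/*
 * kevent(2) system call.  The changelist is copied in and registered in
 * KQ_NEVENTS-sized chunks; triggered events are then collected with
 * kqueue_scan() and copied out to the eventlist, honoring the timeout.
 */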

/*
 * MPALMOSTSAFE
 */
int
sys_kevent(struct kevent_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp = NULL;
	struct timespec ts;
	struct timespec *tsp;
	int i, n, total, nerrors, error;
	struct kevent kev[KQ_NEVENTS];

	fp = holdfp(p->p_fd, uap->fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_KQUEUE) {
		fdrop(fp);
		return (EBADF);
	}

	/*
	 * The done: label releases the mplock, so acquire it before any
	 * error path that can go there (including the timeout copyin).
	 */
	get_mplock();

	if (uap->timeout) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			goto done;
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;

	while (uap->nchanges > 0) {
		n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
		error = copyin(uap->changelist, kev, n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp);
			if (error) {
				if (uap->nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					copyout(kevp, uap->eventlist,
					    sizeof(*kevp));
					uap->eventlist++;
					uap->nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		uap->nchanges -= n;
		uap->changelist += n;
	}
	if (nerrors) {
		uap->sysmsg_result = nerrors;
		error = 0;
		goto done;
	}

	/*
	 * Acquire/wait for events - setup timeout
	 */
	if (tsp != NULL) {
		struct timespec ats;

		if (tsp->tv_sec || tsp->tv_nsec) {
			nanouptime(&ats);
			timespecadd(tsp, &ats);		/* tsp = target time */
		}
	}

	/*
	 * Loop as required.
	 *
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
	 */
	total = 0;
	error = 0;
	while ((n = uap->nevents - total) > 0) {
		if (n > KQ_NEVENTS)
			n = KQ_NEVENTS;
		i = kqueue_scan(kq, kev, n, tsp, &error);
		if (i == 0)
			break;
		error = copyout(kev, uap->eventlist + total,
		    (size_t)i * sizeof(struct kevent));
		total += i;
		if (error || i != n)
			break;
		tsp = &ts;		/* successive loops non-blocking */
		tsp->tv_sec = 0;
		tsp->tv_nsec = 0;
	}
	uap->sysmsg_result = total;
done:
	rel_mplock();
	if (fp != NULL)
		fdrop(fp);
	return (error);
}
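
/*
 * Process a single kevent change record against a kqueue: locate (or, for
 * EV_ADD, create and attach) the matching knote, then apply the EV_ADD /
 * EV_DELETE / EV_ENABLE / EV_DISABLE semantics requested in kev->flags.
 */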

int
kqueue_register(struct kqueue *kq, struct kevent *kev)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		kprintf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	if (fops->f_isfd) {
		/* validate descriptor */
		fp = holdfp(fdp, kev->ident, -1);
		if (fp == NULL)
			return (EBADF);

		SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
			if (kn->kn_kq == kq &&
			    kn->kn_filter == kev->filter &&
			    kn->kn_id == kev->ident) {
				break;
			}
		}
	} else {
		if (kq->kq_knhashmask) {
			struct klist *list;

			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link) {
				if (kn->kn_id == kev->ident &&
				    kn->kn_filter == kev->filter)
					break;
			}
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		crit_enter();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		crit_exit();
	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		crit_enter();
		kn->kn_status |= KN_DISABLED;
		crit_exit();
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		crit_enter();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		crit_exit();
	}

done:
	if (fp != NULL)
		fdrop(fp);
	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->ts_secs/nsecs are both
 * 0 we do not block at all.
 */
static int
kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
	    struct timespec *tsp, int *errorp)
{
	struct knote *kn, marker;
	int total;

	total = 0;
again:
	crit_enter();
	if (kq->kq_count == 0) {
		if (tsp == NULL) {
			kq->kq_state |= KQ_SLEEP;
			*errorp = tsleep(kq, PCATCH, "kqread", 0);
		} else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
			*errorp = EWOULDBLOCK;
		} else {
			struct timespec ats;
			struct timespec atx = *tsp;
			int timeout;

			nanouptime(&ats);
			timespecsub(&atx, &ats);
			if (atx.tv_sec < 0) {
				*errorp = EWOULDBLOCK;
			} else {
				timeout = atx.tv_sec > 24 * 60 * 60 ?
					24 * 60 * 60 * hz : tstohz_high(&atx);
				kq->kq_state |= KQ_SLEEP;
				*errorp = tsleep(kq, PCATCH, "kqread", timeout);
			}
		}
		crit_exit();
		if (*errorp == 0)
			goto again;
		/* don't restart after signals... */
		if (*errorp == ERESTART)
			*errorp = EINTR;
		else if (*errorp == EWOULDBLOCK)
			*errorp = 0;
		goto done;
	}

	/*
	 * Collect events.  Continuous mode events may get recycled
	 * past the marker so we stop when we hit it unless no events
	 * have been collected.
	 */
	TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_knpend);
		if (kn == &marker)
			break;
		TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp++ = kn->kn_kevent;
		++total;
		--count;

		/*
		 * Post-event action on the note
		 */
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			crit_exit();
			kn->kn_fop->f_detach(kn);
			knote_drop(kn);
			crit_enter();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
		}
	}
	TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);
	crit_exit();
	if (total == 0)
		goto again;
done:
	return (total);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 *
 * MPSAFE
 */
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	return (ENXIO);
}

/*
 * MPSAFE
 */
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	return (ENXIO);
}

/*
 * MPALMOSTSAFE
 */
static int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	     struct ucred *cred, struct sysmsg *msg)
{
	struct kqueue *kq;
	int error;

	get_mplock();
	kq = (struct kqueue *)fp->f_data;

	switch(com) {
	case FIOASYNC:
		if (*(int *)data)
			kq->kq_state |= KQ_ASYNC;
		else
			kq->kq_state &= ~KQ_ASYNC;
		error = 0;
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &kq->kq_sigio);
		break;
	default:
		error = ENOTTY;
		break;
	}
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_poll(struct file *fp, int events, struct ucred *cred)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;

	get_mplock();
	crit_enter();
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curthread, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	crit_exit();
	rel_mplock();
	return (revents);
}

/*
 * MPSAFE
 */
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *cred)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;

	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
kqueue_close(struct file *fp)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;

	get_mplock();

	kqueue_terminate(kq);

	fp->f_data = NULL;
	funsetown(kq->kq_sigio);
	rel_mplock();

	kfree(kq, M_KQUEUE);
	return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{
	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct file *fp, struct filedesc *fdp, int fd)
{
	struct knote *kn;

restart:
	SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
		if (kn->kn_kq->kq_fdp == fdp && kn->kn_id == fd) {
			kn->kn_fop->f_detach(kn);
			knote_drop(kn);
			goto restart;
		}
	}
}

static void
knote_attach(struct knote *kn)
{
	struct klist *list;
	struct kqueue *kq = kn->kn_kq;

	if (kn->kn_fop->f_isfd) {
		KKASSERT(kn->kn_fp);
		list = &kn->kn_fp->f_klist;
	} else {
		if (kq->kq_knhashmask == 0)
			kq->kq_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
						 &kq->kq_knhashmask);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
	TAILQ_INSERT_HEAD(&kq->kq_knlist, kn, kn_kqlink);
	kn->kn_status = 0;
}

/*
 * should be called outside of a critical section, since we don't want to
 * hold a critical section while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn)
{
	struct kqueue *kq;
	struct klist *list;

	kq = kn->kn_kq;

	if (kn->kn_fop->f_isfd)
		list = &kn->kn_fp->f_klist;
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop(kn->kn_fp);
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	crit_enter();
	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	++kq->kq_count;

	/*
	 * Send SIGIO on request (typically set up as a mailbox signal)
	 */
	if (kq->kq_sigio && (kq->kq_state & KQ_ASYNC) && kq->kq_count == 1)
		pgsigio(kq->kq_sigio, SIGIO, 0);
	crit_exit();
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
	crit_enter();

	TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	crit_exit();
}

static void
knote_init(void)
{
	knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)zalloc(knote_zone));
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}