/*	$OpenBSD: kern_event.c,v 1.34 2009/06/02 11:04:55 guenther Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/syscallargs.h>
#include <sys/timeout.h>

int	kqueue_scan(struct file *fp, int maxevents,
	    struct kevent *ulistp, const struct timespec *timeout,
	    struct proc *p, int *retval);

int	kqueue_read(struct file *fp, off_t *poff, struct uio *uio,
	    struct ucred *cred);
int	kqueue_write(struct file *fp, off_t *poff, struct uio *uio,
	    struct ucred *cred);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_poll(struct file *fp, int events, struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

struct fileops kqueueops = {
	kqueue_read,
	kqueue_write,
	kqueue_ioctl,
	kqueue_poll,
	kqueue_kqfilter,
	kqueue_stat,
	kqueue_close
};

void	knote_attach(struct knote *kn, struct filedesc *fdp);
void	knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
#define knote_alloc() ((struct knote *)pool_get(&knote_pool, PR_WAITOK))
#define knote_free(kn) pool_put(&knote_pool, (kn))

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_procattach(struct knote *kn);
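
/*
 * Each filterops initializer below lists, in order, f_isfd, f_attach,
 * f_detach and f_event.  f_isfd is nonzero when the knote identifier
 * is a file descriptor, in which case kqueue_register() validates the
 * descriptor and the knote holds a reference to the file.
 */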
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timer(struct knote *kn, long hint);

struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

struct	pool knote_pool;
struct	pool kqueue_pool;
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KNOTE_ACTIVATE(kn) do {						\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64	/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern struct filterops sig_filtops;
#ifdef notyet
extern struct filterops aio_filtops;
#endif

/*
 * Table for all system-defined filters.
 */
struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
};

void kqueue_init(void);

void
kqueue_init(void)
{

	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, 0, 0, "kqueuepl",
	    &pool_allocator_nointr);
	pool_init(&knote_pool, sizeof(struct knote), 0, 0, 0, "knotepl",
	    &pool_allocator_nointr);
}

int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return ((*fp->f_ops->fo_kqfilter)(fp, kn));
}

int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

int
filt_procattach(struct knote *kn)
{
	struct proc *p;

	p = pfind(kn->kn_id);
	if (p == NULL)
		return (ESRCH);

	/*
	 * Fail if it's not owned by you, or the last exec gave us
	 * setuid/setgid privs (unless you're root).
	 */
	if (p->p_p != curproc->p_p &&
	    (p->p_cred->p_ruid != curproc->p_cred->p_ruid ||
	    (p->p_flag & P_SUGID)) && suser(curproc, 0) != 0)
		return (EACCES);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXX lock the proc here while adding to the list? */
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	return (0);
}
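
/*
 * Illustrative userland sketch (not part of this file): watching an
 * existing process with EVFILT_PROC, where "pid" stands for any pid_t
 * permitted by the ownership check in filt_procattach() above.  With
 * NOTE_TRACK set, filt_proc() below reacts to NOTE_FORK by
 * re-registering against the child through the internal EV_FLAG1
 * path, and the child's first event carries NOTE_CHILD with the
 * parent's pid in its data field.
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD,
 *	    NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 */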

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
	struct proc *p = kn->kn_ptr.p_proc;

	if (kn->kn_status & KN_DETACHED)
		return;

	/* XXX locking?  this might modify another process. */
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
}

int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz(&tv);
		timeout_add((struct timeout *)kn->kn_hook, tticks);
	}
}


/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;
	struct timeval tv;
	int tticks;

	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	timeout_add(to, tticks);
	kn->kn_hook = to;

	return (0);
}

void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del(to);
	free(to, M_KEVENT);
	kq_ntimeouts--;
}

int
filt_timer(struct knote *kn, long hint)
{
	return (kn->kn_data != 0);
}
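
/*
 * Illustrative userland sketch (not part of this file): a periodic
 * timer.  kev.data is the period in milliseconds, as consumed by
 * filt_timerattach() above, and the identifier (1 here) is an
 * arbitrary per-kqueue value.  Since EV_CLEAR is set automatically,
 * each retrieval reports the number of expirations since the last.
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, 0, 500, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 */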

/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does as well.
	 */
	kn->kn_data = 0;
	return (1);
}

int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	error = falloc(p, &fp, &fd);
	if (error)
		return (error);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	kq = pool_get(&kqueue_pool, PR_WAITOK|PR_ZERO);
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = (caddr_t)kq;
	*retval = fd;
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;	/* this process has a kq */
	kq->kq_fdp = fdp;
	FILE_SET_MATURE(fp);
	return (0);
}

int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct sys_kevent_args /* {
		syscallarg(int)	fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int)	nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int)	nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	int i, n, nerrors, error;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL ||
	    (fp->f_type != DTYPE_KQUEUE))
		return (EBADF);

	FREF(fp);

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
		SCARG(uap, timeout) = &ts;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;

	while (SCARG(uap, nchanges) > 0) {
		n = SCARG(uap, nchanges) > KQ_NEVENTS
			? KQ_NEVENTS : SCARG(uap, nchanges);
		error = copyin(SCARG(uap, changelist), kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, p);
			if (error) {
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) copyout((caddr_t)kevp,
					    (caddr_t)SCARG(uap, eventlist),
					    sizeof(*kevp));
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(fp, SCARG(uap, nevents), SCARG(uap, eventlist),
	    SCARG(uap, timeout), p, &n);
	*retval = n;
done:
	FRELE(fp);
	return (error);
}
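
/*
 * Illustrative userland sketch (not part of this file) of the
 * changelist/eventlist protocol implemented by sys_kevent() above: a
 * single call may both submit changes and collect events, and when
 * registering a change fails while room remains in the eventlist, the
 * failure comes back inline as an EV_ERROR record carrying the errno
 * value in data.  "fd" stands for any already-open descriptor.
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *	#include <err.h>
 *	#include <string.h>
 *
 *	struct kevent chg, ev;
 *	int kq, n;
 *
 *	if ((kq = kqueue()) == -1)
 *		err(1, "kqueue");
 *	EV_SET(&chg, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	if ((n = kevent(kq, &chg, 1, &ev, 1, NULL)) == -1)
 *		err(1, "kevent");
 *	if (n > 0 && (ev.flags & EV_ERROR))
 *		errx(1, "registration: %s", strerror((int)ev.data));
 */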

int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops = NULL;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	}

	if (fops == NULL) {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		return (EINVAL);
	}

	if (fops->f_isfd) {
		/* validate descriptor */
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL)
			return (EBADF);
		FREF(fp);
		fp->f_count++;

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {

		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			if (fp != NULL)
				FRELE(fp);
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, p, fdp);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p, p->p_fd);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

done:
	if (fp != NULL)
		closef(fp, p);
	return (error);
}
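
/*
 * Note on the sysfilt_ops lookup in kqueue_register() above: the
 * system filters are small negative constants, so ~kev->filter maps
 * them to 0-based table indices; EVFILT_READ (-1) selects
 * sysfilt_ops[0] and EVFILT_TIMER (-7) selects sysfilt_ops[6].
 */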

int
kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
    const struct timespec *tsp, struct proc *p, int *retval)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, marker;
	int s, count, timeout, nkev = 0, error = 0;

	count = maxevents;
	if (count == 0)
		goto done;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
			/* No timeout, just poll */
			timeout = -1;
			goto start;
		}
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}

		timeout = atv.tv_sec > 24 * 60 * 60 ?
			24 * 60 * 60 * hz : tvtohz(&atv);

		getmicrouptime(&rtv);
		timeradd(&atv, &rtv, &atv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timercmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timersub(&ttv, &rtv, &ttv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
			24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = kq->kq_kev;
	s = splhigh();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {
			splx(s);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, p, p->p_fd);
			s = splhigh();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			splx(s);
			error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splhigh();
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	splx(s);
done:
	if (nkev != 0)
		error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
		    sizeof(struct kevent) * nkev);
	*retval = maxevents - count;
	return (error);
}
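
/*
 * Illustrative userland sketch (not part of this file): the tsp
 * handling in kqueue_scan() above means a zeroed timespec polls
 * without sleeping, while a NULL timeout pointer blocks until an
 * event or a signal arrives.  "kq" is a descriptor from kqueue().
 *
 *	struct timespec ts = { 0, 0 };
 *	struct kevent evs[8];
 *	int n;
 *
 *	n = kevent(kq, NULL, 0, evs, 8, &ts);	poll, never blocks
 *	n = kevent(kq, NULL, 0, evs, 8, NULL);	blocks for an event
 */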
751 */ 752 /*ARGSUSED*/ 753 int 754 kqueue_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) 755 { 756 return (ENXIO); 757 } 758 759 /*ARGSUSED*/ 760 int 761 kqueue_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) 762 763 { 764 return (ENXIO); 765 } 766 767 /*ARGSUSED*/ 768 int 769 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 770 { 771 return (ENOTTY); 772 } 773 774 /*ARGSUSED*/ 775 int 776 kqueue_poll(struct file *fp, int events, struct proc *p) 777 { 778 struct kqueue *kq = (struct kqueue *)fp->f_data; 779 int revents = 0; 780 int s = splhigh(); 781 782 if (events & (POLLIN | POLLRDNORM)) { 783 if (kq->kq_count) { 784 revents |= events & (POLLIN | POLLRDNORM); 785 } else { 786 selrecord(p, &kq->kq_sel); 787 kq->kq_state |= KQ_SEL; 788 } 789 } 790 splx(s); 791 return (revents); 792 } 793 794 /*ARGSUSED*/ 795 int 796 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 797 { 798 struct kqueue *kq = (struct kqueue *)fp->f_data; 799 800 bzero((void *)st, sizeof(*st)); 801 st->st_size = kq->kq_count; 802 st->st_blksize = sizeof(struct kevent); 803 st->st_mode = S_IFIFO; 804 return (0); 805 } 806 807 /*ARGSUSED*/ 808 int 809 kqueue_close(struct file *fp, struct proc *p) 810 { 811 struct kqueue *kq = (struct kqueue *)fp->f_data; 812 struct filedesc *fdp = p->p_fd; 813 struct knote **knp, *kn, *kn0; 814 int i; 815 816 for (i = 0; i < fdp->fd_knlistsize; i++) { 817 knp = &SLIST_FIRST(&fdp->fd_knlist[i]); 818 kn = *knp; 819 while (kn != NULL) { 820 kn0 = SLIST_NEXT(kn, kn_link); 821 if (kq == kn->kn_kq) { 822 FREF(kn->kn_fp); 823 kn->kn_fop->f_detach(kn); 824 closef(kn->kn_fp, p); 825 knote_free(kn); 826 *knp = kn0; 827 } else { 828 knp = &SLIST_NEXT(kn, kn_link); 829 } 830 kn = kn0; 831 } 832 } 833 if (fdp->fd_knhashmask != 0) { 834 for (i = 0; i < fdp->fd_knhashmask + 1; i++) { 835 knp = &SLIST_FIRST(&fdp->fd_knhash[i]); 836 kn = *knp; 837 while (kn != NULL) { 838 kn0 = SLIST_NEXT(kn, kn_link); 839 if (kq == kn->kn_kq) { 840 kn->kn_fop->f_detach(kn); 841 /* XXX non-fd release of kn->kn_ptr */ 842 knote_free(kn); 843 *knp = kn0; 844 } else { 845 knp = &SLIST_NEXT(kn, kn_link); 846 } 847 kn = kn0; 848 } 849 } 850 } 851 pool_put(&kqueue_pool, kq); 852 fp->f_data = NULL; 853 854 return (0); 855 } 856 857 void 858 kqueue_wakeup(struct kqueue *kq) 859 { 860 861 if (kq->kq_state & KQ_SLEEP) { 862 kq->kq_state &= ~KQ_SLEEP; 863 wakeup(kq); 864 } 865 if (kq->kq_state & KQ_SEL) { 866 kq->kq_state &= ~KQ_SEL; 867 selwakeup(&kq->kq_sel); 868 } 869 KNOTE(&kq->kq_sel.si_note, 0); 870 } 871 872 /* 873 * walk down a list of knotes, activating them if their event has triggered. 874 */ 875 void 876 knote(struct klist *list, long hint) 877 { 878 struct knote *kn; 879 880 SLIST_FOREACH(kn, list, kn_selnext) 881 if (kn->kn_fop->f_event(kn, hint)) 882 KNOTE_ACTIVATE(kn); 883 } 884 885 /* 886 * remove all knotes from a specified klist 887 */ 888 void 889 knote_remove(struct proc *p, struct klist *list) 890 { 891 struct knote *kn; 892 893 while ((kn = SLIST_FIRST(list)) != NULL) { 894 kn->kn_fop->f_detach(kn); 895 knote_drop(kn, p, p->p_fd); 896 } 897 } 898 899 /* 900 * remove all knotes referencing a specified fd 901 */ 902 void 903 knote_fdclose(struct proc *p, int fd) 904 { 905 struct filedesc *fdp = p->p_fd; 906 struct klist *list = &fdp->fd_knlist[fd]; 907 908 knote_remove(p, list); 909 } 910 911 void 912 knote_attach(struct knote *kn, struct filedesc *fdp) 913 { 914 struct klist *list; 915 int size; 916 917 if (! 

void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list;
	int size;

	if (!kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0)
			fdp->fd_knhash = hashinit(KN_HASHSIZE, M_TEMP,
			    M_WAITOK, &fdp->fd_knhashmask);
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	if (fdp->fd_knlistsize <= kn->kn_id) {
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQEXTENT;
		list = malloc(size * sizeof(struct klist *), M_TEMP, M_WAITOK);
		bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
		    fdp->fd_knlistsize * sizeof(struct klist *));
		bzero((caddr_t)list +
		    fdp->fd_knlistsize * sizeof(struct klist *),
		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
		if (fdp->fd_knlist != NULL)
			free(fdp->fd_knlist, M_TEMP);
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
	list = &fdp->fd_knlist[kn->kn_id];
done:
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kn->kn_status = 0;
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling closef and free.
 */
void
knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp)
{
	struct klist *list;

	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd) {
		FREF(kn->kn_fp);
		closef(kn->kn_fp, p);
	}
	knote_free(kn);
}


void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	splx(s);
	kqueue_wakeup(kq);
}

void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT(kn->kn_status & KN_QUEUED);

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	splx(s);
}

void
klist_invalidate(struct klist *list)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= EV_EOF | EV_ONESHOT;
	}
}