/* $OpenBSD: kern_event.c,v 1.81 2017/10/11 08:06:56 mpi Exp $ */

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/syscallargs.h>
#include <sys/timeout.h>

int     kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp,
            const struct timespec *timeout, struct proc *p, int *retval);

int     kqueue_read(struct file *fp, off_t *poff, struct uio *uio,
            struct ucred *cred);
int     kqueue_write(struct file *fp, off_t *poff, struct uio *uio,
            struct ucred *cred);
int     kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
            struct proc *p);
int     kqueue_poll(struct file *fp, int events, struct proc *p);
int     kqueue_kqfilter(struct file *fp, struct knote *kn);
int     kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int     kqueue_close(struct file *fp, struct proc *p);
void    kqueue_wakeup(struct kqueue *kq);

struct fileops kqueueops = {
        kqueue_read,
        kqueue_write,
        kqueue_ioctl,
        kqueue_poll,
        kqueue_kqfilter,
        kqueue_stat,
        kqueue_close
};

void    knote_attach(struct knote *kn, struct filedesc *fdp);
void    knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp);
void    knote_enqueue(struct knote *kn);
void    knote_dequeue(struct knote *kn);
#define knote_alloc()   ((struct knote *)pool_get(&knote_pool, PR_WAITOK))
#define knote_free(kn)  pool_put(&knote_pool, (kn))

void    filt_kqdetach(struct knote *kn);
int     filt_kqueue(struct knote *kn, long hint);
int     filt_procattach(struct knote *kn);
void    filt_procdetach(struct knote *kn);
int     filt_proc(struct knote *kn, long hint);
int     filt_fileattach(struct knote *kn);
void    filt_timerexpire(void *knx);
int     filt_timerattach(struct knote *kn);
void    filt_timerdetach(struct knote *kn);
int     filt_timer(struct knote *kn, long hint);
void    filt_seltruedetach(struct knote *kn);

struct filterops kqread_filtops =
        { 1, NULL, filt_kqdetach, filt_kqueue };
struct filterops proc_filtops =
        { 0, filt_procattach, filt_procdetach, filt_proc };
struct filterops file_filtops =
        { 1, filt_fileattach, NULL, NULL };
struct filterops timer_filtops =
        { 0, filt_timerattach, filt_timerdetach, filt_timer };

struct pool knote_pool;
struct pool kqueue_pool;
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KNOTE_ACTIVATE(kn) do {                                         \
        kn->kn_status |= KN_ACTIVE;                                     \
        if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)           \
                knote_enqueue(kn);                                      \
} while (0)

#define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))

extern struct filterops sig_filtops;
#ifdef notyet
extern struct filterops aio_filtops;
#endif

/*
 * Table for all system-defined filters.
 */
struct filterops *sysfilt_ops[] = {
        &file_filtops,                  /* EVFILT_READ */
        &file_filtops,                  /* EVFILT_WRITE */
        NULL, /*&aio_filtops,*/         /* EVFILT_AIO */
        &file_filtops,                  /* EVFILT_VNODE */
        &proc_filtops,                  /* EVFILT_PROC */
        &sig_filtops,                   /* EVFILT_SIGNAL */
        &timer_filtops,                 /* EVFILT_TIMER */
};

void KQREF(struct kqueue *);
void KQRELE(struct kqueue *);

void
KQREF(struct kqueue *kq)
{
        ++kq->kq_refs;
}

void
KQRELE(struct kqueue *kq)
{
        if (--kq->kq_refs == 0) {
                pool_put(&kqueue_pool, kq);
        }
}

void kqueue_init(void);

void
kqueue_init(void)
{
        pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_NONE, PR_WAITOK,
            "kqueuepl", NULL);
        pool_init(&knote_pool, sizeof(struct knote), 0, IPL_NONE, PR_WAITOK,
            "knotepl", NULL);
}

int
filt_fileattach(struct knote *kn)
{
        struct file *fp = kn->kn_fp;

        return fp->f_ops->fo_kqfilter(fp, kn);
}

int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
        struct kqueue *kq = kn->kn_fp->f_data;

        if (kn->kn_filter != EVFILT_READ)
                return (EINVAL);

        kn->kn_fop = &kqread_filtops;
        SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
        return (0);
}

void
filt_kqdetach(struct knote *kn)
{
        struct kqueue *kq = kn->kn_fp->f_data;

        SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

int
filt_kqueue(struct knote *kn, long hint)
{
        struct kqueue *kq = kn->kn_fp->f_data;

        kn->kn_data = kq->kq_count;
        return (kn->kn_data > 0);
}
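/*
 * Attach an EVFILT_PROC filter to the process named by kn_id.  The
 * pledge(2) check, the PID_MAX range check, and the PS_EXITING check
 * all happen before the knote is hooked onto the process's klist.
 */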
int
filt_procattach(struct knote *kn)
{
        struct process *pr;

        if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
            (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
                return pledge_fail(curproc, EPERM, PLEDGE_PROC);

        if (kn->kn_id > PID_MAX)
                return ESRCH;

        pr = prfind(kn->kn_id);
        if (pr == NULL)
                return (ESRCH);

        /* exiting processes can't be specified */
        if (pr->ps_flags & PS_EXITING)
                return (ESRCH);

        kn->kn_ptr.p_process = pr;
        kn->kn_flags |= EV_CLEAR;       /* automatically set */

        /*
         * internal flag indicating registration done by kernel
         */
        if (kn->kn_flags & EV_FLAG1) {
                kn->kn_data = kn->kn_sdata;     /* ppid */
                kn->kn_fflags = NOTE_CHILD;
                kn->kn_flags &= ~EV_FLAG1;
        }

        /* XXX lock the proc here while adding to the list? */
        SLIST_INSERT_HEAD(&pr->ps_klist, kn, kn_selnext);

        return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
        struct process *pr = kn->kn_ptr.p_process;

        if (kn->kn_status & KN_DETACHED)
                return;

        /* XXX locking?  this might modify another process. */
        SLIST_REMOVE(&pr->ps_klist, kn, knote, kn_selnext);
}

int
filt_proc(struct knote *kn, long hint)
{
        u_int event;

        /*
         * mask off extra data
         */
        event = (u_int)hint & NOTE_PCTRLMASK;

        /*
         * if the user is interested in this event, record it.
         */
        if (kn->kn_sfflags & event)
                kn->kn_fflags |= event;

        /*
         * process is gone, so flag the event as finished and remove it
         * from the process's klist.
         */
        if (event == NOTE_EXIT) {
                struct process *pr = kn->kn_ptr.p_process;

                kn->kn_status |= KN_DETACHED;
                kn->kn_flags |= (EV_EOF | EV_ONESHOT);
                kn->kn_data = pr->ps_mainproc->p_xstat;
                SLIST_REMOVE(&pr->ps_klist, kn, knote, kn_selnext);
                return (1);
        }

        /*
         * process forked, and user wants to track the new process,
         * so attach a new knote to it, and immediately report an
         * event with the parent's pid.
         */
        if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
                struct kevent kev;
                int error;

                /*
                 * register knote with new process.
                 */
                kev.ident = hint & NOTE_PDATAMASK;      /* pid */
                kev.filter = kn->kn_filter;
                kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
                kev.fflags = kn->kn_sfflags;
                kev.data = kn->kn_id;                   /* parent */
                kev.udata = kn->kn_kevent.udata;        /* preserve udata */
                error = kqueue_register(kn->kn_kq, &kev, NULL);
                if (error)
                        kn->kn_fflags |= NOTE_TRACKERR;
        }

        return (kn->kn_fflags != 0);
}
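/*
 * Convert the knote's period (kn_sdata, in milliseconds) to ticks and
 * (re)arm the timeout, rounding a zero-tick result up to one tick so
 * the timeout always fires.
 */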
static void
filt_timer_timeout_add(struct knote *kn)
{
        struct timeval tv;
        int tticks;

        tv.tv_sec = kn->kn_sdata / 1000;
        tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
        tticks = tvtohz(&tv);
        timeout_add(kn->kn_hook, tticks ? tticks : 1);
}

void
filt_timerexpire(void *knx)
{
        struct knote *kn = knx;

        kn->kn_data++;
        KNOTE_ACTIVATE(kn);

        if ((kn->kn_flags & EV_ONESHOT) == 0)
                filt_timer_timeout_add(kn);
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
        struct timeout *to;

        if (kq_ntimeouts > kq_timeoutmax)
                return (ENOMEM);
        kq_ntimeouts++;

        kn->kn_flags |= EV_CLEAR;       /* automatically set */
        to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
        timeout_set(to, filt_timerexpire, kn);
        kn->kn_hook = to;
        filt_timer_timeout_add(kn);

        return (0);
}

void
filt_timerdetach(struct knote *kn)
{
        struct timeout *to;

        to = (struct timeout *)kn->kn_hook;
        timeout_del(to);
        free(to, M_KEVENT, sizeof(*to));
        kq_ntimeouts--;
}

int
filt_timer(struct knote *kn, long hint)
{
        return (kn->kn_data != 0);
}

/*
 * filt_seltrue:
 *
 *      This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{
        /*
         * We don't know how much data can be read/written,
         * but we know that it *can* be.  This is about as
         * good as select/poll does as well.
         */
        kn->kn_data = 0;
        return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, which
 * has the same effect as a filter using filt_seltrue() as its filter
 * method.
 */
void
filt_seltruedetach(struct knote *kn)
{
        /* Nothing to do */
}

const struct filterops seltrue_filtops =
        { 1, NULL, filt_seltruedetach, filt_seltrue };

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
        switch (kn->kn_filter) {
        case EVFILT_READ:
        case EVFILT_WRITE:
                kn->kn_fop = &seltrue_filtops;
                break;
        default:
                return (EINVAL);
        }

        /* Nothing more to do */
        return (0);
}

int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
        struct filedesc *fdp = p->p_fd;
        struct kqueue *kq;
        struct file *fp;
        int fd, error;

        fdplock(fdp);
        error = falloc(p, 0, &fp, &fd);
        fdpunlock(fdp);
        if (error)
                return (error);
        fp->f_flag = FREAD | FWRITE;
        fp->f_type = DTYPE_KQUEUE;
        fp->f_ops = &kqueueops;
        kq = pool_get(&kqueue_pool, PR_WAITOK|PR_ZERO);
        TAILQ_INIT(&kq->kq_head);
        fp->f_data = kq;
        KQREF(kq);
        *retval = fd;
        if (fdp->fd_knlistsize < 0)
                fdp->fd_knlistsize = 0;         /* this process has a kq */
        kq->kq_fdp = fdp;
        FILE_SET_MATURE(fp, p);
        return (0);
}
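/*
 * The kevent(2) system call: apply the changelist via kqueue_register(),
 * then collect pending events via kqueue_scan().  A userland caller might
 * look like this (a sketch; "fd" and the event array are hypothetical):
 *
 *      struct kevent kev, events[8];
 *      int kq = kqueue();
 *      EV_SET(&kev, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
 *      int n = kevent(kq, &kev, 1, events, 8, NULL);
 */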
int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
        struct filedesc *fdp = p->p_fd;
        struct sys_kevent_args /* {
                syscallarg(int)                         fd;
                syscallarg(const struct kevent *)       changelist;
                syscallarg(int)                         nchanges;
                syscallarg(struct kevent *)             eventlist;
                syscallarg(int)                         nevents;
                syscallarg(const struct timespec *)     timeout;
        } */ *uap = v;
        struct kevent *kevp;
        struct kqueue *kq;
        struct file *fp;
        struct timespec ts;
        int i, n, nerrors, error;
        struct kevent kev[KQ_NEVENTS];

        if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL ||
            (fp->f_type != DTYPE_KQUEUE))
                return (EBADF);

        FREF(fp);

        if (SCARG(uap, timeout) != NULL) {
                error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
                if (error)
                        goto done;
#ifdef KTRACE
                if (KTRPOINT(p, KTR_STRUCT))
                        ktrreltimespec(p, &ts);
#endif
                SCARG(uap, timeout) = &ts;
        }

        kq = fp->f_data;
        nerrors = 0;

        while (SCARG(uap, nchanges) > 0) {
                n = SCARG(uap, nchanges) > KQ_NEVENTS ?
                    KQ_NEVENTS : SCARG(uap, nchanges);
                error = copyin(SCARG(uap, changelist), kev,
                    n * sizeof(struct kevent));
                if (error)
                        goto done;
#ifdef KTRACE
                if (KTRPOINT(p, KTR_STRUCT))
                        ktrevent(p, kev, n);
#endif
                for (i = 0; i < n; i++) {
                        kevp = &kev[i];
                        kevp->flags &= ~EV_SYSFLAGS;
                        error = kqueue_register(kq, kevp, p);
                        if (error || (kevp->flags & EV_RECEIPT)) {
                                if (SCARG(uap, nevents) != 0) {
                                        kevp->flags = EV_ERROR;
                                        kevp->data = error;
                                        copyout(kevp, SCARG(uap, eventlist),
                                            sizeof(*kevp));
                                        SCARG(uap, eventlist)++;
                                        SCARG(uap, nevents)--;
                                        nerrors++;
                                } else {
                                        goto done;
                                }
                        }
                }
                SCARG(uap, nchanges) -= n;
                SCARG(uap, changelist) += n;
        }
        if (nerrors) {
                *retval = nerrors;
                error = 0;
                goto done;
        }

        KQREF(kq);
        FRELE(fp, p);
        error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist),
            SCARG(uap, timeout), p, &n);
        KQRELE(kq);
        *retval = n;
        return (error);

 done:
        FRELE(fp, p);
        return (error);
}
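/*
 * Register a single change from the changelist: look up an existing
 * knote matching (kq, ident, filter), then add, modify, delete, enable
 * or disable it according to kev->flags.
 */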
int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
        struct filedesc *fdp = kq->kq_fdp;
        struct filterops *fops = NULL;
        struct file *fp = NULL;
        struct knote *kn = NULL;
        int s, error = 0;

        if (kev->filter < 0) {
                if (kev->filter + EVFILT_SYSCOUNT < 0)
                        return (EINVAL);
                fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
        }

        if (fops == NULL) {
                /*
                 * XXX
                 * filter attach routine is responsible for ensuring that
                 * the identifier can be attached to it.
                 */
                return (EINVAL);
        }

        if (fops->f_isfd) {
                /* validate descriptor */
                if (kev->ident > INT_MAX)
                        return (EBADF);
                if ((fp = fd_getfile(fdp, kev->ident)) == NULL)
                        return (EBADF);
                FREF(fp);

                if (kev->ident < fdp->fd_knlistsize) {
                        SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident],
                            kn_link) {
                                if (kq == kn->kn_kq &&
                                    kev->filter == kn->kn_filter)
                                        break;
                        }
                }
        } else {
                if (fdp->fd_knhashmask != 0) {
                        struct klist *list;

                        list = &fdp->fd_knhash[
                            KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
                        SLIST_FOREACH(kn, list, kn_link) {
                                if (kev->ident == kn->kn_id &&
                                    kq == kn->kn_kq &&
                                    kev->filter == kn->kn_filter)
                                        break;
                        }
                }
        }

        if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
                error = ENOENT;
                goto done;
        }

        /*
         * kn now contains the matching knote, or NULL if no match.
         */
        if (kev->flags & EV_ADD) {
                if (kn == NULL) {
                        kn = knote_alloc();
                        if (kn == NULL) {
                                error = ENOMEM;
                                goto done;
                        }
                        kn->kn_fp = fp;
                        kn->kn_kq = kq;
                        kn->kn_fop = fops;

                        /*
                         * apply reference count to knote structure, and
                         * do not release it at the end of this routine.
                         */
                        fp = NULL;

                        kn->kn_sfflags = kev->fflags;
                        kn->kn_sdata = kev->data;
                        kev->fflags = 0;
                        kev->data = 0;
                        kn->kn_kevent = *kev;

                        knote_attach(kn, fdp);
                        if ((error = fops->f_attach(kn)) != 0) {
                                knote_drop(kn, p, fdp);
                                goto done;
                        }
                } else {
                        /*
                         * The user may change some filter values after the
                         * initial EV_ADD, but doing so will not reset any
                         * filters which have already been triggered.
                         */
                        kn->kn_sfflags = kev->fflags;
                        kn->kn_sdata = kev->data;
                        kn->kn_kevent.udata = kev->udata;
                }

                s = splhigh();
                if (kn->kn_fop->f_event(kn, 0))
                        KNOTE_ACTIVATE(kn);
                splx(s);

        } else if (kev->flags & EV_DELETE) {
                kn->kn_fop->f_detach(kn);
                knote_drop(kn, p, p->p_fd);
                goto done;
        }

        if ((kev->flags & EV_DISABLE) &&
            ((kn->kn_status & KN_DISABLED) == 0)) {
                s = splhigh();
                kn->kn_status |= KN_DISABLED;
                splx(s);
        }

        if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
                s = splhigh();
                kn->kn_status &= ~KN_DISABLED;
                if ((kn->kn_status & KN_ACTIVE) &&
                    ((kn->kn_status & KN_QUEUED) == 0))
                        knote_enqueue(kn);
                splx(s);
        }

done:
        if (fp != NULL)
                FRELE(fp, p);
        return (error);
}
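/*
 * Scan the kqueue: sleep until at least one event is pending or the
 * timeout expires, then copy out up to maxevents events to ulistp.
 * A marker knote bounds the pass so that knotes requeued during the
 * scan are not collected twice.
 */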
int
kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp,
    const struct timespec *tsp, struct proc *p, int *retval)
{
        struct kevent *kevp;
        struct timeval atv, rtv, ttv;
        struct knote *kn, marker;
        int s, count, timeout, nkev = 0, error = 0;
        struct kevent kev[KQ_NEVENTS];

        count = maxevents;
        if (count == 0)
                goto done;

        if (tsp != NULL) {
                TIMESPEC_TO_TIMEVAL(&atv, tsp);
                if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
                        /* No timeout, just poll */
                        timeout = -1;
                        goto start;
                }
                if (itimerfix(&atv)) {
                        error = EINVAL;
                        goto done;
                }

                timeout = atv.tv_sec > 24 * 60 * 60 ?
                    24 * 60 * 60 * hz : tvtohz(&atv);

                getmicrouptime(&rtv);
                timeradd(&atv, &rtv, &atv);
        } else {
                atv.tv_sec = 0;
                atv.tv_usec = 0;
                timeout = 0;
        }
        goto start;

retry:
        if (atv.tv_sec || atv.tv_usec) {
                getmicrouptime(&rtv);
                if (timercmp(&rtv, &atv, >=))
                        goto done;
                ttv = atv;
                timersub(&ttv, &rtv, &ttv);
                timeout = ttv.tv_sec > 24 * 60 * 60 ?
                    24 * 60 * 60 * hz : tvtohz(&ttv);
        }

start:
        if (kq->kq_state & KQ_DYING) {
                error = EBADF;
                goto done;
        }

        kevp = &kev[0];
        s = splhigh();
        if (kq->kq_count == 0) {
                if (timeout < 0) {
                        error = EWOULDBLOCK;
                } else {
                        kq->kq_state |= KQ_SLEEP;
                        error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
                }
                splx(s);
                if (error == 0)
                        goto retry;
                /* don't restart after signals... */
                if (error == ERESTART)
                        error = EINTR;
                else if (error == EWOULDBLOCK)
                        error = 0;
                goto done;
        }

        TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
        while (count) {
                kn = TAILQ_FIRST(&kq->kq_head);
                if (kn == &marker) {
                        TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
                        splx(s);
                        if (count == maxevents)
                                goto retry;
                        goto done;
                }

                TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
                kq->kq_count--;

                if (kn->kn_status & KN_DISABLED) {
                        kn->kn_status &= ~KN_QUEUED;
                        continue;
                }
                if ((kn->kn_flags & EV_ONESHOT) == 0 &&
                    kn->kn_fop->f_event(kn, 0) == 0) {
                        kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
                        continue;
                }
                *kevp = kn->kn_kevent;
                kevp++;
                nkev++;
                if (kn->kn_flags & EV_ONESHOT) {
                        kn->kn_status &= ~KN_QUEUED;
                        splx(s);
                        kn->kn_fop->f_detach(kn);
                        knote_drop(kn, p, p->p_fd);
                        s = splhigh();
                } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
                        if (kn->kn_flags & EV_CLEAR) {
                                kn->kn_data = 0;
                                kn->kn_fflags = 0;
                        }
                        if (kn->kn_flags & EV_DISPATCH)
                                kn->kn_status |= KN_DISABLED;
                        kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
                } else {
                        TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
                        kq->kq_count++;
                }
                count--;
                if (nkev == KQ_NEVENTS) {
                        splx(s);
#ifdef KTRACE
                        if (KTRPOINT(p, KTR_STRUCT))
                                ktrevent(p, kev, nkev);
#endif
                        error = copyout(kev, ulistp,
                            sizeof(struct kevent) * nkev);
                        ulistp += nkev;
                        nkev = 0;
                        kevp = &kev[0];
                        s = splhigh();
                        if (error)
                                break;
                }
        }
        TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
        splx(s);
done:
        if (nkev != 0) {
#ifdef KTRACE
                if (KTRPOINT(p, KTR_STRUCT))
                        ktrevent(p, kev, nkev);
#endif
                error = copyout(kev, ulistp,
                    sizeof(struct kevent) * nkev);
        }
        *retval = maxevents - count;
        return (error);
}
/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
int
kqueue_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
{
        return (ENXIO);
}

int
kqueue_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
{
        return (ENXIO);
}

int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
        return (ENOTTY);
}

int
kqueue_poll(struct file *fp, int events, struct proc *p)
{
        struct kqueue *kq = (struct kqueue *)fp->f_data;
        int revents = 0;
        int s = splhigh();

        if (events & (POLLIN | POLLRDNORM)) {
                if (kq->kq_count) {
                        revents |= events & (POLLIN | POLLRDNORM);
                } else {
                        selrecord(p, &kq->kq_sel);
                        kq->kq_state |= KQ_SEL;
                }
        }
        splx(s);
        return (revents);
}

int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
        struct kqueue *kq = fp->f_data;

        memset(st, 0, sizeof(*st));
        st->st_size = kq->kq_count;
        st->st_blksize = sizeof(struct kevent);
        st->st_mode = S_IFIFO;
        return (0);
}

int
kqueue_close(struct file *fp, struct proc *p)
{
        struct kqueue *kq = fp->f_data;
        struct filedesc *fdp = p->p_fd;
        struct knote **knp, *kn, *kn0;
        int i;

        for (i = 0; i < fdp->fd_knlistsize; i++) {
                knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
                kn = *knp;
                while (kn != NULL) {
                        kn0 = SLIST_NEXT(kn, kn_link);
                        if (kq == kn->kn_kq) {
                                kn->kn_fop->f_detach(kn);
                                FRELE(kn->kn_fp, p);
                                knote_free(kn);
                                *knp = kn0;
                        } else {
                                knp = &SLIST_NEXT(kn, kn_link);
                        }
                        kn = kn0;
                }
        }
        if (fdp->fd_knhashmask != 0) {
                for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
                        knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
                        kn = *knp;
                        while (kn != NULL) {
                                kn0 = SLIST_NEXT(kn, kn_link);
                                if (kq == kn->kn_kq) {
                                        kn->kn_fop->f_detach(kn);
                                        /* XXX non-fd release of kn->kn_ptr */
                                        knote_free(kn);
                                        *knp = kn0;
                                } else {
                                        knp = &SLIST_NEXT(kn, kn_link);
                                }
                                kn = kn0;
                        }
                }
        }
        fp->f_data = NULL;

        kq->kq_state |= KQ_DYING;
        kqueue_wakeup(kq);
        KQRELE(kq);

        return (0);
}

void
kqueue_wakeup(struct kqueue *kq)
{
        if (kq->kq_state & KQ_SLEEP) {
                kq->kq_state &= ~KQ_SLEEP;
                wakeup(kq);
        }
        if (kq->kq_state & KQ_SEL) {
                kq->kq_state &= ~KQ_SEL;
                selwakeup(&kq->kq_sel);
        } else
                KNOTE(&kq->kq_sel.si_note, 0);
}

/*
 * activate one knote.
 */
void
knote_activate(struct knote *kn)
{
        KNOTE_ACTIVATE(kn);
}
/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
        struct knote *kn, *kn0;

        SLIST_FOREACH_SAFE(kn, list, kn_selnext, kn0)
                if (kn->kn_fop->f_event(kn, hint))
                        KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct proc *p, struct klist *list)
{
        struct knote *kn;

        while ((kn = SLIST_FIRST(list)) != NULL) {
                kn->kn_fop->f_detach(kn);
                knote_drop(kn, p, p->p_fd);
        }
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
        struct filedesc *fdp = p->p_fd;
        struct klist *list = &fdp->fd_knlist[fd];

        knote_remove(p, list);
}

/*
 * handle a process exiting, including the triggering of NOTE_EXIT notes
 * XXX this could be more efficient, doing a single pass down the klist
 */
void
knote_processexit(struct proc *p)
{
        struct process *pr = p->p_p;

        KNOTE(&pr->ps_klist, NOTE_EXIT);

        /* remove other knotes hanging off the process */
        knote_remove(p, &pr->ps_klist);
}
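/*
 * Attach a knote to its descriptor's list, growing fd_knlist in
 * KQEXTENT-sized steps as needed, or, for non-fd knotes, to the
 * fd_knhash chain selected by KN_HASH().
 */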
void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
        struct klist *list;
        int size;

        if (!kn->kn_fop->f_isfd) {
                if (fdp->fd_knhashmask == 0)
                        fdp->fd_knhash = hashinit(KN_HASHSIZE, M_TEMP,
                            M_WAITOK, &fdp->fd_knhashmask);
                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
                goto done;
        }

        if (fdp->fd_knlistsize <= kn->kn_id) {
                size = fdp->fd_knlistsize;
                while (size <= kn->kn_id)
                        size += KQEXTENT;
                list = mallocarray(size, sizeof(struct klist), M_TEMP,
                    M_WAITOK);
                memcpy(list, fdp->fd_knlist,
                    fdp->fd_knlistsize * sizeof(struct klist));
                memset(&list[fdp->fd_knlistsize], 0,
                    (size - fdp->fd_knlistsize) * sizeof(struct klist));
                free(fdp->fd_knlist, M_TEMP,
                    fdp->fd_knlistsize * sizeof(struct klist));
                fdp->fd_knlistsize = size;
                fdp->fd_knlist = list;
        }
        list = &fdp->fd_knlist[kn->kn_id];
done:
        SLIST_INSERT_HEAD(list, kn, kn_link);
        kn->kn_status = 0;
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling FRELE and knote_free.
 */
void
knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp)
{
        struct klist *list;

        if (kn->kn_fop->f_isfd)
                list = &fdp->fd_knlist[kn->kn_id];
        else
                list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

        SLIST_REMOVE(list, kn, knote, kn_link);
        if (kn->kn_status & KN_QUEUED)
                knote_dequeue(kn);
        if (kn->kn_fop->f_isfd)
                FRELE(kn->kn_fp, p);
        knote_free(kn);
}

void
knote_enqueue(struct knote *kn)
{
        struct kqueue *kq = kn->kn_kq;
        int s = splhigh();

        KASSERT((kn->kn_status & KN_QUEUED) == 0);

        TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
        kn->kn_status |= KN_QUEUED;
        kq->kq_count++;
        splx(s);
        kqueue_wakeup(kq);
}

void
knote_dequeue(struct knote *kn)
{
        struct kqueue *kq = kn->kn_kq;
        int s = splhigh();

        KASSERT(kn->kn_status & KN_QUEUED);

        TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
        kn->kn_status &= ~KN_QUEUED;
        kq->kq_count--;
        splx(s);
}

void
klist_invalidate(struct klist *list)
{
        struct knote *kn;

        SLIST_FOREACH(kn, list, kn_selnext) {
                kn->kn_status |= KN_DETACHED;
                kn->kn_flags |= EV_EOF | EV_ONESHOT;
        }
}