1*0747e3d2Sclaudio /* $OpenBSD: kern_event.c,v 1.200 2024/08/06 08:44:54 claudio Exp $ */ 2d0f8810fSmickey 31a12e8a7Sprovos /*- 4cc90df54Sprovos * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 51a12e8a7Sprovos * All rights reserved. 61a12e8a7Sprovos * 71a12e8a7Sprovos * Redistribution and use in source and binary forms, with or without 81a12e8a7Sprovos * modification, are permitted provided that the following conditions 91a12e8a7Sprovos * are met: 101a12e8a7Sprovos * 1. Redistributions of source code must retain the above copyright 111a12e8a7Sprovos * notice, this list of conditions and the following disclaimer. 121a12e8a7Sprovos * 2. Redistributions in binary form must reproduce the above copyright 131a12e8a7Sprovos * notice, this list of conditions and the following disclaimer in the 141a12e8a7Sprovos * documentation and/or other materials provided with the distribution. 151a12e8a7Sprovos * 161a12e8a7Sprovos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 171a12e8a7Sprovos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 181a12e8a7Sprovos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 191a12e8a7Sprovos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 201a12e8a7Sprovos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 211a12e8a7Sprovos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 221a12e8a7Sprovos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 231a12e8a7Sprovos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 241a12e8a7Sprovos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 251a12e8a7Sprovos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 261a12e8a7Sprovos * SUCH DAMAGE. 
271a12e8a7Sprovos * 28cc90df54Sprovos * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $ 291a12e8a7Sprovos */ 301a12e8a7Sprovos 311a12e8a7Sprovos #include <sys/param.h> 321a12e8a7Sprovos #include <sys/systm.h> 331a12e8a7Sprovos #include <sys/proc.h> 34c84235d7Stedu #include <sys/pledge.h> 351a12e8a7Sprovos #include <sys/malloc.h> 361a12e8a7Sprovos #include <sys/file.h> 371a12e8a7Sprovos #include <sys/filedesc.h> 381a12e8a7Sprovos #include <sys/fcntl.h> 391a12e8a7Sprovos #include <sys/queue.h> 401a12e8a7Sprovos #include <sys/event.h> 411a12e8a7Sprovos #include <sys/eventvar.h> 429ece112bSguenther #include <sys/ktrace.h> 4391a0b8fbSprovos #include <sys/pool.h> 441a12e8a7Sprovos #include <sys/stat.h> 451a12e8a7Sprovos #include <sys/mount.h> 461a12e8a7Sprovos #include <sys/syscallargs.h> 47a403738fScheloha #include <sys/time.h> 4847cb05d5Stedu #include <sys/timeout.h> 49ce1591e9Svisa #include <sys/vnode.h> 50381e34d2Sguenther #include <sys/wait.h> 511a12e8a7Sprovos 5288864a09Svisa #ifdef DIAGNOSTIC 5388864a09Svisa #define KLIST_ASSERT_LOCKED(kl) do { \ 5488864a09Svisa if ((kl)->kl_ops != NULL) \ 5588864a09Svisa (kl)->kl_ops->klo_assertlk((kl)->kl_arg); \ 5688864a09Svisa else \ 5788864a09Svisa KERNEL_ASSERT_LOCKED(); \ 5888864a09Svisa } while (0) 5988864a09Svisa #else 6088864a09Svisa #define KLIST_ASSERT_LOCKED(kl) ((void)(kl)) 6188864a09Svisa #endif 6288864a09Svisa 63eaac6367Svisa int dokqueue(struct proc *, int, register_t *); 6439f6f778Smpi struct kqueue *kqueue_alloc(struct filedesc *); 65d9144382Smpi void kqueue_terminate(struct proc *p, struct kqueue *); 66f783a2adSmpi void KQREF(struct kqueue *); 67f783a2adSmpi void KQRELE(struct kqueue *); 68f783a2adSmpi 69cbf33da8Svisa void kqueue_purge(struct proc *, struct kqueue *); 702699785bSmpi int kqueue_sleep(struct kqueue *, struct timespec *); 711a12e8a7Sprovos 722bd648c0Smpi int kqueue_read(struct file *, struct uio *, int); 732bd648c0Smpi int kqueue_write(struct file *, struct uio *, int); 
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

/* Consistency checking of a kqueue, compiled in only with KQUEUE_DEBUG. */
#ifdef KQUEUE_DEBUG
void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif

static int	filter_attach(struct knote *kn);
static void	filter_detach(struct knote *kn);
static int	filter_event(struct knote *kn, long hint);
static int	filter_modify(struct kevent *kev, struct knote *kn);
static int	filter_process(struct knote *kn, struct kevent *kev);
static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);

/* File operations vector for kqueue descriptors. */
const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_detach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
1121a12e8a7Sprovos void knote_dequeue(struct knote *kn); 11388864a09Svisa int knote_acquire(struct knote *kn, struct klist *, int); 114696db594Svisa void knote_release(struct knote *kn); 115c0e5ee33Svisa void knote_activate(struct knote *kn); 1161789dd4eSvisa void knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, 1171789dd4eSvisa int idx, int purge); 1181a12e8a7Sprovos 119cc90df54Sprovos void filt_kqdetach(struct knote *kn); 120cc90df54Sprovos int filt_kqueue(struct knote *kn, long hint); 121d0fd4be4Svisa int filt_kqueuemodify(struct kevent *kev, struct knote *kn); 122d0fd4be4Svisa int filt_kqueueprocess(struct knote *kn, struct kevent *kev); 123d0fd4be4Svisa int filt_kqueue_common(struct knote *kn, struct kqueue *kq); 124cc90df54Sprovos int filt_procattach(struct knote *kn); 125cc90df54Sprovos void filt_procdetach(struct knote *kn); 126cc90df54Sprovos int filt_proc(struct knote *kn, long hint); 127*0747e3d2Sclaudio int filt_procmodify(struct kevent *kev, struct knote *kn); 128*0747e3d2Sclaudio int filt_procprocess(struct knote *kn, struct kevent *kev); 129235013ebSclaudio int filt_sigattach(struct knote *kn); 130235013ebSclaudio void filt_sigdetach(struct knote *kn); 131235013ebSclaudio int filt_signal(struct knote *kn, long hint); 132cc90df54Sprovos int filt_fileattach(struct knote *kn); 13347cb05d5Stedu void filt_timerexpire(void *knx); 13447cb05d5Stedu int filt_timerattach(struct knote *kn); 13547cb05d5Stedu void filt_timerdetach(struct knote *kn); 136a23d840bSvisa int filt_timermodify(struct kevent *kev, struct knote *kn); 137a23d840bSvisa int filt_timerprocess(struct knote *kn, struct kevent *kev); 138a56f3bb5Snicm void filt_seltruedetach(struct knote *kn); 139cc90df54Sprovos 14094321eb4Svisa const struct filterops kqread_filtops = { 141d0fd4be4Svisa .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 14294321eb4Svisa .f_attach = NULL, 14394321eb4Svisa .f_detach = filt_kqdetach, 14494321eb4Svisa .f_event = filt_kqueue, 145d0fd4be4Svisa .f_modify 
= filt_kqueuemodify, 146d0fd4be4Svisa .f_process = filt_kqueueprocess, 14794321eb4Svisa }; 14894321eb4Svisa 14994321eb4Svisa const struct filterops proc_filtops = { 150*0747e3d2Sclaudio .f_flags = FILTEROP_MPSAFE, 15194321eb4Svisa .f_attach = filt_procattach, 15294321eb4Svisa .f_detach = filt_procdetach, 15394321eb4Svisa .f_event = filt_proc, 154*0747e3d2Sclaudio .f_modify = filt_procmodify, 155*0747e3d2Sclaudio .f_process = filt_procprocess, 15694321eb4Svisa }; 15794321eb4Svisa 158235013ebSclaudio const struct filterops sig_filtops = { 159*0747e3d2Sclaudio .f_flags = FILTEROP_MPSAFE, 160235013ebSclaudio .f_attach = filt_sigattach, 161235013ebSclaudio .f_detach = filt_sigdetach, 162235013ebSclaudio .f_event = filt_signal, 163*0747e3d2Sclaudio .f_modify = filt_procmodify, 164*0747e3d2Sclaudio .f_process = filt_procprocess, 165235013ebSclaudio }; 166235013ebSclaudio 16794321eb4Svisa const struct filterops file_filtops = { 16861820845Svisa .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 16994321eb4Svisa .f_attach = filt_fileattach, 17094321eb4Svisa .f_detach = NULL, 17194321eb4Svisa .f_event = NULL, 17294321eb4Svisa }; 17394321eb4Svisa 17494321eb4Svisa const struct filterops timer_filtops = { 175b8213689Svisa .f_flags = 0, 17694321eb4Svisa .f_attach = filt_timerattach, 17794321eb4Svisa .f_detach = filt_timerdetach, 178a23d840bSvisa .f_event = NULL, 179a23d840bSvisa .f_modify = filt_timermodify, 180a23d840bSvisa .f_process = filt_timerprocess, 18194321eb4Svisa }; 182cc90df54Sprovos 18391a0b8fbSprovos struct pool knote_pool; 1849fcc791dSart struct pool kqueue_pool; 185d0fd4be4Svisa struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); 186*0747e3d2Sclaudio struct rwlock kqueue_ps_list_lock = RWLOCK_INITIALIZER("kqpsl"); 187b36172baStedu int kq_ntimeouts = 0; 188b36172baStedu int kq_timeoutmax = (4 * 1024); 18991a0b8fbSprovos 1901a12e8a7Sprovos #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 1911a12e8a7Sprovos 1921a12e8a7Sprovos /* 193b3af768dSjsg * 
Table for all system-defined filters. 1941a12e8a7Sprovos */ 19594321eb4Svisa const struct filterops *const sysfilt_ops[] = { 196cc90df54Sprovos &file_filtops, /* EVFILT_READ */ 197cc90df54Sprovos &file_filtops, /* EVFILT_WRITE */ 1981a12e8a7Sprovos NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ 199cc90df54Sprovos &file_filtops, /* EVFILT_VNODE */ 2001a12e8a7Sprovos &proc_filtops, /* EVFILT_PROC */ 2011a12e8a7Sprovos &sig_filtops, /* EVFILT_SIGNAL */ 20247cb05d5Stedu &timer_filtops, /* EVFILT_TIMER */ 203586ac5a1Srobert &file_filtops, /* EVFILT_DEVICE */ 2047ab02df9Smpi &file_filtops, /* EVFILT_EXCEPT */ 2051a12e8a7Sprovos }; 2061a12e8a7Sprovos 20789ce1a60Sderaadt void 20889ce1a60Sderaadt KQREF(struct kqueue *kq) 20989ce1a60Sderaadt { 21050b77637Svisa refcnt_take(&kq->kq_refcnt); 21189ce1a60Sderaadt } 21289ce1a60Sderaadt 21389ce1a60Sderaadt void 21489ce1a60Sderaadt KQRELE(struct kqueue *kq) 21589ce1a60Sderaadt { 2166a01a8aeSvisa struct filedesc *fdp; 2176a01a8aeSvisa 21850b77637Svisa if (refcnt_rele(&kq->kq_refcnt) == 0) 219316aeb9fSanton return; 220316aeb9fSanton 2216a01a8aeSvisa fdp = kq->kq_fdp; 222e6dd1245Svisa if (rw_status(&fdp->fd_lock) == RW_WRITE) { 223316aeb9fSanton LIST_REMOVE(kq, kq_next); 224e6dd1245Svisa } else { 225e6dd1245Svisa fdplock(fdp); 226e6dd1245Svisa LIST_REMOVE(kq, kq_next); 227e6dd1245Svisa fdpunlock(fdp); 228e6dd1245Svisa } 229e6dd1245Svisa 230715db9d6Svisa KASSERT(TAILQ_EMPTY(&kq->kq_head)); 231c5e59ae3Svisa KASSERT(kq->kq_nknotes == 0); 232715db9d6Svisa 2339c969c9aSvisa free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * 2349c969c9aSvisa sizeof(struct knlist)); 235eb4d0442Svisa hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); 236a820167aSvisa klist_free(&kq->kq_klist); 23789ce1a60Sderaadt pool_put(&kqueue_pool, kq); 23889ce1a60Sderaadt } 23989ce1a60Sderaadt 2409fcc791dSart void 2419fcc791dSart kqueue_init(void) 2429fcc791dSart { 243568eb576Svisa pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, 244568eb576Svisa PR_WAITOK, "kqueuepl", 
NULL); 245568eb576Svisa pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, 246568eb576Svisa PR_WAITOK, "knotepl", NULL); 2479fcc791dSart } 2489fcc791dSart 24972adf922Svisa void 25072adf922Svisa kqueue_init_percpu(void) 25172adf922Svisa { 25272adf922Svisa pool_cache_init(&knote_pool); 25372adf922Svisa } 25472adf922Svisa 2551a12e8a7Sprovos int 256cc90df54Sprovos filt_fileattach(struct knote *kn) 2571a12e8a7Sprovos { 258cc90df54Sprovos struct file *fp = kn->kn_fp; 2591a12e8a7Sprovos 260916645f0Stedu return fp->f_ops->fo_kqfilter(fp, kn); 2611a12e8a7Sprovos } 2621a12e8a7Sprovos 2631a12e8a7Sprovos int 264cc90df54Sprovos kqueue_kqfilter(struct file *fp, struct knote *kn) 2651a12e8a7Sprovos { 266916645f0Stedu struct kqueue *kq = kn->kn_fp->f_data; 2671a12e8a7Sprovos 268cc90df54Sprovos if (kn->kn_filter != EVFILT_READ) 269b8d5a5fbSnicm return (EINVAL); 270cc90df54Sprovos 271cc90df54Sprovos kn->kn_fop = &kqread_filtops; 272a820167aSvisa klist_insert(&kq->kq_klist, kn); 2731a12e8a7Sprovos return (0); 2741a12e8a7Sprovos } 2751a12e8a7Sprovos 2761a12e8a7Sprovos void 2771a12e8a7Sprovos filt_kqdetach(struct knote *kn) 2781a12e8a7Sprovos { 279916645f0Stedu struct kqueue *kq = kn->kn_fp->f_data; 2801a12e8a7Sprovos 281a820167aSvisa klist_remove(&kq->kq_klist, kn); 282d0fd4be4Svisa } 283d0fd4be4Svisa 284d0fd4be4Svisa int 285d0fd4be4Svisa filt_kqueue_common(struct knote *kn, struct kqueue *kq) 286d0fd4be4Svisa { 287d0fd4be4Svisa MUTEX_ASSERT_LOCKED(&kq->kq_lock); 288d0fd4be4Svisa 289d0fd4be4Svisa kn->kn_data = kq->kq_count; 290d0fd4be4Svisa 291d0fd4be4Svisa return (kn->kn_data > 0); 2921a12e8a7Sprovos } 2931a12e8a7Sprovos 2941a12e8a7Sprovos int 2951a12e8a7Sprovos filt_kqueue(struct knote *kn, long hint) 2961a12e8a7Sprovos { 297916645f0Stedu struct kqueue *kq = kn->kn_fp->f_data; 298d0fd4be4Svisa int active; 2991a12e8a7Sprovos 30019ece097Svisa mtx_enter(&kq->kq_lock); 301d0fd4be4Svisa active = filt_kqueue_common(kn, kq); 30219ece097Svisa mtx_leave(&kq->kq_lock); 
303d0fd4be4Svisa 304d0fd4be4Svisa return (active); 305d0fd4be4Svisa } 306d0fd4be4Svisa 307d0fd4be4Svisa int 308d0fd4be4Svisa filt_kqueuemodify(struct kevent *kev, struct knote *kn) 309d0fd4be4Svisa { 310d0fd4be4Svisa struct kqueue *kq = kn->kn_fp->f_data; 311d0fd4be4Svisa int active; 312d0fd4be4Svisa 313d0fd4be4Svisa mtx_enter(&kq->kq_lock); 314a3a2b40eSvisa knote_assign(kev, kn); 315d0fd4be4Svisa active = filt_kqueue_common(kn, kq); 316d0fd4be4Svisa mtx_leave(&kq->kq_lock); 317d0fd4be4Svisa 318d0fd4be4Svisa return (active); 319d0fd4be4Svisa } 320d0fd4be4Svisa 321d0fd4be4Svisa int 322d0fd4be4Svisa filt_kqueueprocess(struct knote *kn, struct kevent *kev) 323d0fd4be4Svisa { 324d0fd4be4Svisa struct kqueue *kq = kn->kn_fp->f_data; 325d0fd4be4Svisa int active; 326d0fd4be4Svisa 327d0fd4be4Svisa mtx_enter(&kq->kq_lock); 328d0fd4be4Svisa if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) 329d0fd4be4Svisa active = 1; 330d0fd4be4Svisa else 331d0fd4be4Svisa active = filt_kqueue_common(kn, kq); 332d0fd4be4Svisa if (active) 333d0fd4be4Svisa knote_submit(kn, kev); 334d0fd4be4Svisa mtx_leave(&kq->kq_lock); 335d0fd4be4Svisa 336d0fd4be4Svisa return (active); 3371a12e8a7Sprovos } 3381a12e8a7Sprovos 3391a12e8a7Sprovos int 3401a12e8a7Sprovos filt_procattach(struct knote *kn) 3411a12e8a7Sprovos { 3420006fbf0Sguenther struct process *pr; 343*0747e3d2Sclaudio int nolock; 3441a12e8a7Sprovos 345c84235d7Stedu if ((curproc->p_p->ps_flags & PS_PLEDGE) && 346c84235d7Stedu (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) 347c84235d7Stedu return pledge_fail(curproc, EPERM, PLEDGE_PROC); 348c84235d7Stedu 349b261876bSguenther if (kn->kn_id > PID_MAX) 350b261876bSguenther return ESRCH; 351b261876bSguenther 352*0747e3d2Sclaudio KERNEL_LOCK(); 3530006fbf0Sguenther pr = prfind(kn->kn_id); 3540006fbf0Sguenther if (pr == NULL) 355*0747e3d2Sclaudio goto fail; 356c6dbc6b5Sprovos 3570006fbf0Sguenther /* exiting processes can't be specified */ 3580006fbf0Sguenther if (pr->ps_flags & PS_EXITING) 
359*0747e3d2Sclaudio goto fail; 360458bf05cSguenther 3610006fbf0Sguenther kn->kn_ptr.p_process = pr; 3621a12e8a7Sprovos kn->kn_flags |= EV_CLEAR; /* automatically set */ 3631a12e8a7Sprovos 3641a12e8a7Sprovos /* 3651a12e8a7Sprovos * internal flag indicating registration done by kernel 3661a12e8a7Sprovos */ 367a8f4946aSmpi if (kn->kn_flags & EV_FLAG1) { 3681a12e8a7Sprovos kn->kn_data = kn->kn_sdata; /* ppid */ 3691a12e8a7Sprovos kn->kn_fflags = NOTE_CHILD; 370a8f4946aSmpi kn->kn_flags &= ~EV_FLAG1; 371*0747e3d2Sclaudio rw_assert_wrlock(&kqueue_ps_list_lock); 3721a12e8a7Sprovos } 3731a12e8a7Sprovos 374*0747e3d2Sclaudio /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */ 375*0747e3d2Sclaudio nolock = (rw_status(&kqueue_ps_list_lock) == RW_WRITE); 376*0747e3d2Sclaudio if (!nolock) 377*0747e3d2Sclaudio rw_enter_write(&kqueue_ps_list_lock); 378*0747e3d2Sclaudio mtx_enter(&pr->ps_mtx); 3799b0cf67bSvisa klist_insert_locked(&pr->ps_klist, kn); 380*0747e3d2Sclaudio mtx_leave(&pr->ps_mtx); 381*0747e3d2Sclaudio if (!nolock) 382*0747e3d2Sclaudio rw_exit_write(&kqueue_ps_list_lock); 383*0747e3d2Sclaudio 384*0747e3d2Sclaudio KERNEL_UNLOCK(); 3851a12e8a7Sprovos 3861a12e8a7Sprovos return (0); 387*0747e3d2Sclaudio 388*0747e3d2Sclaudio fail: 389*0747e3d2Sclaudio KERNEL_UNLOCK(); 390*0747e3d2Sclaudio return (ESRCH); 3911a12e8a7Sprovos } 3921a12e8a7Sprovos 3931a12e8a7Sprovos /* 3941a12e8a7Sprovos * The knote may be attached to a different process, which may exit, 3951a12e8a7Sprovos * leaving nothing for the knote to be attached to. So when the process 3961a12e8a7Sprovos * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 3971a12e8a7Sprovos * it will be deleted when read out. However, as part of the knote deletion, 3981a12e8a7Sprovos * this routine is called, so a check is needed to avoid actually performing 3991a12e8a7Sprovos * a detach, because the original process does not exist any more. 
4001a12e8a7Sprovos */ 4011a12e8a7Sprovos void 4021a12e8a7Sprovos filt_procdetach(struct knote *kn) 4031a12e8a7Sprovos { 4040006fbf0Sguenther struct process *pr = kn->kn_ptr.p_process; 405*0747e3d2Sclaudio int status; 4061a12e8a7Sprovos 407*0747e3d2Sclaudio /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */ 408*0747e3d2Sclaudio rw_enter_write(&kqueue_ps_list_lock); 409*0747e3d2Sclaudio mtx_enter(&pr->ps_mtx); 410627c97d9Svisa status = kn->kn_status; 411627c97d9Svisa 412*0747e3d2Sclaudio if ((status & KN_DETACHED) == 0) 4139b0cf67bSvisa klist_remove_locked(&pr->ps_klist, kn); 414*0747e3d2Sclaudio 415*0747e3d2Sclaudio mtx_leave(&pr->ps_mtx); 416*0747e3d2Sclaudio rw_exit_write(&kqueue_ps_list_lock); 4171a12e8a7Sprovos } 4181a12e8a7Sprovos 4191a12e8a7Sprovos int 4201a12e8a7Sprovos filt_proc(struct knote *kn, long hint) 4211a12e8a7Sprovos { 422*0747e3d2Sclaudio struct process *pr = kn->kn_ptr.p_process; 423627c97d9Svisa struct kqueue *kq = kn->kn_kq; 4241a12e8a7Sprovos u_int event; 4251a12e8a7Sprovos 4261a12e8a7Sprovos /* 4271a12e8a7Sprovos * mask off extra data 4281a12e8a7Sprovos */ 4291a12e8a7Sprovos event = (u_int)hint & NOTE_PCTRLMASK; 4301a12e8a7Sprovos 4311a12e8a7Sprovos /* 4321a12e8a7Sprovos * if the user is interested in this event, record it. 
4331a12e8a7Sprovos */ 4341a12e8a7Sprovos if (kn->kn_sfflags & event) 4351a12e8a7Sprovos kn->kn_fflags |= event; 4361a12e8a7Sprovos 4371a12e8a7Sprovos /* 438458bf05cSguenther * process is gone, so flag the event as finished and remove it 439458bf05cSguenther * from the process's klist 4401a12e8a7Sprovos */ 4411a12e8a7Sprovos if (event == NOTE_EXIT) { 4420006fbf0Sguenther struct process *pr = kn->kn_ptr.p_process; 443458bf05cSguenther 444627c97d9Svisa mtx_enter(&kq->kq_lock); 4451a12e8a7Sprovos kn->kn_status |= KN_DETACHED; 446627c97d9Svisa mtx_leave(&kq->kq_lock); 447627c97d9Svisa 4481a12e8a7Sprovos kn->kn_flags |= (EV_EOF | EV_ONESHOT); 449381e34d2Sguenther kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); 4509b0cf67bSvisa klist_remove_locked(&pr->ps_klist, kn); 4511a12e8a7Sprovos return (1); 4521a12e8a7Sprovos } 4531a12e8a7Sprovos 4541a12e8a7Sprovos /* 4551a12e8a7Sprovos * process forked, and user wants to track the new process, 4561a12e8a7Sprovos * so attach a new knote to it, and immediately report an 4571a12e8a7Sprovos * event with the parent's pid. 4581a12e8a7Sprovos */ 4591a12e8a7Sprovos if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 4601a12e8a7Sprovos struct kevent kev; 4611a12e8a7Sprovos int error; 4621a12e8a7Sprovos 4631a12e8a7Sprovos /* 4641a12e8a7Sprovos * register knote with new process. 
4651a12e8a7Sprovos */ 466b20a1666Smillert memset(&kev, 0, sizeof(kev)); 4671a12e8a7Sprovos kev.ident = hint & NOTE_PDATAMASK; /* pid */ 4681a12e8a7Sprovos kev.filter = kn->kn_filter; 469a8f4946aSmpi kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 4701a12e8a7Sprovos kev.fflags = kn->kn_sfflags; 4711a12e8a7Sprovos kev.data = kn->kn_id; /* parent */ 4726d57c564Svisa kev.udata = kn->kn_udata; /* preserve udata */ 473*0747e3d2Sclaudio 474*0747e3d2Sclaudio rw_assert_wrlock(&kqueue_ps_list_lock); 475*0747e3d2Sclaudio mtx_leave(&pr->ps_mtx); 476c5e59ae3Svisa error = kqueue_register(kq, &kev, 0, NULL); 477*0747e3d2Sclaudio mtx_enter(&pr->ps_mtx); 478*0747e3d2Sclaudio 4791a12e8a7Sprovos if (error) 4801a12e8a7Sprovos kn->kn_fflags |= NOTE_TRACKERR; 4811a12e8a7Sprovos } 4821a12e8a7Sprovos 4831a12e8a7Sprovos return (kn->kn_fflags != 0); 4841a12e8a7Sprovos } 4851a12e8a7Sprovos 486*0747e3d2Sclaudio int 487*0747e3d2Sclaudio filt_procmodify(struct kevent *kev, struct knote *kn) 488*0747e3d2Sclaudio { 489*0747e3d2Sclaudio struct process *pr = kn->kn_ptr.p_process; 490*0747e3d2Sclaudio int active; 491*0747e3d2Sclaudio 492*0747e3d2Sclaudio mtx_enter(&pr->ps_mtx); 493*0747e3d2Sclaudio active = knote_modify(kev, kn); 494*0747e3d2Sclaudio mtx_leave(&pr->ps_mtx); 495*0747e3d2Sclaudio 496*0747e3d2Sclaudio return (active); 497*0747e3d2Sclaudio } 498*0747e3d2Sclaudio 499*0747e3d2Sclaudio /* 500*0747e3d2Sclaudio * By default only grab the mutex here. If the event requires extra protection 501*0747e3d2Sclaudio * because it alters the klist (NOTE_EXIT, NOTE_FORK the caller of the knote 502*0747e3d2Sclaudio * needs to grab the rwlock first. 
503*0747e3d2Sclaudio */ 504*0747e3d2Sclaudio int 505*0747e3d2Sclaudio filt_procprocess(struct knote *kn, struct kevent *kev) 506*0747e3d2Sclaudio { 507*0747e3d2Sclaudio struct process *pr = kn->kn_ptr.p_process; 508*0747e3d2Sclaudio int active; 509*0747e3d2Sclaudio 510*0747e3d2Sclaudio mtx_enter(&pr->ps_mtx); 511*0747e3d2Sclaudio active = knote_process(kn, kev); 512*0747e3d2Sclaudio mtx_leave(&pr->ps_mtx); 513*0747e3d2Sclaudio 514*0747e3d2Sclaudio return (active); 515*0747e3d2Sclaudio } 516*0747e3d2Sclaudio 517235013ebSclaudio /* 518235013ebSclaudio * signal knotes are shared with proc knotes, so we apply a mask to 519235013ebSclaudio * the hint in order to differentiate them from process hints. This 520235013ebSclaudio * could be avoided by using a signal-specific knote list, but probably 521235013ebSclaudio * isn't worth the trouble. 522235013ebSclaudio */ 523235013ebSclaudio int 524235013ebSclaudio filt_sigattach(struct knote *kn) 525235013ebSclaudio { 526235013ebSclaudio struct process *pr = curproc->p_p; 527235013ebSclaudio 528235013ebSclaudio if (kn->kn_id >= NSIG) 529235013ebSclaudio return EINVAL; 530235013ebSclaudio 531235013ebSclaudio kn->kn_ptr.p_process = pr; 532235013ebSclaudio kn->kn_flags |= EV_CLEAR; /* automatically set */ 533235013ebSclaudio 534*0747e3d2Sclaudio /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. 
*/ 535*0747e3d2Sclaudio rw_enter_write(&kqueue_ps_list_lock); 536*0747e3d2Sclaudio mtx_enter(&pr->ps_mtx); 537235013ebSclaudio klist_insert_locked(&pr->ps_klist, kn); 538*0747e3d2Sclaudio mtx_leave(&pr->ps_mtx); 539*0747e3d2Sclaudio rw_exit_write(&kqueue_ps_list_lock); 540235013ebSclaudio 541235013ebSclaudio return (0); 542235013ebSclaudio } 543235013ebSclaudio 544235013ebSclaudio void 545235013ebSclaudio filt_sigdetach(struct knote *kn) 546235013ebSclaudio { 547235013ebSclaudio struct process *pr = kn->kn_ptr.p_process; 548235013ebSclaudio 549*0747e3d2Sclaudio rw_enter_write(&kqueue_ps_list_lock); 550*0747e3d2Sclaudio mtx_enter(&pr->ps_mtx); 551235013ebSclaudio klist_remove_locked(&pr->ps_klist, kn); 552*0747e3d2Sclaudio mtx_leave(&pr->ps_mtx); 553*0747e3d2Sclaudio rw_exit_write(&kqueue_ps_list_lock); 554235013ebSclaudio } 555235013ebSclaudio 556235013ebSclaudio int 557235013ebSclaudio filt_signal(struct knote *kn, long hint) 558235013ebSclaudio { 559235013ebSclaudio if (hint & NOTE_SIGNAL) { 560235013ebSclaudio hint &= ~NOTE_SIGNAL; 561235013ebSclaudio 562235013ebSclaudio if (kn->kn_id == hint) 563235013ebSclaudio kn->kn_data++; 564235013ebSclaudio } 565235013ebSclaudio return (kn->kn_data != 0); 566235013ebSclaudio } 567235013ebSclaudio 568471dbed6Svisa #define NOTE_TIMER_UNITMASK \ 569471dbed6Svisa (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS|NOTE_NSECONDS) 570471dbed6Svisa 571471dbed6Svisa static int 572471dbed6Svisa filt_timervalidate(int sfflags, int64_t sdata, struct timespec *ts) 573ad228b76Sguenther { 574471dbed6Svisa if (sfflags & ~(NOTE_TIMER_UNITMASK | NOTE_ABSTIME)) 575471dbed6Svisa return (EINVAL); 576471dbed6Svisa 577471dbed6Svisa switch (sfflags & NOTE_TIMER_UNITMASK) { 578471dbed6Svisa case NOTE_SECONDS: 579471dbed6Svisa ts->tv_sec = sdata; 580471dbed6Svisa ts->tv_nsec = 0; 581471dbed6Svisa break; 582471dbed6Svisa case NOTE_MSECONDS: 583471dbed6Svisa ts->tv_sec = sdata / 1000; 584471dbed6Svisa ts->tv_nsec = (sdata % 1000) * 1000000; 585471dbed6Svisa 
break; 586471dbed6Svisa case NOTE_USECONDS: 587471dbed6Svisa ts->tv_sec = sdata / 1000000; 588471dbed6Svisa ts->tv_nsec = (sdata % 1000000) * 1000; 589471dbed6Svisa break; 590471dbed6Svisa case NOTE_NSECONDS: 591471dbed6Svisa ts->tv_sec = sdata / 1000000000; 592471dbed6Svisa ts->tv_nsec = sdata % 1000000000; 593471dbed6Svisa break; 594471dbed6Svisa default: 595471dbed6Svisa return (EINVAL); 596471dbed6Svisa } 597471dbed6Svisa 598471dbed6Svisa return (0); 599471dbed6Svisa } 600471dbed6Svisa 601471dbed6Svisa static void 602471dbed6Svisa filt_timeradd(struct knote *kn, struct timespec *ts) 603471dbed6Svisa { 604471dbed6Svisa struct timespec expiry, now; 605efdedfd7Scheloha struct timeout *to = kn->kn_hook; 606ad228b76Sguenther int tticks; 607ad228b76Sguenther 608471dbed6Svisa if (kn->kn_sfflags & NOTE_ABSTIME) { 609471dbed6Svisa nanotime(&now); 610471dbed6Svisa if (timespeccmp(ts, &now, >)) { 611471dbed6Svisa timespecsub(ts, &now, &expiry); 612471dbed6Svisa /* XXX timeout_abs_ts with CLOCK_REALTIME */ 613471dbed6Svisa timeout_add(to, tstohz(&expiry)); 614471dbed6Svisa } else { 615471dbed6Svisa /* Expire immediately. */ 616471dbed6Svisa filt_timerexpire(kn); 617471dbed6Svisa } 618471dbed6Svisa return; 619471dbed6Svisa } 620471dbed6Svisa 621471dbed6Svisa tticks = tstohz(ts); 622471dbed6Svisa /* Remove extra tick from tstohz() if timeout has fired before. */ 623efdedfd7Scheloha if (timeout_triggered(to)) 624efdedfd7Scheloha tticks--; 625efdedfd7Scheloha timeout_add(to, (tticks > 0) ? 
tticks : 1); 626ad228b76Sguenther } 627ad228b76Sguenther 62847cb05d5Stedu void 62947cb05d5Stedu filt_timerexpire(void *knx) 63047cb05d5Stedu { 631471dbed6Svisa struct timespec ts; 63247cb05d5Stedu struct knote *kn = knx; 63319ece097Svisa struct kqueue *kq = kn->kn_kq; 63447cb05d5Stedu 63547cb05d5Stedu kn->kn_data++; 63619ece097Svisa mtx_enter(&kq->kq_lock); 637eb8a26a3Svisa knote_activate(kn); 63819ece097Svisa mtx_leave(&kq->kq_lock); 63947cb05d5Stedu 640471dbed6Svisa if ((kn->kn_flags & EV_ONESHOT) == 0 && 641471dbed6Svisa (kn->kn_sfflags & NOTE_ABSTIME) == 0) { 642471dbed6Svisa (void)filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); 643471dbed6Svisa filt_timeradd(kn, &ts); 644471dbed6Svisa } 64547cb05d5Stedu } 64647cb05d5Stedu 64747cb05d5Stedu /* 648471dbed6Svisa * data contains amount of time to sleep 64947cb05d5Stedu */ 65047cb05d5Stedu int 65147cb05d5Stedu filt_timerattach(struct knote *kn) 65247cb05d5Stedu { 653471dbed6Svisa struct timespec ts; 65447cb05d5Stedu struct timeout *to; 655471dbed6Svisa int error; 656471dbed6Svisa 657471dbed6Svisa error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); 658471dbed6Svisa if (error != 0) 659471dbed6Svisa return (error); 66047cb05d5Stedu 661b36172baStedu if (kq_ntimeouts > kq_timeoutmax) 66247cb05d5Stedu return (ENOMEM); 663b36172baStedu kq_ntimeouts++; 66447cb05d5Stedu 665471dbed6Svisa if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) 66647cb05d5Stedu kn->kn_flags |= EV_CLEAR; /* automatically set */ 667a8f078fdSchl to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); 66847cb05d5Stedu timeout_set(to, filt_timerexpire, kn); 66947cb05d5Stedu kn->kn_hook = to; 670471dbed6Svisa filt_timeradd(kn, &ts); 67147cb05d5Stedu 67247cb05d5Stedu return (0); 67347cb05d5Stedu } 67447cb05d5Stedu 67547cb05d5Stedu void 67647cb05d5Stedu filt_timerdetach(struct knote *kn) 67747cb05d5Stedu { 67847cb05d5Stedu struct timeout *to; 67947cb05d5Stedu 68047cb05d5Stedu to = (struct timeout *)kn->kn_hook; 681a23d840bSvisa timeout_del_barrier(to); 
6825ff140d2Sderaadt free(to, M_KEVENT, sizeof(*to)); 683b36172baStedu kq_ntimeouts--; 68447cb05d5Stedu } 68547cb05d5Stedu 68647cb05d5Stedu int 687a23d840bSvisa filt_timermodify(struct kevent *kev, struct knote *kn) 68847cb05d5Stedu { 689471dbed6Svisa struct timespec ts; 690baa47be0Svisa struct kqueue *kq = kn->kn_kq; 691a23d840bSvisa struct timeout *to = kn->kn_hook; 692471dbed6Svisa int error; 693471dbed6Svisa 694471dbed6Svisa error = filt_timervalidate(kev->fflags, kev->data, &ts); 695471dbed6Svisa if (error != 0) { 696471dbed6Svisa kev->flags |= EV_ERROR; 697471dbed6Svisa kev->data = error; 698471dbed6Svisa return (0); 699471dbed6Svisa } 700a23d840bSvisa 701a23d840bSvisa /* Reset the timer. Any pending events are discarded. */ 702a23d840bSvisa 703a23d840bSvisa timeout_del_barrier(to); 704a23d840bSvisa 705baa47be0Svisa mtx_enter(&kq->kq_lock); 706a23d840bSvisa if (kn->kn_status & KN_QUEUED) 707a23d840bSvisa knote_dequeue(kn); 708a23d840bSvisa kn->kn_status &= ~KN_ACTIVE; 709baa47be0Svisa mtx_leave(&kq->kq_lock); 710a23d840bSvisa 711a23d840bSvisa kn->kn_data = 0; 712a3a2b40eSvisa knote_assign(kev, kn); 713a23d840bSvisa /* Reinit timeout to invoke tick adjustment again. */ 714a23d840bSvisa timeout_set(to, filt_timerexpire, kn); 715471dbed6Svisa filt_timeradd(kn, &ts); 716a23d840bSvisa 717a23d840bSvisa return (0); 718a23d840bSvisa } 719a23d840bSvisa 720a23d840bSvisa int 721a23d840bSvisa filt_timerprocess(struct knote *kn, struct kevent *kev) 722a23d840bSvisa { 723a23d840bSvisa int active, s; 724a23d840bSvisa 725a23d840bSvisa s = splsoftclock(); 726a23d840bSvisa active = (kn->kn_data != 0); 727a23d840bSvisa if (active) 728a23d840bSvisa knote_submit(kn, kev); 729a23d840bSvisa splx(s); 730a23d840bSvisa 731a23d840bSvisa return (active); 73247cb05d5Stedu } 73347cb05d5Stedu 73447cb05d5Stedu 73530fec190Snate /* 73630fec190Snate * filt_seltrue: 73730fec190Snate * 73830fec190Snate * This filter "event" routine simulates seltrue(). 
73930fec190Snate */ 74030fec190Snate int 74130fec190Snate filt_seltrue(struct knote *kn, long hint) 74230fec190Snate { 74330fec190Snate 74430fec190Snate /* 74530fec190Snate * We don't know how much data can be read/written, 74630fec190Snate * but we know that it *can* be. This is about as 74730fec190Snate * good as select/poll does as well. 74830fec190Snate */ 74930fec190Snate kn->kn_data = 0; 75030fec190Snate return (1); 75130fec190Snate } 75230fec190Snate 7536d57c564Svisa int 7546d57c564Svisa filt_seltruemodify(struct kevent *kev, struct knote *kn) 7556d57c564Svisa { 756a3a2b40eSvisa knote_assign(kev, kn); 75769e8f641Svisa return (kn->kn_fop->f_event(kn, 0)); 7586d57c564Svisa } 7596d57c564Svisa 7606d57c564Svisa int 7616d57c564Svisa filt_seltrueprocess(struct knote *kn, struct kevent *kev) 7626d57c564Svisa { 76369e8f641Svisa int active; 76469e8f641Svisa 76569e8f641Svisa active = kn->kn_fop->f_event(kn, 0); 76669e8f641Svisa if (active) 7676d57c564Svisa knote_submit(kn, kev); 76869e8f641Svisa return (active); 7696d57c564Svisa } 7706d57c564Svisa 771a56f3bb5Snicm /* 772a56f3bb5Snicm * This provides full kqfilter entry for device switch tables, which 773a56f3bb5Snicm * has same effect as filter using filt_seltrue() as filter method. 
/*
 * This provides full kqfilter entry for device switch tables, which
 * has same effect as filter using filt_seltrue() as filter method.
 */
void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

/* Always-ready filter; used by seltrue_kqfilter() below. */
const struct filterops seltrue_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_seltruedetach,
	.f_event	= filt_seltrue,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

/*
 * seltrue_kqfilter: kqfilter entry point for devices that are always
 * ready.  Only read and write filters make sense here; anything else
 * is rejected with EINVAL.
 */
int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

/*
 * filt_dead: event routine for knotes whose backing object is gone
 * (e.g. a revoked vnode).  Reports EOF/oneshot so userspace sees the
 * object as dead; see dead_filtops below.
 */
static int
filt_dead(struct knote *kn, long hint)
{
	if (kn->kn_filter == EVFILT_EXCEPT) {
		/*
		 * Do not deliver event because there is no out-of-band data.
		 * However, let HUP condition pass for poll(2).
		 */
		if ((kn->kn_flags & __EV_POLL) == 0) {
			kn->kn_flags |= EV_DISABLE;
			return (0);
		}
	}

	kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	if (kn->kn_flags & __EV_POLL)
		kn->kn_flags |= __EV_HUP;
	kn->kn_data = 0;
	return (1);
}

static void
filt_deaddetach(struct knote *kn)
{
	/* Nothing to do */
}

/* Filter installed over knotes whose underlying object has died. */
const struct filterops dead_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_dead,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

/*
 * filt_badfd: always-fires event routine reporting EBADF, for knotes
 * left behind on a closed file descriptor.
 */
static int
filt_badfd(struct knote *kn, long hint)
{
	kn->kn_flags |= (EV_ERROR | EV_ONESHOT);
	kn->kn_data = EBADF;
	return (1);
}

/* For use with kqpoll. */
const struct filterops badfd_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_badfd,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

/*
 * The filter_* wrappers below dispatch to a knote's filterops.  For
 * filters not marked FILTEROP_MPSAFE they take the kernel lock around
 * the call, so individual filters need not know about locking policy.
 */
static int
filter_attach(struct knote *kn)
{
	int error;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		error = kn->kn_fop->f_attach(kn);
	} else {
		KERNEL_LOCK();
		error = kn->kn_fop->f_attach(kn);
		KERNEL_UNLOCK();
	}
	return (error);
}

static void
filter_detach(struct knote *kn)
{
	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		kn->kn_fop->f_detach(kn);
	} else {
		KERNEL_LOCK();
		kn->kn_fop->f_detach(kn);
		KERNEL_UNLOCK();
	}
}

static int
filter_event(struct knote *kn, long hint)
{
	/* Non-MPSAFE filters rely on the caller holding the kernel lock. */
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	return (kn->kn_fop->f_event(kn, hint));
}

/*
 * Invoke the filter's f_modify hook.  Legacy filters without f_modify
 * fall back to the generic knote_modify() at splhigh().
 */
static int
filter_modify(struct kevent *kev, struct knote *kn)
{
	int active, s;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		active = kn->kn_fop->f_modify(kev, kn);
	} else {
		KERNEL_LOCK();
		if (kn->kn_fop->f_modify != NULL) {
			active = kn->kn_fop->f_modify(kev, kn);
		} else {
			s = splhigh();
			active = knote_modify(kev, kn);
			splx(s);
		}
		KERNEL_UNLOCK();
	}
	return (active);
}

/*
 * Invoke the filter's f_process hook.  Legacy filters without f_process
 * fall back to the generic knote_process() at splhigh().
 */
static int
filter_process(struct knote *kn, struct kevent *kev)
{
	int active, s;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		active = kn->kn_fop->f_process(kn, kev);
	} else {
		KERNEL_LOCK();
		if (kn->kn_fop->f_process != NULL) {
			active = kn->kn_fop->f_process(kn, kev);
		} else {
			s = splhigh();
			active = knote_process(kn, kev);
			splx(s);
		}
		KERNEL_UNLOCK();
	}
	return (active);
}

/*
 * Initialize the current thread for poll/select system call.
 * num indicates the number of serials that the system call may utilize.
 * After this function, the valid range of serials is
 * p_kq_serial <= x < p_kq_serial + num.
 */
void
kqpoll_init(unsigned int num)
{
	struct proc *p = curproc;
	struct filedesc *fdp;

	if (p->p_kq == NULL) {
		/* Lazily create the per-thread kqueue backing poll/select. */
		p->p_kq = kqueue_alloc(p->p_fd);
		p->p_kq_serial = arc4random();
		fdp = p->p_fd;
		fdplock(fdp);
		LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next);
		fdpunlock(fdp);
	}

	if (p->p_kq_serial + num < p->p_kq_serial) {
		/* Serial is about to wrap. Clear all attached knotes. */
		kqueue_purge(p, p->p_kq);
		p->p_kq_serial = 0;
	}
}

/*
 * Finish poll/select system call.
 * num must have the same value that was used with kqpoll_init().
 */
void
kqpoll_done(unsigned int num)
{
	struct proc *p = curproc;
	struct kqueue *kq = p->p_kq;

	KASSERT(p->p_kq != NULL);
	KASSERT(p->p_kq_serial + num >= p->p_kq_serial);

	p->p_kq_serial += num;

	/*
	 * Because of kn_pollid key, a thread can in principle allocate
	 * up to O(maxfiles^2) knotes by calling poll(2) repeatedly
	 * with suitably varying pollfd arrays.
	 * Prevent such a large allocation by clearing knotes eagerly
	 * if there are too many of them.
	 *
	 * A small multiple of kq_knlistsize should give enough margin
	 * that eager clearing is infrequent, or does not happen at all,
	 * with normal programs.
	 * A single pollfd entry can use up to three knotes.
	 * Typically there is no significant overlap of fd and events
	 * between different entries in the pollfd array.
	 */
	if (kq->kq_nknotes > 4 * kq->kq_knlistsize)
		kqueue_purge(p, kq);
}

/*
 * Tear down the thread's poll/select kqueue on thread exit.  The
 * refcount must be exactly one here: only the owning thread may still
 * reference it.
 */
void
kqpoll_exit(void)
{
	struct proc *p = curproc;

	if (p->p_kq == NULL)
		return;

	kqueue_purge(p, p->p_kq);
	kqueue_terminate(p, p->p_kq);
	KASSERT(p->p_kq->kq_refcnt.r_refs == 1);
	KQRELE(p->p_kq);
	p->p_kq = NULL;
}

/*
 * Allocate and initialize a kqueue bound to file descriptor table fdp.
 * Returned with a reference count of one.
 */
struct kqueue *
kqueue_alloc(struct filedesc *fdp)
{
	struct kqueue *kq;

	kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO);
	refcnt_init(&kq->kq_refcnt);
	kq->kq_fdp = fdp;
	TAILQ_INIT(&kq->kq_head);
	mtx_init(&kq->kq_lock, IPL_HIGH);
	task_set(&kq->kq_task, kqueue_task, kq);
	klist_init_mutex(&kq->kq_klist, &kqueue_klist_lock);

	return (kq);
}

/*
 * Common implementation of kqueue(2) and kqueue1(2): allocate a kqueue,
 * wrap it in a file and install it in the descriptor table.  flags may
 * carry O_CLOEXEC and FNONBLOCK.
 */
int
dokqueue(struct proc *p, int flags, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int cloexec, error, fd;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	kq = kqueue_alloc(fdp);

	fdplock(fdp);
	error = falloc(p, &fp, &fd);
	if (error)
		goto out;
	fp->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	*retval = fd;
	LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next);
	/* Ownership moved to fp; do not free kq below. */
	kq = NULL;
	fdinsert(fdp, fd, cloexec, fp);
	FRELE(fp, p);
out:
	fdpunlock(fdp);
	if (kq != NULL)
		pool_put(&kqueue_pool, kq);
	return (error);
}

int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	return (dokqueue(p, 0, retval));
}

int
sys_kqueue1(struct proc *p, void *v, register_t *retval)
{
	struct sys_kqueue1_args /* {
		syscallarg(int)	flags;
	} */ *uap = v;

	/* Only close-on-exec and non-blocking flags are accepted. */
	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);
	return (dokqueue(p, SCARG(uap, flags), retval));
}

/*
 * kevent(2): apply the changelist via kqueue_register(), then collect
 * up to nevents events with kqueue_scan(), copying them out in
 * KQ_NEVENTS-sized batches.
 */
int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct kqueue_scan_state scan;
	struct filedesc* fdp = p->p_fd;
	struct sys_kevent_args /* {
		syscallarg(int)	fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int)	nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int)	nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	struct timespec *tsp = NULL;
	int i, n, nerrors, error;
	int ready, total;
	struct kevent kev[KQ_NEVENTS];

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		error = EBADF;
		goto done;
	}

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts)) {
			error = EINVAL;
			goto done;
		}
		tsp = &ts;
	}

	kq = fp->f_data;
	nerrors = 0;

	/* Process the changelist in batches of up to KQ_NEVENTS entries. */
	while ((n = SCARG(uap, nchanges)) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		error = copyin(SCARG(uap, changelist), kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, n);
#endif
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, 0, p);
			if (error || (kevp->flags & EV_RECEIPT)) {
				/*
				 * Report per-change errors through the
				 * eventlist when there is room, otherwise
				 * fail the whole call.
				 */
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					copyout(kevp, SCARG(uap, eventlist),
					    sizeof(*kevp));
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	kqueue_scan_setup(&scan, kq);
	FRELE(fp, p);
	/*
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
	 */
	total = 0;
	error = 0;
	while ((n = SCARG(uap, nevents) - total) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
		if (ready == 0)
			break;
		error = copyout(kev, SCARG(uap, eventlist) + total,
		    sizeof(struct kevent) * ready);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, ready);
#endif
		total += ready;
		if (error || ready < n)
			break;
	}
	kqueue_scan_finish(&scan);
	*retval = total;
	return (error);

done:
	FRELE(fp, p);
	return (error);
}

#ifdef KQUEUE_DEBUG
/*
 * Consistency check of kq_head and kq_count: every non-marker knote on
 * the queue must be ACTIVE, QUEUED and belong to this kqueue, and the
 * number of such knotes must match kq_count.  Panics on any violation.
 */
void
kqueue_do_check(struct kqueue *kq, const char *func, int line)
{
	struct knote *kn;
	int count = 0, nmarker = 0;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if (kn->kn_filter == EVFILT_MARKER) {
			if ((kn->kn_status & KN_QUEUED) != 0)
				panic("%s:%d: kq=%p kn=%p marker QUEUED",
				    func, line, kq, kn);
			nmarker++;
		} else {
			if ((kn->kn_status & KN_ACTIVE) == 0)
				panic("%s:%d: kq=%p kn=%p knote !ACTIVE",
				    func, line, kq, kn);
			if ((kn->kn_status & KN_QUEUED) == 0)
				panic("%s:%d: kq=%p kn=%p knote !QUEUED",
				    func, line, kq, kn);
			if (kn->kn_kq != kq)
				panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq",
				    func, line, kq, kn, kn->kn_kq);
			count++;
			if (count > kq->kq_count)
				goto bad;
		}
	}
	if (count != kq->kq_count) {
bad:
		panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d",
		    func, line, kq, kq->kq_count, count, nmarker);
	}
}
#endif

/*
 * kqueue_register: apply one kevent change to kq.
 *
 * Looks up the matching knote by (ident, filter, pollid), creating one
 * for EV_ADD, deleting it for EV_DELETE, and applying EV_ENABLE /
 * EV_DISABLE and filter parameter updates otherwise.  pollid is non-zero
 * only for the per-thread kqpoll kqueue.  Returns 0 or an errno.
 */
int
kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid,
    struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	const struct filterops *fops = NULL;
	struct file *fp = NULL;
	struct knote *kn = NULL, *newkn = NULL;
	struct knlist *list = NULL;
	int active, error = 0;

	KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq));

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	}

	if (fops == NULL) {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		return (EINVAL);
	}

	if (fops->f_flags & FILTEROP_ISFD) {
		/* validate descriptor */
		if (kev->ident > INT_MAX)
			return (EBADF);
	}

	/* Preallocate before taking kq_lock; pool_get may sleep. */
	if (kev->flags & EV_ADD)
		newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);

again:
	if (fops->f_flags & FILTEROP_ISFD) {
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
			error = EBADF;
			goto done;
		}
		mtx_enter(&kq->kq_lock);
		if (kev->flags & EV_ADD)
			kqueue_expand_list(kq, kev->ident);
		if (kev->ident < kq->kq_knlistsize)
			list = &kq->kq_knlist[kev->ident];
	} else {
		mtx_enter(&kq->kq_lock);
		if (kev->flags & EV_ADD)
			kqueue_expand_hash(kq);
		if (kq->kq_knhashmask != 0) {
			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
		}
	}
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kev->filter == kn->kn_filter &&
			    kev->ident == kn->kn_id &&
			    pollid == kn->kn_pollid) {
				if (!knote_acquire(kn, NULL, 0)) {
					/* knote_acquire() has released
					 * kq_lock. */
					if (fp != NULL) {
						FRELE(fp, p);
						fp = NULL;
					}
					goto again;
				}
				break;
			}
		}
	}
	KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		mtx_leave(&kq->kq_lock);
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match.
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = newkn;
			newkn = NULL;
			kn->kn_status = KN_PROCESSING;
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;
			kn->kn_pollid = pollid;

			knote_attach(kn);
			mtx_leave(&kq->kq_lock);

			error = filter_attach(kn);
			if (error != 0) {
				knote_drop(kn, p);
				goto done;
			}

			/*
			 * If this is a file descriptor filter, check if
			 * fd was closed while the knote was being added.
			 * knote_fdclose() has missed kn if the function
			 * ran before kn appeared in kq_knlist.
			 */
			if ((fops->f_flags & FILTEROP_ISFD) &&
			    fd_checkclosed(fdp, kev->ident, kn->kn_fp)) {
				/*
				 * Drop the knote silently without error
				 * because another thread might already have
				 * seen it. This corresponds to the insert
				 * happening in full before the close.
				 */
				filter_detach(kn);
				knote_drop(kn, p);
				goto done;
			}

			/* Check if there is a pending event. */
			active = filter_process(kn, NULL);
			mtx_enter(&kq->kq_lock);
			if (active)
				knote_activate(kn);
		} else if (kn->kn_fop == &badfd_filtops) {
			/*
			 * Nothing expects this badfd knote any longer.
			 * Drop it to make room for the new knote and retry.
			 */
			KASSERT(kq == p->p_kq);
			mtx_leave(&kq->kq_lock);
			filter_detach(kn);
			knote_drop(kn, p);

			KASSERT(fp != NULL);
			FRELE(fp, p);
			fp = NULL;

			goto again;
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			mtx_leave(&kq->kq_lock);
			active = filter_modify(kev, kn);
			mtx_enter(&kq->kq_lock);
			if (active)
				knote_activate(kn);
			if (kev->flags & EV_ERROR) {
				error = kev->data;
				goto release;
			}
		}
	} else if (kev->flags & EV_DELETE) {
		mtx_leave(&kq->kq_lock);
		filter_detach(kn);
		knote_drop(kn, p);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0))
		kn->kn_status |= KN_DISABLED;

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		kn->kn_status &= ~KN_DISABLED;
		mtx_leave(&kq->kq_lock);
		/* Check if there is a pending event. */
		active = filter_process(kn, NULL);
		mtx_enter(&kq->kq_lock);
		if (active)
			knote_activate(kn);
	}

release:
	knote_release(kn);
	mtx_leave(&kq->kq_lock);
done:
	if (fp != NULL)
		FRELE(fp, p);
	if (newkn != NULL)
		pool_put(&knote_pool, newkn);
	return (error);
}

/*
 * Sleep on kq until an event arrives or the (relative) timeout *tsp
 * expires.  Called with kq_lock held; msleep_nsec() releases it
 * (PNORELOCK).  On return *tsp has been decremented by the elapsed
 * time, clamped at zero, so callers can resume the remaining wait.
 * Single sleeps are capped at MAXTSLP, hence the retry-based design.
 */
int
kqueue_sleep(struct kqueue *kq, struct timespec *tsp)
{
	struct timespec elapsed, start, stop;
	uint64_t nsecs;
	int error;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (tsp != NULL) {
		getnanouptime(&start);
		nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP);
	} else
		nsecs = INFSLP;
	error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK,
	    "kqread", nsecs);
	if (tsp != NULL) {
		getnanouptime(&stop);
		timespecsub(&stop, &start, &elapsed);
		timespecsub(tsp, &elapsed, tsp);
		if (tsp->tv_sec < 0)
			timespecclear(tsp);
	}

	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->ts_secs/nsecs are both
 * 0 we do not block at all.
 */
int
kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
    struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
{
	struct kqueue *kq = scan->kqs_kq;
	struct knote *kn;
	int error = 0, nkev = 0;
	int reinserted;

	if (maxevents == 0)
		goto done;
retry:
	KASSERT(nkev == 0);

	error = 0;
	reinserted = 0;

	mtx_enter(&kq->kq_lock);

	if (kq->kq_state & KQ_DYING) {
		mtx_leave(&kq->kq_lock);
		error = EBADF;
		goto done;
	}

	if (kq->kq_count == 0) {
		/*
		 * Successive loops are only necessary if there are more
		 * ready events to gather, so they don't need to block.
		 */
		if ((tsp != NULL && !timespecisset(tsp)) ||
		    scan->kqs_nevent != 0) {
			mtx_leave(&kq->kq_lock);
			error = 0;
			goto done;
		}
		kq->kq_state |= KQ_SLEEP;
		error = kqueue_sleep(kq, tsp);
		/* kqueue_sleep() has released kq_lock. */
		if (error == 0 || error == EWOULDBLOCK)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		goto done;
	}

	/*
	 * Put the end marker in the queue to limit the scan to the events
	 * that are currently active.  This prevents events from being
	 * recollected if they reactivate during scan.
	 *
	 * If a partial scan has been performed already but no events have
	 * been collected, reposition the end marker to make any new events
	 * reachable.
	 */
	if (!scan->kqs_queued) {
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
		scan->kqs_queued = 1;
	} else if (scan->kqs_nevent == 0) {
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
	}

	/* Walk from the start marker towards the end marker. */
	TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe);
	while (nkev < maxevents) {
		kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
		if (kn->kn_filter == EVFILT_MARKER) {
			if (kn == &scan->kqs_end)
				break;

			/* Move start marker past another thread's marker. */
			TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
			TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start,
			    kn_tqe);
			continue;
		}

		if (!knote_acquire(kn, NULL, 0)) {
			/* knote_acquire() has released kq_lock. */
			mtx_enter(&kq->kq_lock);
			continue;
		}

		kqueue_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kqueue_check(kq);

		if (kn->kn_status & KN_DISABLED) {
			knote_release(kn);
			continue;
		}

		/*
		 * kq_lock is dropped for the filter calls below; the knote
		 * is protected by KN_PROCESSING (set by knote_acquire()).
		 */
		mtx_leave(&kq->kq_lock);

		/* Drop expired kqpoll knotes. */
		if (p->p_kq == kq &&
		    p->p_kq_serial > (unsigned long)kn->kn_udata) {
			filter_detach(kn);
			knote_drop(kn, p);
			mtx_enter(&kq->kq_lock);
			continue;
		}

		/*
		 * Invalidate knotes whose vnodes have been revoked.
		 * This is a workaround; it is tricky to clear existing
		 * knotes and prevent new ones from being registered
		 * with the current revocation mechanism.
		 */
		if ((kn->kn_fop->f_flags & FILTEROP_ISFD) &&
		    kn->kn_fp != NULL &&
		    kn->kn_fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = kn->kn_fp->f_data;

			if (__predict_false(vp->v_op == &dead_vops &&
			    kn->kn_fop != &dead_filtops)) {
				filter_detach(kn);
				kn->kn_fop = &dead_filtops;

				/*
				 * Check if the event should be delivered.
				 * Use f_event directly because this is
				 * a special situation.
				 */
				if (kn->kn_fop->f_event(kn, 0) == 0) {
					filter_detach(kn);
					knote_drop(kn, p);
					mtx_enter(&kq->kq_lock);
					continue;
				}
			}
		}

		memset(kevp, 0, sizeof(*kevp));
		if (filter_process(kn, kevp) == 0) {
			/* Event no longer pending; undo activation. */
			mtx_enter(&kq->kq_lock);
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
			kqueue_check(kq);
			continue;
		}

		/*
		 * Post-event action on the note
		 */
		if (kevp->flags & EV_ONESHOT) {
			filter_detach(kn);
			knote_drop(kn, p);
			mtx_enter(&kq->kq_lock);
		} else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) {
			mtx_enter(&kq->kq_lock);
			if (kevp->flags & EV_DISPATCH)
				kn->kn_status |= KN_DISABLED;
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
		} else {
			/* Level-triggered: requeue for the next scan. */
			mtx_enter(&kq->kq_lock);
			if ((kn->kn_status & KN_QUEUED) == 0) {
				kqueue_check(kq);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
				/* Wakeup is done after loop. */
				reinserted = 1;
			}
			knote_release(kn);
		}
		kqueue_check(kq);

		kevp++;
		nkev++;
		scan->kqs_nevent++;
	}
	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
	if (reinserted && kq->kq_count != 0)
		kqueue_wakeup(kq);
	mtx_leave(&kq->kq_lock);
	if (scan->kqs_nevent == 0)
		goto retry;
done:
	*errorp = error;
	return (nkev);
}

/*
 * Prepare a scan state for kqueue_scan(): take a reference on kq and
 * initialize the start/end marker knotes used to bound the scan.
 */
void
kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
{
	memset(scan, 0, sizeof(*scan));

	KQREF(kq);
	scan->kqs_kq = kq;
	scan->kqs_start.kn_filter = EVFILT_MARKER;
	scan->kqs_start.kn_status = KN_PROCESSING;
	scan->kqs_end.kn_filter = EVFILT_MARKER;
	scan->kqs_end.kn_status = KN_PROCESSING;
}

/*
 * Release a scan state: remove the end marker if it is still queued
 * and drop the kqueue reference taken by kqueue_scan_setup().
 */
void
kqueue_scan_finish(struct kqueue_scan_state *scan)
{
	struct kqueue *kq = scan->kqs_kq;

	KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_start.kn_status == KN_PROCESSING);
	KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_end.kn_status == KN_PROCESSING);

	if (scan->kqs_queued) {
		scan->kqs_queued = 0;
		mtx_enter(&kq->kq_lock);
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		mtx_leave(&kq->kq_lock);
	}
	KQRELE(kq);
}
/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
int
kqueue_read(struct file *fp, struct uio *uio, int fflags)
{
	/* read(2) on a kqueue descriptor is not supported. */
	return (ENXIO);
}

int
kqueue_write(struct file *fp, struct uio *uio, int fflags)
{
	/* write(2) on a kqueue descriptor is not supported. */
	return (ENXIO);
}

int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	/* No ioctl commands are supported on kqueue descriptors. */
	return (ENOTTY);
}

/*
 * Fill in stat(2) information for a kqueue descriptor.
 * Reports the number of pending events as the size.
 */
int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;	/* unlocked read */
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * Drop every knote attached to this kqueue, both from the per-fd
 * list array and from the id hash.  The purge flag passed to
 * knote_remove() makes it delete unconditionally.
 */
void
kqueue_purge(struct proc *p, struct kqueue *kq)
{
	int i;

	mtx_enter(&kq->kq_lock);
	for (i = 0; i < kq->kq_knlistsize; i++)
		knote_remove(p, kq, &kq->kq_knlist, i, 1);
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i < kq->kq_knhashmask + 1; i++)
			knote_remove(p, kq, &kq->kq_knhash, i, 1);
	}
	mtx_leave(&kq->kq_lock);
}

/*
 * Final teardown of a kqueue after it has been purged: mark it dying,
 * wake any sleepers, and wait for the deferred wakeup task to finish.
 * Expects that kqueue_purge() has already emptied the knote lists.
 */
void
kqueue_terminate(struct proc *p, struct kqueue *kq)
{
	struct knote *kn;
	int state;

	mtx_enter(&kq->kq_lock);

	/*
	 * Any remaining entries should be scan markers.
	 * They are removed when the ongoing scans finish.
	 */
	KASSERT(kq->kq_count == 0);
	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe)
		KASSERT(kn->kn_filter == EVFILT_MARKER);

	kq->kq_state |= KQ_DYING;
	/* Snapshot the state for the KQ_TASK check below, after unlock. */
	state = kq->kq_state;
	kqueue_wakeup(kq);
	mtx_leave(&kq->kq_lock);

	/*
	 * Any knotes that were attached to this kqueue were deleted
	 * by knote_fdclose() when this kqueue's file descriptor was closed.
	 */
	KASSERT(klist_empty(&kq->kq_klist));
	if (state & KQ_TASK)
		taskq_del_barrier(systqmp, &kq->kq_task);
}

/*
 * fo_close handler for kqueue descriptors: purge all knotes,
 * terminate the kqueue, and drop the file's reference on it.
 */
int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	fp->f_data = NULL;

	kqueue_purge(p, kq);
	kqueue_terminate(p, kq);

	KQRELE(kq);

	return (0);
}

/*
 * Task callback that delivers deferred notifications for knotes
 * attached to the kqueue itself (see kqueue_wakeup()).
 */
static void
kqueue_task(void *arg)
{
	struct kqueue *kq = arg;

	knote(&kq->kq_klist, 0);
}

/*
 * Wake up threads sleeping on this kqueue and, if the kqueue is itself
 * being monitored, schedule deferred activation of those watchers.
 */
void
kqueue_wakeup(struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (!klist_empty(&kq->kq_klist)) {
		/* Defer activation to avoid recursion. */
		kq->kq_state |= KQ_TASK;
		task_add(systqmp, &kq->kq_task);
	}
}

/*
 * Ensure the knote id hash exists.  The kq_lock is dropped around the
 * sleeping allocation, so the state is re-checked afterwards; if another
 * thread won the race, the extra hash is freed (again with the lock
 * dropped, since hashfree() may sleep).
 */
static void
kqueue_expand_hash(struct kqueue *kq)
{
	struct knlist *hash;
	u_long hashmask;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_knhashmask == 0) {
		mtx_leave(&kq->kq_lock);
		hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask);
		mtx_enter(&kq->kq_lock);
		if (kq->kq_knhashmask == 0) {
			kq->kq_knhash = hash;
			kq->kq_knhashmask = hashmask;
		} else {
			/* Another thread has allocated the hash. */
			mtx_leave(&kq->kq_lock);
			hashfree(hash, KN_HASHSIZE, M_KEVENT);
			mtx_enter(&kq->kq_lock);
		}
	}
}

/*
 * Grow the per-fd knote list array so that it covers descriptor fd.
 * Like kqueue_expand_hash(), the kq_lock is dropped for the allocation
 * and the size is re-checked to resolve races with other expanders.
 */
static void
kqueue_expand_list(struct kqueue *kq, int fd)
{
	struct knlist *list, *olist;
	int size, osize;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_knlistsize <= fd) {
		size = kq->kq_knlistsize;
		mtx_leave(&kq->kq_lock);
		/* Round the new size up in KQEXTENT-sized steps. */
		while (size <= fd)
			size += KQEXTENT;
		list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK);
		mtx_enter(&kq->kq_lock);
		if (kq->kq_knlistsize <= fd) {
			/* Copy the old slots, zero the newly added ones. */
			memcpy(list, kq->kq_knlist,
			    kq->kq_knlistsize * sizeof(*list));
			memset(&list[kq->kq_knlistsize], 0,
			    (size - kq->kq_knlistsize) * sizeof(*list));
			olist = kq->kq_knlist;
			osize = kq->kq_knlistsize;
			kq->kq_knlist = list;
			kq->kq_knlistsize = size;
			mtx_leave(&kq->kq_lock);
			free(olist, M_KEVENT, osize * sizeof(*list));
			mtx_enter(&kq->kq_lock);
		} else {
			/* Another thread has expanded the list. */
			mtx_leave(&kq->kq_lock);
			free(list, M_KEVENT, size * sizeof(*list));
			mtx_enter(&kq->kq_lock);
		}
	}
}

/*
 * Acquire a knote, return non-zero on success, 0 on failure.
 *
 * If we cannot acquire the knote we sleep and return 0.  The knote
 * may be stale on return in this case and the caller must restart
 * whatever loop they are in.
 *
 * If we are about to sleep and klist is non-NULL, the list is unlocked
 * before sleep and remains unlocked on return.
 */
int
knote_acquire(struct knote *kn, struct klist *klist, int ls)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);

	if (kn->kn_status & KN_PROCESSING) {
		kn->kn_status |= KN_WAITING;
		if (klist != NULL) {
			mtx_leave(&kq->kq_lock);
			klist_unlock(klist, ls);
			/* XXX Timeout resolves potential loss of wakeup. */
			tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1));
		} else {
			/* PNORELOCK: kq_lock stays released on return. */
			msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts",
			    SEC_TO_NSEC(1));
		}
		/* knote may be stale now */
		return (0);
	}
	kn->kn_status |= KN_PROCESSING;
	return (1);
}

/*
 * Release an acquired knote, clearing KN_PROCESSING.
 */
void
knote_release(struct knote *kn)
{
	MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_status & KN_WAITING) {
		/* Wake threads blocked in knote_acquire(). */
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	kn->kn_status &= ~KN_PROCESSING;
	/* kn should not be accessed anymore */
}

/*
 * activate one knote.
 */
void
knote_activate(struct knote *kn)
{
	MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock);

	kn->kn_status |= KN_ACTIVE;
	/* Only queue if not already queued and not disabled. */
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)
		knote_enqueue(kn);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	int ls;

	ls = klist_lock(list);
	knote_locked(list, hint);
	klist_unlock(list, ls);
}

/*
 * As knote(), but the caller already holds the klist lock.
 */
void
knote_locked(struct klist *list, long hint)
{
	struct knote *kn, *kn0;
	struct kqueue *kq;

	KLIST_ASSERT_LOCKED(list);

	/* _SAFE: filters/activation may remove kn from the list. */
	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) {
		if (filter_event(kn, hint)) {
			kq = kn->kn_kq;
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			mtx_leave(&kq->kq_lock);
		}
	}
}

/*
 * remove all knotes from a specified knlist
 */
void
knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, int idx,
    int purge)
{
	struct knote *kn;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	/* Always fetch array pointer as another thread can resize kq_knlist. */
	while ((kn = SLIST_FIRST(*plist + idx)) != NULL) {
		KASSERT(kn->kn_kq == kq);

		if (!purge) {
			/* Skip pending badfd knotes. */
			while (kn->kn_fop == &badfd_filtops) {
				kn = SLIST_NEXT(kn, kn_link);
				if (kn == NULL)
					return;
				KASSERT(kn->kn_kq == kq);
			}
		}

		if (!knote_acquire(kn, NULL, 0)) {
			/* knote_acquire() has released kq_lock. */
			mtx_enter(&kq->kq_lock);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		filter_detach(kn);

		/*
		 * Notify poll(2) and select(2) when a monitored
		 * file descriptor is closed.
		 *
		 * This reuses the original knote for delivering the
		 * notification so as to avoid allocating memory.
		 */
		if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) &&
		    !(p->p_kq == kq &&
		      p->p_kq_serial > (unsigned long)kn->kn_udata) &&
		    kn->kn_fop != &badfd_filtops) {
			KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD);
			FRELE(kn->kn_fp, p);
			kn->kn_fp = NULL;

			kn->kn_fop = &badfd_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			continue;
		}

		knote_drop(kn, p);
		mtx_enter(&kq->kq_lock);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_p->ps_fd;
	struct kqueue *kq;

	/*
	 * fdplock can be ignored if the file descriptor table is being freed
	 * because no other thread can access the fdp.
	 */
	if (fdp->fd_refcnt != 0)
		fdpassertlocked(fdp);

	LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) {
		mtx_enter(&kq->kq_lock);
		if (fd < kq->kq_knlistsize)
			knote_remove(p, kq, &kq->kq_knlist, fd, 0);
		mtx_leave(&kq->kq_lock);
	}
}

/*
 * handle a process exiting, including the triggering of NOTE_EXIT notes
 * XXX this could be more efficient, doing a single pass down the klist
 */
void
knote_processexit(struct process *pr)
{
	/* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */
	rw_enter_write(&kqueue_ps_list_lock);
	mtx_enter(&pr->ps_mtx);
	knote_locked(&pr->ps_klist, NOTE_EXIT);
	mtx_leave(&pr->ps_mtx);
	rw_exit_write(&kqueue_ps_list_lock);

	/* remove other knotes hanging off the process */
	klist_invalidate(&pr->ps_klist);
}

/*
 * Deliver NOTE_FORK, with the child pid encoded in the hint,
 * to the knotes attached to the forking process.
 */
void
knote_processfork(struct process *pr, pid_t pid)
{
	/* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */
	rw_enter_write(&kqueue_ps_list_lock);
	mtx_enter(&pr->ps_mtx);
	knote_locked(&pr->ps_klist, NOTE_FORK | pid);
	mtx_leave(&pr->ps_mtx);
	rw_exit_write(&kqueue_ps_list_lock);
}

/*
 * Link a knote into its kqueue's lookup structure: the per-fd list
 * for fd-backed filters, the id hash otherwise.
 */
void
knote_attach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
		KASSERT(kq->kq_knlistsize > kn->kn_id);
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		KASSERT(kq->kq_knhashmask != 0);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kq->kq_nknotes++;
}

/*
 * Unlink a knote from its kqueue's lookup structure.
 * Counterpart of knote_attach().
 */
void
knote_detach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_status & KN_PROCESSING);

	kq->kq_nknotes--;
	if (kn->kn_fop->f_flags & FILTEROP_ISFD)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	SLIST_REMOVE(list, kn, knote, kn_link);
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling FRELE and pool_put.
 */
void
knote_drop(struct knote *kn, struct proc *p)
{
	struct kqueue *kq = kn->kn_kq;

	KASSERT(kn->kn_filter != EVFILT_MARKER);

	mtx_enter(&kq->kq_lock);
	knote_detach(kn);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_status & KN_WAITING) {
		/* Wake threads blocked in knote_acquire(). */
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	mtx_leave(&kq->kq_lock);

	/* Release the file reference and the knote itself, unlocked. */
	if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL)
		FRELE(kn->kn_fp, p);
	pool_put(&knote_pool, kn);
}


/*
 * Append a knote to the kqueue's pending-event queue and wake waiters.
 */
void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	kqueue_check(kq);
	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_check(kq);
	kqueue_wakeup(kq);
}

/*
 * Remove a knote from the kqueue's pending-event queue.
 */
void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_QUEUED);

	kqueue_check(kq);
	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	kqueue_check(kq);
}

/*
 * Assign parameters to the knote.
 *
 * The knote's object lock must be held.
 */
void
knote_assign(const struct kevent *kev, struct knote *kn)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;
	kn->kn_udata = kev->udata;
}

/*
 * Submit the knote's event for delivery.
 *
 * The knote's object lock must be held.
 */
void
knote_submit(struct knote *kn, struct kevent *kev)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	if (kev != NULL) {
		*kev = kn->kn_kevent;
		/* EV_CLEAR: reset state after the event is reported. */
		if (kn->kn_flags & EV_CLEAR) {
			kn->kn_fflags = 0;
			kn->kn_data = 0;
		}
	}
}

/*
 * Initialize a klist with the given locking operations and lock argument.
 * A NULL ops vector selects the kernel-lock fallback in klist_lock().
 */
void
klist_init(struct klist *klist, const struct klistops *ops, void *arg)
{
	SLIST_INIT(&klist->kl_list);
	klist->kl_ops = ops;
	klist->kl_arg = arg;
}

/*
 * Tear down a klist.  It must already be empty.
 */
void
klist_free(struct klist *klist)
{
	KASSERT(SLIST_EMPTY(&klist->kl_list));
}

/*
 * Insert a knote into the klist, taking the klist lock.
 */
void
klist_insert(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
	klist_unlock(klist, ls);
}

/*
 * As klist_insert(), but the caller already holds the klist lock.
 */
void
klist_insert_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
}

/*
 * Remove a knote from the klist, taking the klist lock.
 */
void
klist_remove(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
	klist_unlock(klist, ls);
}

/*
 * As klist_remove(), but the caller already holds the klist lock.
 */
void
klist_remove_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
}

/*
 * Detach all knotes from klist. The knotes are rewired to indicate EOF.
 *
 * The caller of this function must not hold any locks that can block
 * filterops callbacks that run with KN_PROCESSING.
 * Otherwise this function might deadlock.
 */
void
klist_invalidate(struct klist *list)
{
	struct knote *kn;
	struct kqueue *kq;
	struct proc *p = curproc;
	int ls;

	NET_ASSERT_UNLOCKED();

	ls = klist_lock(list);
	while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) {
		kq = kn->kn_kq;
		mtx_enter(&kq->kq_lock);
		if (!knote_acquire(kn, list, ls)) {
			/* knote_acquire() has released kq_lock
			 * and klist lock. */
			ls = klist_lock(list);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		klist_unlock(list, ls);
		filter_detach(kn);
		if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
			/* Rewire fd-backed knotes to report EOF. */
			kn->kn_fop = &dead_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			mtx_leave(&kq->kq_lock);
		} else {
			knote_drop(kn, p);
		}
		ls = klist_lock(list);
	}
	klist_unlock(list, ls);
}

/*
 * Lock a klist through its ops vector; with no ops, fall back to
 * the kernel lock plus splhigh().  Returns the saved lock state
 * to pass to klist_unlock().
 */
static int
klist_lock(struct klist *list)
{
	int ls = 0;

	if (list->kl_ops != NULL) {
		ls = list->kl_ops->klo_lock(list->kl_arg);
	} else {
		KERNEL_LOCK();
		ls = splhigh();
	}
	return ls;
}

/*
 * Unlock a klist previously locked with klist_lock().
 */
static void
klist_unlock(struct klist *list, int ls)
{
	if (list->kl_ops != NULL) {
		list->kl_ops->klo_unlock(list->kl_arg, ls);
	} else {
		splx(ls);
		KERNEL_UNLOCK();
	}
}

/* klistops backed by a mutex(9). */
static void
klist_mutex_assertlk(void *arg)
{
	struct mutex *mtx = arg;

	(void)mtx;

	MUTEX_ASSERT_LOCKED(mtx);
}

static int
klist_mutex_lock(void *arg)
{
	struct mutex *mtx = arg;

	mtx_enter(mtx);
	/* Mutexes carry no saved state; always return 0. */
	return 0;
}

static void
klist_mutex_unlock(void *arg, int s)
{
	struct mutex *mtx = arg;

	mtx_leave(mtx);
}

static const struct klistops mutex_klistops = {
	.klo_assertlk = klist_mutex_assertlk,
	.klo_lock = klist_mutex_lock,
	.klo_unlock = klist_mutex_unlock,
};

/*
 * Initialize a klist whose lock is the given mutex.
 */
void
klist_init_mutex(struct klist *klist, struct mutex *mtx)
{
	klist_init(klist, &mutex_klistops, mtx);
}

/* klistops backed by a write-locked rwlock(9). */
static void
klist_rwlock_assertlk(void *arg)
{
	struct rwlock *rwl = arg;

	(void)rwl;

	rw_assert_wrlock(rwl);
}

static int
klist_rwlock_lock(void *arg)
{
	struct rwlock *rwl = arg;

	rw_enter_write(rwl);
	/* Rwlocks carry no saved state; always return 0. */
	return 0;
}

static void
klist_rwlock_unlock(void *arg, int s)
{
	struct rwlock *rwl = arg;

	rw_exit_write(rwl);
}

static const struct klistops rwlock_klistops = {
	.klo_assertlk = klist_rwlock_assertlk,
	.klo_lock = klist_rwlock_lock,
	.klo_unlock = klist_rwlock_unlock,
};

/*
 * Initialize a klist whose lock is the given rwlock.
 */
void
klist_init_rwlock(struct klist *klist, struct rwlock *rwl)
{
	klist_init(klist, &rwlock_klistops, rwl);
}