1*eab38032Sriastradh /* $NetBSD: linux_inotify.c,v 1.7 2024/10/01 16:41:29 riastradh Exp $ */ 28575c986Schristos 38575c986Schristos /*- 48575c986Schristos * Copyright (c) 2023 The NetBSD Foundation, Inc. 58575c986Schristos * All rights reserved. 68575c986Schristos * 78575c986Schristos * This code is derived from software contributed to The NetBSD Foundation 88575c986Schristos * by Theodore Preduta. 98575c986Schristos * 108575c986Schristos * Redistribution and use in source and binary forms, with or without 118575c986Schristos * modification, are permitted provided that the following conditions 128575c986Schristos * are met: 138575c986Schristos * 1. Redistributions of source code must retain the above copyright 148575c986Schristos * notice, this list of conditions and the following disclaimer. 158575c986Schristos * 2. Redistributions in binary form must reproduce the above copyright 168575c986Schristos * notice, this list of conditions and the following disclaimer in the 178575c986Schristos * documentation and/or other materials provided with the distribution. 188575c986Schristos * 198575c986Schristos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 208575c986Schristos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 218575c986Schristos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 228575c986Schristos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 238575c986Schristos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 248575c986Schristos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 258575c986Schristos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 268575c986Schristos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 278575c986Schristos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 288575c986Schristos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 298575c986Schristos * POSSIBILITY OF SUCH DAMAGE. 308575c986Schristos */ 318575c986Schristos #include <sys/cdefs.h> 32*eab38032Sriastradh __KERNEL_RCSID(0, "$NetBSD: linux_inotify.c,v 1.7 2024/10/01 16:41:29 riastradh Exp $"); 338575c986Schristos 348575c986Schristos #include <sys/param.h> 358575c986Schristos #include <sys/types.h> 368575c986Schristos #include <sys/bitops.h> 378575c986Schristos #include <sys/dirent.h> 388575c986Schristos #include <sys/event.h> 398575c986Schristos #include <sys/eventvar.h> 408575c986Schristos #include <sys/errno.h> 418575c986Schristos #include <sys/file.h> 428575c986Schristos #include <sys/filedesc.h> 438575c986Schristos #include <sys/fcntl.h> 448575c986Schristos #include <sys/poll.h> 458575c986Schristos #include <sys/proc.h> 468575c986Schristos #include <sys/selinfo.h> 478575c986Schristos #include <sys/select.h> 488575c986Schristos #include <sys/signal.h> 498575c986Schristos #include <sys/vnode.h> 508575c986Schristos 518575c986Schristos #include <sys/syscallargs.h> 528575c986Schristos 538575c986Schristos #include <compat/linux/common/linux_machdep.h> 548575c986Schristos #include <compat/linux/common/linux_fcntl.h> 558575c986Schristos #include <compat/linux/common/linux_inotify.h> 568575c986Schristos #include <compat/linux/common/linux_ipc.h> 578575c986Schristos #include <compat/linux/common/linux_sched.h> 588575c986Schristos #include <compat/linux/common/linux_sem.h> 598575c986Schristos #include <compat/linux/common/linux_signal.h> 608575c986Schristos 618575c986Schristos #include <compat/linux/linux_syscallargs.h> 628575c986Schristos 638575c986Schristos /* 648575c986Schristos * inotify(2). This interface allows the user to get file system 658575c986Schristos * events and (unlike kqueue(2)) their order is strictly preserved. 668575c986Schristos * While nice, the API has sufficient gotchas that mean we don't want 678575c986Schristos * to add native entry points for it. They are: 688575c986Schristos * 698575c986Schristos * - Because data is returned via read(2), this API is prone to 708575c986Schristos * unaligned memory accesses. There is a note in the Linux man page 718575c986Schristos * that says the name field of struct linux_inotify_event *can* be 728575c986Schristos * used for alignment purposes. In practice, even Linux doesn't 738575c986Schristos * always do this, so for simplicity, we don't ever do this. 748575c986Schristos */ 758575c986Schristos 768575c986Schristos #define LINUX_INOTIFY_MAX_QUEUED 16384 778575c986Schristos #define LINUX_INOTIFY_MAX_FROM_KEVENT 3 788575c986Schristos 798575c986Schristos #if DEBUG_LINUX 808575c986Schristos #define DPRINTF(x) uprintf x 818575c986Schristos #else 828575c986Schristos #define DPRINTF(x) __nothing 838575c986Schristos #endif 848575c986Schristos 858575c986Schristos struct inotify_entry { 868575c986Schristos TAILQ_ENTRY(inotify_entry) ie_entries; 878575c986Schristos char ie_name[NAME_MAX + 1]; 88ba958ad8Schristos struct linux_inotify_event ie_event; 898575c986Schristos }; 908575c986Schristos 918575c986Schristos struct inotify_dir_entries { 928575c986Schristos size_t ide_count; 938575c986Schristos struct inotify_dir_entry { 948575c986Schristos char name[NAME_MAX + 1]; 958575c986Schristos ino_t fileno; 968575c986Schristos } ide_entries[]; 978575c986Schristos }; 988575c986Schristos #define INOTIFY_DIR_ENTRIES_SIZE(count) (sizeof(struct inotify_dir_entries) \ 998575c986Schristos + count * sizeof(struct inotify_dir_entry)) 1008575c986Schristos 1018575c986Schristos struct inotifyfd { 1028575c986Schristos int ifd_kqfd; /* kqueue fd used by this inotify */ 1038575c986Schristos /* instance */ 1048575c986Schristos struct selinfo ifd_sel; /* for EVFILT_READ by epoll */ 1058575c986Schristos kmutex_t ifd_lock; /* lock for ifd_sel, ifd_wds and */ 1068575c986Schristos /* ifd_nwds */ 1078575c986Schristos 1088575c986Schristos struct inotify_dir_entries **ifd_wds; 1098575c986Schristos /* keeps track of watch descriptors */ 1108575c986Schristos /* for directories: snapshot of the */ 1118575c986Schristos /* directory state */ 1128575c986Schristos /* for files: an inotify_dir_entries */ 1138575c986Schristos /* with ide_count == 0 */ 1148575c986Schristos size_t ifd_nwds; /* max watch descriptor that can be */ 1158575c986Schristos /* stored in ifd_wds + 1 */ 1168575c986Schristos 1178575c986Schristos TAILQ_HEAD(, inotify_entry) ifd_qhead; /* queue of pending events */ 1188575c986Schristos size_t ifd_qcount; /* number of pending events */ 1198575c986Schristos kcondvar_t ifd_qcv; /* condvar for blocking reads */ 1208575c986Schristos kmutex_t ifd_qlock; /* lock for ifd_q* and interlock */ 1218575c986Schristos /* for ifd_qcv */ 1228575c986Schristos }; 1238575c986Schristos 1248575c986Schristos struct inotify_kevent_mask_pair { 1258575c986Schristos uint32_t inotify; 1268575c986Schristos uint32_t kevent; 1278575c986Schristos }; 1288575c986Schristos 1298575c986Schristos static int inotify_kev_fetch_changes(void *, const struct kevent *, 1308575c986Schristos struct kevent *, size_t, int); 1318575c986Schristos static int do_inotify_init(struct lwp *, register_t *, int); 1328575c986Schristos static int inotify_close_wd(struct inotifyfd *, int); 1338575c986Schristos static uint32_t inotify_mask_to_kevent_fflags(uint32_t, enum vtype); 1348575c986Schristos static void do_kevent_to_inotify(int32_t, uint32_t, uint32_t, 1358575c986Schristos struct inotify_entry *, size_t *, char *); 1368575c986Schristos static int kevent_to_inotify(struct inotifyfd *, int, enum vtype, uint32_t, 1378575c986Schristos uint32_t, struct inotify_entry *, size_t *); 138ed30ecdeSchristos static int inotify_readdir(file_t *, struct dirent *, int *, bool); 139ed30ecdeSchristos static struct inotify_dir_entries *get_inotify_dir_entries(int, bool); 1408575c986Schristos 1418575c986Schristos static int inotify_filt_attach(struct knote *); 1428575c986Schristos static void inotify_filt_detach(struct knote *); 1438575c986Schristos static int inotify_filt_event(struct knote *, long); 1448575c986Schristos static void inotify_read_filt_detach(struct knote *); 1458575c986Schristos static int inotify_read_filt_event(struct knote *, long); 1468575c986Schristos 1478575c986Schristos static int inotify_read(file_t *, off_t *, struct uio *, kauth_cred_t, int); 1488575c986Schristos static int inotify_close(file_t *); 1498575c986Schristos static int inotify_poll(file_t *, int); 1508575c986Schristos static int inotify_kqfilter(file_t *, struct knote *); 1518575c986Schristos static void inotify_restart(file_t *); 1528575c986Schristos 1538575c986Schristos static const char inotify_filtname[] = "LINUX_INOTIFY"; 1548575c986Schristos static int inotify_filtid; 1558575c986Schristos 1568575c986Schristos /* "fake" EVFILT_VNODE that gets attached to ifd_deps */ 1578575c986Schristos static const struct filterops inotify_filtops = { 1588575c986Schristos .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 1598575c986Schristos .f_attach = inotify_filt_attach, 1608575c986Schristos .f_detach = inotify_filt_detach, 1618575c986Schristos .f_event = inotify_filt_event, 1628575c986Schristos .f_touch = NULL, 1638575c986Schristos }; 1648575c986Schristos 1658575c986Schristos /* EVFILT_READ attached to inotifyfd (to support watching via epoll) */ 1668575c986Schristos static const struct filterops inotify_read_filtops = { 1678575c986Schristos .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 1688575c986Schristos .f_attach = NULL, /* attached via .fo_kqfilter */ 1698575c986Schristos .f_detach = inotify_read_filt_detach, 1708575c986Schristos .f_event = inotify_read_filt_event, 1718575c986Schristos .f_touch = NULL, 1728575c986Schristos }; 1738575c986Schristos 1748575c986Schristos static const struct fileops inotify_fileops = { 1758575c986Schristos .fo_name = "inotify", 1768575c986Schristos .fo_read = inotify_read, 1778575c986Schristos .fo_write = fbadop_write, 1788575c986Schristos .fo_ioctl = fbadop_ioctl, 1798575c986Schristos .fo_fcntl = fnullop_fcntl, 1808575c986Schristos .fo_poll = inotify_poll, 1818575c986Schristos .fo_stat = fbadop_stat, 1828575c986Schristos .fo_close = inotify_close, 1838575c986Schristos .fo_kqfilter = inotify_kqfilter, 1848575c986Schristos .fo_restart = inotify_restart, 1858575c986Schristos .fo_fpathconf = (void *)eopnotsupp, 1868575c986Schristos }; 1878575c986Schristos 1888575c986Schristos /* basic flag translations */ 1898575c986Schristos static const struct inotify_kevent_mask_pair common_inotify_to_kevent[] = { 1908575c986Schristos { .inotify = LINUX_IN_ATTRIB, .kevent = NOTE_ATTRIB, }, 1918575c986Schristos { .inotify = LINUX_IN_CLOSE_NOWRITE, .kevent = NOTE_CLOSE, }, 1928575c986Schristos { .inotify = LINUX_IN_OPEN, .kevent = NOTE_OPEN, }, 1938575c986Schristos { .inotify = LINUX_IN_MOVE_SELF, .kevent = NOTE_RENAME, }, 1948575c986Schristos }; 1958575c986Schristos static const size_t common_inotify_to_kevent_len = 1968575c986Schristos __arraycount(common_inotify_to_kevent); 1978575c986Schristos 1988575c986Schristos static const struct inotify_kevent_mask_pair vreg_inotify_to_kevent[] = { 1998575c986Schristos { .inotify = LINUX_IN_ACCESS, .kevent = NOTE_READ, }, 2008575c986Schristos { .inotify = LINUX_IN_ATTRIB, .kevent = NOTE_ATTRIB|NOTE_LINK, }, 2018575c986Schristos { .inotify = LINUX_IN_CLOSE_WRITE, .kevent = NOTE_CLOSE_WRITE, }, 2028575c986Schristos { .inotify = LINUX_IN_MODIFY, .kevent = NOTE_WRITE, }, 2038575c986Schristos }; 2048575c986Schristos static const size_t vreg_inotify_to_kevent_len = 2058575c986Schristos __arraycount(vreg_inotify_to_kevent); 2068575c986Schristos 2078575c986Schristos static const struct inotify_kevent_mask_pair vdir_inotify_to_kevent[] = { 2088575c986Schristos { .inotify = LINUX_IN_ACCESS, .kevent = NOTE_READ, }, 2098575c986Schristos { .inotify = LINUX_IN_CREATE, .kevent = NOTE_WRITE, }, 2108575c986Schristos { .inotify = LINUX_IN_DELETE, .kevent = NOTE_WRITE, }, 2118575c986Schristos { .inotify = LINUX_IN_MOVED_FROM, .kevent = NOTE_WRITE, }, 2128575c986Schristos { .inotify = LINUX_IN_MOVED_TO, .kevent = NOTE_WRITE, }, 2138575c986Schristos }; 2148575c986Schristos static const size_t vdir_inotify_to_kevent_len = 2158575c986Schristos __arraycount(vdir_inotify_to_kevent); 2168575c986Schristos 2178575c986Schristos static const struct inotify_kevent_mask_pair common_kevent_to_inotify[] = { 2188575c986Schristos { .kevent = NOTE_ATTRIB, .inotify = LINUX_IN_ATTRIB, }, 2198575c986Schristos { .kevent = NOTE_CLOSE, .inotify = LINUX_IN_CLOSE_NOWRITE, }, 2208575c986Schristos { .kevent = NOTE_CLOSE_WRITE, .inotify = LINUX_IN_CLOSE_WRITE, }, 2218575c986Schristos { .kevent = NOTE_OPEN, .inotify = LINUX_IN_OPEN, }, 2228575c986Schristos { .kevent = NOTE_READ, .inotify = LINUX_IN_ACCESS, }, 2238575c986Schristos { .kevent = NOTE_RENAME, .inotify = LINUX_IN_MOVE_SELF, }, 2248575c986Schristos { .kevent = NOTE_REVOKE, .inotify = LINUX_IN_UNMOUNT, }, 2258575c986Schristos }; 2268575c986Schristos static const size_t common_kevent_to_inotify_len = 2278575c986Schristos __arraycount(common_kevent_to_inotify); 2288575c986Schristos 2298575c986Schristos static const struct inotify_kevent_mask_pair vreg_kevent_to_inotify[] = { 2308575c986Schristos { .kevent = NOTE_DELETE|NOTE_LINK, .inotify = LINUX_IN_ATTRIB, }, 2318575c986Schristos { .kevent = NOTE_WRITE, .inotify = LINUX_IN_MODIFY, }, 2328575c986Schristos }; 2338575c986Schristos static const size_t vreg_kevent_to_inotify_len = 2348575c986Schristos __arraycount(vreg_kevent_to_inotify); 2358575c986Schristos 2368575c986Schristos /* 2378575c986Schristos * Register the custom kfilter for inotify. 2388575c986Schristos */ 2398575c986Schristos int 2408575c986Schristos linux_inotify_init(void) 2418575c986Schristos { 2428575c986Schristos return kfilter_register(inotify_filtname, &inotify_filtops, 2438575c986Schristos &inotify_filtid); 2448575c986Schristos } 2458575c986Schristos 2468575c986Schristos /* 2478575c986Schristos * Unregister the custom kfilter for inotify. 2488575c986Schristos */ 2498575c986Schristos int 2508575c986Schristos linux_inotify_fini(void) 2518575c986Schristos { 2528575c986Schristos return kfilter_unregister(inotify_filtname); 2538575c986Schristos } 2548575c986Schristos 2558575c986Schristos /* 2568575c986Schristos * Copyin callback used by kevent. This copies already converted 2578575c986Schristos * filters from kernel memory to the kevent internal kernel memory. 2588575c986Schristos * Hence the memcpy instead of copyin. 2598575c986Schristos */ 2608575c986Schristos static int 2618575c986Schristos inotify_kev_fetch_changes(void *ctx, const struct kevent *changelist, 2628575c986Schristos struct kevent *changes, size_t index, int n) 2638575c986Schristos { 2648575c986Schristos memcpy(changes, changelist + index, n * sizeof(*changes)); 2658575c986Schristos 2668575c986Schristos return 0; 2678575c986Schristos } 2688575c986Schristos 2698575c986Schristos /* 2708575c986Schristos * Initialize a new inotify fd. 2718575c986Schristos */ 2728575c986Schristos static int 2738575c986Schristos do_inotify_init(struct lwp *l, register_t *retval, int flags) 2748575c986Schristos { 2758575c986Schristos file_t *fp; 2768575c986Schristos int error, fd; 2778575c986Schristos struct proc *p = l->l_proc; 2788575c986Schristos struct inotifyfd *ifd; 2798575c986Schristos struct sys_kqueue1_args kqa; 2808575c986Schristos 2818575c986Schristos if (flags & ~(LINUX_IN_ALL_FLAGS)) 2828575c986Schristos return EINVAL; 2838575c986Schristos 2848575c986Schristos ifd = kmem_zalloc(sizeof(*ifd), KM_SLEEP); 2858575c986Schristos mutex_init(&ifd->ifd_lock, MUTEX_DEFAULT, IPL_NONE); 2868575c986Schristos mutex_init(&ifd->ifd_qlock, MUTEX_DEFAULT, IPL_NONE); 2878575c986Schristos cv_init(&ifd->ifd_qcv, "inotify"); 2888575c986Schristos selinit(&ifd->ifd_sel); 2898575c986Schristos TAILQ_INIT(&ifd->ifd_qhead); 2908575c986Schristos 2918575c986Schristos ifd->ifd_nwds = 1; 2928575c986Schristos ifd->ifd_wds = kmem_zalloc(ifd->ifd_nwds * sizeof(*ifd->ifd_wds), 2938575c986Schristos KM_SLEEP); 2948575c986Schristos 2958575c986Schristos SCARG(&kqa, flags) = 0; 2968575c986Schristos if (flags & LINUX_IN_NONBLOCK) 2978575c986Schristos SCARG(&kqa, flags) |= O_NONBLOCK; 2988575c986Schristos error = sys_kqueue1(l, &kqa, retval); 2998575c986Schristos if (error != 0) 3008575c986Schristos goto leave0; 3018575c986Schristos ifd->ifd_kqfd = *retval; 3028575c986Schristos 3038575c986Schristos error = fd_allocfile(&fp, &fd); 3048575c986Schristos if (error != 0) 3058575c986Schristos goto leave1; 3068575c986Schristos 3078575c986Schristos fp->f_flag = FREAD; 3088575c986Schristos if (flags & LINUX_IN_NONBLOCK) 3098575c986Schristos fp->f_flag |= FNONBLOCK; 3108575c986Schristos fp->f_type = DTYPE_MISC; 3118575c986Schristos fp->f_ops = &inotify_fileops; 3128575c986Schristos fp->f_data = ifd; 3138575c986Schristos fd_set_exclose(l, fd, (flags & LINUX_IN_CLOEXEC) != 0); 3148575c986Schristos fd_affix(p, fp, fd); 3158575c986Schristos 3168575c986Schristos *retval = fd; 3178575c986Schristos return 0; 3188575c986Schristos 3198575c986Schristos leave1: 3208575c986Schristos KASSERT(fd_getfile(ifd->ifd_kqfd) != NULL); 3218575c986Schristos fd_close(ifd->ifd_kqfd); 3228575c986Schristos leave0: 3238575c986Schristos kmem_free(ifd->ifd_wds, ifd->ifd_nwds * sizeof(*ifd->ifd_wds)); 3248575c986Schristos kmem_free(ifd, sizeof(*ifd)); 3258575c986Schristos 3268575c986Schristos mutex_destroy(&ifd->ifd_lock); 3278575c986Schristos mutex_destroy(&ifd->ifd_qlock); 3288575c986Schristos cv_destroy(&ifd->ifd_qcv); 3298575c986Schristos seldestroy(&ifd->ifd_sel); 3308575c986Schristos 3318575c986Schristos return error; 3328575c986Schristos } 3338575c986Schristos 3346ecdc276Schristos #ifndef __aarch64__ 3358575c986Schristos /* 3368575c986Schristos * inotify_init(2). Initialize a new inotify fd with flags=0. 3378575c986Schristos */ 3388575c986Schristos int 3398575c986Schristos linux_sys_inotify_init(struct lwp *l, const void *v, register_t *retval) 3408575c986Schristos { 3418575c986Schristos return do_inotify_init(l, retval, 0); 3428575c986Schristos } 3436ecdc276Schristos #endif 3448575c986Schristos 3458575c986Schristos /* 3468575c986Schristos * inotify_init(2). Initialize a new inotify fd with the given flags. 3478575c986Schristos */ 3488575c986Schristos int 3498575c986Schristos linux_sys_inotify_init1(struct lwp *l, 3508575c986Schristos const struct linux_sys_inotify_init1_args *uap, register_t *retval) 3518575c986Schristos { 3528575c986Schristos /* { 3538575c986Schristos syscallarg(int) flags; 3548575c986Schristos } */ 3558575c986Schristos 3568575c986Schristos return do_inotify_init(l, retval, SCARG(uap, flags)); 3578575c986Schristos } 3588575c986Schristos 3598575c986Schristos /* 3608575c986Schristos * Convert inotify mask to the fflags of an equivalent kevent. 3618575c986Schristos */ 3628575c986Schristos static uint32_t 3638575c986Schristos inotify_mask_to_kevent_fflags(uint32_t mask, enum vtype type) 3648575c986Schristos { 3658575c986Schristos const struct inotify_kevent_mask_pair *type_inotify_to_kevent; 3668575c986Schristos uint32_t fflags; 3678575c986Schristos size_t i, type_inotify_to_kevent_len; 3688575c986Schristos 3698575c986Schristos switch (type) { 3708575c986Schristos case VREG: 3718575c986Schristos case VDIR: 3728575c986Schristos case VLNK: 3738575c986Schristos break; 3748575c986Schristos 3758575c986Schristos default: 3768575c986Schristos return 0; 3778575c986Schristos } 3788575c986Schristos 3798575c986Schristos /* flags that all watches could have */ 3808575c986Schristos fflags = NOTE_DELETE|NOTE_REVOKE; 3818575c986Schristos for (i = 0; i < common_inotify_to_kevent_len; i++) 3828575c986Schristos if (mask & common_inotify_to_kevent[i].inotify) 3838575c986Schristos fflags |= common_inotify_to_kevent[i].kevent; 3848575c986Schristos 3858575c986Schristos /* flags that depend on type */ 3868575c986Schristos switch (type) { 3878575c986Schristos case VREG: 3888575c986Schristos type_inotify_to_kevent = vreg_inotify_to_kevent; 3898575c986Schristos type_inotify_to_kevent_len = vreg_inotify_to_kevent_len; 3908575c986Schristos break; 3918575c986Schristos 3928575c986Schristos case VDIR: 3938575c986Schristos type_inotify_to_kevent = vdir_inotify_to_kevent; 3948575c986Schristos type_inotify_to_kevent_len = vdir_inotify_to_kevent_len; 3958575c986Schristos break; 3968575c986Schristos 3978575c986Schristos default: 3988575c986Schristos type_inotify_to_kevent_len = 0; 3998575c986Schristos break; 4008575c986Schristos } 4018575c986Schristos for (i = 0; i < type_inotify_to_kevent_len; i++) 4028575c986Schristos if (mask & type_inotify_to_kevent[i].inotify) 4038575c986Schristos fflags |= type_inotify_to_kevent[i].kevent; 4048575c986Schristos 4058575c986Schristos return fflags; 4068575c986Schristos } 4078575c986Schristos 4088575c986Schristos /* 4098575c986Schristos * inotify_add_watch(2). Open a fd for pathname (if desired by mask) 4108575c986Schristos * track it and add an equivalent kqueue event for it in 4118575c986Schristos * ifd->ifd_kqfd. 4128575c986Schristos */ 4138575c986Schristos int 4148575c986Schristos linux_sys_inotify_add_watch(struct lwp *l, 4158575c986Schristos const struct linux_sys_inotify_add_watch_args *uap, register_t *retval) 4168575c986Schristos { 4178575c986Schristos /* { 4188575c986Schristos syscallarg(int) fd; 4198575c986Schristos syscallarg(const char *) pathname; 4208575c986Schristos syscallarg(uint32_t) mask; 4218575c986Schristos } */ 422ed30ecdeSchristos int wd, i, error = 0; 4238575c986Schristos file_t *fp, *wp, *cur_fp; 4248575c986Schristos struct inotifyfd *ifd; 4258575c986Schristos struct inotify_dir_entries **new_wds; 4268575c986Schristos struct knote *kn, *tmpkn; 4278575c986Schristos struct sys_open_args oa; 4288575c986Schristos struct kevent kev; 429ed30ecdeSchristos struct vnode *wvp; 430ed30ecdeSchristos namei_simple_flags_t sflags; 4318575c986Schristos struct kevent_ops k_ops = { 4328575c986Schristos .keo_private = NULL, 4338575c986Schristos .keo_fetch_timeout = NULL, 4348575c986Schristos .keo_fetch_changes = inotify_kev_fetch_changes, 4358575c986Schristos .keo_put_events = NULL, 4368575c986Schristos }; 4378575c986Schristos const int fd = SCARG(uap, fd); 4388575c986Schristos const uint32_t mask = SCARG(uap, mask); 4398575c986Schristos 4408575c986Schristos if (mask & ~LINUX_IN_ADD_KNOWN) 4418575c986Schristos return EINVAL; 4428575c986Schristos 4438575c986Schristos fp = fd_getfile(fd); 4448575c986Schristos if (fp == NULL) 4458575c986Schristos return EBADF; 4468575c986Schristos 4478575c986Schristos if (fp->f_ops != &inotify_fileops) { 4488575c986Schristos /* not an inotify fd */ 4498575c986Schristos error = EBADF; 4508575c986Schristos goto leave0; 4518575c986Schristos } 4528575c986Schristos 4538575c986Schristos ifd = fp->f_data; 4548575c986Schristos 4558575c986Schristos mutex_enter(&ifd->ifd_lock); 4568575c986Schristos 4578575c986Schristos if (mask & LINUX_IN_DONT_FOLLOW) 458ed30ecdeSchristos sflags = NSM_NOFOLLOW_TRYEMULROOT; 459ed30ecdeSchristos else 460ed30ecdeSchristos sflags = NSM_FOLLOW_TRYEMULROOT; 461ed30ecdeSchristos error = namei_simple_user(SCARG(uap, pathname), sflags, &wvp); 4628575c986Schristos if (error != 0) 4638575c986Schristos goto leave1; 4648575c986Schristos 4658575c986Schristos /* Check to see if we already have a descriptor to wd's file. */ 466ed30ecdeSchristos wd = -1; 4678575c986Schristos for (i = 0; i < ifd->ifd_nwds; i++) { 4688575c986Schristos if (ifd->ifd_wds[i] != NULL) { 4698575c986Schristos cur_fp = fd_getfile(i); 4708575c986Schristos if (cur_fp == NULL) { 4718575c986Schristos DPRINTF(("%s: wd=%d was closed externally\n", 4728575c986Schristos __func__, i)); 4738575c986Schristos error = EBADF; 4748575c986Schristos goto leave1; 4758575c986Schristos } 4768575c986Schristos if (cur_fp->f_type != DTYPE_VNODE) { 4778575c986Schristos DPRINTF(("%s: wd=%d was replaced " 4788575c986Schristos "with a non-vnode\n", __func__, i)); 4798575c986Schristos error = EBADF; 4808575c986Schristos } 481ed30ecdeSchristos if (error == 0 && cur_fp->f_vnode == wvp) 482ed30ecdeSchristos wd = i; 4838575c986Schristos fd_putfile(i); 4848575c986Schristos if (error != 0) 4858575c986Schristos goto leave1; 4868575c986Schristos 487ed30ecdeSchristos if (wd != -1) 4888575c986Schristos break; 4898575c986Schristos } 4908575c986Schristos } 4918575c986Schristos 492ed30ecdeSchristos if (wd == -1) { 4938575c986Schristos /* 494ed30ecdeSchristos * If we do not have a descriptor to wd's file, we 495ed30ecdeSchristos * need to open the watch descriptor. 4968575c986Schristos */ 497ed30ecdeSchristos SCARG(&oa, path) = SCARG(uap, pathname); 498ed30ecdeSchristos SCARG(&oa, mode) = 0; 499ed30ecdeSchristos SCARG(&oa, flags) = O_RDONLY; 500ed30ecdeSchristos if (mask & LINUX_IN_DONT_FOLLOW) 501ed30ecdeSchristos SCARG(&oa, flags) |= O_NOFOLLOW; 502ed30ecdeSchristos if (mask & LINUX_IN_ONLYDIR) 503ed30ecdeSchristos SCARG(&oa, flags) |= O_DIRECTORY; 504ed30ecdeSchristos 505ed30ecdeSchristos error = sys_open(l, &oa, retval); 506ed30ecdeSchristos if (error != 0) 507ed30ecdeSchristos goto leave1; 508ed30ecdeSchristos wd = *retval; 509ed30ecdeSchristos wp = fd_getfile(wd); 510ed30ecdeSchristos KASSERT(wp != NULL); 511ed30ecdeSchristos KASSERT(wp->f_type == DTYPE_VNODE); 512ed30ecdeSchristos 513ed30ecdeSchristos /* translate the flags */ 514ed30ecdeSchristos memset(&kev, 0, sizeof(kev)); 515ed30ecdeSchristos EV_SET(&kev, wd, inotify_filtid, EV_ADD|EV_ENABLE, 516ed30ecdeSchristos NOTE_DELETE|NOTE_REVOKE, 0, ifd); 517ed30ecdeSchristos if (mask & LINUX_IN_ONESHOT) 518ed30ecdeSchristos kev.flags |= EV_ONESHOT; 519ed30ecdeSchristos kev.fflags |= inotify_mask_to_kevent_fflags(mask, 520ed30ecdeSchristos wp->f_vnode->v_type); 521ed30ecdeSchristos 5228575c986Schristos error = kevent1(retval, ifd->ifd_kqfd, &kev, 1, NULL, 0, NULL, 5238575c986Schristos &k_ops); 5248575c986Schristos if (error != 0) { 5258575c986Schristos KASSERT(fd_getfile(wd) != NULL); 5268575c986Schristos fd_close(wd); 5278575c986Schristos } else { 5288575c986Schristos /* Success! */ 5298575c986Schristos *retval = wd; 5308575c986Schristos 531100a3398Sandvar /* Resize ifd_nwds to accommodate wd. */ 5328575c986Schristos if (wd+1 > ifd->ifd_nwds) { 5338575c986Schristos new_wds = kmem_zalloc( 5348575c986Schristos (wd+1) * sizeof(*ifd->ifd_wds), KM_SLEEP); 5358575c986Schristos memcpy(new_wds, ifd->ifd_wds, 5368575c986Schristos ifd->ifd_nwds * sizeof(*ifd->ifd_wds)); 5378575c986Schristos 5388575c986Schristos kmem_free(ifd->ifd_wds, 5398575c986Schristos ifd->ifd_nwds * sizeof(*ifd->ifd_wds)); 5408575c986Schristos 5418575c986Schristos ifd->ifd_wds = new_wds; 5428575c986Schristos ifd->ifd_nwds = wd+1; 5438575c986Schristos } 5448575c986Schristos 545ed30ecdeSchristos ifd->ifd_wds[wd] = get_inotify_dir_entries(wd, true); 5468575c986Schristos } 5478575c986Schristos } else { 5488575c986Schristos /* 5498575c986Schristos * If we do have a descriptor to wd's file, try to edit 5508575c986Schristos * the relevant knote. 5518575c986Schristos */ 5528575c986Schristos if (mask & LINUX_IN_MASK_CREATE) { 5538575c986Schristos error = EEXIST; 5548575c986Schristos goto leave1; 5558575c986Schristos } 5568575c986Schristos 557ed30ecdeSchristos wp = fd_getfile(wd); 5588575c986Schristos if (wp == NULL) { 5598575c986Schristos DPRINTF(("%s: wd=%d was closed externally " 560ed30ecdeSchristos "(race, probably)\n", __func__, wd)); 5618575c986Schristos error = EBADF; 5628575c986Schristos goto leave1; 5638575c986Schristos } 564ed30ecdeSchristos if (wp->f_type != DTYPE_VNODE) { 565ed30ecdeSchristos DPRINTF(("%s: wd=%d was replace with a non-vnode " 566ed30ecdeSchristos "(race, probably)\n", __func__, wd)); 567ed30ecdeSchristos error = EBADF; 568ed30ecdeSchristos goto leave2; 569ed30ecdeSchristos } 570ed30ecdeSchristos 571ed30ecdeSchristos kev.fflags = NOTE_DELETE | NOTE_REVOKE 572ed30ecdeSchristos | inotify_mask_to_kevent_fflags(mask, wp->f_vnode->v_type); 5738575c986Schristos 5748575c986Schristos mutex_enter(wp->f_vnode->v_interlock); 5758575c986Schristos 5768575c986Schristos /* 5778575c986Schristos * XXX We are forced to find the appropriate knote 5788575c986Schristos * manually because we cannot create a custom f_touch 5798575c986Schristos * function for inotify_filtops. See filter_touch() 5808575c986Schristos * in kern_event.c for details. 5818575c986Schristos */ 5828575c986Schristos SLIST_FOREACH_SAFE(kn, &wp->f_vnode->v_klist->vk_klist, 5838575c986Schristos kn_selnext, tmpkn) { 5848575c986Schristos if (kn->kn_fop == &inotify_filtops 5858575c986Schristos && ifd == kn->kn_kevent.udata) { 5868575c986Schristos mutex_enter(&kn->kn_kq->kq_lock); 5878575c986Schristos if (mask & LINUX_IN_MASK_ADD) 5888575c986Schristos kn->kn_sfflags |= kev.fflags; 5898575c986Schristos else 5908575c986Schristos kn->kn_sfflags = kev.fflags; 5918575c986Schristos wp->f_vnode->v_klist->vk_interest |= 5928575c986Schristos kn->kn_sfflags; 5938575c986Schristos mutex_exit(&kn->kn_kq->kq_lock); 5948575c986Schristos } 5958575c986Schristos } 5968575c986Schristos 5978575c986Schristos mutex_exit(wp->f_vnode->v_interlock); 598ed30ecdeSchristos 599ed30ecdeSchristos /* Success! */ 600ed30ecdeSchristos *retval = wd; 6018575c986Schristos } 6028575c986Schristos 603ed30ecdeSchristos leave2: 604ed30ecdeSchristos fd_putfile(wd); 6058575c986Schristos leave1: 6068575c986Schristos mutex_exit(&ifd->ifd_lock); 6078575c986Schristos leave0: 6088575c986Schristos fd_putfile(fd); 6098575c986Schristos return error; 6108575c986Schristos } 6118575c986Schristos 6128575c986Schristos /* 6138575c986Schristos * Remove a wd from ifd and close wd. 6148575c986Schristos */ 6158575c986Schristos static int 6168575c986Schristos inotify_close_wd(struct inotifyfd *ifd, int wd) 6178575c986Schristos { 6188575c986Schristos file_t *wp; 6198575c986Schristos int error; 6208575c986Schristos register_t retval; 6218575c986Schristos struct kevent kev; 6228575c986Schristos struct kevent_ops k_ops = { 6238575c986Schristos .keo_private = NULL, 6248575c986Schristos .keo_fetch_timeout = NULL, 6258575c986Schristos .keo_fetch_changes = inotify_kev_fetch_changes, 6268575c986Schristos .keo_put_events = NULL, 6278575c986Schristos }; 6288575c986Schristos 6298575c986Schristos mutex_enter(&ifd->ifd_lock); 6308575c986Schristos 6318575c986Schristos KASSERT(0 <= wd && wd < ifd->ifd_nwds && ifd->ifd_wds[wd] != NULL); 6328575c986Schristos 6338575c986Schristos kmem_free(ifd->ifd_wds[wd], 6348575c986Schristos INOTIFY_DIR_ENTRIES_SIZE(ifd->ifd_wds[wd]->ide_count)); 6358575c986Schristos ifd->ifd_wds[wd] = NULL; 6368575c986Schristos 6378575c986Schristos mutex_exit(&ifd->ifd_lock); 6388575c986Schristos 6398575c986Schristos wp = fd_getfile(wd); 6408575c986Schristos if (wp == NULL) { 6418575c986Schristos DPRINTF(("%s: wd=%d is already closed\n", __func__, wd)); 6428575c986Schristos return 0; 6438575c986Schristos } 6448575c986Schristos KASSERT(!mutex_owned(wp->f_vnode->v_interlock)); 6458575c986Schristos 6468575c986Schristos memset(&kev, 0, sizeof(kev)); 6478575c986Schristos EV_SET(&kev, wd, EVFILT_VNODE, EV_DELETE, 0, 0, 0); 6488575c986Schristos error = kevent1(&retval, ifd->ifd_kqfd, &kev, 1, NULL, 0, NULL, &k_ops); 6498575c986Schristos if (error != 0) 6508575c986Schristos DPRINTF(("%s: attempt to disable all events for wd=%d " 6518575c986Schristos "had error=%d\n", __func__, wd, error)); 6528575c986Schristos 6538575c986Schristos return fd_close(wd); 6548575c986Schristos } 6558575c986Schristos 6568575c986Schristos /* 6578575c986Schristos * inotify_rm_watch(2). Close wd and remove it from ifd->ifd_wds. 6588575c986Schristos */ 6598575c986Schristos int 6608575c986Schristos linux_sys_inotify_rm_watch(struct lwp *l, 6618575c986Schristos const struct linux_sys_inotify_rm_watch_args *uap, register_t *retval) 6628575c986Schristos { 6638575c986Schristos /* { 6648575c986Schristos syscallarg(int) fd; 6658575c986Schristos syscallarg(int) wd; 6668575c986Schristos } */ 6678575c986Schristos struct inotifyfd *ifd; 6688575c986Schristos file_t *fp; 6698575c986Schristos int error = 0; 6708575c986Schristos const int fd = SCARG(uap, fd); 6718575c986Schristos const int wd = SCARG(uap, wd); 6728575c986Schristos 6738575c986Schristos fp = fd_getfile(fd); 6748575c986Schristos if (fp == NULL) 6758575c986Schristos return EBADF; 6768575c986Schristos if (fp->f_ops != &inotify_fileops) { 6778575c986Schristos /* not an inotify fd */ 6788575c986Schristos error = EINVAL; 6798575c986Schristos goto leave; 6808575c986Schristos } 6818575c986Schristos 6828575c986Schristos ifd = fp->f_data; 6838575c986Schristos if (wd < 0 || wd >= ifd->ifd_nwds || ifd->ifd_wds[wd] == NULL) { 6848575c986Schristos error = EINVAL; 6858575c986Schristos goto leave; 6868575c986Schristos } 6878575c986Schristos 6888575c986Schristos error = inotify_close_wd(ifd, wd); 6898575c986Schristos 6908575c986Schristos leave: 6918575c986Schristos fd_putfile(fd); 6928575c986Schristos return error; 6938575c986Schristos } 6948575c986Schristos 6958575c986Schristos /* 6968575c986Schristos * Attach the inotify filter. 6978575c986Schristos */ 6988575c986Schristos static int 6998575c986Schristos inotify_filt_attach(struct knote *kn) 7008575c986Schristos { 7018575c986Schristos file_t *fp = kn->kn_obj; 7028575c986Schristos struct vnode *vp; 7038575c986Schristos 7048575c986Schristos KASSERT(fp->f_type == DTYPE_VNODE); 7058575c986Schristos vp = fp->f_vnode; 7068575c986Schristos 7078575c986Schristos /* 7088575c986Schristos * Needs to be set so that we get the same event handling as 7098575c986Schristos * EVFILT_VNODE. Otherwise we don't get any events. 7108575c986Schristos * 7118575c986Schristos * A consequence of this is that modifications/removals of 7128575c986Schristos * this knote need to specify EVFILT_VNODE rather than 7138575c986Schristos * inotify_filtid. 7148575c986Schristos */ 7158575c986Schristos kn->kn_filter = EVFILT_VNODE; 7168575c986Schristos 7178575c986Schristos kn->kn_fop = &inotify_filtops; 7188575c986Schristos kn->kn_hook = vp; 7198575c986Schristos vn_knote_attach(vp, kn); 7208575c986Schristos 7218575c986Schristos return 0; 7228575c986Schristos } 7238575c986Schristos 7248575c986Schristos /* 7258575c986Schristos * Detach the inotify filter. 7268575c986Schristos */ 7278575c986Schristos static void 7288575c986Schristos inotify_filt_detach(struct knote *kn) 7298575c986Schristos { 7308575c986Schristos struct vnode *vp = (struct vnode *)kn->kn_hook; 7318575c986Schristos 7328575c986Schristos vn_knote_detach(vp, kn); 7338575c986Schristos } 7348575c986Schristos 7358575c986Schristos /* 7368575c986Schristos * Create a single inotify event. 7378575c986Schristos */ 7388575c986Schristos static void 7398575c986Schristos do_kevent_to_inotify(int32_t wd, uint32_t mask, uint32_t cookie, 7408575c986Schristos struct inotify_entry *buf, size_t *nbuf, char *name) 7418575c986Schristos { 7428575c986Schristos KASSERT(*nbuf < LINUX_INOTIFY_MAX_FROM_KEVENT); 7438575c986Schristos 7448575c986Schristos buf += *nbuf; 7458575c986Schristos 7468575c986Schristos memset(buf, 0, sizeof(*buf)); 7478575c986Schristos 7488575c986Schristos buf->ie_event.wd = wd; 7498575c986Schristos buf->ie_event.mask = mask; 7508575c986Schristos buf->ie_event.cookie = cookie; 7518575c986Schristos 7528575c986Schristos if (name != NULL) { 7538575c986Schristos buf->ie_event.len = strlen(name) + 1; 7548575c986Schristos KASSERT(buf->ie_event.len < sizeof(buf->ie_name)); 7558575c986Schristos strcpy(buf->ie_name, name); 7568575c986Schristos } 7578575c986Schristos 7588575c986Schristos ++(*nbuf); 7598575c986Schristos } 7608575c986Schristos 7618575c986Schristos /* 762ed30ecdeSchristos * Like vn_readdir(), but with vnode locking only if needs_lock is 763ed30ecdeSchristos * true (to avoid double locking in some situations). 7648575c986Schristos */ 7658575c986Schristos static int 766ed30ecdeSchristos inotify_readdir(file_t *fp, struct dirent *dep, int *done, bool needs_lock) 7678575c986Schristos { 7688575c986Schristos struct vnode *vp; 7698575c986Schristos struct iovec iov; 7708575c986Schristos struct uio uio; 7718575c986Schristos int error, eofflag; 7728575c986Schristos 7738575c986Schristos KASSERT(fp->f_type == DTYPE_VNODE); 7748575c986Schristos vp = fp->f_vnode; 7758575c986Schristos KASSERT(vp->v_type == VDIR); 7768575c986Schristos 7778575c986Schristos iov.iov_base = dep; 7788575c986Schristos iov.iov_len = sizeof(*dep); 7798575c986Schristos 7808575c986Schristos uio.uio_iov = &iov; 7818575c986Schristos uio.uio_iovcnt = 1; 7828575c986Schristos uio.uio_rw = UIO_READ; 7838575c986Schristos uio.uio_resid = sizeof(*dep); 7848575c986Schristos UIO_SETUP_SYSSPACE(&uio); 7858575c986Schristos 7868575c986Schristos mutex_enter(&fp->f_lock); 7878575c986Schristos uio.uio_offset = fp->f_offset; 7888575c986Schristos mutex_exit(&fp->f_lock); 7898575c986Schristos 7908575c986Schristos /* XXX: should pass whether to lock or not */ 791ed30ecdeSchristos if (needs_lock) 7928575c986Schristos vn_lock(vp, LK_SHARED | LK_RETRY); 7932915865eSchristos else 7942915865eSchristos /* 7952915865eSchristos * XXX We need to temprarily drop v_interlock because 7962915865eSchristos * it may be temporarily acquired by biowait(). 7972915865eSchristos */ 7982915865eSchristos mutex_exit(vp->v_interlock); 7992915865eSchristos KASSERT(!mutex_owned(vp->v_interlock)); 8008575c986Schristos error = VOP_READDIR(vp, &uio, fp->f_cred, &eofflag, NULL, NULL); 801ed30ecdeSchristos if (needs_lock) 8028575c986Schristos VOP_UNLOCK(vp); 8032915865eSchristos else 8042915865eSchristos mutex_enter(vp->v_interlock); 8058575c986Schristos 8068575c986Schristos mutex_enter(&fp->f_lock); 8078575c986Schristos fp->f_offset = uio.uio_offset; 8088575c986Schristos mutex_exit(&fp->f_lock); 8098575c986Schristos 8108575c986Schristos *done = sizeof(*dep) - uio.uio_resid; 8118575c986Schristos return error; 8128575c986Schristos } 8138575c986Schristos 8148575c986Schristos /* 8158575c986Schristos * Create (and allocate) an appropriate inotify_dir_entries struct for wd to be 8168575c986Schristos * used on ifd_wds of inotifyfd. If the entries on a directory fail to be read, 817ed30ecdeSchristos * NULL is returned. needs_lock indicates if the vnode's lock is not already 818ed30ecdeSchristos * owned. 8198575c986Schristos */ 8208575c986Schristos static struct inotify_dir_entries * 821ed30ecdeSchristos get_inotify_dir_entries(int wd, bool needs_lock) 8228575c986Schristos { 8238575c986Schristos struct dirent de; 8248575c986Schristos struct dirent *currdep; 8258575c986Schristos struct inotify_dir_entries *idep = NULL; 8268575c986Schristos file_t *wp; 8278575c986Schristos int done, error; 8288575c986Schristos size_t i, decount; 8298575c986Schristos 8308575c986Schristos wp = fd_getfile(wd); 8318575c986Schristos if (wp == NULL) 8328575c986Schristos return NULL; 8338575c986Schristos if (wp->f_type != DTYPE_VNODE) 8348575c986Schristos goto leave; 8358575c986Schristos 8368575c986Schristos /* for non-directories, we have 0 entries. */ 8378575c986Schristos if (wp->f_vnode->v_type != VDIR) { 8388575c986Schristos idep = kmem_zalloc(INOTIFY_DIR_ENTRIES_SIZE(0), KM_SLEEP); 8398575c986Schristos goto leave; 8408575c986Schristos } 8418575c986Schristos 8428575c986Schristos mutex_enter(&wp->f_lock); 8438575c986Schristos wp->f_offset = 0; 8448575c986Schristos mutex_exit(&wp->f_lock); 8458575c986Schristos decount = 0; 8468575c986Schristos for (;;) { 847ed30ecdeSchristos error = inotify_readdir(wp, &de, &done, needs_lock); 8488575c986Schristos if (error != 0) 8498575c986Schristos goto leave; 8508575c986Schristos if (done == 0) 8518575c986Schristos break; 8528575c986Schristos 8538575c986Schristos currdep = &de; 8548575c986Schristos while ((char *)currdep < ((char *)&de) + done) { 8558575c986Schristos decount++; 8568575c986Schristos currdep = _DIRENT_NEXT(currdep); 8578575c986Schristos } 8588575c986Schristos } 8598575c986Schristos 8608575c986Schristos idep = kmem_zalloc(INOTIFY_DIR_ENTRIES_SIZE(decount), KM_SLEEP); 8618575c986Schristos idep->ide_count = decount; 8628575c986Schristos 8638575c986Schristos mutex_enter(&wp->f_lock); 8648575c986Schristos wp->f_offset = 0; 8658575c986Schristos mutex_exit(&wp->f_lock); 8668575c986Schristos for (i = 0; i < decount;) { 867ed30ecdeSchristos error = inotify_readdir(wp, &de, &done, needs_lock); 8688575c986Schristos if (error != 0 || done == 0) { 8698575c986Schristos kmem_free(idep, INOTIFY_DIR_ENTRIES_SIZE(decount)); 8708575c986Schristos idep = NULL; 8718575c986Schristos goto leave; 8728575c986Schristos } 8738575c986Schristos 8748575c986Schristos currdep = &de; 8758575c986Schristos while ((char *)currdep < ((char *)&de) + done) { 8768575c986Schristos idep->ide_entries[i].fileno = currdep->d_fileno; 8778575c986Schristos strcpy(idep->ide_entries[i].name, currdep->d_name); 8788575c986Schristos 8798575c986Schristos currdep = _DIRENT_NEXT(currdep); 8808575c986Schristos i++; 8818575c986Schristos } 8828575c986Schristos } 8838575c986Schristos 8848575c986Schristos leave: 8858575c986Schristos fd_putfile(wd); 8868575c986Schristos return idep; 8878575c986Schristos } 8888575c986Schristos 8898575c986Schristos static size_t 8908575c986Schristos find_entry(struct inotify_dir_entries *i1, struct inotify_dir_entries *i2) 8918575c986Schristos { 8928575c986Schristos for (size_t i = 0; i < i2->ide_count; i++) 8938575c986Schristos if (i2->ide_entries[i].fileno != i1->ide_entries[i].fileno) 8948575c986Schristos return i; 8958575c986Schristos KASSERTMSG(0, "Entry not found"); 8968575c986Schristos return -1; 8978575c986Schristos } 8988575c986Schristos 8998575c986Schristos static void 9008575c986Schristos handle_write(struct inotifyfd *ifd, int wd, struct inotify_entry *buf, 9018575c986Schristos size_t *nbuf) 9028575c986Schristos { 9038575c986Schristos struct inotify_dir_entries *old_idep, *new_idep; 9048575c986Schristos size_t i; 9058575c986Schristos 9068575c986Schristos mutex_enter(&ifd->ifd_lock); 9078575c986Schristos 9088575c986Schristos old_idep = ifd->ifd_wds[wd]; 9098575c986Schristos KASSERT(old_idep != NULL); 910ed30ecdeSchristos new_idep = get_inotify_dir_entries(wd, false); 9118575c986Schristos if (new_idep == NULL) { 9128575c986Schristos DPRINTF(("%s: directory for wd=%d could not be read\n", 9138575c986Schristos __func__, wd)); 9148575c986Schristos mutex_exit(&ifd->ifd_lock); 9158575c986Schristos return; 9168575c986Schristos } 9178575c986Schristos 9188575c986Schristos 9198575c986Schristos if (old_idep->ide_count < new_idep->ide_count) { 9208575c986Schristos KASSERT(old_idep->ide_count + 1 == new_idep->ide_count); 9218575c986Schristos 9228575c986Schristos /* Find the new entry. */ 9238575c986Schristos i = find_entry(new_idep, old_idep); 9248575c986Schristos do_kevent_to_inotify(wd, LINUX_IN_CREATE, 0, 9258575c986Schristos buf, nbuf, new_idep->ide_entries[i].name); 9268575c986Schristos goto out; 9278575c986Schristos } 9288575c986Schristos 9298575c986Schristos if (old_idep->ide_count > new_idep->ide_count) { 9308575c986Schristos KASSERT(old_idep->ide_count == new_idep->ide_count + 1); 9318575c986Schristos 9328575c986Schristos /* Find the deleted entry. */ 9338575c986Schristos i = find_entry(old_idep, new_idep); 9348575c986Schristos 9358575c986Schristos do_kevent_to_inotify(wd, LINUX_IN_DELETE, 0, 9368575c986Schristos buf, nbuf, old_idep->ide_entries[i].name); 9378575c986Schristos goto out; 9388575c986Schristos } 9398575c986Schristos 9408575c986Schristos /* 9418575c986Schristos * XXX Because we are not watching the entire 9428575c986Schristos * file system, the only time we know for sure 9438575c986Schristos * that the event is a LINUX_IN_MOVED_FROM/ 9448575c986Schristos * LINUX_IN_MOVED_TO is when the move happens 9458575c986Schristos * within a single directory... ie. the number 9468575c986Schristos * of directory entries has not changed. 9478575c986Schristos * 9488575c986Schristos * Otherwise all we can say for sure is that 9498575c986Schristos * something was created/deleted. So we issue a 9508575c986Schristos * LINUX_IN_CREATE/LINUX_IN_DELETE. 9518575c986Schristos */ 9528575c986Schristos ino_t changed = new_idep->ide_entries[new_idep->ide_count - 1].fileno; 9538575c986Schristos 9548575c986Schristos /* Find the deleted entry. */ 9558575c986Schristos for (i = 0; i < old_idep->ide_count; i++) 9568575c986Schristos if (old_idep->ide_entries[i].fileno == changed) 9578575c986Schristos break; 9588575c986Schristos KASSERT(i != old_idep->ide_count); 9598575c986Schristos 9608575c986Schristos do_kevent_to_inotify(wd, LINUX_IN_MOVED_FROM, changed, buf, nbuf, 9618575c986Schristos old_idep->ide_entries[i].name); 9628575c986Schristos 9638575c986Schristos do_kevent_to_inotify(wd, LINUX_IN_MOVED_TO, changed, buf, nbuf, 9648575c986Schristos new_idep->ide_entries[new_idep->ide_count - 1].name); 9658575c986Schristos 9668575c986Schristos out: 9678575c986Schristos ifd->ifd_wds[wd] = new_idep; 9688575c986Schristos mutex_exit(&ifd->ifd_lock); 9698575c986Schristos } 9708575c986Schristos 9718575c986Schristos /* 9728575c986Schristos * Convert a kevent flags and fflags for EVFILT_VNODE to some number 9738575c986Schristos * of inotify events. 9748575c986Schristos */ 9758575c986Schristos static int 9768575c986Schristos kevent_to_inotify(struct inotifyfd *ifd, int wd, enum vtype wtype, 9778575c986Schristos uint32_t flags, uint32_t fflags, struct inotify_entry *buf, 9788575c986Schristos size_t *nbuf) 9798575c986Schristos { 9808575c986Schristos struct stat st; 9818575c986Schristos file_t *wp; 9828575c986Schristos size_t i; 9838575c986Schristos int error = 0; 9848575c986Schristos 9858575c986Schristos for (i = 0; i < common_kevent_to_inotify_len; i++) 9868575c986Schristos if (fflags & common_kevent_to_inotify[i].kevent) 9878575c986Schristos do_kevent_to_inotify(wd, 9888575c986Schristos common_kevent_to_inotify[i].inotify, 0, buf, nbuf, 9898575c986Schristos NULL); 9908575c986Schristos 9918575c986Schristos if (wtype == VREG) { 9928575c986Schristos for (i = 0; i < vreg_kevent_to_inotify_len; i++) 9938575c986Schristos if (fflags & vreg_kevent_to_inotify[i].kevent) 9948575c986Schristos do_kevent_to_inotify(wd, 9958575c986Schristos vreg_kevent_to_inotify[i].inotify, 0, 9968575c986Schristos buf, nbuf, NULL); 9978575c986Schristos } else if (wtype == VDIR) { 9988575c986Schristos for (i = 0; i < *nbuf; i++) 9998575c986Schristos if (buf[i].ie_event.mask & 10008575c986Schristos (LINUX_IN_ACCESS|LINUX_IN_ATTRIB 10018575c986Schristos |LINUX_IN_CLOSE|LINUX_IN_OPEN)) 10028575c986Schristos buf[i].ie_event.mask |= LINUX_IN_ISDIR; 10038575c986Schristos 10048575c986Schristos /* Need to disambiguate the possible NOTE_WRITEs. */ 10058575c986Schristos if (fflags & NOTE_WRITE) 10068575c986Schristos handle_write(ifd, wd, buf, nbuf); 10078575c986Schristos } 10088575c986Schristos 10098575c986Schristos /* 10108575c986Schristos * Need to check if wd is actually has a link count of 0 to issue a 10118575c986Schristos * LINUX_IN_DELETE_SELF. 10128575c986Schristos */ 10138575c986Schristos if (fflags & NOTE_DELETE) { 10148575c986Schristos wp = fd_getfile(wd); 10158575c986Schristos KASSERT(wp != NULL); 10168575c986Schristos KASSERT(wp->f_type == DTYPE_VNODE); 10178575c986Schristos vn_stat(wp->f_vnode, &st); 10188575c986Schristos fd_putfile(wd); 10198575c986Schristos 10208575c986Schristos if (st.st_nlink == 0) 10218575c986Schristos do_kevent_to_inotify(wd, LINUX_IN_DELETE_SELF, 0, 10228575c986Schristos buf, nbuf, NULL); 10238575c986Schristos } 10248575c986Schristos 10258575c986Schristos /* LINUX_IN_IGNORED must be the last event issued for wd. */ 10268575c986Schristos if ((flags & EV_ONESHOT) || (fflags & (NOTE_REVOKE|NOTE_DELETE))) { 10278575c986Schristos do_kevent_to_inotify(wd, LINUX_IN_IGNORED, 0, buf, nbuf, NULL); 10288575c986Schristos /* 10298575c986Schristos * XXX in theory we could call inotify_close_wd(ifd, wd) but if 10308575c986Schristos * we get here we must already be holding v_interlock for 10318575c986Schristos * wd... so we can't. 10328575c986Schristos * 10338575c986Schristos * For simplicity we do nothing, and so wd will only be closed 10348575c986Schristos * when the inotify fd is closed. 10358575c986Schristos */ 10368575c986Schristos } 10378575c986Schristos 10388575c986Schristos return error; 10398575c986Schristos } 10408575c986Schristos 10418575c986Schristos /* 10428575c986Schristos * Handle an event. Unlike EVFILT_VNODE, we translate the event to a 10438575c986Schristos * linux_inotify_event and put it in our own custom queue. 10448575c986Schristos */ 10458575c986Schristos static int 10468575c986Schristos inotify_filt_event(struct knote *kn, long hint) 10478575c986Schristos { 10488575c986Schristos struct vnode *vp = (struct vnode *)kn->kn_hook; 10498575c986Schristos struct inotifyfd *ifd; 10508575c986Schristos struct inotify_entry *cur_ie; 10518575c986Schristos size_t nbuf, i; 10528575c986Schristos uint32_t status; 10538575c986Schristos struct inotify_entry buf[LINUX_INOTIFY_MAX_FROM_KEVENT]; 10548575c986Schristos 10558575c986Schristos /* 10568575c986Schristos * If KN_WILLDETACH is set then 10578575c986Schristos * 1. kn->kn_kevent.udata has already been trashed with a 10588575c986Schristos * struct lwp *, so we don't have access to a real ifd 10598575c986Schristos * anymore, and 10608575c986Schristos * 2. we're about to detach anyways, so we don't really care 10618575c986Schristos * about the events. 10628575c986Schristos * (Also because of this we need to get ifd under the same 10638575c986Schristos * lock as kn->kn_status.) 10648575c986Schristos */ 10658575c986Schristos mutex_enter(&kn->kn_kq->kq_lock); 10668575c986Schristos status = kn->kn_status; 10678575c986Schristos ifd = kn->kn_kevent.udata; 10688575c986Schristos mutex_exit(&kn->kn_kq->kq_lock); 10698575c986Schristos if (status & KN_WILLDETACH) 10708575c986Schristos return 0; 10718575c986Schristos 10728575c986Schristos /* 10738575c986Schristos * If we don't care about the NOTEs in hint, we don't generate 10748575c986Schristos * any events. 10758575c986Schristos */ 10768575c986Schristos hint &= kn->kn_sfflags; 10778575c986Schristos if (hint == 0) 10788575c986Schristos return 0; 10798575c986Schristos 10808575c986Schristos KASSERT(mutex_owned(vp->v_interlock)); 1081ed30ecdeSchristos KASSERT(!mutex_owned(&ifd->ifd_lock)); 10828575c986Schristos 10838575c986Schristos mutex_enter(&ifd->ifd_qlock); 10848575c986Schristos 10858575c986Schristos /* 10868575c986Schristos * early out: there's no point even traslating the event if we 10878575c986Schristos * have nowhere to put it (and an LINUX_IN_Q_OVERFLOW has 10888575c986Schristos * already been added). 10898575c986Schristos */ 10908575c986Schristos if (ifd->ifd_qcount >= LINUX_INOTIFY_MAX_QUEUED) 10918575c986Schristos goto leave; 10928575c986Schristos 10938575c986Schristos nbuf = 0; 10948575c986Schristos (void)kevent_to_inotify(ifd, kn->kn_id, vp->v_type, kn->kn_flags, 10958575c986Schristos hint, buf, &nbuf); 10968575c986Schristos for (i = 0; i < nbuf && ifd->ifd_qcount < LINUX_INOTIFY_MAX_QUEUED-1; 10978575c986Schristos i++) { 10988575c986Schristos cur_ie = kmem_zalloc(sizeof(*cur_ie), KM_SLEEP); 10998575c986Schristos memcpy(cur_ie, &buf[i], sizeof(*cur_ie)); 11008575c986Schristos 11018575c986Schristos TAILQ_INSERT_TAIL(&ifd->ifd_qhead, cur_ie, ie_entries); 11028575c986Schristos ifd->ifd_qcount++; 11038575c986Schristos } 11048575c986Schristos /* handle early overflow, by adding an overflow event to the end */ 11058575c986Schristos if (i != nbuf) { 11068575c986Schristos nbuf = 0; 11078575c986Schristos cur_ie = kmem_zalloc(sizeof(*cur_ie), KM_SLEEP); 11088575c986Schristos do_kevent_to_inotify(-1, LINUX_IN_Q_OVERFLOW, 0, 11098575c986Schristos cur_ie, &nbuf, NULL); 11108575c986Schristos 11118575c986Schristos TAILQ_INSERT_TAIL(&ifd->ifd_qhead, cur_ie, ie_entries); 11128575c986Schristos ifd->ifd_qcount++; 11138575c986Schristos } 11148575c986Schristos 11158575c986Schristos if (nbuf > 0) { 11168575c986Schristos cv_signal(&ifd->ifd_qcv); 11178575c986Schristos 11188575c986Schristos mutex_enter(&ifd->ifd_lock); 11192915865eSchristos selnotify(&ifd->ifd_sel, 0, NOTE_LOWAT); 11208575c986Schristos mutex_exit(&ifd->ifd_lock); 11218575c986Schristos } else 11228575c986Schristos DPRINTF(("%s: hint=%lx resulted in 0 inotify events\n", 11238575c986Schristos __func__, hint)); 11248575c986Schristos 11258575c986Schristos leave: 11268575c986Schristos mutex_exit(&ifd->ifd_qlock); 11278575c986Schristos return 0; 11288575c986Schristos } 11298575c986Schristos 11308575c986Schristos /* 11318575c986Schristos * Read inotify events from the queue. 11328575c986Schristos */ 11338575c986Schristos static int 11348575c986Schristos inotify_read(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred, 11358575c986Schristos int flags) 11368575c986Schristos { 11378575c986Schristos struct inotify_entry *cur_iep; 11388575c986Schristos size_t cur_size, nread; 11398575c986Schristos int error = 0; 11408575c986Schristos struct inotifyfd *ifd = fp->f_data; 11418575c986Schristos 11428575c986Schristos mutex_enter(&ifd->ifd_qlock); 11438575c986Schristos 11448575c986Schristos if (ifd->ifd_qcount == 0) { 11458575c986Schristos if (fp->f_flag & O_NONBLOCK) { 11468575c986Schristos error = EAGAIN; 11478575c986Schristos goto leave; 11488575c986Schristos } 11498575c986Schristos 11508575c986Schristos while (ifd->ifd_qcount == 0) { 11518575c986Schristos /* wait until there is an event to read */ 11528575c986Schristos error = cv_wait_sig(&ifd->ifd_qcv, &ifd->ifd_qlock); 11538575c986Schristos if (error != 0) { 11548575c986Schristos error = EINTR; 11558575c986Schristos goto leave; 11568575c986Schristos } 11578575c986Schristos } 11588575c986Schristos } 11598575c986Schristos 11608575c986Schristos KASSERT(ifd->ifd_qcount > 0); 11618575c986Schristos KASSERT(mutex_owned(&ifd->ifd_qlock)); 11628575c986Schristos 11638575c986Schristos nread = 0; 11648575c986Schristos while (ifd->ifd_qcount > 0) { 11658575c986Schristos cur_iep = TAILQ_FIRST(&ifd->ifd_qhead); 11668575c986Schristos KASSERT(cur_iep != NULL); 11678575c986Schristos 11688575c986Schristos cur_size = sizeof(cur_iep->ie_event) + cur_iep->ie_event.len; 11698575c986Schristos if (cur_size > uio->uio_resid) { 11708575c986Schristos if (nread == 0) 11718575c986Schristos error = EINVAL; 11728575c986Schristos break; 11738575c986Schristos } 11748575c986Schristos 11758575c986Schristos error = uiomove(&cur_iep->ie_event, sizeof(cur_iep->ie_event), 11768575c986Schristos uio); 11778575c986Schristos if (error != 0) 11788575c986Schristos break; 11798575c986Schristos error = uiomove(&cur_iep->ie_name, cur_iep->ie_event.len, uio); 11808575c986Schristos if (error != 0) 11818575c986Schristos break; 11828575c986Schristos 11838575c986Schristos /* cleanup */ 11848575c986Schristos TAILQ_REMOVE(&ifd->ifd_qhead, cur_iep, ie_entries); 11858575c986Schristos kmem_free(cur_iep, sizeof(*cur_iep)); 11868575c986Schristos 11878575c986Schristos nread++; 11888575c986Schristos ifd->ifd_qcount--; 11898575c986Schristos } 11908575c986Schristos 11918575c986Schristos leave: 11928575c986Schristos /* Wake up the next reader, if the queue is not empty. */ 11938575c986Schristos if (ifd->ifd_qcount > 0) 11948575c986Schristos cv_signal(&ifd->ifd_qcv); 11958575c986Schristos 11968575c986Schristos mutex_exit(&ifd->ifd_qlock); 11978575c986Schristos return error; 11988575c986Schristos } 11998575c986Schristos 12008575c986Schristos /* 12018575c986Schristos * Close all the file descriptors associated with fp. 12028575c986Schristos */ 12038575c986Schristos static int 12048575c986Schristos inotify_close(file_t *fp) 12058575c986Schristos { 12068575c986Schristos int error; 12078575c986Schristos size_t i; 12088575c986Schristos file_t *kqfp; 12098575c986Schristos struct inotifyfd *ifd = fp->f_data; 12108575c986Schristos 12118575c986Schristos for (i = 0; i < ifd->ifd_nwds; i++) { 12128575c986Schristos if (ifd->ifd_wds[i] != NULL) { 12138575c986Schristos error = inotify_close_wd(ifd, i); 12148575c986Schristos if (error != 0) 12158575c986Schristos return error; 12168575c986Schristos } 12178575c986Schristos } 12188575c986Schristos 12198575c986Schristos /* the reference we need to hold is ifd->ifd_kqfp */ 12208575c986Schristos kqfp = fd_getfile(ifd->ifd_kqfd); 12218575c986Schristos if (kqfp == NULL) { 12228575c986Schristos DPRINTF(("%s: kqfp=%d is already closed\n", __func__, 12238575c986Schristos ifd->ifd_kqfd)); 12248575c986Schristos } else { 12258575c986Schristos error = fd_close(ifd->ifd_kqfd); 12268575c986Schristos if (error != 0) 12278575c986Schristos return error; 12288575c986Schristos } 12298575c986Schristos 12308575c986Schristos mutex_destroy(&ifd->ifd_lock); 12318575c986Schristos mutex_destroy(&ifd->ifd_qlock); 12328575c986Schristos cv_destroy(&ifd->ifd_qcv); 12338575c986Schristos seldestroy(&ifd->ifd_sel); 12348575c986Schristos 12358575c986Schristos kmem_free(ifd->ifd_wds, ifd->ifd_nwds * sizeof(*ifd->ifd_wds)); 12368575c986Schristos kmem_free(ifd, sizeof(*ifd)); 12378575c986Schristos fp->f_data = NULL; 12388575c986Schristos 12398575c986Schristos return 0; 12408575c986Schristos } 12418575c986Schristos 12428575c986Schristos /* 12438575c986Schristos * Check if there are pending read events. 12448575c986Schristos */ 12458575c986Schristos static int 12468575c986Schristos inotify_poll(file_t *fp, int events) 12478575c986Schristos { 12488575c986Schristos int revents; 12498575c986Schristos struct inotifyfd *ifd = fp->f_data; 12508575c986Schristos 12518575c986Schristos revents = 0; 12528575c986Schristos if (events & (POLLIN|POLLRDNORM)) { 12538575c986Schristos mutex_enter(&ifd->ifd_qlock); 12548575c986Schristos 12558575c986Schristos if (ifd->ifd_qcount > 0) 12568575c986Schristos revents |= events & (POLLIN|POLLRDNORM); 12578575c986Schristos 12588575c986Schristos mutex_exit(&ifd->ifd_qlock); 12598575c986Schristos } 12608575c986Schristos 12618575c986Schristos return revents; 12628575c986Schristos } 12638575c986Schristos 12648575c986Schristos /* 12658575c986Schristos * Attach EVFILT_READ to the inotify instance in fp. 12668575c986Schristos * 12678575c986Schristos * This is so you can watch inotify with epoll. No other kqueue 12688575c986Schristos * filter needs to be supported. 12698575c986Schristos */ 12708575c986Schristos static int 12718575c986Schristos inotify_kqfilter(file_t *fp, struct knote *kn) 12728575c986Schristos { 12738575c986Schristos struct inotifyfd *ifd = fp->f_data; 12748575c986Schristos 12758575c986Schristos KASSERT(fp == kn->kn_obj); 12768575c986Schristos 12778575c986Schristos if (kn->kn_filter != EVFILT_READ) 12788575c986Schristos return EINVAL; 12798575c986Schristos 12808575c986Schristos kn->kn_fop = &inotify_read_filtops; 12818575c986Schristos mutex_enter(&ifd->ifd_lock); 12828575c986Schristos selrecord_knote(&ifd->ifd_sel, kn); 12838575c986Schristos mutex_exit(&ifd->ifd_lock); 12848575c986Schristos 12858575c986Schristos return 0; 12868575c986Schristos } 12878575c986Schristos 12888575c986Schristos /* 12898575c986Schristos * Detach a filter from an inotify instance. 12908575c986Schristos */ 12918575c986Schristos static void 12928575c986Schristos inotify_read_filt_detach(struct knote *kn) 12938575c986Schristos { 12948575c986Schristos struct inotifyfd *ifd = ((file_t *)kn->kn_obj)->f_data; 12958575c986Schristos 12968575c986Schristos mutex_enter(&ifd->ifd_lock); 12978575c986Schristos selremove_knote(&ifd->ifd_sel, kn); 12988575c986Schristos mutex_exit(&ifd->ifd_lock); 12998575c986Schristos } 13008575c986Schristos 13018575c986Schristos /* 13028575c986Schristos * Handle EVFILT_READ events. Note that nothing is put in kn_data. 13038575c986Schristos */ 13048575c986Schristos static int 13058575c986Schristos inotify_read_filt_event(struct knote *kn, long hint) 13068575c986Schristos { 13078575c986Schristos struct inotifyfd *ifd = ((file_t *)kn->kn_obj)->f_data; 13088575c986Schristos 13092915865eSchristos if (hint != 0) { 13102915865eSchristos KASSERT(mutex_owned(&ifd->ifd_lock)); 13112915865eSchristos KASSERT(mutex_owned(&ifd->ifd_qlock)); 13122915865eSchristos KASSERT(hint == NOTE_LOWAT); 13138575c986Schristos 13142915865eSchristos kn->kn_data = ifd->ifd_qcount; 13152915865eSchristos } 13162915865eSchristos 13172915865eSchristos return kn->kn_data > 0; 13188575c986Schristos } 13198575c986Schristos 13208575c986Schristos /* 13218575c986Schristos * Restart the inotify instance. 13228575c986Schristos */ 13238575c986Schristos static void 13248575c986Schristos inotify_restart(file_t *fp) 13258575c986Schristos { 13268575c986Schristos struct inotifyfd *ifd = fp->f_data; 13278575c986Schristos 13288575c986Schristos mutex_enter(&ifd->ifd_qlock); 13298575c986Schristos cv_broadcast(&ifd->ifd_qcv); 13308575c986Schristos mutex_exit(&ifd->ifd_qlock); 13318575c986Schristos } 1332