/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/selinfo.h>
#include <sys/eventfd.h>

#include <security/audit/audit.h>

/* eventfd_create_file() relies on the EFD_* and O_* flag values matching. */
_Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
_Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");

MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");

static fo_rdwr_t	eventfd_read;
static fo_rdwr_t	eventfd_write;
static fo_ioctl_t	eventfd_ioctl;
static fo_poll_t	eventfd_poll;
static fo_kqfilter_t	eventfd_kqfilter;
static fo_stat_t	eventfd_stat;
static fo_close_t	eventfd_close;
static fo_fill_kinfo_t	eventfd_fill_kinfo;

/*
 * File operations installed on every eventfd descriptor by
 * eventfd_create_file().  Truncate, chmod, chown and sendfile are routed to
 * the invfo_*() handlers.
 */
static const struct fileops eventfdops = {
	.fo_read = eventfd_read,
	.fo_write = eventfd_write,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = eventfd_ioctl,
	.fo_poll = eventfd_poll,
	.fo_kqfilter = eventfd_kqfilter,
	.fo_stat = eventfd_stat,
	.fo_close = eventfd_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = eventfd_fill_kinfo,
	.fo_cmp = file_kcmp_generic,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_eventfddetach(struct knote *kn);
static int	filt_eventfdread(struct knote *kn, long hint);
static int	filt_eventfdwrite(struct knote *kn, long hint);

/* kqueue filter attached for EVFILT_READ by eventfd_kqfilter(). */
static const struct filterops eventfd_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_eventfddetach,
	.f_event = filt_eventfdread
};

/* kqueue filter attached for EVFILT_WRITE by eventfd_kqfilter(). */
static const struct filterops eventfd_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_eventfddetach,
	.f_event = filt_eventfdwrite
};

/*
 * Per-descriptor state, stored in the file's f_data.
 */
struct eventfd {
	eventfd_t	efd_count;	/* current counter value */
	uint32_t	efd_flags;	/* EFD_* flags given at creation */
	struct selinfo	efd_sel;	/* poll waiters and knote list */
	struct mtx	efd_lock;	/* held around efd_count accesses */
};

/*
 * Allocate a new eventfd object and attach it to the file 'fp'.
 *
 * 'initval' becomes the initial counter value and 'flags' is stored verbatim
 * in efd_flags.  The file is opened FREAD | FWRITE; FNONBLOCK is added when
 * EFD_NONBLOCK is set in 'flags'.  'td' is not used here.
 *
 * Always returns 0.
 */
int
eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
    int flags)
{
	struct eventfd *efd;
	int fflags;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_VALUE(initval);

	efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
	efd->efd_flags = flags;
	efd->efd_count = initval;
	mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
	/* The knote list shares efd_lock with the counter. */
	knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);

	fflags = FREAD | FWRITE;
	if ((flags & EFD_NONBLOCK) != 0)
		fflags |= FNONBLOCK;
	finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);

	return (0);
}

/*
 * fo_close handler: drain the selinfo, destroy the knote list and the mutex,
 * then free the eventfd object.  Always returns 0.
 */
static int
eventfd_close(struct file *fp, struct thread *td)
{
	struct eventfd *efd;

	efd = fp->f_data;
	seldrain(&efd->efd_sel);
	knlist_destroy(&efd->efd_sel.si_note);
	mtx_destroy(&efd->efd_lock);
	free(efd, M_EVENTFD);
	return (0);
}

/*
 * fo_read handler: fetch the counter.
 *
 * Returns EINVAL when the caller's buffer is smaller than an eventfd_t.
 * While the counter is zero, a file with FNONBLOCK set fails with EAGAIN;
 * otherwise the thread sleeps on the counter with PCATCH and a non-zero
 * mtx_sleep() result is returned to the caller.
 *
 * With EFD_SEMAPHORE the counter is decremented by one and the value 1 is
 * returned; without it the whole counter is returned and reset to zero.
 * Knotes, poll waiters and sleeping threads are notified of the change, and
 * the value is copied out after efd_lock has been dropped.
 */
static int
eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct eventfd *efd;
	eventfd_t count;
	int error;

	if (uio->uio_resid < sizeof(eventfd_t))
		return (EINVAL);

	error = 0;
	efd = fp->f_data;
	mtx_lock(&efd->efd_lock);
	while (error == 0 && efd->efd_count == 0) {
		if ((fp->f_flag & FNONBLOCK) != 0) {
			mtx_unlock(&efd->efd_lock);
			return (EAGAIN);
		}
		error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
		    "efdrd", 0);
	}
	if (error == 0) {
		MPASS(efd->efd_count > 0);
		if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
			count = 1;
			--efd->efd_count;
		} else {
			count = efd->efd_count;
			efd->efd_count = 0;
		}
		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
		selwakeup(&efd->efd_sel);
		wakeup(&efd->efd_count);
		mtx_unlock(&efd->efd_lock);
		error = uiomove(&count, sizeof(eventfd_t), uio);
	} else
		mtx_unlock(&efd->efd_lock);

	return (error);
}

/*
 * fo_write handler: add a caller-supplied value to the counter.
 *
 * Returns EINVAL when fewer than sizeof(eventfd_t) bytes are supplied or when
 * the value is UINT64_MAX, and any error reported by uiomove().  While the
 * addition would not leave the counter below UINT64_MAX, a file with
 * FNONBLOCK set fails with EAGAIN; otherwise the thread sleeps on the counter
 * with PCATCH and a non-zero mtx_sleep() result is returned to the caller.
 *
 * The value has already been consumed from 'uio' by the time the counter is
 * examined, so every path that leaves the counter unchanged after that point
 * puts the bytes back into uio_resid.  Otherwise the uio would describe a
 * transfer that never happened.
 */
static int
eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct eventfd *efd;
	eventfd_t count;
	int error;

	if (uio->uio_resid < sizeof(eventfd_t))
		return (EINVAL);

	error = uiomove(&count, sizeof(eventfd_t), uio);
	if (error != 0)
		return (error);
	if (count == UINT64_MAX)
		return (EINVAL);

	efd = fp->f_data;
	mtx_lock(&efd->efd_lock);
	/* Wait until the sum stays strictly below UINT64_MAX. */
	while (UINT64_MAX - efd->efd_count <= count) {
		if ((fp->f_flag & FNONBLOCK) != 0) {
			error = EAGAIN;
			break;
		}
		error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
		    PCATCH, "efdwr", 0);
		if (error != 0)
			break;
	}
	if (error == 0) {
		MPASS(UINT64_MAX - efd->efd_count > count);
		efd->efd_count += count;
		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
		selwakeup(&efd->efd_sel);
		wakeup(&efd->efd_count);
	} else {
		/* Do not return the number of bytes written */
		uio->uio_resid += sizeof(eventfd_t);
	}
	mtx_unlock(&efd->efd_lock);

	return (error);
}

/*
 * fo_poll handler.
 *
 * Reports the requested POLLIN/POLLRDNORM bits when the counter is non-zero
 * and the requested POLLOUT/POLLWRNORM bits when the counter is below
 * UINT64_MAX - 1.  When nothing is ready the thread is recorded on the
 * selinfo.  Returns the ready event mask.
 */
static int
eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct eventfd *efd;
	int revents;

	efd = fp->f_data;
	revents = 0;
	mtx_lock(&efd->efd_lock);
	if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0)
		revents |= events & (POLLIN | POLLRDNORM);
	if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 >
	    efd->efd_count)
		revents |= events & (POLLOUT | POLLWRNORM);
	if (revents == 0)
		selrecord(td, &efd->efd_sel);
	mtx_unlock(&efd->efd_lock);

	return (revents);
}

/*
 * fo_kqfilter handler: select the read or write filter for the knote, point
 * kn_hook at the eventfd and add the knote to its list with efd_lock held.
 * Returns EINVAL for any filter other than EVFILT_READ and EVFILT_WRITE.
 */
static int
eventfd_kqfilter(struct file *fp, struct knote *kn)
{
	struct eventfd *efd = fp->f_data;

	mtx_lock(&efd->efd_lock);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &eventfd_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &eventfd_wfiltops;
		break;
	default:
		mtx_unlock(&efd->efd_lock);
		return (EINVAL);
	}

	kn->kn_hook = efd;
	knlist_add(&efd->efd_sel.si_note, kn, 1);
	mtx_unlock(&efd->efd_lock);

	return (0);
}

/*
 * f_detach for both filters: remove the knote from the eventfd's list while
 * holding efd_lock.
 */
static void
filt_eventfddetach(struct knote *kn)
{
	struct eventfd *efd = kn->kn_hook;

	mtx_lock(&efd->efd_lock);
	knlist_remove(&efd->efd_sel.si_note, kn, 1);
	mtx_unlock(&efd->efd_lock);
}

/*
 * f_event for EVFILT_READ.  Must be called with efd_lock held.  Stores the
 * current counter in kn_data and returns non-zero when the counter is
 * non-zero.
 */
static int
filt_eventfdread(struct knote *kn, long hint)
{
	struct eventfd *efd = kn->kn_hook;
	int ret;

	mtx_assert(&efd->efd_lock, MA_OWNED);
	kn->kn_data = (int64_t)efd->efd_count;
	ret = efd->efd_count > 0;

	return (ret);
}

/*
 * f_event for EVFILT_WRITE.  Must be called with efd_lock held.  Stores
 * UINT64_MAX - 1 - efd_count in kn_data and returns non-zero when the counter
 * is below UINT64_MAX - 1.
 *
 * NOTE(review): kn_data is filled through an int64_t cast, so differences
 * above INT64_MAX do not survive as positive values -- confirm consumers do
 * not depend on the magnitude.
 */
static int
filt_eventfdwrite(struct knote *kn, long hint)
{
	struct eventfd *efd = kn->kn_hook;
	int ret;

	mtx_assert(&efd->efd_lock, MA_OWNED);
	kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count);
	ret = UINT64_MAX - 1 > efd->efd_count;

	return (ret);
}

/*
 * fo_ioctl handler: FIONBIO and FIOASYNC are accepted and return 0 without
 * doing anything here; every other command fails with ENOTTY.
 */
static int
eventfd_ioctl(struct file *fp, u_long cmd, void *data,
    struct ucred *active_cred, struct thread *td)
{
	switch (cmd) {
	case FIONBIO:
	case FIOASYNC:
		return (0);
	}

	return (ENOTTY);
}

/*
 * fo_stat handler: zero '*st' and set st_mode to S_IFIFO.  Always returns 0.
 */
static int
eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
{
	bzero((void *)st, sizeof *st);
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * fo_fill_kinfo handler: mark the entry as KF_TYPE_EVENTFD and export the
 * counter, the creation flags and the object's address, all read while
 * holding efd_lock.  Always returns 0.
 */
static int
eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{
	struct eventfd *efd = fp->f_data;

	kif->kf_type = KF_TYPE_EVENTFD;
	mtx_lock(&efd->efd_lock);
	kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count;
	kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags;
	kif->kf_un.kf_eventfd.kf_eventfd_addr = (uintptr_t)efd;
	mtx_unlock(&efd->efd_lock);
	return (0);
}