10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 70Sstevel@tonic-gate * with the License. 80Sstevel@tonic-gate * 90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 110Sstevel@tonic-gate * See the License for the specific language governing permissions 120Sstevel@tonic-gate * and limitations under the License. 130Sstevel@tonic-gate * 140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 190Sstevel@tonic-gate * 200Sstevel@tonic-gate * CDDL HEADER END 210Sstevel@tonic-gate */ 220Sstevel@tonic-gate /* 23*258Scth * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 
250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate #include <sys/types.h> 300Sstevel@tonic-gate #include <sys/devops.h> 310Sstevel@tonic-gate #include <sys/conf.h> 320Sstevel@tonic-gate #include <sys/modctl.h> 330Sstevel@tonic-gate #include <sys/sunddi.h> 340Sstevel@tonic-gate #include <sys/stat.h> 350Sstevel@tonic-gate #include <sys/poll_impl.h> 360Sstevel@tonic-gate #include <sys/errno.h> 370Sstevel@tonic-gate #include <sys/kmem.h> 380Sstevel@tonic-gate #include <sys/mkdev.h> 390Sstevel@tonic-gate #include <sys/debug.h> 400Sstevel@tonic-gate #include <sys/file.h> 410Sstevel@tonic-gate #include <sys/sysmacros.h> 420Sstevel@tonic-gate #include <sys/systm.h> 430Sstevel@tonic-gate #include <sys/bitmap.h> 440Sstevel@tonic-gate #include <sys/devpoll.h> 450Sstevel@tonic-gate #include <sys/rctl.h> 460Sstevel@tonic-gate #include <sys/resource.h> 470Sstevel@tonic-gate /* placeholder stored in a devpolltbl slot by dpopen() until the real entry is installed */ 480Sstevel@tonic-gate #define RESERVED 1 490Sstevel@tonic-gate 500Sstevel@tonic-gate /* driver-local data */ 510Sstevel@tonic-gate static dp_entry_t **devpolltbl; /* dev poll entries, indexed by minor number */ 520Sstevel@tonic-gate static size_t dptblsize; /* number of slots in devpolltbl */ 530Sstevel@tonic-gate 540Sstevel@tonic-gate static kmutex_t devpoll_lock; /* lock protecting devpolltbl and dptblsize */ 550Sstevel@tonic-gate int devpoll_init; /* is /dev/poll initialized already */ 560Sstevel@tonic-gate 570Sstevel@tonic-gate /* device local functions */ 580Sstevel@tonic-gate 590Sstevel@tonic-gate static int dpopen(dev_t *devp, int flag, int otyp, cred_t *credp); 600Sstevel@tonic-gate static int dpwrite(dev_t dev, struct uio *uiop, cred_t *credp); 610Sstevel@tonic-gate static int dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 620Sstevel@tonic-gate int *rvalp); 630Sstevel@tonic-gate static int dppoll(dev_t dev, short events, int anyyet, short *reventsp, 640Sstevel@tonic-gate struct pollhead **phpp); 650Sstevel@tonic-gate static int dpclose(dev_t dev, int flag, int
otyp, cred_t *credp); 660Sstevel@tonic-gate static dev_info_t *dpdevi; /* devinfo node saved by dpattach(), returned by dpinfo() */ 670Sstevel@tonic-gate 680Sstevel@tonic-gate /* character device entry points; operations this driver does not provide are nodev */ 690Sstevel@tonic-gate static struct cb_ops dp_cb_ops = { 700Sstevel@tonic-gate dpopen, /* open */ 710Sstevel@tonic-gate dpclose, /* close */ 720Sstevel@tonic-gate nodev, /* strategy */ 730Sstevel@tonic-gate nodev, /* print */ 740Sstevel@tonic-gate nodev, /* dump */ 750Sstevel@tonic-gate nodev, /* read */ 760Sstevel@tonic-gate dpwrite, /* write */ 770Sstevel@tonic-gate dpioctl, /* ioctl */ 780Sstevel@tonic-gate nodev, /* devmap */ 790Sstevel@tonic-gate nodev, /* mmap */ 800Sstevel@tonic-gate nodev, /* segmap */ 810Sstevel@tonic-gate dppoll, /* poll */ 82*258Scth ddi_prop_op, /* prop_op */ 830Sstevel@tonic-gate (struct streamtab *)0, /* streamtab */ 84*258Scth D_MP, /* flags */ 85*258Scth CB_REV, /* cb_ops revision */ 86*258Scth nodev, /* aread */ 87*258Scth nodev /* awrite */ 880Sstevel@tonic-gate }; 890Sstevel@tonic-gate 900Sstevel@tonic-gate static int dpattach(dev_info_t *, ddi_attach_cmd_t); 910Sstevel@tonic-gate static int dpdetach(dev_info_t *, ddi_detach_cmd_t); 920Sstevel@tonic-gate static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 930Sstevel@tonic-gate /* device operations vector; referenced from modldrv below */ 940Sstevel@tonic-gate static struct dev_ops dp_ops = { 950Sstevel@tonic-gate DEVO_REV, /* devo_rev */ 960Sstevel@tonic-gate 0, /* refcnt */ 970Sstevel@tonic-gate dpinfo, /* info */ 980Sstevel@tonic-gate nulldev, /* identify */ 990Sstevel@tonic-gate nulldev, /* probe */ 1000Sstevel@tonic-gate dpattach, /* attach */ 1010Sstevel@tonic-gate dpdetach, /* detach */ 1020Sstevel@tonic-gate nodev, /* reset */ 1030Sstevel@tonic-gate &dp_cb_ops, /* driver operations */ 1040Sstevel@tonic-gate (struct bus_ops *)NULL, /* bus operations */ 1050Sstevel@tonic-gate nulldev /* power */ 1060Sstevel@tonic-gate }; 1070Sstevel@tonic-gate 1080Sstevel@tonic-gate 1090Sstevel@tonic-gate static struct modldrv modldrv = { 1100Sstevel@tonic-gate &mod_driverops, /* type of module - a driver */
1110Sstevel@tonic-gate "Dev Poll driver %I%", 1120Sstevel@tonic-gate &dp_ops, 1130Sstevel@tonic-gate }; 1140Sstevel@tonic-gate /* linkage passed to mod_install(), mod_remove() and mod_info() */ 1150Sstevel@tonic-gate static struct modlinkage modlinkage = { 1160Sstevel@tonic-gate MODREV_1, 1170Sstevel@tonic-gate (void *)&modldrv, 1180Sstevel@tonic-gate NULL 1190Sstevel@tonic-gate }; 1200Sstevel@tonic-gate 1210Sstevel@tonic-gate /* 1220Sstevel@tonic-gate * Locking Design 1230Sstevel@tonic-gate * 1240Sstevel@tonic-gate * The /dev/poll driver shares most of its code with the poll system call, whose 1250Sstevel@tonic-gate * code is in common/syscall/poll.c. In the poll(2) design, the pollcache 1260Sstevel@tonic-gate * structure is per lwp. An implicit assumption is made there that some 1270Sstevel@tonic-gate * portion of pollcache will never be touched by other lwps. E.g., in the 1280Sstevel@tonic-gate * poll(2) design, no lwp will ever need to grow the bitmap of another lwp. 1290Sstevel@tonic-gate * This assumption is not true for /dev/poll; hence the need for extra 1300Sstevel@tonic-gate * locking. 1310Sstevel@tonic-gate * 1320Sstevel@tonic-gate * To allow more parallelism, each /dev/poll file descriptor (indexed by 1330Sstevel@tonic-gate * minor number) has its own lock. Since read (dpioctl) is a much more 1340Sstevel@tonic-gate * frequent operation than write, we want to allow multiple reads on the same 1350Sstevel@tonic-gate * /dev/poll fd. However, we prevent writes from being starved by giving 1360Sstevel@tonic-gate * priority to the write operation. Theoretically writes can starve reads as 1370Sstevel@tonic-gate * well. But in a practical sense this is not important because (1) writes 1380Sstevel@tonic-gate * happen less often than reads, and (2) a write operation defines the 1390Sstevel@tonic-gate * content of a poll fd cache set. If writes happen so often that they 1400Sstevel@tonic-gate * can starve reads, that means the cached set is very unstable. It may 1410Sstevel@tonic-gate * not make sense to read an unstable cache set anyway.
Therefore, the 1420Sstevel@tonic-gate * writers starving readers case is not handled in this design. 1430Sstevel@tonic-gate */ 1440Sstevel@tonic-gate 1450Sstevel@tonic-gate int 1460Sstevel@tonic-gate _init() 1470Sstevel@tonic-gate { 1480Sstevel@tonic-gate int error; 1490Sstevel@tonic-gate 1500Sstevel@tonic-gate dptblsize = DEVPOLLSIZE; 1510Sstevel@tonic-gate devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP); 1520Sstevel@tonic-gate mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL); 1530Sstevel@tonic-gate devpoll_init = 1; 1540Sstevel@tonic-gate if ((error = mod_install(&modlinkage)) != 0) { 1550Sstevel@tonic-gate mutex_destroy(&devpoll_lock); 1560Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); 1570Sstevel@tonic-gate devpoll_init = 0; 1580Sstevel@tonic-gate } 1590Sstevel@tonic-gate return (error); 1600Sstevel@tonic-gate } 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate int 1630Sstevel@tonic-gate _fini() 1640Sstevel@tonic-gate { 1650Sstevel@tonic-gate int error; 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate if ((error = mod_remove(&modlinkage)) != 0) { 1680Sstevel@tonic-gate return (error); 1690Sstevel@tonic-gate } 1700Sstevel@tonic-gate mutex_destroy(&devpoll_lock); 1710Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); 1720Sstevel@tonic-gate return (0); 1730Sstevel@tonic-gate } 1740Sstevel@tonic-gate 1750Sstevel@tonic-gate int 1760Sstevel@tonic-gate _info(struct modinfo *modinfop) 1770Sstevel@tonic-gate { 1780Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 1790Sstevel@tonic-gate } 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate /*ARGSUSED*/ 1820Sstevel@tonic-gate static int 1830Sstevel@tonic-gate dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd) 1840Sstevel@tonic-gate { 1850Sstevel@tonic-gate if (ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, NULL) 1860Sstevel@tonic-gate == DDI_FAILURE) { 1870Sstevel@tonic-gate ddi_remove_minor_node(devi, NULL); 1880Sstevel@tonic-gate return 
(DDI_FAILURE); 1890Sstevel@tonic-gate } 1900Sstevel@tonic-gate dpdevi = devi; 1910Sstevel@tonic-gate return (DDI_SUCCESS); 1920Sstevel@tonic-gate } 1930Sstevel@tonic-gate 1940Sstevel@tonic-gate static int 1950Sstevel@tonic-gate dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1960Sstevel@tonic-gate { 1970Sstevel@tonic-gate if (cmd != DDI_DETACH) 1980Sstevel@tonic-gate return (DDI_FAILURE); 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate ddi_remove_minor_node(devi, NULL); 2010Sstevel@tonic-gate return (DDI_SUCCESS); 2020Sstevel@tonic-gate } 2030Sstevel@tonic-gate 2040Sstevel@tonic-gate /* ARGSUSED */ 2050Sstevel@tonic-gate static int 2060Sstevel@tonic-gate dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 2070Sstevel@tonic-gate { 2080Sstevel@tonic-gate int error; 2090Sstevel@tonic-gate 2100Sstevel@tonic-gate switch (infocmd) { 2110Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 2120Sstevel@tonic-gate *result = (void *)dpdevi; 2130Sstevel@tonic-gate error = DDI_SUCCESS; 2140Sstevel@tonic-gate break; 2150Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 2160Sstevel@tonic-gate *result = (void *)0; 2170Sstevel@tonic-gate error = DDI_SUCCESS; 2180Sstevel@tonic-gate break; 2190Sstevel@tonic-gate default: 2200Sstevel@tonic-gate error = DDI_FAILURE; 2210Sstevel@tonic-gate } 2220Sstevel@tonic-gate return (error); 2230Sstevel@tonic-gate } 2240Sstevel@tonic-gate 2250Sstevel@tonic-gate /* 2260Sstevel@tonic-gate * dp_pcache_poll has similar logic to pcache_poll() in poll.c. The major 2270Sstevel@tonic-gate * differences are: (1) /dev/poll requires scanning the bitmap starting at 2280Sstevel@tonic-gate * where it was stopped last time, instead of always starting from 0, 2290Sstevel@tonic-gate * (2) since user may not have cleaned up the cached fds when they are 2300Sstevel@tonic-gate * closed, some polldats in cache may refer to closed or reused fds. We 2310Sstevel@tonic-gate * need to check for those cases. 
2320Sstevel@tonic-gate * 2330Sstevel@tonic-gate * NOTE: Upon closing an fd, automatic poll cache cleanup is done for 2340Sstevel@tonic-gate * poll(2) caches but NOT for /dev/poll caches. So expect some 2350Sstevel@tonic-gate * stale entries! 2360Sstevel@tonic-gate */ 2370Sstevel@tonic-gate static int 2380Sstevel@tonic-gate dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp) 2390Sstevel@tonic-gate { 2400Sstevel@tonic-gate int start, ostart, end; 2410Sstevel@tonic-gate int fdcnt, fd; 2420Sstevel@tonic-gate boolean_t done; 2430Sstevel@tonic-gate file_t *fp; 2440Sstevel@tonic-gate short revent; 2450Sstevel@tonic-gate boolean_t no_wrap; 2460Sstevel@tonic-gate pollhead_t *php; 2470Sstevel@tonic-gate polldat_t *pdp; 2480Sstevel@tonic-gate int error = 0; 2490Sstevel@tonic-gate 2500Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pcp->pc_lock)); 2510Sstevel@tonic-gate if (pcp->pc_bitmap == NULL) { 2520Sstevel@tonic-gate /* 2530Sstevel@tonic-gate * No Need to search because no poll fd 2540Sstevel@tonic-gate * has been cached. 2550Sstevel@tonic-gate */ 2560Sstevel@tonic-gate return (error); 2570Sstevel@tonic-gate } 2580Sstevel@tonic-gate retry: 2590Sstevel@tonic-gate start = ostart = pcp->pc_mapstart; 2600Sstevel@tonic-gate end = pcp->pc_mapend; 2610Sstevel@tonic-gate php = NULL; 2620Sstevel@tonic-gate 2630Sstevel@tonic-gate if (start == 0) { 2640Sstevel@tonic-gate /* 2650Sstevel@tonic-gate * started from every begining, no need to wrap around. 2660Sstevel@tonic-gate */ 2670Sstevel@tonic-gate no_wrap = B_TRUE; 2680Sstevel@tonic-gate } else { 2690Sstevel@tonic-gate no_wrap = B_FALSE; 2700Sstevel@tonic-gate } 2710Sstevel@tonic-gate done = B_FALSE; 2720Sstevel@tonic-gate fdcnt = 0; 2730Sstevel@tonic-gate while ((fdcnt < nfds) && !done) { 2740Sstevel@tonic-gate php = NULL; 2750Sstevel@tonic-gate revent = 0; 2760Sstevel@tonic-gate /* 2770Sstevel@tonic-gate * Examine the bit map in a circular fashion 2780Sstevel@tonic-gate * to avoid starvation. 
Always resume from 2790Sstevel@tonic-gate * last stop. Scan till end of the map. Then 2800Sstevel@tonic-gate * wrap around. 2810Sstevel@tonic-gate */ 2820Sstevel@tonic-gate fd = bt_getlowbit(pcp->pc_bitmap, start, end); 2830Sstevel@tonic-gate ASSERT(fd <= end); 2840Sstevel@tonic-gate if (fd >= 0) { 2850Sstevel@tonic-gate if (fd == end) { 2860Sstevel@tonic-gate if (no_wrap) { 2870Sstevel@tonic-gate done = B_TRUE; 2880Sstevel@tonic-gate } else { 2890Sstevel@tonic-gate start = 0; 2900Sstevel@tonic-gate end = ostart - 1; 2910Sstevel@tonic-gate no_wrap = B_TRUE; 2920Sstevel@tonic-gate } 2930Sstevel@tonic-gate } else { 2940Sstevel@tonic-gate start = fd + 1; 2950Sstevel@tonic-gate } 2960Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, fd); 2970Sstevel@tonic-gate ASSERT(pdp != NULL); 2980Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 2990Sstevel@tonic-gate if (pdp->pd_fp == NULL) { 3000Sstevel@tonic-gate /* 3010Sstevel@tonic-gate * The fd is POLLREMOVed. This fd is 3020Sstevel@tonic-gate * logically no longer cached. So move 3030Sstevel@tonic-gate * on to the next one. 3040Sstevel@tonic-gate */ 3050Sstevel@tonic-gate continue; 3060Sstevel@tonic-gate } 3070Sstevel@tonic-gate if ((fp = getf(fd)) == NULL) { 3080Sstevel@tonic-gate /* 3090Sstevel@tonic-gate * The fd has been closed, but user has not 3100Sstevel@tonic-gate * done a POLLREMOVE on this fd yet. Instead 3110Sstevel@tonic-gate * of cleaning it here implicitly, we return 3120Sstevel@tonic-gate * POLLNVAL. This is consistent with poll(2) 3130Sstevel@tonic-gate * polling a closed fd. Hope this will remind 3140Sstevel@tonic-gate * user to do a POLLREMOVE. 
3150Sstevel@tonic-gate */ 3160Sstevel@tonic-gate pfdp[fdcnt].fd = fd; 3170Sstevel@tonic-gate pfdp[fdcnt].revents = POLLNVAL; 3180Sstevel@tonic-gate fdcnt++; 3190Sstevel@tonic-gate continue; 3200Sstevel@tonic-gate } 3210Sstevel@tonic-gate if (fp != pdp->pd_fp) { 3220Sstevel@tonic-gate /* 3230Sstevel@tonic-gate * user is polling on a cached fd which was 3240Sstevel@tonic-gate * closed and then reused. Unfortunately 3250Sstevel@tonic-gate * there is no good way to inform user. 3260Sstevel@tonic-gate * If the file struct is also reused, we 3270Sstevel@tonic-gate * may not be able to detect the fd reuse 3280Sstevel@tonic-gate * at all. As long as this does not 3290Sstevel@tonic-gate * cause system failure and/or memory leak, 3300Sstevel@tonic-gate * we will play along. Man page states if 3310Sstevel@tonic-gate * user does not clean up closed fds, polling 3320Sstevel@tonic-gate * results will be indeterministic. 3330Sstevel@tonic-gate * 3340Sstevel@tonic-gate * XXX - perhaps log the detection of fd 3350Sstevel@tonic-gate * reuse? 3360Sstevel@tonic-gate */ 3370Sstevel@tonic-gate pdp->pd_fp = fp; 3380Sstevel@tonic-gate } 3390Sstevel@tonic-gate /* 3400Sstevel@tonic-gate * XXX - pollrelock() logic needs to know which 3410Sstevel@tonic-gate * which pollcache lock to grab. It'd be a 3420Sstevel@tonic-gate * cleaner solution if we could pass pcp as 3430Sstevel@tonic-gate * an arguement in VOP_POLL interface instead 3440Sstevel@tonic-gate * of implicitly passing it using thread_t 3450Sstevel@tonic-gate * struct. On the other hand, changing VOP_POLL 3460Sstevel@tonic-gate * interface will require all driver/file system 3470Sstevel@tonic-gate * poll routine to change. May want to revisit 3480Sstevel@tonic-gate * the tradeoff later. 
3490Sstevel@tonic-gate */ 3500Sstevel@tonic-gate curthread->t_pollcache = pcp; 3510Sstevel@tonic-gate error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, 3520Sstevel@tonic-gate &revent, &php); 3530Sstevel@tonic-gate curthread->t_pollcache = NULL; 3540Sstevel@tonic-gate releasef(fd); 3550Sstevel@tonic-gate if (error != 0) { 3560Sstevel@tonic-gate break; 3570Sstevel@tonic-gate } 3580Sstevel@tonic-gate /* 3590Sstevel@tonic-gate * layered devices (e.g. console driver) 3600Sstevel@tonic-gate * may change the vnode and thus the pollhead 3610Sstevel@tonic-gate * pointer out from underneath us. 3620Sstevel@tonic-gate */ 3630Sstevel@tonic-gate if (php != NULL && pdp->pd_php != NULL && 3640Sstevel@tonic-gate php != pdp->pd_php) { 3650Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 3660Sstevel@tonic-gate pdp->pd_php = php; 3670Sstevel@tonic-gate pollhead_insert(php, pdp); 3680Sstevel@tonic-gate /* 3690Sstevel@tonic-gate * The bit should still be set. 3700Sstevel@tonic-gate */ 3710Sstevel@tonic-gate ASSERT(BT_TEST(pcp->pc_bitmap, fd)); 3720Sstevel@tonic-gate goto retry; 3730Sstevel@tonic-gate } 3740Sstevel@tonic-gate 3750Sstevel@tonic-gate if (revent != 0) { 3760Sstevel@tonic-gate pfdp[fdcnt].fd = fd; 3770Sstevel@tonic-gate pfdp[fdcnt].events = pdp->pd_events; 3780Sstevel@tonic-gate pfdp[fdcnt].revents = revent; 3790Sstevel@tonic-gate fdcnt++; 3800Sstevel@tonic-gate } else if (php != NULL) { 3810Sstevel@tonic-gate /* 3820Sstevel@tonic-gate * We clear a bit or cache a poll fd if 3830Sstevel@tonic-gate * the driver returns a poll head ptr, 3840Sstevel@tonic-gate * which is expected in the case of 0 3850Sstevel@tonic-gate * revents. Some buggy driver may return 3860Sstevel@tonic-gate * NULL php pointer with 0 revents. In 3870Sstevel@tonic-gate * this case, we just treat the driver as 3880Sstevel@tonic-gate * "noncachable" and not clearing the bit 3890Sstevel@tonic-gate * in bitmap. 
3900Sstevel@tonic-gate */ 3910Sstevel@tonic-gate if ((pdp->pd_php != NULL) && 3920Sstevel@tonic-gate ((pcp->pc_flag & T_POLLWAKE) == 0)) { 3930Sstevel@tonic-gate BT_CLEAR(pcp->pc_bitmap, fd); 3940Sstevel@tonic-gate } 3950Sstevel@tonic-gate if (pdp->pd_php == NULL) { 3960Sstevel@tonic-gate pollhead_insert(php, pdp); 3970Sstevel@tonic-gate pdp->pd_php = php; 3980Sstevel@tonic-gate } 3990Sstevel@tonic-gate } 4000Sstevel@tonic-gate } else { 4010Sstevel@tonic-gate /* 4020Sstevel@tonic-gate * No bit set in the range. Check for wrap around. 4030Sstevel@tonic-gate */ 4040Sstevel@tonic-gate if (!no_wrap) { 4050Sstevel@tonic-gate start = 0; 4060Sstevel@tonic-gate end = ostart - 1; 4070Sstevel@tonic-gate no_wrap = B_TRUE; 4080Sstevel@tonic-gate } else { 4090Sstevel@tonic-gate done = B_TRUE; 4100Sstevel@tonic-gate } 4110Sstevel@tonic-gate } 4120Sstevel@tonic-gate } 4130Sstevel@tonic-gate 4140Sstevel@tonic-gate if (!done) { 4150Sstevel@tonic-gate pcp->pc_mapstart = start; 4160Sstevel@tonic-gate } 4170Sstevel@tonic-gate ASSERT(*fdcntp == 0); 4180Sstevel@tonic-gate *fdcntp = fdcnt; 4190Sstevel@tonic-gate return (error); 4200Sstevel@tonic-gate } 4210Sstevel@tonic-gate 4220Sstevel@tonic-gate /*ARGSUSED*/ 4230Sstevel@tonic-gate static int 4240Sstevel@tonic-gate dpopen(dev_t *devp, int flag, int otyp, cred_t *credp) 4250Sstevel@tonic-gate { 4260Sstevel@tonic-gate minor_t minordev; 4270Sstevel@tonic-gate dp_entry_t *dpep; 4280Sstevel@tonic-gate pollcache_t *pcp; 4290Sstevel@tonic-gate 4300Sstevel@tonic-gate ASSERT(devpoll_init); 4310Sstevel@tonic-gate ASSERT(dptblsize <= MAXMIN); 4320Sstevel@tonic-gate mutex_enter(&devpoll_lock); 4330Sstevel@tonic-gate for (minordev = 0; minordev < dptblsize; minordev++) { 4340Sstevel@tonic-gate if (devpolltbl[minordev] == NULL) { 4350Sstevel@tonic-gate devpolltbl[minordev] = (dp_entry_t *)RESERVED; 4360Sstevel@tonic-gate break; 4370Sstevel@tonic-gate } 4380Sstevel@tonic-gate } 4390Sstevel@tonic-gate if (minordev == dptblsize) { 4400Sstevel@tonic-gate 
dp_entry_t **newtbl; 4410Sstevel@tonic-gate size_t oldsize; 4420Sstevel@tonic-gate 4430Sstevel@tonic-gate /* 4440Sstevel@tonic-gate * Used up every entry in the existing devpoll table. 4450Sstevel@tonic-gate * Grow the table by DEVPOLLSIZE. 4460Sstevel@tonic-gate */ 4470Sstevel@tonic-gate if ((oldsize = dptblsize) >= MAXMIN) { 4480Sstevel@tonic-gate mutex_exit(&devpoll_lock); 4490Sstevel@tonic-gate return (ENXIO); 4500Sstevel@tonic-gate } 4510Sstevel@tonic-gate dptblsize += DEVPOLLSIZE; 4520Sstevel@tonic-gate if (dptblsize > MAXMIN) { 4530Sstevel@tonic-gate dptblsize = MAXMIN; 4540Sstevel@tonic-gate } 4550Sstevel@tonic-gate newtbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP); 4560Sstevel@tonic-gate bcopy(devpolltbl, newtbl, sizeof (caddr_t) * oldsize); 4570Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * oldsize); 4580Sstevel@tonic-gate devpolltbl = newtbl; 4590Sstevel@tonic-gate devpolltbl[minordev] = (dp_entry_t *)RESERVED; 4600Sstevel@tonic-gate } 4610Sstevel@tonic-gate mutex_exit(&devpoll_lock); 4620Sstevel@tonic-gate 4630Sstevel@tonic-gate dpep = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP); 4640Sstevel@tonic-gate /* 4650Sstevel@tonic-gate * allocate a pollcache skeleton here. Delay allocating bitmap 4660Sstevel@tonic-gate * structures until dpwrite() time, since we don't know the 4670Sstevel@tonic-gate * optimal size yet. 
4680Sstevel@tonic-gate */ 4690Sstevel@tonic-gate pcp = pcache_alloc(); 4700Sstevel@tonic-gate dpep->dpe_pcache = pcp; 4710Sstevel@tonic-gate pcp->pc_pid = curproc->p_pid; 4720Sstevel@tonic-gate *devp = makedevice(getmajor(*devp), minordev); /* clone the driver */ 4730Sstevel@tonic-gate mutex_enter(&devpoll_lock); 4740Sstevel@tonic-gate ASSERT(minordev < dptblsize); 4750Sstevel@tonic-gate ASSERT(devpolltbl[minordev] == (dp_entry_t *)RESERVED); 4760Sstevel@tonic-gate devpolltbl[minordev] = dpep; 4770Sstevel@tonic-gate mutex_exit(&devpoll_lock); 4780Sstevel@tonic-gate return (0); 4790Sstevel@tonic-gate } 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate /* 4820Sstevel@tonic-gate * Write to dev/poll add/remove fd's to/from a cached poll fd set, 4830Sstevel@tonic-gate * or change poll events for a watched fd. 4840Sstevel@tonic-gate */ 4850Sstevel@tonic-gate /*ARGSUSED*/ 4860Sstevel@tonic-gate static int 4870Sstevel@tonic-gate dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) 4880Sstevel@tonic-gate { 4890Sstevel@tonic-gate minor_t minor; 4900Sstevel@tonic-gate dp_entry_t *dpep; 4910Sstevel@tonic-gate pollcache_t *pcp; 4920Sstevel@tonic-gate pollfd_t *pollfdp, *pfdp; 4930Sstevel@tonic-gate int error; 4940Sstevel@tonic-gate ssize_t uiosize; 4950Sstevel@tonic-gate nfds_t pollfdnum; 4960Sstevel@tonic-gate struct pollhead *php = NULL; 4970Sstevel@tonic-gate polldat_t *pdp; 4980Sstevel@tonic-gate int fd; 4990Sstevel@tonic-gate file_t *fp; 5000Sstevel@tonic-gate 5010Sstevel@tonic-gate minor = getminor(dev); 5020Sstevel@tonic-gate 5030Sstevel@tonic-gate mutex_enter(&devpoll_lock); 5040Sstevel@tonic-gate ASSERT(minor < dptblsize); 5050Sstevel@tonic-gate dpep = devpolltbl[minor]; 5060Sstevel@tonic-gate ASSERT(dpep != NULL); 5070Sstevel@tonic-gate mutex_exit(&devpoll_lock); 5080Sstevel@tonic-gate pcp = dpep->dpe_pcache; 5090Sstevel@tonic-gate if (curproc->p_pid != pcp->pc_pid) { 5100Sstevel@tonic-gate return (EACCES); 5110Sstevel@tonic-gate } 5120Sstevel@tonic-gate uiosize = 
uiop->uio_resid; 5130Sstevel@tonic-gate pollfdnum = uiosize / sizeof (pollfd_t); 5140Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 5150Sstevel@tonic-gate if (pollfdnum > (uint_t)rctl_enforced_value( 5160Sstevel@tonic-gate rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) { 5170Sstevel@tonic-gate (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], 5180Sstevel@tonic-gate curproc->p_rctls, curproc, RCA_SAFE); 5190Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 5200Sstevel@tonic-gate return (set_errno(EINVAL)); 5210Sstevel@tonic-gate } 5220Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 5230Sstevel@tonic-gate /* 5240Sstevel@tonic-gate * Copy in the pollfd array. Walk through the array and add 5250Sstevel@tonic-gate * each polled fd to the cached set. 5260Sstevel@tonic-gate */ 5270Sstevel@tonic-gate pollfdp = kmem_alloc(uiosize, KM_SLEEP); 5280Sstevel@tonic-gate 5290Sstevel@tonic-gate /* 5300Sstevel@tonic-gate * Although /dev/poll uses the write(2) interface to cache fds, it's 5310Sstevel@tonic-gate * not supposed to function as a seekable device. To prevent offset 5320Sstevel@tonic-gate * from growing and eventually exceed the maximum, reset the offset 5330Sstevel@tonic-gate * here for every call. 5340Sstevel@tonic-gate */ 5350Sstevel@tonic-gate uiop->uio_loffset = 0; 5360Sstevel@tonic-gate if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop)) 5370Sstevel@tonic-gate != 0) { 5380Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 5390Sstevel@tonic-gate return (error); 5400Sstevel@tonic-gate } 5410Sstevel@tonic-gate /* 5420Sstevel@tonic-gate * We are about to enter the core portion of dpwrite(). Make sure this 5430Sstevel@tonic-gate * write has exclusive access in this portion of the code, i.e., no 5440Sstevel@tonic-gate * other writers in this code and no other readers in dpioctl. 
5450Sstevel@tonic-gate */ 5460Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 5470Sstevel@tonic-gate dpep->dpe_writerwait++; 5480Sstevel@tonic-gate while (dpep->dpe_refcnt != 0) { 5490Sstevel@tonic-gate if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { 5500Sstevel@tonic-gate dpep->dpe_writerwait--; 5510Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 5520Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 5530Sstevel@tonic-gate return (set_errno(EINTR)); 5540Sstevel@tonic-gate } 5550Sstevel@tonic-gate } 5560Sstevel@tonic-gate dpep->dpe_writerwait--; 5570Sstevel@tonic-gate dpep->dpe_flag |= DP_WRITER_PRESENT; 5580Sstevel@tonic-gate dpep->dpe_refcnt++; 5590Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 5600Sstevel@tonic-gate 5610Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 5620Sstevel@tonic-gate if (pcp->pc_bitmap == NULL) { 5630Sstevel@tonic-gate pcache_create(pcp, pollfdnum); 5640Sstevel@tonic-gate } 5650Sstevel@tonic-gate for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) { 5660Sstevel@tonic-gate fd = pfdp->fd; 5670Sstevel@tonic-gate if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles) 5680Sstevel@tonic-gate continue; 5690Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, fd); 5700Sstevel@tonic-gate if (pfdp->events != POLLREMOVE) { 5710Sstevel@tonic-gate if (pdp == NULL) { 5720Sstevel@tonic-gate pdp = pcache_alloc_fd(0); 5730Sstevel@tonic-gate pdp->pd_fd = fd; 5740Sstevel@tonic-gate pdp->pd_pcache = pcp; 5750Sstevel@tonic-gate pcache_insert_fd(pcp, pdp, pollfdnum); 5760Sstevel@tonic-gate } 5770Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 5780Sstevel@tonic-gate ASSERT(pdp->pd_pcache == pcp); 5790Sstevel@tonic-gate if (fd >= pcp->pc_mapsize) { 5800Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 5810Sstevel@tonic-gate pcache_grow_map(pcp, fd); 5820Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 5830Sstevel@tonic-gate } 5840Sstevel@tonic-gate if (fd > pcp->pc_mapend) { 5850Sstevel@tonic-gate pcp->pc_mapend = fd; 5860Sstevel@tonic-gate } 5870Sstevel@tonic-gate if ((fp 
= getf(fd)) == NULL) { 5880Sstevel@tonic-gate /* 5890Sstevel@tonic-gate * The fd is not valid. Since we can't pass 5900Sstevel@tonic-gate * this error back in the write() call, set 5910Sstevel@tonic-gate * the bit in bitmap to force DP_POLL ioctl 5920Sstevel@tonic-gate * to examine it. 5930Sstevel@tonic-gate */ 5940Sstevel@tonic-gate BT_SET(pcp->pc_bitmap, fd); 5950Sstevel@tonic-gate pdp->pd_events |= pfdp->events; 5960Sstevel@tonic-gate continue; 5970Sstevel@tonic-gate } 5980Sstevel@tonic-gate /* 5990Sstevel@tonic-gate * Don't do VOP_POLL for an already cached fd with 6000Sstevel@tonic-gate * same poll events. 6010Sstevel@tonic-gate */ 6020Sstevel@tonic-gate if ((pdp->pd_events == pfdp->events) && 6030Sstevel@tonic-gate (pdp->pd_fp != NULL)) { 6040Sstevel@tonic-gate /* 6050Sstevel@tonic-gate * the events are already cached 6060Sstevel@tonic-gate */ 6070Sstevel@tonic-gate releasef(fd); 6080Sstevel@tonic-gate continue; 6090Sstevel@tonic-gate } 6100Sstevel@tonic-gate 6110Sstevel@tonic-gate /* 6120Sstevel@tonic-gate * do VOP_POLL and cache this poll fd. 6130Sstevel@tonic-gate */ 6140Sstevel@tonic-gate /* 6150Sstevel@tonic-gate * XXX - pollrelock() logic needs to know which 6160Sstevel@tonic-gate * which pollcache lock to grab. It'd be a 6170Sstevel@tonic-gate * cleaner solution if we could pass pcp as 6180Sstevel@tonic-gate * an arguement in VOP_POLL interface instead 6190Sstevel@tonic-gate * of implicitly passing it using thread_t 6200Sstevel@tonic-gate * struct. On the other hand, changing VOP_POLL 6210Sstevel@tonic-gate * interface will require all driver/file system 6220Sstevel@tonic-gate * poll routine to change. May want to revisit 6230Sstevel@tonic-gate * the tradeoff later. 
6240Sstevel@tonic-gate */ 6250Sstevel@tonic-gate curthread->t_pollcache = pcp; 6260Sstevel@tonic-gate error = VOP_POLL(fp->f_vnode, pfdp->events, 0, 6270Sstevel@tonic-gate &pfdp->revents, &php); 6280Sstevel@tonic-gate curthread->t_pollcache = NULL; 6290Sstevel@tonic-gate /* 6300Sstevel@tonic-gate * We always set the bit when this fd is cached. 6310Sstevel@tonic-gate * So we don't have to worry about missing a 6320Sstevel@tonic-gate * pollwakeup between VOP_POLL and pollhead_insert. 6330Sstevel@tonic-gate * This forces the first DP_POLL to poll this fd. 6340Sstevel@tonic-gate * Real performance gain comes from subsequent 6350Sstevel@tonic-gate * DP_POLL. 6360Sstevel@tonic-gate */ 6370Sstevel@tonic-gate BT_SET(pcp->pc_bitmap, fd); 6380Sstevel@tonic-gate if (error != 0) { 6390Sstevel@tonic-gate releasef(fd); 6400Sstevel@tonic-gate break; 6410Sstevel@tonic-gate } 6420Sstevel@tonic-gate pdp->pd_fp = fp; 6430Sstevel@tonic-gate pdp->pd_events |= pfdp->events; 6440Sstevel@tonic-gate if (php != NULL) { 6450Sstevel@tonic-gate if (pdp->pd_php == NULL) { 6460Sstevel@tonic-gate pollhead_insert(php, pdp); 6470Sstevel@tonic-gate pdp->pd_php = php; 6480Sstevel@tonic-gate } else { 6490Sstevel@tonic-gate if (pdp->pd_php != php) { 6500Sstevel@tonic-gate pollhead_delete(pdp->pd_php, 6510Sstevel@tonic-gate pdp); 6520Sstevel@tonic-gate pollhead_insert(php, pdp); 6530Sstevel@tonic-gate pdp->pd_php = php; 6540Sstevel@tonic-gate } 6550Sstevel@tonic-gate } 6560Sstevel@tonic-gate 6570Sstevel@tonic-gate } 6580Sstevel@tonic-gate releasef(fd); 6590Sstevel@tonic-gate } else { 6600Sstevel@tonic-gate if (pdp == NULL) { 6610Sstevel@tonic-gate continue; 6620Sstevel@tonic-gate } 6630Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 6640Sstevel@tonic-gate pdp->pd_fp = NULL; 6650Sstevel@tonic-gate pdp->pd_events = 0; 6660Sstevel@tonic-gate ASSERT(pdp->pd_thread == NULL); 6670Sstevel@tonic-gate if (pdp->pd_php != NULL) { 6680Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 6690Sstevel@tonic-gate 
pdp->pd_php = NULL; 6700Sstevel@tonic-gate } 6710Sstevel@tonic-gate BT_CLEAR(pcp->pc_bitmap, fd); 6720Sstevel@tonic-gate } 6730Sstevel@tonic-gate } 6740Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 6750Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 6760Sstevel@tonic-gate dpep->dpe_flag &= ~DP_WRITER_PRESENT; 6770Sstevel@tonic-gate ASSERT(dpep->dpe_refcnt == 1); 6780Sstevel@tonic-gate dpep->dpe_refcnt--; 6790Sstevel@tonic-gate cv_broadcast(&dpep->dpe_cv); 6800Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 6810Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 6820Sstevel@tonic-gate return (error); 6830Sstevel@tonic-gate } 6840Sstevel@tonic-gate 6850Sstevel@tonic-gate /*ARGSUSED*/ 6860Sstevel@tonic-gate static int 6870Sstevel@tonic-gate dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 6880Sstevel@tonic-gate { 6890Sstevel@tonic-gate timestruc_t now; 6900Sstevel@tonic-gate timestruc_t rqtime; 6910Sstevel@tonic-gate timestruc_t *rqtp = NULL; 6920Sstevel@tonic-gate int timecheck = 0; 6930Sstevel@tonic-gate minor_t minor; 6940Sstevel@tonic-gate dp_entry_t *dpep; 6950Sstevel@tonic-gate pollcache_t *pcp; 6960Sstevel@tonic-gate int error = 0; 6970Sstevel@tonic-gate STRUCT_DECL(dvpoll, dvpoll); 6980Sstevel@tonic-gate 6990Sstevel@tonic-gate if (cmd == DP_POLL) { 7000Sstevel@tonic-gate /* do this now, before we sleep on DP_WRITER_PRESENT below */ 7010Sstevel@tonic-gate timecheck = timechanged; 7020Sstevel@tonic-gate gethrestime(&now); 7030Sstevel@tonic-gate } 7040Sstevel@tonic-gate minor = getminor(dev); 7050Sstevel@tonic-gate mutex_enter(&devpoll_lock); 7060Sstevel@tonic-gate ASSERT(minor < dptblsize); 7070Sstevel@tonic-gate dpep = devpolltbl[minor]; 7080Sstevel@tonic-gate mutex_exit(&devpoll_lock); 7090Sstevel@tonic-gate ASSERT(dpep != NULL); 7100Sstevel@tonic-gate pcp = dpep->dpe_pcache; 7110Sstevel@tonic-gate if (curproc->p_pid != pcp->pc_pid) 7120Sstevel@tonic-gate return (EACCES); 7130Sstevel@tonic-gate 7140Sstevel@tonic-gate 
mutex_enter(&dpep->dpe_lock); 7150Sstevel@tonic-gate while ((dpep->dpe_flag & DP_WRITER_PRESENT) || 7160Sstevel@tonic-gate (dpep->dpe_writerwait != 0)) { 7170Sstevel@tonic-gate if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { 7180Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 7190Sstevel@tonic-gate return (EINTR); 7200Sstevel@tonic-gate } 7210Sstevel@tonic-gate } 7220Sstevel@tonic-gate dpep->dpe_refcnt++; 7230Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 7240Sstevel@tonic-gate 7250Sstevel@tonic-gate switch (cmd) { 7260Sstevel@tonic-gate case DP_POLL: 7270Sstevel@tonic-gate { 7280Sstevel@tonic-gate pollstate_t *ps; 7290Sstevel@tonic-gate nfds_t nfds; 7300Sstevel@tonic-gate int fdcnt = 0; 7310Sstevel@tonic-gate int time_out; 7320Sstevel@tonic-gate int rval; 7330Sstevel@tonic-gate 7340Sstevel@tonic-gate STRUCT_INIT(dvpoll, mode); 7350Sstevel@tonic-gate error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll), 7360Sstevel@tonic-gate STRUCT_SIZE(dvpoll)); 7370Sstevel@tonic-gate if (error) { 7380Sstevel@tonic-gate DP_REFRELE(dpep); 7390Sstevel@tonic-gate return (EFAULT); 7400Sstevel@tonic-gate } 7410Sstevel@tonic-gate 7420Sstevel@tonic-gate time_out = STRUCT_FGET(dvpoll, dp_timeout); 7430Sstevel@tonic-gate if (time_out > 0) { 7440Sstevel@tonic-gate /* 7450Sstevel@tonic-gate * Determine the future time of the requested timeout. 7460Sstevel@tonic-gate */ 7470Sstevel@tonic-gate rqtp = &rqtime; 7480Sstevel@tonic-gate rqtp->tv_sec = time_out / MILLISEC; 7490Sstevel@tonic-gate rqtp->tv_nsec = (time_out % MILLISEC) * MICROSEC; 7500Sstevel@tonic-gate timespecadd(rqtp, &now); 7510Sstevel@tonic-gate } 7520Sstevel@tonic-gate 7530Sstevel@tonic-gate if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) { 7540Sstevel@tonic-gate /* 7550Sstevel@tonic-gate * We are just using DP_POLL to sleep, so 7560Sstevel@tonic-gate * we don't any of the devpoll apparatus. 7570Sstevel@tonic-gate * Do not check for signals if we have a zero timeout. 
7580Sstevel@tonic-gate */ 7590Sstevel@tonic-gate DP_REFRELE(dpep); 7600Sstevel@tonic-gate if (time_out == 0) 7610Sstevel@tonic-gate return (0); 7620Sstevel@tonic-gate mutex_enter(&curthread->t_delay_lock); 7630Sstevel@tonic-gate while ((rval = cv_waituntil_sig(&curthread->t_delay_cv, 7640Sstevel@tonic-gate &curthread->t_delay_lock, rqtp, timecheck)) > 0) 7650Sstevel@tonic-gate continue; 7660Sstevel@tonic-gate mutex_exit(&curthread->t_delay_lock); 7670Sstevel@tonic-gate return ((rval == 0)? EINTR : 0); 7680Sstevel@tonic-gate } 7690Sstevel@tonic-gate 7700Sstevel@tonic-gate /* 7710Sstevel@tonic-gate * XXX It'd be nice not to have to alloc each time. 7720Sstevel@tonic-gate * But it requires another per thread structure hook. 7730Sstevel@tonic-gate * Do it later if there is data suggest that. 7740Sstevel@tonic-gate */ 7750Sstevel@tonic-gate if ((ps = curthread->t_pollstate) == NULL) { 7760Sstevel@tonic-gate curthread->t_pollstate = pollstate_create(); 7770Sstevel@tonic-gate ps = curthread->t_pollstate; 7780Sstevel@tonic-gate } 7790Sstevel@tonic-gate if (ps->ps_dpbufsize < nfds) { 7800Sstevel@tonic-gate struct proc *p = ttoproc(curthread); 7810Sstevel@tonic-gate /* 7820Sstevel@tonic-gate * The maximum size should be no large than 7830Sstevel@tonic-gate * current maximum open file count. 
7840Sstevel@tonic-gate */ 7850Sstevel@tonic-gate mutex_enter(&p->p_lock); 7860Sstevel@tonic-gate if (nfds >= p->p_fno_ctl) { 7870Sstevel@tonic-gate mutex_exit(&p->p_lock); 7880Sstevel@tonic-gate DP_REFRELE(dpep); 7890Sstevel@tonic-gate return (EINVAL); 7900Sstevel@tonic-gate } 7910Sstevel@tonic-gate mutex_exit(&p->p_lock); 7920Sstevel@tonic-gate kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) * 7930Sstevel@tonic-gate ps->ps_dpbufsize); 7940Sstevel@tonic-gate ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) * 7950Sstevel@tonic-gate nfds, KM_SLEEP); 7960Sstevel@tonic-gate ps->ps_dpbufsize = nfds; 7970Sstevel@tonic-gate } 7980Sstevel@tonic-gate 7990Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 8000Sstevel@tonic-gate for (;;) { 8010Sstevel@tonic-gate pcp->pc_flag = 0; 8020Sstevel@tonic-gate error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds, &fdcnt); 8030Sstevel@tonic-gate if (fdcnt > 0 || error != 0) 8040Sstevel@tonic-gate break; 8050Sstevel@tonic-gate 8060Sstevel@tonic-gate /* 8070Sstevel@tonic-gate * A pollwake has happened since we polled cache. 8080Sstevel@tonic-gate */ 8090Sstevel@tonic-gate if (pcp->pc_flag & T_POLLWAKE) 8100Sstevel@tonic-gate continue; 8110Sstevel@tonic-gate 8120Sstevel@tonic-gate /* 8130Sstevel@tonic-gate * Sleep until we are notified, signalled, or timed out. 8140Sstevel@tonic-gate * Do not check for signals if we have a zero timeout. 8150Sstevel@tonic-gate */ 8160Sstevel@tonic-gate if (time_out == 0) /* immediate timeout */ 8170Sstevel@tonic-gate break; 8180Sstevel@tonic-gate rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock, 8190Sstevel@tonic-gate rqtp, timecheck); 8200Sstevel@tonic-gate /* 8210Sstevel@tonic-gate * If we were awakened by a signal or timeout 8220Sstevel@tonic-gate * then break the loop, else poll again. 
8230Sstevel@tonic-gate */ 8240Sstevel@tonic-gate if (rval <= 0) { 8250Sstevel@tonic-gate if (rval == 0) /* signal */ 8260Sstevel@tonic-gate error = EINTR; 8270Sstevel@tonic-gate break; 8280Sstevel@tonic-gate } 8290Sstevel@tonic-gate } 8300Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8310Sstevel@tonic-gate 8320Sstevel@tonic-gate if (error == 0 && fdcnt > 0) { 8330Sstevel@tonic-gate if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll, 8340Sstevel@tonic-gate dp_fds), sizeof (pollfd_t) * fdcnt)) { 8350Sstevel@tonic-gate DP_REFRELE(dpep); 8360Sstevel@tonic-gate return (EFAULT); 8370Sstevel@tonic-gate } 8380Sstevel@tonic-gate *rvalp = fdcnt; 8390Sstevel@tonic-gate } 8400Sstevel@tonic-gate break; 8410Sstevel@tonic-gate } 8420Sstevel@tonic-gate 8430Sstevel@tonic-gate case DP_ISPOLLED: 8440Sstevel@tonic-gate { 8450Sstevel@tonic-gate pollfd_t pollfd; 8460Sstevel@tonic-gate polldat_t *pdp; 8470Sstevel@tonic-gate 8480Sstevel@tonic-gate STRUCT_INIT(dvpoll, mode); 8490Sstevel@tonic-gate error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t)); 8500Sstevel@tonic-gate if (error) { 8510Sstevel@tonic-gate DP_REFRELE(dpep); 8520Sstevel@tonic-gate return (EFAULT); 8530Sstevel@tonic-gate } 8540Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 8550Sstevel@tonic-gate if (pcp->pc_hash == NULL) { 8560Sstevel@tonic-gate /* 8570Sstevel@tonic-gate * No Need to search because no poll fd 8580Sstevel@tonic-gate * has been cached. 
8590Sstevel@tonic-gate */ 8600Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8610Sstevel@tonic-gate DP_REFRELE(dpep); 8620Sstevel@tonic-gate return (0); 8630Sstevel@tonic-gate } 8640Sstevel@tonic-gate if (pollfd.fd < 0) { 8650Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8660Sstevel@tonic-gate break; 8670Sstevel@tonic-gate } 8680Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, pollfd.fd); 8690Sstevel@tonic-gate if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) && 8700Sstevel@tonic-gate (pdp->pd_fp != NULL)) { 8710Sstevel@tonic-gate pollfd.revents = pdp->pd_events; 8720Sstevel@tonic-gate if (copyout(&pollfd, (caddr_t)arg, sizeof (pollfd_t))) { 8730Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8740Sstevel@tonic-gate DP_REFRELE(dpep); 8750Sstevel@tonic-gate return (EFAULT); 8760Sstevel@tonic-gate } 8770Sstevel@tonic-gate *rvalp = 1; 8780Sstevel@tonic-gate } 8790Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8800Sstevel@tonic-gate break; 8810Sstevel@tonic-gate } 8820Sstevel@tonic-gate 8830Sstevel@tonic-gate default: 8840Sstevel@tonic-gate DP_REFRELE(dpep); 8850Sstevel@tonic-gate return (EINVAL); 8860Sstevel@tonic-gate } 8870Sstevel@tonic-gate DP_REFRELE(dpep); 8880Sstevel@tonic-gate return (error); 8890Sstevel@tonic-gate } 8900Sstevel@tonic-gate 8910Sstevel@tonic-gate /*ARGSUSED*/ 8920Sstevel@tonic-gate static int 8930Sstevel@tonic-gate dppoll(dev_t dev, short events, int anyyet, short *reventsp, 8940Sstevel@tonic-gate struct pollhead **phpp) 8950Sstevel@tonic-gate { 8960Sstevel@tonic-gate /* 8970Sstevel@tonic-gate * Polling on a /dev/poll fd is not fully supported yet. 8980Sstevel@tonic-gate */ 8990Sstevel@tonic-gate *reventsp = POLLERR; 9000Sstevel@tonic-gate return (0); 9010Sstevel@tonic-gate } 9020Sstevel@tonic-gate 9030Sstevel@tonic-gate /* 9040Sstevel@tonic-gate * devpoll close should do enough clean up before the pollcache is deleted, 9050Sstevel@tonic-gate * i.e., it should ensure no one still references the pollcache later. 
9060Sstevel@tonic-gate * There is no "permission" check in here. Any process having the last 9070Sstevel@tonic-gate * reference of this /dev/poll fd can close. 9080Sstevel@tonic-gate */ 9090Sstevel@tonic-gate /*ARGSUSED*/ 9100Sstevel@tonic-gate static int 9110Sstevel@tonic-gate dpclose(dev_t dev, int flag, int otyp, cred_t *credp) 9120Sstevel@tonic-gate { 9130Sstevel@tonic-gate minor_t minor; 9140Sstevel@tonic-gate dp_entry_t *dpep; 9150Sstevel@tonic-gate pollcache_t *pcp; 9160Sstevel@tonic-gate int i; 9170Sstevel@tonic-gate polldat_t **hashtbl; 9180Sstevel@tonic-gate polldat_t *pdp; 9190Sstevel@tonic-gate 9200Sstevel@tonic-gate minor = getminor(dev); 9210Sstevel@tonic-gate 9220Sstevel@tonic-gate mutex_enter(&devpoll_lock); 9230Sstevel@tonic-gate dpep = devpolltbl[minor]; 9240Sstevel@tonic-gate ASSERT(dpep != NULL); 9250Sstevel@tonic-gate devpolltbl[minor] = NULL; 9260Sstevel@tonic-gate mutex_exit(&devpoll_lock); 9270Sstevel@tonic-gate pcp = dpep->dpe_pcache; 9280Sstevel@tonic-gate ASSERT(pcp != NULL); 9290Sstevel@tonic-gate /* 9300Sstevel@tonic-gate * At this point, no other lwp can access this pollcache via the 9310Sstevel@tonic-gate * /dev/poll fd. This pollcache is going away, so do the clean 9320Sstevel@tonic-gate * up without the pc_lock. 9330Sstevel@tonic-gate */ 9340Sstevel@tonic-gate hashtbl = pcp->pc_hash; 9350Sstevel@tonic-gate for (i = 0; i < pcp->pc_hashsize; i++) { 9360Sstevel@tonic-gate for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) { 9370Sstevel@tonic-gate if (pdp->pd_php != NULL) { 9380Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 9390Sstevel@tonic-gate pdp->pd_php = NULL; 9400Sstevel@tonic-gate pdp->pd_fp = NULL; 9410Sstevel@tonic-gate } 9420Sstevel@tonic-gate } 9430Sstevel@tonic-gate } 9440Sstevel@tonic-gate /* 9450Sstevel@tonic-gate * pollwakeup() may still interact with this pollcache. Wait until 9460Sstevel@tonic-gate * it is done. 
9470Sstevel@tonic-gate */ 9480Sstevel@tonic-gate mutex_enter(&pcp->pc_no_exit); 9490Sstevel@tonic-gate ASSERT(pcp->pc_busy >= 0); 9500Sstevel@tonic-gate while (pcp->pc_busy > 0) 9510Sstevel@tonic-gate cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit); 9520Sstevel@tonic-gate mutex_exit(&pcp->pc_no_exit); 9530Sstevel@tonic-gate pcache_destroy(pcp); 9540Sstevel@tonic-gate ASSERT(dpep->dpe_refcnt == 0); 9550Sstevel@tonic-gate kmem_free(dpep, sizeof (dp_entry_t)); 9560Sstevel@tonic-gate return (0); 9570Sstevel@tonic-gate } 958