xref: /onnv-gate/usr/src/uts/common/fs/portfs/port_fd.c (revision 8587:b37cc627811a)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


280Sstevel@tonic-gate #include <sys/types.h>
290Sstevel@tonic-gate #include <sys/systm.h>
300Sstevel@tonic-gate #include <sys/stat.h>
310Sstevel@tonic-gate #include <sys/errno.h>
320Sstevel@tonic-gate #include <sys/kmem.h>
330Sstevel@tonic-gate #include <sys/sysmacros.h>
340Sstevel@tonic-gate #include <sys/debug.h>
350Sstevel@tonic-gate #include <sys/poll_impl.h>
360Sstevel@tonic-gate #include <sys/port_impl.h>
370Sstevel@tonic-gate 
380Sstevel@tonic-gate #define	PORTHASH_START	256	/* start cache space for events */
390Sstevel@tonic-gate #define	PORTHASH_MULT	2	/* growth threshold and factor */
400Sstevel@tonic-gate 
410Sstevel@tonic-gate /* local functions */
420Sstevel@tonic-gate static int	port_fd_callback(void *, int *, pid_t, int, void *);
430Sstevel@tonic-gate static int	port_bind_pollhead(pollhead_t **, polldat_t *, short *);
440Sstevel@tonic-gate static void	port_close_sourcefd(void *, int, pid_t, int);
450Sstevel@tonic-gate static void	port_cache_insert_fd(port_fdcache_t *, polldat_t *);
460Sstevel@tonic-gate 
470Sstevel@tonic-gate /*
480Sstevel@tonic-gate  * port_fd_callback()
490Sstevel@tonic-gate  * The event port framework uses callback functions to notify associated
500Sstevel@tonic-gate  * event sources about actions on source specific objects.
510Sstevel@tonic-gate  * The source itself defines the "arg" required to identify the object with
520Sstevel@tonic-gate  * events. In the port_fd_callback() case the "arg" is a pointer to portfd_t
530Sstevel@tonic-gate  * structure. The portfd_t structure is specific for PORT_SOURCE_FD source.
540Sstevel@tonic-gate  * The port_fd_callback() function is notified in three cases:
550Sstevel@tonic-gate  * - PORT_CALLBACK_DEFAULT
560Sstevel@tonic-gate  *	The object (fd) will be delivered to the application.
570Sstevel@tonic-gate  * - PORT_CALLBACK_DISSOCIATE
580Sstevel@tonic-gate  *	The object (fd) will be dissociated from  the port.
590Sstevel@tonic-gate  * - PORT_CALLBACK_CLOSE
600Sstevel@tonic-gate  *	The object (fd) will be dissociated from the port because the port
610Sstevel@tonic-gate  *	is being closed.
620Sstevel@tonic-gate  * A fd is shareable between processes only when
630Sstevel@tonic-gate  * - processes have the same fd id and
640Sstevel@tonic-gate  * - processes have the same fp.
650Sstevel@tonic-gate  * A fd becomes shareable:
660Sstevel@tonic-gate  * - on fork() across parent and child process and
670Sstevel@tonic-gate  * - when I_SENDFD is used to pass file descriptors between parent and child
680Sstevel@tonic-gate  *   immediately after fork() (the sender and receiver must get the same
690Sstevel@tonic-gate  *   file descriptor id).
700Sstevel@tonic-gate  * If a fd is shared between processes, all involved processes will get
710Sstevel@tonic-gate  * the same rights related to re-association of the fd with the port and
720Sstevel@tonic-gate  * retrieve of events from that fd.
730Sstevel@tonic-gate  * The process which associated the fd with a port for the first time
740Sstevel@tonic-gate  * becomes also the owner of the association. Only the owner of the
750Sstevel@tonic-gate  * association is allowed to dissociate the fd from the port.
760Sstevel@tonic-gate  */
770Sstevel@tonic-gate /* ARGSUSED */
780Sstevel@tonic-gate static int
port_fd_callback(void * arg,int * events,pid_t pid,int flag,void * evp)790Sstevel@tonic-gate port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
800Sstevel@tonic-gate {
810Sstevel@tonic-gate 	portfd_t	*pfd = (portfd_t *)arg;
820Sstevel@tonic-gate 	polldat_t	*pdp = PFTOD(pfd);
830Sstevel@tonic-gate 	port_fdcache_t	*pcp;
840Sstevel@tonic-gate 	file_t		*fp;
850Sstevel@tonic-gate 	int		error;
860Sstevel@tonic-gate 
870Sstevel@tonic-gate 	ASSERT((pdp != NULL) && (events != NULL));
880Sstevel@tonic-gate 	switch (flag) {
890Sstevel@tonic-gate 	case PORT_CALLBACK_DEFAULT:
900Sstevel@tonic-gate 		if (curproc->p_pid != pid) {
910Sstevel@tonic-gate 			/*
920Sstevel@tonic-gate 			 * Check if current process is allowed to retrieve
930Sstevel@tonic-gate 			 * events from this fd.
940Sstevel@tonic-gate 			 */
950Sstevel@tonic-gate 			fp = getf(pdp->pd_fd);
960Sstevel@tonic-gate 			if (fp == NULL) {
970Sstevel@tonic-gate 				error = EACCES; /* deny delivery of events */
980Sstevel@tonic-gate 				break;
990Sstevel@tonic-gate 			}
1000Sstevel@tonic-gate 			releasef(pdp->pd_fd);
1010Sstevel@tonic-gate 			if (fp != pdp->pd_fp) {
1020Sstevel@tonic-gate 				error = EACCES; /* deny delivery of events */
1030Sstevel@tonic-gate 				break;
1040Sstevel@tonic-gate 			}
1050Sstevel@tonic-gate 		}
1060Sstevel@tonic-gate 		*events = pdp->pd_portev->portkev_events; /* update events */
1070Sstevel@tonic-gate 		error = 0;
1080Sstevel@tonic-gate 		break;
1090Sstevel@tonic-gate 	case PORT_CALLBACK_DISSOCIATE:
1100Sstevel@tonic-gate 		error = 0;
1110Sstevel@tonic-gate 		break;
1120Sstevel@tonic-gate 	case PORT_CALLBACK_CLOSE:
1130Sstevel@tonic-gate 		/* remove polldat/portfd struct */
1140Sstevel@tonic-gate 		pdp->pd_portev = NULL;
1150Sstevel@tonic-gate 		pcp = (port_fdcache_t *)pdp->pd_pcache;
1160Sstevel@tonic-gate 		mutex_enter(&pcp->pc_lock);
1170Sstevel@tonic-gate 		pdp->pd_fp = NULL;
1180Sstevel@tonic-gate 		pdp->pd_events = 0;
1190Sstevel@tonic-gate 		if (pdp->pd_php != NULL) {
1200Sstevel@tonic-gate 			pollhead_delete(pdp->pd_php, pdp);
1210Sstevel@tonic-gate 			pdp->pd_php = NULL;
1220Sstevel@tonic-gate 		}
1230Sstevel@tonic-gate 		port_pcache_remove_fd(pcp, pfd);
1240Sstevel@tonic-gate 		mutex_exit(&pcp->pc_lock);
1250Sstevel@tonic-gate 		error = 0;
1260Sstevel@tonic-gate 		break;
1270Sstevel@tonic-gate 	default:
1280Sstevel@tonic-gate 		error = EINVAL;
1290Sstevel@tonic-gate 		break;
1300Sstevel@tonic-gate 	}
1310Sstevel@tonic-gate 	return (error);
1320Sstevel@tonic-gate }
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate /*
1350Sstevel@tonic-gate  * This routine returns a pointer to a cached poll fd entry, or NULL if it
1360Sstevel@tonic-gate  * does not find it in the hash table.
1370Sstevel@tonic-gate  * The fd is used as index.
1380Sstevel@tonic-gate  * The fd and the fp are used to detect a valid entry.
1390Sstevel@tonic-gate  * This function returns a pointer to a valid portfd_t structure only when
1400Sstevel@tonic-gate  * the fd and the fp in the args match the entries in polldat_t.
1410Sstevel@tonic-gate  */
1420Sstevel@tonic-gate portfd_t *
port_cache_lookup_fp(port_fdcache_t * pcp,int fd,file_t * fp)1430Sstevel@tonic-gate port_cache_lookup_fp(port_fdcache_t *pcp, int fd, file_t *fp)
1440Sstevel@tonic-gate {
1450Sstevel@tonic-gate 	polldat_t	*pdp;
1460Sstevel@tonic-gate 	portfd_t	**bucket;
1470Sstevel@tonic-gate 
1480Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
1490Sstevel@tonic-gate 	bucket = PORT_FD_BUCKET(pcp, fd);
1500Sstevel@tonic-gate 	pdp = PFTOD(*bucket);
1510Sstevel@tonic-gate 	while (pdp != NULL) {
1520Sstevel@tonic-gate 		if (pdp->pd_fd == fd && pdp->pd_fp == fp)
1530Sstevel@tonic-gate 			break;
1540Sstevel@tonic-gate 		pdp = pdp->pd_hashnext;
1550Sstevel@tonic-gate 	}
1560Sstevel@tonic-gate 	return (PDTOF(pdp));
1570Sstevel@tonic-gate }
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate /*
1600Sstevel@tonic-gate  * port_associate_fd()
1610Sstevel@tonic-gate  * This function associates new file descriptors with a port or
1620Sstevel@tonic-gate  * reactivate already associated file descriptors.
1630Sstevel@tonic-gate  * The reactivation also updates the events types to be checked and the
1640Sstevel@tonic-gate  * attached user pointer.
1650Sstevel@tonic-gate  * Per port a cache is used to store associated file descriptors.
1660Sstevel@tonic-gate  * Internally the VOP_POLL interface is used to poll for existing events.
1670Sstevel@tonic-gate  * The VOP_POLL interface can also deliver a pointer to a pollhead_t structure
1680Sstevel@tonic-gate  * which is used to enqueue polldat_t structures with pending events.
1690Sstevel@tonic-gate  * If VOP_POLL immediately returns valid events (revents) then those events
1700Sstevel@tonic-gate  * will be submitted to the event port with port_send_event().
1710Sstevel@tonic-gate  * Otherwise VOP_POLL does not return events but it delivers a pointer to a
1720Sstevel@tonic-gate  * pollhead_t structure. In such a case the corresponding file system behind
1735331Samw  * VOP_POLL will use the pollwakeup() function to notify about existing
1740Sstevel@tonic-gate  * events.
1750Sstevel@tonic-gate  */
1760Sstevel@tonic-gate int
port_associate_fd(port_t * pp,int source,uintptr_t object,int events,void * user)1770Sstevel@tonic-gate port_associate_fd(port_t *pp, int source, uintptr_t object, int events,
1780Sstevel@tonic-gate     void *user)
1790Sstevel@tonic-gate {
1800Sstevel@tonic-gate 	port_fdcache_t	*pcp;
1810Sstevel@tonic-gate 	int		fd;
1820Sstevel@tonic-gate 	struct pollhead	*php = NULL;
1830Sstevel@tonic-gate 	portfd_t	*pfd;
1840Sstevel@tonic-gate 	polldat_t	*pdp;
1850Sstevel@tonic-gate 	file_t		*fp;
1860Sstevel@tonic-gate 	port_kevent_t	*pkevp;
1870Sstevel@tonic-gate 	short		revents;
1880Sstevel@tonic-gate 	int		error = 0;
1893734Spraks 	int		active;
1900Sstevel@tonic-gate 
1910Sstevel@tonic-gate 	pcp = pp->port_queue.portq_pcp;
1920Sstevel@tonic-gate 	if (object > (uintptr_t)INT_MAX)
1930Sstevel@tonic-gate 		return (EBADFD);
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate 	fd = object;
1960Sstevel@tonic-gate 
1970Sstevel@tonic-gate 	if ((fp = getf(fd)) == NULL)
1980Sstevel@tonic-gate 		return (EBADFD);
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate 	mutex_enter(&pcp->pc_lock);
201*8587SPramod.Batni@Sun.COM 
2020Sstevel@tonic-gate 	if (pcp->pc_hash == NULL) {
2030Sstevel@tonic-gate 		/*
2040Sstevel@tonic-gate 		 * This is the first time that a fd is being associated with
2050Sstevel@tonic-gate 		 * the current port:
2060Sstevel@tonic-gate 		 * - create PORT_SOURCE_FD cache
2070Sstevel@tonic-gate 		 * - associate PORT_SOURCE_FD source with the port
2080Sstevel@tonic-gate 		 */
2090Sstevel@tonic-gate 		error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD,
2100Sstevel@tonic-gate 		    NULL, port_close_sourcefd, pp, NULL);
2110Sstevel@tonic-gate 		if (error) {
2120Sstevel@tonic-gate 			mutex_exit(&pcp->pc_lock);
2130Sstevel@tonic-gate 			releasef(fd);
2140Sstevel@tonic-gate 			return (error);
2150Sstevel@tonic-gate 		}
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate 		/* create polldat cache */
2180Sstevel@tonic-gate 		pcp->pc_hashsize = PORTHASH_START;
2190Sstevel@tonic-gate 		pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize *
2200Sstevel@tonic-gate 		    sizeof (portfd_t *), KM_SLEEP);
2210Sstevel@tonic-gate 		pfd = NULL;
2220Sstevel@tonic-gate 	} else {
2230Sstevel@tonic-gate 		/* Check if the fd/fp is already associated with the port */
2240Sstevel@tonic-gate 		pfd = port_cache_lookup_fp(pcp, fd, fp);
2250Sstevel@tonic-gate 	}
2260Sstevel@tonic-gate 
2270Sstevel@tonic-gate 	if (pfd == NULL) {
2280Sstevel@tonic-gate 		/*
2290Sstevel@tonic-gate 		 * new entry
2300Sstevel@tonic-gate 		 * Allocate a polldat_t structure per fd
2310Sstevel@tonic-gate 		 * The use of the polldat_t structure to cache file descriptors
2320Sstevel@tonic-gate 		 * is required to be able to share the pollwakeup() function
2330Sstevel@tonic-gate 		 * with poll(2) and devpoll(7d).
2340Sstevel@tonic-gate 		 */
2350Sstevel@tonic-gate 		pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP);
2360Sstevel@tonic-gate 		pdp = PFTOD(pfd);
2370Sstevel@tonic-gate 		pdp->pd_fd = fd;
2380Sstevel@tonic-gate 		pdp->pd_fp = fp;
2390Sstevel@tonic-gate 		pdp->pd_pcache = (void *)pcp;
2400Sstevel@tonic-gate 
2410Sstevel@tonic-gate 		/* Allocate a port event structure per fd */
2420Sstevel@tonic-gate 		error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED,
2430Sstevel@tonic-gate 		    &pdp->pd_portev);
2440Sstevel@tonic-gate 		if (error) {
2450Sstevel@tonic-gate 			kmem_free(pfd, sizeof (portfd_t));
2460Sstevel@tonic-gate 			releasef(fd);
2470Sstevel@tonic-gate 			mutex_exit(&pcp->pc_lock);
2480Sstevel@tonic-gate 			return (error);
2490Sstevel@tonic-gate 		}
2500Sstevel@tonic-gate 		pkevp = pdp->pd_portev;
2510Sstevel@tonic-gate 		pkevp->portkev_callback = port_fd_callback;
2520Sstevel@tonic-gate 		pkevp->portkev_arg = pfd;
2530Sstevel@tonic-gate 
2540Sstevel@tonic-gate 		/* add portfd_t entry  to the cache */
2550Sstevel@tonic-gate 		port_cache_insert_fd(pcp, pdp);
2560Sstevel@tonic-gate 		pkevp->portkev_object = fd;
2570Sstevel@tonic-gate 		pkevp->portkev_user = user;
2580Sstevel@tonic-gate 
2590Sstevel@tonic-gate 		/*
2600Sstevel@tonic-gate 		 * Add current port to the file descriptor interested list
2610Sstevel@tonic-gate 		 * The members of the list are notified when the file descriptor
2620Sstevel@tonic-gate 		 * is closed.
2630Sstevel@tonic-gate 		 */
2640Sstevel@tonic-gate 		addfd_port(fd, pfd);
2650Sstevel@tonic-gate 	} else {
2660Sstevel@tonic-gate 		/*
2670Sstevel@tonic-gate 		 * The file descriptor is already associated with the port
2680Sstevel@tonic-gate 		 */
2690Sstevel@tonic-gate 		pdp = PFTOD(pfd);
2700Sstevel@tonic-gate 		pkevp = pdp->pd_portev;
2710Sstevel@tonic-gate 
2720Sstevel@tonic-gate 		/*
2730Sstevel@tonic-gate 		 * Check if the re-association happens before the last
2740Sstevel@tonic-gate 		 * submitted event of the file descriptor was retrieved.
2751425Spraks 		 * Clear the PORT_KEV_VALID flag if set. No new events
2761425Spraks 		 * should get submitted after this flag is cleared.
2770Sstevel@tonic-gate 		 */
2781425Spraks 		mutex_enter(&pkevp->portkev_lock);
2791425Spraks 		if (pkevp->portkev_flags & PORT_KEV_VALID) {
2801425Spraks 			pkevp->portkev_flags &= ~PORT_KEV_VALID;
2811425Spraks 		}
2820Sstevel@tonic-gate 		if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
2831425Spraks 			mutex_exit(&pkevp->portkev_lock);
2840Sstevel@tonic-gate 			/*
2851425Spraks 			 * Remove any events that where already fired
2861425Spraks 			 * for this fd and are still in the port queue.
2870Sstevel@tonic-gate 			 */
2884863Spraks 			(void) port_remove_done_event(pkevp);
2891425Spraks 		} else {
2901425Spraks 			mutex_exit(&pkevp->portkev_lock);
2910Sstevel@tonic-gate 		}
2920Sstevel@tonic-gate 		pkevp->portkev_user = user;
2930Sstevel@tonic-gate 	}
2940Sstevel@tonic-gate 
295*8587SPramod.Batni@Sun.COM 	pfd->pfd_thread = curthread;
2961790Spraks 	mutex_enter(&pkevp->portkev_lock);
2970Sstevel@tonic-gate 	pkevp->portkev_events = 0;	/* no fired events */
2980Sstevel@tonic-gate 	pdp->pd_events = events;	/* events associated */
2991790Spraks 	/*
3001790Spraks 	 * allow new events.
3011790Spraks 	 */
3021790Spraks 	pkevp->portkev_flags |= PORT_KEV_VALID;
3031790Spraks 	mutex_exit(&pkevp->portkev_lock);
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	/*
3060Sstevel@tonic-gate 	 * do VOP_POLL and cache this poll fd.
3070Sstevel@tonic-gate 	 *
3080Sstevel@tonic-gate 	 * XXX - pollrelock() logic needs to know
3090Sstevel@tonic-gate 	 * which pollcache lock to grab. It'd be a
3100Sstevel@tonic-gate 	 * cleaner solution if we could pass pcp as
3110Sstevel@tonic-gate 	 * an arguement in VOP_POLL interface instead
3120Sstevel@tonic-gate 	 * of implicitly passing it using thread_t
3130Sstevel@tonic-gate 	 * struct. On the other hand, changing VOP_POLL
3140Sstevel@tonic-gate 	 * interface will require all driver/file system
3150Sstevel@tonic-gate 	 * poll routine to change.
3160Sstevel@tonic-gate 	 */
3170Sstevel@tonic-gate 	curthread->t_pollcache = (pollcache_t *)pcp;
3185331Samw 	error = VOP_POLL(fp->f_vnode, events, 0, &revents, &php, NULL);
3190Sstevel@tonic-gate 	curthread->t_pollcache = NULL;
3200Sstevel@tonic-gate 
3210Sstevel@tonic-gate 	/*
322*8587SPramod.Batni@Sun.COM 	 * The pc_lock can get dropped and reaquired in VOP_POLL.
323*8587SPramod.Batni@Sun.COM 	 * In the window pc_lock is dropped another thread in
324*8587SPramod.Batni@Sun.COM 	 * port_dissociate can remove the pfd from the port cache
325*8587SPramod.Batni@Sun.COM 	 * and free the pfd.
326*8587SPramod.Batni@Sun.COM 	 * It is also possible for another thread to sneak in and do a
327*8587SPramod.Batni@Sun.COM 	 * port_associate on the same fd during the same window.
328*8587SPramod.Batni@Sun.COM 	 * For both these cases return the current value of error.
329*8587SPramod.Batni@Sun.COM 	 * The application should take care to ensure that the threads
330*8587SPramod.Batni@Sun.COM 	 * do not race with each other for association and disassociation
331*8587SPramod.Batni@Sun.COM 	 * of the same fd.
332*8587SPramod.Batni@Sun.COM 	 */
333*8587SPramod.Batni@Sun.COM 	if (((pfd = port_cache_lookup_fp(pcp, fd, fp)) == NULL) ||
334*8587SPramod.Batni@Sun.COM 	    (pfd->pfd_thread != curthread)) {
335*8587SPramod.Batni@Sun.COM 		releasef(fd);
336*8587SPramod.Batni@Sun.COM 		mutex_exit(&pcp->pc_lock);
337*8587SPramod.Batni@Sun.COM 		return (error);
338*8587SPramod.Batni@Sun.COM 	}
339*8587SPramod.Batni@Sun.COM 
340*8587SPramod.Batni@Sun.COM 	/*
3410Sstevel@tonic-gate 	 * To keep synchronization between VOP_POLL above and
3420Sstevel@tonic-gate 	 * pollhead_insert below, it is necessary to
3430Sstevel@tonic-gate 	 * call VOP_POLL() again (see port_bind_pollhead()).
3440Sstevel@tonic-gate 	 */
3450Sstevel@tonic-gate 	if (error) {
3463734Spraks 		goto errout;
3470Sstevel@tonic-gate 	}
3480Sstevel@tonic-gate 
349*8587SPramod.Batni@Sun.COM 	if (php != NULL && (pdp->pd_php != php)) {
3500Sstevel@tonic-gate 		/*
3510Sstevel@tonic-gate 		 * No events delivered yet.
3520Sstevel@tonic-gate 		 * Bind pollhead pointer with current polldat_t structure.
3530Sstevel@tonic-gate 		 * Sub-system will call pollwakeup() later with php as
3540Sstevel@tonic-gate 		 * argument.
3550Sstevel@tonic-gate 		 */
3560Sstevel@tonic-gate 		error = port_bind_pollhead(&php, pdp, &revents);
357*8587SPramod.Batni@Sun.COM 		/*
358*8587SPramod.Batni@Sun.COM 		 * The pc_lock can get dropped and reaquired in VOP_POLL.
359*8587SPramod.Batni@Sun.COM 		 * In the window pc_lock is dropped another thread in
360*8587SPramod.Batni@Sun.COM 		 * port_dissociate can remove the pfd from the port cache
361*8587SPramod.Batni@Sun.COM 		 * and free the pfd.
362*8587SPramod.Batni@Sun.COM 		 * It is also possible for another thread to sneak in and do a
363*8587SPramod.Batni@Sun.COM 		 * port_associate on the same fd during the same window.
364*8587SPramod.Batni@Sun.COM 		 * For both these cases return the current value of error.
365*8587SPramod.Batni@Sun.COM 		 * The application should take care to ensure that the threads
366*8587SPramod.Batni@Sun.COM 		 * do not race with each other for association
367*8587SPramod.Batni@Sun.COM 		 * and disassociation of the same fd.
368*8587SPramod.Batni@Sun.COM 		 */
369*8587SPramod.Batni@Sun.COM 		if (((pfd = port_cache_lookup_fp(pcp, fd, fp)) == NULL) ||
370*8587SPramod.Batni@Sun.COM 		    (pfd->pfd_thread != curthread)) {
371*8587SPramod.Batni@Sun.COM 			releasef(fd);
372*8587SPramod.Batni@Sun.COM 			mutex_exit(&pcp->pc_lock);
373*8587SPramod.Batni@Sun.COM 			return (error);
374*8587SPramod.Batni@Sun.COM 		}
375*8587SPramod.Batni@Sun.COM 
3760Sstevel@tonic-gate 		if (error) {
3773734Spraks 			goto errout;
3780Sstevel@tonic-gate 		}
3790Sstevel@tonic-gate 	}
3800Sstevel@tonic-gate 
3810Sstevel@tonic-gate 	/*
3821790Spraks 	 * Check if new events where detected and no events have been
3831790Spraks 	 * delivered. The revents was already set after the VOP_POLL
3841790Spraks 	 * above or it was updated in port_bind_pollhead().
3850Sstevel@tonic-gate 	 */
3861425Spraks 	mutex_enter(&pkevp->portkev_lock);
3871790Spraks 	if (revents && (pkevp->portkev_flags & PORT_KEV_VALID)) {
3881425Spraks 		ASSERT((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0);
3891790Spraks 		pkevp->portkev_flags &= ~PORT_KEV_VALID;
3900Sstevel@tonic-gate 		revents = revents & (pdp->pd_events | POLLHUP | POLLERR);
3911425Spraks 		/* send events to the event port */
3921425Spraks 		pkevp->portkev_events = revents;
3931425Spraks 		/*
3941425Spraks 		 * port_send_event will release the portkev_lock mutex.
3951425Spraks 		 */
3961885Sraf 		port_send_event(pkevp);
3971425Spraks 	} else {
3981425Spraks 		mutex_exit(&pkevp->portkev_lock);
3990Sstevel@tonic-gate 	}
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate 	releasef(fd);
4020Sstevel@tonic-gate 	mutex_exit(&pcp->pc_lock);
4030Sstevel@tonic-gate 	return (error);
4043734Spraks 
4053734Spraks errout:
4063734Spraks 	delfd_port(fd, pfd);
4073734Spraks 	/*
4083734Spraks 	 * If the portkev is not valid, then an event was
4093734Spraks 	 * delivered.
4103734Spraks 	 *
4113734Spraks 	 * If an event was delivered and got picked up, then
4123734Spraks 	 * we return error = 0 treating this as a successful
4133734Spraks 	 * port associate call. The thread which received
4143734Spraks 	 * the event gets control of the object.
4153734Spraks 	 */
4163734Spraks 	active = 0;
4173734Spraks 	mutex_enter(&pkevp->portkev_lock);
4183734Spraks 	if (pkevp->portkev_flags & PORT_KEV_VALID) {
4193734Spraks 		pkevp->portkev_flags &= ~PORT_KEV_VALID;
4203734Spraks 		active = 1;
4213734Spraks 	}
4223757Spraks 	mutex_exit(&pkevp->portkev_lock);
4233734Spraks 
4243734Spraks 	if (!port_remove_fd_object(pfd, pp, pcp) && !active) {
4253734Spraks 		error = 0;
4263734Spraks 	}
4273734Spraks 	releasef(fd);
4283734Spraks 	mutex_exit(&pcp->pc_lock);
4293734Spraks 	return (error);
4300Sstevel@tonic-gate }
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate /*
4330Sstevel@tonic-gate  * The port_dissociate_fd() function dissociates the delivered file
4340Sstevel@tonic-gate  * descriptor from the event port and removes already fired events.
4350Sstevel@tonic-gate  * If a fd is shared between processes, all involved processes will get
4360Sstevel@tonic-gate  * the same rights related to re-association of the fd with the port and
4370Sstevel@tonic-gate  * retrieve of events from that fd.
4380Sstevel@tonic-gate  * The process which associated the fd with a port for the first time
4390Sstevel@tonic-gate  * becomes also the owner of the association. Only the owner of the
4400Sstevel@tonic-gate  * association is allowed to dissociate the fd from the port.
4410Sstevel@tonic-gate  */
4420Sstevel@tonic-gate int
port_dissociate_fd(port_t * pp,uintptr_t object)4430Sstevel@tonic-gate port_dissociate_fd(port_t *pp, uintptr_t object)
4440Sstevel@tonic-gate {
4450Sstevel@tonic-gate 	int		fd;
4460Sstevel@tonic-gate 	port_fdcache_t	*pcp;
4470Sstevel@tonic-gate 	portfd_t	*pfd;
4480Sstevel@tonic-gate 	file_t		*fp;
4493734Spraks 	int		active;
4503734Spraks 	port_kevent_t	*pkevp;
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	if (object > (uintptr_t)INT_MAX)
4530Sstevel@tonic-gate 		return (EBADFD);
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 	fd = object;
4560Sstevel@tonic-gate 	pcp = pp->port_queue.portq_pcp;
4570Sstevel@tonic-gate 
4580Sstevel@tonic-gate 	mutex_enter(&pcp->pc_lock);
4590Sstevel@tonic-gate 	if (pcp->pc_hash == NULL) {
4600Sstevel@tonic-gate 		/* no file descriptor cache available */
4610Sstevel@tonic-gate 		mutex_exit(&pcp->pc_lock);
4623734Spraks 		return (ENOENT);
4630Sstevel@tonic-gate 	}
4640Sstevel@tonic-gate 	if ((fp = getf(fd)) == NULL) {
4650Sstevel@tonic-gate 		mutex_exit(&pcp->pc_lock);
4660Sstevel@tonic-gate 		return (EBADFD);
4670Sstevel@tonic-gate 	}
4680Sstevel@tonic-gate 	pfd = port_cache_lookup_fp(pcp, fd, fp);
4690Sstevel@tonic-gate 	if (pfd == NULL) {
4700Sstevel@tonic-gate 		releasef(fd);
4710Sstevel@tonic-gate 		mutex_exit(&pcp->pc_lock);
4723734Spraks 		return (ENOENT);
4730Sstevel@tonic-gate 	}
4740Sstevel@tonic-gate 	/* only association owner is allowed to remove the association */
4750Sstevel@tonic-gate 	if (curproc->p_pid != PFTOD(pfd)->pd_portev->portkev_pid) {
4760Sstevel@tonic-gate 		releasef(fd);
4770Sstevel@tonic-gate 		mutex_exit(&pcp->pc_lock);
4780Sstevel@tonic-gate 		return (EACCES);
4790Sstevel@tonic-gate 	}
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 	/* remove port from the file descriptor interested list */
4820Sstevel@tonic-gate 	delfd_port(fd, pfd);
4830Sstevel@tonic-gate 
4843734Spraks 	/*
4853734Spraks 	 * Deactivate the association. No events get posted after
4863734Spraks 	 * this.
4873734Spraks 	 */
4883734Spraks 	pkevp = PFTOD(pfd)->pd_portev;
4893734Spraks 	mutex_enter(&pkevp->portkev_lock);
4903734Spraks 	if (pkevp->portkev_flags & PORT_KEV_VALID) {
4913734Spraks 		pkevp->portkev_flags &= ~PORT_KEV_VALID;
4923734Spraks 		active = 1;
4933734Spraks 	} else {
4943734Spraks 		active = 0;
4953734Spraks 	}
4963734Spraks 	mutex_exit(&pkevp->portkev_lock);
4970Sstevel@tonic-gate 
4983734Spraks 	/* remove polldat & port event structure */
4993734Spraks 	if (port_remove_fd_object(pfd, pp, pcp)) {
5003734Spraks 		/*
5013734Spraks 		 * An event was found and removed from the
5023734Spraks 		 * port done queue. This means the event has not yet
5033734Spraks 		 * been retrived. In this case we treat this as an active
5043734Spraks 		 * association.
5053734Spraks 		 */
5063734Spraks 		ASSERT(active == 0);
5073734Spraks 		active = 1;
5083734Spraks 	}
509*8587SPramod.Batni@Sun.COM 	releasef(fd);
5103734Spraks 	mutex_exit(&pcp->pc_lock);
5110Sstevel@tonic-gate 
5123734Spraks 	/*
5133734Spraks 	 * Return ENOENT if there was no active association.
5143734Spraks 	 */
5153734Spraks 	return ((active ? 0 : ENOENT));
5160Sstevel@tonic-gate }
5170Sstevel@tonic-gate 
5180Sstevel@tonic-gate /*
5190Sstevel@tonic-gate  * Associate event port polldat_t structure with sub-system pointer to
5200Sstevel@tonic-gate  * a polhead_t structure.
5210Sstevel@tonic-gate  */
5220Sstevel@tonic-gate static int
port_bind_pollhead(pollhead_t ** php,polldat_t * pdp,short * revents)5230Sstevel@tonic-gate port_bind_pollhead(pollhead_t **php, polldat_t *pdp, short *revents)
5240Sstevel@tonic-gate {
5250Sstevel@tonic-gate 	int		error;
5260Sstevel@tonic-gate 	file_t		*fp;
5270Sstevel@tonic-gate 
5280Sstevel@tonic-gate 	/* polldat_t associated with another pollhead_t pointer */
5290Sstevel@tonic-gate 	if (pdp->pd_php != NULL)
5300Sstevel@tonic-gate 		pollhead_delete(pdp->pd_php, pdp);
5310Sstevel@tonic-gate 
5320Sstevel@tonic-gate 	/*
5330Sstevel@tonic-gate 	 * Before pollhead_insert() pollwakeup() will not detect a polldat
5340Sstevel@tonic-gate 	 * entry in the ph_list and the event notification will disappear.
5350Sstevel@tonic-gate 	 * This happens because polldat_t is still not associated with
5360Sstevel@tonic-gate 	 * the pointer to the pollhead_t structure.
5370Sstevel@tonic-gate 	 */
5380Sstevel@tonic-gate 	pollhead_insert(*php, pdp);
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate 	/*
5410Sstevel@tonic-gate 	 * From now on event notification can be detected in pollwakeup(),
5420Sstevel@tonic-gate 	 * Use VOP_POLL() again to check the current status of the event.
5430Sstevel@tonic-gate 	 */
5440Sstevel@tonic-gate 	pdp->pd_php = *php;
5450Sstevel@tonic-gate 	fp = pdp->pd_fp;
5460Sstevel@tonic-gate 	curthread->t_pollcache = (pollcache_t *)pdp->pd_pcache;
5475331Samw 	error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, revents, php, NULL);
5480Sstevel@tonic-gate 	curthread->t_pollcache = NULL;
5490Sstevel@tonic-gate 	return (error);
5500Sstevel@tonic-gate }
5510Sstevel@tonic-gate 
5520Sstevel@tonic-gate /*
5530Sstevel@tonic-gate  * Grow the hash table. Rehash all the elements on the hash table.
5540Sstevel@tonic-gate  */
5550Sstevel@tonic-gate static void
port_cache_grow_hashtbl(port_fdcache_t * pcp)5560Sstevel@tonic-gate port_cache_grow_hashtbl(port_fdcache_t *pcp)
5570Sstevel@tonic-gate {
5580Sstevel@tonic-gate 	portfd_t	**oldtbl;
5590Sstevel@tonic-gate 	polldat_t	*pdp;
5600Sstevel@tonic-gate 	portfd_t	*pfd;
5610Sstevel@tonic-gate 	polldat_t	*pdp1;
5620Sstevel@tonic-gate 	int		oldsize;
5630Sstevel@tonic-gate 	int		i;
5640Sstevel@tonic-gate 
5650Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
5660Sstevel@tonic-gate 	oldsize = pcp->pc_hashsize;
5670Sstevel@tonic-gate 	oldtbl = pcp->pc_hash;
5680Sstevel@tonic-gate 	pcp->pc_hashsize *= PORTHASH_MULT;
5690Sstevel@tonic-gate 	pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (portfd_t *),
5700Sstevel@tonic-gate 	    KM_SLEEP);
5710Sstevel@tonic-gate 	/*
5720Sstevel@tonic-gate 	 * rehash existing elements
5730Sstevel@tonic-gate 	 */
5740Sstevel@tonic-gate 	pcp->pc_fdcount = 0;
5750Sstevel@tonic-gate 	for (i = 0; i < oldsize; i++) {
5760Sstevel@tonic-gate 		pfd = oldtbl[i];
5770Sstevel@tonic-gate 		pdp = PFTOD(pfd);
5780Sstevel@tonic-gate 		while (pdp != NULL) {
5790Sstevel@tonic-gate 			pdp1 = pdp->pd_hashnext;
5800Sstevel@tonic-gate 			port_cache_insert_fd(pcp, pdp);
5810Sstevel@tonic-gate 			pdp = pdp1;
5820Sstevel@tonic-gate 		}
5830Sstevel@tonic-gate 	}
5840Sstevel@tonic-gate 	kmem_free(oldtbl, oldsize * sizeof (portfd_t *));
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate /*
5870Sstevel@tonic-gate  * This routine inserts a polldat into the portcache's hash table. It
5880Sstevel@tonic-gate  * may be necessary to grow the size of the hash table.
5890Sstevel@tonic-gate  */
5900Sstevel@tonic-gate static void
port_cache_insert_fd(port_fdcache_t * pcp,polldat_t * pdp)5910Sstevel@tonic-gate port_cache_insert_fd(port_fdcache_t *pcp, polldat_t *pdp)
5920Sstevel@tonic-gate {
5930Sstevel@tonic-gate 	portfd_t	**bucket;
5940Sstevel@tonic-gate 
5950Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
5960Sstevel@tonic-gate 	if (pcp->pc_fdcount > (pcp->pc_hashsize * PORTHASH_MULT))
5970Sstevel@tonic-gate 		port_cache_grow_hashtbl(pcp);
5980Sstevel@tonic-gate 	bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd);
5990Sstevel@tonic-gate 	pdp->pd_hashnext = PFTOD(*bucket);
6000Sstevel@tonic-gate 	*bucket = PDTOF(pdp);
6010Sstevel@tonic-gate 	pcp->pc_fdcount++;
6020Sstevel@tonic-gate }
6030Sstevel@tonic-gate 
6040Sstevel@tonic-gate 
6050Sstevel@tonic-gate /*
6060Sstevel@tonic-gate  * The port_remove_portfd() function dissociates the port from the fd
6070Sstevel@tonic-gate  * and vive versa.
6080Sstevel@tonic-gate  */
6090Sstevel@tonic-gate static void
port_remove_portfd(polldat_t * pdp,port_fdcache_t * pcp)6100Sstevel@tonic-gate port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp)
6110Sstevel@tonic-gate {
6120Sstevel@tonic-gate 	port_t	*pp;
6130Sstevel@tonic-gate 	file_t	*fp;
614*8587SPramod.Batni@Sun.COM 	int	fd;
6150Sstevel@tonic-gate 
6160Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pcp->pc_lock));
6170Sstevel@tonic-gate 	pp = pdp->pd_portev->portkev_port;
618*8587SPramod.Batni@Sun.COM 	fp = getf(fd = pdp->pd_fd);
6191425Spraks 	/*
6201425Spraks 	 * If we did not get the fp for pd_fd but its portfd_t
6211425Spraks 	 * still exist in the cache, it means the pd_fd is being
6221425Spraks 	 * closed by some other thread which will also free the portfd_t.
6231425Spraks 	 */
6241425Spraks 	if (fp != NULL) {
6251425Spraks 		delfd_port(pdp->pd_fd, PDTOF(pdp));
6263734Spraks 		(void) port_remove_fd_object(PDTOF(pdp), pp, pcp);
627*8587SPramod.Batni@Sun.COM 		releasef(fd);
6281425Spraks 	}
6290Sstevel@tonic-gate }
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate /*
6320Sstevel@tonic-gate  * This function is used by port_close_sourcefd() to destroy the cache
6330Sstevel@tonic-gate  * on last close.
6340Sstevel@tonic-gate  */
6350Sstevel@tonic-gate static void
port_pcache_destroy(port_fdcache_t * pcp)6360Sstevel@tonic-gate port_pcache_destroy(port_fdcache_t *pcp)
6370Sstevel@tonic-gate {
6380Sstevel@tonic-gate 	ASSERT(pcp->pc_fdcount == 0);
6390Sstevel@tonic-gate 	kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize);
6400Sstevel@tonic-gate 	mutex_destroy(&pcp->pc_lock);
6410Sstevel@tonic-gate 	kmem_free(pcp, sizeof (port_fdcache_t));
6420Sstevel@tonic-gate }
6430Sstevel@tonic-gate 
6440Sstevel@tonic-gate /*
6450Sstevel@tonic-gate  * port_close() calls this function to request the PORT_SOURCE_FD source
6460Sstevel@tonic-gate  * to remove/free all resources allocated and associated with the port.
6470Sstevel@tonic-gate  */
6480Sstevel@tonic-gate /* ARGSUSED */
6490Sstevel@tonic-gate static void
port_close_sourcefd(void * arg,int port,pid_t pid,int lastclose)6500Sstevel@tonic-gate port_close_sourcefd(void *arg, int port, pid_t pid, int lastclose)
6510Sstevel@tonic-gate {
6520Sstevel@tonic-gate 	port_t		*pp = arg;
6530Sstevel@tonic-gate 	port_fdcache_t	*pcp;
6540Sstevel@tonic-gate 	portfd_t	**hashtbl;
6550Sstevel@tonic-gate 	polldat_t	*pdp;
6560Sstevel@tonic-gate 	polldat_t	*pdpnext;
6570Sstevel@tonic-gate 	int		index;
6580Sstevel@tonic-gate 
6590Sstevel@tonic-gate 	pcp = pp->port_queue.portq_pcp;
6600Sstevel@tonic-gate 	if (pcp == NULL)
6610Sstevel@tonic-gate 		/* no cache available -> nothing to do */
6620Sstevel@tonic-gate 		return;
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate 	mutex_enter(&pcp->pc_lock);
6650Sstevel@tonic-gate 	/*
6660Sstevel@tonic-gate 	 * Scan the cache and free all allocated portfd_t and port_kevent_t
6670Sstevel@tonic-gate 	 * structures.
6680Sstevel@tonic-gate 	 */
6690Sstevel@tonic-gate 	hashtbl = pcp->pc_hash;
6700Sstevel@tonic-gate 	for (index = 0; index < pcp->pc_hashsize; index++) {
6710Sstevel@tonic-gate 		for (pdp = PFTOD(hashtbl[index]); pdp != NULL; pdp = pdpnext) {
6720Sstevel@tonic-gate 			pdpnext = pdp->pd_hashnext;
6730Sstevel@tonic-gate 			if (pid == pdp->pd_portev->portkev_pid) {
6740Sstevel@tonic-gate 				/*
6750Sstevel@tonic-gate 				 * remove polldat + port_event_t from cache
6760Sstevel@tonic-gate 				 * only when current process did the
6770Sstevel@tonic-gate 				 * association.
6780Sstevel@tonic-gate 				 */
6790Sstevel@tonic-gate 				port_remove_portfd(pdp, pcp);
6800Sstevel@tonic-gate 			}
6810Sstevel@tonic-gate 		}
6820Sstevel@tonic-gate 	}
6831425Spraks 	if (lastclose) {
6841425Spraks 		/*
6851425Spraks 		 * Wait for all the portfd's to be freed.
6861425Spraks 		 * The remaining portfd_t's are the once we did not
6871425Spraks 		 * free in port_remove_portfd since some other thread
6881425Spraks 		 * is closing the fd. These threads will free the portfd_t's
6891425Spraks 		 * once we drop the pc_lock mutex.
6901425Spraks 		 */
6911425Spraks 		while (pcp->pc_fdcount) {
6921425Spraks 			(void) cv_wait_sig(&pcp->pc_lclosecv, &pcp->pc_lock);
6931425Spraks 		}
6941425Spraks 		/* event port vnode will be destroyed -> remove everything */
6951425Spraks 		pp->port_queue.portq_pcp = NULL;
6961425Spraks 	}
6970Sstevel@tonic-gate 	mutex_exit(&pcp->pc_lock);
6980Sstevel@tonic-gate 	/*
6990Sstevel@tonic-gate 	 * last close:
7000Sstevel@tonic-gate 	 * pollwakeup() can not further interact with this cache
7010Sstevel@tonic-gate 	 * (all polldat structs are removed from pollhead entries).
7020Sstevel@tonic-gate 	 */
7030Sstevel@tonic-gate 	if (lastclose)
7040Sstevel@tonic-gate 		port_pcache_destroy(pcp);
7050Sstevel@tonic-gate }
706