xref: /minix3/minix/lib/libsockevent/sockevent.c (revision 79a488aa87e1810dbafce4a8d392cd741cfb4f2f)
14c27a833SDavid van Moolenbroek /* Socket event dispatching library - by D.C. van Moolenbroek */
24c27a833SDavid van Moolenbroek 
34c27a833SDavid van Moolenbroek #include <minix/drivers.h>
44c27a833SDavid van Moolenbroek #include <minix/sockdriver.h>
54c27a833SDavid van Moolenbroek #include <minix/sockevent.h>
64c27a833SDavid van Moolenbroek #include <sys/ioctl.h>
74c27a833SDavid van Moolenbroek 
84c27a833SDavid van Moolenbroek #include "sockevent_proc.h"
94c27a833SDavid van Moolenbroek 
104c27a833SDavid van Moolenbroek #define US		1000000UL	/* microseconds per second */
114c27a833SDavid van Moolenbroek 
124c27a833SDavid van Moolenbroek #define SOCKHASH_SLOTS	256		/* # slots in ID-to-sock hash table */
134c27a833SDavid van Moolenbroek 
144c27a833SDavid van Moolenbroek static SLIST_HEAD(, sock) sockhash[SOCKHASH_SLOTS];
154c27a833SDavid van Moolenbroek 
164c27a833SDavid van Moolenbroek static SLIST_HEAD(, sock) socktimer;
174c27a833SDavid van Moolenbroek 
184c27a833SDavid van Moolenbroek static minix_timer_t sockevent_timer;
194c27a833SDavid van Moolenbroek 
204c27a833SDavid van Moolenbroek static SIMPLEQ_HEAD(, sock) sockevent_pending;
214c27a833SDavid van Moolenbroek 
224c27a833SDavid van Moolenbroek static sockevent_socket_cb_t sockevent_socket_cb = NULL;
234c27a833SDavid van Moolenbroek 
244c27a833SDavid van Moolenbroek static int sockevent_working;
254c27a833SDavid van Moolenbroek 
264c27a833SDavid van Moolenbroek static void socktimer_del(struct sock * sock);
274c27a833SDavid van Moolenbroek static void sockevent_cancel_send(struct sock * sock,
284c27a833SDavid van Moolenbroek 	struct sockevent_proc * spr, int err);
294c27a833SDavid van Moolenbroek static void sockevent_cancel_recv(struct sock * sock,
304c27a833SDavid van Moolenbroek 	struct sockevent_proc * spr, int err);
314c27a833SDavid van Moolenbroek 
324c27a833SDavid van Moolenbroek /*
334c27a833SDavid van Moolenbroek  * Initialize the hash table of sock objects.
344c27a833SDavid van Moolenbroek  */
354c27a833SDavid van Moolenbroek static void
sockhash_init(void)364c27a833SDavid van Moolenbroek sockhash_init(void)
374c27a833SDavid van Moolenbroek {
384c27a833SDavid van Moolenbroek 	unsigned int slot;
394c27a833SDavid van Moolenbroek 
404c27a833SDavid van Moolenbroek 	for (slot = 0; slot < __arraycount(sockhash); slot++)
414c27a833SDavid van Moolenbroek 		SLIST_INIT(&sockhash[slot]);
424c27a833SDavid van Moolenbroek }
434c27a833SDavid van Moolenbroek 
444c27a833SDavid van Moolenbroek /*
454c27a833SDavid van Moolenbroek  * Given a socket identifier, return a hash table slot number.
464c27a833SDavid van Moolenbroek  */
474c27a833SDavid van Moolenbroek static unsigned int
sockhash_slot(sockid_t id)484c27a833SDavid van Moolenbroek sockhash_slot(sockid_t id)
494c27a833SDavid van Moolenbroek {
504c27a833SDavid van Moolenbroek 
514c27a833SDavid van Moolenbroek 	/*
524c27a833SDavid van Moolenbroek 	 * The idea of the shift is that a socket driver may offer multiple
534c27a833SDavid van Moolenbroek 	 * classes of sockets, and put the class in the higher bits.  The shift
544c27a833SDavid van Moolenbroek 	 * aims to prevent that all classes' first sockets end up in the same
554c27a833SDavid van Moolenbroek 	 * hash slot.
564c27a833SDavid van Moolenbroek 	 */
574c27a833SDavid van Moolenbroek 	return (id + (id >> 16)) % SOCKHASH_SLOTS;
584c27a833SDavid van Moolenbroek }
594c27a833SDavid van Moolenbroek 
604c27a833SDavid van Moolenbroek /*
614c27a833SDavid van Moolenbroek  * Obtain a sock object from the hash table using its unique identifier.
624c27a833SDavid van Moolenbroek  * Return a pointer to the object if found, or NULL otherwise.
634c27a833SDavid van Moolenbroek  */
644c27a833SDavid van Moolenbroek static struct sock *
sockhash_get(sockid_t id)654c27a833SDavid van Moolenbroek sockhash_get(sockid_t id)
664c27a833SDavid van Moolenbroek {
674c27a833SDavid van Moolenbroek 	struct sock *sock;
684c27a833SDavid van Moolenbroek 	unsigned int slot;
694c27a833SDavid van Moolenbroek 
704c27a833SDavid van Moolenbroek 	slot = sockhash_slot(id);
714c27a833SDavid van Moolenbroek 
724c27a833SDavid van Moolenbroek 	SLIST_FOREACH(sock, &sockhash[slot], sock_hash) {
734c27a833SDavid van Moolenbroek 		if (sock->sock_id == id)
744c27a833SDavid van Moolenbroek 			return sock;
754c27a833SDavid van Moolenbroek 	}
764c27a833SDavid van Moolenbroek 
774c27a833SDavid van Moolenbroek 	return NULL;
784c27a833SDavid van Moolenbroek }
794c27a833SDavid van Moolenbroek 
804c27a833SDavid van Moolenbroek /*
814c27a833SDavid van Moolenbroek  * Add a sock object to the hash table.  The sock object must have a valid ID
824c27a833SDavid van Moolenbroek  * in its 'sock_id' field, and must not be in the hash table already.
834c27a833SDavid van Moolenbroek  */
844c27a833SDavid van Moolenbroek static void
sockhash_add(struct sock * sock)854c27a833SDavid van Moolenbroek sockhash_add(struct sock * sock)
864c27a833SDavid van Moolenbroek {
874c27a833SDavid van Moolenbroek 	unsigned int slot;
884c27a833SDavid van Moolenbroek 
894c27a833SDavid van Moolenbroek 	slot = sockhash_slot(sock->sock_id);
904c27a833SDavid van Moolenbroek 
914c27a833SDavid van Moolenbroek 	SLIST_INSERT_HEAD(&sockhash[slot], sock, sock_hash);
924c27a833SDavid van Moolenbroek }
934c27a833SDavid van Moolenbroek 
944c27a833SDavid van Moolenbroek /*
954c27a833SDavid van Moolenbroek  * Remove a sock object from the hash table.  The sock object must be in the
964c27a833SDavid van Moolenbroek  * hash table.
974c27a833SDavid van Moolenbroek  */
984c27a833SDavid van Moolenbroek static void
sockhash_del(struct sock * sock)994c27a833SDavid van Moolenbroek sockhash_del(struct sock * sock)
1004c27a833SDavid van Moolenbroek {
1014c27a833SDavid van Moolenbroek 	unsigned int slot;
1024c27a833SDavid van Moolenbroek 
1034c27a833SDavid van Moolenbroek 	slot = sockhash_slot(sock->sock_id);
1044c27a833SDavid van Moolenbroek 
1054c27a833SDavid van Moolenbroek 	/* This macro is O(n). */
1064c27a833SDavid van Moolenbroek 	SLIST_REMOVE(&sockhash[slot], sock, sock, sock_hash);
1074c27a833SDavid van Moolenbroek }
1084c27a833SDavid van Moolenbroek 
1094c27a833SDavid van Moolenbroek /*
1104c27a833SDavid van Moolenbroek  * Reset a socket object to a proper initial state, with a particular socket
1114c27a833SDavid van Moolenbroek  * identifier, a SOCK_ type, and a socket operations table.  The socket is
1124c27a833SDavid van Moolenbroek  * added to the ID-to-object hash table.  This function always succeeds.
1134c27a833SDavid van Moolenbroek  */
1144c27a833SDavid van Moolenbroek static void
sockevent_reset(struct sock * sock,sockid_t id,int domain,int type,const struct sockevent_ops * ops)1154c27a833SDavid van Moolenbroek sockevent_reset(struct sock * sock, sockid_t id, int domain, int type,
1164c27a833SDavid van Moolenbroek 	const struct sockevent_ops * ops)
1174c27a833SDavid van Moolenbroek {
1184c27a833SDavid van Moolenbroek 
1194c27a833SDavid van Moolenbroek 	assert(sock != NULL);
1204c27a833SDavid van Moolenbroek 
1214c27a833SDavid van Moolenbroek 	memset(sock, 0, sizeof(*sock));
1224c27a833SDavid van Moolenbroek 
1234c27a833SDavid van Moolenbroek 	sock->sock_id = id;
1244c27a833SDavid van Moolenbroek 	sock->sock_domain = domain;
1254c27a833SDavid van Moolenbroek 	sock->sock_type = type;
1264c27a833SDavid van Moolenbroek 
1274c27a833SDavid van Moolenbroek 	sock->sock_slowat = 1;
1284c27a833SDavid van Moolenbroek 	sock->sock_rlowat = 1;
1294c27a833SDavid van Moolenbroek 
1304c27a833SDavid van Moolenbroek 	sock->sock_ops = ops;
1314c27a833SDavid van Moolenbroek 	sock->sock_proc = NULL;
1324c27a833SDavid van Moolenbroek 	sock->sock_select.ss_endpt = NONE;
1334c27a833SDavid van Moolenbroek 
1344c27a833SDavid van Moolenbroek 	sockhash_add(sock);
1354c27a833SDavid van Moolenbroek }
1364c27a833SDavid van Moolenbroek 
1374c27a833SDavid van Moolenbroek /*
1384c27a833SDavid van Moolenbroek  * Initialize a new socket that will serve as an accepted socket on the given
1394c27a833SDavid van Moolenbroek  * listening socket 'sock'.  The new socket is given as 'newsock', and its new
1404c27a833SDavid van Moolenbroek  * socket identifier is given as 'newid'.  This function always succeeds.
1414c27a833SDavid van Moolenbroek  */
1424c27a833SDavid van Moolenbroek void
sockevent_clone(struct sock * sock,struct sock * newsock,sockid_t newid)1434c27a833SDavid van Moolenbroek sockevent_clone(struct sock * sock, struct sock * newsock, sockid_t newid)
1444c27a833SDavid van Moolenbroek {
1454c27a833SDavid van Moolenbroek 
1464c27a833SDavid van Moolenbroek 	sockevent_reset(newsock, newid, (int)sock->sock_domain,
1474c27a833SDavid van Moolenbroek 	    sock->sock_type, sock->sock_ops);
1484c27a833SDavid van Moolenbroek 
1494c27a833SDavid van Moolenbroek 	/* These are the settings that are currently inherited. */
1504c27a833SDavid van Moolenbroek 	newsock->sock_opt = sock->sock_opt & ~SO_ACCEPTCONN;
1514c27a833SDavid van Moolenbroek 	newsock->sock_linger = sock->sock_linger;
1524c27a833SDavid van Moolenbroek 	newsock->sock_stimeo = sock->sock_stimeo;
1534c27a833SDavid van Moolenbroek 	newsock->sock_rtimeo = sock->sock_rtimeo;
1544c27a833SDavid van Moolenbroek 	newsock->sock_slowat = sock->sock_slowat;
1554c27a833SDavid van Moolenbroek 	newsock->sock_rlowat = sock->sock_rlowat;
1564c27a833SDavid van Moolenbroek 
1574c27a833SDavid van Moolenbroek 	newsock->sock_flags |= SFL_CLONED;
1584c27a833SDavid van Moolenbroek }
1594c27a833SDavid van Moolenbroek 
1604c27a833SDavid van Moolenbroek /*
1614c27a833SDavid van Moolenbroek  * A new socket has just been accepted.  The corresponding listening socket is
1624c27a833SDavid van Moolenbroek  * given as 'sock'.  The new socket has ID 'newid', and if it had not already
1634c27a833SDavid van Moolenbroek  * been added to the hash table through sockevent_clone() before, 'newsock' is
1644c27a833SDavid van Moolenbroek  * a non-NULL pointer which identifies the socket object to clone into.
1654c27a833SDavid van Moolenbroek  */
1664c27a833SDavid van Moolenbroek static void
sockevent_accepted(struct sock * sock,struct sock * newsock,sockid_t newid)1674c27a833SDavid van Moolenbroek sockevent_accepted(struct sock * sock, struct sock * newsock, sockid_t newid)
1684c27a833SDavid van Moolenbroek {
1694c27a833SDavid van Moolenbroek 
1704c27a833SDavid van Moolenbroek 	if (newsock == NULL) {
1714c27a833SDavid van Moolenbroek 		if ((newsock = sockhash_get(newid)) == NULL)
1724c27a833SDavid van Moolenbroek 			panic("libsockdriver: socket driver returned unknown "
1734c27a833SDavid van Moolenbroek 			    "ID %d from accept callback", newid);
1744c27a833SDavid van Moolenbroek 	} else
1754c27a833SDavid van Moolenbroek 		sockevent_clone(sock, newsock, newid);
1764c27a833SDavid van Moolenbroek 
1774c27a833SDavid van Moolenbroek 	assert(newsock->sock_flags & SFL_CLONED);
1784c27a833SDavid van Moolenbroek 	newsock->sock_flags &= ~SFL_CLONED;
1794c27a833SDavid van Moolenbroek }
1804c27a833SDavid van Moolenbroek 
1814c27a833SDavid van Moolenbroek /*
1824c27a833SDavid van Moolenbroek  * Allocate a sock object, by asking the socket driver for one.  On success,
1834c27a833SDavid van Moolenbroek  * return OK, with a pointer to the new object stored in 'sockp'.  This new
1844c27a833SDavid van Moolenbroek  * object has all its fields set to initial values, in part based on the given
1854c27a833SDavid van Moolenbroek  * parameters.  On failure, return an error code.  Failure has two typical
1864c27a833SDavid van Moolenbroek  * cause: either the given domain, type, protocol combination is not supported,
1874c27a833SDavid van Moolenbroek  * or the socket driver is out of sockets (globally or for this combination).
1884c27a833SDavid van Moolenbroek  */
1894c27a833SDavid van Moolenbroek static int
sockevent_alloc(int domain,int type,int protocol,endpoint_t user_endpt,struct sock ** sockp)1904c27a833SDavid van Moolenbroek sockevent_alloc(int domain, int type, int protocol, endpoint_t user_endpt,
1914c27a833SDavid van Moolenbroek 	struct sock ** sockp)
1924c27a833SDavid van Moolenbroek {
1934c27a833SDavid van Moolenbroek 	struct sock *sock;
1944c27a833SDavid van Moolenbroek 	const struct sockevent_ops *ops;
1954c27a833SDavid van Moolenbroek 	sockid_t r;
1964c27a833SDavid van Moolenbroek 
1974c27a833SDavid van Moolenbroek 	/*
1984c27a833SDavid van Moolenbroek 	 * Verify that the given domain is sane.  Unlike the type and protocol,
1994c27a833SDavid van Moolenbroek 	 * the domain is already verified by VFS, so we do not limit ourselves
2004c27a833SDavid van Moolenbroek 	 * here.  The result is that we can store the domain in just a byte.
2014c27a833SDavid van Moolenbroek 	 */
2024c27a833SDavid van Moolenbroek 	if (domain < 0 || domain > UINT8_MAX)
2034c27a833SDavid van Moolenbroek 		return EAFNOSUPPORT;
2044c27a833SDavid van Moolenbroek 
2054c27a833SDavid van Moolenbroek 	/* Make sure that the library has actually been initialized. */
2064c27a833SDavid van Moolenbroek 	if (sockevent_socket_cb == NULL)
2074c27a833SDavid van Moolenbroek 		panic("libsockevent: not initialized");
2084c27a833SDavid van Moolenbroek 
2094c27a833SDavid van Moolenbroek 	sock = NULL;
2104c27a833SDavid van Moolenbroek 	ops = NULL;
2114c27a833SDavid van Moolenbroek 
2124c27a833SDavid van Moolenbroek 	/*
2134c27a833SDavid van Moolenbroek 	 * Ask the socket driver to create a socket for the given combination
2144c27a833SDavid van Moolenbroek 	 * of domain, type, and protocol.  If so, let it return a new sock
2154c27a833SDavid van Moolenbroek 	 * object, a unique socket identifier for that object, and an
2164c27a833SDavid van Moolenbroek 	 * operations table for it.
2174c27a833SDavid van Moolenbroek 	 */
2184c27a833SDavid van Moolenbroek 	if ((r = sockevent_socket_cb(domain, type, protocol, user_endpt, &sock,
2194c27a833SDavid van Moolenbroek 	    &ops)) < 0)
2204c27a833SDavid van Moolenbroek 		return r;
2214c27a833SDavid van Moolenbroek 
2224c27a833SDavid van Moolenbroek 	assert(sock != NULL);
2234c27a833SDavid van Moolenbroek 	assert(ops != NULL);
2244c27a833SDavid van Moolenbroek 
2254c27a833SDavid van Moolenbroek 	sockevent_reset(sock, r, domain, type, ops);
2264c27a833SDavid van Moolenbroek 
2274c27a833SDavid van Moolenbroek 	*sockp = sock;
2284c27a833SDavid van Moolenbroek 	return OK;
2294c27a833SDavid van Moolenbroek }
2304c27a833SDavid van Moolenbroek 
2314c27a833SDavid van Moolenbroek /*
2324c27a833SDavid van Moolenbroek  * Free a previously allocated sock object.
2334c27a833SDavid van Moolenbroek  */
2344c27a833SDavid van Moolenbroek static void
sockevent_free(struct sock * sock)2354c27a833SDavid van Moolenbroek sockevent_free(struct sock * sock)
2364c27a833SDavid van Moolenbroek {
2374c27a833SDavid van Moolenbroek 	const struct sockevent_ops *ops;
2384c27a833SDavid van Moolenbroek 
2394c27a833SDavid van Moolenbroek 	assert(sock->sock_proc == NULL);
2404c27a833SDavid van Moolenbroek 
2414c27a833SDavid van Moolenbroek 	socktimer_del(sock);
2424c27a833SDavid van Moolenbroek 
2434c27a833SDavid van Moolenbroek 	sockhash_del(sock);
2444c27a833SDavid van Moolenbroek 
2454c27a833SDavid van Moolenbroek 	/*
2464c27a833SDavid van Moolenbroek 	 * Invalidate the operations table on the socket, before freeing the
2474c27a833SDavid van Moolenbroek 	 * socket.  This allows us to detect cases where sockevent functions
2484c27a833SDavid van Moolenbroek 	 * are called on sockets that have already been freed.
2494c27a833SDavid van Moolenbroek 	 */
2504c27a833SDavid van Moolenbroek 	ops = sock->sock_ops;
2514c27a833SDavid van Moolenbroek 	sock->sock_ops = NULL;
2524c27a833SDavid van Moolenbroek 
2534c27a833SDavid van Moolenbroek 	assert(ops != NULL);
2544c27a833SDavid van Moolenbroek 	assert(ops->sop_free != NULL);
2554c27a833SDavid van Moolenbroek 
2564c27a833SDavid van Moolenbroek 	ops->sop_free(sock);
2574c27a833SDavid van Moolenbroek }
2584c27a833SDavid van Moolenbroek 
2594c27a833SDavid van Moolenbroek /*
2604c27a833SDavid van Moolenbroek  * Create a new socket.
2614c27a833SDavid van Moolenbroek  */
2624c27a833SDavid van Moolenbroek static sockid_t
sockevent_socket(int domain,int type,int protocol,endpoint_t user_endpt)2634c27a833SDavid van Moolenbroek sockevent_socket(int domain, int type, int protocol, endpoint_t user_endpt)
2644c27a833SDavid van Moolenbroek {
2654c27a833SDavid van Moolenbroek 	struct sock *sock;
2664c27a833SDavid van Moolenbroek 	int r;
2674c27a833SDavid van Moolenbroek 
2684c27a833SDavid van Moolenbroek 	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
2694c27a833SDavid van Moolenbroek 	    &sock)) != OK)
2704c27a833SDavid van Moolenbroek 		return r;
2714c27a833SDavid van Moolenbroek 
2724c27a833SDavid van Moolenbroek 	return sock->sock_id;
2734c27a833SDavid van Moolenbroek }
2744c27a833SDavid van Moolenbroek 
2754c27a833SDavid van Moolenbroek /*
2764c27a833SDavid van Moolenbroek  * Create a pair of connected sockets.
2774c27a833SDavid van Moolenbroek  */
2784c27a833SDavid van Moolenbroek static int
sockevent_socketpair(int domain,int type,int protocol,endpoint_t user_endpt,sockid_t id[2])2794c27a833SDavid van Moolenbroek sockevent_socketpair(int domain, int type, int protocol, endpoint_t user_endpt,
2804c27a833SDavid van Moolenbroek 	sockid_t id[2])
2814c27a833SDavid van Moolenbroek {
2824c27a833SDavid van Moolenbroek 	struct sock *sock1, *sock2;
2834c27a833SDavid van Moolenbroek 	int r;
2844c27a833SDavid van Moolenbroek 
2854c27a833SDavid van Moolenbroek 	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
2864c27a833SDavid van Moolenbroek 	    &sock1)) != OK)
2874c27a833SDavid van Moolenbroek 		return r;
2884c27a833SDavid van Moolenbroek 
2894c27a833SDavid van Moolenbroek 	/* Creating socket pairs is not always supported. */
2904c27a833SDavid van Moolenbroek 	if (sock1->sock_ops->sop_pair == NULL) {
2914c27a833SDavid van Moolenbroek 		sockevent_free(sock1);
2924c27a833SDavid van Moolenbroek 
2934c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
2944c27a833SDavid van Moolenbroek 	}
2954c27a833SDavid van Moolenbroek 
2964c27a833SDavid van Moolenbroek 	if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
2974c27a833SDavid van Moolenbroek 	    &sock2)) != OK) {
2984c27a833SDavid van Moolenbroek 		sockevent_free(sock1);
2994c27a833SDavid van Moolenbroek 
3004c27a833SDavid van Moolenbroek 		return r;
3014c27a833SDavid van Moolenbroek 	}
3024c27a833SDavid van Moolenbroek 
3034c27a833SDavid van Moolenbroek 	assert(sock1->sock_ops == sock2->sock_ops);
3044c27a833SDavid van Moolenbroek 
3054c27a833SDavid van Moolenbroek 	r = sock1->sock_ops->sop_pair(sock1, sock2, user_endpt);
3064c27a833SDavid van Moolenbroek 
3074c27a833SDavid van Moolenbroek 	if (r != OK) {
3084c27a833SDavid van Moolenbroek 		sockevent_free(sock2);
3094c27a833SDavid van Moolenbroek 		sockevent_free(sock1);
3104c27a833SDavid van Moolenbroek 
3114c27a833SDavid van Moolenbroek 		return r;
3124c27a833SDavid van Moolenbroek 	}
3134c27a833SDavid van Moolenbroek 
3144c27a833SDavid van Moolenbroek 	id[0] = sock1->sock_id;
3154c27a833SDavid van Moolenbroek 	id[1] = sock2->sock_id;
3164c27a833SDavid van Moolenbroek 	return OK;
3174c27a833SDavid van Moolenbroek }
3184c27a833SDavid van Moolenbroek 
3194c27a833SDavid van Moolenbroek /*
3204c27a833SDavid van Moolenbroek  * A send request returned EPIPE.  If desired, send a SIGPIPE signal to the
3214c27a833SDavid van Moolenbroek  * user process that issued the request.
3224c27a833SDavid van Moolenbroek  */
3234c27a833SDavid van Moolenbroek static void
sockevent_sigpipe(struct sock * sock,endpoint_t user_endpt,int flags)3244c27a833SDavid van Moolenbroek sockevent_sigpipe(struct sock * sock, endpoint_t user_endpt, int flags)
3254c27a833SDavid van Moolenbroek {
3264c27a833SDavid van Moolenbroek 
3274c27a833SDavid van Moolenbroek 	/*
3284c27a833SDavid van Moolenbroek 	 * POSIX says that pipe signals should be generated for SOCK_STREAM
3294c27a833SDavid van Moolenbroek 	 * sockets.  Linux does just this, NetBSD raises signals for all socket
3304c27a833SDavid van Moolenbroek 	 * types.
3314c27a833SDavid van Moolenbroek 	 */
3324c27a833SDavid van Moolenbroek 	if (sock->sock_type != SOCK_STREAM)
3334c27a833SDavid van Moolenbroek 		return;
3344c27a833SDavid van Moolenbroek 
3354c27a833SDavid van Moolenbroek 	/*
3364c27a833SDavid van Moolenbroek 	 * Why would there be fewer than four ways to do the same thing?
3374c27a833SDavid van Moolenbroek 	 * O_NOSIGPIPE, MSG_NOSIGNAL, SO_NOSIGPIPE, and of course blocking
3384c27a833SDavid van Moolenbroek 	 * SIGPIPE.  VFS already sets MSG_NOSIGNAL for calls on sockets with
3394c27a833SDavid van Moolenbroek 	 * O_NOSIGPIPE.  The fact that SO_NOSIGPIPE is a thing, is also the
3404c27a833SDavid van Moolenbroek 	 * reason why we cannot let VFS handle signal generation altogether.
3414c27a833SDavid van Moolenbroek 	 */
3424c27a833SDavid van Moolenbroek 	if (flags & MSG_NOSIGNAL)
3434c27a833SDavid van Moolenbroek 		return;
3444c27a833SDavid van Moolenbroek 	if (sock->sock_opt & SO_NOSIGPIPE)
3454c27a833SDavid van Moolenbroek 		return;
3464c27a833SDavid van Moolenbroek 
3474c27a833SDavid van Moolenbroek 	/*
3484c27a833SDavid van Moolenbroek 	 * Send a SIGPIPE signal to the user process.  Unfortunately we cannot
3494c27a833SDavid van Moolenbroek 	 * guarantee that the SIGPIPE reaches the user process before the send
3504c27a833SDavid van Moolenbroek 	 * call returns.  Usually, the scheduling priorities of system services
3514c27a833SDavid van Moolenbroek 	 * are such that the signal is likely to arrive first anyway, but if
3524c27a833SDavid van Moolenbroek 	 * timely arrival of the signal is required, a more fundamental change
3534c27a833SDavid van Moolenbroek 	 * to the system would be needed.
3544c27a833SDavid van Moolenbroek 	 */
3554c27a833SDavid van Moolenbroek 	sys_kill(user_endpt, SIGPIPE);
3564c27a833SDavid van Moolenbroek }
3574c27a833SDavid van Moolenbroek 
3584c27a833SDavid van Moolenbroek /*
3594c27a833SDavid van Moolenbroek  * Suspend a request without data, that is, a bind, connect, accept, or close
3604c27a833SDavid van Moolenbroek  * request.
3614c27a833SDavid van Moolenbroek  */
3624c27a833SDavid van Moolenbroek static void
sockevent_suspend(struct sock * sock,unsigned int event,const struct sockdriver_call * __restrict call,endpoint_t user_endpt)3634c27a833SDavid van Moolenbroek sockevent_suspend(struct sock * sock, unsigned int event,
3644c27a833SDavid van Moolenbroek 	const struct sockdriver_call * __restrict call, endpoint_t user_endpt)
3654c27a833SDavid van Moolenbroek {
3664c27a833SDavid van Moolenbroek 	struct sockevent_proc *spr, **sprp;
3674c27a833SDavid van Moolenbroek 
3684c27a833SDavid van Moolenbroek 	/* There is one slot for each process, so this should never fail. */
3694c27a833SDavid van Moolenbroek 	if ((spr = sockevent_proc_alloc()) == NULL)
3704c27a833SDavid van Moolenbroek 		panic("libsockevent: too many suspended processes");
3714c27a833SDavid van Moolenbroek 
3724c27a833SDavid van Moolenbroek 	spr->spr_next = NULL;
3734c27a833SDavid van Moolenbroek 	spr->spr_event = event;
3744c27a833SDavid van Moolenbroek 	spr->spr_timer = FALSE;
3754c27a833SDavid van Moolenbroek 	spr->spr_call = *call;
3764c27a833SDavid van Moolenbroek 	spr->spr_endpt = user_endpt;
3774c27a833SDavid van Moolenbroek 
3784c27a833SDavid van Moolenbroek 	/*
3794c27a833SDavid van Moolenbroek 	 * Add the request to the tail of the queue.  This operation is O(n),
3804c27a833SDavid van Moolenbroek 	 * but the number of suspended requests per socket is expected to be
3814c27a833SDavid van Moolenbroek 	 * low at all times.
3824c27a833SDavid van Moolenbroek 	 */
3834c27a833SDavid van Moolenbroek 	for (sprp = &sock->sock_proc; *sprp != NULL;
3844c27a833SDavid van Moolenbroek 	     sprp = &(*sprp)->spr_next);
3854c27a833SDavid van Moolenbroek 	*sprp = spr;
3864c27a833SDavid van Moolenbroek }
3874c27a833SDavid van Moolenbroek 
3884c27a833SDavid van Moolenbroek /*
3894c27a833SDavid van Moolenbroek  * Suspend a request with data, that is, a send or receive request.
3904c27a833SDavid van Moolenbroek  */
3914c27a833SDavid van Moolenbroek static void
sockevent_suspend_data(struct sock * sock,unsigned int event,int timer,const struct sockdriver_call * __restrict call,endpoint_t user_endpt,const struct sockdriver_data * __restrict data,size_t len,size_t off,const struct sockdriver_data * __restrict ctl,socklen_t ctl_len,socklen_t ctl_off,int flags,int rflags,clock_t time)3924c27a833SDavid van Moolenbroek sockevent_suspend_data(struct sock * sock, unsigned int event, int timer,
3934c27a833SDavid van Moolenbroek 	const struct sockdriver_call * __restrict call, endpoint_t user_endpt,
3944c27a833SDavid van Moolenbroek 	const struct sockdriver_data * __restrict data, size_t len, size_t off,
3954c27a833SDavid van Moolenbroek 	const struct sockdriver_data * __restrict ctl, socklen_t ctl_len,
3964c27a833SDavid van Moolenbroek 	socklen_t ctl_off, int flags, int rflags, clock_t time)
3974c27a833SDavid van Moolenbroek {
3984c27a833SDavid van Moolenbroek 	struct sockevent_proc *spr, **sprp;
3994c27a833SDavid van Moolenbroek 
4004c27a833SDavid van Moolenbroek 	/* There is one slot for each process, so this should never fail. */
4014c27a833SDavid van Moolenbroek 	if ((spr = sockevent_proc_alloc()) == NULL)
4024c27a833SDavid van Moolenbroek 		panic("libsockevent: too many suspended processes");
4034c27a833SDavid van Moolenbroek 
4044c27a833SDavid van Moolenbroek 	spr->spr_next = NULL;
4054c27a833SDavid van Moolenbroek 	spr->spr_event = event;
4064c27a833SDavid van Moolenbroek 	spr->spr_timer = timer;
4074c27a833SDavid van Moolenbroek 	spr->spr_call = *call;
4084c27a833SDavid van Moolenbroek 	spr->spr_endpt = user_endpt;
4094c27a833SDavid van Moolenbroek 	sockdriver_pack_data(&spr->spr_data, call, data, len);
4104c27a833SDavid van Moolenbroek 	spr->spr_datalen = len;
4114c27a833SDavid van Moolenbroek 	spr->spr_dataoff = off;
4124c27a833SDavid van Moolenbroek 	sockdriver_pack_data(&spr->spr_ctl, call, ctl, ctl_len);
4134c27a833SDavid van Moolenbroek 	spr->spr_ctllen = ctl_len;
4144c27a833SDavid van Moolenbroek 	spr->spr_ctloff = ctl_off;
4154c27a833SDavid van Moolenbroek 	spr->spr_flags = flags;
4164c27a833SDavid van Moolenbroek 	spr->spr_rflags = rflags;
4174c27a833SDavid van Moolenbroek 	spr->spr_time = time;
4184c27a833SDavid van Moolenbroek 
4194c27a833SDavid van Moolenbroek 	/*
4204c27a833SDavid van Moolenbroek 	 * Add the request to the tail of the queue.  This operation is O(n),
4214c27a833SDavid van Moolenbroek 	 * but the number of suspended requests per socket is expected to be
4224c27a833SDavid van Moolenbroek 	 * low at all times.
4234c27a833SDavid van Moolenbroek 	 */
4244c27a833SDavid van Moolenbroek 	for (sprp = &sock->sock_proc; *sprp != NULL;
4254c27a833SDavid van Moolenbroek 	     sprp = &(*sprp)->spr_next);
4264c27a833SDavid van Moolenbroek 	*sprp = spr;
4274c27a833SDavid van Moolenbroek }
4284c27a833SDavid van Moolenbroek 
4294c27a833SDavid van Moolenbroek /*
4304c27a833SDavid van Moolenbroek  * Return TRUE if there are any suspended requests on the given socket's queue
4314c27a833SDavid van Moolenbroek  * that match any of the events in the given event mask, or FALSE otherwise.
4324c27a833SDavid van Moolenbroek  */
4334c27a833SDavid van Moolenbroek static int
sockevent_has_suspended(struct sock * sock,unsigned int mask)4344c27a833SDavid van Moolenbroek sockevent_has_suspended(struct sock * sock, unsigned int mask)
4354c27a833SDavid van Moolenbroek {
4364c27a833SDavid van Moolenbroek 	struct sockevent_proc *spr;
4374c27a833SDavid van Moolenbroek 
4384c27a833SDavid van Moolenbroek 	for (spr = sock->sock_proc; spr != NULL; spr = spr->spr_next)
4394c27a833SDavid van Moolenbroek 		if (spr->spr_event & mask)
4404c27a833SDavid van Moolenbroek 			return TRUE;
4414c27a833SDavid van Moolenbroek 
4424c27a833SDavid van Moolenbroek 	return FALSE;
4434c27a833SDavid van Moolenbroek }
4444c27a833SDavid van Moolenbroek 
4454c27a833SDavid van Moolenbroek /*
4464c27a833SDavid van Moolenbroek  * Check whether the given call is on the given socket's queue of suspended
4474c27a833SDavid van Moolenbroek  * requests.  If so, remove it from the queue and return a pointer to the
4484c27a833SDavid van Moolenbroek  * suspension data structure.  The caller is then responsible for freeing that
4494c27a833SDavid van Moolenbroek  * data structure using sockevent_proc_free().  If the call was not found, the
4504c27a833SDavid van Moolenbroek  * function returns NULL.
4514c27a833SDavid van Moolenbroek  */
4524c27a833SDavid van Moolenbroek static struct sockevent_proc *
sockevent_unsuspend(struct sock * sock,const struct sockdriver_call * call)4534c27a833SDavid van Moolenbroek sockevent_unsuspend(struct sock * sock, const struct sockdriver_call * call)
4544c27a833SDavid van Moolenbroek {
4554c27a833SDavid van Moolenbroek 	struct sockevent_proc *spr, **sprp;
4564c27a833SDavid van Moolenbroek 
4574c27a833SDavid van Moolenbroek 	/* Find the suspended request being canceled. */
4584c27a833SDavid van Moolenbroek 	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL;
4594c27a833SDavid van Moolenbroek 	    sprp = &spr->spr_next) {
4604c27a833SDavid van Moolenbroek 		if (spr->spr_call.sc_endpt == call->sc_endpt &&
4614c27a833SDavid van Moolenbroek 		    spr->spr_call.sc_req == call->sc_req) {
4624c27a833SDavid van Moolenbroek 			/* Found; remove and return it. */
4634c27a833SDavid van Moolenbroek 			*sprp = spr->spr_next;
4644c27a833SDavid van Moolenbroek 
4654c27a833SDavid van Moolenbroek 			return spr;
4664c27a833SDavid van Moolenbroek 		}
4674c27a833SDavid van Moolenbroek 	}
4684c27a833SDavid van Moolenbroek 
4694c27a833SDavid van Moolenbroek 	return NULL;
4704c27a833SDavid van Moolenbroek }
4714c27a833SDavid van Moolenbroek 
4724c27a833SDavid van Moolenbroek /*
4734c27a833SDavid van Moolenbroek  * Attempt to resume the given suspended request for the given socket object.
4744c27a833SDavid van Moolenbroek  * Return TRUE if the suspended request has been fully resumed and can be
4754c27a833SDavid van Moolenbroek  * removed from the queue of suspended requests, or FALSE if it has not been
4764c27a833SDavid van Moolenbroek  * fully resumed and should stay on the queue.  In the latter case, no
4774c27a833SDavid van Moolenbroek  * resumption will be attempted for other suspended requests of the same type.
4784c27a833SDavid van Moolenbroek  */
4794c27a833SDavid van Moolenbroek static int
sockevent_resume(struct sock * sock,struct sockevent_proc * spr)4804c27a833SDavid van Moolenbroek sockevent_resume(struct sock * sock, struct sockevent_proc * spr)
4814c27a833SDavid van Moolenbroek {
4824c27a833SDavid van Moolenbroek 	struct sock *newsock;
4834c27a833SDavid van Moolenbroek 	struct sockdriver_data data, ctl;
4844c27a833SDavid van Moolenbroek 	char addr[SOCKADDR_MAX];
4854c27a833SDavid van Moolenbroek 	socklen_t addr_len;
4864c27a833SDavid van Moolenbroek 	size_t len, min;
4874c27a833SDavid van Moolenbroek 	sockid_t r;
4884c27a833SDavid van Moolenbroek 
4894c27a833SDavid van Moolenbroek 	switch (spr->spr_event) {
4904c27a833SDavid van Moolenbroek 	case SEV_CONNECT:
4914c27a833SDavid van Moolenbroek 		/*
4924c27a833SDavid van Moolenbroek 		 * If the connect call was suspended for the purpose of
4934c27a833SDavid van Moolenbroek 		 * intercepting resumption, simply remove it from the queue.
4944c27a833SDavid van Moolenbroek 		 */
4954c27a833SDavid van Moolenbroek 		if (spr->spr_call.sc_endpt == NONE)
4964c27a833SDavid van Moolenbroek 			return TRUE;
4974c27a833SDavid van Moolenbroek 
4984c27a833SDavid van Moolenbroek 		/* FALLTHROUGH */
4994c27a833SDavid van Moolenbroek 	case SEV_BIND:
5004c27a833SDavid van Moolenbroek 		if ((r = sock->sock_err) != OK)
5014c27a833SDavid van Moolenbroek 			sock->sock_err = OK;
5024c27a833SDavid van Moolenbroek 
5034c27a833SDavid van Moolenbroek 		sockdriver_reply_generic(&spr->spr_call, r);
5044c27a833SDavid van Moolenbroek 
5054c27a833SDavid van Moolenbroek 		return TRUE;
5064c27a833SDavid van Moolenbroek 
5074c27a833SDavid van Moolenbroek 	case SEV_ACCEPT:
5084c27a833SDavid van Moolenbroek 		/*
5094c27a833SDavid van Moolenbroek 		 * A previous accept call may not have blocked on a socket that
5104c27a833SDavid van Moolenbroek 		 * was not in listening mode.
5114c27a833SDavid van Moolenbroek 		 */
5124c27a833SDavid van Moolenbroek 		assert(sock->sock_opt & SO_ACCEPTCONN);
5134c27a833SDavid van Moolenbroek 
5144c27a833SDavid van Moolenbroek 		addr_len = 0;
5154c27a833SDavid van Moolenbroek 		newsock = NULL;
5164c27a833SDavid van Moolenbroek 
5174c27a833SDavid van Moolenbroek 		/*
5184c27a833SDavid van Moolenbroek 		 * This call is suspended, which implies that the call table
5194c27a833SDavid van Moolenbroek 		 * pointer has already tested to be non-NULL.
5204c27a833SDavid van Moolenbroek 		 */
5214c27a833SDavid van Moolenbroek 		if ((r = sock->sock_ops->sop_accept(sock,
5224c27a833SDavid van Moolenbroek 		    (struct sockaddr *)&addr, &addr_len, spr->spr_endpt,
5234c27a833SDavid van Moolenbroek 		    &newsock)) == SUSPEND)
5244c27a833SDavid van Moolenbroek 			return FALSE;
5254c27a833SDavid van Moolenbroek 
5264c27a833SDavid van Moolenbroek 		if (r >= 0) {
5274c27a833SDavid van Moolenbroek 			assert(addr_len <= sizeof(addr));
5284c27a833SDavid van Moolenbroek 
5294c27a833SDavid van Moolenbroek 			sockevent_accepted(sock, newsock, r);
5304c27a833SDavid van Moolenbroek 		}
5314c27a833SDavid van Moolenbroek 
5324c27a833SDavid van Moolenbroek 		sockdriver_reply_accept(&spr->spr_call, r,
5334c27a833SDavid van Moolenbroek 		    (struct sockaddr *)&addr, addr_len);
5344c27a833SDavid van Moolenbroek 
5354c27a833SDavid van Moolenbroek 		return TRUE;
5364c27a833SDavid van Moolenbroek 
5374c27a833SDavid van Moolenbroek 	case SEV_SEND:
5384c27a833SDavid van Moolenbroek 		if (sock->sock_err != OK || (sock->sock_flags & SFL_SHUT_WR)) {
5394c27a833SDavid van Moolenbroek 			if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
5404c27a833SDavid van Moolenbroek 				r = (int)spr->spr_dataoff;
5414c27a833SDavid van Moolenbroek 			else if ((r = sock->sock_err) != OK)
5424c27a833SDavid van Moolenbroek 				sock->sock_err = OK;
5434c27a833SDavid van Moolenbroek 			else
5444c27a833SDavid van Moolenbroek 				r = EPIPE;
5454c27a833SDavid van Moolenbroek 		} else {
5464c27a833SDavid van Moolenbroek 			sockdriver_unpack_data(&data, &spr->spr_call,
5474c27a833SDavid van Moolenbroek 			    &spr->spr_data, spr->spr_datalen);
5484c27a833SDavid van Moolenbroek 			sockdriver_unpack_data(&ctl, &spr->spr_call,
5494c27a833SDavid van Moolenbroek 			    &spr->spr_ctl, spr->spr_ctllen);
5504c27a833SDavid van Moolenbroek 
5514c27a833SDavid van Moolenbroek 			len = spr->spr_datalen - spr->spr_dataoff;
5524c27a833SDavid van Moolenbroek 
5534c27a833SDavid van Moolenbroek 			min = sock->sock_slowat;
5544c27a833SDavid van Moolenbroek 			if (min > len)
5554c27a833SDavid van Moolenbroek 				min = len;
5564c27a833SDavid van Moolenbroek 
5574c27a833SDavid van Moolenbroek 			/*
5584c27a833SDavid van Moolenbroek 			 * As mentioned elsewhere, we do not save the address
5594c27a833SDavid van Moolenbroek 			 * upon suspension so we cannot supply it anymore here.
5604c27a833SDavid van Moolenbroek 			 */
5614c27a833SDavid van Moolenbroek 			r = sock->sock_ops->sop_send(sock, &data, len,
5624c27a833SDavid van Moolenbroek 			    &spr->spr_dataoff, &ctl,
5634c27a833SDavid van Moolenbroek 			    spr->spr_ctllen - spr->spr_ctloff,
5644c27a833SDavid van Moolenbroek 			    &spr->spr_ctloff, NULL, 0, spr->spr_endpt,
5654c27a833SDavid van Moolenbroek 			    spr->spr_flags, min);
5664c27a833SDavid van Moolenbroek 
5674c27a833SDavid van Moolenbroek 			assert(r <= 0);
5684c27a833SDavid van Moolenbroek 
5694c27a833SDavid van Moolenbroek 			if (r == SUSPEND)
5704c27a833SDavid van Moolenbroek 				return FALSE;
5714c27a833SDavid van Moolenbroek 
5724c27a833SDavid van Moolenbroek 			/*
5734c27a833SDavid van Moolenbroek 			 * If an error occurred but some data were already
5744c27a833SDavid van Moolenbroek 			 * sent, return the progress rather than the error.
5754c27a833SDavid van Moolenbroek 			 * Note that if the socket driver detects an
5764c27a833SDavid van Moolenbroek 			 * asynchronous error during the send, it itself must
5774c27a833SDavid van Moolenbroek 			 * perform this check and call sockevent_set_error() as
5784c27a833SDavid van Moolenbroek 			 * needed, to make sure the error does not get lost.
5794c27a833SDavid van Moolenbroek 			 */
5804c27a833SDavid van Moolenbroek 			if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
5814c27a833SDavid van Moolenbroek 				r = spr->spr_dataoff;
5824c27a833SDavid van Moolenbroek 		}
5834c27a833SDavid van Moolenbroek 
5844c27a833SDavid van Moolenbroek 		if (r == EPIPE)
5854c27a833SDavid van Moolenbroek 			sockevent_sigpipe(sock, spr->spr_endpt,
5864c27a833SDavid van Moolenbroek 			    spr->spr_flags);
5874c27a833SDavid van Moolenbroek 
5884c27a833SDavid van Moolenbroek 		sockdriver_reply_generic(&spr->spr_call, r);
5894c27a833SDavid van Moolenbroek 
5904c27a833SDavid van Moolenbroek 		return TRUE;
5914c27a833SDavid van Moolenbroek 
5924c27a833SDavid van Moolenbroek 	case SEV_RECV:
5934c27a833SDavid van Moolenbroek 		addr_len = 0;
5944c27a833SDavid van Moolenbroek 
5954c27a833SDavid van Moolenbroek 		if (sock->sock_flags & SFL_SHUT_RD)
5964c27a833SDavid van Moolenbroek 			r = SOCKEVENT_EOF;
5974c27a833SDavid van Moolenbroek 		else {
5984c27a833SDavid van Moolenbroek 			len = spr->spr_datalen - spr->spr_dataoff;
5994c27a833SDavid van Moolenbroek 
6004c27a833SDavid van Moolenbroek 			if (sock->sock_err == OK) {
6014c27a833SDavid van Moolenbroek 				min = sock->sock_rlowat;
6024c27a833SDavid van Moolenbroek 				if (min > len)
6034c27a833SDavid van Moolenbroek 					min = len;
6044c27a833SDavid van Moolenbroek 			} else
6054c27a833SDavid van Moolenbroek 				min = 0;
6064c27a833SDavid van Moolenbroek 
6074c27a833SDavid van Moolenbroek 			sockdriver_unpack_data(&data, &spr->spr_call,
6084c27a833SDavid van Moolenbroek 			    &spr->spr_data, spr->spr_datalen);
6094c27a833SDavid van Moolenbroek 			sockdriver_unpack_data(&ctl, &spr->spr_call,
6104c27a833SDavid van Moolenbroek 			    &spr->spr_ctl, spr->spr_ctllen);
6114c27a833SDavid van Moolenbroek 
6124c27a833SDavid van Moolenbroek 			r = sock->sock_ops->sop_recv(sock, &data, len,
6134c27a833SDavid van Moolenbroek 			    &spr->spr_dataoff, &ctl,
6144c27a833SDavid van Moolenbroek 			    spr->spr_ctllen - spr->spr_ctloff,
6154c27a833SDavid van Moolenbroek 			    &spr->spr_ctloff, (struct sockaddr *)&addr,
6164c27a833SDavid van Moolenbroek 			    &addr_len, spr->spr_endpt, spr->spr_flags, min,
6174c27a833SDavid van Moolenbroek 			    &spr->spr_rflags);
6184c27a833SDavid van Moolenbroek 
6194c27a833SDavid van Moolenbroek 			/*
6204c27a833SDavid van Moolenbroek 			 * If the call remains suspended but a socket error is
6214c27a833SDavid van Moolenbroek 			 * pending, return the pending socket error instead.
6224c27a833SDavid van Moolenbroek 			 */
6234c27a833SDavid van Moolenbroek 			if (r == SUSPEND) {
6244c27a833SDavid van Moolenbroek 				if (sock->sock_err == OK)
6254c27a833SDavid van Moolenbroek 					return FALSE;
6264c27a833SDavid van Moolenbroek 
6274c27a833SDavid van Moolenbroek 				r = SOCKEVENT_EOF;
6284c27a833SDavid van Moolenbroek 			}
6294c27a833SDavid van Moolenbroek 
6304c27a833SDavid van Moolenbroek 			assert(addr_len <= sizeof(addr));
6314c27a833SDavid van Moolenbroek 		}
6324c27a833SDavid van Moolenbroek 
6334c27a833SDavid van Moolenbroek 		/*
6344c27a833SDavid van Moolenbroek 		 * If the receive call reported success, or if some data were
6354c27a833SDavid van Moolenbroek 		 * already received, return the (partial) result.  Otherwise,
6364c27a833SDavid van Moolenbroek 		 * return a pending error if any, or otherwise a regular error
6374c27a833SDavid van Moolenbroek 		 * or 0 for EOF.
6384c27a833SDavid van Moolenbroek 		 */
6394c27a833SDavid van Moolenbroek 		if (r == OK || spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
6404c27a833SDavid van Moolenbroek 			r = (int)spr->spr_dataoff;
6414c27a833SDavid van Moolenbroek 		else if (sock->sock_err != OK) {
6424c27a833SDavid van Moolenbroek 			r = sock->sock_err;
6434c27a833SDavid van Moolenbroek 
6444c27a833SDavid van Moolenbroek 			sock->sock_err = OK;
6454c27a833SDavid van Moolenbroek 		} else if (r == SOCKEVENT_EOF)
6464c27a833SDavid van Moolenbroek 			r = 0; /* EOF */
6474c27a833SDavid van Moolenbroek 
6484c27a833SDavid van Moolenbroek 		sockdriver_reply_recv(&spr->spr_call, r, spr->spr_ctloff,
6494c27a833SDavid van Moolenbroek 		    (struct sockaddr *)&addr, addr_len, spr->spr_rflags);
6504c27a833SDavid van Moolenbroek 
6514c27a833SDavid van Moolenbroek 		return TRUE;
6524c27a833SDavid van Moolenbroek 
6534c27a833SDavid van Moolenbroek 	case SEV_CLOSE:
6544c27a833SDavid van Moolenbroek 		sockdriver_reply_generic(&spr->spr_call, OK);
6554c27a833SDavid van Moolenbroek 
6564c27a833SDavid van Moolenbroek 		return TRUE;
6574c27a833SDavid van Moolenbroek 
6584c27a833SDavid van Moolenbroek 	default:
6594c27a833SDavid van Moolenbroek 		panic("libsockevent: process suspended on unknown event 0x%x",
6604c27a833SDavid van Moolenbroek 		    spr->spr_event);
6614c27a833SDavid van Moolenbroek 	}
6624c27a833SDavid van Moolenbroek }
6634c27a833SDavid van Moolenbroek 
6644c27a833SDavid van Moolenbroek /*
6654c27a833SDavid van Moolenbroek  * Return TRUE if the given socket is ready for reading for a select call, or
6664c27a833SDavid van Moolenbroek  * FALSE otherwise.
6674c27a833SDavid van Moolenbroek  */
6684c27a833SDavid van Moolenbroek static int
sockevent_test_readable(struct sock * sock)6694c27a833SDavid van Moolenbroek sockevent_test_readable(struct sock * sock)
6704c27a833SDavid van Moolenbroek {
6714c27a833SDavid van Moolenbroek 	int r;
6724c27a833SDavid van Moolenbroek 
6734c27a833SDavid van Moolenbroek 	/*
6744c27a833SDavid van Moolenbroek 	 * The meaning of "ready-to-read" depends on whether the socket is a
6754c27a833SDavid van Moolenbroek 	 * listening socket or not.  For the former, it is a test on whether
6764c27a833SDavid van Moolenbroek 	 * there are any new sockets to accept.  However, shutdown flags take
6774c27a833SDavid van Moolenbroek 	 * precedence in both cases.
6784c27a833SDavid van Moolenbroek 	 */
6794c27a833SDavid van Moolenbroek 	if (sock->sock_flags & SFL_SHUT_RD)
6804c27a833SDavid van Moolenbroek 		return TRUE;
6814c27a833SDavid van Moolenbroek 
6824c27a833SDavid van Moolenbroek 	if (sock->sock_err != OK)
6834c27a833SDavid van Moolenbroek 		return TRUE;
6844c27a833SDavid van Moolenbroek 
6854c27a833SDavid van Moolenbroek 	/*
6864c27a833SDavid van Moolenbroek 	 * Depending on whether this is a listening-mode socket, test whether
6874c27a833SDavid van Moolenbroek 	 * either accepts or receives would block.
6884c27a833SDavid van Moolenbroek 	 */
6894c27a833SDavid van Moolenbroek 	if (sock->sock_opt & SO_ACCEPTCONN) {
6904c27a833SDavid van Moolenbroek 		if (sock->sock_ops->sop_test_accept == NULL)
6914c27a833SDavid van Moolenbroek 			return TRUE;
6924c27a833SDavid van Moolenbroek 
6934c27a833SDavid van Moolenbroek 		r = sock->sock_ops->sop_test_accept(sock);
6944c27a833SDavid van Moolenbroek 	} else {
6954c27a833SDavid van Moolenbroek 		if (sock->sock_ops->sop_test_recv == NULL)
6964c27a833SDavid van Moolenbroek 			return TRUE;
6974c27a833SDavid van Moolenbroek 
6984c27a833SDavid van Moolenbroek 		r = sock->sock_ops->sop_test_recv(sock, sock->sock_rlowat,
6994c27a833SDavid van Moolenbroek 		    NULL);
7004c27a833SDavid van Moolenbroek 	}
7014c27a833SDavid van Moolenbroek 
7024c27a833SDavid van Moolenbroek 	return (r != SUSPEND);
7034c27a833SDavid van Moolenbroek }
7044c27a833SDavid van Moolenbroek 
7054c27a833SDavid van Moolenbroek /*
7064c27a833SDavid van Moolenbroek  * Return TRUE if the given socket is ready for writing for a select call, or
7074c27a833SDavid van Moolenbroek  * FALSE otherwise.
7084c27a833SDavid van Moolenbroek  */
7094c27a833SDavid van Moolenbroek static int
sockevent_test_writable(struct sock * sock)7104c27a833SDavid van Moolenbroek sockevent_test_writable(struct sock * sock)
7114c27a833SDavid van Moolenbroek {
7124c27a833SDavid van Moolenbroek 	int r;
7134c27a833SDavid van Moolenbroek 
7144c27a833SDavid van Moolenbroek 	if (sock->sock_err != OK)
7154c27a833SDavid van Moolenbroek 		return TRUE;
7164c27a833SDavid van Moolenbroek 
7174c27a833SDavid van Moolenbroek 	if (sock->sock_flags & SFL_SHUT_WR)
7184c27a833SDavid van Moolenbroek 		return TRUE;
7194c27a833SDavid van Moolenbroek 
7204c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_test_send == NULL)
7214c27a833SDavid van Moolenbroek 		return TRUE;
7224c27a833SDavid van Moolenbroek 
7234c27a833SDavid van Moolenbroek 	/*
7244c27a833SDavid van Moolenbroek 	 * Test whether sends would block.  The low send watermark is relevant
7254c27a833SDavid van Moolenbroek 	 * for stream-type sockets only.
7264c27a833SDavid van Moolenbroek 	 */
7274c27a833SDavid van Moolenbroek 	r = sock->sock_ops->sop_test_send(sock, sock->sock_slowat);
7284c27a833SDavid van Moolenbroek 
7294c27a833SDavid van Moolenbroek 	return (r != SUSPEND);
7304c27a833SDavid van Moolenbroek }
7314c27a833SDavid van Moolenbroek 
7324c27a833SDavid van Moolenbroek /*
7334c27a833SDavid van Moolenbroek  * Test whether any of the given select operations are ready on the given
7344c27a833SDavid van Moolenbroek  * socket.  Return the subset of ready operations; zero if none.
7354c27a833SDavid van Moolenbroek  */
7364c27a833SDavid van Moolenbroek static unsigned int
sockevent_test_select(struct sock * sock,unsigned int ops)7374c27a833SDavid van Moolenbroek sockevent_test_select(struct sock * sock, unsigned int ops)
7384c27a833SDavid van Moolenbroek {
7394c27a833SDavid van Moolenbroek 	unsigned int ready_ops;
7404c27a833SDavid van Moolenbroek 
7414c27a833SDavid van Moolenbroek 	assert(!(ops & ~(SDEV_OP_RD | SDEV_OP_WR | SDEV_OP_ERR)));
7424c27a833SDavid van Moolenbroek 
7434c27a833SDavid van Moolenbroek 	/*
7444c27a833SDavid van Moolenbroek 	 * We do not support the "bind in progress" case here.  If a blocking
7454c27a833SDavid van Moolenbroek 	 * bind call is in progress, the file descriptor should not be ready
7464c27a833SDavid van Moolenbroek 	 * for either reading or writing.  Currently, socket drivers will have
7474c27a833SDavid van Moolenbroek 	 * to cover this case themselves.  Otherwise we would have to check the
7484c27a833SDavid van Moolenbroek 	 * queue of suspended calls, or create a custom flag for this.
7494c27a833SDavid van Moolenbroek 	 */
7504c27a833SDavid van Moolenbroek 
7514c27a833SDavid van Moolenbroek 	ready_ops = 0;
7524c27a833SDavid van Moolenbroek 
7534c27a833SDavid van Moolenbroek 	if ((ops & SDEV_OP_RD) && sockevent_test_readable(sock))
7544c27a833SDavid van Moolenbroek 		ready_ops |= SDEV_OP_RD;
7554c27a833SDavid van Moolenbroek 
7564c27a833SDavid van Moolenbroek 	if ((ops & SDEV_OP_WR) && sockevent_test_writable(sock))
7574c27a833SDavid van Moolenbroek 		ready_ops |= SDEV_OP_WR;
7584c27a833SDavid van Moolenbroek 
7594c27a833SDavid van Moolenbroek 	/* TODO: OOB receive support. */
7604c27a833SDavid van Moolenbroek 
7614c27a833SDavid van Moolenbroek 	return ready_ops;
7624c27a833SDavid van Moolenbroek }
7634c27a833SDavid van Moolenbroek 
7644c27a833SDavid van Moolenbroek /*
7654c27a833SDavid van Moolenbroek  * Fire the given mask of events on the given socket object now.
7664c27a833SDavid van Moolenbroek  */
7674c27a833SDavid van Moolenbroek static void
sockevent_fire(struct sock * sock,unsigned int mask)7684c27a833SDavid van Moolenbroek sockevent_fire(struct sock * sock, unsigned int mask)
7694c27a833SDavid van Moolenbroek {
7704c27a833SDavid van Moolenbroek 	struct sockevent_proc *spr, **sprp;
7714c27a833SDavid van Moolenbroek 	unsigned int r, flag, ops;
7724c27a833SDavid van Moolenbroek 
7734c27a833SDavid van Moolenbroek 	/*
7744c27a833SDavid van Moolenbroek 	 * A completed connection attempt (successful or not) also always
7754c27a833SDavid van Moolenbroek 	 * implies that the socket becomes writable.  For convenience we
7764c27a833SDavid van Moolenbroek 	 * enforce this rule here, because it is easy to forget.  Note that in
7774c27a833SDavid van Moolenbroek 	 * any case, a suspended connect request should be the first in the
7784c27a833SDavid van Moolenbroek 	 * list, so we do not risk returning 0 from a connect call as a result
7794c27a833SDavid van Moolenbroek 	 * of sock_err getting eaten by another resumed call.
7804c27a833SDavid van Moolenbroek 	 */
7814c27a833SDavid van Moolenbroek 	if (mask & SEV_CONNECT)
7824c27a833SDavid van Moolenbroek 		mask |= SEV_SEND;
7834c27a833SDavid van Moolenbroek 
7844c27a833SDavid van Moolenbroek 	/*
7854c27a833SDavid van Moolenbroek 	 * First try resuming regular system calls.
7864c27a833SDavid van Moolenbroek 	 */
7874c27a833SDavid van Moolenbroek 	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL; ) {
7884c27a833SDavid van Moolenbroek 		flag = spr->spr_event;
7894c27a833SDavid van Moolenbroek 
7904c27a833SDavid van Moolenbroek 		if ((mask & flag) && sockevent_resume(sock, spr)) {
7914c27a833SDavid van Moolenbroek 			*sprp = spr->spr_next;
7924c27a833SDavid van Moolenbroek 
7934c27a833SDavid van Moolenbroek 			sockevent_proc_free(spr);
7944c27a833SDavid van Moolenbroek 		} else {
7954c27a833SDavid van Moolenbroek 			mask &= ~flag;
7964c27a833SDavid van Moolenbroek 
7974c27a833SDavid van Moolenbroek 			sprp = &spr->spr_next;
7984c27a833SDavid van Moolenbroek 		}
7994c27a833SDavid van Moolenbroek 	}
8004c27a833SDavid van Moolenbroek 
8014c27a833SDavid van Moolenbroek 	/*
8024c27a833SDavid van Moolenbroek 	 * Then see if we can satisfy pending select queries.
8034c27a833SDavid van Moolenbroek 	 */
8044c27a833SDavid van Moolenbroek 	if ((mask & (SEV_ACCEPT | SEV_SEND | SEV_RECV)) &&
8054c27a833SDavid van Moolenbroek 	    sock->sock_select.ss_endpt != NONE) {
8064c27a833SDavid van Moolenbroek 		assert(sock->sock_selops != 0);
8074c27a833SDavid van Moolenbroek 
8084c27a833SDavid van Moolenbroek 		/*
8094c27a833SDavid van Moolenbroek 		 * Only retest select operations that, based on the given event
8104c27a833SDavid van Moolenbroek 		 * mask, could possibly be satisfied now.
8114c27a833SDavid van Moolenbroek 		 */
8124c27a833SDavid van Moolenbroek 		ops = sock->sock_selops;
8134c27a833SDavid van Moolenbroek 		if (!(mask & (SEV_ACCEPT | SEV_RECV)))
8144c27a833SDavid van Moolenbroek 			ops &= ~SDEV_OP_RD;
8154c27a833SDavid van Moolenbroek 		if (!(mask & SEV_SEND))
8164c27a833SDavid van Moolenbroek 			ops &= ~SDEV_OP_WR;
8174c27a833SDavid van Moolenbroek 		if (!(0))			/* TODO: OOB receive support */
8184c27a833SDavid van Moolenbroek 			ops &= ~SDEV_OP_ERR;
8194c27a833SDavid van Moolenbroek 
8204c27a833SDavid van Moolenbroek 		/* Are there any operations to test? */
8214c27a833SDavid van Moolenbroek 		if (ops != 0) {
8224c27a833SDavid van Moolenbroek 			/* Test those operations. */
8234c27a833SDavid van Moolenbroek 			r = sockevent_test_select(sock, ops);
8244c27a833SDavid van Moolenbroek 
8254c27a833SDavid van Moolenbroek 			/* Were any satisfied? */
8264c27a833SDavid van Moolenbroek 			if (r != 0) {
8274c27a833SDavid van Moolenbroek 				/* Let the caller know. */
8284c27a833SDavid van Moolenbroek 				sockdriver_reply_select(&sock->sock_select,
8294c27a833SDavid van Moolenbroek 				    sock->sock_id, r);
8304c27a833SDavid van Moolenbroek 
8314c27a833SDavid van Moolenbroek 				sock->sock_selops &= ~r;
8324c27a833SDavid van Moolenbroek 
8334c27a833SDavid van Moolenbroek 				/* Are there any saved operations left now? */
8344c27a833SDavid van Moolenbroek 				if (sock->sock_selops == 0)
8354c27a833SDavid van Moolenbroek 					sock->sock_select.ss_endpt = NONE;
8364c27a833SDavid van Moolenbroek 			}
8374c27a833SDavid van Moolenbroek 		}
8384c27a833SDavid van Moolenbroek 	}
8394c27a833SDavid van Moolenbroek 
8404c27a833SDavid van Moolenbroek 	/*
8414c27a833SDavid van Moolenbroek 	 * Finally, a SEV_CLOSE event unconditionally frees the sock object.
8424c27a833SDavid van Moolenbroek 	 * This event should be fired only for sockets that are either not yet,
8434c27a833SDavid van Moolenbroek 	 * or not anymore, in use by userland.
8444c27a833SDavid van Moolenbroek 	 */
8454c27a833SDavid van Moolenbroek 	if (mask & SEV_CLOSE) {
8464c27a833SDavid van Moolenbroek 		assert(sock->sock_flags & (SFL_CLONED | SFL_CLOSING));
8474c27a833SDavid van Moolenbroek 
8484c27a833SDavid van Moolenbroek 		sockevent_free(sock);
8494c27a833SDavid van Moolenbroek 	}
8504c27a833SDavid van Moolenbroek }
8514c27a833SDavid van Moolenbroek 
8524c27a833SDavid van Moolenbroek /*
8534c27a833SDavid van Moolenbroek  * Process all pending events.  Events must still be blocked, so that if
8544c27a833SDavid van Moolenbroek  * handling one event generates a new event, that event is handled from here
8554c27a833SDavid van Moolenbroek  * rather than immediately.
8564c27a833SDavid van Moolenbroek  */
8574c27a833SDavid van Moolenbroek static void
sockevent_pump(void)8584c27a833SDavid van Moolenbroek sockevent_pump(void)
8594c27a833SDavid van Moolenbroek {
8604c27a833SDavid van Moolenbroek 	struct sock *sock;
8614c27a833SDavid van Moolenbroek 	unsigned int mask;
8624c27a833SDavid van Moolenbroek 
8634c27a833SDavid van Moolenbroek 	assert(sockevent_working);
8644c27a833SDavid van Moolenbroek 
8654c27a833SDavid van Moolenbroek 	while (!SIMPLEQ_EMPTY(&sockevent_pending)) {
8664c27a833SDavid van Moolenbroek 		sock = SIMPLEQ_FIRST(&sockevent_pending);
8674c27a833SDavid van Moolenbroek 		SIMPLEQ_REMOVE_HEAD(&sockevent_pending, sock_next);
8684c27a833SDavid van Moolenbroek 
8694c27a833SDavid van Moolenbroek 		mask = sock->sock_events;
8704c27a833SDavid van Moolenbroek 		assert(mask != 0);
8714c27a833SDavid van Moolenbroek 		sock->sock_events = 0;
8724c27a833SDavid van Moolenbroek 
8734c27a833SDavid van Moolenbroek 		sockevent_fire(sock, mask);
8744c27a833SDavid van Moolenbroek 		/*
8754c27a833SDavid van Moolenbroek 		 * At this point, the sock object may already have been readded
8764c27a833SDavid van Moolenbroek 		 * to the event list, or even be deallocated altogether.
8774c27a833SDavid van Moolenbroek 		 */
8784c27a833SDavid van Moolenbroek 	}
8794c27a833SDavid van Moolenbroek }
8804c27a833SDavid van Moolenbroek 
8814c27a833SDavid van Moolenbroek /*
8824c27a833SDavid van Moolenbroek  * Return TRUE if any events are pending on any sockets, or FALSE otherwise.
8834c27a833SDavid van Moolenbroek  */
8844c27a833SDavid van Moolenbroek static int
sockevent_has_events(void)8854c27a833SDavid van Moolenbroek sockevent_has_events(void)
8864c27a833SDavid van Moolenbroek {
8874c27a833SDavid van Moolenbroek 
8884c27a833SDavid van Moolenbroek 	return (!SIMPLEQ_EMPTY(&sockevent_pending));
8894c27a833SDavid van Moolenbroek }
8904c27a833SDavid van Moolenbroek 
8914c27a833SDavid van Moolenbroek /*
8924c27a833SDavid van Moolenbroek  * Raise the given bitwise-OR'ed set of events on the given socket object.
8934c27a833SDavid van Moolenbroek  * Depending on the context of the call, they events may or may not be
8944c27a833SDavid van Moolenbroek  * processed immediately.
8954c27a833SDavid van Moolenbroek  */
8964c27a833SDavid van Moolenbroek void
sockevent_raise(struct sock * sock,unsigned int mask)8974c27a833SDavid van Moolenbroek sockevent_raise(struct sock * sock, unsigned int mask)
8984c27a833SDavid van Moolenbroek {
8994c27a833SDavid van Moolenbroek 
9004c27a833SDavid van Moolenbroek 	assert(sock->sock_ops != NULL);
9014c27a833SDavid van Moolenbroek 
9024c27a833SDavid van Moolenbroek 	/*
9034c27a833SDavid van Moolenbroek 	 * Handle SEV_CLOSE first.  This event must not be deferred, so as to
9044c27a833SDavid van Moolenbroek 	 * let socket drivers recycle sock objects as they are needed.  For
9054c27a833SDavid van Moolenbroek 	 * example, a user-closed TCP socket may stay open to transmit the
9064c27a833SDavid van Moolenbroek 	 * remainder of its send buffer, until the TCP driver runs out of
9074c27a833SDavid van Moolenbroek 	 * sockets, in which case the connection is aborted.  The driver would
9084c27a833SDavid van Moolenbroek 	 * then raise SEV_CLOSE on the sock object so as to clean it up, and
9094c27a833SDavid van Moolenbroek 	 * immediately reuse it afterward.  If the close event were to be
9104c27a833SDavid van Moolenbroek 	 * deferred, this immediate reuse would not be possible.
9114c27a833SDavid van Moolenbroek 	 *
9124c27a833SDavid van Moolenbroek 	 * The sop_free() callback routine may not raise new events, and thus,
9134c27a833SDavid van Moolenbroek 	 * the state of 'sockevent_working' need not be checked or set here.
9144c27a833SDavid van Moolenbroek 	 */
9154c27a833SDavid van Moolenbroek 	if (mask & SEV_CLOSE) {
9164c27a833SDavid van Moolenbroek 		assert(mask == SEV_CLOSE);
9174c27a833SDavid van Moolenbroek 
9184c27a833SDavid van Moolenbroek 		sockevent_fire(sock, mask);
9194c27a833SDavid van Moolenbroek 
9204c27a833SDavid van Moolenbroek 		return;
9214c27a833SDavid van Moolenbroek 	}
9224c27a833SDavid van Moolenbroek 
9234c27a833SDavid van Moolenbroek 	/*
9244c27a833SDavid van Moolenbroek 	 * If we are currently processing a socket message, store the event for
9254c27a833SDavid van Moolenbroek 	 * later.  If not, this call is not coming from inside libsockevent,
9264c27a833SDavid van Moolenbroek 	 * and we must handle the event immediately.
9274c27a833SDavid van Moolenbroek 	 */
9284c27a833SDavid van Moolenbroek 	if (sockevent_working) {
9294c27a833SDavid van Moolenbroek 		assert(mask != 0);
9304c27a833SDavid van Moolenbroek 		assert(mask <= UCHAR_MAX); /* sock_events field size check */
9314c27a833SDavid van Moolenbroek 
9324c27a833SDavid van Moolenbroek 		if (sock->sock_events == 0)
9334c27a833SDavid van Moolenbroek 			SIMPLEQ_INSERT_TAIL(&sockevent_pending, sock,
9344c27a833SDavid van Moolenbroek 			    sock_next);
9354c27a833SDavid van Moolenbroek 
9364c27a833SDavid van Moolenbroek 		sock->sock_events |= mask;
9374c27a833SDavid van Moolenbroek 	} else {
9384c27a833SDavid van Moolenbroek 		sockevent_working = TRUE;
9394c27a833SDavid van Moolenbroek 
9404c27a833SDavid van Moolenbroek 		sockevent_fire(sock, mask);
9414c27a833SDavid van Moolenbroek 
9424c27a833SDavid van Moolenbroek 		if (sockevent_has_events())
9434c27a833SDavid van Moolenbroek 			sockevent_pump();
9444c27a833SDavid van Moolenbroek 
9454c27a833SDavid van Moolenbroek 		sockevent_working = FALSE;
9464c27a833SDavid van Moolenbroek 	}
9474c27a833SDavid van Moolenbroek }
9484c27a833SDavid van Moolenbroek 
9494c27a833SDavid van Moolenbroek /*
9504c27a833SDavid van Moolenbroek  * Set a pending error on the socket object, and wake up any suspended
9514c27a833SDavid van Moolenbroek  * operations that are affected by this.
9524c27a833SDavid van Moolenbroek  */
9534c27a833SDavid van Moolenbroek void
sockevent_set_error(struct sock * sock,int err)9544c27a833SDavid van Moolenbroek sockevent_set_error(struct sock * sock, int err)
9554c27a833SDavid van Moolenbroek {
9564c27a833SDavid van Moolenbroek 
9574c27a833SDavid van Moolenbroek 	assert(err < 0);
9584c27a833SDavid van Moolenbroek 	assert(sock->sock_ops != NULL);
9594c27a833SDavid van Moolenbroek 
9604c27a833SDavid van Moolenbroek 	/* If an error was set already, it will be overridden. */
9614c27a833SDavid van Moolenbroek 	sock->sock_err = err;
9624c27a833SDavid van Moolenbroek 
9634c27a833SDavid van Moolenbroek 	sockevent_raise(sock, SEV_BIND | SEV_CONNECT | SEV_SEND | SEV_RECV);
9644c27a833SDavid van Moolenbroek }
9654c27a833SDavid van Moolenbroek 
9664c27a833SDavid van Moolenbroek /*
9674c27a833SDavid van Moolenbroek  * Initialize timer-related data structures.
9684c27a833SDavid van Moolenbroek  */
9694c27a833SDavid van Moolenbroek static void
socktimer_init(void)9704c27a833SDavid van Moolenbroek socktimer_init(void)
9714c27a833SDavid van Moolenbroek {
9724c27a833SDavid van Moolenbroek 
9734c27a833SDavid van Moolenbroek 	SLIST_INIT(&socktimer);
9744c27a833SDavid van Moolenbroek 
9754c27a833SDavid van Moolenbroek 	init_timer(&sockevent_timer);
9764c27a833SDavid van Moolenbroek }
9774c27a833SDavid van Moolenbroek 
9784c27a833SDavid van Moolenbroek /*
9794c27a833SDavid van Moolenbroek  * Check whether the given socket object has any suspended requests that have
9804c27a833SDavid van Moolenbroek  * now expired.  If so, cancel them.  Also, if the socket object has any
9814c27a833SDavid van Moolenbroek  * suspended requests with a timeout that has not yet expired, return the
9824c27a833SDavid van Moolenbroek  * earliest (relative) timeout of all of them, or TMR_NEVER if no such requests
9834c27a833SDavid van Moolenbroek  * are present.
9844c27a833SDavid van Moolenbroek  */
9854c27a833SDavid van Moolenbroek static clock_t
sockevent_expire(struct sock * sock,clock_t now)9864c27a833SDavid van Moolenbroek sockevent_expire(struct sock * sock, clock_t now)
9874c27a833SDavid van Moolenbroek {
9884c27a833SDavid van Moolenbroek 	struct sockevent_proc *spr, **sprp;
9894c27a833SDavid van Moolenbroek 	clock_t lowest, left;
9904c27a833SDavid van Moolenbroek 	int r;
9914c27a833SDavid van Moolenbroek 
9924c27a833SDavid van Moolenbroek 	/*
9934c27a833SDavid van Moolenbroek 	 * First handle the case that the socket is closed.  In this case,
9944c27a833SDavid van Moolenbroek 	 * there may be a linger timer, although the socket may also simply
9954c27a833SDavid van Moolenbroek 	 * still be on the timer list because of a request that did not time
9964c27a833SDavid van Moolenbroek 	 * out right before the socket was closed.
9974c27a833SDavid van Moolenbroek 	 */
9984c27a833SDavid van Moolenbroek 	if (sock->sock_flags & SFL_CLOSING) {
9994c27a833SDavid van Moolenbroek 		/* Was there a linger timer and has it expired? */
10004c27a833SDavid van Moolenbroek 		if ((sock->sock_opt & SO_LINGER) &&
10014c27a833SDavid van Moolenbroek 		    tmr_is_first(sock->sock_linger, now)) {
10024c27a833SDavid van Moolenbroek 			assert(sock->sock_ops->sop_close != NULL);
10034c27a833SDavid van Moolenbroek 
10044c27a833SDavid van Moolenbroek 			/*
10054c27a833SDavid van Moolenbroek 			 * Whatever happens next, we must now resume the
10064c27a833SDavid van Moolenbroek 			 * pending close operation, if it was not canceled
10074c27a833SDavid van Moolenbroek 			 * earlier.  As before, we return OK rather than the
10084c27a833SDavid van Moolenbroek 			 * standardized EWOULDBLOCK, to ensure that the user
10094c27a833SDavid van Moolenbroek 			 * process knows the file descriptor has been closed.
10104c27a833SDavid van Moolenbroek 			 */
10114c27a833SDavid van Moolenbroek 			if ((spr = sock->sock_proc) != NULL) {
10124c27a833SDavid van Moolenbroek 				assert(spr->spr_event == SEV_CLOSE);
10134c27a833SDavid van Moolenbroek 				assert(spr->spr_next == NULL);
10144c27a833SDavid van Moolenbroek 
10154c27a833SDavid van Moolenbroek 				sock->sock_proc = NULL;
10164c27a833SDavid van Moolenbroek 
10174c27a833SDavid van Moolenbroek 				sockdriver_reply_generic(&spr->spr_call, OK);
10184c27a833SDavid van Moolenbroek 
10194c27a833SDavid van Moolenbroek 				sockevent_proc_free(spr);
10204c27a833SDavid van Moolenbroek 			}
10214c27a833SDavid van Moolenbroek 
10224c27a833SDavid van Moolenbroek 			/*
10234c27a833SDavid van Moolenbroek 			 * Tell the socket driver that closing the socket is
10244c27a833SDavid van Moolenbroek 			 * now a bit more desired than the last time we asked.
10254c27a833SDavid van Moolenbroek 			 */
10264c27a833SDavid van Moolenbroek 			r = sock->sock_ops->sop_close(sock, TRUE /*force*/);
10274c27a833SDavid van Moolenbroek 
10284c27a833SDavid van Moolenbroek 			assert(r == OK || r == SUSPEND);
10294c27a833SDavid van Moolenbroek 
10304c27a833SDavid van Moolenbroek 			/*
10314c27a833SDavid van Moolenbroek 			 * The linger timer fires once.  After that, the socket
10324c27a833SDavid van Moolenbroek 			 * driver is free to decide that it still will not
10334c27a833SDavid van Moolenbroek 			 * close the socket.  If it does, do not fire the
10344c27a833SDavid van Moolenbroek 			 * linger timer again.
10354c27a833SDavid van Moolenbroek 			 */
10364c27a833SDavid van Moolenbroek 			if (r == SUSPEND)
10374c27a833SDavid van Moolenbroek 				sock->sock_opt &= ~SO_LINGER;
10384c27a833SDavid van Moolenbroek 			else
10394c27a833SDavid van Moolenbroek 				sockevent_free(sock);
10404c27a833SDavid van Moolenbroek 		}
10414c27a833SDavid van Moolenbroek 
10424c27a833SDavid van Moolenbroek 		return TMR_NEVER;
10434c27a833SDavid van Moolenbroek 	}
10444c27a833SDavid van Moolenbroek 
10454c27a833SDavid van Moolenbroek 	/*
10464c27a833SDavid van Moolenbroek 	 * Then see if any send and/or receive requests have expired.  Also see
10474c27a833SDavid van Moolenbroek 	 * if there are any send and/or receive requests left that have not yet
10484c27a833SDavid van Moolenbroek 	 * expired but do have a timeout, so that we can return the lowest of
10494c27a833SDavid van Moolenbroek 	 * those timeouts.
10504c27a833SDavid van Moolenbroek 	 */
10514c27a833SDavid van Moolenbroek 	lowest = TMR_NEVER;
10524c27a833SDavid van Moolenbroek 
10534c27a833SDavid van Moolenbroek 	for (sprp = &sock->sock_proc; (spr = *sprp) != NULL; ) {
10544c27a833SDavid van Moolenbroek 		/* Skip requests without a timeout. */
10554c27a833SDavid van Moolenbroek 		if (spr->spr_timer == 0) {
10564c27a833SDavid van Moolenbroek 			sprp = &spr->spr_next;
10574c27a833SDavid van Moolenbroek 
10584c27a833SDavid van Moolenbroek 			continue;
10594c27a833SDavid van Moolenbroek 		}
10604c27a833SDavid van Moolenbroek 
10614c27a833SDavid van Moolenbroek 		assert(spr->spr_event == SEV_SEND ||
10624c27a833SDavid van Moolenbroek 		    spr->spr_event == SEV_RECV);
10634c27a833SDavid van Moolenbroek 
10644c27a833SDavid van Moolenbroek 		/*
10654c27a833SDavid van Moolenbroek 		 * If the request has expired, cancel it and remove it from the
10664c27a833SDavid van Moolenbroek 		 * list.  Otherwise, see if the request has the lowest number
10674c27a833SDavid van Moolenbroek 		 * of ticks until its timeout so far.
10684c27a833SDavid van Moolenbroek 		 */
10694c27a833SDavid van Moolenbroek 		if (tmr_is_first(spr->spr_time, now)) {
10704c27a833SDavid van Moolenbroek 			*sprp = spr->spr_next;
10714c27a833SDavid van Moolenbroek 
10724c27a833SDavid van Moolenbroek 			if (spr->spr_event == SEV_SEND)
10734c27a833SDavid van Moolenbroek 				sockevent_cancel_send(sock, spr, EWOULDBLOCK);
10744c27a833SDavid van Moolenbroek 			else
10754c27a833SDavid van Moolenbroek 				sockevent_cancel_recv(sock, spr, EWOULDBLOCK);
10764c27a833SDavid van Moolenbroek 
10774c27a833SDavid van Moolenbroek 			sockevent_proc_free(spr);
10784c27a833SDavid van Moolenbroek 		} else {
10794c27a833SDavid van Moolenbroek 			left = spr->spr_time - now;
10804c27a833SDavid van Moolenbroek 
10814c27a833SDavid van Moolenbroek 			if (lowest == TMR_NEVER || lowest > left)
10824c27a833SDavid van Moolenbroek 				lowest = left;
10834c27a833SDavid van Moolenbroek 
10844c27a833SDavid van Moolenbroek 			sprp = &spr->spr_next;
10854c27a833SDavid van Moolenbroek 		}
10864c27a833SDavid van Moolenbroek 	}
10874c27a833SDavid van Moolenbroek 
10884c27a833SDavid van Moolenbroek 	return lowest;
10894c27a833SDavid van Moolenbroek }
10904c27a833SDavid van Moolenbroek 
10914c27a833SDavid van Moolenbroek /*
10924c27a833SDavid van Moolenbroek  * The socket event alarm went off.  Go through the set of socket objects with
10934c27a833SDavid van Moolenbroek  * timers, and see if any of their requests have now expired.  Set a new alarm
10944c27a833SDavid van Moolenbroek  * as necessary.
10954c27a833SDavid van Moolenbroek  */
10964c27a833SDavid van Moolenbroek static void
socktimer_expire(int arg __unused)10974c27a833SDavid van Moolenbroek socktimer_expire(int arg __unused)
10984c27a833SDavid van Moolenbroek {
10994c27a833SDavid van Moolenbroek 	SLIST_HEAD(, sock) oldtimer;
11004c27a833SDavid van Moolenbroek 	struct sock *sock, *tsock;
11014c27a833SDavid van Moolenbroek 	clock_t now, lowest, left;
11024c27a833SDavid van Moolenbroek 	int working;
11034c27a833SDavid van Moolenbroek 
11044c27a833SDavid van Moolenbroek 	/*
11054c27a833SDavid van Moolenbroek 	 * This function may or may not be called from a context where we are
11064c27a833SDavid van Moolenbroek 	 * already deferring events, so we have to cover both cases here.
11074c27a833SDavid van Moolenbroek 	 */
11084c27a833SDavid van Moolenbroek 	if ((working = sockevent_working) == FALSE)
11094c27a833SDavid van Moolenbroek 		sockevent_working = TRUE;
11104c27a833SDavid van Moolenbroek 
11114c27a833SDavid van Moolenbroek 	/* Start a new list. */
11124c27a833SDavid van Moolenbroek 	memcpy(&oldtimer, &socktimer, sizeof(oldtimer));
11134c27a833SDavid van Moolenbroek 	SLIST_INIT(&socktimer);
11144c27a833SDavid van Moolenbroek 
11154c27a833SDavid van Moolenbroek 	now = getticks();
11164c27a833SDavid van Moolenbroek 	lowest = TMR_NEVER;
11174c27a833SDavid van Moolenbroek 
11184c27a833SDavid van Moolenbroek 	/*
11194c27a833SDavid van Moolenbroek 	 * Go through all sockets that have or had a request with a timeout,
11204c27a833SDavid van Moolenbroek 	 * canceling any expired requests and building a new list of sockets
11214c27a833SDavid van Moolenbroek 	 * that still have requests with timeouts as we go.
11224c27a833SDavid van Moolenbroek 	 */
11234c27a833SDavid van Moolenbroek 	SLIST_FOREACH_SAFE(sock, &oldtimer, sock_timer, tsock) {
11244c27a833SDavid van Moolenbroek 		assert(sock->sock_flags & SFL_TIMER);
11254c27a833SDavid van Moolenbroek 		sock->sock_flags &= ~SFL_TIMER;
11264c27a833SDavid van Moolenbroek 
11274c27a833SDavid van Moolenbroek 		left = sockevent_expire(sock, now);
11284c27a833SDavid van Moolenbroek 		/*
11294c27a833SDavid van Moolenbroek 		 * The sock object may already have been deallocated now.
11304c27a833SDavid van Moolenbroek 		 * If 'next' is TMR_NEVER, do not touch 'sock' anymore.
11314c27a833SDavid van Moolenbroek 		 */
11324c27a833SDavid van Moolenbroek 
11334c27a833SDavid van Moolenbroek 		if (left != TMR_NEVER) {
11344c27a833SDavid van Moolenbroek 			if (lowest == TMR_NEVER || lowest > left)
11354c27a833SDavid van Moolenbroek 				lowest = left;
11364c27a833SDavid van Moolenbroek 
11374c27a833SDavid van Moolenbroek 			SLIST_INSERT_HEAD(&socktimer, sock, sock_timer);
11384c27a833SDavid van Moolenbroek 
11394c27a833SDavid van Moolenbroek 			sock->sock_flags |= SFL_TIMER;
11404c27a833SDavid van Moolenbroek 		}
11414c27a833SDavid van Moolenbroek 	}
11424c27a833SDavid van Moolenbroek 
11434c27a833SDavid van Moolenbroek 	/* If there is a new lowest timeout at all, set a new timer. */
11444c27a833SDavid van Moolenbroek 	if (lowest != TMR_NEVER)
11454c27a833SDavid van Moolenbroek 		set_timer(&sockevent_timer, lowest, socktimer_expire, 0);
11464c27a833SDavid van Moolenbroek 
11474c27a833SDavid van Moolenbroek 	if (!working) {
11484c27a833SDavid van Moolenbroek 		/* If any new events were raised, process them now. */
11494c27a833SDavid van Moolenbroek 		if (sockevent_has_events())
11504c27a833SDavid van Moolenbroek 			sockevent_pump();
11514c27a833SDavid van Moolenbroek 
11524c27a833SDavid van Moolenbroek 		sockevent_working = FALSE;
11534c27a833SDavid van Moolenbroek 	}
11544c27a833SDavid van Moolenbroek }
11554c27a833SDavid van Moolenbroek 
11564c27a833SDavid van Moolenbroek /*
11574c27a833SDavid van Moolenbroek  * Set a timer for the given (relative) number of clock ticks, adding the
11584c27a833SDavid van Moolenbroek  * associated socket object to the set of socket objects with timers, if it was
11594c27a833SDavid van Moolenbroek  * not already in that set.  Set a new alarm if necessary, and return the
11604c27a833SDavid van Moolenbroek  * absolute timeout for the timer.  Since the timers list is maintained lazily,
11614c27a833SDavid van Moolenbroek  * the caller need not take the object off the set if the call was canceled
11624c27a833SDavid van Moolenbroek  * later; see also socktimer_del().
11634c27a833SDavid van Moolenbroek  */
11644c27a833SDavid van Moolenbroek static clock_t
socktimer_add(struct sock * sock,clock_t ticks)11654c27a833SDavid van Moolenbroek socktimer_add(struct sock * sock, clock_t ticks)
11664c27a833SDavid van Moolenbroek {
11674c27a833SDavid van Moolenbroek 	clock_t now;
11684c27a833SDavid van Moolenbroek 
11694c27a833SDavid van Moolenbroek 	/*
11704c27a833SDavid van Moolenbroek 	 * Relative time comparisons require that any two times are no more
11714c27a833SDavid van Moolenbroek 	 * than half the comparison space (clock_t, unsigned long) apart.
11724c27a833SDavid van Moolenbroek 	 */
11734c27a833SDavid van Moolenbroek 	assert(ticks <= TMRDIFF_MAX);
11744c27a833SDavid van Moolenbroek 
11754c27a833SDavid van Moolenbroek 	/* If the socket was not already on the timers list, put it on. */
11764c27a833SDavid van Moolenbroek 	if (!(sock->sock_flags & SFL_TIMER)) {
11774c27a833SDavid van Moolenbroek 		SLIST_INSERT_HEAD(&socktimer, sock, sock_timer);
11784c27a833SDavid van Moolenbroek 
11794c27a833SDavid van Moolenbroek 		sock->sock_flags |= SFL_TIMER;
11804c27a833SDavid van Moolenbroek 	}
11814c27a833SDavid van Moolenbroek 
11824c27a833SDavid van Moolenbroek 	/*
11834c27a833SDavid van Moolenbroek 	 * (Re)set the timer if either it was not running at all or this new
11844c27a833SDavid van Moolenbroek 	 * timeout will occur sooner than the currently scheduled alarm.  Note
11854c27a833SDavid van Moolenbroek 	 * that setting a timer that was already set is allowed.
11864c27a833SDavid van Moolenbroek 	 */
11874c27a833SDavid van Moolenbroek 	now = getticks();
11884c27a833SDavid van Moolenbroek 
11894c27a833SDavid van Moolenbroek 	if (!tmr_is_set(&sockevent_timer) ||
11904c27a833SDavid van Moolenbroek 	    tmr_is_first(now + ticks, tmr_exp_time(&sockevent_timer)))
11914c27a833SDavid van Moolenbroek 		set_timer(&sockevent_timer, ticks, socktimer_expire, 0);
11924c27a833SDavid van Moolenbroek 
11934c27a833SDavid van Moolenbroek 	/* Return the absolute timeout. */
11944c27a833SDavid van Moolenbroek 	return now + ticks;
11954c27a833SDavid van Moolenbroek }
11964c27a833SDavid van Moolenbroek 
11974c27a833SDavid van Moolenbroek /*
11984c27a833SDavid van Moolenbroek  * Remove a socket object from the set of socket objects with timers.  Since
11994c27a833SDavid van Moolenbroek  * the timer list is maintained lazily, this needs to be done only right before
12004c27a833SDavid van Moolenbroek  * the socket object is freed.
12014c27a833SDavid van Moolenbroek  */
12024c27a833SDavid van Moolenbroek static void
socktimer_del(struct sock * sock)12034c27a833SDavid van Moolenbroek socktimer_del(struct sock * sock)
12044c27a833SDavid van Moolenbroek {
12054c27a833SDavid van Moolenbroek 
12064c27a833SDavid van Moolenbroek 	if (sock->sock_flags & SFL_TIMER) {
12074c27a833SDavid van Moolenbroek 		/* This macro is O(n). */
12084c27a833SDavid van Moolenbroek 		SLIST_REMOVE(&socktimer, sock, sock, sock_timer);
12094c27a833SDavid van Moolenbroek 
12104c27a833SDavid van Moolenbroek 		sock->sock_flags &= ~SFL_TIMER;
12114c27a833SDavid van Moolenbroek 	}
12124c27a833SDavid van Moolenbroek }
12134c27a833SDavid van Moolenbroek 
12144c27a833SDavid van Moolenbroek /*
12154c27a833SDavid van Moolenbroek  * Bind a socket to a local address.
12164c27a833SDavid van Moolenbroek  */
12174c27a833SDavid van Moolenbroek static int
sockevent_bind(sockid_t id,const struct sockaddr * __restrict addr,socklen_t addr_len,endpoint_t user_endpt,const struct sockdriver_call * __restrict call)12184c27a833SDavid van Moolenbroek sockevent_bind(sockid_t id, const struct sockaddr * __restrict addr,
12194c27a833SDavid van Moolenbroek 	socklen_t addr_len, endpoint_t user_endpt,
12204c27a833SDavid van Moolenbroek 	const struct sockdriver_call * __restrict call)
12214c27a833SDavid van Moolenbroek {
12224c27a833SDavid van Moolenbroek 	struct sock *sock;
12234c27a833SDavid van Moolenbroek 	int r;
12244c27a833SDavid van Moolenbroek 
12254c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
12264c27a833SDavid van Moolenbroek 		return EINVAL;
12274c27a833SDavid van Moolenbroek 
12284c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_bind == NULL)
12294c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
12304c27a833SDavid van Moolenbroek 
12314c27a833SDavid van Moolenbroek 	/* Binding a socket in listening mode is never supported. */
12324c27a833SDavid van Moolenbroek 	if (sock->sock_opt & SO_ACCEPTCONN)
12334c27a833SDavid van Moolenbroek 		return EINVAL;
12344c27a833SDavid van Moolenbroek 
12354c27a833SDavid van Moolenbroek 	r = sock->sock_ops->sop_bind(sock, addr, addr_len, user_endpt);
12364c27a833SDavid van Moolenbroek 
12374c27a833SDavid van Moolenbroek 	if (r == SUSPEND) {
12384c27a833SDavid van Moolenbroek 		if (call == NULL)
12394c27a833SDavid van Moolenbroek 			return EINPROGRESS;
12404c27a833SDavid van Moolenbroek 
12414c27a833SDavid van Moolenbroek 		sockevent_suspend(sock, SEV_BIND, call, user_endpt);
12424c27a833SDavid van Moolenbroek 	}
12434c27a833SDavid van Moolenbroek 
12444c27a833SDavid van Moolenbroek 	return r;
12454c27a833SDavid van Moolenbroek }
12464c27a833SDavid van Moolenbroek 
12474c27a833SDavid van Moolenbroek /*
12484c27a833SDavid van Moolenbroek  * Connect a socket to a remote address.
12494c27a833SDavid van Moolenbroek  */
12504c27a833SDavid van Moolenbroek static int
sockevent_connect(sockid_t id,const struct sockaddr * __restrict addr,socklen_t addr_len,endpoint_t user_endpt,const struct sockdriver_call * call)12514c27a833SDavid van Moolenbroek sockevent_connect(sockid_t id, const struct sockaddr * __restrict addr,
12524c27a833SDavid van Moolenbroek 	socklen_t addr_len, endpoint_t user_endpt,
12534c27a833SDavid van Moolenbroek 	const struct sockdriver_call * call)
12544c27a833SDavid van Moolenbroek {
12554c27a833SDavid van Moolenbroek 	struct sockdriver_call fakecall;
12564c27a833SDavid van Moolenbroek 	struct sockevent_proc *spr;
12574c27a833SDavid van Moolenbroek 	struct sock *sock;
12584c27a833SDavid van Moolenbroek 	int r;
12594c27a833SDavid van Moolenbroek 
12604c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
12614c27a833SDavid van Moolenbroek 		return EINVAL;
12624c27a833SDavid van Moolenbroek 
12634c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_connect == NULL)
12644c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
12654c27a833SDavid van Moolenbroek 
12664c27a833SDavid van Moolenbroek 	/* Connecting a socket in listening mode is never supported. */
12674c27a833SDavid van Moolenbroek 	if (sock->sock_opt & SO_ACCEPTCONN)
12684c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
12694c27a833SDavid van Moolenbroek 
12704c27a833SDavid van Moolenbroek 	/*
12714c27a833SDavid van Moolenbroek 	 * The upcoming connect call may fire an accept event for which the
12724c27a833SDavid van Moolenbroek 	 * handler may in turn fire a connect event on this socket.  Since we
12734c27a833SDavid van Moolenbroek 	 * delay event processing until after processing calls, this would
12744c27a833SDavid van Moolenbroek 	 * create the problem that even if the connection is accepted right
12754c27a833SDavid van Moolenbroek 	 * away, non-blocking connect requests would return EINPROGRESS.  For
12764c27a833SDavid van Moolenbroek 	 * UDS, this is undesirable behavior.  To remedy this, we use a hack:
12774c27a833SDavid van Moolenbroek 	 * we temporarily suspend the connect even if non-blocking, then
12784c27a833SDavid van Moolenbroek 	 * process events, and then cancel the connect request again.  If the
12794c27a833SDavid van Moolenbroek 	 * connection was accepted immediately, the cancellation will have no
12804c27a833SDavid van Moolenbroek 	 * effect, since the request has already been replied to.  In order not
12814c27a833SDavid van Moolenbroek 	 * to violate libsockdriver rules with this hack, we fabricate a fake
12824c27a833SDavid van Moolenbroek 	 * 'conn' object.
12834c27a833SDavid van Moolenbroek 	 */
12844c27a833SDavid van Moolenbroek 	r = sock->sock_ops->sop_connect(sock, addr, addr_len, user_endpt);
12854c27a833SDavid van Moolenbroek 
12864c27a833SDavid van Moolenbroek 	if (r == SUSPEND) {
12874c27a833SDavid van Moolenbroek 		if (call != NULL || sockevent_has_events()) {
12884c27a833SDavid van Moolenbroek 			if (call == NULL) {
12894c27a833SDavid van Moolenbroek 				fakecall.sc_endpt = NONE;
12904c27a833SDavid van Moolenbroek 
12914c27a833SDavid van Moolenbroek 				call = &fakecall;
12924c27a833SDavid van Moolenbroek 			}
12934c27a833SDavid van Moolenbroek 
12944c27a833SDavid van Moolenbroek 			assert(!sockevent_has_suspended(sock,
12954c27a833SDavid van Moolenbroek 			    SEV_SEND | SEV_RECV));
12964c27a833SDavid van Moolenbroek 
12974c27a833SDavid van Moolenbroek 			sockevent_suspend(sock, SEV_CONNECT, call, user_endpt);
12984c27a833SDavid van Moolenbroek 
12994c27a833SDavid van Moolenbroek 			if (call == &fakecall) {
13004c27a833SDavid van Moolenbroek 				/* Process any pending events first now. */
13014c27a833SDavid van Moolenbroek 				sockevent_pump();
13024c27a833SDavid van Moolenbroek 
13034c27a833SDavid van Moolenbroek 				/*
13044c27a833SDavid van Moolenbroek 				 * If the connect request has not been resumed
13054c27a833SDavid van Moolenbroek 				 * yet now, we must remove it from the queue
13064c27a833SDavid van Moolenbroek 				 * again, and return EINPROGRESS ourselves.
13074c27a833SDavid van Moolenbroek 				 * Otherwise, return OK or a pending error.
13084c27a833SDavid van Moolenbroek 				 */
13094c27a833SDavid van Moolenbroek 				spr = sockevent_unsuspend(sock, call);
13104c27a833SDavid van Moolenbroek 				if (spr != NULL) {
13114c27a833SDavid van Moolenbroek 					sockevent_proc_free(spr);
13124c27a833SDavid van Moolenbroek 
13134c27a833SDavid van Moolenbroek 					r = EINPROGRESS;
13144c27a833SDavid van Moolenbroek 				} else if ((r = sock->sock_err) != OK)
13154c27a833SDavid van Moolenbroek 					sock->sock_err = OK;
13164c27a833SDavid van Moolenbroek 			}
13174c27a833SDavid van Moolenbroek 		} else
13184c27a833SDavid van Moolenbroek 			r = EINPROGRESS;
13194c27a833SDavid van Moolenbroek 	}
13204c27a833SDavid van Moolenbroek 
13214c27a833SDavid van Moolenbroek 	if (r == OK) {
13224c27a833SDavid van Moolenbroek 		/*
13234c27a833SDavid van Moolenbroek 		 * A completed connection attempt also always implies that the
13244c27a833SDavid van Moolenbroek 		 * socket becomes writable.  For convenience we enforce this
13254c27a833SDavid van Moolenbroek 		 * rule here, because it is easy to forget.
13264c27a833SDavid van Moolenbroek 		 */
13274c27a833SDavid van Moolenbroek 		sockevent_raise(sock, SEV_SEND);
13284c27a833SDavid van Moolenbroek 	}
13294c27a833SDavid van Moolenbroek 
13304c27a833SDavid van Moolenbroek 	return r;
13314c27a833SDavid van Moolenbroek }
13324c27a833SDavid van Moolenbroek 
13334c27a833SDavid van Moolenbroek /*
13344c27a833SDavid van Moolenbroek  * Put a socket in listening mode.
13354c27a833SDavid van Moolenbroek  */
13364c27a833SDavid van Moolenbroek static int
sockevent_listen(sockid_t id,int backlog)13374c27a833SDavid van Moolenbroek sockevent_listen(sockid_t id, int backlog)
13384c27a833SDavid van Moolenbroek {
13394c27a833SDavid van Moolenbroek 	struct sock *sock;
13404c27a833SDavid van Moolenbroek 	int r;
13414c27a833SDavid van Moolenbroek 
13424c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
13434c27a833SDavid van Moolenbroek 		return EINVAL;
13444c27a833SDavid van Moolenbroek 
13454c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_listen == NULL)
13464c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
13474c27a833SDavid van Moolenbroek 
13484c27a833SDavid van Moolenbroek 	/*
13494c27a833SDavid van Moolenbroek 	 * Perform a general adjustment on the backlog value, applying the
13504c27a833SDavid van Moolenbroek 	 * customary BSD "fudge factor" of 1.5x.  Keep the value within bounds
13514c27a833SDavid van Moolenbroek 	 * though.  POSIX imposes that a negative backlog value is equal to a
13524c27a833SDavid van Moolenbroek 	 * backlog value of zero.  A backlog value of zero, in turn, may mean
13534c27a833SDavid van Moolenbroek 	 * anything; we take it to be one.  POSIX also imposes that all socket
13544c27a833SDavid van Moolenbroek 	 * drivers accept up to at least SOMAXCONN connections on the queue.
13554c27a833SDavid van Moolenbroek 	 */
13564c27a833SDavid van Moolenbroek 	if (backlog < 0)
13574c27a833SDavid van Moolenbroek 		backlog = 0;
13584c27a833SDavid van Moolenbroek 	if (backlog < SOMAXCONN)
13594c27a833SDavid van Moolenbroek 		backlog += 1 + ((unsigned int)backlog >> 1);
13604c27a833SDavid van Moolenbroek 	if (backlog > SOMAXCONN)
13614c27a833SDavid van Moolenbroek 		backlog = SOMAXCONN;
13624c27a833SDavid van Moolenbroek 
13634c27a833SDavid van Moolenbroek 	r = sock->sock_ops->sop_listen(sock, backlog);
13644c27a833SDavid van Moolenbroek 
13654c27a833SDavid van Moolenbroek 	/*
13664c27a833SDavid van Moolenbroek 	 * On success, the socket is now in listening mode.  As part of that,
13674c27a833SDavid van Moolenbroek 	 * a select(2) ready-to-read condition now indicates that a connection
13684c27a833SDavid van Moolenbroek 	 * may be accepted on the socket, rather than that data may be read.
13694c27a833SDavid van Moolenbroek 	 * Since libsockevent is responsible for this distinction, we keep
13704c27a833SDavid van Moolenbroek 	 * track of the listening mode at this level.  Conveniently, there is a
13714c27a833SDavid van Moolenbroek 	 * socket option for this, which we support out of the box as a result.
13724c27a833SDavid van Moolenbroek 	 */
13734c27a833SDavid van Moolenbroek 	if (r == OK) {
13744c27a833SDavid van Moolenbroek 		sock->sock_opt |= SO_ACCEPTCONN;
13754c27a833SDavid van Moolenbroek 
13764c27a833SDavid van Moolenbroek 		/*
13774c27a833SDavid van Moolenbroek 		 * For the extremely unlikely case that right after the socket
1378*79a488aaSDavid van Moolenbroek 		 * is put into listening mode, it has a connection ready to
13794c27a833SDavid van Moolenbroek 		 * accept, we retest blocked ready-to-read select queries now.
13804c27a833SDavid van Moolenbroek 		 */
13814c27a833SDavid van Moolenbroek 		sockevent_raise(sock, SEV_ACCEPT);
13824c27a833SDavid van Moolenbroek 	}
13834c27a833SDavid van Moolenbroek 
13844c27a833SDavid van Moolenbroek 	return r;
13854c27a833SDavid van Moolenbroek }
13864c27a833SDavid van Moolenbroek 
13874c27a833SDavid van Moolenbroek /*
13884c27a833SDavid van Moolenbroek  * Accept a connection on a listening socket, creating a new socket.
13894c27a833SDavid van Moolenbroek  */
13904c27a833SDavid van Moolenbroek static sockid_t
sockevent_accept(sockid_t id,struct sockaddr * __restrict addr,socklen_t * __restrict addr_len,endpoint_t user_endpt,const struct sockdriver_call * __restrict call)13914c27a833SDavid van Moolenbroek sockevent_accept(sockid_t id, struct sockaddr * __restrict addr,
13924c27a833SDavid van Moolenbroek 	socklen_t * __restrict addr_len, endpoint_t user_endpt,
13934c27a833SDavid van Moolenbroek 	const struct sockdriver_call * __restrict call)
13944c27a833SDavid van Moolenbroek {
13954c27a833SDavid van Moolenbroek 	struct sock *sock, *newsock;
13964c27a833SDavid van Moolenbroek 	sockid_t r;
13974c27a833SDavid van Moolenbroek 
13984c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
13994c27a833SDavid van Moolenbroek 		return EINVAL;
14004c27a833SDavid van Moolenbroek 
14014c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_accept == NULL)
14024c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
14034c27a833SDavid van Moolenbroek 
14044c27a833SDavid van Moolenbroek 	/*
14054c27a833SDavid van Moolenbroek 	 * Attempt to accept a connection.  The socket driver is responsible
14064c27a833SDavid van Moolenbroek 	 * for allocating a sock object (and identifier) on success.  It may
14074c27a833SDavid van Moolenbroek 	 * already have done so before, in which case it should leave newsock
14084c27a833SDavid van Moolenbroek 	 * filled with NULL; otherwise, the returned sock object is cloned from
14094c27a833SDavid van Moolenbroek 	 * the listening socket.  The socket driver is also responsible for
14104c27a833SDavid van Moolenbroek 	 * failing the call if the socket is not in listening mode, because it
14114c27a833SDavid van Moolenbroek 	 * must specify the error to return: EOPNOTSUPP or EINVAL.
14124c27a833SDavid van Moolenbroek 	 */
14134c27a833SDavid van Moolenbroek 	newsock = NULL;
14144c27a833SDavid van Moolenbroek 
14154c27a833SDavid van Moolenbroek 	if ((r = sock->sock_ops->sop_accept(sock, addr, addr_len, user_endpt,
14164c27a833SDavid van Moolenbroek 	    &newsock)) == SUSPEND) {
14174c27a833SDavid van Moolenbroek 		assert(sock->sock_opt & SO_ACCEPTCONN);
14184c27a833SDavid van Moolenbroek 
14194c27a833SDavid van Moolenbroek 		if (call == NULL)
14204c27a833SDavid van Moolenbroek 			return EWOULDBLOCK;
14214c27a833SDavid van Moolenbroek 
14224c27a833SDavid van Moolenbroek 		sockevent_suspend(sock, SEV_ACCEPT, call, user_endpt);
14234c27a833SDavid van Moolenbroek 
14244c27a833SDavid van Moolenbroek 		return SUSPEND;
14254c27a833SDavid van Moolenbroek 	}
14264c27a833SDavid van Moolenbroek 
14274c27a833SDavid van Moolenbroek 	if (r >= 0)
14284c27a833SDavid van Moolenbroek 		sockevent_accepted(sock, newsock, r);
14294c27a833SDavid van Moolenbroek 
14304c27a833SDavid van Moolenbroek 	return r;
14314c27a833SDavid van Moolenbroek }
14324c27a833SDavid van Moolenbroek 
14334c27a833SDavid van Moolenbroek /*
14344c27a833SDavid van Moolenbroek  * Send regular and/or control data.
14354c27a833SDavid van Moolenbroek  */
14364c27a833SDavid van Moolenbroek static int
sockevent_send(sockid_t id,const struct sockdriver_data * __restrict data,size_t len,const struct sockdriver_data * __restrict ctl_data,socklen_t ctl_len,const struct sockaddr * __restrict addr,socklen_t addr_len,endpoint_t user_endpt,int flags,const struct sockdriver_call * __restrict call)14374c27a833SDavid van Moolenbroek sockevent_send(sockid_t id, const struct sockdriver_data * __restrict data,
14384c27a833SDavid van Moolenbroek 	size_t len, const struct sockdriver_data * __restrict ctl_data,
14394c27a833SDavid van Moolenbroek 	socklen_t ctl_len, const struct sockaddr * __restrict addr,
14404c27a833SDavid van Moolenbroek 	socklen_t addr_len, endpoint_t user_endpt, int flags,
14414c27a833SDavid van Moolenbroek 	const struct sockdriver_call * __restrict call)
14424c27a833SDavid van Moolenbroek {
14434c27a833SDavid van Moolenbroek 	struct sock *sock;
14444c27a833SDavid van Moolenbroek 	clock_t time;
14454c27a833SDavid van Moolenbroek 	size_t min, off;
14464c27a833SDavid van Moolenbroek 	socklen_t ctl_off;
14474c27a833SDavid van Moolenbroek 	int r, timer;
14484c27a833SDavid van Moolenbroek 
14494c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
14504c27a833SDavid van Moolenbroek 		return EINVAL;
14514c27a833SDavid van Moolenbroek 
14524c27a833SDavid van Moolenbroek 	/*
14534c27a833SDavid van Moolenbroek 	 * The order of the following checks is not necessarily fixed, and may
14544c27a833SDavid van Moolenbroek 	 * be changed later.  As far as applicable, they should match the order
14554c27a833SDavid van Moolenbroek 	 * of the checks during call resumption, though.
14564c27a833SDavid van Moolenbroek 	 */
14574c27a833SDavid van Moolenbroek 	if ((r = sock->sock_err) != OK) {
14584c27a833SDavid van Moolenbroek 		sock->sock_err = OK;
14594c27a833SDavid van Moolenbroek 
14604c27a833SDavid van Moolenbroek 		return r;
14614c27a833SDavid van Moolenbroek 	}
14624c27a833SDavid van Moolenbroek 
14634c27a833SDavid van Moolenbroek 	if (sock->sock_flags & SFL_SHUT_WR) {
14644c27a833SDavid van Moolenbroek 		sockevent_sigpipe(sock, user_endpt, flags);
14654c27a833SDavid van Moolenbroek 
14664c27a833SDavid van Moolenbroek 		return EPIPE;
14674c27a833SDavid van Moolenbroek 	}
14684c27a833SDavid van Moolenbroek 
14694c27a833SDavid van Moolenbroek 	/*
14704c27a833SDavid van Moolenbroek 	 * Translate the sticky SO_DONTROUTE option to a per-request
14714c27a833SDavid van Moolenbroek 	 * MSG_DONTROUTE flag.  This achieves two purposes: socket drivers have
14724c27a833SDavid van Moolenbroek 	 * to check only one flag, and socket drivers that do not support the
14734c27a833SDavid van Moolenbroek 	 * flag will fail send requests in a consistent way.
14744c27a833SDavid van Moolenbroek 	 */
14754c27a833SDavid van Moolenbroek 	if (sock->sock_opt & SO_DONTROUTE)
14764c27a833SDavid van Moolenbroek 		flags |= MSG_DONTROUTE;
14774c27a833SDavid van Moolenbroek 
14784c27a833SDavid van Moolenbroek 	/*
14794c27a833SDavid van Moolenbroek 	 * Check if this is a valid send request as far as the socket driver is
14804c27a833SDavid van Moolenbroek 	 * concerned.  We do this separately from sop_send for the reason that
14814c27a833SDavid van Moolenbroek 	 * this send request may immediately be queued behind other pending
14824c27a833SDavid van Moolenbroek 	 * send requests (without a call to sop_send), which means even invalid
14834c27a833SDavid van Moolenbroek 	 * requests would be queued and not return failure until much later.
14844c27a833SDavid van Moolenbroek 	 */
14854c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_pre_send != NULL &&
14864c27a833SDavid van Moolenbroek 	    (r = sock->sock_ops->sop_pre_send(sock, len, ctl_len, addr,
14874c27a833SDavid van Moolenbroek 	    addr_len, user_endpt,
14884c27a833SDavid van Moolenbroek 	    flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL))) != OK)
14894c27a833SDavid van Moolenbroek 		return r;
14904c27a833SDavid van Moolenbroek 
14914c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_send == NULL)
14924c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
14934c27a833SDavid van Moolenbroek 
14944c27a833SDavid van Moolenbroek 	off = 0;
14954c27a833SDavid van Moolenbroek 	ctl_off = 0;
14964c27a833SDavid van Moolenbroek 
14974c27a833SDavid van Moolenbroek 	/*
14984c27a833SDavid van Moolenbroek 	 * Sending out-of-band data is treated differently from regular data:
14994c27a833SDavid van Moolenbroek 	 *
15004c27a833SDavid van Moolenbroek 	 * - sop_send is called immediately, even if a partial non-OOB send
15014c27a833SDavid van Moolenbroek 	 *   operation is currently suspended (TODO: it may have to be aborted
15024c27a833SDavid van Moolenbroek 	 *   in order to maintain atomicity guarantees - that should be easy);
15034c27a833SDavid van Moolenbroek 	 * - sop_send must not return SUSPEND; instead, if it cannot process
15044c27a833SDavid van Moolenbroek 	 *   the OOB data immediately, it must return an appropriate error;
15054c27a833SDavid van Moolenbroek 	 * - the send low watermark is ignored.
15064c27a833SDavid van Moolenbroek 	 *
15074c27a833SDavid van Moolenbroek 	 * Given that none of the current socket drivers support OOB data at
15084c27a833SDavid van Moolenbroek 	 * all, more sophisticated approaches would have no added value now.
15094c27a833SDavid van Moolenbroek 	 */
15104c27a833SDavid van Moolenbroek 	if (flags & MSG_OOB) {
15114c27a833SDavid van Moolenbroek 		r = sock->sock_ops->sop_send(sock, data, len, &off, ctl_data,
15124c27a833SDavid van Moolenbroek 		    ctl_len, &ctl_off, addr, addr_len, user_endpt, flags, 0);
15134c27a833SDavid van Moolenbroek 
15144c27a833SDavid van Moolenbroek 		if (r == SUSPEND)
15154c27a833SDavid van Moolenbroek 			panic("libsockevent: MSG_OOB send calls may not be "
15164c27a833SDavid van Moolenbroek 			    "suspended");
15174c27a833SDavid van Moolenbroek 
15184c27a833SDavid van Moolenbroek 		return (r == OK) ? (int)off : r;
15194c27a833SDavid van Moolenbroek 	}
15204c27a833SDavid van Moolenbroek 
15214c27a833SDavid van Moolenbroek 	/*
15224c27a833SDavid van Moolenbroek 	 * Only call the actual sop_send function now if no other send calls
15234c27a833SDavid van Moolenbroek 	 * are suspended already.
15244c27a833SDavid van Moolenbroek 	 *
15254c27a833SDavid van Moolenbroek 	 * Call sop_send with 'min' set to the minimum of the request size and
15264c27a833SDavid van Moolenbroek 	 * the socket's send low water mark, but only if the call is non-
15274c27a833SDavid van Moolenbroek 	 * blocking.  For stream-oriented sockets, this should have the effect
15284c27a833SDavid van Moolenbroek 	 * that non-blocking calls fail with EWOULDBLOCK if not at least that
15294c27a833SDavid van Moolenbroek 	 * much can be sent immediately. For consistency, we choose to apply
15304c27a833SDavid van Moolenbroek 	 * the same threshold to blocking calls.  For datagram-oriented
15314c27a833SDavid van Moolenbroek 	 * sockets, the minimum is not a factor to be considered.
15324c27a833SDavid van Moolenbroek 	 */
15334c27a833SDavid van Moolenbroek 	if (!sockevent_has_suspended(sock, SEV_SEND)) {
15344c27a833SDavid van Moolenbroek 		min = sock->sock_slowat;
15354c27a833SDavid van Moolenbroek 		if (min > len)
15364c27a833SDavid van Moolenbroek 			min = len;
15374c27a833SDavid van Moolenbroek 
15384c27a833SDavid van Moolenbroek 		r = sock->sock_ops->sop_send(sock, data, len, &off, ctl_data,
15394c27a833SDavid van Moolenbroek 		    ctl_len, &ctl_off, addr, addr_len, user_endpt, flags, min);
15404c27a833SDavid van Moolenbroek 	} else
15414c27a833SDavid van Moolenbroek 		r = SUSPEND;
15424c27a833SDavid van Moolenbroek 
15434c27a833SDavid van Moolenbroek 	if (r == SUSPEND) {
15444c27a833SDavid van Moolenbroek 		/*
15454c27a833SDavid van Moolenbroek 		 * We do not store the target's address on suspension, because
15464c27a833SDavid van Moolenbroek 		 * that would add significantly to the per-process suspension
15474c27a833SDavid van Moolenbroek 		 * state.  As a result, we disallow socket drivers from
15484c27a833SDavid van Moolenbroek 		 * suspending send calls with addresses, because we would no
15494c27a833SDavid van Moolenbroek 		 * longer have the address for proper call resumption.
15504c27a833SDavid van Moolenbroek 		 * However, we do not know here whether the socket is in
15514c27a833SDavid van Moolenbroek 		 * connection-oriented mode; if it is, the address is to be
15524c27a833SDavid van Moolenbroek 		 * ignored altogether.  Therefore, there is no test on 'addr'
15534c27a833SDavid van Moolenbroek 		 * here.  Resumed calls will get a NULL address pointer, and
15544c27a833SDavid van Moolenbroek 		 * the socket driver is expected to do the right thing.
15554c27a833SDavid van Moolenbroek 		 */
15564c27a833SDavid van Moolenbroek 
15574c27a833SDavid van Moolenbroek 		/*
15584c27a833SDavid van Moolenbroek 		 * For non-blocking socket calls, return an error only if we
15594c27a833SDavid van Moolenbroek 		 * were not able to send anything at all.  If only control data
15604c27a833SDavid van Moolenbroek 		 * were sent, the return value is therefore zero.
15614c27a833SDavid van Moolenbroek 		 */
15624c27a833SDavid van Moolenbroek 		if (call != NULL) {
15634c27a833SDavid van Moolenbroek 			if (sock->sock_stimeo != 0) {
15644c27a833SDavid van Moolenbroek 				timer = TRUE;
15654c27a833SDavid van Moolenbroek 				time = socktimer_add(sock, sock->sock_stimeo);
15664c27a833SDavid van Moolenbroek 			} else {
15674c27a833SDavid van Moolenbroek 				timer = FALSE;
15684c27a833SDavid van Moolenbroek 				time = 0;
15694c27a833SDavid van Moolenbroek 			}
15704c27a833SDavid van Moolenbroek 
15714c27a833SDavid van Moolenbroek 			sockevent_suspend_data(sock, SEV_SEND, timer, call,
15724c27a833SDavid van Moolenbroek 			    user_endpt, data, len, off, ctl_data, ctl_len,
15734c27a833SDavid van Moolenbroek 			    ctl_off, flags, 0, time);
15744c27a833SDavid van Moolenbroek 		} else
15754c27a833SDavid van Moolenbroek 			r = (off > 0 || ctl_off > 0) ? OK : EWOULDBLOCK;
15764c27a833SDavid van Moolenbroek 	} else if (r == EPIPE)
15774c27a833SDavid van Moolenbroek 		sockevent_sigpipe(sock, user_endpt, flags);
15784c27a833SDavid van Moolenbroek 
15794c27a833SDavid van Moolenbroek 	return (r == OK) ? (int)off : r;
15804c27a833SDavid van Moolenbroek }
15814c27a833SDavid van Moolenbroek 
15824c27a833SDavid van Moolenbroek /*
15834c27a833SDavid van Moolenbroek  * The inner part of the receive request handler.  An error returned from here
15844c27a833SDavid van Moolenbroek  * may be overridden by an error pending on the socket, although data returned
15854c27a833SDavid van Moolenbroek  * from here trumps such pending errors.
15864c27a833SDavid van Moolenbroek  */
15874c27a833SDavid van Moolenbroek static int
sockevent_recv_inner(struct sock * sock,const struct sockdriver_data * __restrict data,size_t len,size_t * __restrict off,const struct sockdriver_data * __restrict ctl_data,socklen_t ctl_len,socklen_t * __restrict ctl_off,struct sockaddr * __restrict addr,socklen_t * __restrict addr_len,endpoint_t user_endpt,int * __restrict flags,const struct sockdriver_call * __restrict call)15884c27a833SDavid van Moolenbroek sockevent_recv_inner(struct sock * sock,
15894c27a833SDavid van Moolenbroek 	const struct sockdriver_data * __restrict data,
15904c27a833SDavid van Moolenbroek 	size_t len, size_t * __restrict off,
15914c27a833SDavid van Moolenbroek 	const struct sockdriver_data * __restrict ctl_data,
15924c27a833SDavid van Moolenbroek 	socklen_t ctl_len, socklen_t * __restrict ctl_off,
15934c27a833SDavid van Moolenbroek 	struct sockaddr * __restrict addr,
15944c27a833SDavid van Moolenbroek 	socklen_t * __restrict addr_len, endpoint_t user_endpt,
15954c27a833SDavid van Moolenbroek 	int * __restrict flags, const struct sockdriver_call * __restrict call)
15964c27a833SDavid van Moolenbroek {
15974c27a833SDavid van Moolenbroek 	clock_t time;
15984c27a833SDavid van Moolenbroek 	size_t min;
15994c27a833SDavid van Moolenbroek 	int r, oob, inflags, timer;
16004c27a833SDavid van Moolenbroek 
16014c27a833SDavid van Moolenbroek 	/*
16024c27a833SDavid van Moolenbroek 	 * Check if this is a valid receive request as far as the socket driver
16034c27a833SDavid van Moolenbroek 	 * is concerned.  We do this separately from sop_recv for the reason
16044c27a833SDavid van Moolenbroek 	 * that this receive request may immediately be queued behind other
16054c27a833SDavid van Moolenbroek 	 * pending receive requests (without a call to sop_recv), which means
16064c27a833SDavid van Moolenbroek 	 * even invalid requests would be queued and not return failure until
16074c27a833SDavid van Moolenbroek 	 * much later.
16084c27a833SDavid van Moolenbroek 	 */
16094c27a833SDavid van Moolenbroek 	inflags = *flags;
16104c27a833SDavid van Moolenbroek 	*flags = 0;
16114c27a833SDavid van Moolenbroek 
16124c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_pre_recv != NULL &&
16134c27a833SDavid van Moolenbroek 	    (r = sock->sock_ops->sop_pre_recv(sock, user_endpt,
16144c27a833SDavid van Moolenbroek 	    inflags & ~(MSG_DONTWAIT | MSG_NOSIGNAL))) != OK)
16154c27a833SDavid van Moolenbroek 		return r;
16164c27a833SDavid van Moolenbroek 
16174c27a833SDavid van Moolenbroek 	/*
16184c27a833SDavid van Moolenbroek 	 * The order of the following checks is not necessarily fixed, and may
16194c27a833SDavid van Moolenbroek 	 * be changed later.  As far as applicable, they should match the order
16204c27a833SDavid van Moolenbroek 	 * of the checks during call resumption, though.
16214c27a833SDavid van Moolenbroek 	 */
16224c27a833SDavid van Moolenbroek 	if (sock->sock_flags & SFL_SHUT_RD)
16234c27a833SDavid van Moolenbroek 		return SOCKEVENT_EOF;
16244c27a833SDavid van Moolenbroek 
16254c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_recv == NULL)
16264c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
16274c27a833SDavid van Moolenbroek 
16284c27a833SDavid van Moolenbroek 	/*
16294c27a833SDavid van Moolenbroek 	 * Receiving out-of-band data is treated differently from regular data:
16304c27a833SDavid van Moolenbroek 	 *
16314c27a833SDavid van Moolenbroek 	 * - sop_recv is called immediately, even if a partial non-OOB receive
16324c27a833SDavid van Moolenbroek 	 *   operation is currently suspended (TODO: it may have to be aborted
16334c27a833SDavid van Moolenbroek 	 *   in order to maintain atomicity guarantees - that should be easy);
16344c27a833SDavid van Moolenbroek 	 * - sop_recv must not return SUSPEND; instead, if it cannot return any
16354c27a833SDavid van Moolenbroek 	 *   the OOB data immediately, it must return an appropriate error;
16364c27a833SDavid van Moolenbroek 	 * - the receive low watermark is ignored.
16374c27a833SDavid van Moolenbroek 	 *
16384c27a833SDavid van Moolenbroek 	 * Given that none of the current socket drivers support OOB data at
16394c27a833SDavid van Moolenbroek 	 * all, more sophisticated approaches would have no added value now.
16404c27a833SDavid van Moolenbroek 	 */
16414c27a833SDavid van Moolenbroek 	oob = (inflags & MSG_OOB);
16424c27a833SDavid van Moolenbroek 
16434c27a833SDavid van Moolenbroek 	if (oob && (sock->sock_opt & SO_OOBINLINE))
16444c27a833SDavid van Moolenbroek 		return EINVAL;
16454c27a833SDavid van Moolenbroek 
16464c27a833SDavid van Moolenbroek 	/*
16474c27a833SDavid van Moolenbroek 	 * Only call the actual sop_recv function now if no other receive
16484c27a833SDavid van Moolenbroek 	 * calls are suspended already.
16494c27a833SDavid van Moolenbroek 	 *
16504c27a833SDavid van Moolenbroek 	 * Call sop_recv with 'min' set to the minimum of the request size and
16514c27a833SDavid van Moolenbroek 	 * the socket's socket's low water mark, unless there is a pending
16524c27a833SDavid van Moolenbroek 	 * error.  As a result, blocking calls will block, and non-blocking
16534c27a833SDavid van Moolenbroek 	 * calls will yield EWOULDBLOCK, if at least that much can be received,
16544c27a833SDavid van Moolenbroek 	 * unless another condition (EOF or that pending error) prevents more
16554c27a833SDavid van Moolenbroek 	 * from being received anyway.  For datagram-oriented sockets, the
16564c27a833SDavid van Moolenbroek 	 * minimum is not a factor to be considered.
16574c27a833SDavid van Moolenbroek 	 */
16584c27a833SDavid van Moolenbroek 	if (oob || !sockevent_has_suspended(sock, SEV_RECV)) {
16594c27a833SDavid van Moolenbroek 		if (!oob && sock->sock_err == OK) {
16604c27a833SDavid van Moolenbroek 			min = sock->sock_rlowat;
16614c27a833SDavid van Moolenbroek 			if (min > len)
16624c27a833SDavid van Moolenbroek 				min = len;
16634c27a833SDavid van Moolenbroek 		} else
16644c27a833SDavid van Moolenbroek 			min = 0; /* receive even no-data segments */
16654c27a833SDavid van Moolenbroek 
16664c27a833SDavid van Moolenbroek 		r = sock->sock_ops->sop_recv(sock, data, len, off, ctl_data,
16674c27a833SDavid van Moolenbroek 		    ctl_len, ctl_off, addr, addr_len, user_endpt, inflags, min,
16684c27a833SDavid van Moolenbroek 		    flags);
16694c27a833SDavid van Moolenbroek 	} else
16704c27a833SDavid van Moolenbroek 		r = SUSPEND;
16714c27a833SDavid van Moolenbroek 
16724c27a833SDavid van Moolenbroek 	assert(r <= 0 || r == SOCKEVENT_EOF);
16734c27a833SDavid van Moolenbroek 
16744c27a833SDavid van Moolenbroek 	if (r == SUSPEND) {
16754c27a833SDavid van Moolenbroek 		if (oob)
16764c27a833SDavid van Moolenbroek 			panic("libsockevent: MSG_OOB receive calls may not be "
16774c27a833SDavid van Moolenbroek 			    "suspended");
16784c27a833SDavid van Moolenbroek 
16794c27a833SDavid van Moolenbroek 		/*
16804c27a833SDavid van Moolenbroek 		 * For non-blocking socket calls, return EWOULDBLOCK only if we
16814c27a833SDavid van Moolenbroek 		 * did not receive anything at all.  If only control data were
16824c27a833SDavid van Moolenbroek 		 * received, the return value is therefore zero.  Suspension
16834c27a833SDavid van Moolenbroek 		 * implies that there is nothing to read.  For the purpose of
16844c27a833SDavid van Moolenbroek 		 * the calling wrapper function, never suspend a call when
16854c27a833SDavid van Moolenbroek 		 * there is a pending error.
16864c27a833SDavid van Moolenbroek 		 */
16874c27a833SDavid van Moolenbroek 		if (call != NULL && sock->sock_err == OK) {
16884c27a833SDavid van Moolenbroek 			if (sock->sock_rtimeo != 0) {
16894c27a833SDavid van Moolenbroek 				timer = TRUE;
16904c27a833SDavid van Moolenbroek 				time = socktimer_add(sock, sock->sock_rtimeo);
16914c27a833SDavid van Moolenbroek 			} else {
16924c27a833SDavid van Moolenbroek 				timer = FALSE;
16934c27a833SDavid van Moolenbroek 				time = 0;
16944c27a833SDavid van Moolenbroek 			}
16954c27a833SDavid van Moolenbroek 
16964c27a833SDavid van Moolenbroek 			sockevent_suspend_data(sock, SEV_RECV, timer, call,
16974c27a833SDavid van Moolenbroek 			    user_endpt, data, len, *off, ctl_data,
16984c27a833SDavid van Moolenbroek 			    ctl_len, *ctl_off, inflags, *flags, time);
16994c27a833SDavid van Moolenbroek 		} else
17004c27a833SDavid van Moolenbroek 			r = EWOULDBLOCK;
17014c27a833SDavid van Moolenbroek 	}
17024c27a833SDavid van Moolenbroek 
17034c27a833SDavid van Moolenbroek 	return r;
17044c27a833SDavid van Moolenbroek }
17054c27a833SDavid van Moolenbroek 
17064c27a833SDavid van Moolenbroek /*
17074c27a833SDavid van Moolenbroek  * Receive regular and/or control data.
17084c27a833SDavid van Moolenbroek  */
17094c27a833SDavid van Moolenbroek static int
sockevent_recv(sockid_t id,const struct sockdriver_data * __restrict data,size_t len,const struct sockdriver_data * __restrict ctl_data,socklen_t * __restrict ctl_len,struct sockaddr * __restrict addr,socklen_t * __restrict addr_len,endpoint_t user_endpt,int * __restrict flags,const struct sockdriver_call * __restrict call)17104c27a833SDavid van Moolenbroek sockevent_recv(sockid_t id, const struct sockdriver_data * __restrict data,
17114c27a833SDavid van Moolenbroek 	size_t len, const struct sockdriver_data * __restrict ctl_data,
17124c27a833SDavid van Moolenbroek 	socklen_t * __restrict ctl_len, struct sockaddr * __restrict addr,
17134c27a833SDavid van Moolenbroek 	socklen_t * __restrict addr_len, endpoint_t user_endpt,
17144c27a833SDavid van Moolenbroek 	int * __restrict flags, const struct sockdriver_call * __restrict call)
17154c27a833SDavid van Moolenbroek {
17164c27a833SDavid van Moolenbroek 	struct sock *sock;
17174c27a833SDavid van Moolenbroek 	size_t off;
17184c27a833SDavid van Moolenbroek 	socklen_t ctl_inlen;
17194c27a833SDavid van Moolenbroek 	int r;
17204c27a833SDavid van Moolenbroek 
17214c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
17224c27a833SDavid van Moolenbroek 		return EINVAL;
17234c27a833SDavid van Moolenbroek 
17244c27a833SDavid van Moolenbroek 	/*
17254c27a833SDavid van Moolenbroek 	 * This function is a wrapper around the actual receive functionality.
17264c27a833SDavid van Moolenbroek 	 * The reason for this is that receiving data should take precedence
17274c27a833SDavid van Moolenbroek 	 * over a pending socket error, while a pending socket error should
17284c27a833SDavid van Moolenbroek 	 * take precedence over both regular errors as well as EOF.  In other
17294c27a833SDavid van Moolenbroek 	 * words: if there is a pending error, we must try to receive anything
17304c27a833SDavid van Moolenbroek 	 * at all; if receiving does not work, we must fail the call with the
17314c27a833SDavid van Moolenbroek 	 * pending error.  However, until we call the receive callback, we have
17324c27a833SDavid van Moolenbroek 	 * no way of telling whether any data can be received.  So we must try
17334c27a833SDavid van Moolenbroek 	 * that before we can decide whether to return a pending error.
17344c27a833SDavid van Moolenbroek 	 */
17354c27a833SDavid van Moolenbroek 	off = 0;
17364c27a833SDavid van Moolenbroek 	ctl_inlen = *ctl_len;
17374c27a833SDavid van Moolenbroek 	*ctl_len = 0;
17384c27a833SDavid van Moolenbroek 
17394c27a833SDavid van Moolenbroek 	/*
17404c27a833SDavid van Moolenbroek 	 * Attempt to perform the actual receive call.
17414c27a833SDavid van Moolenbroek 	 */
17424c27a833SDavid van Moolenbroek 	r = sockevent_recv_inner(sock, data, len, &off, ctl_data, ctl_inlen,
17434c27a833SDavid van Moolenbroek 	    ctl_len, addr, addr_len, user_endpt, flags, call);
17444c27a833SDavid van Moolenbroek 
17454c27a833SDavid van Moolenbroek 	/*
17464c27a833SDavid van Moolenbroek 	 * If the receive request succeeded, or it failed but yielded a partial
17474c27a833SDavid van Moolenbroek 	 * result, then return the (partal) result.  Otherwise, if an error is
17484c27a833SDavid van Moolenbroek 	 * pending, return that error.  Otherwise, return either a regular
17494c27a833SDavid van Moolenbroek 	 * error or 0 for EOF.
17504c27a833SDavid van Moolenbroek 	 */
17514c27a833SDavid van Moolenbroek 	if (r == OK || (r != SUSPEND && (off > 0 || *ctl_len > 0)))
17524c27a833SDavid van Moolenbroek 		r = (int)off;
17534c27a833SDavid van Moolenbroek 	else if (sock->sock_err != OK) {
17544c27a833SDavid van Moolenbroek 		assert(r != SUSPEND);
17554c27a833SDavid van Moolenbroek 
17564c27a833SDavid van Moolenbroek 		r = sock->sock_err;
17574c27a833SDavid van Moolenbroek 
17584c27a833SDavid van Moolenbroek 		sock->sock_err = OK;
17594c27a833SDavid van Moolenbroek 	} else if (r == SOCKEVENT_EOF)
17604c27a833SDavid van Moolenbroek 		r = 0;
17614c27a833SDavid van Moolenbroek 
17624c27a833SDavid van Moolenbroek 	return r;
17634c27a833SDavid van Moolenbroek }
17644c27a833SDavid van Moolenbroek 
17654c27a833SDavid van Moolenbroek /*
17664c27a833SDavid van Moolenbroek  * Process an I/O control call.
17674c27a833SDavid van Moolenbroek  */
17684c27a833SDavid van Moolenbroek static int
sockevent_ioctl(sockid_t id,unsigned long request,const struct sockdriver_data * __restrict data,endpoint_t user_endpt,const struct sockdriver_call * __restrict call __unused)17694c27a833SDavid van Moolenbroek sockevent_ioctl(sockid_t id, unsigned long request,
17704c27a833SDavid van Moolenbroek 	const struct sockdriver_data * __restrict data, endpoint_t user_endpt,
17714c27a833SDavid van Moolenbroek 	const struct sockdriver_call * __restrict call __unused)
17724c27a833SDavid van Moolenbroek {
17734c27a833SDavid van Moolenbroek 	struct sock *sock;
17744c27a833SDavid van Moolenbroek 	size_t size;
17754c27a833SDavid van Moolenbroek 	int r, val;
17764c27a833SDavid van Moolenbroek 
17774c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
17784c27a833SDavid van Moolenbroek 		return EINVAL;
17794c27a833SDavid van Moolenbroek 
17804c27a833SDavid van Moolenbroek 	/* We handle a very small subset of generic IOCTLs here. */
17814c27a833SDavid van Moolenbroek 	switch (request) {
17824c27a833SDavid van Moolenbroek 	case FIONREAD:
17834c27a833SDavid van Moolenbroek 		size = 0;
17844c27a833SDavid van Moolenbroek 		if (!(sock->sock_flags & SFL_SHUT_RD) &&
17854c27a833SDavid van Moolenbroek 		    sock->sock_ops->sop_test_recv != NULL)
17864c27a833SDavid van Moolenbroek 			(void)sock->sock_ops->sop_test_recv(sock, 0, &size);
17874c27a833SDavid van Moolenbroek 
17884c27a833SDavid van Moolenbroek 		val = (int)size;
17894c27a833SDavid van Moolenbroek 
17904c27a833SDavid van Moolenbroek 		return sockdriver_copyout(data, 0, &val, sizeof(val));
17914c27a833SDavid van Moolenbroek 	}
17924c27a833SDavid van Moolenbroek 
17934c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_ioctl == NULL)
17944c27a833SDavid van Moolenbroek 		return ENOTTY;
17954c27a833SDavid van Moolenbroek 
17964c27a833SDavid van Moolenbroek 	r = sock->sock_ops->sop_ioctl(sock, request, data, user_endpt);
17974c27a833SDavid van Moolenbroek 
17984c27a833SDavid van Moolenbroek 	/*
17994c27a833SDavid van Moolenbroek 	 * Suspending IOCTL requests is not currently supported by this
18004c27a833SDavid van Moolenbroek 	 * library, even though the VFS protocol and libsockdriver do support
18014c27a833SDavid van Moolenbroek 	 * it.  The reason is that IOCTLs do not match our proces suspension
18024c27a833SDavid van Moolenbroek 	 * model: they could be neither queued nor repeated.  For now, it seems
18034c27a833SDavid van Moolenbroek 	 * that this feature is not needed by the socket drivers either.  Thus,
18044c27a833SDavid van Moolenbroek 	 * even though there are possible solutions, we defer implementing them
18054c27a833SDavid van Moolenbroek 	 * until we know what exactly is needed.
18064c27a833SDavid van Moolenbroek 	 */
18074c27a833SDavid van Moolenbroek 	if (r == SUSPEND)
18084c27a833SDavid van Moolenbroek 		panic("libsockevent: socket driver suspended IOCTL 0x%lx",
18094c27a833SDavid van Moolenbroek 		    request);
18104c27a833SDavid van Moolenbroek 
18114c27a833SDavid van Moolenbroek 	return r;
18124c27a833SDavid van Moolenbroek }
18134c27a833SDavid van Moolenbroek 
18144c27a833SDavid van Moolenbroek /*
18154c27a833SDavid van Moolenbroek  * Set socket options.
18164c27a833SDavid van Moolenbroek  */
18174c27a833SDavid van Moolenbroek static int
sockevent_setsockopt(sockid_t id,int level,int name,const struct sockdriver_data * data,socklen_t len)18184c27a833SDavid van Moolenbroek sockevent_setsockopt(sockid_t id, int level, int name,
18194c27a833SDavid van Moolenbroek 	const struct sockdriver_data * data, socklen_t len)
18204c27a833SDavid van Moolenbroek {
18214c27a833SDavid van Moolenbroek 	struct sock *sock;
18224c27a833SDavid van Moolenbroek 	struct linger linger;
18234c27a833SDavid van Moolenbroek 	struct timeval tv;
18244c27a833SDavid van Moolenbroek 	clock_t secs, ticks;
18254c27a833SDavid van Moolenbroek 	int r, val;
18264c27a833SDavid van Moolenbroek 
18274c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
18284c27a833SDavid van Moolenbroek 		return EINVAL;
18294c27a833SDavid van Moolenbroek 
18304c27a833SDavid van Moolenbroek 	if (level == SOL_SOCKET) {
18314c27a833SDavid van Moolenbroek 		/*
18324c27a833SDavid van Moolenbroek 		 * Handle a subset of the socket-level options here.  For most
18334c27a833SDavid van Moolenbroek 		 * of them, this means that the socket driver itself need not
18344c27a833SDavid van Moolenbroek 		 * handle changing or returning the options, but still needs to
18354c27a833SDavid van Moolenbroek 		 * implement the correct behavior based on them where needed.
18364c27a833SDavid van Moolenbroek 		 * A few of them are handled exclusively in this library:
18374c27a833SDavid van Moolenbroek 		 * SO_ACCEPTCONN, SO_NOSIGPIPE, SO_ERROR, SO_TYPE, SO_LINGER,
18384c27a833SDavid van Moolenbroek 		 * SO_SNDLOWAT, SO_RCVLOWAT, SO_SNDTIMEO, and SO_RCVTIMEO.
18394c27a833SDavid van Moolenbroek 		 * The SO_USELOOPBACK option is explicitly absent, as it is
18404c27a833SDavid van Moolenbroek 		 * valid for routing sockets only and is set by default there.
18414c27a833SDavid van Moolenbroek 		 */
18424c27a833SDavid van Moolenbroek 		switch (name) {
18434c27a833SDavid van Moolenbroek 		case SO_DEBUG:
18444c27a833SDavid van Moolenbroek 		case SO_REUSEADDR:
18454c27a833SDavid van Moolenbroek 		case SO_KEEPALIVE:
18464c27a833SDavid van Moolenbroek 		case SO_DONTROUTE:
18474c27a833SDavid van Moolenbroek 		case SO_BROADCAST:
18484c27a833SDavid van Moolenbroek 		case SO_OOBINLINE:
18494c27a833SDavid van Moolenbroek 		case SO_REUSEPORT:
18504c27a833SDavid van Moolenbroek 		case SO_NOSIGPIPE:
18514c27a833SDavid van Moolenbroek 		case SO_TIMESTAMP:
18524c27a833SDavid van Moolenbroek 			/*
18534c27a833SDavid van Moolenbroek 			 * Simple on-off options.  Changing them does not
18544c27a833SDavid van Moolenbroek 			 * involve the socket driver.
18554c27a833SDavid van Moolenbroek 			 */
18564c27a833SDavid van Moolenbroek 			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
18574c27a833SDavid van Moolenbroek 			    len)) != OK)
18584c27a833SDavid van Moolenbroek 				return r;
18594c27a833SDavid van Moolenbroek 
18604c27a833SDavid van Moolenbroek 			if (val)
18614c27a833SDavid van Moolenbroek 				sock->sock_opt |= (unsigned int)name;
18624c27a833SDavid van Moolenbroek 			else
18634c27a833SDavid van Moolenbroek 				sock->sock_opt &= ~(unsigned int)name;
18644c27a833SDavid van Moolenbroek 
18654c27a833SDavid van Moolenbroek 			/*
18664c27a833SDavid van Moolenbroek 			 * In priciple these on-off options are maintained in
18674c27a833SDavid van Moolenbroek 			 * this library, but some socket drivers may need to
18684c27a833SDavid van Moolenbroek 			 * apply the options elsewhere, so we notify them that
18694c27a833SDavid van Moolenbroek 			 * something has changed.  Using the sop_setsockopt
18704c27a833SDavid van Moolenbroek 			 * callback would be inconvenient for this for two
18714c27a833SDavid van Moolenbroek 			 * reasons: multiple value copy-ins and default errors.
18724c27a833SDavid van Moolenbroek 			 */
18734c27a833SDavid van Moolenbroek 			if (sock->sock_ops->sop_setsockmask != NULL)
18744c27a833SDavid van Moolenbroek 				sock->sock_ops->sop_setsockmask(sock,
18754c27a833SDavid van Moolenbroek 				    sock->sock_opt);
18764c27a833SDavid van Moolenbroek 
18774c27a833SDavid van Moolenbroek 			/*
18784c27a833SDavid van Moolenbroek 			 * The inlining of OOB data may make new data available
18794c27a833SDavid van Moolenbroek 			 * through regular receive calls.  Thus, see if we can
18804c27a833SDavid van Moolenbroek 			 * wake up any suspended receive calls now.
18814c27a833SDavid van Moolenbroek 			 */
18824c27a833SDavid van Moolenbroek 			if (name == SO_OOBINLINE && val)
18834c27a833SDavid van Moolenbroek 				sockevent_raise(sock, SEV_RECV);
18844c27a833SDavid van Moolenbroek 
18854c27a833SDavid van Moolenbroek 			return OK;
18864c27a833SDavid van Moolenbroek 
18874c27a833SDavid van Moolenbroek 		case SO_LINGER:
18884c27a833SDavid van Moolenbroek 			/* The only on-off option with an associated value. */
18894c27a833SDavid van Moolenbroek 			if ((r = sockdriver_copyin_opt(data, &linger,
18904c27a833SDavid van Moolenbroek 			    sizeof(linger), len)) != OK)
18914c27a833SDavid van Moolenbroek 				return r;
18924c27a833SDavid van Moolenbroek 
18934c27a833SDavid van Moolenbroek 			if (linger.l_onoff) {
18944c27a833SDavid van Moolenbroek 				if (linger.l_linger < 0)
18954c27a833SDavid van Moolenbroek 					return EINVAL;
18964c27a833SDavid van Moolenbroek 				/* EDOM is the closest applicable error.. */
18974c27a833SDavid van Moolenbroek 				secs = (clock_t)linger.l_linger;
18984c27a833SDavid van Moolenbroek 				if (secs >= TMRDIFF_MAX / sys_hz())
18994c27a833SDavid van Moolenbroek 					return EDOM;
19004c27a833SDavid van Moolenbroek 
19014c27a833SDavid van Moolenbroek 				sock->sock_opt |= SO_LINGER;
19024c27a833SDavid van Moolenbroek 				sock->sock_linger = secs * sys_hz();
19034c27a833SDavid van Moolenbroek 			} else {
19044c27a833SDavid van Moolenbroek 				sock->sock_opt &= ~SO_LINGER;
19054c27a833SDavid van Moolenbroek 				sock->sock_linger = 0;
19064c27a833SDavid van Moolenbroek 			}
19074c27a833SDavid van Moolenbroek 
19084c27a833SDavid van Moolenbroek 			return OK;
19094c27a833SDavid van Moolenbroek 
19104c27a833SDavid van Moolenbroek 		case SO_SNDLOWAT:
19114c27a833SDavid van Moolenbroek 		case SO_RCVLOWAT:
19124c27a833SDavid van Moolenbroek 			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
19134c27a833SDavid van Moolenbroek 			    len)) != OK)
19144c27a833SDavid van Moolenbroek 				return r;
19154c27a833SDavid van Moolenbroek 
19164c27a833SDavid van Moolenbroek 			if (val <= 0)
19174c27a833SDavid van Moolenbroek 				return EINVAL;
19184c27a833SDavid van Moolenbroek 
19194c27a833SDavid van Moolenbroek 			/*
19204c27a833SDavid van Moolenbroek 			 * Setting these values may allow suspended operations
19214c27a833SDavid van Moolenbroek 			 * (send, recv, select) to be resumed, so recheck.
19224c27a833SDavid van Moolenbroek 			 */
19234c27a833SDavid van Moolenbroek 			if (name == SO_SNDLOWAT) {
19244c27a833SDavid van Moolenbroek 				sock->sock_slowat = (size_t)val;
19254c27a833SDavid van Moolenbroek 
19264c27a833SDavid van Moolenbroek 				sockevent_raise(sock, SEV_SEND);
19274c27a833SDavid van Moolenbroek 			} else {
19284c27a833SDavid van Moolenbroek 				sock->sock_rlowat = (size_t)val;
19294c27a833SDavid van Moolenbroek 
19304c27a833SDavid van Moolenbroek 				sockevent_raise(sock, SEV_RECV);
19314c27a833SDavid van Moolenbroek 			}
19324c27a833SDavid van Moolenbroek 
19334c27a833SDavid van Moolenbroek 			return OK;
19344c27a833SDavid van Moolenbroek 
19354c27a833SDavid van Moolenbroek 		case SO_SNDTIMEO:
19364c27a833SDavid van Moolenbroek 		case SO_RCVTIMEO:
19374c27a833SDavid van Moolenbroek 			if ((r = sockdriver_copyin_opt(data, &tv, sizeof(tv),
19384c27a833SDavid van Moolenbroek 			    len)) != OK)
19394c27a833SDavid van Moolenbroek 				return r;
19404c27a833SDavid van Moolenbroek 
19414c27a833SDavid van Moolenbroek 			if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
19424c27a833SDavid van Moolenbroek 			    (unsigned long)tv.tv_usec >= US)
19434c27a833SDavid van Moolenbroek 				return EINVAL;
19444c27a833SDavid van Moolenbroek 			if (tv.tv_sec >= TMRDIFF_MAX / sys_hz())
19454c27a833SDavid van Moolenbroek 				return EDOM;
19464c27a833SDavid van Moolenbroek 
19474c27a833SDavid van Moolenbroek 			ticks = tv.tv_sec * sys_hz() +
19484c27a833SDavid van Moolenbroek 			    (tv.tv_usec * sys_hz() + US - 1) / US;
19494c27a833SDavid van Moolenbroek 
19504c27a833SDavid van Moolenbroek 			if (name == SO_SNDTIMEO)
19514c27a833SDavid van Moolenbroek 				sock->sock_stimeo = ticks;
19524c27a833SDavid van Moolenbroek 			else
19534c27a833SDavid van Moolenbroek 				sock->sock_rtimeo = ticks;
19544c27a833SDavid van Moolenbroek 
19554c27a833SDavid van Moolenbroek 			/*
19564c27a833SDavid van Moolenbroek 			 * The timeouts for any calls already in progress for
19574c27a833SDavid van Moolenbroek 			 * this socket are left as is.
19584c27a833SDavid van Moolenbroek 			 */
19594c27a833SDavid van Moolenbroek 			return OK;
19604c27a833SDavid van Moolenbroek 
19614c27a833SDavid van Moolenbroek 		case SO_ACCEPTCONN:
19624c27a833SDavid van Moolenbroek 		case SO_ERROR:
19634c27a833SDavid van Moolenbroek 		case SO_TYPE:
19644c27a833SDavid van Moolenbroek 			/* These options may be retrieved but not set. */
19654c27a833SDavid van Moolenbroek 			return ENOPROTOOPT;
19664c27a833SDavid van Moolenbroek 
19674c27a833SDavid van Moolenbroek 		default:
19684c27a833SDavid van Moolenbroek 			/*
19694c27a833SDavid van Moolenbroek 			 * The remaining options either cannot be handled in a
19704c27a833SDavid van Moolenbroek 			 * generic way, or are not recognized altogether.  Pass
19714c27a833SDavid van Moolenbroek 			 * them to the socket driver, which should handle what
19724c27a833SDavid van Moolenbroek 			 * it knows and reject the rest.
19734c27a833SDavid van Moolenbroek 			 */
19744c27a833SDavid van Moolenbroek 			break;
19754c27a833SDavid van Moolenbroek 		}
19764c27a833SDavid van Moolenbroek 	}
19774c27a833SDavid van Moolenbroek 
19784c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_setsockopt == NULL)
19794c27a833SDavid van Moolenbroek 		return ENOPROTOOPT;
19804c27a833SDavid van Moolenbroek 
19814c27a833SDavid van Moolenbroek 	/*
19824c27a833SDavid van Moolenbroek 	 * The socket driver must return ENOPROTOOPT for all options it does
19834c27a833SDavid van Moolenbroek 	 * not recognize.
19844c27a833SDavid van Moolenbroek 	 */
19854c27a833SDavid van Moolenbroek 	return sock->sock_ops->sop_setsockopt(sock, level, name, data, len);
19864c27a833SDavid van Moolenbroek }
19874c27a833SDavid van Moolenbroek 
19884c27a833SDavid van Moolenbroek /*
19894c27a833SDavid van Moolenbroek  * Retrieve socket options.
19904c27a833SDavid van Moolenbroek  */
19914c27a833SDavid van Moolenbroek static int
sockevent_getsockopt(sockid_t id,int level,int name,const struct sockdriver_data * __restrict data,socklen_t * __restrict len)19924c27a833SDavid van Moolenbroek sockevent_getsockopt(sockid_t id, int level, int name,
19934c27a833SDavid van Moolenbroek 	const struct sockdriver_data * __restrict data,
19944c27a833SDavid van Moolenbroek 	socklen_t * __restrict len)
19954c27a833SDavid van Moolenbroek {
19964c27a833SDavid van Moolenbroek 	struct sock *sock;
19974c27a833SDavid van Moolenbroek 	struct linger linger;
19984c27a833SDavid van Moolenbroek 	struct timeval tv;
19994c27a833SDavid van Moolenbroek 	clock_t ticks;
20004c27a833SDavid van Moolenbroek 	int val;
20014c27a833SDavid van Moolenbroek 
20024c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
20034c27a833SDavid van Moolenbroek 		return EINVAL;
20044c27a833SDavid van Moolenbroek 
20054c27a833SDavid van Moolenbroek 	if (level == SOL_SOCKET) {
20064c27a833SDavid van Moolenbroek 		/*
20074c27a833SDavid van Moolenbroek 		 * As with setting, handle a subset of the socket-level options
20084c27a833SDavid van Moolenbroek 		 * here.  The rest is to be taken care of by the socket driver.
20094c27a833SDavid van Moolenbroek 		 */
20104c27a833SDavid van Moolenbroek 		switch (name) {
20114c27a833SDavid van Moolenbroek 		case SO_DEBUG:
20124c27a833SDavid van Moolenbroek 		case SO_ACCEPTCONN:
20134c27a833SDavid van Moolenbroek 		case SO_REUSEADDR:
20144c27a833SDavid van Moolenbroek 		case SO_KEEPALIVE:
20154c27a833SDavid van Moolenbroek 		case SO_DONTROUTE:
20164c27a833SDavid van Moolenbroek 		case SO_BROADCAST:
20174c27a833SDavid van Moolenbroek 		case SO_OOBINLINE:
20184c27a833SDavid van Moolenbroek 		case SO_REUSEPORT:
20194c27a833SDavid van Moolenbroek 		case SO_NOSIGPIPE:
20204c27a833SDavid van Moolenbroek 		case SO_TIMESTAMP:
20214c27a833SDavid van Moolenbroek 			val = !!(sock->sock_opt & (unsigned int)name);
20224c27a833SDavid van Moolenbroek 
20234c27a833SDavid van Moolenbroek 			return sockdriver_copyout_opt(data, &val, sizeof(val),
20244c27a833SDavid van Moolenbroek 			    len);
20254c27a833SDavid van Moolenbroek 
20264c27a833SDavid van Moolenbroek 		case SO_LINGER:
20274c27a833SDavid van Moolenbroek 			linger.l_onoff = !!(sock->sock_opt & SO_LINGER);
20284c27a833SDavid van Moolenbroek 			linger.l_linger = sock->sock_linger / sys_hz();
20294c27a833SDavid van Moolenbroek 
20304c27a833SDavid van Moolenbroek 			return sockdriver_copyout_opt(data, &linger,
20314c27a833SDavid van Moolenbroek 			   sizeof(linger), len);
20324c27a833SDavid van Moolenbroek 
20334c27a833SDavid van Moolenbroek 		case SO_ERROR:
20344c27a833SDavid van Moolenbroek 			if ((val = -sock->sock_err) != OK)
20354c27a833SDavid van Moolenbroek 				sock->sock_err = OK;
20364c27a833SDavid van Moolenbroek 
20374c27a833SDavid van Moolenbroek 			return sockdriver_copyout_opt(data, &val, sizeof(val),
20384c27a833SDavid van Moolenbroek 			    len);
20394c27a833SDavid van Moolenbroek 
20404c27a833SDavid van Moolenbroek 		case SO_TYPE:
20414c27a833SDavid van Moolenbroek 			val = sock->sock_type;
20424c27a833SDavid van Moolenbroek 
20434c27a833SDavid van Moolenbroek 			return sockdriver_copyout_opt(data, &val, sizeof(val),
20444c27a833SDavid van Moolenbroek 			    len);
20454c27a833SDavid van Moolenbroek 
20464c27a833SDavid van Moolenbroek 		case SO_SNDLOWAT:
20474c27a833SDavid van Moolenbroek 			val = (int)sock->sock_slowat;
20484c27a833SDavid van Moolenbroek 
20494c27a833SDavid van Moolenbroek 			return sockdriver_copyout_opt(data, &val, sizeof(val),
20504c27a833SDavid van Moolenbroek 			    len);
20514c27a833SDavid van Moolenbroek 
20524c27a833SDavid van Moolenbroek 		case SO_RCVLOWAT:
20534c27a833SDavid van Moolenbroek 			val = (int)sock->sock_rlowat;
20544c27a833SDavid van Moolenbroek 
20554c27a833SDavid van Moolenbroek 			return sockdriver_copyout_opt(data, &val, sizeof(val),
20564c27a833SDavid van Moolenbroek 			    len);
20574c27a833SDavid van Moolenbroek 
20584c27a833SDavid van Moolenbroek 		case SO_SNDTIMEO:
20594c27a833SDavid van Moolenbroek 		case SO_RCVTIMEO:
20604c27a833SDavid van Moolenbroek 			if (name == SO_SNDTIMEO)
20614c27a833SDavid van Moolenbroek 				ticks = sock->sock_stimeo;
20624c27a833SDavid van Moolenbroek 			else
20634c27a833SDavid van Moolenbroek 				ticks = sock->sock_rtimeo;
20644c27a833SDavid van Moolenbroek 
20654c27a833SDavid van Moolenbroek 			tv.tv_sec = ticks / sys_hz();
20664c27a833SDavid van Moolenbroek 			tv.tv_usec = (ticks % sys_hz()) * US / sys_hz();
20674c27a833SDavid van Moolenbroek 
20684c27a833SDavid van Moolenbroek 			return sockdriver_copyout_opt(data, &tv, sizeof(tv),
20694c27a833SDavid van Moolenbroek 			    len);
20704c27a833SDavid van Moolenbroek 
20714c27a833SDavid van Moolenbroek 		default:
20724c27a833SDavid van Moolenbroek 			break;
20734c27a833SDavid van Moolenbroek 		}
20744c27a833SDavid van Moolenbroek 	}
20754c27a833SDavid van Moolenbroek 
20764c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_getsockopt == NULL)
20774c27a833SDavid van Moolenbroek 		return ENOPROTOOPT;
20784c27a833SDavid van Moolenbroek 
20794c27a833SDavid van Moolenbroek 	/*
20804c27a833SDavid van Moolenbroek 	 * The socket driver must return ENOPROTOOPT for all options it does
20814c27a833SDavid van Moolenbroek 	 * not recognize.
20824c27a833SDavid van Moolenbroek 	 */
20834c27a833SDavid van Moolenbroek 	return sock->sock_ops->sop_getsockopt(sock, level, name, data, len);
20844c27a833SDavid van Moolenbroek }
20854c27a833SDavid van Moolenbroek 
20864c27a833SDavid van Moolenbroek /*
20874c27a833SDavid van Moolenbroek  * Retrieve a socket's local address.
20884c27a833SDavid van Moolenbroek  */
20894c27a833SDavid van Moolenbroek static int
sockevent_getsockname(sockid_t id,struct sockaddr * __restrict addr,socklen_t * __restrict addr_len)20904c27a833SDavid van Moolenbroek sockevent_getsockname(sockid_t id, struct sockaddr * __restrict addr,
20914c27a833SDavid van Moolenbroek 	socklen_t * __restrict addr_len)
20924c27a833SDavid van Moolenbroek {
20934c27a833SDavid van Moolenbroek 	struct sock *sock;
20944c27a833SDavid van Moolenbroek 
20954c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
20964c27a833SDavid van Moolenbroek 		return EINVAL;
20974c27a833SDavid van Moolenbroek 
20984c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_getsockname == NULL)
20994c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
21004c27a833SDavid van Moolenbroek 
21014c27a833SDavid van Moolenbroek 	return sock->sock_ops->sop_getsockname(sock, addr, addr_len);
21024c27a833SDavid van Moolenbroek }
21034c27a833SDavid van Moolenbroek 
21044c27a833SDavid van Moolenbroek /*
21054c27a833SDavid van Moolenbroek  * Retrieve a socket's remote address.
21064c27a833SDavid van Moolenbroek  */
21074c27a833SDavid van Moolenbroek static int
sockevent_getpeername(sockid_t id,struct sockaddr * __restrict addr,socklen_t * __restrict addr_len)21084c27a833SDavid van Moolenbroek sockevent_getpeername(sockid_t id, struct sockaddr * __restrict addr,
21094c27a833SDavid van Moolenbroek 	socklen_t * __restrict addr_len)
21104c27a833SDavid van Moolenbroek {
21114c27a833SDavid van Moolenbroek 	struct sock *sock;
21124c27a833SDavid van Moolenbroek 
21134c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
21144c27a833SDavid van Moolenbroek 		return EINVAL;
21154c27a833SDavid van Moolenbroek 
21164c27a833SDavid van Moolenbroek 	/* Listening-mode sockets cannot possibly have a peer address. */
21174c27a833SDavid van Moolenbroek 	if (sock->sock_opt & SO_ACCEPTCONN)
21184c27a833SDavid van Moolenbroek 		return ENOTCONN;
21194c27a833SDavid van Moolenbroek 
21204c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_getpeername == NULL)
21214c27a833SDavid van Moolenbroek 		return EOPNOTSUPP;
21224c27a833SDavid van Moolenbroek 
21234c27a833SDavid van Moolenbroek 	return sock->sock_ops->sop_getpeername(sock, addr, addr_len);
21244c27a833SDavid van Moolenbroek }
21254c27a833SDavid van Moolenbroek 
21264c27a833SDavid van Moolenbroek /*
21274c27a833SDavid van Moolenbroek  * Mark the socket object as shut down for sending and/or receiving.  The flags
21284c27a833SDavid van Moolenbroek  * parameter may be a bitwise-OR'ed combination of SFL_SHUT_RD and SFL_SHUT_WR.
21294c27a833SDavid van Moolenbroek  * This function will wake up any suspended requests affected by this change,
21304c27a833SDavid van Moolenbroek  * but it will not invoke the sop_shutdown() callback function on the socket.
21314c27a833SDavid van Moolenbroek  * The function may in fact be called from sop_shutdown() before completion to
21324c27a833SDavid van Moolenbroek  * mark the socket as shut down as reflected by sockevent_is_shutdown().
21334c27a833SDavid van Moolenbroek  */
21344c27a833SDavid van Moolenbroek void
sockevent_set_shutdown(struct sock * sock,unsigned int flags)21354c27a833SDavid van Moolenbroek sockevent_set_shutdown(struct sock * sock, unsigned int flags)
21364c27a833SDavid van Moolenbroek {
21374c27a833SDavid van Moolenbroek 	unsigned int mask;
21384c27a833SDavid van Moolenbroek 
21394c27a833SDavid van Moolenbroek 	assert(sock->sock_ops != NULL);
21404c27a833SDavid van Moolenbroek 	assert(!(flags & ~(SFL_SHUT_RD | SFL_SHUT_WR)));
21414c27a833SDavid van Moolenbroek 
21424c27a833SDavid van Moolenbroek 	/* Look at the newly set flags only. */
21434c27a833SDavid van Moolenbroek 	flags &= ~(unsigned int)sock->sock_flags;
21444c27a833SDavid van Moolenbroek 
21454c27a833SDavid van Moolenbroek 	if (flags != 0) {
21464c27a833SDavid van Moolenbroek 		sock->sock_flags |= flags;
21474c27a833SDavid van Moolenbroek 
21484c27a833SDavid van Moolenbroek 		/*
21494c27a833SDavid van Moolenbroek 		 * Wake up any blocked calls that are affected by the shutdown.
21504c27a833SDavid van Moolenbroek 		 * Shutting down listening sockets causes ongoing accept calls
21514c27a833SDavid van Moolenbroek 		 * to be rechecked.
21524c27a833SDavid van Moolenbroek 		 */
21534c27a833SDavid van Moolenbroek 		mask = 0;
21544c27a833SDavid van Moolenbroek 		if (flags & SFL_SHUT_RD)
21554c27a833SDavid van Moolenbroek 			mask |= SEV_RECV;
21564c27a833SDavid van Moolenbroek 		if (flags & SFL_SHUT_WR)
21574c27a833SDavid van Moolenbroek 			mask |= SEV_SEND;
21584c27a833SDavid van Moolenbroek 		if (sock->sock_opt & SO_ACCEPTCONN)
21594c27a833SDavid van Moolenbroek 			mask |= SEV_ACCEPT;
21604c27a833SDavid van Moolenbroek 
21614c27a833SDavid van Moolenbroek 		assert(mask != 0);
21624c27a833SDavid van Moolenbroek 		sockevent_raise(sock, mask);
21634c27a833SDavid van Moolenbroek 	}
21644c27a833SDavid van Moolenbroek }
21654c27a833SDavid van Moolenbroek 
21664c27a833SDavid van Moolenbroek /*
21674c27a833SDavid van Moolenbroek  * Shut down socket send and receive operations.
21684c27a833SDavid van Moolenbroek  */
21694c27a833SDavid van Moolenbroek static int
sockevent_shutdown(sockid_t id,int how)21704c27a833SDavid van Moolenbroek sockevent_shutdown(sockid_t id, int how)
21714c27a833SDavid van Moolenbroek {
21724c27a833SDavid van Moolenbroek 	struct sock *sock;
21734c27a833SDavid van Moolenbroek 	unsigned int flags;
21744c27a833SDavid van Moolenbroek 	int r;
21754c27a833SDavid van Moolenbroek 
21764c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
21774c27a833SDavid van Moolenbroek 		return EINVAL;
21784c27a833SDavid van Moolenbroek 
21794c27a833SDavid van Moolenbroek 	/* Convert the request to a set of flags. */
21804c27a833SDavid van Moolenbroek 	flags = 0;
21814c27a833SDavid van Moolenbroek 	if (how == SHUT_RD || how == SHUT_RDWR)
21824c27a833SDavid van Moolenbroek 		flags |= SFL_SHUT_RD;
21834c27a833SDavid van Moolenbroek 	if (how == SHUT_WR || how == SHUT_RDWR)
21844c27a833SDavid van Moolenbroek 		flags |= SFL_SHUT_WR;
21854c27a833SDavid van Moolenbroek 
21864c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_shutdown != NULL)
21874c27a833SDavid van Moolenbroek 		r = sock->sock_ops->sop_shutdown(sock, flags);
21884c27a833SDavid van Moolenbroek 	else
21894c27a833SDavid van Moolenbroek 		r = OK;
21904c27a833SDavid van Moolenbroek 
21914c27a833SDavid van Moolenbroek 	/* On success, update our internal state as well. */
21924c27a833SDavid van Moolenbroek 	if (r == OK)
21934c27a833SDavid van Moolenbroek 		sockevent_set_shutdown(sock, flags);
21944c27a833SDavid van Moolenbroek 
21954c27a833SDavid van Moolenbroek 	return r;
21964c27a833SDavid van Moolenbroek }
21974c27a833SDavid van Moolenbroek 
21984c27a833SDavid van Moolenbroek /*
21994c27a833SDavid van Moolenbroek  * Close a socket.
22004c27a833SDavid van Moolenbroek  */
22014c27a833SDavid van Moolenbroek static int
sockevent_close(sockid_t id,const struct sockdriver_call * call)22024c27a833SDavid van Moolenbroek sockevent_close(sockid_t id, const struct sockdriver_call * call)
22034c27a833SDavid van Moolenbroek {
22044c27a833SDavid van Moolenbroek 	struct sock *sock;
22054c27a833SDavid van Moolenbroek 	int r, force;
22064c27a833SDavid van Moolenbroek 
22074c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
22084c27a833SDavid van Moolenbroek 		return EINVAL;
22094c27a833SDavid van Moolenbroek 
22104c27a833SDavid van Moolenbroek 	assert(sock->sock_proc == NULL);
22114c27a833SDavid van Moolenbroek 	sock->sock_select.ss_endpt = NONE;
22124c27a833SDavid van Moolenbroek 
22134c27a833SDavid van Moolenbroek 	/*
22144c27a833SDavid van Moolenbroek 	 * There are several scenarios when it comes to closing sockets.  First
22154c27a833SDavid van Moolenbroek 	 * of all, we never actually force the socket driver to close a socket.
22164c27a833SDavid van Moolenbroek 	 * The driver may always suspend the close call and take as long as it
22174c27a833SDavid van Moolenbroek 	 * wants.  After a suspension, it signals its completion of the close
22184c27a833SDavid van Moolenbroek 	 * through the SEV_CLOSE socket event.
22194c27a833SDavid van Moolenbroek 	 *
22204c27a833SDavid van Moolenbroek 	 * With that said, we offer two levels of urgency regarding the close
22214c27a833SDavid van Moolenbroek 	 * request: regular and forced.  The former allows for a graceful
22224c27a833SDavid van Moolenbroek 	 * close; the latter urges the socket driver to close the socket as
22234c27a833SDavid van Moolenbroek 	 * soon as possible.  A socket that has been requested to be closed
22244c27a833SDavid van Moolenbroek 	 * gracefully can, as long as it is still open (i.e., no SEV_CLOSE was
22254c27a833SDavid van Moolenbroek 	 * fired yet), later be requested to be closed forcefully.  This is how
22264c27a833SDavid van Moolenbroek 	 * SO_LINGER with a nonzero timeout is implemented.  If SO_LINGER is
22274c27a833SDavid van Moolenbroek 	 * set with a zero timeout, the socket is force-closed immediately.
22284c27a833SDavid van Moolenbroek 	 * Finally, if SO_LINGER is not set, the socket will be closed normally
22294c27a833SDavid van Moolenbroek 	 * and never be forced--akin to SO_LINGER with an infinite timeout.
22304c27a833SDavid van Moolenbroek 	 *
22314c27a833SDavid van Moolenbroek 	 * The return value of the caller's close(2) may only ever be either
22324c27a833SDavid van Moolenbroek 	 * OK or EINPROGRESS, to ensure that the caller knows that the file
22334c27a833SDavid van Moolenbroek 	 * descriptor is freed up, as per Austin Group Defect #529.  In fact,
22344c27a833SDavid van Moolenbroek 	 * EINPROGRESS is to be returned only on signal interruption (i.e.,
22354c27a833SDavid van Moolenbroek 	 * cancel).  For that reason, this function only ever returns OK.
22364c27a833SDavid van Moolenbroek 	 */
22374c27a833SDavid van Moolenbroek 	force = ((sock->sock_opt & SO_LINGER) && sock->sock_linger == 0);
22384c27a833SDavid van Moolenbroek 
22394c27a833SDavid van Moolenbroek 	if (sock->sock_ops->sop_close != NULL)
22404c27a833SDavid van Moolenbroek 		r = sock->sock_ops->sop_close(sock, force);
22414c27a833SDavid van Moolenbroek 	else
22424c27a833SDavid van Moolenbroek 		r = OK;
22434c27a833SDavid van Moolenbroek 
22444c27a833SDavid van Moolenbroek 	assert(r == OK || r == SUSPEND);
22454c27a833SDavid van Moolenbroek 
22464c27a833SDavid van Moolenbroek 	if (r == SUSPEND) {
22474c27a833SDavid van Moolenbroek 		sock->sock_flags |= SFL_CLOSING;
22484c27a833SDavid van Moolenbroek 
22494c27a833SDavid van Moolenbroek 		/*
22504c27a833SDavid van Moolenbroek 		 * If we were requested to force-close the socket immediately,
22514c27a833SDavid van Moolenbroek 		 * but the socket driver needs more time anyway, then tell the
22524c27a833SDavid van Moolenbroek 		 * caller that the socket was closed right away.
22534c27a833SDavid van Moolenbroek 		 */
22544c27a833SDavid van Moolenbroek 		if (force)
22554c27a833SDavid van Moolenbroek 			return OK;
22564c27a833SDavid van Moolenbroek 
22574c27a833SDavid van Moolenbroek 		/*
22584c27a833SDavid van Moolenbroek 		 * If we are to force-close the socket only after a specific
22594c27a833SDavid van Moolenbroek 		 * linger timeout, set the timer for that now, even if the call
22604c27a833SDavid van Moolenbroek 		 * is non-blocking.  This also means that we cannot associate
22614c27a833SDavid van Moolenbroek 		 * the linger timeout with the close call.  Instead, we convert
22624c27a833SDavid van Moolenbroek 		 * the sock_linger value from a (relative) duration to an
22634c27a833SDavid van Moolenbroek 		 * (absolute) timeout time, and use the SFL_CLOSING flag (along
22644c27a833SDavid van Moolenbroek 		 * with SFL_TIMER) to tell the difference.  Since the socket is
22654c27a833SDavid van Moolenbroek 		 * otherwise unreachable from userland at this point, the
22664c27a833SDavid van Moolenbroek 		 * conversion is never visible in any way.
22674c27a833SDavid van Moolenbroek 		 *
22684c27a833SDavid van Moolenbroek 		 * The socket may already be in the timers list, so we must
22694c27a833SDavid van Moolenbroek 		 * always check the SO_LINGER flag before checking sock_linger.
22704c27a833SDavid van Moolenbroek 		 *
22714c27a833SDavid van Moolenbroek 		 * If SO_LINGER is not set, we must never suspend the call.
22724c27a833SDavid van Moolenbroek 		 */
22734c27a833SDavid van Moolenbroek 		if (sock->sock_opt & SO_LINGER) {
22744c27a833SDavid van Moolenbroek 			sock->sock_linger =
22754c27a833SDavid van Moolenbroek 			    socktimer_add(sock, sock->sock_linger);
22764c27a833SDavid van Moolenbroek 		} else
22774c27a833SDavid van Moolenbroek 			call = NULL;
22784c27a833SDavid van Moolenbroek 
22794c27a833SDavid van Moolenbroek 		/*
22804c27a833SDavid van Moolenbroek 		 * A non-blocking close is completed asynchronously.  The
22814c27a833SDavid van Moolenbroek 		 * caller is not told about this with EWOULDBLOCK as usual, for
22824c27a833SDavid van Moolenbroek 		 * the reasons mentioned above.
22834c27a833SDavid van Moolenbroek 		 */
22844c27a833SDavid van Moolenbroek 		if (call != NULL)
22854c27a833SDavid van Moolenbroek 			sockevent_suspend(sock, SEV_CLOSE, call, NONE);
22864c27a833SDavid van Moolenbroek 		else
22874c27a833SDavid van Moolenbroek 			r = OK;
22884c27a833SDavid van Moolenbroek 	} else if (r == OK)
22894c27a833SDavid van Moolenbroek 		sockevent_free(sock);
22904c27a833SDavid van Moolenbroek 
22914c27a833SDavid van Moolenbroek 	return r;
22924c27a833SDavid van Moolenbroek }
22934c27a833SDavid van Moolenbroek 
22944c27a833SDavid van Moolenbroek /*
22954c27a833SDavid van Moolenbroek  * Cancel a suspended send request.
22964c27a833SDavid van Moolenbroek  */
22974c27a833SDavid van Moolenbroek static void
sockevent_cancel_send(struct sock * sock,struct sockevent_proc * spr,int err)22984c27a833SDavid van Moolenbroek sockevent_cancel_send(struct sock * sock, struct sockevent_proc * spr, int err)
22994c27a833SDavid van Moolenbroek {
23004c27a833SDavid van Moolenbroek 	int r;
23014c27a833SDavid van Moolenbroek 
23024c27a833SDavid van Moolenbroek 	/*
23034c27a833SDavid van Moolenbroek 	 * If any regular or control data were sent, return the number of data
23044c27a833SDavid van Moolenbroek 	 * bytes sent--possibly zero.  Otherwise return the given error code.
23054c27a833SDavid van Moolenbroek 	 */
23064c27a833SDavid van Moolenbroek 	if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
23074c27a833SDavid van Moolenbroek 		r = (int)spr->spr_dataoff;
23084c27a833SDavid van Moolenbroek 	else
23094c27a833SDavid van Moolenbroek 		r = err;
23104c27a833SDavid van Moolenbroek 
23114c27a833SDavid van Moolenbroek 	sockdriver_reply_generic(&spr->spr_call, r);
23124c27a833SDavid van Moolenbroek 
23134c27a833SDavid van Moolenbroek 	/*
23144c27a833SDavid van Moolenbroek 	 * In extremely rare circumstances, one send may be queued behind
23154c27a833SDavid van Moolenbroek 	 * another send even though the former can actually be sent on the
23164c27a833SDavid van Moolenbroek 	 * socket right away.  For this reason, we retry sending when canceling
23174c27a833SDavid van Moolenbroek 	 * a send.  We need to do this only when the first send in the queue
23184c27a833SDavid van Moolenbroek 	 * was canceled, but multiple blocked sends on a single socket should
23194c27a833SDavid van Moolenbroek 	 * be rare anyway.
23204c27a833SDavid van Moolenbroek 	 */
23214c27a833SDavid van Moolenbroek 	sockevent_raise(sock, SEV_SEND);
23224c27a833SDavid van Moolenbroek }
23234c27a833SDavid van Moolenbroek 
23244c27a833SDavid van Moolenbroek /*
23254c27a833SDavid van Moolenbroek  * Cancel a suspended receive request.
23264c27a833SDavid van Moolenbroek  */
23274c27a833SDavid van Moolenbroek static void
sockevent_cancel_recv(struct sock * sock,struct sockevent_proc * spr,int err)23284c27a833SDavid van Moolenbroek sockevent_cancel_recv(struct sock * sock, struct sockevent_proc * spr, int err)
23294c27a833SDavid van Moolenbroek {
23304c27a833SDavid van Moolenbroek 	int r;
23314c27a833SDavid van Moolenbroek 
23324c27a833SDavid van Moolenbroek 	/*
23334c27a833SDavid van Moolenbroek 	 * If any regular or control data were received, return the number of
23344c27a833SDavid van Moolenbroek 	 * data bytes received--possibly zero.  Otherwise return the given
23354c27a833SDavid van Moolenbroek 	 * error code.
23364c27a833SDavid van Moolenbroek 	 */
23374c27a833SDavid van Moolenbroek 	if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
23384c27a833SDavid van Moolenbroek 		r = (int)spr->spr_dataoff;
23394c27a833SDavid van Moolenbroek 	else
23404c27a833SDavid van Moolenbroek 		r = err;
23414c27a833SDavid van Moolenbroek 
23424c27a833SDavid van Moolenbroek 	/*
23434c27a833SDavid van Moolenbroek 	 * Also return any flags set for the data received so far, e.g.
23444c27a833SDavid van Moolenbroek 	 * MSG_CTRUNC.  Do not return an address: receive calls on unconnected
23454c27a833SDavid van Moolenbroek 	 * sockets must never block after receiving some data--instead, they
23464c27a833SDavid van Moolenbroek 	 * are supposed to return MSG_TRUNC if not all data were copied out.
23474c27a833SDavid van Moolenbroek 	 */
23484c27a833SDavid van Moolenbroek 	sockdriver_reply_recv(&spr->spr_call, r, spr->spr_ctloff, NULL, 0,
23494c27a833SDavid van Moolenbroek 	    spr->spr_rflags);
23504c27a833SDavid van Moolenbroek 
23514c27a833SDavid van Moolenbroek 	/*
23524c27a833SDavid van Moolenbroek 	 * The same story as for sends (see above) applies to receives,
23534c27a833SDavid van Moolenbroek 	 * although this case should be even more rare in practice.
23544c27a833SDavid van Moolenbroek 	 */
23554c27a833SDavid van Moolenbroek 	sockevent_raise(sock, SEV_RECV);
23564c27a833SDavid van Moolenbroek }
23574c27a833SDavid van Moolenbroek 
23584c27a833SDavid van Moolenbroek /*
23594c27a833SDavid van Moolenbroek  * Cancel a previous request that may currently be suspended.  The cancel
23604c27a833SDavid van Moolenbroek  * operation itself does not have a reply.  Instead, if the given request was
23614c27a833SDavid van Moolenbroek  * found to be suspended, that request must be aborted and an appropriate reply
23624c27a833SDavid van Moolenbroek  * must be sent for the request.  If no matching request was found, no reply
23634c27a833SDavid van Moolenbroek  * must be sent at all.
23644c27a833SDavid van Moolenbroek  */
23654c27a833SDavid van Moolenbroek static void
sockevent_cancel(sockid_t id,const struct sockdriver_call * call)23664c27a833SDavid van Moolenbroek sockevent_cancel(sockid_t id, const struct sockdriver_call * call)
23674c27a833SDavid van Moolenbroek {
23684c27a833SDavid van Moolenbroek 	struct sockevent_proc *spr;
23694c27a833SDavid van Moolenbroek 	struct sock *sock;
23704c27a833SDavid van Moolenbroek 
23714c27a833SDavid van Moolenbroek 	/*
23724c27a833SDavid van Moolenbroek 	 * Due to asynchronous close(2) operations, not even the sock object
23734c27a833SDavid van Moolenbroek 	 * may be found.  If this (entirely legitimate) case, do not send any
23744c27a833SDavid van Moolenbroek 	 * reply.
23754c27a833SDavid van Moolenbroek 	 */
23764c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
23774c27a833SDavid van Moolenbroek 		return;
23784c27a833SDavid van Moolenbroek 
23794c27a833SDavid van Moolenbroek 	/*
23804c27a833SDavid van Moolenbroek 	 * The request may already have completed by the time we receive the
23814c27a833SDavid van Moolenbroek 	 * cancel request, in which case we can not find it.  In this (entirely
23824c27a833SDavid van Moolenbroek 	 * legitimate) case, do not send any reply.
23834c27a833SDavid van Moolenbroek 	 */
23844c27a833SDavid van Moolenbroek 	if ((spr = sockevent_unsuspend(sock, call)) == NULL)
23854c27a833SDavid van Moolenbroek 		return;
23864c27a833SDavid van Moolenbroek 
23874c27a833SDavid van Moolenbroek 	/*
23884c27a833SDavid van Moolenbroek 	 * We found the operation.  Cancel it according to its call type.
23894c27a833SDavid van Moolenbroek 	 * Then, once fully done with it, free the suspension data structure.
23904c27a833SDavid van Moolenbroek 	 *
23914c27a833SDavid van Moolenbroek 	 * Note that we have to use the call structure from the suspension data
23924c27a833SDavid van Moolenbroek 	 * structure rather than the given 'call' pointer: only the former
23934c27a833SDavid van Moolenbroek 	 * includes all the information necessary to resume the request!
23944c27a833SDavid van Moolenbroek 	 */
23954c27a833SDavid van Moolenbroek 	switch (spr->spr_event) {
23964c27a833SDavid van Moolenbroek 	case SEV_BIND:
23974c27a833SDavid van Moolenbroek 	case SEV_CONNECT:
23984c27a833SDavid van Moolenbroek 		assert(spr->spr_call.sc_endpt != NONE);
23994c27a833SDavid van Moolenbroek 
24004c27a833SDavid van Moolenbroek 		sockdriver_reply_generic(&spr->spr_call, EINTR);
24014c27a833SDavid van Moolenbroek 
24024c27a833SDavid van Moolenbroek 		break;
24034c27a833SDavid van Moolenbroek 
24044c27a833SDavid van Moolenbroek 	case SEV_ACCEPT:
24054c27a833SDavid van Moolenbroek 		sockdriver_reply_accept(&spr->spr_call, EINTR, NULL, 0);
24064c27a833SDavid van Moolenbroek 
24074c27a833SDavid van Moolenbroek 		break;
24084c27a833SDavid van Moolenbroek 
24094c27a833SDavid van Moolenbroek 	case SEV_SEND:
24104c27a833SDavid van Moolenbroek 		sockevent_cancel_send(sock, spr, EINTR);
24114c27a833SDavid van Moolenbroek 
24124c27a833SDavid van Moolenbroek 		break;
24134c27a833SDavid van Moolenbroek 
24144c27a833SDavid van Moolenbroek 	case SEV_RECV:
24154c27a833SDavid van Moolenbroek 		sockevent_cancel_recv(sock, spr, EINTR);
24164c27a833SDavid van Moolenbroek 
24174c27a833SDavid van Moolenbroek 		break;
24184c27a833SDavid van Moolenbroek 
24194c27a833SDavid van Moolenbroek 	case SEV_CLOSE:
24204c27a833SDavid van Moolenbroek 		/*
24214c27a833SDavid van Moolenbroek 		 * Return EINPROGRESS rather than EINTR, so that the user
24224c27a833SDavid van Moolenbroek 		 * process can tell from the close(2) result that the file
24234c27a833SDavid van Moolenbroek 		 * descriptor has in fact been closed.
24244c27a833SDavid van Moolenbroek 		 */
24254c27a833SDavid van Moolenbroek 		sockdriver_reply_generic(&spr->spr_call, EINPROGRESS);
24264c27a833SDavid van Moolenbroek 
24274c27a833SDavid van Moolenbroek 		/*
24284c27a833SDavid van Moolenbroek 		 * Do not free the sock object here: the socket driver will
24294c27a833SDavid van Moolenbroek 		 * complete the close in the background, and fire SEV_CLOSE
24304c27a833SDavid van Moolenbroek 		 * once it is done.  Only then is the sock object freed.
24314c27a833SDavid van Moolenbroek 		 */
24324c27a833SDavid van Moolenbroek 		break;
24334c27a833SDavid van Moolenbroek 
24344c27a833SDavid van Moolenbroek 	default:
24354c27a833SDavid van Moolenbroek 		panic("libsockevent: process suspended on unknown event 0x%x",
24364c27a833SDavid van Moolenbroek 		    spr->spr_event);
24374c27a833SDavid van Moolenbroek 	}
24384c27a833SDavid van Moolenbroek 
24394c27a833SDavid van Moolenbroek 	sockevent_proc_free(spr);
24404c27a833SDavid van Moolenbroek }
24414c27a833SDavid van Moolenbroek 
24424c27a833SDavid van Moolenbroek /*
24434c27a833SDavid van Moolenbroek  * Process a select request.
24444c27a833SDavid van Moolenbroek  */
24454c27a833SDavid van Moolenbroek static int
sockevent_select(sockid_t id,unsigned int ops,const struct sockdriver_select * sel)24464c27a833SDavid van Moolenbroek sockevent_select(sockid_t id, unsigned int ops,
24474c27a833SDavid van Moolenbroek 	const struct sockdriver_select * sel)
24484c27a833SDavid van Moolenbroek {
24494c27a833SDavid van Moolenbroek 	struct sock *sock;
24504c27a833SDavid van Moolenbroek 	unsigned int r, notify;
24514c27a833SDavid van Moolenbroek 
24524c27a833SDavid van Moolenbroek 	if ((sock = sockhash_get(id)) == NULL)
24534c27a833SDavid van Moolenbroek 		return EINVAL;
24544c27a833SDavid van Moolenbroek 
24554c27a833SDavid van Moolenbroek 	notify = (ops & SDEV_NOTIFY);
24564c27a833SDavid van Moolenbroek 	ops &= (SDEV_OP_RD | SDEV_OP_WR | SDEV_OP_ERR);
24574c27a833SDavid van Moolenbroek 
24584c27a833SDavid van Moolenbroek 	/*
24594c27a833SDavid van Moolenbroek 	 * See if any of the requested select operations can be satisfied
24604c27a833SDavid van Moolenbroek 	 * immediately.
24614c27a833SDavid van Moolenbroek 	 */
24624c27a833SDavid van Moolenbroek 	r = sockevent_test_select(sock, ops);
24634c27a833SDavid van Moolenbroek 
24644c27a833SDavid van Moolenbroek 	/*
24654c27a833SDavid van Moolenbroek 	 * If select operations were pending, the new results must not indicate
24664c27a833SDavid van Moolenbroek 	 * that any of those were satisfied, as that would indicate an internal
24674c27a833SDavid van Moolenbroek 	 * logic error: the socket driver is supposed to update its state
24684c27a833SDavid van Moolenbroek 	 * proactively, and thus, discovering that things have changed here is
24694c27a833SDavid van Moolenbroek 	 * not something that should ever happen.
24704c27a833SDavid van Moolenbroek 	 */
24714c27a833SDavid van Moolenbroek 	assert(!(sock->sock_selops & r));
24724c27a833SDavid van Moolenbroek 
24734c27a833SDavid van Moolenbroek 	/*
24744c27a833SDavid van Moolenbroek 	 * If any select operations are not satisfied immediately, and we are
24754c27a833SDavid van Moolenbroek 	 * asked to notify the caller when they are satisfied later, save them
24764c27a833SDavid van Moolenbroek 	 * for later retesting.
24774c27a833SDavid van Moolenbroek 	 */
24784c27a833SDavid van Moolenbroek 	ops &= ~r;
24794c27a833SDavid van Moolenbroek 
24804c27a833SDavid van Moolenbroek 	if (notify && ops != 0) {
24814c27a833SDavid van Moolenbroek 		/*
24824c27a833SDavid van Moolenbroek 		 * For now, we support only one caller when it comes to select
24834c27a833SDavid van Moolenbroek 		 * queries: VFS.  If we want to support a networked file system
24844c27a833SDavid van Moolenbroek 		 * (or so) directly calling select as well, this library will
24854c27a833SDavid van Moolenbroek 		 * have to be extended accordingly (should not be too hard).
24864c27a833SDavid van Moolenbroek 		 */
24874c27a833SDavid van Moolenbroek 		if (sock->sock_select.ss_endpt != NONE) {
24884c27a833SDavid van Moolenbroek 			if (sock->sock_select.ss_endpt != sel->ss_endpt) {
24894c27a833SDavid van Moolenbroek 				printf("libsockevent: no support for multiple "
24904c27a833SDavid van Moolenbroek 				    "select callers yet\n");
24914c27a833SDavid van Moolenbroek 
24924c27a833SDavid van Moolenbroek 				return EIO;
24934c27a833SDavid van Moolenbroek 			}
24944c27a833SDavid van Moolenbroek 
24954c27a833SDavid van Moolenbroek 			/*
24964c27a833SDavid van Moolenbroek 			 * If a select query was already pending for this
24974c27a833SDavid van Moolenbroek 			 * caller, we must simply merge in the new operations.
24984c27a833SDavid van Moolenbroek 			 */
24994c27a833SDavid van Moolenbroek 			sock->sock_selops |= ops;
25004c27a833SDavid van Moolenbroek 		} else {
25014c27a833SDavid van Moolenbroek 			assert(sel->ss_endpt != NONE);
25024c27a833SDavid van Moolenbroek 
25034c27a833SDavid van Moolenbroek 			sock->sock_select = *sel;
25044c27a833SDavid van Moolenbroek 			sock->sock_selops = ops;
25054c27a833SDavid van Moolenbroek 		}
25064c27a833SDavid van Moolenbroek 	}
25074c27a833SDavid van Moolenbroek 
25084c27a833SDavid van Moolenbroek 	return r;
25094c27a833SDavid van Moolenbroek }
25104c27a833SDavid van Moolenbroek 
/*
 * Alarm callback: expire all timers that are due at the given time 'now'.  A
 * socket driver that does not hand its clock notification messages to
 * libsockevent has to call expire_timers(3) by itself instead.
 */
static void
sockevent_alarm(clock_t now)
{
	expire_timers(now);
}
25224c27a833SDavid van Moolenbroek 
25234c27a833SDavid van Moolenbroek static const struct sockdriver sockevent_tab = {
25244c27a833SDavid van Moolenbroek 	.sdr_socket		= sockevent_socket,
25254c27a833SDavid van Moolenbroek 	.sdr_socketpair		= sockevent_socketpair,
25264c27a833SDavid van Moolenbroek 	.sdr_bind		= sockevent_bind,
25274c27a833SDavid van Moolenbroek 	.sdr_connect		= sockevent_connect,
25284c27a833SDavid van Moolenbroek 	.sdr_listen		= sockevent_listen,
25294c27a833SDavid van Moolenbroek 	.sdr_accept		= sockevent_accept,
25304c27a833SDavid van Moolenbroek 	.sdr_send		= sockevent_send,
25314c27a833SDavid van Moolenbroek 	.sdr_recv		= sockevent_recv,
25324c27a833SDavid van Moolenbroek 	.sdr_ioctl		= sockevent_ioctl,
25334c27a833SDavid van Moolenbroek 	.sdr_setsockopt		= sockevent_setsockopt,
25344c27a833SDavid van Moolenbroek 	.sdr_getsockopt		= sockevent_getsockopt,
25354c27a833SDavid van Moolenbroek 	.sdr_getsockname	= sockevent_getsockname,
25364c27a833SDavid van Moolenbroek 	.sdr_getpeername	= sockevent_getpeername,
25374c27a833SDavid van Moolenbroek 	.sdr_shutdown		= sockevent_shutdown,
25384c27a833SDavid van Moolenbroek 	.sdr_close		= sockevent_close,
25394c27a833SDavid van Moolenbroek 	.sdr_cancel		= sockevent_cancel,
25404c27a833SDavid van Moolenbroek 	.sdr_select		= sockevent_select,
25414c27a833SDavid van Moolenbroek 	.sdr_alarm		= sockevent_alarm
25424c27a833SDavid van Moolenbroek };
25434c27a833SDavid van Moolenbroek 
25444c27a833SDavid van Moolenbroek /*
25454c27a833SDavid van Moolenbroek  * Initialize the socket event library.
25464c27a833SDavid van Moolenbroek  */
25474c27a833SDavid van Moolenbroek void
sockevent_init(sockevent_socket_cb_t socket_cb)25484c27a833SDavid van Moolenbroek sockevent_init(sockevent_socket_cb_t socket_cb)
25494c27a833SDavid van Moolenbroek {
25504c27a833SDavid van Moolenbroek 
25514c27a833SDavid van Moolenbroek 	sockhash_init();
25524c27a833SDavid van Moolenbroek 
25534c27a833SDavid van Moolenbroek 	socktimer_init();
25544c27a833SDavid van Moolenbroek 
25554c27a833SDavid van Moolenbroek 	sockevent_proc_init();
25564c27a833SDavid van Moolenbroek 
25574c27a833SDavid van Moolenbroek 	SIMPLEQ_INIT(&sockevent_pending);
25584c27a833SDavid van Moolenbroek 
25594c27a833SDavid van Moolenbroek 	assert(socket_cb != NULL);
25604c27a833SDavid van Moolenbroek 	sockevent_socket_cb = socket_cb;
25614c27a833SDavid van Moolenbroek 
25624c27a833SDavid van Moolenbroek 	/* Announce we are up. */
25634c27a833SDavid van Moolenbroek 	sockdriver_announce();
25644c27a833SDavid van Moolenbroek 
25654c27a833SDavid van Moolenbroek 	sockevent_working = FALSE;
25664c27a833SDavid van Moolenbroek }
25674c27a833SDavid van Moolenbroek 
25684c27a833SDavid van Moolenbroek /*
25694c27a833SDavid van Moolenbroek  * Process a socket driver request message.
25704c27a833SDavid van Moolenbroek  */
25714c27a833SDavid van Moolenbroek void
sockevent_process(const message * m_ptr,int ipc_status)25724c27a833SDavid van Moolenbroek sockevent_process(const message * m_ptr, int ipc_status)
25734c27a833SDavid van Moolenbroek {
25744c27a833SDavid van Moolenbroek 
25754c27a833SDavid van Moolenbroek 	/* Block events until after we have processed the request. */
25764c27a833SDavid van Moolenbroek 	assert(!sockevent_working);
25774c27a833SDavid van Moolenbroek 	sockevent_working = TRUE;
25784c27a833SDavid van Moolenbroek 
25794c27a833SDavid van Moolenbroek 	/* Actually process the request. */
25804c27a833SDavid van Moolenbroek 	sockdriver_process(&sockevent_tab, m_ptr, ipc_status);
25814c27a833SDavid van Moolenbroek 
25824c27a833SDavid van Moolenbroek 	/*
25834c27a833SDavid van Moolenbroek 	 * If any events were fired while processing the request, they will
25844c27a833SDavid van Moolenbroek 	 * have been queued for later.  Go through them now.
25854c27a833SDavid van Moolenbroek 	 */
25864c27a833SDavid van Moolenbroek 	if (sockevent_has_events())
25874c27a833SDavid van Moolenbroek 		sockevent_pump();
25884c27a833SDavid van Moolenbroek 
25894c27a833SDavid van Moolenbroek 	sockevent_working = FALSE;
25904c27a833SDavid van Moolenbroek }
2591