127852ebeSDavid van Moolenbroek /* UNIX Domain Sockets - io.c - sending and receiving */
227852ebeSDavid van Moolenbroek
327852ebeSDavid van Moolenbroek #include "uds.h"
427852ebeSDavid van Moolenbroek #include <sys/mman.h>
527852ebeSDavid van Moolenbroek
/*
 * Our UDS sockets do not have a send buffer. They only have a receive buffer.
 * This receive buffer, when not empty, is split up in segments. Each segment
 * may contain regular data, ancillary data, both, or (for SOCK_SEQPACKET and
 * SOCK_DGRAM) neither. There are two types of ancillary data: in-flight file
 * descriptors and sender credentials. In addition, for SOCK_DGRAM sockets,
 * the segment may contain the sender's socket path (if the sender's socket is
 * bound). Each segment has a header, containing the full segment size, the
 * size of the actual data in the segment (if any), and a flags field that
 * states which ancillary data are associated with the segment (if any). For
 * SOCK_STREAM type sockets, new data may be merged into a previous segment,
 * but only if it has no ancillary data. For the other two socket types, each
 * packet has its own header. The resulting behavior should be in line with
 * the POSIX "Socket Receive Queue" specification.
 *
 * More specifically, each segment consists of the following parts:
 * - always a five-byte header, containing a two-byte segment length (including
 *   the header, so always non-zero), a two-byte regular data length (zero or
 *   more), and a one-byte flags field which is a bitwise combination of
 *   UDS_HAS_{FDS,CRED,PATH} flags;
 * - next, if UDS_HAS_CRED is set in the segment header: a sockcred structure;
 *   since this structure is variable-size, the structure is preceded by a
 *   single byte that contains the length of the structure (excluding the byte
 *   itself, thus ranging from sizeof(struct sockcred) to UDS_MAXCREDLEN);
 * - next, if UDS_HAS_PATH is set in the segment header: a single byte that
 *   contains the length of the sender's socket path, followed by the path
 *   itself, stored without null terminator;
 * - next, if the data length is non-zero, the actual regular data.
 * If the segment is not the last in the receive buffer, it is followed by the
 * next segment immediately afterward. There is no alignment.
 *
 * It is the sender's responsibility to merge new data into the last segment
 * whenever possible, so that the receiver side never needs to consider more
 * than one segment at once. In order to allow such merging, each receive
 * buffer has not only a tail and in-use length (pointing to the head when
 * combined) but also an offset from the tail to the last header, if any. Note
 * that the receiver may over time still look at multiple segments for a single
 * request: this happens when a MSG_WAITALL request empties the buffer and then
 * blocks - the next piece of arriving data can then obviously not be merged.
 *
 * If a segment has the UDS_HAS_FDS flag set, then one or more in-flight file
 * descriptors are associated with the segment. These are stored in a separate
 * data structure, mainly to simplify cleaning up when the socket is shut down
 * for reading or closed. That structure also contains the number of file
 * descriptors associated with the current segment, so this is not stored in
 * the segment itself. As mentioned later, this may be changed in the future.
 *
 * On the sender side, there is a trade-off between fully utilizing the receive
 * buffer, and not repeatedly performing expensive actions for the same call:
 * it may be costly to determine exactly how many in-flight file descriptors
 * there will be (if any) and/or how much space is needed to store credentials.
 * We currently use the policy that we rather block/reject a send request that
 * may (just) have fit in the remaining part of the receive buffer, than obtain
 * the same information multiple times or keep state between callbacks. In
 * practice this is not expected to make a difference, especially since
 * transfer of ancillary data should be rare anyway.
 */
/*
 * The current layout of the segment header is as follows.
 *
 * The first byte contains the upper eight bits of the total segment length.
 * The second byte contains the lower eight bits of the total segment length.
 * The third byte contains the upper eight bits of the data length.
 * The fourth byte contains the lower eight bits of the data length.
 * The fifth byte is a bitmask for ancillary data associated with the segment.
 */
#define UDS_HDRLEN	5	/* size of a segment header, in bytes */

/* Bits of the flags byte (fifth header byte); may be combined bitwise. */
#define UDS_HAS_FDS	0x01	/* segment has in-flight file descriptors */
#define UDS_HAS_CRED	0x02	/* segment has sender credentials */
#define UDS_HAS_PATH	0x04	/* segment has source socket path */

/* Largest credentials structure that may be stored in a segment. */
#define UDS_MAXCREDLEN	SOCKCREDSIZE(NGROUPS_MAX)

/*
 * Absolute receive buffer position of the head: the tail position plus the
 * in-use length, wrapped around the end of the buffer.
 */
#define uds_get_head(uds)	\
	((size_t)((uds)->uds_tail + (uds)->uds_len) % UDS_BUF)
/*
 * Absolute receive buffer position of the last segment header: the tail
 * position plus the offset to the last header, wrapped around the end of the
 * buffer.
 */
#define uds_get_last(uds)	\
	((size_t)((uds)->uds_tail + (uds)->uds_last) % UDS_BUF)
/* Move absolute position 'pos' forward by 'add' bytes, wrapping at UDS_BUF. */
#define uds_advance(pos,add)	(((pos) + (add)) % UDS_BUF)
8327852ebeSDavid van Moolenbroek
/*
 * All in-flight file descriptors are (co-)owned by the UDS driver itself, as
 * local open file descriptors. Like any other process, the UDS driver can not
 * have more than OPEN_MAX open file descriptors at any time. Thus, this is
 * also the inherent maximum number of in-flight file descriptors. Therefore,
 * we maintain a single pool of in-flight FD structures, and we associate these
 * structures with sockets as needed.
 */
static struct uds_fd uds_fds[OPEN_MAX];
/* Pool entries that are currently not associated with any socket. */
static SIMPLEQ_HEAD(uds_freefds, uds_fd) uds_freefds;

/*
 * NOTE(review): presumably scratch space for control (ancillary) data and for
 * the file descriptors carried in it; the code using these buffers is not
 * part of this section of the file - confirm against the send/receive code.
 */
static char uds_ctlbuf[UDS_CTL_MAX];
static int uds_ctlfds[UDS_CTL_MAX / sizeof(int)];
9727852ebeSDavid van Moolenbroek
9827852ebeSDavid van Moolenbroek /*
9927852ebeSDavid van Moolenbroek * Initialize the input/output part of the UDS service.
10027852ebeSDavid van Moolenbroek */
10127852ebeSDavid van Moolenbroek void
uds_io_init(void)10227852ebeSDavid van Moolenbroek uds_io_init(void)
10327852ebeSDavid van Moolenbroek {
10427852ebeSDavid van Moolenbroek unsigned int slot;
10527852ebeSDavid van Moolenbroek
10627852ebeSDavid van Moolenbroek SIMPLEQ_INIT(&uds_freefds);
10727852ebeSDavid van Moolenbroek
10827852ebeSDavid van Moolenbroek for (slot = 0; slot < __arraycount(uds_fds); slot++)
10927852ebeSDavid van Moolenbroek SIMPLEQ_INSERT_TAIL(&uds_freefds, &uds_fds[slot], ufd_next);
11027852ebeSDavid van Moolenbroek }
11127852ebeSDavid van Moolenbroek
11227852ebeSDavid van Moolenbroek /*
11327852ebeSDavid van Moolenbroek * Set up all input/output state for the given socket, which has just been
11427852ebeSDavid van Moolenbroek * allocated. As part of this, allocate memory for the receive buffer of the
11527852ebeSDavid van Moolenbroek * socket. Return OK or a negative error code.
11627852ebeSDavid van Moolenbroek */
11727852ebeSDavid van Moolenbroek int
uds_io_setup(struct udssock * uds)11827852ebeSDavid van Moolenbroek uds_io_setup(struct udssock * uds)
11927852ebeSDavid van Moolenbroek {
12027852ebeSDavid van Moolenbroek
12127852ebeSDavid van Moolenbroek /* TODO: decide if we should preallocate the memory. */
12227852ebeSDavid van Moolenbroek if ((uds->uds_buf = mmap(NULL, UDS_BUF, PROT_READ | PROT_WRITE,
12327852ebeSDavid van Moolenbroek MAP_ANON | MAP_PRIVATE, -1, 0)) == MAP_FAILED)
12427852ebeSDavid van Moolenbroek return ENOMEM;
12527852ebeSDavid van Moolenbroek
12627852ebeSDavid van Moolenbroek uds->uds_tail = 0;
12727852ebeSDavid van Moolenbroek uds->uds_len = 0;
12827852ebeSDavid van Moolenbroek uds->uds_last = 0;
12927852ebeSDavid van Moolenbroek
13027852ebeSDavid van Moolenbroek SIMPLEQ_INIT(&uds->uds_fds);
13127852ebeSDavid van Moolenbroek
13227852ebeSDavid van Moolenbroek return OK;
13327852ebeSDavid van Moolenbroek }
13427852ebeSDavid van Moolenbroek
13527852ebeSDavid van Moolenbroek /*
13627852ebeSDavid van Moolenbroek * Clean up the input/output state for the given socket, which is about to be
13727852ebeSDavid van Moolenbroek * freed. As part of this, deallocate memory for the receive buffer and close
13827852ebeSDavid van Moolenbroek * any file descriptors still in flight on the socket.
13927852ebeSDavid van Moolenbroek */
14027852ebeSDavid van Moolenbroek void
uds_io_cleanup(struct udssock * uds)14127852ebeSDavid van Moolenbroek uds_io_cleanup(struct udssock * uds)
14227852ebeSDavid van Moolenbroek {
14327852ebeSDavid van Moolenbroek
14427852ebeSDavid van Moolenbroek /* Close any in-flight file descriptors. */
14527852ebeSDavid van Moolenbroek uds_io_reset(uds);
14627852ebeSDavid van Moolenbroek
14727852ebeSDavid van Moolenbroek /* Free the receive buffer memory. */
14827852ebeSDavid van Moolenbroek if (munmap(uds->uds_buf, UDS_BUF) != 0)
14927852ebeSDavid van Moolenbroek panic("UDS: munmap failed: %d", errno);
15027852ebeSDavid van Moolenbroek }
15127852ebeSDavid van Moolenbroek
15227852ebeSDavid van Moolenbroek /*
15327852ebeSDavid van Moolenbroek * The socket is being closed or shut down for reading. If there are still any
15427852ebeSDavid van Moolenbroek * in-flight file descriptors, theey will never be received anymore, so close
15527852ebeSDavid van Moolenbroek * them now.
15627852ebeSDavid van Moolenbroek */
15727852ebeSDavid van Moolenbroek void
uds_io_reset(struct udssock * uds)15827852ebeSDavid van Moolenbroek uds_io_reset(struct udssock * uds)
15927852ebeSDavid van Moolenbroek {
16027852ebeSDavid van Moolenbroek struct uds_fd *ufd;
16127852ebeSDavid van Moolenbroek
16227852ebeSDavid van Moolenbroek /*
16327852ebeSDavid van Moolenbroek * The UDS service may have the last and only reference to any of these
16427852ebeSDavid van Moolenbroek * file descriptors here. For that reason, we currently disallow
16527852ebeSDavid van Moolenbroek * transfer of UDS file descriptors, because the close(2) here could
16627852ebeSDavid van Moolenbroek * block on a socket close operation back to us, leading to a deadlock.
16727852ebeSDavid van Moolenbroek * Also, we use a non-blocking variant of close(2), to prevent that we
16827852ebeSDavid van Moolenbroek * end up hanging on sockets with SO_LINGER turned on.
16927852ebeSDavid van Moolenbroek */
17027852ebeSDavid van Moolenbroek SIMPLEQ_FOREACH(ufd, &uds->uds_fds, ufd_next) {
17127852ebeSDavid van Moolenbroek dprintf(("UDS: closing local fd %d\n", ufd->ufd_fd));
17227852ebeSDavid van Moolenbroek
17327852ebeSDavid van Moolenbroek closenb(ufd->ufd_fd);
17427852ebeSDavid van Moolenbroek }
17527852ebeSDavid van Moolenbroek
17627852ebeSDavid van Moolenbroek SIMPLEQ_CONCAT(&uds_freefds, &uds->uds_fds);
17727852ebeSDavid van Moolenbroek
17827852ebeSDavid van Moolenbroek /*
17927852ebeSDavid van Moolenbroek * If this reset happens as part of a shutdown, it might be done
18027852ebeSDavid van Moolenbroek * again on close, so ensure that it will find a clean state. The
18127852ebeSDavid van Moolenbroek * receive buffer should never be looked at again either way, but reset
18227852ebeSDavid van Moolenbroek * it too just to be sure.
18327852ebeSDavid van Moolenbroek */
18427852ebeSDavid van Moolenbroek uds->uds_tail = 0;
18527852ebeSDavid van Moolenbroek uds->uds_len = 0;
18627852ebeSDavid van Moolenbroek uds->uds_last = 0;
18727852ebeSDavid van Moolenbroek
18827852ebeSDavid van Moolenbroek SIMPLEQ_INIT(&uds->uds_fds);
18927852ebeSDavid van Moolenbroek }
19027852ebeSDavid van Moolenbroek
19127852ebeSDavid van Moolenbroek /*
19227852ebeSDavid van Moolenbroek * Return the maximum usable part of the receive buffer, in bytes. The return
19327852ebeSDavid van Moolenbroek * value is used for the SO_SNDBUF and SO_RCVBUF socket options.
19427852ebeSDavid van Moolenbroek */
19527852ebeSDavid van Moolenbroek size_t
uds_io_buflen(void)19627852ebeSDavid van Moolenbroek uds_io_buflen(void)
19727852ebeSDavid van Moolenbroek {
19827852ebeSDavid van Moolenbroek
19927852ebeSDavid van Moolenbroek /*
20027852ebeSDavid van Moolenbroek * TODO: it would be nicer if at least for SOCK_STREAM-type sockets, we
20127852ebeSDavid van Moolenbroek * could use the full receive buffer for data. This would require that
20227852ebeSDavid van Moolenbroek * we store up to one header in the socket object rather than in the
20327852ebeSDavid van Moolenbroek * receive buffer.
20427852ebeSDavid van Moolenbroek */
20527852ebeSDavid van Moolenbroek return UDS_BUF - UDS_HDRLEN;
20627852ebeSDavid van Moolenbroek }
20727852ebeSDavid van Moolenbroek
20827852ebeSDavid van Moolenbroek /*
20927852ebeSDavid van Moolenbroek * Fetch 'len' bytes starting from absolute position 'pos' into the receive
21027852ebeSDavid van Moolenbroek * buffer of socket 'uds', and copy them into the buffer pointed to by 'ptr'.
21127852ebeSDavid van Moolenbroek * Return the absolute position of the first byte after the fetched data in the
21227852ebeSDavid van Moolenbroek * receive buffer.
21327852ebeSDavid van Moolenbroek */
21427852ebeSDavid van Moolenbroek static size_t
uds_fetch(struct udssock * uds,size_t off,void * ptr,size_t len)21527852ebeSDavid van Moolenbroek uds_fetch(struct udssock * uds, size_t off, void * ptr, size_t len)
21627852ebeSDavid van Moolenbroek {
21727852ebeSDavid van Moolenbroek size_t left;
21827852ebeSDavid van Moolenbroek
21927852ebeSDavid van Moolenbroek assert(off < UDS_BUF);
22027852ebeSDavid van Moolenbroek
22127852ebeSDavid van Moolenbroek left = UDS_BUF - off;
22227852ebeSDavid van Moolenbroek if (len >= left) {
22327852ebeSDavid van Moolenbroek memcpy(ptr, &uds->uds_buf[off], left);
22427852ebeSDavid van Moolenbroek
22527852ebeSDavid van Moolenbroek if ((len -= left) > 0)
22627852ebeSDavid van Moolenbroek memcpy((char *)ptr + left, &uds->uds_buf[0], len);
22727852ebeSDavid van Moolenbroek
22827852ebeSDavid van Moolenbroek return len;
22927852ebeSDavid van Moolenbroek } else {
23027852ebeSDavid van Moolenbroek memcpy(ptr, &uds->uds_buf[off], len);
23127852ebeSDavid van Moolenbroek
23227852ebeSDavid van Moolenbroek return off + len;
23327852ebeSDavid van Moolenbroek }
23427852ebeSDavid van Moolenbroek }
23527852ebeSDavid van Moolenbroek
23627852ebeSDavid van Moolenbroek /*
23727852ebeSDavid van Moolenbroek * Store 'len' bytes from the buffer pointed to by 'ptr' into the receive
23827852ebeSDavid van Moolenbroek * buffer of socket 'uds', starting at absolute position 'pos' into the receive
23927852ebeSDavid van Moolenbroek * buffer. Return the absolute position of the first byte after the stored
24027852ebeSDavid van Moolenbroek * data in the receive buffer.
24127852ebeSDavid van Moolenbroek */
24227852ebeSDavid van Moolenbroek static size_t
uds_store(struct udssock * uds,size_t off,const void * ptr,size_t len)24327852ebeSDavid van Moolenbroek uds_store(struct udssock * uds, size_t off, const void * ptr, size_t len)
24427852ebeSDavid van Moolenbroek {
24527852ebeSDavid van Moolenbroek size_t left;
24627852ebeSDavid van Moolenbroek
24727852ebeSDavid van Moolenbroek assert(off < UDS_BUF);
24827852ebeSDavid van Moolenbroek
24927852ebeSDavid van Moolenbroek left = UDS_BUF - off;
25027852ebeSDavid van Moolenbroek if (len >= left) {
25127852ebeSDavid van Moolenbroek memcpy(&uds->uds_buf[off], ptr, left);
25227852ebeSDavid van Moolenbroek
25327852ebeSDavid van Moolenbroek if ((len -= left) > 0)
25427852ebeSDavid van Moolenbroek memcpy(&uds->uds_buf[0], (const char *)ptr + left,
25527852ebeSDavid van Moolenbroek len);
25627852ebeSDavid van Moolenbroek
25727852ebeSDavid van Moolenbroek return len;
25827852ebeSDavid van Moolenbroek } else {
25927852ebeSDavid van Moolenbroek memcpy(&uds->uds_buf[off], ptr, len);
26027852ebeSDavid van Moolenbroek
26127852ebeSDavid van Moolenbroek return off + len;
26227852ebeSDavid van Moolenbroek }
26327852ebeSDavid van Moolenbroek }
26427852ebeSDavid van Moolenbroek
26527852ebeSDavid van Moolenbroek /*
26627852ebeSDavid van Moolenbroek * Fetch a segment header previously stored in the receive buffer of socket
26727852ebeSDavid van Moolenbroek * 'uds' at absolute position 'off'. Return the absolute position of the first
26827852ebeSDavid van Moolenbroek * byte after the header, as well as the entire segment length in 'seglen', the
26927852ebeSDavid van Moolenbroek * length of the data in the segment in 'datalen', and the segment flags in
27027852ebeSDavid van Moolenbroek * 'segflags'.
27127852ebeSDavid van Moolenbroek */
27227852ebeSDavid van Moolenbroek static size_t
uds_fetch_hdr(struct udssock * uds,size_t off,size_t * seglen,size_t * datalen,unsigned int * segflags)27327852ebeSDavid van Moolenbroek uds_fetch_hdr(struct udssock * uds, size_t off, size_t * seglen,
27427852ebeSDavid van Moolenbroek size_t * datalen, unsigned int * segflags)
27527852ebeSDavid van Moolenbroek {
27627852ebeSDavid van Moolenbroek unsigned char hdr[UDS_HDRLEN];
27727852ebeSDavid van Moolenbroek
27827852ebeSDavid van Moolenbroek off = uds_fetch(uds, off, hdr, sizeof(hdr));
27927852ebeSDavid van Moolenbroek
28027852ebeSDavid van Moolenbroek *seglen = ((size_t)hdr[0] << 8) | (size_t)hdr[1];
28127852ebeSDavid van Moolenbroek *datalen = ((size_t)hdr[2] << 8) | (size_t)hdr[3];
28227852ebeSDavid van Moolenbroek *segflags = hdr[4];
28327852ebeSDavid van Moolenbroek
28427852ebeSDavid van Moolenbroek assert(*seglen >= UDS_HDRLEN);
28527852ebeSDavid van Moolenbroek assert(*seglen <= uds->uds_len);
28627852ebeSDavid van Moolenbroek assert(*datalen <= *seglen - UDS_HDRLEN);
28727852ebeSDavid van Moolenbroek assert(*segflags != 0 || *datalen == *seglen - UDS_HDRLEN);
28827852ebeSDavid van Moolenbroek assert(!(*segflags & ~(UDS_HAS_FDS | UDS_HAS_CRED | UDS_HAS_PATH)));
28927852ebeSDavid van Moolenbroek
29027852ebeSDavid van Moolenbroek return off;
29127852ebeSDavid van Moolenbroek }
29227852ebeSDavid van Moolenbroek
29327852ebeSDavid van Moolenbroek /*
29427852ebeSDavid van Moolenbroek * Store a segment header in the receive buffer of socket 'uds' at absolute
29527852ebeSDavid van Moolenbroek * position 'off', with the segment length 'seglen', the segment data length
29627852ebeSDavid van Moolenbroek * 'datalen', and the segment flags 'segflags'. Return the absolute receive
29727852ebeSDavid van Moolenbroek * buffer position of the first data byte after the stored header.
29827852ebeSDavid van Moolenbroek */
29927852ebeSDavid van Moolenbroek static size_t
uds_store_hdr(struct udssock * uds,size_t off,size_t seglen,size_t datalen,unsigned int segflags)30027852ebeSDavid van Moolenbroek uds_store_hdr(struct udssock * uds, size_t off, size_t seglen, size_t datalen,
30127852ebeSDavid van Moolenbroek unsigned int segflags)
30227852ebeSDavid van Moolenbroek {
30327852ebeSDavid van Moolenbroek unsigned char hdr[UDS_HDRLEN];
30427852ebeSDavid van Moolenbroek
30527852ebeSDavid van Moolenbroek assert(seglen <= USHRT_MAX);
30627852ebeSDavid van Moolenbroek assert(datalen <= seglen);
30727852ebeSDavid van Moolenbroek assert(segflags <= UCHAR_MAX);
30827852ebeSDavid van Moolenbroek assert(!(segflags & ~(UDS_HAS_FDS | UDS_HAS_CRED | UDS_HAS_PATH)));
30927852ebeSDavid van Moolenbroek
31027852ebeSDavid van Moolenbroek hdr[0] = (seglen >> 8) & 0xff;
31127852ebeSDavid van Moolenbroek hdr[1] = seglen & 0xff;
31227852ebeSDavid van Moolenbroek hdr[2] = (datalen >> 8) & 0xff;
31327852ebeSDavid van Moolenbroek hdr[3] = datalen & 0xff;
31427852ebeSDavid van Moolenbroek hdr[4] = segflags;
31527852ebeSDavid van Moolenbroek
31627852ebeSDavid van Moolenbroek return uds_store(uds, off, hdr, sizeof(hdr));
31727852ebeSDavid van Moolenbroek }
31827852ebeSDavid van Moolenbroek
31927852ebeSDavid van Moolenbroek /*
32027852ebeSDavid van Moolenbroek * Perform initial checks on a send request, before it may potentially be
32127852ebeSDavid van Moolenbroek * suspended. Return OK if this send request is valid, or a negative error
32227852ebeSDavid van Moolenbroek * code if it is not.
32327852ebeSDavid van Moolenbroek */
32427852ebeSDavid van Moolenbroek int
uds_pre_send(struct sock * sock,size_t len,socklen_t ctl_len __unused,const struct sockaddr * addr,socklen_t addr_len __unused,endpoint_t user_endpt __unused,int flags)32527852ebeSDavid van Moolenbroek uds_pre_send(struct sock * sock, size_t len, socklen_t ctl_len __unused,
32627852ebeSDavid van Moolenbroek const struct sockaddr * addr, socklen_t addr_len __unused,
32727852ebeSDavid van Moolenbroek endpoint_t user_endpt __unused, int flags)
32827852ebeSDavid van Moolenbroek {
32927852ebeSDavid van Moolenbroek struct udssock *uds = (struct udssock *)sock;
33027852ebeSDavid van Moolenbroek size_t pathlen;
33127852ebeSDavid van Moolenbroek
33227852ebeSDavid van Moolenbroek /*
33327852ebeSDavid van Moolenbroek * Reject calls with unknown flags. Besides the flags handled entirely
33427852ebeSDavid van Moolenbroek * by libsockevent (which are not part of 'flags' here), that is all of
33527852ebeSDavid van Moolenbroek * them. TODO: ensure that we should really reject all other flags
33627852ebeSDavid van Moolenbroek * rather than ignore them.
33727852ebeSDavid van Moolenbroek */
33827852ebeSDavid van Moolenbroek if (flags != 0)
33927852ebeSDavid van Moolenbroek return EOPNOTSUPP;
34027852ebeSDavid van Moolenbroek
34127852ebeSDavid van Moolenbroek /*
34227852ebeSDavid van Moolenbroek * Perform very basic address and message size checks on the send call.
34327852ebeSDavid van Moolenbroek * For non-stream sockets, we must reject packets that may never fit in
34427852ebeSDavid van Moolenbroek * the receive buffer, or otherwise (at least for SOCK_SEQPACKET) the
34527852ebeSDavid van Moolenbroek * send call may end up being suspended indefinitely. Therefore, we
34627852ebeSDavid van Moolenbroek * assume the worst-case scenario, which is that a full set of
34727852ebeSDavid van Moolenbroek * credentials must be associated with the packet. As a result, we may
34827852ebeSDavid van Moolenbroek * reject some large packets that could actually just fit. Checking
34927852ebeSDavid van Moolenbroek * the peer's LOCAL_CREDS setting here is not safe: even if we know the
35027852ebeSDavid van Moolenbroek * peer already at all (for SOCK_DGRAM we do not), the send may still
35127852ebeSDavid van Moolenbroek * block and the option toggled before it unblocks.
35227852ebeSDavid van Moolenbroek */
35327852ebeSDavid van Moolenbroek switch (uds_get_type(uds)) {
35427852ebeSDavid van Moolenbroek case SOCK_STREAM:
35527852ebeSDavid van Moolenbroek /* Nothing to check for this case. */
35627852ebeSDavid van Moolenbroek break;
35727852ebeSDavid van Moolenbroek
35827852ebeSDavid van Moolenbroek case SOCK_SEQPACKET:
35927852ebeSDavid van Moolenbroek if (len > UDS_BUF - UDS_HDRLEN - 1 - UDS_MAXCREDLEN)
36027852ebeSDavid van Moolenbroek return EMSGSIZE;
36127852ebeSDavid van Moolenbroek
36227852ebeSDavid van Moolenbroek break;
36327852ebeSDavid van Moolenbroek
36427852ebeSDavid van Moolenbroek case SOCK_DGRAM:
36527852ebeSDavid van Moolenbroek if (!uds_has_link(uds) && addr == NULL)
36627852ebeSDavid van Moolenbroek return EDESTADDRREQ;
36727852ebeSDavid van Moolenbroek
36827852ebeSDavid van Moolenbroek /*
36927852ebeSDavid van Moolenbroek * The path is stored without null terminator, but with leading
37027852ebeSDavid van Moolenbroek * byte containing the path length--if there is a path at all.
37127852ebeSDavid van Moolenbroek */
37227852ebeSDavid van Moolenbroek pathlen = (size_t)uds->uds_pathlen;
37327852ebeSDavid van Moolenbroek if (pathlen > 0)
37427852ebeSDavid van Moolenbroek pathlen++;
37527852ebeSDavid van Moolenbroek
37627852ebeSDavid van Moolenbroek if (len > UDS_BUF - UDS_HDRLEN - pathlen - 1 - UDS_MAXCREDLEN)
37727852ebeSDavid van Moolenbroek return EMSGSIZE;
37827852ebeSDavid van Moolenbroek
37927852ebeSDavid van Moolenbroek break;
38027852ebeSDavid van Moolenbroek
38127852ebeSDavid van Moolenbroek default:
38227852ebeSDavid van Moolenbroek assert(0);
38327852ebeSDavid van Moolenbroek }
38427852ebeSDavid van Moolenbroek
38527852ebeSDavid van Moolenbroek return OK;
38627852ebeSDavid van Moolenbroek }
38727852ebeSDavid van Moolenbroek
38827852ebeSDavid van Moolenbroek /*
38927852ebeSDavid van Moolenbroek * Determine whether the (real or pretend) send request should be processed
39027852ebeSDavid van Moolenbroek * now, suspended until later, or rejected based on the current socket state.
39127852ebeSDavid van Moolenbroek * Return OK if the send request should be processed now. Return SUSPEND if
39227852ebeSDavid van Moolenbroek * the send request should be retried later. Return an appropriate negative
39327852ebeSDavid van Moolenbroek * error code if the send request should fail.
39427852ebeSDavid van Moolenbroek */
39527852ebeSDavid van Moolenbroek static int
uds_send_test(struct udssock * uds,size_t len,socklen_t ctl_len,size_t min,int partial)39627852ebeSDavid van Moolenbroek uds_send_test(struct udssock * uds, size_t len, socklen_t ctl_len, size_t min,
39727852ebeSDavid van Moolenbroek int partial)
39827852ebeSDavid van Moolenbroek {
39927852ebeSDavid van Moolenbroek struct udssock *conn;
40027852ebeSDavid van Moolenbroek size_t avail, hdrlen, credlen;
40127852ebeSDavid van Moolenbroek
40227852ebeSDavid van Moolenbroek assert(!uds_is_shutdown(uds, SFL_SHUT_WR));
40327852ebeSDavid van Moolenbroek
40427852ebeSDavid van Moolenbroek if (uds_get_type(uds) != SOCK_DGRAM) {
40527852ebeSDavid van Moolenbroek if (uds_is_connecting(uds))
40627852ebeSDavid van Moolenbroek return SUSPEND;
40727852ebeSDavid van Moolenbroek if (!uds_is_connected(uds) && !uds_is_disconnected(uds))
40827852ebeSDavid van Moolenbroek return ENOTCONN;
40927852ebeSDavid van Moolenbroek if (!uds_has_conn(uds))
41027852ebeSDavid van Moolenbroek return EPIPE;
41127852ebeSDavid van Moolenbroek
41227852ebeSDavid van Moolenbroek conn = uds->uds_conn;
41327852ebeSDavid van Moolenbroek
41427852ebeSDavid van Moolenbroek if (uds_is_shutdown(conn, SFL_SHUT_RD))
41527852ebeSDavid van Moolenbroek return EPIPE;
41627852ebeSDavid van Moolenbroek
41727852ebeSDavid van Moolenbroek /*
41827852ebeSDavid van Moolenbroek * For connection-type sockets, we now have to check if there
41927852ebeSDavid van Moolenbroek * is enough room in the receive buffer. For SOCK_STREAM
42027852ebeSDavid van Moolenbroek * sockets, we must check if at least 'min' bytes can be moved
42127852ebeSDavid van Moolenbroek * into the receive buffer, at least if that is a reasonable
42227852ebeSDavid van Moolenbroek * value for ever making any forward progress at all. For
42327852ebeSDavid van Moolenbroek * SOCK_SEQPACKET sockets, we must check if the entire packet
42427852ebeSDavid van Moolenbroek * of size 'len' can be stored in the receive buffer. In both
42527852ebeSDavid van Moolenbroek * cases, we must take into account any metadata to store along
42627852ebeSDavid van Moolenbroek * with the data.
42727852ebeSDavid van Moolenbroek *
42827852ebeSDavid van Moolenbroek * Unlike in uds_pre_send(), we can now check safely whether
42927852ebeSDavid van Moolenbroek * the peer is expecting credentials, but we still don't know
43027852ebeSDavid van Moolenbroek * the actual size of the credentials, so again we take the
43127852ebeSDavid van Moolenbroek * maximum possible size. The same applies to file descriptors
43227852ebeSDavid van Moolenbroek * transferred via control data: all we have the control length
43327852ebeSDavid van Moolenbroek * right now, which if non-zero we assume to mean there might
43427852ebeSDavid van Moolenbroek * be file descriptors.
43527852ebeSDavid van Moolenbroek *
43627852ebeSDavid van Moolenbroek * In both cases, the reason of overestimating is that actually
43727852ebeSDavid van Moolenbroek * getting accurate sizes, by obtaining credentials or copying
43827852ebeSDavid van Moolenbroek * in control data, is very costly. We want to do that only
43927852ebeSDavid van Moolenbroek * when we are sure we will not suspend the send call after
44027852ebeSDavid van Moolenbroek * all. It is no problem to overestimate how much space will
44127852ebeSDavid van Moolenbroek * be needed here, but not to underestimate: that could cause
44227852ebeSDavid van Moolenbroek * applications that use select(2) and non-blocking sockets to
44327852ebeSDavid van Moolenbroek * end up in a busy-wait loop.
44427852ebeSDavid van Moolenbroek */
44527852ebeSDavid van Moolenbroek if (!partial && (conn->uds_flags & UDSF_PASSCRED))
44627852ebeSDavid van Moolenbroek credlen = 1 + UDS_MAXCREDLEN;
44727852ebeSDavid van Moolenbroek else
44827852ebeSDavid van Moolenbroek credlen = 0;
44927852ebeSDavid van Moolenbroek
45027852ebeSDavid van Moolenbroek avail = UDS_BUF - conn->uds_len;
45127852ebeSDavid van Moolenbroek
45227852ebeSDavid van Moolenbroek if (uds_get_type(uds) == SOCK_STREAM) {
45327852ebeSDavid van Moolenbroek /*
45427852ebeSDavid van Moolenbroek * Limit the low threshold to the maximum that can ever
45527852ebeSDavid van Moolenbroek * be sent at once.
45627852ebeSDavid van Moolenbroek */
45727852ebeSDavid van Moolenbroek if (min > UDS_BUF - UDS_HDRLEN - credlen)
45827852ebeSDavid van Moolenbroek min = UDS_BUF - UDS_HDRLEN - credlen;
45927852ebeSDavid van Moolenbroek
46027852ebeSDavid van Moolenbroek /*
46127852ebeSDavid van Moolenbroek * Suspend the call only if not even the low threshold
46227852ebeSDavid van Moolenbroek * is met. Otherwise we may make (partial) progress.
46327852ebeSDavid van Moolenbroek */
46427852ebeSDavid van Moolenbroek if (len > min)
46527852ebeSDavid van Moolenbroek len = min;
46627852ebeSDavid van Moolenbroek
46727852ebeSDavid van Moolenbroek /*
46827852ebeSDavid van Moolenbroek * If the receive buffer already has at least one
46927852ebeSDavid van Moolenbroek * segment, and there are certainly no file descriptors
47027852ebeSDavid van Moolenbroek * to transfer now, and we do not have to store
47127852ebeSDavid van Moolenbroek * credentials either, then this segment can be merged
47227852ebeSDavid van Moolenbroek * with the previous one. In that case, we need no
47327852ebeSDavid van Moolenbroek * space for a header. That is certainly the case if
47427852ebeSDavid van Moolenbroek * we are resuming an already partially completed send.
47527852ebeSDavid van Moolenbroek */
47627852ebeSDavid van Moolenbroek hdrlen = (avail == UDS_BUF || ctl_len != 0 ||
47727852ebeSDavid van Moolenbroek credlen > 0) ? UDS_HDRLEN : 0;
47827852ebeSDavid van Moolenbroek } else
47927852ebeSDavid van Moolenbroek hdrlen = UDS_HDRLEN;
48027852ebeSDavid van Moolenbroek
48127852ebeSDavid van Moolenbroek if (avail < hdrlen + credlen + len)
48227852ebeSDavid van Moolenbroek return SUSPEND;
48327852ebeSDavid van Moolenbroek }
48427852ebeSDavid van Moolenbroek
48527852ebeSDavid van Moolenbroek return OK;
48627852ebeSDavid van Moolenbroek }
48727852ebeSDavid van Moolenbroek
48827852ebeSDavid van Moolenbroek /*
48927852ebeSDavid van Moolenbroek * Get the destination peer for a send request. The send test has already been
49027852ebeSDavid van Moolenbroek * performed first. On success, return OK, with a pointer to the peer socket
49127852ebeSDavid van Moolenbroek * stored in 'peerp'. On failure, return an appropriate error code.
49227852ebeSDavid van Moolenbroek */
49327852ebeSDavid van Moolenbroek static int
uds_send_peer(struct udssock * uds,const struct sockaddr * addr,socklen_t addr_len,endpoint_t user_endpt,struct udssock ** peerp)49427852ebeSDavid van Moolenbroek uds_send_peer(struct udssock * uds, const struct sockaddr * addr,
49527852ebeSDavid van Moolenbroek socklen_t addr_len, endpoint_t user_endpt, struct udssock ** peerp)
49627852ebeSDavid van Moolenbroek {
49727852ebeSDavid van Moolenbroek struct udssock *peer;
49827852ebeSDavid van Moolenbroek int r;
49927852ebeSDavid van Moolenbroek
50027852ebeSDavid van Moolenbroek if (uds_get_type(uds) == SOCK_DGRAM) {
50127852ebeSDavid van Moolenbroek if (!uds_has_link(uds)) {
50227852ebeSDavid van Moolenbroek /* This was already checked in uds_pre_check(). */
50327852ebeSDavid van Moolenbroek assert(addr != NULL);
50427852ebeSDavid van Moolenbroek
50527852ebeSDavid van Moolenbroek /*
50627852ebeSDavid van Moolenbroek * Find the socket identified by the given address.
50727852ebeSDavid van Moolenbroek * If it exists at all, see if it is a proper match.
50827852ebeSDavid van Moolenbroek */
50927852ebeSDavid van Moolenbroek if ((r = uds_lookup(uds, addr, addr_len, user_endpt,
51027852ebeSDavid van Moolenbroek &peer)) != OK)
51127852ebeSDavid van Moolenbroek return r;
51227852ebeSDavid van Moolenbroek
51327852ebeSDavid van Moolenbroek /*
51427852ebeSDavid van Moolenbroek * If the peer socket is connected to a target, it
51527852ebeSDavid van Moolenbroek * must be this socket. Unfortunately, POSIX does not
51627852ebeSDavid van Moolenbroek * specify an error code for this. We borrow Linux's.
51727852ebeSDavid van Moolenbroek */
51827852ebeSDavid van Moolenbroek if (uds_has_link(peer) && peer->uds_link != uds)
51927852ebeSDavid van Moolenbroek return EPERM;
52027852ebeSDavid van Moolenbroek } else
52127852ebeSDavid van Moolenbroek peer = uds->uds_link;
52227852ebeSDavid van Moolenbroek
52327852ebeSDavid van Moolenbroek /*
52427852ebeSDavid van Moolenbroek * If the receiving end will never receive this packet, we
52527852ebeSDavid van Moolenbroek * might as well not send it, so drop it immeiately. Indicate
52627852ebeSDavid van Moolenbroek * as such to the caller, using NetBSD's chosen error code.
52727852ebeSDavid van Moolenbroek */
52827852ebeSDavid van Moolenbroek if (uds_is_shutdown(peer, SFL_SHUT_RD))
52927852ebeSDavid van Moolenbroek return ENOBUFS;
53027852ebeSDavid van Moolenbroek } else {
53127852ebeSDavid van Moolenbroek assert(uds_has_conn(uds));
53227852ebeSDavid van Moolenbroek
53327852ebeSDavid van Moolenbroek peer = uds->uds_conn;
53427852ebeSDavid van Moolenbroek }
53527852ebeSDavid van Moolenbroek
53627852ebeSDavid van Moolenbroek *peerp = peer;
53727852ebeSDavid van Moolenbroek return OK;
53827852ebeSDavid van Moolenbroek }
53927852ebeSDavid van Moolenbroek
54027852ebeSDavid van Moolenbroek /*
54127852ebeSDavid van Moolenbroek * Generate a new segment for the current send request, or arrange things such
54227852ebeSDavid van Moolenbroek * that new data can be merged with a previous segment. As part of this,
54327852ebeSDavid van Moolenbroek * decide whether we can merge data at all. The segment will be merged if, and
54427852ebeSDavid van Moolenbroek * only if, all of the following requirements are met:
54527852ebeSDavid van Moolenbroek *
54627852ebeSDavid van Moolenbroek * 1) the socket is of type SOCK_STREAM;
54727852ebeSDavid van Moolenbroek * 2) there is a previous segment in the receive buffer;
54827852ebeSDavid van Moolenbroek * 3) there is no ancillary data for the current send request.
54927852ebeSDavid van Moolenbroek *
55027852ebeSDavid van Moolenbroek * Also copy in regular data (if any), retrieve the sender's credentials (if
55127852ebeSDavid van Moolenbroek * needed), and copy over the source path (if applicable). However, do not yet
55227852ebeSDavid van Moolenbroek * commit the segment (or the new part to be merged), because the send request
55327852ebeSDavid van Moolenbroek * may still fail for other reasons.
55427852ebeSDavid van Moolenbroek *
55527852ebeSDavid van Moolenbroek * On success, return the length of the new segment (or, when merging, the
55627852ebeSDavid van Moolenbroek * length to be added to the last segment), as well as a flag indicating
55727852ebeSDavid van Moolenbroek * whether we are merging into the last segment in 'mergep', the length of the
55827852ebeSDavid van Moolenbroek * (new) data in the segment in 'datalenp', and the new segment's flags in
55927852ebeSDavid van Moolenbroek * 'segflagsp' (always zero when merging). Note that a return value of zero
56027852ebeSDavid van Moolenbroek * implies that we are merging zero extra bytes into the last segment, which
56127852ebeSDavid van Moolenbroek * means that effectively nothing changes; in that case the send call will be
56227852ebeSDavid van Moolenbroek * cut short and return zero to the caller as well. On failure, return a
56327852ebeSDavid van Moolenbroek * negative error code.
56427852ebeSDavid van Moolenbroek */
56527852ebeSDavid van Moolenbroek static int
uds_send_data(struct udssock * uds,struct udssock * peer,const struct sockdriver_data * data,size_t len,size_t off,endpoint_t user_endpt,unsigned int nfds,int * __restrict mergep,size_t * __restrict datalenp,unsigned int * __restrict segflagsp)56627852ebeSDavid van Moolenbroek uds_send_data(struct udssock * uds, struct udssock * peer,
56727852ebeSDavid van Moolenbroek const struct sockdriver_data * data, size_t len, size_t off,
56827852ebeSDavid van Moolenbroek endpoint_t user_endpt, unsigned int nfds, int * __restrict mergep,
56927852ebeSDavid van Moolenbroek size_t * __restrict datalenp, unsigned int * __restrict segflagsp)
57027852ebeSDavid van Moolenbroek {
57127852ebeSDavid van Moolenbroek struct sockcred sockcred;
57227852ebeSDavid van Moolenbroek gid_t groups[NGROUPS_MAX];
57327852ebeSDavid van Moolenbroek iovec_t iov[2];
57427852ebeSDavid van Moolenbroek unsigned int iovcnt, segflags;
57527852ebeSDavid van Moolenbroek unsigned char lenbyte;
57627852ebeSDavid van Moolenbroek size_t credlen, pathlen, datalen, seglen;
57727852ebeSDavid van Moolenbroek size_t avail, pos, left;
57827852ebeSDavid van Moolenbroek int r, merge;
57927852ebeSDavid van Moolenbroek
58027852ebeSDavid van Moolenbroek /*
58127852ebeSDavid van Moolenbroek * At this point we should add the data to the peer's receive buffer.
58227852ebeSDavid van Moolenbroek * In the case of SOCK_STREAM sockets, we should add as much of the
58327852ebeSDavid van Moolenbroek * data as possible and suspend the call to send the rest later, if
58427852ebeSDavid van Moolenbroek * applicable. In the case of SOCK_DGRAM sockets, we should drop the
58527852ebeSDavid van Moolenbroek * packet if it does not fit in the buffer.
58627852ebeSDavid van Moolenbroek *
58727852ebeSDavid van Moolenbroek * Due to the checks in uds_can_send(), we know for sure that we no
58827852ebeSDavid van Moolenbroek * longer have to suspend without making any progress at this point.
58927852ebeSDavid van Moolenbroek */
59027852ebeSDavid van Moolenbroek segflags = (nfds > 0) ? UDS_HAS_FDS : 0;
59127852ebeSDavid van Moolenbroek
59227852ebeSDavid van Moolenbroek /*
59327852ebeSDavid van Moolenbroek * Obtain the credentials now. Doing so allows us to determine how
59427852ebeSDavid van Moolenbroek * much space we actually need for them.
59527852ebeSDavid van Moolenbroek */
59627852ebeSDavid van Moolenbroek if (off == 0 && (peer->uds_flags & UDSF_PASSCRED)) {
59727852ebeSDavid van Moolenbroek memset(&sockcred, 0, sizeof(sockcred));
59827852ebeSDavid van Moolenbroek
59927852ebeSDavid van Moolenbroek if ((r = getsockcred(user_endpt, &sockcred, groups,
60027852ebeSDavid van Moolenbroek __arraycount(groups))) != OK)
60127852ebeSDavid van Moolenbroek return r;
60227852ebeSDavid van Moolenbroek
60379a488aaSDavid van Moolenbroek /*
60479a488aaSDavid van Moolenbroek * getsockcred(3) returns the total number of groups for the
60579a488aaSDavid van Moolenbroek * process, which may exceed the size of the given array. Our
60679a488aaSDavid van Moolenbroek * groups array should always be large enough for all groups,
60779a488aaSDavid van Moolenbroek * but we check to be sure anyway.
60879a488aaSDavid van Moolenbroek */
60979a488aaSDavid van Moolenbroek assert(sockcred.sc_ngroups <= (int)__arraycount(groups));
61079a488aaSDavid van Moolenbroek
61127852ebeSDavid van Moolenbroek credlen = 1 + SOCKCREDSIZE(sockcred.sc_ngroups);
61227852ebeSDavid van Moolenbroek
61327852ebeSDavid van Moolenbroek segflags |= UDS_HAS_CRED;
61427852ebeSDavid van Moolenbroek } else
61527852ebeSDavid van Moolenbroek credlen = 0;
61627852ebeSDavid van Moolenbroek
61727852ebeSDavid van Moolenbroek /* For bound source datagram sockets, include the source path. */
61827852ebeSDavid van Moolenbroek if (uds_get_type(uds) == SOCK_DGRAM && uds->uds_pathlen != 0) {
61927852ebeSDavid van Moolenbroek pathlen = (size_t)uds->uds_pathlen + 1;
62027852ebeSDavid van Moolenbroek
62127852ebeSDavid van Moolenbroek segflags |= UDS_HAS_PATH;
62227852ebeSDavid van Moolenbroek } else
62327852ebeSDavid van Moolenbroek pathlen = 0;
62427852ebeSDavid van Moolenbroek
62527852ebeSDavid van Moolenbroek avail = UDS_BUF - peer->uds_len;
62627852ebeSDavid van Moolenbroek
62727852ebeSDavid van Moolenbroek if (uds_get_type(uds) == SOCK_STREAM) {
62827852ebeSDavid van Moolenbroek /*
62927852ebeSDavid van Moolenbroek * Determine whether we can merge data into the previous
63027852ebeSDavid van Moolenbroek * segment. This is a more refined version of the test in
63127852ebeSDavid van Moolenbroek * uds_can_send(), as we now know whether there are actually
63227852ebeSDavid van Moolenbroek * any FDs to transfer.
63327852ebeSDavid van Moolenbroek */
63427852ebeSDavid van Moolenbroek merge = (peer->uds_len != 0 && nfds == 0 && credlen == 0);
63527852ebeSDavid van Moolenbroek
63627852ebeSDavid van Moolenbroek /* Determine how much we can send at once. */
63727852ebeSDavid van Moolenbroek if (!merge) {
63827852ebeSDavid van Moolenbroek assert(avail > UDS_HDRLEN + credlen);
63927852ebeSDavid van Moolenbroek datalen = avail - UDS_HDRLEN - credlen;
64027852ebeSDavid van Moolenbroek } else
64127852ebeSDavid van Moolenbroek datalen = avail;
64227852ebeSDavid van Moolenbroek
64327852ebeSDavid van Moolenbroek if (datalen > len)
64427852ebeSDavid van Moolenbroek datalen = len;
64527852ebeSDavid van Moolenbroek
64627852ebeSDavid van Moolenbroek /* If we cannot make progress, we should have suspended.. */
64727852ebeSDavid van Moolenbroek assert(datalen != 0 || len == 0);
64827852ebeSDavid van Moolenbroek } else {
64927852ebeSDavid van Moolenbroek merge = FALSE;
65027852ebeSDavid van Moolenbroek
65127852ebeSDavid van Moolenbroek datalen = len;
65227852ebeSDavid van Moolenbroek }
65327852ebeSDavid van Moolenbroek assert(datalen <= len);
65427852ebeSDavid van Moolenbroek assert(datalen <= UDS_BUF);
65527852ebeSDavid van Moolenbroek
65627852ebeSDavid van Moolenbroek /*
65727852ebeSDavid van Moolenbroek * Compute the total amount of space we need for the segment in the
65827852ebeSDavid van Moolenbroek * receive buffer. Given that we have done will-it-fit tests in
65927852ebeSDavid van Moolenbroek * uds_can_send() for SOCK_STREAM and SOCK_SEQPACKET, there is only one
66027852ebeSDavid van Moolenbroek * case left where the result may not fit, and that is for SOCK_DGRAM
66127852ebeSDavid van Moolenbroek * packets. In that case, we drop the packet. POSIX says we should
66227852ebeSDavid van Moolenbroek * throw an error in that case, and that is also what NetBSD does.
66327852ebeSDavid van Moolenbroek */
66427852ebeSDavid van Moolenbroek if (!merge)
66527852ebeSDavid van Moolenbroek seglen = UDS_HDRLEN + credlen + pathlen + datalen;
66627852ebeSDavid van Moolenbroek else
66727852ebeSDavid van Moolenbroek seglen = datalen;
66827852ebeSDavid van Moolenbroek
66927852ebeSDavid van Moolenbroek if (seglen > avail) {
67027852ebeSDavid van Moolenbroek assert(uds_get_type(uds) == SOCK_DGRAM);
67127852ebeSDavid van Moolenbroek
67227852ebeSDavid van Moolenbroek /* Drop the packet, borrowing NetBSD's chosen error code. */
67327852ebeSDavid van Moolenbroek return ENOBUFS;
67427852ebeSDavid van Moolenbroek }
67527852ebeSDavid van Moolenbroek
67627852ebeSDavid van Moolenbroek /*
67727852ebeSDavid van Moolenbroek * Generate the full segment, but do not yet update the buffer head.
67827852ebeSDavid van Moolenbroek * We may still run into an error (copying in file descriptors) or even
67927852ebeSDavid van Moolenbroek * decide that nothing gets sent after all (if there are no data or
68027852ebeSDavid van Moolenbroek * file descriptors). If we are merging the new data into the previous
68127852ebeSDavid van Moolenbroek * segment, do not generate a header.
68227852ebeSDavid van Moolenbroek */
68327852ebeSDavid van Moolenbroek pos = uds_get_head(peer);
68427852ebeSDavid van Moolenbroek
68527852ebeSDavid van Moolenbroek /* Generate the header, if needed. */
68627852ebeSDavid van Moolenbroek if (!merge)
68727852ebeSDavid van Moolenbroek pos = uds_store_hdr(peer, pos, seglen, datalen, segflags);
68827852ebeSDavid van Moolenbroek else
68927852ebeSDavid van Moolenbroek assert(segflags == 0);
69027852ebeSDavid van Moolenbroek
69127852ebeSDavid van Moolenbroek /* Copy in and store the sender's credentials, if desired. */
69227852ebeSDavid van Moolenbroek if (credlen > 0) {
69327852ebeSDavid van Moolenbroek assert(credlen >= 1 + sizeof(sockcred));
69427852ebeSDavid van Moolenbroek assert(credlen <= UCHAR_MAX);
69527852ebeSDavid van Moolenbroek
69627852ebeSDavid van Moolenbroek lenbyte = credlen - 1;
69727852ebeSDavid van Moolenbroek pos = uds_store(peer, pos, &lenbyte, 1);
69827852ebeSDavid van Moolenbroek
69927852ebeSDavid van Moolenbroek if (sockcred.sc_ngroups > 0) {
70027852ebeSDavid van Moolenbroek pos = uds_store(peer, pos, &sockcred,
70127852ebeSDavid van Moolenbroek offsetof(struct sockcred, sc_groups));
70227852ebeSDavid van Moolenbroek pos = uds_store(peer, pos, groups,
70327852ebeSDavid van Moolenbroek sockcred.sc_ngroups * sizeof(gid_t));
70427852ebeSDavid van Moolenbroek } else
70527852ebeSDavid van Moolenbroek pos = uds_store(peer, pos, &sockcred,
70627852ebeSDavid van Moolenbroek sizeof(sockcred));
70727852ebeSDavid van Moolenbroek }
70827852ebeSDavid van Moolenbroek
70927852ebeSDavid van Moolenbroek /* Store the sender's address if any. Datagram sockets only. */
71027852ebeSDavid van Moolenbroek if (pathlen > 0) {
71127852ebeSDavid van Moolenbroek assert(pathlen > 1);
71227852ebeSDavid van Moolenbroek assert(pathlen <= UCHAR_MAX);
71327852ebeSDavid van Moolenbroek
71427852ebeSDavid van Moolenbroek lenbyte = uds->uds_pathlen;
71527852ebeSDavid van Moolenbroek pos = uds_store(peer, pos, &lenbyte, 1);
71627852ebeSDavid van Moolenbroek pos = uds_store(peer, pos, uds->uds_path, pathlen - 1);
71727852ebeSDavid van Moolenbroek }
71827852ebeSDavid van Moolenbroek
71927852ebeSDavid van Moolenbroek /* Lastly, copy in the actual data (if any) from the caller. */
72027852ebeSDavid van Moolenbroek if (datalen > 0) {
72127852ebeSDavid van Moolenbroek iov[0].iov_addr = (vir_bytes)&peer->uds_buf[pos];
72227852ebeSDavid van Moolenbroek left = UDS_BUF - pos;
72327852ebeSDavid van Moolenbroek
72427852ebeSDavid van Moolenbroek if (left < datalen) {
72527852ebeSDavid van Moolenbroek assert(left > 0);
72627852ebeSDavid van Moolenbroek iov[0].iov_size = left;
72727852ebeSDavid van Moolenbroek iov[1].iov_addr = (vir_bytes)&peer->uds_buf[0];
72827852ebeSDavid van Moolenbroek iov[1].iov_size = datalen - left;
72927852ebeSDavid van Moolenbroek iovcnt = 2;
73027852ebeSDavid van Moolenbroek } else {
73127852ebeSDavid van Moolenbroek iov[0].iov_size = datalen;
73227852ebeSDavid van Moolenbroek iovcnt = 1;
73327852ebeSDavid van Moolenbroek }
73427852ebeSDavid van Moolenbroek
73527852ebeSDavid van Moolenbroek if ((r = sockdriver_vcopyin(data, off, iov, iovcnt)) != OK)
73627852ebeSDavid van Moolenbroek return r;
73727852ebeSDavid van Moolenbroek }
73827852ebeSDavid van Moolenbroek
73927852ebeSDavid van Moolenbroek *mergep = merge;
74027852ebeSDavid van Moolenbroek *datalenp = datalen;
74127852ebeSDavid van Moolenbroek *segflagsp = segflags;
74227852ebeSDavid van Moolenbroek return seglen;
74327852ebeSDavid van Moolenbroek }
74427852ebeSDavid van Moolenbroek
74527852ebeSDavid van Moolenbroek /*
74627852ebeSDavid van Moolenbroek * Copy in control data for the current send request, and extract any file
74727852ebeSDavid van Moolenbroek * descriptors to be transferred. Do not yet duplicate the file descriptors,
74827852ebeSDavid van Moolenbroek * but rather store a list in a temporary buffer: the send request may still
74927852ebeSDavid van Moolenbroek * fail in which case we want to avoid having to undo the duplication.
75027852ebeSDavid van Moolenbroek *
75127852ebeSDavid van Moolenbroek * On success, return the number of (zero or more) file descriptors extracted
75227852ebeSDavid van Moolenbroek * from the request and stored in the temporary buffer. On failure, return a
75327852ebeSDavid van Moolenbroek * negative error code.
75427852ebeSDavid van Moolenbroek */
75527852ebeSDavid van Moolenbroek static int
uds_send_ctl(const struct sockdriver_data * ctl,socklen_t ctl_len,endpoint_t user_endpt)75627852ebeSDavid van Moolenbroek uds_send_ctl(const struct sockdriver_data * ctl, socklen_t ctl_len,
75727852ebeSDavid van Moolenbroek endpoint_t user_endpt)
75827852ebeSDavid van Moolenbroek {
75927852ebeSDavid van Moolenbroek struct msghdr msghdr;
76027852ebeSDavid van Moolenbroek struct cmsghdr *cmsg;
76127852ebeSDavid van Moolenbroek socklen_t left;
76227852ebeSDavid van Moolenbroek unsigned int i, n, nfds;
76327852ebeSDavid van Moolenbroek int r;
76427852ebeSDavid van Moolenbroek
76527852ebeSDavid van Moolenbroek /*
76627852ebeSDavid van Moolenbroek * Copy in the control data. We can spend a lot of effort copying in
76727852ebeSDavid van Moolenbroek * the data in small chunks, and change the receiving side to do the
76827852ebeSDavid van Moolenbroek * same, but it is really not worth it: applications never send a whole
76927852ebeSDavid van Moolenbroek * lot of file descriptors at once, and the buffer size is currently
77027852ebeSDavid van Moolenbroek * such that the UDS service itself will exhaust its OPEN_MAX limit
77127852ebeSDavid van Moolenbroek * anyway if they do.
77227852ebeSDavid van Moolenbroek */
77327852ebeSDavid van Moolenbroek if (ctl_len > sizeof(uds_ctlbuf))
77427852ebeSDavid van Moolenbroek return ENOBUFS;
77527852ebeSDavid van Moolenbroek
77627852ebeSDavid van Moolenbroek if ((r = sockdriver_copyin(ctl, 0, uds_ctlbuf, ctl_len)) != OK)
77727852ebeSDavid van Moolenbroek return r;
77827852ebeSDavid van Moolenbroek
77927852ebeSDavid van Moolenbroek if (ctl_len < sizeof(uds_ctlbuf))
78027852ebeSDavid van Moolenbroek memset(&uds_ctlbuf[ctl_len], 0, sizeof(uds_ctlbuf) - ctl_len);
78127852ebeSDavid van Moolenbroek
78227852ebeSDavid van Moolenbroek /*
78327852ebeSDavid van Moolenbroek * Look for any file descriptors, and store their remote file
78427852ebeSDavid van Moolenbroek * descriptor numbers into a temporary array.
78527852ebeSDavid van Moolenbroek */
78627852ebeSDavid van Moolenbroek memset(&msghdr, 0, sizeof(msghdr));
78727852ebeSDavid van Moolenbroek msghdr.msg_control = uds_ctlbuf;
78827852ebeSDavid van Moolenbroek msghdr.msg_controllen = ctl_len;
78927852ebeSDavid van Moolenbroek
79027852ebeSDavid van Moolenbroek nfds = 0;
79127852ebeSDavid van Moolenbroek r = OK;
79227852ebeSDavid van Moolenbroek
79327852ebeSDavid van Moolenbroek /*
79427852ebeSDavid van Moolenbroek * The sender may provide file descriptors in multiple chunks.
79527852ebeSDavid van Moolenbroek * Currently we do not preserve these chunk boundaries, instead
79627852ebeSDavid van Moolenbroek * generating one single chunk with all file descriptors for the
79727852ebeSDavid van Moolenbroek * segment upon receipt. If needed, we can fairly easily adapt this
79827852ebeSDavid van Moolenbroek * later.
79927852ebeSDavid van Moolenbroek */
80027852ebeSDavid van Moolenbroek for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL;
80127852ebeSDavid van Moolenbroek cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
80227852ebeSDavid van Moolenbroek /*
80327852ebeSDavid van Moolenbroek * Check for bogus lengths. There is no excuse for this;
80427852ebeSDavid van Moolenbroek * either the caller does not know what they are doing or we
80527852ebeSDavid van Moolenbroek * are looking at a hacking attempt.
80627852ebeSDavid van Moolenbroek */
80727852ebeSDavid van Moolenbroek assert((socklen_t)((char *)cmsg - uds_ctlbuf) <= ctl_len);
80827852ebeSDavid van Moolenbroek left = ctl_len - (socklen_t)((char *)cmsg - uds_ctlbuf);
80927852ebeSDavid van Moolenbroek assert(left >= CMSG_LEN(0)); /* guaranteed by CMSG_xxHDR */
81027852ebeSDavid van Moolenbroek
81127852ebeSDavid van Moolenbroek if (cmsg->cmsg_len < CMSG_LEN(0) || cmsg->cmsg_len > left) {
81227852ebeSDavid van Moolenbroek printf("UDS: malformed control data from %u\n",
81327852ebeSDavid van Moolenbroek user_endpt);
81427852ebeSDavid van Moolenbroek r = EINVAL;
81527852ebeSDavid van Moolenbroek break;
81627852ebeSDavid van Moolenbroek }
81727852ebeSDavid van Moolenbroek
81827852ebeSDavid van Moolenbroek if (cmsg->cmsg_level != SOL_SOCKET ||
81927852ebeSDavid van Moolenbroek cmsg->cmsg_type != SCM_RIGHTS)
82027852ebeSDavid van Moolenbroek continue;
82127852ebeSDavid van Moolenbroek
82227852ebeSDavid van Moolenbroek n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
82327852ebeSDavid van Moolenbroek
82427852ebeSDavid van Moolenbroek for (i = 0; i < n; i++) {
82527852ebeSDavid van Moolenbroek /*
82627852ebeSDavid van Moolenbroek * Copy the file descriptor to the temporary buffer,
82727852ebeSDavid van Moolenbroek * whose size is based on the control data buffer, so
82827852ebeSDavid van Moolenbroek * it is always large enough to contain all FDs.
82927852ebeSDavid van Moolenbroek */
83027852ebeSDavid van Moolenbroek assert(nfds < __arraycount(uds_ctlfds));
83127852ebeSDavid van Moolenbroek
83227852ebeSDavid van Moolenbroek memcpy(&uds_ctlfds[nfds],
83327852ebeSDavid van Moolenbroek &((int *)CMSG_DATA(cmsg))[i], sizeof(int));
83427852ebeSDavid van Moolenbroek
83527852ebeSDavid van Moolenbroek nfds++;
83627852ebeSDavid van Moolenbroek }
83727852ebeSDavid van Moolenbroek }
83827852ebeSDavid van Moolenbroek
83927852ebeSDavid van Moolenbroek return nfds;
84027852ebeSDavid van Moolenbroek }
84127852ebeSDavid van Moolenbroek
84227852ebeSDavid van Moolenbroek /*
84327852ebeSDavid van Moolenbroek * Actually duplicate any file descriptors that we extracted from the sender's
84427852ebeSDavid van Moolenbroek * control data and stored in our temporary buffer. On success, return OK,
84527852ebeSDavid van Moolenbroek * with all file descriptors stored in file descriptor objects that are
84627852ebeSDavid van Moolenbroek * appended to the socket's list of in-flight FD objects. Thus, on success,
84727852ebeSDavid van Moolenbroek * the send request may no longer fail. On failure, return a negative error
84827852ebeSDavid van Moolenbroek * code, with any partial duplication undone.
84927852ebeSDavid van Moolenbroek */
85027852ebeSDavid van Moolenbroek static int
uds_send_fds(struct udssock * peer,unsigned int nfds,endpoint_t user_endpt)85127852ebeSDavid van Moolenbroek uds_send_fds(struct udssock * peer, unsigned int nfds, endpoint_t user_endpt)
85227852ebeSDavid van Moolenbroek {
85327852ebeSDavid van Moolenbroek SIMPLEQ_HEAD(, uds_fd) fds;
85427852ebeSDavid van Moolenbroek struct uds_fd *ufd;
85527852ebeSDavid van Moolenbroek unsigned int i;
85627852ebeSDavid van Moolenbroek int r;
85727852ebeSDavid van Moolenbroek
85827852ebeSDavid van Moolenbroek SIMPLEQ_INIT(&fds);
85927852ebeSDavid van Moolenbroek
86027852ebeSDavid van Moolenbroek for (i = 0; i < nfds; i++) {
86127852ebeSDavid van Moolenbroek if (SIMPLEQ_EMPTY(&uds_freefds)) {
86227852ebeSDavid van Moolenbroek /* UDS itself may already have OPEN_MAX FDs. */
86327852ebeSDavid van Moolenbroek r = ENFILE;
86427852ebeSDavid van Moolenbroek break;
86527852ebeSDavid van Moolenbroek }
86627852ebeSDavid van Moolenbroek
86727852ebeSDavid van Moolenbroek /*
86827852ebeSDavid van Moolenbroek * The caller may have given an invalid FD, or UDS itself may
86927852ebeSDavid van Moolenbroek * unexpectedly have run out of available file descriptors etc.
87027852ebeSDavid van Moolenbroek */
87127852ebeSDavid van Moolenbroek if ((r = copyfd(user_endpt, uds_ctlfds[i], COPYFD_FROM)) < 0)
87227852ebeSDavid van Moolenbroek break;
87327852ebeSDavid van Moolenbroek
87427852ebeSDavid van Moolenbroek ufd = SIMPLEQ_FIRST(&uds_freefds);
87527852ebeSDavid van Moolenbroek SIMPLEQ_REMOVE_HEAD(&uds_freefds, ufd_next);
87627852ebeSDavid van Moolenbroek
87727852ebeSDavid van Moolenbroek ufd->ufd_fd = r;
87827852ebeSDavid van Moolenbroek ufd->ufd_count = 0;
87927852ebeSDavid van Moolenbroek
88027852ebeSDavid van Moolenbroek SIMPLEQ_INSERT_TAIL(&fds, ufd, ufd_next);
88127852ebeSDavid van Moolenbroek
88227852ebeSDavid van Moolenbroek dprintf(("UDS: copied in fd %d -> %d\n", uds_ctlfds[i], r));
88327852ebeSDavid van Moolenbroek }
88427852ebeSDavid van Moolenbroek
88527852ebeSDavid van Moolenbroek /* Did we experience an error while copying in the file descriptors? */
88627852ebeSDavid van Moolenbroek if (r < 0) {
88727852ebeSDavid van Moolenbroek /* Revert the successful copyfd() calls made so far. */
88827852ebeSDavid van Moolenbroek SIMPLEQ_FOREACH(ufd, &fds, ufd_next) {
88927852ebeSDavid van Moolenbroek dprintf(("UDS: closing local fd %d\n", ufd->ufd_fd));
89027852ebeSDavid van Moolenbroek
89127852ebeSDavid van Moolenbroek closenb(ufd->ufd_fd);
89227852ebeSDavid van Moolenbroek }
89327852ebeSDavid van Moolenbroek
89427852ebeSDavid van Moolenbroek SIMPLEQ_CONCAT(&uds_freefds, &fds);
89527852ebeSDavid van Moolenbroek
89627852ebeSDavid van Moolenbroek return r;
89727852ebeSDavid van Moolenbroek }
89827852ebeSDavid van Moolenbroek
89927852ebeSDavid van Moolenbroek /*
90027852ebeSDavid van Moolenbroek * Success. If there were any file descriptors at all, add them to the
90127852ebeSDavid van Moolenbroek * peer's list of in-flight file descriptors. Assign the number of
90227852ebeSDavid van Moolenbroek * file descriptors copied in to the first file descriptor object, so
90327852ebeSDavid van Moolenbroek * that we know how many to copy out (or discard) for this segment.
90427852ebeSDavid van Moolenbroek * Also set the UDS_HAS_FDS flag on the segment.
90527852ebeSDavid van Moolenbroek */
90627852ebeSDavid van Moolenbroek ufd = SIMPLEQ_FIRST(&fds);
90727852ebeSDavid van Moolenbroek ufd->ufd_count = nfds;
90827852ebeSDavid van Moolenbroek
90927852ebeSDavid van Moolenbroek SIMPLEQ_CONCAT(&peer->uds_fds, &fds);
91027852ebeSDavid van Moolenbroek
91127852ebeSDavid van Moolenbroek return OK;
91227852ebeSDavid van Moolenbroek }
91327852ebeSDavid van Moolenbroek
91427852ebeSDavid van Moolenbroek /*
91527852ebeSDavid van Moolenbroek * The current send request is successful or at least has made progress.
91627852ebeSDavid van Moolenbroek * Commit the new segment or, if we decided to merge the new data into the last
91727852ebeSDavid van Moolenbroek * segment, update the header of the last segment. Also wake up the receiving
91827852ebeSDavid van Moolenbroek * side, because there will now be new data to receive.
91927852ebeSDavid van Moolenbroek */
92027852ebeSDavid van Moolenbroek static void
uds_send_advance(struct udssock * uds,struct udssock * peer,size_t datalen,int merge,size_t seglen,unsigned int segflags)92127852ebeSDavid van Moolenbroek uds_send_advance(struct udssock * uds, struct udssock * peer, size_t datalen,
92227852ebeSDavid van Moolenbroek int merge, size_t seglen, unsigned int segflags)
92327852ebeSDavid van Moolenbroek {
92427852ebeSDavid van Moolenbroek size_t pos, prevseglen, prevdatalen;
92527852ebeSDavid van Moolenbroek
92627852ebeSDavid van Moolenbroek /*
92727852ebeSDavid van Moolenbroek * For non-datagram sockets, credentials are sent only once after
92827852ebeSDavid van Moolenbroek * setting the LOCAL_CREDS option. After that, the option is unset.
92927852ebeSDavid van Moolenbroek */
93027852ebeSDavid van Moolenbroek if ((segflags & UDS_HAS_CRED) && uds_get_type(uds) != SOCK_DGRAM)
93127852ebeSDavid van Moolenbroek peer->uds_flags &= ~UDSF_PASSCRED;
93227852ebeSDavid van Moolenbroek
93327852ebeSDavid van Moolenbroek if (merge) {
93427852ebeSDavid van Moolenbroek assert(segflags == 0);
93527852ebeSDavid van Moolenbroek
93627852ebeSDavid van Moolenbroek pos = uds_get_last(peer);
93727852ebeSDavid van Moolenbroek
93827852ebeSDavid van Moolenbroek (void)uds_fetch_hdr(peer, pos, &prevseglen, &prevdatalen,
93927852ebeSDavid van Moolenbroek &segflags);
94027852ebeSDavid van Moolenbroek
94127852ebeSDavid van Moolenbroek peer->uds_len += seglen;
94227852ebeSDavid van Moolenbroek assert(peer->uds_len <= UDS_BUF);
94327852ebeSDavid van Moolenbroek
94427852ebeSDavid van Moolenbroek seglen += prevseglen;
94527852ebeSDavid van Moolenbroek datalen += prevdatalen;
94627852ebeSDavid van Moolenbroek assert(seglen <= UDS_BUF);
94727852ebeSDavid van Moolenbroek
94827852ebeSDavid van Moolenbroek uds_store_hdr(peer, pos, seglen, datalen, segflags);
94927852ebeSDavid van Moolenbroek } else {
95027852ebeSDavid van Moolenbroek peer->uds_last = peer->uds_len;
95127852ebeSDavid van Moolenbroek
95227852ebeSDavid van Moolenbroek peer->uds_len += seglen;
95327852ebeSDavid van Moolenbroek assert(peer->uds_len <= UDS_BUF);
95427852ebeSDavid van Moolenbroek }
95527852ebeSDavid van Moolenbroek
95627852ebeSDavid van Moolenbroek /* Now that there are new data, wake up the receiver side. */
95727852ebeSDavid van Moolenbroek sockevent_raise(&peer->uds_sock, SEV_RECV);
95827852ebeSDavid van Moolenbroek }
95927852ebeSDavid van Moolenbroek
96027852ebeSDavid van Moolenbroek /*
96127852ebeSDavid van Moolenbroek * Process a send request. Return OK if the send request has successfully
96227852ebeSDavid van Moolenbroek * completed, SUSPEND if it should be tried again later, or a negative error
96327852ebeSDavid van Moolenbroek * code on failure. In all cases, the values of 'off' and 'ctl_off' must be
96427852ebeSDavid van Moolenbroek * updated if any progress has been made; if either is non-zero, libsockevent
96527852ebeSDavid van Moolenbroek * will return the partial progress rather than an error code.
96627852ebeSDavid van Moolenbroek */
96727852ebeSDavid van Moolenbroek int
uds_send(struct sock * sock,const struct sockdriver_data * data,size_t len,size_t * off,const struct sockdriver_data * ctl,socklen_t ctl_len,socklen_t * ctl_off,const struct sockaddr * addr,socklen_t addr_len,endpoint_t user_endpt,int flags __unused,size_t min)96827852ebeSDavid van Moolenbroek uds_send(struct sock * sock, const struct sockdriver_data * data, size_t len,
96927852ebeSDavid van Moolenbroek size_t * off, const struct sockdriver_data * ctl, socklen_t ctl_len,
97027852ebeSDavid van Moolenbroek socklen_t * ctl_off, const struct sockaddr * addr, socklen_t addr_len,
97127852ebeSDavid van Moolenbroek endpoint_t user_endpt, int flags __unused, size_t min)
97227852ebeSDavid van Moolenbroek {
97327852ebeSDavid van Moolenbroek struct udssock *uds = (struct udssock *)sock;
97427852ebeSDavid van Moolenbroek struct udssock *peer;
97527852ebeSDavid van Moolenbroek size_t seglen, datalen = 0 /*gcc*/;
97627852ebeSDavid van Moolenbroek unsigned int nfds, segflags = 0 /*gcc*/;
97727852ebeSDavid van Moolenbroek int r, partial, merge = 0 /*gcc*/;
97827852ebeSDavid van Moolenbroek
97927852ebeSDavid van Moolenbroek dprintf(("UDS: send(%d,%zu,%zu,%u,%u,0x%x)\n",
98027852ebeSDavid van Moolenbroek uds_get_id(uds), len, (off != NULL) ? *off : 0, ctl_len,
98127852ebeSDavid van Moolenbroek (ctl_off != NULL) ? *ctl_off : 0, flags));
98227852ebeSDavid van Moolenbroek
98327852ebeSDavid van Moolenbroek partial = (off != NULL && *off > 0);
98427852ebeSDavid van Moolenbroek
98527852ebeSDavid van Moolenbroek /*
98627852ebeSDavid van Moolenbroek * First see whether we can process this send call at all right now.
98727852ebeSDavid van Moolenbroek * Most importantly, for connected sockets, if the peer's receive
98827852ebeSDavid van Moolenbroek * buffer is full, we may have to suspend the call until some space has
98927852ebeSDavid van Moolenbroek * been freed up.
99027852ebeSDavid van Moolenbroek */
99127852ebeSDavid van Moolenbroek if ((r = uds_send_test(uds, len, ctl_len, min, partial)) != OK)
99227852ebeSDavid van Moolenbroek return r;
99327852ebeSDavid van Moolenbroek
99427852ebeSDavid van Moolenbroek /*
99527852ebeSDavid van Moolenbroek * Then get the peer socket. For connected sockets, this is trivial.
99627852ebeSDavid van Moolenbroek * For unconnected sockets, it may involve a lookup of the given
99727852ebeSDavid van Moolenbroek * address.
99827852ebeSDavid van Moolenbroek */
99927852ebeSDavid van Moolenbroek if ((r = uds_send_peer(uds, addr, addr_len, user_endpt, &peer)) != OK)
100027852ebeSDavid van Moolenbroek return r;
100127852ebeSDavid van Moolenbroek
100227852ebeSDavid van Moolenbroek /*
100327852ebeSDavid van Moolenbroek * We now know for sure that we will not suspend this call without
100427852ebeSDavid van Moolenbroek * making any progress. However, the call may still fail. Copy in
100527852ebeSDavid van Moolenbroek * control data first now, so that we know whether there are any file
100627852ebeSDavid van Moolenbroek * descriptors to transfer. This aspect may determine whether or not
100727852ebeSDavid van Moolenbroek * we can merge data with a previous segment. Do not actually copy in
100827852ebeSDavid van Moolenbroek * the actual file descriptors yet, because that is much harder to undo
100927852ebeSDavid van Moolenbroek * in case of a failure later on.
101027852ebeSDavid van Moolenbroek */
101127852ebeSDavid van Moolenbroek if (ctl_len > 0) {
101227852ebeSDavid van Moolenbroek /* We process control data once, in full. */
101327852ebeSDavid van Moolenbroek assert(*ctl_off == 0);
101427852ebeSDavid van Moolenbroek
101527852ebeSDavid van Moolenbroek if ((r = uds_send_ctl(ctl, ctl_len, user_endpt)) < 0)
101627852ebeSDavid van Moolenbroek return r;
101727852ebeSDavid van Moolenbroek nfds = (unsigned int)r;
101827852ebeSDavid van Moolenbroek } else
101927852ebeSDavid van Moolenbroek nfds = 0;
102027852ebeSDavid van Moolenbroek
102127852ebeSDavid van Moolenbroek /*
102227852ebeSDavid van Moolenbroek * Now generate a new segment, or (if possible) merge new data into the
102327852ebeSDavid van Moolenbroek * last segment. Since the call may still fail, prepare the segment
102427852ebeSDavid van Moolenbroek * but do not update the buffer head yet. Note that the segment
102527852ebeSDavid van Moolenbroek * contains not just regular data (in fact it may contain no data at
102627852ebeSDavid van Moolenbroek * all) but (also) certain ancillary data.
102727852ebeSDavid van Moolenbroek */
102827852ebeSDavid van Moolenbroek if ((r = uds_send_data(uds, peer, data, len, *off, user_endpt, nfds,
102927852ebeSDavid van Moolenbroek &merge, &datalen, &segflags)) <= 0)
103027852ebeSDavid van Moolenbroek return r;
103127852ebeSDavid van Moolenbroek seglen = (size_t)r;
103227852ebeSDavid van Moolenbroek
103327852ebeSDavid van Moolenbroek /*
103427852ebeSDavid van Moolenbroek * If we extracted any file descriptors from the control data earlier,
103527852ebeSDavid van Moolenbroek * copy them over to ourselves now. The resulting in-flight file
103627852ebeSDavid van Moolenbroek * descriptors are stored in a separate data structure. This is the
103727852ebeSDavid van Moolenbroek * last point where the send call may actually fail.
103827852ebeSDavid van Moolenbroek */
103927852ebeSDavid van Moolenbroek if (nfds > 0) {
104027852ebeSDavid van Moolenbroek if ((r = uds_send_fds(peer, nfds, user_endpt)) != OK)
104127852ebeSDavid van Moolenbroek return r;
104227852ebeSDavid van Moolenbroek }
104327852ebeSDavid van Moolenbroek
104427852ebeSDavid van Moolenbroek /*
104527852ebeSDavid van Moolenbroek * The transmission is now known to be (partially) successful. Commit
104627852ebeSDavid van Moolenbroek * the new work by moving the receive buffer head.
104727852ebeSDavid van Moolenbroek */
104827852ebeSDavid van Moolenbroek uds_send_advance(uds, peer, datalen, merge, seglen, segflags);
104927852ebeSDavid van Moolenbroek
105027852ebeSDavid van Moolenbroek /*
105127852ebeSDavid van Moolenbroek * Register the result. For stream-type sockets, the expected behavior
105227852ebeSDavid van Moolenbroek * is that all data be sent, and so we may still have to suspend the
105327852ebeSDavid van Moolenbroek * call after partial progress. Otherwise, we are now done. Either
105427852ebeSDavid van Moolenbroek * way, we are done with the control data, so mark it as consumed.
105527852ebeSDavid van Moolenbroek */
105627852ebeSDavid van Moolenbroek *off += datalen;
105727852ebeSDavid van Moolenbroek *ctl_off += ctl_len;
105827852ebeSDavid van Moolenbroek if (uds_get_type(uds) == SOCK_STREAM && datalen < len)
105927852ebeSDavid van Moolenbroek return SUSPEND;
106027852ebeSDavid van Moolenbroek else
106127852ebeSDavid van Moolenbroek return OK;
106227852ebeSDavid van Moolenbroek }
106327852ebeSDavid van Moolenbroek
106427852ebeSDavid van Moolenbroek /*
106527852ebeSDavid van Moolenbroek * Test whether a send request would block. The given 'min' parameter contains
106627852ebeSDavid van Moolenbroek * the minimum number of bytes that should be possible to send without blocking
106727852ebeSDavid van Moolenbroek * (the low send watermark). Return SUSPEND if the send request would block,
106827852ebeSDavid van Moolenbroek * or any other error code if it would not.
106927852ebeSDavid van Moolenbroek */
107027852ebeSDavid van Moolenbroek int
uds_test_send(struct sock * sock,size_t min)107127852ebeSDavid van Moolenbroek uds_test_send(struct sock * sock, size_t min)
107227852ebeSDavid van Moolenbroek {
107327852ebeSDavid van Moolenbroek struct udssock *uds = (struct udssock *)sock;
107427852ebeSDavid van Moolenbroek
107527852ebeSDavid van Moolenbroek return uds_send_test(uds, min, 0, min, FALSE /*partial*/);
107627852ebeSDavid van Moolenbroek }
107727852ebeSDavid van Moolenbroek
107827852ebeSDavid van Moolenbroek /*
107927852ebeSDavid van Moolenbroek * Perform initial checks on a receive request, before it may potentially be
108027852ebeSDavid van Moolenbroek * suspended. Return OK if this receive request is valid, or a negative error
108127852ebeSDavid van Moolenbroek * code if it is not.
108227852ebeSDavid van Moolenbroek */
108327852ebeSDavid van Moolenbroek int
uds_pre_recv(struct sock * sock __unused,endpoint_t user_endpt __unused,int flags)108427852ebeSDavid van Moolenbroek uds_pre_recv(struct sock * sock __unused, endpoint_t user_endpt __unused,
108527852ebeSDavid van Moolenbroek int flags)
108627852ebeSDavid van Moolenbroek {
108727852ebeSDavid van Moolenbroek
108827852ebeSDavid van Moolenbroek /*
108927852ebeSDavid van Moolenbroek * Reject calls with unknown flags. TODO: ensure that we should really
109027852ebeSDavid van Moolenbroek * reject all other flags rather than ignore them.
109127852ebeSDavid van Moolenbroek */
109227852ebeSDavid van Moolenbroek if ((flags & ~(MSG_PEEK | MSG_WAITALL | MSG_CMSG_CLOEXEC)) != 0)
109327852ebeSDavid van Moolenbroek return EOPNOTSUPP;
109427852ebeSDavid van Moolenbroek
109527852ebeSDavid van Moolenbroek return OK;
109627852ebeSDavid van Moolenbroek }
109727852ebeSDavid van Moolenbroek
109827852ebeSDavid van Moolenbroek /*
109927852ebeSDavid van Moolenbroek * Determine whether the (real or pretend) receive request should be processed
110027852ebeSDavid van Moolenbroek * now, suspended until later, or rejected based on the current socket state.
110127852ebeSDavid van Moolenbroek * Return OK if the receive request should be processed now, along with a first
110227852ebeSDavid van Moolenbroek * indication whether the call may still be suspended later in 'may_block'.
110327852ebeSDavid van Moolenbroek * Return SUSPEND if the receive request should be retried later. Return an
110427852ebeSDavid van Moolenbroek * appropriate negative error code if the receive request should fail.
110527852ebeSDavid van Moolenbroek */
110627852ebeSDavid van Moolenbroek static int
uds_recv_test(struct udssock * uds,size_t len,size_t min,int partial,int * may_block)110727852ebeSDavid van Moolenbroek uds_recv_test(struct udssock * uds, size_t len, size_t min, int partial,
110827852ebeSDavid van Moolenbroek int * may_block)
110927852ebeSDavid van Moolenbroek {
111027852ebeSDavid van Moolenbroek size_t seglen, datalen;
111127852ebeSDavid van Moolenbroek unsigned int segflags;
111227852ebeSDavid van Moolenbroek int r;
111327852ebeSDavid van Moolenbroek
111427852ebeSDavid van Moolenbroek /*
111527852ebeSDavid van Moolenbroek * If there are any pending data, those should always be received
111627852ebeSDavid van Moolenbroek * first. However, if there is nothing to receive, then whether we
111727852ebeSDavid van Moolenbroek * should suspend the receive call or fail immediately depends on other
111827852ebeSDavid van Moolenbroek * conditions. We first look at these other conditions.
111927852ebeSDavid van Moolenbroek */
112027852ebeSDavid van Moolenbroek r = OK;
112127852ebeSDavid van Moolenbroek
112227852ebeSDavid van Moolenbroek if (uds_get_type(uds) != SOCK_DGRAM) {
112327852ebeSDavid van Moolenbroek if (uds_is_connecting(uds))
112427852ebeSDavid van Moolenbroek r = SUSPEND;
112527852ebeSDavid van Moolenbroek else if (!uds_is_connected(uds) && !uds_is_disconnected(uds))
112627852ebeSDavid van Moolenbroek r = ENOTCONN;
112727852ebeSDavid van Moolenbroek else if (!uds_has_conn(uds) ||
112827852ebeSDavid van Moolenbroek uds_is_shutdown(uds->uds_conn, SFL_SHUT_WR))
112927852ebeSDavid van Moolenbroek r = SOCKEVENT_EOF;
113027852ebeSDavid van Moolenbroek }
113127852ebeSDavid van Moolenbroek
113227852ebeSDavid van Moolenbroek if (uds->uds_len == 0) {
113327852ebeSDavid van Moolenbroek /*
113427852ebeSDavid van Moolenbroek * For stream-type sockets, we use the policy: if no regular
113527852ebeSDavid van Moolenbroek * data is requested, then end the call without receiving
113627852ebeSDavid van Moolenbroek * anything. For packet-type sockets, the request should block
113727852ebeSDavid van Moolenbroek * until there is a packet to discard, though.
113827852ebeSDavid van Moolenbroek */
113927852ebeSDavid van Moolenbroek if (r != OK || (uds_get_type(uds) == SOCK_STREAM && len == 0))
114027852ebeSDavid van Moolenbroek return r;
114127852ebeSDavid van Moolenbroek
114227852ebeSDavid van Moolenbroek return SUSPEND;
114327852ebeSDavid van Moolenbroek }
114427852ebeSDavid van Moolenbroek
114527852ebeSDavid van Moolenbroek /*
114627852ebeSDavid van Moolenbroek * For stream-type sockets, we should still suspend the call if fewer
114727852ebeSDavid van Moolenbroek * than 'min' bytes are available right now, and there is a possibility
114827852ebeSDavid van Moolenbroek * that more data may arrive later. More may arrive later iff 'r' is
114927852ebeSDavid van Moolenbroek * OK (i.e., no EOF or error will follow) and, in case we already
115027852ebeSDavid van Moolenbroek * received some partial results, there is not already a next segment
115127852ebeSDavid van Moolenbroek * with ancillary data (i.e, nonzero segment flags), or in any case
115227852ebeSDavid van Moolenbroek * there isn't more than one segment in the buffer. Limit 'min' to the
115327852ebeSDavid van Moolenbroek * maximum that can ever be received, though. Since that is difficult
115427852ebeSDavid van Moolenbroek * in our case, we check whether the buffer is entirely full instead.
115527852ebeSDavid van Moolenbroek */
115627852ebeSDavid van Moolenbroek if (r == OK && uds_get_type(uds) == SOCK_STREAM && min > 0 &&
115727852ebeSDavid van Moolenbroek uds->uds_len < UDS_BUF) {
115827852ebeSDavid van Moolenbroek assert(uds->uds_len >= UDS_HDRLEN);
115927852ebeSDavid van Moolenbroek
116027852ebeSDavid van Moolenbroek (void)uds_fetch_hdr(uds, uds->uds_tail, &seglen, &datalen,
116127852ebeSDavid van Moolenbroek &segflags);
116227852ebeSDavid van Moolenbroek
116327852ebeSDavid van Moolenbroek if (datalen < min && seglen == uds->uds_len &&
116427852ebeSDavid van Moolenbroek (!partial || segflags == 0))
116527852ebeSDavid van Moolenbroek return SUSPEND;
116627852ebeSDavid van Moolenbroek }
116727852ebeSDavid van Moolenbroek
116827852ebeSDavid van Moolenbroek /*
116927852ebeSDavid van Moolenbroek * Also start the decision process as to whether we should suspend the
117027852ebeSDavid van Moolenbroek * current call if MSG_WAITALL is given. Unfortunately there is no one
117127852ebeSDavid van Moolenbroek * place where we can conveniently do all the required checks.
117227852ebeSDavid van Moolenbroek */
117327852ebeSDavid van Moolenbroek if (may_block != NULL)
117427852ebeSDavid van Moolenbroek *may_block = (r == OK && uds_get_type(uds) == SOCK_STREAM);
117527852ebeSDavid van Moolenbroek return OK;
117627852ebeSDavid van Moolenbroek }
117727852ebeSDavid van Moolenbroek
117827852ebeSDavid van Moolenbroek /*
117927852ebeSDavid van Moolenbroek * Receive regular data, and possibly the source path, from the tail segment in
118027852ebeSDavid van Moolenbroek * the receive buffer. On success, return the positive non-zero length of the
118127852ebeSDavid van Moolenbroek * tail segment, with 'addr' and 'addr_len' modified to store the source
118227852ebeSDavid van Moolenbroek * address if applicable, the result flags in 'rflags' updated as appropriate,
118327852ebeSDavid van Moolenbroek * the tail segment's data length stored in 'datalen', the number of received
118427852ebeSDavid van Moolenbroek * regular data bytes stored in 'reslen', the segment flags stored in
118527852ebeSDavid van Moolenbroek * 'segflags', and the absolute receive buffer position of the credentials in
118627852ebeSDavid van Moolenbroek * the segment stored in 'credpos' if applicable. Since the receive call may
118727852ebeSDavid van Moolenbroek * still fail, this function must not yet update the tail or any other aspect
118827852ebeSDavid van Moolenbroek * of the receive buffer. Return zero if the current receive call was already
118927852ebeSDavid van Moolenbroek * partially successful (due to MSG_WAITALL) and can no longer make progress,
119027852ebeSDavid van Moolenbroek * and thus should be ended. Return a negative error code on failure.
119127852ebeSDavid van Moolenbroek */
119227852ebeSDavid van Moolenbroek static int
uds_recv_data(struct udssock * uds,const struct sockdriver_data * data,size_t len,size_t off,struct sockaddr * addr,socklen_t * addr_len,int * __restrict rflags,size_t * __restrict datalen,size_t * __restrict reslen,unsigned int * __restrict segflags,size_t * __restrict credpos)119327852ebeSDavid van Moolenbroek uds_recv_data(struct udssock * uds, const struct sockdriver_data * data,
119427852ebeSDavid van Moolenbroek size_t len, size_t off, struct sockaddr * addr, socklen_t * addr_len,
119527852ebeSDavid van Moolenbroek int * __restrict rflags, size_t * __restrict datalen,
119627852ebeSDavid van Moolenbroek size_t * __restrict reslen, unsigned int * __restrict segflags,
119727852ebeSDavid van Moolenbroek size_t * __restrict credpos)
119827852ebeSDavid van Moolenbroek {
119927852ebeSDavid van Moolenbroek iovec_t iov[2];
120027852ebeSDavid van Moolenbroek unsigned char lenbyte;
120127852ebeSDavid van Moolenbroek unsigned int iovcnt;
120227852ebeSDavid van Moolenbroek size_t pos, seglen, left;
120327852ebeSDavid van Moolenbroek int r;
120427852ebeSDavid van Moolenbroek
120527852ebeSDavid van Moolenbroek pos = uds_fetch_hdr(uds, uds->uds_tail, &seglen, datalen, segflags);
120627852ebeSDavid van Moolenbroek
120727852ebeSDavid van Moolenbroek /*
120827852ebeSDavid van Moolenbroek * If a partially completed receive now runs into a segment that cannot
120927852ebeSDavid van Moolenbroek * be logically merged with the previous one (because it has at least
121027852ebeSDavid van Moolenbroek * one segment flag set, meaning it has ancillary data), then we must
121127852ebeSDavid van Moolenbroek * shortcut the receive now.
121227852ebeSDavid van Moolenbroek */
121327852ebeSDavid van Moolenbroek if (off != 0 && *segflags != 0)
121427852ebeSDavid van Moolenbroek return OK;
121527852ebeSDavid van Moolenbroek
121627852ebeSDavid van Moolenbroek /*
121727852ebeSDavid van Moolenbroek * As stated, for stream-type sockets, we choose to ignore zero-size
121827852ebeSDavid van Moolenbroek * receive calls. This has the consequence that reading a zero-sized
121927852ebeSDavid van Moolenbroek * segment (with ancillary data) requires a receive request for at
122027852ebeSDavid van Moolenbroek * least one regular data byte. Such a receive call would then return
122127852ebeSDavid van Moolenbroek * zero. The problem with handling zero-data receive requests is that
122227852ebeSDavid van Moolenbroek * we need to know whether the current segment is terminated (i.e., no
122327852ebeSDavid van Moolenbroek * more data can possibly be merged into it later), which is a test
122427852ebeSDavid van Moolenbroek * that we rather not perform, not in the least because we do not know
122527852ebeSDavid van Moolenbroek * whether there is an error pending on the socket.
122627852ebeSDavid van Moolenbroek *
122727852ebeSDavid van Moolenbroek * For datagrams, we currently allow a zero-size receive call to
122827852ebeSDavid van Moolenbroek * discard the next datagram.
122927852ebeSDavid van Moolenbroek *
123027852ebeSDavid van Moolenbroek * TODO: compare this against policies on other platforms.
123127852ebeSDavid van Moolenbroek */
123227852ebeSDavid van Moolenbroek if (len == 0 && uds_get_type(uds) == SOCK_STREAM)
123327852ebeSDavid van Moolenbroek return OK;
123427852ebeSDavid van Moolenbroek
123527852ebeSDavid van Moolenbroek /*
123627852ebeSDavid van Moolenbroek * We have to skip the credentials for now: these are copied out as
123727852ebeSDavid van Moolenbroek * control data, and thus will (well, may) be looked at when dealing
123827852ebeSDavid van Moolenbroek * with the control data. For the same reason, we do not even look at
123927852ebeSDavid van Moolenbroek * UDS_HAS_FDS here.
124027852ebeSDavid van Moolenbroek */
124127852ebeSDavid van Moolenbroek if (*segflags & UDS_HAS_CRED) {
124227852ebeSDavid van Moolenbroek *credpos = pos;
124327852ebeSDavid van Moolenbroek
124427852ebeSDavid van Moolenbroek pos = uds_fetch(uds, pos, &lenbyte, 1);
124527852ebeSDavid van Moolenbroek pos = uds_advance(pos, (size_t)lenbyte);
124627852ebeSDavid van Moolenbroek }
124727852ebeSDavid van Moolenbroek
124827852ebeSDavid van Moolenbroek /*
124927852ebeSDavid van Moolenbroek * Copy out the source address, but only if the (datagram) socket is
125027852ebeSDavid van Moolenbroek * not connected. TODO: even when it is connected, it may still
125127852ebeSDavid van Moolenbroek * receive packets sent to it from other sockets *before* being
125227852ebeSDavid van Moolenbroek * connected, and the receiver has no way of knowing that those packets
125327852ebeSDavid van Moolenbroek * did not come from its new peer. Ideally, the older packets should
125427852ebeSDavid van Moolenbroek * be dropped..
125527852ebeSDavid van Moolenbroek */
125627852ebeSDavid van Moolenbroek if (*segflags & UDS_HAS_PATH) {
125727852ebeSDavid van Moolenbroek pos = uds_fetch(uds, pos, &lenbyte, 1);
125827852ebeSDavid van Moolenbroek
125927852ebeSDavid van Moolenbroek if (uds_get_type(uds) == SOCK_DGRAM && !uds_has_link(uds))
126027852ebeSDavid van Moolenbroek uds_make_addr((const char *)&uds->uds_buf[pos],
126127852ebeSDavid van Moolenbroek (size_t)lenbyte, addr, addr_len);
126227852ebeSDavid van Moolenbroek
126327852ebeSDavid van Moolenbroek pos = uds_advance(pos, (size_t)lenbyte);
126427852ebeSDavid van Moolenbroek }
126527852ebeSDavid van Moolenbroek
126627852ebeSDavid van Moolenbroek /*
126727852ebeSDavid van Moolenbroek * We can receive no more data than those that are present in the
126827852ebeSDavid van Moolenbroek * segment, obviously. For stream-type sockets, any more data that
126927852ebeSDavid van Moolenbroek * could have been received along with the current data would have been
127027852ebeSDavid van Moolenbroek * merged in the current segment, so we need not search for any next
127127852ebeSDavid van Moolenbroek * segments.
127227852ebeSDavid van Moolenbroek *
127327852ebeSDavid van Moolenbroek * For non-stream sockets, the caller may receive less than a whole
127427852ebeSDavid van Moolenbroek * packet if it supplied a small buffer. In that case, the rest of the
127527852ebeSDavid van Moolenbroek * packet will be discarded (but not here yet!) and the caller gets
127627852ebeSDavid van Moolenbroek * the MSG_TRUNC flag in its result, if it was using sendmsg(2) anyway.
127727852ebeSDavid van Moolenbroek */
127827852ebeSDavid van Moolenbroek if (len > *datalen)
127927852ebeSDavid van Moolenbroek len = *datalen;
128027852ebeSDavid van Moolenbroek else if (len < *datalen && uds_get_type(uds) != SOCK_STREAM)
128127852ebeSDavid van Moolenbroek *rflags |= MSG_TRUNC;
128227852ebeSDavid van Moolenbroek
128327852ebeSDavid van Moolenbroek /* Copy out the data to the caller. */
128427852ebeSDavid van Moolenbroek if (len > 0) {
128527852ebeSDavid van Moolenbroek iov[0].iov_addr = (vir_bytes)&uds->uds_buf[pos];
128627852ebeSDavid van Moolenbroek left = UDS_BUF - pos;
128727852ebeSDavid van Moolenbroek
128827852ebeSDavid van Moolenbroek if (left < len) {
128927852ebeSDavid van Moolenbroek iov[0].iov_size = left;
129027852ebeSDavid van Moolenbroek iov[1].iov_addr = (vir_bytes)&uds->uds_buf[0];
129127852ebeSDavid van Moolenbroek iov[1].iov_size = len - left;
129227852ebeSDavid van Moolenbroek iovcnt = 2;
129327852ebeSDavid van Moolenbroek } else {
129427852ebeSDavid van Moolenbroek iov[0].iov_size = len;
129527852ebeSDavid van Moolenbroek iovcnt = 1;
129627852ebeSDavid van Moolenbroek }
129727852ebeSDavid van Moolenbroek
129827852ebeSDavid van Moolenbroek if ((r = sockdriver_vcopyout(data, off, iov, iovcnt)) != OK)
129927852ebeSDavid van Moolenbroek return r;
130027852ebeSDavid van Moolenbroek }
130127852ebeSDavid van Moolenbroek
130227852ebeSDavid van Moolenbroek *reslen = len;
130327852ebeSDavid van Moolenbroek assert(seglen > 0 && seglen <= INT_MAX);
130427852ebeSDavid van Moolenbroek return (int)seglen;
130527852ebeSDavid van Moolenbroek }
130627852ebeSDavid van Moolenbroek
130727852ebeSDavid van Moolenbroek /*
130827852ebeSDavid van Moolenbroek * The current segment has associated file descriptors. If possible, copy out
130927852ebeSDavid van Moolenbroek * all file descriptors to the receiver, and generate and copy out a chunk of
131027852ebeSDavid van Moolenbroek * control data that contains their file descriptor numbers. If not all
131127852ebeSDavid van Moolenbroek * file descriptors fit in the receiver's buffer, or if any error occurs, no
131227852ebeSDavid van Moolenbroek * file descriptors are copied out.
131327852ebeSDavid van Moolenbroek */
131427852ebeSDavid van Moolenbroek static int
uds_recv_fds(struct udssock * uds,const struct sockdriver_data * ctl,socklen_t ctl_len,socklen_t ctl_off,endpoint_t user_endpt,int flags)131527852ebeSDavid van Moolenbroek uds_recv_fds(struct udssock * uds, const struct sockdriver_data * ctl,
131627852ebeSDavid van Moolenbroek socklen_t ctl_len, socklen_t ctl_off, endpoint_t user_endpt, int flags)
131727852ebeSDavid van Moolenbroek {
131827852ebeSDavid van Moolenbroek struct msghdr msghdr;
131927852ebeSDavid van Moolenbroek struct cmsghdr *cmsg;
132027852ebeSDavid van Moolenbroek struct uds_fd *ufd;
132127852ebeSDavid van Moolenbroek unsigned int i, nfds;
132227852ebeSDavid van Moolenbroek socklen_t chunklen, chunkspace;
132327852ebeSDavid van Moolenbroek int r, fd, what;
132427852ebeSDavid van Moolenbroek
132527852ebeSDavid van Moolenbroek /* See how many file descriptors should be part of this chunk. */
132627852ebeSDavid van Moolenbroek assert(!SIMPLEQ_EMPTY(&uds->uds_fds));
132727852ebeSDavid van Moolenbroek ufd = SIMPLEQ_FIRST(&uds->uds_fds);
132827852ebeSDavid van Moolenbroek nfds = ufd->ufd_count;
132927852ebeSDavid van Moolenbroek assert(nfds > 0);
133027852ebeSDavid van Moolenbroek
133127852ebeSDavid van Moolenbroek /*
133227852ebeSDavid van Moolenbroek * We produce and copy out potentially unaligned chunks, using
133327852ebeSDavid van Moolenbroek * CMSG_LEN, but return the aligned size at the end, using CMSG_SPACE.
133427852ebeSDavid van Moolenbroek * This may leave "gap" bytes unchanged in userland, but that should
133527852ebeSDavid van Moolenbroek * not be a problem. By producing unaligned chunks, we eliminate a
133627852ebeSDavid van Moolenbroek * potential boundary case where the unaligned chunk passed in (by the
133727852ebeSDavid van Moolenbroek * sender) no longer fits in the same buffer after being aligned here.
133827852ebeSDavid van Moolenbroek */
133927852ebeSDavid van Moolenbroek chunklen = CMSG_LEN(sizeof(int) * nfds);
134027852ebeSDavid van Moolenbroek chunkspace = CMSG_SPACE(sizeof(int) * nfds);
134127852ebeSDavid van Moolenbroek assert(chunklen <= sizeof(uds_ctlbuf));
134227852ebeSDavid van Moolenbroek if (chunklen > ctl_len)
134327852ebeSDavid van Moolenbroek return 0; /* chunk would not fit, so produce nothing instead */
134427852ebeSDavid van Moolenbroek if (chunkspace > ctl_len)
134527852ebeSDavid van Moolenbroek chunkspace = ctl_len;
134627852ebeSDavid van Moolenbroek
134727852ebeSDavid van Moolenbroek memset(&msghdr, 0, sizeof(msghdr));
134827852ebeSDavid van Moolenbroek msghdr.msg_control = uds_ctlbuf;
134927852ebeSDavid van Moolenbroek msghdr.msg_controllen = sizeof(uds_ctlbuf);
135027852ebeSDavid van Moolenbroek
135127852ebeSDavid van Moolenbroek memset(uds_ctlbuf, 0, chunklen);
135227852ebeSDavid van Moolenbroek cmsg = CMSG_FIRSTHDR(&msghdr);
135327852ebeSDavid van Moolenbroek cmsg->cmsg_len = chunklen;
135427852ebeSDavid van Moolenbroek cmsg->cmsg_level = SOL_SOCKET;
135527852ebeSDavid van Moolenbroek cmsg->cmsg_type = SCM_RIGHTS;
135627852ebeSDavid van Moolenbroek
135727852ebeSDavid van Moolenbroek /*
135827852ebeSDavid van Moolenbroek * Copy the group's local file descriptors to the target endpoint, and
135927852ebeSDavid van Moolenbroek * store the resulting remote file descriptors in the chunk buffer.
136027852ebeSDavid van Moolenbroek */
136127852ebeSDavid van Moolenbroek r = OK;
136227852ebeSDavid van Moolenbroek
136327852ebeSDavid van Moolenbroek for (i = 0; i < nfds; i++) {
136427852ebeSDavid van Moolenbroek assert(ufd != SIMPLEQ_END(&uds->uds_fds));
136527852ebeSDavid van Moolenbroek assert(i == 0 || ufd->ufd_count == 0);
136627852ebeSDavid van Moolenbroek
136727852ebeSDavid van Moolenbroek what = COPYFD_TO;
136827852ebeSDavid van Moolenbroek if (flags & MSG_CMSG_CLOEXEC)
136927852ebeSDavid van Moolenbroek what |= COPYFD_CLOEXEC;
137027852ebeSDavid van Moolenbroek
137127852ebeSDavid van Moolenbroek /* Failure may happen legitimately here (e.g., EMFILE). */
137227852ebeSDavid van Moolenbroek if ((r = copyfd(user_endpt, ufd->ufd_fd, what)) < 0)
137327852ebeSDavid van Moolenbroek break; /* we keep our progress so far in 'i' */
137427852ebeSDavid van Moolenbroek
137527852ebeSDavid van Moolenbroek fd = r;
137627852ebeSDavid van Moolenbroek
137727852ebeSDavid van Moolenbroek dprintf(("UDS: copied out fd %d -> %d\n", ufd->ufd_fd, fd));
137827852ebeSDavid van Moolenbroek
137927852ebeSDavid van Moolenbroek memcpy(&((int *)CMSG_DATA(cmsg))[i], &fd, sizeof(int));
138027852ebeSDavid van Moolenbroek
138127852ebeSDavid van Moolenbroek ufd = SIMPLEQ_NEXT(ufd, ufd_next);
138227852ebeSDavid van Moolenbroek }
138327852ebeSDavid van Moolenbroek
138427852ebeSDavid van Moolenbroek /* If everything went well so far, copy out the produced chunk. */
138527852ebeSDavid van Moolenbroek if (r >= 0)
138627852ebeSDavid van Moolenbroek r = sockdriver_copyout(ctl, ctl_off, uds_ctlbuf, chunklen);
138727852ebeSDavid van Moolenbroek
138827852ebeSDavid van Moolenbroek /*
138927852ebeSDavid van Moolenbroek * Handle errors. At this point, the 'i' variable contains the number
139027852ebeSDavid van Moolenbroek * of file descriptors that have already been successfully copied out.
139127852ebeSDavid van Moolenbroek */
139227852ebeSDavid van Moolenbroek if (r < 0) {
139327852ebeSDavid van Moolenbroek /* Revert the successful copyfd() calls made so far. */
139427852ebeSDavid van Moolenbroek while (i-- > 0) {
139527852ebeSDavid van Moolenbroek memcpy(&fd, &((int *)CMSG_DATA(cmsg))[i], sizeof(int));
139627852ebeSDavid van Moolenbroek
139727852ebeSDavid van Moolenbroek (void)copyfd(user_endpt, fd, COPYFD_CLOSE);
139827852ebeSDavid van Moolenbroek }
139927852ebeSDavid van Moolenbroek
140027852ebeSDavid van Moolenbroek return r;
140127852ebeSDavid van Moolenbroek }
140227852ebeSDavid van Moolenbroek
140327852ebeSDavid van Moolenbroek /*
140427852ebeSDavid van Moolenbroek * Success. Return the aligned size of the produced chunk, if the
140527852ebeSDavid van Moolenbroek * given length permits it. From here on, the receive call may no
140627852ebeSDavid van Moolenbroek * longer fail, as that would result in lost file descriptors.
140727852ebeSDavid van Moolenbroek */
140827852ebeSDavid van Moolenbroek return chunkspace;
140927852ebeSDavid van Moolenbroek }
141027852ebeSDavid van Moolenbroek
141127852ebeSDavid van Moolenbroek /*
141227852ebeSDavid van Moolenbroek * Generate and copy out a chunk of control data with the sender's credentials.
141327852ebeSDavid van Moolenbroek * Return the aligned chunk size on success, or a negative error code on
141427852ebeSDavid van Moolenbroek * failure.
141527852ebeSDavid van Moolenbroek */
141627852ebeSDavid van Moolenbroek static int
uds_recv_cred(struct udssock * uds,const struct sockdriver_data * ctl,socklen_t ctl_len,socklen_t ctl_off,size_t credpos)141727852ebeSDavid van Moolenbroek uds_recv_cred(struct udssock * uds, const struct sockdriver_data * ctl,
141827852ebeSDavid van Moolenbroek socklen_t ctl_len, socklen_t ctl_off, size_t credpos)
141927852ebeSDavid van Moolenbroek {
142027852ebeSDavid van Moolenbroek struct msghdr msghdr;
142127852ebeSDavid van Moolenbroek struct cmsghdr *cmsg;
142227852ebeSDavid van Moolenbroek socklen_t chunklen, chunkspace;
142327852ebeSDavid van Moolenbroek unsigned char lenbyte;
142427852ebeSDavid van Moolenbroek size_t credlen;
142527852ebeSDavid van Moolenbroek int r;
142627852ebeSDavid van Moolenbroek
142727852ebeSDavid van Moolenbroek /*
142827852ebeSDavid van Moolenbroek * Since the sender side already did the hard work of producing the
142927852ebeSDavid van Moolenbroek * (variable-size) sockcred structure as it should be received, there
143027852ebeSDavid van Moolenbroek * is relatively little work to be done here.
143127852ebeSDavid van Moolenbroek */
143227852ebeSDavid van Moolenbroek credpos = uds_fetch(uds, credpos, &lenbyte, 1);
143327852ebeSDavid van Moolenbroek credlen = (size_t)lenbyte;
143427852ebeSDavid van Moolenbroek
143527852ebeSDavid van Moolenbroek chunklen = CMSG_LEN(credlen);
143627852ebeSDavid van Moolenbroek chunkspace = CMSG_SPACE(credlen);
143727852ebeSDavid van Moolenbroek assert(chunklen <= sizeof(uds_ctlbuf));
143827852ebeSDavid van Moolenbroek if (chunklen > ctl_len)
143927852ebeSDavid van Moolenbroek return 0; /* chunk would not fit, so produce nothing instead */
144027852ebeSDavid van Moolenbroek if (chunkspace > ctl_len)
144127852ebeSDavid van Moolenbroek chunkspace = ctl_len;
144227852ebeSDavid van Moolenbroek
144327852ebeSDavid van Moolenbroek memset(&msghdr, 0, sizeof(msghdr));
144427852ebeSDavid van Moolenbroek msghdr.msg_control = uds_ctlbuf;
144527852ebeSDavid van Moolenbroek msghdr.msg_controllen = sizeof(uds_ctlbuf);
144627852ebeSDavid van Moolenbroek
144727852ebeSDavid van Moolenbroek memset(uds_ctlbuf, 0, chunklen);
144827852ebeSDavid van Moolenbroek cmsg = CMSG_FIRSTHDR(&msghdr);
144927852ebeSDavid van Moolenbroek cmsg->cmsg_len = chunklen;
145027852ebeSDavid van Moolenbroek cmsg->cmsg_level = SOL_SOCKET;
145127852ebeSDavid van Moolenbroek cmsg->cmsg_type = SCM_CREDS;
145227852ebeSDavid van Moolenbroek
145327852ebeSDavid van Moolenbroek uds_fetch(uds, credpos, CMSG_DATA(cmsg), credlen);
145427852ebeSDavid van Moolenbroek
145527852ebeSDavid van Moolenbroek if ((r = sockdriver_copyout(ctl, ctl_off, uds_ctlbuf, chunklen)) != OK)
145627852ebeSDavid van Moolenbroek return r;
145727852ebeSDavid van Moolenbroek
145827852ebeSDavid van Moolenbroek return chunkspace;
145927852ebeSDavid van Moolenbroek }
146027852ebeSDavid van Moolenbroek
146127852ebeSDavid van Moolenbroek /*
146227852ebeSDavid van Moolenbroek * Copy out control data for the ancillary data associated with the current
146327852ebeSDavid van Moolenbroek * segment, if any. Return OK on success, at which point the current receive
146427852ebeSDavid van Moolenbroek * call may no longer fail. 'rflags' may be updated with additional result
146527852ebeSDavid van Moolenbroek * flags. Return a negative error code on failure.
146627852ebeSDavid van Moolenbroek */
146727852ebeSDavid van Moolenbroek static int
uds_recv_ctl(struct udssock * uds,const struct sockdriver_data * ctl,socklen_t ctl_len,socklen_t * ctl_off,endpoint_t user_endpt,int flags,unsigned int segflags,size_t credpos,int * rflags)146827852ebeSDavid van Moolenbroek uds_recv_ctl(struct udssock * uds, const struct sockdriver_data * ctl,
146927852ebeSDavid van Moolenbroek socklen_t ctl_len, socklen_t * ctl_off, endpoint_t user_endpt,
147027852ebeSDavid van Moolenbroek int flags, unsigned int segflags, size_t credpos, int * rflags)
147127852ebeSDavid van Moolenbroek {
147227852ebeSDavid van Moolenbroek int r;
147327852ebeSDavid van Moolenbroek
147427852ebeSDavid van Moolenbroek /*
147527852ebeSDavid van Moolenbroek * We first copy out all file descriptors, if any. We put them in one
147627852ebeSDavid van Moolenbroek * SCM_RIGHTS chunk, even if the sender put them in separate SCM_RIGHTS
147727852ebeSDavid van Moolenbroek * chunks. We believe that this should not cause application-level
147827852ebeSDavid van Moolenbroek * issues, but if it does, we can change that later with some effort.
147927852ebeSDavid van Moolenbroek * We then copy out credentials, if any.
148027852ebeSDavid van Moolenbroek *
148127852ebeSDavid van Moolenbroek * We copy out each control chunk independently of the others, and also
148227852ebeSDavid van Moolenbroek * perform error recovery on a per-chunk basis. This implies the
148327852ebeSDavid van Moolenbroek * following. If producing or copying out the first chunk fails, the
148427852ebeSDavid van Moolenbroek * entire recvmsg(2) call will fail with an appropriate error. If
148527852ebeSDavid van Moolenbroek * producing or copying out any subsequent chunk fails, the recvmsg(2)
148627852ebeSDavid van Moolenbroek * call will still return the previously generated chunks (a "short
148727852ebeSDavid van Moolenbroek * control read" if you will) as well as the MSG_CTRUNC flag. This
148827852ebeSDavid van Moolenbroek * approach is simple and clean, and it guarantees that we can always
148927852ebeSDavid van Moolenbroek * copy out at least as many file descriptors as we copied in for this
149027852ebeSDavid van Moolenbroek * segment, even if credentials are present as well. However, the
149127852ebeSDavid van Moolenbroek * approach does cause slightly more overhead when there are multiple
149227852ebeSDavid van Moolenbroek * chunks per call, as those are copied out separately.
149327852ebeSDavid van Moolenbroek *
149427852ebeSDavid van Moolenbroek * Since the generated SCM_RIGHTS chunk is never larger than the
149527852ebeSDavid van Moolenbroek * originally received SCM_RIGHTS chunk, the temporary "uds_ctlbuf"
149627852ebeSDavid van Moolenbroek * buffer is always large enough to contain the chunk in its entirety.
149727852ebeSDavid van Moolenbroek * SCM_CREDS chunks should always fit easily as well.
149827852ebeSDavid van Moolenbroek *
149927852ebeSDavid van Moolenbroek * The MSG_CTRUNC flag will be returned iff not the entire user-given
150027852ebeSDavid van Moolenbroek * control buffer was filled and not all control chunks were delivered.
150127852ebeSDavid van Moolenbroek * Our current implementation does not deliver partial chunks. NetBSD
150227852ebeSDavid van Moolenbroek * does, except for SCM_RIGHTS chunks.
150327852ebeSDavid van Moolenbroek *
150427852ebeSDavid van Moolenbroek * TODO: get rid of the redundancy in processing return values.
150527852ebeSDavid van Moolenbroek */
150627852ebeSDavid van Moolenbroek if (segflags & UDS_HAS_FDS) {
150727852ebeSDavid van Moolenbroek r = uds_recv_fds(uds, ctl, ctl_len, *ctl_off, user_endpt,
150827852ebeSDavid van Moolenbroek flags);
150927852ebeSDavid van Moolenbroek
151027852ebeSDavid van Moolenbroek /*
151127852ebeSDavid van Moolenbroek * At this point, 'r' contains one of the following:
151227852ebeSDavid van Moolenbroek *
151327852ebeSDavid van Moolenbroek * r > 0 a chunk of 'r' bytes was added successfully.
151427852ebeSDavid van Moolenbroek * r == 0 not enough space left; the chunk was not added.
151527852ebeSDavid van Moolenbroek * r < 0 an error occurred; the chunk was not added.
151627852ebeSDavid van Moolenbroek */
151727852ebeSDavid van Moolenbroek if (r < 0 && *ctl_off == 0)
151827852ebeSDavid van Moolenbroek return r;
151927852ebeSDavid van Moolenbroek
152027852ebeSDavid van Moolenbroek if (r > 0) {
152127852ebeSDavid van Moolenbroek ctl_len -= r;
152227852ebeSDavid van Moolenbroek *ctl_off += r;
152327852ebeSDavid van Moolenbroek } else
152427852ebeSDavid van Moolenbroek *rflags |= MSG_CTRUNC;
152527852ebeSDavid van Moolenbroek }
152627852ebeSDavid van Moolenbroek
152727852ebeSDavid van Moolenbroek if (segflags & UDS_HAS_CRED) {
152827852ebeSDavid van Moolenbroek r = uds_recv_cred(uds, ctl, ctl_len, *ctl_off, credpos);
152927852ebeSDavid van Moolenbroek
153027852ebeSDavid van Moolenbroek /* As above. */
153127852ebeSDavid van Moolenbroek if (r < 0 && *ctl_off == 0)
153227852ebeSDavid van Moolenbroek return r;
153327852ebeSDavid van Moolenbroek
153427852ebeSDavid van Moolenbroek if (r > 0) {
153527852ebeSDavid van Moolenbroek ctl_len -= r;
153627852ebeSDavid van Moolenbroek *ctl_off += r;
153727852ebeSDavid van Moolenbroek } else
153827852ebeSDavid van Moolenbroek *rflags |= MSG_CTRUNC;
153927852ebeSDavid van Moolenbroek }
154027852ebeSDavid van Moolenbroek
154127852ebeSDavid van Moolenbroek return OK;
154227852ebeSDavid van Moolenbroek }
154327852ebeSDavid van Moolenbroek
154427852ebeSDavid van Moolenbroek /*
154527852ebeSDavid van Moolenbroek * The current receive request is successful or, in the case of MSG_WAITALL,
154627852ebeSDavid van Moolenbroek * has made progress. Advance the receive buffer tail, either by discarding
154727852ebeSDavid van Moolenbroek * the entire tail segment or by generating a new, smaller tail segment that
154827852ebeSDavid van Moolenbroek * contains only the regular data left to be received from the original tail
154927852ebeSDavid van Moolenbroek * segment. Also wake up the sending side for connection-oriented sockets if
155027852ebeSDavid van Moolenbroek * applicable, because there may now be room for more data to be sent. Update
155127852ebeSDavid van Moolenbroek * 'may_block' if we are now sure that the call may not block on MSG_WAITALL
155227852ebeSDavid van Moolenbroek * after all.
155327852ebeSDavid van Moolenbroek */
155427852ebeSDavid van Moolenbroek static void
uds_recv_advance(struct udssock * uds,size_t seglen,size_t datalen,size_t reslen,unsigned int segflags,int * may_block)155527852ebeSDavid van Moolenbroek uds_recv_advance(struct udssock * uds, size_t seglen, size_t datalen,
155627852ebeSDavid van Moolenbroek size_t reslen, unsigned int segflags, int * may_block)
155727852ebeSDavid van Moolenbroek {
155827852ebeSDavid van Moolenbroek struct udssock *conn;
155927852ebeSDavid van Moolenbroek struct uds_fd *ufd;
156027852ebeSDavid van Moolenbroek size_t delta, nseglen, advance;
156127852ebeSDavid van Moolenbroek unsigned int nfds;
156227852ebeSDavid van Moolenbroek
156327852ebeSDavid van Moolenbroek /* Note that 'reslen' may be legitimately zero. */
156427852ebeSDavid van Moolenbroek assert(reslen <= datalen);
156527852ebeSDavid van Moolenbroek
156627852ebeSDavid van Moolenbroek if (uds_get_type(uds) != SOCK_STREAM && reslen < datalen)
156727852ebeSDavid van Moolenbroek reslen = datalen;
156827852ebeSDavid van Moolenbroek
156927852ebeSDavid van Moolenbroek delta = datalen - reslen;
157027852ebeSDavid van Moolenbroek
157127852ebeSDavid van Moolenbroek if (delta == 0) {
157227852ebeSDavid van Moolenbroek /*
157327852ebeSDavid van Moolenbroek * Fully consume the tail segment. We advance the tail by the
157427852ebeSDavid van Moolenbroek * full segment length, thus moving up to either the next
157527852ebeSDavid van Moolenbroek * segment in the receive buffer, or an empty receive buffer.
157627852ebeSDavid van Moolenbroek */
157727852ebeSDavid van Moolenbroek advance = seglen;
157827852ebeSDavid van Moolenbroek
157927852ebeSDavid van Moolenbroek uds->uds_tail = uds_advance(uds->uds_tail, advance);
158027852ebeSDavid van Moolenbroek } else {
158127852ebeSDavid van Moolenbroek /*
158227852ebeSDavid van Moolenbroek * Partially consume the tail segment. We put a new segment
158327852ebeSDavid van Moolenbroek * header right in front of the remaining data, which obviously
158427852ebeSDavid van Moolenbroek * always fits. Since any ancillary data was consumed along
158527852ebeSDavid van Moolenbroek * with the first data byte of the segment, the new segment has
158627852ebeSDavid van Moolenbroek * no ancillary data anymore (and thus a zero flags field).
158727852ebeSDavid van Moolenbroek */
158827852ebeSDavid van Moolenbroek nseglen = UDS_HDRLEN + delta;
158927852ebeSDavid van Moolenbroek assert(nseglen < seglen);
159027852ebeSDavid van Moolenbroek
159127852ebeSDavid van Moolenbroek advance = seglen - nseglen;
159227852ebeSDavid van Moolenbroek
159327852ebeSDavid van Moolenbroek uds->uds_tail = uds_advance(uds->uds_tail, advance);
159427852ebeSDavid van Moolenbroek
159527852ebeSDavid van Moolenbroek uds_store_hdr(uds, uds->uds_tail, nseglen, delta, 0);
159627852ebeSDavid van Moolenbroek }
159727852ebeSDavid van Moolenbroek
159827852ebeSDavid van Moolenbroek /*
159927852ebeSDavid van Moolenbroek * For datagram-oriented sockets, we always consume at least a header.
160027852ebeSDavid van Moolenbroek * For stream-type sockets, we either consume a zero-data segment along
160127852ebeSDavid van Moolenbroek * with its ancillary data, or we consume at least one byte from a
160227852ebeSDavid van Moolenbroek * segment that does have regular data. In all other cases, the
160327852ebeSDavid van Moolenbroek * receive call has already been ended by now. Thus, we always advance
160427852ebeSDavid van Moolenbroek * the tail of the receive buffer here.
160527852ebeSDavid van Moolenbroek */
160627852ebeSDavid van Moolenbroek assert(advance > 0);
160727852ebeSDavid van Moolenbroek
160827852ebeSDavid van Moolenbroek /*
160927852ebeSDavid van Moolenbroek * The receive buffer's used length (uds_len) and pointer to the
161027852ebeSDavid van Moolenbroek * previous segment header (uds_last) are offsets from the tail. Now
161127852ebeSDavid van Moolenbroek * that we have moved the tail, we need to adjust these accordingly.
161227852ebeSDavid van Moolenbroek * If the buffer is now empty, reset the tail to the buffer start so as
161327852ebeSDavid van Moolenbroek * to avoid splitting inter-process copies whenever possible.
161427852ebeSDavid van Moolenbroek */
161527852ebeSDavid van Moolenbroek assert(uds->uds_len >= advance);
161627852ebeSDavid van Moolenbroek uds->uds_len -= advance;
161727852ebeSDavid van Moolenbroek
161827852ebeSDavid van Moolenbroek if (uds->uds_len == 0)
161927852ebeSDavid van Moolenbroek uds->uds_tail = 0;
162027852ebeSDavid van Moolenbroek
162127852ebeSDavid van Moolenbroek /*
162227852ebeSDavid van Moolenbroek * If uds_last is zero here, it was pointing to the segment we just
162327852ebeSDavid van Moolenbroek * (partially) consumed. By leaving it zero, it will still point to
162427852ebeSDavid van Moolenbroek * the new or next segment.
162527852ebeSDavid van Moolenbroek */
162627852ebeSDavid van Moolenbroek if (uds->uds_last > 0) {
162727852ebeSDavid van Moolenbroek assert(uds->uds_len > 0);
162827852ebeSDavid van Moolenbroek assert(uds->uds_last >= advance);
162927852ebeSDavid van Moolenbroek uds->uds_last -= advance;
163027852ebeSDavid van Moolenbroek }
163127852ebeSDavid van Moolenbroek
163227852ebeSDavid van Moolenbroek /*
163327852ebeSDavid van Moolenbroek * If there were any file descriptors associated with this segment,
163427852ebeSDavid van Moolenbroek * close and free them now.
163527852ebeSDavid van Moolenbroek */
163627852ebeSDavid van Moolenbroek if (segflags & UDS_HAS_FDS) {
163727852ebeSDavid van Moolenbroek assert(!SIMPLEQ_EMPTY(&uds->uds_fds));
163827852ebeSDavid van Moolenbroek ufd = SIMPLEQ_FIRST(&uds->uds_fds);
163927852ebeSDavid van Moolenbroek nfds = ufd->ufd_count;
164027852ebeSDavid van Moolenbroek assert(nfds > 0);
164127852ebeSDavid van Moolenbroek
164227852ebeSDavid van Moolenbroek while (nfds-- > 0) {
164327852ebeSDavid van Moolenbroek assert(!SIMPLEQ_EMPTY(&uds->uds_fds));
164427852ebeSDavid van Moolenbroek ufd = SIMPLEQ_FIRST(&uds->uds_fds);
164527852ebeSDavid van Moolenbroek SIMPLEQ_REMOVE_HEAD(&uds->uds_fds, ufd_next);
164627852ebeSDavid van Moolenbroek
164727852ebeSDavid van Moolenbroek dprintf(("UDS: closing local fd %d\n", ufd->ufd_fd));
164827852ebeSDavid van Moolenbroek
164927852ebeSDavid van Moolenbroek closenb(ufd->ufd_fd);
165027852ebeSDavid van Moolenbroek
165127852ebeSDavid van Moolenbroek SIMPLEQ_INSERT_TAIL(&uds_freefds, ufd, ufd_next);
165227852ebeSDavid van Moolenbroek }
165327852ebeSDavid van Moolenbroek }
165427852ebeSDavid van Moolenbroek
165527852ebeSDavid van Moolenbroek /*
165627852ebeSDavid van Moolenbroek * If there is now any data left in the receive buffer, then there has
165727852ebeSDavid van Moolenbroek * been a reason that we haven't received it. For stream sockets, that
165827852ebeSDavid van Moolenbroek * reason is that the next segment has ancillary data. In any case,
165927852ebeSDavid van Moolenbroek * this means we should never block the current receive operation
166027852ebeSDavid van Moolenbroek * waiting for more data. Otherwise, we may block on MSG_WAITALL.
166127852ebeSDavid van Moolenbroek */
166227852ebeSDavid van Moolenbroek if (uds->uds_len > 0)
166327852ebeSDavid van Moolenbroek *may_block = FALSE;
166427852ebeSDavid van Moolenbroek
166527852ebeSDavid van Moolenbroek /*
166627852ebeSDavid van Moolenbroek * If the (non-datagram) socket has a peer that is not shut down for
166727852ebeSDavid van Moolenbroek * writing, see if it can be woken up to send more data. Note that
166827852ebeSDavid van Moolenbroek * the event will never be processed immediately.
166927852ebeSDavid van Moolenbroek */
167027852ebeSDavid van Moolenbroek if (uds_is_connected(uds)) {
167127852ebeSDavid van Moolenbroek assert(uds_get_type(uds) != SOCK_DGRAM);
167227852ebeSDavid van Moolenbroek
167327852ebeSDavid van Moolenbroek conn = uds->uds_conn;
167427852ebeSDavid van Moolenbroek
167527852ebeSDavid van Moolenbroek if (!uds_is_shutdown(conn, SFL_SHUT_WR))
167627852ebeSDavid van Moolenbroek sockevent_raise(&conn->uds_sock, SEV_SEND);
167727852ebeSDavid van Moolenbroek }
167827852ebeSDavid van Moolenbroek }
167927852ebeSDavid van Moolenbroek
168027852ebeSDavid van Moolenbroek /*
168127852ebeSDavid van Moolenbroek * Process a receive request. Return OK if the receive request has completed
168227852ebeSDavid van Moolenbroek * successfully, SUSPEND if it should be tried again later, SOCKEVENT_EOF if an
168327852ebeSDavid van Moolenbroek * end-of-file condition is reached, or a negative error code on failure. In
168427852ebeSDavid van Moolenbroek * all cases, the values of 'off' and 'ctl_off' must be updated if any progress
168527852ebeSDavid van Moolenbroek * has been made; if either is non-zero, libsockevent will return the partial
168627852ebeSDavid van Moolenbroek * progress rather than an error code or EOF.
168727852ebeSDavid van Moolenbroek */
168827852ebeSDavid van Moolenbroek int
uds_recv(struct sock * sock,const struct sockdriver_data * data,size_t len,size_t * off,const struct sockdriver_data * ctl,socklen_t ctl_len,socklen_t * ctl_off,struct sockaddr * addr,socklen_t * addr_len,endpoint_t user_endpt,int flags,size_t min,int * rflags)168927852ebeSDavid van Moolenbroek uds_recv(struct sock * sock, const struct sockdriver_data * data, size_t len,
169027852ebeSDavid van Moolenbroek size_t * off, const struct sockdriver_data * ctl, socklen_t ctl_len,
169127852ebeSDavid van Moolenbroek socklen_t * ctl_off, struct sockaddr * addr, socklen_t * addr_len,
169227852ebeSDavid van Moolenbroek endpoint_t user_endpt, int flags, size_t min, int * rflags)
169327852ebeSDavid van Moolenbroek {
169427852ebeSDavid van Moolenbroek struct udssock *uds = (struct udssock *)sock;
169527852ebeSDavid van Moolenbroek size_t seglen, datalen, reslen = 0 /*gcc*/, credpos = 0 /*gcc*/;
169627852ebeSDavid van Moolenbroek unsigned int segflags;
1697*03ac74edSLionel Sambuc int r, partial, may_block = 0 /*gcc*/;
169827852ebeSDavid van Moolenbroek
169927852ebeSDavid van Moolenbroek dprintf(("UDS: recv(%d,%zu,%zu,%u,%u,0x%x)\n",
170027852ebeSDavid van Moolenbroek uds_get_id(uds), len, (off != NULL) ? *off : 0, ctl_len,
170127852ebeSDavid van Moolenbroek (ctl_off != NULL) ? *ctl_off : 0, flags));
170227852ebeSDavid van Moolenbroek
170327852ebeSDavid van Moolenbroek /*
170427852ebeSDavid van Moolenbroek * Start by testing whether anything can be received at all, or whether
170527852ebeSDavid van Moolenbroek * an error or EOF should be returned instead, or whether the receive
170627852ebeSDavid van Moolenbroek * call should be suspended until later otherwise. If no (regular or
170727852ebeSDavid van Moolenbroek * control) data can be received, or if this was a test for select,
170827852ebeSDavid van Moolenbroek * we bail out right after.
170927852ebeSDavid van Moolenbroek */
171027852ebeSDavid van Moolenbroek partial = (off != NULL && *off > 0);
171127852ebeSDavid van Moolenbroek
171227852ebeSDavid van Moolenbroek if ((r = uds_recv_test(uds, len, min, partial, &may_block)) != OK)
171327852ebeSDavid van Moolenbroek return r;
171427852ebeSDavid van Moolenbroek
171527852ebeSDavid van Moolenbroek /*
171627852ebeSDavid van Moolenbroek * Copy out regular data, if any. Do this before copying out control
171727852ebeSDavid van Moolenbroek * data, because the latter is harder to undo on failure. This data
171827852ebeSDavid van Moolenbroek * copy function returns returns OK (0) if we are to return a result of
171927852ebeSDavid van Moolenbroek * zero bytes (which is *not* EOF) to the caller without doing anything
172027852ebeSDavid van Moolenbroek * else. The function returns a nonzero positive segment length if we
172127852ebeSDavid van Moolenbroek * should carry on with the receive call (as it happens, all its other
172227852ebeSDavid van Moolenbroek * returned values may in fact be zero).
172327852ebeSDavid van Moolenbroek */
172427852ebeSDavid van Moolenbroek if ((r = uds_recv_data(uds, data, len, *off, addr, addr_len, rflags,
172527852ebeSDavid van Moolenbroek &datalen, &reslen, &segflags, &credpos)) <= 0)
172627852ebeSDavid van Moolenbroek return r;
172727852ebeSDavid van Moolenbroek seglen = (size_t)r;
172827852ebeSDavid van Moolenbroek
172927852ebeSDavid van Moolenbroek /*
173027852ebeSDavid van Moolenbroek * Copy out control data, if any: transfer and copy out records of file
173127852ebeSDavid van Moolenbroek * descriptors, and/or copy out sender credentials. This is the last
173227852ebeSDavid van Moolenbroek * part of the call that may fail.
173327852ebeSDavid van Moolenbroek */
173427852ebeSDavid van Moolenbroek if ((r = uds_recv_ctl(uds, ctl, ctl_len, ctl_off, user_endpt, flags,
173527852ebeSDavid van Moolenbroek segflags, credpos, rflags)) != OK)
173627852ebeSDavid van Moolenbroek return r;
173727852ebeSDavid van Moolenbroek
173827852ebeSDavid van Moolenbroek /*
173927852ebeSDavid van Moolenbroek * Now that the call has succeeded, move the tail of the receive
174027852ebeSDavid van Moolenbroek * buffer, unless we were merely peeking.
174127852ebeSDavid van Moolenbroek */
174227852ebeSDavid van Moolenbroek if (!(flags & MSG_PEEK))
174327852ebeSDavid van Moolenbroek uds_recv_advance(uds, seglen, datalen, reslen, segflags,
174427852ebeSDavid van Moolenbroek &may_block);
174527852ebeSDavid van Moolenbroek else
174627852ebeSDavid van Moolenbroek may_block = FALSE;
174727852ebeSDavid van Moolenbroek
174827852ebeSDavid van Moolenbroek /*
174927852ebeSDavid van Moolenbroek * If the MSG_WAITALL flag was given, we may still have to suspend the
175027852ebeSDavid van Moolenbroek * call after partial success. In particular, the receive call may
175127852ebeSDavid van Moolenbroek * suspend after partial success if all of these conditions are met:
175227852ebeSDavid van Moolenbroek *
175327852ebeSDavid van Moolenbroek * 1) the socket is a stream-type socket;
175427852ebeSDavid van Moolenbroek * 2) MSG_WAITALL is set;
175527852ebeSDavid van Moolenbroek * 3) MSG_PEEK is not set;
175627852ebeSDavid van Moolenbroek * 4) MSG_DONTWAIT is not set (tested upon return);
175727852ebeSDavid van Moolenbroek * 5) the socket must not have a pending error (tested upon return);
175827852ebeSDavid van Moolenbroek * 6) the socket must not be shut down for reading (tested later);
175927852ebeSDavid van Moolenbroek * 7) the socket must still be connected to a peer (no EOF);
176027852ebeSDavid van Moolenbroek * 8) the peer must not have been shut down for writing (no EOF);
176127852ebeSDavid van Moolenbroek * 9) the next segment, if any, contains no ancillary data.
176227852ebeSDavid van Moolenbroek *
176327852ebeSDavid van Moolenbroek * Together, these points guarantee that the call could conceivably
176427852ebeSDavid van Moolenbroek * receive more after being resumed. Points 4 to 6 are covered by
176527852ebeSDavid van Moolenbroek * libsockevent, which will end the call even if we return SUSPEND
176627852ebeSDavid van Moolenbroek * here. Due to segment merging, we cover point 9 by checking that
176727852ebeSDavid van Moolenbroek * there is currently no next segment at all. Once a new segment
176827852ebeSDavid van Moolenbroek * arrives, the ancillary-data test is done then.
176927852ebeSDavid van Moolenbroek */
177027852ebeSDavid van Moolenbroek *off += reslen;
177127852ebeSDavid van Moolenbroek if ((flags & MSG_WAITALL) && reslen < len && may_block)
177227852ebeSDavid van Moolenbroek return SUSPEND;
177327852ebeSDavid van Moolenbroek else
177427852ebeSDavid van Moolenbroek return OK;
177527852ebeSDavid van Moolenbroek }
177627852ebeSDavid van Moolenbroek
177727852ebeSDavid van Moolenbroek /*
177827852ebeSDavid van Moolenbroek * Test whether a receive request would block. The given 'min' parameter
177927852ebeSDavid van Moolenbroek * contains the minimum number of bytes that should be possible to receive
178027852ebeSDavid van Moolenbroek * without blocking (the low receive watermark). Return SUSPEND if the send
178127852ebeSDavid van Moolenbroek * request would block. Otherwise, return any other error code (including OK
178227852ebeSDavid van Moolenbroek * or SOCKEVENT_EOF), and if 'size' is not a NULL pointer, it should be filled
178327852ebeSDavid van Moolenbroek * with the number of bytes available for receipt right now (if not zero).
178427852ebeSDavid van Moolenbroek * Note that if 'size' is not NULL, 'min' will always be zero.
178527852ebeSDavid van Moolenbroek */
178627852ebeSDavid van Moolenbroek int
uds_test_recv(struct sock * sock,size_t min,size_t * size)178727852ebeSDavid van Moolenbroek uds_test_recv(struct sock * sock, size_t min, size_t * size)
178827852ebeSDavid van Moolenbroek {
178927852ebeSDavid van Moolenbroek struct udssock *uds = (struct udssock *)sock;
179027852ebeSDavid van Moolenbroek size_t seglen;
179127852ebeSDavid van Moolenbroek unsigned int segflags;
179227852ebeSDavid van Moolenbroek int r;
179327852ebeSDavid van Moolenbroek
179427852ebeSDavid van Moolenbroek if ((r = uds_recv_test(uds, min, min, FALSE /*partial*/,
179527852ebeSDavid van Moolenbroek NULL /*may_block*/)) == SUSPEND)
179627852ebeSDavid van Moolenbroek return r;
179727852ebeSDavid van Moolenbroek
179827852ebeSDavid van Moolenbroek if (size != NULL && uds->uds_len > 0)
179927852ebeSDavid van Moolenbroek (void)uds_fetch_hdr(uds, uds->uds_tail, &seglen, size,
180027852ebeSDavid van Moolenbroek &segflags);
180127852ebeSDavid van Moolenbroek
180227852ebeSDavid van Moolenbroek return r;
180327852ebeSDavid van Moolenbroek }
1804