15185a700Sflorian /*
25185a700Sflorian * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
35185a700Sflorian *
45185a700Sflorian * Permission to use, copy, modify, and/or distribute this software for any
55185a700Sflorian * purpose with or without fee is hereby granted, provided that the above
65185a700Sflorian * copyright notice and this permission notice appear in all copies.
75185a700Sflorian *
85185a700Sflorian * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
95185a700Sflorian * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
105185a700Sflorian * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
115185a700Sflorian * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
125185a700Sflorian * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
135185a700Sflorian * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
145185a700Sflorian * PERFORMANCE OF THIS SOFTWARE.
155185a700Sflorian */
165185a700Sflorian
175185a700Sflorian /*! \file */
185185a700Sflorian
195185a700Sflorian #include <sys/socket.h>
205185a700Sflorian #include <sys/time.h>
215185a700Sflorian #include <sys/uio.h>
225185a700Sflorian
235185a700Sflorian #include <netinet/tcp.h>
245185a700Sflorian
255185a700Sflorian #include <errno.h>
265185a700Sflorian #include <fcntl.h>
275185a700Sflorian #include <stddef.h>
285185a700Sflorian #include <stdlib.h>
295185a700Sflorian #include <string.h>
305185a700Sflorian #include <unistd.h>
315185a700Sflorian
325185a700Sflorian #include <isc/buffer.h>
335185a700Sflorian #include <isc/bufferlist.h>
3487f06ebfSflorian
355185a700Sflorian #include <isc/list.h>
365185a700Sflorian #include <isc/log.h>
375185a700Sflorian #include <isc/region.h>
385185a700Sflorian #include <isc/socket.h>
395185a700Sflorian #include <isc/task.h>
405185a700Sflorian #include <isc/util.h>
415185a700Sflorian
425185a700Sflorian #include "errno2result.h"
435185a700Sflorian
445185a700Sflorian #include "socket_p.h"
455185a700Sflorian #include "../task_p.h"
465185a700Sflorian
/*
 * Descriptor sets and counters describing one wait on the managed sockets.
 * NOTE(review): the field roles below are inferred from their names and
 * types; the code that fills this structure is outside this section.
 */
struct isc_socketwait {
	fd_set *readset;	/* descriptors of interest for reading */
	fd_set *writeset;	/* descriptors of interest for writing */
	int nfds;		/* presumably the count returned by the wait */
	int maxfd;		/* presumably the highest descriptor watched */
};
535185a700Sflorian
/*
 * Set by the -T dscp option on the command line.  If set to a value
 * other than -1, we check to make sure DSCP values match it, and
 * assert if not.
 */
int isc_dscp_check_value = -1;
605185a700Sflorian
615185a700Sflorian /*%
625185a700Sflorian * Some systems define the socket length argument as an int, some as size_t,
635185a700Sflorian * some as socklen_t. This is here so it can be easily changed if needed.
645185a700Sflorian */
655185a700Sflorian
/*%
 * Define what the possible "soft" errors can be.  These are non-fatal returns
 * of various network related functions, like recv() and so on.
 *
 * For some reason, BSDI (and perhaps others) will sometimes return <0
 * from recv() but will have errno==0.  This is broken, but we have to
 * work around it here, which is why 0 counts as a soft error.
 */
#define SOFT_ERROR(e)	((e) == EAGAIN || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == 0)
785185a700Sflorian
/*
 * Expands to the (category, module, level) argument triple used when
 * logging a socket debug message at debug level x.
 */
#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*!<
 * DLVL(90)  --  Function entry/exit and other tracing.
 * DLVL(60)  --  Socket data send/receive
 * DLVL(50)  --  Event tracing, including receiving/sending completion events.
 * DLVL(20)  --  Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

#define TRACE		DLVL(TRACE_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)
965185a700Sflorian
975185a700Sflorian typedef isc_event_t intev_t;
985185a700Sflorian
995185a700Sflorian /*!
1005185a700Sflorian * IPv6 control information. If the socket is an IPv6 socket we want
1015185a700Sflorian * to collect the destination address and interface so the client can
1025185a700Sflorian * set them on outgoing packets.
1035185a700Sflorian */
1045185a700Sflorian
1055185a700Sflorian /*%
1065185a700Sflorian * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have
1075185a700Sflorian * a setsockopt() like interface to request timestamps, and if the OS
1085185a700Sflorian * doesn't do it for us, call gettimeofday() on every UDP receive?
1095185a700Sflorian */
1105185a700Sflorian
/*%
 * Instead of calculating the cmsgbuf lengths every time we take
 * a rule of thumb approach - sizes are taken from x86_64 linux,
 * multiplied by 2, everything should fit.  Those sizes are not
 * large enough to cause any concern.
 */
#define CMSG_SP_IN6PKT		40

#define CMSG_SP_TIMESTAMP	32

#define CMSG_SP_TCTOS		24

#define CMSG_SP_INT		24

/* Sizes of the control-message buffers used for recvmsg() and sendmsg(). */
#define RECVCMSGBUFLEN (2*(CMSG_SP_IN6PKT + CMSG_SP_TIMESTAMP + CMSG_SP_TCTOS)+1)
#define SENDCMSGBUFLEN (2*(CMSG_SP_IN6PKT + CMSG_SP_INT + CMSG_SP_TCTOS)+1)
1275185a700Sflorian
/*%
 * The number of times a send operation is repeated if the result is EINTR.
 */
#define NRETRIES 10
1325185a700Sflorian
1338b553854Sflorian struct isc_socket {
1345185a700Sflorian /* Not locked. */
1358b553854Sflorian isc_socketmgr_t *manager;
1365185a700Sflorian isc_sockettype_t type;
1375185a700Sflorian
1385185a700Sflorian /* Locked by socket lock. */
1398b553854Sflorian ISC_LINK(isc_socket_t) link;
1405185a700Sflorian unsigned int references;
1415185a700Sflorian int fd;
1425185a700Sflorian int pf;
1435185a700Sflorian
1445185a700Sflorian ISC_LIST(isc_socketevent_t) send_list;
1455185a700Sflorian ISC_LIST(isc_socketevent_t) recv_list;
1465185a700Sflorian isc_socket_connev_t *connect_ev;
1475185a700Sflorian
1485185a700Sflorian /*
1495185a700Sflorian * Internal events. Posted when a descriptor is readable or
1505185a700Sflorian * writable. These are statically allocated and never freed.
1515185a700Sflorian * They will be set to non-purgable before use.
1525185a700Sflorian */
1535185a700Sflorian intev_t readable_ev;
1545185a700Sflorian intev_t writable_ev;
1555185a700Sflorian
156*b1a294b5Sflorian struct sockaddr_storage peer_address; /* remote address */
1575185a700Sflorian
1585185a700Sflorian unsigned int pending_recv : 1,
1595185a700Sflorian pending_send : 1,
1605185a700Sflorian connected : 1,
1615185a700Sflorian connecting : 1, /* connect pending */
1625185a700Sflorian bound : 1, /* bound to local addr */
1635185a700Sflorian active : 1, /* currently active */
1645185a700Sflorian pktdscp : 1; /* per packet dscp */
1655185a700Sflorian unsigned int dscp;
1665185a700Sflorian };
1675185a700Sflorian
1688b553854Sflorian struct isc_socketmgr {
1695185a700Sflorian /* Not locked. */
1705185a700Sflorian int fd_bufsize;
1715185a700Sflorian unsigned int maxsocks;
1725185a700Sflorian
1738b553854Sflorian isc_socket_t **fds;
1745185a700Sflorian int *fdstate;
1755185a700Sflorian
1765185a700Sflorian /* Locked by manager lock. */
1778b553854Sflorian ISC_LIST(isc_socket_t) socklist;
1785185a700Sflorian fd_set *read_fds;
1795185a700Sflorian fd_set *read_fds_copy;
1805185a700Sflorian fd_set *write_fds;
1815185a700Sflorian fd_set *write_fds_copy;
1825185a700Sflorian int maxfd;
1835185a700Sflorian unsigned int refs;
1845185a700Sflorian };
1855185a700Sflorian
1868b553854Sflorian static isc_socketmgr_t *socketmgr = NULL;
1875185a700Sflorian
/* Values stored in the manager's per-descriptor fdstate[] table. */
#define CLOSED			0	/* this one must be zero */
#define MANAGED			1
#define CLOSE_PENDING		2

/*
 * send() and recv() iovec counts
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)
1975185a700Sflorian
1985185a700Sflorian static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
1995185a700Sflorian isc_sockettype_t type,
2005185a700Sflorian isc_socket_t **socketp);
2018b553854Sflorian static void send_recvdone_event(isc_socket_t *, isc_socketevent_t **);
2028b553854Sflorian static void send_senddone_event(isc_socket_t *, isc_socketevent_t **);
2038b553854Sflorian static void free_socket(isc_socket_t **);
2048b553854Sflorian static isc_result_t allocate_socket(isc_socketmgr_t *, isc_sockettype_t,
2058b553854Sflorian isc_socket_t **);
2068b553854Sflorian static void destroy(isc_socket_t **);
2075185a700Sflorian static void internal_connect(isc_task_t *, isc_event_t *);
2085185a700Sflorian static void internal_recv(isc_task_t *, isc_event_t *);
2095185a700Sflorian static void internal_send(isc_task_t *, isc_event_t *);
2108b553854Sflorian static void process_cmsg(isc_socket_t *, struct msghdr *, isc_socketevent_t *);
2118b553854Sflorian static void build_msghdr_send(isc_socket_t *, char *, isc_socketevent_t *,
2125185a700Sflorian struct msghdr *, struct iovec *, size_t *);
2138b553854Sflorian static void build_msghdr_recv(isc_socket_t *, char *, isc_socketevent_t *,
2145185a700Sflorian struct msghdr *, struct iovec *, size_t *);
2155185a700Sflorian
/* Message codes passed to select_poke() / wakeup_socket(). */
#define SELECT_POKE_SHUTDOWN		(-1)
#define SELECT_POKE_READ		(-3)
#define SELECT_POKE_WRITE		(-4)
#define SELECT_POKE_CONNECT		(-4) /*%< Same as _WRITE */
#define SELECT_POKE_CLOSE		(-5)

/* True once the socket has no references left. */
#define SOCK_DEAD(s)			((s)->references == 0)
2235185a700Sflorian
/*%
 * Shortcut index arrays to get access to statistics counters.
 */
enum {
	STATID_OPEN = 0,
	STATID_OPENFAIL = 1,
	STATID_CLOSE = 2,
	STATID_BINDFAIL = 3,
	STATID_CONNECTFAIL = 4,
	STATID_CONNECT = 5,
	STATID_ACCEPTFAIL = 6,
	STATID_ACCEPT = 7,
	STATID_SENDFAIL = 8,
	STATID_RECVFAIL = 9,
	STATID_ACTIVE = 10
};
2405185a700Sflorian
2415185a700Sflorian static void
242*b1a294b5Sflorian socket_log(isc_socket_t *sock, struct sockaddr_storage *address,
2435185a700Sflorian isc_logcategory_t *category, isc_logmodule_t *module, int level,
24487f06ebfSflorian const char *fmt, ...) __attribute__((__format__(__printf__, 6, 7)));
2455185a700Sflorian static void
socket_log(isc_socket_t * sock,struct sockaddr_storage * address,isc_logcategory_t * category,isc_logmodule_t * module,int level,const char * fmt,...)246*b1a294b5Sflorian socket_log(isc_socket_t *sock, struct sockaddr_storage *address,
2475185a700Sflorian isc_logcategory_t *category, isc_logmodule_t *module, int level,
2485185a700Sflorian const char *fmt, ...)
2495185a700Sflorian {
2505185a700Sflorian char msgbuf[2048];
2515185a700Sflorian char peerbuf[ISC_SOCKADDR_FORMATSIZE];
2525185a700Sflorian va_list ap;
2535185a700Sflorian
2545185a700Sflorian if (! isc_log_wouldlog(isc_lctx, level))
2555185a700Sflorian return;
2565185a700Sflorian
2575185a700Sflorian va_start(ap, fmt);
2585185a700Sflorian vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
2595185a700Sflorian va_end(ap);
2605185a700Sflorian
2615185a700Sflorian if (address == NULL) {
2625185a700Sflorian isc_log_write(isc_lctx, category, module, level,
2635185a700Sflorian "socket %p: %s", sock, msgbuf);
2645185a700Sflorian } else {
2655185a700Sflorian isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
2665185a700Sflorian isc_log_write(isc_lctx, category, module, level,
2675185a700Sflorian "socket %p %s: %s", sock, peerbuf, msgbuf);
2685185a700Sflorian }
2695185a700Sflorian }
2705185a700Sflorian
2715185a700Sflorian static inline isc_result_t
watch_fd(isc_socketmgr_t * manager,int fd,int msg)2728b553854Sflorian watch_fd(isc_socketmgr_t *manager, int fd, int msg) {
2735185a700Sflorian isc_result_t result = ISC_R_SUCCESS;
2745185a700Sflorian
2755185a700Sflorian if (msg == SELECT_POKE_READ)
2765185a700Sflorian FD_SET(fd, manager->read_fds);
2775185a700Sflorian if (msg == SELECT_POKE_WRITE)
2785185a700Sflorian FD_SET(fd, manager->write_fds);
2795185a700Sflorian
2805185a700Sflorian return (result);
2815185a700Sflorian }
2825185a700Sflorian
2835185a700Sflorian static inline isc_result_t
unwatch_fd(isc_socketmgr_t * manager,int fd,int msg)2848b553854Sflorian unwatch_fd(isc_socketmgr_t *manager, int fd, int msg) {
2855185a700Sflorian isc_result_t result = ISC_R_SUCCESS;
2865185a700Sflorian
2875185a700Sflorian if (msg == SELECT_POKE_READ)
2885185a700Sflorian FD_CLR(fd, manager->read_fds);
2895185a700Sflorian else if (msg == SELECT_POKE_WRITE)
2905185a700Sflorian FD_CLR(fd, manager->write_fds);
2915185a700Sflorian
2925185a700Sflorian return (result);
2935185a700Sflorian }
2945185a700Sflorian
2955185a700Sflorian static void
wakeup_socket(isc_socketmgr_t * manager,int fd,int msg)2968b553854Sflorian wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
2975185a700Sflorian isc_result_t result;
2985185a700Sflorian
2995185a700Sflorian /*
3005185a700Sflorian * This is a wakeup on a socket. If the socket is not in the
3015185a700Sflorian * process of being closed, start watching it for either reads
3025185a700Sflorian * or writes.
3035185a700Sflorian */
3045185a700Sflorian
3055185a700Sflorian INSIST(fd >= 0 && fd < (int)manager->maxsocks);
3065185a700Sflorian
3075185a700Sflorian if (msg == SELECT_POKE_CLOSE) {
3085185a700Sflorian /* No one should be updating fdstate, so no need to lock it */
3095185a700Sflorian INSIST(manager->fdstate[fd] == CLOSE_PENDING);
3105185a700Sflorian manager->fdstate[fd] = CLOSED;
3115185a700Sflorian (void)unwatch_fd(manager, fd, SELECT_POKE_READ);
3125185a700Sflorian (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
3135185a700Sflorian (void)close(fd);
3145185a700Sflorian return;
3155185a700Sflorian }
3165185a700Sflorian
3175185a700Sflorian if (manager->fdstate[fd] == CLOSE_PENDING) {
3185185a700Sflorian
3195185a700Sflorian /*
3205185a700Sflorian * We accept (and ignore) any error from unwatch_fd() as we are
3215185a700Sflorian * closing the socket, hoping it doesn't leave dangling state in
3225185a700Sflorian * the kernel.
3235185a700Sflorian */
3245185a700Sflorian (void)unwatch_fd(manager, fd, SELECT_POKE_READ);
3255185a700Sflorian (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
3265185a700Sflorian return;
3275185a700Sflorian }
3285185a700Sflorian if (manager->fdstate[fd] != MANAGED) {
3295185a700Sflorian return;
3305185a700Sflorian }
3315185a700Sflorian
3325185a700Sflorian /*
3335185a700Sflorian * Set requested bit.
3345185a700Sflorian */
3355185a700Sflorian result = watch_fd(manager, fd, msg);
3365185a700Sflorian if (result != ISC_R_SUCCESS) {
3375185a700Sflorian /*
3385185a700Sflorian * XXXJT: what should we do? Ignoring the failure of watching
3395185a700Sflorian * a socket will make the application dysfunctional, but there
3405185a700Sflorian * seems to be no reasonable recovery process.
3415185a700Sflorian */
3425185a700Sflorian isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
3435185a700Sflorian ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
3445185a700Sflorian "failed to start watching FD (%d): %s",
3455185a700Sflorian fd, isc_result_totext(result));
3465185a700Sflorian }
3475185a700Sflorian }
3485185a700Sflorian
3495185a700Sflorian /*
3505185a700Sflorian * Update the state of the socketmgr when something changes.
3515185a700Sflorian */
3525185a700Sflorian static void
select_poke(isc_socketmgr_t * manager,int fd,int msg)3538b553854Sflorian select_poke(isc_socketmgr_t *manager, int fd, int msg) {
3545185a700Sflorian if (msg == SELECT_POKE_SHUTDOWN)
3555185a700Sflorian return;
3565185a700Sflorian else if (fd >= 0)
3575185a700Sflorian wakeup_socket(manager, fd, msg);
3585185a700Sflorian return;
3595185a700Sflorian }
3605185a700Sflorian
3615185a700Sflorian /*
3625185a700Sflorian * Make a fd non-blocking.
3635185a700Sflorian */
3645185a700Sflorian static isc_result_t
make_nonblock(int fd)3655185a700Sflorian make_nonblock(int fd) {
3665185a700Sflorian int ret;
3675185a700Sflorian int flags;
3685185a700Sflorian
3695185a700Sflorian flags = fcntl(fd, F_GETFL, 0);
3705185a700Sflorian flags |= O_NONBLOCK;
3715185a700Sflorian ret = fcntl(fd, F_SETFL, flags);
3725185a700Sflorian
3735185a700Sflorian if (ret == -1) {
3745185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
3755185a700Sflorian "fcntl(%d, F_SETFL, %d): %s", fd, flags,
37640adc7c5Sjung strerror(errno));
3775185a700Sflorian return (ISC_R_UNEXPECTED);
3785185a700Sflorian }
3795185a700Sflorian
3805185a700Sflorian return (ISC_R_SUCCESS);
3815185a700Sflorian }
3825185a700Sflorian
/*
 * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE.
 * In order to ensure as much portability as possible, we provide wrapper
 * functions of these macros.
 * Note that cmsg_space() could run slow on OSes that do not have
 * CMSG_SPACE.
 */

/* Return CMSG_LEN(len): the cmsg_len value for a payload of 'len' bytes. */
static inline socklen_t
cmsg_len(socklen_t len) {
	return (CMSG_LEN(len));
}
3945185a700Sflorian
/*
 * Return CMSG_SPACE(len): the buffer space taken by a control message
 * with a payload of 'len' bytes.
 */
static inline socklen_t
cmsg_space(socklen_t len) {
	return (CMSG_SPACE(len));
}
3995185a700Sflorian
4005185a700Sflorian /*
4015185a700Sflorian * Process control messages received on a socket.
4025185a700Sflorian */
4035185a700Sflorian static void
process_cmsg(isc_socket_t * sock,struct msghdr * msg,isc_socketevent_t * dev)4048b553854Sflorian process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) {
4055185a700Sflorian struct cmsghdr *cmsgp;
4065185a700Sflorian struct in6_pktinfo *pktinfop;
4075185a700Sflorian void *timevalp;
4085185a700Sflorian
4095185a700Sflorian /*
4105185a700Sflorian * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined.
4115185a700Sflorian * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined.
4125185a700Sflorian * They are all here, outside of the CPP tests, because it is
4135185a700Sflorian * more consistent with the usual ISC coding style.
4145185a700Sflorian */
4155185a700Sflorian UNUSED(sock);
4165185a700Sflorian UNUSED(msg);
4175185a700Sflorian UNUSED(dev);
4185185a700Sflorian
4195185a700Sflorian if ((msg->msg_flags & MSG_TRUNC) == MSG_TRUNC)
4205185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;
4215185a700Sflorian
4225185a700Sflorian if ((msg->msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
4235185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC;
4245185a700Sflorian
4255185a700Sflorian if (msg->msg_controllen == 0U || msg->msg_control == NULL)
4265185a700Sflorian return;
4275185a700Sflorian
4285185a700Sflorian timevalp = NULL;
4295185a700Sflorian pktinfop = NULL;
4305185a700Sflorian
4315185a700Sflorian cmsgp = CMSG_FIRSTHDR(msg);
4325185a700Sflorian while (cmsgp != NULL) {
4335185a700Sflorian socket_log(sock, NULL, TRACE,
4345185a700Sflorian "processing cmsg %p", cmsgp);
4355185a700Sflorian
4365185a700Sflorian if (cmsgp->cmsg_level == IPPROTO_IPV6
4375185a700Sflorian && cmsgp->cmsg_type == IPV6_PKTINFO) {
4385185a700Sflorian
4395185a700Sflorian pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
4405185a700Sflorian memmove(&dev->pktinfo, pktinfop,
4415185a700Sflorian sizeof(struct in6_pktinfo));
4425185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
4435185a700Sflorian socket_log(sock, NULL, TRACE,
4445185a700Sflorian "interface received on ifindex %u",
4455185a700Sflorian dev->pktinfo.ipi6_ifindex);
4465185a700Sflorian if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr))
4475185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST;
4485185a700Sflorian goto next;
4495185a700Sflorian }
4505185a700Sflorian
4515185a700Sflorian if (cmsgp->cmsg_level == SOL_SOCKET
4525185a700Sflorian && cmsgp->cmsg_type == SCM_TIMESTAMP) {
4535185a700Sflorian struct timeval tv;
4545185a700Sflorian timevalp = CMSG_DATA(cmsgp);
4555185a700Sflorian memmove(&tv, timevalp, sizeof(tv));
4567238a213Sflorian TIMEVAL_TO_TIMESPEC(&tv, &dev->timestamp);
4575185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP;
4585185a700Sflorian goto next;
4595185a700Sflorian }
4605185a700Sflorian
4615185a700Sflorian if (cmsgp->cmsg_level == IPPROTO_IPV6
4625185a700Sflorian && cmsgp->cmsg_type == IPV6_TCLASS) {
4635185a700Sflorian dev->dscp = *(int *)CMSG_DATA(cmsgp);
4645185a700Sflorian dev->dscp >>= 2;
4655185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_DSCP;
4665185a700Sflorian goto next;
4675185a700Sflorian }
4685185a700Sflorian
4695185a700Sflorian if (cmsgp->cmsg_level == IPPROTO_IP
4705185a700Sflorian && (cmsgp->cmsg_type == IP_TOS)) {
4715185a700Sflorian dev->dscp = (int) *(unsigned char *)CMSG_DATA(cmsgp);
4725185a700Sflorian dev->dscp >>= 2;
4735185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_DSCP;
4745185a700Sflorian goto next;
4755185a700Sflorian }
4765185a700Sflorian next:
4775185a700Sflorian cmsgp = CMSG_NXTHDR(msg, cmsgp);
4785185a700Sflorian }
4795185a700Sflorian
4805185a700Sflorian }
4815185a700Sflorian
4825185a700Sflorian /*
4835185a700Sflorian * Construct an iov array and attach it to the msghdr passed in. This is
4845185a700Sflorian * the SEND constructor, which will use the used region of the buffer
4855185a700Sflorian * (if using a buffer list) or will use the internal region (if a single
4865185a700Sflorian * buffer I/O is requested).
4875185a700Sflorian *
4885185a700Sflorian * Nothing can be NULL, and the done event must list at least one buffer
4895185a700Sflorian * on the buffer linked list for this function to be meaningful.
4905185a700Sflorian *
4915185a700Sflorian * If write_countp != NULL, *write_countp will hold the number of bytes
4925185a700Sflorian * this transaction can send.
4935185a700Sflorian */
4945185a700Sflorian static void
build_msghdr_send(isc_socket_t * sock,char * cmsgbuf,isc_socketevent_t * dev,struct msghdr * msg,struct iovec * iov,size_t * write_countp)4958b553854Sflorian build_msghdr_send(isc_socket_t *sock, char* cmsgbuf, isc_socketevent_t *dev,
4965185a700Sflorian struct msghdr *msg, struct iovec *iov, size_t *write_countp)
4975185a700Sflorian {
4985185a700Sflorian unsigned int iovcount;
4995185a700Sflorian isc_buffer_t *buffer;
5005185a700Sflorian isc_region_t used;
5015185a700Sflorian size_t write_count;
5025185a700Sflorian size_t skip_count;
5035185a700Sflorian struct cmsghdr *cmsgp;
5045185a700Sflorian
5055185a700Sflorian memset(msg, 0, sizeof(*msg));
5065185a700Sflorian
5075185a700Sflorian if (!sock->connected) {
508*b1a294b5Sflorian msg->msg_name = (void *)&dev->address;
509*b1a294b5Sflorian msg->msg_namelen = dev->address.ss_len;
5105185a700Sflorian } else {
5115185a700Sflorian msg->msg_name = NULL;
5125185a700Sflorian msg->msg_namelen = 0;
5135185a700Sflorian }
5145185a700Sflorian
5155185a700Sflorian buffer = ISC_LIST_HEAD(dev->bufferlist);
5165185a700Sflorian write_count = 0;
5175185a700Sflorian iovcount = 0;
5185185a700Sflorian
5195185a700Sflorian /*
5205185a700Sflorian * Single buffer I/O? Skip what we've done so far in this region.
5215185a700Sflorian */
5225185a700Sflorian if (buffer == NULL) {
5235185a700Sflorian write_count = dev->region.length - dev->n;
5245185a700Sflorian iov[0].iov_base = (void *)(dev->region.base + dev->n);
5255185a700Sflorian iov[0].iov_len = write_count;
5265185a700Sflorian iovcount = 1;
5275185a700Sflorian
5285185a700Sflorian goto config;
5295185a700Sflorian }
5305185a700Sflorian
5315185a700Sflorian /*
5325185a700Sflorian * Multibuffer I/O.
5335185a700Sflorian * Skip the data in the buffer list that we have already written.
5345185a700Sflorian */
5355185a700Sflorian skip_count = dev->n;
5365185a700Sflorian while (buffer != NULL) {
5375185a700Sflorian if (skip_count < isc_buffer_usedlength(buffer))
5385185a700Sflorian break;
5395185a700Sflorian skip_count -= isc_buffer_usedlength(buffer);
5405185a700Sflorian buffer = ISC_LIST_NEXT(buffer, link);
5415185a700Sflorian }
5425185a700Sflorian
5435185a700Sflorian while (buffer != NULL) {
5445185a700Sflorian INSIST(iovcount < MAXSCATTERGATHER_SEND);
5455185a700Sflorian
5465185a700Sflorian isc_buffer_usedregion(buffer, &used);
5475185a700Sflorian
5485185a700Sflorian if (used.length > 0) {
5495185a700Sflorian iov[iovcount].iov_base = (void *)(used.base
5505185a700Sflorian + skip_count);
5515185a700Sflorian iov[iovcount].iov_len = used.length - skip_count;
5525185a700Sflorian write_count += (used.length - skip_count);
5535185a700Sflorian skip_count = 0;
5545185a700Sflorian iovcount++;
5555185a700Sflorian }
5565185a700Sflorian buffer = ISC_LIST_NEXT(buffer, link);
5575185a700Sflorian }
5585185a700Sflorian
5595185a700Sflorian INSIST(skip_count == 0U);
5605185a700Sflorian
5615185a700Sflorian config:
5625185a700Sflorian msg->msg_iov = iov;
5635185a700Sflorian msg->msg_iovlen = iovcount;
5645185a700Sflorian
5655185a700Sflorian msg->msg_control = NULL;
5665185a700Sflorian msg->msg_controllen = 0;
5675185a700Sflorian msg->msg_flags = 0;
5685185a700Sflorian
5695185a700Sflorian if ((sock->type == isc_sockettype_udp) &&
5705185a700Sflorian ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0))
5715185a700Sflorian {
5725185a700Sflorian struct in6_pktinfo *pktinfop;
5735185a700Sflorian
5745185a700Sflorian socket_log(sock, NULL, TRACE,
5755185a700Sflorian "sendto pktinfo data, ifindex %u",
5765185a700Sflorian dev->pktinfo.ipi6_ifindex);
5775185a700Sflorian
5785185a700Sflorian msg->msg_control = (void *)cmsgbuf;
5795185a700Sflorian msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo));
5805185a700Sflorian INSIST(msg->msg_controllen <= SENDCMSGBUFLEN);
5815185a700Sflorian
5825185a700Sflorian cmsgp = (struct cmsghdr *)cmsgbuf;
5835185a700Sflorian cmsgp->cmsg_level = IPPROTO_IPV6;
5845185a700Sflorian cmsgp->cmsg_type = IPV6_PKTINFO;
5855185a700Sflorian cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo));
5865185a700Sflorian pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
5875185a700Sflorian memmove(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo));
5885185a700Sflorian }
5895185a700Sflorian
5905185a700Sflorian if ((sock->type == isc_sockettype_udp) &&
5915185a700Sflorian ((dev->attributes & ISC_SOCKEVENTATTR_USEMINMTU) != 0))
5925185a700Sflorian {
5935185a700Sflorian int use_min_mtu = 1; /* -1, 0, 1 */
5945185a700Sflorian
5955185a700Sflorian cmsgp = (struct cmsghdr *)(cmsgbuf +
5965185a700Sflorian msg->msg_controllen);
5975185a700Sflorian
5985185a700Sflorian msg->msg_control = (void *)cmsgbuf;
5995185a700Sflorian msg->msg_controllen += cmsg_space(sizeof(use_min_mtu));
6005185a700Sflorian INSIST(msg->msg_controllen <= SENDCMSGBUFLEN);
6015185a700Sflorian
6025185a700Sflorian cmsgp->cmsg_level = IPPROTO_IPV6;
6035185a700Sflorian cmsgp->cmsg_type = IPV6_USE_MIN_MTU;
6045185a700Sflorian cmsgp->cmsg_len = cmsg_len(sizeof(use_min_mtu));
6055185a700Sflorian memmove(CMSG_DATA(cmsgp), &use_min_mtu, sizeof(use_min_mtu));
6065185a700Sflorian }
6075185a700Sflorian
6085185a700Sflorian if (isc_dscp_check_value > -1) {
6095185a700Sflorian if (sock->type == isc_sockettype_udp)
6105185a700Sflorian INSIST((int)dev->dscp == isc_dscp_check_value);
6115185a700Sflorian else if (sock->type == isc_sockettype_tcp)
6125185a700Sflorian INSIST((int)sock->dscp == isc_dscp_check_value);
6135185a700Sflorian }
6145185a700Sflorian
6155185a700Sflorian if ((sock->type == isc_sockettype_udp) &&
6165185a700Sflorian ((dev->attributes & ISC_SOCKEVENTATTR_DSCP) != 0))
6175185a700Sflorian {
6185185a700Sflorian int dscp = (dev->dscp << 2) & 0xff;
6195185a700Sflorian
6205185a700Sflorian INSIST(dev->dscp < 0x40);
6215185a700Sflorian
6225185a700Sflorian if (sock->pf == AF_INET && sock->pktdscp) {
6235185a700Sflorian cmsgp = (struct cmsghdr *)(cmsgbuf +
6245185a700Sflorian msg->msg_controllen);
6255185a700Sflorian msg->msg_control = (void *)cmsgbuf;
6265185a700Sflorian msg->msg_controllen += cmsg_space(sizeof(dscp));
6275185a700Sflorian INSIST(msg->msg_controllen <= SENDCMSGBUFLEN);
6285185a700Sflorian
6295185a700Sflorian cmsgp->cmsg_level = IPPROTO_IP;
6305185a700Sflorian cmsgp->cmsg_type = IP_TOS;
6315185a700Sflorian cmsgp->cmsg_len = cmsg_len(sizeof(char));
6325185a700Sflorian *(unsigned char*)CMSG_DATA(cmsgp) = dscp;
6335185a700Sflorian } else if (sock->pf == AF_INET && sock->dscp != dev->dscp) {
6345185a700Sflorian if (setsockopt(sock->fd, IPPROTO_IP, IP_TOS,
6355185a700Sflorian (void *)&dscp, sizeof(int)) < 0)
6365185a700Sflorian {
6375185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
6385185a700Sflorian "setsockopt(%d, IP_TOS, %.02x)"
6395185a700Sflorian " %s: %s",
6405185a700Sflorian sock->fd, dscp >> 2,
64140adc7c5Sjung "failed", strerror(errno));
6425185a700Sflorian } else
6435185a700Sflorian sock->dscp = dscp;
6445185a700Sflorian }
6455185a700Sflorian
6465185a700Sflorian if (sock->pf == AF_INET6 && sock->pktdscp) {
6475185a700Sflorian cmsgp = (struct cmsghdr *)(cmsgbuf +
6485185a700Sflorian msg->msg_controllen);
6495185a700Sflorian msg->msg_control = (void *)cmsgbuf;
6505185a700Sflorian msg->msg_controllen += cmsg_space(sizeof(dscp));
6515185a700Sflorian INSIST(msg->msg_controllen <= SENDCMSGBUFLEN);
6525185a700Sflorian
6535185a700Sflorian cmsgp->cmsg_level = IPPROTO_IPV6;
6545185a700Sflorian cmsgp->cmsg_type = IPV6_TCLASS;
6555185a700Sflorian cmsgp->cmsg_len = cmsg_len(sizeof(dscp));
6565185a700Sflorian memmove(CMSG_DATA(cmsgp), &dscp, sizeof(dscp));
6575185a700Sflorian } else if (sock->pf == AF_INET6 && sock->dscp != dev->dscp) {
6585185a700Sflorian if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS,
6595185a700Sflorian (void *)&dscp, sizeof(int)) < 0) {
6605185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
6615185a700Sflorian "setsockopt(%d, IPV6_TCLASS, "
6625185a700Sflorian "%.02x) %s: %s",
6635185a700Sflorian sock->fd, dscp >> 2,
66440adc7c5Sjung "failed", strerror(errno));
6655185a700Sflorian } else
6665185a700Sflorian sock->dscp = dscp;
6675185a700Sflorian }
6685185a700Sflorian
6695185a700Sflorian if (msg->msg_controllen != 0 &&
6705185a700Sflorian msg->msg_controllen < SENDCMSGBUFLEN)
6715185a700Sflorian {
6725185a700Sflorian memset(cmsgbuf + msg->msg_controllen, 0,
6735185a700Sflorian SENDCMSGBUFLEN - msg->msg_controllen);
6745185a700Sflorian }
6755185a700Sflorian }
6765185a700Sflorian
6775185a700Sflorian if (write_countp != NULL)
6785185a700Sflorian *write_countp = write_count;
6795185a700Sflorian }
6805185a700Sflorian
6815185a700Sflorian /*
6825185a700Sflorian * Construct an iov array and attach it to the msghdr passed in. This is
6835185a700Sflorian * the RECV constructor, which will use the available region of the buffer
6845185a700Sflorian * (if using a buffer list) or will use the internal region (if a single
6855185a700Sflorian * buffer I/O is requested).
6865185a700Sflorian *
6875185a700Sflorian * Nothing can be NULL, and the done event must list at least one buffer
6885185a700Sflorian * on the buffer linked list for this function to be meaningful.
6895185a700Sflorian *
6905185a700Sflorian * If read_countp != NULL, *read_countp will hold the number of bytes
6915185a700Sflorian * this transaction can receive.
6925185a700Sflorian */
6935185a700Sflorian static void
build_msghdr_recv(isc_socket_t * sock,char * cmsgbuf,isc_socketevent_t * dev,struct msghdr * msg,struct iovec * iov,size_t * read_countp)6948b553854Sflorian build_msghdr_recv(isc_socket_t *sock, char *cmsgbuf, isc_socketevent_t *dev,
6955185a700Sflorian struct msghdr *msg, struct iovec *iov, size_t *read_countp)
6965185a700Sflorian {
6975185a700Sflorian unsigned int iovcount;
6985185a700Sflorian isc_buffer_t *buffer;
6995185a700Sflorian isc_region_t available;
7005185a700Sflorian size_t read_count;
7015185a700Sflorian
7025185a700Sflorian memset(msg, 0, sizeof(struct msghdr));
7035185a700Sflorian
7045185a700Sflorian if (sock->type == isc_sockettype_udp) {
7055185a700Sflorian memset(&dev->address, 0, sizeof(dev->address));
706*b1a294b5Sflorian msg->msg_name = (void *)&dev->address;
707*b1a294b5Sflorian msg->msg_namelen = sizeof(dev->address);
7085185a700Sflorian } else { /* TCP */
7095185a700Sflorian msg->msg_name = NULL;
7105185a700Sflorian msg->msg_namelen = 0;
7115185a700Sflorian dev->address = sock->peer_address;
7125185a700Sflorian }
7135185a700Sflorian
7145185a700Sflorian buffer = ISC_LIST_HEAD(dev->bufferlist);
7155185a700Sflorian read_count = 0;
7165185a700Sflorian
7175185a700Sflorian /*
7185185a700Sflorian * Single buffer I/O? Skip what we've done so far in this region.
7195185a700Sflorian */
7205185a700Sflorian if (buffer == NULL) {
7215185a700Sflorian read_count = dev->region.length - dev->n;
7225185a700Sflorian iov[0].iov_base = (void *)(dev->region.base + dev->n);
7235185a700Sflorian iov[0].iov_len = read_count;
7245185a700Sflorian iovcount = 1;
7255185a700Sflorian
7265185a700Sflorian goto config;
7275185a700Sflorian }
7285185a700Sflorian
7295185a700Sflorian /*
7305185a700Sflorian * Multibuffer I/O.
7315185a700Sflorian * Skip empty buffers.
7325185a700Sflorian */
7335185a700Sflorian while (buffer != NULL) {
7345185a700Sflorian if (isc_buffer_availablelength(buffer) != 0)
7355185a700Sflorian break;
7365185a700Sflorian buffer = ISC_LIST_NEXT(buffer, link);
7375185a700Sflorian }
7385185a700Sflorian
7395185a700Sflorian iovcount = 0;
7405185a700Sflorian while (buffer != NULL) {
7415185a700Sflorian INSIST(iovcount < MAXSCATTERGATHER_RECV);
7425185a700Sflorian
7435185a700Sflorian isc_buffer_availableregion(buffer, &available);
7445185a700Sflorian
7455185a700Sflorian if (available.length > 0) {
7465185a700Sflorian iov[iovcount].iov_base = (void *)(available.base);
7475185a700Sflorian iov[iovcount].iov_len = available.length;
7485185a700Sflorian read_count += available.length;
7495185a700Sflorian iovcount++;
7505185a700Sflorian }
7515185a700Sflorian buffer = ISC_LIST_NEXT(buffer, link);
7525185a700Sflorian }
7535185a700Sflorian
7545185a700Sflorian config:
7555185a700Sflorian
7565185a700Sflorian /*
7575185a700Sflorian * If needed, set up to receive that one extra byte.
7585185a700Sflorian */
7595185a700Sflorian msg->msg_iov = iov;
7605185a700Sflorian msg->msg_iovlen = iovcount;
7615185a700Sflorian
7625185a700Sflorian msg->msg_control = cmsgbuf;
7635185a700Sflorian msg->msg_controllen = RECVCMSGBUFLEN;
7645185a700Sflorian msg->msg_flags = 0;
7655185a700Sflorian
7665185a700Sflorian if (read_countp != NULL)
7675185a700Sflorian *read_countp = read_count;
7685185a700Sflorian }
7695185a700Sflorian
7705185a700Sflorian static void
set_dev_address(struct sockaddr_storage * address,isc_socket_t * sock,isc_socketevent_t * dev)771*b1a294b5Sflorian set_dev_address(struct sockaddr_storage *address, isc_socket_t *sock,
7725185a700Sflorian isc_socketevent_t *dev)
7735185a700Sflorian {
7745185a700Sflorian if (sock->type == isc_sockettype_udp) {
7755185a700Sflorian if (address != NULL)
7765185a700Sflorian dev->address = *address;
7775185a700Sflorian else
7785185a700Sflorian dev->address = sock->peer_address;
7795185a700Sflorian } else if (sock->type == isc_sockettype_tcp) {
7805185a700Sflorian INSIST(address == NULL);
7815185a700Sflorian dev->address = sock->peer_address;
7825185a700Sflorian }
7835185a700Sflorian }
7845185a700Sflorian
7855185a700Sflorian static void
destroy_socketevent(isc_event_t * event)7865185a700Sflorian destroy_socketevent(isc_event_t *event) {
7875185a700Sflorian isc_socketevent_t *ev = (isc_socketevent_t *)event;
7885185a700Sflorian
7895185a700Sflorian INSIST(ISC_LIST_EMPTY(ev->bufferlist));
7905185a700Sflorian
7915185a700Sflorian (ev->destroy)(event);
7925185a700Sflorian }
7935185a700Sflorian
7945185a700Sflorian static isc_socketevent_t *
allocate_socketevent(void * sender,isc_eventtype_t eventtype,isc_taskaction_t action,void * arg)7955185a700Sflorian allocate_socketevent(void *sender,
7965185a700Sflorian isc_eventtype_t eventtype, isc_taskaction_t action,
7975185a700Sflorian void *arg)
7985185a700Sflorian {
7995185a700Sflorian isc_socketevent_t *ev;
8005185a700Sflorian
8015185a700Sflorian ev = (isc_socketevent_t *)isc_event_allocate(sender,
8025185a700Sflorian eventtype, action, arg,
8035185a700Sflorian sizeof(*ev));
8045185a700Sflorian
8055185a700Sflorian if (ev == NULL)
8065185a700Sflorian return (NULL);
8075185a700Sflorian
8085185a700Sflorian ev->result = ISC_R_UNSET;
8095185a700Sflorian ISC_LINK_INIT(ev, ev_link);
8105185a700Sflorian ISC_LIST_INIT(ev->bufferlist);
8115185a700Sflorian ev->region.base = NULL;
8125185a700Sflorian ev->n = 0;
8135185a700Sflorian ev->offset = 0;
8145185a700Sflorian ev->attributes = 0;
8155185a700Sflorian ev->destroy = ev->ev_destroy;
8165185a700Sflorian ev->ev_destroy = destroy_socketevent;
8175185a700Sflorian ev->dscp = 0;
8185185a700Sflorian
8195185a700Sflorian return (ev);
8205185a700Sflorian }
8215185a700Sflorian
/*
 * Status codes returned by doio_recv() and doio_send().  Of the two, only
 * doio_recv() returns DOIO_EOF.
 */
8225185a700Sflorian #define DOIO_SUCCESS 0 /* i/o ok, event sent */
8235185a700Sflorian #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
8245185a700Sflorian #define DOIO_HARD 2 /* i/o error, event sent */
8255185a700Sflorian #define DOIO_EOF 3 /* EOF, no event sent */
8265185a700Sflorian
8275185a700Sflorian static int
doio_recv(isc_socket_t * sock,isc_socketevent_t * dev)8288b553854Sflorian doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
8295185a700Sflorian int cc;
8305185a700Sflorian struct iovec iov[MAXSCATTERGATHER_RECV];
8315185a700Sflorian size_t read_count;
8325185a700Sflorian size_t actual_count;
8335185a700Sflorian struct msghdr msghdr;
8345185a700Sflorian isc_buffer_t *buffer;
8355185a700Sflorian int recv_errno;
83648050c6bSotto union {
83748050c6bSotto struct msghdr msghdr;
83848050c6bSotto char m[RECVCMSGBUFLEN];
83948050c6bSotto } cmsgbuf;
8405185a700Sflorian
84148050c6bSotto memset(&cmsgbuf, 0, sizeof(cmsgbuf));
84248050c6bSotto
84348050c6bSotto build_msghdr_recv(sock, cmsgbuf.m, dev, &msghdr, iov, &read_count);
8445185a700Sflorian
8455185a700Sflorian cc = recvmsg(sock->fd, &msghdr, 0);
8465185a700Sflorian recv_errno = errno;
8475185a700Sflorian
8485185a700Sflorian if (cc < 0) {
8495185a700Sflorian if (SOFT_ERROR(recv_errno))
8505185a700Sflorian return (DOIO_SOFT);
8515185a700Sflorian
8525185a700Sflorian if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
8535185a700Sflorian socket_log(sock, NULL, IOEVENT,
8545185a700Sflorian "doio_recv: recvmsg(%d) %d bytes, err %d/%s",
85540adc7c5Sjung sock->fd, cc, recv_errno,
85640adc7c5Sjung strerror(recv_errno));
8575185a700Sflorian }
8585185a700Sflorian
8595185a700Sflorian #define SOFT_OR_HARD(_system, _isc) \
8605185a700Sflorian if (recv_errno == _system) { \
8615185a700Sflorian if (sock->connected) { \
8625185a700Sflorian dev->result = _isc; \
8635185a700Sflorian return (DOIO_HARD); \
8645185a700Sflorian } \
8655185a700Sflorian return (DOIO_SOFT); \
8665185a700Sflorian }
8675185a700Sflorian #define ALWAYS_HARD(_system, _isc) \
8685185a700Sflorian if (recv_errno == _system) { \
8695185a700Sflorian dev->result = _isc; \
8705185a700Sflorian return (DOIO_HARD); \
8715185a700Sflorian }
8725185a700Sflorian
8735185a700Sflorian SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
8745185a700Sflorian SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH);
8755185a700Sflorian SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
8765185a700Sflorian SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN);
8775185a700Sflorian /* HPUX 11.11 can return EADDRNOTAVAIL. */
8785185a700Sflorian SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
8795185a700Sflorian ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
8805185a700Sflorian /* Should never get this one but it was seen. */
8815185a700Sflorian SOFT_OR_HARD(ENOPROTOOPT, ISC_R_HOSTUNREACH);
8825185a700Sflorian /*
8835185a700Sflorian * HPUX returns EPROTO and EINVAL on receiving some ICMP/ICMPv6
8845185a700Sflorian * errors.
8855185a700Sflorian */
8865185a700Sflorian SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH);
8875185a700Sflorian SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH);
8885185a700Sflorian
8895185a700Sflorian #undef SOFT_OR_HARD
8905185a700Sflorian #undef ALWAYS_HARD
8915185a700Sflorian
8925185a700Sflorian dev->result = isc__errno2result(recv_errno);
8935185a700Sflorian return (DOIO_HARD);
8945185a700Sflorian }
8955185a700Sflorian
8965185a700Sflorian /*
8975185a700Sflorian * On TCP and UNIX sockets, zero length reads indicate EOF,
8985185a700Sflorian * while on UDP sockets, zero length reads are perfectly valid,
8995185a700Sflorian * although strange.
9005185a700Sflorian */
9015185a700Sflorian switch (sock->type) {
9025185a700Sflorian case isc_sockettype_tcp:
9035185a700Sflorian if (cc == 0)
9045185a700Sflorian return (DOIO_EOF);
9055185a700Sflorian break;
9065185a700Sflorian case isc_sockettype_udp:
9075185a700Sflorian break;
9085185a700Sflorian default:
9095185a700Sflorian INSIST(0);
9105185a700Sflorian }
9115185a700Sflorian
9125185a700Sflorian if (sock->type == isc_sockettype_udp) {
913*b1a294b5Sflorian dev->address.ss_len = msghdr.msg_namelen;
9145185a700Sflorian if (isc_sockaddr_getport(&dev->address) == 0) {
9155185a700Sflorian if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
9165185a700Sflorian socket_log(sock, &dev->address, IOEVENT,
9175185a700Sflorian "dropping source port zero packet");
9185185a700Sflorian }
9195185a700Sflorian return (DOIO_SOFT);
9205185a700Sflorian }
9215185a700Sflorian }
9225185a700Sflorian
9235185a700Sflorian socket_log(sock, &dev->address, IOEVENT,
9245185a700Sflorian "packet received correctly");
9255185a700Sflorian
9265185a700Sflorian /*
9275185a700Sflorian * Overflow bit detection. If we received MORE bytes than we should,
9285185a700Sflorian * this indicates an overflow situation. Set the flag in the
9295185a700Sflorian * dev entry and adjust how much we read by one.
9305185a700Sflorian */
9315185a700Sflorian /*
9325185a700Sflorian * If there are control messages attached, run through them and pull
9335185a700Sflorian * out the interesting bits.
9345185a700Sflorian */
9355185a700Sflorian process_cmsg(sock, &msghdr, dev);
9365185a700Sflorian
9375185a700Sflorian /*
9385185a700Sflorian * update the buffers (if any) and the i/o count
9395185a700Sflorian */
9405185a700Sflorian dev->n += cc;
9415185a700Sflorian actual_count = cc;
9425185a700Sflorian buffer = ISC_LIST_HEAD(dev->bufferlist);
9435185a700Sflorian while (buffer != NULL && actual_count > 0U) {
9445185a700Sflorian if (isc_buffer_availablelength(buffer) <= actual_count) {
9455185a700Sflorian actual_count -= isc_buffer_availablelength(buffer);
9465185a700Sflorian isc_buffer_add(buffer,
9475185a700Sflorian isc_buffer_availablelength(buffer));
9485185a700Sflorian } else {
9495185a700Sflorian isc_buffer_add(buffer, actual_count);
9505185a700Sflorian actual_count = 0;
9515185a700Sflorian POST(actual_count);
9525185a700Sflorian break;
9535185a700Sflorian }
9545185a700Sflorian buffer = ISC_LIST_NEXT(buffer, link);
9555185a700Sflorian if (buffer == NULL) {
9565185a700Sflorian INSIST(actual_count == 0U);
9575185a700Sflorian }
9585185a700Sflorian }
9595185a700Sflorian
9605185a700Sflorian /*
9615185a700Sflorian * If we read less than we expected, update counters,
9625185a700Sflorian * and let the upper layer poke the descriptor.
9635185a700Sflorian */
9645185a700Sflorian if (((size_t)cc != read_count) && (dev->n < dev->minimum))
9655185a700Sflorian return (DOIO_SOFT);
9665185a700Sflorian
9675185a700Sflorian /*
9685185a700Sflorian * Full reads are posted, or partials if partials are ok.
9695185a700Sflorian */
9705185a700Sflorian dev->result = ISC_R_SUCCESS;
9715185a700Sflorian return (DOIO_SUCCESS);
9725185a700Sflorian }
9735185a700Sflorian
9745185a700Sflorian /*
9755185a700Sflorian * Returns:
9765185a700Sflorian * DOIO_SUCCESS The operation succeeded. dev->result contains
9775185a700Sflorian * ISC_R_SUCCESS.
9785185a700Sflorian *
9795185a700Sflorian * DOIO_HARD A hard or unexpected I/O error was encountered.
9805185a700Sflorian * dev->result contains the appropriate error.
9815185a700Sflorian *
9825185a700Sflorian * DOIO_SOFT A soft I/O error was encountered. No senddone
9835185a700Sflorian * event was sent. The operation should be retried.
9845185a700Sflorian *
9855185a700Sflorian * No other return values are possible.
9865185a700Sflorian */
9875185a700Sflorian static int
doio_send(isc_socket_t * sock,isc_socketevent_t * dev)9888b553854Sflorian doio_send(isc_socket_t *sock, isc_socketevent_t *dev) {
9895185a700Sflorian int cc;
9905185a700Sflorian struct iovec iov[MAXSCATTERGATHER_SEND];
9915185a700Sflorian size_t write_count;
9925185a700Sflorian struct msghdr msghdr;
9935185a700Sflorian char addrbuf[ISC_SOCKADDR_FORMATSIZE];
9945185a700Sflorian int attempts = 0;
9955185a700Sflorian int send_errno;
99648050c6bSotto union {
99748050c6bSotto struct msghdr msghdr;
99848050c6bSotto char m[SENDCMSGBUFLEN];
99948050c6bSotto } cmsgbuf;
10005185a700Sflorian
100148050c6bSotto memset(&cmsgbuf, 0, sizeof(cmsgbuf));
100248050c6bSotto
100348050c6bSotto build_msghdr_send(sock, cmsgbuf.m, dev, &msghdr, iov, &write_count);
10045185a700Sflorian
10055185a700Sflorian resend:
10065185a700Sflorian cc = sendmsg(sock->fd, &msghdr, 0);
10075185a700Sflorian send_errno = errno;
10085185a700Sflorian
10095185a700Sflorian /*
10105185a700Sflorian * Check for error or block condition.
10115185a700Sflorian */
10125185a700Sflorian if (cc < 0) {
10135185a700Sflorian if (send_errno == EINTR && ++attempts < NRETRIES)
10145185a700Sflorian goto resend;
10155185a700Sflorian
10165185a700Sflorian if (SOFT_ERROR(send_errno)) {
10175185a700Sflorian if (errno == EWOULDBLOCK || errno == EAGAIN)
10185185a700Sflorian dev->result = ISC_R_WOULDBLOCK;
10195185a700Sflorian return (DOIO_SOFT);
10205185a700Sflorian }
10215185a700Sflorian
10225185a700Sflorian #define SOFT_OR_HARD(_system, _isc) \
10235185a700Sflorian if (send_errno == _system) { \
10245185a700Sflorian if (sock->connected) { \
10255185a700Sflorian dev->result = _isc; \
10265185a700Sflorian return (DOIO_HARD); \
10275185a700Sflorian } \
10285185a700Sflorian return (DOIO_SOFT); \
10295185a700Sflorian }
10305185a700Sflorian #define ALWAYS_HARD(_system, _isc) \
10315185a700Sflorian if (send_errno == _system) { \
10325185a700Sflorian dev->result = _isc; \
10335185a700Sflorian return (DOIO_HARD); \
10345185a700Sflorian }
10355185a700Sflorian
10365185a700Sflorian SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
10375185a700Sflorian ALWAYS_HARD(EACCES, ISC_R_NOPERM);
10385185a700Sflorian ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
10395185a700Sflorian ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
10405185a700Sflorian ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
10415185a700Sflorian ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH);
10425185a700Sflorian ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH);
10435185a700Sflorian ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
10445185a700Sflorian ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH);
10455185a700Sflorian ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED);
10465185a700Sflorian ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET);
10475185a700Sflorian
10485185a700Sflorian #undef SOFT_OR_HARD
10495185a700Sflorian #undef ALWAYS_HARD
10505185a700Sflorian
10515185a700Sflorian /*
10525185a700Sflorian * The other error types depend on whether or not the
10535185a700Sflorian * socket is UDP or TCP. If it is UDP, some errors
10545185a700Sflorian * that we expect to be fatal under TCP are merely
10555185a700Sflorian * annoying, and are really soft errors.
10565185a700Sflorian *
10575185a700Sflorian * However, these soft errors are still returned as
10585185a700Sflorian * a status.
10595185a700Sflorian */
10605185a700Sflorian isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
10615185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s",
106240adc7c5Sjung addrbuf, strerror(send_errno));
10635185a700Sflorian dev->result = isc__errno2result(send_errno);
10645185a700Sflorian return (DOIO_HARD);
10655185a700Sflorian }
10665185a700Sflorian
10675185a700Sflorian if (cc == 0) {
10685185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
10695185a700Sflorian "doio_send: send() %s 0", "returned");
10705185a700Sflorian }
10715185a700Sflorian
10725185a700Sflorian /*
10735185a700Sflorian * If we write less than we expected, update counters, poke.
10745185a700Sflorian */
10755185a700Sflorian dev->n += cc;
10765185a700Sflorian if ((size_t)cc != write_count)
10775185a700Sflorian return (DOIO_SOFT);
10785185a700Sflorian
10795185a700Sflorian /*
10805185a700Sflorian * Exactly what we wanted to write. We're done with this
10815185a700Sflorian * entry. Post its completion event.
10825185a700Sflorian */
10835185a700Sflorian dev->result = ISC_R_SUCCESS;
10845185a700Sflorian return (DOIO_SUCCESS);
10855185a700Sflorian }
10865185a700Sflorian
10875185a700Sflorian /*
10885185a700Sflorian * Kill.
10895185a700Sflorian *
10905185a700Sflorian * Caller must ensure that the socket is not locked and no external
10915185a700Sflorian * references exist.
10925185a700Sflorian */
10935185a700Sflorian static void
socketclose(isc_socketmgr_t * manager,isc_socket_t * sock,int fd)10948b553854Sflorian socketclose(isc_socketmgr_t *manager, isc_socket_t *sock, int fd) {
10955185a700Sflorian /*
10965185a700Sflorian * No one has this socket open, so the watcher doesn't have to be
10975185a700Sflorian * poked, and the socket doesn't have to be locked.
10985185a700Sflorian */
10995185a700Sflorian manager->fds[fd] = NULL;
11005185a700Sflorian manager->fdstate[fd] = CLOSE_PENDING;
11015185a700Sflorian select_poke(manager, fd, SELECT_POKE_CLOSE);
11025185a700Sflorian
11035185a700Sflorian if (sock->active == 1) {
11045185a700Sflorian sock->active = 0;
11055185a700Sflorian }
11065185a700Sflorian
11075185a700Sflorian /*
11085185a700Sflorian * update manager->maxfd here (XXX: this should be implemented more
11095185a700Sflorian * efficiently)
11105185a700Sflorian */
11115185a700Sflorian if (manager->maxfd == fd) {
11125185a700Sflorian int i;
11135185a700Sflorian
11145185a700Sflorian manager->maxfd = 0;
11155185a700Sflorian for (i = fd - 1; i >= 0; i--) {
11165185a700Sflorian if (manager->fdstate[i] == MANAGED) {
11175185a700Sflorian manager->maxfd = i;
11185185a700Sflorian break;
11195185a700Sflorian }
11205185a700Sflorian }
11215185a700Sflorian }
11225185a700Sflorian
11235185a700Sflorian }
11245185a700Sflorian
11255185a700Sflorian static void
destroy(isc_socket_t ** sockp)11268b553854Sflorian destroy(isc_socket_t **sockp) {
11275185a700Sflorian int fd;
11288b553854Sflorian isc_socket_t *sock = *sockp;
11298b553854Sflorian isc_socketmgr_t *manager = sock->manager;
11305185a700Sflorian
11315185a700Sflorian socket_log(sock, NULL, CREATION, "destroying");
11325185a700Sflorian
11335185a700Sflorian INSIST(ISC_LIST_EMPTY(sock->recv_list));
11345185a700Sflorian INSIST(ISC_LIST_EMPTY(sock->send_list));
11355185a700Sflorian INSIST(sock->connect_ev == NULL);
11365185a700Sflorian INSIST(sock->fd >= -1 && sock->fd < (int)manager->maxsocks);
11375185a700Sflorian
11385185a700Sflorian if (sock->fd >= 0) {
11395185a700Sflorian fd = sock->fd;
11405185a700Sflorian sock->fd = -1;
11415185a700Sflorian socketclose(manager, sock, fd);
11425185a700Sflorian }
11435185a700Sflorian
11445185a700Sflorian ISC_LIST_UNLINK(manager->socklist, sock, link);
11455185a700Sflorian
11465185a700Sflorian /* can't unlock manager as its memory context is still used */
11475185a700Sflorian free_socket(sockp);
11485185a700Sflorian }
11495185a700Sflorian
11505185a700Sflorian static isc_result_t
allocate_socket(isc_socketmgr_t * manager,isc_sockettype_t type,isc_socket_t ** socketp)11518b553854Sflorian allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
11528b553854Sflorian isc_socket_t **socketp)
11535185a700Sflorian {
11548b553854Sflorian isc_socket_t *sock;
11555185a700Sflorian
11565185a700Sflorian sock = malloc(sizeof(*sock));
11575185a700Sflorian
11585185a700Sflorian if (sock == NULL)
11595185a700Sflorian return (ISC_R_NOMEMORY);
11605185a700Sflorian
11615185a700Sflorian sock->references = 0;
11625185a700Sflorian
11635185a700Sflorian sock->manager = manager;
11645185a700Sflorian sock->type = type;
11655185a700Sflorian sock->fd = -1;
11665185a700Sflorian sock->dscp = 0; /* TOS/TCLASS is zero until set. */
11675185a700Sflorian sock->active = 0;
11685185a700Sflorian
11695185a700Sflorian ISC_LINK_INIT(sock, link);
11705185a700Sflorian
11715185a700Sflorian /*
11725185a700Sflorian * Set up list of readers and writers to be initially empty.
11735185a700Sflorian */
11745185a700Sflorian ISC_LIST_INIT(sock->recv_list);
11755185a700Sflorian ISC_LIST_INIT(sock->send_list);
11765185a700Sflorian sock->connect_ev = NULL;
11775185a700Sflorian sock->pending_recv = 0;
11785185a700Sflorian sock->pending_send = 0;
11795185a700Sflorian sock->connected = 0;
11805185a700Sflorian sock->connecting = 0;
11815185a700Sflorian sock->bound = 0;
11825185a700Sflorian sock->pktdscp = 0;
11835185a700Sflorian
11845185a700Sflorian /*
11855185a700Sflorian * Initialize readable and writable events.
11865185a700Sflorian */
11875185a700Sflorian ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t),
11885185a700Sflorian ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR,
11895185a700Sflorian NULL, sock, sock, NULL);
11905185a700Sflorian ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t),
11915185a700Sflorian ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW,
11925185a700Sflorian NULL, sock, sock, NULL);
11935185a700Sflorian
11945185a700Sflorian *socketp = sock;
11955185a700Sflorian
11965185a700Sflorian return (ISC_R_SUCCESS);
11975185a700Sflorian }
11985185a700Sflorian
11995185a700Sflorian /*
12005185a700Sflorian * This event requires that the various lists be empty, that the reference
12018b553854Sflorian * count be 1. The other socket bits,
12025185a700Sflorian * like the lock, must be initialized as well. The fd associated must be
12035185a700Sflorian * marked as closed, by setting it to -1 on close, or this routine will
12045185a700Sflorian * also close the socket.
12055185a700Sflorian */
12065185a700Sflorian static void
free_socket(isc_socket_t ** socketp)12078b553854Sflorian free_socket(isc_socket_t **socketp) {
12088b553854Sflorian isc_socket_t *sock = *socketp;
12095185a700Sflorian
12105185a700Sflorian INSIST(sock->references == 0);
12115185a700Sflorian INSIST(!sock->connecting);
12125185a700Sflorian INSIST(!sock->pending_recv);
12135185a700Sflorian INSIST(!sock->pending_send);
12145185a700Sflorian INSIST(ISC_LIST_EMPTY(sock->recv_list));
12155185a700Sflorian INSIST(ISC_LIST_EMPTY(sock->send_list));
12165185a700Sflorian INSIST(!ISC_LINK_LINKED(sock, link));
12175185a700Sflorian
12185185a700Sflorian free(sock);
12195185a700Sflorian
12205185a700Sflorian *socketp = NULL;
12215185a700Sflorian }
12225185a700Sflorian
12235185a700Sflorian static void
use_min_mtu(isc_socket_t * sock)12248b553854Sflorian use_min_mtu(isc_socket_t *sock) {
12255185a700Sflorian /* use minimum MTU */
12265185a700Sflorian if (sock->pf == AF_INET6) {
12275185a700Sflorian int on = 1;
12285185a700Sflorian (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
12295185a700Sflorian (void *)&on, sizeof(on));
12305185a700Sflorian }
12315185a700Sflorian }
12325185a700Sflorian
12335185a700Sflorian static void
set_tcp_maxseg(isc_socket_t * sock,int size)12348b553854Sflorian set_tcp_maxseg(isc_socket_t *sock, int size) {
12355185a700Sflorian if (sock->type == isc_sockettype_tcp)
12365185a700Sflorian (void)setsockopt(sock->fd, IPPROTO_TCP, TCP_MAXSEG,
12375185a700Sflorian (void *)&size, sizeof(size));
12385185a700Sflorian }
12395185a700Sflorian
12405185a700Sflorian static isc_result_t
opensocket(isc_socket_t * sock)12418b553854Sflorian opensocket(isc_socket_t *sock)
12425185a700Sflorian {
12435185a700Sflorian isc_result_t result;
12445185a700Sflorian const char *err = "socket";
12455185a700Sflorian int on = 1;
12465185a700Sflorian
12475185a700Sflorian switch (sock->type) {
12485185a700Sflorian case isc_sockettype_udp:
12495185a700Sflorian sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP);
12505185a700Sflorian break;
12515185a700Sflorian case isc_sockettype_tcp:
12525185a700Sflorian sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
12535185a700Sflorian break;
12545185a700Sflorian }
12555185a700Sflorian
12565185a700Sflorian if (sock->fd < 0) {
12575185a700Sflorian switch (errno) {
12585185a700Sflorian case EMFILE:
12595185a700Sflorian case ENFILE:
12605185a700Sflorian isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
12615185a700Sflorian ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
126240adc7c5Sjung "%s: %s", err, strerror(errno));
12635185a700Sflorian /* fallthrough */
12645185a700Sflorian case ENOBUFS:
12655185a700Sflorian return (ISC_R_NORESOURCES);
12665185a700Sflorian
12675185a700Sflorian case EPROTONOSUPPORT:
12685185a700Sflorian case EPFNOSUPPORT:
12695185a700Sflorian case EAFNOSUPPORT:
12705185a700Sflorian /*
12715185a700Sflorian * Linux 2.2 (and maybe others) return EINVAL instead of
12725185a700Sflorian * EAFNOSUPPORT.
12735185a700Sflorian */
12745185a700Sflorian case EINVAL:
12755185a700Sflorian return (ISC_R_FAMILYNOSUPPORT);
12765185a700Sflorian
12775185a700Sflorian default:
12785185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
12795185a700Sflorian "%s() %s: %s", err, "failed",
128040adc7c5Sjung strerror(errno));
12815185a700Sflorian return (ISC_R_UNEXPECTED);
12825185a700Sflorian }
12835185a700Sflorian }
12845185a700Sflorian
12855185a700Sflorian result = make_nonblock(sock->fd);
12865185a700Sflorian if (result != ISC_R_SUCCESS) {
12875185a700Sflorian (void)close(sock->fd);
12885185a700Sflorian return (result);
12895185a700Sflorian }
12905185a700Sflorian
12915185a700Sflorian /*
12925185a700Sflorian * Use minimum mtu if possible.
12935185a700Sflorian */
12945185a700Sflorian if (sock->type == isc_sockettype_tcp && sock->pf == AF_INET6) {
12955185a700Sflorian use_min_mtu(sock);
12965185a700Sflorian set_tcp_maxseg(sock, 1280 - 20 - 40); /* 1280 - TCP - IPV6 */
12975185a700Sflorian }
12985185a700Sflorian
12995185a700Sflorian if (sock->type == isc_sockettype_udp) {
13005185a700Sflorian
13015185a700Sflorian if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP,
13025185a700Sflorian (void *)&on, sizeof(on)) < 0
13035185a700Sflorian && errno != ENOPROTOOPT) {
13045185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
13055185a700Sflorian "setsockopt(%d, SO_TIMESTAMP) %s: %s",
130640adc7c5Sjung sock->fd, "failed", strerror(errno));
13075185a700Sflorian /* Press on... */
13085185a700Sflorian }
13095185a700Sflorian
13105185a700Sflorian /* RFC 3542 */
13115185a700Sflorian if ((sock->pf == AF_INET6)
13125185a700Sflorian && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
13135185a700Sflorian (void *)&on, sizeof(on)) < 0)) {
13145185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
13155185a700Sflorian "setsockopt(%d, IPV6_RECVPKTINFO) "
13165185a700Sflorian "%s: %s", sock->fd, "failed",
131740adc7c5Sjung strerror(errno));
13185185a700Sflorian }
13195185a700Sflorian }
13205185a700Sflorian
13215185a700Sflorian if (sock->active == 0) {
13225185a700Sflorian sock->active = 1;
13235185a700Sflorian }
13245185a700Sflorian
13255185a700Sflorian return (ISC_R_SUCCESS);
13265185a700Sflorian }
13275185a700Sflorian
13285185a700Sflorian /*
13295185a700Sflorian * Create a 'type' socket managed
13305185a700Sflorian * by 'manager'. Events will be posted to 'task' and when dispatched
13315185a700Sflorian * 'action' will be called with 'arg' as the arg value. The new
13325185a700Sflorian * socket is returned in 'socketp'.
13335185a700Sflorian */
13345185a700Sflorian static isc_result_t
socket_create(isc_socketmgr_t * manager0,int pf,isc_sockettype_t type,isc_socket_t ** socketp)13355185a700Sflorian socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
13365185a700Sflorian isc_socket_t **socketp)
13375185a700Sflorian {
13388b553854Sflorian isc_socket_t *sock = NULL;
13398b553854Sflorian isc_socketmgr_t *manager = (isc_socketmgr_t *)manager0;
13405185a700Sflorian isc_result_t result;
13415185a700Sflorian
13425185a700Sflorian REQUIRE(socketp != NULL && *socketp == NULL);
13435185a700Sflorian
13445185a700Sflorian result = allocate_socket(manager, type, &sock);
13455185a700Sflorian if (result != ISC_R_SUCCESS)
13465185a700Sflorian return (result);
13475185a700Sflorian
13485185a700Sflorian switch (sock->type) {
13495185a700Sflorian case isc_sockettype_udp:
13501bf56eb0Sflorian sock->pktdscp = 1;
13515185a700Sflorian break;
13525185a700Sflorian case isc_sockettype_tcp:
13535185a700Sflorian break;
13545185a700Sflorian default:
13555185a700Sflorian INSIST(0);
13565185a700Sflorian }
13575185a700Sflorian
13585185a700Sflorian sock->pf = pf;
13595185a700Sflorian
13605185a700Sflorian result = opensocket(sock);
13615185a700Sflorian if (result != ISC_R_SUCCESS) {
13625185a700Sflorian free_socket(&sock);
13635185a700Sflorian return (result);
13645185a700Sflorian }
13655185a700Sflorian
13665185a700Sflorian sock->references = 1;
13675185a700Sflorian *socketp = (isc_socket_t *)sock;
13685185a700Sflorian
13695185a700Sflorian /*
13705185a700Sflorian * Note we don't have to lock the socket like we normally would because
13715185a700Sflorian * there are no external references to it yet.
13725185a700Sflorian */
13735185a700Sflorian
13745185a700Sflorian manager->fds[sock->fd] = sock;
13755185a700Sflorian manager->fdstate[sock->fd] = MANAGED;
13765185a700Sflorian
13775185a700Sflorian ISC_LIST_APPEND(manager->socklist, sock, link);
13785185a700Sflorian if (manager->maxfd < sock->fd)
13795185a700Sflorian manager->maxfd = sock->fd;
13805185a700Sflorian
13815185a700Sflorian socket_log(sock, NULL, CREATION, "created");
13825185a700Sflorian
13835185a700Sflorian return (ISC_R_SUCCESS);
13845185a700Sflorian }
13855185a700Sflorian
13865185a700Sflorian /*%
13875185a700Sflorian * Create a new 'type' socket managed by 'manager'. Events
13885185a700Sflorian * will be posted to 'task' and when dispatched 'action' will be
13895185a700Sflorian * called with 'arg' as the arg value. The new socket is returned
13905185a700Sflorian * in 'socketp'.
13915185a700Sflorian */
13925185a700Sflorian isc_result_t
isc_socket_create(isc_socketmgr_t * manager0,int pf,isc_sockettype_t type,isc_socket_t ** socketp)13938b553854Sflorian isc_socket_create(isc_socketmgr_t *manager0, int pf, isc_sockettype_t type,
13945185a700Sflorian isc_socket_t **socketp)
13955185a700Sflorian {
13965185a700Sflorian return (socket_create(manager0, pf, type, socketp));
13975185a700Sflorian }
13985185a700Sflorian
13995185a700Sflorian /*
14005185a700Sflorian * Attach to a socket. Caller must explicitly detach when it is done.
14015185a700Sflorian */
14025185a700Sflorian void
isc_socket_attach(isc_socket_t * sock0,isc_socket_t ** socketp)14038b553854Sflorian isc_socket_attach(isc_socket_t *sock0, isc_socket_t **socketp) {
14048b553854Sflorian isc_socket_t *sock = (isc_socket_t *)sock0;
14055185a700Sflorian
14065185a700Sflorian REQUIRE(socketp != NULL && *socketp == NULL);
14075185a700Sflorian
14085185a700Sflorian sock->references++;
14095185a700Sflorian
14105185a700Sflorian *socketp = (isc_socket_t *)sock;
14115185a700Sflorian }
14125185a700Sflorian
14135185a700Sflorian /*
14145185a700Sflorian * Dereference a socket. If this is the last reference to it, clean things
14155185a700Sflorian * up by destroying the socket.
14165185a700Sflorian */
14175185a700Sflorian void
isc_socket_detach(isc_socket_t ** socketp)14188b553854Sflorian isc_socket_detach(isc_socket_t **socketp) {
14198b553854Sflorian isc_socket_t *sock;
14201fb015a8Sflorian int kill_socket = 0;
14215185a700Sflorian
14225185a700Sflorian REQUIRE(socketp != NULL);
14238b553854Sflorian sock = (isc_socket_t *)*socketp;
14245185a700Sflorian
14255185a700Sflorian REQUIRE(sock->references > 0);
14265185a700Sflorian sock->references--;
14275185a700Sflorian if (sock->references == 0)
14281fb015a8Sflorian kill_socket = 1;
14295185a700Sflorian
14305185a700Sflorian if (kill_socket)
14315185a700Sflorian destroy(&sock);
14325185a700Sflorian
14335185a700Sflorian *socketp = NULL;
14345185a700Sflorian }
14355185a700Sflorian
14365185a700Sflorian /*
14375185a700Sflorian * I/O is possible on a given socket. Schedule an event to this task that
14385185a700Sflorian * will call an internal function to do the I/O. This will charge the
14395185a700Sflorian * task with the I/O operation and let our select loop handler get back
14405185a700Sflorian * to doing something real as fast as possible.
14415185a700Sflorian *
14425185a700Sflorian * The socket and manager must be locked before calling this function.
14435185a700Sflorian */
14445185a700Sflorian static void
dispatch_recv(isc_socket_t * sock)14458b553854Sflorian dispatch_recv(isc_socket_t *sock) {
14465185a700Sflorian intev_t *iev;
14475185a700Sflorian isc_socketevent_t *ev;
14485185a700Sflorian isc_task_t *sender;
14495185a700Sflorian
14505185a700Sflorian INSIST(!sock->pending_recv);
14515185a700Sflorian
14525185a700Sflorian ev = ISC_LIST_HEAD(sock->recv_list);
14535185a700Sflorian if (ev == NULL)
14545185a700Sflorian return;
145557b02e75Sotto socket_log(sock, NULL, EVENT,
14565185a700Sflorian "dispatch_recv: event %p -> task %p",
14575185a700Sflorian ev, ev->ev_sender);
14585185a700Sflorian sender = ev->ev_sender;
14595185a700Sflorian
14605185a700Sflorian sock->pending_recv = 1;
14615185a700Sflorian iev = &sock->readable_ev;
14625185a700Sflorian
14635185a700Sflorian sock->references++;
14645185a700Sflorian iev->ev_sender = sock;
14655185a700Sflorian iev->ev_action = internal_recv;
14665185a700Sflorian iev->ev_arg = sock;
14675185a700Sflorian
14685185a700Sflorian isc_task_send(sender, (isc_event_t **)&iev);
14695185a700Sflorian }
14705185a700Sflorian
14715185a700Sflorian static void
dispatch_send(isc_socket_t * sock)14728b553854Sflorian dispatch_send(isc_socket_t *sock) {
14735185a700Sflorian intev_t *iev;
14745185a700Sflorian isc_socketevent_t *ev;
14755185a700Sflorian isc_task_t *sender;
14765185a700Sflorian
14775185a700Sflorian INSIST(!sock->pending_send);
14785185a700Sflorian
14795185a700Sflorian ev = ISC_LIST_HEAD(sock->send_list);
14805185a700Sflorian if (ev == NULL)
14815185a700Sflorian return;
148257b02e75Sotto socket_log(sock, NULL, EVENT,
14835185a700Sflorian "dispatch_send: event %p -> task %p",
14845185a700Sflorian ev, ev->ev_sender);
14855185a700Sflorian sender = ev->ev_sender;
14865185a700Sflorian
14875185a700Sflorian sock->pending_send = 1;
14885185a700Sflorian iev = &sock->writable_ev;
14895185a700Sflorian
14905185a700Sflorian sock->references++;
14915185a700Sflorian iev->ev_sender = sock;
14925185a700Sflorian iev->ev_action = internal_send;
14935185a700Sflorian iev->ev_arg = sock;
14945185a700Sflorian
14955185a700Sflorian isc_task_send(sender, (isc_event_t **)&iev);
14965185a700Sflorian }
14975185a700Sflorian
14985185a700Sflorian static void
dispatch_connect(isc_socket_t * sock)14998b553854Sflorian dispatch_connect(isc_socket_t *sock) {
15005185a700Sflorian intev_t *iev;
15015185a700Sflorian isc_socket_connev_t *ev;
15025185a700Sflorian
15035185a700Sflorian iev = &sock->writable_ev;
15045185a700Sflorian
15055185a700Sflorian ev = sock->connect_ev;
15065185a700Sflorian INSIST(ev != NULL); /* XXX */
15075185a700Sflorian
15085185a700Sflorian INSIST(sock->connecting);
15095185a700Sflorian
15105185a700Sflorian sock->references++; /* keep socket around for this internal event */
15115185a700Sflorian iev->ev_sender = sock;
15125185a700Sflorian iev->ev_action = internal_connect;
15135185a700Sflorian iev->ev_arg = sock;
15145185a700Sflorian
15155185a700Sflorian isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
15165185a700Sflorian }
15175185a700Sflorian
15185185a700Sflorian /*
15195185a700Sflorian * Dequeue an item off the given socket's read queue, set the result code
15205185a700Sflorian * in the done event to the one provided, and send it to the task it was
15215185a700Sflorian * destined for.
15225185a700Sflorian *
15235185a700Sflorian * If the event to be sent is on a list, remove it before sending. If
15245185a700Sflorian * asked to, send and detach from the socket as well.
15255185a700Sflorian *
15265185a700Sflorian * Caller must have the socket locked if the event is attached to the socket.
15275185a700Sflorian */
15285185a700Sflorian static void
send_recvdone_event(isc_socket_t * sock,isc_socketevent_t ** dev)15298b553854Sflorian send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
15305185a700Sflorian isc_task_t *task;
15315185a700Sflorian
15325185a700Sflorian task = (*dev)->ev_sender;
15335185a700Sflorian
15345185a700Sflorian (*dev)->ev_sender = sock;
15355185a700Sflorian
15365185a700Sflorian if (ISC_LINK_LINKED(*dev, ev_link))
15375185a700Sflorian ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
15385185a700Sflorian
15395185a700Sflorian if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
15405185a700Sflorian == ISC_SOCKEVENTATTR_ATTACHED)
15415185a700Sflorian isc_task_sendanddetach(&task, (isc_event_t **)dev);
15425185a700Sflorian else
15435185a700Sflorian isc_task_send(task, (isc_event_t **)dev);
15445185a700Sflorian }
15455185a700Sflorian
15465185a700Sflorian /*
15475185a700Sflorian * See comments for send_recvdone_event() above.
15485185a700Sflorian *
15495185a700Sflorian * Caller must have the socket locked if the event is attached to the socket.
15505185a700Sflorian */
15515185a700Sflorian static void
send_senddone_event(isc_socket_t * sock,isc_socketevent_t ** dev)15528b553854Sflorian send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
15535185a700Sflorian isc_task_t *task;
15545185a700Sflorian
15555185a700Sflorian INSIST(dev != NULL && *dev != NULL);
15565185a700Sflorian
15575185a700Sflorian task = (*dev)->ev_sender;
15585185a700Sflorian (*dev)->ev_sender = sock;
15595185a700Sflorian
15605185a700Sflorian if (ISC_LINK_LINKED(*dev, ev_link))
15615185a700Sflorian ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
15625185a700Sflorian
15635185a700Sflorian if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
15645185a700Sflorian == ISC_SOCKEVENTATTR_ATTACHED)
15655185a700Sflorian isc_task_sendanddetach(&task, (isc_event_t **)dev);
15665185a700Sflorian else
15675185a700Sflorian isc_task_send(task, (isc_event_t **)dev);
15685185a700Sflorian }
15695185a700Sflorian
15705185a700Sflorian static void
internal_recv(isc_task_t * me,isc_event_t * ev)15715185a700Sflorian internal_recv(isc_task_t *me, isc_event_t *ev) {
15725185a700Sflorian isc_socketevent_t *dev;
15738b553854Sflorian isc_socket_t *sock;
15745185a700Sflorian
15755185a700Sflorian INSIST(ev->ev_type == ISC_SOCKEVENT_INTR);
15765185a700Sflorian
15775185a700Sflorian sock = ev->ev_sender;
15785185a700Sflorian
15795185a700Sflorian socket_log(sock, NULL, IOEVENT,
15805185a700Sflorian "internal_recv: task %p got event %p", me, ev);
15815185a700Sflorian
15825185a700Sflorian INSIST(sock->pending_recv == 1);
15835185a700Sflorian sock->pending_recv = 0;
15845185a700Sflorian
15855185a700Sflorian INSIST(sock->references > 0);
15865185a700Sflorian sock->references--; /* the internal event is done with this socket */
15875185a700Sflorian if (sock->references == 0) {
15885185a700Sflorian destroy(&sock);
15895185a700Sflorian return;
15905185a700Sflorian }
15915185a700Sflorian
15925185a700Sflorian /*
15935185a700Sflorian * Try to do as much I/O as possible on this socket. There are no
15945185a700Sflorian * limits here, currently.
15955185a700Sflorian */
15965185a700Sflorian dev = ISC_LIST_HEAD(sock->recv_list);
15975185a700Sflorian while (dev != NULL) {
15985185a700Sflorian switch (doio_recv(sock, dev)) {
15995185a700Sflorian case DOIO_SOFT:
16005185a700Sflorian goto poke;
16015185a700Sflorian
16025185a700Sflorian case DOIO_EOF:
16035185a700Sflorian /*
16045185a700Sflorian * read of 0 means the remote end was closed.
16055185a700Sflorian * Run through the event queue and dispatch all
16065185a700Sflorian * the events with an EOF result code.
16075185a700Sflorian */
16085185a700Sflorian do {
16095185a700Sflorian dev->result = ISC_R_EOF;
16105185a700Sflorian send_recvdone_event(sock, &dev);
16115185a700Sflorian dev = ISC_LIST_HEAD(sock->recv_list);
16125185a700Sflorian } while (dev != NULL);
16135185a700Sflorian goto poke;
16145185a700Sflorian
16155185a700Sflorian case DOIO_SUCCESS:
16165185a700Sflorian case DOIO_HARD:
16175185a700Sflorian send_recvdone_event(sock, &dev);
16185185a700Sflorian break;
16195185a700Sflorian }
16205185a700Sflorian
16215185a700Sflorian dev = ISC_LIST_HEAD(sock->recv_list);
16225185a700Sflorian }
16235185a700Sflorian
16245185a700Sflorian poke:
16255185a700Sflorian if (!ISC_LIST_EMPTY(sock->recv_list))
16265185a700Sflorian select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
16275185a700Sflorian }
16285185a700Sflorian
16295185a700Sflorian static void
internal_send(isc_task_t * me,isc_event_t * ev)16305185a700Sflorian internal_send(isc_task_t *me, isc_event_t *ev) {
16315185a700Sflorian isc_socketevent_t *dev;
16328b553854Sflorian isc_socket_t *sock;
16335185a700Sflorian
16345185a700Sflorian INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);
16355185a700Sflorian
16365185a700Sflorian /*
16375185a700Sflorian * Find out what socket this is and lock it.
16385185a700Sflorian */
16398b553854Sflorian sock = (isc_socket_t *)ev->ev_sender;
16405185a700Sflorian socket_log(sock, NULL, IOEVENT,
16415185a700Sflorian "internal_send: task %p got event %p", me, ev);
16425185a700Sflorian
16435185a700Sflorian INSIST(sock->pending_send == 1);
16445185a700Sflorian sock->pending_send = 0;
16455185a700Sflorian
16465185a700Sflorian INSIST(sock->references > 0);
16475185a700Sflorian sock->references--; /* the internal event is done with this socket */
16485185a700Sflorian if (sock->references == 0) {
16495185a700Sflorian destroy(&sock);
16505185a700Sflorian return;
16515185a700Sflorian }
16525185a700Sflorian
16535185a700Sflorian /*
16545185a700Sflorian * Try to do as much I/O as possible on this socket. There are no
16555185a700Sflorian * limits here, currently.
16565185a700Sflorian */
16575185a700Sflorian dev = ISC_LIST_HEAD(sock->send_list);
16585185a700Sflorian while (dev != NULL) {
16595185a700Sflorian switch (doio_send(sock, dev)) {
16605185a700Sflorian case DOIO_SOFT:
16615185a700Sflorian goto poke;
16625185a700Sflorian
16635185a700Sflorian case DOIO_HARD:
16645185a700Sflorian case DOIO_SUCCESS:
16655185a700Sflorian send_senddone_event(sock, &dev);
16665185a700Sflorian break;
16675185a700Sflorian }
16685185a700Sflorian
16695185a700Sflorian dev = ISC_LIST_HEAD(sock->send_list);
16705185a700Sflorian }
16715185a700Sflorian
16725185a700Sflorian poke:
16735185a700Sflorian if (!ISC_LIST_EMPTY(sock->send_list))
16745185a700Sflorian select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE);
16755185a700Sflorian }
16765185a700Sflorian
16775185a700Sflorian /*
16785185a700Sflorian * Process read/writes on each fd here. Avoid locking
16795185a700Sflorian * and unlocking twice if both reads and writes are possible.
16805185a700Sflorian */
16815185a700Sflorian static void
process_fd(isc_socketmgr_t * manager,int fd,int readable,int writeable)16821fb015a8Sflorian process_fd(isc_socketmgr_t *manager, int fd, int readable,
16831fb015a8Sflorian int writeable)
16845185a700Sflorian {
16858b553854Sflorian isc_socket_t *sock;
16861fb015a8Sflorian int unwatch_read = 0, unwatch_write = 0;
16875185a700Sflorian
16885185a700Sflorian /*
16895185a700Sflorian * If the socket is going to be closed, don't do more I/O.
16905185a700Sflorian */
16915185a700Sflorian if (manager->fdstate[fd] == CLOSE_PENDING) {
16925185a700Sflorian (void)unwatch_fd(manager, fd, SELECT_POKE_READ);
16935185a700Sflorian (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
16945185a700Sflorian return;
16955185a700Sflorian }
16965185a700Sflorian
16975185a700Sflorian sock = manager->fds[fd];
16985185a700Sflorian if (readable) {
16995185a700Sflorian if (sock == NULL) {
17001fb015a8Sflorian unwatch_read = 1;
17015185a700Sflorian goto check_write;
17025185a700Sflorian }
17035185a700Sflorian if (!SOCK_DEAD(sock)) {
17045185a700Sflorian dispatch_recv(sock);
17055185a700Sflorian }
17061fb015a8Sflorian unwatch_read = 1;
17075185a700Sflorian }
17085185a700Sflorian check_write:
17095185a700Sflorian if (writeable) {
17105185a700Sflorian if (sock == NULL) {
17111fb015a8Sflorian unwatch_write = 1;
17125185a700Sflorian goto unlock_fd;
17135185a700Sflorian }
17145185a700Sflorian if (!SOCK_DEAD(sock)) {
17155185a700Sflorian if (sock->connecting)
17165185a700Sflorian dispatch_connect(sock);
17175185a700Sflorian else
17185185a700Sflorian dispatch_send(sock);
17195185a700Sflorian }
17201fb015a8Sflorian unwatch_write = 1;
17215185a700Sflorian }
17225185a700Sflorian
17235185a700Sflorian unlock_fd:
17245185a700Sflorian if (unwatch_read)
17255185a700Sflorian (void)unwatch_fd(manager, fd, SELECT_POKE_READ);
17265185a700Sflorian if (unwatch_write)
17275185a700Sflorian (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE);
17285185a700Sflorian
17295185a700Sflorian }
17305185a700Sflorian
17315185a700Sflorian static void
process_fds(isc_socketmgr_t * manager,int maxfd,fd_set * readfds,fd_set * writefds)17328b553854Sflorian process_fds(isc_socketmgr_t *manager, int maxfd, fd_set *readfds,
17335185a700Sflorian fd_set *writefds)
17345185a700Sflorian {
17355185a700Sflorian int i;
17365185a700Sflorian
17375185a700Sflorian REQUIRE(maxfd <= (int)manager->maxsocks);
17385185a700Sflorian
17395185a700Sflorian for (i = 0; i < maxfd; i++) {
17405185a700Sflorian process_fd(manager, i, FD_ISSET(i, readfds),
17415185a700Sflorian FD_ISSET(i, writefds));
17425185a700Sflorian }
17435185a700Sflorian }
17445185a700Sflorian
17455185a700Sflorian /*
17465185a700Sflorian * Create a new socket manager.
17475185a700Sflorian */
17485185a700Sflorian
17495185a700Sflorian static isc_result_t
setup_watcher(isc_socketmgr_t * manager)17508b553854Sflorian setup_watcher(isc_socketmgr_t *manager) {
17515185a700Sflorian isc_result_t result;
17525185a700Sflorian
17535185a700Sflorian UNUSED(result);
17545185a700Sflorian
17555185a700Sflorian manager->fd_bufsize = sizeof(fd_set);
17565185a700Sflorian
17575185a700Sflorian manager->read_fds = NULL;
17585185a700Sflorian manager->read_fds_copy = NULL;
17595185a700Sflorian manager->write_fds = NULL;
17605185a700Sflorian manager->write_fds_copy = NULL;
17615185a700Sflorian
17625185a700Sflorian manager->read_fds = malloc(manager->fd_bufsize);
17635185a700Sflorian if (manager->read_fds != NULL)
17645185a700Sflorian manager->read_fds_copy = malloc(manager->fd_bufsize);
17655185a700Sflorian if (manager->read_fds_copy != NULL)
17665185a700Sflorian manager->write_fds = malloc(manager->fd_bufsize);
17675185a700Sflorian if (manager->write_fds != NULL) {
17685185a700Sflorian manager->write_fds_copy = malloc(manager->fd_bufsize);
17695185a700Sflorian }
17705185a700Sflorian if (manager->write_fds_copy == NULL) {
17715185a700Sflorian if (manager->write_fds != NULL) {
17725185a700Sflorian free(manager->write_fds);
17735185a700Sflorian }
17745185a700Sflorian if (manager->read_fds_copy != NULL) {
17755185a700Sflorian free(manager->read_fds_copy);
17765185a700Sflorian }
17775185a700Sflorian if (manager->read_fds != NULL) {
17785185a700Sflorian free(manager->read_fds);
17795185a700Sflorian }
17805185a700Sflorian return (ISC_R_NOMEMORY);
17815185a700Sflorian }
17825185a700Sflorian memset(manager->read_fds, 0, manager->fd_bufsize);
17835185a700Sflorian memset(manager->write_fds, 0, manager->fd_bufsize);
17845185a700Sflorian
17855185a700Sflorian manager->maxfd = 0;
17865185a700Sflorian
17875185a700Sflorian return (ISC_R_SUCCESS);
17885185a700Sflorian }
17895185a700Sflorian
17905185a700Sflorian static void
cleanup_watcher(isc_socketmgr_t * manager)17918b553854Sflorian cleanup_watcher(isc_socketmgr_t *manager) {
17925185a700Sflorian
17935185a700Sflorian if (manager->read_fds != NULL)
17945185a700Sflorian free(manager->read_fds);
17955185a700Sflorian if (manager->read_fds_copy != NULL)
17965185a700Sflorian free(manager->read_fds_copy);
17975185a700Sflorian if (manager->write_fds != NULL)
17985185a700Sflorian free(manager->write_fds);
17995185a700Sflorian if (manager->write_fds_copy != NULL)
18005185a700Sflorian free(manager->write_fds_copy);
18015185a700Sflorian }
18025185a700Sflorian
18038b553854Sflorian static isc_result_t
isc_socketmgr_create2(isc_socketmgr_t ** managerp,unsigned int maxsocks)18048b553854Sflorian isc_socketmgr_create2(isc_socketmgr_t **managerp,
18055185a700Sflorian unsigned int maxsocks)
18065185a700Sflorian {
18078b553854Sflorian isc_socketmgr_t *manager;
18085185a700Sflorian isc_result_t result;
18095185a700Sflorian
18105185a700Sflorian REQUIRE(managerp != NULL && *managerp == NULL);
18115185a700Sflorian
18125185a700Sflorian if (socketmgr != NULL) {
18135185a700Sflorian /* Don't allow maxsocks to be updated */
18145185a700Sflorian if (maxsocks > 0 && socketmgr->maxsocks != maxsocks)
18155185a700Sflorian return (ISC_R_EXISTS);
18165185a700Sflorian
18175185a700Sflorian socketmgr->refs++;
18185185a700Sflorian *managerp = (isc_socketmgr_t *)socketmgr;
18195185a700Sflorian return (ISC_R_SUCCESS);
18205185a700Sflorian }
18215185a700Sflorian
18225185a700Sflorian if (maxsocks == 0)
18235185a700Sflorian maxsocks = FD_SETSIZE;
18245185a700Sflorian
18255185a700Sflorian manager = malloc(sizeof(*manager));
18265185a700Sflorian if (manager == NULL)
18275185a700Sflorian return (ISC_R_NOMEMORY);
18285185a700Sflorian
18295185a700Sflorian /* zero-clear so that necessary cleanup on failure will be easy */
18305185a700Sflorian memset(manager, 0, sizeof(*manager));
18315185a700Sflorian manager->maxsocks = maxsocks;
18325148cc0dSderaadt manager->fds = reallocarray(NULL, manager->maxsocks, sizeof(isc_socket_t *));
18335185a700Sflorian if (manager->fds == NULL) {
18345185a700Sflorian result = ISC_R_NOMEMORY;
18355185a700Sflorian goto free_manager;
18365185a700Sflorian }
18375148cc0dSderaadt manager->fdstate = reallocarray(NULL, manager->maxsocks, sizeof(int));
18385185a700Sflorian if (manager->fdstate == NULL) {
18395185a700Sflorian result = ISC_R_NOMEMORY;
18405185a700Sflorian goto free_manager;
18415185a700Sflorian }
18425185a700Sflorian
18435185a700Sflorian memset(manager->fds, 0, manager->maxsocks * sizeof(isc_socket_t *));
18445185a700Sflorian ISC_LIST_INIT(manager->socklist);
18455185a700Sflorian
18465185a700Sflorian manager->refs = 1;
18475185a700Sflorian
18485185a700Sflorian /*
18495185a700Sflorian * Set up initial state for the select loop
18505185a700Sflorian */
18515185a700Sflorian result = setup_watcher(manager);
18525185a700Sflorian if (result != ISC_R_SUCCESS)
18535185a700Sflorian goto cleanup;
18545185a700Sflorian
18555185a700Sflorian memset(manager->fdstate, 0, manager->maxsocks * sizeof(int));
18565185a700Sflorian
18575185a700Sflorian socketmgr = manager;
18585185a700Sflorian *managerp = (isc_socketmgr_t *)manager;
18595185a700Sflorian
18605185a700Sflorian return (ISC_R_SUCCESS);
18615185a700Sflorian
18625185a700Sflorian cleanup:
18635185a700Sflorian
18645185a700Sflorian free_manager:
18655185a700Sflorian if (manager->fdstate != NULL) {
18665185a700Sflorian free(manager->fdstate);
18675185a700Sflorian }
18685185a700Sflorian if (manager->fds != NULL) {
18695185a700Sflorian free(manager->fds);
18705185a700Sflorian }
18715185a700Sflorian free(manager);
18725185a700Sflorian
18735185a700Sflorian return (result);
18745185a700Sflorian }
18755185a700Sflorian
18768b553854Sflorian isc_result_t
isc_socketmgr_create(isc_socketmgr_t ** managerp)18778b553854Sflorian isc_socketmgr_create(isc_socketmgr_t **managerp) {
18788b553854Sflorian return (isc_socketmgr_create2(managerp, 0));
18798b553854Sflorian }
18808b553854Sflorian
18815185a700Sflorian void
isc_socketmgr_destroy(isc_socketmgr_t ** managerp)18828b553854Sflorian isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
18838b553854Sflorian isc_socketmgr_t *manager;
18845185a700Sflorian int i;
18855185a700Sflorian
18865185a700Sflorian /*
18875185a700Sflorian * Destroy a socket manager.
18885185a700Sflorian */
18895185a700Sflorian
18905185a700Sflorian REQUIRE(managerp != NULL);
18918b553854Sflorian manager = (isc_socketmgr_t *)*managerp;
18925185a700Sflorian
18935185a700Sflorian manager->refs--;
18945185a700Sflorian if (manager->refs > 0) {
18955185a700Sflorian *managerp = NULL;
18965185a700Sflorian return;
18975185a700Sflorian }
18985185a700Sflorian socketmgr = NULL;
18995185a700Sflorian
19005185a700Sflorian /*
19015185a700Sflorian * Wait for all sockets to be destroyed.
19025185a700Sflorian */
19035185a700Sflorian while (!ISC_LIST_EMPTY(manager->socklist)) {
19048b553854Sflorian isc_taskmgr_dispatch(NULL);
19055185a700Sflorian }
19065185a700Sflorian
19075185a700Sflorian /*
19085185a700Sflorian * Here, poke our select/poll thread. Do this by closing the write
19095185a700Sflorian * half of the pipe, which will send EOF to the read half.
19105185a700Sflorian * This is currently a no-op in the non-threaded case.
19115185a700Sflorian */
19125185a700Sflorian select_poke(manager, 0, SELECT_POKE_SHUTDOWN);
19135185a700Sflorian
19145185a700Sflorian /*
19155185a700Sflorian * Clean up.
19165185a700Sflorian */
19175185a700Sflorian cleanup_watcher(manager);
19185185a700Sflorian
19195185a700Sflorian for (i = 0; i < (int)manager->maxsocks; i++)
19205185a700Sflorian if (manager->fdstate[i] == CLOSE_PENDING) /* no need to lock */
19215185a700Sflorian (void)close(i);
19225185a700Sflorian
19235185a700Sflorian free(manager->fds);
19245185a700Sflorian free(manager->fdstate);
19255185a700Sflorian
19265185a700Sflorian free(manager);
19275185a700Sflorian
19285185a700Sflorian *managerp = NULL;
19295185a700Sflorian
19305185a700Sflorian socketmgr = NULL;
19315185a700Sflorian }
19325185a700Sflorian
19335185a700Sflorian static isc_result_t
socket_recv(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,unsigned int flags)19348b553854Sflorian socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
19355185a700Sflorian unsigned int flags)
19365185a700Sflorian {
19375185a700Sflorian int io_state;
19385185a700Sflorian isc_task_t *ntask = NULL;
19395185a700Sflorian isc_result_t result = ISC_R_SUCCESS;
19405185a700Sflorian
19415185a700Sflorian dev->ev_sender = task;
19425185a700Sflorian
19435185a700Sflorian if (sock->type == isc_sockettype_udp) {
19445185a700Sflorian io_state = doio_recv(sock, dev);
19455185a700Sflorian } else {
19465185a700Sflorian if (ISC_LIST_EMPTY(sock->recv_list))
19475185a700Sflorian io_state = doio_recv(sock, dev);
19485185a700Sflorian else
19495185a700Sflorian io_state = DOIO_SOFT;
19505185a700Sflorian }
19515185a700Sflorian
19525185a700Sflorian switch (io_state) {
19535185a700Sflorian case DOIO_SOFT:
19545185a700Sflorian /*
19555185a700Sflorian * We couldn't read all or part of the request right now, so
19565185a700Sflorian * queue it.
19575185a700Sflorian *
19585185a700Sflorian * Attach to socket and to task
19595185a700Sflorian */
19605185a700Sflorian isc_task_attach(task, &ntask);
19615185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
19625185a700Sflorian
19635185a700Sflorian /*
19645185a700Sflorian * Enqueue the request. If the socket was previously not being
19655185a700Sflorian * watched, poke the watcher to start paying attention to it.
19665185a700Sflorian */
19675185a700Sflorian if (ISC_LIST_EMPTY(sock->recv_list) && !sock->pending_recv)
19685185a700Sflorian select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
19695185a700Sflorian ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
19705185a700Sflorian
197157b02e75Sotto socket_log(sock, NULL, EVENT,
19725185a700Sflorian "socket_recv: event %p -> task %p",
19735185a700Sflorian dev, ntask);
19745185a700Sflorian
19755185a700Sflorian if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
19765185a700Sflorian result = ISC_R_INPROGRESS;
19775185a700Sflorian break;
19785185a700Sflorian
19795185a700Sflorian case DOIO_EOF:
19805185a700Sflorian dev->result = ISC_R_EOF;
19815185a700Sflorian /* fallthrough */
19825185a700Sflorian
19835185a700Sflorian case DOIO_HARD:
19845185a700Sflorian case DOIO_SUCCESS:
19855185a700Sflorian if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
19865185a700Sflorian send_recvdone_event(sock, &dev);
19875185a700Sflorian break;
19885185a700Sflorian }
19895185a700Sflorian
19905185a700Sflorian return (result);
19915185a700Sflorian }
19925185a700Sflorian
19935185a700Sflorian isc_result_t
isc_socket_recvv(isc_socket_t * sock0,isc_bufferlist_t * buflist,unsigned int minimum,isc_task_t * task,isc_taskaction_t action,void * arg)19948b553854Sflorian isc_socket_recvv(isc_socket_t *sock0, isc_bufferlist_t *buflist,
19955185a700Sflorian unsigned int minimum, isc_task_t *task,
19965185a700Sflorian isc_taskaction_t action, void *arg)
19975185a700Sflorian {
19988b553854Sflorian isc_socket_t *sock = (isc_socket_t *)sock0;
19995185a700Sflorian isc_socketevent_t *dev;
20005185a700Sflorian unsigned int iocount;
20015185a700Sflorian isc_buffer_t *buffer;
20025185a700Sflorian
20035185a700Sflorian REQUIRE(buflist != NULL);
20045185a700Sflorian REQUIRE(!ISC_LIST_EMPTY(*buflist));
20055185a700Sflorian REQUIRE(task != NULL);
20065185a700Sflorian REQUIRE(action != NULL);
20075185a700Sflorian
20085185a700Sflorian iocount = isc_bufferlist_availablecount(buflist);
20095185a700Sflorian REQUIRE(iocount > 0);
20105185a700Sflorian
20115185a700Sflorian INSIST(sock->bound);
20125185a700Sflorian
20135185a700Sflorian dev = allocate_socketevent(sock,
20145185a700Sflorian ISC_SOCKEVENT_RECVDONE, action, arg);
20155185a700Sflorian if (dev == NULL)
20165185a700Sflorian return (ISC_R_NOMEMORY);
20175185a700Sflorian
20185185a700Sflorian /*
20195185a700Sflorian * UDP sockets are always partial read
20205185a700Sflorian */
20215185a700Sflorian if (sock->type == isc_sockettype_udp)
20225185a700Sflorian dev->minimum = 1;
20235185a700Sflorian else {
20245185a700Sflorian if (minimum == 0)
20255185a700Sflorian dev->minimum = iocount;
20265185a700Sflorian else
20275185a700Sflorian dev->minimum = minimum;
20285185a700Sflorian }
20295185a700Sflorian
20305185a700Sflorian /*
20315185a700Sflorian * Move each buffer from the passed in list to our internal one.
20325185a700Sflorian */
20335185a700Sflorian buffer = ISC_LIST_HEAD(*buflist);
20345185a700Sflorian while (buffer != NULL) {
20355185a700Sflorian ISC_LIST_DEQUEUE(*buflist, buffer, link);
20365185a700Sflorian ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
20375185a700Sflorian buffer = ISC_LIST_HEAD(*buflist);
20385185a700Sflorian }
20395185a700Sflorian
20405185a700Sflorian return (socket_recv(sock, dev, task, 0));
20415185a700Sflorian }
20425185a700Sflorian
20435185a700Sflorian static isc_result_t
socket_send(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,struct sockaddr_storage * address,struct in6_pktinfo * pktinfo,unsigned int flags)20448b553854Sflorian socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2045*b1a294b5Sflorian struct sockaddr_storage *address, struct in6_pktinfo *pktinfo,
20465185a700Sflorian unsigned int flags)
20475185a700Sflorian {
20485185a700Sflorian int io_state;
20495185a700Sflorian isc_task_t *ntask = NULL;
20505185a700Sflorian isc_result_t result = ISC_R_SUCCESS;
20515185a700Sflorian
20525185a700Sflorian dev->ev_sender = task;
20535185a700Sflorian
20545185a700Sflorian set_dev_address(address, sock, dev);
20555185a700Sflorian if (pktinfo != NULL) {
20565185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
20575185a700Sflorian dev->pktinfo = *pktinfo;
20585185a700Sflorian
20595185a700Sflorian if (!isc_sockaddr_issitelocal(&dev->address) &&
20605185a700Sflorian !isc_sockaddr_islinklocal(&dev->address)) {
20615185a700Sflorian socket_log(sock, NULL, TRACE,
20625185a700Sflorian "pktinfo structure provided, ifindex %u "
20635185a700Sflorian "(set to 0)", pktinfo->ipi6_ifindex);
20645185a700Sflorian
20655185a700Sflorian /*
20665185a700Sflorian * Set the pktinfo index to 0 here, to let the
20675185a700Sflorian * kernel decide what interface it should send on.
20685185a700Sflorian */
20695185a700Sflorian dev->pktinfo.ipi6_ifindex = 0;
20705185a700Sflorian }
20715185a700Sflorian }
20725185a700Sflorian
20735185a700Sflorian if (sock->type == isc_sockettype_udp)
20745185a700Sflorian io_state = doio_send(sock, dev);
20755185a700Sflorian else {
20765185a700Sflorian if (ISC_LIST_EMPTY(sock->send_list))
20775185a700Sflorian io_state = doio_send(sock, dev);
20785185a700Sflorian else
20795185a700Sflorian io_state = DOIO_SOFT;
20805185a700Sflorian }
20815185a700Sflorian
20825185a700Sflorian switch (io_state) {
20835185a700Sflorian case DOIO_SOFT:
20845185a700Sflorian /*
20855185a700Sflorian * We couldn't send all or part of the request right now, so
20865185a700Sflorian * queue it unless ISC_SOCKFLAG_NORETRY is set.
20875185a700Sflorian */
20885185a700Sflorian if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
20895185a700Sflorian isc_task_attach(task, &ntask);
20905185a700Sflorian dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
20915185a700Sflorian
20925185a700Sflorian /*
20935185a700Sflorian * Enqueue the request. If the socket was previously
20945185a700Sflorian * not being watched, poke the watcher to start
20955185a700Sflorian * paying attention to it.
20965185a700Sflorian */
20975185a700Sflorian if (ISC_LIST_EMPTY(sock->send_list) &&
20985185a700Sflorian !sock->pending_send)
20995185a700Sflorian select_poke(sock->manager, sock->fd,
21005185a700Sflorian SELECT_POKE_WRITE);
21015185a700Sflorian ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
21025185a700Sflorian
210357b02e75Sotto socket_log(sock, NULL, EVENT,
21045185a700Sflorian "socket_send: event %p -> task %p",
21055185a700Sflorian dev, ntask);
21065185a700Sflorian
21075185a700Sflorian if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
21085185a700Sflorian result = ISC_R_INPROGRESS;
21095185a700Sflorian break;
21105185a700Sflorian }
21115185a700Sflorian
21125185a700Sflorian /* FALLTHROUGH */
21135185a700Sflorian
21145185a700Sflorian case DOIO_HARD:
21155185a700Sflorian case DOIO_SUCCESS:
21165185a700Sflorian if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
21175185a700Sflorian send_senddone_event(sock, &dev);
21185185a700Sflorian break;
21195185a700Sflorian }
21205185a700Sflorian
21215185a700Sflorian return (result);
21225185a700Sflorian }
21235185a700Sflorian
21245185a700Sflorian isc_result_t
isc_socket_sendv(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg)21258b553854Sflorian isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
21265185a700Sflorian isc_task_t *task, isc_taskaction_t action, void *arg)
21275185a700Sflorian {
21288b553854Sflorian return (isc_socket_sendtov2(sock, buflist, task, action, arg, NULL,
21295185a700Sflorian NULL, 0));
21305185a700Sflorian }
21315185a700Sflorian
21325185a700Sflorian isc_result_t
isc_socket_sendtov2(isc_socket_t * sock0,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg,struct sockaddr_storage * address,struct in6_pktinfo * pktinfo,unsigned int flags)21338b553854Sflorian isc_socket_sendtov2(isc_socket_t *sock0, isc_bufferlist_t *buflist,
21345185a700Sflorian isc_task_t *task, isc_taskaction_t action, void *arg,
2135*b1a294b5Sflorian struct sockaddr_storage *address, struct in6_pktinfo *pktinfo,
21365185a700Sflorian unsigned int flags)
21375185a700Sflorian {
21388b553854Sflorian isc_socket_t *sock = (isc_socket_t *)sock0;
21395185a700Sflorian isc_socketevent_t *dev;
21405185a700Sflorian unsigned int iocount;
21415185a700Sflorian isc_buffer_t *buffer;
21425185a700Sflorian
21435185a700Sflorian REQUIRE(buflist != NULL);
21445185a700Sflorian REQUIRE(!ISC_LIST_EMPTY(*buflist));
21455185a700Sflorian REQUIRE(task != NULL);
21465185a700Sflorian REQUIRE(action != NULL);
21475185a700Sflorian
21485185a700Sflorian iocount = isc_bufferlist_usedcount(buflist);
21495185a700Sflorian REQUIRE(iocount > 0);
21505185a700Sflorian
21515185a700Sflorian dev = allocate_socketevent(sock,
21525185a700Sflorian ISC_SOCKEVENT_SENDDONE, action, arg);
21535185a700Sflorian if (dev == NULL)
21545185a700Sflorian return (ISC_R_NOMEMORY);
21555185a700Sflorian
21565185a700Sflorian /*
21575185a700Sflorian * Move each buffer from the passed in list to our internal one.
21585185a700Sflorian */
21595185a700Sflorian buffer = ISC_LIST_HEAD(*buflist);
21605185a700Sflorian while (buffer != NULL) {
21615185a700Sflorian ISC_LIST_DEQUEUE(*buflist, buffer, link);
21625185a700Sflorian ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
21635185a700Sflorian buffer = ISC_LIST_HEAD(*buflist);
21645185a700Sflorian }
21655185a700Sflorian
21665185a700Sflorian return (socket_send(sock, dev, task, address, pktinfo, flags));
21675185a700Sflorian }
21685185a700Sflorian
21695185a700Sflorian isc_result_t
isc_socket_bind(isc_socket_t * sock0,struct sockaddr_storage * sockaddr,unsigned int options)2170*b1a294b5Sflorian isc_socket_bind(isc_socket_t *sock0, struct sockaddr_storage *sockaddr,
21715185a700Sflorian unsigned int options) {
21728b553854Sflorian isc_socket_t *sock = (isc_socket_t *)sock0;
21735185a700Sflorian int on = 1;
21745185a700Sflorian
21755185a700Sflorian INSIST(!sock->bound);
21765185a700Sflorian
2177*b1a294b5Sflorian if (sock->pf != sockaddr->ss_family) {
21785185a700Sflorian return (ISC_R_FAMILYMISMATCH);
21795185a700Sflorian }
21805185a700Sflorian
21815185a700Sflorian /*
21825185a700Sflorian * Only set SO_REUSEADDR when we want a specific port.
21835185a700Sflorian */
21845185a700Sflorian if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
21855185a700Sflorian isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
21865185a700Sflorian setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
21875185a700Sflorian sizeof(on)) < 0) {
21885185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
21895185a700Sflorian "setsockopt(%d) %s", sock->fd, "failed");
21905185a700Sflorian /* Press on... */
21915185a700Sflorian }
2192*b1a294b5Sflorian if (bind(sock->fd, (struct sockaddr *)sockaddr, sockaddr->ss_len) < 0) {
21935185a700Sflorian switch (errno) {
21945185a700Sflorian case EACCES:
21955185a700Sflorian return (ISC_R_NOPERM);
21965185a700Sflorian case EADDRNOTAVAIL:
21975185a700Sflorian return (ISC_R_ADDRNOTAVAIL);
21985185a700Sflorian case EADDRINUSE:
21995185a700Sflorian return (ISC_R_ADDRINUSE);
22005185a700Sflorian case EINVAL:
22015185a700Sflorian return (ISC_R_BOUND);
22025185a700Sflorian default:
22035185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
220440adc7c5Sjung strerror(errno));
22055185a700Sflorian return (ISC_R_UNEXPECTED);
22065185a700Sflorian }
22075185a700Sflorian }
22085185a700Sflorian
22095185a700Sflorian socket_log(sock, sockaddr, TRACE, "bound");
22105185a700Sflorian sock->bound = 1;
22115185a700Sflorian
22125185a700Sflorian return (ISC_R_SUCCESS);
22135185a700Sflorian }
22145185a700Sflorian
22155185a700Sflorian isc_result_t
isc_socket_connect(isc_socket_t * sock0,struct sockaddr_storage * addr,isc_task_t * task,isc_taskaction_t action,void * arg)2216*b1a294b5Sflorian isc_socket_connect(isc_socket_t *sock0, struct sockaddr_storage *addr,
22175185a700Sflorian isc_task_t *task, isc_taskaction_t action, void *arg)
22185185a700Sflorian {
22198b553854Sflorian isc_socket_t *sock = (isc_socket_t *)sock0;
22205185a700Sflorian isc_socket_connev_t *dev;
22215185a700Sflorian isc_task_t *ntask = NULL;
22228b553854Sflorian isc_socketmgr_t *manager;
22235185a700Sflorian int cc;
22245185a700Sflorian char addrbuf[ISC_SOCKADDR_FORMATSIZE];
22255185a700Sflorian
22265185a700Sflorian REQUIRE(addr != NULL);
22275185a700Sflorian REQUIRE(task != NULL);
22285185a700Sflorian REQUIRE(action != NULL);
22295185a700Sflorian
22305185a700Sflorian manager = sock->manager;
22315185a700Sflorian REQUIRE(addr != NULL);
22325185a700Sflorian
22335185a700Sflorian if (isc_sockaddr_ismulticast(addr))
22345185a700Sflorian return (ISC_R_MULTICAST);
22355185a700Sflorian
22365185a700Sflorian REQUIRE(!sock->connecting);
22375185a700Sflorian
22385185a700Sflorian dev = (isc_socket_connev_t *)isc_event_allocate(sock,
22395185a700Sflorian ISC_SOCKEVENT_CONNECT,
22405185a700Sflorian action, arg,
22415185a700Sflorian sizeof(*dev));
22425185a700Sflorian if (dev == NULL) {
22435185a700Sflorian return (ISC_R_NOMEMORY);
22445185a700Sflorian }
22455185a700Sflorian ISC_LINK_INIT(dev, ev_link);
22465185a700Sflorian
22475185a700Sflorian /*
22485185a700Sflorian * Try to do the connect right away, as there can be only one
22495185a700Sflorian * outstanding, and it might happen to complete.
22505185a700Sflorian */
22515185a700Sflorian sock->peer_address = *addr;
2252*b1a294b5Sflorian cc = connect(sock->fd, (struct sockaddr *)addr, addr->ss_len);
22535185a700Sflorian if (cc < 0) {
22545185a700Sflorian /*
22555185a700Sflorian * HP-UX "fails" to connect a UDP socket and sets errno to
22565185a700Sflorian * EINPROGRESS if it's non-blocking. We'd rather regard this as
22575185a700Sflorian * a success and let the user detect it if it's really an error
22585185a700Sflorian * at the time of sending a packet on the socket.
22595185a700Sflorian */
22605185a700Sflorian if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) {
22615185a700Sflorian cc = 0;
22625185a700Sflorian goto success;
22635185a700Sflorian }
22645185a700Sflorian if (SOFT_ERROR(errno) || errno == EINPROGRESS)
22655185a700Sflorian goto queue;
22665185a700Sflorian
22675185a700Sflorian switch (errno) {
22685185a700Sflorian #define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
22695185a700Sflorian ERROR_MATCH(EACCES, ISC_R_NOPERM);
22705185a700Sflorian ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
22715185a700Sflorian ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
22725185a700Sflorian ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
22735185a700Sflorian ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
22745185a700Sflorian ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
22755185a700Sflorian ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
22765185a700Sflorian ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
22775185a700Sflorian ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
22785185a700Sflorian ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
22795185a700Sflorian ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
22805185a700Sflorian #undef ERROR_MATCH
22815185a700Sflorian }
22825185a700Sflorian
22835185a700Sflorian sock->connected = 0;
22845185a700Sflorian
22855185a700Sflorian isc_sockaddr_format(addr, addrbuf, sizeof(addrbuf));
22865185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__, "connect(%s) %d/%s",
228740adc7c5Sjung addrbuf, errno, strerror(errno));
22885185a700Sflorian
22895185a700Sflorian isc_event_free(ISC_EVENT_PTR(&dev));
22905185a700Sflorian return (ISC_R_UNEXPECTED);
22915185a700Sflorian
22925185a700Sflorian err_exit:
22935185a700Sflorian sock->connected = 0;
22945185a700Sflorian isc_task_send(task, ISC_EVENT_PTR(&dev));
22955185a700Sflorian
22965185a700Sflorian return (ISC_R_SUCCESS);
22975185a700Sflorian }
22985185a700Sflorian
22995185a700Sflorian /*
23005185a700Sflorian * If connect completed, fire off the done event.
23015185a700Sflorian */
23025185a700Sflorian success:
23035185a700Sflorian if (cc == 0) {
23045185a700Sflorian sock->connected = 1;
23055185a700Sflorian sock->bound = 1;
23065185a700Sflorian dev->result = ISC_R_SUCCESS;
23075185a700Sflorian isc_task_send(task, ISC_EVENT_PTR(&dev));
23085185a700Sflorian
23095185a700Sflorian return (ISC_R_SUCCESS);
23105185a700Sflorian }
23115185a700Sflorian
23125185a700Sflorian queue:
23135185a700Sflorian
23145185a700Sflorian /*
23155185a700Sflorian * Attach to task.
23165185a700Sflorian */
23175185a700Sflorian isc_task_attach(task, &ntask);
23185185a700Sflorian
23195185a700Sflorian sock->connecting = 1;
23205185a700Sflorian
23215185a700Sflorian dev->ev_sender = ntask;
23225185a700Sflorian
23235185a700Sflorian /*
23245185a700Sflorian * Poke watcher here. We still have the socket locked, so there
23255185a700Sflorian * is no race condition. We will keep the lock for such a short
23265185a700Sflorian * bit of time waking it up now or later won't matter all that much.
23275185a700Sflorian */
23285185a700Sflorian if (sock->connect_ev == NULL)
23295185a700Sflorian select_poke(manager, sock->fd, SELECT_POKE_CONNECT);
23305185a700Sflorian
23315185a700Sflorian sock->connect_ev = dev;
23325185a700Sflorian
23335185a700Sflorian return (ISC_R_SUCCESS);
23345185a700Sflorian }
23355185a700Sflorian
23365185a700Sflorian /*
23375185a700Sflorian * Called when a socket with a pending connect() finishes.
23385185a700Sflorian */
23395185a700Sflorian static void
internal_connect(isc_task_t * me,isc_event_t * ev)23405185a700Sflorian internal_connect(isc_task_t *me, isc_event_t *ev) {
23418b553854Sflorian isc_socket_t *sock;
23425185a700Sflorian isc_socket_connev_t *dev;
23435185a700Sflorian isc_task_t *task;
23445185a700Sflorian int cc;
23455185a700Sflorian socklen_t optlen;
23465185a700Sflorian char peerbuf[ISC_SOCKADDR_FORMATSIZE];
23475185a700Sflorian
23485185a700Sflorian UNUSED(me);
23495185a700Sflorian INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);
23505185a700Sflorian
23515185a700Sflorian sock = ev->ev_sender;
23525185a700Sflorian
23535185a700Sflorian /*
23545185a700Sflorian * When the internal event was sent the reference count was bumped
23555185a700Sflorian * to keep the socket around for us. Decrement the count here.
23565185a700Sflorian */
23575185a700Sflorian INSIST(sock->references > 0);
23585185a700Sflorian sock->references--;
23595185a700Sflorian if (sock->references == 0) {
23605185a700Sflorian destroy(&sock);
23615185a700Sflorian return;
23625185a700Sflorian }
23635185a700Sflorian
23645185a700Sflorian /*
23655185a700Sflorian * Has this event been canceled?
23665185a700Sflorian */
23675185a700Sflorian dev = sock->connect_ev;
23685185a700Sflorian if (dev == NULL) {
23695185a700Sflorian INSIST(!sock->connecting);
23705185a700Sflorian return;
23715185a700Sflorian }
23725185a700Sflorian
23735185a700Sflorian INSIST(sock->connecting);
23745185a700Sflorian sock->connecting = 0;
23755185a700Sflorian
23765185a700Sflorian /*
23775185a700Sflorian * Get any possible error status here.
23785185a700Sflorian */
23795185a700Sflorian optlen = sizeof(cc);
23805185a700Sflorian if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR,
23815185a700Sflorian (void *)&cc, (void *)&optlen) < 0)
23825185a700Sflorian cc = errno;
23835185a700Sflorian else
23845185a700Sflorian errno = cc;
23855185a700Sflorian
23865185a700Sflorian if (errno != 0) {
23875185a700Sflorian /*
23885185a700Sflorian * If the error is EAGAIN, just re-select on this
23895185a700Sflorian * fd and pretend nothing strange happened.
23905185a700Sflorian */
23915185a700Sflorian if (SOFT_ERROR(errno) || errno == EINPROGRESS) {
23925185a700Sflorian sock->connecting = 1;
23935185a700Sflorian select_poke(sock->manager, sock->fd,
23945185a700Sflorian SELECT_POKE_CONNECT);
23955185a700Sflorian return;
23965185a700Sflorian }
23975185a700Sflorian
23985185a700Sflorian /*
23995185a700Sflorian * Translate other errors into ISC_R_* flavors.
24005185a700Sflorian */
24015185a700Sflorian switch (errno) {
24025185a700Sflorian #define ERROR_MATCH(a, b) case a: dev->result = b; break;
24035185a700Sflorian ERROR_MATCH(EACCES, ISC_R_NOPERM);
24045185a700Sflorian ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
24055185a700Sflorian ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
24065185a700Sflorian ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
24075185a700Sflorian ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
24085185a700Sflorian ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
24095185a700Sflorian ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
24105185a700Sflorian ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
24115185a700Sflorian ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
24125185a700Sflorian ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
24135185a700Sflorian ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT);
24145185a700Sflorian ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
24155185a700Sflorian #undef ERROR_MATCH
24165185a700Sflorian default:
24175185a700Sflorian dev->result = ISC_R_UNEXPECTED;
24185185a700Sflorian isc_sockaddr_format(&sock->peer_address, peerbuf,
24195185a700Sflorian sizeof(peerbuf));
24205185a700Sflorian UNEXPECTED_ERROR(__FILE__, __LINE__,
24215185a700Sflorian "internal_connect: connect(%s) %s",
242240adc7c5Sjung peerbuf, strerror(errno));
24235185a700Sflorian }
24245185a700Sflorian } else {
24255185a700Sflorian dev->result = ISC_R_SUCCESS;
24265185a700Sflorian sock->connected = 1;
24275185a700Sflorian sock->bound = 1;
24285185a700Sflorian }
24295185a700Sflorian
24305185a700Sflorian sock->connect_ev = NULL;
24315185a700Sflorian
24325185a700Sflorian task = dev->ev_sender;
24335185a700Sflorian dev->ev_sender = sock;
24345185a700Sflorian isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev));
24355185a700Sflorian }
24365185a700Sflorian
24375185a700Sflorian /*
24385185a700Sflorian * Run through the list of events on this socket, and cancel the ones
24395185a700Sflorian * queued for task "task" of type "how". "how" is a bitmask.
24405185a700Sflorian */
24415185a700Sflorian void
isc_socket_cancel(isc_socket_t * sock0,isc_task_t * task,unsigned int how)24428b553854Sflorian isc_socket_cancel(isc_socket_t *sock0, isc_task_t *task, unsigned int how) {
24438b553854Sflorian isc_socket_t *sock = (isc_socket_t *)sock0;
24445185a700Sflorian
24455185a700Sflorian /*
24465185a700Sflorian * Quick exit if there is nothing to do. Don't even bother locking
24475185a700Sflorian * in this case.
24485185a700Sflorian */
24495185a700Sflorian if (how == 0)
24505185a700Sflorian return;
24515185a700Sflorian
24525185a700Sflorian /*
24535185a700Sflorian * All of these do the same thing, more or less.
24545185a700Sflorian * Each will:
24555185a700Sflorian * o If the internal event is marked as "posted" try to
24565185a700Sflorian * remove it from the task's queue. If this fails, mark it
24575185a700Sflorian * as canceled instead, and let the task clean it up later.
24585185a700Sflorian * o For each I/O request for that task of that type, post
24595185a700Sflorian * its done event with status of "ISC_R_CANCELED".
24605185a700Sflorian * o Reset any state needed.
24615185a700Sflorian */
24625185a700Sflorian if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV)
24635185a700Sflorian && !ISC_LIST_EMPTY(sock->recv_list)) {
24645185a700Sflorian isc_socketevent_t *dev;
24655185a700Sflorian isc_socketevent_t *next;
24665185a700Sflorian isc_task_t *current_task;
24675185a700Sflorian
24685185a700Sflorian dev = ISC_LIST_HEAD(sock->recv_list);
24695185a700Sflorian
24705185a700Sflorian while (dev != NULL) {
24715185a700Sflorian current_task = dev->ev_sender;
24725185a700Sflorian next = ISC_LIST_NEXT(dev, ev_link);
24735185a700Sflorian
24745185a700Sflorian if ((task == NULL) || (task == current_task)) {
24755185a700Sflorian dev->result = ISC_R_CANCELED;
24765185a700Sflorian send_recvdone_event(sock, &dev);
24775185a700Sflorian }
24785185a700Sflorian dev = next;
24795185a700Sflorian }
24805185a700Sflorian }
24815185a700Sflorian
24825185a700Sflorian if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND)
24835185a700Sflorian && !ISC_LIST_EMPTY(sock->send_list)) {
24845185a700Sflorian isc_socketevent_t *dev;
24855185a700Sflorian isc_socketevent_t *next;
24865185a700Sflorian isc_task_t *current_task;
24875185a700Sflorian
24885185a700Sflorian dev = ISC_LIST_HEAD(sock->send_list);
24895185a700Sflorian
24905185a700Sflorian while (dev != NULL) {
24915185a700Sflorian current_task = dev->ev_sender;
24925185a700Sflorian next = ISC_LIST_NEXT(dev, ev_link);
24935185a700Sflorian
24945185a700Sflorian if ((task == NULL) || (task == current_task)) {
24955185a700Sflorian dev->result = ISC_R_CANCELED;
24965185a700Sflorian send_senddone_event(sock, &dev);
24975185a700Sflorian }
24985185a700Sflorian dev = next;
24995185a700Sflorian }
25005185a700Sflorian }
25015185a700Sflorian
25025185a700Sflorian /*
25035185a700Sflorian * Connecting is not a list.
25045185a700Sflorian */
25055185a700Sflorian if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
25065185a700Sflorian && sock->connect_ev != NULL) {
25075185a700Sflorian isc_socket_connev_t *dev;
25085185a700Sflorian isc_task_t *current_task;
25095185a700Sflorian
25105185a700Sflorian INSIST(sock->connecting);
25115185a700Sflorian sock->connecting = 0;
25125185a700Sflorian
25135185a700Sflorian dev = sock->connect_ev;
25145185a700Sflorian current_task = dev->ev_sender;
25155185a700Sflorian
25165185a700Sflorian if ((task == NULL) || (task == current_task)) {
25175185a700Sflorian sock->connect_ev = NULL;
25185185a700Sflorian
25195185a700Sflorian dev->result = ISC_R_CANCELED;
25205185a700Sflorian dev->ev_sender = sock;
25215185a700Sflorian isc_task_sendanddetach(¤t_task,
25225185a700Sflorian ISC_EVENT_PTR(&dev));
25235185a700Sflorian }
25245185a700Sflorian }
25255185a700Sflorian
25265185a700Sflorian }
25275185a700Sflorian
25285185a700Sflorian /*
25295185a700Sflorian * In our assumed scenario, we can simply use a single static object.
25305185a700Sflorian * XXX: this is not true if the application uses multiple threads with
25315185a700Sflorian * 'multi-context' mode. Fixing this is a future TODO item.
25325185a700Sflorian */
25335185a700Sflorian static isc_socketwait_t swait_private;
25345185a700Sflorian
25355185a700Sflorian int
isc_socketmgr_waitevents(isc_socketmgr_t * manager0,struct timeval * tvp,isc_socketwait_t ** swaitp)25368b553854Sflorian isc_socketmgr_waitevents(isc_socketmgr_t *manager0, struct timeval *tvp,
25375185a700Sflorian isc_socketwait_t **swaitp)
25385185a700Sflorian {
25398b553854Sflorian isc_socketmgr_t *manager = (isc_socketmgr_t *)manager0;
25405185a700Sflorian int n;
25415185a700Sflorian
25425185a700Sflorian REQUIRE(swaitp != NULL && *swaitp == NULL);
25435185a700Sflorian
25445185a700Sflorian if (manager == NULL)
25455185a700Sflorian manager = socketmgr;
25465185a700Sflorian if (manager == NULL)
25475185a700Sflorian return (0);
25485185a700Sflorian
25495185a700Sflorian memmove(manager->read_fds_copy, manager->read_fds, manager->fd_bufsize);
25505185a700Sflorian memmove(manager->write_fds_copy, manager->write_fds,
25515185a700Sflorian manager->fd_bufsize);
25525185a700Sflorian
25535185a700Sflorian swait_private.readset = manager->read_fds_copy;
25545185a700Sflorian swait_private.writeset = manager->write_fds_copy;
25555185a700Sflorian swait_private.maxfd = manager->maxfd + 1;
25565185a700Sflorian
25575185a700Sflorian n = select(swait_private.maxfd, swait_private.readset,
25585185a700Sflorian swait_private.writeset, NULL, tvp);
25595185a700Sflorian
25605185a700Sflorian *swaitp = &swait_private;
25615185a700Sflorian return (n);
25625185a700Sflorian }
25635185a700Sflorian
25645185a700Sflorian isc_result_t
isc_socketmgr_dispatch(isc_socketmgr_t * manager0,isc_socketwait_t * swait)25658b553854Sflorian isc_socketmgr_dispatch(isc_socketmgr_t *manager0, isc_socketwait_t *swait) {
25668b553854Sflorian isc_socketmgr_t *manager = (isc_socketmgr_t *)manager0;
25675185a700Sflorian
25685185a700Sflorian REQUIRE(swait == &swait_private);
25695185a700Sflorian
25705185a700Sflorian if (manager == NULL)
25715185a700Sflorian manager = socketmgr;
25725185a700Sflorian if (manager == NULL)
25735185a700Sflorian return (ISC_R_NOTFOUND);
25745185a700Sflorian
25755185a700Sflorian process_fds(manager, swait->maxfd, swait->readset, swait->writeset);
25765185a700Sflorian return (ISC_R_SUCCESS);
25775185a700Sflorian }
2578