1933707f3Ssthen /* 2933707f3Ssthen * util/netevent.c - event notification 3933707f3Ssthen * 4933707f3Ssthen * Copyright (c) 2007, NLnet Labs. All rights reserved. 5933707f3Ssthen * 6933707f3Ssthen * This software is open source. 7933707f3Ssthen * 8933707f3Ssthen * Redistribution and use in source and binary forms, with or without 9933707f3Ssthen * modification, are permitted provided that the following conditions 10933707f3Ssthen * are met: 11933707f3Ssthen * 12933707f3Ssthen * Redistributions of source code must retain the above copyright notice, 13933707f3Ssthen * this list of conditions and the following disclaimer. 14933707f3Ssthen * 15933707f3Ssthen * Redistributions in binary form must reproduce the above copyright notice, 16933707f3Ssthen * this list of conditions and the following disclaimer in the documentation 17933707f3Ssthen * and/or other materials provided with the distribution. 18933707f3Ssthen * 19933707f3Ssthen * Neither the name of the NLNET LABS nor the names of its contributors may 20933707f3Ssthen * be used to endorse or promote products derived from this software without 21933707f3Ssthen * specific prior written permission. 22933707f3Ssthen * 23933707f3Ssthen * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 240b68ff31Ssthen * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 250b68ff31Ssthen * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 260b68ff31Ssthen * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 270b68ff31Ssthen * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 280b68ff31Ssthen * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 290b68ff31Ssthen * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 300b68ff31Ssthen * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 310b68ff31Ssthen * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 320b68ff31Ssthen * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 330b68ff31Ssthen * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34933707f3Ssthen */ 35933707f3Ssthen 36933707f3Ssthen /** 37933707f3Ssthen * \file 38933707f3Ssthen * 39933707f3Ssthen * This file contains event notification functions. 40933707f3Ssthen */ 41933707f3Ssthen #include "config.h" 42933707f3Ssthen #include "util/netevent.h" 432ee382b6Ssthen #include "util/ub_event.h" 44933707f3Ssthen #include "util/log.h" 45933707f3Ssthen #include "util/net_help.h" 462308e98cSsthen #include "util/tcp_conn_limit.h" 47933707f3Ssthen #include "util/fptr_wlist.h" 4845872187Ssthen #include "util/proxy_protocol.h" 498b7325afSsthen #include "util/timeval_func.h" 50a58bff56Ssthen #include "sldns/pkthdr.h" 51a58bff56Ssthen #include "sldns/sbuffer.h" 5220237c55Ssthen #include "sldns/str2wire.h" 53e10d3884Sbrad #include "dnstap/dnstap.h" 542be9e038Ssthen #include "dnscrypt/dnscrypt.h" 55f6b99bafSsthen #include "services/listen_dnsport.h" 56191f22c6Ssthen #ifdef HAVE_SYS_TYPES_H 57191f22c6Ssthen #include <sys/types.h> 58191f22c6Ssthen #endif 59191f22c6Ssthen #ifdef HAVE_SYS_SOCKET_H 60191f22c6Ssthen #include <sys/socket.h> 61191f22c6Ssthen #endif 62191f22c6Ssthen #ifdef HAVE_NETDB_H 63191f22c6Ssthen #include <netdb.h> 64191f22c6Ssthen #endif 6545872187Ssthen #ifdef HAVE_POLL_H 6645872187Ssthen #include <poll.h> 6745872187Ssthen #endif 68191f22c6Ssthen 69cebdf579Ssthen #ifdef HAVE_OPENSSL_SSL_H 70933707f3Ssthen 
#include <openssl/ssl.h> 71cebdf579Ssthen #endif 72cebdf579Ssthen #ifdef HAVE_OPENSSL_ERR_H 73933707f3Ssthen #include <openssl/err.h> 74cebdf579Ssthen #endif 758b7325afSsthen #ifdef HAVE_LINUX_NET_TSTAMP_H 768b7325afSsthen #include <linux/net_tstamp.h> 778b7325afSsthen #endif 78933707f3Ssthen /* -------- Start of local definitions -------- */ 79933707f3Ssthen /** if CMSG_ALIGN is not defined on this platform, a workaround */ 80933707f3Ssthen #ifndef CMSG_ALIGN 8132e31f52Ssthen # ifdef __CMSG_ALIGN 8232e31f52Ssthen # define CMSG_ALIGN(n) __CMSG_ALIGN(n) 8332e31f52Ssthen # elif defined(CMSG_DATA_ALIGN) 84933707f3Ssthen # define CMSG_ALIGN _CMSG_DATA_ALIGN 85933707f3Ssthen # else 86933707f3Ssthen # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1)) 87933707f3Ssthen # endif 88933707f3Ssthen #endif 89933707f3Ssthen 90933707f3Ssthen /** if CMSG_LEN is not defined on this platform, a workaround */ 91933707f3Ssthen #ifndef CMSG_LEN 92933707f3Ssthen # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len)) 93933707f3Ssthen #endif 94933707f3Ssthen 95933707f3Ssthen /** if CMSG_SPACE is not defined on this platform, a workaround */ 96933707f3Ssthen #ifndef CMSG_SPACE 97933707f3Ssthen # ifdef _CMSG_HDR_ALIGN 98933707f3Ssthen # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr))) 99933707f3Ssthen # else 100933707f3Ssthen # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr))) 101933707f3Ssthen # endif 102933707f3Ssthen #endif 103933707f3Ssthen 1042308e98cSsthen /** The TCP writing query timeout in milliseconds */ 10577079be7Ssthen #define TCP_QUERY_TIMEOUT 120000 1062308e98cSsthen /** The minimum actual TCP timeout to use, regardless of what we advertise, 1072308e98cSsthen * in msec */ 1082308e98cSsthen #define TCP_QUERY_TIMEOUT_MINIMUM 200 109933707f3Ssthen 110933707f3Ssthen #ifndef NONBLOCKING_IS_BROKEN 111933707f3Ssthen /** number of UDP reads to perform per read indication from select */ 112933707f3Ssthen 
#define NUM_UDP_PER_SELECT 100 113933707f3Ssthen #else 114933707f3Ssthen #define NUM_UDP_PER_SELECT 1 115933707f3Ssthen #endif 116933707f3Ssthen 11745872187Ssthen /** timeout in millisec to wait for write to unblock, packets dropped after.*/ 11845872187Ssthen #define SEND_BLOCKED_WAIT_TIMEOUT 200 119afda2c03Sflorian /** max number of times to wait for write to unblock, packets dropped after.*/ 120afda2c03Sflorian #define SEND_BLOCKED_MAX_RETRY 5 12145872187Ssthen 1228b7325afSsthen /** Let's make timestamping code cleaner and redefine SO_TIMESTAMP* */ 1238b7325afSsthen #ifndef SO_TIMESTAMP 1248b7325afSsthen #define SO_TIMESTAMP 29 1258b7325afSsthen #endif 1268b7325afSsthen #ifndef SO_TIMESTAMPNS 1278b7325afSsthen #define SO_TIMESTAMPNS 35 1288b7325afSsthen #endif 1298b7325afSsthen #ifndef SO_TIMESTAMPING 1308b7325afSsthen #define SO_TIMESTAMPING 37 1318b7325afSsthen #endif 132933707f3Ssthen /** 1332ee382b6Ssthen * The internal event structure for keeping ub_event info for the event. 134933707f3Ssthen * Possibly other structures (list, tree) this is part of. 135933707f3Ssthen */ 136933707f3Ssthen struct internal_event { 137933707f3Ssthen /** the comm base */ 138933707f3Ssthen struct comm_base* base; 1392ee382b6Ssthen /** ub_event event type */ 1402ee382b6Ssthen struct ub_event* ev; 141933707f3Ssthen }; 142933707f3Ssthen 143933707f3Ssthen /** 144933707f3Ssthen * Internal base structure, so that every thread has its own events. 145933707f3Ssthen */ 146933707f3Ssthen struct internal_base { 1472ee382b6Ssthen /** ub_event event_base type. 
*/ 1482ee382b6Ssthen struct ub_event_base* base; 149933707f3Ssthen /** seconds time pointer points here */ 150e9c7b4efSsthen time_t secs; 151933707f3Ssthen /** timeval with current time */ 152933707f3Ssthen struct timeval now; 153af4988b1Ssthen /** the event used for slow_accept timeouts */ 1542ee382b6Ssthen struct ub_event* slow_accept; 155af4988b1Ssthen /** true if slow_accept is enabled */ 156af4988b1Ssthen int slow_accept_enabled; 15745872187Ssthen /** last log time for slow logging of file descriptor errors */ 15845872187Ssthen time_t last_slow_log; 15945872187Ssthen /** last log time for slow logging of write wait failures */ 16045872187Ssthen time_t last_writewait_log; 161933707f3Ssthen }; 162933707f3Ssthen 163933707f3Ssthen /** 164933707f3Ssthen * Internal timer structure, to store timer event in. 165933707f3Ssthen */ 166933707f3Ssthen struct internal_timer { 1672ee382b6Ssthen /** the super struct from which derived */ 1682ee382b6Ssthen struct comm_timer super; 169933707f3Ssthen /** the comm base */ 170933707f3Ssthen struct comm_base* base; 1712ee382b6Ssthen /** ub_event event type */ 1722ee382b6Ssthen struct ub_event* ev; 173933707f3Ssthen /** is timer enabled */ 174933707f3Ssthen uint8_t enabled; 175933707f3Ssthen }; 176933707f3Ssthen 177933707f3Ssthen /** 178933707f3Ssthen * Internal signal structure, to store signal event in. 
179933707f3Ssthen */ 180933707f3Ssthen struct internal_signal { 1812ee382b6Ssthen /** ub_event event type */ 1822ee382b6Ssthen struct ub_event* ev; 183933707f3Ssthen /** next in signal list */ 184933707f3Ssthen struct internal_signal* next; 185933707f3Ssthen }; 186933707f3Ssthen 187933707f3Ssthen /** create a tcp handler with a parent */ 188933707f3Ssthen static struct comm_point* comm_point_create_tcp_handler( 189933707f3Ssthen struct comm_base *base, struct comm_point* parent, size_t bufsize, 190f6b99bafSsthen struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 191191f22c6Ssthen void* callback_arg, struct unbound_socket* socket); 192933707f3Ssthen 193933707f3Ssthen /* -------- End of local definitions -------- */ 194933707f3Ssthen 195933707f3Ssthen struct comm_base* 196933707f3Ssthen comm_base_create(int sigs) 197933707f3Ssthen { 198933707f3Ssthen struct comm_base* b = (struct comm_base*)calloc(1, 199933707f3Ssthen sizeof(struct comm_base)); 2002ee382b6Ssthen const char *evnm="event", *evsys="", *evmethod=""; 2012ee382b6Ssthen 202933707f3Ssthen if(!b) 203933707f3Ssthen return NULL; 204933707f3Ssthen b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 205933707f3Ssthen if(!b->eb) { 206933707f3Ssthen free(b); 207933707f3Ssthen return NULL; 208933707f3Ssthen } 2092ee382b6Ssthen b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now); 210933707f3Ssthen if(!b->eb->base) { 211933707f3Ssthen free(b->eb); 212933707f3Ssthen free(b); 213933707f3Ssthen return NULL; 214933707f3Ssthen } 2152ee382b6Ssthen ub_comm_base_now(b); 2162ee382b6Ssthen ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod); 217550cf4a9Ssthen verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod); 218933707f3Ssthen return b; 219933707f3Ssthen } 220933707f3Ssthen 2210b68ff31Ssthen struct comm_base* 2222ee382b6Ssthen comm_base_create_event(struct ub_event_base* base) 2230b68ff31Ssthen { 2240b68ff31Ssthen struct comm_base* b = (struct 
comm_base*)calloc(1, 2250b68ff31Ssthen sizeof(struct comm_base)); 2260b68ff31Ssthen if(!b) 2270b68ff31Ssthen return NULL; 2280b68ff31Ssthen b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base)); 2290b68ff31Ssthen if(!b->eb) { 2300b68ff31Ssthen free(b); 2310b68ff31Ssthen return NULL; 2320b68ff31Ssthen } 2330b68ff31Ssthen b->eb->base = base; 2342ee382b6Ssthen ub_comm_base_now(b); 2350b68ff31Ssthen return b; 2360b68ff31Ssthen } 2370b68ff31Ssthen 238933707f3Ssthen void 239933707f3Ssthen comm_base_delete(struct comm_base* b) 240933707f3Ssthen { 241933707f3Ssthen if(!b) 242933707f3Ssthen return; 243af4988b1Ssthen if(b->eb->slow_accept_enabled) { 2442ee382b6Ssthen if(ub_event_del(b->eb->slow_accept) != 0) { 245af4988b1Ssthen log_err("could not event_del slow_accept"); 246af4988b1Ssthen } 2472ee382b6Ssthen ub_event_free(b->eb->slow_accept); 248af4988b1Ssthen } 2492ee382b6Ssthen ub_event_base_free(b->eb->base); 250933707f3Ssthen b->eb->base = NULL; 251933707f3Ssthen free(b->eb); 252933707f3Ssthen free(b); 253933707f3Ssthen } 254933707f3Ssthen 255933707f3Ssthen void 2560b68ff31Ssthen comm_base_delete_no_base(struct comm_base* b) 2570b68ff31Ssthen { 2580b68ff31Ssthen if(!b) 2590b68ff31Ssthen return; 2600b68ff31Ssthen if(b->eb->slow_accept_enabled) { 2612ee382b6Ssthen if(ub_event_del(b->eb->slow_accept) != 0) { 2620b68ff31Ssthen log_err("could not event_del slow_accept"); 2630b68ff31Ssthen } 2642ee382b6Ssthen ub_event_free(b->eb->slow_accept); 2650b68ff31Ssthen } 2660b68ff31Ssthen b->eb->base = NULL; 2670b68ff31Ssthen free(b->eb); 2680b68ff31Ssthen free(b); 2690b68ff31Ssthen } 2700b68ff31Ssthen 2710b68ff31Ssthen void 272e9c7b4efSsthen comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv) 273933707f3Ssthen { 274933707f3Ssthen *tt = &b->eb->secs; 275933707f3Ssthen *tv = &b->eb->now; 276933707f3Ssthen } 277933707f3Ssthen 278933707f3Ssthen void 279933707f3Ssthen comm_base_dispatch(struct comm_base* b) 280933707f3Ssthen { 281933707f3Ssthen int 
retval; 2822ee382b6Ssthen retval = ub_event_base_dispatch(b->eb->base); 2832ee382b6Ssthen if(retval < 0) { 284933707f3Ssthen fatal_exit("event_dispatch returned error %d, " 285933707f3Ssthen "errno is %s", retval, strerror(errno)); 286933707f3Ssthen } 287933707f3Ssthen } 288933707f3Ssthen 289933707f3Ssthen void comm_base_exit(struct comm_base* b) 290933707f3Ssthen { 2912ee382b6Ssthen if(ub_event_base_loopexit(b->eb->base) != 0) { 292933707f3Ssthen log_err("Could not loopexit"); 293933707f3Ssthen } 294933707f3Ssthen } 295933707f3Ssthen 296af4988b1Ssthen void comm_base_set_slow_accept_handlers(struct comm_base* b, 297af4988b1Ssthen void (*stop_acc)(void*), void (*start_acc)(void*), void* arg) 298af4988b1Ssthen { 299af4988b1Ssthen b->stop_accept = stop_acc; 300af4988b1Ssthen b->start_accept = start_acc; 301af4988b1Ssthen b->cb_arg = arg; 302af4988b1Ssthen } 303af4988b1Ssthen 3042ee382b6Ssthen struct ub_event_base* comm_base_internal(struct comm_base* b) 305933707f3Ssthen { 306933707f3Ssthen return b->eb->base; 307933707f3Ssthen } 308933707f3Ssthen 309933707f3Ssthen /** see if errno for udp has to be logged or not uses globals */ 310933707f3Ssthen static int 311933707f3Ssthen udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 312933707f3Ssthen { 313933707f3Ssthen /* do not log transient errors (unless high verbosity) */ 314933707f3Ssthen #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN) 315933707f3Ssthen switch(errno) { 316933707f3Ssthen # ifdef ENETUNREACH 317933707f3Ssthen case ENETUNREACH: 318933707f3Ssthen # endif 319933707f3Ssthen # ifdef EHOSTDOWN 320933707f3Ssthen case EHOSTDOWN: 321933707f3Ssthen # endif 322933707f3Ssthen # ifdef EHOSTUNREACH 323933707f3Ssthen case EHOSTUNREACH: 324933707f3Ssthen # endif 325933707f3Ssthen # ifdef ENETDOWN 326933707f3Ssthen case ENETDOWN: 327933707f3Ssthen # endif 328191f22c6Ssthen case EPERM: 329e21c60efSsthen case EACCES: 330933707f3Ssthen if(verbosity < VERB_ALGO) 
331933707f3Ssthen return 0; 332*98bc733bSsthen break; 333933707f3Ssthen default: 334933707f3Ssthen break; 335933707f3Ssthen } 336933707f3Ssthen #endif 337e9c7b4efSsthen /* permission denied is gotten for every send if the 338e9c7b4efSsthen * network is disconnected (on some OS), squelch it */ 339847d0c57Ssthen if( ((errno == EPERM) 340847d0c57Ssthen # ifdef EADDRNOTAVAIL 341847d0c57Ssthen /* 'Cannot assign requested address' also when disconnected */ 342847d0c57Ssthen || (errno == EADDRNOTAVAIL) 343847d0c57Ssthen # endif 344191f22c6Ssthen ) && verbosity < VERB_ALGO) 345e9c7b4efSsthen return 0; 346938a3a5eSflorian # ifdef EADDRINUSE 347938a3a5eSflorian /* If SO_REUSEADDR is set, we could try to connect to the same server 348938a3a5eSflorian * from the same source port twice. */ 349938a3a5eSflorian if(errno == EADDRINUSE && verbosity < VERB_DETAIL) 350938a3a5eSflorian return 0; 351938a3a5eSflorian # endif 352933707f3Ssthen /* squelch errors where people deploy AAAA ::ffff:bla for 353933707f3Ssthen * authority servers, which we try for intranets. */ 354933707f3Ssthen if(errno == EINVAL && addr_is_ip4mapped( 355933707f3Ssthen (struct sockaddr_storage*)addr, addrlen) && 356933707f3Ssthen verbosity < VERB_DETAIL) 357933707f3Ssthen return 0; 358933707f3Ssthen /* SO_BROADCAST sockopt can give access to 255.255.255.255, 359933707f3Ssthen * but a dns cache does not need it. 
*/ 360933707f3Ssthen if(errno == EACCES && addr_is_broadcast( 361933707f3Ssthen (struct sockaddr_storage*)addr, addrlen) && 362933707f3Ssthen verbosity < VERB_DETAIL) 363933707f3Ssthen return 0; 364933707f3Ssthen return 1; 365933707f3Ssthen } 366933707f3Ssthen 367163a4143Ssthen int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen) 368163a4143Ssthen { 369163a4143Ssthen return udp_send_errno_needs_log(addr, addrlen); 370163a4143Ssthen } 371163a4143Ssthen 372933707f3Ssthen /* send a UDP reply */ 373933707f3Ssthen int 3740b68ff31Ssthen comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, 3759982a05dSsthen struct sockaddr* addr, socklen_t addrlen, int is_connected) 376933707f3Ssthen { 377933707f3Ssthen ssize_t sent; 378933707f3Ssthen log_assert(c->fd != -1); 379933707f3Ssthen #ifdef UNBOUND_DEBUG 3800b68ff31Ssthen if(sldns_buffer_remaining(packet) == 0) 381933707f3Ssthen log_err("error: send empty UDP packet"); 382933707f3Ssthen #endif 383933707f3Ssthen log_assert(addr && addrlen > 0); 3849982a05dSsthen if(!is_connected) { 3850b68ff31Ssthen sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 3860b68ff31Ssthen sldns_buffer_remaining(packet), 0, 387933707f3Ssthen addr, addrlen); 388eba819a2Ssthen } else { 389eba819a2Ssthen sent = send(c->fd, (void*)sldns_buffer_begin(packet), 390eba819a2Ssthen sldns_buffer_remaining(packet), 0); 391eba819a2Ssthen } 392933707f3Ssthen if(sent == -1) { 39332e31f52Ssthen /* try again and block, waiting for IO to complete, 39432e31f52Ssthen * we want to send the answer, and we will wait for 39532e31f52Ssthen * the ethernet interface buffer to have space. 
*/ 39632e31f52Ssthen #ifndef USE_WINSOCK 39745872187Ssthen if(errno == EAGAIN || errno == EINTR || 39832e31f52Ssthen # ifdef EWOULDBLOCK 39932e31f52Ssthen errno == EWOULDBLOCK || 40032e31f52Ssthen # endif 40132e31f52Ssthen errno == ENOBUFS) { 40232e31f52Ssthen #else 40332e31f52Ssthen if(WSAGetLastError() == WSAEINPROGRESS || 40445872187Ssthen WSAGetLastError() == WSAEINTR || 40532e31f52Ssthen WSAGetLastError() == WSAENOBUFS || 40632e31f52Ssthen WSAGetLastError() == WSAEWOULDBLOCK) { 40732e31f52Ssthen #endif 408afda2c03Sflorian int retries = 0; 40945872187Ssthen /* if we set the fd blocking, other threads suddenly 41045872187Ssthen * have a blocking fd that they operate on */ 411afda2c03Sflorian while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( 41245872187Ssthen #ifndef USE_WINSOCK 41345872187Ssthen errno == EAGAIN || errno == EINTR || 41445872187Ssthen # ifdef EWOULDBLOCK 41545872187Ssthen errno == EWOULDBLOCK || 41645872187Ssthen # endif 41745872187Ssthen errno == ENOBUFS 41845872187Ssthen #else 41945872187Ssthen WSAGetLastError() == WSAEINPROGRESS || 42045872187Ssthen WSAGetLastError() == WSAEINTR || 42145872187Ssthen WSAGetLastError() == WSAENOBUFS || 42245872187Ssthen WSAGetLastError() == WSAEWOULDBLOCK 42345872187Ssthen #endif 42445872187Ssthen )) { 42545872187Ssthen #if defined(HAVE_POLL) || defined(USE_WINSOCK) 426afda2c03Sflorian int send_nobufs = ( 427afda2c03Sflorian #ifndef USE_WINSOCK 428afda2c03Sflorian errno == ENOBUFS 429afda2c03Sflorian #else 430afda2c03Sflorian WSAGetLastError() == WSAENOBUFS 431afda2c03Sflorian #endif 432afda2c03Sflorian ); 43345872187Ssthen struct pollfd p; 43445872187Ssthen int pret; 43545872187Ssthen memset(&p, 0, sizeof(p)); 43645872187Ssthen p.fd = c->fd; 43745872187Ssthen p.events = POLLOUT | POLLERR | POLLHUP; 43845872187Ssthen # ifndef USE_WINSOCK 43945872187Ssthen pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 44045872187Ssthen # else 44145872187Ssthen pret = WSAPoll(&p, 1, 44245872187Ssthen 
SEND_BLOCKED_WAIT_TIMEOUT); 44345872187Ssthen # endif 44445872187Ssthen if(pret == 0) { 44545872187Ssthen /* timer expired */ 44645872187Ssthen struct comm_base* b = c->ev->base; 44745872187Ssthen if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 44845872187Ssthen b->eb->secs) { 44945872187Ssthen b->eb->last_writewait_log = b->eb->secs; 45045872187Ssthen verbose(VERB_OPS, "send udp blocked " 45145872187Ssthen "for long, dropping packet."); 45245872187Ssthen } 45345872187Ssthen return 0; 45445872187Ssthen } else if(pret < 0 && 45545872187Ssthen #ifndef USE_WINSOCK 45645872187Ssthen errno != EAGAIN && errno != EINTR && 45745872187Ssthen # ifdef EWOULDBLOCK 45845872187Ssthen errno != EWOULDBLOCK && 45945872187Ssthen # endif 46045872187Ssthen errno != ENOBUFS 46145872187Ssthen #else 46245872187Ssthen WSAGetLastError() != WSAEINPROGRESS && 46345872187Ssthen WSAGetLastError() != WSAEINTR && 46445872187Ssthen WSAGetLastError() != WSAENOBUFS && 46545872187Ssthen WSAGetLastError() != WSAEWOULDBLOCK 46645872187Ssthen #endif 46745872187Ssthen ) { 46845872187Ssthen log_err("poll udp out failed: %s", 46945872187Ssthen sock_strerror(errno)); 47045872187Ssthen return 0; 471afda2c03Sflorian } else if((pret < 0 && 472afda2c03Sflorian #ifndef USE_WINSOCK 473afda2c03Sflorian errno == ENOBUFS 474afda2c03Sflorian #else 475afda2c03Sflorian WSAGetLastError() == WSAENOBUFS 476afda2c03Sflorian #endif 477afda2c03Sflorian ) || (send_nobufs && retries > 0)) { 478afda2c03Sflorian /* ENOBUFS, and poll returned without 479afda2c03Sflorian * a timeout. Or the retried send call 480afda2c03Sflorian * returned ENOBUFS. It is good to 481afda2c03Sflorian * wait a bit for the error to clear. */ 482afda2c03Sflorian /* The timeout is 20*(2^(retries+1)), 483afda2c03Sflorian * it increases exponentially, starting 484afda2c03Sflorian * at 40 msec. 
After 5 tries, 1240 msec 485afda2c03Sflorian * have passed in total, when poll 486afda2c03Sflorian * returned the error, and 1200 msec 487afda2c03Sflorian * when send returned the errors. */ 488afda2c03Sflorian #ifndef USE_WINSOCK 489afda2c03Sflorian pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 490afda2c03Sflorian #else 491afda2c03Sflorian pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 492afda2c03Sflorian #endif 493afda2c03Sflorian if(pret < 0 && 494afda2c03Sflorian #ifndef USE_WINSOCK 495afda2c03Sflorian errno != EAGAIN && errno != EINTR && 496afda2c03Sflorian # ifdef EWOULDBLOCK 497afda2c03Sflorian errno != EWOULDBLOCK && 498afda2c03Sflorian # endif 499afda2c03Sflorian errno != ENOBUFS 500afda2c03Sflorian #else 501afda2c03Sflorian WSAGetLastError() != WSAEINPROGRESS && 502afda2c03Sflorian WSAGetLastError() != WSAEINTR && 503afda2c03Sflorian WSAGetLastError() != WSAENOBUFS && 504afda2c03Sflorian WSAGetLastError() != WSAEWOULDBLOCK 505afda2c03Sflorian #endif 506afda2c03Sflorian ) { 507afda2c03Sflorian log_err("poll udp out timer failed: %s", 508afda2c03Sflorian sock_strerror(errno)); 509afda2c03Sflorian } 51045872187Ssthen } 51145872187Ssthen #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 512afda2c03Sflorian retries++; 5139982a05dSsthen if (!is_connected) { 51432e31f52Ssthen sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 51532e31f52Ssthen sldns_buffer_remaining(packet), 0, 51632e31f52Ssthen addr, addrlen); 5179982a05dSsthen } else { 5189982a05dSsthen sent = send(c->fd, (void*)sldns_buffer_begin(packet), 5199982a05dSsthen sldns_buffer_remaining(packet), 0); 5209982a05dSsthen } 52145872187Ssthen } 52232e31f52Ssthen } 52332e31f52Ssthen } 52432e31f52Ssthen if(sent == -1) { 525933707f3Ssthen if(!udp_send_errno_needs_log(addr, addrlen)) 526933707f3Ssthen return 0; 5279982a05dSsthen if (!is_connected) { 5282c144df0Ssthen verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno)); 5299982a05dSsthen } else { 
5309982a05dSsthen verbose(VERB_OPS, "send failed: %s", sock_strerror(errno)); 5319982a05dSsthen } 532523cba49Santon if(addr) 533933707f3Ssthen log_addr(VERB_OPS, "remote address is", 534933707f3Ssthen (struct sockaddr_storage*)addr, addrlen); 535933707f3Ssthen return 0; 5360b68ff31Ssthen } else if((size_t)sent != sldns_buffer_remaining(packet)) { 537933707f3Ssthen log_err("sent %d in place of %d bytes", 5380b68ff31Ssthen (int)sent, (int)sldns_buffer_remaining(packet)); 539933707f3Ssthen return 0; 540933707f3Ssthen } 541933707f3Ssthen return 1; 542933707f3Ssthen } 543933707f3Ssthen 544933707f3Ssthen #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG)) 545933707f3Ssthen /** print debug ancillary info */ 546933707f3Ssthen static void p_ancil(const char* str, struct comm_reply* r) 547933707f3Ssthen { 548933707f3Ssthen if(r->srctype != 4 && r->srctype != 6) { 549933707f3Ssthen log_info("%s: unknown srctype %d", str, r->srctype); 550933707f3Ssthen return; 551933707f3Ssthen } 552191f22c6Ssthen 553933707f3Ssthen if(r->srctype == 6) { 554191f22c6Ssthen #ifdef IPV6_PKTINFO 555933707f3Ssthen char buf[1024]; 556933707f3Ssthen if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 557933707f3Ssthen buf, (socklen_t)sizeof(buf)) == 0) { 5580b68ff31Ssthen (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf)); 559933707f3Ssthen } 560933707f3Ssthen buf[sizeof(buf)-1]=0; 561933707f3Ssthen log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex); 562191f22c6Ssthen #endif 563933707f3Ssthen } else if(r->srctype == 4) { 564933707f3Ssthen #ifdef IP_PKTINFO 565933707f3Ssthen char buf1[1024], buf2[1024]; 566933707f3Ssthen if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 567933707f3Ssthen buf1, (socklen_t)sizeof(buf1)) == 0) { 5680b68ff31Ssthen (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 569933707f3Ssthen } 570933707f3Ssthen buf1[sizeof(buf1)-1]=0; 571163a4143Ssthen #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST 572933707f3Ssthen 
if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 573933707f3Ssthen buf2, (socklen_t)sizeof(buf2)) == 0) { 5740b68ff31Ssthen (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2)); 575933707f3Ssthen } 576933707f3Ssthen buf2[sizeof(buf2)-1]=0; 577163a4143Ssthen #else 578163a4143Ssthen buf2[0]=0; 579163a4143Ssthen #endif 580933707f3Ssthen log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex, 581933707f3Ssthen buf1, buf2); 582933707f3Ssthen #elif defined(IP_RECVDSTADDR) 583933707f3Ssthen char buf1[1024]; 584933707f3Ssthen if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 585933707f3Ssthen buf1, (socklen_t)sizeof(buf1)) == 0) { 5860b68ff31Ssthen (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1)); 587933707f3Ssthen } 588933707f3Ssthen buf1[sizeof(buf1)-1]=0; 589933707f3Ssthen log_info("%s: %s", str, buf1); 590933707f3Ssthen #endif /* IP_PKTINFO or PI_RECVDSTDADDR */ 591933707f3Ssthen } 592933707f3Ssthen } 593933707f3Ssthen #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */ 594933707f3Ssthen 595933707f3Ssthen /** send a UDP reply over specified interface*/ 596933707f3Ssthen static int 5970b68ff31Ssthen comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, 598933707f3Ssthen struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 599933707f3Ssthen { 600933707f3Ssthen #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG) 601933707f3Ssthen ssize_t sent; 602933707f3Ssthen struct msghdr msg; 603933707f3Ssthen struct iovec iov[1]; 6040b69e590Sjca union { 6050b69e590Sjca struct cmsghdr hdr; 6060b69e590Sjca char buf[256]; 6070b69e590Sjca } control; 608933707f3Ssthen #ifndef S_SPLINT_S 609933707f3Ssthen struct cmsghdr *cmsg; 610933707f3Ssthen #endif /* S_SPLINT_S */ 611933707f3Ssthen 612933707f3Ssthen log_assert(c->fd != -1); 613933707f3Ssthen #ifdef UNBOUND_DEBUG 6140b68ff31Ssthen if(sldns_buffer_remaining(packet) == 0) 615933707f3Ssthen log_err("error: send empty UDP packet"); 616933707f3Ssthen #endif 
617933707f3Ssthen log_assert(addr && addrlen > 0); 618933707f3Ssthen 619933707f3Ssthen msg.msg_name = addr; 620933707f3Ssthen msg.msg_namelen = addrlen; 6210b68ff31Ssthen iov[0].iov_base = sldns_buffer_begin(packet); 6220b68ff31Ssthen iov[0].iov_len = sldns_buffer_remaining(packet); 623933707f3Ssthen msg.msg_iov = iov; 624933707f3Ssthen msg.msg_iovlen = 1; 6250b69e590Sjca msg.msg_control = control.buf; 626933707f3Ssthen #ifndef S_SPLINT_S 627a3167c07Ssthen msg.msg_controllen = sizeof(control.buf); 628933707f3Ssthen #endif /* S_SPLINT_S */ 629933707f3Ssthen msg.msg_flags = 0; 630933707f3Ssthen 631933707f3Ssthen #ifndef S_SPLINT_S 632933707f3Ssthen cmsg = CMSG_FIRSTHDR(&msg); 633933707f3Ssthen if(r->srctype == 4) { 634933707f3Ssthen #ifdef IP_PKTINFO 635a58bff56Ssthen void* cmsg_data; 636933707f3Ssthen msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); 637a3167c07Ssthen log_assert(msg.msg_controllen <= sizeof(control.buf)); 638933707f3Ssthen cmsg->cmsg_level = IPPROTO_IP; 639933707f3Ssthen cmsg->cmsg_type = IP_PKTINFO; 640933707f3Ssthen memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info, 641933707f3Ssthen sizeof(struct in_pktinfo)); 642a58bff56Ssthen /* unset the ifindex to not bypass the routing tables */ 643a58bff56Ssthen cmsg_data = CMSG_DATA(cmsg); 644a58bff56Ssthen ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0; 645933707f3Ssthen cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); 6468b7325afSsthen /* zero the padding bytes inserted by the CMSG_LEN */ 6478b7325afSsthen if(sizeof(struct in_pktinfo) < cmsg->cmsg_len) 6488b7325afSsthen memset(((uint8_t*)(CMSG_DATA(cmsg))) + 6498b7325afSsthen sizeof(struct in_pktinfo), 0, cmsg->cmsg_len 6508b7325afSsthen - sizeof(struct in_pktinfo)); 651933707f3Ssthen #elif defined(IP_SENDSRCADDR) 652933707f3Ssthen msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); 653a3167c07Ssthen log_assert(msg.msg_controllen <= sizeof(control.buf)); 654933707f3Ssthen cmsg->cmsg_level = IPPROTO_IP; 655933707f3Ssthen cmsg->cmsg_type 
= IP_SENDSRCADDR; 656933707f3Ssthen memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr, 657933707f3Ssthen sizeof(struct in_addr)); 658933707f3Ssthen cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); 6598b7325afSsthen /* zero the padding bytes inserted by the CMSG_LEN */ 6608b7325afSsthen if(sizeof(struct in_addr) < cmsg->cmsg_len) 6618b7325afSsthen memset(((uint8_t*)(CMSG_DATA(cmsg))) + 6628b7325afSsthen sizeof(struct in_addr), 0, cmsg->cmsg_len 6638b7325afSsthen - sizeof(struct in_addr)); 664933707f3Ssthen #else 665933707f3Ssthen verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR"); 666933707f3Ssthen msg.msg_control = NULL; 667933707f3Ssthen #endif /* IP_PKTINFO or IP_SENDSRCADDR */ 668933707f3Ssthen } else if(r->srctype == 6) { 669a58bff56Ssthen void* cmsg_data; 670933707f3Ssthen msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 671a3167c07Ssthen log_assert(msg.msg_controllen <= sizeof(control.buf)); 672933707f3Ssthen cmsg->cmsg_level = IPPROTO_IPV6; 673933707f3Ssthen cmsg->cmsg_type = IPV6_PKTINFO; 674933707f3Ssthen memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info, 675933707f3Ssthen sizeof(struct in6_pktinfo)); 676a58bff56Ssthen /* unset the ifindex to not bypass the routing tables */ 677a58bff56Ssthen cmsg_data = CMSG_DATA(cmsg); 678a58bff56Ssthen ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0; 679933707f3Ssthen cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 6808b7325afSsthen /* zero the padding bytes inserted by the CMSG_LEN */ 6818b7325afSsthen if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len) 6828b7325afSsthen memset(((uint8_t*)(CMSG_DATA(cmsg))) + 6838b7325afSsthen sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len 6848b7325afSsthen - sizeof(struct in6_pktinfo)); 685933707f3Ssthen } else { 686933707f3Ssthen /* try to pass all 0 to use default route */ 687933707f3Ssthen msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); 688a3167c07Ssthen log_assert(msg.msg_controllen <= sizeof(control.buf)); 689933707f3Ssthen cmsg->cmsg_level = 
IPPROTO_IPV6; 690933707f3Ssthen cmsg->cmsg_type = IPV6_PKTINFO; 691933707f3Ssthen memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo)); 692933707f3Ssthen cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); 6938b7325afSsthen /* zero the padding bytes inserted by the CMSG_LEN */ 6948b7325afSsthen if(sizeof(struct in6_pktinfo) < cmsg->cmsg_len) 6958b7325afSsthen memset(((uint8_t*)(CMSG_DATA(cmsg))) + 6968b7325afSsthen sizeof(struct in6_pktinfo), 0, cmsg->cmsg_len 6978b7325afSsthen - sizeof(struct in6_pktinfo)); 698933707f3Ssthen } 699933707f3Ssthen #endif /* S_SPLINT_S */ 7008b7325afSsthen if(verbosity >= VERB_ALGO && r->srctype != 0) 701933707f3Ssthen p_ancil("send_udp over interface", r); 702933707f3Ssthen sent = sendmsg(c->fd, &msg, 0); 703933707f3Ssthen if(sent == -1) { 70432e31f52Ssthen /* try again and block, waiting for IO to complete, 70532e31f52Ssthen * we want to send the answer, and we will wait for 70632e31f52Ssthen * the ethernet interface buffer to have space. */ 70732e31f52Ssthen #ifndef USE_WINSOCK 70845872187Ssthen if(errno == EAGAIN || errno == EINTR || 70932e31f52Ssthen # ifdef EWOULDBLOCK 71032e31f52Ssthen errno == EWOULDBLOCK || 71132e31f52Ssthen # endif 71232e31f52Ssthen errno == ENOBUFS) { 71332e31f52Ssthen #else 71432e31f52Ssthen if(WSAGetLastError() == WSAEINPROGRESS || 71545872187Ssthen WSAGetLastError() == WSAEINTR || 71632e31f52Ssthen WSAGetLastError() == WSAENOBUFS || 71732e31f52Ssthen WSAGetLastError() == WSAEWOULDBLOCK) { 71832e31f52Ssthen #endif 719afda2c03Sflorian int retries = 0; 720afda2c03Sflorian while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( 72145872187Ssthen #ifndef USE_WINSOCK 72245872187Ssthen errno == EAGAIN || errno == EINTR || 72345872187Ssthen # ifdef EWOULDBLOCK 72445872187Ssthen errno == EWOULDBLOCK || 72545872187Ssthen # endif 72645872187Ssthen errno == ENOBUFS 72745872187Ssthen #else 72845872187Ssthen WSAGetLastError() == WSAEINPROGRESS || 72945872187Ssthen WSAGetLastError() == WSAEINTR || 
73045872187Ssthen WSAGetLastError() == WSAENOBUFS || 73145872187Ssthen WSAGetLastError() == WSAEWOULDBLOCK 73245872187Ssthen #endif 73345872187Ssthen )) { 73445872187Ssthen #if defined(HAVE_POLL) || defined(USE_WINSOCK) 735afda2c03Sflorian int send_nobufs = ( 736afda2c03Sflorian #ifndef USE_WINSOCK 737afda2c03Sflorian errno == ENOBUFS 738afda2c03Sflorian #else 739afda2c03Sflorian WSAGetLastError() == WSAENOBUFS 740afda2c03Sflorian #endif 741afda2c03Sflorian ); 74245872187Ssthen struct pollfd p; 74345872187Ssthen int pret; 74445872187Ssthen memset(&p, 0, sizeof(p)); 74545872187Ssthen p.fd = c->fd; 74645872187Ssthen p.events = POLLOUT | POLLERR | POLLHUP; 74745872187Ssthen # ifndef USE_WINSOCK 74845872187Ssthen pret = poll(&p, 1, SEND_BLOCKED_WAIT_TIMEOUT); 74945872187Ssthen # else 75045872187Ssthen pret = WSAPoll(&p, 1, 75145872187Ssthen SEND_BLOCKED_WAIT_TIMEOUT); 75245872187Ssthen # endif 75345872187Ssthen if(pret == 0) { 75445872187Ssthen /* timer expired */ 75545872187Ssthen struct comm_base* b = c->ev->base; 75645872187Ssthen if(b->eb->last_writewait_log+SLOW_LOG_TIME <= 75745872187Ssthen b->eb->secs) { 75845872187Ssthen b->eb->last_writewait_log = b->eb->secs; 75945872187Ssthen verbose(VERB_OPS, "send udp blocked " 76045872187Ssthen "for long, dropping packet."); 76145872187Ssthen } 76245872187Ssthen return 0; 76345872187Ssthen } else if(pret < 0 && 76445872187Ssthen #ifndef USE_WINSOCK 76545872187Ssthen errno != EAGAIN && errno != EINTR && 76645872187Ssthen # ifdef EWOULDBLOCK 76745872187Ssthen errno != EWOULDBLOCK && 76845872187Ssthen # endif 76945872187Ssthen errno != ENOBUFS 77045872187Ssthen #else 77145872187Ssthen WSAGetLastError() != WSAEINPROGRESS && 77245872187Ssthen WSAGetLastError() != WSAEINTR && 77345872187Ssthen WSAGetLastError() != WSAENOBUFS && 77445872187Ssthen WSAGetLastError() != WSAEWOULDBLOCK 77545872187Ssthen #endif 77645872187Ssthen ) { 77745872187Ssthen log_err("poll udp out failed: %s", 77845872187Ssthen sock_strerror(errno)); 
77945872187Ssthen return 0; 780afda2c03Sflorian } else if((pret < 0 && 781afda2c03Sflorian #ifndef USE_WINSOCK 782afda2c03Sflorian errno == ENOBUFS 783afda2c03Sflorian #else 784afda2c03Sflorian WSAGetLastError() == WSAENOBUFS 785afda2c03Sflorian #endif 786afda2c03Sflorian ) || (send_nobufs && retries > 0)) { 787afda2c03Sflorian /* ENOBUFS, and poll returned without 788afda2c03Sflorian * a timeout. Or the retried send call 789afda2c03Sflorian * returned ENOBUFS. It is good to 790afda2c03Sflorian * wait a bit for the error to clear. */ 791afda2c03Sflorian /* The timeout is 20*(2^(retries+1)), 792afda2c03Sflorian * it increases exponentially, starting 793afda2c03Sflorian * at 40 msec. After 5 tries, 1240 msec 794afda2c03Sflorian * have passed in total, when poll 795afda2c03Sflorian * returned the error, and 1200 msec 796afda2c03Sflorian * when send returned the errors. */ 797afda2c03Sflorian #ifndef USE_WINSOCK 798afda2c03Sflorian pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 799afda2c03Sflorian #else 800afda2c03Sflorian pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); 801afda2c03Sflorian #endif 802afda2c03Sflorian if(pret < 0 && 803afda2c03Sflorian #ifndef USE_WINSOCK 804afda2c03Sflorian errno != EAGAIN && errno != EINTR && 805afda2c03Sflorian # ifdef EWOULDBLOCK 806afda2c03Sflorian errno != EWOULDBLOCK && 807afda2c03Sflorian # endif 808afda2c03Sflorian errno != ENOBUFS 809afda2c03Sflorian #else 810afda2c03Sflorian WSAGetLastError() != WSAEINPROGRESS && 811afda2c03Sflorian WSAGetLastError() != WSAEINTR && 812afda2c03Sflorian WSAGetLastError() != WSAENOBUFS && 813afda2c03Sflorian WSAGetLastError() != WSAEWOULDBLOCK 814afda2c03Sflorian #endif 815afda2c03Sflorian ) { 816afda2c03Sflorian log_err("poll udp out timer failed: %s", 817afda2c03Sflorian sock_strerror(errno)); 818afda2c03Sflorian } 81945872187Ssthen } 82045872187Ssthen #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ 821afda2c03Sflorian retries++; 
82232e31f52Ssthen sent = sendmsg(c->fd, &msg, 0); 82345872187Ssthen } 82432e31f52Ssthen } 82532e31f52Ssthen } 82632e31f52Ssthen if(sent == -1) { 827933707f3Ssthen if(!udp_send_errno_needs_log(addr, addrlen)) 828933707f3Ssthen return 0; 829933707f3Ssthen verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno)); 830933707f3Ssthen log_addr(VERB_OPS, "remote address is", 831933707f3Ssthen (struct sockaddr_storage*)addr, addrlen); 83232e31f52Ssthen #ifdef __NetBSD__ 83332e31f52Ssthen /* netbsd 7 has IP_PKTINFO for recv but not send */ 83432e31f52Ssthen if(errno == EINVAL && r->srctype == 4) 83532e31f52Ssthen log_err("sendmsg: No support for sendmsg(IP_PKTINFO). " 83632e31f52Ssthen "Please disable interface-automatic"); 83732e31f52Ssthen #endif 838933707f3Ssthen return 0; 8390b68ff31Ssthen } else if((size_t)sent != sldns_buffer_remaining(packet)) { 840933707f3Ssthen log_err("sent %d in place of %d bytes", 8410b68ff31Ssthen (int)sent, (int)sldns_buffer_remaining(packet)); 842933707f3Ssthen return 0; 843933707f3Ssthen } 844933707f3Ssthen return 1; 845933707f3Ssthen #else 846933707f3Ssthen (void)c; 847933707f3Ssthen (void)packet; 848933707f3Ssthen (void)addr; 849933707f3Ssthen (void)addrlen; 850933707f3Ssthen (void)r; 851933707f3Ssthen log_err("sendmsg: IPV6_PKTINFO not supported"); 852933707f3Ssthen return 0; 853933707f3Ssthen #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */ 854933707f3Ssthen }

/**
 * Return true if a UDP receive error needs to be logged.
 * For EACCES and the connection refused, network/host unreachable and
 * network/host down errors, this is only the case when verbosity is at
 * least VERB_ALGO. Every other error is always reported as loggable.
 * @param err: the error code from the failed receive call.
 * @return 1 if the error should be logged, 0 if not.
 */
static int udp_recv_needs_log(int err)
{
	switch(err) {
	case EACCES: /* some hosts send ICMP 'Permission Denied' */
#ifndef USE_WINSOCK
	case ECONNREFUSED:
#  ifdef ENETUNREACH
	case ENETUNREACH:
#  endif
#  ifdef EHOSTDOWN
	case EHOSTDOWN:
#  endif
#  ifdef EHOSTUNREACH
	case EHOSTUNREACH:
#  endif
#  ifdef ENETDOWN
	case ENETDOWN:
#  endif
#else /* USE_WINSOCK */
	case WSAECONNREFUSED:
	case WSAENETUNREACH:
	case WSAEHOSTDOWN:
	case WSAEHOSTUNREACH:
	case WSAENETDOWN:
#endif
		if(verbosity >= VERB_ALGO)
			return 1;
		return 0;
	default:
		break;
	}
	return 1;
}

/**
 * Parses the PROXYv2 header from buf and updates the comm_reply struct.
 * For a proxied IPv4 or IPv6 source, rep->client_addr and
 * rep->client_addrlen are set from the header and rep->is_proxied is set.
 * For the LOCAL command and the unspecified family/protocol the addresses
 * in rep are left alone and is_proxied is not set.
 * @param buf: buffer that starts with the PROXYv2 header.
 * @param rep: reply info that receives the proxied client address.
 * @param stream: if zero, buf holds a whole packet; the data after the
 *	header is moved to the start of the buffer and the buffer limit is
 *	reduced by the header size. If nonzero, buf is not modified.
 * @return 1 on success, 0 on failure (pp2_read_header reported an error,
 *	or the family and protocol are not supported).
 */
static int consume_pp2_header(struct sldns_buffer* buf, struct comm_reply* rep,
	int stream) {
	size_t size;
	struct pp2_header *header;
	int err = pp2_read_header(sldns_buffer_begin(buf),
		sldns_buffer_remaining(buf));
	if(err) return 0;
	header = (struct pp2_header*)sldns_buffer_begin(buf);
	/* total size is the fixed part plus the length from the header */
	size = PP2_HEADER_SIZE + ntohs(header->len);
	if((header->ver_cmd & 0xF) == PP2_CMD_LOCAL) {
		/* A connection from the proxy itself.
		 * No need to do anything with addresses. */
		goto done;
	}
	if(header->fam_prot == PP2_UNSPEC_UNSPEC) {
		/* Unspecified family and protocol. This could be used for
		 * health checks by proxies.
		 * No need to do anything with addresses. */
		goto done;
	}
	/* Read the proxied address */
	switch(header->fam_prot) {
		case PP2_INET_STREAM:
		case PP2_INET_DGRAM:
			{
			struct sockaddr_in* addr =
				(struct sockaddr_in*)&rep->client_addr;
			/* zero the struct, like the IPv6 case below, so
			 * that no stale bytes stay in the padding */
			memset(addr, 0, sizeof(*addr));
			addr->sin_family = AF_INET;
			addr->sin_addr.s_addr = header->addr.addr4.src_addr;
			addr->sin_port = header->addr.addr4.src_port;
			rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in);
			}
			/* Ignore the destination address; it should be us. */
			break;
		case PP2_INET6_STREAM:
		case PP2_INET6_DGRAM:
			{
			struct sockaddr_in6* addr =
				(struct sockaddr_in6*)&rep->client_addr;
			memset(addr, 0, sizeof(*addr));
			addr->sin6_family = AF_INET6;
			memcpy(&addr->sin6_addr,
				header->addr.addr6.src_addr, 16);
			addr->sin6_port = header->addr.addr6.src_port;
			rep->client_addrlen = (socklen_t)sizeof(struct sockaddr_in6);
			}
			/* Ignore the destination address; it should be us. */
			break;
		default:
			log_err("proxy_protocol: unsupported family and "
				"protocol 0x%x", (int)header->fam_prot);
			return 0;
	}
	rep->is_proxied = 1;
done:
	if(!stream) {
		/* We are reading a whole packet;
		 * Move the rest of the data to overwrite the PROXYv2 header */
		/* XXX can we do better to avoid memmove? */
		memmove(header, ((char*)header)+size,
			sldns_buffer_limit(buf)-size);
		sldns_buffer_set_limit(buf, sldns_buffer_limit(buf)-size);
	}
	return 1;
}

#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
/**
 * Event callback for a UDP comm point that reads ancillary data.
 * Reads at most NUM_UDP_PER_SELECT datagrams with recvmsg, and for every
 * datagram stores the packet info control message (and, with
 * HAVE_LINUX_NET_TSTAMP_H, the receive timestamp) in the reply info,
 * removes a PROXYv2 header if pp2_enabled is set, and calls the comm point
 * callback. If the callback returns nonzero the reply is sent right away
 * with comm_point_send_udp_msg_if.
 * @param fd: the socket to read from; asserted to be the comm point fd.
 * @param event: event flags, nothing is done without UB_EV_READ.
 * @param arg: the struct comm_point, asserted to be of type comm_udp.
 */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */
#ifdef HAVE_LINUX_NET_TSTAMP_H
	struct timespec *ts;
#endif /* HAVE_LINUX_NET_TSTAMP_H */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&UB_EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	ub_comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		timeval_clear(&rep.c->recv_tv);
		rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.remote_addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.remote_addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, MSG_DONTWAIT);
		if(rcv == -1) {
			if(errno != EAGAIN && errno != EINTR
				&& udp_recv_needs_log(errno)) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.remote_addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		rep.srctype = 0;
		rep.is_proxied = 0;
#ifndef S_SPLINT_S
		/* the loop stops at the first packet info message */
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
#ifdef HAVE_LINUX_NET_TSTAMP_H
			} else if( cmsg->cmsg_level == SOL_SOCKET &&
				cmsg->cmsg_type == SO_TIMESTAMPNS) {
				ts = (struct timespec *)CMSG_DATA(cmsg);
				TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
			} else if( cmsg->cmsg_level == SOL_SOCKET &&
				cmsg->cmsg_type == SO_TIMESTAMPING) {
				ts = (struct timespec *)CMSG_DATA(cmsg);
				TIMESPEC_TO_TIMEVAL(&rep.c->recv_tv, ts);
			} else if( cmsg->cmsg_level == SOL_SOCKET &&
				cmsg->cmsg_type == SO_TIMESTAMP) {
				memmove(&rep.c->recv_tv, CMSG_DATA(cmsg), sizeof(struct timeval));
#endif /* HAVE_LINUX_NET_TSTAMP_H */
			}
		}

		if(verbosity >= VERB_ALGO && rep.srctype != 0)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */

		if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
			&rep, 0)) {
			log_err("proxy_protocol: could not consume PROXYv2 header");
			return;
		}
		if(!rep.is_proxied) {
			/* not proxied, the client is the remote address */
			rep.client_addrlen = rep.remote_addrlen;
			memmove(&rep.client_addr, &rep.remote_addr,
				rep.remote_addrlen);
		}

		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			struct sldns_buffer *buffer;
#ifdef USE_DNSCRYPT
			buffer = rep.c->dnscrypt_buffer;
#else
			buffer = rep.c->buffer;
#endif
			(void)comm_point_send_udp_msg_if(rep.c, buffer,
				(struct sockaddr*)&rep.remote_addr,
				rep.remote_addrlen, &rep);
		}
		if(!rep.c || rep.c->fd == -1) /* commpoint closed */
			break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */

/**
 * Event callback for a UDP comm point.
 * Reads at most NUM_UDP_PER_SELECT datagrams with recvfrom, and for every
 * datagram removes a PROXYv2 header if pp2_enabled is set and calls the
 * comm point callback. If the callback returns nonzero the reply is sent
 * right away with comm_point_send_udp_msg.
 * @param fd: the socket to read from; asserted to be the comm point fd.
 * @param event: event flags, nothing is done without UB_EV_READ.
 * @param arg: the struct comm_point, asserted to be of type comm_udp.
 */
void
comm_point_udp_callback(int fd, short event, void* arg)
{
	struct comm_reply rep;
	ssize_t rcv;
	int i;
	struct sldns_buffer *buffer;

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&UB_EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	ub_comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.remote_addrlen = (socklen_t)sizeof(rep.remote_addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
			sldns_buffer_remaining(rep.c->buffer), MSG_DONTWAIT,
			(struct sockaddr*)&rep.remote_addr, &rep.remote_addrlen);
		if(rcv == -1) {
#ifndef USE_WINSOCK
			if(errno != EAGAIN && errno != EINTR
				&& udp_recv_needs_log(errno))
				log_err("recvfrom %d failed: %s",
					fd, strerror(errno));
#else
			if(WSAGetLastError() != WSAEINPROGRESS &&
				WSAGetLastError() != WSAECONNRESET &&
				WSAGetLastError()!= WSAEWOULDBLOCK &&
				udp_recv_needs_log(WSAGetLastError()))
				log_err("recvfrom failed: %s",
					wsa_strerror(WSAGetLastError()));
#endif
			return;
		}
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		rep.srctype = 0;
		rep.is_proxied = 0;

		if(rep.c->pp2_enabled && !consume_pp2_header(rep.c->buffer,
			&rep, 0)) {
			log_err("proxy_protocol: could not consume PROXYv2 header");
			return;
		}
		if(!rep.is_proxied) {
			/* not proxied, the client is the remote address */
			rep.client_addrlen = rep.remote_addrlen;
			memmove(&rep.client_addr, &rep.remote_addr,
				rep.remote_addrlen);
		}

		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
#ifdef USE_DNSCRYPT
			buffer = rep.c->dnscrypt_buffer;
#else
			buffer = rep.c->buffer;
#endif
			(void)comm_point_send_udp_msg(rep.c, buffer,
				(struct sockaddr*)&rep.remote_addr,
				rep.remote_addrlen, 0);
		}
		if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
		another UDP port. Note rep.c cannot be reused with TCP fd. */
			break;
	}
}

/**
 * Get the tcp timeout to use for the comm point.
 * @param c: the comm point.
 * @return c->tcp_timeout_msec, but not less than TCP_QUERY_TIMEOUT_MINIMUM.
 */
int adjusted_tcp_timeout(struct comm_point* c)
{
	if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM)
		return TCP_QUERY_TIMEOUT_MINIMUM;
	return c->tcp_timeout_msec;
}

/**
 * Use a new tcp handler for new query fd, set to read query.
 * @param c: the handler comm point, type comm_tcp or comm_http, that has
 *	no fd yet (both are asserted).
 * @param fd: the fd to start listening on.
 * @param cur: count of handlers in use, for the usage percentage.
 * @param max: total count of handlers, for the usage percentage.
 */
static void
setup_tcp_handler(struct comm_point* c, int fd, int cur, int max)
{
	int handler_usage;
	log_assert(c->type == comm_tcp || c->type == comm_http);
	log_assert(c->fd == -1);
	sldns_buffer_clear(c->buffer);
#ifdef USE_DNSCRYPT
	if (c->dnscrypt)
		sldns_buffer_clear(c->dnscrypt_buffer);
#endif
	c->tcp_is_reading = 1;
	c->tcp_byte_count = 0;
	c->tcp_keepalive = 0;
	/* if more than half the tcp handlers are in use, use a shorter
	 * timeout for this TCP connection, we need to make space for
	 * other connections to be able to get attention */
	/* If > 50% TCP handler structures in use, set timeout to 1/100th
	 * 	configured value.
	 * If > 65%TCP handler structures in use, set to 1/500th configured
	 * 	value.
	 * If > 80% TCP handler structures in use, set to 0.
	 *
	 * If the timeout to use falls below 200 milliseconds, an actual
	 * timeout of 200ms is used.
	 */
	handler_usage = (cur * 100) / max;
	if(handler_usage > 50 && handler_usage <= 65)
		c->tcp_timeout_msec /= 100;
	else if (handler_usage > 65 && handler_usage <= 80)
		c->tcp_timeout_msec /= 500;
	else if (handler_usage > 80)
		c->tcp_timeout_msec = 0;
	comm_point_start_listening(c, fd, adjusted_tcp_timeout(c));
}

/**
 * Timeout callback that ends the slow accept period.
 * If the comm base has a start_accept callback, that is called and
 * slow_accept_enabled is cleared.
 * @param fd: unused.
 * @param event: unused.
 * @param arg: the struct comm_base.
 */
void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
	short ATTR_UNUSED(event), void* arg)
{
	struct comm_base* b = (struct comm_base*)arg;
	/* timeout for the slow accept, re-enable accepts again */
	if(b->start_accept) {
		verbose(VERB_ALGO, "wait is over, slow accept disabled");
		fptr_ok(fptr_whitelist_start_accept(b->start_accept));
		(*b->start_accept)(b->cb_arg);
		b->eb->slow_accept_enabled = 0;
	}
}

/**
 * Accept a new connection on the accept socket of the comm point.
 * The new fd is made nonblocking. When accept fails because the file
 * descriptors ran out and the comm base has a stop_accept callback, that
 * callback is called and a timer of NETEVENT_SLOW_ACCEPT_TIME msec is set
 * up that calls comm_base_handle_slow_accept. When the comm point has a
 * tcp_conn_limit and is of type comm_tcp_accept, the connection is closed
 * again if tcl_new_connection refuses it.
 * @param c: the comm point with the accept socket in c->fd.
 * @param addr: the address of the peer is returned here.
 * @param addrlen: the length of addr is returned here.
 * @return the new fd, or -1 on failure.
 */
int comm_point_perform_accept(struct comm_point* c,
	struct sockaddr_storage* addr, socklen_t* addrlen)
{
	int new_fd;
	*addrlen = (socklen_t)sizeof(*addr);
#ifndef HAVE_ACCEPT4
	new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
#else
	/* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
	new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
#endif
	if(new_fd == -1) {
#ifndef USE_WINSOCK
		/* EINTR is signal interrupt. others are closed connection. */
		if(	errno == EINTR || errno == EAGAIN
#ifdef EWOULDBLOCK
			|| errno == EWOULDBLOCK
#endif
#ifdef ECONNABORTED
			|| errno == ECONNABORTED
#endif
#ifdef EPROTO
			|| errno == EPROTO
#endif /* EPROTO */
			)
			return -1;
#if defined(ENFILE) && defined(EMFILE)
		if(errno == ENFILE || errno == EMFILE) {
			/* out of file descriptors, likely outside of our
			 * control. stop accept() calls for some time */
			if(c->ev->base->stop_accept) {
				struct comm_base* b = c->ev->base;
				struct timeval tv;
				verbose(VERB_ALGO, "out of file descriptors: "
					"slow accept");
				ub_comm_base_now(b);
				/* rate limit this log message */
				if(b->eb->last_slow_log+SLOW_LOG_TIME <=
					b->eb->secs) {
					b->eb->last_slow_log = b->eb->secs;
					verbose(VERB_OPS, "accept failed, "
						"slow down accept for %d "
						"msec: %s",
						NETEVENT_SLOW_ACCEPT_TIME,
						sock_strerror(errno));
				}
				b->eb->slow_accept_enabled = 1;
				fptr_ok(fptr_whitelist_stop_accept(
					b->stop_accept));
				(*b->stop_accept)(b->cb_arg);
				/* set timeout, no mallocs */
				tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
				tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
				b->eb->slow_accept = ub_event_new(b->eb->base,
					-1, UB_EV_TIMEOUT,
					comm_base_handle_slow_accept, b);
				if(b->eb->slow_accept == NULL) {
					/* we do not want to log here, because
					 * that would spam the logfiles.
					 * error: "event_base_set failed." */
				}
				else if(ub_event_add(b->eb->slow_accept, &tv)
					!= 0) {
					/* we do not want to log here,
					 * error: "event_add failed." */
				}
			} else {
				log_err("accept, with no slow down, "
					"failed: %s", sock_strerror(errno));
			}
			return -1;
		}
#endif
#else /* USE_WINSOCK */
		if(WSAGetLastError() == WSAEINPROGRESS ||
			WSAGetLastError() == WSAECONNRESET)
			return -1;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
			return -1;
		}
#endif
		log_err_addr("accept failed", sock_strerror(errno), addr,
			*addrlen);
		return -1;
	}
	if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
		c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
		if(!tcl_new_connection(c->tcl_addr)) {
			if(verbosity >= 3)
				log_err_addr("accept rejected",
				"connection limit exceeded", addr, *addrlen);
			close(new_fd);
			return -1;
		}
	}
#ifndef HAVE_ACCEPT4
	fd_set_nonblock(new_fd);
#endif
	return new_fd;
}

#ifdef USE_WINSOCK
/**
 * BIO callback for windows. After a read, gets, write or puts operation
 * returns with argl 0 and the last winsock error is WSAEWOULDBLOCK, the
 * event from the BIO callback argument is passed to
 * ub_winsock_tcp_wouldblock, with UB_EV_READ or UB_EV_WRITE.
 * The parameter list depends on HAVE_BIO_SET_CALLBACK_EX.
 * @return retvalue, unchanged.
 */
static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
#ifdef HAVE_BIO_SET_CALLBACK_EX
	size_t ATTR_UNUSED(len),
#endif
	int ATTR_UNUSED(argi), long argl,
#ifndef HAVE_BIO_SET_CALLBACK_EX
	long retvalue
#else
	int retvalue, size_t* ATTR_UNUSED(processed)
#endif
	)
{
	int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
	verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
		(oper&BIO_CB_RETURN)?"return":"before",
		(oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
		wsa_err==WSAEWOULDBLOCK?"wsawb":"");
	/* on windows, check if previous operation caused EWOULDBLOCK */
	if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
		(oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
		if(wsa_err == WSAEWOULDBLOCK)
			ub_winsock_tcp_wouldblock((struct ub_event*)
				BIO_get_callback_arg(b), UB_EV_READ);
	}
	if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
		(oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
		if(wsa_err == WSAEWOULDBLOCK)
			ub_winsock_tcp_wouldblock((struct ub_event*)
				BIO_get_callback_arg(b), UB_EV_WRITE);
	}
	/* return original return value */
	return retvalue;
}

/**
 * Set win bio callbacks for nonblocking operations.
 * Installs win_bio_cb on the read BIO and the write BIO of the SSL
 * object, with the event of the comm point as the callback argument.
 * @param c: the comm point, its c->ev->ev is the callback argument.
 * @param thessl: the SSL object, passed as void pointer.
 */
void
comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
{
	SSL* ssl = (SSL*)thessl;
	/* set them both just in case, but usually they are the same BIO */
#ifdef HAVE_BIO_SET_CALLBACK_EX
	BIO_set_callback_ex(SSL_get_rbio(ssl), &win_bio_cb);
#else
	BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
#endif
	BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
#ifdef HAVE_BIO_SET_CALLBACK_EX
	BIO_set_callback_ex(SSL_get_wbio(ssl), &win_bio_cb);
#else
	BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
#endif
	BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
}
#endif

#ifdef HAVE_NGHTTP2
/**
 * Create http2 session server.  Per connection, after TCP accepted.
 * Also clears the is_drop flag of the session.
 * @param h2_session: the http2 session, its callbacks must be set.
 * @return 0 if nghttp2 reports out of memory, otherwise 1.
 */
static int http2_session_server_create(struct http2_session* h2_session)
{
	log_assert(h2_session->callbacks);
	h2_session->is_drop = 0;
	if(nghttp2_session_server_new(&h2_session->session,
		h2_session->callbacks,
		h2_session) == NGHTTP2_ERR_NOMEM) {
		log_err("failed to create nghttp2 session server");
		return 0;
	}

	return 1;
}

/**
 * Submit http2 setting to session. Once per session.
 * The only setting that is submitted is the maximum number of concurrent
 * streams, taken from http2_max_streams of the comm point.
 * @param h2_session: the http2 session.
 * @return 0 on failure, 1 on success.
 */
static int http2_submit_settings(struct http2_session* h2_session)
{
	int ret;
	nghttp2_settings_entry settings[1] = {
		{NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
		 h2_session->c->http2_max_streams}};

	ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE,
		settings, 1);
	if(ret) {
		verbose(VERB_QUERY, "http2: submit_settings failed, "
			"error: %s", nghttp2_strerror(ret));
		return 0;
	}
	return 1;
}
#endif /* HAVE_NGHTTP2 */


1413933707f3Ssthen void 1414933707f3Ssthen comm_point_tcp_accept_callback(int fd, short event, void* arg) 1415933707f3Ssthen { 1416933707f3Ssthen struct comm_point* c = (struct comm_point*)arg, *c_hdl; 1417933707f3Ssthen int new_fd; 1418933707f3Ssthen log_assert(c->type == comm_tcp_accept); 14192ee382b6Ssthen if(!(event & UB_EV_READ)) { 1420933707f3Ssthen log_info("ignoring tcp accept event %d", (int)event); 1421933707f3Ssthen return; 1422933707f3Ssthen } 14232ee382b6Ssthen ub_comm_base_now(c->ev->base); 1424933707f3Ssthen /* find free tcp handler. */ 1425933707f3Ssthen if(!c->tcp_free) { 1426933707f3Ssthen log_warn("accepted too many tcp, connections full"); 1427933707f3Ssthen return; 1428933707f3Ssthen } 1429933707f3Ssthen /* accept incoming connection.
*/ 1430933707f3Ssthen c_hdl = c->tcp_free; 1431550cf4a9Ssthen /* clear leftover flags from previous use, and then set the 1432550cf4a9Ssthen * correct event base for the event structure for libevent */ 1433550cf4a9Ssthen ub_event_free(c_hdl->ev->ev); 1434e21c60efSsthen c_hdl->ev->ev = NULL; 1435eba819a2Ssthen if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) || 1436eba819a2Ssthen c_hdl->type == comm_local || c_hdl->type == comm_raw) 1437eba819a2Ssthen c_hdl->tcp_do_toggle_rw = 0; 1438eba819a2Ssthen else c_hdl->tcp_do_toggle_rw = 1; 14392c144df0Ssthen 14402c144df0Ssthen if(c_hdl->type == comm_http) { 14412c144df0Ssthen #ifdef HAVE_NGHTTP2 14422c144df0Ssthen if(!c_hdl->h2_session || 14432c144df0Ssthen !http2_session_server_create(c_hdl->h2_session)) { 14442c144df0Ssthen log_warn("failed to create nghttp2"); 14452c144df0Ssthen return; 14462c144df0Ssthen } 14472c144df0Ssthen if(!c_hdl->h2_session || 14482c144df0Ssthen !http2_submit_settings(c_hdl->h2_session)) { 14492c144df0Ssthen log_warn("failed to submit http2 settings"); 14502c144df0Ssthen return; 14512c144df0Ssthen } 1452eba819a2Ssthen if(!c->ssl) { 1453eba819a2Ssthen c_hdl->tcp_do_toggle_rw = 0; 1454eba819a2Ssthen c_hdl->use_h2 = 1; 1455eba819a2Ssthen } 14562c144df0Ssthen #endif 14572c144df0Ssthen c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 14582c144df0Ssthen UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 14592c144df0Ssthen comm_point_http_handle_callback, c_hdl); 14602c144df0Ssthen } else { 14612c144df0Ssthen c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, 14622c144df0Ssthen UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, 14632c144df0Ssthen comm_point_tcp_handle_callback, c_hdl); 14642c144df0Ssthen } 1465550cf4a9Ssthen if(!c_hdl->ev->ev) { 1466550cf4a9Ssthen log_warn("could not ub_event_new, dropped tcp"); 1467550cf4a9Ssthen return; 1468550cf4a9Ssthen } 1469933707f3Ssthen log_assert(fd != -1); 147077079be7Ssthen (void)fd; 147145872187Ssthen new_fd = comm_point_perform_accept(c, 
&c_hdl->repinfo.remote_addr, 147245872187Ssthen &c_hdl->repinfo.remote_addrlen); 1473933707f3Ssthen if(new_fd == -1) 1474933707f3Ssthen return; 147545872187Ssthen /* Copy remote_address to client_address. 147645872187Ssthen * Simplest way/time for streams to do that. */ 147745872187Ssthen c_hdl->repinfo.client_addrlen = c_hdl->repinfo.remote_addrlen; 147845872187Ssthen memmove(&c_hdl->repinfo.client_addr, 147945872187Ssthen &c_hdl->repinfo.remote_addr, 148045872187Ssthen c_hdl->repinfo.remote_addrlen); 1481933707f3Ssthen if(c->ssl) { 1482933707f3Ssthen c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd); 1483933707f3Ssthen if(!c_hdl->ssl) { 1484933707f3Ssthen c_hdl->fd = new_fd; 1485933707f3Ssthen comm_point_close(c_hdl); 1486933707f3Ssthen return; 1487933707f3Ssthen } 1488933707f3Ssthen c_hdl->ssl_shake_state = comm_ssl_shake_read; 1489933707f3Ssthen #ifdef USE_WINSOCK 1490933707f3Ssthen comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl); 1491933707f3Ssthen #endif 1492933707f3Ssthen } 1493933707f3Ssthen 1494933707f3Ssthen /* grab the tcp handler buffers */ 1495a58bff56Ssthen c->cur_tcp_count++; 1496933707f3Ssthen c->tcp_free = c_hdl->tcp_free; 1497e21c60efSsthen c_hdl->tcp_free = NULL; 1498933707f3Ssthen if(!c->tcp_free) { 1499933707f3Ssthen /* stop accepting incoming queries for now. 
*/ 1500933707f3Ssthen comm_point_stop_listening(c); 1501933707f3Ssthen } 150277079be7Ssthen setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count); 1503933707f3Ssthen } 1504933707f3Ssthen 1505933707f3Ssthen /** Make tcp handler free for next assignment */ 1506933707f3Ssthen static void 1507933707f3Ssthen reclaim_tcp_handler(struct comm_point* c) 1508933707f3Ssthen { 1509933707f3Ssthen log_assert(c->type == comm_tcp); 1510933707f3Ssthen if(c->ssl) { 1511cebdf579Ssthen #ifdef HAVE_SSL 1512933707f3Ssthen SSL_shutdown(c->ssl); 1513933707f3Ssthen SSL_free(c->ssl); 1514933707f3Ssthen c->ssl = NULL; 1515cebdf579Ssthen #endif 1516933707f3Ssthen } 1517933707f3Ssthen comm_point_close(c); 1518933707f3Ssthen if(c->tcp_parent) { 1519e21c60efSsthen if(c != c->tcp_parent->tcp_free) { 1520a58bff56Ssthen c->tcp_parent->cur_tcp_count--; 1521933707f3Ssthen c->tcp_free = c->tcp_parent->tcp_free; 1522933707f3Ssthen c->tcp_parent->tcp_free = c; 1523e21c60efSsthen } 1524933707f3Ssthen if(!c->tcp_free) { 1525933707f3Ssthen /* re-enable listening on accept socket */ 1526933707f3Ssthen comm_point_start_listening(c->tcp_parent, -1, -1); 1527933707f3Ssthen } 1528933707f3Ssthen } 1529eba819a2Ssthen c->tcp_more_read_again = NULL; 1530eba819a2Ssthen c->tcp_more_write_again = NULL; 1531e21c60efSsthen c->tcp_byte_count = 0; 153245872187Ssthen c->pp2_header_state = pp2_header_none; 1533e21c60efSsthen sldns_buffer_clear(c->buffer); 1534933707f3Ssthen } 1535933707f3Ssthen 1536933707f3Ssthen /** do the callback when writing is done */ 1537933707f3Ssthen static void 1538933707f3Ssthen tcp_callback_writer(struct comm_point* c) 1539933707f3Ssthen { 1540933707f3Ssthen log_assert(c->type == comm_tcp); 1541eba819a2Ssthen if(!c->tcp_write_and_read) { 15420b68ff31Ssthen sldns_buffer_clear(c->buffer); 1543eba819a2Ssthen c->tcp_byte_count = 0; 1544eba819a2Ssthen } 1545933707f3Ssthen if(c->tcp_do_toggle_rw) 1546933707f3Ssthen c->tcp_is_reading = 1; 1547933707f3Ssthen /* switch from listening(write) 
to listening(read) */ 1548f6b99bafSsthen if(c->tcp_req_info) { 1549f6b99bafSsthen tcp_req_info_handle_writedone(c->tcp_req_info); 1550f6b99bafSsthen } else { 1551c3b38330Ssthen comm_point_stop_listening(c); 1552eba819a2Ssthen if(c->tcp_write_and_read) { 1553eba819a2Ssthen fptr_ok(fptr_whitelist_comm_point(c->callback)); 1554eba819a2Ssthen if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN, 1555eba819a2Ssthen &c->repinfo) ) { 1556eba819a2Ssthen comm_point_start_listening(c, -1, 15579982a05dSsthen adjusted_tcp_timeout(c)); 1558eba819a2Ssthen } 1559eba819a2Ssthen } else { 15609982a05dSsthen comm_point_start_listening(c, -1, 15619982a05dSsthen adjusted_tcp_timeout(c)); 1562933707f3Ssthen } 1563f6b99bafSsthen } 1564eba819a2Ssthen } 1565933707f3Ssthen 1566933707f3Ssthen /** do the callback when reading is done */ 1567933707f3Ssthen static void 1568933707f3Ssthen tcp_callback_reader(struct comm_point* c) 1569933707f3Ssthen { 1570933707f3Ssthen log_assert(c->type == comm_tcp || c->type == comm_local); 15710b68ff31Ssthen sldns_buffer_flip(c->buffer); 1572933707f3Ssthen if(c->tcp_do_toggle_rw) 1573933707f3Ssthen c->tcp_is_reading = 0; 1574933707f3Ssthen c->tcp_byte_count = 0; 1575f6b99bafSsthen if(c->tcp_req_info) { 1576f6b99bafSsthen tcp_req_info_handle_readdone(c->tcp_req_info); 1577f6b99bafSsthen } else { 1578c3b38330Ssthen if(c->type == comm_tcp) 1579c3b38330Ssthen comm_point_stop_listening(c); 1580933707f3Ssthen fptr_ok(fptr_whitelist_comm_point(c->callback)); 1581933707f3Ssthen if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 15829982a05dSsthen comm_point_start_listening(c, -1, 15839982a05dSsthen adjusted_tcp_timeout(c)); 1584933707f3Ssthen } 1585933707f3Ssthen } 1586f6b99bafSsthen } 1587933707f3Ssthen 1588938a3a5eSflorian #ifdef HAVE_SSL 1589ebf5bb73Ssthen /** true if the ssl handshake error has to be squelched from the logs */ 1590a3167c07Ssthen int 1591ebf5bb73Ssthen squelch_err_ssl_handshake(unsigned long err) 1592ebf5bb73Ssthen { 
1593ebf5bb73Ssthen if(verbosity >= VERB_QUERY) 1594ebf5bb73Ssthen return 0; /* only squelch on low verbosity */ 15950bdb4f62Ssthen if(ERR_GET_LIB(err) == ERR_LIB_SSL && 15960bdb4f62Ssthen (ERR_GET_REASON(err) == SSL_R_HTTPS_PROXY_REQUEST || 15970bdb4f62Ssthen ERR_GET_REASON(err) == SSL_R_HTTP_REQUEST || 15980bdb4f62Ssthen ERR_GET_REASON(err) == SSL_R_WRONG_VERSION_NUMBER || 15990bdb4f62Ssthen ERR_GET_REASON(err) == SSL_R_SSLV3_ALERT_BAD_CERTIFICATE 1600ebf5bb73Ssthen #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 16010bdb4f62Ssthen || ERR_GET_REASON(err) == SSL_R_NO_SHARED_CIPHER 1602ebf5bb73Ssthen #endif 1603ebf5bb73Ssthen #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 16040bdb4f62Ssthen || ERR_GET_REASON(err) == SSL_R_UNKNOWN_PROTOCOL 16050bdb4f62Ssthen || ERR_GET_REASON(err) == SSL_R_UNSUPPORTED_PROTOCOL 1606ebf5bb73Ssthen # ifdef SSL_R_VERSION_TOO_LOW 16070bdb4f62Ssthen || ERR_GET_REASON(err) == SSL_R_VERSION_TOO_LOW 1608ebf5bb73Ssthen # endif 1609ebf5bb73Ssthen #endif 16100bdb4f62Ssthen )) 1611ebf5bb73Ssthen return 1; 1612ebf5bb73Ssthen return 0; 1613ebf5bb73Ssthen } 1614ebf5bb73Ssthen #endif /* HAVE_SSL */ 1615ebf5bb73Ssthen 1616933707f3Ssthen /** continue ssl handshake */ 1617cebdf579Ssthen #ifdef HAVE_SSL 1618933707f3Ssthen static int 1619933707f3Ssthen ssl_handshake(struct comm_point* c) 1620933707f3Ssthen { 1621933707f3Ssthen int r; 1622933707f3Ssthen if(c->ssl_shake_state == comm_ssl_shake_hs_read) { 1623933707f3Ssthen /* read condition satisfied back to writing */ 1624191f22c6Ssthen comm_point_listen_for_rw(c, 0, 1); 1625933707f3Ssthen c->ssl_shake_state = comm_ssl_shake_none; 1626933707f3Ssthen return 1; 1627933707f3Ssthen } 1628933707f3Ssthen if(c->ssl_shake_state == comm_ssl_shake_hs_write) { 1629933707f3Ssthen /* write condition satisfied, back to reading */ 1630933707f3Ssthen comm_point_listen_for_rw(c, 1, 0); 1631933707f3Ssthen c->ssl_shake_state = comm_ssl_shake_none; 1632933707f3Ssthen return 1; 1633933707f3Ssthen } 1634933707f3Ssthen 
1635933707f3Ssthen ERR_clear_error(); 1636933707f3Ssthen r = SSL_do_handshake(c->ssl); 1637933707f3Ssthen if(r != 1) { 1638933707f3Ssthen int want = SSL_get_error(c->ssl, r); 1639933707f3Ssthen if(want == SSL_ERROR_WANT_READ) { 1640933707f3Ssthen if(c->ssl_shake_state == comm_ssl_shake_read) 1641933707f3Ssthen return 1; 1642933707f3Ssthen c->ssl_shake_state = comm_ssl_shake_read; 1643933707f3Ssthen comm_point_listen_for_rw(c, 1, 0); 1644933707f3Ssthen return 1; 1645933707f3Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 1646933707f3Ssthen if(c->ssl_shake_state == comm_ssl_shake_write) 1647933707f3Ssthen return 1; 1648933707f3Ssthen c->ssl_shake_state = comm_ssl_shake_write; 1649933707f3Ssthen comm_point_listen_for_rw(c, 0, 1); 1650933707f3Ssthen return 1; 1651933707f3Ssthen } else if(r == 0) { 1652933707f3Ssthen return 0; /* closed */ 1653933707f3Ssthen } else if(want == SSL_ERROR_SYSCALL) { 1654933707f3Ssthen /* SYSCALL and errno==0 means closed uncleanly */ 1655eaf2578eSsthen #ifdef EPIPE 1656eaf2578eSsthen if(errno == EPIPE && verbosity < 2) 1657eaf2578eSsthen return 0; /* silence 'broken pipe' */ 1658eaf2578eSsthen #endif 1659eaf2578eSsthen #ifdef ECONNRESET 1660eaf2578eSsthen if(errno == ECONNRESET && verbosity < 2) 1661eaf2578eSsthen return 0; /* silence reset by peer */ 1662eaf2578eSsthen #endif 16630bdb4f62Ssthen if(!tcp_connect_errno_needs_log( 166445872187Ssthen (struct sockaddr*)&c->repinfo.remote_addr, 166545872187Ssthen c->repinfo.remote_addrlen)) 16660bdb4f62Ssthen return 0; /* silence connect failures that 16670bdb4f62Ssthen show up because after connect this is the 16680bdb4f62Ssthen first system call that accesses the socket */ 1669933707f3Ssthen if(errno != 0) 1670933707f3Ssthen log_err("SSL_handshake syscall: %s", 1671933707f3Ssthen strerror(errno)); 1672933707f3Ssthen return 0; 1673933707f3Ssthen } else { 1674ebf5bb73Ssthen unsigned long err = ERR_get_error(); 1675ebf5bb73Ssthen if(!squelch_err_ssl_handshake(err)) { 1676f46c52bfSsthen long vr; 
1677d896b962Ssthen log_crypto_err_io_code("ssl handshake failed", 1678d896b962Ssthen want, err); 1679f46c52bfSsthen if((vr=SSL_get_verify_result(c->ssl)) != 0) 1680f46c52bfSsthen log_err("ssl handshake cert error: %s", 1681f46c52bfSsthen X509_verify_cert_error_string( 1682f46c52bfSsthen vr)); 168345872187Ssthen log_addr(VERB_OPS, "ssl handshake failed", 168445872187Ssthen &c->repinfo.remote_addr, 168545872187Ssthen c->repinfo.remote_addrlen); 1686ebf5bb73Ssthen } 1687933707f3Ssthen return 0; 1688933707f3Ssthen } 1689933707f3Ssthen } 1690933707f3Ssthen /* this is where peer verification could take place */ 1691938a3a5eSflorian if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) { 1692938a3a5eSflorian /* verification */ 1693938a3a5eSflorian if(SSL_get_verify_result(c->ssl) == X509_V_OK) { 1694191f22c6Ssthen #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1695191f22c6Ssthen X509* x = SSL_get1_peer_certificate(c->ssl); 1696191f22c6Ssthen #else 1697938a3a5eSflorian X509* x = SSL_get_peer_certificate(c->ssl); 1698191f22c6Ssthen #endif 1699938a3a5eSflorian if(!x) { 1700938a3a5eSflorian log_addr(VERB_ALGO, "SSL connection failed: " 1701938a3a5eSflorian "no certificate", 170245872187Ssthen &c->repinfo.remote_addr, 170345872187Ssthen c->repinfo.remote_addrlen); 1704938a3a5eSflorian return 0; 1705938a3a5eSflorian } 1706938a3a5eSflorian log_cert(VERB_ALGO, "peer certificate", x); 1707938a3a5eSflorian #ifdef HAVE_SSL_GET0_PEERNAME 1708938a3a5eSflorian if(SSL_get0_peername(c->ssl)) { 1709938a3a5eSflorian char buf[255]; 1710938a3a5eSflorian snprintf(buf, sizeof(buf), "SSL connection " 1711938a3a5eSflorian "to %s authenticated", 1712938a3a5eSflorian SSL_get0_peername(c->ssl)); 171345872187Ssthen log_addr(VERB_ALGO, buf, &c->repinfo.remote_addr, 171445872187Ssthen c->repinfo.remote_addrlen); 1715938a3a5eSflorian } else { 1716938a3a5eSflorian #endif 1717938a3a5eSflorian log_addr(VERB_ALGO, "SSL connection " 171845872187Ssthen "authenticated", &c->repinfo.remote_addr, 171945872187Ssthen 
c->repinfo.remote_addrlen); 1720938a3a5eSflorian #ifdef HAVE_SSL_GET0_PEERNAME 1721938a3a5eSflorian } 1722938a3a5eSflorian #endif 1723938a3a5eSflorian X509_free(x); 1724938a3a5eSflorian } else { 1725191f22c6Ssthen #ifdef HAVE_SSL_GET1_PEER_CERTIFICATE 1726191f22c6Ssthen X509* x = SSL_get1_peer_certificate(c->ssl); 1727191f22c6Ssthen #else 1728938a3a5eSflorian X509* x = SSL_get_peer_certificate(c->ssl); 1729191f22c6Ssthen #endif 1730938a3a5eSflorian if(x) { 1731938a3a5eSflorian log_cert(VERB_ALGO, "peer certificate", x); 1732938a3a5eSflorian X509_free(x); 1733938a3a5eSflorian } 1734938a3a5eSflorian log_addr(VERB_ALGO, "SSL connection failed: " 1735938a3a5eSflorian "failed to authenticate", 173645872187Ssthen &c->repinfo.remote_addr, 173745872187Ssthen c->repinfo.remote_addrlen); 1738938a3a5eSflorian return 0; 1739938a3a5eSflorian } 1740938a3a5eSflorian } else { 1741938a3a5eSflorian /* unauthenticated, the verify peer flag was not set 1742938a3a5eSflorian * in c->ssl when the ssl object was created from ssl_ctx */ 174345872187Ssthen log_addr(VERB_ALGO, "SSL connection", &c->repinfo.remote_addr, 174445872187Ssthen c->repinfo.remote_addrlen); 1745938a3a5eSflorian } 1746933707f3Ssthen 1747191f22c6Ssthen #ifdef HAVE_SSL_GET0_ALPN_SELECTED 17482c144df0Ssthen /* check if http2 use is negotiated */ 17492c144df0Ssthen if(c->type == comm_http && c->h2_session) { 17502c144df0Ssthen const unsigned char *alpn; 17512c144df0Ssthen unsigned int alpnlen = 0; 17522c144df0Ssthen SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen); 17532c144df0Ssthen if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) { 17542c144df0Ssthen /* connection upgraded to HTTP2 */ 17552c144df0Ssthen c->tcp_do_toggle_rw = 0; 17562c144df0Ssthen c->use_h2 = 1; 1757f46c52bfSsthen } else { 1758f46c52bfSsthen verbose(VERB_ALGO, "client doesn't support HTTP/2"); 1759f46c52bfSsthen return 0; 17602c144df0Ssthen } 17612c144df0Ssthen } 1762191f22c6Ssthen #endif 17632c144df0Ssthen 1764933707f3Ssthen /* setup listen rw correctly */ 
1765933707f3Ssthen if(c->tcp_is_reading) { 1766933707f3Ssthen if(c->ssl_shake_state != comm_ssl_shake_read) 1767933707f3Ssthen comm_point_listen_for_rw(c, 1, 0); 1768933707f3Ssthen } else { 1769191f22c6Ssthen comm_point_listen_for_rw(c, 0, 1); 1770933707f3Ssthen } 1771933707f3Ssthen c->ssl_shake_state = comm_ssl_shake_none; 1772933707f3Ssthen return 1; 1773933707f3Ssthen } 1774cebdf579Ssthen #endif /* HAVE_SSL */ 1775933707f3Ssthen 1776933707f3Ssthen /** ssl read callback on TCP */ 1777933707f3Ssthen static int 1778933707f3Ssthen ssl_handle_read(struct comm_point* c) 1779933707f3Ssthen { 1780cebdf579Ssthen #ifdef HAVE_SSL 1781933707f3Ssthen int r; 1782933707f3Ssthen if(c->ssl_shake_state != comm_ssl_shake_none) { 1783933707f3Ssthen if(!ssl_handshake(c)) 1784933707f3Ssthen return 0; 1785933707f3Ssthen if(c->ssl_shake_state != comm_ssl_shake_none) 1786933707f3Ssthen return 1; 1787933707f3Ssthen } 178845872187Ssthen if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) { 178945872187Ssthen struct pp2_header* header = NULL; 179045872187Ssthen size_t want_read_size = 0; 179145872187Ssthen size_t current_read_size = 0; 179245872187Ssthen if(c->pp2_header_state == pp2_header_none) { 179345872187Ssthen want_read_size = PP2_HEADER_SIZE; 179445872187Ssthen if(sldns_buffer_remaining(c->buffer)<want_read_size) { 179545872187Ssthen log_err_addr("proxy_protocol: not enough " 179645872187Ssthen "buffer size to read PROXYv2 header", "", 179745872187Ssthen &c->repinfo.remote_addr, 179845872187Ssthen c->repinfo.remote_addrlen); 179945872187Ssthen return 0; 180045872187Ssthen } 180145872187Ssthen verbose(VERB_ALGO, "proxy_protocol: reading fixed " 180245872187Ssthen "part of PROXYv2 header (len %lu)", 180345872187Ssthen (unsigned long)want_read_size); 180445872187Ssthen current_read_size = want_read_size; 180545872187Ssthen if(c->tcp_byte_count < current_read_size) { 180645872187Ssthen ERR_clear_error(); 180745872187Ssthen if((r=SSL_read(c->ssl, (void*)sldns_buffer_at( 
180845872187Ssthen c->buffer, c->tcp_byte_count), 180945872187Ssthen current_read_size - 181045872187Ssthen c->tcp_byte_count)) <= 0) { 181145872187Ssthen int want = SSL_get_error(c->ssl, r); 181245872187Ssthen if(want == SSL_ERROR_ZERO_RETURN) { 181345872187Ssthen if(c->tcp_req_info) 181445872187Ssthen return tcp_req_info_handle_read_close(c->tcp_req_info); 181545872187Ssthen return 0; /* shutdown, closed */ 181645872187Ssthen } else if(want == SSL_ERROR_WANT_READ) { 181745872187Ssthen #ifdef USE_WINSOCK 181845872187Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 181945872187Ssthen #endif 182045872187Ssthen return 1; /* read more later */ 182145872187Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 182245872187Ssthen c->ssl_shake_state = comm_ssl_shake_hs_write; 182345872187Ssthen comm_point_listen_for_rw(c, 0, 1); 182445872187Ssthen return 1; 182545872187Ssthen } else if(want == SSL_ERROR_SYSCALL) { 182645872187Ssthen #ifdef ECONNRESET 182745872187Ssthen if(errno == ECONNRESET && verbosity < 2) 182845872187Ssthen return 0; /* silence reset by peer */ 182945872187Ssthen #endif 183045872187Ssthen if(errno != 0) 183145872187Ssthen log_err("SSL_read syscall: %s", 183245872187Ssthen strerror(errno)); 183345872187Ssthen return 0; 183445872187Ssthen } 1835d896b962Ssthen log_crypto_err_io("could not SSL_read", 1836d896b962Ssthen want); 183745872187Ssthen return 0; 183845872187Ssthen } 183945872187Ssthen c->tcp_byte_count += r; 1840d896b962Ssthen sldns_buffer_skip(c->buffer, r); 184145872187Ssthen if(c->tcp_byte_count != current_read_size) return 1; 184245872187Ssthen c->pp2_header_state = pp2_header_init; 184345872187Ssthen } 184445872187Ssthen } 184545872187Ssthen if(c->pp2_header_state == pp2_header_init) { 1846d896b962Ssthen int err; 1847d896b962Ssthen err = pp2_read_header( 1848d896b962Ssthen sldns_buffer_begin(c->buffer), 1849d896b962Ssthen sldns_buffer_limit(c->buffer)); 1850d896b962Ssthen if(err) { 185145872187Ssthen log_err("proxy_protocol: could not 
parse " 1852d896b962Ssthen "PROXYv2 header (%s)", 1853d896b962Ssthen pp_lookup_error(err)); 185445872187Ssthen return 0; 185545872187Ssthen } 1856d896b962Ssthen header = (struct pp2_header*)sldns_buffer_begin(c->buffer); 185745872187Ssthen want_read_size = ntohs(header->len); 1858d896b962Ssthen if(sldns_buffer_limit(c->buffer) < 185945872187Ssthen PP2_HEADER_SIZE + want_read_size) { 186045872187Ssthen log_err_addr("proxy_protocol: not enough " 186145872187Ssthen "buffer size to read PROXYv2 header", "", 186245872187Ssthen &c->repinfo.remote_addr, 186345872187Ssthen c->repinfo.remote_addrlen); 186445872187Ssthen return 0; 186545872187Ssthen } 186645872187Ssthen verbose(VERB_ALGO, "proxy_protocol: reading variable " 186745872187Ssthen "part of PROXYv2 header (len %lu)", 186845872187Ssthen (unsigned long)want_read_size); 186945872187Ssthen current_read_size = PP2_HEADER_SIZE + want_read_size; 187045872187Ssthen if(want_read_size == 0) { 187145872187Ssthen /* nothing more to read; header is complete */ 187245872187Ssthen c->pp2_header_state = pp2_header_done; 187345872187Ssthen } else if(c->tcp_byte_count < current_read_size) { 187445872187Ssthen ERR_clear_error(); 187545872187Ssthen if((r=SSL_read(c->ssl, (void*)sldns_buffer_at( 187645872187Ssthen c->buffer, c->tcp_byte_count), 187745872187Ssthen current_read_size - 187845872187Ssthen c->tcp_byte_count)) <= 0) { 187945872187Ssthen int want = SSL_get_error(c->ssl, r); 188045872187Ssthen if(want == SSL_ERROR_ZERO_RETURN) { 188145872187Ssthen if(c->tcp_req_info) 188245872187Ssthen return tcp_req_info_handle_read_close(c->tcp_req_info); 188345872187Ssthen return 0; /* shutdown, closed */ 188445872187Ssthen } else if(want == SSL_ERROR_WANT_READ) { 188545872187Ssthen #ifdef USE_WINSOCK 188645872187Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 188745872187Ssthen #endif 188845872187Ssthen return 1; /* read more later */ 188945872187Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 189045872187Ssthen 
c->ssl_shake_state = comm_ssl_shake_hs_write; 189145872187Ssthen comm_point_listen_for_rw(c, 0, 1); 189245872187Ssthen return 1; 189345872187Ssthen } else if(want == SSL_ERROR_SYSCALL) { 189445872187Ssthen #ifdef ECONNRESET 189545872187Ssthen if(errno == ECONNRESET && verbosity < 2) 189645872187Ssthen return 0; /* silence reset by peer */ 189745872187Ssthen #endif 189845872187Ssthen if(errno != 0) 189945872187Ssthen log_err("SSL_read syscall: %s", 190045872187Ssthen strerror(errno)); 190145872187Ssthen return 0; 190245872187Ssthen } 1903d896b962Ssthen log_crypto_err_io("could not SSL_read", 1904d896b962Ssthen want); 190545872187Ssthen return 0; 190645872187Ssthen } 190745872187Ssthen c->tcp_byte_count += r; 1908d896b962Ssthen sldns_buffer_skip(c->buffer, r); 190945872187Ssthen if(c->tcp_byte_count != current_read_size) return 1; 191045872187Ssthen c->pp2_header_state = pp2_header_done; 191145872187Ssthen } 191245872187Ssthen } 191345872187Ssthen if(c->pp2_header_state != pp2_header_done || !header) { 191445872187Ssthen log_err_addr("proxy_protocol: wrong state for the " 191545872187Ssthen "PROXYv2 header", "", &c->repinfo.remote_addr, 191645872187Ssthen c->repinfo.remote_addrlen); 191745872187Ssthen return 0; 191845872187Ssthen } 1919d896b962Ssthen sldns_buffer_flip(c->buffer); 192045872187Ssthen if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) { 192145872187Ssthen log_err_addr("proxy_protocol: could not consume " 192245872187Ssthen "PROXYv2 header", "", &c->repinfo.remote_addr, 192345872187Ssthen c->repinfo.remote_addrlen); 192445872187Ssthen return 0; 192545872187Ssthen } 192645872187Ssthen verbose(VERB_ALGO, "proxy_protocol: successful read of " 192745872187Ssthen "PROXYv2 header"); 192845872187Ssthen /* Clear and reset the buffer to read the following 192945872187Ssthen * DNS packet(s). 
*/ 193045872187Ssthen sldns_buffer_clear(c->buffer); 193145872187Ssthen c->tcp_byte_count = 0; 193245872187Ssthen return 1; 193345872187Ssthen } 1934933707f3Ssthen if(c->tcp_byte_count < sizeof(uint16_t)) { 1935933707f3Ssthen /* read length bytes */ 1936933707f3Ssthen ERR_clear_error(); 19370b68ff31Ssthen if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer, 1938933707f3Ssthen c->tcp_byte_count), (int)(sizeof(uint16_t) - 1939933707f3Ssthen c->tcp_byte_count))) <= 0) { 1940933707f3Ssthen int want = SSL_get_error(c->ssl, r); 1941933707f3Ssthen if(want == SSL_ERROR_ZERO_RETURN) { 1942f6b99bafSsthen if(c->tcp_req_info) 1943f6b99bafSsthen return tcp_req_info_handle_read_close(c->tcp_req_info); 1944933707f3Ssthen return 0; /* shutdown, closed */ 1945933707f3Ssthen } else if(want == SSL_ERROR_WANT_READ) { 1946191f22c6Ssthen #ifdef USE_WINSOCK 194720237c55Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1948191f22c6Ssthen #endif 1949933707f3Ssthen return 1; /* read more later */ 1950933707f3Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 1951933707f3Ssthen c->ssl_shake_state = comm_ssl_shake_hs_write; 1952933707f3Ssthen comm_point_listen_for_rw(c, 0, 1); 1953933707f3Ssthen return 1; 1954933707f3Ssthen } else if(want == SSL_ERROR_SYSCALL) { 1955550cf4a9Ssthen #ifdef ECONNRESET 1956550cf4a9Ssthen if(errno == ECONNRESET && verbosity < 2) 1957550cf4a9Ssthen return 0; /* silence reset by peer */ 1958550cf4a9Ssthen #endif 1959933707f3Ssthen if(errno != 0) 1960933707f3Ssthen log_err("SSL_read syscall: %s", 1961933707f3Ssthen strerror(errno)); 1962933707f3Ssthen return 0; 1963933707f3Ssthen } 1964d896b962Ssthen log_crypto_err_io("could not SSL_read", want); 1965933707f3Ssthen return 0; 1966933707f3Ssthen } 1967933707f3Ssthen c->tcp_byte_count += r; 196820237c55Ssthen if(c->tcp_byte_count < sizeof(uint16_t)) 1969933707f3Ssthen return 1; 19700b68ff31Ssthen if(sldns_buffer_read_u16_at(c->buffer, 0) > 19710b68ff31Ssthen sldns_buffer_capacity(c->buffer)) { 
1972933707f3Ssthen verbose(VERB_QUERY, "ssl: dropped larger than buffer"); 1973933707f3Ssthen return 0; 1974933707f3Ssthen } 19750b68ff31Ssthen sldns_buffer_set_limit(c->buffer, 19760b68ff31Ssthen sldns_buffer_read_u16_at(c->buffer, 0)); 19770b68ff31Ssthen if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 1978933707f3Ssthen verbose(VERB_QUERY, "ssl: dropped bogus too short."); 1979933707f3Ssthen return 0; 1980933707f3Ssthen } 198120237c55Ssthen sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t))); 1982933707f3Ssthen verbose(VERB_ALGO, "Reading ssl tcp query of length %d", 19830b68ff31Ssthen (int)sldns_buffer_limit(c->buffer)); 1984933707f3Ssthen } 198520237c55Ssthen if(sldns_buffer_remaining(c->buffer) > 0) { 1986933707f3Ssthen ERR_clear_error(); 19870b68ff31Ssthen r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 19880b68ff31Ssthen (int)sldns_buffer_remaining(c->buffer)); 1989933707f3Ssthen if(r <= 0) { 1990933707f3Ssthen int want = SSL_get_error(c->ssl, r); 1991933707f3Ssthen if(want == SSL_ERROR_ZERO_RETURN) { 1992f6b99bafSsthen if(c->tcp_req_info) 1993f6b99bafSsthen return tcp_req_info_handle_read_close(c->tcp_req_info); 1994933707f3Ssthen return 0; /* shutdown, closed */ 1995933707f3Ssthen } else if(want == SSL_ERROR_WANT_READ) { 1996191f22c6Ssthen #ifdef USE_WINSOCK 199720237c55Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 1998191f22c6Ssthen #endif 1999933707f3Ssthen return 1; /* read more later */ 2000933707f3Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 2001933707f3Ssthen c->ssl_shake_state = comm_ssl_shake_hs_write; 2002933707f3Ssthen comm_point_listen_for_rw(c, 0, 1); 2003933707f3Ssthen return 1; 2004933707f3Ssthen } else if(want == SSL_ERROR_SYSCALL) { 2005550cf4a9Ssthen #ifdef ECONNRESET 2006550cf4a9Ssthen if(errno == ECONNRESET && verbosity < 2) 2007550cf4a9Ssthen return 0; /* silence reset by peer */ 2008550cf4a9Ssthen #endif 2009933707f3Ssthen if(errno != 0) 2010933707f3Ssthen log_err("SSL_read 
syscall: %s", 2011933707f3Ssthen strerror(errno)); 2012933707f3Ssthen return 0; 2013933707f3Ssthen } 2014d896b962Ssthen log_crypto_err_io("could not SSL_read", want); 2015933707f3Ssthen return 0; 2016933707f3Ssthen } 20170b68ff31Ssthen sldns_buffer_skip(c->buffer, (ssize_t)r); 201820237c55Ssthen } 20190b68ff31Ssthen if(sldns_buffer_remaining(c->buffer) <= 0) { 2020933707f3Ssthen tcp_callback_reader(c); 2021933707f3Ssthen } 2022933707f3Ssthen return 1; 2023cebdf579Ssthen #else 2024cebdf579Ssthen (void)c; 2025cebdf579Ssthen return 0; 2026cebdf579Ssthen #endif /* HAVE_SSL */ 2027933707f3Ssthen } 2028933707f3Ssthen 2029933707f3Ssthen /** ssl write callback on TCP */ 2030933707f3Ssthen static int 2031933707f3Ssthen ssl_handle_write(struct comm_point* c) 2032933707f3Ssthen { 2033cebdf579Ssthen #ifdef HAVE_SSL 2034933707f3Ssthen int r; 2035933707f3Ssthen if(c->ssl_shake_state != comm_ssl_shake_none) { 2036933707f3Ssthen if(!ssl_handshake(c)) 2037933707f3Ssthen return 0; 2038933707f3Ssthen if(c->ssl_shake_state != comm_ssl_shake_none) 2039933707f3Ssthen return 1; 2040933707f3Ssthen } 2041933707f3Ssthen /* ignore return, if fails we may simply block */ 2042ebf5bb73Ssthen (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE); 2043eba819a2Ssthen if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 2044eba819a2Ssthen uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer)); 2045933707f3Ssthen ERR_clear_error(); 2046eba819a2Ssthen if(c->tcp_write_and_read) { 2047eba819a2Ssthen if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) { 2048eba819a2Ssthen /* combine the tcp length and the query for 2049eba819a2Ssthen * write, this emulates writev */ 2050eba819a2Ssthen uint8_t buf[LDNS_RR_BUF_SIZE]; 2051eba819a2Ssthen memmove(buf, &len, sizeof(uint16_t)); 2052eba819a2Ssthen memmove(buf+sizeof(uint16_t), 2053eba819a2Ssthen c->tcp_write_pkt, 2054eba819a2Ssthen c->tcp_write_pkt_len); 2055eba819a2Ssthen r = 
SSL_write(c->ssl, 2056eba819a2Ssthen (void*)(buf+c->tcp_write_byte_count), 2057eba819a2Ssthen c->tcp_write_pkt_len + 2 - 2058eba819a2Ssthen c->tcp_write_byte_count); 2059eba819a2Ssthen } else { 2060eba819a2Ssthen r = SSL_write(c->ssl, 2061eba819a2Ssthen (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 2062eba819a2Ssthen (int)(sizeof(uint16_t)-c->tcp_write_byte_count)); 2063eba819a2Ssthen } 2064eba819a2Ssthen } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) < 206520237c55Ssthen LDNS_RR_BUF_SIZE) { 206620237c55Ssthen /* combine the tcp length and the query for write, 206720237c55Ssthen * this emulates writev */ 206820237c55Ssthen uint8_t buf[LDNS_RR_BUF_SIZE]; 206920237c55Ssthen memmove(buf, &len, sizeof(uint16_t)); 207020237c55Ssthen memmove(buf+sizeof(uint16_t), 207120237c55Ssthen sldns_buffer_current(c->buffer), 207220237c55Ssthen sldns_buffer_remaining(c->buffer)); 207320237c55Ssthen r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count), 207420237c55Ssthen (int)(sizeof(uint16_t)+ 207520237c55Ssthen sldns_buffer_remaining(c->buffer) 207620237c55Ssthen - c->tcp_byte_count)); 207720237c55Ssthen } else { 2078933707f3Ssthen r = SSL_write(c->ssl, 2079933707f3Ssthen (void*)(((uint8_t*)&len)+c->tcp_byte_count), 2080933707f3Ssthen (int)(sizeof(uint16_t)-c->tcp_byte_count)); 208120237c55Ssthen } 2082933707f3Ssthen if(r <= 0) { 2083933707f3Ssthen int want = SSL_get_error(c->ssl, r); 2084933707f3Ssthen if(want == SSL_ERROR_ZERO_RETURN) { 2085933707f3Ssthen return 0; /* closed */ 2086933707f3Ssthen } else if(want == SSL_ERROR_WANT_READ) { 2087550cf4a9Ssthen c->ssl_shake_state = comm_ssl_shake_hs_read; 2088933707f3Ssthen comm_point_listen_for_rw(c, 1, 0); 2089933707f3Ssthen return 1; /* wait for read condition */ 2090933707f3Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 2091191f22c6Ssthen #ifdef USE_WINSOCK 209220237c55Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2093191f22c6Ssthen #endif 2094933707f3Ssthen return 1; /* write more later */ 
2095933707f3Ssthen } else if(want == SSL_ERROR_SYSCALL) { 2096550cf4a9Ssthen #ifdef EPIPE 2097550cf4a9Ssthen if(errno == EPIPE && verbosity < 2) 2098550cf4a9Ssthen return 0; /* silence 'broken pipe' */ 2099550cf4a9Ssthen #endif 2100933707f3Ssthen if(errno != 0) 2101933707f3Ssthen log_err("SSL_write syscall: %s", 2102933707f3Ssthen strerror(errno)); 2103933707f3Ssthen return 0; 2104933707f3Ssthen } 2105d896b962Ssthen log_crypto_err_io("could not SSL_write", want); 2106933707f3Ssthen return 0; 2107933707f3Ssthen } 2108eba819a2Ssthen if(c->tcp_write_and_read) { 2109eba819a2Ssthen c->tcp_write_byte_count += r; 2110eba819a2Ssthen if(c->tcp_write_byte_count < sizeof(uint16_t)) 2111eba819a2Ssthen return 1; 2112eba819a2Ssthen } else { 2113933707f3Ssthen c->tcp_byte_count += r; 2114933707f3Ssthen if(c->tcp_byte_count < sizeof(uint16_t)) 2115933707f3Ssthen return 1; 21160b68ff31Ssthen sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 2117933707f3Ssthen sizeof(uint16_t)); 2118eba819a2Ssthen } 2119eba819a2Ssthen if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2120933707f3Ssthen tcp_callback_writer(c); 2121933707f3Ssthen return 1; 2122933707f3Ssthen } 2123933707f3Ssthen } 2124eba819a2Ssthen log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0); 2125eba819a2Ssthen log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 2126933707f3Ssthen ERR_clear_error(); 2127eba819a2Ssthen if(c->tcp_write_and_read) { 2128eba819a2Ssthen r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 2129eba819a2Ssthen (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count)); 2130eba819a2Ssthen } else { 21310b68ff31Ssthen r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 21320b68ff31Ssthen (int)sldns_buffer_remaining(c->buffer)); 2133eba819a2Ssthen } 2134933707f3Ssthen if(r <= 0) { 2135933707f3Ssthen 
int want = SSL_get_error(c->ssl, r); 2136933707f3Ssthen if(want == SSL_ERROR_ZERO_RETURN) { 2137933707f3Ssthen return 0; /* closed */ 2138933707f3Ssthen } else if(want == SSL_ERROR_WANT_READ) { 2139550cf4a9Ssthen c->ssl_shake_state = comm_ssl_shake_hs_read; 2140933707f3Ssthen comm_point_listen_for_rw(c, 1, 0); 2141933707f3Ssthen return 1; /* wait for read condition */ 2142933707f3Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 2143191f22c6Ssthen #ifdef USE_WINSOCK 214420237c55Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2145191f22c6Ssthen #endif 2146933707f3Ssthen return 1; /* write more later */ 2147933707f3Ssthen } else if(want == SSL_ERROR_SYSCALL) { 2148550cf4a9Ssthen #ifdef EPIPE 2149550cf4a9Ssthen if(errno == EPIPE && verbosity < 2) 2150550cf4a9Ssthen return 0; /* silence 'broken pipe' */ 2151550cf4a9Ssthen #endif 2152933707f3Ssthen if(errno != 0) 2153933707f3Ssthen log_err("SSL_write syscall: %s", 2154933707f3Ssthen strerror(errno)); 2155933707f3Ssthen return 0; 2156933707f3Ssthen } 2157d896b962Ssthen log_crypto_err_io("could not SSL_write", want); 2158933707f3Ssthen return 0; 2159933707f3Ssthen } 2160eba819a2Ssthen if(c->tcp_write_and_read) { 2161eba819a2Ssthen c->tcp_write_byte_count += r; 2162eba819a2Ssthen } else { 21630b68ff31Ssthen sldns_buffer_skip(c->buffer, (ssize_t)r); 2164eba819a2Ssthen } 2165933707f3Ssthen 2166eba819a2Ssthen if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2167933707f3Ssthen tcp_callback_writer(c); 2168933707f3Ssthen } 2169933707f3Ssthen return 1; 2170cebdf579Ssthen #else 2171cebdf579Ssthen (void)c; 2172cebdf579Ssthen return 0; 2173cebdf579Ssthen #endif /* HAVE_SSL */ 2174933707f3Ssthen } 2175933707f3Ssthen 2176933707f3Ssthen /** handle ssl tcp connection with dns contents */ 2177933707f3Ssthen static int 2178eba819a2Ssthen ssl_handle_it(struct comm_point* c, int is_write) 2179933707f3Ssthen { 2180eba819a2Ssthen 
/* handle case where renegotiation wants read during write call 2181eba819a2Ssthen * or write during read calls */ 2182eba819a2Ssthen if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write) 2183eba819a2Ssthen return ssl_handle_read(c); 2184eba819a2Ssthen else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read) 2185eba819a2Ssthen return ssl_handle_write(c); 2186eba819a2Ssthen /* handle read events for read operation and write events for a 2187eba819a2Ssthen * write operation */ 2188eba819a2Ssthen else if(!is_write) 2189933707f3Ssthen return ssl_handle_read(c); 2190933707f3Ssthen return ssl_handle_write(c); 2191933707f3Ssthen } 2192933707f3Ssthen 219345872187Ssthen /** 219445872187Ssthen * Handle tcp reading callback. 2195933707f3Ssthen * @param fd: file descriptor of socket. 2196933707f3Ssthen * @param c: comm point to read from into buffer. 2197933707f3Ssthen * @param short_ok: if true, very short packets are OK (for comm_local). 2198933707f3Ssthen * @return: 0 on error 2199933707f3Ssthen */ 2200933707f3Ssthen static int 2201933707f3Ssthen comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok) 2202933707f3Ssthen { 2203933707f3Ssthen ssize_t r; 220445872187Ssthen int recv_initial = 0; 2205933707f3Ssthen log_assert(c->type == comm_tcp || c->type == comm_local); 2206933707f3Ssthen if(c->ssl) 2207eba819a2Ssthen return ssl_handle_it(c, 0); 2208eba819a2Ssthen if(!c->tcp_is_reading && !c->tcp_write_and_read) 2209933707f3Ssthen return 0; 2210933707f3Ssthen 2211933707f3Ssthen log_assert(fd != -1); 221245872187Ssthen if(c->pp2_enabled && c->pp2_header_state != pp2_header_done) { 221345872187Ssthen struct pp2_header* header = NULL; 221445872187Ssthen size_t want_read_size = 0; 221545872187Ssthen size_t current_read_size = 0; 221645872187Ssthen if(c->pp2_header_state == pp2_header_none) { 221745872187Ssthen want_read_size = PP2_HEADER_SIZE; 221845872187Ssthen if(sldns_buffer_remaining(c->buffer)<want_read_size) { 221945872187Ssthen 
log_err_addr("proxy_protocol: not enough " 222045872187Ssthen "buffer size to read PROXYv2 header", "", 222145872187Ssthen &c->repinfo.remote_addr, 222245872187Ssthen c->repinfo.remote_addrlen); 222345872187Ssthen return 0; 222445872187Ssthen } 222545872187Ssthen verbose(VERB_ALGO, "proxy_protocol: reading fixed " 222645872187Ssthen "part of PROXYv2 header (len %lu)", 222745872187Ssthen (unsigned long)want_read_size); 222845872187Ssthen current_read_size = want_read_size; 222945872187Ssthen if(c->tcp_byte_count < current_read_size) { 223045872187Ssthen r = recv(fd, (void*)sldns_buffer_at(c->buffer, 223145872187Ssthen c->tcp_byte_count), 223245872187Ssthen current_read_size-c->tcp_byte_count, MSG_DONTWAIT); 2233f6b99bafSsthen if(r == 0) { 2234f6b99bafSsthen if(c->tcp_req_info) 2235f6b99bafSsthen return tcp_req_info_handle_read_close(c->tcp_req_info); 2236933707f3Ssthen return 0; 2237f6b99bafSsthen } else if(r == -1) { 223845872187Ssthen goto recv_error_initial; 223945872187Ssthen } 224045872187Ssthen c->tcp_byte_count += r; 2241d896b962Ssthen sldns_buffer_skip(c->buffer, r); 224245872187Ssthen if(c->tcp_byte_count != current_read_size) return 1; 224345872187Ssthen c->pp2_header_state = pp2_header_init; 224445872187Ssthen } 224545872187Ssthen } 224645872187Ssthen if(c->pp2_header_state == pp2_header_init) { 2247d896b962Ssthen int err; 2248d896b962Ssthen err = pp2_read_header( 2249d896b962Ssthen sldns_buffer_begin(c->buffer), 2250d896b962Ssthen sldns_buffer_limit(c->buffer)); 2251d896b962Ssthen if(err) { 225245872187Ssthen log_err("proxy_protocol: could not parse " 2253d896b962Ssthen "PROXYv2 header (%s)", 2254d896b962Ssthen pp_lookup_error(err)); 225545872187Ssthen return 0; 225645872187Ssthen } 2257d896b962Ssthen header = (struct pp2_header*)sldns_buffer_begin(c->buffer); 225845872187Ssthen want_read_size = ntohs(header->len); 2259d896b962Ssthen if(sldns_buffer_limit(c->buffer) < 226045872187Ssthen PP2_HEADER_SIZE + want_read_size) { 226145872187Ssthen 
log_err_addr("proxy_protocol: not enough " 226245872187Ssthen "buffer size to read PROXYv2 header", "", 226345872187Ssthen &c->repinfo.remote_addr, 226445872187Ssthen c->repinfo.remote_addrlen); 226545872187Ssthen return 0; 226645872187Ssthen } 226745872187Ssthen verbose(VERB_ALGO, "proxy_protocol: reading variable " 226845872187Ssthen "part of PROXYv2 header (len %lu)", 226945872187Ssthen (unsigned long)want_read_size); 227045872187Ssthen current_read_size = PP2_HEADER_SIZE + want_read_size; 227145872187Ssthen if(want_read_size == 0) { 227245872187Ssthen /* nothing more to read; header is complete */ 227345872187Ssthen c->pp2_header_state = pp2_header_done; 227445872187Ssthen } else if(c->tcp_byte_count < current_read_size) { 227545872187Ssthen r = recv(fd, (void*)sldns_buffer_at(c->buffer, 227645872187Ssthen c->tcp_byte_count), 227745872187Ssthen current_read_size-c->tcp_byte_count, MSG_DONTWAIT); 227845872187Ssthen if(r == 0) { 227945872187Ssthen if(c->tcp_req_info) 228045872187Ssthen return tcp_req_info_handle_read_close(c->tcp_req_info); 228145872187Ssthen return 0; 228245872187Ssthen } else if(r == -1) { 228345872187Ssthen goto recv_error; 228445872187Ssthen } 228545872187Ssthen c->tcp_byte_count += r; 2286d896b962Ssthen sldns_buffer_skip(c->buffer, r); 228745872187Ssthen if(c->tcp_byte_count != current_read_size) return 1; 228845872187Ssthen c->pp2_header_state = pp2_header_done; 228945872187Ssthen } 229045872187Ssthen } 229145872187Ssthen if(c->pp2_header_state != pp2_header_done || !header) { 229245872187Ssthen log_err_addr("proxy_protocol: wrong state for the " 229345872187Ssthen "PROXYv2 header", "", &c->repinfo.remote_addr, 229445872187Ssthen c->repinfo.remote_addrlen); 229545872187Ssthen return 0; 229645872187Ssthen } 2297d896b962Ssthen sldns_buffer_flip(c->buffer); 229845872187Ssthen if(!consume_pp2_header(c->buffer, &c->repinfo, 1)) { 229945872187Ssthen log_err_addr("proxy_protocol: could not consume " 230045872187Ssthen "PROXYv2 header", "", 
&c->repinfo.remote_addr, 230145872187Ssthen c->repinfo.remote_addrlen); 230245872187Ssthen return 0; 230345872187Ssthen } 230445872187Ssthen verbose(VERB_ALGO, "proxy_protocol: successful read of " 230545872187Ssthen "PROXYv2 header"); 230645872187Ssthen /* Clear and reset the buffer to read the following 230745872187Ssthen * DNS packet(s). */ 230845872187Ssthen sldns_buffer_clear(c->buffer); 230945872187Ssthen c->tcp_byte_count = 0; 231045872187Ssthen return 1; 231145872187Ssthen } 231245872187Ssthen 231345872187Ssthen if(c->tcp_byte_count < sizeof(uint16_t)) { 231445872187Ssthen /* read length bytes */ 231545872187Ssthen r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count), 231645872187Ssthen sizeof(uint16_t)-c->tcp_byte_count, MSG_DONTWAIT); 231745872187Ssthen if(r == 0) { 231845872187Ssthen if(c->tcp_req_info) 231945872187Ssthen return tcp_req_info_handle_read_close(c->tcp_req_info); 232045872187Ssthen return 0; 232145872187Ssthen } else if(r == -1) { 232245872187Ssthen if(c->pp2_enabled) goto recv_error; 232345872187Ssthen goto recv_error_initial; 232445872187Ssthen } 232545872187Ssthen c->tcp_byte_count += r; 232645872187Ssthen if(c->tcp_byte_count != sizeof(uint16_t)) 232745872187Ssthen return 1; 232845872187Ssthen if(sldns_buffer_read_u16_at(c->buffer, 0) > 232945872187Ssthen sldns_buffer_capacity(c->buffer)) { 233045872187Ssthen verbose(VERB_QUERY, "tcp: dropped larger than buffer"); 233145872187Ssthen return 0; 233245872187Ssthen } 233345872187Ssthen sldns_buffer_set_limit(c->buffer, 233445872187Ssthen sldns_buffer_read_u16_at(c->buffer, 0)); 233545872187Ssthen if(!short_ok && 233645872187Ssthen sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) { 233745872187Ssthen verbose(VERB_QUERY, "tcp: dropped bogus too short."); 233845872187Ssthen return 0; 233945872187Ssthen } 234045872187Ssthen verbose(VERB_ALGO, "Reading tcp query of length %d", 234145872187Ssthen (int)sldns_buffer_limit(c->buffer)); 234245872187Ssthen } 234345872187Ssthen 
234445872187Ssthen if(sldns_buffer_remaining(c->buffer) == 0) 234545872187Ssthen log_err("in comm_point_tcp_handle_read buffer_remaining is " 234645872187Ssthen "not > 0 as expected, continuing with (harmless) 0 " 234745872187Ssthen "length recv"); 234845872187Ssthen r = recv(fd, (void*)sldns_buffer_current(c->buffer), 234945872187Ssthen sldns_buffer_remaining(c->buffer), MSG_DONTWAIT); 235045872187Ssthen if(r == 0) { 235145872187Ssthen if(c->tcp_req_info) 235245872187Ssthen return tcp_req_info_handle_read_close(c->tcp_req_info); 235345872187Ssthen return 0; 235445872187Ssthen } else if(r == -1) { 235545872187Ssthen goto recv_error; 235645872187Ssthen } 235745872187Ssthen sldns_buffer_skip(c->buffer, r); 235845872187Ssthen if(sldns_buffer_remaining(c->buffer) <= 0) { 235945872187Ssthen tcp_callback_reader(c); 236045872187Ssthen } 236145872187Ssthen return 1; 236245872187Ssthen 236345872187Ssthen recv_error_initial: 236445872187Ssthen recv_initial = 1; 236545872187Ssthen recv_error: 2366933707f3Ssthen #ifndef USE_WINSOCK 2367933707f3Ssthen if(errno == EINTR || errno == EAGAIN) 2368933707f3Ssthen return 1; 2369933707f3Ssthen #ifdef ECONNRESET 2370933707f3Ssthen if(errno == ECONNRESET && verbosity < 2) 2371933707f3Ssthen return 0; /* silence reset by peer */ 2372933707f3Ssthen #endif 2373*98bc733bSsthen if(recv_initial) { 2374191f22c6Ssthen #ifdef ECONNREFUSED 2375191f22c6Ssthen if(errno == ECONNREFUSED && verbosity < 2) 2376191f22c6Ssthen return 0; /* silence reset by peer */ 2377191f22c6Ssthen #endif 23789982a05dSsthen #ifdef ENETUNREACH 23799982a05dSsthen if(errno == ENETUNREACH && verbosity < 2) 23809982a05dSsthen return 0; /* silence it */ 23819982a05dSsthen #endif 23829982a05dSsthen #ifdef EHOSTDOWN 23839982a05dSsthen if(errno == EHOSTDOWN && verbosity < 2) 23849982a05dSsthen return 0; /* silence it */ 23859982a05dSsthen #endif 23869982a05dSsthen #ifdef EHOSTUNREACH 23879982a05dSsthen if(errno == EHOSTUNREACH && verbosity < 2) 23889982a05dSsthen return 0; /* 
silence it */ 23899982a05dSsthen #endif 23909982a05dSsthen #ifdef ENETDOWN 23919982a05dSsthen if(errno == ENETDOWN && verbosity < 2) 23929982a05dSsthen return 0; /* silence it */ 23939982a05dSsthen #endif 23949982a05dSsthen #ifdef EACCES 23959982a05dSsthen if(errno == EACCES && verbosity < 2) 23969982a05dSsthen return 0; /* silence it */ 23979982a05dSsthen #endif 23989982a05dSsthen #ifdef ENOTCONN 23999982a05dSsthen if(errno == ENOTCONN) { 2400*98bc733bSsthen log_err_addr("read (in tcp initial) failed and this " 240145872187Ssthen "could be because TCP Fast Open is " 240245872187Ssthen "enabled [--disable-tfo-client " 240345872187Ssthen "--disable-tfo-server] but does not " 240445872187Ssthen "work", sock_strerror(errno), 240545872187Ssthen &c->repinfo.remote_addr, 240645872187Ssthen c->repinfo.remote_addrlen); 24079982a05dSsthen return 0; 24089982a05dSsthen } 24099982a05dSsthen #endif 241045872187Ssthen } 2411933707f3Ssthen #else /* USE_WINSOCK */ 241245872187Ssthen if(recv_initial) { 2413191f22c6Ssthen if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2) 2414191f22c6Ssthen return 0; 2415191f22c6Ssthen if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2) 2416191f22c6Ssthen return 0; 2417191f22c6Ssthen if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2) 2418191f22c6Ssthen return 0; 2419191f22c6Ssthen if(WSAGetLastError() == WSAENETDOWN && verbosity < 2) 2420191f22c6Ssthen return 0; 2421191f22c6Ssthen if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2) 2422191f22c6Ssthen return 0; 242345872187Ssthen } 2424933707f3Ssthen if(WSAGetLastError() == WSAECONNRESET) 2425933707f3Ssthen return 0; 2426933707f3Ssthen if(WSAGetLastError() == WSAEINPROGRESS) 2427933707f3Ssthen return 1; 2428933707f3Ssthen if(WSAGetLastError() == WSAEWOULDBLOCK) { 24292ee382b6Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, 24302ee382b6Ssthen UB_EV_READ); 2431933707f3Ssthen return 1; 2432933707f3Ssthen } 2433933707f3Ssthen #endif 2434*98bc733bSsthen log_err_addr((recv_initial?"read 
(in tcp initial)":"read (in tcp)"), 2435*98bc733bSsthen sock_strerror(errno), &c->repinfo.remote_addr, 2436*98bc733bSsthen c->repinfo.remote_addrlen); 2437933707f3Ssthen return 0; 2438933707f3Ssthen } 2439933707f3Ssthen 2440933707f3Ssthen /** 2441933707f3Ssthen * Handle tcp writing callback. 2442933707f3Ssthen * @param fd: file descriptor of socket. 2443933707f3Ssthen * @param c: comm point to write buffer out of. 2444933707f3Ssthen * @return: 0 on error 2445933707f3Ssthen */ 2446933707f3Ssthen static int 2447933707f3Ssthen comm_point_tcp_handle_write(int fd, struct comm_point* c) 2448933707f3Ssthen { 2449933707f3Ssthen ssize_t r; 24502be9e038Ssthen struct sldns_buffer *buffer; 2451933707f3Ssthen log_assert(c->type == comm_tcp); 24522be9e038Ssthen #ifdef USE_DNSCRYPT 24532be9e038Ssthen buffer = c->dnscrypt_buffer; 24542be9e038Ssthen #else 24552be9e038Ssthen buffer = c->buffer; 24562be9e038Ssthen #endif 2457eba819a2Ssthen if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read) 2458933707f3Ssthen return 0; 2459933707f3Ssthen log_assert(fd != -1); 2460eba819a2Ssthen if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) { 2461933707f3Ssthen /* check for pending error from nonblocking connect */ 2462933707f3Ssthen /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 2463933707f3Ssthen int error = 0; 2464933707f3Ssthen socklen_t len = (socklen_t)sizeof(error); 2465933707f3Ssthen if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 2466933707f3Ssthen &len) < 0){ 2467933707f3Ssthen #ifndef USE_WINSOCK 2468933707f3Ssthen error = errno; /* on solaris errno is error */ 2469933707f3Ssthen #else /* USE_WINSOCK */ 2470933707f3Ssthen error = WSAGetLastError(); 2471933707f3Ssthen #endif 2472933707f3Ssthen } 2473933707f3Ssthen #ifndef USE_WINSOCK 2474933707f3Ssthen #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 2475933707f3Ssthen if(error == EINPROGRESS || error == 
EWOULDBLOCK) 2476933707f3Ssthen return 1; /* try again later */ 2477163a4143Ssthen else 2478933707f3Ssthen #endif 2479163a4143Ssthen if(error != 0 && verbosity < 2) 2480933707f3Ssthen return 0; /* silence lots of chatter in the logs */ 2481933707f3Ssthen else if(error != 0) { 2482e10d3884Sbrad log_err_addr("tcp connect", strerror(error), 248345872187Ssthen &c->repinfo.remote_addr, 248445872187Ssthen c->repinfo.remote_addrlen); 2485933707f3Ssthen #else /* USE_WINSOCK */ 2486933707f3Ssthen /* examine error */ 2487933707f3Ssthen if(error == WSAEINPROGRESS) 2488933707f3Ssthen return 1; 2489933707f3Ssthen else if(error == WSAEWOULDBLOCK) { 24902ee382b6Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2491933707f3Ssthen return 1; 2492933707f3Ssthen } else if(error != 0 && verbosity < 2) 2493933707f3Ssthen return 0; 2494933707f3Ssthen else if(error != 0) { 2495e10d3884Sbrad log_err_addr("tcp connect", wsa_strerror(error), 249645872187Ssthen &c->repinfo.remote_addr, 249745872187Ssthen c->repinfo.remote_addrlen); 2498933707f3Ssthen #endif /* USE_WINSOCK */ 2499933707f3Ssthen return 0; 2500933707f3Ssthen } 2501933707f3Ssthen } 2502933707f3Ssthen if(c->ssl) 2503eba819a2Ssthen return ssl_handle_it(c, 1); 2504933707f3Ssthen 250577079be7Ssthen #ifdef USE_MSG_FASTOPEN 250677079be7Ssthen /* Only try this on first use of a connection that uses tfo, 250777079be7Ssthen otherwise fall through to normal write */ 250877079be7Ssthen /* Also, TFO support on WINDOWS not implemented at the moment */ 250977079be7Ssthen if(c->tcp_do_fastopen == 1) { 251077079be7Ssthen /* this form of sendmsg() does both a connect() and send() so need to 251177079be7Ssthen look for various flavours of error*/ 2512eba819a2Ssthen uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 251377079be7Ssthen struct msghdr msg; 251477079be7Ssthen struct iovec iov[2]; 251577079be7Ssthen c->tcp_do_fastopen = 0; 251677079be7Ssthen memset(&msg, 0, sizeof(msg)); 
2517eba819a2Ssthen if(c->tcp_write_and_read) { 2518eba819a2Ssthen iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 2519eba819a2Ssthen iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 2520eba819a2Ssthen iov[1].iov_base = c->tcp_write_pkt; 2521eba819a2Ssthen iov[1].iov_len = c->tcp_write_pkt_len; 2522eba819a2Ssthen } else { 252377079be7Ssthen iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 252477079be7Ssthen iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 25252be9e038Ssthen iov[1].iov_base = sldns_buffer_begin(buffer); 25262be9e038Ssthen iov[1].iov_len = sldns_buffer_limit(buffer); 2527eba819a2Ssthen } 252877079be7Ssthen log_assert(iov[0].iov_len > 0); 252945872187Ssthen msg.msg_name = &c->repinfo.remote_addr; 253045872187Ssthen msg.msg_namelen = c->repinfo.remote_addrlen; 253177079be7Ssthen msg.msg_iov = iov; 253277079be7Ssthen msg.msg_iovlen = 2; 253377079be7Ssthen r = sendmsg(fd, &msg, MSG_FASTOPEN); 253477079be7Ssthen if (r == -1) { 253577079be7Ssthen #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 253677079be7Ssthen /* Handshake is underway, maybe because no TFO cookie available. 2537bdfc4d55Sflorian Come back to write the message*/ 253877079be7Ssthen if(errno == EINPROGRESS || errno == EWOULDBLOCK) 253977079be7Ssthen return 1; 254077079be7Ssthen #endif 254177079be7Ssthen if(errno == EINTR || errno == EAGAIN) 254277079be7Ssthen return 1; 254377079be7Ssthen /* Not handling EISCONN here as shouldn't ever hit that case.*/ 2544e21c60efSsthen if(errno != EPIPE 2545e21c60efSsthen #ifdef EOPNOTSUPP 2546e21c60efSsthen /* if /proc/sys/net/ipv4/tcp_fastopen is 2547e21c60efSsthen * disabled on Linux, sendmsg may return 2548e21c60efSsthen * 'Operation not supported', if so 2549e21c60efSsthen * fallthrough to ordinary connect. 
*/ 2550e21c60efSsthen && errno != EOPNOTSUPP 2551e21c60efSsthen #endif 2552e21c60efSsthen && errno != 0) { 2553e21c60efSsthen if(verbosity < 2) 255477079be7Ssthen return 0; /* silence lots of chatter in the logs */ 255577079be7Ssthen log_err_addr("tcp sendmsg", strerror(errno), 255645872187Ssthen &c->repinfo.remote_addr, 255745872187Ssthen c->repinfo.remote_addrlen); 255877079be7Ssthen return 0; 25592be9e038Ssthen } 2560e21c60efSsthen verbose(VERB_ALGO, "tcp sendmsg for fastopen failed (with %s), try normal connect", strerror(errno)); 25612be9e038Ssthen /* fallthrough to nonFASTOPEN 25622be9e038Ssthen * (MSG_FASTOPEN on Linux 3 produces EPIPE) 25632be9e038Ssthen * we need to perform connect() */ 256445872187Ssthen if(connect(fd, (struct sockaddr *)&c->repinfo.remote_addr, 256545872187Ssthen c->repinfo.remote_addrlen) == -1) { 25662be9e038Ssthen #ifdef EINPROGRESS 25672be9e038Ssthen if(errno == EINPROGRESS) 25682be9e038Ssthen return 1; /* wait until connect done*/ 25692be9e038Ssthen #endif 25702be9e038Ssthen #ifdef USE_WINSOCK 25712be9e038Ssthen if(WSAGetLastError() == WSAEINPROGRESS || 25722be9e038Ssthen WSAGetLastError() == WSAEWOULDBLOCK) 25732be9e038Ssthen return 1; /* wait until connect done*/ 25742be9e038Ssthen #endif 25752be9e038Ssthen if(tcp_connect_errno_needs_log( 257645872187Ssthen (struct sockaddr *)&c->repinfo.remote_addr, 257745872187Ssthen c->repinfo.remote_addrlen)) { 25782be9e038Ssthen log_err_addr("outgoing tcp: connect after EPIPE for fastopen", 257945872187Ssthen strerror(errno), 258045872187Ssthen &c->repinfo.remote_addr, 258145872187Ssthen c->repinfo.remote_addrlen); 25822be9e038Ssthen } 25832be9e038Ssthen return 0; 25842be9e038Ssthen } 25852be9e038Ssthen 258677079be7Ssthen } else { 2587eba819a2Ssthen if(c->tcp_write_and_read) { 2588eba819a2Ssthen c->tcp_write_byte_count += r; 2589eba819a2Ssthen if(c->tcp_write_byte_count < sizeof(uint16_t)) 2590eba819a2Ssthen return 1; 2591eba819a2Ssthen } else { 259277079be7Ssthen c->tcp_byte_count += r; 
259377079be7Ssthen if(c->tcp_byte_count < sizeof(uint16_t)) 259477079be7Ssthen return 1; 25952be9e038Ssthen sldns_buffer_set_position(buffer, c->tcp_byte_count - 259677079be7Ssthen sizeof(uint16_t)); 2597eba819a2Ssthen } 2598eba819a2Ssthen if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 259977079be7Ssthen tcp_callback_writer(c); 260077079be7Ssthen return 1; 260177079be7Ssthen } 260277079be7Ssthen } 260377079be7Ssthen } 260477079be7Ssthen #endif /* USE_MSG_FASTOPEN */ 260577079be7Ssthen 2606eba819a2Ssthen if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) { 2607eba819a2Ssthen uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer)); 2608933707f3Ssthen #ifdef HAVE_WRITEV 2609933707f3Ssthen struct iovec iov[2]; 2610eba819a2Ssthen if(c->tcp_write_and_read) { 2611eba819a2Ssthen iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count; 2612eba819a2Ssthen iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count; 2613eba819a2Ssthen iov[1].iov_base = c->tcp_write_pkt; 2614eba819a2Ssthen iov[1].iov_len = c->tcp_write_pkt_len; 2615eba819a2Ssthen } else { 2616933707f3Ssthen iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count; 2617933707f3Ssthen iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count; 26182be9e038Ssthen iov[1].iov_base = sldns_buffer_begin(buffer); 26192be9e038Ssthen iov[1].iov_len = sldns_buffer_limit(buffer); 2620eba819a2Ssthen } 2621933707f3Ssthen log_assert(iov[0].iov_len > 0); 2622933707f3Ssthen r = writev(fd, iov, 2); 2623933707f3Ssthen #else /* HAVE_WRITEV */ 2624eba819a2Ssthen if(c->tcp_write_and_read) { 2625eba819a2Ssthen r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count), 2626eba819a2Ssthen sizeof(uint16_t)-c->tcp_write_byte_count, 0); 2627eba819a2Ssthen } else { 2628933707f3Ssthen r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count), 2629933707f3Ssthen 
sizeof(uint16_t)-c->tcp_byte_count, 0); 2630eba819a2Ssthen } 2631933707f3Ssthen #endif /* HAVE_WRITEV */ 2632933707f3Ssthen if(r == -1) { 2633933707f3Ssthen #ifndef USE_WINSOCK 2634933707f3Ssthen # ifdef EPIPE 2635933707f3Ssthen if(errno == EPIPE && verbosity < 2) 2636933707f3Ssthen return 0; /* silence 'broken pipe' */ 2637933707f3Ssthen #endif 2638933707f3Ssthen if(errno == EINTR || errno == EAGAIN) 2639933707f3Ssthen return 1; 2640550cf4a9Ssthen #ifdef ECONNRESET 2641550cf4a9Ssthen if(errno == ECONNRESET && verbosity < 2) 2642550cf4a9Ssthen return 0; /* silence reset by peer */ 2643550cf4a9Ssthen #endif 2644e10d3884Sbrad # ifdef HAVE_WRITEV 2645e10d3884Sbrad log_err_addr("tcp writev", strerror(errno), 264645872187Ssthen &c->repinfo.remote_addr, 264745872187Ssthen c->repinfo.remote_addrlen); 2648e10d3884Sbrad # else /* HAVE_WRITEV */ 2649e10d3884Sbrad log_err_addr("tcp send s", strerror(errno), 265045872187Ssthen &c->repinfo.remote_addr, 265145872187Ssthen c->repinfo.remote_addrlen); 2652e10d3884Sbrad # endif /* HAVE_WRITEV */ 2653933707f3Ssthen #else 2654933707f3Ssthen if(WSAGetLastError() == WSAENOTCONN) 2655933707f3Ssthen return 1; 2656933707f3Ssthen if(WSAGetLastError() == WSAEINPROGRESS) 2657933707f3Ssthen return 1; 2658933707f3Ssthen if(WSAGetLastError() == WSAEWOULDBLOCK) { 26592ee382b6Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, 26602ee382b6Ssthen UB_EV_WRITE); 2661933707f3Ssthen return 1; 2662933707f3Ssthen } 2663550cf4a9Ssthen if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2664550cf4a9Ssthen return 0; /* silence reset by peer */ 2665e10d3884Sbrad log_err_addr("tcp send s", 2666e10d3884Sbrad wsa_strerror(WSAGetLastError()), 266745872187Ssthen &c->repinfo.remote_addr, 266845872187Ssthen c->repinfo.remote_addrlen); 2669933707f3Ssthen #endif 2670933707f3Ssthen return 0; 2671933707f3Ssthen } 2672eba819a2Ssthen if(c->tcp_write_and_read) { 2673eba819a2Ssthen c->tcp_write_byte_count += r; 2674eba819a2Ssthen if(c->tcp_write_byte_count < sizeof(uint16_t)) 
2675eba819a2Ssthen return 1; 2676eba819a2Ssthen } else { 2677933707f3Ssthen c->tcp_byte_count += r; 2678933707f3Ssthen if(c->tcp_byte_count < sizeof(uint16_t)) 2679933707f3Ssthen return 1; 26802be9e038Ssthen sldns_buffer_set_position(buffer, c->tcp_byte_count - 2681933707f3Ssthen sizeof(uint16_t)); 2682eba819a2Ssthen } 2683eba819a2Ssthen if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2684933707f3Ssthen tcp_callback_writer(c); 2685933707f3Ssthen return 1; 2686933707f3Ssthen } 2687933707f3Ssthen } 2688eba819a2Ssthen log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0); 2689eba819a2Ssthen log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2); 2690eba819a2Ssthen if(c->tcp_write_and_read) { 26919982a05dSsthen r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2), 2692eba819a2Ssthen c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0); 2693eba819a2Ssthen } else { 26942be9e038Ssthen r = send(fd, (void*)sldns_buffer_current(buffer), 26952be9e038Ssthen sldns_buffer_remaining(buffer), 0); 2696eba819a2Ssthen } 2697933707f3Ssthen if(r == -1) { 2698933707f3Ssthen #ifndef USE_WINSOCK 2699933707f3Ssthen if(errno == EINTR || errno == EAGAIN) 2700933707f3Ssthen return 1; 2701550cf4a9Ssthen #ifdef ECONNRESET 2702550cf4a9Ssthen if(errno == ECONNRESET && verbosity < 2) 2703550cf4a9Ssthen return 0; /* silence reset by peer */ 2704550cf4a9Ssthen #endif 2705933707f3Ssthen #else 2706933707f3Ssthen if(WSAGetLastError() == WSAEINPROGRESS) 2707933707f3Ssthen return 1; 2708933707f3Ssthen if(WSAGetLastError() == WSAEWOULDBLOCK) { 27092ee382b6Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 2710933707f3Ssthen return 1; 2711933707f3Ssthen } 2712550cf4a9Ssthen if(WSAGetLastError() == WSAECONNRESET && verbosity < 2) 2713550cf4a9Ssthen return 0; /* silence reset by peer */ 2714933707f3Ssthen #endif 27152c144df0Ssthen 
log_err_addr("tcp send r", sock_strerror(errno), 271645872187Ssthen &c->repinfo.remote_addr, 271745872187Ssthen c->repinfo.remote_addrlen); 2718933707f3Ssthen return 0; 2719933707f3Ssthen } 2720eba819a2Ssthen if(c->tcp_write_and_read) { 2721eba819a2Ssthen c->tcp_write_byte_count += r; 2722eba819a2Ssthen } else { 27232be9e038Ssthen sldns_buffer_skip(buffer, r); 2724eba819a2Ssthen } 2725933707f3Ssthen 2726eba819a2Ssthen if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) { 2727933707f3Ssthen tcp_callback_writer(c); 2728933707f3Ssthen } 2729933707f3Ssthen 2730933707f3Ssthen return 1; 2731933707f3Ssthen } 2732933707f3Ssthen 27338b7325afSsthen /** read again to drain buffers when there could be more to read, returns 0 27348b7325afSsthen * on failure which means the comm point is closed. */ 27358b7325afSsthen static int 2736f6b99bafSsthen tcp_req_info_read_again(int fd, struct comm_point* c) 2737f6b99bafSsthen { 2738f6b99bafSsthen while(c->tcp_req_info->read_again) { 2739f6b99bafSsthen int r; 2740f6b99bafSsthen c->tcp_req_info->read_again = 0; 2741f6b99bafSsthen if(c->tcp_is_reading) 2742f6b99bafSsthen r = comm_point_tcp_handle_read(fd, c, 0); 2743f6b99bafSsthen else r = comm_point_tcp_handle_write(fd, c); 2744f6b99bafSsthen if(!r) { 2745f6b99bafSsthen reclaim_tcp_handler(c); 2746f6b99bafSsthen if(!c->tcp_do_close) { 2747f6b99bafSsthen fptr_ok(fptr_whitelist_comm_point( 2748f6b99bafSsthen c->callback)); 2749f6b99bafSsthen (void)(*c->callback)(c, c->cb_arg, 2750f6b99bafSsthen NETEVENT_CLOSED, NULL); 2751f6b99bafSsthen } 27528b7325afSsthen return 0; 2753f6b99bafSsthen } 2754f6b99bafSsthen } 27558b7325afSsthen return 1; 2756f6b99bafSsthen } 2757f6b99bafSsthen 2758eba819a2Ssthen /** read again to drain buffers when there could be more to read */ 2759eba819a2Ssthen static void 2760eba819a2Ssthen tcp_more_read_again(int fd, struct comm_point* c) 2761eba819a2Ssthen { 
2762eba819a2Ssthen /* if the packet is done, but another one could be waiting on 2763eba819a2Ssthen * the connection, the callback signals this, and we try again */ 2764eba819a2Ssthen /* this continues until the read routines get EAGAIN or so, 2765eba819a2Ssthen * and thus does not call the callback, and the bool is 0 */ 2766eba819a2Ssthen int* moreread = c->tcp_more_read_again; 2767eba819a2Ssthen while(moreread && *moreread) { 2768eba819a2Ssthen *moreread = 0; 2769eba819a2Ssthen if(!comm_point_tcp_handle_read(fd, c, 0)) { 2770eba819a2Ssthen reclaim_tcp_handler(c); 2771eba819a2Ssthen if(!c->tcp_do_close) { 2772eba819a2Ssthen fptr_ok(fptr_whitelist_comm_point( 2773eba819a2Ssthen c->callback)); 2774eba819a2Ssthen (void)(*c->callback)(c, c->cb_arg, 2775eba819a2Ssthen NETEVENT_CLOSED, NULL); 2776eba819a2Ssthen } 2777eba819a2Ssthen return; 2778eba819a2Ssthen } 2779eba819a2Ssthen } 2780eba819a2Ssthen } 2781eba819a2Ssthen 2782eba819a2Ssthen /** write again to fill up when there could be more to write */ 2783eba819a2Ssthen static void 2784eba819a2Ssthen tcp_more_write_again(int fd, struct comm_point* c) 2785eba819a2Ssthen { 2786eba819a2Ssthen /* if the packet is done, but another is waiting to be written, 2787eba819a2Ssthen * the callback signals it and we try again. 
*/ 2788eba819a2Ssthen /* this continues until the write routines get EAGAIN or so, 2789eba819a2Ssthen * and thus does not call the callback, and the bool is 0 */ 2790eba819a2Ssthen int* morewrite = c->tcp_more_write_again; 2791eba819a2Ssthen while(morewrite && *morewrite) { 2792eba819a2Ssthen *morewrite = 0; 2793eba819a2Ssthen if(!comm_point_tcp_handle_write(fd, c)) { 2794eba819a2Ssthen reclaim_tcp_handler(c); 2795eba819a2Ssthen if(!c->tcp_do_close) { 2796eba819a2Ssthen fptr_ok(fptr_whitelist_comm_point( 2797eba819a2Ssthen c->callback)); 2798eba819a2Ssthen (void)(*c->callback)(c, c->cb_arg, 2799eba819a2Ssthen NETEVENT_CLOSED, NULL); 2800eba819a2Ssthen } 2801eba819a2Ssthen return; 2802eba819a2Ssthen } 2803eba819a2Ssthen } 2804eba819a2Ssthen } 2805eba819a2Ssthen 2806933707f3Ssthen void 2807933707f3Ssthen comm_point_tcp_handle_callback(int fd, short event, void* arg) 2808933707f3Ssthen { 2809933707f3Ssthen struct comm_point* c = (struct comm_point*)arg; 2810933707f3Ssthen log_assert(c->type == comm_tcp); 28112ee382b6Ssthen ub_comm_base_now(c->ev->base); 2812933707f3Ssthen 28138b7325afSsthen if(c->fd == -1 || c->fd != fd) 28148b7325afSsthen return; /* duplicate event, but commpoint closed. 
*/ 28158b7325afSsthen 28162be9e038Ssthen #ifdef USE_DNSCRYPT 28172be9e038Ssthen /* Initialize if this is a dnscrypt socket */ 28182be9e038Ssthen if(c->tcp_parent) { 28192be9e038Ssthen c->dnscrypt = c->tcp_parent->dnscrypt; 28202be9e038Ssthen } 28212be9e038Ssthen if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) { 28222be9e038Ssthen c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer)); 28232be9e038Ssthen if(!c->dnscrypt_buffer) { 28242be9e038Ssthen log_err("Could not allocate dnscrypt buffer"); 28257191de28Ssthen reclaim_tcp_handler(c); 28267191de28Ssthen if(!c->tcp_do_close) { 28277191de28Ssthen fptr_ok(fptr_whitelist_comm_point( 28287191de28Ssthen c->callback)); 28297191de28Ssthen (void)(*c->callback)(c, c->cb_arg, 28307191de28Ssthen NETEVENT_CLOSED, NULL); 28317191de28Ssthen } 28322be9e038Ssthen return; 28332be9e038Ssthen } 28342be9e038Ssthen } 28352be9e038Ssthen #endif 28362be9e038Ssthen 2837550cf4a9Ssthen if(event&UB_EV_TIMEOUT) { 2838550cf4a9Ssthen verbose(VERB_QUERY, "tcp took too long, dropped"); 2839550cf4a9Ssthen reclaim_tcp_handler(c); 2840550cf4a9Ssthen if(!c->tcp_do_close) { 2841550cf4a9Ssthen fptr_ok(fptr_whitelist_comm_point(c->callback)); 2842550cf4a9Ssthen (void)(*c->callback)(c, c->cb_arg, 2843550cf4a9Ssthen NETEVENT_TIMEOUT, NULL); 2844550cf4a9Ssthen } 2845550cf4a9Ssthen return; 2846550cf4a9Ssthen } 2847eba819a2Ssthen if(event&UB_EV_READ 2848eba819a2Ssthen #ifdef USE_MSG_FASTOPEN 2849eba819a2Ssthen && !(c->tcp_do_fastopen && (event&UB_EV_WRITE)) 2850eba819a2Ssthen #endif 2851eba819a2Ssthen ) { 2852f6b99bafSsthen int has_tcpq = (c->tcp_req_info != NULL); 2853eba819a2Ssthen int* moreread = c->tcp_more_read_again; 2854933707f3Ssthen if(!comm_point_tcp_handle_read(fd, c, 0)) { 2855933707f3Ssthen reclaim_tcp_handler(c); 2856933707f3Ssthen if(!c->tcp_do_close) { 2857933707f3Ssthen fptr_ok(fptr_whitelist_comm_point( 2858933707f3Ssthen c->callback)); 2859933707f3Ssthen (void)(*c->callback)(c, c->cb_arg, 2860933707f3Ssthen 
NETEVENT_CLOSED, NULL); 2861933707f3Ssthen } 2862eba819a2Ssthen return; 2863933707f3Ssthen } 28648b7325afSsthen if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) { 28658b7325afSsthen if(!tcp_req_info_read_again(fd, c)) 28668b7325afSsthen return; 28678b7325afSsthen } 2868eba819a2Ssthen if(moreread && *moreread) 2869eba819a2Ssthen tcp_more_read_again(fd, c); 2870933707f3Ssthen return; 2871933707f3Ssthen } 28722ee382b6Ssthen if(event&UB_EV_WRITE) { 2873f6b99bafSsthen int has_tcpq = (c->tcp_req_info != NULL); 2874eba819a2Ssthen int* morewrite = c->tcp_more_write_again; 2875933707f3Ssthen if(!comm_point_tcp_handle_write(fd, c)) { 2876933707f3Ssthen reclaim_tcp_handler(c); 2877933707f3Ssthen if(!c->tcp_do_close) { 2878933707f3Ssthen fptr_ok(fptr_whitelist_comm_point( 2879933707f3Ssthen c->callback)); 2880933707f3Ssthen (void)(*c->callback)(c, c->cb_arg, 2881933707f3Ssthen NETEVENT_CLOSED, NULL); 2882933707f3Ssthen } 2883eba819a2Ssthen return; 2884933707f3Ssthen } 28858b7325afSsthen if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again) { 28868b7325afSsthen if(!tcp_req_info_read_again(fd, c)) 28878b7325afSsthen return; 28888b7325afSsthen } 2889eba819a2Ssthen if(morewrite && *morewrite) 2890eba819a2Ssthen tcp_more_write_again(fd, c); 2891933707f3Ssthen return; 2892933707f3Ssthen } 2893933707f3Ssthen log_err("Ignored event %d for tcphdl.", event); 2894933707f3Ssthen } 2895933707f3Ssthen 2896938a3a5eSflorian /** Make http handler free for next assignment */ 2897938a3a5eSflorian static void 2898938a3a5eSflorian reclaim_http_handler(struct comm_point* c) 2899938a3a5eSflorian { 2900938a3a5eSflorian log_assert(c->type == comm_http); 2901938a3a5eSflorian if(c->ssl) { 2902938a3a5eSflorian #ifdef HAVE_SSL 2903938a3a5eSflorian SSL_shutdown(c->ssl); 2904938a3a5eSflorian SSL_free(c->ssl); 2905938a3a5eSflorian c->ssl = NULL; 2906938a3a5eSflorian #endif 2907938a3a5eSflorian } 2908938a3a5eSflorian comm_point_close(c); 2909938a3a5eSflorian if(c->tcp_parent) { 
2910e21c60efSsthen if(c != c->tcp_parent->tcp_free) { 2911938a3a5eSflorian c->tcp_parent->cur_tcp_count--; 2912938a3a5eSflorian c->tcp_free = c->tcp_parent->tcp_free; 2913938a3a5eSflorian c->tcp_parent->tcp_free = c; 2914e21c60efSsthen } 2915938a3a5eSflorian if(!c->tcp_free) { 2916938a3a5eSflorian /* re-enable listening on accept socket */ 2917938a3a5eSflorian comm_point_start_listening(c->tcp_parent, -1, -1); 2918938a3a5eSflorian } 2919938a3a5eSflorian } 2920938a3a5eSflorian } 2921938a3a5eSflorian 2922938a3a5eSflorian /** read more data for http (with ssl) */ 2923938a3a5eSflorian static int 2924938a3a5eSflorian ssl_http_read_more(struct comm_point* c) 2925938a3a5eSflorian { 2926938a3a5eSflorian #ifdef HAVE_SSL 2927938a3a5eSflorian int r; 2928938a3a5eSflorian log_assert(sldns_buffer_remaining(c->buffer) > 0); 2929938a3a5eSflorian ERR_clear_error(); 2930938a3a5eSflorian r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer), 2931938a3a5eSflorian (int)sldns_buffer_remaining(c->buffer)); 2932938a3a5eSflorian if(r <= 0) { 2933938a3a5eSflorian int want = SSL_get_error(c->ssl, r); 2934938a3a5eSflorian if(want == SSL_ERROR_ZERO_RETURN) { 2935938a3a5eSflorian return 0; /* shutdown, closed */ 2936938a3a5eSflorian } else if(want == SSL_ERROR_WANT_READ) { 2937938a3a5eSflorian return 1; /* read more later */ 2938938a3a5eSflorian } else if(want == SSL_ERROR_WANT_WRITE) { 2939938a3a5eSflorian c->ssl_shake_state = comm_ssl_shake_hs_write; 2940938a3a5eSflorian comm_point_listen_for_rw(c, 0, 1); 2941938a3a5eSflorian return 1; 2942938a3a5eSflorian } else if(want == SSL_ERROR_SYSCALL) { 2943550cf4a9Ssthen #ifdef ECONNRESET 2944550cf4a9Ssthen if(errno == ECONNRESET && verbosity < 2) 2945550cf4a9Ssthen return 0; /* silence reset by peer */ 2946550cf4a9Ssthen #endif 2947938a3a5eSflorian if(errno != 0) 2948938a3a5eSflorian log_err("SSL_read syscall: %s", 2949938a3a5eSflorian strerror(errno)); 2950938a3a5eSflorian return 0; 2951938a3a5eSflorian } 2952d896b962Ssthen 
log_crypto_err_io("could not SSL_read", want); 2953938a3a5eSflorian return 0; 2954938a3a5eSflorian } 2955191f22c6Ssthen verbose(VERB_ALGO, "ssl http read more skip to %d + %d", 2956191f22c6Ssthen (int)sldns_buffer_position(c->buffer), (int)r); 2957938a3a5eSflorian sldns_buffer_skip(c->buffer, (ssize_t)r); 2958938a3a5eSflorian return 1; 2959938a3a5eSflorian #else 2960938a3a5eSflorian (void)c; 2961938a3a5eSflorian return 0; 2962938a3a5eSflorian #endif /* HAVE_SSL */ 2963938a3a5eSflorian } 2964938a3a5eSflorian 2965938a3a5eSflorian /** read more data for http */ 2966938a3a5eSflorian static int 2967938a3a5eSflorian http_read_more(int fd, struct comm_point* c) 2968938a3a5eSflorian { 2969938a3a5eSflorian ssize_t r; 2970938a3a5eSflorian log_assert(sldns_buffer_remaining(c->buffer) > 0); 2971938a3a5eSflorian r = recv(fd, (void*)sldns_buffer_current(c->buffer), 297245872187Ssthen sldns_buffer_remaining(c->buffer), MSG_DONTWAIT); 2973938a3a5eSflorian if(r == 0) { 2974938a3a5eSflorian return 0; 2975938a3a5eSflorian } else if(r == -1) { 2976938a3a5eSflorian #ifndef USE_WINSOCK 2977938a3a5eSflorian if(errno == EINTR || errno == EAGAIN) 2978938a3a5eSflorian return 1; 2979938a3a5eSflorian #else /* USE_WINSOCK */ 2980938a3a5eSflorian if(WSAGetLastError() == WSAECONNRESET) 2981938a3a5eSflorian return 0; 2982938a3a5eSflorian if(WSAGetLastError() == WSAEINPROGRESS) 2983938a3a5eSflorian return 1; 2984938a3a5eSflorian if(WSAGetLastError() == WSAEWOULDBLOCK) { 2985938a3a5eSflorian ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 2986938a3a5eSflorian return 1; 2987938a3a5eSflorian } 2988938a3a5eSflorian #endif 29892c144df0Ssthen log_err_addr("read (in http r)", sock_strerror(errno), 299045872187Ssthen &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 2991938a3a5eSflorian return 0; 2992938a3a5eSflorian } 2993191f22c6Ssthen verbose(VERB_ALGO, "http read more skip to %d + %d", 2994191f22c6Ssthen (int)sldns_buffer_position(c->buffer), (int)r); 2995938a3a5eSflorian 
sldns_buffer_skip(c->buffer, r); 2996938a3a5eSflorian return 1; 2997938a3a5eSflorian } 2998938a3a5eSflorian 2999938a3a5eSflorian /** return true if http header has been read (one line complete) */ 3000938a3a5eSflorian static int 3001938a3a5eSflorian http_header_done(sldns_buffer* buf) 3002938a3a5eSflorian { 3003938a3a5eSflorian size_t i; 3004938a3a5eSflorian for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 3005938a3a5eSflorian /* there was a \r before the \n, but we ignore that */ 3006938a3a5eSflorian if((char)sldns_buffer_read_u8_at(buf, i) == '\n') 3007938a3a5eSflorian return 1; 3008938a3a5eSflorian } 3009938a3a5eSflorian return 0; 3010938a3a5eSflorian } 3011938a3a5eSflorian 3012938a3a5eSflorian /** return character string into buffer for header line, moves buffer 3013938a3a5eSflorian * past that line and puts zero terminator into linefeed-newline */ 3014938a3a5eSflorian static char* 3015938a3a5eSflorian http_header_line(sldns_buffer* buf) 3016938a3a5eSflorian { 3017938a3a5eSflorian char* result = (char*)sldns_buffer_current(buf); 3018938a3a5eSflorian size_t i; 3019938a3a5eSflorian for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) { 3020938a3a5eSflorian /* terminate the string on the \r */ 3021938a3a5eSflorian if((char)sldns_buffer_read_u8_at(buf, i) == '\r') 3022938a3a5eSflorian sldns_buffer_write_u8_at(buf, i, 0); 3023938a3a5eSflorian /* terminate on the \n and skip past the it and done */ 3024938a3a5eSflorian if((char)sldns_buffer_read_u8_at(buf, i) == '\n') { 3025938a3a5eSflorian sldns_buffer_write_u8_at(buf, i, 0); 3026938a3a5eSflorian sldns_buffer_set_position(buf, i+1); 3027938a3a5eSflorian return result; 3028938a3a5eSflorian } 3029938a3a5eSflorian } 3030938a3a5eSflorian return NULL; 3031938a3a5eSflorian } 3032938a3a5eSflorian 3033938a3a5eSflorian /** move unread buffer to start and clear rest for putting the rest into it */ 3034938a3a5eSflorian static void 3035938a3a5eSflorian http_moveover_buffer(sldns_buffer* buf) 
3036938a3a5eSflorian { 3037938a3a5eSflorian size_t pos = sldns_buffer_position(buf); 3038938a3a5eSflorian size_t len = sldns_buffer_remaining(buf); 3039938a3a5eSflorian sldns_buffer_clear(buf); 3040938a3a5eSflorian memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len); 3041938a3a5eSflorian sldns_buffer_set_position(buf, len); 3042938a3a5eSflorian } 3043938a3a5eSflorian 3044938a3a5eSflorian /** a http header is complete, process it */ 3045938a3a5eSflorian static int 3046938a3a5eSflorian http_process_initial_header(struct comm_point* c) 3047938a3a5eSflorian { 3048938a3a5eSflorian char* line = http_header_line(c->buffer); 3049938a3a5eSflorian if(!line) return 1; 3050938a3a5eSflorian verbose(VERB_ALGO, "http header: %s", line); 3051938a3a5eSflorian if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) { 3052938a3a5eSflorian /* check returncode */ 3053938a3a5eSflorian if(line[9] != '2') { 3054938a3a5eSflorian verbose(VERB_ALGO, "http bad status %s", line+9); 3055938a3a5eSflorian return 0; 3056938a3a5eSflorian } 3057938a3a5eSflorian } else if(strncasecmp(line, "Content-Length: ", 16) == 0) { 3058938a3a5eSflorian if(!c->http_is_chunked) 3059938a3a5eSflorian c->tcp_byte_count = (size_t)atoi(line+16); 3060938a3a5eSflorian } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) { 3061938a3a5eSflorian c->tcp_byte_count = 0; 3062938a3a5eSflorian c->http_is_chunked = 1; 3063938a3a5eSflorian } else if(line[0] == 0) { 3064938a3a5eSflorian /* end of initial headers */ 3065938a3a5eSflorian c->http_in_headers = 0; 3066938a3a5eSflorian if(c->http_is_chunked) 3067938a3a5eSflorian c->http_in_chunk_headers = 1; 3068938a3a5eSflorian /* remove header text from front of buffer 3069938a3a5eSflorian * the buffer is going to be used to return the data segment 3070938a3a5eSflorian * itself and we don't want the header to get returned 3071938a3a5eSflorian * prepended with it */ 3072938a3a5eSflorian http_moveover_buffer(c->buffer); 3073938a3a5eSflorian sldns_buffer_flip(c->buffer); 
3074938a3a5eSflorian return 1; 3075938a3a5eSflorian } 3076938a3a5eSflorian /* ignore other headers */ 3077938a3a5eSflorian return 1; 3078938a3a5eSflorian } 3079938a3a5eSflorian 3080938a3a5eSflorian /** a chunk header is complete, process it, return 0=fail, 1=continue next 3081938a3a5eSflorian * header line, 2=done with chunked transfer*/ 3082938a3a5eSflorian static int 3083938a3a5eSflorian http_process_chunk_header(struct comm_point* c) 3084938a3a5eSflorian { 3085938a3a5eSflorian char* line = http_header_line(c->buffer); 3086938a3a5eSflorian if(!line) return 1; 3087938a3a5eSflorian if(c->http_in_chunk_headers == 3) { 3088938a3a5eSflorian verbose(VERB_ALGO, "http chunk trailer: %s", line); 3089938a3a5eSflorian /* are we done ? */ 3090938a3a5eSflorian if(line[0] == 0 && c->tcp_byte_count == 0) { 3091938a3a5eSflorian /* callback of http reader when NETEVENT_DONE, 3092938a3a5eSflorian * end of data, with no data in buffer */ 3093938a3a5eSflorian sldns_buffer_set_position(c->buffer, 0); 3094938a3a5eSflorian sldns_buffer_set_limit(c->buffer, 0); 3095938a3a5eSflorian fptr_ok(fptr_whitelist_comm_point(c->callback)); 3096938a3a5eSflorian (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 3097938a3a5eSflorian /* return that we are done */ 3098938a3a5eSflorian return 2; 3099938a3a5eSflorian } 3100938a3a5eSflorian if(line[0] == 0) { 3101938a3a5eSflorian /* continue with header of the next chunk */ 3102938a3a5eSflorian c->http_in_chunk_headers = 1; 3103938a3a5eSflorian /* remove header text from front of buffer */ 3104938a3a5eSflorian http_moveover_buffer(c->buffer); 3105938a3a5eSflorian sldns_buffer_flip(c->buffer); 3106938a3a5eSflorian return 1; 3107938a3a5eSflorian } 3108938a3a5eSflorian /* ignore further trail headers */ 3109938a3a5eSflorian return 1; 3110938a3a5eSflorian } 3111938a3a5eSflorian verbose(VERB_ALGO, "http chunk header: %s", line); 3112938a3a5eSflorian if(c->http_in_chunk_headers == 1) { 3113938a3a5eSflorian /* read chunked start line */ 
3114938a3a5eSflorian char* end = NULL; 3115938a3a5eSflorian c->tcp_byte_count = (size_t)strtol(line, &end, 16); 3116938a3a5eSflorian if(end == line) 3117938a3a5eSflorian return 0; 3118938a3a5eSflorian c->http_in_chunk_headers = 0; 3119938a3a5eSflorian /* remove header text from front of buffer */ 3120938a3a5eSflorian http_moveover_buffer(c->buffer); 3121938a3a5eSflorian sldns_buffer_flip(c->buffer); 3122938a3a5eSflorian if(c->tcp_byte_count == 0) { 3123938a3a5eSflorian /* done with chunks, process chunk_trailer lines */ 3124938a3a5eSflorian c->http_in_chunk_headers = 3; 3125938a3a5eSflorian } 3126938a3a5eSflorian return 1; 3127938a3a5eSflorian } 3128938a3a5eSflorian /* ignore other headers */ 3129938a3a5eSflorian return 1; 3130938a3a5eSflorian } 3131938a3a5eSflorian 3132191f22c6Ssthen /** handle nonchunked data segment, 0=fail, 1=wait */ 3133938a3a5eSflorian static int 3134938a3a5eSflorian http_nonchunk_segment(struct comm_point* c) 3135938a3a5eSflorian { 3136938a3a5eSflorian /* c->buffer at position..limit has new data we read in. 3137938a3a5eSflorian * the buffer itself is full of nonchunked data. 3138938a3a5eSflorian * we are looking to read tcp_byte_count more data 3139938a3a5eSflorian * and then the transfer is done. 
*/ 3140938a3a5eSflorian size_t remainbufferlen; 3141191f22c6Ssthen size_t got_now = sldns_buffer_limit(c->buffer); 3142938a3a5eSflorian if(c->tcp_byte_count <= got_now) { 3143938a3a5eSflorian /* done, this is the last data fragment */ 3144938a3a5eSflorian c->http_stored = 0; 3145938a3a5eSflorian sldns_buffer_set_position(c->buffer, 0); 3146938a3a5eSflorian fptr_ok(fptr_whitelist_comm_point(c->callback)); 3147938a3a5eSflorian (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL); 3148938a3a5eSflorian return 1; 3149938a3a5eSflorian } 3150938a3a5eSflorian /* if we have the buffer space, 3151938a3a5eSflorian * read more data collected into the buffer */ 3152938a3a5eSflorian remainbufferlen = sldns_buffer_capacity(c->buffer) - 3153938a3a5eSflorian sldns_buffer_limit(c->buffer); 3154191f22c6Ssthen if(remainbufferlen+got_now >= c->tcp_byte_count || 31550bdb4f62Ssthen remainbufferlen >= (size_t)(c->ssl?16384:2048)) { 3156938a3a5eSflorian size_t total = sldns_buffer_limit(c->buffer); 3157938a3a5eSflorian sldns_buffer_clear(c->buffer); 3158938a3a5eSflorian sldns_buffer_set_position(c->buffer, total); 3159938a3a5eSflorian c->http_stored = total; 3160938a3a5eSflorian /* return and wait to read more */ 3161938a3a5eSflorian return 1; 3162938a3a5eSflorian } 3163938a3a5eSflorian /* call callback with this data amount, then 3164938a3a5eSflorian * wait for more */ 3165191f22c6Ssthen c->tcp_byte_count -= got_now; 3166938a3a5eSflorian c->http_stored = 0; 3167938a3a5eSflorian sldns_buffer_set_position(c->buffer, 0); 3168938a3a5eSflorian fptr_ok(fptr_whitelist_comm_point(c->callback)); 3169938a3a5eSflorian (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 3170938a3a5eSflorian /* c->callback has to buffer_clear(c->buffer). 
*/ 3171938a3a5eSflorian /* return and wait to read more */ 3172938a3a5eSflorian return 1; 3173938a3a5eSflorian } 3174938a3a5eSflorian 31759982a05dSsthen /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */ 3176938a3a5eSflorian static int 3177938a3a5eSflorian http_chunked_segment(struct comm_point* c) 3178938a3a5eSflorian { 3179938a3a5eSflorian /* the c->buffer has from position..limit new data we read. */ 3180938a3a5eSflorian /* the current chunk has length tcp_byte_count. 3181938a3a5eSflorian * once we read that read more chunk headers. 3182938a3a5eSflorian */ 3183938a3a5eSflorian size_t remainbufferlen; 3184938a3a5eSflorian size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored; 31859982a05dSsthen verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer)); 3186938a3a5eSflorian if(c->tcp_byte_count <= got_now) { 3187938a3a5eSflorian /* the chunk has completed (with perhaps some extra data 3188938a3a5eSflorian * from next chunk header and next chunk) */ 3189938a3a5eSflorian /* save too much info into temp buffer */ 3190938a3a5eSflorian size_t fraglen; 3191938a3a5eSflorian struct comm_reply repinfo; 3192938a3a5eSflorian c->http_stored = 0; 3193938a3a5eSflorian sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count); 3194938a3a5eSflorian sldns_buffer_clear(c->http_temp); 3195938a3a5eSflorian sldns_buffer_write(c->http_temp, 3196938a3a5eSflorian sldns_buffer_current(c->buffer), 3197938a3a5eSflorian sldns_buffer_remaining(c->buffer)); 3198938a3a5eSflorian sldns_buffer_flip(c->http_temp); 3199938a3a5eSflorian 3200938a3a5eSflorian /* callback with this fragment */ 3201938a3a5eSflorian fraglen = sldns_buffer_position(c->buffer); 3202938a3a5eSflorian sldns_buffer_set_position(c->buffer, 0); 3203938a3a5eSflorian sldns_buffer_set_limit(c->buffer, 
fraglen); 3204938a3a5eSflorian repinfo = c->repinfo; 3205938a3a5eSflorian fptr_ok(fptr_whitelist_comm_point(c->callback)); 3206938a3a5eSflorian (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo); 3207938a3a5eSflorian /* c->callback has to buffer_clear(). */ 3208938a3a5eSflorian 3209938a3a5eSflorian /* is commpoint deleted? */ 3210938a3a5eSflorian if(!repinfo.c) { 3211938a3a5eSflorian return 1; 3212938a3a5eSflorian } 3213938a3a5eSflorian /* copy waiting info */ 3214938a3a5eSflorian sldns_buffer_clear(c->buffer); 3215938a3a5eSflorian sldns_buffer_write(c->buffer, 3216938a3a5eSflorian sldns_buffer_begin(c->http_temp), 3217938a3a5eSflorian sldns_buffer_remaining(c->http_temp)); 3218938a3a5eSflorian sldns_buffer_flip(c->buffer); 3219938a3a5eSflorian /* process end of chunk trailer header lines, until 3220938a3a5eSflorian * an empty line */ 3221938a3a5eSflorian c->http_in_chunk_headers = 3; 3222938a3a5eSflorian /* process more data in buffer (if any) */ 3223938a3a5eSflorian return 2; 3224938a3a5eSflorian } 3225938a3a5eSflorian c->tcp_byte_count -= got_now; 3226938a3a5eSflorian 3227938a3a5eSflorian /* if we have the buffer space, 3228938a3a5eSflorian * read more data collected into the buffer */ 3229938a3a5eSflorian remainbufferlen = sldns_buffer_capacity(c->buffer) - 3230938a3a5eSflorian sldns_buffer_limit(c->buffer); 3231938a3a5eSflorian if(remainbufferlen >= c->tcp_byte_count || 3232938a3a5eSflorian remainbufferlen >= 2048) { 3233938a3a5eSflorian size_t total = sldns_buffer_limit(c->buffer); 3234938a3a5eSflorian sldns_buffer_clear(c->buffer); 3235938a3a5eSflorian sldns_buffer_set_position(c->buffer, total); 3236938a3a5eSflorian c->http_stored = total; 3237938a3a5eSflorian /* return and wait to read more */ 3238938a3a5eSflorian return 1; 3239938a3a5eSflorian } 3240938a3a5eSflorian 3241938a3a5eSflorian /* callback of http reader for a new part of the data */ 3242938a3a5eSflorian c->http_stored = 0; 3243938a3a5eSflorian sldns_buffer_set_position(c->buffer, 0); 
3244938a3a5eSflorian fptr_ok(fptr_whitelist_comm_point(c->callback)); 3245938a3a5eSflorian (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL); 3246938a3a5eSflorian /* c->callback has to buffer_clear(c->buffer). */ 3247938a3a5eSflorian /* return and wait to read more */ 3248938a3a5eSflorian return 1; 3249938a3a5eSflorian } 3250938a3a5eSflorian 32512c144df0Ssthen #ifdef HAVE_NGHTTP2 32522c144df0Ssthen /** Create new http2 session. Called when creating handling comm point. */ 32539982a05dSsthen static struct http2_session* http2_session_create(struct comm_point* c) 32542c144df0Ssthen { 32552c144df0Ssthen struct http2_session* session = calloc(1, sizeof(*session)); 32562c144df0Ssthen if(!session) { 32572c144df0Ssthen log_err("malloc failure while creating http2 session"); 32582c144df0Ssthen return NULL; 32592c144df0Ssthen } 32602c144df0Ssthen session->c = c; 32612c144df0Ssthen 32622c144df0Ssthen return session; 32632c144df0Ssthen } 32642c144df0Ssthen #endif 32652c144df0Ssthen 32662c144df0Ssthen /** Delete http2 session. 
After closing connection or on error */ 32679982a05dSsthen static void http2_session_delete(struct http2_session* h2_session) 32682c144df0Ssthen { 32692c144df0Ssthen #ifdef HAVE_NGHTTP2 32702c144df0Ssthen if(h2_session->callbacks) 32712c144df0Ssthen nghttp2_session_callbacks_del(h2_session->callbacks); 32722c144df0Ssthen free(h2_session); 32732c144df0Ssthen #else 32742c144df0Ssthen (void)h2_session; 32752c144df0Ssthen #endif 32762c144df0Ssthen } 32772c144df0Ssthen 32782c144df0Ssthen #ifdef HAVE_NGHTTP2 32792c144df0Ssthen struct http2_stream* http2_stream_create(int32_t stream_id) 32802c144df0Ssthen { 32812c144df0Ssthen struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream)); 32822c144df0Ssthen if(!h2_stream) { 32832c144df0Ssthen log_err("malloc failure while creating http2 stream"); 32842c144df0Ssthen return NULL; 32852c144df0Ssthen } 32862c144df0Ssthen h2_stream->stream_id = stream_id; 32872c144df0Ssthen return h2_stream; 32882c144df0Ssthen } 32892c144df0Ssthen 32902c144df0Ssthen /** Delete http2 stream. 
After session delete or stream close callback */ 32912c144df0Ssthen static void http2_stream_delete(struct http2_session* h2_session, 32922c144df0Ssthen struct http2_stream* h2_stream) 32932c144df0Ssthen { 32942c144df0Ssthen if(h2_stream->mesh_state) { 32952c144df0Ssthen mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state, 32962c144df0Ssthen h2_session->c); 32972c144df0Ssthen h2_stream->mesh_state = NULL; 32982c144df0Ssthen } 32992c144df0Ssthen http2_req_stream_clear(h2_stream); 33002c144df0Ssthen free(h2_stream); 33012c144df0Ssthen } 33022c144df0Ssthen #endif 33032c144df0Ssthen 33042c144df0Ssthen void http2_stream_add_meshstate(struct http2_stream* h2_stream, 33052c144df0Ssthen struct mesh_area* mesh, struct mesh_state* m) 33062c144df0Ssthen { 33072c144df0Ssthen h2_stream->mesh = mesh; 33082c144df0Ssthen h2_stream->mesh_state = m; 33092c144df0Ssthen } 33102c144df0Ssthen 3311*98bc733bSsthen void http2_stream_remove_mesh_state(struct http2_stream* h2_stream) 3312*98bc733bSsthen { 3313*98bc733bSsthen if(!h2_stream) 3314*98bc733bSsthen return; 3315*98bc733bSsthen h2_stream->mesh_state = NULL; 3316*98bc733bSsthen } 3317*98bc733bSsthen 33182c144df0Ssthen /** delete http2 session server. After closing connection. 
*/ 33192c144df0Ssthen static void http2_session_server_delete(struct http2_session* h2_session) 33202c144df0Ssthen { 33212c144df0Ssthen #ifdef HAVE_NGHTTP2 33222c144df0Ssthen struct http2_stream* h2_stream, *next; 33232c144df0Ssthen nghttp2_session_del(h2_session->session); /* NULL input is fine */ 33242c144df0Ssthen h2_session->session = NULL; 33252c144df0Ssthen for(h2_stream = h2_session->first_stream; h2_stream;) { 33262c144df0Ssthen next = h2_stream->next; 33272c144df0Ssthen http2_stream_delete(h2_session, h2_stream); 33282c144df0Ssthen h2_stream = next; 33292c144df0Ssthen } 33302c144df0Ssthen h2_session->first_stream = NULL; 33312c144df0Ssthen h2_session->is_drop = 0; 33322c144df0Ssthen h2_session->postpone_drop = 0; 33332c144df0Ssthen h2_session->c->h2_stream = NULL; 33342c144df0Ssthen #endif 33352c144df0Ssthen (void)h2_session; 33362c144df0Ssthen } 33372c144df0Ssthen 33382c144df0Ssthen #ifdef HAVE_NGHTTP2 33392c144df0Ssthen void http2_session_add_stream(struct http2_session* h2_session, 33402c144df0Ssthen struct http2_stream* h2_stream) 33412c144df0Ssthen { 33422c144df0Ssthen if(h2_session->first_stream) 33432c144df0Ssthen h2_session->first_stream->prev = h2_stream; 33442c144df0Ssthen h2_stream->next = h2_session->first_stream; 33452c144df0Ssthen h2_session->first_stream = h2_stream; 33462c144df0Ssthen } 33472c144df0Ssthen 33482c144df0Ssthen /** remove stream from session linked list. 
After stream close callback or 33492c144df0Ssthen * closing connection */ 33509982a05dSsthen static void http2_session_remove_stream(struct http2_session* h2_session, 33512c144df0Ssthen struct http2_stream* h2_stream) 33522c144df0Ssthen { 33532c144df0Ssthen if(h2_stream->prev) 33542c144df0Ssthen h2_stream->prev->next = h2_stream->next; 33552c144df0Ssthen else 33562c144df0Ssthen h2_session->first_stream = h2_stream->next; 33572c144df0Ssthen if(h2_stream->next) 33582c144df0Ssthen h2_stream->next->prev = h2_stream->prev; 33592c144df0Ssthen 33602c144df0Ssthen } 33612c144df0Ssthen 33622c144df0Ssthen int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session), 33632c144df0Ssthen int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg) 33642c144df0Ssthen { 33652c144df0Ssthen struct http2_stream* h2_stream; 33662c144df0Ssthen struct http2_session* h2_session = (struct http2_session*)cb_arg; 33672c144df0Ssthen if(!(h2_stream = nghttp2_session_get_stream_user_data( 33682c144df0Ssthen h2_session->session, stream_id))) { 33692c144df0Ssthen return 0; 33702c144df0Ssthen } 33712c144df0Ssthen http2_session_remove_stream(h2_session, h2_stream); 33722c144df0Ssthen http2_stream_delete(h2_session, h2_stream); 33732c144df0Ssthen return 0; 33742c144df0Ssthen } 33752c144df0Ssthen 33762c144df0Ssthen ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf, 33772c144df0Ssthen size_t len, int ATTR_UNUSED(flags), void* cb_arg) 33782c144df0Ssthen { 33792c144df0Ssthen struct http2_session* h2_session = (struct http2_session*)cb_arg; 3380eba819a2Ssthen ssize_t ret; 33812c144df0Ssthen 33822c144df0Ssthen log_assert(h2_session->c->type == comm_http); 33832c144df0Ssthen log_assert(h2_session->c->h2_session); 33842c144df0Ssthen 3385eba819a2Ssthen #ifdef HAVE_SSL 3386eba819a2Ssthen if(h2_session->c->ssl) { 3387eba819a2Ssthen int r; 33882c144df0Ssthen ERR_clear_error(); 33892c144df0Ssthen r = SSL_read(h2_session->c->ssl, buf, len); 33902c144df0Ssthen if(r <= 0) { 
33912c144df0Ssthen int want = SSL_get_error(h2_session->c->ssl, r); 33922c144df0Ssthen if(want == SSL_ERROR_ZERO_RETURN) { 33932c144df0Ssthen return NGHTTP2_ERR_EOF; 33942c144df0Ssthen } else if(want == SSL_ERROR_WANT_READ) { 33952c144df0Ssthen return NGHTTP2_ERR_WOULDBLOCK; 33962c144df0Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 33972c144df0Ssthen h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write; 33982c144df0Ssthen comm_point_listen_for_rw(h2_session->c, 0, 1); 33992c144df0Ssthen return NGHTTP2_ERR_WOULDBLOCK; 34002c144df0Ssthen } else if(want == SSL_ERROR_SYSCALL) { 34012c144df0Ssthen #ifdef ECONNRESET 34022c144df0Ssthen if(errno == ECONNRESET && verbosity < 2) 34032c144df0Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 34042c144df0Ssthen #endif 34052c144df0Ssthen if(errno != 0) 34062c144df0Ssthen log_err("SSL_read syscall: %s", 34072c144df0Ssthen strerror(errno)); 34082c144df0Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 34092c144df0Ssthen } 3410d896b962Ssthen log_crypto_err_io("could not SSL_read", want); 34112c144df0Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 34122c144df0Ssthen } 34132c144df0Ssthen return r; 3414eba819a2Ssthen } 3415eba819a2Ssthen #endif /* HAVE_SSL */ 3416eba819a2Ssthen 341745872187Ssthen ret = recv(h2_session->c->fd, buf, len, MSG_DONTWAIT); 3418eba819a2Ssthen if(ret == 0) { 3419eba819a2Ssthen return NGHTTP2_ERR_EOF; 3420eba819a2Ssthen } else if(ret < 0) { 3421eba819a2Ssthen #ifndef USE_WINSOCK 3422eba819a2Ssthen if(errno == EINTR || errno == EAGAIN) 3423eba819a2Ssthen return NGHTTP2_ERR_WOULDBLOCK; 3424eba819a2Ssthen #ifdef ECONNRESET 3425eba819a2Ssthen if(errno == ECONNRESET && verbosity < 2) 3426eba819a2Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 34272c144df0Ssthen #endif 3428eba819a2Ssthen log_err_addr("could not http2 recv: %s", strerror(errno), 342945872187Ssthen &h2_session->c->repinfo.remote_addr, 343045872187Ssthen h2_session->c->repinfo.remote_addrlen); 3431eba819a2Ssthen #else /* USE_WINSOCK */ 3432eba819a2Ssthen 
if(WSAGetLastError() == WSAECONNRESET) 3433eba819a2Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 3434eba819a2Ssthen if(WSAGetLastError() == WSAEINPROGRESS) 3435eba819a2Ssthen return NGHTTP2_ERR_WOULDBLOCK; 3436eba819a2Ssthen if(WSAGetLastError() == WSAEWOULDBLOCK) { 3437eba819a2Ssthen ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3438eba819a2Ssthen UB_EV_READ); 3439eba819a2Ssthen return NGHTTP2_ERR_WOULDBLOCK; 3440eba819a2Ssthen } 3441eba819a2Ssthen log_err_addr("could not http2 recv: %s", 3442eba819a2Ssthen wsa_strerror(WSAGetLastError()), 344345872187Ssthen &h2_session->c->repinfo.remote_addr, 344445872187Ssthen h2_session->c->repinfo.remote_addrlen); 3445eba819a2Ssthen #endif 3446eba819a2Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 3447eba819a2Ssthen } 3448eba819a2Ssthen return ret; 34492c144df0Ssthen } 34502c144df0Ssthen #endif /* HAVE_NGHTTP2 */ 34512c144df0Ssthen 34522c144df0Ssthen /** Handle http2 read */ 34532c144df0Ssthen static int 34542c144df0Ssthen comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c) 34552c144df0Ssthen { 34562c144df0Ssthen #ifdef HAVE_NGHTTP2 34572c144df0Ssthen int ret; 34582c144df0Ssthen log_assert(c->h2_session); 34592c144df0Ssthen 34602c144df0Ssthen /* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */ 34612c144df0Ssthen ret = nghttp2_session_recv(c->h2_session->session); 34622c144df0Ssthen if(ret) { 34632c144df0Ssthen if(ret != NGHTTP2_ERR_EOF && 34642c144df0Ssthen ret != NGHTTP2_ERR_CALLBACK_FAILURE) { 3465eba819a2Ssthen char a[256]; 346645872187Ssthen addr_to_str(&c->repinfo.remote_addr, 346745872187Ssthen c->repinfo.remote_addrlen, a, sizeof(a)); 3468eba819a2Ssthen verbose(VERB_QUERY, "http2: session_recv from %s failed, " 3469eba819a2Ssthen "error: %s", a, nghttp2_strerror(ret)); 34702c144df0Ssthen } 34712c144df0Ssthen return 0; 34722c144df0Ssthen } 34732c144df0Ssthen if(nghttp2_session_want_write(c->h2_session->session)) { 34742c144df0Ssthen c->tcp_is_reading = 0; 34752c144df0Ssthen 
comm_point_stop_listening(c); 34769982a05dSsthen comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 34772c144df0Ssthen } else if(!nghttp2_session_want_read(c->h2_session->session)) 34782c144df0Ssthen return 0; /* connection can be closed */ 34792c144df0Ssthen return 1; 34802c144df0Ssthen #else 34812c144df0Ssthen (void)c; 34822c144df0Ssthen return 0; 34832c144df0Ssthen #endif 34842c144df0Ssthen } 34852c144df0Ssthen 3486938a3a5eSflorian /** 3487938a3a5eSflorian * Handle http reading callback. 3488938a3a5eSflorian * @param fd: file descriptor of socket. 3489938a3a5eSflorian * @param c: comm point to read from into buffer. 3490938a3a5eSflorian * @return: 0 on error 3491938a3a5eSflorian */ 3492938a3a5eSflorian static int 3493938a3a5eSflorian comm_point_http_handle_read(int fd, struct comm_point* c) 3494938a3a5eSflorian { 3495938a3a5eSflorian log_assert(c->type == comm_http); 3496938a3a5eSflorian log_assert(fd != -1); 3497938a3a5eSflorian 3498938a3a5eSflorian /* if we are in ssl handshake, handle SSL handshake */ 3499938a3a5eSflorian #ifdef HAVE_SSL 3500938a3a5eSflorian if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3501938a3a5eSflorian if(!ssl_handshake(c)) 3502938a3a5eSflorian return 0; 3503938a3a5eSflorian if(c->ssl_shake_state != comm_ssl_shake_none) 3504938a3a5eSflorian return 1; 3505938a3a5eSflorian } 3506938a3a5eSflorian #endif /* HAVE_SSL */ 3507938a3a5eSflorian 3508938a3a5eSflorian if(!c->tcp_is_reading) 3509938a3a5eSflorian return 1; 35102c144df0Ssthen 35112c144df0Ssthen if(c->use_h2) { 35122c144df0Ssthen return comm_point_http2_handle_read(fd, c); 35132c144df0Ssthen } 35142c144df0Ssthen 35152c144df0Ssthen /* http version is <= http/1.1 */ 35162c144df0Ssthen 35172c144df0Ssthen if(c->http_min_version >= http_version_2) { 35182c144df0Ssthen /* HTTP/2 failed, not allowed to use lower version. 
*/ 35192c144df0Ssthen return 0; 35202c144df0Ssthen } 35212c144df0Ssthen 3522938a3a5eSflorian /* read more data */ 3523938a3a5eSflorian if(c->ssl) { 3524938a3a5eSflorian if(!ssl_http_read_more(c)) 3525938a3a5eSflorian return 0; 3526938a3a5eSflorian } else { 3527938a3a5eSflorian if(!http_read_more(fd, c)) 3528938a3a5eSflorian return 0; 3529938a3a5eSflorian } 3530938a3a5eSflorian 3531191f22c6Ssthen if(c->http_stored >= sldns_buffer_position(c->buffer)) { 3532191f22c6Ssthen /* read did not work but we wanted more data, there is 3533191f22c6Ssthen * no bytes to process now. */ 3534191f22c6Ssthen return 1; 3535191f22c6Ssthen } 3536938a3a5eSflorian sldns_buffer_flip(c->buffer); 35379982a05dSsthen /* if we are partway in a segment of data, position us at the point 35389982a05dSsthen * where we left off previously */ 35399982a05dSsthen if(c->http_stored < sldns_buffer_limit(c->buffer)) 35409982a05dSsthen sldns_buffer_set_position(c->buffer, c->http_stored); 35419982a05dSsthen else sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer)); 35422c144df0Ssthen 3543938a3a5eSflorian while(sldns_buffer_remaining(c->buffer) > 0) { 35442c144df0Ssthen /* Handle HTTP/1.x data */ 3545938a3a5eSflorian /* if we are reading headers, read more headers */ 3546938a3a5eSflorian if(c->http_in_headers || c->http_in_chunk_headers) { 3547938a3a5eSflorian /* if header is done, process the header */ 3548938a3a5eSflorian if(!http_header_done(c->buffer)) { 3549938a3a5eSflorian /* copy remaining data to front of buffer 3550938a3a5eSflorian * and set rest for writing into it */ 3551938a3a5eSflorian http_moveover_buffer(c->buffer); 3552938a3a5eSflorian /* return and wait to read more */ 3553938a3a5eSflorian return 1; 3554938a3a5eSflorian } 3555938a3a5eSflorian if(!c->http_in_chunk_headers) { 3556938a3a5eSflorian /* process initial headers */ 3557938a3a5eSflorian if(!http_process_initial_header(c)) 3558938a3a5eSflorian return 0; 3559938a3a5eSflorian } else { 3560938a3a5eSflorian /* process 
chunk headers */ 3561938a3a5eSflorian int r = http_process_chunk_header(c); 3562938a3a5eSflorian if(r == 0) return 0; 3563938a3a5eSflorian if(r == 2) return 1; /* done */ 3564938a3a5eSflorian /* r == 1, continue */ 3565938a3a5eSflorian } 3566938a3a5eSflorian /* see if we have more to process */ 3567938a3a5eSflorian continue; 3568938a3a5eSflorian } 3569938a3a5eSflorian 3570938a3a5eSflorian if(!c->http_is_chunked) { 3571938a3a5eSflorian /* if we are reading nonchunks, process that*/ 3572938a3a5eSflorian return http_nonchunk_segment(c); 3573938a3a5eSflorian } else { 3574938a3a5eSflorian /* if we are reading chunks, read the chunk */ 3575938a3a5eSflorian int r = http_chunked_segment(c); 3576938a3a5eSflorian if(r == 0) return 0; 3577938a3a5eSflorian if(r == 1) return 1; 3578938a3a5eSflorian continue; 3579938a3a5eSflorian } 3580938a3a5eSflorian } 3581938a3a5eSflorian /* broke out of the loop; could not process header instead need 3582938a3a5eSflorian * to read more */ 3583938a3a5eSflorian /* moveover any remaining data and read more data */ 3584938a3a5eSflorian http_moveover_buffer(c->buffer); 3585938a3a5eSflorian /* return and wait to read more */ 3586938a3a5eSflorian return 1; 3587938a3a5eSflorian } 3588938a3a5eSflorian 3589938a3a5eSflorian /** check pending connect for http */ 3590938a3a5eSflorian static int 3591938a3a5eSflorian http_check_connect(int fd, struct comm_point* c) 3592938a3a5eSflorian { 3593938a3a5eSflorian /* check for pending error from nonblocking connect */ 3594938a3a5eSflorian /* from Stevens, unix network programming, vol1, 3rd ed, p450*/ 3595938a3a5eSflorian int error = 0; 3596938a3a5eSflorian socklen_t len = (socklen_t)sizeof(error); 3597938a3a5eSflorian if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 3598938a3a5eSflorian &len) < 0){ 3599938a3a5eSflorian #ifndef USE_WINSOCK 3600938a3a5eSflorian error = errno; /* on solaris errno is error */ 3601938a3a5eSflorian #else /* USE_WINSOCK */ 3602938a3a5eSflorian error = WSAGetLastError(); 
3603938a3a5eSflorian #endif 3604938a3a5eSflorian } 3605938a3a5eSflorian #ifndef USE_WINSOCK 3606938a3a5eSflorian #if defined(EINPROGRESS) && defined(EWOULDBLOCK) 3607938a3a5eSflorian if(error == EINPROGRESS || error == EWOULDBLOCK) 3608938a3a5eSflorian return 1; /* try again later */ 3609938a3a5eSflorian else 3610938a3a5eSflorian #endif 3611938a3a5eSflorian if(error != 0 && verbosity < 2) 3612938a3a5eSflorian return 0; /* silence lots of chatter in the logs */ 3613938a3a5eSflorian else if(error != 0) { 3614938a3a5eSflorian log_err_addr("http connect", strerror(error), 361545872187Ssthen &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3616938a3a5eSflorian #else /* USE_WINSOCK */ 3617938a3a5eSflorian /* examine error */ 3618938a3a5eSflorian if(error == WSAEINPROGRESS) 3619938a3a5eSflorian return 1; 3620938a3a5eSflorian else if(error == WSAEWOULDBLOCK) { 3621938a3a5eSflorian ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3622938a3a5eSflorian return 1; 3623938a3a5eSflorian } else if(error != 0 && verbosity < 2) 3624938a3a5eSflorian return 0; 3625938a3a5eSflorian else if(error != 0) { 3626938a3a5eSflorian log_err_addr("http connect", wsa_strerror(error), 362745872187Ssthen &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3628938a3a5eSflorian #endif /* USE_WINSOCK */ 3629938a3a5eSflorian return 0; 3630938a3a5eSflorian } 3631938a3a5eSflorian /* keep on processing this socket */ 3632938a3a5eSflorian return 2; 3633938a3a5eSflorian } 3634938a3a5eSflorian 3635938a3a5eSflorian /** write more data for http (with ssl) */ 3636938a3a5eSflorian static int 3637938a3a5eSflorian ssl_http_write_more(struct comm_point* c) 3638938a3a5eSflorian { 3639938a3a5eSflorian #ifdef HAVE_SSL 3640938a3a5eSflorian int r; 3641938a3a5eSflorian log_assert(sldns_buffer_remaining(c->buffer) > 0); 3642938a3a5eSflorian ERR_clear_error(); 3643938a3a5eSflorian r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer), 3644938a3a5eSflorian (int)sldns_buffer_remaining(c->buffer)); 
3645938a3a5eSflorian if(r <= 0) { 3646938a3a5eSflorian int want = SSL_get_error(c->ssl, r); 3647938a3a5eSflorian if(want == SSL_ERROR_ZERO_RETURN) { 3648938a3a5eSflorian return 0; /* closed */ 3649938a3a5eSflorian } else if(want == SSL_ERROR_WANT_READ) { 3650550cf4a9Ssthen c->ssl_shake_state = comm_ssl_shake_hs_read; 3651938a3a5eSflorian comm_point_listen_for_rw(c, 1, 0); 3652938a3a5eSflorian return 1; /* wait for read condition */ 3653938a3a5eSflorian } else if(want == SSL_ERROR_WANT_WRITE) { 3654938a3a5eSflorian return 1; /* write more later */ 3655938a3a5eSflorian } else if(want == SSL_ERROR_SYSCALL) { 3656550cf4a9Ssthen #ifdef EPIPE 3657550cf4a9Ssthen if(errno == EPIPE && verbosity < 2) 3658550cf4a9Ssthen return 0; /* silence 'broken pipe' */ 3659550cf4a9Ssthen #endif 3660938a3a5eSflorian if(errno != 0) 3661938a3a5eSflorian log_err("SSL_write syscall: %s", 3662938a3a5eSflorian strerror(errno)); 3663938a3a5eSflorian return 0; 3664938a3a5eSflorian } 3665d896b962Ssthen log_crypto_err_io("could not SSL_write", want); 3666938a3a5eSflorian return 0; 3667938a3a5eSflorian } 3668938a3a5eSflorian sldns_buffer_skip(c->buffer, (ssize_t)r); 3669938a3a5eSflorian return 1; 3670938a3a5eSflorian #else 3671938a3a5eSflorian (void)c; 3672938a3a5eSflorian return 0; 3673938a3a5eSflorian #endif /* HAVE_SSL */ 3674938a3a5eSflorian } 3675938a3a5eSflorian 3676938a3a5eSflorian /** write more data for http */ 3677938a3a5eSflorian static int 3678938a3a5eSflorian http_write_more(int fd, struct comm_point* c) 3679938a3a5eSflorian { 3680938a3a5eSflorian ssize_t r; 3681938a3a5eSflorian log_assert(sldns_buffer_remaining(c->buffer) > 0); 3682938a3a5eSflorian r = send(fd, (void*)sldns_buffer_current(c->buffer), 3683938a3a5eSflorian sldns_buffer_remaining(c->buffer), 0); 3684938a3a5eSflorian if(r == -1) { 3685938a3a5eSflorian #ifndef USE_WINSOCK 3686938a3a5eSflorian if(errno == EINTR || errno == EAGAIN) 3687938a3a5eSflorian return 1; 3688938a3a5eSflorian #else 3689938a3a5eSflorian 
if(WSAGetLastError() == WSAEINPROGRESS) 3690938a3a5eSflorian return 1; 3691938a3a5eSflorian if(WSAGetLastError() == WSAEWOULDBLOCK) { 3692938a3a5eSflorian ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 3693938a3a5eSflorian return 1; 3694938a3a5eSflorian } 3695938a3a5eSflorian #endif 36962c144df0Ssthen log_err_addr("http send r", sock_strerror(errno), 369745872187Ssthen &c->repinfo.remote_addr, c->repinfo.remote_addrlen); 3698938a3a5eSflorian return 0; 3699938a3a5eSflorian } 3700938a3a5eSflorian sldns_buffer_skip(c->buffer, r); 3701938a3a5eSflorian return 1; 3702938a3a5eSflorian } 3703938a3a5eSflorian 37042c144df0Ssthen #ifdef HAVE_NGHTTP2 37052c144df0Ssthen ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf, 37062c144df0Ssthen size_t len, int ATTR_UNUSED(flags), void* cb_arg) 37072c144df0Ssthen { 3708eba819a2Ssthen ssize_t ret; 37092c144df0Ssthen struct http2_session* h2_session = (struct http2_session*)cb_arg; 37102c144df0Ssthen log_assert(h2_session->c->type == comm_http); 37112c144df0Ssthen log_assert(h2_session->c->h2_session); 37122c144df0Ssthen 3713eba819a2Ssthen #ifdef HAVE_SSL 3714eba819a2Ssthen if(h2_session->c->ssl) { 3715eba819a2Ssthen int r; 37162c144df0Ssthen ERR_clear_error(); 37172c144df0Ssthen r = SSL_write(h2_session->c->ssl, buf, len); 37182c144df0Ssthen if(r <= 0) { 37192c144df0Ssthen int want = SSL_get_error(h2_session->c->ssl, r); 37202c144df0Ssthen if(want == SSL_ERROR_ZERO_RETURN) { 37212c144df0Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 37222c144df0Ssthen } else if(want == SSL_ERROR_WANT_READ) { 37232c144df0Ssthen h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read; 37242c144df0Ssthen comm_point_listen_for_rw(h2_session->c, 1, 0); 37252c144df0Ssthen return NGHTTP2_ERR_WOULDBLOCK; 37262c144df0Ssthen } else if(want == SSL_ERROR_WANT_WRITE) { 37272c144df0Ssthen return NGHTTP2_ERR_WOULDBLOCK; 37282c144df0Ssthen } else if(want == SSL_ERROR_SYSCALL) { 37292c144df0Ssthen #ifdef EPIPE 37302c144df0Ssthen if(errno 
== EPIPE && verbosity < 2) 37312c144df0Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 37322c144df0Ssthen #endif 37332c144df0Ssthen if(errno != 0) 37342c144df0Ssthen log_err("SSL_write syscall: %s", 37352c144df0Ssthen strerror(errno)); 37362c144df0Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 37372c144df0Ssthen } 3738d896b962Ssthen log_crypto_err_io("could not SSL_write", want); 37392c144df0Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 37402c144df0Ssthen } 37412c144df0Ssthen return r; 3742eba819a2Ssthen } 3743eba819a2Ssthen #endif /* HAVE_SSL */ 3744eba819a2Ssthen 3745eba819a2Ssthen ret = send(h2_session->c->fd, buf, len, 0); 3746eba819a2Ssthen if(ret == 0) { 3747eba819a2Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 3748eba819a2Ssthen } else if(ret < 0) { 3749eba819a2Ssthen #ifndef USE_WINSOCK 3750eba819a2Ssthen if(errno == EINTR || errno == EAGAIN) 3751eba819a2Ssthen return NGHTTP2_ERR_WOULDBLOCK; 3752eba819a2Ssthen #ifdef EPIPE 3753eba819a2Ssthen if(errno == EPIPE && verbosity < 2) 3754eba819a2Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 37552c144df0Ssthen #endif 3756eba819a2Ssthen #ifdef ECONNRESET 3757eba819a2Ssthen if(errno == ECONNRESET && verbosity < 2) 3758eba819a2Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 3759eba819a2Ssthen #endif 3760eba819a2Ssthen log_err_addr("could not http2 write: %s", strerror(errno), 376145872187Ssthen &h2_session->c->repinfo.remote_addr, 376245872187Ssthen h2_session->c->repinfo.remote_addrlen); 3763eba819a2Ssthen #else /* USE_WINSOCK */ 3764eba819a2Ssthen if(WSAGetLastError() == WSAENOTCONN) 3765eba819a2Ssthen return NGHTTP2_ERR_WOULDBLOCK; 3766eba819a2Ssthen if(WSAGetLastError() == WSAEINPROGRESS) 3767eba819a2Ssthen return NGHTTP2_ERR_WOULDBLOCK; 3768eba819a2Ssthen if(WSAGetLastError() == WSAEWOULDBLOCK) { 3769eba819a2Ssthen ub_winsock_tcp_wouldblock(h2_session->c->ev->ev, 3770eba819a2Ssthen UB_EV_WRITE); 3771eba819a2Ssthen return NGHTTP2_ERR_WOULDBLOCK; 3772eba819a2Ssthen } 3773eba819a2Ssthen if(WSAGetLastError() == WSAECONNRESET && 
verbosity < 2) 3774eba819a2Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 3775eba819a2Ssthen log_err_addr("could not http2 write: %s", 3776eba819a2Ssthen wsa_strerror(WSAGetLastError()), 377745872187Ssthen &h2_session->c->repinfo.remote_addr, 377845872187Ssthen h2_session->c->repinfo.remote_addrlen); 3779eba819a2Ssthen #endif 3780eba819a2Ssthen return NGHTTP2_ERR_CALLBACK_FAILURE; 3781eba819a2Ssthen } 3782eba819a2Ssthen return ret; 37832c144df0Ssthen } 37842c144df0Ssthen #endif /* HAVE_NGHTTP2 */ 37852c144df0Ssthen 37862c144df0Ssthen /** Handle http2 writing */ 37872c144df0Ssthen static int 37882c144df0Ssthen comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c) 37892c144df0Ssthen { 37902c144df0Ssthen #ifdef HAVE_NGHTTP2 37912c144df0Ssthen int ret; 37922c144df0Ssthen log_assert(c->h2_session); 37932c144df0Ssthen 37942c144df0Ssthen ret = nghttp2_session_send(c->h2_session->session); 37952c144df0Ssthen if(ret) { 37962c144df0Ssthen verbose(VERB_QUERY, "http2: session_send failed, " 37972c144df0Ssthen "error: %s", nghttp2_strerror(ret)); 37982c144df0Ssthen return 0; 37992c144df0Ssthen } 38002c144df0Ssthen 38012c144df0Ssthen if(nghttp2_session_want_read(c->h2_session->session)) { 38022c144df0Ssthen c->tcp_is_reading = 1; 38032c144df0Ssthen comm_point_stop_listening(c); 38049982a05dSsthen comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 38052c144df0Ssthen } else if(!nghttp2_session_want_write(c->h2_session->session)) 38062c144df0Ssthen return 0; /* connection can be closed */ 38072c144df0Ssthen return 1; 38082c144df0Ssthen #else 38092c144df0Ssthen (void)c; 38102c144df0Ssthen return 0; 38112c144df0Ssthen #endif 38122c144df0Ssthen } 38132c144df0Ssthen 3814938a3a5eSflorian /** 3815938a3a5eSflorian * Handle http writing callback. 3816938a3a5eSflorian * @param fd: file descriptor of socket. 3817938a3a5eSflorian * @param c: comm point to write buffer out of. 
3818938a3a5eSflorian * @return: 0 on error 3819938a3a5eSflorian */ 3820938a3a5eSflorian static int 3821938a3a5eSflorian comm_point_http_handle_write(int fd, struct comm_point* c) 3822938a3a5eSflorian { 3823938a3a5eSflorian log_assert(c->type == comm_http); 3824938a3a5eSflorian log_assert(fd != -1); 3825938a3a5eSflorian 3826938a3a5eSflorian /* check pending connect errors, if that fails, we wait for more, 3827938a3a5eSflorian * or we can continue to write contents */ 3828938a3a5eSflorian if(c->tcp_check_nb_connect) { 3829938a3a5eSflorian int r = http_check_connect(fd, c); 3830938a3a5eSflorian if(r == 0) return 0; 3831938a3a5eSflorian if(r == 1) return 1; 3832938a3a5eSflorian c->tcp_check_nb_connect = 0; 3833938a3a5eSflorian } 3834938a3a5eSflorian /* if we are in ssl handshake, handle SSL handshake */ 3835938a3a5eSflorian #ifdef HAVE_SSL 3836938a3a5eSflorian if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) { 3837938a3a5eSflorian if(!ssl_handshake(c)) 3838938a3a5eSflorian return 0; 3839938a3a5eSflorian if(c->ssl_shake_state != comm_ssl_shake_none) 3840938a3a5eSflorian return 1; 3841938a3a5eSflorian } 3842938a3a5eSflorian #endif /* HAVE_SSL */ 3843938a3a5eSflorian if(c->tcp_is_reading) 3844938a3a5eSflorian return 1; 38452c144df0Ssthen 38462c144df0Ssthen if(c->use_h2) { 38472c144df0Ssthen return comm_point_http2_handle_write(fd, c); 38482c144df0Ssthen } 38492c144df0Ssthen 38502c144df0Ssthen /* http version is <= http/1.1 */ 38512c144df0Ssthen 38522c144df0Ssthen if(c->http_min_version >= http_version_2) { 38532c144df0Ssthen /* HTTP/2 failed, not allowed to use lower version. 
*/ 38542c144df0Ssthen return 0; 38552c144df0Ssthen } 38562c144df0Ssthen 3857938a3a5eSflorian /* if we are writing, write more */ 3858938a3a5eSflorian if(c->ssl) { 3859938a3a5eSflorian if(!ssl_http_write_more(c)) 3860938a3a5eSflorian return 0; 3861938a3a5eSflorian } else { 3862938a3a5eSflorian if(!http_write_more(fd, c)) 3863938a3a5eSflorian return 0; 3864938a3a5eSflorian } 3865938a3a5eSflorian 3866938a3a5eSflorian /* we write a single buffer contents, that can contain 3867938a3a5eSflorian * the http request, and then flip to read the results */ 3868938a3a5eSflorian /* see if write is done */ 3869938a3a5eSflorian if(sldns_buffer_remaining(c->buffer) == 0) { 3870938a3a5eSflorian sldns_buffer_clear(c->buffer); 3871938a3a5eSflorian if(c->tcp_do_toggle_rw) 3872938a3a5eSflorian c->tcp_is_reading = 1; 3873938a3a5eSflorian c->tcp_byte_count = 0; 3874938a3a5eSflorian /* switch from listening(write) to listening(read) */ 3875938a3a5eSflorian comm_point_stop_listening(c); 3876938a3a5eSflorian comm_point_start_listening(c, -1, -1); 3877938a3a5eSflorian } 3878938a3a5eSflorian return 1; 3879938a3a5eSflorian } 3880938a3a5eSflorian 3881938a3a5eSflorian void 3882938a3a5eSflorian comm_point_http_handle_callback(int fd, short event, void* arg) 3883938a3a5eSflorian { 3884938a3a5eSflorian struct comm_point* c = (struct comm_point*)arg; 3885938a3a5eSflorian log_assert(c->type == comm_http); 3886938a3a5eSflorian ub_comm_base_now(c->ev->base); 3887938a3a5eSflorian 3888550cf4a9Ssthen if(event&UB_EV_TIMEOUT) { 3889550cf4a9Ssthen verbose(VERB_QUERY, "http took too long, dropped"); 3890550cf4a9Ssthen reclaim_http_handler(c); 3891550cf4a9Ssthen if(!c->tcp_do_close) { 3892550cf4a9Ssthen fptr_ok(fptr_whitelist_comm_point(c->callback)); 3893550cf4a9Ssthen (void)(*c->callback)(c, c->cb_arg, 3894550cf4a9Ssthen NETEVENT_TIMEOUT, NULL); 3895550cf4a9Ssthen } 3896550cf4a9Ssthen return; 3897550cf4a9Ssthen } 3898938a3a5eSflorian if(event&UB_EV_READ) { 3899938a3a5eSflorian 
if(!comm_point_http_handle_read(fd, c)) { 3900938a3a5eSflorian reclaim_http_handler(c); 3901938a3a5eSflorian if(!c->tcp_do_close) { 3902938a3a5eSflorian fptr_ok(fptr_whitelist_comm_point( 3903938a3a5eSflorian c->callback)); 3904938a3a5eSflorian (void)(*c->callback)(c, c->cb_arg, 3905938a3a5eSflorian NETEVENT_CLOSED, NULL); 3906938a3a5eSflorian } 3907938a3a5eSflorian } 3908938a3a5eSflorian return; 3909938a3a5eSflorian } 3910938a3a5eSflorian if(event&UB_EV_WRITE) { 3911938a3a5eSflorian if(!comm_point_http_handle_write(fd, c)) { 3912938a3a5eSflorian reclaim_http_handler(c); 3913938a3a5eSflorian if(!c->tcp_do_close) { 3914938a3a5eSflorian fptr_ok(fptr_whitelist_comm_point( 3915938a3a5eSflorian c->callback)); 3916938a3a5eSflorian (void)(*c->callback)(c, c->cb_arg, 3917938a3a5eSflorian NETEVENT_CLOSED, NULL); 3918938a3a5eSflorian } 3919938a3a5eSflorian } 3920938a3a5eSflorian return; 3921938a3a5eSflorian } 3922938a3a5eSflorian log_err("Ignored event %d for httphdl.", event); 3923938a3a5eSflorian } 3924938a3a5eSflorian 3925933707f3Ssthen void comm_point_local_handle_callback(int fd, short event, void* arg) 3926933707f3Ssthen { 3927933707f3Ssthen struct comm_point* c = (struct comm_point*)arg; 3928933707f3Ssthen log_assert(c->type == comm_local); 39292ee382b6Ssthen ub_comm_base_now(c->ev->base); 3930933707f3Ssthen 39312ee382b6Ssthen if(event&UB_EV_READ) { 3932933707f3Ssthen if(!comm_point_tcp_handle_read(fd, c, 1)) { 3933933707f3Ssthen fptr_ok(fptr_whitelist_comm_point(c->callback)); 3934933707f3Ssthen (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 3935933707f3Ssthen NULL); 3936933707f3Ssthen } 3937933707f3Ssthen return; 3938933707f3Ssthen } 3939933707f3Ssthen log_err("Ignored event %d for localhdl.", event); 3940933707f3Ssthen } 3941933707f3Ssthen 3942933707f3Ssthen void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 3943933707f3Ssthen short event, void* arg) 3944933707f3Ssthen { 3945933707f3Ssthen struct comm_point* c = (struct comm_point*)arg; 
3946933707f3Ssthen int err = NETEVENT_NOERROR; 3947933707f3Ssthen log_assert(c->type == comm_raw); 39482ee382b6Ssthen ub_comm_base_now(c->ev->base); 3949933707f3Ssthen 39502ee382b6Ssthen if(event&UB_EV_TIMEOUT) 3951933707f3Ssthen err = NETEVENT_TIMEOUT; 3952933707f3Ssthen fptr_ok(fptr_whitelist_comm_point_raw(c->callback)); 3953933707f3Ssthen (void)(*c->callback)(c, c->cb_arg, err, NULL); 3954933707f3Ssthen } 3955933707f3Ssthen 3956933707f3Ssthen struct comm_point* 39570b68ff31Ssthen comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer, 395845872187Ssthen int pp2_enabled, comm_point_callback_type* callback, 395945872187Ssthen void* callback_arg, struct unbound_socket* socket) 3960933707f3Ssthen { 3961933707f3Ssthen struct comm_point* c = (struct comm_point*)calloc(1, 3962933707f3Ssthen sizeof(struct comm_point)); 3963933707f3Ssthen short evbits; 3964933707f3Ssthen if(!c) 3965933707f3Ssthen return NULL; 3966933707f3Ssthen c->ev = (struct internal_event*)calloc(1, 3967933707f3Ssthen sizeof(struct internal_event)); 3968933707f3Ssthen if(!c->ev) { 3969933707f3Ssthen free(c); 3970933707f3Ssthen return NULL; 3971933707f3Ssthen } 3972933707f3Ssthen c->ev->base = base; 3973933707f3Ssthen c->fd = fd; 3974933707f3Ssthen c->buffer = buffer; 3975933707f3Ssthen c->timeout = NULL; 3976933707f3Ssthen c->tcp_is_reading = 0; 3977933707f3Ssthen c->tcp_byte_count = 0; 3978933707f3Ssthen c->tcp_parent = NULL; 3979933707f3Ssthen c->max_tcp_count = 0; 3980a58bff56Ssthen c->cur_tcp_count = 0; 3981933707f3Ssthen c->tcp_handlers = NULL; 3982933707f3Ssthen c->tcp_free = NULL; 3983933707f3Ssthen c->type = comm_udp; 3984933707f3Ssthen c->tcp_do_close = 0; 3985933707f3Ssthen c->do_not_close = 0; 3986933707f3Ssthen c->tcp_do_toggle_rw = 0; 3987933707f3Ssthen c->tcp_check_nb_connect = 0; 398877079be7Ssthen #ifdef USE_MSG_FASTOPEN 398977079be7Ssthen c->tcp_do_fastopen = 0; 399077079be7Ssthen #endif 39912be9e038Ssthen #ifdef USE_DNSCRYPT 39922be9e038Ssthen c->dnscrypt = 0; 
39932be9e038Ssthen c->dnscrypt_buffer = buffer; 39942be9e038Ssthen #endif 3995933707f3Ssthen c->inuse = 0; 3996933707f3Ssthen c->callback = callback; 3997933707f3Ssthen c->cb_arg = callback_arg; 3998191f22c6Ssthen c->socket = socket; 399945872187Ssthen c->pp2_enabled = pp2_enabled; 400045872187Ssthen c->pp2_header_state = pp2_header_none; 40012ee382b6Ssthen evbits = UB_EV_READ | UB_EV_PERSIST; 40022ee382b6Ssthen /* ub_event stuff */ 40032ee382b6Ssthen c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 40042ee382b6Ssthen comm_point_udp_callback, c); 40052ee382b6Ssthen if(c->ev->ev == NULL) { 4006933707f3Ssthen log_err("could not baseset udp event"); 4007933707f3Ssthen comm_point_delete(c); 4008933707f3Ssthen return NULL; 4009933707f3Ssthen } 40102ee382b6Ssthen if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 4011933707f3Ssthen log_err("could not add udp event"); 4012933707f3Ssthen comm_point_delete(c); 4013933707f3Ssthen return NULL; 4014933707f3Ssthen } 40159982a05dSsthen c->event_added = 1; 4016933707f3Ssthen return c; 4017933707f3Ssthen } 4018933707f3Ssthen 4019d896b962Ssthen #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) 4020933707f3Ssthen struct comm_point* 4021933707f3Ssthen comm_point_create_udp_ancil(struct comm_base *base, int fd, 402245872187Ssthen sldns_buffer* buffer, int pp2_enabled, 4023191f22c6Ssthen comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket) 4024933707f3Ssthen { 4025933707f3Ssthen struct comm_point* c = (struct comm_point*)calloc(1, 4026933707f3Ssthen sizeof(struct comm_point)); 4027933707f3Ssthen short evbits; 4028933707f3Ssthen if(!c) 4029933707f3Ssthen return NULL; 4030933707f3Ssthen c->ev = (struct internal_event*)calloc(1, 4031933707f3Ssthen sizeof(struct internal_event)); 4032933707f3Ssthen if(!c->ev) { 4033933707f3Ssthen free(c); 4034933707f3Ssthen return NULL; 4035933707f3Ssthen } 4036933707f3Ssthen c->ev->base = base; 4037933707f3Ssthen c->fd = fd; 
4038933707f3Ssthen c->buffer = buffer; 4039933707f3Ssthen c->timeout = NULL; 4040933707f3Ssthen c->tcp_is_reading = 0; 4041933707f3Ssthen c->tcp_byte_count = 0; 4042933707f3Ssthen c->tcp_parent = NULL; 4043933707f3Ssthen c->max_tcp_count = 0; 4044a58bff56Ssthen c->cur_tcp_count = 0; 4045933707f3Ssthen c->tcp_handlers = NULL; 4046933707f3Ssthen c->tcp_free = NULL; 4047933707f3Ssthen c->type = comm_udp; 4048933707f3Ssthen c->tcp_do_close = 0; 4049933707f3Ssthen c->do_not_close = 0; 40502be9e038Ssthen #ifdef USE_DNSCRYPT 40512be9e038Ssthen c->dnscrypt = 0; 40522be9e038Ssthen c->dnscrypt_buffer = buffer; 40532be9e038Ssthen #endif 4054933707f3Ssthen c->inuse = 0; 4055933707f3Ssthen c->tcp_do_toggle_rw = 0; 4056933707f3Ssthen c->tcp_check_nb_connect = 0; 405777079be7Ssthen #ifdef USE_MSG_FASTOPEN 405877079be7Ssthen c->tcp_do_fastopen = 0; 405977079be7Ssthen #endif 4060933707f3Ssthen c->callback = callback; 4061933707f3Ssthen c->cb_arg = callback_arg; 4062191f22c6Ssthen c->socket = socket; 406345872187Ssthen c->pp2_enabled = pp2_enabled; 406445872187Ssthen c->pp2_header_state = pp2_header_none; 40652ee382b6Ssthen evbits = UB_EV_READ | UB_EV_PERSIST; 40662ee382b6Ssthen /* ub_event stuff */ 40672ee382b6Ssthen c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 40682ee382b6Ssthen comm_point_udp_ancil_callback, c); 40692ee382b6Ssthen if(c->ev->ev == NULL) { 4070933707f3Ssthen log_err("could not baseset udp event"); 4071933707f3Ssthen comm_point_delete(c); 4072933707f3Ssthen return NULL; 4073933707f3Ssthen } 40742ee382b6Ssthen if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { 4075933707f3Ssthen log_err("could not add udp event"); 4076933707f3Ssthen comm_point_delete(c); 4077933707f3Ssthen return NULL; 4078933707f3Ssthen } 40799982a05dSsthen c->event_added = 1; 4080933707f3Ssthen return c; 4081933707f3Ssthen } 4082d896b962Ssthen #endif 4083933707f3Ssthen 4084933707f3Ssthen static struct comm_point* 4085933707f3Ssthen comm_point_create_tcp_handler(struct comm_base 
*base, 4086933707f3Ssthen struct comm_point* parent, size_t bufsize, 4087f6b99bafSsthen struct sldns_buffer* spoolbuf, comm_point_callback_type* callback, 4088191f22c6Ssthen void* callback_arg, struct unbound_socket* socket) 4089933707f3Ssthen { 4090933707f3Ssthen struct comm_point* c = (struct comm_point*)calloc(1, 4091933707f3Ssthen sizeof(struct comm_point)); 4092933707f3Ssthen short evbits; 4093933707f3Ssthen if(!c) 4094933707f3Ssthen return NULL; 4095933707f3Ssthen c->ev = (struct internal_event*)calloc(1, 4096933707f3Ssthen sizeof(struct internal_event)); 4097933707f3Ssthen if(!c->ev) { 4098933707f3Ssthen free(c); 4099933707f3Ssthen return NULL; 4100933707f3Ssthen } 4101933707f3Ssthen c->ev->base = base; 4102933707f3Ssthen c->fd = -1; 41030b68ff31Ssthen c->buffer = sldns_buffer_new(bufsize); 4104933707f3Ssthen if(!c->buffer) { 4105933707f3Ssthen free(c->ev); 4106933707f3Ssthen free(c); 4107933707f3Ssthen return NULL; 4108933707f3Ssthen } 4109933707f3Ssthen c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 4110933707f3Ssthen if(!c->timeout) { 41110b68ff31Ssthen sldns_buffer_free(c->buffer); 4112933707f3Ssthen free(c->ev); 4113933707f3Ssthen free(c); 4114933707f3Ssthen return NULL; 4115933707f3Ssthen } 4116933707f3Ssthen c->tcp_is_reading = 0; 4117933707f3Ssthen c->tcp_byte_count = 0; 4118933707f3Ssthen c->tcp_parent = parent; 41192308e98cSsthen c->tcp_timeout_msec = parent->tcp_timeout_msec; 41202308e98cSsthen c->tcp_conn_limit = parent->tcp_conn_limit; 41212308e98cSsthen c->tcl_addr = NULL; 41222308e98cSsthen c->tcp_keepalive = 0; 4123933707f3Ssthen c->max_tcp_count = 0; 4124a58bff56Ssthen c->cur_tcp_count = 0; 4125933707f3Ssthen c->tcp_handlers = NULL; 4126933707f3Ssthen c->tcp_free = NULL; 4127933707f3Ssthen c->type = comm_tcp; 4128933707f3Ssthen c->tcp_do_close = 0; 4129933707f3Ssthen c->do_not_close = 0; 4130933707f3Ssthen c->tcp_do_toggle_rw = 1; 4131933707f3Ssthen c->tcp_check_nb_connect = 0; 413277079be7Ssthen #ifdef USE_MSG_FASTOPEN 
413377079be7Ssthen c->tcp_do_fastopen = 0; 413477079be7Ssthen #endif 41352be9e038Ssthen #ifdef USE_DNSCRYPT 41362be9e038Ssthen c->dnscrypt = 0; 41372be9e038Ssthen /* We don't know just yet if this is a dnscrypt channel. Allocation 41382be9e038Ssthen * will be done when handling the callback. */ 41392be9e038Ssthen c->dnscrypt_buffer = c->buffer; 41402be9e038Ssthen #endif 4141933707f3Ssthen c->repinfo.c = c; 4142933707f3Ssthen c->callback = callback; 4143933707f3Ssthen c->cb_arg = callback_arg; 4144191f22c6Ssthen c->socket = socket; 414545872187Ssthen c->pp2_enabled = parent->pp2_enabled; 414645872187Ssthen c->pp2_header_state = pp2_header_none; 4147f6b99bafSsthen if(spoolbuf) { 4148f6b99bafSsthen c->tcp_req_info = tcp_req_info_create(spoolbuf); 4149f6b99bafSsthen if(!c->tcp_req_info) { 4150f6b99bafSsthen log_err("could not create tcp commpoint"); 4151f6b99bafSsthen sldns_buffer_free(c->buffer); 4152f6b99bafSsthen free(c->timeout); 4153f6b99bafSsthen free(c->ev); 4154f6b99bafSsthen free(c); 4155f6b99bafSsthen return NULL; 4156f6b99bafSsthen } 4157f6b99bafSsthen c->tcp_req_info->cp = c; 4158f6b99bafSsthen c->tcp_do_close = 1; 4159f6b99bafSsthen c->tcp_do_toggle_rw = 0; 4160f6b99bafSsthen } 4161933707f3Ssthen /* add to parent free list */ 4162933707f3Ssthen c->tcp_free = parent->tcp_free; 4163933707f3Ssthen parent->tcp_free = c; 41642ee382b6Ssthen /* ub_event stuff */ 41652ee382b6Ssthen evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 41662ee382b6Ssthen c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 41672ee382b6Ssthen comm_point_tcp_handle_callback, c); 41682ee382b6Ssthen if(c->ev->ev == NULL) 4169933707f3Ssthen { 4170933707f3Ssthen log_err("could not basetset tcphdl event"); 4171933707f3Ssthen parent->tcp_free = c->tcp_free; 4172f6b99bafSsthen tcp_req_info_delete(c->tcp_req_info); 4173f6b99bafSsthen sldns_buffer_free(c->buffer); 4174f6b99bafSsthen free(c->timeout); 4175933707f3Ssthen free(c->ev); 4176933707f3Ssthen free(c); 4177933707f3Ssthen return NULL; 
4178933707f3Ssthen } 4179933707f3Ssthen return c; 4180933707f3Ssthen } 4181933707f3Ssthen 41822c144df0Ssthen static struct comm_point* 41832c144df0Ssthen comm_point_create_http_handler(struct comm_base *base, 41842c144df0Ssthen struct comm_point* parent, size_t bufsize, int harden_large_queries, 41852c144df0Ssthen uint32_t http_max_streams, char* http_endpoint, 4186191f22c6Ssthen comm_point_callback_type* callback, void* callback_arg, 4187191f22c6Ssthen struct unbound_socket* socket) 41882c144df0Ssthen { 41892c144df0Ssthen struct comm_point* c = (struct comm_point*)calloc(1, 41902c144df0Ssthen sizeof(struct comm_point)); 41912c144df0Ssthen short evbits; 41922c144df0Ssthen if(!c) 41932c144df0Ssthen return NULL; 41942c144df0Ssthen c->ev = (struct internal_event*)calloc(1, 41952c144df0Ssthen sizeof(struct internal_event)); 41962c144df0Ssthen if(!c->ev) { 41972c144df0Ssthen free(c); 41982c144df0Ssthen return NULL; 41992c144df0Ssthen } 42002c144df0Ssthen c->ev->base = base; 42012c144df0Ssthen c->fd = -1; 42022c144df0Ssthen c->buffer = sldns_buffer_new(bufsize); 42032c144df0Ssthen if(!c->buffer) { 42042c144df0Ssthen free(c->ev); 42052c144df0Ssthen free(c); 42062c144df0Ssthen return NULL; 42072c144df0Ssthen } 42082c144df0Ssthen c->timeout = (struct timeval*)malloc(sizeof(struct timeval)); 42092c144df0Ssthen if(!c->timeout) { 42102c144df0Ssthen sldns_buffer_free(c->buffer); 42112c144df0Ssthen free(c->ev); 42122c144df0Ssthen free(c); 42132c144df0Ssthen return NULL; 42142c144df0Ssthen } 42152c144df0Ssthen c->tcp_is_reading = 0; 42162c144df0Ssthen c->tcp_byte_count = 0; 42172c144df0Ssthen c->tcp_parent = parent; 42182c144df0Ssthen c->tcp_timeout_msec = parent->tcp_timeout_msec; 42192c144df0Ssthen c->tcp_conn_limit = parent->tcp_conn_limit; 42202c144df0Ssthen c->tcl_addr = NULL; 42212c144df0Ssthen c->tcp_keepalive = 0; 42222c144df0Ssthen c->max_tcp_count = 0; 42232c144df0Ssthen c->cur_tcp_count = 0; 42242c144df0Ssthen c->tcp_handlers = NULL; 42252c144df0Ssthen c->tcp_free = 
NULL; 42262c144df0Ssthen c->type = comm_http; 42272c144df0Ssthen c->tcp_do_close = 1; 42282c144df0Ssthen c->do_not_close = 0; 42292c144df0Ssthen c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */ 42302c144df0Ssthen c->tcp_check_nb_connect = 0; 42312c144df0Ssthen #ifdef USE_MSG_FASTOPEN 42322c144df0Ssthen c->tcp_do_fastopen = 0; 42332c144df0Ssthen #endif 42342c144df0Ssthen #ifdef USE_DNSCRYPT 42352c144df0Ssthen c->dnscrypt = 0; 42362c144df0Ssthen c->dnscrypt_buffer = NULL; 42372c144df0Ssthen #endif 42382c144df0Ssthen c->repinfo.c = c; 42392c144df0Ssthen c->callback = callback; 42402c144df0Ssthen c->cb_arg = callback_arg; 4241191f22c6Ssthen c->socket = socket; 424245872187Ssthen c->pp2_enabled = 0; 424345872187Ssthen c->pp2_header_state = pp2_header_none; 42442c144df0Ssthen 42452c144df0Ssthen c->http_min_version = http_version_2; 42462c144df0Ssthen c->http2_stream_max_qbuffer_size = bufsize; 42472c144df0Ssthen if(harden_large_queries && bufsize > 512) 42482c144df0Ssthen c->http2_stream_max_qbuffer_size = 512; 42492c144df0Ssthen c->http2_max_streams = http_max_streams; 42502c144df0Ssthen if(!(c->http_endpoint = strdup(http_endpoint))) { 42512c144df0Ssthen log_err("could not strdup http_endpoint"); 42522c144df0Ssthen sldns_buffer_free(c->buffer); 42532c144df0Ssthen free(c->timeout); 42542c144df0Ssthen free(c->ev); 42552c144df0Ssthen free(c); 42562c144df0Ssthen return NULL; 42572c144df0Ssthen } 42582c144df0Ssthen c->use_h2 = 0; 42592c144df0Ssthen #ifdef HAVE_NGHTTP2 42602c144df0Ssthen if(!(c->h2_session = http2_session_create(c))) { 42612c144df0Ssthen log_err("could not create http2 session"); 42622c144df0Ssthen free(c->http_endpoint); 42632c144df0Ssthen sldns_buffer_free(c->buffer); 42642c144df0Ssthen free(c->timeout); 42652c144df0Ssthen free(c->ev); 42662c144df0Ssthen free(c); 42672c144df0Ssthen return NULL; 42682c144df0Ssthen } 42692c144df0Ssthen if(!(c->h2_session->callbacks = http2_req_callbacks_create())) { 42702c144df0Ssthen log_err("could not 
create http2 callbacks"); 42712c144df0Ssthen http2_session_delete(c->h2_session); 42722c144df0Ssthen free(c->http_endpoint); 42732c144df0Ssthen sldns_buffer_free(c->buffer); 42742c144df0Ssthen free(c->timeout); 42752c144df0Ssthen free(c->ev); 42762c144df0Ssthen free(c); 42772c144df0Ssthen return NULL; 42782c144df0Ssthen } 42792c144df0Ssthen #endif 42802c144df0Ssthen 42812c144df0Ssthen /* add to parent free list */ 42822c144df0Ssthen c->tcp_free = parent->tcp_free; 42832c144df0Ssthen parent->tcp_free = c; 42842c144df0Ssthen /* ub_event stuff */ 42852c144df0Ssthen evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT; 42862c144df0Ssthen c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 42872c144df0Ssthen comm_point_http_handle_callback, c); 42882c144df0Ssthen if(c->ev->ev == NULL) 42892c144df0Ssthen { 42902c144df0Ssthen log_err("could not set http handler event"); 42912c144df0Ssthen parent->tcp_free = c->tcp_free; 42922c144df0Ssthen http2_session_delete(c->h2_session); 42932c144df0Ssthen sldns_buffer_free(c->buffer); 42942c144df0Ssthen free(c->timeout); 42952c144df0Ssthen free(c->ev); 42962c144df0Ssthen free(c); 42972c144df0Ssthen return NULL; 42982c144df0Ssthen } 42992c144df0Ssthen return c; 43002c144df0Ssthen } 43012c144df0Ssthen 4302933707f3Ssthen struct comm_point* 43032308e98cSsthen comm_point_create_tcp(struct comm_base *base, int fd, int num, 43042c144df0Ssthen int idle_timeout, int harden_large_queries, 43052c144df0Ssthen uint32_t http_max_streams, char* http_endpoint, 43062c144df0Ssthen struct tcl_list* tcp_conn_limit, size_t bufsize, 43072c144df0Ssthen struct sldns_buffer* spoolbuf, enum listen_type port_type, 430845872187Ssthen int pp2_enabled, comm_point_callback_type* callback, 430945872187Ssthen void* callback_arg, struct unbound_socket* socket) 4310933707f3Ssthen { 4311933707f3Ssthen struct comm_point* c = (struct comm_point*)calloc(1, 4312933707f3Ssthen sizeof(struct comm_point)); 4313933707f3Ssthen short evbits; 4314933707f3Ssthen int i; 
4315933707f3Ssthen /* first allocate the TCP accept listener */ 4316933707f3Ssthen if(!c) 4317933707f3Ssthen return NULL; 4318933707f3Ssthen c->ev = (struct internal_event*)calloc(1, 4319933707f3Ssthen sizeof(struct internal_event)); 4320933707f3Ssthen if(!c->ev) { 4321933707f3Ssthen free(c); 4322933707f3Ssthen return NULL; 4323933707f3Ssthen } 4324933707f3Ssthen c->ev->base = base; 4325933707f3Ssthen c->fd = fd; 4326933707f3Ssthen c->buffer = NULL; 4327933707f3Ssthen c->timeout = NULL; 4328933707f3Ssthen c->tcp_is_reading = 0; 4329933707f3Ssthen c->tcp_byte_count = 0; 43302308e98cSsthen c->tcp_timeout_msec = idle_timeout; 43312308e98cSsthen c->tcp_conn_limit = tcp_conn_limit; 43322308e98cSsthen c->tcl_addr = NULL; 43332308e98cSsthen c->tcp_keepalive = 0; 4334933707f3Ssthen c->tcp_parent = NULL; 4335933707f3Ssthen c->max_tcp_count = num; 4336a58bff56Ssthen c->cur_tcp_count = 0; 4337933707f3Ssthen c->tcp_handlers = (struct comm_point**)calloc((size_t)num, 4338933707f3Ssthen sizeof(struct comm_point*)); 4339933707f3Ssthen if(!c->tcp_handlers) { 4340933707f3Ssthen free(c->ev); 4341933707f3Ssthen free(c); 4342933707f3Ssthen return NULL; 4343933707f3Ssthen } 4344933707f3Ssthen c->tcp_free = NULL; 4345933707f3Ssthen c->type = comm_tcp_accept; 4346933707f3Ssthen c->tcp_do_close = 0; 4347933707f3Ssthen c->do_not_close = 0; 4348933707f3Ssthen c->tcp_do_toggle_rw = 0; 4349933707f3Ssthen c->tcp_check_nb_connect = 0; 435077079be7Ssthen #ifdef USE_MSG_FASTOPEN 435177079be7Ssthen c->tcp_do_fastopen = 0; 435277079be7Ssthen #endif 43532be9e038Ssthen #ifdef USE_DNSCRYPT 43542be9e038Ssthen c->dnscrypt = 0; 43552be9e038Ssthen c->dnscrypt_buffer = NULL; 43562be9e038Ssthen #endif 4357933707f3Ssthen c->callback = NULL; 4358933707f3Ssthen c->cb_arg = NULL; 4359191f22c6Ssthen c->socket = socket; 436045872187Ssthen c->pp2_enabled = (port_type==listen_type_http?0:pp2_enabled); 436145872187Ssthen c->pp2_header_state = pp2_header_none; 43622ee382b6Ssthen evbits = UB_EV_READ | UB_EV_PERSIST; 
43632ee382b6Ssthen /* ub_event stuff */ 43642ee382b6Ssthen c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 43652ee382b6Ssthen comm_point_tcp_accept_callback, c); 43662ee382b6Ssthen if(c->ev->ev == NULL) { 43672ee382b6Ssthen log_err("could not baseset tcpacc event"); 43682ee382b6Ssthen comm_point_delete(c); 43692ee382b6Ssthen return NULL; 43702ee382b6Ssthen } 43712ee382b6Ssthen if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4372933707f3Ssthen log_err("could not add tcpacc event"); 4373933707f3Ssthen comm_point_delete(c); 4374933707f3Ssthen return NULL; 4375933707f3Ssthen } 43769982a05dSsthen c->event_added = 1; 43772c144df0Ssthen /* now prealloc the handlers */ 4378933707f3Ssthen for(i=0; i<num; i++) { 43792c144df0Ssthen if(port_type == listen_type_tcp || 43802c144df0Ssthen port_type == listen_type_ssl || 43812c144df0Ssthen port_type == listen_type_tcp_dnscrypt) { 4382933707f3Ssthen c->tcp_handlers[i] = comm_point_create_tcp_handler(base, 4383191f22c6Ssthen c, bufsize, spoolbuf, callback, callback_arg, socket); 43842c144df0Ssthen } else if(port_type == listen_type_http) { 43852c144df0Ssthen c->tcp_handlers[i] = comm_point_create_http_handler( 43862c144df0Ssthen base, c, bufsize, harden_large_queries, 43872c144df0Ssthen http_max_streams, http_endpoint, 4388191f22c6Ssthen callback, callback_arg, socket); 43892c144df0Ssthen } 43902c144df0Ssthen else { 43912c144df0Ssthen log_err("could not create tcp handler, unknown listen " 43922c144df0Ssthen "type"); 43932c144df0Ssthen return NULL; 43942c144df0Ssthen } 4395933707f3Ssthen if(!c->tcp_handlers[i]) { 4396933707f3Ssthen comm_point_delete(c); 4397933707f3Ssthen return NULL; 4398933707f3Ssthen } 4399933707f3Ssthen } 4400933707f3Ssthen 4401933707f3Ssthen return c; 4402933707f3Ssthen } 4403933707f3Ssthen 4404933707f3Ssthen struct comm_point* 4405933707f3Ssthen comm_point_create_tcp_out(struct comm_base *base, size_t bufsize, 440677079be7Ssthen comm_point_callback_type* callback, void* callback_arg) 4407933707f3Ssthen 
{ 4408933707f3Ssthen struct comm_point* c = (struct comm_point*)calloc(1, 4409933707f3Ssthen sizeof(struct comm_point)); 4410933707f3Ssthen short evbits; 4411933707f3Ssthen if(!c) 4412933707f3Ssthen return NULL; 4413933707f3Ssthen c->ev = (struct internal_event*)calloc(1, 4414933707f3Ssthen sizeof(struct internal_event)); 4415933707f3Ssthen if(!c->ev) { 4416933707f3Ssthen free(c); 4417933707f3Ssthen return NULL; 4418933707f3Ssthen } 4419933707f3Ssthen c->ev->base = base; 4420933707f3Ssthen c->fd = -1; 44210b68ff31Ssthen c->buffer = sldns_buffer_new(bufsize); 4422933707f3Ssthen if(!c->buffer) { 4423933707f3Ssthen free(c->ev); 4424933707f3Ssthen free(c); 4425933707f3Ssthen return NULL; 4426933707f3Ssthen } 4427933707f3Ssthen c->timeout = NULL; 4428933707f3Ssthen c->tcp_is_reading = 0; 4429933707f3Ssthen c->tcp_byte_count = 0; 44302308e98cSsthen c->tcp_timeout_msec = TCP_QUERY_TIMEOUT; 44312308e98cSsthen c->tcp_conn_limit = NULL; 44322308e98cSsthen c->tcl_addr = NULL; 44332308e98cSsthen c->tcp_keepalive = 0; 4434933707f3Ssthen c->tcp_parent = NULL; 4435933707f3Ssthen c->max_tcp_count = 0; 4436a58bff56Ssthen c->cur_tcp_count = 0; 4437933707f3Ssthen c->tcp_handlers = NULL; 4438933707f3Ssthen c->tcp_free = NULL; 4439933707f3Ssthen c->type = comm_tcp; 4440933707f3Ssthen c->tcp_do_close = 0; 4441933707f3Ssthen c->do_not_close = 0; 4442933707f3Ssthen c->tcp_do_toggle_rw = 1; 4443933707f3Ssthen c->tcp_check_nb_connect = 1; 444477079be7Ssthen #ifdef USE_MSG_FASTOPEN 444577079be7Ssthen c->tcp_do_fastopen = 1; 444677079be7Ssthen #endif 44472be9e038Ssthen #ifdef USE_DNSCRYPT 44482be9e038Ssthen c->dnscrypt = 0; 44492be9e038Ssthen c->dnscrypt_buffer = c->buffer; 44502be9e038Ssthen #endif 4451933707f3Ssthen c->repinfo.c = c; 4452933707f3Ssthen c->callback = callback; 4453933707f3Ssthen c->cb_arg = callback_arg; 445445872187Ssthen c->pp2_enabled = 0; 445545872187Ssthen c->pp2_header_state = pp2_header_none; 44562ee382b6Ssthen evbits = UB_EV_PERSIST | UB_EV_WRITE; 44572ee382b6Ssthen 
c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 44582ee382b6Ssthen comm_point_tcp_handle_callback, c); 44592ee382b6Ssthen if(c->ev->ev == NULL) 4460933707f3Ssthen { 44612ee382b6Ssthen log_err("could not baseset tcpout event"); 44620b68ff31Ssthen sldns_buffer_free(c->buffer); 4463933707f3Ssthen free(c->ev); 4464933707f3Ssthen free(c); 4465933707f3Ssthen return NULL; 4466933707f3Ssthen } 4467933707f3Ssthen 4468933707f3Ssthen return c; 4469933707f3Ssthen } 4470933707f3Ssthen 4471933707f3Ssthen struct comm_point* 4472938a3a5eSflorian comm_point_create_http_out(struct comm_base *base, size_t bufsize, 4473938a3a5eSflorian comm_point_callback_type* callback, void* callback_arg, 4474938a3a5eSflorian sldns_buffer* temp) 4475938a3a5eSflorian { 4476938a3a5eSflorian struct comm_point* c = (struct comm_point*)calloc(1, 4477938a3a5eSflorian sizeof(struct comm_point)); 4478938a3a5eSflorian short evbits; 4479938a3a5eSflorian if(!c) 4480938a3a5eSflorian return NULL; 4481938a3a5eSflorian c->ev = (struct internal_event*)calloc(1, 4482938a3a5eSflorian sizeof(struct internal_event)); 4483938a3a5eSflorian if(!c->ev) { 4484938a3a5eSflorian free(c); 4485938a3a5eSflorian return NULL; 4486938a3a5eSflorian } 4487938a3a5eSflorian c->ev->base = base; 4488938a3a5eSflorian c->fd = -1; 4489938a3a5eSflorian c->buffer = sldns_buffer_new(bufsize); 4490938a3a5eSflorian if(!c->buffer) { 4491938a3a5eSflorian free(c->ev); 4492938a3a5eSflorian free(c); 4493938a3a5eSflorian return NULL; 4494938a3a5eSflorian } 4495938a3a5eSflorian c->timeout = NULL; 4496938a3a5eSflorian c->tcp_is_reading = 0; 4497938a3a5eSflorian c->tcp_byte_count = 0; 4498938a3a5eSflorian c->tcp_parent = NULL; 4499938a3a5eSflorian c->max_tcp_count = 0; 4500938a3a5eSflorian c->cur_tcp_count = 0; 4501938a3a5eSflorian c->tcp_handlers = NULL; 4502938a3a5eSflorian c->tcp_free = NULL; 4503938a3a5eSflorian c->type = comm_http; 4504938a3a5eSflorian c->tcp_do_close = 0; 4505938a3a5eSflorian c->do_not_close = 0; 4506938a3a5eSflorian 
c->tcp_do_toggle_rw = 1; 4507938a3a5eSflorian c->tcp_check_nb_connect = 1; 4508938a3a5eSflorian c->http_in_headers = 1; 4509938a3a5eSflorian c->http_in_chunk_headers = 0; 4510938a3a5eSflorian c->http_is_chunked = 0; 4511938a3a5eSflorian c->http_temp = temp; 4512938a3a5eSflorian #ifdef USE_MSG_FASTOPEN 4513938a3a5eSflorian c->tcp_do_fastopen = 1; 4514938a3a5eSflorian #endif 4515938a3a5eSflorian #ifdef USE_DNSCRYPT 4516938a3a5eSflorian c->dnscrypt = 0; 4517938a3a5eSflorian c->dnscrypt_buffer = c->buffer; 4518938a3a5eSflorian #endif 4519938a3a5eSflorian c->repinfo.c = c; 4520938a3a5eSflorian c->callback = callback; 4521938a3a5eSflorian c->cb_arg = callback_arg; 452245872187Ssthen c->pp2_enabled = 0; 452345872187Ssthen c->pp2_header_state = pp2_header_none; 4524938a3a5eSflorian evbits = UB_EV_PERSIST | UB_EV_WRITE; 4525938a3a5eSflorian c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 4526938a3a5eSflorian comm_point_http_handle_callback, c); 4527938a3a5eSflorian if(c->ev->ev == NULL) 4528938a3a5eSflorian { 4529938a3a5eSflorian log_err("could not baseset tcpout event"); 4530938a3a5eSflorian #ifdef HAVE_SSL 4531938a3a5eSflorian SSL_free(c->ssl); 4532938a3a5eSflorian #endif 4533938a3a5eSflorian sldns_buffer_free(c->buffer); 4534938a3a5eSflorian free(c->ev); 4535938a3a5eSflorian free(c); 4536938a3a5eSflorian return NULL; 4537938a3a5eSflorian } 4538938a3a5eSflorian 4539938a3a5eSflorian return c; 4540938a3a5eSflorian } 4541938a3a5eSflorian 4542938a3a5eSflorian struct comm_point* 4543933707f3Ssthen comm_point_create_local(struct comm_base *base, int fd, size_t bufsize, 454477079be7Ssthen comm_point_callback_type* callback, void* callback_arg) 4545933707f3Ssthen { 4546933707f3Ssthen struct comm_point* c = (struct comm_point*)calloc(1, 4547933707f3Ssthen sizeof(struct comm_point)); 4548933707f3Ssthen short evbits; 4549933707f3Ssthen if(!c) 4550933707f3Ssthen return NULL; 4551933707f3Ssthen c->ev = (struct internal_event*)calloc(1, 4552933707f3Ssthen sizeof(struct 
internal_event)); 4553933707f3Ssthen if(!c->ev) { 4554933707f3Ssthen free(c); 4555933707f3Ssthen return NULL; 4556933707f3Ssthen } 4557933707f3Ssthen c->ev->base = base; 4558933707f3Ssthen c->fd = fd; 45590b68ff31Ssthen c->buffer = sldns_buffer_new(bufsize); 4560933707f3Ssthen if(!c->buffer) { 4561933707f3Ssthen free(c->ev); 4562933707f3Ssthen free(c); 4563933707f3Ssthen return NULL; 4564933707f3Ssthen } 4565933707f3Ssthen c->timeout = NULL; 4566933707f3Ssthen c->tcp_is_reading = 1; 4567933707f3Ssthen c->tcp_byte_count = 0; 4568933707f3Ssthen c->tcp_parent = NULL; 4569933707f3Ssthen c->max_tcp_count = 0; 4570a58bff56Ssthen c->cur_tcp_count = 0; 4571933707f3Ssthen c->tcp_handlers = NULL; 4572933707f3Ssthen c->tcp_free = NULL; 4573933707f3Ssthen c->type = comm_local; 4574933707f3Ssthen c->tcp_do_close = 0; 4575933707f3Ssthen c->do_not_close = 1; 4576933707f3Ssthen c->tcp_do_toggle_rw = 0; 4577933707f3Ssthen c->tcp_check_nb_connect = 0; 457877079be7Ssthen #ifdef USE_MSG_FASTOPEN 457977079be7Ssthen c->tcp_do_fastopen = 0; 458077079be7Ssthen #endif 45812be9e038Ssthen #ifdef USE_DNSCRYPT 45822be9e038Ssthen c->dnscrypt = 0; 45832be9e038Ssthen c->dnscrypt_buffer = c->buffer; 45842be9e038Ssthen #endif 4585933707f3Ssthen c->callback = callback; 4586933707f3Ssthen c->cb_arg = callback_arg; 458745872187Ssthen c->pp2_enabled = 0; 458845872187Ssthen c->pp2_header_state = pp2_header_none; 45892ee382b6Ssthen /* ub_event stuff */ 45902ee382b6Ssthen evbits = UB_EV_PERSIST | UB_EV_READ; 45912ee382b6Ssthen c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 45922ee382b6Ssthen comm_point_local_handle_callback, c); 45932ee382b6Ssthen if(c->ev->ev == NULL) { 45942ee382b6Ssthen log_err("could not baseset localhdl event"); 45952ee382b6Ssthen free(c->ev); 45962ee382b6Ssthen free(c); 45972ee382b6Ssthen return NULL; 45982ee382b6Ssthen } 45992ee382b6Ssthen if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4600933707f3Ssthen log_err("could not add localhdl event"); 46012ee382b6Ssthen 
ub_event_free(c->ev->ev); 4602933707f3Ssthen free(c->ev); 4603933707f3Ssthen free(c); 4604933707f3Ssthen return NULL; 4605933707f3Ssthen } 46069982a05dSsthen c->event_added = 1; 4607933707f3Ssthen return c; 4608933707f3Ssthen } 4609933707f3Ssthen 4610933707f3Ssthen struct comm_point* 4611933707f3Ssthen comm_point_create_raw(struct comm_base* base, int fd, int writing, 461277079be7Ssthen comm_point_callback_type* callback, void* callback_arg) 4613933707f3Ssthen { 4614933707f3Ssthen struct comm_point* c = (struct comm_point*)calloc(1, 4615933707f3Ssthen sizeof(struct comm_point)); 4616933707f3Ssthen short evbits; 4617933707f3Ssthen if(!c) 4618933707f3Ssthen return NULL; 4619933707f3Ssthen c->ev = (struct internal_event*)calloc(1, 4620933707f3Ssthen sizeof(struct internal_event)); 4621933707f3Ssthen if(!c->ev) { 4622933707f3Ssthen free(c); 4623933707f3Ssthen return NULL; 4624933707f3Ssthen } 4625933707f3Ssthen c->ev->base = base; 4626933707f3Ssthen c->fd = fd; 4627933707f3Ssthen c->buffer = NULL; 4628933707f3Ssthen c->timeout = NULL; 4629933707f3Ssthen c->tcp_is_reading = 0; 4630933707f3Ssthen c->tcp_byte_count = 0; 4631933707f3Ssthen c->tcp_parent = NULL; 4632933707f3Ssthen c->max_tcp_count = 0; 4633a58bff56Ssthen c->cur_tcp_count = 0; 4634933707f3Ssthen c->tcp_handlers = NULL; 4635933707f3Ssthen c->tcp_free = NULL; 4636933707f3Ssthen c->type = comm_raw; 4637933707f3Ssthen c->tcp_do_close = 0; 4638933707f3Ssthen c->do_not_close = 1; 4639933707f3Ssthen c->tcp_do_toggle_rw = 0; 4640933707f3Ssthen c->tcp_check_nb_connect = 0; 464177079be7Ssthen #ifdef USE_MSG_FASTOPEN 464277079be7Ssthen c->tcp_do_fastopen = 0; 464377079be7Ssthen #endif 46442be9e038Ssthen #ifdef USE_DNSCRYPT 46452be9e038Ssthen c->dnscrypt = 0; 46462be9e038Ssthen c->dnscrypt_buffer = c->buffer; 46472be9e038Ssthen #endif 4648933707f3Ssthen c->callback = callback; 4649933707f3Ssthen c->cb_arg = callback_arg; 465045872187Ssthen c->pp2_enabled = 0; 465145872187Ssthen c->pp2_header_state = pp2_header_none; 
46522ee382b6Ssthen /* ub_event stuff */ 4653933707f3Ssthen if(writing) 46542ee382b6Ssthen evbits = UB_EV_PERSIST | UB_EV_WRITE; 46552ee382b6Ssthen else evbits = UB_EV_PERSIST | UB_EV_READ; 46562ee382b6Ssthen c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, 46572ee382b6Ssthen comm_point_raw_handle_callback, c); 46582ee382b6Ssthen if(c->ev->ev == NULL) { 46592ee382b6Ssthen log_err("could not baseset rawhdl event"); 46602ee382b6Ssthen free(c->ev); 46612ee382b6Ssthen free(c); 46622ee382b6Ssthen return NULL; 46632ee382b6Ssthen } 46642ee382b6Ssthen if (ub_event_add(c->ev->ev, c->timeout) != 0) { 4665933707f3Ssthen log_err("could not add rawhdl event"); 46662ee382b6Ssthen ub_event_free(c->ev->ev); 4667933707f3Ssthen free(c->ev); 4668933707f3Ssthen free(c); 4669933707f3Ssthen return NULL; 4670933707f3Ssthen } 46719982a05dSsthen c->event_added = 1; 4672933707f3Ssthen return c; 4673933707f3Ssthen } 4674933707f3Ssthen 4675933707f3Ssthen void 4676933707f3Ssthen comm_point_close(struct comm_point* c) 4677933707f3Ssthen { 4678933707f3Ssthen if(!c) 4679933707f3Ssthen return; 468020237c55Ssthen if(c->fd != -1) { 4681eba819a2Ssthen verbose(5, "comm_point_close of %d: event_del", c->fd); 46829982a05dSsthen if(c->event_added) { 46832ee382b6Ssthen if(ub_event_del(c->ev->ev) != 0) { 4684933707f3Ssthen log_err("could not event_del on close"); 4685933707f3Ssthen } 46869982a05dSsthen c->event_added = 0; 46879982a05dSsthen } 468820237c55Ssthen } 46892308e98cSsthen tcl_close_connection(c->tcl_addr); 4690f6b99bafSsthen if(c->tcp_req_info) 4691f6b99bafSsthen tcp_req_info_clear(c->tcp_req_info); 46922c144df0Ssthen if(c->h2_session) 46932c144df0Ssthen http2_session_server_delete(c->h2_session); 46948b7325afSsthen /* stop the comm point from reading or writing after it is closed. 
*/ 46958b7325afSsthen if(c->tcp_more_read_again && *c->tcp_more_read_again) 46968b7325afSsthen *c->tcp_more_read_again = 0; 46978b7325afSsthen if(c->tcp_more_write_again && *c->tcp_more_write_again) 46988b7325afSsthen *c->tcp_more_write_again = 0; 46992c144df0Ssthen 4700933707f3Ssthen /* close fd after removing from event lists, or epoll.. is messed up */ 4701933707f3Ssthen if(c->fd != -1 && !c->do_not_close) { 4702191f22c6Ssthen #ifdef USE_WINSOCK 470320237c55Ssthen if(c->type == comm_tcp || c->type == comm_http) { 470420237c55Ssthen /* delete sticky events for the fd, it gets closed */ 470520237c55Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ); 470620237c55Ssthen ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE); 470720237c55Ssthen } 4708191f22c6Ssthen #endif 4709933707f3Ssthen verbose(VERB_ALGO, "close fd %d", c->fd); 47102c144df0Ssthen sock_close(c->fd); 4711933707f3Ssthen } 4712933707f3Ssthen c->fd = -1; 4713933707f3Ssthen } 4714933707f3Ssthen 4715933707f3Ssthen void 4716933707f3Ssthen comm_point_delete(struct comm_point* c) 4717933707f3Ssthen { 4718933707f3Ssthen if(!c) 4719933707f3Ssthen return; 4720938a3a5eSflorian if((c->type == comm_tcp || c->type == comm_http) && c->ssl) { 4721cebdf579Ssthen #ifdef HAVE_SSL 4722933707f3Ssthen SSL_shutdown(c->ssl); 4723933707f3Ssthen SSL_free(c->ssl); 4724cebdf579Ssthen #endif 4725933707f3Ssthen } 47262c144df0Ssthen if(c->type == comm_http && c->http_endpoint) { 47272c144df0Ssthen free(c->http_endpoint); 47282c144df0Ssthen c->http_endpoint = NULL; 47292c144df0Ssthen } 4730933707f3Ssthen comm_point_close(c); 4731933707f3Ssthen if(c->tcp_handlers) { 4732933707f3Ssthen int i; 4733933707f3Ssthen for(i=0; i<c->max_tcp_count; i++) 4734933707f3Ssthen comm_point_delete(c->tcp_handlers[i]); 4735933707f3Ssthen free(c->tcp_handlers); 4736933707f3Ssthen } 4737933707f3Ssthen free(c->timeout); 4738938a3a5eSflorian if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) { 47390b68ff31Ssthen 
sldns_buffer_free(c->buffer); 47402be9e038Ssthen #ifdef USE_DNSCRYPT 47412be9e038Ssthen if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) { 47422be9e038Ssthen sldns_buffer_free(c->dnscrypt_buffer); 47432be9e038Ssthen } 47442be9e038Ssthen #endif 4745f6b99bafSsthen if(c->tcp_req_info) { 4746f6b99bafSsthen tcp_req_info_delete(c->tcp_req_info); 4747f6b99bafSsthen } 47482c144df0Ssthen if(c->h2_session) { 47492c144df0Ssthen http2_session_delete(c->h2_session); 47502c144df0Ssthen } 47512be9e038Ssthen } 47522ee382b6Ssthen ub_event_free(c->ev->ev); 4753933707f3Ssthen free(c->ev); 4754933707f3Ssthen free(c); 4755933707f3Ssthen } 4756933707f3Ssthen 4757933707f3Ssthen void 4758933707f3Ssthen comm_point_send_reply(struct comm_reply *repinfo) 4759933707f3Ssthen { 47602be9e038Ssthen struct sldns_buffer* buffer; 4761933707f3Ssthen log_assert(repinfo && repinfo->c); 47622be9e038Ssthen #ifdef USE_DNSCRYPT 47632be9e038Ssthen buffer = repinfo->c->dnscrypt_buffer; 47642be9e038Ssthen if(!dnsc_handle_uncurved_request(repinfo)) { 47652be9e038Ssthen return; 47662be9e038Ssthen } 47672be9e038Ssthen #else 47682be9e038Ssthen buffer = repinfo->c->buffer; 47692be9e038Ssthen #endif 4770933707f3Ssthen if(repinfo->c->type == comm_udp) { 4771933707f3Ssthen if(repinfo->srctype) 477245872187Ssthen comm_point_send_udp_msg_if(repinfo->c, buffer, 477345872187Ssthen (struct sockaddr*)&repinfo->remote_addr, 477445872187Ssthen repinfo->remote_addrlen, repinfo); 4775933707f3Ssthen else 47762be9e038Ssthen comm_point_send_udp_msg(repinfo->c, buffer, 477745872187Ssthen (struct sockaddr*)&repinfo->remote_addr, 477845872187Ssthen repinfo->remote_addrlen, 0); 4779e10d3884Sbrad #ifdef USE_DNSTAP 4780191f22c6Ssthen /* 4781191f22c6Ssthen * sending src (client)/dst (local service) addresses over DNSTAP from udp callback 4782191f22c6Ssthen */ 4783191f22c6Ssthen if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) { 47842bdc0ed1Ssthen log_addr(VERB_ALGO, "from local addr", 
(void*)repinfo->c->socket->addr, repinfo->c->socket->addrlen); 478545872187Ssthen log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); 47862bdc0ed1Ssthen dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr, repinfo->c->type, repinfo->c->ssl, repinfo->c->buffer); 4787191f22c6Ssthen } 4788e10d3884Sbrad #endif 4789933707f3Ssthen } else { 4790e10d3884Sbrad #ifdef USE_DNSTAP 4791191f22c6Ssthen /* 4792191f22c6Ssthen * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback 4793191f22c6Ssthen */ 4794191f22c6Ssthen if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) { 47952bdc0ed1Ssthen log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr, repinfo->c->socket->addrlen); 479645872187Ssthen log_addr(VERB_ALGO, "response to client", &repinfo->client_addr, repinfo->client_addrlen); 47972bdc0ed1Ssthen dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->client_addr, (void*)repinfo->c->socket->addr, repinfo->c->type, repinfo->c->ssl, 4798191f22c6Ssthen ( repinfo->c->tcp_req_info? 
repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer )); 4799191f22c6Ssthen } 4800e10d3884Sbrad #endif 4801f6b99bafSsthen if(repinfo->c->tcp_req_info) { 4802f6b99bafSsthen tcp_req_info_send_reply(repinfo->c->tcp_req_info); 48032c144df0Ssthen } else if(repinfo->c->use_h2) { 48042c144df0Ssthen if(!http2_submit_dns_response(repinfo->c->h2_session)) { 48052c144df0Ssthen comm_point_drop_reply(repinfo); 48062c144df0Ssthen return; 48072c144df0Ssthen } 48082c144df0Ssthen repinfo->c->h2_stream = NULL; 48092c144df0Ssthen repinfo->c->tcp_is_reading = 0; 48102c144df0Ssthen comm_point_stop_listening(repinfo->c); 48112c144df0Ssthen comm_point_start_listening(repinfo->c, -1, 48129982a05dSsthen adjusted_tcp_timeout(repinfo->c)); 48132c144df0Ssthen return; 4814f6b99bafSsthen } else { 481577079be7Ssthen comm_point_start_listening(repinfo->c, -1, 48169982a05dSsthen adjusted_tcp_timeout(repinfo->c)); 4817933707f3Ssthen } 4818933707f3Ssthen } 4819f6b99bafSsthen } 4820933707f3Ssthen 4821933707f3Ssthen void 4822933707f3Ssthen comm_point_drop_reply(struct comm_reply* repinfo) 4823933707f3Ssthen { 4824933707f3Ssthen if(!repinfo) 4825933707f3Ssthen return; 4826ebf5bb73Ssthen log_assert(repinfo->c); 4827933707f3Ssthen log_assert(repinfo->c->type != comm_tcp_accept); 4828933707f3Ssthen if(repinfo->c->type == comm_udp) 4829933707f3Ssthen return; 4830f6b99bafSsthen if(repinfo->c->tcp_req_info) 4831f6b99bafSsthen repinfo->c->tcp_req_info->is_drop = 1; 48322c144df0Ssthen if(repinfo->c->type == comm_http) { 48332c144df0Ssthen if(repinfo->c->h2_session) { 48342c144df0Ssthen repinfo->c->h2_session->is_drop = 1; 48352c144df0Ssthen if(!repinfo->c->h2_session->postpone_drop) 48362c144df0Ssthen reclaim_http_handler(repinfo->c); 48372c144df0Ssthen return; 48382c144df0Ssthen } 48392c144df0Ssthen reclaim_http_handler(repinfo->c); 48402c144df0Ssthen return; 48412c144df0Ssthen } 4842933707f3Ssthen reclaim_tcp_handler(repinfo->c); 4843933707f3Ssthen } 4844933707f3Ssthen 4845933707f3Ssthen void 
4846933707f3Ssthen comm_point_stop_listening(struct comm_point* c) 4847933707f3Ssthen { 4848933707f3Ssthen verbose(VERB_ALGO, "comm point stop listening %d", c->fd); 48499982a05dSsthen if(c->event_added) { 48502ee382b6Ssthen if(ub_event_del(c->ev->ev) != 0) { 4851933707f3Ssthen log_err("event_del error to stoplisten"); 4852933707f3Ssthen } 48539982a05dSsthen c->event_added = 0; 48549982a05dSsthen } 4855933707f3Ssthen } 4856933707f3Ssthen 4857933707f3Ssthen void 485877079be7Ssthen comm_point_start_listening(struct comm_point* c, int newfd, int msec) 4859933707f3Ssthen { 4860550cf4a9Ssthen verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 4861550cf4a9Ssthen c->fd==-1?newfd:c->fd, msec); 4862933707f3Ssthen if(c->type == comm_tcp_accept && !c->tcp_free) { 4863933707f3Ssthen /* no use to start listening no free slots. */ 4864933707f3Ssthen return; 4865933707f3Ssthen } 48669982a05dSsthen if(c->event_added) { 48679982a05dSsthen if(ub_event_del(c->ev->ev) != 0) { 48689982a05dSsthen log_err("event_del error to startlisten"); 48699982a05dSsthen } 48709982a05dSsthen c->event_added = 0; 48719982a05dSsthen } 487277079be7Ssthen if(msec != -1 && msec != 0) { 4873933707f3Ssthen if(!c->timeout) { 4874933707f3Ssthen c->timeout = (struct timeval*)malloc(sizeof( 4875933707f3Ssthen struct timeval)); 4876933707f3Ssthen if(!c->timeout) { 4877933707f3Ssthen log_err("cpsl: malloc failed. No net read."); 4878933707f3Ssthen return; 4879933707f3Ssthen } 4880933707f3Ssthen } 48812ee382b6Ssthen ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT); 4882933707f3Ssthen #ifndef S_SPLINT_S /* splint fails on struct timeval. 
*/ 488377079be7Ssthen c->timeout->tv_sec = msec/1000; 488477079be7Ssthen c->timeout->tv_usec = (msec%1000)*1000; 4885933707f3Ssthen #endif /* S_SPLINT_S */ 4886e21c60efSsthen } else { 4887e21c60efSsthen if(msec == 0 || !c->timeout) { 4888e21c60efSsthen ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4889e21c60efSsthen } 4890933707f3Ssthen } 4891938a3a5eSflorian if(c->type == comm_tcp || c->type == comm_http) { 48922ee382b6Ssthen ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4893eba819a2Ssthen if(c->tcp_write_and_read) { 4894eba819a2Ssthen verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd)); 4895eba819a2Ssthen ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 4896eba819a2Ssthen } else if(c->tcp_is_reading) { 4897eba819a2Ssthen verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd)); 48982ee382b6Ssthen ub_event_add_bits(c->ev->ev, UB_EV_READ); 4899eba819a2Ssthen } else { 4900eba819a2Ssthen verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd)); 4901eba819a2Ssthen ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 4902eba819a2Ssthen } 4903933707f3Ssthen } 4904933707f3Ssthen if(newfd != -1) { 4905eba819a2Ssthen if(c->fd != -1 && c->fd != newfd) { 4906eba819a2Ssthen verbose(5, "cpsl close of fd %d for %d", c->fd, newfd); 49072c144df0Ssthen sock_close(c->fd); 4908933707f3Ssthen } 4909933707f3Ssthen c->fd = newfd; 49102ee382b6Ssthen ub_event_set_fd(c->ev->ev, c->fd); 4911933707f3Ssthen } 491277079be7Ssthen if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) { 4913933707f3Ssthen log_err("event_add failed. 
in cpsl."); 4914e21c60efSsthen return; 4915933707f3Ssthen } 49169982a05dSsthen c->event_added = 1; 4917933707f3Ssthen } 4918933707f3Ssthen 4919933707f3Ssthen void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr) 4920933707f3Ssthen { 4921933707f3Ssthen verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr); 49229982a05dSsthen if(c->event_added) { 49232ee382b6Ssthen if(ub_event_del(c->ev->ev) != 0) { 4924933707f3Ssthen log_err("event_del error to cplf"); 4925933707f3Ssthen } 49269982a05dSsthen c->event_added = 0; 49279982a05dSsthen } 4928e21c60efSsthen if(!c->timeout) { 4929e21c60efSsthen ub_event_del_bits(c->ev->ev, UB_EV_TIMEOUT); 4930e21c60efSsthen } 49312ee382b6Ssthen ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE); 49322ee382b6Ssthen if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ); 49332ee382b6Ssthen if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE); 49342ee382b6Ssthen if(ub_event_add(c->ev->ev, c->timeout) != 0) { 4935933707f3Ssthen log_err("event_add failed. 
in cplf."); 4936e21c60efSsthen return; 4937933707f3Ssthen } 49389982a05dSsthen c->event_added = 1; 4939933707f3Ssthen } 4940933707f3Ssthen 4941933707f3Ssthen size_t comm_point_get_mem(struct comm_point* c) 4942933707f3Ssthen { 4943933707f3Ssthen size_t s; 4944933707f3Ssthen if(!c) 4945933707f3Ssthen return 0; 4946933707f3Ssthen s = sizeof(*c) + sizeof(*c->ev); 4947933707f3Ssthen if(c->timeout) 4948933707f3Ssthen s += sizeof(*c->timeout); 49492be9e038Ssthen if(c->type == comm_tcp || c->type == comm_local) { 49500b68ff31Ssthen s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer); 49512be9e038Ssthen #ifdef USE_DNSCRYPT 49522be9e038Ssthen s += sizeof(*c->dnscrypt_buffer); 49532be9e038Ssthen if(c->buffer != c->dnscrypt_buffer) { 49542be9e038Ssthen s += sldns_buffer_capacity(c->dnscrypt_buffer); 49552be9e038Ssthen } 49562be9e038Ssthen #endif 49572be9e038Ssthen } 4958933707f3Ssthen if(c->type == comm_tcp_accept) { 4959933707f3Ssthen int i; 4960933707f3Ssthen for(i=0; i<c->max_tcp_count; i++) 4961933707f3Ssthen s += comm_point_get_mem(c->tcp_handlers[i]); 4962933707f3Ssthen } 4963933707f3Ssthen return s; 4964933707f3Ssthen } 4965933707f3Ssthen 4966933707f3Ssthen struct comm_timer* 4967933707f3Ssthen comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg) 4968933707f3Ssthen { 49692ee382b6Ssthen struct internal_timer *tm = (struct internal_timer*)calloc(1, 4970933707f3Ssthen sizeof(struct internal_timer)); 49712ee382b6Ssthen if(!tm) { 4972933707f3Ssthen log_err("malloc failed"); 4973933707f3Ssthen return NULL; 4974933707f3Ssthen } 49752ee382b6Ssthen tm->super.ev_timer = tm; 49762ee382b6Ssthen tm->base = base; 49772ee382b6Ssthen tm->super.callback = cb; 49782ee382b6Ssthen tm->super.cb_arg = cb_arg; 49792ee382b6Ssthen tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 49802ee382b6Ssthen comm_timer_callback, &tm->super); 49812ee382b6Ssthen if(tm->ev == NULL) { 4982933707f3Ssthen log_err("timer_create: event_base_set failed."); 4983933707f3Ssthen 
free(tm); 4984933707f3Ssthen return NULL; 4985933707f3Ssthen } 49862ee382b6Ssthen return &tm->super; 4987933707f3Ssthen } 4988933707f3Ssthen 4989933707f3Ssthen void 4990933707f3Ssthen comm_timer_disable(struct comm_timer* timer) 4991933707f3Ssthen { 4992933707f3Ssthen if(!timer) 4993933707f3Ssthen return; 49942ee382b6Ssthen ub_timer_del(timer->ev_timer->ev); 4995933707f3Ssthen timer->ev_timer->enabled = 0; 4996933707f3Ssthen } 4997933707f3Ssthen 4998933707f3Ssthen void 4999933707f3Ssthen comm_timer_set(struct comm_timer* timer, struct timeval* tv) 5000933707f3Ssthen { 5001933707f3Ssthen log_assert(tv); 5002933707f3Ssthen if(timer->ev_timer->enabled) 5003933707f3Ssthen comm_timer_disable(timer); 50042ee382b6Ssthen if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base, 50052ee382b6Ssthen comm_timer_callback, timer, tv) != 0) 5006933707f3Ssthen log_err("comm_timer_set: evtimer_add failed."); 5007933707f3Ssthen timer->ev_timer->enabled = 1; 5008933707f3Ssthen } 5009933707f3Ssthen 5010933707f3Ssthen void 5011933707f3Ssthen comm_timer_delete(struct comm_timer* timer) 5012933707f3Ssthen { 5013933707f3Ssthen if(!timer) 5014933707f3Ssthen return; 5015933707f3Ssthen comm_timer_disable(timer); 50162ee382b6Ssthen /* Free the sub struct timer->ev_timer derived from the super struct timer. 50172ee382b6Ssthen * i.e. 
assert(timer == timer->ev_timer) 50182ee382b6Ssthen */ 50192ee382b6Ssthen ub_event_free(timer->ev_timer->ev); 5020933707f3Ssthen free(timer->ev_timer); 5021933707f3Ssthen } 5022933707f3Ssthen 5023933707f3Ssthen void 5024933707f3Ssthen comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg) 5025933707f3Ssthen { 5026933707f3Ssthen struct comm_timer* tm = (struct comm_timer*)arg; 50272ee382b6Ssthen if(!(event&UB_EV_TIMEOUT)) 5028933707f3Ssthen return; 50292ee382b6Ssthen ub_comm_base_now(tm->ev_timer->base); 5030933707f3Ssthen tm->ev_timer->enabled = 0; 5031933707f3Ssthen fptr_ok(fptr_whitelist_comm_timer(tm->callback)); 5032933707f3Ssthen (*tm->callback)(tm->cb_arg); 5033933707f3Ssthen } 5034933707f3Ssthen 5035933707f3Ssthen int 5036933707f3Ssthen comm_timer_is_set(struct comm_timer* timer) 5037933707f3Ssthen { 5038933707f3Ssthen return (int)timer->ev_timer->enabled; 5039933707f3Ssthen } 5040933707f3Ssthen 5041933707f3Ssthen size_t 50422ee382b6Ssthen comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer)) 5043933707f3Ssthen { 50442ee382b6Ssthen return sizeof(struct internal_timer); 5045933707f3Ssthen } 5046933707f3Ssthen 5047933707f3Ssthen struct comm_signal* 5048933707f3Ssthen comm_signal_create(struct comm_base* base, 5049933707f3Ssthen void (*callback)(int, void*), void* cb_arg) 5050933707f3Ssthen { 5051933707f3Ssthen struct comm_signal* com = (struct comm_signal*)malloc( 5052933707f3Ssthen sizeof(struct comm_signal)); 5053933707f3Ssthen if(!com) { 5054933707f3Ssthen log_err("malloc failed"); 5055933707f3Ssthen return NULL; 5056933707f3Ssthen } 5057933707f3Ssthen com->base = base; 5058933707f3Ssthen com->callback = callback; 5059933707f3Ssthen com->cb_arg = cb_arg; 5060933707f3Ssthen com->ev_signal = NULL; 5061933707f3Ssthen return com; 5062933707f3Ssthen } 5063933707f3Ssthen 5064933707f3Ssthen void 5065933707f3Ssthen comm_signal_callback(int sig, short event, void* arg) 5066933707f3Ssthen { 5067933707f3Ssthen struct comm_signal* comsig = (struct 
comm_signal*)arg; 50682ee382b6Ssthen if(!(event & UB_EV_SIGNAL)) 5069933707f3Ssthen return; 50702ee382b6Ssthen ub_comm_base_now(comsig->base); 5071933707f3Ssthen fptr_ok(fptr_whitelist_comm_signal(comsig->callback)); 5072933707f3Ssthen (*comsig->callback)(sig, comsig->cb_arg); 5073933707f3Ssthen } 5074933707f3Ssthen 5075933707f3Ssthen int 5076933707f3Ssthen comm_signal_bind(struct comm_signal* comsig, int sig) 5077933707f3Ssthen { 5078933707f3Ssthen struct internal_signal* entry = (struct internal_signal*)calloc(1, 5079933707f3Ssthen sizeof(struct internal_signal)); 5080933707f3Ssthen if(!entry) { 5081933707f3Ssthen log_err("malloc failed"); 5082933707f3Ssthen return 0; 5083933707f3Ssthen } 5084933707f3Ssthen log_assert(comsig); 5085933707f3Ssthen /* add signal event */ 50862ee382b6Ssthen entry->ev = ub_signal_new(comsig->base->eb->base, sig, 50872ee382b6Ssthen comm_signal_callback, comsig); 50882ee382b6Ssthen if(entry->ev == NULL) { 50892ee382b6Ssthen log_err("Could not create signal event"); 5090933707f3Ssthen free(entry); 5091933707f3Ssthen return 0; 5092933707f3Ssthen } 50932ee382b6Ssthen if(ub_signal_add(entry->ev, NULL) != 0) { 5094933707f3Ssthen log_err("Could not add signal handler"); 50952ee382b6Ssthen ub_event_free(entry->ev); 5096933707f3Ssthen free(entry); 5097933707f3Ssthen return 0; 5098933707f3Ssthen } 5099933707f3Ssthen /* link into list */ 5100933707f3Ssthen entry->next = comsig->ev_signal; 5101933707f3Ssthen comsig->ev_signal = entry; 5102933707f3Ssthen return 1; 5103933707f3Ssthen } 5104933707f3Ssthen 5105933707f3Ssthen void 5106933707f3Ssthen comm_signal_delete(struct comm_signal* comsig) 5107933707f3Ssthen { 5108933707f3Ssthen struct internal_signal* p, *np; 5109933707f3Ssthen if(!comsig) 5110933707f3Ssthen return; 5111933707f3Ssthen p=comsig->ev_signal; 5112933707f3Ssthen while(p) { 5113933707f3Ssthen np = p->next; 51142ee382b6Ssthen ub_signal_del(p->ev); 51152ee382b6Ssthen ub_event_free(p->ev); 5116933707f3Ssthen free(p); 5117933707f3Ssthen p 
= np; 5118933707f3Ssthen } 5119933707f3Ssthen free(comsig); 5120933707f3Ssthen } 5121