1 /* 2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file has functions to get queries from clients. 
40 */ 41 #include "config.h" 42 #ifdef HAVE_SYS_TYPES_H 43 # include <sys/types.h> 44 #endif 45 #include <sys/time.h> 46 #include <limits.h> 47 #ifdef USE_TCP_FASTOPEN 48 #include <netinet/tcp.h> 49 #endif 50 #include <ctype.h> 51 #include "services/listen_dnsport.h" 52 #include "services/outside_network.h" 53 #include "util/netevent.h" 54 #include "util/log.h" 55 #include "util/config_file.h" 56 #include "util/net_help.h" 57 #include "sldns/sbuffer.h" 58 #include "sldns/parseutil.h" 59 #include "sldns/wire2str.h" 60 #include "services/mesh.h" 61 #include "util/fptr_wlist.h" 62 #include "util/locks.h" 63 #include "util/timeval_func.h" 64 65 #ifdef HAVE_NETDB_H 66 #include <netdb.h> 67 #endif 68 #include <fcntl.h> 69 70 #ifdef HAVE_SYS_UN_H 71 #include <sys/un.h> 72 #endif 73 74 #ifdef HAVE_SYSTEMD 75 #include <systemd/sd-daemon.h> 76 #endif 77 78 #ifdef HAVE_IFADDRS_H 79 #include <ifaddrs.h> 80 #endif 81 #ifdef HAVE_NET_IF_H 82 #include <net/if.h> 83 #endif 84 85 #ifdef HAVE_TIME_H 86 #include <time.h> 87 #endif 88 #include <sys/time.h> 89 90 #ifdef HAVE_NGTCP2 91 #include <ngtcp2/ngtcp2.h> 92 #include <ngtcp2/ngtcp2_crypto.h> 93 #ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H 94 #include <ngtcp2/ngtcp2_crypto_quictls.h> 95 #else 96 #include <ngtcp2/ngtcp2_crypto_openssl.h> 97 #endif 98 #endif 99 100 #ifdef HAVE_OPENSSL_SSL_H 101 #include <openssl/ssl.h> 102 #endif 103 104 #ifdef HAVE_LINUX_NET_TSTAMP_H 105 #include <linux/net_tstamp.h> 106 #endif 107 108 /** number of queued TCP connections for listen() */ 109 #define TCP_BACKLOG 256 110 111 #ifndef THREADS_DISABLED 112 /** lock on the counter of stream buffer memory */ 113 static lock_basic_type stream_wait_count_lock; 114 /** lock on the counter of HTTP2 query buffer memory */ 115 static lock_basic_type http2_query_buffer_count_lock; 116 /** lock on the counter of HTTP2 response buffer memory */ 117 static lock_basic_type http2_response_buffer_count_lock; 118 #endif 119 /** size (in bytes) of stream wait buffers */ 
120 static size_t stream_wait_count = 0; 121 /** is the lock initialised for stream wait buffers */ 122 static int stream_wait_lock_inited = 0; 123 /** size (in bytes) of HTTP2 query buffers */ 124 static size_t http2_query_buffer_count = 0; 125 /** is the lock initialised for HTTP2 query buffers */ 126 static int http2_query_buffer_lock_inited = 0; 127 /** size (in bytes) of HTTP2 response buffers */ 128 static size_t http2_response_buffer_count = 0; 129 /** is the lock initialised for HTTP2 response buffers */ 130 static int http2_response_buffer_lock_inited = 0; 131 132 /** 133 * Debug print of the getaddrinfo returned address. 134 * @param addr: the address returned. 135 * @param additional: additional text that describes the type of socket, 136 * or NULL for no text. 137 */ 138 static void 139 verbose_print_addr(struct addrinfo *addr, const char* additional) 140 { 141 if(verbosity >= VERB_ALGO) { 142 char buf[100]; 143 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr; 144 #ifdef INET6 145 if(addr->ai_family == AF_INET6) 146 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)-> 147 sin6_addr; 148 #endif /* INET6 */ 149 if(inet_ntop(addr->ai_family, sinaddr, buf, 150 (socklen_t)sizeof(buf)) == 0) { 151 (void)strlcpy(buf, "(null)", sizeof(buf)); 152 } 153 buf[sizeof(buf)-1] = 0; 154 verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s", 155 addr->ai_socktype==SOCK_DGRAM?"udp": 156 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", 157 addr->ai_family==AF_INET?"4": 158 addr->ai_family==AF_INET6?"6": 159 "_otherfam", buf, 160 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port), 161 (additional?" 
":""), (additional?additional:"")); 162 } 163 } 164 165 void 166 verbose_print_unbound_socket(struct unbound_socket* ub_sock) 167 { 168 if(verbosity >= VERB_ALGO) { 169 char buf[256]; 170 log_info("listing of unbound_socket structure:"); 171 addr_to_str((void*)ub_sock->addr, ub_sock->addrlen, buf, 172 sizeof(buf)); 173 log_info("%s s is: %d, fam is: %s, acl: %s", buf, ub_sock->s, 174 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6", 175 ub_sock->acl?"yes":"no"); 176 } 177 } 178 179 #ifdef HAVE_SYSTEMD 180 static int 181 systemd_get_activated(int family, int socktype, int listen, 182 struct sockaddr *addr, socklen_t addrlen, 183 const char *path) 184 { 185 int i = 0; 186 int r = 0; 187 int s = -1; 188 const char* listen_pid, *listen_fds; 189 190 /* We should use "listen" option only for stream protocols. For UDP it should be -1 */ 191 192 if((r = sd_booted()) < 1) { 193 if(r == 0) 194 log_warn("systemd is not running"); 195 else 196 log_err("systemd sd_booted(): %s", strerror(-r)); 197 return -1; 198 } 199 200 listen_pid = getenv("LISTEN_PID"); 201 listen_fds = getenv("LISTEN_FDS"); 202 203 if (!listen_pid) { 204 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID"); 205 return -1; 206 } 207 208 if (!listen_fds) { 209 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS"); 210 return -1; 211 } 212 213 if((r = sd_listen_fds(0)) < 1) { 214 if(r == 0) 215 log_warn("systemd: did not return socket, check unit configuration"); 216 else 217 log_err("systemd sd_listen_fds(): %s", strerror(-r)); 218 return -1; 219 } 220 221 for(i = 0; i < r; i++) { 222 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) { 223 s = SD_LISTEN_FDS_START + i; 224 break; 225 } 226 } 227 if (s == -1) { 228 if (addr) 229 log_err_addr("systemd sd_listen_fds()", 230 "no such socket", 231 (struct sockaddr_storage *)addr, addrlen); 232 else 233 log_err("systemd sd_listen_fds(): %s", path); 234 } 235 return s; 236 } 237 #endif 238 239 int 240 create_udp_sock(int 
family, int socktype, struct sockaddr* addr, 241 socklen_t addrlen, int v6only, int* inuse, int* noproto, 242 int rcv, int snd, int listen, int* reuseport, int transparent, 243 int freebind, int use_systemd, int dscp) 244 { 245 int s; 246 char* err; 247 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY) 248 int on=1; 249 #endif 250 #ifdef IPV6_MTU 251 int mtu = IPV6_MIN_MTU; 252 #endif 253 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF) 254 (void)rcv; 255 #endif 256 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF) 257 (void)snd; 258 #endif 259 #ifndef IPV6_V6ONLY 260 (void)v6only; 261 #endif 262 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY) 263 (void)transparent; 264 #endif 265 #if !defined(IP_FREEBIND) 266 (void)freebind; 267 #endif 268 #ifdef HAVE_SYSTEMD 269 int got_fd_from_systemd = 0; 270 271 if (!use_systemd 272 || (use_systemd 273 && (s = systemd_get_activated(family, socktype, -1, addr, 274 addrlen, NULL)) == -1)) { 275 #else 276 (void)use_systemd; 277 #endif 278 if((s = socket(family, socktype, 0)) == -1) { 279 *inuse = 0; 280 #ifndef USE_WINSOCK 281 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 282 *noproto = 1; 283 return -1; 284 } 285 #else 286 if(WSAGetLastError() == WSAEAFNOSUPPORT || 287 WSAGetLastError() == WSAEPROTONOSUPPORT) { 288 *noproto = 1; 289 return -1; 290 } 291 #endif 292 log_err("can't create socket: %s", sock_strerror(errno)); 293 *noproto = 0; 294 return -1; 295 } 296 #ifdef HAVE_SYSTEMD 297 } else { 298 got_fd_from_systemd = 1; 299 } 300 #endif 301 if(listen) { 302 #ifdef SO_REUSEADDR 303 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 304 (socklen_t)sizeof(on)) < 0) { 305 log_err("setsockopt(.. SO_REUSEADDR ..) 
failed: %s", 306 sock_strerror(errno)); 307 #ifndef USE_WINSOCK 308 if(errno != ENOSYS) { 309 close(s); 310 *noproto = 0; 311 *inuse = 0; 312 return -1; 313 } 314 #else 315 closesocket(s); 316 *noproto = 0; 317 *inuse = 0; 318 return -1; 319 #endif 320 } 321 #endif /* SO_REUSEADDR */ 322 #ifdef SO_REUSEPORT 323 # ifdef SO_REUSEPORT_LB 324 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance 325 * like SO_REUSEPORT on Linux. This is what the users want 326 * with the config option in unbound.conf; if we actually 327 * need local address and port reuse they'll also need to 328 * have SO_REUSEPORT set for them, assume it was _LB they want. 329 */ 330 if (reuseport && *reuseport && 331 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on, 332 (socklen_t)sizeof(on)) < 0) { 333 #ifdef ENOPROTOOPT 334 if(errno != ENOPROTOOPT || verbosity >= 3) 335 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s", 336 strerror(errno)); 337 #endif 338 /* this option is not essential, we can continue */ 339 *reuseport = 0; 340 } 341 # else /* no SO_REUSEPORT_LB */ 342 343 /* try to set SO_REUSEPORT so that incoming 344 * queries are distributed evenly among the receiving threads. 345 * Each thread must have its own socket bound to the same port, 346 * with SO_REUSEPORT set on each socket. 347 */ 348 if (reuseport && *reuseport && 349 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 350 (socklen_t)sizeof(on)) < 0) { 351 #ifdef ENOPROTOOPT 352 if(errno != ENOPROTOOPT || verbosity >= 3) 353 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 354 strerror(errno)); 355 #endif 356 /* this option is not essential, we can continue */ 357 *reuseport = 0; 358 } 359 # endif /* SO_REUSEPORT_LB */ 360 #else 361 (void)reuseport; 362 #endif /* defined(SO_REUSEPORT) */ 363 #ifdef IP_TRANSPARENT 364 if (transparent && 365 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 366 (socklen_t)sizeof(on)) < 0) { 367 log_warn("setsockopt(.. IP_TRANSPARENT ..) 
failed: %s", 368 strerror(errno)); 369 } 370 #elif defined(IP_BINDANY) 371 if (transparent && 372 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 373 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 374 (void*)&on, (socklen_t)sizeof(on)) < 0) { 375 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 376 (family==AF_INET6?"V6":""), strerror(errno)); 377 } 378 #elif defined(SO_BINDANY) 379 if (transparent && 380 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, 381 (socklen_t)sizeof(on)) < 0) { 382 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s", 383 strerror(errno)); 384 } 385 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 386 } 387 #ifdef IP_FREEBIND 388 if(freebind && 389 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 390 (socklen_t)sizeof(on)) < 0) { 391 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s", 392 strerror(errno)); 393 } 394 #endif /* IP_FREEBIND */ 395 if(rcv) { 396 #ifdef SO_RCVBUF 397 int got; 398 socklen_t slen = (socklen_t)sizeof(got); 399 # ifdef SO_RCVBUFFORCE 400 /* Linux specific: try to use root permission to override 401 * system limits on rcvbuf. The limit is stored in 402 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */ 403 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 404 (socklen_t)sizeof(rcv)) < 0) { 405 if(errno != EPERM) { 406 log_err("setsockopt(..., SO_RCVBUFFORCE, " 407 "...) failed: %s", sock_strerror(errno)); 408 sock_close(s); 409 *noproto = 0; 410 *inuse = 0; 411 return -1; 412 } 413 # endif /* SO_RCVBUFFORCE */ 414 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 415 (socklen_t)sizeof(rcv)) < 0) { 416 log_err("setsockopt(..., SO_RCVBUF, " 417 "...) failed: %s", sock_strerror(errno)); 418 sock_close(s); 419 *noproto = 0; 420 *inuse = 0; 421 return -1; 422 } 423 /* check if we got the right thing or if system 424 * reduced to some system max. 
Warn if so */ 425 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 426 &slen) >= 0 && got < rcv/2) { 427 log_warn("so-rcvbuf %u was not granted. " 428 "Got %u. To fix: start with " 429 "root permissions(linux) or sysctl " 430 "bigger net.core.rmem_max(linux) or " 431 "kern.ipc.maxsockbuf(bsd) values.", 432 (unsigned)rcv, (unsigned)got); 433 } 434 # ifdef SO_RCVBUFFORCE 435 } 436 # endif 437 #endif /* SO_RCVBUF */ 438 } 439 /* first do RCVBUF as the receive buffer is more important */ 440 if(snd) { 441 #ifdef SO_SNDBUF 442 int got; 443 socklen_t slen = (socklen_t)sizeof(got); 444 # ifdef SO_SNDBUFFORCE 445 /* Linux specific: try to use root permission to override 446 * system limits on sndbuf. The limit is stored in 447 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */ 448 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 449 (socklen_t)sizeof(snd)) < 0) { 450 if(errno != EPERM) { 451 log_err("setsockopt(..., SO_SNDBUFFORCE, " 452 "...) failed: %s", sock_strerror(errno)); 453 sock_close(s); 454 *noproto = 0; 455 *inuse = 0; 456 return -1; 457 } 458 # endif /* SO_SNDBUFFORCE */ 459 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 460 (socklen_t)sizeof(snd)) < 0) { 461 log_err("setsockopt(..., SO_SNDBUF, " 462 "...) failed: %s", sock_strerror(errno)); 463 sock_close(s); 464 *noproto = 0; 465 *inuse = 0; 466 return -1; 467 } 468 /* check if we got the right thing or if system 469 * reduced to some system max. Warn if so */ 470 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 471 &slen) >= 0 && got < snd/2) { 472 log_warn("so-sndbuf %u was not granted. " 473 "Got %u. 
To fix: start with " 474 "root permissions(linux) or sysctl " 475 "bigger net.core.wmem_max(linux) or " 476 "kern.ipc.maxsockbuf(bsd) values.", 477 (unsigned)snd, (unsigned)got); 478 } 479 # ifdef SO_SNDBUFFORCE 480 } 481 # endif 482 #endif /* SO_SNDBUF */ 483 } 484 err = set_ip_dscp(s, family, dscp); 485 if(err != NULL) 486 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err); 487 if(family == AF_INET6) { 488 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 489 int omit6_set = 0; 490 int action; 491 # endif 492 # if defined(IPV6_V6ONLY) 493 if(v6only 494 # ifdef HAVE_SYSTEMD 495 /* Systemd wants to control if the socket is v6 only 496 * or both, with BindIPv6Only=default, ipv6-only or 497 * both in systemd.socket, so it is not set here. */ 498 && !got_fd_from_systemd 499 # endif 500 ) { 501 int val=(v6only==2)?0:1; 502 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 503 (void*)&val, (socklen_t)sizeof(val)) < 0) { 504 log_err("setsockopt(..., IPV6_V6ONLY" 505 ", ...) failed: %s", sock_strerror(errno)); 506 sock_close(s); 507 *noproto = 0; 508 *inuse = 0; 509 return -1; 510 } 511 } 512 # endif 513 # if defined(IPV6_USE_MIN_MTU) 514 /* 515 * There is no fragmentation of IPv6 datagrams 516 * during forwarding in the network. Therefore 517 * we do not send UDP datagrams larger than 518 * the minimum IPv6 MTU of 1280 octets. The 519 * EDNS0 message length can be larger if the 520 * network stack supports IPV6_USE_MIN_MTU. 521 */ 522 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 523 (void*)&on, (socklen_t)sizeof(on)) < 0) { 524 log_err("setsockopt(..., IPV6_USE_MIN_MTU, " 525 "...) failed: %s", sock_strerror(errno)); 526 sock_close(s); 527 *noproto = 0; 528 *inuse = 0; 529 return -1; 530 } 531 # elif defined(IPV6_MTU) 532 # ifndef USE_WINSOCK 533 /* 534 * On Linux, to send no larger than 1280, the PMTUD is 535 * disabled by default for datagrams anyway, so we set 536 * the MTU to use. 
537 */ 538 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU, 539 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 540 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 541 sock_strerror(errno)); 542 sock_close(s); 543 *noproto = 0; 544 *inuse = 0; 545 return -1; 546 } 547 # elif defined(IPV6_USER_MTU) 548 /* As later versions of the mingw crosscompiler define 549 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU 550 * instead which is writable; IPV6_MTU is readonly there. */ 551 if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU, 552 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { 553 if (WSAGetLastError() != WSAENOPROTOOPT) { 554 log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s", 555 wsa_strerror(WSAGetLastError())); 556 sock_close(s); 557 *noproto = 0; 558 *inuse = 0; 559 return -1; 560 } 561 } 562 # endif /* USE_WINSOCK */ 563 # endif /* IPv6 MTU */ 564 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 565 # if defined(IP_PMTUDISC_OMIT) 566 action = IP_PMTUDISC_OMIT; 567 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 568 &action, (socklen_t)sizeof(action)) < 0) { 569 570 if (errno != EINVAL) { 571 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 572 strerror(errno)); 573 sock_close(s); 574 *noproto = 0; 575 *inuse = 0; 576 return -1; 577 } 578 } 579 else 580 { 581 omit6_set = 1; 582 } 583 # endif 584 if (omit6_set == 0) { 585 action = IP_PMTUDISC_DONT; 586 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, 587 &action, (socklen_t)sizeof(action)) < 0) { 588 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) 
failed: %s", 589 strerror(errno)); 590 sock_close(s); 591 *noproto = 0; 592 *inuse = 0; 593 return -1; 594 } 595 } 596 # endif /* IPV6_MTU_DISCOVER */ 597 } else if(family == AF_INET) { 598 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 599 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that 600 * PMTU information is not accepted, but fragmentation is allowed 601 * if and only if the packet size exceeds the outgoing interface MTU 602 * (and also uses the interface mtu to determine the size of the packets). 603 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks. 604 * FreeBSD already has same semantics without setting the option. */ 605 int omit_set = 0; 606 int action; 607 # if defined(IP_PMTUDISC_OMIT) 608 action = IP_PMTUDISC_OMIT; 609 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 610 &action, (socklen_t)sizeof(action)) < 0) { 611 612 if (errno != EINVAL) { 613 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 614 strerror(errno)); 615 sock_close(s); 616 *noproto = 0; 617 *inuse = 0; 618 return -1; 619 } 620 } 621 else 622 { 623 omit_set = 1; 624 } 625 # endif 626 if (omit_set == 0) { 627 action = IP_PMTUDISC_DONT; 628 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 629 &action, (socklen_t)sizeof(action)) < 0) { 630 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 631 strerror(errno)); 632 sock_close(s); 633 *noproto = 0; 634 *inuse = 0; 635 return -1; 636 } 637 } 638 # elif defined(IP_DONTFRAG) && !defined(__APPLE__) 639 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers, 640 * but does not work on that version, so we exclude it */ 641 /* a nonzero value disables fragmentation, according to 642 * docs.oracle.com for ip(4). */ 643 int off = 1; 644 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 645 &off, (socklen_t)sizeof(off)) < 0) { 646 log_err("setsockopt(..., IP_DONTFRAG, ...) 
failed: %s", 647 strerror(errno)); 648 sock_close(s); 649 *noproto = 0; 650 *inuse = 0; 651 return -1; 652 } 653 # endif /* IPv4 MTU */ 654 } 655 if( 656 #ifdef HAVE_SYSTEMD 657 !got_fd_from_systemd && 658 #endif 659 bind(s, (struct sockaddr*)addr, addrlen) != 0) { 660 *noproto = 0; 661 *inuse = 0; 662 #ifndef USE_WINSOCK 663 #ifdef EADDRINUSE 664 *inuse = (errno == EADDRINUSE); 665 /* detect freebsd jail with no ipv6 permission */ 666 if(family==AF_INET6 && errno==EINVAL) 667 *noproto = 1; 668 else if(errno != EADDRINUSE && 669 !(errno == EACCES && verbosity < 4 && !listen) 670 #ifdef EADDRNOTAVAIL 671 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen) 672 #endif 673 ) { 674 log_err_addr("can't bind socket", strerror(errno), 675 (struct sockaddr_storage*)addr, addrlen); 676 } 677 #endif /* EADDRINUSE */ 678 #else /* USE_WINSOCK */ 679 if(WSAGetLastError() != WSAEADDRINUSE && 680 WSAGetLastError() != WSAEADDRNOTAVAIL && 681 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) { 682 log_err_addr("can't bind socket", 683 wsa_strerror(WSAGetLastError()), 684 (struct sockaddr_storage*)addr, addrlen); 685 } 686 #endif /* USE_WINSOCK */ 687 sock_close(s); 688 return -1; 689 } 690 if(!fd_set_nonblock(s)) { 691 *noproto = 0; 692 *inuse = 0; 693 sock_close(s); 694 return -1; 695 } 696 return s; 697 } 698 699 int 700 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, 701 int* reuseport, int transparent, int mss, int nodelay, int freebind, 702 int use_systemd, int dscp, const char* additional) 703 { 704 int s = -1; 705 char* err; 706 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY) 707 int on = 1; 708 #endif 709 #ifdef HAVE_SYSTEMD 710 int got_fd_from_systemd = 0; 711 #endif 712 #ifdef USE_TCP_FASTOPEN 713 int qlen; 714 #endif 715 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY) 716 
(void)transparent; 717 #endif 718 #if !defined(IP_FREEBIND) 719 (void)freebind; 720 #endif 721 verbose_print_addr(addr, additional); 722 *noproto = 0; 723 #ifdef HAVE_SYSTEMD 724 if (!use_systemd || 725 (use_systemd 726 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1, 727 addr->ai_addr, addr->ai_addrlen, 728 NULL)) == -1)) { 729 #else 730 (void)use_systemd; 731 #endif 732 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 733 #ifndef USE_WINSOCK 734 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { 735 *noproto = 1; 736 return -1; 737 } 738 #else 739 if(WSAGetLastError() == WSAEAFNOSUPPORT || 740 WSAGetLastError() == WSAEPROTONOSUPPORT) { 741 *noproto = 1; 742 return -1; 743 } 744 #endif 745 log_err("can't create socket: %s", sock_strerror(errno)); 746 return -1; 747 } 748 if(nodelay) { 749 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY) 750 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on, 751 (socklen_t)sizeof(on)) < 0) { 752 #ifndef USE_WINSOCK 753 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s", 754 strerror(errno)); 755 #else 756 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s", 757 wsa_strerror(WSAGetLastError())); 758 #endif 759 } 760 #else 761 log_warn(" setsockopt(TCP_NODELAY) unsupported"); 762 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */ 763 } 764 if (mss > 0) { 765 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 766 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss, 767 (socklen_t)sizeof(mss)) < 0) { 768 log_err(" setsockopt(.. TCP_MAXSEG ..) 
failed: %s", 769 sock_strerror(errno)); 770 } else { 771 verbose(VERB_ALGO, 772 " tcp socket mss set to %d", mss); 773 } 774 #else 775 log_warn(" setsockopt(TCP_MAXSEG) unsupported"); 776 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */ 777 } 778 #ifdef HAVE_SYSTEMD 779 } else { 780 got_fd_from_systemd = 1; 781 } 782 #endif 783 #ifdef SO_REUSEADDR 784 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 785 (socklen_t)sizeof(on)) < 0) { 786 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", 787 sock_strerror(errno)); 788 sock_close(s); 789 return -1; 790 } 791 #endif /* SO_REUSEADDR */ 792 #ifdef IP_FREEBIND 793 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on, 794 (socklen_t)sizeof(on)) < 0) { 795 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s", 796 strerror(errno)); 797 } 798 #endif /* IP_FREEBIND */ 799 #ifdef SO_REUSEPORT 800 /* try to set SO_REUSEPORT so that incoming 801 * connections are distributed evenly among the receiving threads. 802 * Each thread must have its own socket bound to the same port, 803 * with SO_REUSEPORT set on each socket. 804 */ 805 if (reuseport && *reuseport && 806 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, 807 (socklen_t)sizeof(on)) < 0) { 808 #ifdef ENOPROTOOPT 809 if(errno != ENOPROTOOPT || verbosity >= 3) 810 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s", 811 strerror(errno)); 812 #endif 813 /* this option is not essential, we can continue */ 814 *reuseport = 0; 815 } 816 #else 817 (void)reuseport; 818 #endif /* defined(SO_REUSEPORT) */ 819 #if defined(IPV6_V6ONLY) 820 if(addr->ai_family == AF_INET6 && v6only 821 # ifdef HAVE_SYSTEMD 822 /* Systemd wants to control if the socket is v6 only 823 * or both, with BindIPv6Only=default, ipv6-only or 824 * both in systemd.socket, so it is not set here. 
*/ 825 && !got_fd_from_systemd 826 # endif 827 ) { 828 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 829 (void*)&on, (socklen_t)sizeof(on)) < 0) { 830 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 831 sock_strerror(errno)); 832 sock_close(s); 833 return -1; 834 } 835 } 836 #else 837 (void)v6only; 838 #endif /* IPV6_V6ONLY */ 839 #ifdef IP_TRANSPARENT 840 if (transparent && 841 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on, 842 (socklen_t)sizeof(on)) < 0) { 843 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s", 844 strerror(errno)); 845 } 846 #elif defined(IP_BINDANY) 847 if (transparent && 848 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP), 849 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY), 850 (void*)&on, (socklen_t)sizeof(on)) < 0) { 851 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s", 852 (addr->ai_family==AF_INET6?"V6":""), strerror(errno)); 853 } 854 #elif defined(SO_BINDANY) 855 if (transparent && 856 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t) 857 sizeof(on)) < 0) { 858 log_warn("setsockopt(.. SO_BINDANY ..) 
failed: %s", 859 strerror(errno)); 860 } 861 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */ 862 err = set_ip_dscp(s, addr->ai_family, dscp); 863 if(err != NULL) 864 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err); 865 if( 866 #ifdef HAVE_SYSTEMD 867 !got_fd_from_systemd && 868 #endif 869 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) { 870 #ifndef USE_WINSOCK 871 /* detect freebsd jail with no ipv6 permission */ 872 if(addr->ai_family==AF_INET6 && errno==EINVAL) 873 *noproto = 1; 874 else { 875 log_err_addr("can't bind socket", strerror(errno), 876 (struct sockaddr_storage*)addr->ai_addr, 877 addr->ai_addrlen); 878 } 879 #else 880 log_err_addr("can't bind socket", 881 wsa_strerror(WSAGetLastError()), 882 (struct sockaddr_storage*)addr->ai_addr, 883 addr->ai_addrlen); 884 #endif 885 sock_close(s); 886 return -1; 887 } 888 if(!fd_set_nonblock(s)) { 889 sock_close(s); 890 return -1; 891 } 892 if(listen(s, TCP_BACKLOG) == -1) { 893 log_err("can't listen: %s", sock_strerror(errno)); 894 sock_close(s); 895 return -1; 896 } 897 #ifdef USE_TCP_FASTOPEN 898 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense 899 against IP spoofing attacks as suggested in RFC7413 */ 900 #ifdef __APPLE__ 901 /* OS X implementation only supports qlen of 1 via this call. Actual 902 value is configured by the net.inet.tcp.fastopen_backlog kernel parm. 
*/
	qlen = 1;
#else
	/* 5 is recommended on linux */
	qlen = 5;
#endif
	if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
		  sizeof(qlen))) == -1 ) {
#ifdef ENOPROTOOPT
		/* squelch ENOPROTOOPT: freebsd server mode with kernel support
		   disabled, except when verbosity enabled for debugging */
		if(errno != ENOPROTOOPT || verbosity >= 3) {
#endif
		  if(errno == EPERM) {
			log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
		  } else {
			log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
		  }
#ifdef ENOPROTOOPT
		}
#endif
	}
#endif
	return s;
}

/**
 * Set the DSCP value on a socket.
 * The value is shifted left by two bits and stored with IPV6_TCLASS for
 * AF_INET6 sockets, and with IP_TOS for every other address family.
 * @param socket: fd to set the option on.
 * @param addrfamily: address family of the socket.
 * @param dscp: DSCP value; 0 means nothing is set.
 * @return NULL on success (or when dscp is 0), otherwise an error string.
 */
char*
set_ip_dscp(int socket, int addrfamily, int dscp)
{
	int ds;

	if(dscp == 0)
		return NULL;
	ds = dscp << 2;
	switch(addrfamily) {
	case AF_INET6:
	#ifdef IPV6_TCLASS
		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds,
			sizeof(ds)) < 0)
			return sock_strerror(errno);
		break;
	#else
		return "IPV6_TCLASS not defined on this system";
	#endif
	default:
		if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
			return sock_strerror(errno);
		break;
	}
	return NULL;
}

/**
 * Create a listening, nonblocking local (unix domain) stream socket.
 * An existing file at the path is unlinked before the bind.
 * @param path: filesystem path of the socket. It must fit in sun_path.
 * @param noproto: set to 1 if local sockets are not supported at all.
 * @param use_systemd: if true, try to fetch the socket from systemd first.
 * @return the fd, or -1 on failure (the error is logged).
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYSTEMD
	int ret;

	if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1)
		return ret;
	else {
#endif
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#ifndef HAVE_SYSTEMD
	(void)use_systemd;
#endif

	verbose(VERB_ALGO, "creating unix socket %s", path);
	/* a path that does not fit would be truncated by the copy below,
	 * and the socket would be bound to a different file than the one
	 * that was configured */
	if(strlen(path) >= sizeof(usock.sun_path)) {
		log_err("Cannot create local socket %s (path too long)", path);
		return -1;
	}
	/* no uninitialized bytes are passed to bind() */
	memset(&usock, 0, sizeof(usock));
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;

#ifdef HAVE_SYSTEMD
	}
#endif
#else
	(void)use_systemd;
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}


/**
 * Create socket from getaddrinfo results
 * On success the resolved address is duplicated into ub_sock, together
 * with the fd and the address family from the hints.
 * @return the fd, or -1 on failure. On failure *noip6 is set to 1 if the
 *	failure was caused by missing IPv6 support. ub_sock->addr is not
 *	allocated on failure; ub_sock itself stays owned by the caller.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	struct addrinfo *res = NULL;
	int r, s, inuse, noproto;
	hints->ai_socktype = stype;
	*noip6 = 0;
	if((r=getaddrinfo(ifname, port, hints, &res)) != 0 || !res) {
#ifdef USE_WINSOCK
		if(r == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(r),
#ifdef EAI_SYSTEM
			(r==EAI_SYSTEM?(char*)strerror(errno):"")
#else
			""
#endif
		);
		return -1;
	}
	/* check the address before it is handed to the socket creation
	 * routines below */
	if(!res->ai_addr) {
		log_err("getaddrinfo returned no address");
		freeaddrinfo(res);
		return -1;
	}
	if(stype == SOCK_DGRAM) {
		verbose_print_addr(res, additional);
		s = create_udp_sock(res->ai_family, res->ai_socktype,
			(struct sockaddr*)res->ai_addr, res->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
		if(s == -1 && inuse) {
			log_err("bind: address already in use");
		} else if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	} else {
		s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp, additional);
		if(s == -1 && noproto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	}
	if(s == -1) {
		/* no socket was opened, so there is no address to record
		 * and no fd to close */
		freeaddrinfo(res);
		return -1;
	}

	ub_sock->addr = memdup(res->ai_addr, res->ai_addrlen);
	ub_sock->addrlen = res->ai_addrlen;
	if(!ub_sock->addr) {
		log_err("out of memory: allocate listening address");
		freeaddrinfo(res);
		sock_close(s);
		return -1;
	}
	freeaddrinfo(res);

	ub_sock->s = s;
	ub_sock->fam = hints->ai_family;
	ub_sock->acl = NULL;

	return s;
}

/** make socket and first see if ifname contains port override info */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock,
	const char* additional)
{
	char* s = strchr(ifname, '@');
	if(s) {
		/* override port with ifspec@port */
		char p[16];
		char newif[128];
		if((size_t)(s-ifname) >= sizeof(newif)) {
			log_err("ifname too long: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		if(strlen(s+1) >= sizeof(p)) {
			log_err("portnumber too long: %s", ifname);
			*noip6 = 0;
			return -1;
		}
		(void)strlcpy(newif, ifname, sizeof(newif));
		/* cut the interface part off at the '@' */
		newif[s-ifname] = 0;
		(void)strlcpy(p, s+1, sizeof(p));
		p[strlen(s+1)]=0;
		return make_sock(stype, newif, p, hints, v6only, noip6, rcv,
			snd, reuseport, transparent, tcp_mss, nodelay, freebind,
			use_systemd, dscp, ub_sock, additional);
	}
	return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
		reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
		dscp, ub_sock, additional);
}

/**
 * Add port to open ports list.
 * @param list: list head. changed.
 * @param s: fd.
 * @param ftype: the listen type stored for the port.
 * @param pp2_enabled: if PROXYv2 is enabled for this port.
 * @param ub_sock: socket with address.
 * @return false on failure. list is unchanged then.
 */
static int
port_insert(struct listen_port** list, int s, enum listen_type ftype,
	int pp2_enabled, struct unbound_socket* ub_sock)
{
	struct listen_port* item = (struct listen_port*)malloc(
		sizeof(struct listen_port));
	if(!item)
		return 0;
	item->next = *list;
	item->fd = s;
	item->ftype = ftype;
	item->pp2_enabled = pp2_enabled;
	item->socket = ub_sock;
	*list = item;
	return 1;
}

/** set fd to receive software timestamps */
static int
set_recvtimestamp(int s)
{
#ifdef HAVE_LINUX_NET_TSTAMP_H
	int opt = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
	if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&opt, (socklen_t)sizeof(opt)) < 0) {
		log_err("setsockopt(..., SO_TIMESTAMPNS, ...) failed: %s",
			strerror(errno));
		return 0;
	}
	return 1;
#else
	log_err("packets timestamping is not supported on this platform");
	(void)s;
	return 0;
#endif
}

/** set fd to receive source address packet info */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	(void)s;
#endif
	if(family == AF_INET6) {
#           ifdef IPV6_RECVPKTINFO
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#           endif /* defined IPV6_RECVPKTINFO */

	} else if(family == AF_INET) {
#           ifdef IP_PKTINFO
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#           else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#           endif /* IP_PKTINFO */

	}
	return 1;
}

/** see if interface is ssl, its port number == the ssl port number */
static int
if_is_ssl(const char* ifname, const char* port, int ssl_port,
	struct config_strlist* tls_additional_port)
{
	struct config_strlist* s;
	char* p = strchr(ifname, '@');
	if(!p && atoi(port) == ssl_port)
		return 1;
	if(p && atoi(p+1) == ssl_port)
		return 1;
	for(s = tls_additional_port; s; s = s->next) {
		if(p && atoi(p+1) == atoi(s->str))
			return 1;
		if(!p && atoi(port) == atoi(s->str))
			return 1;
	}
	return 0;
}

/**
 * Helper for ports_open. Creates one interface (or NULL for default).
 * @param ifname: The interface ip address.
 * @param do_auto: use automatic interface detection.
 * 	If enabled, then ifname must be the wildcard name.
 * @param do_udp: if udp should be used.
 * @param do_tcp: if tcp should be used.
 * @param hints: for getaddrinfo. family and flags have to be set by caller.
 * @param port: Port number to use (as string).
 * @param list: list of open ports, appended to, changed to point to list head.
 * @param rcv: receive buffer size for UDP
 * @param snd: send buffer size for UDP
 * @param ssl_port: ssl service port number
 * @param tls_additional_port: list of additional ssl service port numbers.
 * @param https_port: DoH service port number
 * @param proxy_protocol_port: list of PROXYv2 port numbers.
 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
 * 	set to false on exit if reuseport failed due to no kernel support.
 * @param transparent: set IP_TRANSPARENT socket option.
 * @param tcp_mss: maximum segment size of tcp socket. default if zero.
 * @param freebind: set IP_FREEBIND socket option.
 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
 * @param use_systemd: if true, fetch sockets from systemd.
 * @param dnscrypt_port: dnscrypt service port number
 * @param dscp: DSCP to use.
 * @param quic_port: dns over quic port number.
 * @param http_notls_downstream: if no tls is used for https downstream.
 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to
 * 	wait to discard if UDP packets have waited for long in the socket
 * 	buffer.
 * @return: returns false on error.
 */
static int
ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
	struct addrinfo *hints, const char* port, struct listen_port** list,
	size_t rcv, size_t snd, int ssl_port,
	struct config_strlist* tls_additional_port, int https_port,
	struct config_strlist* proxy_protocol_port,
	int* reuseport, int transparent, int tcp_mss, int freebind,
	int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp,
	int quic_port, int http_notls_downstream, int sock_queue_timeout)
{
	int s, noip6=0;
	int is_https = if_is_https(ifname, port, https_port);
	int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port);
	int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port);
	int nodelay = is_https && http2_nodelay;
	struct unbound_socket* ub_sock;
	int is_doq = if_is_quic(ifname, port, quic_port);
	const char* add = NULL;

	if(!do_udp && !do_tcp)
		return 0;

	if(is_pp2) {
		if(is_dnscrypt) {
			fatal_exit("PROXYv2 and DNSCrypt combination not "
				"supported!");
		} else if(is_https) {
			fatal_exit("PROXYv2 and DoH combination not "
				"supported!");
		} else if(is_doq) {
			fatal_exit("PROXYv2 and DoQ combination not "
				"supported!");
		}
	}

	if(do_auto) {
		ub_sock = calloc(1, sizeof(struct unbound_socket));
		if(!ub_sock)
			return 0;
		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
			&noip6, rcv, snd, reuseport, transparent,
			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
			(is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) {
			free(ub_sock->addr);
			free(ub_sock);
			if(noip6) {
				log_warn("IPv6 protocol not available");
				return 1;
			}
			return 0;
		}
		/* getting source addr packet info is highly non-portable */
		if(!set_recvpktinfo(s, hints->ai_family)) {
			sock_close(s);
			free(ub_sock->addr);
			free(ub_sock);
			return 0;
		}
		if (sock_queue_timeout && !set_recvtimestamp(s)) {
			log_warn("socket timestamping is not available");
		}
		if(!port_insert(list, s, is_dnscrypt
			?listen_type_udpancil_dnscrypt:listen_type_udpancil,
			is_pp2, ub_sock)) {
			sock_close(s);
			free(ub_sock->addr);
			free(ub_sock);
			return 0;
		}
	} else if(do_udp) {
		enum listen_type udp_port_type;
		ub_sock = calloc(1, sizeof(struct unbound_socket));
		if(!ub_sock)
			return 0;
		if(is_dnscrypt) {
			udp_port_type = listen_type_udp_dnscrypt;
			add = "dnscrypt";
		} else if(is_doq) {
			udp_port_type = listen_type_doq;
			add = "doq";
			if(((strchr(ifname, '@') &&
				atoi(strchr(ifname, '@')+1) == 53) ||
				(!strchr(ifname, '@') && atoi(port) == 53))) {
				log_err("DNS over QUIC is not allowed on "
					"port 53. Port 53 is for DNS "
					"datagrams. Error for "
					"interface '%s'.", ifname);
				free(ub_sock->addr);
				free(ub_sock);
				return 0;
			}
		} else {
			udp_port_type = listen_type_udp;
			add = NULL;
		}
		/* regular udp socket */
		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
			&noip6, rcv, snd, reuseport, transparent,
			tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
			add)) == -1) {
			free(ub_sock->addr);
			free(ub_sock);
			if(noip6) {
				log_warn("IPv6 protocol not available");
				return 1;
			}
			return 0;
		}
		if(udp_port_type == listen_type_doq) {
			if(!set_recvpktinfo(s, hints->ai_family)) {
				sock_close(s);
				free(ub_sock->addr);
				free(ub_sock);
				return 0;
			}
		}
		/* NOTE(review): with sock_queue_timeout set, a plain udp
		 * port is switched to udpancil here, whether or not
		 * timestamping can be enabled below. The else-branch below
		 * therefore never changes the type. Confirm whether the
		 * switch was meant to depend on set_recvtimestamp(). */
		if(udp_port_type == listen_type_udp && sock_queue_timeout)
			udp_port_type = listen_type_udpancil;
		if (sock_queue_timeout) {
			if(!set_recvtimestamp(s)) {
				log_warn("socket timestamping is not available");
			} else {
				if(udp_port_type == listen_type_udp)
					udp_port_type = listen_type_udpancil;
			}
		}
		if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) {
			sock_close(s);
			free(ub_sock->addr);
			free(ub_sock);
			return 0;
		}
	}
	if(do_tcp) {
		int is_ssl = if_is_ssl(ifname, port, ssl_port,
			tls_additional_port);
		enum listen_type port_type;
		ub_sock = calloc(1, sizeof(struct unbound_socket));
		if(!ub_sock)
			return 0;
		if(is_ssl) {
			port_type = listen_type_ssl;
			add = "tls";
		} else if(is_https) {
			port_type = listen_type_http;
			add = "https";
			if(http_notls_downstream)
				add = "http";
		} else if(is_dnscrypt) {
			port_type = listen_type_tcp_dnscrypt;
			add = "dnscrypt";
		} else {
			port_type = listen_type_tcp;
			add = NULL;
		}
		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
			&noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock, add)) == -1) {
			free(ub_sock->addr);
			free(ub_sock);
			if(noip6) {
				/*log_warn("IPv6 protocol not available");*/
				return 1;
			}
			return 0;
		}
		if(is_ssl)
			verbose(VERB_ALGO, "setup TCP for SSL service");
		if(!port_insert(list, s, port_type, is_pp2, ub_sock)) {
			sock_close(s);
			free(ub_sock->addr);
			free(ub_sock);
			return 0;
		}
	}
	return 1;
}

/**
 * Add items to commpoint list in front.
 * @param c: commpoint to add.
 * @param front: listen struct.
 * @return: false on failure.
 */
static int
listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
{
	struct listen_list* item = (struct listen_list*)malloc(
		sizeof(struct listen_list));
	if(!item)
		return 0;
	item->com = c;
	item->next = front->cps;
	front->cps = item;
	return 1;
}

/** initialise the buffer counter locks, each one only once */
void listen_setup_locks(void)
{
	if(!stream_wait_lock_inited) {
		lock_basic_init(&stream_wait_count_lock);
		stream_wait_lock_inited = 1;
	}
	if(!http2_query_buffer_lock_inited) {
		lock_basic_init(&http2_query_buffer_count_lock);
		http2_query_buffer_lock_inited = 1;
	}
	if(!http2_response_buffer_lock_inited) {
		lock_basic_init(&http2_response_buffer_count_lock);
		http2_response_buffer_lock_inited = 1;
	}
}

/** destroy the buffer counter locks that have been initialised */
void listen_desetup_locks(void)
{
	if(stream_wait_lock_inited) {
		stream_wait_lock_inited = 0;
		lock_basic_destroy(&stream_wait_count_lock);
	}
	if(http2_query_buffer_lock_inited) {
		http2_query_buffer_lock_inited = 0;
		lock_basic_destroy(&http2_query_buffer_count_lock);
	}
	if(http2_response_buffer_lock_inited) {
		http2_response_buffer_lock_inited = 0;
		lock_basic_destroy(&http2_response_buffer_count_lock);
	}
}

/**
 * Create the listening structure: one comm point per entry of the ports
 * list, all sharing one udp buffer of bufsize bytes.
 * The comm points are marked do_not_close.
 * @return the new structure, or NULL on failure (also when the ports list
 *	is empty). On failure everything created so far is deleted.
 */
struct listen_dnsport*
listen_create(struct comm_base* base, struct listen_port* ports,
	size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
	int harden_large_queries, uint32_t http_max_streams,
	char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
	void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table,
	struct ub_randstate* rnd, const char* ssl_service_key,
	const char* ssl_service_pem, struct config_file* cfg,
	comm_point_callback_type* cb, void *cb_arg)
{
	struct listen_dnsport* front = (struct listen_dnsport*)
		malloc(sizeof(struct listen_dnsport));
	if(!front)
		return NULL;
	front->cps = NULL;
	front->udp_buff = sldns_buffer_new(bufsize);
#ifdef USE_DNSCRYPT
	front->dnscrypt_udp_buff = NULL;
#endif
	if(!front->udp_buff) {
		free(front);
		return NULL;
	}

	/* create comm points as needed */
	while(ports) {
		struct comm_point* cp = NULL;
		if(ports->ftype == listen_type_udp ||
		   ports->ftype == listen_type_udp_dnscrypt) {
			cp = comm_point_create_udp(base, ports->fd,
				front->udp_buff, ports->pp2_enabled, cb,
				cb_arg, ports->socket);
		} else if(ports->ftype == listen_type_doq) {
#ifndef HAVE_NGTCP2
			log_warn("Unbound is not compiled with "
				"ngtcp2. This is required to use DNS "
				"over QUIC.");
#endif
			cp = comm_point_create_doq(base, ports->fd,
				front->udp_buff, cb, cb_arg, ports->socket,
				doq_table, rnd, ssl_service_key,
				ssl_service_pem, cfg);
		} else if(ports->ftype == listen_type_tcp ||
				ports->ftype == listen_type_tcp_dnscrypt) {
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries, 0, NULL,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, ports->pp2_enabled, cb, cb_arg,
				ports->socket);
		} else if(ports->ftype == listen_type_ssl ||
			ports->ftype == listen_type_http) {
			cp = comm_point_create_tcp(base, ports->fd,
				tcp_accept_count, tcp_idle_timeout,
				harden_large_queries,
				http_max_streams, http_endpoint,
				tcp_conn_limit, bufsize, front->udp_buff,
				ports->ftype, ports->pp2_enabled, cb, cb_arg,
				ports->socket);
			if(ports->ftype == listen_type_http) {
				if(!sslctx && !http_notls) {
					log_warn("HTTPS port configured, but "
						"no TLS tls-service-key or "
						"tls-service-pem set");
				}
#ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
				if(!http_notls) {
					log_warn("Unbound is not compiled "
						"with an OpenSSL version "
						"supporting ALPN "
						"(OpenSSL >= 1.0.2). This "
						"is required to use "
						"DNS-over-HTTPS");
				}
#endif
#ifndef HAVE_NGHTTP2_NGHTTP2_H
				log_warn("Unbound is not compiled with "
					"nghttp2. This is required to use "
					"DNS-over-HTTPS.");
#endif
			}
		} else if(ports->ftype == listen_type_udpancil ||
				  ports->ftype == listen_type_udpancil_dnscrypt) {
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
			cp = comm_point_create_udp_ancil(base, ports->fd,
				front->udp_buff, ports->pp2_enabled, cb,
				cb_arg, ports->socket);
#else
			log_warn("This system does not support UDP ancilliary data.");
#endif
		}
		if(!cp) {
			log_err("can't create commpoint");
			listen_delete(front);
			return NULL;
		}
		/* only the ssl, doq and (with tls) http types get the
		 * ssl context */
		if((http_notls && ports->ftype == listen_type_http) ||
			(ports->ftype == listen_type_tcp) ||
			(ports->ftype == listen_type_udp) ||
			(ports->ftype == listen_type_udpancil) ||
			(ports->ftype == listen_type_tcp_dnscrypt) ||
			(ports->ftype == listen_type_udp_dnscrypt) ||
			(ports->ftype == listen_type_udpancil_dnscrypt))
			cp->ssl = NULL;
		else
			cp->ssl = sslctx;
		cp->dtenv = dtenv;
		cp->do_not_close = 1;
#ifdef USE_DNSCRYPT
		if (ports->ftype == listen_type_udp_dnscrypt ||
			ports->ftype == listen_type_tcp_dnscrypt ||
			ports->ftype == listen_type_udpancil_dnscrypt) {
			cp->dnscrypt = 1;
			cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
			if(!cp->dnscrypt_buffer) {
				log_err("can't alloc dnscrypt_buffer");
				comm_point_delete(cp);
				listen_delete(front);
				return NULL;
			}
			front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
		}
#endif
		if(!listen_cp_insert(cp, front)) {
			log_err("malloc failed");
			comm_point_delete(cp);
			listen_delete(front);
			return NULL;
		}
		ports = ports->next;
	}
	if(!front->cps) {
		log_err("Could not open sockets to accept queries.");
		listen_delete(front);
		return NULL;
	}

	return front;
}

/** delete a list of comm points, and the comm points themselves */
void
listen_list_delete(struct listen_list* list)
{
	struct listen_list *p = list, *pn;
	while(p) {
		pn = p->next;
		comm_point_delete(p->com);
		free(p);
		p = pn;
	}
}

/** delete the listening structure, its comm points and buffers */
void
listen_delete(struct listen_dnsport* front)
{
	if(!front)
		return;
	listen_list_delete(front->cps);
#ifdef USE_DNSCRYPT
	if(front->dnscrypt_udp_buff &&
		front->udp_buff != front->dnscrypt_udp_buff) {
		sldns_buffer_free(front->dnscrypt_udp_buff);
	}
#endif
	sldns_buffer_free(front->udp_buff);
	free(front);
}

#ifdef HAVE_GETIFADDRS
/**
 * Append the addresses of the interface named by search_ifa to the array.
 * search_ifa may carry an '@port' suffix, which is copied onto every
 * address. If no interface address matched, search_ifa itself is appended.
 * @return false on failure; entries appended so far stay in the array.
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	struct ifaddrs *ifa;
	void *tmpbuf;
	int last_ip_addresses_size = *ip_addresses_size;

	for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) {
		sa_family_t family;
		const char* atsign;
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
		char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
		char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

		if((atsign=strrchr(search_ifa, '@')) != NULL) {
			if(strlen(ifa->ifa_name) != (size_t)(atsign-search_ifa)
			   || strncmp(ifa->ifa_name, search_ifa,
			   atsign-search_ifa) != 0)
				continue;
		} else {
			if(strcmp(ifa->ifa_name, search_ifa) != 0)
				continue;
			atsign = "";
		}

		if(ifa->ifa_addr == NULL)
			continue;

		family = ifa->ifa_addr->sa_family;
		if(family == AF_INET) {
			char a4[INET_ADDRSTRLEN + 1];
			struct sockaddr_in *in4 = (struct sockaddr_in *)
				ifa->ifa_addr;
			if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				a4, atsign);
		}
#ifdef INET6
		else if(family == AF_INET6) {
			struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
				ifa->ifa_addr;
			char a6[INET6_ADDRSTRLEN + 1];
			char if_index_name[IF_NAMESIZE + 1];
			if_index_name[0] = 0;
			if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			(void)if_indextoname(in6->sin6_scope_id,
				(char *)if_index_name);
			if (strlen(if_index_name) != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", a6, if_index_name, atsign);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					a6, atsign);
			}
		}
#endif
		else {
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1));
		if(!tmpbuf) {
			log_err("realloc failed: out of memory");
			return 0;
		} else {
			*ip_addresses = tmpbuf;
		}
		(*ip_addresses)[*ip_addresses_size] = strdup(addr_buf);
		if(!(*ip_addresses)[*ip_addresses_size]) {
			log_err("strdup failed: out of memory");
			return 0;
		}
		(*ip_addresses_size)++;
	}

	if (*ip_addresses_size == last_ip_addresses_size) {
		/* nothing matched: keep the name as it was given */
		tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1));
		if(!tmpbuf) {
			log_err("realloc failed: out of memory");
			return 0;
		} else {
			*ip_addresses = tmpbuf;
		}
		(*ip_addresses)[*ip_addresses_size] = strdup(search_ifa);
		if(!(*ip_addresses)[*ip_addresses_size]) {
			log_err("strdup failed: out of memory");
			return 0;
		}
		(*ip_addresses_size)++;
	}
	return 1;
}
#endif /* HAVE_GETIFADDRS */

/**
 * Build an array of interface strings from ifs and list.
 * With getifaddrs available, interface names are replaced by their
 * addresses and the results are appended to *resif; otherwise the strings
 * are copied into a newly allocated array.
 * @return false on failure; *resif is then NULL and *num_resif is 0, except
 *	when getifaddrs or the array allocation itself fails, where they are
 *	left as they were.
 */
int resolve_interface_names(char** ifs, int num_ifs,
	struct config_strlist* list, char*** resif, int* num_resif)
{
#ifdef HAVE_GETIFADDRS
	struct ifaddrs *addrs = NULL;
	if(num_ifs == 0 && list == NULL) {
		*resif = NULL;
		*num_resif = 0;
		return 1;
	}
	if(getifaddrs(&addrs) == -1) {
		log_err("failed to list interfaces: getifaddrs: %s",
			strerror(errno));
		freeifaddrs(addrs);
		return 0;
	}
	if(ifs) {
		int i;
		for(i=0; i<num_ifs; i++) {
			if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
				freeifaddrs(addrs);
				config_del_strarray(*resif, *num_resif);
				*resif = NULL;
				*num_resif = 0;
				return 0;
			}
		}
	}
	if(list) {
		struct config_strlist* p;
		for(p = list; p; p = p->next) {
			if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
				freeifaddrs(addrs);
				config_del_strarray(*resif, *num_resif);
				*resif = NULL;
				*num_resif = 0;
				return 0;
			}
		}
	}
	freeifaddrs(addrs);
	return 1;
#else
	struct config_strlist* p;
	if(num_ifs == 0 && list == NULL) {
		*resif = NULL;
		*num_resif = 0;
		return 1;
	}
	*num_resif = num_ifs;
	for(p = list; p; p = p->next) {
		(*num_resif)++;
	}
	*resif = calloc(*num_resif, sizeof(**resif));
	if(!*resif) {
		log_err("out of memory");
		return 0;
	}
	if(ifs) {
		int i;
		for(i=0; i<num_ifs; i++) {
			(*resif)[i] = strdup(ifs[i]);
			if(!((*resif)[i])) {
				log_err("out of memory");
				config_del_strarray(*resif, *num_resif);
				*resif = NULL;
				*num_resif = 0;
				return 0;
			}
		}
	}
	if(list) {
		int idx = num_ifs;
		for(p = list; p; p = p->next) {
			(*resif)[idx] = strdup(p->str);
			if(!((*resif)[idx])) {
				log_err("out of memory");
				config_del_strarray(*resif, *num_resif);
				*resif = NULL;
				*num_resif = 0;
				return 0;
			}
			idx++;
		}
	}
	return 1;
#endif /* HAVE_GETIFADDRS */
}

/**
 * Call ports_create_if for one interface, with the socket options taken
 * from the config.
 * @param ifname: interface address string.
 * @param family: address family, stored in hints->ai_family.
 * @param do_auto: passed on as do_auto.
 * @param do_tcp: passed on as do_tcp. do_udp is taken from cfg.
 * @param hints: getaddrinfo hints; ai_family is overwritten.
 * @param port: port number as a string.
 * @param list: list of open ports, appended to.
 * @param cfg: config with the remaining options.
 * @param reuseport: passed on as reuseport.
 * @return false on error.
 */
static int
ports_create_if_cfg(const char* ifname, int family, int do_auto, int do_tcp,
	struct addrinfo* hints, const char* port, struct listen_port** list,
	struct config_file* cfg, int* reuseport)
{
	hints->ai_family = family;
	return ports_create_if(ifname, do_auto, cfg->do_udp, do_tcp,
		hints, port, list,
		cfg->so_rcvbuf, cfg->so_sndbuf,
		cfg->ssl_port, cfg->tls_additional_port,
		cfg->https_port, cfg->proxy_protocol_port,
		reuseport, cfg->ip_transparent,
		cfg->tcp_mss, cfg->ip_freebind,
		cfg->http_nodelay, cfg->use_systemd,
		cfg->dnscrypt_port, cfg->ip_dscp,
		cfg->quic_port, cfg->http_notls_downstream,
		cfg->sock_queue_timeout);
}

/**
 * Open the listening ports from the config.
 * With interface-automatic, or without explicit interfaces, the wildcard
 * (automatic) or loopback addresses are used; otherwise every entry of ifs
 * is opened, skipping address families that are disabled.
 * @param cfg: config with port numbers and socket options.
 * @param ifs: interface strings.
 * @param num_ifs: number of entries in ifs.
 * @param reuseport: passed on to the socket creation.
 * @return list of open ports, or NULL on failure (everything opened so
 *	far is closed again) or when both ip4 and ip6 are disabled.
 */
struct listen_port*
listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
	int* reuseport)
{
	struct listen_port* list = NULL;
	struct addrinfo hints;
	int i, do_ip4, do_ip6;
	int do_tcp, do_auto;
	char portbuf[32];
	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
	do_ip4 = cfg->do_ip4;
	do_ip6 = cfg->do_ip6;
	do_tcp = cfg->do_tcp;
	do_auto = cfg->if_automatic && cfg->do_udp;
	if(cfg->incoming_num_tcp == 0)
		do_tcp = 0;

	/* getaddrinfo */
	memset(&hints, 0, sizeof(hints));
	hints.ai_flags = AI_PASSIVE;
	/* no name lookups on our listening ports */
	if(num_ifs > 0)
		hints.ai_flags |= AI_NUMERICHOST;
	hints.ai_family = AF_UNSPEC;
#ifndef INET6
	do_ip6 = 0;
#endif
	if(!do_ip4 && !do_ip6) {
		return NULL;
	}
	/* create ip4 and ip6 ports so that return addresses are nice. */
	if(do_auto || num_ifs == 0) {
		if(do_auto && cfg->if_automatic_ports &&
			cfg->if_automatic_ports[0]!=0) {
			/* whitespace separated list of port numbers */
			char* now = cfg->if_automatic_ports;
			while(now && *now) {
				char* after;
				int extraport;
				while(isspace((unsigned char)*now))
					now++;
				if(!*now)
					break;
				after = now;
				extraport = (int)strtol(now, &after, 10);
				if(extraport < 0 || extraport > 65535) {
					log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
					listening_ports_free(list);
					return NULL;
				}
				if(extraport == 0 && now == after) {
					log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
					listening_ports_free(list);
					return NULL;
				}
				now = after;
				snprintf(portbuf, sizeof(portbuf), "%d", extraport);
				if(do_ip6 && !ports_create_if_cfg("::0",
					AF_INET6, do_auto, do_tcp, &hints,
					portbuf, &list, cfg, reuseport)) {
					listening_ports_free(list);
					return NULL;
				}
				if(do_ip4 && !ports_create_if_cfg("0.0.0.0",
					AF_INET, do_auto, do_tcp, &hints,
					portbuf, &list, cfg, reuseport)) {
					listening_ports_free(list);
					return NULL;
				}
			}
			return list;
		}
		if(do_ip6 && !ports_create_if_cfg(do_auto?"::0":"::1",
			AF_INET6, do_auto, do_tcp, &hints, portbuf, &list,
			cfg, reuseport)) {
			listening_ports_free(list);
			return NULL;
		}
		if(do_ip4 && !ports_create_if_cfg(
			do_auto?"0.0.0.0":"127.0.0.1",
			AF_INET, do_auto, do_tcp, &hints, portbuf, &list,
			cfg, reuseport)) {
			listening_ports_free(list);
			return NULL;
		}
	} else for(i = 0; i<num_ifs; i++) {
		if(str_is_ip6(ifs[i])) {
			if(!do_ip6)
				continue;
			if(!ports_create_if_cfg(ifs[i], AF_INET6, 0, do_tcp,
				&hints, portbuf, &list, cfg, reuseport)) {
				listening_ports_free(list);
				return NULL;
			}
		} else {
			if(!do_ip4)
				continue;
			if(!ports_create_if_cfg(ifs[i], AF_INET, 0, do_tcp,
				&hints, portbuf, &list, cfg, reuseport)) {
				listening_ports_free(list);
				return NULL;
			}
		}
	}

	return list;
}

/** close the fds in the list and free the list and its sockets */
void listening_ports_free(struct listen_port* list)
{
	struct listen_port* nx;
	while(list) {
		nx = list->next;
		if(list->fd != -1) {
			sock_close(list->fd);
		}
		/* rc_ports don't have ub_socket */
		if(list->socket) {
			free(list->socket->addr);
			free(list->socket);
		}
		free(list);
		list = nx;
	}
}

/** get the memory in use by the listening structure, in bytes */
size_t listen_get_mem(struct listen_dnsport* listen)
{
	struct listen_list* p;
	size_t s = sizeof(*listen) + sizeof(*listen->base) +
		sizeof(*listen->udp_buff) +
		sldns_buffer_capacity(listen->udp_buff);
#ifdef USE_DNSCRYPT
	/* listen_create leaves dnscrypt_udp_buff NULL when there is no
	 * dnscrypt port, so it must not be dereferenced unchecked */
	if(listen->dnscrypt_udp_buff) {
		s += sizeof(*listen->dnscrypt_udp_buff);
		if(listen->udp_buff != listen->dnscrypt_udp_buff){
			s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
		}
	}
#endif
	for(p = listen->cps; p; p = p->next) {
		s += sizeof(*p);
		s += comm_point_get_mem(p->com);
	}
	return s;
}

/** stop listening on the tcp accept comm points */
void listen_stop_accept(struct listen_dnsport* listen)
{
	/* do not stop the ones that have no tcp_free list
	 * (they have already stopped listening) */
	struct listen_list* p;
	for(p=listen->cps; p; p=p->next) {
		if(p->com->type == comm_tcp_accept &&
			p->com->tcp_free != NULL) {
			comm_point_stop_listening(p->com);
		}
	}
}

/** start listening again on the tcp accept comm points */
void listen_start_accept(struct listen_dnsport* listen)
{
	/* do not start the ones that have no tcp_free list, it is no
	 * use to listen to them because they have no free tcp handlers */
	struct listen_list* p;
	for(p=listen->cps; p; p=p->next) {
		if(p->com->type == comm_tcp_accept &&
			p->com->tcp_free != NULL) {
			comm_point_start_listening(p->com, -1, -1);
		}
	}
}

/**
 * Allocate a zeroed tcp request info structure.
 * @param spoolbuf: stored as the spool buffer; it is not copied.
 * @return the new structure, or NULL on malloc failure.
 */
struct tcp_req_info*
tcp_req_info_create(struct sldns_buffer* spoolbuf)
{
	struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
	if(!req) {
		log_err("malloc failure for new stream outoforder processing structure");
		return NULL;
	}
	memset(req, 0, sizeof(*req));
	req->spool_buffer = spoolbuf;
	return req;
}

/** clear and free the tcp request info structure; NULL is allowed */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(!req) return;
	tcp_req_info_clear(req);
	/* cp is pointer back to commpoint that owns this struct and
	 * called delete on us */
	/* spool_buffer is shared udp buffer, not deleted here */
	free(req);
}

/**
 * Drop the open requests and the pending results of the structure.
 * The structure itself is kept and can be used again.
 */
void tcp_req_info_clear(struct tcp_req_info* req)
{
	struct tcp_req_open_item* open, *nopen;
	struct tcp_req_done_item* item, *nitem;
	if(!req) return;

	/* free outstanding request mesh reply entries */
	open = req->open_req_list;
	while(open) {
		nopen = open->next;
		mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
		free(open);
		open = nopen;
	}
	req->open_req_list = NULL;
	req->num_open_req = 0;

	/* free pending writable result packets */
	item = req->done_req_list;
	while(item) {
		nitem = item->next;
		lock_basic_lock(&stream_wait_count_lock);
		stream_wait_count -= (sizeof(struct tcp_req_done_item)
			+item->len);
		lock_basic_unlock(&stream_wait_count_lock);
		free(item->buf);
		free(item);
		item = nitem;
	}
	req->done_req_list = NULL;
	req->num_done_req = 0;
	req->read_is_closed = 0;
}

/** remove all open request entries that refer to the mesh state m */
void
tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
{
	struct tcp_req_open_item* open, *prev = NULL;
	if(!req || !m) return;
	open = req->open_req_list;
	while(open) {
		if(open->mesh_state == m) {
			struct tcp_req_open_item* next;
			if(prev) prev->next = open->next;
			else req->open_req_list = open->next;
			/* caller has to manage the mesh state reply entry */
			next = open->next;
			free(open);
			req->num_open_req --;

			/* prev = prev; */
			open = next;
			continue;
		}
		prev = open;
		open = open->next;
	}
}

/** setup listening for read or write */
static void
tcp_req_info_setup_listen(struct tcp_req_info* req)
{
	int wr = 0;
	int rd = 0;

	if(req->cp->tcp_byte_count != 0) {
		/* cannot change, halfway through */
		return;
	}

	if(!req->cp->tcp_is_reading)
		wr = 1;
	if(!req->read_is_closed)
		rd = 1;

	if(wr) {
		req->cp->tcp_is_reading = 0;
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
	} else if(rd) {
		req->cp->tcp_is_reading = 1;
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
		/* and also read it (from SSL stack buffers), so
		 * no event read event is expected since the remainder of
		 * the TLS frame is sitting in the buffers. */
		req->read_again = 1;
	} else {
		comm_point_stop_listening(req->cp);
		comm_point_start_listening(req->cp, -1,
			adjusted_tcp_timeout(req->cp));
		comm_point_listen_for_rw(req->cp, 0, 0);
	}
}

/** remove first item from list of pending results */
static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info* req)
{
	struct tcp_req_done_item* item;
	log_assert(req->num_done_req > 0 && req->done_req_list);
	item = req->done_req_list;
	lock_basic_lock(&stream_wait_count_lock);
	stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
	lock_basic_unlock(&stream_wait_count_lock);
	req->done_req_list = req->done_req_list->next;
	req->num_done_req --;
	return item;
}

/** Send given buffer and setup to write */
static void
tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
	size_t len)
{
	sldns_buffer_clear(req->cp->buffer);
	sldns_buffer_write(req->cp->buffer, buf, len);
	sldns_buffer_flip(req->cp->buffer);

	req->cp->tcp_is_reading = 0; /* we are now writing */
}

/** pick up the next result and start writing it to the channel */
static void
tcp_req_pickup_next_result(struct tcp_req_info* req)
{
	if(req->num_done_req > 0) {
		/* unlist the done item from the list of pending results */
		struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
		tcp_req_info_start_write_buf(req, item->buf, item->len);
		free(item->buf);
		free(item);
	}
}

/** the read channel has closed */
int
tcp_req_info_handle_read_close(struct tcp_req_info* req)
{
	verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
2288 /* reset byte count for (potential) partial read */ 2289 req->cp->tcp_byte_count = 0; 2290 /* if we still have results to write, pick up next and write it */ 2291 if(req->num_done_req != 0) { 2292 tcp_req_pickup_next_result(req); 2293 tcp_req_info_setup_listen(req); 2294 return 1; 2295 } 2296 /* if nothing to do, this closes the connection */ 2297 if(req->num_open_req == 0 && req->num_done_req == 0) 2298 return 0; 2299 /* otherwise, we must be waiting for dns resolve, wait with timeout */ 2300 req->read_is_closed = 1; 2301 tcp_req_info_setup_listen(req); 2302 return 1; 2303 } 2304 2305 void 2306 tcp_req_info_handle_writedone(struct tcp_req_info* req) 2307 { 2308 /* back to reading state, we finished this write event */ 2309 sldns_buffer_clear(req->cp->buffer); 2310 if(req->num_done_req == 0 && req->read_is_closed) { 2311 /* no more to write and nothing to read, close it */ 2312 comm_point_drop_reply(&req->cp->repinfo); 2313 return; 2314 } 2315 req->cp->tcp_is_reading = 1; 2316 /* see if another result needs writing */ 2317 tcp_req_pickup_next_result(req); 2318 2319 /* see if there is more to write, if not stop_listening for writing */ 2320 /* see if new requests are allowed, if so, start_listening 2321 * for reading */ 2322 tcp_req_info_setup_listen(req); 2323 } 2324 2325 void 2326 tcp_req_info_handle_readdone(struct tcp_req_info* req) 2327 { 2328 struct comm_point* c = req->cp; 2329 2330 /* we want to read up several requests, unless there are 2331 * pending answers */ 2332 2333 req->is_drop = 0; 2334 req->is_reply = 0; 2335 req->in_worker_handle = 1; 2336 sldns_buffer_set_limit(req->spool_buffer, 0); 2337 /* handle the current request */ 2338 /* this calls the worker handle request routine that could give 2339 * a cache response, or localdata response, or drop the reply, 2340 * or schedule a mesh entry for later */ 2341 fptr_ok(fptr_whitelist_comm_point(c->callback)); 2342 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) { 2343 
req->in_worker_handle = 0; 2344 /* there is an answer, put it up. It is already in the 2345 * c->buffer, just send it. */ 2346 /* since we were just reading a query, the channel is 2347 * clear to write to */ 2348 send_it: 2349 c->tcp_is_reading = 0; 2350 comm_point_stop_listening(c); 2351 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c)); 2352 return; 2353 } 2354 req->in_worker_handle = 0; 2355 /* it should be waiting in the mesh for recursion. 2356 * If mesh failed to add a new entry and called commpoint_drop_reply. 2357 * Then the mesh state has been cleared. */ 2358 if(req->is_drop) { 2359 /* the reply has been dropped, stream has been closed. */ 2360 return; 2361 } 2362 /* If mesh failed(mallocfail) and called commpoint_send_reply with 2363 * something like servfail then we pick up that reply below. */ 2364 if(req->is_reply) { 2365 goto send_it; 2366 } 2367 2368 sldns_buffer_clear(c->buffer); 2369 /* if pending answers, pick up an answer and start sending it */ 2370 tcp_req_pickup_next_result(req); 2371 2372 /* if answers pending, start sending answers */ 2373 /* read more requests if we can have more requests */ 2374 tcp_req_info_setup_listen(req); 2375 } 2376 2377 int 2378 tcp_req_info_add_meshstate(struct tcp_req_info* req, 2379 struct mesh_area* mesh, struct mesh_state* m) 2380 { 2381 struct tcp_req_open_item* item; 2382 log_assert(req && mesh && m); 2383 item = (struct tcp_req_open_item*)malloc(sizeof(*item)); 2384 if(!item) return 0; 2385 item->next = req->open_req_list; 2386 item->mesh = mesh; 2387 item->mesh_state = m; 2388 req->open_req_list = item; 2389 req->num_open_req++; 2390 return 1; 2391 } 2392 2393 /** Add a result to the result list. At the end. 
*/ 2394 static int 2395 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len) 2396 { 2397 struct tcp_req_done_item* last = NULL; 2398 struct tcp_req_done_item* item; 2399 size_t space; 2400 2401 /* see if we have space */ 2402 space = sizeof(struct tcp_req_done_item) + len; 2403 lock_basic_lock(&stream_wait_count_lock); 2404 if(stream_wait_count + space > stream_wait_max) { 2405 lock_basic_unlock(&stream_wait_count_lock); 2406 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size"); 2407 return 0; 2408 } 2409 stream_wait_count += space; 2410 lock_basic_unlock(&stream_wait_count_lock); 2411 2412 /* find last element */ 2413 last = req->done_req_list; 2414 while(last && last->next) 2415 last = last->next; 2416 2417 /* create new element */ 2418 item = (struct tcp_req_done_item*)malloc(sizeof(*item)); 2419 if(!item) { 2420 log_err("malloc failure, for stream result list"); 2421 return 0; 2422 } 2423 item->next = NULL; 2424 item->len = len; 2425 item->buf = memdup(buf, len); 2426 if(!item->buf) { 2427 free(item); 2428 log_err("malloc failure, adding reply to stream result list"); 2429 return 0; 2430 } 2431 2432 /* link in */ 2433 if(last) last->next = item; 2434 else req->done_req_list = item; 2435 req->num_done_req++; 2436 return 1; 2437 } 2438 2439 void 2440 tcp_req_info_send_reply(struct tcp_req_info* req) 2441 { 2442 if(req->in_worker_handle) { 2443 /* reply from mesh is in the spool_buffer */ 2444 /* copy now, so that the spool buffer is free for other tasks 2445 * before the callback is done */ 2446 sldns_buffer_clear(req->cp->buffer); 2447 sldns_buffer_write(req->cp->buffer, 2448 sldns_buffer_begin(req->spool_buffer), 2449 sldns_buffer_limit(req->spool_buffer)); 2450 sldns_buffer_flip(req->cp->buffer); 2451 req->is_reply = 1; 2452 return; 2453 } 2454 /* now that the query has been handled, that mesh_reply entry 2455 * should be removed, from the tcp_req_info list, 2456 * the mesh state cleanup removes then with 
region_cleanup and 2457 * replies_sent true. */ 2458 /* see if we can send it straight away (we are not doing 2459 * anything else). If so, copy to buffer and start */ 2460 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) { 2461 /* buffer is free, and was ready to read new query into, 2462 * but we are now going to use it to send this answer */ 2463 tcp_req_info_start_write_buf(req, 2464 sldns_buffer_begin(req->spool_buffer), 2465 sldns_buffer_limit(req->spool_buffer)); 2466 /* switch to listen to write events */ 2467 comm_point_stop_listening(req->cp); 2468 comm_point_start_listening(req->cp, -1, 2469 adjusted_tcp_timeout(req->cp)); 2470 return; 2471 } 2472 /* queue up the answer behind the others already pending */ 2473 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer), 2474 sldns_buffer_limit(req->spool_buffer))) { 2475 /* drop the connection, we are out of resources */ 2476 comm_point_drop_reply(&req->cp->repinfo); 2477 } 2478 } 2479 2480 size_t tcp_req_info_get_stream_buffer_size(void) 2481 { 2482 size_t s; 2483 if(!stream_wait_lock_inited) 2484 return stream_wait_count; 2485 lock_basic_lock(&stream_wait_count_lock); 2486 s = stream_wait_count; 2487 lock_basic_unlock(&stream_wait_count_lock); 2488 return s; 2489 } 2490 2491 size_t http2_get_query_buffer_size(void) 2492 { 2493 size_t s; 2494 if(!http2_query_buffer_lock_inited) 2495 return http2_query_buffer_count; 2496 lock_basic_lock(&http2_query_buffer_count_lock); 2497 s = http2_query_buffer_count; 2498 lock_basic_unlock(&http2_query_buffer_count_lock); 2499 return s; 2500 } 2501 2502 size_t http2_get_response_buffer_size(void) 2503 { 2504 size_t s; 2505 if(!http2_response_buffer_lock_inited) 2506 return http2_response_buffer_count; 2507 lock_basic_lock(&http2_response_buffer_count_lock); 2508 s = http2_response_buffer_count; 2509 lock_basic_unlock(&http2_response_buffer_count_lock); 2510 return s; 2511 } 2512 2513 #ifdef HAVE_NGHTTP2 2514 /** nghttp2 callback. 
Used to copy response from rbuffer to nghttp2 session */ 2515 static ssize_t http2_submit_response_read_callback( 2516 nghttp2_session* ATTR_UNUSED(session), 2517 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2518 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2519 { 2520 struct http2_stream* h2_stream; 2521 struct http2_session* h2_session = source->ptr; 2522 size_t copylen = length; 2523 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2524 h2_session->session, stream_id))) { 2525 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2526 "stream"); 2527 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2528 } 2529 if(!h2_stream->rbuffer || 2530 sldns_buffer_remaining(h2_stream->rbuffer) == 0) { 2531 verbose(VERB_QUERY, "http2: cannot submit buffer. No data " 2532 "available in rbuffer"); 2533 /* rbuffer will be free'd in frame close cb */ 2534 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2535 } 2536 2537 if(copylen > sldns_buffer_remaining(h2_stream->rbuffer)) 2538 copylen = sldns_buffer_remaining(h2_stream->rbuffer); 2539 if(copylen > SSIZE_MAX) 2540 copylen = SSIZE_MAX; /* will probably never happen */ 2541 2542 memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen); 2543 sldns_buffer_skip(h2_stream->rbuffer, copylen); 2544 2545 if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) { 2546 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2547 lock_basic_lock(&http2_response_buffer_count_lock); 2548 http2_response_buffer_count -= 2549 sldns_buffer_capacity(h2_stream->rbuffer); 2550 lock_basic_unlock(&http2_response_buffer_count_lock); 2551 sldns_buffer_free(h2_stream->rbuffer); 2552 h2_stream->rbuffer = NULL; 2553 } 2554 2555 return copylen; 2556 } 2557 2558 /** 2559 * Send RST_STREAM frame for stream. 
2560 * @param h2_session: http2 session to submit frame to 2561 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM 2562 * @return 0 on error, 1 otherwise 2563 */ 2564 static int http2_submit_rst_stream(struct http2_session* h2_session, 2565 struct http2_stream* h2_stream) 2566 { 2567 int ret = nghttp2_submit_rst_stream(h2_session->session, 2568 NGHTTP2_FLAG_NONE, h2_stream->stream_id, 2569 NGHTTP2_INTERNAL_ERROR); 2570 if(ret) { 2571 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, " 2572 "error: %s", nghttp2_strerror(ret)); 2573 return 0; 2574 } 2575 return 1; 2576 } 2577 2578 /** 2579 * DNS response ready to be submitted to nghttp2, to be prepared for sending 2580 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer 2581 * might be used before this will be sent out. 2582 * @param h2_session: http2 session, containing c->buffer which contains answer 2583 * @return 0 on error, 1 otherwise 2584 */ 2585 int http2_submit_dns_response(struct http2_session* h2_session) 2586 { 2587 int ret; 2588 nghttp2_data_provider data_prd; 2589 char status[4]; 2590 nghttp2_nv headers[3]; 2591 struct http2_stream* h2_stream = h2_session->c->h2_stream; 2592 size_t rlen; 2593 char rlen_str[32]; 2594 2595 if(h2_stream->rbuffer) { 2596 log_err("http2 submit response error: rbuffer already " 2597 "exists"); 2598 return 0; 2599 } 2600 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) { 2601 log_err("http2 submit response error: c->buffer not complete"); 2602 return 0; 2603 } 2604 2605 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2606 verbose(VERB_QUERY, "http2: submit response error: " 2607 "invalid status"); 2608 return 0; 2609 } 2610 2611 rlen = sldns_buffer_remaining(h2_session->c->buffer); 2612 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen); 2613 2614 lock_basic_lock(&http2_response_buffer_count_lock); 2615 if(http2_response_buffer_count + rlen > http2_response_buffer_max) { 2616 
lock_basic_unlock(&http2_response_buffer_count_lock); 2617 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2618 "in https-response-buffer-size"); 2619 return http2_submit_rst_stream(h2_session, h2_stream); 2620 } 2621 http2_response_buffer_count += rlen; 2622 lock_basic_unlock(&http2_response_buffer_count_lock); 2623 2624 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) { 2625 lock_basic_lock(&http2_response_buffer_count_lock); 2626 http2_response_buffer_count -= rlen; 2627 lock_basic_unlock(&http2_response_buffer_count_lock); 2628 log_err("http2 submit response error: malloc failure"); 2629 return 0; 2630 } 2631 2632 headers[0].name = (uint8_t*)":status"; 2633 headers[0].namelen = 7; 2634 headers[0].value = (uint8_t*)status; 2635 headers[0].valuelen = 3; 2636 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2637 2638 headers[1].name = (uint8_t*)"content-type"; 2639 headers[1].namelen = 12; 2640 headers[1].value = (uint8_t*)"application/dns-message"; 2641 headers[1].valuelen = 23; 2642 headers[1].flags = NGHTTP2_NV_FLAG_NONE; 2643 2644 headers[2].name = (uint8_t*)"content-length"; 2645 headers[2].namelen = 14; 2646 headers[2].value = (uint8_t*)rlen_str; 2647 headers[2].valuelen = strlen(rlen_str); 2648 headers[2].flags = NGHTTP2_NV_FLAG_NONE; 2649 2650 sldns_buffer_write(h2_stream->rbuffer, 2651 sldns_buffer_current(h2_session->c->buffer), 2652 sldns_buffer_remaining(h2_session->c->buffer)); 2653 sldns_buffer_flip(h2_stream->rbuffer); 2654 2655 data_prd.source.ptr = h2_session; 2656 data_prd.read_callback = http2_submit_response_read_callback; 2657 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2658 headers, 3, &data_prd); 2659 if(ret) { 2660 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2661 "error: %s", nghttp2_strerror(ret)); 2662 return 0; 2663 } 2664 return 1; 2665 } 2666 #else 2667 int http2_submit_dns_response(void* ATTR_UNUSED(v)) 2668 { 2669 return 0; 2670 } 2671 #endif 2672 2673 #ifdef HAVE_NGHTTP2 2674 /** HTTP 
status to descriptive string */ 2675 static char* http_status_to_str(enum http_status s) 2676 { 2677 switch(s) { 2678 case HTTP_STATUS_OK: 2679 return "OK"; 2680 case HTTP_STATUS_BAD_REQUEST: 2681 return "Bad Request"; 2682 case HTTP_STATUS_NOT_FOUND: 2683 return "Not Found"; 2684 case HTTP_STATUS_PAYLOAD_TOO_LARGE: 2685 return "Payload Too Large"; 2686 case HTTP_STATUS_URI_TOO_LONG: 2687 return "URI Too Long"; 2688 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE: 2689 return "Unsupported Media Type"; 2690 case HTTP_STATUS_NOT_IMPLEMENTED: 2691 return "Not Implemented"; 2692 } 2693 return "Status Unknown"; 2694 } 2695 2696 /** nghttp2 callback. Used to copy error message to nghttp2 session */ 2697 static ssize_t http2_submit_error_read_callback( 2698 nghttp2_session* ATTR_UNUSED(session), 2699 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags, 2700 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg)) 2701 { 2702 struct http2_stream* h2_stream; 2703 struct http2_session* h2_session = source->ptr; 2704 char* msg; 2705 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2706 h2_session->session, stream_id))) { 2707 verbose(VERB_QUERY, "http2: cannot get stream data, closing " 2708 "stream"); 2709 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE; 2710 } 2711 *data_flags |= NGHTTP2_DATA_FLAG_EOF; 2712 msg = http_status_to_str(h2_stream->status); 2713 if(length < strlen(msg)) 2714 return 0; /* not worth trying over multiple frames */ 2715 memcpy(buf, msg, strlen(msg)); 2716 return strlen(msg); 2717 2718 } 2719 2720 /** 2721 * HTTP error response ready to be submitted to nghttp2, to be prepared for 2722 * sending out. Message body will contain descriptive string for HTTP status. 
2723 * @param h2_session: http2 session to submit to 2724 * @param h2_stream: http2 stream containing HTTP status to use for error 2725 * @return 0 on error, 1 otherwise 2726 */ 2727 static int http2_submit_error(struct http2_session* h2_session, 2728 struct http2_stream* h2_stream) 2729 { 2730 int ret; 2731 char status[4]; 2732 nghttp2_data_provider data_prd; 2733 nghttp2_nv headers[1]; /* will be copied by nghttp */ 2734 if(snprintf(status, 4, "%d", h2_stream->status) != 3) { 2735 verbose(VERB_QUERY, "http2: submit error failed, " 2736 "invalid status"); 2737 return 0; 2738 } 2739 headers[0].name = (uint8_t*)":status"; 2740 headers[0].namelen = 7; 2741 headers[0].value = (uint8_t*)status; 2742 headers[0].valuelen = 3; 2743 headers[0].flags = NGHTTP2_NV_FLAG_NONE; 2744 2745 data_prd.source.ptr = h2_session; 2746 data_prd.read_callback = http2_submit_error_read_callback; 2747 2748 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id, 2749 headers, 1, &data_prd); 2750 if(ret) { 2751 verbose(VERB_QUERY, "http2: submit error failed, " 2752 "error: %s", nghttp2_strerror(ret)); 2753 return 0; 2754 } 2755 return 1; 2756 } 2757 2758 /** 2759 * Start query handling. Query is stored in the stream, and will be free'd here. 2760 * @param h2_session: http2 session, containing comm point 2761 * @param h2_stream: stream containing buffered query 2762 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no 2763 * reply available (yet). 
2764 */ 2765 static int http2_query_read_done(struct http2_session* h2_session, 2766 struct http2_stream* h2_stream) 2767 { 2768 log_assert(h2_stream->qbuffer); 2769 2770 if(h2_session->c->h2_stream) { 2771 verbose(VERB_ALGO, "http2_query_read_done failure: shared " 2772 "buffer already assigned to stream"); 2773 return -1; 2774 } 2775 2776 /* the c->buffer might be used by mesh_send_reply and no be cleard 2777 * need to be cleared before use */ 2778 sldns_buffer_clear(h2_session->c->buffer); 2779 if(sldns_buffer_remaining(h2_session->c->buffer) < 2780 sldns_buffer_remaining(h2_stream->qbuffer)) { 2781 /* qbuffer will be free'd in frame close cb */ 2782 sldns_buffer_clear(h2_session->c->buffer); 2783 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit " 2784 "qbuffer in c->buffer"); 2785 return -1; 2786 } 2787 2788 sldns_buffer_write(h2_session->c->buffer, 2789 sldns_buffer_current(h2_stream->qbuffer), 2790 sldns_buffer_remaining(h2_stream->qbuffer)); 2791 2792 lock_basic_lock(&http2_query_buffer_count_lock); 2793 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer); 2794 lock_basic_unlock(&http2_query_buffer_count_lock); 2795 sldns_buffer_free(h2_stream->qbuffer); 2796 h2_stream->qbuffer = NULL; 2797 2798 sldns_buffer_flip(h2_session->c->buffer); 2799 h2_session->c->h2_stream = h2_stream; 2800 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback)); 2801 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg, 2802 NETEVENT_NOERROR, &h2_session->c->repinfo)) { 2803 return 1; /* answer in c->buffer */ 2804 } 2805 sldns_buffer_clear(h2_session->c->buffer); 2806 h2_session->c->h2_stream = NULL; 2807 return 0; /* mesh state added, or dropped */ 2808 } 2809 2810 /** nghttp2 callback. Used to check if the received frame indicates the end of a 2811 * stream. Gather collected request data and start query handling. 
*/ 2812 static int http2_req_frame_recv_cb(nghttp2_session* session, 2813 const nghttp2_frame* frame, void* cb_arg) 2814 { 2815 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2816 struct http2_stream* h2_stream; 2817 int query_read_done; 2818 2819 if((frame->hd.type != NGHTTP2_DATA && 2820 frame->hd.type != NGHTTP2_HEADERS) || 2821 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) { 2822 return 0; 2823 } 2824 2825 if(!(h2_stream = nghttp2_session_get_stream_user_data( 2826 session, frame->hd.stream_id))) 2827 return 0; 2828 2829 if(h2_stream->invalid_endpoint) { 2830 h2_stream->status = HTTP_STATUS_NOT_FOUND; 2831 goto submit_http_error; 2832 } 2833 2834 if(h2_stream->invalid_content_type) { 2835 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE; 2836 goto submit_http_error; 2837 } 2838 2839 if(h2_stream->http_method != HTTP_METHOD_GET && 2840 h2_stream->http_method != HTTP_METHOD_POST) { 2841 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED; 2842 goto submit_http_error; 2843 } 2844 2845 if(h2_stream->query_too_large) { 2846 if(h2_stream->http_method == HTTP_METHOD_POST) 2847 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE; 2848 else 2849 h2_stream->status = HTTP_STATUS_URI_TOO_LONG; 2850 goto submit_http_error; 2851 } 2852 2853 if(!h2_stream->qbuffer) { 2854 h2_stream->status = HTTP_STATUS_BAD_REQUEST; 2855 goto submit_http_error; 2856 } 2857 2858 if(h2_stream->status) { 2859 submit_http_error: 2860 verbose(VERB_QUERY, "http2 request invalid, returning :status=" 2861 "%d", h2_stream->status); 2862 if(!http2_submit_error(h2_session, h2_stream)) { 2863 return NGHTTP2_ERR_CALLBACK_FAILURE; 2864 } 2865 return 0; 2866 } 2867 h2_stream->status = HTTP_STATUS_OK; 2868 2869 sldns_buffer_flip(h2_stream->qbuffer); 2870 h2_session->postpone_drop = 1; 2871 query_read_done = http2_query_read_done(h2_session, h2_stream); 2872 if(query_read_done < 0) 2873 return NGHTTP2_ERR_CALLBACK_FAILURE; 2874 else if(!query_read_done) { 2875 if(h2_session->is_drop) { 2876 
/* connection needs to be closed. Return failure to make 2877 * sure no other action are taken anymore on comm point. 2878 * failure will result in reclaiming (and closing) 2879 * of comm point. */ 2880 verbose(VERB_QUERY, "http2 query dropped in worker cb"); 2881 h2_session->postpone_drop = 0; 2882 return NGHTTP2_ERR_CALLBACK_FAILURE; 2883 } 2884 /* nothing to submit right now, query added to mesh. */ 2885 h2_session->postpone_drop = 0; 2886 return 0; 2887 } 2888 if(!http2_submit_dns_response(h2_session)) { 2889 sldns_buffer_clear(h2_session->c->buffer); 2890 h2_session->c->h2_stream = NULL; 2891 return NGHTTP2_ERR_CALLBACK_FAILURE; 2892 } 2893 verbose(VERB_QUERY, "http2 query submitted to session"); 2894 sldns_buffer_clear(h2_session->c->buffer); 2895 h2_session->c->h2_stream = NULL; 2896 return 0; 2897 } 2898 2899 /** nghttp2 callback. Used to detect start of new streams. */ 2900 static int http2_req_begin_headers_cb(nghttp2_session* session, 2901 const nghttp2_frame* frame, void* cb_arg) 2902 { 2903 struct http2_session* h2_session = (struct http2_session*)cb_arg; 2904 struct http2_stream* h2_stream; 2905 int ret; 2906 if(frame->hd.type != NGHTTP2_HEADERS || 2907 frame->headers.cat != NGHTTP2_HCAT_REQUEST) { 2908 /* only interested in request headers */ 2909 return 0; 2910 } 2911 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) { 2912 log_err("malloc failure while creating http2 stream"); 2913 return NGHTTP2_ERR_CALLBACK_FAILURE; 2914 } 2915 http2_session_add_stream(h2_session, h2_stream); 2916 ret = nghttp2_session_set_stream_user_data(session, 2917 frame->hd.stream_id, h2_stream); 2918 if(ret) { 2919 /* stream does not exist */ 2920 verbose(VERB_QUERY, "http2: set_stream_user_data failed, " 2921 "error: %s", nghttp2_strerror(ret)); 2922 return NGHTTP2_ERR_CALLBACK_FAILURE; 2923 } 2924 2925 return 0; 2926 } 2927 2928 /** 2929 * base64url decode, store in qbuffer 2930 * @param h2_session: http2 session 2931 * @param h2_stream: http2 stream 2932 * 
@param start: start of the base64 string 2933 * @param length: length of the base64 string 2934 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer, 2935 * buffer will be NULL is unparseble. 2936 */ 2937 static int http2_buffer_uri_query(struct http2_session* h2_session, 2938 struct http2_stream* h2_stream, const uint8_t* start, size_t length) 2939 { 2940 size_t expectb64len; 2941 int b64len; 2942 if(h2_stream->http_method == HTTP_METHOD_POST) 2943 return 1; 2944 if(length == 0) 2945 return 1; 2946 if(h2_stream->qbuffer) { 2947 verbose(VERB_ALGO, "http2_req_header fail, " 2948 "qbuffer already set"); 2949 return 0; 2950 } 2951 2952 /* calculate size, might be a bit bigger than the real 2953 * decoded buffer size */ 2954 expectb64len = sldns_b64_pton_calculate_size(length); 2955 log_assert(expectb64len > 0); 2956 if(expectb64len > 2957 h2_session->c->http2_stream_max_qbuffer_size) { 2958 h2_stream->query_too_large = 1; 2959 return 1; 2960 } 2961 2962 lock_basic_lock(&http2_query_buffer_count_lock); 2963 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) { 2964 lock_basic_unlock(&http2_query_buffer_count_lock); 2965 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, " 2966 "in http2-query-buffer-size"); 2967 return http2_submit_rst_stream(h2_session, h2_stream); 2968 } 2969 http2_query_buffer_count += expectb64len; 2970 lock_basic_unlock(&http2_query_buffer_count_lock); 2971 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) { 2972 lock_basic_lock(&http2_query_buffer_count_lock); 2973 http2_query_buffer_count -= expectb64len; 2974 lock_basic_unlock(&http2_query_buffer_count_lock); 2975 log_err("http2_req_header fail, qbuffer " 2976 "malloc failure"); 2977 return 0; 2978 } 2979 2980 if(sldns_b64_contains_nonurl((char const*)start, length)) { 2981 char buf[65536+4]; 2982 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding"); 2983 /* copy to the scratch buffer temporarily to terminate the 2984 * string 
with a zero */ 2985 if(length+1 > sizeof(buf)) { 2986 /* too long */ 2987 lock_basic_lock(&http2_query_buffer_count_lock); 2988 http2_query_buffer_count -= expectb64len; 2989 lock_basic_unlock(&http2_query_buffer_count_lock); 2990 sldns_buffer_free(h2_stream->qbuffer); 2991 h2_stream->qbuffer = NULL; 2992 return 1; 2993 } 2994 memmove(buf, start, length); 2995 buf[length] = 0; 2996 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current( 2997 h2_stream->qbuffer), expectb64len)) || b64len < 0) { 2998 lock_basic_lock(&http2_query_buffer_count_lock); 2999 http2_query_buffer_count -= expectb64len; 3000 lock_basic_unlock(&http2_query_buffer_count_lock); 3001 sldns_buffer_free(h2_stream->qbuffer); 3002 h2_stream->qbuffer = NULL; 3003 return 1; 3004 } 3005 } else { 3006 if(!(b64len = sldns_b64url_pton( 3007 (char const *)start, length, 3008 sldns_buffer_current(h2_stream->qbuffer), 3009 expectb64len)) || b64len < 0) { 3010 lock_basic_lock(&http2_query_buffer_count_lock); 3011 http2_query_buffer_count -= expectb64len; 3012 lock_basic_unlock(&http2_query_buffer_count_lock); 3013 sldns_buffer_free(h2_stream->qbuffer); 3014 h2_stream->qbuffer = NULL; 3015 /* return without error, method can be an 3016 * unknown POST */ 3017 return 1; 3018 } 3019 } 3020 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len); 3021 return 1; 3022 } 3023 3024 /** nghttp2 callback. Used to parse headers from HEADER frames. 
*/
static int http2_req_header_cb(nghttp2_session* session,
	const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
	const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
	void* cb_arg)
{
	struct http2_stream* h2_stream = NULL;
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	/* nghttp2 deals with CONTINUATION frames and provides them as part of
	 * the HEADER */
	if(frame->hd.type != NGHTTP2_HEADERS ||
		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
		/* only interested in request headers */
		return 0;
	}
	/* no stream user data stored for this stream id: nothing to update */
	if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
		frame->hd.stream_id)))
		return 0;

	/* earlier checks already indicate we can stop handling this query */
	if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
		h2_stream->invalid_content_type ||
		h2_stream->invalid_endpoint)
		return 0;


	/* nghttp2 performs some sanity checks in the headers, including:
	 * name and value are guaranteed to be null terminated
	 * name is guaranteed to be lowercase
	 * content-length value is guaranteed to contain digits
	 */

	/* :method is only evaluated while http_method is still unset (zero) */
	if(!h2_stream->http_method && namelen == 7 &&
		memcmp(":method", name, namelen) == 0) {
		/* Case insensitive check on :method value to be on the safe
		 * side. I failed to find text about case sensitivity in specs.
		 */
		if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
			h2_stream->http_method = HTTP_METHOD_GET;
		else if(valuelen == 4 &&
			strcasecmp("POST", (const char*)value) == 0) {
			h2_stream->http_method = HTTP_METHOD_POST;
			if(h2_stream->qbuffer) {
				/* POST method uses query from DATA frames */
				/* drop the buffer that was already allocated
				 * and take its capacity off the global
				 * query buffer counter, under its lock */
				lock_basic_lock(&http2_query_buffer_count_lock);
				http2_query_buffer_count -=
					sldns_buffer_capacity(h2_stream->qbuffer);
				lock_basic_unlock(&http2_query_buffer_count_lock);
				sldns_buffer_free(h2_stream->qbuffer);
				h2_stream->qbuffer = NULL;
			}
		} else
			h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
		return 0;
	}
	if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
		/* :path may contain DNS query, depending on method. Method might
		 * not be known yet here, so check after finishing receiving
		 * stream. */
#define	HTTP_QUERY_PARAM "?dns="
		size_t el = strlen(h2_session->c->http_endpoint);
		size_t qpl = strlen(HTTP_QUERY_PARAM);

		/* the path has to start with the configured endpoint */
		if(valuelen < el || memcmp(h2_session->c->http_endpoint,
			value, el) != 0) {
			h2_stream->invalid_endpoint = 1;
			return 0;
		}
		/* larger than endpoint only allowed if it is for the query
		 * parameter */
		if(valuelen <= el+qpl ||
			memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
			/* a path that is exactly the endpoint is accepted
			 * here, without a query being buffered */
			if(valuelen != el)
				h2_stream->invalid_endpoint = 1;
			return 0;
		}

		/* hand over the part after "?dns=" for buffering */
		if(!http2_buffer_uri_query(h2_session, h2_stream,
			value+(el+qpl), valuelen-(el+qpl))) {
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
		return 0;
	}
	/* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
	 * and not needed when using GET. Don't enforce.
	 * If set only allow lowercase "application/dns-message".
	 *
	 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
	 * be able to handle "application/dns-message". Since that is the only
	 * content-type supported we can ignore the accept header.
	 */
	if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
		/* 23 is the length of "application/dns-message" */
		if(valuelen != 23 || memcmp("application/dns-message", value,
			valuelen) != 0) {
			h2_stream->invalid_content_type = 1;
		}
	}

	/* Only interested in content-length for POST (or not yet known)
	 * method. It is only picked up while content_length is still zero.
	 */
	if((!h2_stream->http_method ||
		h2_stream->http_method == HTTP_METHOD_POST) &&
		!h2_stream->content_length && namelen == 14 &&
		memcmp("content-length", name, namelen) == 0) {
		/* more than 5 digits is flagged as too large without
		 * converting the value */
		if(valuelen > 5) {
			h2_stream->query_too_large = 1;
			return 0;
		}
		/* guaranteed to only contain digits and be null terminated */
		h2_stream->content_length = atoi((const char*)value);
		if(h2_stream->content_length >
			h2_session->c->http2_stream_max_qbuffer_size) {
			h2_stream->query_too_large = 1;
			return 0;
		}
	}
	return 0;
}

/** nghttp2 callback. Used to get data from DATA frames, which can contain
 * queries in POST requests. */
static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
	uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
	size_t len, void* cb_arg)
{
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	struct http2_stream* h2_stream;
	size_t qlen = 0;

	/* no stream user data for this stream id: ignore the chunk */
	if(!(h2_stream = nghttp2_session_get_stream_user_data(
		h2_session->session, stream_id))) {
		return 0;
	}

	/* already flagged as too large, drop further data */
	if(h2_stream->query_too_large)
		return 0;

	if(!h2_stream->qbuffer) {
		if(h2_stream->content_length) {
			if(h2_stream->content_length < len)
				/* getting more data in DATA frame than
				 * advertised in content-length header.
*/
				return NGHTTP2_ERR_CALLBACK_FAILURE;
			/* size the buffer from the content-length header */
			qlen = h2_stream->content_length;
		} else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
			/* setting this to msg-buffer-size can result in a lot
			 * of memory consumption. Most queries should fit in a
			 * single DATA frame, and most POST queries will
			 * contain content-length which does not impose this
			 * limit. */
			qlen = len;
		}
	}
	if(!h2_stream->qbuffer && qlen) {
		/* reserve qlen in the global query buffer counter before
		 * allocating; reset the stream if the maximum is exceeded */
		lock_basic_lock(&http2_query_buffer_count_lock);
		if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
			lock_basic_unlock(&http2_query_buffer_count_lock);
			verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
				"in http2-query-buffer-size");
			return http2_submit_rst_stream(h2_session, h2_stream);
		}
		http2_query_buffer_count += qlen;
		lock_basic_unlock(&http2_query_buffer_count_lock);
		if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
			/* allocation failed, give the reservation back; the
			 * missing buffer is handled by the check below */
			lock_basic_lock(&http2_query_buffer_count_lock);
			http2_query_buffer_count -= qlen;
			lock_basic_unlock(&http2_query_buffer_count_lock);
		}
	}

	/* no buffer, or the chunk does not fit in what is left of it */
	if(!h2_stream->qbuffer ||
		sldns_buffer_remaining(h2_stream->qbuffer) < len) {
		verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
			"buffer space for POST query. Can happen on multi "
			"frame requests without content-length header");
		h2_stream->query_too_large = 1;
		return 0;
	}

	sldns_buffer_write(h2_stream->qbuffer, data, len);

	return 0;
}

/** Free the query and response buffers of the stream, if present, and
 * subtract their capacity from the respective global buffer counters.
 * The buffer pointers are set to NULL afterwards. */
void http2_req_stream_clear(struct http2_stream* h2_stream)
{
	if(h2_stream->qbuffer) {
		lock_basic_lock(&http2_query_buffer_count_lock);
		http2_query_buffer_count -=
			sldns_buffer_capacity(h2_stream->qbuffer);
		lock_basic_unlock(&http2_query_buffer_count_lock);
		sldns_buffer_free(h2_stream->qbuffer);
		h2_stream->qbuffer = NULL;
	}
	if(h2_stream->rbuffer) {
		lock_basic_lock(&http2_response_buffer_count_lock);
		http2_response_buffer_count -=
			sldns_buffer_capacity(h2_stream->rbuffer);
		lock_basic_unlock(&http2_response_buffer_count_lock);
		sldns_buffer_free(h2_stream->rbuffer);
		h2_stream->rbuffer = NULL;
	}
}

/** Create the nghttp2 callbacks structure with the request handling
 * callbacks of this file set on it.
 * Returns NULL if nghttp2 reports that it is out of memory. */
nghttp2_session_callbacks* http2_req_callbacks_create(void)
{
	nghttp2_session_callbacks *callbacks;
	if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
		log_err("failed to initialize nghttp2 callback");
		return NULL;
	}
	/* reception of header block started, used to create h2_stream */
	nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
		http2_req_begin_headers_cb);
	/* complete frame received, used to get data from stream if frame
	 * has end stream flag, and start processing query */
	nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
		http2_req_frame_recv_cb);
	/* get request info from headers */
	nghttp2_session_callbacks_set_on_header_callback(callbacks,
		http2_req_header_cb);
	/* get data from DATA frames, containing POST query */
	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
		http2_req_data_chunk_recv_cb);

	/* generic HTTP2 callbacks */
nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
	nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
	nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
		http2_stream_close_cb);

	return callbacks;
}
#endif /* HAVE_NGHTTP2 */

#ifdef HAVE_NGTCP2
/** Allocate a doq table: a random 16 byte static secret filled from rnd,
 * and empty connection, connection id and timer trees.
 * The idle timeout is cfg->tcp_idle_timeout times NGTCP2_MILLISECONDS.
 * Returns NULL on allocation failure, with the partial results freed. */
struct doq_table*
doq_table_create(struct config_file* cfg, struct ub_randstate* rnd)
{
	struct doq_table* table = calloc(1, sizeof(*table));
	if(!table)
		return NULL;
	table->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)*
		NGTCP2_MILLISECONDS;
	table->sv_scidlen = 16;
	table->static_secret_len = 16;
	table->static_secret = malloc(table->static_secret_len);
	if(!table->static_secret) {
		free(table);
		return NULL;
	}
	doq_fill_rand(rnd, table->static_secret, table->static_secret_len);
	/* every failure below frees what was allocated before it */
	table->conn_tree = rbtree_create(doq_conn_cmp);
	if(!table->conn_tree) {
		free(table->static_secret);
		free(table);
		return NULL;
	}
	table->conid_tree = rbtree_create(doq_conid_cmp);
	if(!table->conid_tree) {
		free(table->static_secret);
		free(table->conn_tree);
		free(table);
		return NULL;
	}
	table->timer_tree = rbtree_create(doq_timer_cmp);
	if(!table->timer_tree) {
		free(table->static_secret);
		free(table->conn_tree);
		free(table->conid_tree);
		free(table);
		return NULL;
	}
	lock_rw_init(&table->lock);
	lock_rw_init(&table->conid_lock);
	lock_basic_init(&table->size_lock);
	/* pair memory regions with a lock through lock_protect;
	 * NOTE(review): presumably lock-debugging annotations - confirm
	 * against util/locks.h */
	lock_protect(&table->lock, &table->static_secret,
		sizeof(table->static_secret));
	lock_protect(&table->lock, &table->static_secret_len,
		sizeof(table->static_secret_len));
	lock_protect(&table->lock, table->static_secret,
		table->static_secret_len);
	lock_protect(&table->lock, &table->sv_scidlen,
		sizeof(table->sv_scidlen));
	lock_protect(&table->lock, &table->idle_timeout,
		sizeof(table->idle_timeout));
	lock_protect(&table->lock, &table->conn_tree, sizeof(table->conn_tree));
	lock_protect(&table->lock, table->conn_tree, sizeof(*table->conn_tree));
	lock_protect(&table->conid_lock, table->conid_tree,
		sizeof(*table->conid_tree));
	lock_protect(&table->lock, table->timer_tree,
		sizeof(*table->timer_tree));
	lock_protect(&table->size_lock, &table->current_size,
		sizeof(table->current_size));
	return table;
}

/** delete elements from the connection tree.
 * The arg is the doq table. The timer of the connection is taken out of
 * the timer structures, the connection size is subtracted from the table
 * size, and the connection is deleted. */
static void
conn_tree_del(rbnode_type* node, void* arg)
{
	struct doq_table* table = (struct doq_table*)arg;
	struct doq_conn* conn;
	if(!node)
		return;
	conn = (struct doq_conn*)node->key;
	if(conn->timer.timer_in_list) {
		/* Remove timer from list first, because finding the rbnode
		 * element of the setlist of same timeouts needs tree lookup.
		 * Edit the tree structure after that lookup. */
		doq_timer_list_remove(conn->table, &conn->timer);
	}
	if(conn->timer.timer_in_tree)
		doq_timer_tree_remove(conn->table, &conn->timer);
	doq_table_quic_size_subtract(table, sizeof(*conn)+conn->key.dcidlen);
	doq_conn_delete(conn, table);
}

/** delete elements from the connection id tree */
static void
conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg))
{
	if(!node)
		return;
	doq_conid_delete((struct doq_conid*)node->key);
}

/** Delete the doq table, with the connections, connection ids and timer
 * tree in it. A NULL table is ignored. */
void
doq_table_delete(struct doq_table* table)
{
	if(!table)
		return;
	lock_rw_destroy(&table->lock);
	free(table->static_secret);
	if(table->conn_tree) {
		traverse_postorder(table->conn_tree, conn_tree_del, table);
		free(table->conn_tree);
	}
	/* destroyed only here, doq_conn_delete takes the conid_lock */
	lock_rw_destroy(&table->conid_lock);
	if(table->conid_tree) {
		/* The tree should be empty, because the doq_conn_delete calls
		 * above should have also removed their conid elements.
*/
		traverse_postorder(table->conid_tree, conid_tree_del, NULL);
		free(table->conid_tree);
	}
	lock_basic_destroy(&table->size_lock);
	if(table->timer_tree) {
		/* The tree should be empty, because the conn_tree_del calls
		 * above should also have removed them. Also the doq_timer
		 * is part of the doq_conn struct, so is already freed. */
		free(table->timer_tree);
	}
	table->write_list_first = NULL;
	table->write_list_last = NULL;
	free(table);
}

/** Look up the timer tree element that has exactly the time in tv.
 * Returns NULL if there is no tree element for that time. */
struct doq_timer*
doq_timer_find_time(struct doq_table* table, struct timeval* tv)
{
	struct doq_timer key;
	struct rbnode_type* node;
	/* search key with only the time filled in */
	memset(&key, 0, sizeof(key));
	key.time.tv_sec = tv->tv_sec;
	key.time.tv_usec = tv->tv_usec;
	node = rbtree_search(table->timer_tree, &key);
	if(node)
		return (struct doq_timer*)node->key;
	return NULL;
}

/** Remove the timer from the timer tree, does nothing if it is not in
 * the tree. If the timer carries a setlist of other timers, the first
 * timer of that list is inserted in the tree instead, and it takes over
 * the remainder of the list and the worker_doq_socket. */
void
doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer)
{
	if(!timer->timer_in_tree)
		return;
	rbtree_delete(table->timer_tree, timer);
	timer->timer_in_tree = 0;
	/* This item could have more timers in the same set. */
	if(timer->setlist_first) {
		struct doq_timer* rb_timer = timer->setlist_first;
		/* del first element from setlist */
		if(rb_timer->setlist_next)
			rb_timer->setlist_next->setlist_prev = NULL;
		else
			timer->setlist_last = NULL;
		timer->setlist_first = rb_timer->setlist_next;
		rb_timer->setlist_prev = NULL;
		rb_timer->setlist_next = NULL;
		rb_timer->timer_in_list = 0;
		/* insert it into the tree as new rb element */
		memset(&rb_timer->node, 0, sizeof(rb_timer->node));
		rb_timer->node.key = rb_timer;
		rbtree_insert(table->timer_tree, &rb_timer->node);
		rb_timer->timer_in_tree = 1;
		/* the setlist, if any remainder, moves to the rb element */
		rb_timer->setlist_first = timer->setlist_first;
		rb_timer->setlist_last = timer->setlist_last;
		timer->setlist_first = NULL;
		timer->setlist_last = NULL;
		rb_timer->worker_doq_socket = timer->worker_doq_socket;
	}
	timer->worker_doq_socket = NULL;
}

/** Remove the timer from the setlist it is on, does nothing if it is
 * not on a list. The list head and tail are kept in the tree element
 * with the same time, so that element is looked up first. */
void
doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer)
{
	struct doq_timer* rb_timer;
	if(!timer->timer_in_list)
		return;
	/* The item in the rbtree has the list start and end. */
	rb_timer = doq_timer_find_time(table, &timer->time);
	if(rb_timer) {
		/* unlink from the doubly linked list, and fix up the
		 * first and last pointers in the tree element */
		if(timer->setlist_prev)
			timer->setlist_prev->setlist_next = timer->setlist_next;
		else
			rb_timer->setlist_first = timer->setlist_next;
		if(timer->setlist_next)
			timer->setlist_next->setlist_prev = timer->setlist_prev;
		else
			rb_timer->setlist_last = timer->setlist_prev;
		timer->setlist_prev = NULL;
		timer->setlist_next = NULL;
	}
	/* the flag is cleared also when no tree element was found */
	timer->timer_in_list = 0;
}

/** doq append timer to setlist.
 * The timer is added at the end of the list kept in rb_timer. */
static void
doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer)
{
	log_assert(timer->timer_in_list == 0);
	timer->timer_in_list = 1;
	timer->setlist_next = NULL;
	timer->setlist_prev = rb_timer->setlist_last;
	if(rb_timer->setlist_last)
		rb_timer->setlist_last->setlist_next = timer;
	else
		rb_timer->setlist_first = timer;
	rb_timer->setlist_last = timer;
}

/** Take the timer out of the setlist and the timer tree, whichever it
 * is in, and clear its worker_doq_socket. */
void
doq_timer_unset(struct doq_table* table, struct doq_timer* timer)
{
	if(timer->timer_in_list) {
		/* Remove timer from list first, because finding the rbnode
		 * element of the setlist of same timeouts needs tree lookup.
		 * Edit the tree structure after that lookup. */
		doq_timer_list_remove(table, timer);
	}
	if(timer->timer_in_tree)
		doq_timer_tree_remove(table, timer);
	timer->worker_doq_socket = NULL;
}

/** Set the timer to the time tv. A timer that is already set on that
 * exact time is left alone, otherwise it is unset first. If the tree has
 * an element for that time the timer is appended to its setlist, else the
 * timer becomes a new tree element. */
void doq_timer_set(struct doq_table* table, struct doq_timer* timer,
	struct doq_server_socket* worker_doq_socket, struct timeval* tv)
{
	struct doq_timer* rb_timer;
	if(verbosity >= VERB_ALGO && timer->conn) {
		/* log the absolute time, and the time relative to now_tv */
		char a[256];
		struct timeval rel;
		addr_to_str((void*)&timer->conn->key.paddr.addr,
			timer->conn->key.paddr.addrlen, a, sizeof(a));
		timeval_subtract(&rel, tv, worker_doq_socket->now_tv);
		verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d",
			a, (int)tv->tv_sec, (int)tv->tv_usec,
			(int)rel.tv_sec, (int)rel.tv_usec);
	}
	if(timer->timer_in_tree || timer->timer_in_list) {
		if(timer->time.tv_sec == tv->tv_sec &&
			timer->time.tv_usec == tv->tv_usec)
			return; /* already set on that time */
		doq_timer_unset(table, timer);
	}
	timer->time.tv_sec = tv->tv_sec;
	timer->time.tv_usec = tv->tv_usec;
	rb_timer = doq_timer_find_time(table, tv);
	if(rb_timer) {
		/* There is a timeout already with this value. Timer is
		 * added to the setlist. */
		doq_timer_list_append(rb_timer, timer);
	} else {
		/* There is no timeout with this value. Make timer a new
		 * tree element.
*/
		memset(&timer->node, 0, sizeof(timer->node));
		timer->node.key = timer;
		rbtree_insert(table->timer_tree, &timer->node);
		timer->timer_in_tree = 1;
		timer->setlist_first = NULL;
		timer->setlist_last = NULL;
		/* the socket is only stored on the tree element */
		timer->worker_doq_socket = worker_doq_socket;
	}
}

/** Allocate a doq connection structure. The key is filled with a copy
 * of the addresses and ifindex from paddr and an allocated copy of the
 * dcid. The doq socket and table are taken from the comm point.
 * Returns NULL on allocation failure. */
struct doq_conn*
doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr,
	const uint8_t* dcid, size_t dcidlen, uint32_t version)
{
	struct doq_conn* conn = calloc(1, sizeof(*conn));
	if(!conn)
		return NULL;
	conn->node.key = conn;
	conn->doq_socket = c->doq_socket;
	conn->table = c->doq_socket->table;
	memmove(&conn->key.paddr.addr, &paddr->addr, paddr->addrlen);
	conn->key.paddr.addrlen = paddr->addrlen;
	memmove(&conn->key.paddr.localaddr, &paddr->localaddr,
		paddr->localaddrlen);
	conn->key.paddr.localaddrlen = paddr->localaddrlen;
	conn->key.paddr.ifindex = paddr->ifindex;
	conn->key.dcid = memdup((void*)dcid, dcidlen);
	if(!conn->key.dcid) {
		free(conn);
		return NULL;
	}
	conn->key.dcidlen = dcidlen;
	conn->version = version;
	/* the name of the error default routine depends on what configure
	 * detected for the ngtcp2 library */
#ifdef HAVE_NGTCP2_CCERR_DEFAULT
	ngtcp2_ccerr_default(&conn->ccerr);
#else
	ngtcp2_connection_close_error_default(&conn->last_error);
#endif
	rbtree_init(&conn->stream_tree, &doq_stream_cmp);
	conn->timer.conn = conn;
	lock_basic_init(&conn->lock);
	/* pair the connection fields with conn->lock through lock_protect */
	lock_protect(&conn->lock, &conn->key, sizeof(conn->key));
	lock_protect(&conn->lock, &conn->doq_socket, sizeof(conn->doq_socket));
	lock_protect(&conn->lock, &conn->table, sizeof(conn->table));
	lock_protect(&conn->lock, &conn->is_deleted, sizeof(conn->is_deleted));
	lock_protect(&conn->lock, &conn->version, sizeof(conn->version));
	lock_protect(&conn->lock, &conn->conn, sizeof(conn->conn));
	lock_protect(&conn->lock, &conn->conid_list, sizeof(conn->conid_list));
#ifdef HAVE_NGTCP2_CCERR_DEFAULT
	lock_protect(&conn->lock, &conn->ccerr, sizeof(conn->ccerr));
#else
	lock_protect(&conn->lock, &conn->last_error, sizeof(conn->last_error));
#endif
	lock_protect(&conn->lock, &conn->tls_alert, sizeof(conn->tls_alert));
	lock_protect(&conn->lock, &conn->ssl, sizeof(conn->ssl));
	lock_protect(&conn->lock, &conn->close_pkt, sizeof(conn->close_pkt));
	lock_protect(&conn->lock, &conn->close_pkt_len, sizeof(conn->close_pkt_len));
	lock_protect(&conn->lock, &conn->close_ecn, sizeof(conn->close_ecn));
	lock_protect(&conn->lock, &conn->stream_tree, sizeof(conn->stream_tree));
	lock_protect(&conn->lock, &conn->stream_write_first, sizeof(conn->stream_write_first));
	lock_protect(&conn->lock, &conn->stream_write_last, sizeof(conn->stream_write_last));
	lock_protect(&conn->lock, &conn->write_interest, sizeof(conn->write_interest));
	lock_protect(&conn->lock, &conn->on_write_list, sizeof(conn->on_write_list));
	lock_protect(&conn->lock, &conn->write_prev, sizeof(conn->write_prev));
	lock_protect(&conn->lock, &conn->write_next, sizeof(conn->write_next));
	return conn;
}

/** delete stream tree node.
 * The arg is the doq table; the sizes of the in and out buffers and of
 * the stream itself are subtracted from the table size. */
static void
stream_tree_del(rbnode_type* node, void* arg)
{
	struct doq_table* table = (struct doq_table*)arg;
	struct doq_stream* stream;
	if(!node)
		return;
	/* NOTE(review): the node pointer itself is cast here, not
	 * node->key as the other tree callbacks in this file do. That
	 * relies on the rbnode being the first member of struct doq_stream
	 * - confirm against the struct definition. */
	stream = (struct doq_stream*)node;
	if(stream->in)
		doq_table_quic_size_subtract(table, stream->inlen);
	if(stream->out)
		doq_table_quic_size_subtract(table, stream->outlen);
	doq_table_quic_size_subtract(table, sizeof(*stream));
	doq_stream_delete(stream);
}

/** Delete the doq connection, with its connection ids, ngtcp2
 * connection, streams, dcid, ssl and close packet. NULL is ignored. */
void
doq_conn_delete(struct doq_conn* conn, struct doq_table* table)
{
	if(!conn)
		return;
	lock_basic_destroy(&conn->lock);
	/* the connection ids are cleared under the conid lock */
	lock_rw_wrlock(&conn->table->conid_lock);
	doq_conn_clear_conids(conn);
	lock_rw_unlock(&conn->table->conid_lock);
	ngtcp2_conn_del(conn->conn);
if(conn->stream_tree.count != 0) {
		/* delete the streams, with table size accounting */
		traverse_postorder(&conn->stream_tree, stream_tree_del, table);
	}
	free(conn->key.dcid);
	SSL_free(conn->ssl);
	free(conn->close_pkt);
	free(conn);
}

/** Compare function for the connection tree, on the doq_conn key.
 * Returns negative, zero or positive, see the ordering below. */
int
doq_conn_cmp(const void* key1, const void* key2)
{
	struct doq_conn* c = (struct doq_conn*)key1;
	struct doq_conn* d = (struct doq_conn*)key2;
	int r;
	/* Compared in the order destination address, then
	 * local address, ifindex and then dcid.
	 * So that for a search for findlessorequal for the destination
	 * address will find connections to that address, with different
	 * dcids.
	 * Also a printout in sorted order prints the connections by IP
	 * address of destination, and then a number of them depending on the
	 * dcids. */
	/* for every part the length is compared first, so that the memcmp
	 * after it runs over equal lengths */
	if(c->key.paddr.addrlen != d->key.paddr.addrlen) {
		if(c->key.paddr.addrlen < d->key.paddr.addrlen)
			return -1;
		return 1;
	}
	if((r=memcmp(&c->key.paddr.addr, &d->key.paddr.addr,
		c->key.paddr.addrlen))!=0)
		return r;
	if(c->key.paddr.localaddrlen != d->key.paddr.localaddrlen) {
		if(c->key.paddr.localaddrlen < d->key.paddr.localaddrlen)
			return -1;
		return 1;
	}
	if((r=memcmp(&c->key.paddr.localaddr, &d->key.paddr.localaddr,
		c->key.paddr.localaddrlen))!=0)
		return r;
	if(c->key.paddr.ifindex != d->key.paddr.ifindex) {
		if(c->key.paddr.ifindex < d->key.paddr.ifindex)
			return -1;
		return 1;
	}
	if(c->key.dcidlen != d->key.dcidlen) {
		if(c->key.dcidlen < d->key.dcidlen)
			return -1;
		return 1;
	}
	if((r=memcmp(c->key.dcid, d->key.dcid, c->key.dcidlen))!=0)
		return r;
	return 0;
}

/** Compare function for the connection id tree: shorter connection ids
 * sort first, equal lengths are ordered by memcmp of the bytes. */
int doq_conid_cmp(const void* key1, const void* key2)
{
	struct doq_conid* c = (struct doq_conid*)key1;
	struct doq_conid* d = (struct doq_conid*)key2;
	if(c->cidlen != d->cidlen) {
		if(c->cidlen < d->cidlen)
			return -1;
		return 1;
	}
	return memcmp(c->cid, d->cid, c->cidlen);
}

/** Compare function for the timer tree: by seconds, then by
 * microseconds. Returns -1, 0 or 1. */
int doq_timer_cmp(const void* key1, const void* key2)
{
	struct doq_timer* e = (struct doq_timer*)key1;
	struct doq_timer* f = (struct doq_timer*)key2;
	if(e->time.tv_sec < f->time.tv_sec)
		return -1;
	if(e->time.tv_sec > f->time.tv_sec)
		return 1;
	if(e->time.tv_usec < f->time.tv_usec)
		return -1;
	if(e->time.tv_usec > f->time.tv_usec)
		return 1;
	return 0;
}

/** Compare function for the stream tree, by stream id.
 * Returns -1, 0 or 1. */
int doq_stream_cmp(const void* key1, const void* key2)
{
	struct doq_stream* c = (struct doq_stream*)key1;
	struct doq_stream* d = (struct doq_stream*)key2;
	if(c->stream_id != d->stream_id) {
		if(c->stream_id < d->stream_id)
			return -1;
		return 1;
	}
	return 0;
}

/** doq store a local address in repinfo.
 * The address goes into the pktinfo member, the port into doq_srcport,
 * and srctype is set to 6 or 4 for the address family. */
static void
doq_repinfo_store_localaddr(struct comm_reply* repinfo,
	struct doq_addr_storage* localaddr, socklen_t localaddrlen)
{
	/* use the pktinfo that we have for ancillary udp data otherwise,
	 * this saves space for a sockaddr */
	memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo));
	if(addr_is_ip6((void*)localaddr, localaddrlen)) {
#ifdef IPV6_PKTINFO
		struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
		memmove(&repinfo->pktinfo.v6info.ipi6_addr,
			&sa6->sin6_addr, sizeof(struct in6_addr));
		repinfo->doq_srcport = sa6->sin6_port;
#endif
		repinfo->srctype = 6;
	} else {
		/* which pktinfo member exists depends on the platform */
#ifdef IP_PKTINFO
		struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
		memmove(&repinfo->pktinfo.v4info.ipi_addr,
			&sa->sin_addr, sizeof(struct in_addr));
		repinfo->doq_srcport = sa->sin_port;
#elif defined(IP_RECVDSTADDR)
		struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
		memmove(&repinfo->pktinfo.v4addr, &sa->sin_addr,
			sizeof(struct in_addr));
		repinfo->doq_srcport = sa->sin_port;
#endif
		repinfo->srctype
= 4; 3728 } 3729 } 3730 3731 /** doq retrieve localaddr from repinfo */ 3732 static void 3733 doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo, 3734 struct doq_addr_storage* localaddr, socklen_t* localaddrlen) 3735 { 3736 if(repinfo->srctype == 6) { 3737 #ifdef IPV6_PKTINFO 3738 struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; 3739 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in6); 3740 memset(sa6, 0, *localaddrlen); 3741 sa6->sin6_family = AF_INET6; 3742 memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr, 3743 *localaddrlen); 3744 sa6->sin6_port = repinfo->doq_srcport; 3745 #endif 3746 } else { 3747 #ifdef IP_PKTINFO 3748 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3749 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); 3750 memset(sa, 0, *localaddrlen); 3751 sa->sin_family = AF_INET; 3752 memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr, 3753 *localaddrlen); 3754 sa->sin_port = repinfo->doq_srcport; 3755 #elif defined(IP_RECVDSTADDR) 3756 struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; 3757 *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); 3758 memset(sa, 0, *localaddrlen); 3759 sa->sin_family = AF_INET; 3760 memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr, 3761 sizeof(struct in_addr)); 3762 sa->sin_port = repinfo->doq_srcport; 3763 #endif 3764 } 3765 } 3766 3767 /** doq write a connection key into repinfo, false if it does not fit */ 3768 static int 3769 doq_conn_key_store_repinfo(struct doq_conn_key* key, 3770 struct comm_reply* repinfo) 3771 { 3772 repinfo->is_proxied = 0; 3773 repinfo->doq_ifindex = key->paddr.ifindex; 3774 repinfo->remote_addrlen = key->paddr.addrlen; 3775 memmove(&repinfo->remote_addr, &key->paddr.addr, 3776 repinfo->remote_addrlen); 3777 repinfo->client_addrlen = key->paddr.addrlen; 3778 memmove(&repinfo->client_addr, &key->paddr.addr, 3779 repinfo->client_addrlen); 3780 doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr, 3781 key->paddr.localaddrlen); 3782 
if(key->dcidlen > sizeof(repinfo->doq_dcid))
		return 0;
	repinfo->doq_dcidlen = key->dcidlen;
	memmove(repinfo->doq_dcid, key->dcid, key->dcidlen);
	return 1;
}

/** Fill the connection key from the information stored in repinfo.
 * The key->dcid is set to point at the dcid storage inside repinfo, it
 * is not a copy. */
void
doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo)
{
	key->paddr.ifindex = repinfo->doq_ifindex;
	key->paddr.addrlen = repinfo->remote_addrlen;
	memmove(&key->paddr.addr, &repinfo->remote_addr,
		repinfo->remote_addrlen);
	doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr,
		&key->paddr.localaddrlen);
	key->dcidlen = repinfo->doq_dcidlen;
	key->dcid = repinfo->doq_dcid;
}

/** doq add a stream to the connection */
static void
doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* stream)
{
	(void)rbtree_insert(&conn->stream_tree, &stream->node);
}

/** doq delete a stream from the connection */
static void
doq_conn_del_stream(struct doq_conn* conn, struct doq_stream* stream)
{
	(void)rbtree_delete(&conn->stream_tree, &stream->node);
}

/** doq create new stream.
 * The stream is zero initialised, with the tree node key and the stream
 * id set. Returns NULL on allocation failure. */
static struct doq_stream*
doq_stream_create(int64_t stream_id)
{
	struct doq_stream* stream = calloc(1, sizeof(*stream));
	if(!stream)
		return NULL;
	stream->node.key = stream;
	stream->stream_id = stream_id;
	return stream;
}

/** Free the stream and its in and out buffers. NULL is ignored.
 * No size accounting and no tree removal is done here. */
void doq_stream_delete(struct doq_stream* stream)
{
	if(!stream)
		return;
	free(stream->in);
	free(stream->out);
	free(stream);
}

/** Find the stream with the stream id in the stream tree of the
 * connection. Returns NULL if it is not there. */
struct doq_stream*
doq_stream_find(struct doq_conn* conn, int64_t stream_id)
{
	rbnode_type* node;
	struct doq_stream key;
	/* only the fields used for the lookup are filled in */
	key.node.key = &key;
	key.stream_id = stream_id;
	node = rbtree_search(&conn->stream_tree, &key);
	if(node)
		return (struct doq_stream*)node->key;
	return NULL;
}

/** doq put stream on the conn write list.
 * Appended at the end of the list; nothing happens if already on it. */
static void
doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream)
{
	if(stream->on_write_list)
		return;
	stream->write_prev = conn->stream_write_last;
	if(conn->stream_write_last)
		conn->stream_write_last->write_next = stream;
	else
		conn->stream_write_first = stream;
	conn->stream_write_last = stream;
	stream->write_next = NULL;
	stream->on_write_list = 1;
}

/** doq remove stream from the conn write list.
 * Nothing happens if the stream is not on the list. */
static void
doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream)
{
	if(!stream->on_write_list)
		return;
	if(stream->write_next)
		stream->write_next->write_prev = stream->write_prev;
	else	conn->stream_write_last = stream->write_prev;
	if(stream->write_prev)
		stream->write_prev->write_next = stream->write_next;
	else	conn->stream_write_first = stream->write_next;
	stream->write_prev = NULL;
	stream->write_next = NULL;
	stream->on_write_list = 0;
}

/** doq stream remove in buffer.
 * The buffer size is subtracted from the table size. */
static void
doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table)
{
	if(stream->in) {
		doq_table_quic_size_subtract(table, stream->inlen);
		free(stream->in);
		stream->in = NULL;
		stream->inlen = 0;
	}
}

/** doq stream remove out buffer.
 * The buffer size is subtracted from the table size. */
static void
doq_stream_remove_out_buffer(struct doq_stream* stream,
	struct doq_table* table)
{
	if(stream->out) {
		doq_table_quic_size_subtract(table, stream->outlen);
		free(stream->out);
		stream->out = NULL;
		stream->outlen = 0;
	}
}

/** Close the stream and delete it. With send_shutdown the stream is also
 * shut down in ngtcp2 with the doq application error code.
 * Returns 1 on success and also when the stream was already marked
 * closed. Returns 0 if the ngtcp2 shutdown call fails; the stream is then
 * marked closed and off the write list, but it is not deleted. */
int
doq_stream_close(struct doq_conn* conn, struct doq_stream* stream,
	int send_shutdown)
{
	int ret;
	if(stream->is_closed)
		return 1;
	stream->is_closed = 1;
	doq_stream_off_write_list(conn, stream);
	if(send_shutdown) {
		verbose(VERB_ALGO, "doq: shutdown stream_id %d with app_error_code %d",
			(int)stream->stream_id, (int)DOQ_APP_ERROR_CODE);
		/* the shutdown call has an extra flags argument in some
		 * ngtcp2 versions, selected by configure */
		ret = ngtcp2_conn_shutdown_stream(conn->conn,
#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
			0,
#endif
			stream->stream_id, DOQ_APP_ERROR_CODE);
		if(ret != 0) {
			log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s",
				(int)stream->stream_id, ngtcp2_strerror(ret));
			return 0;
		}
		doq_conn_write_enable(conn);
	}
	verbose(VERB_ALGO, "doq: conn extend max streams bidi by 1");
	ngtcp2_conn_extend_max_streams_bidi(conn->conn, 1);
	doq_conn_write_enable(conn);
	/* release the buffers and the stream, with size accounting */
	doq_stream_remove_in_buffer(stream, conn->doq_socket->table);
	doq_stream_remove_out_buffer(stream, conn->doq_socket->table);
	doq_table_quic_size_subtract(conn->doq_socket->table, sizeof(*stream));
	doq_conn_del_stream(conn, stream);
	doq_stream_delete(stream);
	return 1;
}

/** doq stream pick up answer data from buffer */
static int
doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf)
{
	stream->is_answer_available = 1;
	/* an earlier out buffer is replaced */
	if(stream->out) {
		free(stream->out);
		stream->out = NULL;
		stream->outlen = 0;
	}
	stream->nwrite = 0;
	stream->outlen = sldns_buffer_limit(buf);
	/* For quic the output bytes have to stay allocated and available,
	 * for potential resends, until the remote end has acknowledged them.
	 * This includes the tcplen start uint16_t, in outlen_wire.
*/
	stream->outlen_wire = htons(stream->outlen);
	stream->out = memdup(sldns_buffer_begin(buf), sldns_buffer_limit(buf));
	if(!stream->out) {
		log_err("doq could not send answer: out of memory");
		return 0;
	}
	return 1;
}

/** Send the reply in buf on the stream: the contents are copied into the
 * out buffer of the stream, the table size accounting is updated, and
 * the stream and connection are set up for writing.
 * Returns 0 if the copy could not be allocated. */
int
doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream,
	struct sldns_buffer* buf)
{
	if(verbosity >= VERB_ALGO) {
		char* s = sldns_wire2str_pkt(sldns_buffer_begin(buf),
			sldns_buffer_limit(buf));
		verbose(VERB_ALGO, "doq stream %d response\n%s",
			(int)stream->stream_id, (s?s:"null"));
		free(s);
	}
	/* the pickup below frees an existing out buffer, so take its size
	 * off the table size before that */
	if(stream->out)
		doq_table_quic_size_subtract(conn->doq_socket->table,
			stream->outlen);
	if(!doq_stream_pickup_answer(stream, buf))
		return 0;
	doq_table_quic_size_add(conn->doq_socket->table, stream->outlen);
	doq_stream_on_write_list(conn, stream);
	doq_conn_write_enable(conn);
	return 1;
}

/** doq stream data length has completed, allocations can be done. False on
 * allocation failure, and also if the length is over 1024*1024. */
static int
doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table)
{
	if(stream->inlen > 1024*1024) {
		log_err("doq stream in length too large %d",
			(int)stream->inlen);
		return 0;
	}
	stream->in = calloc(1, stream->inlen);
	if(!stream->in) {
		log_err("doq could not read stream, calloc failed: "
			"out of memory");
		return 0;
	}
	doq_table_quic_size_add(table, stream->inlen);
	return 1;
}

/** doq stream data is complete, the input data has been received.
 * The query is copied into the buffer of the comm point and the comm
 * point callback is called. If the callback returns true, the reply in
 * that buffer is sent on the stream.
 * Returns 0 if there is no in buffer, the query is longer than the comm
 * point buffer, the DCID does not fit in the repinfo, or the reply could
 * not be sent. */
static int
doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream)
{
	struct comm_point* c;
	if(verbosity >= VERB_ALGO) {
		char* s = sldns_wire2str_pkt(stream->in, stream->inlen);
		char a[128];
		addr_to_str((void*)&conn->key.paddr.addr,
			conn->key.paddr.addrlen, a, sizeof(a));
		verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s",
			a, (int)stream->stream_id, (s?s:"null"));
		free(s);
	}
	stream->is_query_complete = 1;
	c = conn->doq_socket->cp;
	if(!stream->in) {
		verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer");
		return 0;
	}
	if(stream->inlen > sldns_buffer_capacity(c->buffer)) {
		verbose(VERB_ALGO, "doq_stream_data_complete: query too long");
		return 0;
	}
	sldns_buffer_clear(c->buffer);
	sldns_buffer_write(c->buffer, stream->in, stream->inlen);
	sldns_buffer_flip(c->buffer);
	c->repinfo.c = c;
	if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) {
		verbose(VERB_ALGO, "doq_stream_data_complete: connection "
			"DCID too long");
		return 0;
	}
	c->repinfo.doq_streamid = stream->stream_id;
	/* current_conn is set only for the duration of the callback */
	conn->doq_socket->current_conn = conn;
	fptr_ok(fptr_whitelist_comm_point(c->callback));
	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) {
		conn->doq_socket->current_conn = NULL;
		if(!doq_stream_send_reply(conn, stream, c->buffer)) {
			verbose(VERB_ALGO, "doq: failed to send_reply");
			return 0;
		}
		return 1;
	}
	conn->doq_socket->current_conn = NULL;
	return 1;
}

/** doq receive data for a stream, more bytes of the incoming data.
 * The first two bytes are the length in network byte order, the in
 * buffer is allocated once they are complete. The recv_done is set to 1
 * when this call stored data and the whole length has been read; it is
 * not written otherwise.
 * Returns 0 on failure. */
static int
doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data,
	size_t datalen, int* recv_done, struct doq_table* table)
{
	int got_data = 0;
	/* read the tcplength uint16_t at the start */
	if(stream->nread < 2) {
		uint16_t tcplen = 0;
		size_t todolen = 2 - stream->nread;

		if(stream->nread > 0) {
			/* put in the already read byte if there is one */
			tcplen = stream->inlen;
		}
		if(datalen < todolen)
			todolen = datalen;
		/* the bytes are copied in wire order, into tcplen */
		memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen);
		stream->nread += todolen;
		data += todolen;
		datalen -= todolen;
		if(stream->nread == 2) {
			/* the initial length value is completed */
			stream->inlen = ntohs(tcplen);
			if(!doq_stream_datalen_complete(stream, table))
				return 0;
		} else {
			/* store for later */
			stream->inlen = tcplen;
			return 1;
		}
	}
	/* if there are more data bytes */
	if(datalen > 0) {
		size_t to_write = datalen;
		if(stream->nread-2 > stream->inlen) {
			verbose(VERB_ALGO, "doq stream buffer too small");
			return 0;
		}
		/* store no more than what is left of the stated length */
		if(datalen > stream->inlen - (stream->nread-2))
			to_write = stream->inlen - (stream->nread-2);
		if(to_write > 0) {
			if(!stream->in) {
				verbose(VERB_ALGO, "doq: stream has "
					"no buffer");
				return 0;
			}
			/* nread counts the two length bytes as well */
			memmove(stream->in+(stream->nread-2), data, to_write);
			stream->nread += to_write;
			data += to_write;
			datalen -= to_write;
			got_data = 1;
		}
	}
	/* Are there extra bytes received after the end? If so, log them. */
	if(datalen > 0) {
		if(verbosity >= VERB_ALGO)
			log_hex("doq stream has extra bytes received after end",
				(void*)data, datalen);
	}
	/* Is the input data complete? */
	if(got_data && stream->nread >= stream->inlen+2) {
		if(!stream->in) {
			verbose(VERB_ALGO, "doq: completed stream has "
				"no buffer");
			return 0;
		}
		*recv_done = 1;
	}
	return 1;
}

/** doq receive FIN for a stream. No more bytes are going to arrive.
*/ 4130 static int 4131 doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int 4132 recv_done) 4133 { 4134 if(!stream->is_query_complete && !recv_done) { 4135 verbose(VERB_ALGO, "doq: stream recv FIN, but is " 4136 "not complete, have %d of %d bytes", 4137 ((int)stream->nread)-2, (int)stream->inlen); 4138 if(!doq_stream_close(conn, stream, 1)) 4139 return 0; 4140 } 4141 return 1; 4142 } 4143 4144 void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len) 4145 { 4146 size_t i; 4147 for(i=0; i<len; i++) 4148 buf[i] = ub_random(rnd)&0xff; 4149 } 4150 4151 /** generate new connection id, checks for duplicates. 4152 * caller must hold lock on conid tree. */ 4153 static int 4154 doq_conn_generate_new_conid(struct doq_conn* conn, uint8_t* data, 4155 size_t datalen) 4156 { 4157 int max_try = 100; 4158 int i; 4159 for(i=0; i<max_try; i++) { 4160 doq_fill_rand(conn->doq_socket->rnd, data, datalen); 4161 if(!doq_conid_find(conn->table, data, datalen)) { 4162 /* Found an unused connection id. 
*/ 4163 return 1; 4164 } 4165 } 4166 verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not " 4167 "generate random unused connection id value in %d attempts.", 4168 max_try); 4169 return 0; 4170 } 4171 4172 /** ngtcp2 rand callback function */ 4173 static void 4174 doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx) 4175 { 4176 struct ub_randstate* rnd = (struct ub_randstate*) 4177 rand_ctx->native_handle; 4178 doq_fill_rand(rnd, dest, destlen); 4179 } 4180 4181 /** ngtcp2 get_new_connection_id callback function */ 4182 static int 4183 doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid, 4184 uint8_t* token, size_t cidlen, void* user_data) 4185 { 4186 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4187 /* Lock the conid tree, so we can check for duplicates while 4188 * generating the id, and then insert it, whilst keeping the tree 4189 * locked against other modifications, guaranteeing uniqueness. */ 4190 lock_rw_wrlock(&doq_conn->table->conid_lock); 4191 if(!doq_conn_generate_new_conid(doq_conn, cid->data, cidlen)) { 4192 lock_rw_unlock(&doq_conn->table->conid_lock); 4193 return NGTCP2_ERR_CALLBACK_FAILURE; 4194 } 4195 cid->datalen = cidlen; 4196 if(ngtcp2_crypto_generate_stateless_reset_token(token, 4197 doq_conn->doq_socket->static_secret, 4198 doq_conn->doq_socket->static_secret_len, cid) != 0) { 4199 lock_rw_unlock(&doq_conn->table->conid_lock); 4200 return NGTCP2_ERR_CALLBACK_FAILURE; 4201 } 4202 if(!doq_conn_associate_conid(doq_conn, cid->data, cid->datalen)) { 4203 lock_rw_unlock(&doq_conn->table->conid_lock); 4204 return NGTCP2_ERR_CALLBACK_FAILURE; 4205 } 4206 lock_rw_unlock(&doq_conn->table->conid_lock); 4207 return 0; 4208 } 4209 4210 /** ngtcp2 remove_connection_id callback function */ 4211 static int 4212 doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), 4213 const ngtcp2_cid* cid, void* user_data) 4214 { 4215 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4216 
lock_rw_wrlock(&doq_conn->table->conid_lock); 4217 doq_conn_dissociate_conid(doq_conn, cid->data, cid->datalen); 4218 lock_rw_unlock(&doq_conn->table->conid_lock); 4219 return 0; 4220 } 4221 4222 /** doq submit a new token */ 4223 static int 4224 doq_submit_new_token(struct doq_conn* conn) 4225 { 4226 uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN]; 4227 ngtcp2_ssize tokenlen; 4228 int ret; 4229 const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn); 4230 ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); 4231 4232 tokenlen = ngtcp2_crypto_generate_regular_token(token, 4233 conn->doq_socket->static_secret, 4234 conn->doq_socket->static_secret_len, path->remote.addr, 4235 path->remote.addrlen, ts); 4236 if(tokenlen < 0) { 4237 log_err("doq ngtcp2_crypto_generate_regular_token failed"); 4238 return 1; 4239 } 4240 4241 verbose(VERB_ALGO, "doq submit new token"); 4242 ret = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen); 4243 if(ret != 0) { 4244 log_err("doq ngtcp2_conn_submit_new_token failed: %s", 4245 ngtcp2_strerror(ret)); 4246 return 0; 4247 } 4248 return 1; 4249 } 4250 4251 /** ngtcp2 handshake_completed callback function */ 4252 static int 4253 doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data) 4254 { 4255 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4256 verbose(VERB_ALGO, "doq handshake_completed callback"); 4257 verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d", 4258 (int)ngtcp2_conn_get_max_data_left(doq_conn->conn)); 4259 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 4260 verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d", 4261 (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn)); 4262 #endif 4263 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d", 4264 (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn)); 4265 verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d", 4266 (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn)); 4267 verbose(VERB_ALGO, 
"negotiated cipher name is %s", 4268 SSL_get_cipher_name(doq_conn->ssl)); 4269 if(verbosity > VERB_ALGO) { 4270 const unsigned char* alpn = NULL; 4271 unsigned int alpnlen = 0; 4272 char alpnstr[128]; 4273 SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen); 4274 if(alpnlen > sizeof(alpnstr)-1) 4275 alpnlen = sizeof(alpnstr)-1; 4276 memmove(alpnstr, alpn, alpnlen); 4277 alpnstr[alpnlen]=0; 4278 verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr); 4279 } 4280 4281 if(!doq_submit_new_token(doq_conn)) 4282 return -1; 4283 return 0; 4284 } 4285 4286 /** ngtcp2 stream_open callback function */ 4287 static int 4288 doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, 4289 void* user_data) 4290 { 4291 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4292 struct doq_stream* stream; 4293 verbose(VERB_ALGO, "doq new stream %x", (int)stream_id); 4294 if(doq_stream_find(doq_conn, stream_id)) { 4295 verbose(VERB_ALGO, "doq: stream with this id already exists"); 4296 return 0; 4297 } 4298 if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */ 4299 !doq_table_quic_size_available(doq_conn->doq_socket->table, 4300 doq_conn->doq_socket->cfg, sizeof(*stream) 4301 + 100 /* estimated query in */ 4302 + 512 /* estimated response out */ 4303 )) { 4304 int rv; 4305 verbose(VERB_ALGO, "doq: no mem for new stream"); 4306 rv = ngtcp2_conn_shutdown_stream(doq_conn->conn, 4307 #ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 4308 0, 4309 #endif 4310 stream_id, NGTCP2_CONNECTION_REFUSED); 4311 if(rv != 0) { 4312 log_err("ngtcp2_conn_shutdown_stream failed: %s", 4313 ngtcp2_strerror(rv)); 4314 return NGTCP2_ERR_CALLBACK_FAILURE; 4315 } 4316 return 0; 4317 } 4318 stream = doq_stream_create(stream_id); 4319 if(!stream) { 4320 log_err("doq: could not doq_stream_create: out of memory"); 4321 return NGTCP2_ERR_CALLBACK_FAILURE; 4322 } 4323 doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream)); 4324 doq_conn_add_stream(doq_conn, 
stream); 4325 return 0; 4326 } 4327 4328 /** ngtcp2 recv_stream_data callback function */ 4329 static int 4330 doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, 4331 int64_t stream_id, uint64_t offset, const uint8_t* data, 4332 size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data)) 4333 { 4334 int recv_done = 0; 4335 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4336 struct doq_stream* stream; 4337 verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d " 4338 "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen, 4339 ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""), 4340 #ifdef NGTCP2_STREAM_DATA_FLAG_0RTT 4341 ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"") 4342 #else 4343 ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"") 4344 #endif 4345 ); 4346 stream = doq_stream_find(doq_conn, stream_id); 4347 if(!stream) { 4348 verbose(VERB_ALGO, "doq: received stream data for " 4349 "unknown stream %d", (int)stream_id); 4350 return 0; 4351 } 4352 if(stream->is_closed) { 4353 verbose(VERB_ALGO, "doq: stream is closed, ignore recv data"); 4354 return 0; 4355 } 4356 if(datalen != 0) { 4357 if(!doq_stream_recv_data(stream, data, datalen, &recv_done, 4358 doq_conn->doq_socket->table)) 4359 return NGTCP2_ERR_CALLBACK_FAILURE; 4360 } 4361 if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) { 4362 if(!doq_stream_recv_fin(doq_conn, stream, recv_done)) 4363 return NGTCP2_ERR_CALLBACK_FAILURE; 4364 } 4365 ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id, 4366 datalen); 4367 ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen); 4368 if(recv_done) { 4369 if(!doq_stream_data_complete(doq_conn, stream)) 4370 return NGTCP2_ERR_CALLBACK_FAILURE; 4371 } 4372 return 0; 4373 } 4374 4375 /** ngtcp2 stream_close callback function */ 4376 static int 4377 doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, 4378 int64_t stream_id, uint64_t app_error_code, void* user_data, 4379 void* 
ATTR_UNUSED(stream_user_data)) 4380 { 4381 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4382 struct doq_stream* stream; 4383 if((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0) 4384 verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d", 4385 (int)stream_id, 4386 (((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)? 4387 "APP_ERROR_CODE_SET ":""), 4388 (int)app_error_code); 4389 else 4390 verbose(VERB_ALGO, "doq stream close for stream id %d", 4391 (int)stream_id); 4392 4393 stream = doq_stream_find(doq_conn, stream_id); 4394 if(!stream) { 4395 verbose(VERB_ALGO, "doq: stream close for " 4396 "unknown stream %d", (int)stream_id); 4397 return 0; 4398 } 4399 if(!doq_stream_close(doq_conn, stream, 0)) 4400 return NGTCP2_ERR_CALLBACK_FAILURE; 4401 return 0; 4402 } 4403 4404 /** ngtcp2 stream_reset callback function */ 4405 static int 4406 doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, 4407 uint64_t final_size, uint64_t app_error_code, void* user_data, 4408 void* ATTR_UNUSED(stream_user_data)) 4409 { 4410 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4411 struct doq_stream* stream; 4412 verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d " 4413 "app_error_code %d", (int)stream_id, (int)final_size, 4414 (int)app_error_code); 4415 4416 stream = doq_stream_find(doq_conn, stream_id); 4417 if(!stream) { 4418 verbose(VERB_ALGO, "doq: stream reset for " 4419 "unknown stream %d", (int)stream_id); 4420 return 0; 4421 } 4422 if(!doq_stream_close(doq_conn, stream, 0)) 4423 return NGTCP2_ERR_CALLBACK_FAILURE; 4424 return 0; 4425 } 4426 4427 /** ngtcp2 acked_stream_data_offset callback function */ 4428 static int 4429 doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn), 4430 int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data, 4431 void* ATTR_UNUSED(stream_user_data)) 4432 { 4433 struct doq_conn* doq_conn = (struct doq_conn*)user_data; 4434 struct doq_stream* 
stream; 4435 verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d " 4436 "datalen %d", (int)stream_id, (int)offset, (int)datalen); 4437 4438 stream = doq_stream_find(doq_conn, stream_id); 4439 if(!stream) { 4440 verbose(VERB_ALGO, "doq: stream acked data for " 4441 "unknown stream %d", (int)stream_id); 4442 return 0; 4443 } 4444 /* Acked the data from [offset .. offset+datalen). */ 4445 if(stream->is_closed) 4446 return 0; 4447 if(offset+datalen >= stream->outlen) { 4448 doq_stream_remove_in_buffer(stream, 4449 doq_conn->doq_socket->table); 4450 doq_stream_remove_out_buffer(stream, 4451 doq_conn->doq_socket->table); 4452 } 4453 return 0; 4454 } 4455 4456 /** ngtc2p log_printf callback function */ 4457 static void 4458 doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...) 4459 { 4460 char buf[1024]; 4461 va_list ap; 4462 va_start(ap, fmt); 4463 vsnprintf(buf, sizeof(buf), fmt, ap); 4464 verbose(VERB_ALGO, "libngtcp2: %s", buf); 4465 va_end(ap); 4466 } 4467 4468 #ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4469 /** the doq application tx key callback, false on failure */ 4470 static int 4471 doq_application_tx_key_cb(struct doq_conn* conn) 4472 { 4473 verbose(VERB_ALGO, "doq application tx key cb"); 4474 /* The server does not want to open streams to the client, 4475 * the client instead initiates by opening bidi streams. 
*/ 4476 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d", 4477 (int)ngtcp2_conn_get_max_data_left(conn->conn)); 4478 #ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 4479 verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d", 4480 (int)ngtcp2_conn_get_max_local_streams_uni(conn->conn)); 4481 #endif 4482 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d", 4483 (int)ngtcp2_conn_get_streams_uni_left(conn->conn)); 4484 verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d", 4485 (int)ngtcp2_conn_get_streams_bidi_left(conn->conn)); 4486 return 1; 4487 } 4488 4489 /** quic_method set_encryption_secrets function */ 4490 static int 4491 doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, 4492 const uint8_t *read_secret, const uint8_t *write_secret, 4493 size_t secret_len) 4494 { 4495 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4496 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL 4497 ngtcp2_encryption_level 4498 #else 4499 ngtcp2_crypto_level 4500 #endif 4501 level = 4502 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL 4503 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); 4504 #else 4505 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); 4506 #endif 4507 4508 if(read_secret) { 4509 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level); 4510 if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn, 4511 NULL, NULL, NULL, level, read_secret, secret_len) 4512 != 0) { 4513 log_err("ngtcp2_crypto_derive_and_install_rx_key " 4514 "failed"); 4515 return 0; 4516 } 4517 } 4518 4519 if(write_secret) { 4520 verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level); 4521 if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn, 4522 NULL, NULL, NULL, level, write_secret, secret_len) 4523 != 0) { 4524 
log_err("ngtcp2_crypto_derive_and_install_tx_key " 4525 "failed"); 4526 return 0; 4527 } 4528 if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) { 4529 if(!doq_application_tx_key_cb(doq_conn)) 4530 return 0; 4531 } 4532 } 4533 return 1; 4534 } 4535 4536 /** quic_method add_handshake_data function */ 4537 static int 4538 doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, 4539 const uint8_t *data, size_t len) 4540 { 4541 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4542 #ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL 4543 ngtcp2_encryption_level 4544 #else 4545 ngtcp2_crypto_level 4546 #endif 4547 level = 4548 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL 4549 ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); 4550 #else 4551 ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); 4552 #endif 4553 int rv; 4554 4555 verbose(VERB_ALGO, "doq_add_handshake_data: " 4556 "ngtcp2_con_submit_crypto_data level %d", (int)level); 4557 rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len); 4558 if(rv != 0) { 4559 log_err("ngtcp2_conn_submit_crypto_data failed: %s", 4560 ngtcp2_strerror(rv)); 4561 ngtcp2_conn_set_tls_error(doq_conn->conn, rv); 4562 return 0; 4563 } 4564 return 1; 4565 } 4566 4567 /** quic_method flush_flight function */ 4568 static int 4569 doq_flush_flight(SSL* ATTR_UNUSED(ssl)) 4570 { 4571 return 1; 4572 } 4573 4574 /** quic_method send_alert function */ 4575 static int 4576 doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level), 4577 uint8_t alert) 4578 { 4579 struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl); 4580 doq_conn->tls_alert = alert; 4581 return 1; 4582 } 4583 #endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */ 4584 4585 /** ALPN select callback for the doq SSL context */ 4586 static int 4587 doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out, 4588 unsigned char* outlen, const unsigned char* in, unsigned int inlen, 
4589 void* ATTR_UNUSED(arg)) 4590 { 4591 /* select "doq" */ 4592 int ret = SSL_select_next_proto((void*)out, outlen, 4593 (const unsigned char*)"\x03""doq", 4, in, inlen); 4594 if(ret == OPENSSL_NPN_NEGOTIATED) 4595 return SSL_TLSEXT_ERR_OK; 4596 verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does " 4597 "not have 'doq'"); 4598 return SSL_TLSEXT_ERR_ALERT_FATAL; 4599 } 4600 4601 /** create new tls session for server doq connection */ 4602 static SSL_CTX* 4603 doq_ctx_server_setup(struct doq_server_socket* doq_socket) 4604 { 4605 char* sid_ctx = "unbound server"; 4606 #ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4607 SSL_QUIC_METHOD* quic_method; 4608 #endif 4609 SSL_CTX* ctx = SSL_CTX_new(TLS_server_method()); 4610 if(!ctx) { 4611 log_crypto_err("Could not SSL_CTX_new"); 4612 return NULL; 4613 } 4614 SSL_CTX_set_options(ctx, 4615 (SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) | 4616 SSL_OP_SINGLE_ECDH_USE | 4617 SSL_OP_CIPHER_SERVER_PREFERENCE | 4618 SSL_OP_NO_ANTI_REPLAY); 4619 SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS); 4620 SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION); 4621 SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION); 4622 #ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB 4623 SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL); 4624 #endif 4625 SSL_CTX_set_default_verify_paths(ctx); 4626 if(!SSL_CTX_use_certificate_chain_file(ctx, 4627 doq_socket->ssl_service_pem)) { 4628 log_err("doq: error for cert file: %s", 4629 doq_socket->ssl_service_pem); 4630 log_crypto_err("doq: error in " 4631 "SSL_CTX_use_certificate_chain_file"); 4632 SSL_CTX_free(ctx); 4633 return NULL; 4634 } 4635 if(!SSL_CTX_use_PrivateKey_file(ctx, doq_socket->ssl_service_key, 4636 SSL_FILETYPE_PEM)) { 4637 log_err("doq: error for private key file: %s", 4638 doq_socket->ssl_service_key); 4639 log_crypto_err("doq: error in SSL_CTX_use_PrivateKey_file"); 4640 SSL_CTX_free(ctx); 4641 return NULL; 4642 } 4643 if(!SSL_CTX_check_private_key(ctx)) { 4644 
log_err("doq: error for key file: %s", 4645 doq_socket->ssl_service_key); 4646 log_crypto_err("doq: error in SSL_CTX_check_private_key"); 4647 SSL_CTX_free(ctx); 4648 return NULL; 4649 } 4650 SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx)); 4651 if(doq_socket->ssl_verify_pem && doq_socket->ssl_verify_pem[0]) { 4652 if(!SSL_CTX_load_verify_locations(ctx, 4653 doq_socket->ssl_verify_pem, NULL)) { 4654 log_err("doq: error for verify pem file: %s", 4655 doq_socket->ssl_verify_pem); 4656 log_crypto_err("doq: error in " 4657 "SSL_CTX_load_verify_locations"); 4658 SSL_CTX_free(ctx); 4659 return NULL; 4660 } 4661 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file( 4662 doq_socket->ssl_verify_pem)); 4663 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER| 4664 SSL_VERIFY_CLIENT_ONCE| 4665 SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); 4666 } 4667 4668 SSL_CTX_set_max_early_data(ctx, 0xffffffff); 4669 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4670 if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) { 4671 log_err("ngtcp2_crypto_quictls_configure_server_context failed"); 4672 SSL_CTX_free(ctx); 4673 return NULL; 4674 } 4675 #else 4676 /* The quic_method needs to remain valid during the SSL_CTX 4677 * lifetime, so we allocate it. It is freed with the 4678 * doq_server_socket. 
*/ 4679 quic_method = calloc(1, sizeof(SSL_QUIC_METHOD)); 4680 if(!quic_method) { 4681 log_err("calloc failed: out of memory"); 4682 SSL_CTX_free(ctx); 4683 return NULL; 4684 } 4685 doq_socket->quic_method = quic_method; 4686 quic_method->set_encryption_secrets = doq_set_encryption_secrets; 4687 quic_method->add_handshake_data = doq_add_handshake_data; 4688 quic_method->flush_flight = doq_flush_flight; 4689 quic_method->send_alert = doq_send_alert; 4690 SSL_CTX_set_quic_method(ctx, doq_socket->quic_method); 4691 #endif 4692 return ctx; 4693 } 4694 4695 /** Get the ngtcp2_conn from ssl userdata of type ngtcp2_conn_ref */ 4696 static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref) 4697 { 4698 struct doq_conn* conn = (struct doq_conn*)conn_ref->user_data; 4699 return conn->conn; 4700 } 4701 4702 /** create new SSL session for server connection */ 4703 static SSL* 4704 doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn) 4705 { 4706 SSL* ssl = SSL_new(ctx); 4707 if(!ssl) { 4708 log_crypto_err("doq: SSL_new failed"); 4709 return NULL; 4710 } 4711 #ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 4712 conn->conn_ref.get_conn = &doq_conn_ref_get_conn; 4713 conn->conn_ref.user_data = conn; 4714 SSL_set_app_data(ssl, &conn->conn_ref); 4715 #else 4716 SSL_set_app_data(ssl, conn); 4717 #endif 4718 SSL_set_accept_state(ssl); 4719 SSL_set_quic_early_data_enabled(ssl, 1); 4720 return ssl; 4721 } 4722 4723 /** setup the doq_socket server tls context */ 4724 int 4725 doq_socket_setup_ctx(struct doq_server_socket* doq_socket) 4726 { 4727 doq_socket->ctx = doq_ctx_server_setup(doq_socket); 4728 if(!doq_socket->ctx) 4729 return 0; 4730 return 1; 4731 } 4732 4733 int 4734 doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen, 4735 uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen) 4736 { 4737 int rv; 4738 struct ngtcp2_cid dcid, sv_scid, scid_cid; 4739 struct ngtcp2_path path; 4740 struct ngtcp2_callbacks callbacks; 
4741 struct ngtcp2_settings settings; 4742 struct ngtcp2_transport_params params; 4743 memset(&dcid, 0, sizeof(dcid)); 4744 memset(&sv_scid, 0, sizeof(sv_scid)); 4745 memset(&scid_cid, 0, sizeof(scid_cid)); 4746 memset(&path, 0, sizeof(path)); 4747 memset(&callbacks, 0, sizeof(callbacks)); 4748 memset(&settings, 0, sizeof(settings)); 4749 memset(¶ms, 0, sizeof(params)); 4750 4751 ngtcp2_cid_init(&scid_cid, scid, scidlen); 4752 ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen); 4753 4754 path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr; 4755 path.remote.addrlen = conn->key.paddr.addrlen; 4756 path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr; 4757 path.local.addrlen = conn->key.paddr.localaddrlen; 4758 4759 callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb; 4760 callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb; 4761 callbacks.encrypt = ngtcp2_crypto_encrypt_cb; 4762 callbacks.decrypt = ngtcp2_crypto_decrypt_cb; 4763 callbacks.hp_mask = ngtcp2_crypto_hp_mask; 4764 callbacks.update_key = ngtcp2_crypto_update_key_cb; 4765 callbacks.delete_crypto_aead_ctx = 4766 ngtcp2_crypto_delete_crypto_aead_ctx_cb; 4767 callbacks.delete_crypto_cipher_ctx = 4768 ngtcp2_crypto_delete_crypto_cipher_ctx_cb; 4769 callbacks.get_path_challenge_data = 4770 ngtcp2_crypto_get_path_challenge_data_cb; 4771 callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb; 4772 callbacks.rand = doq_rand_cb; 4773 callbacks.get_new_connection_id = doq_get_new_connection_id_cb; 4774 callbacks.remove_connection_id = doq_remove_connection_id_cb; 4775 callbacks.handshake_completed = doq_handshake_completed_cb; 4776 callbacks.stream_open = doq_stream_open_cb; 4777 callbacks.stream_close = doq_stream_close_cb; 4778 callbacks.stream_reset = doq_stream_reset_cb; 4779 callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb; 4780 callbacks.recv_stream_data = doq_recv_stream_data_cb; 4781 4782 ngtcp2_settings_default(&settings); 
4783 if(verbosity >= VERB_ALGO) { 4784 settings.log_printf = doq_log_printf_cb; 4785 } 4786 settings.rand_ctx.native_handle = conn->doq_socket->rnd; 4787 settings.initial_ts = doq_get_timestamp_nanosec(); 4788 settings.max_stream_window = 6*1024*1024; 4789 settings.max_window = 6*1024*1024; 4790 #ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN 4791 settings.token = (void*)token; 4792 settings.tokenlen = tokenlen; 4793 #else 4794 settings.token.base = (void*)token; 4795 settings.token.len = tokenlen; 4796 #endif 4797 4798 ngtcp2_transport_params_default(¶ms); 4799 params.max_idle_timeout = conn->doq_socket->idle_timeout; 4800 params.active_connection_id_limit = 7; 4801 params.initial_max_stream_data_bidi_local = 256*1024; 4802 params.initial_max_stream_data_bidi_remote = 256*1024; 4803 params.initial_max_data = 1024*1024; 4804 /* DoQ uses bidi streams, so we allow 0 uni streams. */ 4805 params.initial_max_streams_uni = 0; 4806 /* Initial max on number of bidi streams the remote end can open. 4807 * That is the number of queries it can make, at first. 
*/ 4808 params.initial_max_streams_bidi = 10; 4809 if(ocid) { 4810 ngtcp2_cid_init(¶ms.original_dcid, ocid, ocidlen); 4811 ngtcp2_cid_init(¶ms.retry_scid, conn->key.dcid, 4812 conn->key.dcidlen); 4813 params.retry_scid_present = 1; 4814 } else { 4815 ngtcp2_cid_init(¶ms.original_dcid, conn->key.dcid, 4816 conn->key.dcidlen); 4817 } 4818 #ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT 4819 params.original_dcid_present = 1; 4820 #endif 4821 doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token, 4822 sizeof(params.stateless_reset_token)); 4823 sv_scid.datalen = conn->doq_socket->sv_scidlen; 4824 lock_rw_wrlock(&conn->table->conid_lock); 4825 if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) { 4826 lock_rw_unlock(&conn->table->conid_lock); 4827 return 0; 4828 } 4829 4830 rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path, 4831 conn->version, &callbacks, &settings, ¶ms, NULL, conn); 4832 if(rv != 0) { 4833 lock_rw_unlock(&conn->table->conid_lock); 4834 log_err("ngtcp2_conn_server_new failed: %s", 4835 ngtcp2_strerror(rv)); 4836 return 0; 4837 } 4838 if(!doq_conn_setup_conids(conn)) { 4839 lock_rw_unlock(&conn->table->conid_lock); 4840 log_err("doq_conn_setup_conids failed: out of memory"); 4841 return 0; 4842 } 4843 lock_rw_unlock(&conn->table->conid_lock); 4844 conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx, 4845 conn); 4846 if(!conn->ssl) { 4847 log_err("doq_ssl_server_setup failed"); 4848 return 0; 4849 } 4850 ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl); 4851 doq_conn_write_enable(conn); 4852 return 1; 4853 } 4854 4855 struct doq_conid* 4856 doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen) 4857 { 4858 struct rbnode_type* node; 4859 struct doq_conid key; 4860 key.node.key = &key; 4861 key.cid = (void*)data; 4862 key.cidlen = datalen; 4863 node = rbtree_search(table->conid_tree, &key); 4864 if(node) 4865 return (struct doq_conid*)node->key; 4866 
return NULL; 4867 } 4868 4869 /** insert conid in the conid list */ 4870 static void 4871 doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid) 4872 { 4873 conid->prev = NULL; 4874 conid->next = conn->conid_list; 4875 if(conn->conid_list) 4876 conn->conid_list->prev = conid; 4877 conn->conid_list = conid; 4878 } 4879 4880 /** remove conid from the conid list */ 4881 static void 4882 doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid) 4883 { 4884 if(conid->prev) 4885 conid->prev->next = conid->next; 4886 else conn->conid_list = conid->next; 4887 if(conid->next) 4888 conid->next->prev = conid->prev; 4889 } 4890 4891 /** create a doq_conid */ 4892 static struct doq_conid* 4893 doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key) 4894 { 4895 struct doq_conid* conid; 4896 conid = calloc(1, sizeof(*conid)); 4897 if(!conid) 4898 return NULL; 4899 conid->cid = memdup(data, datalen); 4900 if(!conid->cid) { 4901 free(conid); 4902 return NULL; 4903 } 4904 conid->cidlen = datalen; 4905 conid->node.key = conid; 4906 conid->key = *key; 4907 conid->key.dcid = memdup(key->dcid, key->dcidlen); 4908 if(!conid->key.dcid) { 4909 free(conid->cid); 4910 free(conid); 4911 return NULL; 4912 } 4913 return conid; 4914 } 4915 4916 void 4917 doq_conid_delete(struct doq_conid* conid) 4918 { 4919 if(!conid) 4920 return; 4921 free(conid->key.dcid); 4922 free(conid->cid); 4923 free(conid); 4924 } 4925 4926 /** return true if the conid is for the conn. 
*/ 4927 static int 4928 conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid) 4929 { 4930 if(conid->key.dcidlen == conn->key.dcidlen && 4931 memcmp(conid->key.dcid, conn->key.dcid, conid->key.dcidlen)==0 4932 && conid->key.paddr.addrlen == conn->key.paddr.addrlen && 4933 memcmp(&conid->key.paddr.addr, &conn->key.paddr.addr, 4934 conid->key.paddr.addrlen) == 0 && 4935 conid->key.paddr.localaddrlen == conn->key.paddr.localaddrlen && 4936 memcmp(&conid->key.paddr.localaddr, &conn->key.paddr.localaddr, 4937 conid->key.paddr.localaddrlen) == 0 && 4938 conid->key.paddr.ifindex == conn->key.paddr.ifindex) 4939 return 1; 4940 return 0; 4941 } 4942 4943 int 4944 doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen) 4945 { 4946 struct doq_conid* conid; 4947 conid = doq_conid_find(conn->table, data, datalen); 4948 if(conid && !conid_is_for_conn(conn, conid)) { 4949 verbose(VERB_ALGO, "doq connection id already exists for " 4950 "another doq_conn. Ignoring second connection id."); 4951 /* Already exists to another conn, ignore it. 4952 * This works, in that the conid is listed in the doq_conn 4953 * conid_list element, and removed from there. So our conid 4954 * tree and list are fine, when created and removed. 4955 * The tree now does not have the lookup element pointing 4956 * to this connection. 
*/ 4957 return 1; 4958 } 4959 if(conid) 4960 return 1; /* already inserted */ 4961 conid = doq_conid_create(data, datalen, &conn->key); 4962 if(!conid) 4963 return 0; 4964 doq_conid_list_insert(conn, conid); 4965 (void)rbtree_insert(conn->table->conid_tree, &conid->node); 4966 return 1; 4967 } 4968 4969 void 4970 doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data, 4971 size_t datalen) 4972 { 4973 struct doq_conid* conid; 4974 conid = doq_conid_find(conn->table, data, datalen); 4975 if(conid && !conid_is_for_conn(conn, conid)) 4976 return; 4977 if(conid) { 4978 (void)rbtree_delete(conn->table->conid_tree, 4979 conid->node.key); 4980 doq_conid_list_remove(conn, conid); 4981 doq_conid_delete(conid); 4982 } 4983 } 4984 4985 /** associate the scid array and also the dcid. 4986 * caller must hold the locks on conn and doq_table.conid_lock. */ 4987 static int 4988 doq_conn_setup_id_array_and_dcid(struct doq_conn* conn, 4989 struct ngtcp2_cid* scids, size_t num_scid) 4990 { 4991 size_t i; 4992 for(i=0; i<num_scid; i++) { 4993 if(!doq_conn_associate_conid(conn, scids[i].data, 4994 scids[i].datalen)) 4995 return 0; 4996 } 4997 if(!doq_conn_associate_conid(conn, conn->key.dcid, conn->key.dcidlen)) 4998 return 0; 4999 return 1; 5000 } 5001 5002 int 5003 doq_conn_setup_conids(struct doq_conn* conn) 5004 { 5005 size_t num_scid = 5006 #ifndef HAVE_NGTCP2_CONN_GET_NUM_SCID 5007 ngtcp2_conn_get_scid(conn->conn, NULL); 5008 #else 5009 ngtcp2_conn_get_num_scid(conn->conn); 5010 #endif 5011 if(num_scid <= 4) { 5012 struct ngtcp2_cid ids[4]; 5013 /* Usually there are not that many scids when just accepted, 5014 * like only 2. 
*/ 5015 ngtcp2_conn_get_scid(conn->conn, ids); 5016 return doq_conn_setup_id_array_and_dcid(conn, ids, num_scid); 5017 } else { 5018 struct ngtcp2_cid *scids = calloc(num_scid, 5019 sizeof(struct ngtcp2_cid)); 5020 if(!scids) 5021 return 0; 5022 ngtcp2_conn_get_scid(conn->conn, scids); 5023 if(!doq_conn_setup_id_array_and_dcid(conn, scids, num_scid)) { 5024 free(scids); 5025 return 0; 5026 } 5027 free(scids); 5028 } 5029 return 1; 5030 } 5031 5032 void 5033 doq_conn_clear_conids(struct doq_conn* conn) 5034 { 5035 struct doq_conid* p, *next; 5036 if(!conn) 5037 return; 5038 p = conn->conid_list; 5039 while(p) { 5040 next = p->next; 5041 (void)rbtree_delete(conn->table->conid_tree, p->node.key); 5042 doq_conid_delete(p); 5043 p = next; 5044 } 5045 conn->conid_list = NULL; 5046 } 5047 5048 ngtcp2_tstamp doq_get_timestamp_nanosec(void) 5049 { 5050 #ifdef CLOCK_REALTIME 5051 struct timespec tp; 5052 memset(&tp, 0, sizeof(tp)); 5053 /* Get a nanosecond time, that can be compared with the event base. */ 5054 if(clock_gettime(CLOCK_REALTIME, &tp) == -1) { 5055 log_err("clock_gettime failed: %s", strerror(errno)); 5056 } 5057 return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) + 5058 ((uint64_t)tp.tv_nsec); 5059 #else 5060 struct timeval tv; 5061 if(gettimeofday(&tv, NULL) < 0) { 5062 log_err("gettimeofday failed: %s", strerror(errno)); 5063 } 5064 return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) + 5065 ((uint64_t)tv.tv_usec)*((uint64_t)1000); 5066 #endif /* CLOCK_REALTIME */ 5067 } 5068 5069 /** doq start the closing period for the connection. 
*/ 5070 static int 5071 doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn) 5072 { 5073 struct ngtcp2_path_storage ps; 5074 struct ngtcp2_pkt_info pi; 5075 ngtcp2_ssize ret; 5076 if(!conn) 5077 return 1; 5078 if( 5079 #ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 5080 ngtcp2_conn_in_closing_period(conn->conn) 5081 #else 5082 ngtcp2_conn_is_in_closing_period(conn->conn) 5083 #endif 5084 ) 5085 return 1; 5086 if( 5087 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 5088 ngtcp2_conn_in_draining_period(conn->conn) 5089 #else 5090 ngtcp2_conn_is_in_draining_period(conn->conn) 5091 #endif 5092 ) { 5093 doq_conn_write_disable(conn); 5094 return 1; 5095 } 5096 ngtcp2_path_storage_zero(&ps); 5097 sldns_buffer_clear(c->doq_socket->pkt_buf); 5098 /* the call to ngtcp2_conn_write_connection_close causes the 5099 * conn to be closed. It is now in the closing period. */ 5100 ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path, 5101 &pi, sldns_buffer_begin(c->doq_socket->pkt_buf), 5102 sldns_buffer_remaining(c->doq_socket->pkt_buf), 5103 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5104 &conn->ccerr 5105 #else 5106 &conn->last_error 5107 #endif 5108 , doq_get_timestamp_nanosec()); 5109 if(ret < 0) { 5110 log_err("doq ngtcp2_conn_write_connection_close failed: %s", 5111 ngtcp2_strerror(ret)); 5112 return 0; 5113 } 5114 if(ret == 0) { 5115 return 0; 5116 } 5117 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 5118 sldns_buffer_flip(c->doq_socket->pkt_buf); 5119 5120 /* The close packet is allocated, because it may have to be repeated. 5121 * When incoming packets have this connection dcid. 
*/ 5122 conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf), 5123 sldns_buffer_limit(c->doq_socket->pkt_buf)); 5124 if(!conn->close_pkt) { 5125 log_err("doq: could not allocate close packet: out of memory"); 5126 return 0; 5127 } 5128 conn->close_pkt_len = sldns_buffer_limit(c->doq_socket->pkt_buf); 5129 conn->close_ecn = pi.ecn; 5130 return 1; 5131 } 5132 5133 /** doq send the close packet for the connection, perhaps again. */ 5134 int 5135 doq_conn_send_close(struct comm_point* c, struct doq_conn* conn) 5136 { 5137 if(!conn) 5138 return 0; 5139 if(!conn->close_pkt) 5140 return 0; 5141 if(conn->close_pkt_len > sldns_buffer_capacity(c->doq_socket->pkt_buf)) 5142 return 0; 5143 sldns_buffer_clear(c->doq_socket->pkt_buf); 5144 sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt, conn->close_pkt_len); 5145 sldns_buffer_flip(c->doq_socket->pkt_buf); 5146 verbose(VERB_ALGO, "doq send connection close"); 5147 doq_send_pkt(c, &conn->key.paddr, conn->close_ecn); 5148 doq_conn_write_disable(conn); 5149 return 1; 5150 } 5151 5152 /** doq close the connection on error. If it returns a failure, it 5153 * does not wait to send a close, and the connection can be dropped. 
*/ 5154 static int 5155 doq_conn_close_error(struct comm_point* c, struct doq_conn* conn) 5156 { 5157 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5158 if(conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE) 5159 return 0; 5160 #else 5161 if(conn->last_error.type == 5162 NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE) 5163 return 0; 5164 #endif 5165 if(!doq_conn_start_closing_period(c, conn)) 5166 return 0; 5167 if( 5168 #ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 5169 ngtcp2_conn_in_draining_period(conn->conn) 5170 #else 5171 ngtcp2_conn_is_in_draining_period(conn->conn) 5172 #endif 5173 ) { 5174 doq_conn_write_disable(conn); 5175 return 1; 5176 } 5177 doq_conn_write_enable(conn); 5178 if(!doq_conn_send_close(c, conn)) 5179 return 0; 5180 return 1; 5181 } 5182 5183 int 5184 doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr, 5185 struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry, 5186 int* err_drop) 5187 { 5188 int ret; 5189 ngtcp2_tstamp ts; 5190 struct ngtcp2_path path; 5191 memset(&path, 0, sizeof(path)); 5192 path.remote.addr = (struct sockaddr*)&paddr->addr; 5193 path.remote.addrlen = paddr->addrlen; 5194 path.local.addr = (struct sockaddr*)&paddr->localaddr; 5195 path.local.addrlen = paddr->localaddrlen; 5196 ts = doq_get_timestamp_nanosec(); 5197 5198 ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi, 5199 sldns_buffer_begin(c->doq_socket->pkt_buf), 5200 sldns_buffer_limit(c->doq_socket->pkt_buf), ts); 5201 if(ret != 0) { 5202 if(err_retry) 5203 *err_retry = 0; 5204 if(err_drop) 5205 *err_drop = 0; 5206 if(ret == NGTCP2_ERR_DRAINING) { 5207 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", 5208 ngtcp2_strerror(ret)); 5209 doq_conn_write_disable(conn); 5210 return 0; 5211 } else if(ret == NGTCP2_ERR_DROP_CONN) { 5212 verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", 5213 ngtcp2_strerror(ret)); 5214 if(err_drop) 5215 *err_drop = 1; 5216 return 0; 5217 } else if(ret == NGTCP2_ERR_RETRY) { 5218 verbose(VERB_ALGO, 
"ngtcp2_conn_read_pkt returned %s", 5219 ngtcp2_strerror(ret)); 5220 if(err_retry) 5221 *err_retry = 1; 5222 if(err_drop) 5223 *err_drop = 1; 5224 return 0; 5225 } else if(ret == NGTCP2_ERR_CRYPTO) { 5226 if( 5227 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5228 !conn->ccerr.error_code 5229 #else 5230 !conn->last_error.error_code 5231 #endif 5232 ) { 5233 /* in picotls the tls alert may need to be 5234 * copied, but this is with openssl. And there 5235 * is conn->tls_alert. */ 5236 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5237 ngtcp2_ccerr_set_tls_alert(&conn->ccerr, 5238 conn->tls_alert, NULL, 0); 5239 #else 5240 ngtcp2_connection_close_error_set_transport_error_tls_alert( 5241 &conn->last_error, conn->tls_alert, 5242 NULL, 0); 5243 #endif 5244 } 5245 } else { 5246 if( 5247 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5248 !conn->ccerr.error_code 5249 #else 5250 !conn->last_error.error_code 5251 #endif 5252 ) { 5253 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5254 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, 5255 NULL, 0); 5256 #else 5257 ngtcp2_connection_close_error_set_transport_error_liberr( 5258 &conn->last_error, ret, NULL, 0); 5259 #endif 5260 } 5261 } 5262 log_err("ngtcp2_conn_read_pkt failed: %s", 5263 ngtcp2_strerror(ret)); 5264 if(!doq_conn_close_error(c, conn)) { 5265 if(err_drop) 5266 *err_drop = 1; 5267 } 5268 return 0; 5269 } 5270 doq_conn_write_enable(conn); 5271 return 1; 5272 } 5273 5274 /** doq stream write is done */ 5275 static void 5276 doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream) 5277 { 5278 /* Cannot deallocate, the buffer may be needed for resends. 
*/ 5279 doq_stream_off_write_list(conn, stream); 5280 } 5281 5282 int 5283 doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn, 5284 int* err_drop) 5285 { 5286 struct doq_stream* stream = conn->stream_write_first; 5287 ngtcp2_path_storage ps; 5288 ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); 5289 size_t num_packets = 0, max_packets = 65535; 5290 ngtcp2_path_storage_zero(&ps); 5291 5292 for(;;) { 5293 int64_t stream_id; 5294 uint32_t flags = 0; 5295 ngtcp2_pkt_info pi; 5296 ngtcp2_vec datav[2]; 5297 size_t datav_count = 0; 5298 ngtcp2_ssize ret, ndatalen = 0; 5299 int fin; 5300 5301 if(stream) { 5302 /* data to send */ 5303 verbose(VERB_ALGO, "doq: doq_conn write stream %d", 5304 (int)stream->stream_id); 5305 stream_id = stream->stream_id; 5306 fin = 1; 5307 if(stream->nwrite < 2) { 5308 datav[0].base = ((uint8_t*)&stream-> 5309 outlen_wire) + stream->nwrite; 5310 datav[0].len = 2 - stream->nwrite; 5311 datav[1].base = stream->out; 5312 datav[1].len = stream->outlen; 5313 datav_count = 2; 5314 } else { 5315 datav[0].base = stream->out + 5316 (stream->nwrite-2); 5317 datav[0].len = stream->outlen - 5318 (stream->nwrite-2); 5319 datav_count = 1; 5320 } 5321 } else { 5322 /* no data to send */ 5323 verbose(VERB_ALGO, "doq: doq_conn write stream -1"); 5324 stream_id = -1; 5325 fin = 0; 5326 datav[0].base = NULL; 5327 datav[0].len = 0; 5328 datav_count = 1; 5329 } 5330 5331 /* if more streams, set it to write more */ 5332 if(stream && stream->write_next) 5333 flags |= NGTCP2_WRITE_STREAM_FLAG_MORE; 5334 if(fin) 5335 flags |= NGTCP2_WRITE_STREAM_FLAG_FIN; 5336 5337 sldns_buffer_clear(c->doq_socket->pkt_buf); 5338 ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi, 5339 sldns_buffer_begin(c->doq_socket->pkt_buf), 5340 sldns_buffer_remaining(c->doq_socket->pkt_buf), 5341 &ndatalen, flags, stream_id, datav, datav_count, ts); 5342 if(ret < 0) { 5343 if(ret == NGTCP2_ERR_WRITE_MORE) { 5344 verbose(VERB_ALGO, "doq: write more, ndatalen %d", 
(int)ndatalen); 5345 if(stream) { 5346 if(ndatalen >= 0) 5347 stream->nwrite += ndatalen; 5348 if(stream->nwrite >= stream->outlen+2) 5349 doq_stream_write_is_done( 5350 conn, stream); 5351 stream = stream->write_next; 5352 } 5353 continue; 5354 } else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) { 5355 verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED"); 5356 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5357 ngtcp2_ccerr_set_application_error( 5358 &conn->ccerr, -1, NULL, 0); 5359 #else 5360 ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); 5361 #endif 5362 if(err_drop) 5363 *err_drop = 0; 5364 if(!doq_conn_close_error(c, conn)) { 5365 if(err_drop) 5366 *err_drop = 1; 5367 } 5368 return 0; 5369 } else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) { 5370 verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR"); 5371 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5372 ngtcp2_ccerr_set_application_error( 5373 &conn->ccerr, -1, NULL, 0); 5374 #else 5375 ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); 5376 #endif 5377 if(err_drop) 5378 *err_drop = 0; 5379 if(!doq_conn_close_error(c, conn)) { 5380 if(err_drop) 5381 *err_drop = 1; 5382 } 5383 return 0; 5384 } 5385 5386 log_err("doq: ngtcp2_conn_writev_stream failed: %s", 5387 ngtcp2_strerror(ret)); 5388 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5389 ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0); 5390 #else 5391 ngtcp2_connection_close_error_set_transport_error_liberr( 5392 &conn->last_error, ret, NULL, 0); 5393 #endif 5394 if(err_drop) 5395 *err_drop = 0; 5396 if(!doq_conn_close_error(c, conn)) { 5397 if(err_drop) 5398 *err_drop = 1; 5399 } 5400 return 0; 5401 } 5402 verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d", 5403 (int)ret, (int)ndatalen); 5404 5405 if(ndatalen >= 0 && stream) { 5406 stream->nwrite += ndatalen; 5407 if(stream->nwrite >= stream->outlen+2) 5408 doq_stream_write_is_done(conn, 
stream); 5409 } 5410 if(ret == 0) { 5411 /* congestion limited */ 5412 doq_conn_write_disable(conn); 5413 ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); 5414 return 1; 5415 } 5416 sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); 5417 sldns_buffer_flip(c->doq_socket->pkt_buf); 5418 doq_send_pkt(c, &conn->key.paddr, pi.ecn); 5419 5420 if(c->doq_socket->have_blocked_pkt) 5421 break; 5422 if(++num_packets == max_packets) 5423 break; 5424 if(stream) 5425 stream = stream->write_next; 5426 } 5427 ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); 5428 return 1; 5429 } 5430 5431 void 5432 doq_conn_write_enable(struct doq_conn* conn) 5433 { 5434 conn->write_interest = 1; 5435 } 5436 5437 void 5438 doq_conn_write_disable(struct doq_conn* conn) 5439 { 5440 conn->write_interest = 0; 5441 } 5442 5443 /** doq append the connection to the write list */ 5444 static void 5445 doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn) 5446 { 5447 if(conn->on_write_list) 5448 return; 5449 conn->write_prev = table->write_list_last; 5450 if(table->write_list_last) 5451 table->write_list_last->write_next = conn; 5452 else table->write_list_first = conn; 5453 conn->write_next = NULL; 5454 table->write_list_last = conn; 5455 conn->on_write_list = 1; 5456 } 5457 5458 void 5459 doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn) 5460 { 5461 if(!conn->on_write_list) 5462 return; 5463 if(conn->write_next) 5464 conn->write_next->write_prev = conn->write_prev; 5465 else table->write_list_last = conn->write_prev; 5466 if(conn->write_prev) 5467 conn->write_prev->write_next = conn->write_next; 5468 else table->write_list_first = conn->write_next; 5469 conn->write_prev = NULL; 5470 conn->write_next = NULL; 5471 conn->on_write_list = 0; 5472 } 5473 5474 void 5475 doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn) 5476 { 5477 if(conn->write_interest && conn->on_write_list) 5478 return; 5479 if(!conn->write_interest && 
!conn->on_write_list) 5480 return; 5481 if(conn->write_interest) 5482 doq_conn_write_list_append(table, conn); 5483 else doq_conn_write_list_remove(table, conn); 5484 } 5485 5486 struct doq_conn* 5487 doq_table_pop_first(struct doq_table* table) 5488 { 5489 struct doq_conn* conn = table->write_list_first; 5490 if(!conn) 5491 return NULL; 5492 lock_basic_lock(&conn->lock); 5493 table->write_list_first = conn->write_next; 5494 if(conn->write_next) 5495 conn->write_next->write_prev = NULL; 5496 else table->write_list_last = NULL; 5497 conn->write_next = NULL; 5498 conn->write_prev = NULL; 5499 conn->on_write_list = 0; 5500 return conn; 5501 } 5502 5503 int 5504 doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv) 5505 { 5506 ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(conn->conn); 5507 ngtcp2_tstamp now = doq_get_timestamp_nanosec(); 5508 ngtcp2_tstamp t; 5509 5510 if(expiry <= now) { 5511 /* The timer has already expired, add with zero timeout. 5512 * This should call the callback straight away. Calling it 5513 * from the event callbacks is cleaner than calling it here, 5514 * because then it is always called with the same locks and 5515 * so on. This routine only has the conn.lock. */ 5516 t = now; 5517 } else { 5518 t = expiry; 5519 } 5520 5521 /* convert to timeval */ 5522 memset(tv, 0, sizeof(*tv)); 5523 tv->tv_sec = t / NGTCP2_SECONDS; 5524 tv->tv_usec = (t / NGTCP2_MICROSECONDS)%1000000; 5525 5526 /* If we already have a timer, is it the right value? 
*/ 5527 if(conn->timer.timer_in_tree || conn->timer.timer_in_list) { 5528 if(conn->timer.time.tv_sec == tv->tv_sec && 5529 conn->timer.time.tv_usec == tv->tv_usec) 5530 return 0; 5531 } 5532 return 1; 5533 } 5534 5535 /* doq print connection log */ 5536 static void 5537 doq_conn_log_line(struct doq_conn* conn, char* s) 5538 { 5539 char remotestr[256], localstr[256]; 5540 addr_to_str((void*)&conn->key.paddr.addr, conn->key.paddr.addrlen, 5541 remotestr, sizeof(remotestr)); 5542 addr_to_str((void*)&conn->key.paddr.localaddr, 5543 conn->key.paddr.localaddrlen, localstr, sizeof(localstr)); 5544 log_info("doq conn %s %s %s", remotestr, localstr, s); 5545 } 5546 5547 int 5548 doq_conn_handle_timeout(struct doq_conn* conn) 5549 { 5550 ngtcp2_tstamp now = doq_get_timestamp_nanosec(); 5551 int rv; 5552 5553 if(verbosity >= VERB_ALGO) 5554 doq_conn_log_line(conn, "timeout"); 5555 5556 rv = ngtcp2_conn_handle_expiry(conn->conn, now); 5557 if(rv != 0) { 5558 verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s", 5559 ngtcp2_strerror(rv)); 5560 #ifdef HAVE_NGTCP2_CCERR_DEFAULT 5561 ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0); 5562 #else 5563 ngtcp2_connection_close_error_set_transport_error_liberr( 5564 &conn->last_error, rv, NULL, 0); 5565 #endif 5566 if(!doq_conn_close_error(conn->doq_socket->cp, conn)) { 5567 /* failed, return for deletion */ 5568 return 0; 5569 } 5570 return 1; 5571 } 5572 doq_conn_write_enable(conn); 5573 if(!doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)) { 5574 /* failed, return for deletion. 
*/ 5575 return 0; 5576 } 5577 return 1; 5578 } 5579 5580 void 5581 doq_table_quic_size_add(struct doq_table* table, size_t add) 5582 { 5583 lock_basic_lock(&table->size_lock); 5584 table->current_size += add; 5585 lock_basic_unlock(&table->size_lock); 5586 } 5587 5588 void 5589 doq_table_quic_size_subtract(struct doq_table* table, size_t subtract) 5590 { 5591 lock_basic_lock(&table->size_lock); 5592 if(table->current_size < subtract) 5593 table->current_size = 0; 5594 else table->current_size -= subtract; 5595 lock_basic_unlock(&table->size_lock); 5596 } 5597 5598 int 5599 doq_table_quic_size_available(struct doq_table* table, 5600 struct config_file* cfg, size_t mem) 5601 { 5602 size_t cur; 5603 lock_basic_lock(&table->size_lock); 5604 cur = table->current_size; 5605 lock_basic_unlock(&table->size_lock); 5606 5607 if(cur + mem > cfg->quic_size) 5608 return 0; 5609 return 1; 5610 } 5611 5612 size_t doq_table_quic_size_get(struct doq_table* table) 5613 { 5614 size_t sz; 5615 if(!table) 5616 return 0; 5617 lock_basic_lock(&table->size_lock); 5618 sz = table->current_size; 5619 lock_basic_unlock(&table->size_lock); 5620 return sz; 5621 } 5622 #endif /* HAVE_NGTCP2 */ 5623