1 /*
2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 /**
37 * \file
38 *
39 * This file has functions to get queries from clients.
40 */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 # include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include <limits.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
49 #endif
50 #include <ctype.h>
51 #include "services/listen_dnsport.h"
52 #include "services/outside_network.h"
53 #include "util/netevent.h"
54 #include "util/log.h"
55 #include "util/config_file.h"
56 #include "util/net_help.h"
57 #include "sldns/sbuffer.h"
58 #include "sldns/parseutil.h"
59 #include "services/mesh.h"
60 #include "util/fptr_wlist.h"
61 #include "util/locks.h"
62
63 #ifdef HAVE_NETDB_H
64 #include <netdb.h>
65 #endif
66 #include <fcntl.h>
67
68 #ifdef HAVE_SYS_UN_H
69 #include <sys/un.h>
70 #endif
71
72 #ifdef HAVE_SYSTEMD
73 #include <systemd/sd-daemon.h>
74 #endif
75
76 #ifdef HAVE_IFADDRS_H
77 #include <ifaddrs.h>
78 #endif
79 #ifdef HAVE_NET_IF_H
80 #include <net/if.h>
81 #endif
82 #ifdef HAVE_LINUX_NET_TSTAMP_H
83 #include <linux/net_tstamp.h>
84 #endif
/**
 * number of queued TCP connections for listen(); used as the backlog
 * for both the TCP accept sockets and the local (unix) accept sockets
 * created in this file.
 */
#define TCP_BACKLOG 256

#ifndef THREADS_DISABLED
/** lock on the counter of stream buffer memory */
static lock_basic_type stream_wait_count_lock;
/** lock on the counter of HTTP2 query buffer memory */
static lock_basic_type http2_query_buffer_count_lock;
/** lock on the counter of HTTP2 response buffer memory */
static lock_basic_type http2_response_buffer_count_lock;
#endif
/** size (in bytes) of stream wait buffers */
static size_t stream_wait_count = 0;
/** is the lock initialised for stream wait buffers */
static int stream_wait_lock_inited = 0;
/** size (in bytes) of HTTP2 query buffers */
static size_t http2_query_buffer_count = 0;
/** is the lock initialised for HTTP2 query buffers */
static int http2_query_buffer_lock_inited = 0;
/** size (in bytes) of HTTP2 response buffers */
static size_t http2_response_buffer_count = 0;
/** is the lock initialised for HTTP2 response buffers */
static int http2_response_buffer_lock_inited = 0;
108
109 /**
110 * Debug print of the getaddrinfo returned address.
111 * @param addr: the address returned.
112 */
113 static void
verbose_print_addr(struct addrinfo * addr)114 verbose_print_addr(struct addrinfo *addr)
115 {
116 if(verbosity >= VERB_ALGO) {
117 char buf[100];
118 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
119 #ifdef INET6
120 if(addr->ai_family == AF_INET6)
121 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
122 sin6_addr;
123 #endif /* INET6 */
124 if(inet_ntop(addr->ai_family, sinaddr, buf,
125 (socklen_t)sizeof(buf)) == 0) {
126 (void)strlcpy(buf, "(null)", sizeof(buf));
127 }
128 buf[sizeof(buf)-1] = 0;
129 verbose(VERB_ALGO, "creating %s%s socket %s %d",
130 addr->ai_socktype==SOCK_DGRAM?"udp":
131 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
132 addr->ai_family==AF_INET?"4":
133 addr->ai_family==AF_INET6?"6":
134 "_otherfam", buf,
135 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
136 }
137 }
138
139 void
verbose_print_unbound_socket(struct unbound_socket * ub_sock)140 verbose_print_unbound_socket(struct unbound_socket* ub_sock)
141 {
142 if(verbosity >= VERB_ALGO) {
143 log_info("listing of unbound_socket structure:");
144 verbose_print_addr(ub_sock->addr);
145 log_info("s is: %d, fam is: %s, acl: %s", ub_sock->s,
146 ub_sock->fam == AF_INET?"AF_INET":"AF_INET6",
147 ub_sock->acl?"yes":"no");
148 }
149 }
150
#ifdef HAVE_SYSTEMD
/**
 * Find a socket that was passed in by systemd socket activation.
 * The descriptors handed over by systemd are scanned in order and the first
 * one that matches the family, socket type and listening state is returned.
 * @param family: address family the descriptor must have.
 * @param socktype: socket type the descriptor must have.
 * @param listen: listening state passed to sd_is_socket(); callers in this
 *	file pass 1 for stream sockets and -1 for UDP.
 * @param addr: address that is wanted, only used for the error message.
 *	May be NULL, then path is printed instead.
 * @param addrlen: length of addr.
 * @param path: path of the wanted local socket, only used for the error
 *	message when addr is NULL.
 * @return the file descriptor, or -1 if systemd is not running, the
 *	environment is not set up for socket activation, or no descriptor
 *	matches.
 */
static int
systemd_get_activated(int family, int socktype, int listen,
	struct sockaddr *addr, socklen_t addrlen,
	const char *path)
{
	int i = 0;
	int r = 0;
	int s = -1;
	const char* listen_pid, *listen_fds;

	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */

	if((r = sd_booted()) < 1) {
		if(r == 0)
			log_warn("systemd is not running");
		else
			log_err("systemd sd_booted(): %s", strerror(-r));
		return -1;
	}

	listen_pid = getenv("LISTEN_PID");
	listen_fds = getenv("LISTEN_FDS");

	if (!listen_pid) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
		return -1;
	}

	if (!listen_fds) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
		return -1;
	}

	if((r = sd_listen_fds(0)) < 1) {
		if(r == 0)
			log_warn("systemd: did not return socket, check unit configuration");
		else
			log_err("systemd sd_listen_fds(): %s", strerror(-r));
		return -1;
	}

	for(i = 0; i < r; i++) {
		/* sd_is_socket() returns a positive value on a match, 0 on
		 * a mismatch and a negative errno-style value on failure;
		 * only a positive value may be treated as a match. */
		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype,
			listen) > 0) {
			s = SD_LISTEN_FDS_START + i;
			break;
		}
	}
	if (s == -1) {
		if (addr)
			log_err_addr("systemd sd_listen_fds()",
				     "no such socket",
				     (struct sockaddr_storage *)addr, addrlen);
		else
			/* do not hand a NULL pointer to %s */
			log_err("systemd sd_listen_fds(): %s",
				path ? path : "(null)");
	}
	return s;
}
#endif
210
/**
 * Create a UDP socket, set its socket options and bind it to addr.
 * When compiled with HAVE_SYSTEMD and use_systemd is set, a matching
 * descriptor passed in by systemd is used if there is one; such a
 * descriptor is not bound here and IPV6_V6ONLY is not touched on it.
 * Otherwise a new socket is created with socket().
 * The socket is made nonblocking before it is returned.
 * @param family: address family for socket(); AF_INET6 and AF_INET select
 *	the family specific MTU and fragmentation options.
 * @param socktype: socket type for socket().
 * @param addr: the address to bind to.
 * @param addrlen: length of addr.
 * @param v6only: if nonzero, IPV6_V6ONLY is set on an AF_INET6 socket.
 *	The value 2 sets the option to off, other nonzero values to on.
 * @param inuse: on failure, nonzero if bind() failed with EADDRINUSE
 *	(only detected in the non-winsock build), otherwise 0.
 * @param noproto: on failure, 1 if socket() reported that the address
 *	family or protocol is not supported, or if bind() on AF_INET6 gave
 *	EINVAL (non-winsock build), otherwise 0.
 * @param rcv: if nonzero, the receive buffer size to request.
 * @param snd: if nonzero, the send buffer size to request.
 * @param listen: if nonzero, the options for a listening socket are
 *	applied: SO_REUSEADDR, the reuseport option and the transparent
 *	option. It also enables more bind() error logging.
 * @param reuseport: if not NULL and nonzero, SO_REUSEPORT (or
 *	SO_REUSEPORT_LB where available) is attempted. It is set to 0 if
 *	the option could not be set.
 * @param transparent: if nonzero, set IP_TRANSPARENT, IP_BINDANY or
 *	SO_BINDANY, whichever is available. Failure is only a warning.
 * @param freebind: if nonzero, set IP_FREEBIND when available. Failure is
 *	only a warning.
 * @param use_systemd: if nonzero, try systemd socket activation first.
 * @param dscp: DiffServ codepoint passed to set_ip_dscp(). Failure is only
 *	a warning.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_udp_sock(int family, int socktype, struct sockaddr* addr,
	socklen_t addrlen, int v6only, int* inuse, int* noproto,
	int rcv, int snd, int listen, int* reuseport, int transparent,
	int freebind, int use_systemd, int dscp)
{
	int s;
	char* err;
#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
	int on=1;
#endif
#ifdef IPV6_MTU
	int mtu = IPV6_MIN_MTU;
#endif
	/* silence unused parameter warnings for options that are not
	 * available on this platform */
#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
	(void)rcv;
#endif
#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
	(void)snd;
#endif
#ifndef IPV6_V6ONLY
	(void)v6only;
#endif
#if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
	(void)transparent;
#endif
#if !defined(IP_FREEBIND)
	(void)freebind;
#endif
#ifdef HAVE_SYSTEMD
	int got_fd_from_systemd = 0;

	/* create a socket ourselves if systemd is not used, or if systemd
	 * has no matching descriptor for us */
	if (!use_systemd
	    || (use_systemd
		&& (s = systemd_get_activated(family, socktype, -1, addr,
					      addrlen, NULL)) == -1)) {
#else
	(void)use_systemd;
#endif
	if((s = socket(family, socktype, 0)) == -1) {
		*inuse = 0;
#ifndef USE_WINSOCK
		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#else
		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
			WSAGetLastError() == WSAEPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#endif
		log_err("can't create socket: %s", sock_strerror(errno));
		*noproto = 0;
		return -1;
	}
#ifdef HAVE_SYSTEMD
	} else {
		got_fd_from_systemd = 1;
	}
#endif
	if(listen) {
#ifdef SO_REUSEADDR
		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
				sock_strerror(errno));
#ifndef USE_WINSOCK
			/* ENOSYS is tolerated, other errors are fatal */
			if(errno != ENOSYS) {
				close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#else
			closesocket(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
#endif
		}
#endif /* SO_REUSEADDR */
#ifdef SO_REUSEPORT
#  ifdef SO_REUSEPORT_LB
		/* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
		 * like SO_REUSEPORT on Linux.  This is what the users want
		 * with the config option in unbound.conf; if we actually
		 * need local address and port reuse they'll also need to
		 * have SO_REUSEPORT set for them, assume it was _LB they want.
		 */
		if (reuseport && *reuseport &&
		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
			if(errno != ENOPROTOOPT || verbosity >= 3)
				log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
					strerror(errno));
#endif
			/* this option is not essential, we can continue */
			*reuseport = 0;
		}
#  else /* no SO_REUSEPORT_LB */

		/* try to set SO_REUSEPORT so that incoming
		 * queries are distributed evenly among the receiving threads.
		 * Each thread must have its own socket bound to the same port,
		 * with SO_REUSEPORT set on each socket.
		 */
		if (reuseport && *reuseport &&
		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
			if(errno != ENOPROTOOPT || verbosity >= 3)
				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
					strerror(errno));
#endif
			/* this option is not essential, we can continue */
			*reuseport = 0;
		}
#  endif /* SO_REUSEPORT_LB */
#else
		(void)reuseport;
#endif /* defined(SO_REUSEPORT) */
		/* transparent mode: failure is logged but is not fatal */
#ifdef IP_TRANSPARENT
		if (transparent &&
		    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
		    (socklen_t)sizeof(on)) < 0) {
			log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
			strerror(errno));
		}
#elif defined(IP_BINDANY)
		if (transparent &&
		    setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
		    (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
		    (void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
			(family==AF_INET6?"V6":""), strerror(errno));
		}
#elif defined(SO_BINDANY)
		if (transparent &&
		    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
		    (socklen_t)sizeof(on)) < 0) {
			log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
			strerror(errno));
		}
#endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
	}
	/* freebind is applied regardless of the listen flag */
#ifdef IP_FREEBIND
	if(freebind &&
	    setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_FREEBIND */
	if(rcv) {
#ifdef SO_RCVBUF
		int got;
		socklen_t slen = (socklen_t)sizeof(got);
#  ifdef SO_RCVBUFFORCE
		/* Linux specific: try to use root permission to override
		 * system limits on rcvbuf. The limit is stored in
		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
			(socklen_t)sizeof(rcv)) < 0) {
			/* EPERM falls through to the plain SO_RCVBUF
			 * attempt below, other errors are fatal */
			if(errno != EPERM) {
				log_err("setsockopt(..., SO_RCVBUFFORCE, "
					"...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#  endif /* SO_RCVBUFFORCE */
			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
				(socklen_t)sizeof(rcv)) < 0) {
				log_err("setsockopt(..., SO_RCVBUF, "
					"...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
			/* check if we got the right thing or if system
			 * reduced to some system max.  Warn if so */
			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
				&slen) >= 0 && got < rcv/2) {
				log_warn("so-rcvbuf %u was not granted. "
					"Got %u. To fix: start with "
					"root permissions(linux) or sysctl "
					"bigger net.core.rmem_max(linux) or "
					"kern.ipc.maxsockbuf(bsd) values.",
					(unsigned)rcv, (unsigned)got);
			}
#  ifdef SO_RCVBUFFORCE
		}
#  endif
#endif /* SO_RCVBUF */
	}
	/* first do RCVBUF as the receive buffer is more important */
	if(snd) {
#ifdef SO_SNDBUF
		int got;
		socklen_t slen = (socklen_t)sizeof(got);
#  ifdef SO_SNDBUFFORCE
		/* Linux specific: try to use root permission to override
		 * system limits on sndbuf. The limit is stored in
		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
			(socklen_t)sizeof(snd)) < 0) {
			/* EPERM falls through to the plain SO_SNDBUF
			 * attempt below, other errors are fatal */
			if(errno != EPERM) {
				log_err("setsockopt(..., SO_SNDBUFFORCE, "
					"...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
#  endif /* SO_SNDBUFFORCE */
			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
				(socklen_t)sizeof(snd)) < 0) {
				log_err("setsockopt(..., SO_SNDBUF, "
					"...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
			/* check if we got the right thing or if system
			 * reduced to some system max.  Warn if so */
			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
				&slen) >= 0 && got < snd/2) {
				log_warn("so-sndbuf %u was not granted. "
					"Got %u. To fix: start with "
					"root permissions(linux) or sysctl "
					"bigger net.core.wmem_max(linux) or "
					"kern.ipc.maxsockbuf(bsd) values.",
					(unsigned)snd, (unsigned)got);
			}
#  ifdef SO_SNDBUFFORCE
		}
#  endif
#endif /* SO_SNDBUF */
	}
	/* a DSCP failure is only a warning, the socket stays usable */
	err = set_ip_dscp(s, family, dscp);
	if(err != NULL)
		log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
	if(family == AF_INET6) {
# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
		int omit6_set = 0;
		int action;
# endif
# if defined(IPV6_V6ONLY)
		if(v6only
#   ifdef HAVE_SYSTEMD
			/* Systemd wants to control if the socket is v6 only
			 * or both, with BindIPv6Only=default, ipv6-only or
			 * both in systemd.socket, so it is not set here. */
			&& !got_fd_from_systemd
#   endif
			) {
			/* v6only of 2 turns the option off, else on */
			int val=(v6only==2)?0:1;
			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
				(void*)&val, (socklen_t)sizeof(val)) < 0) {
				log_err("setsockopt(..., IPV6_V6ONLY"
					", ...) failed: %s", sock_strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
# endif
# if defined(IPV6_USE_MIN_MTU)
		/*
		 * There is no fragmentation of IPv6 datagrams
		 * during forwarding in the network. Therefore
		 * we do not send UDP datagrams larger than
		 * the minimum IPv6 MTU of 1280 octets. The
		 * EDNS0 message length can be larger if the
		 * network stack supports IPV6_USE_MIN_MTU.
		 */
		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
				"...) failed: %s", sock_strerror(errno));
			sock_close(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
# elif defined(IPV6_MTU)
#   ifndef USE_WINSOCK
		/*
		 * On Linux, to send no larger than 1280, the PMTUD is
		 * disabled by default for datagrams anyway, so we set
		 * the MTU to use.
		 */
		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
				sock_strerror(errno));
			sock_close(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
#   elif defined(IPV6_USER_MTU)
		/* As later versions of the mingw crosscompiler define
		 * IPV6_MTU, do the same for windows but use IPV6_USER_MTU
		 * instead which is writable; IPV6_MTU is readonly there. */
		if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU,
			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
			if (WSAGetLastError() != WSAENOPROTOOPT) {
				log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s",
					wsa_strerror(WSAGetLastError()));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
#   endif /* USE_WINSOCK */
# endif /* IPv6 MTU */
# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
#  if defined(IP_PMTUDISC_OMIT)
		/* try OMIT first; EINVAL falls back to DONT below */
		action = IP_PMTUDISC_OMIT;
		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
			&action, (socklen_t)sizeof(action)) < 0) {

			if (errno != EINVAL) {
				log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
					strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
		else
		{
			omit6_set = 1;
		}
#  endif
		if (omit6_set == 0) {
			action = IP_PMTUDISC_DONT;
			if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
				&action, (socklen_t)sizeof(action)) < 0) {
				log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
					strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
# endif /* IPV6_MTU_DISCOVER */
	} else if(family == AF_INET) {
#  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
/* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
 * PMTU information is not accepted, but fragmentation is allowed
 * if and only if the packet size exceeds the outgoing interface MTU
 * (and also uses the interface mtu to determine the size of the packets).
 * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
 * FreeBSD already has same semantics without setting the option. */
		int omit_set = 0;
		int action;
#   if defined(IP_PMTUDISC_OMIT)
		/* try OMIT first; EINVAL falls back to DONT below */
		action = IP_PMTUDISC_OMIT;
		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
			&action, (socklen_t)sizeof(action)) < 0) {

			if (errno != EINVAL) {
				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
					strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
		else
		{
			omit_set = 1;
		}
#   endif
		if (omit_set == 0) {
			action = IP_PMTUDISC_DONT;
			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
				&action, (socklen_t)sizeof(action)) < 0) {
				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
					strerror(errno));
				sock_close(s);
				*noproto = 0;
				*inuse = 0;
				return -1;
			}
		}
#  elif defined(IP_DONTFRAG) && !defined(__APPLE__)
		/* the IP_DONTFRAG option if defined in the 11.0 OSX headers,
		 * but does not work on that version, so we exclude it */
		int off = 0;
		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
			&off, (socklen_t)sizeof(off)) < 0) {
			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
				strerror(errno));
			sock_close(s);
			*noproto = 0;
			*inuse = 0;
			return -1;
		}
#  endif /* IPv4 MTU */
	}
	/* a descriptor from systemd is not bound here */
	if(
#ifdef HAVE_SYSTEMD
		!got_fd_from_systemd &&
#endif
		bind(s, (struct sockaddr*)addr, addrlen) != 0) {
		*noproto = 0;
		*inuse = 0;
#ifndef USE_WINSOCK
#ifdef EADDRINUSE
		*inuse = (errno == EADDRINUSE);
		/* detect freebsd jail with no ipv6 permission */
		if(family==AF_INET6 && errno==EINVAL)
			*noproto = 1;
		/* EADDRINUSE is reported through *inuse; EACCES and
		 * EADDRNOTAVAIL are only logged for listening sockets or
		 * at verbosity 4 and up */
		else if(errno != EADDRINUSE &&
			!(errno == EACCES && verbosity < 4 && !listen)
#ifdef EADDRNOTAVAIL
			&& !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
#endif
			) {
			log_err_addr("can't bind socket", strerror(errno),
				(struct sockaddr_storage*)addr, addrlen);
		}
#endif /* EADDRINUSE */
#else /* USE_WINSOCK */
		if(WSAGetLastError() != WSAEADDRINUSE &&
			WSAGetLastError() != WSAEADDRNOTAVAIL &&
			!(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
			log_err_addr("can't bind socket",
				wsa_strerror(WSAGetLastError()),
				(struct sockaddr_storage*)addr, addrlen);
		}
#endif /* USE_WINSOCK */
		sock_close(s);
		return -1;
	}
	if(!fd_set_nonblock(s)) {
		*noproto = 0;
		*inuse = 0;
		sock_close(s);
		return -1;
	}
	return s;
}
668
/**
 * Create a TCP socket that listens for incoming connections.
 * When compiled with HAVE_SYSTEMD and use_systemd is set, a matching
 * descriptor passed in by systemd is used if there is one; such a
 * descriptor is not bound here and TCP_NODELAY, TCP_MAXSEG and
 * IPV6_V6ONLY are not set on it. Otherwise a new socket is created.
 * The socket is made nonblocking and listen() is called on it with a
 * backlog of TCP_BACKLOG.
 * @param addr: getaddrinfo result with the family, socket type and
 *	address to bind to.
 * @param v6only: if nonzero, IPV6_V6ONLY is enabled on an AF_INET6 socket.
 * @param noproto: set to 0 on entry. Set to 1 if socket() reported that
 *	the address family or protocol is not supported, or if bind() on
 *	AF_INET6 gave EINVAL (non-winsock build).
 * @param reuseport: if not NULL and nonzero, SO_REUSEPORT is attempted.
 *	It is set to 0 if the option could not be set.
 * @param transparent: if nonzero, set IP_TRANSPARENT, IP_BINDANY or
 *	SO_BINDANY, whichever is available. Failure is only a warning.
 * @param mss: if larger than 0, the value to set with TCP_MAXSEG. Failure
 *	is logged but is not fatal.
 * @param nodelay: if nonzero, set TCP_NODELAY. Failure is logged but is
 *	not fatal.
 * @param freebind: if nonzero, set IP_FREEBIND when available. Failure is
 *	only a warning.
 * @param use_systemd: if nonzero, try systemd socket activation first.
 * @param dscp: DiffServ codepoint passed to set_ip_dscp(). Failure is only
 *	a warning.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
	int* reuseport, int transparent, int mss, int nodelay, int freebind,
	int use_systemd, int dscp)
{
	int s;
	char* err;
#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
	int on = 1;
#endif
#ifdef HAVE_SYSTEMD
	int got_fd_from_systemd = 0;
#endif
#ifdef USE_TCP_FASTOPEN
	int qlen;
#endif
	/* silence unused parameter warnings for options that are not
	 * available on this platform */
#if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
	(void)transparent;
#endif
#if !defined(IP_FREEBIND)
	(void)freebind;
#endif
	verbose_print_addr(addr);
	*noproto = 0;
#ifdef HAVE_SYSTEMD
	/* create a socket ourselves if systemd is not used, or if systemd
	 * has no matching descriptor for us */
	if (!use_systemd ||
	    (use_systemd
	     && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
					   addr->ai_addr, addr->ai_addrlen,
					   NULL)) == -1)) {
#else
	(void)use_systemd;
#endif
	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
#ifndef USE_WINSOCK
		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#else
		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
			WSAGetLastError() == WSAEPROTONOSUPPORT) {
			*noproto = 1;
			return -1;
		}
#endif
		log_err("can't create socket: %s", sock_strerror(errno));
		return -1;
	}
	if(nodelay) {
#if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
		if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
			(socklen_t)sizeof(on)) < 0) {
			#ifndef USE_WINSOCK
			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
				strerror(errno));
			#else
			log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
				wsa_strerror(WSAGetLastError()));
			#endif
		}
#else
		log_warn(" setsockopt(TCP_NODELAY) unsupported");
#endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
	}
	if (mss > 0) {
#if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
		if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
			(socklen_t)sizeof(mss)) < 0) {
			log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
				sock_strerror(errno));
		} else {
			verbose(VERB_ALGO,
				" tcp socket mss set to %d", mss);
		}
#else
		log_warn(" setsockopt(TCP_MAXSEG) unsupported");
#endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
	}
#ifdef HAVE_SYSTEMD
	} else {
		got_fd_from_systemd = 1;
	}
#endif
	/* the options below are applied to systemd sockets as well */
#ifdef SO_REUSEADDR
	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
			sock_strerror(errno));
		sock_close(s);
		return -1;
	}
#endif /* SO_REUSEADDR */
#ifdef IP_FREEBIND
	if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_FREEBIND */
#ifdef SO_REUSEPORT
	/* try to set SO_REUSEPORT so that incoming
	 * connections are distributed evenly among the receiving threads.
	 * Each thread must have its own socket bound to the same port,
	 * with SO_REUSEPORT set on each socket.
	 */
	if (reuseport && *reuseport &&
		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
		(socklen_t)sizeof(on)) < 0) {
#ifdef ENOPROTOOPT
		if(errno != ENOPROTOOPT || verbosity >= 3)
			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
				strerror(errno));
#endif
		/* this option is not essential, we can continue */
		*reuseport = 0;
	}
#else
	(void)reuseport;
#endif /* defined(SO_REUSEPORT) */
#if defined(IPV6_V6ONLY)
	if(addr->ai_family == AF_INET6 && v6only
#  ifdef HAVE_SYSTEMD
		/* Systemd wants to control if the socket is v6 only
		 * or both, with BindIPv6Only=default, ipv6-only or
		 * both in systemd.socket, so it is not set here. */
		&& !got_fd_from_systemd
#  endif
		) {
		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
			(void*)&on, (socklen_t)sizeof(on)) < 0) {
			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
				sock_strerror(errno));
			sock_close(s);
			return -1;
		}
	}
#else
	(void)v6only;
#endif /* IPV6_V6ONLY */
	/* transparent mode: failure is logged but is not fatal */
#ifdef IP_TRANSPARENT
	if (transparent &&
	    setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
	    (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
			strerror(errno));
	}
#elif defined(IP_BINDANY)
	if (transparent &&
	    setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
	    (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
	    (void*)&on, (socklen_t)sizeof(on)) < 0) {
		log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
		(addr->ai_family==AF_INET6?"V6":""), strerror(errno));
	}
#elif defined(SO_BINDANY)
	if (transparent &&
	    setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
	    sizeof(on)) < 0) {
		log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
		strerror(errno));
	}
#endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
	/* a DSCP failure is only a warning, the socket stays usable */
	err = set_ip_dscp(s, addr->ai_family, dscp);
	if(err != NULL)
		log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
	/* a descriptor from systemd is not bound here */
	if(
#ifdef HAVE_SYSTEMD
		!got_fd_from_systemd &&
#endif
		bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
#ifndef USE_WINSOCK
		/* detect freebsd jail with no ipv6 permission */
		if(addr->ai_family==AF_INET6 && errno==EINVAL)
			*noproto = 1;
		else {
			log_err_addr("can't bind socket", strerror(errno),
				(struct sockaddr_storage*)addr->ai_addr,
				addr->ai_addrlen);
		}
#else
		log_err_addr("can't bind socket",
			wsa_strerror(WSAGetLastError()),
			(struct sockaddr_storage*)addr->ai_addr,
			addr->ai_addrlen);
#endif
		sock_close(s);
		return -1;
	}
	if(!fd_set_nonblock(s)) {
		sock_close(s);
		return -1;
	}
	if(listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", sock_strerror(errno));
		sock_close(s);
		return -1;
	}
#ifdef USE_TCP_FASTOPEN
	/* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
	   against IP spoofing attacks as suggested in RFC7413 */
#ifdef __APPLE__
	/* OS X implementation only supports qlen of 1 via this call. Actual
	   value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
	qlen = 1;
#else
	/* 5 is recommended on linux */
	qlen = 5;
#endif
	/* a TCP Fast Open failure is logged, the socket is still returned */
	if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
		  sizeof(qlen))) == -1 ) {
#ifdef ENOPROTOOPT
		/* squelch ENOPROTOOPT: freebsd server mode with kernel support
		   disabled, except when verbosity enabled for debugging */
		if(errno != ENOPROTOOPT || verbosity >= 3) {
#endif
		  if(errno == EPERM) {
			log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
		  } else {
			log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
		  }
#ifdef ENOPROTOOPT
		}
#endif
	}
#endif
	return s;
}
897
/**
 * Set the DiffServ codepoint on a socket.
 * The codepoint is shifted left by two bits and written with IPV6_TCLASS
 * for AF_INET6 sockets and with IP_TOS for every other address family.
 * A dscp of 0 leaves the socket untouched.
 * @param socket: the socket descriptor to set the option on.
 * @param addrfamily: address family of the socket.
 * @param dscp: the DiffServ codepoint, 0 means do not set anything.
 * @return NULL on success, otherwise a string that describes the error.
 */
char*
set_ip_dscp(int socket, int addrfamily, int dscp)
{
	int tos;

	if(dscp == 0)
		return NULL;
	/* the codepoint sits in the upper six bits of the field */
	tos = dscp << 2;

	if(addrfamily == AF_INET6) {
#ifdef IPV6_TCLASS
		if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&tos,
			sizeof(tos)) < 0)
			return sock_strerror(errno);
		return NULL;
#else
		return "IPV6_TCLASS not defined on this system";
#endif
	}

	if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&tos,
		sizeof(tos)) < 0)
		return sock_strerror(errno);
	return NULL;
}
923
/**
 * Create a listening, nonblocking unix domain stream socket.
 * When compiled with HAVE_SYSTEMD and use_systemd is set, a matching
 * descriptor passed in by systemd is returned if there is one. Otherwise
 * an existing file at path is removed, a new socket is bound to path and
 * listen() is called on it with a backlog of TCP_BACKLOG.
 * @param path: file system path of the socket. A path that does not fit
 *	in sockaddr_un.sun_path is rejected, so that the path that is
 *	unlinked and the path that is bound are always the same.
 * @param noproto: set to 1 if local sockets are not supported by this
 *	build (no sys/un.h). Not written otherwise.
 * @param use_systemd: if nonzero, try systemd socket activation first.
 * @return the socket descriptor, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif
#ifdef HAVE_SYSTEMD
	if(use_systemd) {
		int ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1,
			NULL, 0, path);
		if(ret != -1)
			return ret;
		/* no descriptor from systemd, create the socket below */
	}
#else
	(void)use_systemd;
#endif
#ifdef HAVE_SYS_UN_H
	verbose(VERB_ALGO, "creating unix socket %s", path);
	/* start from a zeroed address, bind() is given the whole struct */
	memset(&usock, 0, sizeof(usock));
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	if(strlcpy(usock.sun_path, path, sizeof(usock.sun_path)) >=
		sizeof(usock.sun_path)) {
		/* a truncated name would bind a different file than the
		 * one that is unlinked below */
		log_err("Cannot create local socket %s (path too long)",
			path);
		return -1;
	}

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
998
999
/**
 * Create socket from getaddrinfo results.
 * The interface name and port are resolved with getaddrinfo and a UDP
 * socket (for SOCK_DGRAM) or a TCP accept socket (for other types) is made
 * for the first result. The getaddrinfo result is stored in ub_sock->addr,
 * also when the socket could not be created; it is not freed here.
 * @param stype: socket type, stored into hints->ai_socktype.
 * @param ifname: interface name or address to resolve, NULL for default.
 * @param port: port to resolve.
 * @param hints: getaddrinfo hints; ai_family also decides if noip6 is set.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to 1 if creation failed because the protocol is not
 *	supported and the hints ask for AF_INET6, otherwise 0.
 * @param rcv: receive buffer size for UDP, passed on as int.
 * @param snd: send buffer size for UDP, passed on as int.
 * @param reuseport: passed on to the socket creation routine.
 * @param transparent: passed on to the socket creation routine.
 * @param tcp_mss: maximum segment size, passed on for TCP.
 * @param nodelay: passed on for TCP.
 * @param freebind: passed on to the socket creation routine.
 * @param use_systemd: passed on to the socket creation routine.
 * @param dscp: passed on to the socket creation routine.
 * @param ub_sock: filled in with the address, descriptor and family; the
 *	acl member is set to NULL. Not touched if getaddrinfo fails.
 * @return the socket descriptor, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	struct addrinfo* ai = NULL;
	int gai, fd, in_use, no_proto;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || !ai) {
		const char* syserr = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* the system error is only meaningful for EAI_SYSTEM */
		if(gai == EAI_SYSTEM)
			syserr = strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &no_proto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp);
		if(fd == -1 && no_proto && hints->ai_family == AF_INET6){
			*noip6 = 1;
		}
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &in_use, &no_proto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
		/* the flags are only valid after a failure */
		if(fd == -1) {
			if(in_use) {
				log_err("bind: address already in use");
			} else if(no_proto && hints->ai_family == AF_INET6){
				*noip6 = 1;
			}
		}
	}

	/* record the result, also for a failed socket */
	ub_sock->addr = ai;
	ub_sock->s = fd;
	ub_sock->fam = hints->ai_family;
	ub_sock->acl = NULL;

	return fd;
}
1057
/**
 * Create a socket, honouring a port override written as "ifspec@port" in
 * ifname. Without an '@' the arguments go to make_sock unchanged. With one,
 * the text before it becomes the interface and the text after it the port.
 * On an over-long interface or port part, *noip6 is set to 0 and -1 is
 * returned. Otherwise the result of make_sock is returned.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	char portbuf[16];
	char ifbuf[128];
	size_t iflen;
	const char* at = strchr(ifname, '@');

	if(at == NULL) {
		/* no override present */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock);
	}

	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}

	/* split "ifspec@port" into its two parts */
	(void)strlcpy(ifbuf, ifname, sizeof(ifbuf));
	ifbuf[iflen] = 0;
	(void)strlcpy(portbuf, at+1, sizeof(portbuf));
	portbuf[strlen(at+1)] = 0;

	return make_sock(stype, ifbuf, portbuf, hints, v6only, noip6, rcv,
		snd, reuseport, transparent, tcp_mss, nodelay, freebind,
		use_systemd, dscp, ub_sock);
}
1092
1093 /**
1094 * Add port to open ports list.
1095 * @param list: list head. changed.
1096 * @param s: fd.
1097 * @param ftype: if fd is UDP.
1098 * @param pp2_enabled: if PROXYv2 is enabled for this port.
1099 * @param ub_sock: socket with address.
1100 * @return false on failure. list in unchanged then.
1101 */
1102 static int
port_insert(struct listen_port ** list,int s,enum listen_type ftype,int pp2_enabled,struct unbound_socket * ub_sock)1103 port_insert(struct listen_port** list, int s, enum listen_type ftype,
1104 int pp2_enabled, struct unbound_socket* ub_sock)
1105 {
1106 struct listen_port* item = (struct listen_port*)malloc(
1107 sizeof(struct listen_port));
1108 if(!item)
1109 return 0;
1110 item->next = *list;
1111 item->fd = s;
1112 item->ftype = ftype;
1113 item->pp2_enabled = pp2_enabled;
1114 item->socket = ub_sock;
1115 *list = item;
1116 return 1;
1117 }
1118
/**
 * Ask for receive timestamps on socket s by setting SO_TIMESTAMPNS.
 * @return 1 on success, 0 when setsockopt fails or when the platform lacks
 *	HAVE_LINUX_NET_TSTAMP_H; an error is logged in both failure cases.
 */
static int
set_recvtimestamp(int s)
{
#ifndef HAVE_LINUX_NET_TSTAMP_H
	(void)s;
	log_err("packets timestamping is not supported on this platform");
	return 0;
#else
	int flags = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;

	if(setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&flags,
		(socklen_t)sizeof(flags)) >= 0)
		return 1;

	log_err("setsockopt(..., SO_TIMESTAMPNS, ...) failed: %s",
		strerror(errno));
	return 0;
#endif
}
1137
/**
 * Enable delivery of packet info (destination address) on socket s.
 * Which option is set depends on the family and on what the platform
 * defines: IPV6_RECVPKTINFO or IPV6_PKTINFO for AF_INET6, IP_PKTINFO or
 * IP_RECVDSTADDR for AF_INET. Other families are left untouched.
 * @return 0 if the option is unavailable or setsockopt fails (an error is
 *	logged), 1 otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif

	if(family == AF_INET) {
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO */
	} else if(family == AF_INET6) {
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
#else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* IPV6_RECVPKTINFO */
	}

	return 1;
}
1192
1193 /** see if interface is ssl, its port number == the ssl port number */
1194 static int
if_is_ssl(const char * ifname,const char * port,int ssl_port,struct config_strlist * tls_additional_port)1195 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1196 struct config_strlist* tls_additional_port)
1197 {
1198 struct config_strlist* s;
1199 char* p = strchr(ifname, '@');
1200 if(!p && atoi(port) == ssl_port)
1201 return 1;
1202 if(p && atoi(p+1) == ssl_port)
1203 return 1;
1204 for(s = tls_additional_port; s; s = s->next) {
1205 if(p && atoi(p+1) == atoi(s->str))
1206 return 1;
1207 if(!p && atoi(port) == atoi(s->str))
1208 return 1;
1209 }
1210 return 0;
1211 }
1212
1213 /**
1214 * Helper for ports_open. Creates one interface (or NULL for default).
1215 * @param ifname: The interface ip address.
1216 * @param do_auto: use automatic interface detection.
1217 * If enabled, then ifname must be the wildcard name.
1218 * @param do_udp: if udp should be used.
1219 * @param do_tcp: if tcp should be used.
1220 * @param hints: for getaddrinfo. family and flags have to be set by caller.
1221 * @param port: Port number to use (as string).
1222 * @param list: list of open ports, appended to, changed to point to list head.
1223 * @param rcv: receive buffer size for UDP
1224 * @param snd: send buffer size for UDP
1225 * @param ssl_port: ssl service port number
1226 * @param tls_additional_port: list of additional ssl service port numbers.
1227 * @param https_port: DoH service port number
1228 * @param proxy_protocol_port: list of PROXYv2 port numbers.
1229 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1230 * set to false on exit if reuseport failed due to no kernel support.
1231 * @param transparent: set IP_TRANSPARENT socket option.
1232 * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1233 * @param freebind: set IP_FREEBIND socket option.
1234 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1235 * @param use_systemd: if true, fetch sockets from systemd.
1236 * @param dnscrypt_port: dnscrypt service port number
1237 * @param dscp: DSCP to use.
1238 * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to
1239 * wait to discard if UDP packets have waited for long in the socket
1240 * buffer.
1241 * @return: returns false on error.
1242 */
1243 static int
ports_create_if(const char * ifname,int do_auto,int do_udp,int do_tcp,struct addrinfo * hints,const char * port,struct listen_port ** list,size_t rcv,size_t snd,int ssl_port,struct config_strlist * tls_additional_port,int https_port,struct config_strlist * proxy_protocol_port,int * reuseport,int transparent,int tcp_mss,int freebind,int http2_nodelay,int use_systemd,int dnscrypt_port,int dscp,int sock_queue_timeout)1244 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1245 struct addrinfo *hints, const char* port, struct listen_port** list,
1246 size_t rcv, size_t snd, int ssl_port,
1247 struct config_strlist* tls_additional_port, int https_port,
1248 struct config_strlist* proxy_protocol_port,
1249 int* reuseport, int transparent, int tcp_mss, int freebind,
1250 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp,
1251 int sock_queue_timeout)
1252 {
1253 int s, noip6=0;
1254 int is_https = if_is_https(ifname, port, https_port);
1255 int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port);
1256 int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port);
1257 int nodelay = is_https && http2_nodelay;
1258 struct unbound_socket* ub_sock;
1259
1260 if(!do_udp && !do_tcp)
1261 return 0;
1262
1263 if(is_pp2) {
1264 if(is_dnscrypt) {
1265 fatal_exit("PROXYv2 and DNSCrypt combination not "
1266 "supported!");
1267 } else if(is_https) {
1268 fatal_exit("PROXYv2 and DoH combination not "
1269 "supported!");
1270 }
1271 }
1272
1273 if(do_auto) {
1274 ub_sock = calloc(1, sizeof(struct unbound_socket));
1275 if(!ub_sock)
1276 return 0;
1277 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1278 &noip6, rcv, snd, reuseport, transparent,
1279 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1280 if(ub_sock->addr)
1281 freeaddrinfo(ub_sock->addr);
1282 free(ub_sock);
1283 if(noip6) {
1284 log_warn("IPv6 protocol not available");
1285 return 1;
1286 }
1287 return 0;
1288 }
1289 /* getting source addr packet info is highly non-portable */
1290 if(!set_recvpktinfo(s, hints->ai_family)) {
1291 sock_close(s);
1292 if(ub_sock->addr)
1293 freeaddrinfo(ub_sock->addr);
1294 free(ub_sock);
1295 return 0;
1296 }
1297 if (sock_queue_timeout && !set_recvtimestamp(s)) {
1298 log_warn("socket timestamping is not available");
1299 }
1300 if(!port_insert(list, s, is_dnscrypt
1301 ?listen_type_udpancil_dnscrypt:listen_type_udpancil,
1302 is_pp2, ub_sock)) {
1303 sock_close(s);
1304 if(ub_sock->addr)
1305 freeaddrinfo(ub_sock->addr);
1306 free(ub_sock);
1307 return 0;
1308 }
1309 } else if(do_udp) {
1310 ub_sock = calloc(1, sizeof(struct unbound_socket));
1311 if(!ub_sock)
1312 return 0;
1313 /* regular udp socket */
1314 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1315 &noip6, rcv, snd, reuseport, transparent,
1316 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1317 if(ub_sock->addr)
1318 freeaddrinfo(ub_sock->addr);
1319 free(ub_sock);
1320 if(noip6) {
1321 log_warn("IPv6 protocol not available");
1322 return 1;
1323 }
1324 return 0;
1325 }
1326 if (sock_queue_timeout && !set_recvtimestamp(s)) {
1327 log_warn("socket timestamping is not available");
1328 }
1329 if(!port_insert(list, s, is_dnscrypt
1330 ?listen_type_udp_dnscrypt :
1331 (sock_queue_timeout ?
1332 listen_type_udpancil:listen_type_udp),
1333 is_pp2, ub_sock)) {
1334 sock_close(s);
1335 if(ub_sock->addr)
1336 freeaddrinfo(ub_sock->addr);
1337 free(ub_sock);
1338 return 0;
1339 }
1340 }
1341 if(do_tcp) {
1342 int is_ssl = if_is_ssl(ifname, port, ssl_port,
1343 tls_additional_port);
1344 enum listen_type port_type;
1345 ub_sock = calloc(1, sizeof(struct unbound_socket));
1346 if(!ub_sock)
1347 return 0;
1348 if(is_ssl)
1349 port_type = listen_type_ssl;
1350 else if(is_https)
1351 port_type = listen_type_http;
1352 else if(is_dnscrypt)
1353 port_type = listen_type_tcp_dnscrypt;
1354 else
1355 port_type = listen_type_tcp;
1356 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1357 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1358 freebind, use_systemd, dscp, ub_sock)) == -1) {
1359 if(ub_sock->addr)
1360 freeaddrinfo(ub_sock->addr);
1361 free(ub_sock);
1362 if(noip6) {
1363 /*log_warn("IPv6 protocol not available");*/
1364 return 1;
1365 }
1366 return 0;
1367 }
1368 if(is_ssl)
1369 verbose(VERB_ALGO, "setup TCP for SSL service");
1370 if(!port_insert(list, s, port_type, is_pp2, ub_sock)) {
1371 sock_close(s);
1372 if(ub_sock->addr)
1373 freeaddrinfo(ub_sock->addr);
1374 free(ub_sock);
1375 return 0;
1376 }
1377 }
1378 return 1;
1379 }
1380
1381 /**
1382 * Add items to commpoint list in front.
1383 * @param c: commpoint to add.
1384 * @param front: listen struct.
1385 * @return: false on failure.
1386 */
1387 static int
listen_cp_insert(struct comm_point * c,struct listen_dnsport * front)1388 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1389 {
1390 struct listen_list* item = (struct listen_list*)malloc(
1391 sizeof(struct listen_list));
1392 if(!item)
1393 return 0;
1394 item->com = c;
1395 item->next = front->cps;
1396 front->cps = item;
1397 return 1;
1398 }
1399
listen_setup_locks(void)1400 void listen_setup_locks(void)
1401 {
1402 if(!stream_wait_lock_inited) {
1403 lock_basic_init(&stream_wait_count_lock);
1404 stream_wait_lock_inited = 1;
1405 }
1406 if(!http2_query_buffer_lock_inited) {
1407 lock_basic_init(&http2_query_buffer_count_lock);
1408 http2_query_buffer_lock_inited = 1;
1409 }
1410 if(!http2_response_buffer_lock_inited) {
1411 lock_basic_init(&http2_response_buffer_count_lock);
1412 http2_response_buffer_lock_inited = 1;
1413 }
1414 }
1415
listen_desetup_locks(void)1416 void listen_desetup_locks(void)
1417 {
1418 if(stream_wait_lock_inited) {
1419 stream_wait_lock_inited = 0;
1420 lock_basic_destroy(&stream_wait_count_lock);
1421 }
1422 if(http2_query_buffer_lock_inited) {
1423 http2_query_buffer_lock_inited = 0;
1424 lock_basic_destroy(&http2_query_buffer_count_lock);
1425 }
1426 if(http2_response_buffer_lock_inited) {
1427 http2_response_buffer_lock_inited = 0;
1428 lock_basic_destroy(&http2_response_buffer_count_lock);
1429 }
1430 }
1431
1432 struct listen_dnsport*
listen_create(struct comm_base * base,struct listen_port * ports,size_t bufsize,int tcp_accept_count,int tcp_idle_timeout,int harden_large_queries,uint32_t http_max_streams,char * http_endpoint,int http_notls,struct tcl_list * tcp_conn_limit,void * sslctx,struct dt_env * dtenv,comm_point_callback_type * cb,void * cb_arg)1433 listen_create(struct comm_base* base, struct listen_port* ports,
1434 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1435 int harden_large_queries, uint32_t http_max_streams,
1436 char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
1437 void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
1438 void *cb_arg)
1439 {
1440 struct listen_dnsport* front = (struct listen_dnsport*)
1441 malloc(sizeof(struct listen_dnsport));
1442 if(!front)
1443 return NULL;
1444 front->cps = NULL;
1445 front->udp_buff = sldns_buffer_new(bufsize);
1446 #ifdef USE_DNSCRYPT
1447 front->dnscrypt_udp_buff = NULL;
1448 #endif
1449 if(!front->udp_buff) {
1450 free(front);
1451 return NULL;
1452 }
1453
1454 /* create comm points as needed */
1455 while(ports) {
1456 struct comm_point* cp = NULL;
1457 if(ports->ftype == listen_type_udp ||
1458 ports->ftype == listen_type_udp_dnscrypt) {
1459 cp = comm_point_create_udp(base, ports->fd,
1460 front->udp_buff, ports->pp2_enabled, cb,
1461 cb_arg, ports->socket);
1462 } else if(ports->ftype == listen_type_tcp ||
1463 ports->ftype == listen_type_tcp_dnscrypt) {
1464 cp = comm_point_create_tcp(base, ports->fd,
1465 tcp_accept_count, tcp_idle_timeout,
1466 harden_large_queries, 0, NULL,
1467 tcp_conn_limit, bufsize, front->udp_buff,
1468 ports->ftype, ports->pp2_enabled, cb, cb_arg,
1469 ports->socket);
1470 } else if(ports->ftype == listen_type_ssl ||
1471 ports->ftype == listen_type_http) {
1472 cp = comm_point_create_tcp(base, ports->fd,
1473 tcp_accept_count, tcp_idle_timeout,
1474 harden_large_queries,
1475 http_max_streams, http_endpoint,
1476 tcp_conn_limit, bufsize, front->udp_buff,
1477 ports->ftype, ports->pp2_enabled, cb, cb_arg,
1478 ports->socket);
1479 if(ports->ftype == listen_type_http) {
1480 if(!sslctx && !http_notls) {
1481 log_warn("HTTPS port configured, but "
1482 "no TLS tls-service-key or "
1483 "tls-service-pem set");
1484 }
1485 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
1486 if(!http_notls) {
1487 log_warn("Unbound is not compiled "
1488 "with an OpenSSL version "
1489 "supporting ALPN "
1490 "(OpenSSL >= 1.0.2). This "
1491 "is required to use "
1492 "DNS-over-HTTPS");
1493 }
1494 #endif
1495 #ifndef HAVE_NGHTTP2_NGHTTP2_H
1496 log_warn("Unbound is not compiled with "
1497 "nghttp2. This is required to use "
1498 "DNS-over-HTTPS.");
1499 #endif
1500 }
1501 } else if(ports->ftype == listen_type_udpancil ||
1502 ports->ftype == listen_type_udpancil_dnscrypt) {
1503 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
1504 cp = comm_point_create_udp_ancil(base, ports->fd,
1505 front->udp_buff, ports->pp2_enabled, cb,
1506 cb_arg, ports->socket);
1507 #else
1508 log_warn("This system does not support UDP ancilliary data.");
1509 #endif
1510 }
1511 if(!cp) {
1512 log_err("can't create commpoint");
1513 listen_delete(front);
1514 return NULL;
1515 }
1516 if((http_notls && ports->ftype == listen_type_http) ||
1517 (ports->ftype == listen_type_tcp) ||
1518 (ports->ftype == listen_type_udp) ||
1519 (ports->ftype == listen_type_udpancil) ||
1520 (ports->ftype == listen_type_tcp_dnscrypt) ||
1521 (ports->ftype == listen_type_udp_dnscrypt) ||
1522 (ports->ftype == listen_type_udpancil_dnscrypt))
1523 cp->ssl = NULL;
1524 else
1525 cp->ssl = sslctx;
1526 cp->dtenv = dtenv;
1527 cp->do_not_close = 1;
1528 #ifdef USE_DNSCRYPT
1529 if (ports->ftype == listen_type_udp_dnscrypt ||
1530 ports->ftype == listen_type_tcp_dnscrypt ||
1531 ports->ftype == listen_type_udpancil_dnscrypt) {
1532 cp->dnscrypt = 1;
1533 cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1534 if(!cp->dnscrypt_buffer) {
1535 log_err("can't alloc dnscrypt_buffer");
1536 comm_point_delete(cp);
1537 listen_delete(front);
1538 return NULL;
1539 }
1540 front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1541 }
1542 #endif
1543 if(!listen_cp_insert(cp, front)) {
1544 log_err("malloc failed");
1545 comm_point_delete(cp);
1546 listen_delete(front);
1547 return NULL;
1548 }
1549 ports = ports->next;
1550 }
1551 if(!front->cps) {
1552 log_err("Could not open sockets to accept queries.");
1553 listen_delete(front);
1554 return NULL;
1555 }
1556
1557 return front;
1558 }
1559
1560 void
listen_list_delete(struct listen_list * list)1561 listen_list_delete(struct listen_list* list)
1562 {
1563 struct listen_list *p = list, *pn;
1564 while(p) {
1565 pn = p->next;
1566 comm_point_delete(p->com);
1567 free(p);
1568 p = pn;
1569 }
1570 }
1571
1572 void
listen_delete(struct listen_dnsport * front)1573 listen_delete(struct listen_dnsport* front)
1574 {
1575 if(!front)
1576 return;
1577 listen_list_delete(front->cps);
1578 #ifdef USE_DNSCRYPT
1579 if(front->dnscrypt_udp_buff &&
1580 front->udp_buff != front->dnscrypt_udp_buff) {
1581 sldns_buffer_free(front->dnscrypt_udp_buff);
1582 }
1583 #endif
1584 sldns_buffer_free(front->udp_buff);
1585 free(front);
1586 }
1587
#ifdef HAVE_GETIFADDRS
/**
 * Grow the string array by one slot and store a copy of str in it.
 * The count is only incremented once the copy has succeeded.
 * @return 0 on allocation failure (logged), 1 on success.
 */
static int
resolve_ifa_append(char*** ip_addresses, int* ip_addresses_size,
	const char* str)
{
	void* grown = realloc(*ip_addresses,
		sizeof(char *) * (*ip_addresses_size + 1));
	if(!grown) {
		log_err("realloc failed: out of memory");
		return 0;
	}
	*ip_addresses = grown;
	(*ip_addresses)[*ip_addresses_size] = strdup(str);
	if(!(*ip_addresses)[*ip_addresses_size]) {
		log_err("strdup failed: out of memory");
		return 0;
	}
	(*ip_addresses_size)++;
	return 1;
}

/**
 * Translate an interface name, optionally followed by "@port", into the
 * addresses of that interface and append them to the ip_addresses array.
 * The "@port" suffix is copied onto every address. If nothing matched,
 * search_ifa itself is appended unchanged.
 * @return 0 on failure (inet_ntop or allocation), 1 on success.
 */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
	const int count_before = *ip_addresses_size;
	struct ifaddrs* cur;

	for(cur = ifas; cur != NULL; cur = cur->ifa_next) {
		sa_family_t family;
		const char* suffix;
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
		char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
		char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

		/* compare the name part in front of the last '@', if any */
		suffix = strrchr(search_ifa, '@');
		if(suffix != NULL) {
			size_t namelen = (size_t)(suffix - search_ifa);
			if(strlen(cur->ifa_name) != namelen ||
				strncmp(cur->ifa_name, search_ifa, namelen) != 0)
				continue;
		} else {
			if(strcmp(cur->ifa_name, search_ifa) != 0)
				continue;
			suffix = "";
		}

		if(cur->ifa_addr == NULL)
			continue;

		family = cur->ifa_addr->sa_family;
		if(family == AF_INET) {
			struct sockaddr_in* sin = (struct sockaddr_in*)
				cur->ifa_addr;
			char text4[INET_ADDRSTRLEN + 1];
			if(!inet_ntop(family, &sin->sin_addr, text4,
				sizeof(text4))) {
				log_err("inet_ntop failed");
				return 0;
			}
			snprintf(addr_buf, sizeof(addr_buf), "%s%s",
				text4, suffix);
		}
#ifdef INET6
		else if(family == AF_INET6) {
			struct sockaddr_in6* sin6 = (struct sockaddr_in6*)
				cur->ifa_addr;
			char text6[INET6_ADDRSTRLEN + 1];
			char scope_name[IF_NAMESIZE + 1];
			scope_name[0] = 0;
			if(!inet_ntop(family, &sin6->sin6_addr, text6,
				sizeof(text6))) {
				log_err("inet_ntop failed");
				return 0;
			}
			/* stays empty if the scope id has no interface name */
			(void)if_indextoname(sin6->sin6_scope_id,
				(char *)scope_name);
			if(scope_name[0] != 0) {
				snprintf(addr_buf, sizeof(addr_buf),
					"%s%%%s%s", text6, scope_name, suffix);
			} else {
				snprintf(addr_buf, sizeof(addr_buf), "%s%s",
					text6, suffix);
			}
		}
#endif
		else {
			continue;
		}
		verbose(4, "interface %s has address %s", search_ifa, addr_buf);

		if(!resolve_ifa_append(ip_addresses, ip_addresses_size,
			addr_buf))
			return 0;
	}

	/* nothing was added: keep the text as the user wrote it */
	if(*ip_addresses_size == count_before) {
		if(!resolve_ifa_append(ip_addresses, ip_addresses_size,
			search_ifa))
			return 0;
	}
	return 1;
}
#endif /* HAVE_GETIFADDRS */
1691
resolve_interface_names(char ** ifs,int num_ifs,struct config_strlist * list,char *** resif,int * num_resif)1692 int resolve_interface_names(char** ifs, int num_ifs,
1693 struct config_strlist* list, char*** resif, int* num_resif)
1694 {
1695 #ifdef HAVE_GETIFADDRS
1696 struct ifaddrs *addrs = NULL;
1697 if(num_ifs == 0 && list == NULL) {
1698 *resif = NULL;
1699 *num_resif = 0;
1700 return 1;
1701 }
1702 if(getifaddrs(&addrs) == -1) {
1703 log_err("failed to list interfaces: getifaddrs: %s",
1704 strerror(errno));
1705 freeifaddrs(addrs);
1706 return 0;
1707 }
1708 if(ifs) {
1709 int i;
1710 for(i=0; i<num_ifs; i++) {
1711 if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
1712 freeifaddrs(addrs);
1713 config_del_strarray(*resif, *num_resif);
1714 *resif = NULL;
1715 *num_resif = 0;
1716 return 0;
1717 }
1718 }
1719 }
1720 if(list) {
1721 struct config_strlist* p;
1722 for(p = list; p; p = p->next) {
1723 if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
1724 freeifaddrs(addrs);
1725 config_del_strarray(*resif, *num_resif);
1726 *resif = NULL;
1727 *num_resif = 0;
1728 return 0;
1729 }
1730 }
1731 }
1732 freeifaddrs(addrs);
1733 return 1;
1734 #else
1735 struct config_strlist* p;
1736 if(num_ifs == 0 && list == NULL) {
1737 *resif = NULL;
1738 *num_resif = 0;
1739 return 1;
1740 }
1741 *num_resif = num_ifs;
1742 for(p = list; p; p = p->next) {
1743 (*num_resif)++;
1744 }
1745 *resif = calloc(*num_resif, sizeof(**resif));
1746 if(!*resif) {
1747 log_err("out of memory");
1748 return 0;
1749 }
1750 if(ifs) {
1751 int i;
1752 for(i=0; i<num_ifs; i++) {
1753 (*resif)[i] = strdup(ifs[i]);
1754 if(!((*resif)[i])) {
1755 log_err("out of memory");
1756 config_del_strarray(*resif, *num_resif);
1757 *resif = NULL;
1758 *num_resif = 0;
1759 return 0;
1760 }
1761 }
1762 }
1763 if(list) {
1764 int idx = num_ifs;
1765 for(p = list; p; p = p->next) {
1766 (*resif)[idx] = strdup(p->str);
1767 if(!((*resif)[idx])) {
1768 log_err("out of memory");
1769 config_del_strarray(*resif, *num_resif);
1770 *resif = NULL;
1771 *num_resif = 0;
1772 return 0;
1773 }
1774 idx++;
1775 }
1776 }
1777 return 1;
1778 #endif /* HAVE_GETIFADDRS */
1779 }
1780
1781 struct listen_port*
listening_ports_open(struct config_file * cfg,char ** ifs,int num_ifs,int * reuseport)1782 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1783 int* reuseport)
1784 {
1785 struct listen_port* list = NULL;
1786 struct addrinfo hints;
1787 int i, do_ip4, do_ip6;
1788 int do_tcp, do_auto;
1789 char portbuf[32];
1790 snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1791 do_ip4 = cfg->do_ip4;
1792 do_ip6 = cfg->do_ip6;
1793 do_tcp = cfg->do_tcp;
1794 do_auto = cfg->if_automatic && cfg->do_udp;
1795 if(cfg->incoming_num_tcp == 0)
1796 do_tcp = 0;
1797
1798 /* getaddrinfo */
1799 memset(&hints, 0, sizeof(hints));
1800 hints.ai_flags = AI_PASSIVE;
1801 /* no name lookups on our listening ports */
1802 if(num_ifs > 0)
1803 hints.ai_flags |= AI_NUMERICHOST;
1804 hints.ai_family = AF_UNSPEC;
1805 #ifndef INET6
1806 do_ip6 = 0;
1807 #endif
1808 if(!do_ip4 && !do_ip6) {
1809 return NULL;
1810 }
1811 /* create ip4 and ip6 ports so that return addresses are nice. */
1812 if(do_auto || num_ifs == 0) {
1813 if(do_auto && cfg->if_automatic_ports &&
1814 cfg->if_automatic_ports[0]!=0) {
1815 char* now = cfg->if_automatic_ports;
1816 while(now && *now) {
1817 char* after;
1818 int extraport;
1819 while(isspace((unsigned char)*now))
1820 now++;
1821 if(!*now)
1822 break;
1823 after = now;
1824 extraport = (int)strtol(now, &after, 10);
1825 if(extraport < 0 || extraport > 65535) {
1826 log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1827 listening_ports_free(list);
1828 return NULL;
1829 }
1830 if(extraport == 0 && now == after) {
1831 log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports);
1832 listening_ports_free(list);
1833 return NULL;
1834 }
1835 now = after;
1836 snprintf(portbuf, sizeof(portbuf), "%d", extraport);
1837 if(do_ip6) {
1838 hints.ai_family = AF_INET6;
1839 if(!ports_create_if("::0",
1840 do_auto, cfg->do_udp, do_tcp,
1841 &hints, portbuf, &list,
1842 cfg->so_rcvbuf, cfg->so_sndbuf,
1843 cfg->ssl_port, cfg->tls_additional_port,
1844 cfg->https_port,
1845 cfg->proxy_protocol_port,
1846 reuseport, cfg->ip_transparent,
1847 cfg->tcp_mss, cfg->ip_freebind,
1848 cfg->http_nodelay, cfg->use_systemd,
1849 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1850 listening_ports_free(list);
1851 return NULL;
1852 }
1853 }
1854 if(do_ip4) {
1855 hints.ai_family = AF_INET;
1856 if(!ports_create_if("0.0.0.0",
1857 do_auto, cfg->do_udp, do_tcp,
1858 &hints, portbuf, &list,
1859 cfg->so_rcvbuf, cfg->so_sndbuf,
1860 cfg->ssl_port, cfg->tls_additional_port,
1861 cfg->https_port,
1862 cfg->proxy_protocol_port,
1863 reuseport, cfg->ip_transparent,
1864 cfg->tcp_mss, cfg->ip_freebind,
1865 cfg->http_nodelay, cfg->use_systemd,
1866 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1867 listening_ports_free(list);
1868 return NULL;
1869 }
1870 }
1871 }
1872 return list;
1873 }
1874 if(do_ip6) {
1875 hints.ai_family = AF_INET6;
1876 if(!ports_create_if(do_auto?"::0":"::1",
1877 do_auto, cfg->do_udp, do_tcp,
1878 &hints, portbuf, &list,
1879 cfg->so_rcvbuf, cfg->so_sndbuf,
1880 cfg->ssl_port, cfg->tls_additional_port,
1881 cfg->https_port, cfg->proxy_protocol_port,
1882 reuseport, cfg->ip_transparent,
1883 cfg->tcp_mss, cfg->ip_freebind,
1884 cfg->http_nodelay, cfg->use_systemd,
1885 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1886 listening_ports_free(list);
1887 return NULL;
1888 }
1889 }
1890 if(do_ip4) {
1891 hints.ai_family = AF_INET;
1892 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1893 do_auto, cfg->do_udp, do_tcp,
1894 &hints, portbuf, &list,
1895 cfg->so_rcvbuf, cfg->so_sndbuf,
1896 cfg->ssl_port, cfg->tls_additional_port,
1897 cfg->https_port, cfg->proxy_protocol_port,
1898 reuseport, cfg->ip_transparent,
1899 cfg->tcp_mss, cfg->ip_freebind,
1900 cfg->http_nodelay, cfg->use_systemd,
1901 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1902 listening_ports_free(list);
1903 return NULL;
1904 }
1905 }
1906 } else for(i = 0; i<num_ifs; i++) {
1907 if(str_is_ip6(ifs[i])) {
1908 if(!do_ip6)
1909 continue;
1910 hints.ai_family = AF_INET6;
1911 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1912 do_tcp, &hints, portbuf, &list,
1913 cfg->so_rcvbuf, cfg->so_sndbuf,
1914 cfg->ssl_port, cfg->tls_additional_port,
1915 cfg->https_port, cfg->proxy_protocol_port,
1916 reuseport, cfg->ip_transparent,
1917 cfg->tcp_mss, cfg->ip_freebind,
1918 cfg->http_nodelay, cfg->use_systemd,
1919 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1920 listening_ports_free(list);
1921 return NULL;
1922 }
1923 } else {
1924 if(!do_ip4)
1925 continue;
1926 hints.ai_family = AF_INET;
1927 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1928 do_tcp, &hints, portbuf, &list,
1929 cfg->so_rcvbuf, cfg->so_sndbuf,
1930 cfg->ssl_port, cfg->tls_additional_port,
1931 cfg->https_port, cfg->proxy_protocol_port,
1932 reuseport, cfg->ip_transparent,
1933 cfg->tcp_mss, cfg->ip_freebind,
1934 cfg->http_nodelay, cfg->use_systemd,
1935 cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
1936 listening_ports_free(list);
1937 return NULL;
1938 }
1939 }
1940 }
1941
1942 return list;
1943 }
1944
listening_ports_free(struct listen_port * list)1945 void listening_ports_free(struct listen_port* list)
1946 {
1947 struct listen_port* nx;
1948 while(list) {
1949 nx = list->next;
1950 if(list->fd != -1) {
1951 sock_close(list->fd);
1952 }
1953 /* rc_ports don't have ub_socket */
1954 if(list->socket) {
1955 if(list->socket->addr)
1956 freeaddrinfo(list->socket->addr);
1957 free(list->socket);
1958 }
1959 free(list);
1960 list = nx;
1961 }
1962 }
1963
listen_get_mem(struct listen_dnsport * listen)1964 size_t listen_get_mem(struct listen_dnsport* listen)
1965 {
1966 struct listen_list* p;
1967 size_t s = sizeof(*listen) + sizeof(*listen->base) +
1968 sizeof(*listen->udp_buff) +
1969 sldns_buffer_capacity(listen->udp_buff);
1970 #ifdef USE_DNSCRYPT
1971 s += sizeof(*listen->dnscrypt_udp_buff);
1972 if(listen->udp_buff != listen->dnscrypt_udp_buff){
1973 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1974 }
1975 #endif
1976 for(p = listen->cps; p; p = p->next) {
1977 s += sizeof(*p);
1978 s += comm_point_get_mem(p->com);
1979 }
1980 return s;
1981 }
1982
listen_stop_accept(struct listen_dnsport * listen)1983 void listen_stop_accept(struct listen_dnsport* listen)
1984 {
1985 /* do not stop the ones that have no tcp_free list
1986 * (they have already stopped listening) */
1987 struct listen_list* p;
1988 for(p=listen->cps; p; p=p->next) {
1989 if(p->com->type == comm_tcp_accept &&
1990 p->com->tcp_free != NULL) {
1991 comm_point_stop_listening(p->com);
1992 }
1993 }
1994 }
1995
listen_start_accept(struct listen_dnsport * listen)1996 void listen_start_accept(struct listen_dnsport* listen)
1997 {
1998 /* do not start the ones that have no tcp_free list, it is no
1999 * use to listen to them because they have no free tcp handlers */
2000 struct listen_list* p;
2001 for(p=listen->cps; p; p=p->next) {
2002 if(p->com->type == comm_tcp_accept &&
2003 p->com->tcp_free != NULL) {
2004 comm_point_start_listening(p->com, -1, -1);
2005 }
2006 }
2007 }
2008
2009 struct tcp_req_info*
tcp_req_info_create(struct sldns_buffer * spoolbuf)2010 tcp_req_info_create(struct sldns_buffer* spoolbuf)
2011 {
2012 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
2013 if(!req) {
2014 log_err("malloc failure for new stream outoforder processing structure");
2015 return NULL;
2016 }
2017 memset(req, 0, sizeof(*req));
2018 req->spool_buffer = spoolbuf;
2019 return req;
2020 }
2021
/** Clear and free a tcp_req_info; a NULL argument is ignored. */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req) {
		tcp_req_info_clear(req);
		/* req->cp points back to the commpoint that owns this
		 * struct and called delete on us; it is not touched here.
		 * The spool_buffer is the shared udp buffer and is not
		 * deleted here either. */
		free(req);
	}
}
2032
tcp_req_info_clear(struct tcp_req_info * req)2033 void tcp_req_info_clear(struct tcp_req_info* req)
2034 {
2035 struct tcp_req_open_item* open, *nopen;
2036 struct tcp_req_done_item* item, *nitem;
2037 if(!req) return;
2038
2039 /* free outstanding request mesh reply entries */
2040 open = req->open_req_list;
2041 while(open) {
2042 nopen = open->next;
2043 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
2044 free(open);
2045 open = nopen;
2046 }
2047 req->open_req_list = NULL;
2048 req->num_open_req = 0;
2049
2050 /* free pending writable result packets */
2051 item = req->done_req_list;
2052 while(item) {
2053 nitem = item->next;
2054 lock_basic_lock(&stream_wait_count_lock);
2055 stream_wait_count -= (sizeof(struct tcp_req_done_item)
2056 +item->len);
2057 lock_basic_unlock(&stream_wait_count_lock);
2058 free(item->buf);
2059 free(item);
2060 item = nitem;
2061 }
2062 req->done_req_list = NULL;
2063 req->num_done_req = 0;
2064 req->read_is_closed = 0;
2065 }
2066
2067 void
tcp_req_info_remove_mesh_state(struct tcp_req_info * req,struct mesh_state * m)2068 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
2069 {
2070 struct tcp_req_open_item* open, *prev = NULL;
2071 if(!req || !m) return;
2072 open = req->open_req_list;
2073 while(open) {
2074 if(open->mesh_state == m) {
2075 struct tcp_req_open_item* next;
2076 if(prev) prev->next = open->next;
2077 else req->open_req_list = open->next;
2078 /* caller has to manage the mesh state reply entry */
2079 next = open->next;
2080 free(open);
2081 req->num_open_req --;
2082
2083 /* prev = prev; */
2084 open = next;
2085 continue;
2086 }
2087 prev = open;
2088 open = open->next;
2089 }
2090 }
2091
2092 /** setup listening for read or write */
2093 static void
tcp_req_info_setup_listen(struct tcp_req_info * req)2094 tcp_req_info_setup_listen(struct tcp_req_info* req)
2095 {
2096 int wr = 0;
2097 int rd = 0;
2098
2099 if(req->cp->tcp_byte_count != 0) {
2100 /* cannot change, halfway through */
2101 return;
2102 }
2103
2104 if(!req->cp->tcp_is_reading)
2105 wr = 1;
2106 if(!req->read_is_closed)
2107 rd = 1;
2108
2109 if(wr) {
2110 req->cp->tcp_is_reading = 0;
2111 comm_point_stop_listening(req->cp);
2112 comm_point_start_listening(req->cp, -1,
2113 adjusted_tcp_timeout(req->cp));
2114 } else if(rd) {
2115 req->cp->tcp_is_reading = 1;
2116 comm_point_stop_listening(req->cp);
2117 comm_point_start_listening(req->cp, -1,
2118 adjusted_tcp_timeout(req->cp));
2119 /* and also read it (from SSL stack buffers), so
2120 * no event read event is expected since the remainder of
2121 * the TLS frame is sitting in the buffers. */
2122 req->read_again = 1;
2123 } else {
2124 comm_point_stop_listening(req->cp);
2125 comm_point_start_listening(req->cp, -1,
2126 adjusted_tcp_timeout(req->cp));
2127 comm_point_listen_for_rw(req->cp, 0, 0);
2128 }
2129 }
2130
2131 /** remove first item from list of pending results */
2132 static struct tcp_req_done_item*
tcp_req_info_pop_done(struct tcp_req_info * req)2133 tcp_req_info_pop_done(struct tcp_req_info* req)
2134 {
2135 struct tcp_req_done_item* item;
2136 log_assert(req->num_done_req > 0 && req->done_req_list);
2137 item = req->done_req_list;
2138 lock_basic_lock(&stream_wait_count_lock);
2139 stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
2140 lock_basic_unlock(&stream_wait_count_lock);
2141 req->done_req_list = req->done_req_list->next;
2142 req->num_done_req --;
2143 return item;
2144 }
2145
2146 /** Send given buffer and setup to write */
2147 static void
tcp_req_info_start_write_buf(struct tcp_req_info * req,uint8_t * buf,size_t len)2148 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
2149 size_t len)
2150 {
2151 sldns_buffer_clear(req->cp->buffer);
2152 sldns_buffer_write(req->cp->buffer, buf, len);
2153 sldns_buffer_flip(req->cp->buffer);
2154
2155 req->cp->tcp_is_reading = 0; /* we are now writing */
2156 }
2157
2158 /** pick up the next result and start writing it to the channel */
2159 static void
tcp_req_pickup_next_result(struct tcp_req_info * req)2160 tcp_req_pickup_next_result(struct tcp_req_info* req)
2161 {
2162 if(req->num_done_req > 0) {
2163 /* unlist the done item from the list of pending results */
2164 struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
2165 tcp_req_info_start_write_buf(req, item->buf, item->len);
2166 free(item->buf);
2167 free(item);
2168 }
2169 }
2170
2171 /** the read channel has closed */
2172 int
tcp_req_info_handle_read_close(struct tcp_req_info * req)2173 tcp_req_info_handle_read_close(struct tcp_req_info* req)
2174 {
2175 verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
2176 /* reset byte count for (potential) partial read */
2177 req->cp->tcp_byte_count = 0;
2178 /* if we still have results to write, pick up next and write it */
2179 if(req->num_done_req != 0) {
2180 tcp_req_pickup_next_result(req);
2181 tcp_req_info_setup_listen(req);
2182 return 1;
2183 }
2184 /* if nothing to do, this closes the connection */
2185 if(req->num_open_req == 0 && req->num_done_req == 0)
2186 return 0;
2187 /* otherwise, we must be waiting for dns resolve, wait with timeout */
2188 req->read_is_closed = 1;
2189 tcp_req_info_setup_listen(req);
2190 return 1;
2191 }
2192
2193 void
tcp_req_info_handle_writedone(struct tcp_req_info * req)2194 tcp_req_info_handle_writedone(struct tcp_req_info* req)
2195 {
2196 /* back to reading state, we finished this write event */
2197 sldns_buffer_clear(req->cp->buffer);
2198 if(req->num_done_req == 0 && req->read_is_closed) {
2199 /* no more to write and nothing to read, close it */
2200 comm_point_drop_reply(&req->cp->repinfo);
2201 return;
2202 }
2203 req->cp->tcp_is_reading = 1;
2204 /* see if another result needs writing */
2205 tcp_req_pickup_next_result(req);
2206
2207 /* see if there is more to write, if not stop_listening for writing */
2208 /* see if new requests are allowed, if so, start_listening
2209 * for reading */
2210 tcp_req_info_setup_listen(req);
2211 }
2212
2213 void
tcp_req_info_handle_readdone(struct tcp_req_info * req)2214 tcp_req_info_handle_readdone(struct tcp_req_info* req)
2215 {
2216 struct comm_point* c = req->cp;
2217
2218 /* we want to read up several requests, unless there are
2219 * pending answers */
2220
2221 req->is_drop = 0;
2222 req->is_reply = 0;
2223 req->in_worker_handle = 1;
2224 sldns_buffer_set_limit(req->spool_buffer, 0);
2225 /* handle the current request */
2226 /* this calls the worker handle request routine that could give
2227 * a cache response, or localdata response, or drop the reply,
2228 * or schedule a mesh entry for later */
2229 fptr_ok(fptr_whitelist_comm_point(c->callback));
2230 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
2231 req->in_worker_handle = 0;
2232 /* there is an answer, put it up. It is already in the
2233 * c->buffer, just send it. */
2234 /* since we were just reading a query, the channel is
2235 * clear to write to */
2236 send_it:
2237 c->tcp_is_reading = 0;
2238 comm_point_stop_listening(c);
2239 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
2240 return;
2241 }
2242 req->in_worker_handle = 0;
2243 /* it should be waiting in the mesh for recursion.
2244 * If mesh failed to add a new entry and called commpoint_drop_reply.
2245 * Then the mesh state has been cleared. */
2246 if(req->is_drop) {
2247 /* the reply has been dropped, stream has been closed. */
2248 return;
2249 }
2250 /* If mesh failed(mallocfail) and called commpoint_send_reply with
2251 * something like servfail then we pick up that reply below. */
2252 if(req->is_reply) {
2253 goto send_it;
2254 }
2255
2256 sldns_buffer_clear(c->buffer);
2257 /* if pending answers, pick up an answer and start sending it */
2258 tcp_req_pickup_next_result(req);
2259
2260 /* if answers pending, start sending answers */
2261 /* read more requests if we can have more requests */
2262 tcp_req_info_setup_listen(req);
2263 }
2264
2265 int
tcp_req_info_add_meshstate(struct tcp_req_info * req,struct mesh_area * mesh,struct mesh_state * m)2266 tcp_req_info_add_meshstate(struct tcp_req_info* req,
2267 struct mesh_area* mesh, struct mesh_state* m)
2268 {
2269 struct tcp_req_open_item* item;
2270 log_assert(req && mesh && m);
2271 item = (struct tcp_req_open_item*)malloc(sizeof(*item));
2272 if(!item) return 0;
2273 item->next = req->open_req_list;
2274 item->mesh = mesh;
2275 item->mesh_state = m;
2276 req->open_req_list = item;
2277 req->num_open_req++;
2278 return 1;
2279 }
2280
2281 /** Add a result to the result list. At the end. */
2282 static int
tcp_req_info_add_result(struct tcp_req_info * req,uint8_t * buf,size_t len)2283 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
2284 {
2285 struct tcp_req_done_item* last = NULL;
2286 struct tcp_req_done_item* item;
2287 size_t space;
2288
2289 /* see if we have space */
2290 space = sizeof(struct tcp_req_done_item) + len;
2291 lock_basic_lock(&stream_wait_count_lock);
2292 if(stream_wait_count + space > stream_wait_max) {
2293 lock_basic_unlock(&stream_wait_count_lock);
2294 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2295 return 0;
2296 }
2297 stream_wait_count += space;
2298 lock_basic_unlock(&stream_wait_count_lock);
2299
2300 /* find last element */
2301 last = req->done_req_list;
2302 while(last && last->next)
2303 last = last->next;
2304
2305 /* create new element */
2306 item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2307 if(!item) {
2308 log_err("malloc failure, for stream result list");
2309 return 0;
2310 }
2311 item->next = NULL;
2312 item->len = len;
2313 item->buf = memdup(buf, len);
2314 if(!item->buf) {
2315 free(item);
2316 log_err("malloc failure, adding reply to stream result list");
2317 return 0;
2318 }
2319
2320 /* link in */
2321 if(last) last->next = item;
2322 else req->done_req_list = item;
2323 req->num_done_req++;
2324 return 1;
2325 }
2326
2327 void
tcp_req_info_send_reply(struct tcp_req_info * req)2328 tcp_req_info_send_reply(struct tcp_req_info* req)
2329 {
2330 if(req->in_worker_handle) {
2331 /* reply from mesh is in the spool_buffer */
2332 /* copy now, so that the spool buffer is free for other tasks
2333 * before the callback is done */
2334 sldns_buffer_clear(req->cp->buffer);
2335 sldns_buffer_write(req->cp->buffer,
2336 sldns_buffer_begin(req->spool_buffer),
2337 sldns_buffer_limit(req->spool_buffer));
2338 sldns_buffer_flip(req->cp->buffer);
2339 req->is_reply = 1;
2340 return;
2341 }
2342 /* now that the query has been handled, that mesh_reply entry
2343 * should be removed, from the tcp_req_info list,
2344 * the mesh state cleanup removes then with region_cleanup and
2345 * replies_sent true. */
2346 /* see if we can send it straight away (we are not doing
2347 * anything else). If so, copy to buffer and start */
2348 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2349 /* buffer is free, and was ready to read new query into,
2350 * but we are now going to use it to send this answer */
2351 tcp_req_info_start_write_buf(req,
2352 sldns_buffer_begin(req->spool_buffer),
2353 sldns_buffer_limit(req->spool_buffer));
2354 /* switch to listen to write events */
2355 comm_point_stop_listening(req->cp);
2356 comm_point_start_listening(req->cp, -1,
2357 adjusted_tcp_timeout(req->cp));
2358 return;
2359 }
2360 /* queue up the answer behind the others already pending */
2361 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2362 sldns_buffer_limit(req->spool_buffer))) {
2363 /* drop the connection, we are out of resources */
2364 comm_point_drop_reply(&req->cp->repinfo);
2365 }
2366 }
2367
tcp_req_info_get_stream_buffer_size(void)2368 size_t tcp_req_info_get_stream_buffer_size(void)
2369 {
2370 size_t s;
2371 if(!stream_wait_lock_inited)
2372 return stream_wait_count;
2373 lock_basic_lock(&stream_wait_count_lock);
2374 s = stream_wait_count;
2375 lock_basic_unlock(&stream_wait_count_lock);
2376 return s;
2377 }
2378
http2_get_query_buffer_size(void)2379 size_t http2_get_query_buffer_size(void)
2380 {
2381 size_t s;
2382 if(!http2_query_buffer_lock_inited)
2383 return http2_query_buffer_count;
2384 lock_basic_lock(&http2_query_buffer_count_lock);
2385 s = http2_query_buffer_count;
2386 lock_basic_unlock(&http2_query_buffer_count_lock);
2387 return s;
2388 }
2389
http2_get_response_buffer_size(void)2390 size_t http2_get_response_buffer_size(void)
2391 {
2392 size_t s;
2393 if(!http2_response_buffer_lock_inited)
2394 return http2_response_buffer_count;
2395 lock_basic_lock(&http2_response_buffer_count_lock);
2396 s = http2_response_buffer_count;
2397 lock_basic_unlock(&http2_response_buffer_count_lock);
2398 return s;
2399 }
2400
2401 #ifdef HAVE_NGHTTP2
2402 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
http2_submit_response_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2403 static ssize_t http2_submit_response_read_callback(
2404 nghttp2_session* ATTR_UNUSED(session),
2405 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2406 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2407 {
2408 struct http2_stream* h2_stream;
2409 struct http2_session* h2_session = source->ptr;
2410 size_t copylen = length;
2411 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2412 h2_session->session, stream_id))) {
2413 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2414 "stream");
2415 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2416 }
2417 if(!h2_stream->rbuffer ||
2418 sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2419 verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2420 "available in rbuffer");
2421 /* rbuffer will be free'd in frame close cb */
2422 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2423 }
2424
2425 if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2426 copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2427 if(copylen > SSIZE_MAX)
2428 copylen = SSIZE_MAX; /* will probably never happen */
2429
2430 memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2431 sldns_buffer_skip(h2_stream->rbuffer, copylen);
2432
2433 if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2434 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2435 lock_basic_lock(&http2_response_buffer_count_lock);
2436 http2_response_buffer_count -=
2437 sldns_buffer_capacity(h2_stream->rbuffer);
2438 lock_basic_unlock(&http2_response_buffer_count_lock);
2439 sldns_buffer_free(h2_stream->rbuffer);
2440 h2_stream->rbuffer = NULL;
2441 }
2442
2443 return copylen;
2444 }
2445
2446 /**
2447 * Send RST_STREAM frame for stream.
2448 * @param h2_session: http2 session to submit frame to
2449 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2450 * @return 0 on error, 1 otherwise
2451 */
http2_submit_rst_stream(struct http2_session * h2_session,struct http2_stream * h2_stream)2452 static int http2_submit_rst_stream(struct http2_session* h2_session,
2453 struct http2_stream* h2_stream)
2454 {
2455 int ret = nghttp2_submit_rst_stream(h2_session->session,
2456 NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2457 NGHTTP2_INTERNAL_ERROR);
2458 if(ret) {
2459 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2460 "error: %s", nghttp2_strerror(ret));
2461 return 0;
2462 }
2463 return 1;
2464 }
2465
2466 /**
2467 * DNS response ready to be submitted to nghttp2, to be prepared for sending
2468 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2469 * might be used before this will be sent out.
2470 * @param h2_session: http2 session, containing c->buffer which contains answer
2471 * @return 0 on error, 1 otherwise
2472 */
http2_submit_dns_response(struct http2_session * h2_session)2473 int http2_submit_dns_response(struct http2_session* h2_session)
2474 {
2475 int ret;
2476 nghttp2_data_provider data_prd;
2477 char status[4];
2478 nghttp2_nv headers[3];
2479 struct http2_stream* h2_stream = h2_session->c->h2_stream;
2480 size_t rlen;
2481 char rlen_str[32];
2482
2483 if(h2_stream->rbuffer) {
2484 log_err("http2 submit response error: rbuffer already "
2485 "exists");
2486 return 0;
2487 }
2488 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2489 log_err("http2 submit response error: c->buffer not complete");
2490 return 0;
2491 }
2492
2493 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2494 verbose(VERB_QUERY, "http2: submit response error: "
2495 "invalid status");
2496 return 0;
2497 }
2498
2499 rlen = sldns_buffer_remaining(h2_session->c->buffer);
2500 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2501
2502 lock_basic_lock(&http2_response_buffer_count_lock);
2503 if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2504 lock_basic_unlock(&http2_response_buffer_count_lock);
2505 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2506 "in https-response-buffer-size");
2507 return http2_submit_rst_stream(h2_session, h2_stream);
2508 }
2509 http2_response_buffer_count += rlen;
2510 lock_basic_unlock(&http2_response_buffer_count_lock);
2511
2512 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2513 lock_basic_lock(&http2_response_buffer_count_lock);
2514 http2_response_buffer_count -= rlen;
2515 lock_basic_unlock(&http2_response_buffer_count_lock);
2516 log_err("http2 submit response error: malloc failure");
2517 return 0;
2518 }
2519
2520 headers[0].name = (uint8_t*)":status";
2521 headers[0].namelen = 7;
2522 headers[0].value = (uint8_t*)status;
2523 headers[0].valuelen = 3;
2524 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2525
2526 headers[1].name = (uint8_t*)"content-type";
2527 headers[1].namelen = 12;
2528 headers[1].value = (uint8_t*)"application/dns-message";
2529 headers[1].valuelen = 23;
2530 headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2531
2532 headers[2].name = (uint8_t*)"content-length";
2533 headers[2].namelen = 14;
2534 headers[2].value = (uint8_t*)rlen_str;
2535 headers[2].valuelen = strlen(rlen_str);
2536 headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2537
2538 sldns_buffer_write(h2_stream->rbuffer,
2539 sldns_buffer_current(h2_session->c->buffer),
2540 sldns_buffer_remaining(h2_session->c->buffer));
2541 sldns_buffer_flip(h2_stream->rbuffer);
2542
2543 data_prd.source.ptr = h2_session;
2544 data_prd.read_callback = http2_submit_response_read_callback;
2545 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2546 headers, 3, &data_prd);
2547 if(ret) {
2548 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2549 "error: %s", nghttp2_strerror(ret));
2550 return 0;
2551 }
2552 return 1;
2553 }
2554 #else
/** Variant for builds without HAVE_NGHTTP2: there is nothing to submit to,
 * so the error value 0 is always returned. */
int http2_submit_dns_response(void* ATTR_UNUSED(v))
{
	return 0;
}
2559 #endif
2560
2561 #ifdef HAVE_NGHTTP2
2562 /** HTTP status to descriptive string */
http_status_to_str(enum http_status s)2563 static char* http_status_to_str(enum http_status s)
2564 {
2565 switch(s) {
2566 case HTTP_STATUS_OK:
2567 return "OK";
2568 case HTTP_STATUS_BAD_REQUEST:
2569 return "Bad Request";
2570 case HTTP_STATUS_NOT_FOUND:
2571 return "Not Found";
2572 case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2573 return "Payload Too Large";
2574 case HTTP_STATUS_URI_TOO_LONG:
2575 return "URI Too Long";
2576 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2577 return "Unsupported Media Type";
2578 case HTTP_STATUS_NOT_IMPLEMENTED:
2579 return "Not Implemented";
2580 }
2581 return "Status Unknown";
2582 }
2583
2584 /** nghttp2 callback. Used to copy error message to nghttp2 session */
http2_submit_error_read_callback(nghttp2_session * ATTR_UNUSED (session),int32_t stream_id,uint8_t * buf,size_t length,uint32_t * data_flags,nghttp2_data_source * source,void * ATTR_UNUSED (cb_arg))2585 static ssize_t http2_submit_error_read_callback(
2586 nghttp2_session* ATTR_UNUSED(session),
2587 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2588 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2589 {
2590 struct http2_stream* h2_stream;
2591 struct http2_session* h2_session = source->ptr;
2592 char* msg;
2593 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2594 h2_session->session, stream_id))) {
2595 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2596 "stream");
2597 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2598 }
2599 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2600 msg = http_status_to_str(h2_stream->status);
2601 if(length < strlen(msg))
2602 return 0; /* not worth trying over multiple frames */
2603 memcpy(buf, msg, strlen(msg));
2604 return strlen(msg);
2605
2606 }
2607
2608 /**
2609 * HTTP error response ready to be submitted to nghttp2, to be prepared for
2610 * sending out. Message body will contain descriptive string for HTTP status.
2611 * @param h2_session: http2 session to submit to
2612 * @param h2_stream: http2 stream containing HTTP status to use for error
2613 * @return 0 on error, 1 otherwise
2614 */
http2_submit_error(struct http2_session * h2_session,struct http2_stream * h2_stream)2615 static int http2_submit_error(struct http2_session* h2_session,
2616 struct http2_stream* h2_stream)
2617 {
2618 int ret;
2619 char status[4];
2620 nghttp2_data_provider data_prd;
2621 nghttp2_nv headers[1]; /* will be copied by nghttp */
2622 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2623 verbose(VERB_QUERY, "http2: submit error failed, "
2624 "invalid status");
2625 return 0;
2626 }
2627 headers[0].name = (uint8_t*)":status";
2628 headers[0].namelen = 7;
2629 headers[0].value = (uint8_t*)status;
2630 headers[0].valuelen = 3;
2631 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2632
2633 data_prd.source.ptr = h2_session;
2634 data_prd.read_callback = http2_submit_error_read_callback;
2635
2636 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2637 headers, 1, &data_prd);
2638 if(ret) {
2639 verbose(VERB_QUERY, "http2: submit error failed, "
2640 "error: %s", nghttp2_strerror(ret));
2641 return 0;
2642 }
2643 return 1;
2644 }
2645
2646 /**
2647 * Start query handling. Query is stored in the stream, and will be free'd here.
2648 * @param h2_session: http2 session, containing comm point
2649 * @param h2_stream: stream containing buffered query
2650 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2651 * reply available (yet).
2652 */
http2_query_read_done(struct http2_session * h2_session,struct http2_stream * h2_stream)2653 static int http2_query_read_done(struct http2_session* h2_session,
2654 struct http2_stream* h2_stream)
2655 {
2656 log_assert(h2_stream->qbuffer);
2657
2658 if(h2_session->c->h2_stream) {
2659 verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2660 "buffer already assigned to stream");
2661 return -1;
2662 }
2663
2664 /* the c->buffer might be used by mesh_send_reply and no be cleard
2665 * need to be cleared before use */
2666 sldns_buffer_clear(h2_session->c->buffer);
2667 if(sldns_buffer_remaining(h2_session->c->buffer) <
2668 sldns_buffer_remaining(h2_stream->qbuffer)) {
2669 /* qbuffer will be free'd in frame close cb */
2670 sldns_buffer_clear(h2_session->c->buffer);
2671 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2672 "qbuffer in c->buffer");
2673 return -1;
2674 }
2675
2676 sldns_buffer_write(h2_session->c->buffer,
2677 sldns_buffer_current(h2_stream->qbuffer),
2678 sldns_buffer_remaining(h2_stream->qbuffer));
2679
2680 lock_basic_lock(&http2_query_buffer_count_lock);
2681 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2682 lock_basic_unlock(&http2_query_buffer_count_lock);
2683 sldns_buffer_free(h2_stream->qbuffer);
2684 h2_stream->qbuffer = NULL;
2685
2686 sldns_buffer_flip(h2_session->c->buffer);
2687 h2_session->c->h2_stream = h2_stream;
2688 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2689 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2690 NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2691 return 1; /* answer in c->buffer */
2692 }
2693 sldns_buffer_clear(h2_session->c->buffer);
2694 h2_session->c->h2_stream = NULL;
2695 return 0; /* mesh state added, or dropped */
2696 }
2697
2698 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2699 * stream. Gather collected request data and start query handling. */
http2_req_frame_recv_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2700 static int http2_req_frame_recv_cb(nghttp2_session* session,
2701 const nghttp2_frame* frame, void* cb_arg)
2702 {
2703 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2704 struct http2_stream* h2_stream;
2705 int query_read_done;
2706
2707 if((frame->hd.type != NGHTTP2_DATA &&
2708 frame->hd.type != NGHTTP2_HEADERS) ||
2709 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
2710 return 0;
2711 }
2712
2713 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2714 session, frame->hd.stream_id)))
2715 return 0;
2716
2717 if(h2_stream->invalid_endpoint) {
2718 h2_stream->status = HTTP_STATUS_NOT_FOUND;
2719 goto submit_http_error;
2720 }
2721
2722 if(h2_stream->invalid_content_type) {
2723 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2724 goto submit_http_error;
2725 }
2726
2727 if(h2_stream->http_method != HTTP_METHOD_GET &&
2728 h2_stream->http_method != HTTP_METHOD_POST) {
2729 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2730 goto submit_http_error;
2731 }
2732
2733 if(h2_stream->query_too_large) {
2734 if(h2_stream->http_method == HTTP_METHOD_POST)
2735 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
2736 else
2737 h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2738 goto submit_http_error;
2739 }
2740
2741 if(!h2_stream->qbuffer) {
2742 h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2743 goto submit_http_error;
2744 }
2745
2746 if(h2_stream->status) {
2747 submit_http_error:
2748 verbose(VERB_QUERY, "http2 request invalid, returning :status="
2749 "%d", h2_stream->status);
2750 if(!http2_submit_error(h2_session, h2_stream)) {
2751 return NGHTTP2_ERR_CALLBACK_FAILURE;
2752 }
2753 return 0;
2754 }
2755 h2_stream->status = HTTP_STATUS_OK;
2756
2757 sldns_buffer_flip(h2_stream->qbuffer);
2758 h2_session->postpone_drop = 1;
2759 query_read_done = http2_query_read_done(h2_session, h2_stream);
2760 if(query_read_done < 0)
2761 return NGHTTP2_ERR_CALLBACK_FAILURE;
2762 else if(!query_read_done) {
2763 if(h2_session->is_drop) {
2764 /* connection needs to be closed. Return failure to make
2765 * sure no other action are taken anymore on comm point.
2766 * failure will result in reclaiming (and closing)
2767 * of comm point. */
2768 verbose(VERB_QUERY, "http2 query dropped in worker cb");
2769 h2_session->postpone_drop = 0;
2770 return NGHTTP2_ERR_CALLBACK_FAILURE;
2771 }
2772 /* nothing to submit right now, query added to mesh. */
2773 h2_session->postpone_drop = 0;
2774 return 0;
2775 }
2776 if(!http2_submit_dns_response(h2_session)) {
2777 sldns_buffer_clear(h2_session->c->buffer);
2778 h2_session->c->h2_stream = NULL;
2779 return NGHTTP2_ERR_CALLBACK_FAILURE;
2780 }
2781 verbose(VERB_QUERY, "http2 query submitted to session");
2782 sldns_buffer_clear(h2_session->c->buffer);
2783 h2_session->c->h2_stream = NULL;
2784 return 0;
2785 }
2786
2787 /** nghttp2 callback. Used to detect start of new streams. */
http2_req_begin_headers_cb(nghttp2_session * session,const nghttp2_frame * frame,void * cb_arg)2788 static int http2_req_begin_headers_cb(nghttp2_session* session,
2789 const nghttp2_frame* frame, void* cb_arg)
2790 {
2791 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2792 struct http2_stream* h2_stream;
2793 int ret;
2794 if(frame->hd.type != NGHTTP2_HEADERS ||
2795 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2796 /* only interested in request headers */
2797 return 0;
2798 }
2799 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2800 log_err("malloc failure while creating http2 stream");
2801 return NGHTTP2_ERR_CALLBACK_FAILURE;
2802 }
2803 http2_session_add_stream(h2_session, h2_stream);
2804 ret = nghttp2_session_set_stream_user_data(session,
2805 frame->hd.stream_id, h2_stream);
2806 if(ret) {
2807 /* stream does not exist */
2808 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2809 "error: %s", nghttp2_strerror(ret));
2810 return NGHTTP2_ERR_CALLBACK_FAILURE;
2811 }
2812
2813 return 0;
2814 }
2815
2816 /**
2817 * base64url decode, store in qbuffer
2818 * @param h2_session: http2 session
2819 * @param h2_stream: http2 stream
2820 * @param start: start of the base64 string
2821 * @param length: length of the base64 string
2822 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2823 * buffer will be NULL is unparseble.
2824 */
http2_buffer_uri_query(struct http2_session * h2_session,struct http2_stream * h2_stream,const uint8_t * start,size_t length)2825 static int http2_buffer_uri_query(struct http2_session* h2_session,
2826 struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2827 {
2828 size_t expectb64len;
2829 int b64len;
2830 if(h2_stream->http_method == HTTP_METHOD_POST)
2831 return 1;
2832 if(length == 0)
2833 return 1;
2834 if(h2_stream->qbuffer) {
2835 verbose(VERB_ALGO, "http2_req_header fail, "
2836 "qbuffer already set");
2837 return 0;
2838 }
2839
2840 /* calculate size, might be a bit bigger than the real
2841 * decoded buffer size */
2842 expectb64len = sldns_b64_pton_calculate_size(length);
2843 log_assert(expectb64len > 0);
2844 if(expectb64len >
2845 h2_session->c->http2_stream_max_qbuffer_size) {
2846 h2_stream->query_too_large = 1;
2847 return 1;
2848 }
2849
2850 lock_basic_lock(&http2_query_buffer_count_lock);
2851 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2852 lock_basic_unlock(&http2_query_buffer_count_lock);
2853 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2854 "in http2-query-buffer-size");
2855 return http2_submit_rst_stream(h2_session, h2_stream);
2856 }
2857 http2_query_buffer_count += expectb64len;
2858 lock_basic_unlock(&http2_query_buffer_count_lock);
2859 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2860 lock_basic_lock(&http2_query_buffer_count_lock);
2861 http2_query_buffer_count -= expectb64len;
2862 lock_basic_unlock(&http2_query_buffer_count_lock);
2863 log_err("http2_req_header fail, qbuffer "
2864 "malloc failure");
2865 return 0;
2866 }
2867
2868 if(sldns_b64_contains_nonurl((char const*)start, length)) {
2869 char buf[65536+4];
2870 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding");
2871 /* copy to the scratch buffer temporarily to terminate the
2872 * string with a zero */
2873 if(length+1 > sizeof(buf)) {
2874 /* too long */
2875 lock_basic_lock(&http2_query_buffer_count_lock);
2876 http2_query_buffer_count -= expectb64len;
2877 lock_basic_unlock(&http2_query_buffer_count_lock);
2878 sldns_buffer_free(h2_stream->qbuffer);
2879 h2_stream->qbuffer = NULL;
2880 return 1;
2881 }
2882 memmove(buf, start, length);
2883 buf[length] = 0;
2884 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current(
2885 h2_stream->qbuffer), expectb64len)) || b64len < 0) {
2886 lock_basic_lock(&http2_query_buffer_count_lock);
2887 http2_query_buffer_count -= expectb64len;
2888 lock_basic_unlock(&http2_query_buffer_count_lock);
2889 sldns_buffer_free(h2_stream->qbuffer);
2890 h2_stream->qbuffer = NULL;
2891 return 1;
2892 }
2893 } else {
2894 if(!(b64len = sldns_b64url_pton(
2895 (char const *)start, length,
2896 sldns_buffer_current(h2_stream->qbuffer),
2897 expectb64len)) || b64len < 0) {
2898 lock_basic_lock(&http2_query_buffer_count_lock);
2899 http2_query_buffer_count -= expectb64len;
2900 lock_basic_unlock(&http2_query_buffer_count_lock);
2901 sldns_buffer_free(h2_stream->qbuffer);
2902 h2_stream->qbuffer = NULL;
2903 /* return without error, method can be an
2904 * unknown POST */
2905 return 1;
2906 }
2907 }
2908 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
2909 return 1;
2910 }
2911
2912 /** nghttp2 callback. Used to parse headers from HEADER frames. */
http2_req_header_cb(nghttp2_session * session,const nghttp2_frame * frame,const uint8_t * name,size_t namelen,const uint8_t * value,size_t valuelen,uint8_t ATTR_UNUSED (flags),void * cb_arg)2913 static int http2_req_header_cb(nghttp2_session* session,
2914 const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
2915 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
2916 void* cb_arg)
2917 {
2918 struct http2_stream* h2_stream = NULL;
2919 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2920 /* nghttp2 deals with CONTINUATION frames and provides them as part of
2921 * the HEADER */
2922 if(frame->hd.type != NGHTTP2_HEADERS ||
2923 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2924 /* only interested in request headers */
2925 return 0;
2926 }
2927 if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
2928 frame->hd.stream_id)))
2929 return 0;
2930
2931 /* earlier checks already indicate we can stop handling this query */
2932 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
2933 h2_stream->invalid_content_type ||
2934 h2_stream->invalid_endpoint)
2935 return 0;
2936
2937
2938 /* nghttp2 performs some sanity checks in the headers, including:
2939 * name and value are guaranteed to be null terminated
2940 * name is guaranteed to be lowercase
2941 * content-length value is guaranteed to contain digits
2942 */
2943
2944 if(!h2_stream->http_method && namelen == 7 &&
2945 memcmp(":method", name, namelen) == 0) {
2946 /* Case insensitive check on :method value to be on the safe
2947 * side. I failed to find text about case sensitivity in specs.
2948 */
2949 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
2950 h2_stream->http_method = HTTP_METHOD_GET;
2951 else if(valuelen == 4 &&
2952 strcasecmp("POST", (const char*)value) == 0) {
2953 h2_stream->http_method = HTTP_METHOD_POST;
2954 if(h2_stream->qbuffer) {
2955 /* POST method uses query from DATA frames */
2956 lock_basic_lock(&http2_query_buffer_count_lock);
2957 http2_query_buffer_count -=
2958 sldns_buffer_capacity(h2_stream->qbuffer);
2959 lock_basic_unlock(&http2_query_buffer_count_lock);
2960 sldns_buffer_free(h2_stream->qbuffer);
2961 h2_stream->qbuffer = NULL;
2962 }
2963 } else
2964 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
2965 return 0;
2966 }
2967 if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
2968 /* :path may contain DNS query, depending on method. Method might
2969 * not be known yet here, so check after finishing receiving
2970 * stream. */
2971 #define HTTP_QUERY_PARAM "?dns="
2972 size_t el = strlen(h2_session->c->http_endpoint);
2973 size_t qpl = strlen(HTTP_QUERY_PARAM);
2974
2975 if(valuelen < el || memcmp(h2_session->c->http_endpoint,
2976 value, el) != 0) {
2977 h2_stream->invalid_endpoint = 1;
2978 return 0;
2979 }
2980 /* larger than endpoint only allowed if it is for the query
2981 * parameter */
2982 if(valuelen <= el+qpl ||
2983 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
2984 if(valuelen != el)
2985 h2_stream->invalid_endpoint = 1;
2986 return 0;
2987 }
2988
2989 if(!http2_buffer_uri_query(h2_session, h2_stream,
2990 value+(el+qpl), valuelen-(el+qpl))) {
2991 return NGHTTP2_ERR_CALLBACK_FAILURE;
2992 }
2993 return 0;
2994 }
2995 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
2996 * and not needed when using GET. Don't enfore.
2997 * If set only allow lowercase "application/dns-message".
2998 *
2999 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
3000 * be able to handle "application/dns-message". Since that is the only
3001 * content-type supported we can ignore the accept header.
3002 */
3003 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
3004 if(valuelen != 23 || memcmp("application/dns-message", value,
3005 valuelen) != 0) {
3006 h2_stream->invalid_content_type = 1;
3007 }
3008 }
3009
3010 /* Only interested in content-lentg for POST (on not yet known) method.
3011 */
3012 if((!h2_stream->http_method ||
3013 h2_stream->http_method == HTTP_METHOD_POST) &&
3014 !h2_stream->content_length && namelen == 14 &&
3015 memcmp("content-length", name, namelen) == 0) {
3016 if(valuelen > 5) {
3017 h2_stream->query_too_large = 1;
3018 return 0;
3019 }
3020 /* guaranteed to only contain digits and be null terminated */
3021 h2_stream->content_length = atoi((const char*)value);
3022 if(h2_stream->content_length >
3023 h2_session->c->http2_stream_max_qbuffer_size) {
3024 h2_stream->query_too_large = 1;
3025 return 0;
3026 }
3027 }
3028 return 0;
3029 }
3030
3031 /** nghttp2 callback. Used to get data from DATA frames, which can contain
3032 * queries in POST requests. */
http2_req_data_chunk_recv_cb(nghttp2_session * ATTR_UNUSED (session),uint8_t ATTR_UNUSED (flags),int32_t stream_id,const uint8_t * data,size_t len,void * cb_arg)3033 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
3034 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
3035 size_t len, void* cb_arg)
3036 {
3037 struct http2_session* h2_session = (struct http2_session*)cb_arg;
3038 struct http2_stream* h2_stream;
3039 size_t qlen = 0;
3040
3041 if(!(h2_stream = nghttp2_session_get_stream_user_data(
3042 h2_session->session, stream_id))) {
3043 return 0;
3044 }
3045
3046 if(h2_stream->query_too_large)
3047 return 0;
3048
3049 if(!h2_stream->qbuffer) {
3050 if(h2_stream->content_length) {
3051 if(h2_stream->content_length < len)
3052 /* getting more data in DATA frame than
3053 * advertised in content-length header. */
3054 return NGHTTP2_ERR_CALLBACK_FAILURE;
3055 qlen = h2_stream->content_length;
3056 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
3057 /* setting this to msg-buffer-size can result in a lot
3058 * of memory consuption. Most queries should fit in a
3059 * single DATA frame, and most POST queries will
3060 * contain content-length which does not impose this
3061 * limit. */
3062 qlen = len;
3063 }
3064 }
3065 if(!h2_stream->qbuffer && qlen) {
3066 lock_basic_lock(&http2_query_buffer_count_lock);
3067 if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
3068 lock_basic_unlock(&http2_query_buffer_count_lock);
3069 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
3070 "in http2-query-buffer-size");
3071 return http2_submit_rst_stream(h2_session, h2_stream);
3072 }
3073 http2_query_buffer_count += qlen;
3074 lock_basic_unlock(&http2_query_buffer_count_lock);
3075 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
3076 lock_basic_lock(&http2_query_buffer_count_lock);
3077 http2_query_buffer_count -= qlen;
3078 lock_basic_unlock(&http2_query_buffer_count_lock);
3079 }
3080 }
3081
3082 if(!h2_stream->qbuffer ||
3083 sldns_buffer_remaining(h2_stream->qbuffer) < len) {
3084 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
3085 "buffer space for POST query. Can happen on multi "
3086 "frame requests without content-length header");
3087 h2_stream->query_too_large = 1;
3088 return 0;
3089 }
3090
3091 sldns_buffer_write(h2_stream->qbuffer, data, len);
3092
3093 return 0;
3094 }
3095
http2_req_stream_clear(struct http2_stream * h2_stream)3096 void http2_req_stream_clear(struct http2_stream* h2_stream)
3097 {
3098 if(h2_stream->qbuffer) {
3099 lock_basic_lock(&http2_query_buffer_count_lock);
3100 http2_query_buffer_count -=
3101 sldns_buffer_capacity(h2_stream->qbuffer);
3102 lock_basic_unlock(&http2_query_buffer_count_lock);
3103 sldns_buffer_free(h2_stream->qbuffer);
3104 h2_stream->qbuffer = NULL;
3105 }
3106 if(h2_stream->rbuffer) {
3107 lock_basic_lock(&http2_response_buffer_count_lock);
3108 http2_response_buffer_count -=
3109 sldns_buffer_capacity(h2_stream->rbuffer);
3110 lock_basic_unlock(&http2_response_buffer_count_lock);
3111 sldns_buffer_free(h2_stream->rbuffer);
3112 h2_stream->rbuffer = NULL;
3113 }
3114 }
3115
http2_req_callbacks_create(void)3116 nghttp2_session_callbacks* http2_req_callbacks_create(void)
3117 {
3118 nghttp2_session_callbacks *callbacks;
3119 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
3120 log_err("failed to initialize nghttp2 callback");
3121 return NULL;
3122 }
3123 /* reception of header block started, used to create h2_stream */
3124 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
3125 http2_req_begin_headers_cb);
3126 /* complete frame received, used to get data from stream if frame
3127 * has end stream flag, and start processing query */
3128 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
3129 http2_req_frame_recv_cb);
3130 /* get request info from headers */
3131 nghttp2_session_callbacks_set_on_header_callback(callbacks,
3132 http2_req_header_cb);
3133 /* get data from DATA frames, containing POST query */
3134 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
3135 http2_req_data_chunk_recv_cb);
3136
3137 /* generic HTTP2 callbacks */
3138 nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
3139 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
3140 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
3141 http2_stream_close_cb);
3142
3143 return callbacks;
3144 }
3145 #endif /* HAVE_NGHTTP2 */
3146