xref: /openbsd-src/usr.sbin/nsd/server.c (revision 24bb5fcea3ed904bc467217bdaadb5dfc618d5bf)
1 /*
2  * server.c -- nsd(8) network input/output
3  *
4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
5  *
6  * See LICENSE for the license.
7  *
8  */
9 
10 #include "config.h"
11 
12 #include <sys/types.h>
13 #include <sys/param.h>
14 #include <limits.h>
15 #include <sys/socket.h>
16 #include <sys/uio.h>
17 #include <sys/wait.h>
18 
19 #include <netinet/in.h>
20 #ifdef USE_TCP_FASTOPEN
21   #include <netinet/tcp.h>
22 #endif
23 #include <arpa/inet.h>
24 
25 #include <assert.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stddef.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35 #include <signal.h>
36 #include <netdb.h>
37 #include <poll.h>
38 #ifdef HAVE_SYS_RANDOM_H
39 #include <sys/random.h>
40 #endif
41 #ifndef SHUT_WR
42 #define SHUT_WR 1
43 #endif
44 #ifdef HAVE_MMAP
45 #include <sys/mman.h>
46 #endif /* HAVE_MMAP */
47 #ifdef HAVE_OPENSSL_RAND_H
48 #include <openssl/rand.h>
49 #endif
50 #ifdef HAVE_OPENSSL_SSL_H
51 #include <openssl/ssl.h>
52 #endif
53 #ifdef HAVE_OPENSSL_ERR_H
54 #include <openssl/err.h>
55 #endif
56 #ifdef HAVE_OPENSSL_OCSP_H
57 #include <openssl/ocsp.h>
58 #endif
59 #ifndef USE_MINI_EVENT
60 #  ifdef HAVE_EVENT_H
61 #    include <event.h>
62 #  else
63 #    include <event2/event.h>
64 #    include "event2/event_struct.h"
65 #    include "event2/event_compat.h"
66 #  endif
67 #else
68 #  include "mini_event.h"
69 #endif
70 
71 #include "axfr.h"
72 #include "namedb.h"
73 #include "netio.h"
74 #include "xfrd.h"
75 #include "xfrd-tcp.h"
76 #include "xfrd-disk.h"
77 #include "difffile.h"
78 #include "nsec3.h"
79 #include "ipc.h"
80 #include "udb.h"
81 #include "remote.h"
82 #include "lookup3.h"
83 #include "rrl.h"
84 #ifdef USE_DNSTAP
85 #include "dnstap/dnstap_collector.h"
86 #endif
87 
88 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */
89 
90 #ifdef USE_DNSTAP
91 /*
92  * log_addr() - the function to print sockaddr_in/sockaddr_in6 structures content
93  * just like its done in Unbound via the same log_addr(VERB_LEVEL, const char*, sockaddr_storage*)
94  */
static void
log_addr(const char* descr,
#ifdef INET6
	struct sockaddr_storage* addr,
#else
	struct sockaddr_in* addr,
#endif
	short family)
{
	/* 64 bytes is ample for both INET_ADDRSTRLEN and INET6_ADDRSTRLEN */
	char str_buf[64];
	/* only log at very high verbosity; avoids the ntop cost otherwise */
	if(verbosity < 6)
		return;
	if(family == AF_INET) {
		struct sockaddr_in* s = (struct sockaddr_in*)addr;
		inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port)));
#ifdef INET6
	} else {
		/* any non-AF_INET family is treated as IPv6 here; without
		 * INET6 compiled in, non-IPv4 addresses are silently ignored */
		struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr;
		inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port)));
#endif
	}
}
119 #endif /* USE_DNSTAP */
120 
121 #ifdef USE_TCP_FASTOPEN
122   #define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen"
123   #define TCP_FASTOPEN_SERVER_BIT_MASK 0x2
124 #endif
125 
126 /*
127  * Data for the UDP handlers.
128  */
struct udp_handler_data
{
	struct nsd        *nsd;     /* global server state */
	struct nsd_socket *socket;  /* the UDP socket this handler serves */
	struct event       event;   /* libevent registration for the fd */
};
135 
struct tcp_accept_handler_data {
	struct nsd        *nsd;     /* global server state */
	struct nsd_socket *socket;  /* listening TCP socket */
	int                event_added; /* nonzero while event is registered */
	struct event       event;   /* libevent registration for the fd */
#ifdef HAVE_SSL
	/* handler accepts TLS connections on the dedicated port */
	int                tls_accept;
#endif
};
146 
147 /*
148  * These globals are used to enable the TCP accept handlers
149  * when the number of TCP connection drops below the maximum
150  * number of TCP connections.
151  */
152 static size_t tcp_accept_handler_count;
153 static struct tcp_accept_handler_data *tcp_accept_handlers;
154 
155 static struct event slowaccept_event;
156 static int slowaccept;
157 
158 #ifdef HAVE_SSL
159 static unsigned char *ocspdata = NULL;
160 static long ocspdata_len = 0;
161 #endif
162 
163 #ifdef NONBLOCKING_IS_BROKEN
164 /* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to
165    read multiple times from a socket when reported ready by select. */
166 # define NUM_RECV_PER_SELECT (1)
167 #else /* !NONBLOCKING_IS_BROKEN */
168 # define NUM_RECV_PER_SELECT (100)
169 #endif /* NONBLOCKING_IS_BROKEN */
170 
171 #ifndef HAVE_MMSGHDR
/* fallback definition so the recvmmsg-style batching code compiles on
 * systems without struct mmsghdr; matches the Linux layout */
struct mmsghdr {
	struct msghdr msg_hdr;  /* the regular message header */
	unsigned int  msg_len;  /* bytes received for this message */
};
176 #endif
177 
178 static struct mmsghdr msgs[NUM_RECV_PER_SELECT];
179 static struct iovec iovecs[NUM_RECV_PER_SELECT];
180 static struct query *queries[NUM_RECV_PER_SELECT];
181 
182 /*
183  * Data for the TCP connection handlers.
184  *
185  * The TCP handlers use non-blocking I/O.  This is necessary to avoid
186  * blocking the entire server on a slow TCP connection, but does make
187  * reading from and writing to the socket more complicated.
188  *
189  * Basically, whenever a read/write would block (indicated by the
190  * EAGAIN errno variable) we remember the position we were reading
191  * from/writing to and return from the TCP reading/writing event
192  * handler.  When the socket becomes readable/writable again we
193  * continue from the same position.
194  */
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure.  This region is destroyed
	 * when the connection is closed.
	 */
	region_type*		region;

	/*
	 * The global nsd structure.
	 */
	struct nsd*			nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type*			query;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type	query_state;

	/*
	 * The event for the file descriptor and tcp timeout
	 */
	struct event event;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet.  The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t				bytes_transmitted;

	/*
	 * The number of queries handled by this specific TCP connection.
	 */
	int					query_count;

	/*
	 * The timeout in msec for this tcp connection
	 */
	int	tcp_timeout;

	/*
	 * If the connection is allowed to have further queries on it.
	 * Nonzero means no more queries are accepted on this channel.
	 */
	int tcp_no_more_queries;

#ifdef USE_DNSTAP
	/* the socket of the accept socket to find proper service (local) address the socket is bound to. */
	struct nsd_socket *socket;
#endif /* USE_DNSTAP */

#ifdef HAVE_SSL
	/*
	 * TLS object.
	 */
	SSL* tls;

	/*
	 * TLS handshake state.
	 * The *_event variants indicate the event loop is armed for the
	 * opposite direction (e.g. waiting for writability to continue
	 * a read-side handshake).
	 */
	enum { tls_hs_none, tls_hs_read, tls_hs_write,
		tls_hs_read_event, tls_hs_write_event } shake_state;
#endif
	/* list of connections, for service of remaining tcp channels */
	struct tcp_handler_data *prev, *next;
};
269 /* global that is the list of active tcp channels */
270 static struct tcp_handler_data *tcp_active_list = NULL;
271 
272 /*
273  * Handle incoming queries on the UDP server sockets.
274  */
275 static void handle_udp(int fd, short event, void* arg);
276 
277 /*
278  * Handle incoming connections on the TCP sockets.  These handlers
279  * usually wait for the NETIO_EVENT_READ event (indicating an incoming
280  * connection) but are disabled when the number of current TCP
281  * connections is equal to the maximum number of TCP connections.
282  * Disabling is done by changing the handler to wait for the
283  * NETIO_EVENT_NONE type.  This is done using the function
284  * configure_tcp_accept_handlers.
285  */
286 static void handle_tcp_accept(int fd, short event, void* arg);
287 
288 /*
289  * Handle incoming queries on a TCP connection.  The TCP connections
290  * are configured to be non-blocking and the handler may be called
291  * multiple times before a complete query is received.
292  */
293 static void handle_tcp_reading(int fd, short event, void* arg);
294 
295 /*
296  * Handle outgoing responses on a TCP connection.  The TCP connections
297  * are configured to be non-blocking and the handler may be called
298  * multiple times before a complete response is sent.
299  */
300 static void handle_tcp_writing(int fd, short event, void* arg);
301 
302 #ifdef HAVE_SSL
303 /* Create SSL object and associate fd */
304 static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd);
305 /*
306  * Handle TLS handshake. May be called multiple times if incomplete.
307  */
308 static int tls_handshake(struct tcp_handler_data* data, int fd, int writing);
309 
310 /*
311  * Handle incoming queries on a TLS over TCP connection.  The TLS
312  * connections are configured to be non-blocking and the handler may
313  * be called multiple times before a complete query is received.
314  */
315 static void handle_tls_reading(int fd, short event, void* arg);
316 
317 /*
318  * Handle outgoing responses on a TLS over TCP connection.  The TLS
319  * connections are configured to be non-blocking and the handler may
320  * be called multiple times before a complete response is sent.
321  */
322 static void handle_tls_writing(int fd, short event, void* arg);
323 #endif
324 
325 /*
326  * Send all children the quit nonblocking, then close pipe.
327  */
328 static void send_children_quit(struct nsd* nsd);
329 /* same, for shutdown time, waits for child to exit to avoid restart issues */
330 static void send_children_quit_and_wait(struct nsd* nsd);
331 
332 /* set childrens flags to send NSD_STATS to them */
333 #ifdef BIND8_STATS
334 static void set_children_stats(struct nsd* nsd);
335 #endif /* BIND8_STATS */
336 
337 /*
338  * Change the event types the HANDLERS are interested in to EVENT_TYPES.
339  */
340 static void configure_handler_event_types(short event_types);
341 
342 static uint16_t *compressed_dname_offsets = 0;
343 static uint32_t compression_table_capacity = 0;
344 static uint32_t compression_table_size = 0;
345 static domain_type* compressed_dnames[MAXRRSPP];
346 
347 #ifdef USE_TCP_FASTOPEN
348 /* Checks to see if the kernel value must be manually changed in order for
349    TCP Fast Open to support server mode */
350 static void report_tcp_fastopen_config() {
351 
352 	int tcp_fastopen_fp;
353 	uint8_t tcp_fastopen_value;
354 
355 	if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) {
356 		log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno));
357 	}
358 	if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) {
359 		log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno));
360 		close(tcp_fastopen_fp);
361 	}
362 	if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) {
363 		log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n");
364 		log_msg(LOG_WARNING, "However the kernel paramenters are not configured to support TCP_FASTOPEN in server mode.\n");
365 		log_msg(LOG_WARNING, "To enable TFO use the command:");
366 		log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n");
367 		log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n");
368 		log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n");
369 		close(tcp_fastopen_fp);
370 	}
371 	close(tcp_fastopen_fp);
372 }
373 #endif
374 
375 /*
376  * Remove the specified pid from the list of child pids.  Returns -1 if
377  * the pid is not in the list, child_num otherwise.  The field is set to 0.
378  */
379 static int
380 delete_child_pid(struct nsd *nsd, pid_t pid)
381 {
382 	size_t i;
383 	for (i = 0; i < nsd->child_count; ++i) {
384 		if (nsd->children[i].pid == pid) {
385 			nsd->children[i].pid = 0;
386 			if(!nsd->children[i].need_to_exit) {
387 				if(nsd->children[i].child_fd != -1)
388 					close(nsd->children[i].child_fd);
389 				nsd->children[i].child_fd = -1;
390 				if(nsd->children[i].handler)
391 					nsd->children[i].handler->fd = -1;
392 			}
393 			return i;
394 		}
395 	}
396 	return -1;
397 }
398 
399 /*
400  * Restart child servers if necessary.
401  */
/*
 * Fork replacement server children for every slot whose pid is unset or
 * whose process has died.  A fresh AF_UNIX socketpair is created per
 * child for parent<->child IPC.  Returns 0 on success, -1 when
 * socketpair() or fork() fails.
 */
static int
restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	struct main_ipc_handler_data *ipc_data;
	size_t i;
	int sv[2];

	/* Fork the child processes... */
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid <= 0) {
			/* close the stale IPC fd from the previous child */
			if (nsd->children[i].child_fd != -1)
				close(nsd->children[i].child_fd);
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
				log_msg(LOG_ERR, "socketpair: %s",
					strerror(errno));
				return -1;
			}
			nsd->children[i].child_fd = sv[0];
			nsd->children[i].parent_fd = sv[1];
			nsd->children[i].pid = fork();
			switch (nsd->children[i].pid) {
			default: /* SERVER MAIN */
				/* parent keeps child_fd, drops the peer end */
				close(nsd->children[i].parent_fd);
				nsd->children[i].parent_fd = -1;
				if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				/* first start of this slot: allocate the netio
				 * handler and its IPC state once; on restart
				 * the existing handler is reused below */
				if(!nsd->children[i].handler)
				{
					ipc_data = (struct main_ipc_handler_data*) region_alloc(
						region, sizeof(struct main_ipc_handler_data));
					ipc_data->nsd = nsd;
					ipc_data->child = &nsd->children[i];
					ipc_data->child_num = i;
					ipc_data->xfrd_sock = xfrd_sock_p;
					ipc_data->packet = buffer_create(region, QIOBUFSZ);
					ipc_data->forward_mode = 0;
					ipc_data->got_bytes = 0;
					ipc_data->total_bytes = 0;
					ipc_data->acl_num = 0;
					nsd->children[i].handler = (struct netio_handler*) region_alloc(
						region, sizeof(struct netio_handler));
					nsd->children[i].handler->fd = nsd->children[i].child_fd;
					nsd->children[i].handler->timeout = NULL;
					nsd->children[i].handler->user_data = ipc_data;
					nsd->children[i].handler->event_types = NETIO_EVENT_READ;
					nsd->children[i].handler->event_handler = parent_handle_child_command;
					netio_add_handler(netio, nsd->children[i].handler);
				}
				/* clear any ongoing ipc */
				ipc_data = (struct main_ipc_handler_data*)
					nsd->children[i].handler->user_data;
				ipc_data->forward_mode = 0;
				/* restart - update fd */
				nsd->children[i].handler->fd = nsd->children[i].child_fd;
				break;
			case 0: /* CHILD */
				/* the child need not be able to access the
				 * nsd.db file */
				namedb_close_udb(nsd->db);
#ifdef MEMCLEAN /* OS collects memory pages */
				region_destroy(region);
#endif

				/* OpenBSD: drop privileges to the minimum the
				 * serve loop needs */
				if (pledge("stdio rpath inet", NULL) == -1) {
					log_msg(LOG_ERR, "pledge");
					exit(1);
				}

				nsd->pid = 0;
				nsd->child_count = 0;
				nsd->server_kind = nsd->children[i].kind;
				nsd->this_child = &nsd->children[i];
				nsd->this_child->child_num = i;
				/* remove signal flags inherited from parent
				   the parent will handle them. */
				nsd->signal_hint_reload_hup = 0;
				nsd->signal_hint_reload = 0;
				nsd->signal_hint_child = 0;
				nsd->signal_hint_quit = 0;
				nsd->signal_hint_shutdown = 0;
				nsd->signal_hint_stats = 0;
				nsd->signal_hint_statsusr = 0;
				/* child keeps parent_fd, drops its own end
				 * and the xfrd socket */
				close(*xfrd_sock_p);
				close(nsd->this_child->child_fd);
				nsd->this_child->child_fd = -1;
				if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				server_child(nsd);
				/* NOTREACH */
				exit(0);
			case -1:
				log_msg(LOG_ERR, "fork failed: %s",
					strerror(errno));
				return -1;
			}
		}
	}
	return 0;
}
504 
505 #ifdef BIND8_STATS
/* arm SIGALRM for the next statistics dump, aligned to the period */
static void set_bind8_alarm(struct nsd* nsd)
{
	/* resync so that the next alarm is on the next whole minute */
	if(nsd->st.period > 0) /* % by 0 gives divbyzero error */
		alarm(nsd->st.period - (time(NULL) % nsd->st.period));
}
512 #endif
513 
514 /* set zone stat ids for zones initially read in */
static void
zonestatid_tree_set(struct nsd* nsd)
{
	struct radnode* n;
	/* walk every zone in the radix tree and assign its statistics
	 * slot id from the configured zonestat names */
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		zone_type* zone = (zone_type*)n->elem;
		zone->zonestatid = getzonestatid(nsd->options, zone->opts);
	}
}
524 
525 #ifdef USE_ZONE_STATS
526 void
527 server_zonestat_alloc(struct nsd* nsd)
528 {
529 	size_t num = (nsd->options->zonestatnames->count==0?1:
530 			nsd->options->zonestatnames->count);
531 	size_t sz = sizeof(struct nsdst)*num;
532 	char tmpfile[256];
533 	uint8_t z = 0;
534 
535 	/* file names */
536 	nsd->zonestatfname[0] = 0;
537 	nsd->zonestatfname[1] = 0;
538 	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0",
539 		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
540 	nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile);
541 	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1",
542 		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
543 	nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile);
544 
545 	/* file descriptors */
546 	nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600);
547 	if(nsd->zonestatfd[0] == -1) {
548 		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0],
549 			strerror(errno));
550 		exit(1);
551 	}
552 	nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600);
553 	if(nsd->zonestatfd[0] == -1) {
554 		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1],
555 			strerror(errno));
556 		close(nsd->zonestatfd[0]);
557 		unlink(nsd->zonestatfname[0]);
558 		exit(1);
559 	}
560 
561 #ifdef HAVE_MMAP
562 	if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) {
563 		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0],
564 			strerror(errno));
565 		exit(1);
566 	}
567 	if(write(nsd->zonestatfd[0], &z, 1) == -1) {
568 		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
569 			nsd->zonestatfname[0], strerror(errno));
570 		exit(1);
571 	}
572 	if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) {
573 		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1],
574 			strerror(errno));
575 		exit(1);
576 	}
577 	if(write(nsd->zonestatfd[1], &z, 1) == -1) {
578 		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
579 			nsd->zonestatfname[1], strerror(errno));
580 		exit(1);
581 	}
582 	nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
583 		MAP_SHARED, nsd->zonestatfd[0], 0);
584 	if(nsd->zonestat[0] == MAP_FAILED) {
585 		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
586 		unlink(nsd->zonestatfname[0]);
587 		unlink(nsd->zonestatfname[1]);
588 		exit(1);
589 	}
590 	nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
591 		MAP_SHARED, nsd->zonestatfd[1], 0);
592 	if(nsd->zonestat[1] == MAP_FAILED) {
593 		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
594 		unlink(nsd->zonestatfname[0]);
595 		unlink(nsd->zonestatfname[1]);
596 		exit(1);
597 	}
598 	memset(nsd->zonestat[0], 0, sz);
599 	memset(nsd->zonestat[1], 0, sz);
600 	nsd->zonestatsize[0] = num;
601 	nsd->zonestatsize[1] = num;
602 	nsd->zonestatdesired = num;
603 	nsd->zonestatsizenow = num;
604 	nsd->zonestatnow = nsd->zonestat[0];
605 #endif /* HAVE_MMAP */
606 }
607 
/*
 * Resize the mmap'ed zonestat array idx to sz bytes.  Uses mremap where
 * available; otherwise unmaps and maps the (already extended) file
 * again.  Exits the process when the remap fails.
 */
void
zonestat_remap(struct nsd* nsd, int idx, size_t sz)
{
#ifdef HAVE_MMAP
#ifdef MREMAP_MAYMOVE
	nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], sz,
		MREMAP_MAYMOVE);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap failed: %s", strerror(errno));
		exit(1);
	}
#else /* !HAVE MREMAP */
	/* flush pending counter updates before dropping the mapping */
	if(msync(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0)
		log_msg(LOG_ERR, "msync failed: %s", strerror(errno));
	if(munmap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0)
		log_msg(LOG_ERR, "munmap failed: %s", strerror(errno));
	nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz,
		PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		exit(1);
	}
#endif /* MREMAP */
#endif /* HAVE_MMAP */
}
636 
637 /* realloc the zonestat array for the one that is not currently in use,
638  * to match the desired new size of the array (if applicable) */
/* realloc the zonestat array for the one that is not currently in use,
 * to match the desired new size of the array (if applicable) */
void
server_zonestat_realloc(struct nsd* nsd)
{
#ifdef HAVE_MMAP
	uint8_t z = 0;
	size_t sz;
	int idx = 0; /* index of the zonestat array that is not in use */
	if(nsd->zonestatnow == nsd->zonestat[0])
		idx = 1;
	/* nothing to do when the inactive array already has the size */
	if(nsd->zonestatsize[idx] == nsd->zonestatdesired)
		return;
	sz = sizeof(struct nsdst)*nsd->zonestatdesired;
	/* grow the backing file first (write a byte at the new end),
	 * then remap; mapping beyond EOF would fault on access */
	if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[idx], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[idx], strerror(errno));
		exit(1);
	}
	zonestat_remap(nsd, idx, sz);
	/* zero the newly allocated region */
	if(nsd->zonestatdesired > nsd->zonestatsize[idx]) {
		memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) *
			nsd->zonestatsize[idx], 0, sizeof(struct nsdst) *
			(nsd->zonestatdesired - nsd->zonestatsize[idx]));
	}
	nsd->zonestatsize[idx] = nsd->zonestatdesired;
#endif /* HAVE_MMAP */
}
671 
672 /* switchover to use the other array for the new children, that
673  * briefly coexist with the old children.  And we want to avoid them
674  * both writing to the same statistics arrays. */
675 void
676 server_zonestat_switch(struct nsd* nsd)
677 {
678 	if(nsd->zonestatnow == nsd->zonestat[0]) {
679 		nsd->zonestatnow = nsd->zonestat[1];
680 		nsd->zonestatsizenow = nsd->zonestatsize[1];
681 	} else {
682 		nsd->zonestatnow = nsd->zonestat[0];
683 		nsd->zonestatsizenow = nsd->zonestatsize[0];
684 	}
685 }
686 #endif /* USE_ZONE_STATS */
687 
/* region cleanup callback: free the compression offsets table and
 * reset the globals that describe it */
static void
cleanup_dname_compression_tables(void *ptr)
{
	free(ptr);
	compressed_dname_offsets = NULL;
	compression_table_capacity = 0;
}
695 
/*
 * (Re)size and clear the dname compression offset table so it can hold
 * one slot per domain in the database plus the extra domain numbers.
 * Grows only; an existing table large enough is reused after zeroing.
 */
static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			/* detach the old cleanup before freeing so the
			 * region does not double-free the old table */
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xmallocarray(
			needed, sizeof(uint16_t));
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}
718 
719 static int
720 set_cloexec(struct nsd_socket *sock)
721 {
722 	assert(sock != NULL);
723 
724 	if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) {
725 		const char *socktype =
726 			sock->addr.ai_family == SOCK_DGRAM ? "udp" : "tcp";
727 		log_msg(LOG_ERR, "fcntl(..., O_CLOEXEC) failed for %s: %s",
728 			socktype, strerror(errno));
729 		return -1;
730 	}
731 
732 	return 1;
733 }
734 
/*
 * Enable load-balanced port reuse on the socket.  Returns 1 on success,
 * 0 when the platform lacks SO_REUSEPORT entirely, -1 on setsockopt
 * failure (ENOPROTOOPT is only logged at verbosity >= 3, since kernels
 * may define the constant without supporting it).
 */
static int
set_reuseport(struct nsd_socket *sock)
{
#ifdef SO_REUSEPORT
	int on = 1;
#ifdef SO_REUSEPORT_LB
	/* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like
	 * SO_REUSEPORT on Linux. This is what the users want with the config
	 * option in nsd.conf; if we actually need local address and port reuse
	 * they'll also need to have SO_REUSEPORT set for them, assume it was
	 * _LB they want.
	 */
	int opt = SO_REUSEPORT_LB;
	static const char optname[] = "SO_REUSEPORT_LB";
#else /* !SO_REUSEPORT_LB */
	int opt = SO_REUSEPORT;
	static const char optname[] = "SO_REUSEPORT";
#endif /* SO_REUSEPORT_LB */

	if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) {
		return 1;
	} else if(verbosity >= 3 || errno != ENOPROTOOPT) {
		log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
			optname, strerror(errno));
	}
	return -1;
#else
	(void)sock;
#endif /* SO_REUSEPORT */

	return 0;
}
767 
768 static int
769 set_reuseaddr(struct nsd_socket *sock)
770 {
771 #ifdef SO_REUSEADDR
772 	int on = 1;
773 	if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) {
774 		return 1;
775 	}
776 	log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s",
777 		strerror(errno));
778 	return -1;
779 #endif /* SO_REUSEADDR */
780 	return 0;
781 }
782 
/*
 * Set the kernel receive buffer of the socket to rcv bytes.  Returns 1
 * on success, 0 on a soft failure (option unsupported or not permitted),
 * -1 on a hard failure (logged).
 */
static int
set_rcvbuf(struct nsd_socket *sock, int rcv)
{
#ifdef SO_RCVBUF
#ifdef SO_RCVBUFFORCE
	/* Linux: SO_RCVBUFFORCE (needs CAP_NET_ADMIN) can exceed the
	 * rmem_max sysctl cap that plain SO_RCVBUF is clamped to */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	/* lacking the capability or buffer space is not fatal */
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_RCVBUFFORCE */
	if (0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_RCVBUFFORCE */
#endif /* SO_RCVBUF */

	return 0;
}
816 
/*
 * Set the kernel send buffer of the socket to snd bytes.  Mirror of
 * set_rcvbuf: returns 1 on success, 0 on a soft failure (unsupported
 * or not permitted), -1 on a hard failure (logged).
 */
static int
set_sndbuf(struct nsd_socket *sock, int snd)
{
#ifdef SO_SNDBUF
#ifdef SO_SNDBUFFORCE
	/* Linux: SO_SNDBUFFORCE (needs CAP_NET_ADMIN) can exceed the
	 * wmem_max sysctl cap that plain SO_SNDBUF is clamped to */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_SNDBUFFORCE */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_SNDBUFFORCE */
#endif /* SO_SNDBUF */

	return 0;
}
850 
851 static int
852 set_nonblock(struct nsd_socket *sock)
853 {
854 	const char *socktype =
855 		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
856 
857 	if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) {
858 		log_msg(LOG_ERR, "fctnl(..., O_NONBLOCK) failed for %s: %s",
859 			socktype, strerror(errno));
860 		return -1;
861 	}
862 
863 	return 1;
864 }
865 
/*
 * Restrict an IPv6 socket to IPv6 traffic only, so the separate IPv4
 * socket can bind the same port.  Returns 1 on success, -1 on failure
 * (logged), 0 when INET6 or IPV6_V6ONLY is not available.
 */
static int
set_ipv6_v6only(struct nsd_socket *sock)
{
#ifdef INET6
#ifdef IPV6_V6ONLY
	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";

	if(0 == setsockopt(
		sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s",
		socktype, strerror(errno));
	return -1;
#endif /* IPV6_V6ONLY */
#endif /* INET6 */

	return 0;
}
889 
/*
 * Limit outgoing IPv6 datagrams to the minimum IPv6 MTU (1280) so
 * responses are never fragmented in the network.  Returns 1 on success,
 * -1 on failure (logged), 0 when neither option exists on the platform.
 */
static int
set_ipv6_use_min_mtu(struct nsd_socket *sock)
{
#if defined(INET6) && (defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU))
#if defined(IPV6_USE_MIN_MTU)
	/* There is no fragmentation of IPv6 datagrams during forwarding in the
	 * network. Therefore we do not send UDP datagrams larger than the
	 * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be
	 * larger if the network stack supports IPV6_USE_MIN_MTU.
	 */
	int opt = IPV6_USE_MIN_MTU;
	int optval = 1;
	static const char optname[] = "IPV6_USE_MIN_MTU";
#elif defined(IPV6_MTU)
	/* On Linux, PMTUD is disabled by default for datagrams so set the MTU
	 * to the MIN MTU to get the same.
	 */
	int opt = IPV6_MTU;
	int optval = IPV6_MIN_MTU;
	static const char optname[] = "IPV6_MTU";
#endif
	if(0 == setsockopt(
		sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
		optname, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* INET6 */

	return 0;
}
926 
/*
 * Disable path-MTU discovery on an IPv4 socket so responses are sent
 * with DF=0 and forged PMTU information cannot shrink our packets
 * (fragmentation-attack mitigation).  Tries IP_PMTUDISC_OMIT first,
 * falls back to IP_PMTUDISC_DONT, then IP_DONTFRAG.  Returns 1 on
 * success, -1 when every available option failed (logged), 0 when the
 * platform has none of the options.
 */
static int
set_ipv4_no_pmtu_disc(struct nsd_socket *sock)
{
	int ret = 0;

#if defined(IP_MTU_DISCOVER)
	int opt = IP_MTU_DISCOVER;
	int optval;
# if defined(IP_PMTUDISC_OMIT)
	/* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU
	 * information and send packets with DF=0. Fragmentation is allowed if
	 * and only if the packet size exceeds the outgoing interface MTU or
	 * the packet encounters smaller MTU link in network. This mitigates
	 * DNS fragmentation attacks by preventing forged PMTU information.
	 * FreeBSD already has same semantics without setting the option.
	 */
	optval = IP_PMTUDISC_OMIT;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		"IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno));
# endif /* IP_PMTUDISC_OMIT */
# if defined(IP_PMTUDISC_DONT)
	/* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */
	optval = IP_PMTUDISC_DONT;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		"IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno));
# endif
	ret = -1;
#elif defined(IP_DONTFRAG)
	/* BSD-style: clearing IP_DONTFRAG permits fragmentation */
	int off = 0;
	if (0 == setsockopt(
		sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s",
		strerror(errno));
	ret = -1;
#else
	(void)sock;
#endif

	return ret;
}
983 
/*
 * Allow binding to an address that is not (yet) configured on any
 * interface (Linux IP_FREEBIND).  Returns 1 on success, -1 on failure
 * (logged), 0 when the option does not exist on the platform.
 */
static int
set_ip_freebind(struct nsd_socket *sock)
{
#ifdef IP_FREEBIND
	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
	if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0)
	{
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s",
		socktype, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IP_FREEBIND */

	return 0;
}
1004 
/*
 * Allow the socket to bind to a non-local address ("transparent" bind).
 * Returns 1 on success, -1 when setsockopt fails (logged), and 0 when
 * no suitable socket option exists on this platform.
 */
static int
set_ip_transparent(struct nsd_socket *sock)
{
	/*
	The scandalous preprocessor blob here calls for some explanation :)
	POSIX does not specify an option to bind non-local IPs, so
	platforms developed several implementation-specific options,
	all set in the same way, but with different names.
	For additional complexity, some platform manage this setting
	differently for different address families (IPv4 vs IPv6).
	This scandalous preprocessor blob below abstracts such variability
	in the way which leaves the C code as lean and clear as possible.
	*/

#if defined(IP_TRANSPARENT)
#	define NSD_SOCKET_OPTION_TRANSPARENT 						IP_TRANSPARENT
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL		IPPROTO_IP
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME 			"IP_TRANSPARENT"
// as of 2020-01, Linux does not support this on IPv6 programmatically
#elif defined(SO_BINDANY)
#	define NSD_SOCKET_OPTION_TRANSPARENT						SO_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL		SOL_SOCKET
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME 			"SO_BINDANY"
#elif defined(IP_BINDANY)
#	define NSD_SOCKET_OPTION_TRANSPARENT 						IP_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT6						IPV6_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL		IPPROTO_IP
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6	IPPROTO_IPV6
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME 			"IP_BINDANY"
#endif

#ifndef NSD_SOCKET_OPTION_TRANSPARENT
	(void)sock;
#else
	/* Default the IPv6-specific variants to the generic option when the
	 * platform does not distinguish between address families. */
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT6
#		define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT
#	endif
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6
#		define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL
#	endif
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6
#		define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME
#	endif

	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
	const int is_ip6 = (sock->addr.ai_family == AF_INET6);

	/* Pick the option (and option level) matching the address family. */
	if(0 == setsockopt(
		sock->s,
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL,
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT,
		&on, sizeof(on)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s",
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME, socktype, strerror(errno));
	return -1;
#endif

	return 0;
}
1070 
1071 static int
1072 set_tcp_maxseg(struct nsd_socket *sock, int mss)
1073 {
1074 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
1075 	if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) {
1076 		return 1;
1077 	}
1078 	log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s",
1079 		strerror(errno));
1080 	return -1;
1081 #else
1082 	log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
1083 #endif
1084 	return 0;
1085 }
1086 
#ifdef USE_TCP_FASTOPEN
/*
 * Enable server-side TCP Fast Open (RFC 7413) on the listening socket.
 * Returns 1 on success, 0 when the kernel reports ENOPROTOOPT (support
 * compiled out / disabled, treated as non-fatal), and -1 on other
 * errors.  Failures are logged except squelched ENOPROTOOPT.
 */
static int
set_tcp_fastopen(struct nsd_socket *sock)
{
	/* qlen specifies how many outstanding TFO requests to allow. Limit is
	 * a defense against IP spoofing attacks as suggested in RFC7413.
	 */
	int qlen;
	int err;

#ifdef __APPLE__
	/* macOS X implementation only supports qlen of 1 via this call. The
	 * actual value is configured by the net.inet.tcp.fastopen_backlog
	 * kernel parameter.
	 */
	qlen = 1;
#else
	/* 5 is recommended on Linux. */
	qlen = 5;
#endif
	if (0 == setsockopt(
		sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)))
	{
		return 1;
	}

	/* Save errno immediately: log_msg() may call library functions that
	 * overwrite it, and it is examined again for the return value. */
	err = errno;
	if (err == EPERM) {
		log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s "
				 "; this could likely be because sysctl "
				 "net.inet.tcp.fastopen.enabled, "
				 "net.inet.tcp.fastopen.server_enable, or "
				 "net.ipv4.tcp_fastopen is disabled",
			strerror(err));
	/* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support
	 * disabled, except when verbosity enabled for debugging
	 */
	} else if(err != ENOPROTOOPT || verbosity >= 3) {
		log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s",
			strerror(err));
	}

	return (err == ENOPROTOOPT ? 0 : -1);
}
#endif /* USE_TCP_FASTOPEN */
1130 
/*
 * Bind the socket to the network device named in sock->device using
 * SO_BINDTODEVICE.  Returns 1 on success, -1 on failure (logged), and
 * 0 when the option is unavailable on this platform.
 */
static int
set_bindtodevice(struct nsd_socket *sock)
{
#if defined(SO_BINDTODEVICE)
	int rc = setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE,
	                    sock->device, strlen(sock->device));
	if(rc == -1) {
		log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		                 "SO_BINDTODEVICE", sock->device, strerror(errno));
		return -1;
	}
	return 1;
#else
	(void)sock;
	return 0;
#endif
}
1149 
/*
 * Attach the socket to the routing table (FIB) stored in sock->fib via
 * SO_SETFIB.  Returns 1 on success, -1 on failure (logged), and 0 when
 * the option is unavailable on this platform.
 */
static int
set_setfib(struct nsd_socket *sock)
{
#if defined(SO_SETFIB)
	int rc = setsockopt(sock->s, SOL_SOCKET, SO_SETFIB,
	                    (const void *)&sock->fib, sizeof(sock->fib));
	if(rc == -1) {
		log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s",
		                 "SO_SETFIB", sock->fib, strerror(errno));
		return -1;
	}
	return 1;
#else
	(void)sock;
	return 0;
#endif
}
1168 
/*
 * Create, configure and bind the UDP server socket described by sock.
 *
 * Returns 1 on success (sock->s holds the bound descriptor), 0 when an
 * optional IPv6 socket is skipped because the system lacks IPv6
 * support, and -1 on error (errors are logged).
 * reuseport_works may be NULL; when non-NULL and *reuseport_works is
 * non-zero, SO_REUSEPORT is attempted and the result written back.
 */
static int
open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
	/* default 1 MiB socket buffers; overridden by config values below */
	int rcv = 1*1024*1024, snd = 1*1024*1024;

	if(-1 == (sock->s = socket(
		sock->addr.ai_family, sock->addr.ai_socktype, 0)))
	{
#ifdef INET6
		/* An optional IPv6 socket fails soft when the kernel has no
		 * IPv6 support at all. */
		if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
		   (sock->addr.ai_family == AF_INET6) &&
		   (errno == EAFNOSUPPORT))
		{
			log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: "
				"not supported");
			return 0;
		}
#endif
		log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
		return -1;
	}

	set_cloexec(sock);

	/* Only keep attempting SO_REUSEPORT while it has worked so far. */
	if(nsd->reuseport && reuseport_works && *reuseport_works)
		*reuseport_works = (set_reuseport(sock) == 1);

	if(nsd->options->receive_buffer_size > 0)
		rcv = nsd->options->receive_buffer_size;
	if(set_rcvbuf(sock, rcv) == -1)
		return -1;

	if(nsd->options->send_buffer_size > 0)
		snd = nsd->options->send_buffer_size;
	if(set_sndbuf(sock, snd) == -1)
		return -1;
#ifdef INET6
	if(sock->addr.ai_family == AF_INET6) {
		if(set_ipv6_v6only(sock) == -1 ||
		   set_ipv6_use_min_mtu(sock) == -1)
			return -1;
	} else
#endif /* INET6 */
	if(sock->addr.ai_family == AF_INET) {
		if(set_ipv4_no_pmtu_disc(sock) == -1)
			return -1;
	}

	/* Set socket to non-blocking. Otherwise, on operating systems
	 * with thundering herd problems, the UDP recv could block
	 * after select returns readable.
	 */
	set_nonblock(sock);

	/* Best-effort options: a failure here is logged but not fatal. */
	if(nsd->options->ip_freebind)
		(void)set_ip_freebind(sock);
	if(nsd->options->ip_transparent)
		(void)set_ip_transparent(sock);
	if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
		return -1;
	if(sock->fib != -1 && set_setfib(sock) == -1)
		return -1;

	if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
		char buf[256];
		addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
		log_msg(LOG_ERR, "can't bind udp socket %s: %s",
			buf, strerror(errno));
		return -1;
	}

	return 1;
}
1242 
/*
 * Create, configure, bind and listen() the TCP server socket described
 * by sock.
 *
 * Returns 1 on success, 0 when an optional IPv6 socket is skipped
 * because the system lacks IPv6 support, and -1 on error (logged).
 * reuseport_works may be NULL; when non-NULL and *reuseport_works is
 * non-zero, SO_REUSEPORT is attempted and the result written back.
 */
static int
open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
#ifdef USE_TCP_FASTOPEN
	report_tcp_fastopen_config();
#endif

	(void)reuseport_works;

	if(-1 == (sock->s = socket(
		sock->addr.ai_family, sock->addr.ai_socktype, 0)))
	{
#ifdef INET6
		/* An optional IPv6 socket fails soft when the kernel has no
		 * IPv6 support at all. */
		if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
		   (sock->addr.ai_family == AF_INET6) &&
		   (errno == EAFNOSUPPORT))
		{
			log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: "
			                     "not supported");
			return 0;
		}
#endif /* INET6 */
		log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
		return -1;
	}

	set_cloexec(sock);

	/* Only keep attempting SO_REUSEPORT while it has worked so far. */
	if(nsd->reuseport && reuseport_works && *reuseport_works)
		*reuseport_works = (set_reuseport(sock) == 1);

	(void)set_reuseaddr(sock);

#ifdef INET6
	if(sock->addr.ai_family == AF_INET6) {
		if (set_ipv6_v6only(sock) == -1 ||
		    set_ipv6_use_min_mtu(sock) == -1)
			return -1;
	}
#endif

	if(nsd->tcp_mss > 0)
		set_tcp_maxseg(sock, nsd->tcp_mss);
	/* (StevensUNP p463), if TCP listening socket is blocking, then
	   it may block in accept, even if select() says readable. */
	(void)set_nonblock(sock);
	/* Best-effort options: a failure here is logged but not fatal. */
	if(nsd->options->ip_freebind)
		(void)set_ip_freebind(sock);
	if(nsd->options->ip_transparent)
		(void)set_ip_transparent(sock);
	if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
		return -1;
	if(sock->fib != -1 && set_setfib(sock) == -1)
		return -1;

	if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
		char buf[256];
		addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
		log_msg(LOG_ERR, "can't bind tcp socket %s: %s",
			buf, strerror(errno));
		return -1;
	}

#ifdef USE_TCP_FASTOPEN
	(void)set_tcp_fastopen(sock);
#endif

	if(listen(sock->s, TCP_BACKLOG) == -1) {
		log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
		return -1;
	}

	return 1;
}
1317 
1318 /*
1319  * Initialize the server, reuseport, create and bind the sockets.
1320  */
1321 int
1322 server_init(struct nsd *nsd)
1323 {
1324 	size_t i;
1325 	int reuseport = 1; /* Determine if REUSEPORT works. */
1326 
1327 	/* open server interface ports */
1328 	for(i = 0; i < nsd->ifs; i++) {
1329 		if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 ||
1330 		   open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1)
1331 		{
1332 			return -1;
1333 		}
1334 	}
1335 
1336 	if(nsd->reuseport && reuseport) {
1337 		size_t ifs = nsd->ifs * nsd->reuseport;
1338 
1339 		/* increase the size of the interface arrays, there are going
1340 		 * to be separate interface file descriptors for every server
1341 		 * instance */
1342 		region_remove_cleanup(nsd->region, free, nsd->udp);
1343 		region_remove_cleanup(nsd->region, free, nsd->tcp);
1344 
1345 		nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp));
1346 		nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp));
1347 		region_add_cleanup(nsd->region, free, nsd->udp);
1348 		region_add_cleanup(nsd->region, free, nsd->tcp);
1349 		if(ifs > nsd->ifs) {
1350 			memset(&nsd->udp[nsd->ifs], 0,
1351 				(ifs-nsd->ifs)*sizeof(*nsd->udp));
1352 			memset(&nsd->tcp[nsd->ifs], 0,
1353 				(ifs-nsd->ifs)*sizeof(*nsd->tcp));
1354 		}
1355 
1356 		for(i = nsd->ifs; i < ifs; i++) {
1357 			nsd->udp[i] = nsd->udp[i%nsd->ifs];
1358 			nsd->udp[i].s = -1;
1359 			if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) {
1360 				return -1;
1361 			}
1362 			/* Turn off REUSEPORT for TCP by copying the socket
1363 			 * file descriptor.
1364 			 * This means we should not close TCP used by
1365 			 * other servers in reuseport enabled mode, in
1366 			 * server_child().
1367 			 */
1368 			nsd->tcp[i] = nsd->tcp[i%nsd->ifs];
1369 		}
1370 
1371 		nsd->ifs = ifs;
1372 	} else {
1373 		nsd->reuseport = 0;
1374 	}
1375 
1376 	return 0;
1377 }
1378 
1379 /*
1380  * Prepare the server for take off.
1381  *
1382  */
1383 int
1384 server_prepare(struct nsd *nsd)
1385 {
1386 #ifdef RATELIMIT
1387 	/* set secret modifier for hashing (udb ptr buckets and rate limits) */
1388 #ifdef HAVE_GETRANDOM
1389 	uint32_t v;
1390 	if(getrandom(&v, sizeof(v), 0) == -1) {
1391 		log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno));
1392 		exit(1);
1393 	}
1394 	hash_set_raninit(v);
1395 #elif defined(HAVE_ARC4RANDOM)
1396 	hash_set_raninit(arc4random());
1397 #else
1398 	uint32_t v = getpid() ^ time(NULL);
1399 	srandom((unsigned long)v);
1400 #  ifdef HAVE_SSL
1401 	if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0)
1402 		hash_set_raninit(v);
1403 	else
1404 #  endif
1405 		hash_set_raninit(random());
1406 #endif
1407 	rrl_mmap_init(nsd->child_count, nsd->options->rrl_size,
1408 		nsd->options->rrl_ratelimit,
1409 		nsd->options->rrl_whitelist_ratelimit,
1410 		nsd->options->rrl_slip,
1411 		nsd->options->rrl_ipv4_prefix_length,
1412 		nsd->options->rrl_ipv6_prefix_length);
1413 #endif /* RATELIMIT */
1414 
1415 	/* Open the database... */
1416 	if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) {
1417 		log_msg(LOG_ERR, "unable to open the database %s: %s",
1418 			nsd->dbfile, strerror(errno));
1419 		unlink(nsd->task[0]->fname);
1420 		unlink(nsd->task[1]->fname);
1421 #ifdef USE_ZONE_STATS
1422 		unlink(nsd->zonestatfname[0]);
1423 		unlink(nsd->zonestatfname[1]);
1424 #endif
1425 		xfrd_del_tempdir(nsd);
1426 		return -1;
1427 	}
1428 	/* check if zone files have been modified */
1429 	/* NULL for taskudb because we send soainfo in a moment, batched up,
1430 	 * for all zones */
1431 	if(nsd->options->zonefiles_check || (nsd->options->database == NULL ||
1432 		nsd->options->database[0] == 0))
1433 		namedb_check_zonefiles(nsd, nsd->options, NULL, NULL);
1434 	zonestatid_tree_set(nsd);
1435 
1436 	compression_table_capacity = 0;
1437 	initialize_dname_compression_tables(nsd);
1438 
1439 #ifdef	BIND8_STATS
1440 	/* Initialize times... */
1441 	time(&nsd->st.boot);
1442 	set_bind8_alarm(nsd);
1443 #endif /* BIND8_STATS */
1444 
1445 	return 0;
1446 }
1447 
1448 /*
1449  * Fork the required number of servers.
1450  */
1451 static int
1452 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
1453 	int* xfrd_sock_p)
1454 {
1455 	size_t i;
1456 
1457 	/* Start all child servers initially.  */
1458 	for (i = 0; i < nsd->child_count; ++i) {
1459 		nsd->children[i].pid = 0;
1460 	}
1461 
1462 	return restart_child_servers(nsd, region, netio, xfrd_sock_p);
1463 }
1464 
1465 static void
1466 server_close_socket(struct nsd_socket *sock)
1467 {
1468 	if(sock->s != -1) {
1469 		close(sock->s);
1470 		sock->s = -1;
1471 	}
1472 }
1473 
1474 void
1475 server_close_all_sockets(struct nsd_socket sockets[], size_t n)
1476 {
1477 	size_t i;
1478 
1479 	/* Close all the sockets... */
1480 	for (i = 0; i < n; ++i) {
1481 		server_close_socket(&sockets[i]);
1482 	}
1483 }
1484 
1485 /*
1486  * Close the sockets, shutdown the server and exit.
1487  * Does not return.
1488  */
1489 void
1490 server_shutdown(struct nsd *nsd)
1491 {
1492 	size_t i;
1493 
1494 	server_close_all_sockets(nsd->udp, nsd->ifs);
1495 	server_close_all_sockets(nsd->tcp, nsd->ifs);
1496 	/* CHILD: close command channel to parent */
1497 	if(nsd->this_child && nsd->this_child->parent_fd != -1)
1498 	{
1499 		close(nsd->this_child->parent_fd);
1500 		nsd->this_child->parent_fd = -1;
1501 	}
1502 	/* SERVER: close command channels to children */
1503 	if(!nsd->this_child)
1504 	{
1505 		for(i=0; i < nsd->child_count; ++i)
1506 			if(nsd->children[i].child_fd != -1)
1507 			{
1508 				close(nsd->children[i].child_fd);
1509 				nsd->children[i].child_fd = -1;
1510 			}
1511 	}
1512 
1513 	tsig_finalize();
1514 #ifdef HAVE_SSL
1515 	daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */
1516 	if (nsd->tls_ctx)
1517 		SSL_CTX_free(nsd->tls_ctx);
1518 #endif
1519 
1520 #ifdef MEMCLEAN /* OS collects memory pages */
1521 #ifdef RATELIMIT
1522 	rrl_mmap_deinit_keep_mmap();
1523 #endif
1524 #ifdef USE_DNSTAP
1525 	dt_collector_destroy(nsd->dt_collector, nsd);
1526 #endif
1527 	udb_base_free_keep_mmap(nsd->task[0]);
1528 	udb_base_free_keep_mmap(nsd->task[1]);
1529 	namedb_close_udb(nsd->db); /* keeps mmap */
1530 	namedb_close(nsd->db);
1531 	nsd_options_destroy(nsd->options);
1532 	region_destroy(nsd->region);
1533 #endif
1534 	log_finalize();
1535 	exit(0);
1536 }
1537 
/*
 * Create the two task udb files used to exchange work between the main
 * server and xfrd, and allocate the xfrd IPC listener structure (fd is
 * set later by server_start_xfrd).  Exits the process when a task file
 * cannot be created, removing already-created temp files first.
 */
void
server_prepare_xfrd(struct nsd* nsd)
{
	char tmpfile[256];
	/* create task mmaps */
	nsd->mytask = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[0] = task_file_create(tmpfile);
	if(!nsd->task[0]) {
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[1] = task_file_create(tmpfile);
	if(!nsd->task[1]) {
		/* the second task file failed: remove the first one too */
		unlink(nsd->task[0]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	assert(udb_base_get_userdata(nsd->task[0])->data == 0);
	assert(udb_base_get_userdata(nsd->task[1])->data == 0);
	/* create xfrd listener structure */
	nsd->xfrd_listener = region_alloc(nsd->region,
		sizeof(netio_handler_type));
	nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*)
		region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data));
	nsd->xfrd_listener->fd = -1;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd =
		nsd;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn =
		xfrd_tcp_create(nsd->region, QIOBUFSZ);
}
1580 
1581 
/*
 * (Re)start the xfrd process.  Creates a socketpair for IPC and forks:
 * the original process takes the 'default' branch and runs xfrd_init()
 * (marked ENOTREACH, so it does not return), while the forked child
 * (case 0) continues as the server with the other socket end installed
 * in nsd->xfrd_listener.
 * del_db: recreate the task udb that xfrd was using (it may be corrupt
 * after a crash).  reload_active: passed through to xfrd_init().
 */
void
server_start_xfrd(struct nsd *nsd, int del_db, int reload_active)
{
	pid_t pid;
	int sockets[2] = {0,0};
	struct ipc_handler_conn_data *data;

	if(nsd->xfrd_listener->fd != -1)
		close(nsd->xfrd_listener->fd);
	if(del_db) {
		/* recreate taskdb that xfrd was using, it may be corrupt */
		/* we (or reload) use nsd->mytask, and xfrd uses the other */
		char* tmpfile = nsd->task[1-nsd->mytask]->fname;
		nsd->task[1-nsd->mytask]->fname = NULL;
		/* free alloc already, so udb does not shrink itself */
		udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc);
		nsd->task[1-nsd->mytask]->alloc = NULL;
		udb_base_free(nsd->task[1-nsd->mytask]);
		/* create new file, overwrite the old one */
		nsd->task[1-nsd->mytask] = task_file_create(tmpfile);
		free(tmpfile);
	}
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
		log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
		return;
	}
	pid = fork();
	switch (pid) {
	case -1:
		log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
		break;
	default:
		/* PARENT: close first socket, use second one */
		close(sockets[0]);
		if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		if(del_db) xfrd_free_namedb(nsd);
		/* use other task than I am using, since if xfrd died and is
		 * restarted, the reload is using nsd->mytask */
		nsd->mytask = 1 - nsd->mytask;

#ifdef HAVE_SETPROCTITLE
		setproctitle("xfrd");
#endif
#ifdef HAVE_CPUSET_T
		if(nsd->use_cpu_affinity) {
			set_cpu_affinity(nsd->xfrd_cpuset);
		}
#endif

		/* this (parent) process becomes xfrd; does not return */
		xfrd_init(sockets[1], nsd, del_db, reload_active, pid);
		/* ENOTREACH */
		break;
	case 0:
		/* CHILD: close second socket, use first one */
		close(sockets[1]);
		if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		nsd->xfrd_listener->fd = sockets[0];
		break;
	}
	/* server-parent only */
	nsd->xfrd_listener->timeout = NULL;
	nsd->xfrd_listener->event_types = NETIO_EVENT_READ;
	nsd->xfrd_listener->event_handler = parent_handle_xfrd_command;
	/* clear ongoing ipc reads */
	data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data;
	data->conn->is_reading = 0;
}
1653 
1654 /** add all soainfo to taskdb */
1655 static void
1656 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb)
1657 {
1658 	struct radnode* n;
1659 	udb_ptr task_last; /* last task, mytask is empty so NULL */
1660 	/* add all SOA INFO to mytask */
1661 	udb_ptr_init(&task_last, taskudb);
1662 	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
1663 		task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0);
1664 	}
1665 	udb_ptr_unlink(&task_last, taskudb);
1666 }
1667 
/*
 * Fill the task udb with SOA information for all zones and hand it to
 * xfrd over the IPC socket, synchronizing with NSD_RELOAD /
 * NSD_RELOAD_DONE commands.  May not return: on a pending shutdown
 * hint it cleans up and calls server_shutdown(); on a missing start
 * signal from xfrd it exits.  See the comment below for the !shortsoa
 * vs shortsoa protocol difference.
 */
void
server_send_soa_xfrd(struct nsd* nsd, int shortsoa)
{
	/* normally this exchanges the SOA from nsd->xfrd and the expire back.
	 *   parent fills one taskdb with soas, xfrd fills other with expires.
	 *   then they exchange and process.
	 * shortsoa: xfrd crashes and needs to be restarted and one taskdb
	 *   may be in use by reload.  Fill SOA in taskdb and give to xfrd.
	 *   expire notifications can be sent back via a normal reload later
	 *   (xfrd will wait for current running reload to finish if any).
	 */
	sig_atomic_t cmd = 0;
	pid_t mypid;
	int xfrd_sock = nsd->xfrd_listener->fd;
	struct udb_base* taskudb = nsd->task[nsd->mytask];
	udb_ptr t;
	if(!shortsoa) {
		if(nsd->signal_hint_shutdown) {
		shutdown:
			log_msg(LOG_WARNING, "signal received, shutting down...");
			server_close_all_sockets(nsd->udp, nsd->ifs);
			server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
			daemon_remote_close(nsd->rc);
#endif
			/* Unlink it if possible... */
			unlinkpid(nsd->pidfile);
			unlink(nsd->task[0]->fname);
			unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
			unlink(nsd->zonestatfname[0]);
			unlink(nsd->zonestatfname[1]);
#endif
			/* write the nsd.db to disk, wait for it to complete */
			udb_base_sync(nsd->db->udb, 1);
			udb_base_close(nsd->db->udb);
			server_shutdown(nsd);
			/* ENOTREACH */
			exit(0);
		}
	}
	if(shortsoa) {
		/* put SOA in xfrd task because mytask may be in use */
		taskudb = nsd->task[1-nsd->mytask];
	}

	add_all_soa_to_task(nsd, taskudb);
	if(!shortsoa) {
		/* wait for xfrd to signal task is ready, RELOAD signal */
		if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) ||
			cmd != NSD_RELOAD) {
			log_msg(LOG_ERR, "did not get start signal from xfrd");
			exit(1);
		}
		/* a shutdown signal may have arrived while blocked above */
		if(nsd->signal_hint_shutdown) {
			goto shutdown;
		}
	}
	/* give xfrd our task, signal it with RELOAD_DONE */
	task_process_sync(taskudb);
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
			(int)nsd->pid, strerror(errno));
	}
	/* also tell xfrd which pid to associate with this reload */
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	if(!shortsoa) {
		/* process the xfrd task works (expiry data) */
		nsd->mytask = 1 - nsd->mytask;
		taskudb = nsd->task[nsd->mytask];
		task_remap(taskudb);
		udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
		while(!udb_ptr_is_null(&t)) {
			task_process_expire(nsd->db, TASKLIST(&t));
			udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next);
		}
		udb_ptr_unlink(&t, taskudb);
		task_clear(taskudb);

		/* tell xfrd that the task is emptied, signal with RELOAD_DONE */
		cmd = NSD_RELOAD_DONE;
		if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
			log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
				(int)nsd->pid, strerror(errno));
		}
	}
}
1760 
1761 #ifdef HAVE_SSL
1762 static void
1763 log_crypto_from_err(const char* str, unsigned long err)
1764 {
1765 	/* error:[error code]:[library name]:[function name]:[reason string] */
1766 	char buf[128];
1767 	unsigned long e;
1768 	ERR_error_string_n(err, buf, sizeof(buf));
1769 	log_msg(LOG_ERR, "%s crypto %s", str, buf);
1770 	while( (e=ERR_get_error()) ) {
1771 		ERR_error_string_n(e, buf, sizeof(buf));
1772 		log_msg(LOG_ERR, "and additionally crypto %s", buf);
1773 	}
1774 }
1775 
/* Log the most recent OpenSSL error (plus any queued behind it),
 * prefixed with str. */
void
log_crypto_err(const char* str)
{
	unsigned long err = ERR_get_error();
	log_crypto_from_err(str, err);
}
1781 
/** true if the ssl handshake error has to be squelched from the logs */
static int
squelch_err_ssl_handshake(unsigned long err)
{
	if(verbosity >= 3)
		return 0; /* only squelch on low verbosity */
	/* this is very specific, we could filter on ERR_GET_REASON()
	 * (the third element in ERR_PACK) */
	/* Squelched: common noise from clients speaking plain HTTP/HTTPS
	 * or unsupported protocol versions to the TLS port.  The list is
	 * guarded by ifdefs because the SSL_F_*/SSL_R_* codes vary across
	 * OpenSSL versions. */
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE)
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER)
#endif
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL)
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL)
#  ifdef SSL_R_VERSION_TOO_LOW
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW)
#  endif
#endif
		)
		return 1;
	return 0;
}
1808 
/*
 * One-time OpenSSL library initialization: load error strings and
 * algorithms (using the pre- or post-1.1.0 API as available) and make
 * sure the PRNG is seeded, falling back to a weak time/pid based seed
 * with a logged warning when no entropy is available.
 */
void
perform_openssl_init(void)
{
	/* init SSL library */
#ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS
	ERR_load_crypto_strings();
#endif
	ERR_load_SSL_strings();
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO)
	OpenSSL_add_all_algorithms();
#else
	OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS
		| OPENSSL_INIT_ADD_ALL_DIGESTS
		| OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL);
#endif
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL)
	(void)SSL_library_init();
#else
	OPENSSL_init_ssl(0, NULL);
#endif

	if(!RAND_status()) {
		/* try to seed it */
		unsigned char buf[256];
		unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid();
		size_t i;
		/* fill the buffer with a simple multiplicative sequence
		 * derived from the seed */
		v = seed;
		for(i=0; i<256/sizeof(v); i++) {
			memmove(buf+i*sizeof(v), &v, sizeof(v));
			v = v*seed + (unsigned int)i;
		}
		RAND_seed(buf, 256);
		log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time");
	}
}
1844 
1845 static int
1846 get_ocsp(char *filename, unsigned char **ocsp)
1847 {
1848 	BIO *bio;
1849 	OCSP_RESPONSE *response;
1850 	int len = -1;
1851 	unsigned char *p, *buf;
1852 	assert(filename);
1853 
1854 	if ((bio = BIO_new_file(filename, "r")) == NULL) {
1855 		log_crypto_err("get_ocsp: BIO_new_file failed");
1856 		return -1;
1857 	}
1858 
1859 	if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) {
1860 		log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed");
1861 		BIO_free(bio);
1862 		return -1;
1863 	}
1864 
1865 	if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) {
1866 		log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed");
1867 		OCSP_RESPONSE_free(response);
1868 		BIO_free(bio);
1869 		return -1;
1870 	}
1871 
1872 	if ((buf = malloc((size_t) len)) == NULL) {
1873 		log_msg(LOG_ERR, "get_ocsp: malloc failed");
1874 		OCSP_RESPONSE_free(response);
1875 		BIO_free(bio);
1876 		return -1;
1877 	}
1878 
1879 	p = buf;
1880 	if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) {
1881 		log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed");
1882 		free(buf);
1883 		OCSP_RESPONSE_free(response);
1884 		BIO_free(bio);
1885 		return -1;
1886 	}
1887 
1888 	OCSP_RESPONSE_free(response);
1889 	BIO_free(bio);
1890 
1891 	*ocsp = buf;
1892 	return len;
1893 }
1894 
/* further setup ssl ctx after the keys are loaded */
/* Enables ECDHE key exchange: via SSL_CTX_set_ecdh_auto where
 * available, otherwise by installing the P-256 curve explicitly with
 * SSL_CTX_set_tmp_ecdh; failures are logged but not fatal. */
static void
listen_sslctx_setup_2(void* ctxt)
{
	SSL_CTX* ctx = (SSL_CTX*)ctxt;
	(void)ctx;
#if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO
	if(!SSL_CTX_set_ecdh_auto(ctx,1)) {
		/* ENOTREACH */
		log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE");
	}
#elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME)
	if(1) {
		EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1);
		if (!ecdh) {
			log_crypto_err("could not find p256, not enabling ECDHE");
		} else {
			if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) {
				log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE");
			}
			EC_KEY_free (ecdh);
		}
	}
#endif
}
1920 
1921 static int
1922 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg))
1923 {
1924 	if(ocspdata) {
1925 		unsigned char *p;
1926 		if ((p=malloc(ocspdata_len)) == NULL) {
1927 			log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure");
1928 			return SSL_TLSEXT_ERR_NOACK;
1929 		}
1930 		memcpy(p, ocspdata, ocspdata_len);
1931 		if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) {
1932 			log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp");
1933 			free(p);
1934 			return SSL_TLSEXT_ERR_NOACK;
1935 		}
1936 		return SSL_TLSEXT_ERR_OK;
1937 	} else {
1938 		return SSL_TLSEXT_ERR_NOACK;
1939 	}
1940 }
1941 
1942 SSL_CTX*
1943 server_tls_ctx_setup(char* key, char* pem, char* verifypem)
1944 {
1945 	SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method());
1946 	if(!ctx) {
1947 		log_crypto_err("could not SSL_CTX_new");
1948 		return NULL;
1949 	}
1950 	/* no SSLv2, SSLv3 because has defects */
1951 #if SSL_OP_NO_SSLv2 != 0
1952 	if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){
1953 		log_crypto_err("could not set SSL_OP_NO_SSLv2");
1954 		SSL_CTX_free(ctx);
1955 		return NULL;
1956 	}
1957 #endif
1958 	if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3)
1959 		!= SSL_OP_NO_SSLv3){
1960 		log_crypto_err("could not set SSL_OP_NO_SSLv3");
1961 		SSL_CTX_free(ctx);
1962 		return 0;
1963 	}
1964 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1)
1965 	/* if we have tls 1.1 disable 1.0 */
1966 	if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1)
1967 		!= SSL_OP_NO_TLSv1){
1968 		log_crypto_err("could not set SSL_OP_NO_TLSv1");
1969 		SSL_CTX_free(ctx);
1970 		return 0;
1971 	}
1972 #endif
1973 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2)
1974 	/* if we have tls 1.2 disable 1.1 */
1975 	if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1)
1976 		!= SSL_OP_NO_TLSv1_1){
1977 		log_crypto_err("could not set SSL_OP_NO_TLSv1_1");
1978 		SSL_CTX_free(ctx);
1979 		return 0;
1980 	}
1981 #endif
1982 #if defined(SSL_OP_NO_RENEGOTIATION)
1983 	/* disable client renegotiation */
1984 	if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) &
1985 		SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) {
1986 		log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION");
1987 		SSL_CTX_free(ctx);
1988 		return 0;
1989 	}
1990 #endif
1991 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20)
1992 	/* if we have sha256, set the cipher list to have no known vulns */
1993 	if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20"))
1994 		log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list");
1995 #endif
1996 	if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) &
1997 		SSL_OP_CIPHER_SERVER_PREFERENCE) !=
1998 		SSL_OP_CIPHER_SERVER_PREFERENCE) {
1999 		log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE");
2000 		SSL_CTX_free(ctx);
2001 		return 0;
2002 	}
2003 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL
2004 	SSL_CTX_set_security_level(ctx, 0);
2005 #endif
2006 	if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) {
2007 		log_msg(LOG_ERR, "error for cert file: %s", pem);
2008 		log_crypto_err("error in SSL_CTX use_certificate_chain_file");
2009 		SSL_CTX_free(ctx);
2010 		return NULL;
2011 	}
2012 	if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) {
2013 		log_msg(LOG_ERR, "error for private key file: %s", key);
2014 		log_crypto_err("Error in SSL_CTX use_PrivateKey_file");
2015 		SSL_CTX_free(ctx);
2016 		return NULL;
2017 	}
2018 	if(!SSL_CTX_check_private_key(ctx)) {
2019 		log_msg(LOG_ERR, "error for key file: %s", key);
2020 		log_crypto_err("Error in SSL_CTX check_private_key");
2021 		SSL_CTX_free(ctx);
2022 		return NULL;
2023 	}
2024 	listen_sslctx_setup_2(ctx);
2025 	if(verifypem && verifypem[0]) {
2026 		if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) {
2027 			log_crypto_err("Error in SSL_CTX verify locations");
2028 			SSL_CTX_free(ctx);
2029 			return NULL;
2030 		}
2031 		SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem));
2032 		SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL);
2033 	}
2034 	return ctx;
2035 }
2036 
2037 SSL_CTX*
2038 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile)
2039 {
2040 	char *key, *pem;
2041 	SSL_CTX *ctx;
2042 
2043 	key = nsd->options->tls_service_key;
2044 	pem = nsd->options->tls_service_pem;
2045 	if(!key || key[0] == 0) {
2046 		log_msg(LOG_ERR, "error: no tls-service-key file specified");
2047 		return NULL;
2048 	}
2049 	if(!pem || pem[0] == 0) {
2050 		log_msg(LOG_ERR, "error: no tls-service-pem file specified");
2051 		return NULL;
2052 	}
2053 
2054 	/* NOTE:This mimics the existing code in Unbound 1.5.1 by supporting SSL but
2055 	 * raft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2*/
2056 	ctx = server_tls_ctx_setup(key, pem, verifypem);
2057 	if(!ctx) {
2058 		log_msg(LOG_ERR, "could not setup server TLS context");
2059 		return NULL;
2060 	}
2061 	if(ocspfile && ocspfile[0]) {
2062 		if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) {
2063 			log_crypto_err("Error reading OCSPfile");
2064 			SSL_CTX_free(ctx);
2065 			return NULL;
2066 		} else {
2067 			VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile));
2068 			if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) {
2069 				log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb");
2070 				SSL_CTX_free(ctx);
2071 				return NULL;
2072 			}
2073 		}
2074 	}
2075 	return ctx;
2076 }
2077 
/* check if tcp_handler_accept_data created for TLS dedicated port */
/*
 * Compare the port a socket is bound to against the configured tls_port
 * string.  Returns 1 when they match (the socket serves TLS), else 0.
 *
 * Fix: the IPv6 branch was guarded by #ifndef HAVE_STRUCT_SOCKADDR_IN6,
 * i.e. it referenced struct sockaddr_in6 exactly on systems that lack
 * it and never ran where the struct exists; the sense of the guard is
 * inverted, it must be #ifdef.
 */
int
using_tls_port(struct sockaddr* addr, const char* tls_port)
{
	in_port_t port = 0;

	if (addr->sa_family == AF_INET)
		port = ((struct sockaddr_in*)addr)->sin_port;
#ifdef HAVE_STRUCT_SOCKADDR_IN6
	else
		port = ((struct sockaddr_in6*)addr)->sin6_port;
#endif /* HAVE_STRUCT_SOCKADDR_IN6 */
	if (atoi(tls_port) == ntohs(port))
		return 1;

	return 0;
}
2095 #endif
2096 
2097 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */
2098 ssize_t
2099 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout)
2100 {
2101 	uint8_t* buf = (uint8_t*) p;
2102 	ssize_t total = 0;
2103 	struct pollfd fd;
2104 	memset(&fd, 0, sizeof(fd));
2105 	fd.fd = s;
2106 	fd.events = POLLIN;
2107 
2108 	while( total < sz) {
2109 		ssize_t ret;
2110 		ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000);
2111 		if(ret == -1) {
2112 			if(errno == EAGAIN)
2113 				/* blocking read */
2114 				continue;
2115 			if(errno == EINTR) {
2116 				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
2117 					return -1;
2118 				/* other signals can be handled later */
2119 				continue;
2120 			}
2121 			/* some error */
2122 			return -1;
2123 		}
2124 		if(ret == 0) {
2125 			/* operation timed out */
2126 			return -2;
2127 		}
2128 		ret = read(s, buf+total, sz-total);
2129 		if(ret == -1) {
2130 			if(errno == EAGAIN)
2131 				/* blocking read */
2132 				continue;
2133 			if(errno == EINTR) {
2134 				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
2135 					return -1;
2136 				/* other signals can be handled later */
2137 				continue;
2138 			}
2139 			/* some error */
2140 			return -1;
2141 		}
2142 		if(ret == 0) {
2143 			/* closed connection! */
2144 			return 0;
2145 		}
2146 		total += ret;
2147 	}
2148 	return total;
2149 }
2150 
/*
 * Apply the tasks queued by xfrd in this reload's task udb.  Each task
 * is processed via task_process_in_reload(), which appends its result
 * after *last_task.  Between tasks, cmdsocket is polled (zero timeout)
 * so an NSD_QUIT from the main process makes us unlink remaining
 * transfer files and exit immediately.
 */
static void
reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket)
{
	sig_atomic_t cmd = NSD_QUIT_SYNC;
	udb_ptr t, next;
	udb_base* u = nsd->task[nsd->mytask];
	udb_ptr_init(&next, u);
	/* take the task list head and detach it from the udb userdata */
	udb_ptr_new(&t, u, udb_base_get_userdata(u));
	udb_base_set_userdata(u, 0);
	while(!udb_ptr_is_null(&t)) {
		/* store next in list so this one can be deleted or reused */
		udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next);
		udb_rptr_zero(&TASKLIST(&t)->next, u);

		/* process task t */
		/* append results for task t and update last_task */
		task_process_in_reload(nsd, u, last_task, &t);

		/* go to next */
		udb_ptr_set_ptr(&t, u, &next);

		/* if the parent has quit, we must quit too, poll the fd for cmds */
		if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
			if(cmd == NSD_QUIT) {
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
				/* sync to disk (if needed) */
				udb_base_sync(nsd->db->udb, 0);
				/* unlink files of remainder of tasks */
				while(!udb_ptr_is_null(&t)) {
					if(TASKLIST(&t)->task_type == task_apply_xfr) {
						xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno);
					}
					udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next);
				}
				udb_ptr_unlink(&t, u);
				udb_ptr_unlink(&next, u);
				exit(0);
			}
		}

	}
	udb_ptr_unlink(&t, u);
	udb_ptr_unlink(&next, u);
}
2196 
2197 #ifdef BIND8_STATS
2198 static void
2199 parent_send_stats(struct nsd* nsd, int cmdfd)
2200 {
2201 	size_t i;
2202 	if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) {
2203 		log_msg(LOG_ERR, "could not write stats to reload");
2204 		return;
2205 	}
2206 	for(i=0; i<nsd->child_count; i++)
2207 		if(!write_socket(cmdfd, &nsd->children[i].query_count,
2208 			sizeof(stc_type))) {
2209 			log_msg(LOG_ERR, "could not write stats to reload");
2210 			return;
2211 		}
2212 }
2213 
/*
 * In the reload process: read the old parent's statistics block from
 * cmdfd, fill in the current database disk/memory sizes, and record it
 * as a stat_info task appended after *last so xfrd can pick it up.
 * The per-child query counters follow the stats block on the socket.
 */
static void
reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last)
{
	struct nsdst s;
	stc_type* p;
	size_t i;
	if(block_read(nsd, cmdfd, &s, sizeof(s),
		RELOAD_SYNC_TIMEOUT) != sizeof(s)) {
		log_msg(LOG_ERR, "could not read stats from oldpar");
		return;
	}
	s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0);
	s.db_mem = region_get_mem(nsd->db->region);
	p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s,
		nsd->child_count);
	if(!p) return;
	for(i=0; i<nsd->child_count; i++) {
		/* best effort: give each counter read a 1 second timeout
		 * and stop silently on a short read */
		if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!=
			sizeof(stc_type))
			return;
	}
}
2236 #endif /* BIND8_STATS */
2237 
/*
 * Reload the database, stop parent, re-fork children and continue.
 * as server_main.
 *
 * Runs in the freshly forked reload process; cmdsocket connects to the
 * old main process.  On return, the caller becomes the new server_main.
 */
static void
server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
	int cmdsocket)
{
	pid_t mypid;
	sig_atomic_t cmd = NSD_QUIT_SYNC;
	int ret;
	udb_ptr last_task;
	struct sigaction old_sigchld, ign_sigchld;
	/* ignore SIGCHLD from the previous server_main that used this pid */
	memset(&ign_sigchld, 0, sizeof(ign_sigchld));
	ign_sigchld.sa_handler = SIG_IGN;
	sigaction(SIGCHLD, &ign_sigchld, &old_sigchld);

#ifdef HAVE_SETPROCTITLE
	setproctitle("main");
#endif
#ifdef HAVE_CPUSET_T
	if(nsd->use_cpu_affinity) {
		set_cpu_affinity(nsd->cpuset);
	}
#endif

	/* see what tasks we got from xfrd */
	task_remap(nsd->task[nsd->mytask]);
	udb_ptr_init(&last_task, nsd->task[nsd->mytask]);
	/* hold off udb compaction while tasks modify the database */
	udb_compact_inhibited(nsd->db->udb, 1);
	reload_process_tasks(nsd, &last_task, cmdsocket);
	udb_compact_inhibited(nsd->db->udb, 0);
	udb_compact(nsd->db->udb);

#ifndef NDEBUG
	if(nsd_debug_level >= 1)
		region_log_stats(nsd->db->region);
#endif /* NDEBUG */
	/* sync to disk (if needed) */
	udb_base_sync(nsd->db->udb, 0);

	initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
	/* Restart dumping stats if required.  */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif
#ifdef USE_ZONE_STATS
	server_zonestat_realloc(nsd); /* realloc for new children */
	server_zonestat_switch(nsd);
#endif

	/* listen for the signals of failed children again */
	sigaction(SIGCHLD, &old_sigchld, NULL);
	/* Start new child processes */
	if (server_start_children(nsd, server_region, netio, &nsd->
		xfrd_listener->fd) != 0) {
		send_children_quit(nsd);
		exit(1);
	}

	/* if the parent has quit, we must quit too, poll the fd for cmds */
	if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
		if(cmd == NSD_QUIT) {
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
			send_children_quit(nsd);
			exit(0);
		}
	}

	/* Send quit command to parent: blocking, wait for receipt. */
	do {
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
		if (!write_socket(cmdsocket, &cmd, sizeof(cmd)))
		{
			log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s",
				strerror(errno));
		}
		/* blocking: wait for parent to really quit. (it sends RELOAD as ack) */
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main"));
		ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd),
			RELOAD_SYNC_TIMEOUT);
		if(ret == -2) {
			DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry"));
		}
	} while (ret == -2);
	if(ret == -1) {
		log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s",
			strerror(errno));
	}
	DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd));
	if(cmd == NSD_QUIT) {
		/* small race condition possible here, parent got quit cmd. */
		send_children_quit(nsd);
		exit(1);
	}
	assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD);
#ifdef BIND8_STATS
	reload_do_stats(cmdsocket, nsd, &last_task);
#endif
	udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
	task_process_sync(nsd->task[nsd->mytask]);
#ifdef USE_ZONE_STATS
	server_zonestat_realloc(nsd); /* realloc for next children */
#endif

	/* send soainfo to the xfrd process, signal it that reload is done,
	 * it picks up the taskudb */
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(nsd->xfrd_listener->fd, &cmd,  sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending reload_done xfrd: %s",
			strerror(errno));
	}
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	/* try to reopen file */
	if (nsd->file_rotation_ok)
		log_reopen(nsd->log_filename, 1);
	/* exit reload, continue as new server_main */
}
2365 
2366 /*
2367  * Get the mode depending on the signal hints that have been received.
2368  * Multiple signal hints can be received and will be handled in turn.
2369  */
2370 static sig_atomic_t
2371 server_signal_mode(struct nsd *nsd)
2372 {
2373 	if(nsd->signal_hint_quit) {
2374 		nsd->signal_hint_quit = 0;
2375 		return NSD_QUIT;
2376 	}
2377 	else if(nsd->signal_hint_shutdown) {
2378 		nsd->signal_hint_shutdown = 0;
2379 		return NSD_SHUTDOWN;
2380 	}
2381 	else if(nsd->signal_hint_child) {
2382 		nsd->signal_hint_child = 0;
2383 		return NSD_REAP_CHILDREN;
2384 	}
2385 	else if(nsd->signal_hint_reload) {
2386 		nsd->signal_hint_reload = 0;
2387 		return NSD_RELOAD;
2388 	}
2389 	else if(nsd->signal_hint_reload_hup) {
2390 		nsd->signal_hint_reload_hup = 0;
2391 		return NSD_RELOAD_REQ;
2392 	}
2393 	else if(nsd->signal_hint_stats) {
2394 		nsd->signal_hint_stats = 0;
2395 #ifdef BIND8_STATS
2396 		set_bind8_alarm(nsd);
2397 #endif
2398 		return NSD_STATS;
2399 	}
2400 	else if(nsd->signal_hint_statsusr) {
2401 		nsd->signal_hint_statsusr = 0;
2402 		return NSD_STATS;
2403 	}
2404 	return NSD_RUN;
2405 }
2406 
/*
 * The main server simply waits for signals and child processes to
 * terminate.  Child processes are restarted as necessary.
 *
 * This is a state machine driven by nsd->mode: it reaps and restarts
 * query-serving children, forks the reload process on NSD_RELOAD, and
 * coordinates the quit handshake between the old main, the reload
 * process and xfrd before shutting down.
 */
void
server_main(struct nsd *nsd)
{
	region_type *server_region = region_create(xalloc, free);
	netio_type *netio = netio_create(server_region);
	netio_handler_type reload_listener;
	int reload_sockets[2] = {-1, -1};
	struct timespec timeout_spec;
	int status;
	pid_t child_pid;
	pid_t reload_pid = -1;
	sig_atomic_t mode;

	/* Ensure we are the main process */
	assert(nsd->server_kind == NSD_SERVER_MAIN);

	/* Add listener for the XFRD process */
	netio_add_handler(netio, nsd->xfrd_listener);

	/* Start the child processes that handle incoming queries */
	if (server_start_children(nsd, server_region, netio,
		&nsd->xfrd_listener->fd) != 0) {
		send_children_quit(nsd);
		exit(1);
	}
	reload_listener.fd = -1;

	/* This_child MUST be 0, because this is the parent process */
	assert(nsd->this_child == 0);

	/* Run the server until we get a shutdown signal */
	while ((mode = nsd->mode) != NSD_SHUTDOWN) {
		/* Did we receive a signal that changes our mode? */
		if(mode == NSD_RUN) {
			nsd->mode = mode = server_signal_mode(nsd);
		}

		switch (mode) {
		case NSD_RUN:
			/* see if any child processes terminated */
			while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) {
				int is_child = delete_child_pid(nsd, child_pid);
				if (is_child != -1 && nsd->children[is_child].need_to_exit) {
					if(nsd->children[is_child].child_fd == -1)
						nsd->children[is_child].has_exited = 1;
					parent_check_all_children_exited(nsd);
				} else if(is_child != -1) {
					/* unexpected child death: replace it */
					log_msg(LOG_WARNING,
					       "server %d died unexpectedly with status %d, restarting",
					       (int) child_pid, status);
					restart_child_servers(nsd, server_region, netio,
						&nsd->xfrd_listener->fd);
				} else if (child_pid == reload_pid) {
					/* the reload process died: keep serving
					 * from the old database and tell xfrd */
					sig_atomic_t cmd = NSD_RELOAD_DONE;
					pid_t mypid;
					log_msg(LOG_WARNING,
					       "Reload process %d failed with status %d, continuing with old database",
					       (int) child_pid, status);
					reload_pid = -1;
					if(reload_listener.fd != -1) close(reload_listener.fd);
					reload_listener.fd = -1;
					reload_listener.event_types = NETIO_EVENT_NONE;
					task_process_sync(nsd->task[nsd->mytask]);
					/* inform xfrd reload attempt ended */
					if(!write_socket(nsd->xfrd_listener->fd,
						&cmd, sizeof(cmd))) {
						log_msg(LOG_ERR, "problems "
						  "sending SOAEND to xfrd: %s",
						  strerror(errno));
					}
					mypid = getpid();
					if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
						log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
							strerror(errno));
					}
				} else if(status != 0) {
					/* check for status, because we get
					 * the old-servermain because reload
					 * is the process-parent of old-main,
					 * and we get older server-processes
					 * that are exiting after a reload */
					log_msg(LOG_WARNING,
					       "process %d terminated with status %d",
					       (int) child_pid, status);
				}
			}
			if (child_pid == -1) {
				if (errno == EINTR) {
					continue;
				}
				if (errno != ECHILD)
					log_msg(LOG_WARNING, "wait failed: %s", strerror(errno));
			}
			if (nsd->mode != NSD_RUN)
				break;

			/* timeout to collect processes. In case no sigchild happens. */
			timeout_spec.tv_sec = 60;
			timeout_spec.tv_nsec = 0;

			/* listen on ports, timeout for collecting terminated children */
			if(netio_dispatch(netio, &timeout_spec, 0) == -1) {
				if (errno != EINTR) {
					log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
				}
			}
			if(nsd->restart_children) {
				restart_child_servers(nsd, server_region, netio,
					&nsd->xfrd_listener->fd);
				nsd->restart_children = 0;
			}
			if(nsd->reload_failed) {
				/* reload reported failure via ipc (not via
				 * waitpid); same cleanup as a dead reload */
				sig_atomic_t cmd = NSD_RELOAD_DONE;
				pid_t mypid;
				nsd->reload_failed = 0;
				log_msg(LOG_WARNING,
				       "Reload process %d failed, continuing with old database",
				       (int) reload_pid);
				reload_pid = -1;
				if(reload_listener.fd != -1) close(reload_listener.fd);
				reload_listener.fd = -1;
				reload_listener.event_types = NETIO_EVENT_NONE;
				task_process_sync(nsd->task[nsd->mytask]);
				/* inform xfrd reload attempt ended */
				if(!write_socket(nsd->xfrd_listener->fd,
					&cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "problems "
					  "sending SOAEND to xfrd: %s",
					  strerror(errno));
				}
				mypid = getpid();
				if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
					log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
						strerror(errno));
				}
			}

			break;
		case NSD_RELOAD_REQ: {
			/* SIGHUP: forward the reload request to xfrd,
			 * which owns the reload decision */
			sig_atomic_t cmd = NSD_RELOAD_REQ;
			log_msg(LOG_WARNING, "SIGHUP received, reloading...");
			DEBUG(DEBUG_IPC,1, (LOG_INFO,
				"main: ipc send reload_req to xfrd"));
			if(!write_socket(nsd->xfrd_listener->fd,
				&cmd, sizeof(cmd))) {
				log_msg(LOG_ERR, "server_main: could not send "
				"reload_req to xfrd: %s", strerror(errno));
			}
			nsd->mode = NSD_RUN;
			} break;
		case NSD_RELOAD:
			/* Continue to run nsd after reload */
			nsd->mode = NSD_RUN;
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading..."));
			if (reload_pid != -1) {
				log_msg(LOG_WARNING, "Reload already in progress (pid = %d)",
				       (int) reload_pid);
				break;
			}

			/* switch the mytask to keep track of who owns task*/
			nsd->mytask = 1 - nsd->mytask;
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) {
				log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno));
				reload_pid = -1;
				break;
			}

			/* Do actual reload */
			reload_pid = fork();
			switch (reload_pid) {
			case -1:
				log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
				break;
			default:
				/* PARENT */
				/* NOTE: the fork parent becomes the reload
				 * process here and returns from server_reload
				 * as the new main; the fork child (below)
				 * keeps running the old server_main */
				close(reload_sockets[0]);
				server_reload(nsd, server_region, netio,
					reload_sockets[1]);
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main"));
				close(reload_sockets[1]);
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed"));
				/* drop stale xfrd ipc data */
				((struct ipc_handler_conn_data*)nsd->
					xfrd_listener->user_data)
					->conn->is_reading = 0;
				reload_pid = -1;
				reload_listener.fd = -1;
				reload_listener.event_types = NETIO_EVENT_NONE;
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run"));
				break;
			case 0:
				/* CHILD */
				/* server_main keep running until NSD_QUIT_SYNC
				 * received from reload. */
				close(reload_sockets[1]);
				reload_listener.fd = reload_sockets[0];
				reload_listener.timeout = NULL;
				reload_listener.user_data = nsd;
				reload_listener.event_types = NETIO_EVENT_READ;
				reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */
				netio_add_handler(netio, &reload_listener);
				reload_pid = getppid();
				break;
			}
			break;
		case NSD_QUIT_SYNC:
			/* synchronisation of xfrd, parent and reload */
			if(!nsd->quit_sync_done && reload_listener.fd != -1) {
				sig_atomic_t cmd = NSD_RELOAD;
				/* stop xfrd ipc writes in progress */
				DEBUG(DEBUG_IPC,1, (LOG_INFO,
					"main: ipc send indication reload"));
				if(!write_socket(nsd->xfrd_listener->fd,
					&cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "server_main: could not send reload "
					"indication to xfrd: %s", strerror(errno));
				}
				/* wait for ACK from xfrd */
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd"));
				nsd->quit_sync_done = 1;
			}
			nsd->mode = NSD_RUN;
			break;
		case NSD_QUIT:
			/* silent shutdown during reload */
			if(reload_listener.fd != -1) {
				/* acknowledge the quit, to sync reload that we will really quit now */
				sig_atomic_t cmd = NSD_RELOAD;
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload"));
				if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "server_main: "
						"could not ack quit: %s", strerror(errno));
				}
#ifdef BIND8_STATS
				parent_send_stats(nsd, reload_listener.fd);
#endif /* BIND8_STATS */
				close(reload_listener.fd);
			}
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence"));
			/* only quit children after xfrd has acked */
			send_children_quit(nsd);

#ifdef MEMCLEAN /* OS collects memory pages */
			region_destroy(server_region);
#endif
			server_shutdown(nsd);

			/* ENOTREACH */
			break;
		case NSD_SHUTDOWN:
			break;
		case NSD_REAP_CHILDREN:
			/* continue; wait for child in run loop */
			nsd->mode = NSD_RUN;
			break;
		case NSD_STATS:
#ifdef BIND8_STATS
			set_children_stats(nsd);
#endif
			nsd->mode = NSD_RUN;
			break;
		default:
			log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode);
			nsd->mode = NSD_RUN;
			break;
		}
	}
	log_msg(LOG_WARNING, "signal received, shutting down...");

	/* close opened ports to avoid race with restart of nsd */
	server_close_all_sockets(nsd->udp, nsd->ifs);
	server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
	daemon_remote_close(nsd->rc);
#endif
	send_children_quit_and_wait(nsd);

	/* Unlink it if possible... */
	unlinkpid(nsd->pidfile);
	unlink(nsd->task[0]->fname);
	unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
	unlink(nsd->zonestatfname[0]);
	unlink(nsd->zonestatfname[1]);
#endif
#ifdef USE_DNSTAP
	dt_collector_close(nsd->dt_collector, nsd);
#endif

	if(reload_listener.fd != -1) {
		/* tell a running reload process to quit and wait for it */
		sig_atomic_t cmd = NSD_QUIT;
		DEBUG(DEBUG_IPC,1, (LOG_INFO,
			"main: ipc send quit to reload-process"));
		if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "server_main: could not send quit to reload: %s",
				strerror(errno));
		}
		fsync(reload_listener.fd);
		close(reload_listener.fd);
		/* wait for reload to finish processing */
		while(1) {
			if(waitpid(reload_pid, NULL, 0) == -1) {
				if(errno == EINTR) continue;
				if(errno == ECHILD) break;
				log_msg(LOG_ERR, "waitpid(reload %d): %s",
					(int)reload_pid, strerror(errno));
			}
			break;
		}
	}
	if(nsd->xfrd_listener->fd != -1) {
		/* complete quit, stop xfrd */
		sig_atomic_t cmd = NSD_QUIT;
		DEBUG(DEBUG_IPC,1, (LOG_INFO,
			"main: ipc send quit to xfrd"));
		if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s",
				strerror(errno));
		}
		fsync(nsd->xfrd_listener->fd);
		close(nsd->xfrd_listener->fd);
		(void)kill(nsd->pid, SIGTERM);
	}

#ifdef MEMCLEAN /* OS collects memory pages */
	region_destroy(server_region);
#endif
	/* write the nsd.db to disk, wait for it to complete */
	udb_base_sync(nsd->db->udb, 1);
	udb_base_close(nsd->db->udb);
	server_shutdown(nsd);
}
2744 
/*
 * Plain query processing wrapper with no rate limiting applied
 * (contrast with server_process_query_udp below).
 */
static query_state_type
server_process_query(struct nsd *nsd, struct query *query)
{
	return query_process(query, nsd);
}
2750 
/*
 * Process a query for the UDP path.  When compiled with RATELIMIT,
 * processed answers are additionally run through the response rate
 * limiter, which may let the reply through, or slip/discard it.
 */
static query_state_type
server_process_query_udp(struct nsd *nsd, struct query *query)
{
#ifdef RATELIMIT
	if(query_process(query, nsd) != QUERY_DISCARDED) {
		/* only rate-limit queries that produced an answer */
		if(rrl_process_query(query))
			return rrl_slip(query);
		else	return QUERY_PROCESSED;
	}
	return QUERY_DISCARDED;
#else
	return query_process(query, nsd);
#endif
}
2765 
/*
 * Return the version string of the event library in use; the builtin
 * mini event implementation has no version and yields "".
 */
const char*
nsd_event_vs(void)
{
#ifdef USE_MINI_EVENT
	return "";
#else
	return event_get_version();
#endif
}
2775 
#if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS)
/* Map a libev backend identifier to a short human-readable name. */
static const char* ub_ev_backend2str(int b)
{
	switch(b) {
	case EVBACKEND_SELECT:	return "select";
	case EVBACKEND_POLL:	return "poll";
	case EVBACKEND_EPOLL:	return "epoll";
	case EVBACKEND_KQUEUE:	return "kqueue";
	case EVBACKEND_DEVPOLL: return "devpoll";
	case EVBACKEND_PORT:	return "evport";
	}
	return "unknown";
}
#endif
2790 
/*
 * Return the name of the event dispatch method (e.g. "select",
 * "epoll", "kqueue") used by the child event base.  Creates a
 * temporary event base to query it.
 */
const char*
nsd_event_method(void)
{
#ifdef USE_MINI_EVENT
	return "select";
#else
	struct event_base* b = nsd_child_event_base();
	const char* m = "?";
#  ifdef EV_FEATURE_BACKENDS
	/* libev: query the backend of the default loop */
	m = ub_ev_backend2str(ev_backend((struct ev_loop*)b));
#  elif defined(HAVE_EVENT_BASE_GET_METHOD)
	/* libevent */
	m = event_base_get_method(b);
#  endif
#  ifdef MEMCLEAN
	event_base_free(b);
#  endif
	return m;
#endif
}
2810 
/*
 * Create the event base for a child server process, using whichever
 * event library was selected at configure time (mini event, libev or
 * libevent).  Returns NULL on failure (caller checks).
 */
struct event_base*
nsd_child_event_base(void)
{
	struct event_base* base;
#ifdef USE_MINI_EVENT
	/* mini event keeps its clock in these statics */
	static time_t secs;
	static struct timeval now;
	base = event_init(&secs, &now);
#else
#  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
	/* libev */
	base = (struct event_base *)ev_default_loop(EVFLAG_AUTO);
#  else
	/* libevent */
#    ifdef HAVE_EVENT_BASE_NEW
	base = event_base_new();
#    else
	base = event_init();
#    endif
#  endif
#endif
	return base;
}
2834 
2835 static void
2836 add_udp_handler(
2837 	struct nsd *nsd,
2838 	struct nsd_socket *sock,
2839 	struct udp_handler_data *data)
2840 {
2841 	struct event *handler = &data->event;
2842 
2843 	data->nsd = nsd;
2844 	data->socket = sock;
2845 
2846 	memset(handler, 0, sizeof(*handler));
2847 	event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data);
2848 	if(event_base_set(nsd->event_base, handler) != 0)
2849 		log_msg(LOG_ERR, "nsd udp: event_base_set failed");
2850 	if(event_add(handler, NULL) != 0)
2851 		log_msg(LOG_ERR, "nsd udp: event_add failed");
2852 }
2853 
2854 void
2855 add_tcp_handler(
2856 	struct nsd *nsd,
2857 	struct nsd_socket *sock,
2858 	struct tcp_accept_handler_data *data)
2859 {
2860 	struct event *handler = &data->event;
2861 
2862 	data->nsd = nsd;
2863 	data->socket = sock;
2864 
2865 #ifdef HAVE_SSL
2866 	if (nsd->tls_ctx &&
2867 	    nsd->options->tls_port &&
2868 	    using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port))
2869 	{
2870 		data->tls_accept = 1;
2871 		if(verbosity >= 2) {
2872 			char buf[48];
2873 			addrport2str((struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf));
2874 			VERBOSITY(2, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf));
2875 		}
2876 	} else {
2877 		data->tls_accept = 0;
2878 	}
2879 #endif
2880 
2881 	memset(handler, 0, sizeof(*handler));
2882 	event_set(handler, sock->s, EV_PERSIST|EV_READ,	handle_tcp_accept, data);
2883 	if(event_base_set(nsd->event_base, handler) != 0)
2884 		log_msg(LOG_ERR, "nsd tcp: event_base_set failed");
2885 	if(event_add(handler, NULL) != 0)
2886 		log_msg(LOG_ERR, "nsd tcp: event_add failed");
2887 	data->event_added = 1;
2888 }
2889 
2890 /*
2891  * Serve DNS requests.
2892  */
2893 void
2894 server_child(struct nsd *nsd)
2895 {
2896 	size_t i, from, numifs;
2897 	region_type *server_region = region_create(xalloc, free);
2898 	struct event_base* event_base = nsd_child_event_base();
2899 	sig_atomic_t mode;
2900 
2901 	if(!event_base) {
2902 		log_msg(LOG_ERR, "nsd server could not create event base");
2903 		exit(1);
2904 	}
2905 	nsd->event_base = event_base;
2906 	nsd->server_region = server_region;
2907 
2908 #ifdef RATELIMIT
2909 	rrl_init(nsd->this_child->child_num);
2910 #endif
2911 
2912 	assert(nsd->server_kind != NSD_SERVER_MAIN);
2913 	DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started"));
2914 
2915 #ifdef HAVE_SETPROCTITLE
2916 	setproctitle("server %d", nsd->this_child->child_num + 1);
2917 #endif
2918 #ifdef HAVE_CPUSET_T
2919 	if(nsd->use_cpu_affinity) {
2920 		set_cpu_affinity(nsd->this_child->cpuset);
2921 	}
2922 #endif
2923 
2924 	if (!(nsd->server_kind & NSD_SERVER_TCP)) {
2925 		server_close_all_sockets(nsd->tcp, nsd->ifs);
2926 	}
2927 	if (!(nsd->server_kind & NSD_SERVER_UDP)) {
2928 		server_close_all_sockets(nsd->udp, nsd->ifs);
2929 	}
2930 
2931 	if (nsd->this_child->parent_fd != -1) {
2932 		struct event *handler;
2933 		struct ipc_handler_conn_data* user_data =
2934 			(struct ipc_handler_conn_data*)region_alloc(
2935 			server_region, sizeof(struct ipc_handler_conn_data));
2936 		user_data->nsd = nsd;
2937 		user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ);
2938 
2939 		handler = (struct event*) region_alloc(
2940 			server_region, sizeof(*handler));
2941 		memset(handler, 0, sizeof(*handler));
2942 		event_set(handler, nsd->this_child->parent_fd, EV_PERSIST|
2943 			EV_READ, child_handle_parent_command, user_data);
2944 		if(event_base_set(event_base, handler) != 0)
2945 			log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed");
2946 		if(event_add(handler, NULL) != 0)
2947 			log_msg(LOG_ERR, "nsd ipcchild: event_add failed");
2948 	}
2949 
2950 	if(nsd->reuseport) {
2951 		numifs = nsd->ifs / nsd->reuseport;
2952 		from = numifs * nsd->this_child->child_num;
2953 		if(from+numifs > nsd->ifs) { /* should not happen */
2954 			from = 0;
2955 			numifs = nsd->ifs;
2956 		}
2957 	} else {
2958 		from = 0;
2959 		numifs = nsd->ifs;
2960 	}
2961 
2962 	if (nsd->server_kind & NSD_SERVER_UDP) {
2963 		int child = nsd->this_child->child_num;
2964 		memset(msgs, 0, sizeof(msgs));
2965 		for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
2966 			queries[i] = query_create(server_region,
2967 				compressed_dname_offsets,
2968 				compression_table_size, compressed_dnames);
2969 			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
2970 			iovecs[i].iov_base          = buffer_begin(queries[i]->packet);
2971 			iovecs[i].iov_len           = buffer_remaining(queries[i]->packet);
2972 			msgs[i].msg_hdr.msg_iov     = &iovecs[i];
2973 			msgs[i].msg_hdr.msg_iovlen  = 1;
2974 			msgs[i].msg_hdr.msg_name    = &queries[i]->addr;
2975 			msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
2976 		}
2977 
2978 		for (i = 0; i < nsd->ifs; i++) {
2979 			int listen;
2980 			struct udp_handler_data *data;
2981 
2982 			listen = nsd_bitset_isset(nsd->udp[i].servers, child);
2983 
2984 			if(i >= from && i < (from + numifs) && listen) {
2985 				data = region_alloc_zero(
2986 					nsd->server_region, sizeof(*data));
2987 				add_udp_handler(nsd, &nsd->udp[i], data);
2988 			} else {
2989 				/* close sockets intended for other servers */
2990 				server_close_socket(&nsd->udp[i]);
2991 			}
2992 		}
2993 	}
2994 
2995 	/*
2996 	 * Keep track of all the TCP accept handlers so we can enable
2997 	 * and disable them based on the current number of active TCP
2998 	 * connections.
2999 	 */
3000 	if (nsd->server_kind & NSD_SERVER_TCP) {
3001 		int child = nsd->this_child->child_num;
3002 		tcp_accept_handler_count = numifs;
3003 		tcp_accept_handlers = region_alloc_array(server_region,
3004 			numifs, sizeof(*tcp_accept_handlers));
3005 
3006 		for (i = 0; i < nsd->ifs; i++) {
3007 			int listen;
3008 			struct tcp_accept_handler_data *data;
3009 
3010 			listen = nsd_bitset_isset(nsd->tcp[i].servers, child);
3011 
3012 			if(i >= from && i < (from + numifs) && listen) {
3013 				data = &tcp_accept_handlers[i-from];
3014 				memset(data, 0, sizeof(*data));
3015 				add_tcp_handler(nsd, &nsd->tcp[i], data);
3016 			} else {
3017 				/* close sockets intended for other servers */
3018 				/*
3019 				 * uncomment this once tcp servers are no
3020 				 * longer copied in the tcp fd copy line
3021 				 * in server_init().
3022 				server_close_socket(&nsd->tcp[i]);
3023 				*/
3024 				/* close sockets not meant for this server*/
3025 				if(!listen)
3026 					server_close_socket(&nsd->tcp[i]);
3027 			}
3028 		}
3029 	} else {
3030 		tcp_accept_handler_count = 0;
3031 	}
3032 
3033 	/* The main loop... */
3034 	while ((mode = nsd->mode) != NSD_QUIT) {
3035 		if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd);
3036 
3037 		/* Do we need to do the statistics... */
3038 		if (mode == NSD_STATS) {
3039 #ifdef BIND8_STATS
3040 			int p = nsd->st.period;
3041 			nsd->st.period = 1; /* force stats printout */
3042 			/* Dump the statistics */
3043 			bind8_stats(nsd);
3044 			nsd->st.period = p;
3045 #else /* !BIND8_STATS */
3046 			log_msg(LOG_NOTICE, "Statistics support not enabled at compile time.");
3047 #endif /* BIND8_STATS */
3048 
3049 			nsd->mode = NSD_RUN;
3050 		}
3051 		else if (mode == NSD_REAP_CHILDREN) {
3052 			/* got signal, notify parent. parent reaps terminated children. */
3053 			if (nsd->this_child->parent_fd != -1) {
3054 				sig_atomic_t parent_notify = NSD_REAP_CHILDREN;
3055 				if (write(nsd->this_child->parent_fd,
3056 				    &parent_notify,
3057 				    sizeof(parent_notify)) == -1)
3058 				{
3059 					log_msg(LOG_ERR, "problems sending command from %d to parent: %s",
3060 						(int) nsd->this_child->pid, strerror(errno));
3061 				}
3062 			} else /* no parent, so reap 'em */
3063 				while (waitpid(-1, NULL, WNOHANG) > 0) ;
3064 			nsd->mode = NSD_RUN;
3065 		}
3066 		else if(mode == NSD_RUN) {
3067 			/* Wait for a query... */
3068 			if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
3069 				if (errno != EINTR) {
3070 					log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
3071 					break;
3072 				}
3073 			}
3074 		} else if(mode == NSD_QUIT) {
3075 			/* ignore here, quit */
3076 		} else {
3077 			log_msg(LOG_ERR, "mode bad value %d, back to service.",
3078 				(int)mode);
3079 			nsd->mode = NSD_RUN;
3080 		}
3081 	}
3082 
3083 	service_remaining_tcp(nsd);
3084 #ifdef	BIND8_STATS
3085 	bind8_stats(nsd);
3086 #endif /* BIND8_STATS */
3087 
3088 #ifdef MEMCLEAN /* OS collects memory pages */
3089 #ifdef RATELIMIT
3090 	rrl_deinit(nsd->this_child->child_num);
3091 #endif
3092 	event_base_free(event_base);
3093 	region_destroy(server_region);
3094 #endif
3095 	server_shutdown(nsd);
3096 }
3097 
3098 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg)
3099 {
3100 	int* timed_out = (int*)arg;
3101         assert(event & EV_TIMEOUT); (void)event;
3102 	/* wake up the service tcp thread, note event is no longer
3103 	 * registered */
3104 	*timed_out = 1;
3105 }
3106 
3107 void
3108 service_remaining_tcp(struct nsd* nsd)
3109 {
3110 	struct tcp_handler_data* p;
3111 	struct event_base* event_base;
3112 	/* check if it is needed */
3113 	if(nsd->current_tcp_count == 0 || tcp_active_list == NULL)
3114 		return;
3115 	VERBOSITY(4, (LOG_INFO, "service remaining TCP connections"));
3116 
3117 	/* setup event base */
3118 	event_base = nsd_child_event_base();
3119 	if(!event_base) {
3120 		log_msg(LOG_ERR, "nsd remain tcp could not create event base");
3121 		return;
3122 	}
3123 	/* register tcp connections */
3124 	for(p = tcp_active_list; p != NULL; p = p->next) {
3125 		struct timeval timeout;
3126 		int fd = p->event.ev_fd;
3127 #ifdef USE_MINI_EVENT
3128 		short event = p->event.ev_flags & (EV_READ|EV_WRITE);
3129 #else
3130 		short event = p->event.ev_events & (EV_READ|EV_WRITE);
3131 #endif
3132 		void (*fn)(int, short, void*);
3133 #ifdef HAVE_SSL
3134 		if(p->tls) {
3135 			if((event&EV_READ))
3136 				fn = handle_tls_reading;
3137 			else	fn = handle_tls_writing;
3138 		} else {
3139 #endif
3140 			if((event&EV_READ))
3141 				fn = handle_tcp_reading;
3142 			else	fn = handle_tcp_writing;
3143 #ifdef HAVE_SSL
3144 		}
3145 #endif
3146 
3147 		p->tcp_no_more_queries = 1;
3148 		/* set timeout to 1/10 second */
3149 		if(p->tcp_timeout > 100)
3150 			p->tcp_timeout = 100;
3151 		timeout.tv_sec = p->tcp_timeout / 1000;
3152 		timeout.tv_usec = (p->tcp_timeout % 1000)*1000;
3153 		event_del(&p->event);
3154 		memset(&p->event, 0, sizeof(p->event));
3155 		event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT,
3156 			fn, p);
3157 		if(event_base_set(event_base, &p->event) != 0)
3158 			log_msg(LOG_ERR, "event base set failed");
3159 		if(event_add(&p->event, &timeout) != 0)
3160 			log_msg(LOG_ERR, "event add failed");
3161 	}
3162 
3163 	/* handle it */
3164 	while(nsd->current_tcp_count > 0) {
3165 		mode_t m = server_signal_mode(nsd);
3166 		struct event timeout;
3167 		struct timeval tv;
3168 		int timed_out = 0;
3169 		if(m == NSD_QUIT || m == NSD_SHUTDOWN ||
3170 			m == NSD_REAP_CHILDREN) {
3171 			/* quit */
3172 			break;
3173 		}
3174 		/* timer */
3175 		/* have to do something every second */
3176 		tv.tv_sec = 1;
3177 		tv.tv_usec = 0;
3178 		memset(&timeout, 0, sizeof(timeout));
3179 		event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout,
3180 			&timed_out);
3181 		if(event_base_set(event_base, &timeout) != 0)
3182 			log_msg(LOG_ERR, "remaintcp timer: event_base_set failed");
3183 		if(event_add(&timeout, &tv) != 0)
3184 			log_msg(LOG_ERR, "remaintcp timer: event_add failed");
3185 
3186 		/* service loop */
3187 		if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
3188 			if (errno != EINTR) {
3189 				log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
3190 				break;
3191 			}
3192 		}
3193 		if(!timed_out) {
3194 			event_del(&timeout);
3195 		} else {
3196 			/* timed out, quit */
3197 			VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit"));
3198 			break;
3199 		}
3200 	}
3201 #ifdef MEMCLEAN
3202 	event_base_free(event_base);
3203 #endif
3204 	/* continue to quit after return */
3205 }
3206 
3207 /* Implement recvmmsg and sendmmsg if the platform does not. These functions
3208  * are always used, even if nonblocking operations are broken, in which case
3209  * NUM_RECV_PER_SELECT is defined to 1 (one).
3210  */
#if defined(HAVE_RECVMMSG)
#define nsd_recvmmsg recvmmsg
#else /* !HAVE_RECVMMSG */

/*
 * Fallback recvmmsg(2): fill msgvec one datagram at a time using
 * recvfrom.  Stops at the first error; a partial batch is returned and
 * the error is reported by the next call.  The timeout argument is not
 * supported and must be NULL.
 */
static int
nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen,
             int flags, struct timespec *timeout)
{
	unsigned int done = 0;
	ssize_t n;

	/* timeout is ignored, ensure caller does not expect it to work */
	assert(timeout == NULL); (void)timeout;

	for(; done < vlen; done++) {
		n = recvfrom(sockfd,
		             msgvec[done].msg_hdr.msg_iov->iov_base,
		             msgvec[done].msg_hdr.msg_iov->iov_len,
		             flags,
		             msgvec[done].msg_hdr.msg_name,
		             &msgvec[done].msg_hdr.msg_namelen);
		if(n < 0)
			break;
		assert((unsigned long long)n <= (unsigned long long)UINT_MAX);
		msgvec[done].msg_len = (unsigned int)n;
	}

	if(done > 0) {
		/* error will be picked up next time */
		return (int)done;
	}
	if(errno == 0 || errno == EAGAIN)
		return 0;

	return -1;
}
#endif /* HAVE_RECVMMSG */
3253 
#ifdef HAVE_SENDMMSG
#define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__)
#else /* !HAVE_SENDMMSG */

/*
 * Fallback sendmmsg(2): transmit msgvec one datagram at a time using
 * sendto.  Stops at the first error; a partial count is returned and
 * the error surfaces on the next call.  Only single-element iovecs are
 * supported.
 */
static int
nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags)
{
	unsigned int done = 0;
	ssize_t n;

	for(; done < vlen; done++) {
		assert(msgvec[done].msg_hdr.msg_iovlen == 1);
		n = sendto(sockfd,
		           msgvec[done].msg_hdr.msg_iov->iov_base,
		           msgvec[done].msg_hdr.msg_iov->iov_len,
		           flags,
		           msgvec[done].msg_hdr.msg_name,
		           msgvec[done].msg_hdr.msg_namelen);
		if(n < 0)
			break;
		msgvec[done].msg_len = (unsigned int)n;
	}

	if(done > 0) {
		return (int)done;
	}
	if(errno == 0)
		return 0;

	return -1;
}
#endif /* HAVE_SENDMMSG */
3289 
/*
 * Handle a readable UDP socket (libevent callback).  Receives up to
 * NUM_RECV_PER_SELECT datagrams in one nsd_recvmmsg() batch, processes
 * each query into a response in the same packet buffer, then transmits
 * all responses with nsd_sendmmsg().  Dropped/discarded queries are
 * swapped to the tail of the parallel msgs[]/iovecs[]/queries[] arrays
 * so the responses to send stay contiguous at the front.
 */
static void
handle_udp(int fd, short event, void* arg)
{
	struct udp_handler_data *data = (struct udp_handler_data *) arg;
	int received, sent, recvcount, i;
	struct query *q;

	if (!(event & EV_READ)) {
		return;
	}
	recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
	/* this printf strangely gave a performance increase on Linux */
	/* printf("recvcount %d \n", recvcount); */
	if (recvcount == -1) {
		if (errno != EAGAIN && errno != EINTR) {
			log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
			STATUP(data->nsd, rxerr);
			/* No zone statup */
		}
		/* Simply no data available */
		return;
	}
	for (i = 0; i < recvcount; i++) {
	loopstart:
		/* after a swap below we re-enter here to re-examine the
		 * message newly placed at position i */
		received = msgs[i].msg_len;
		queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen;
		q = queries[i];
		if (received == -1) {
			/* NOTE(review): with HAVE_RECVMMSG this logs
			 * msg_hdr.msg_flags rather than an errno value --
			 * verify intent against recvmmsg(2) semantics */
			log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror(
#if defined(HAVE_RECVMMSG)
				msgs[i].msg_hdr.msg_flags
#else
				errno
#endif
				));
			STATUP(data->nsd, rxerr);
			/* No zone statup */
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			iovecs[i].iov_len = buffer_remaining(q->packet);
			msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
			goto swap_drop;
		}

		/* Account... */
#ifdef BIND8_STATS
		if (data->socket->addr.ai_family == AF_INET) {
			STATUP(data->nsd, qudp);
		} else if (data->socket->addr.ai_family == AF_INET6) {
			STATUP(data->nsd, qudp6);
		}
#endif

		buffer_skip(q->packet, received);
		buffer_flip(q->packet);
#ifdef USE_DNSTAP
		/*
		 * sending UDP-query with server address (local) and client address to dnstap process
		 */
		log_addr("query from client", &q->addr, data->socket->addr.ai_family);
		log_addr("to server (local)", &data->socket->addr.ai_addr, data->socket->addr.ai_family);
		dt_collector_submit_auth_query(data->nsd, &data->socket->addr.ai_addr, &q->addr, q->addrlen,
			q->tcp, q->packet);
#endif /* USE_DNSTAP */

		/* Process and answer the query... */
		if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) {
			if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
				STATUP(data->nsd, nona);
				ZTATUP(data->nsd, q->zone, nona);
			}

#ifdef USE_ZONE_STATS
			if (data->socket->addr.ai_family == AF_INET) {
				ZTATUP(data->nsd, q->zone, qudp);
			} else if (data->socket->addr.ai_family == AF_INET6) {
				ZTATUP(data->nsd, q->zone, qudp6);
			}
#endif

			/* Add EDNS0 and TSIG info if necessary.  */
			query_add_optional(q, data->nsd);

			buffer_flip(q->packet);
			iovecs[i].iov_len = buffer_remaining(q->packet);
#ifdef BIND8_STATS
			/* Account the rcode & TC... */
			STATUP2(data->nsd, rcode, RCODE(q->packet));
			ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
			if (TC(q->packet)) {
				STATUP(data->nsd, truncated);
				ZTATUP(data->nsd, q->zone, truncated);
			}
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
			/*
			 * sending UDP-response with server address (local) and client address to dnstap process
			 */
			log_addr("from server (local)", &data->socket->addr.ai_addr, data->socket->addr.ai_family);
			log_addr("response to client", &q->addr, data->socket->addr.ai_family);
			dt_collector_submit_auth_response(data->nsd, &data->socket->addr.ai_addr,
				&q->addr, q->addrlen, q->tcp, q->packet,
				q->zone);
#endif /* USE_DNSTAP */
		} else {
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			iovecs[i].iov_len = buffer_remaining(q->packet);
			msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
		swap_drop:
			STATUP(data->nsd, dropped);
			ZTATUP(data->nsd, q->zone, dropped);
			if(i != recvcount-1) {
				/* swap with last and decrease recvcount */
				struct mmsghdr mtmp = msgs[i];
				struct iovec iotmp = iovecs[i];
				recvcount--;
				msgs[i] = msgs[recvcount];
				iovecs[i] = iovecs[recvcount];
				queries[i] = queries[recvcount];
				msgs[recvcount] = mtmp;
				iovecs[recvcount] = iotmp;
				queries[recvcount] = q;
				/* re-point the iov pointers after the swap,
				 * each msg_hdr must reference its own slot */
				msgs[i].msg_hdr.msg_iov = &iovecs[i];
				msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount];
				goto loopstart;
			} else { recvcount --; }
		}
	}

	/* send until all are sent */
	i = 0;
	while(i<recvcount) {
		sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
		if(sent == -1) {
			if(errno == ENOBUFS ||
#ifdef EWOULDBLOCK
				errno == EWOULDBLOCK ||
#endif
				errno == EAGAIN) {
				/* block to wait until send buffer avail */
				int flag, errstore;
				if((flag = fcntl(fd, F_GETFL)) == -1) {
					log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno));
					flag = 0;
				}
				/* temporarily make the socket blocking to
				 * flush the remaining answers, then restore
				 * nonblocking mode */
				flag &= ~O_NONBLOCK;
				if(fcntl(fd, F_SETFL, flag) == -1)
					log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno));
				sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
				errstore = errno;
				flag |= O_NONBLOCK;
				if(fcntl(fd, F_SETFL, flag) == -1)
					log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno));
				if(sent != -1) {
					i += sent;
					continue;
				}
				errno = errstore;
			}
			/* don't log transient network full errors, unless
			 * on higher verbosity */
			if(!(errno == ENOBUFS && verbosity < 1) &&
#ifdef EWOULDBLOCK
			   errno != EWOULDBLOCK &&
#endif
			   errno != EAGAIN) {
				const char* es = strerror(errno);
				char a[64];
				addrport2str(&queries[i]->addr, a, sizeof(a));
				log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
			}
#ifdef BIND8_STATS
			data->nsd->st.txerr += recvcount-i;
#endif /* BIND8_STATS */
			break;
		}
		i += sent;
	}
	/* reset all query buffers for the next batch */
	for(i=0; i<recvcount; i++) {
		query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
		iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
		msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
	}
}
3473 
3474 #ifdef HAVE_SSL
3475 /*
3476  * Setup an event for the tcp handler.
3477  */
3478 static void
3479 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *),
3480        int fd, short event)
3481 {
3482 	struct timeval timeout;
3483 	struct event_base* ev_base;
3484 
3485 	timeout.tv_sec = data->nsd->tcp_timeout;
3486 	timeout.tv_usec = 0L;
3487 
3488 	ev_base = data->event.ev_base;
3489 	event_del(&data->event);
3490 	memset(&data->event, 0, sizeof(data->event));
3491 	event_set(&data->event, fd, event, fn, data);
3492 	if(event_base_set(ev_base, &data->event) != 0)
3493 		log_msg(LOG_ERR, "event base set failed");
3494 	if(event_add(&data->event, &timeout) != 0)
3495 		log_msg(LOG_ERR, "event add failed");
3496 }
3497 #endif /* HAVE_SSL */
3498 
/*
 * Tear down one TCP connection: unregister its event, shut down and
 * free the TLS state if present, close the socket, unlink the handler
 * from the doubly-linked tcp_active_list, re-enable the TCP accept
 * handlers if the connection count is about to drop below the limit,
 * decrement the connection count and release the per-connection region.
 */
static void
cleanup_tcp_handler(struct tcp_handler_data* data)
{
	event_del(&data->event);
#ifdef HAVE_SSL
	if(data->tls) {
		SSL_shutdown(data->tls);
		SSL_free(data->tls);
		data->tls = NULL;
	}
#endif
	close(data->event.ev_fd);
	/* unlink from tcp_active_list; data->prev == NULL means head */
	if(data->prev)
		data->prev->next = data->next;
	else	tcp_active_list = data->next;
	if(data->next)
		data->next->prev = data->prev;

	/*
	 * Enable the TCP accept handlers when the current number of
	 * TCP connections is about to drop below the maximum number
	 * of TCP connections.
	 */
	if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
		configure_handler_event_types(EV_READ|EV_PERSIST);
		if(slowaccept) {
			event_del(&slowaccept_event);
			slowaccept = 0;
		}
	}
	--data->nsd->current_tcp_count;
	assert(data->nsd->current_tcp_count >= 0);

	/* frees data itself, which is allocated in this region */
	region_destroy(data->region);
}
3534 
/*
 * Handle a readable TCP socket (libevent callback; also fires on
 * EV_TIMEOUT, which closes the connection).  Reads the two-byte length
 * prefix, then the query data, returning early (event still installed)
 * while the message is incomplete.  Once a full query is in, it is
 * processed and the handler switches to handle_tcp_writing for the
 * response; data->bytes_transmitted tracks progress across callbacks.
 */
static void
handle_tcp_reading(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t received;
	struct event_base* ev_base;
	struct timeval timeout;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out.  */
		cleanup_tcp_handler(data);
		return;
	}

	if ((data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) ||
		data->tcp_no_more_queries) {
		/* No more queries allowed on this tcp connection. */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_READ));

	/* start of a new query on this connection */
	if (data->bytes_transmitted == 0) {
		query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
	}

	/*
	 * Check if we received the leading packet length bytes yet.
	 */
	if (data->bytes_transmitted < sizeof(uint16_t)) {
		received = read(fd,
				(char *) &data->query->tcplen
				+ data->bytes_transmitted,
				sizeof(uint16_t) - data->bytes_transmitted);
		if (received == -1) {
			if (errno == EAGAIN || errno == EINTR) {
				/*
				 * Read would block, wait until more
				 * data is available.
				 */
				return;
			} else {
				char buf[48];
				addr2str(&data->query->addr, buf, sizeof(buf));
#ifdef ECONNRESET
				if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
				log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
				cleanup_tcp_handler(data);
				return;
			}
		} else if (received == 0) {
			/* EOF */
			cleanup_tcp_handler(data);
			return;
		}

		data->bytes_transmitted += received;
		if (data->bytes_transmitted < sizeof(uint16_t)) {
			/*
			 * Not done with the tcplen yet, wait for more
			 * data to become available.
			 */
			return;
		}

		assert(data->bytes_transmitted == sizeof(uint16_t));

		/* the length prefix arrives in network byte order */
		data->query->tcplen = ntohs(data->query->tcplen);

		/*
		 * Minimum query size is:
		 *
		 *     Size of the header (12)
		 *   + Root domain name   (1)
		 *   + Query class        (2)
		 *   + Query type         (2)
		 */
		if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
			VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
			cleanup_tcp_handler(data);
			return;
		}

		if (data->query->tcplen > data->query->maxlen) {
			VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
			cleanup_tcp_handler(data);
			return;
		}

		buffer_set_limit(data->query->packet, data->query->tcplen);
	}

	assert(buffer_remaining(data->query->packet) > 0);

	/* Read the (remaining) query data.  */
	received = read(fd,
			buffer_current(data->query->packet),
			buffer_remaining(data->query->packet));
	if (received == -1) {
		if (errno == EAGAIN || errno == EINTR) {
			/*
			 * Read would block, wait until more data is
			 * available.
			 */
			return;
		} else {
			char buf[48];
			addr2str(&data->query->addr, buf, sizeof(buf));
#ifdef ECONNRESET
			if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
			log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
			cleanup_tcp_handler(data);
			return;
		}
	} else if (received == 0) {
		/* EOF */
		cleanup_tcp_handler(data);
		return;
	}

	data->bytes_transmitted += received;
	buffer_skip(data->query->packet, received);
	if (buffer_remaining(data->query->packet) > 0) {
		/*
		 * Message not yet complete, wait for more data to
		 * become available.
		 */
		return;
	}

	assert(buffer_position(data->query->packet) == data->query->tcplen);

	/* Account... */
#ifdef BIND8_STATS
#ifndef INET6
	STATUP(data->nsd, ctcp);
#else
	if (data->query->addr.ss_family == AF_INET) {
		STATUP(data->nsd, ctcp);
	} else if (data->query->addr.ss_family == AF_INET6) {
		STATUP(data->nsd, ctcp6);
	}
#endif
#endif /* BIND8_STATS */

	/* We have a complete query, process it.  */

	/* tcp-query-count: handle query counter ++ */
	data->query_count++;

	buffer_flip(data->query->packet);
#ifdef USE_DNSTAP
	/*
	 * and send TCP-query with found address (local) and client address to dnstap process
	 */
	log_addr("query from client", &data->query->addr, data->query->addr.ss_family);
	log_addr("to server (local)", &data->socket->addr.ai_addr, data->query->addr.ss_family);
	dt_collector_submit_auth_query(data->nsd, &data->socket->addr.ai_addr, &data->query->addr,
		data->query->addrlen, data->query->tcp, data->query->packet);
#endif /* USE_DNSTAP */
	data->query_state = server_process_query(data->nsd, data->query);
	if (data->query_state == QUERY_DISCARDED) {
		/* Drop the packet and the entire connection... */
		STATUP(data->nsd, dropped);
		ZTATUP(data->nsd, data->query->zone, dropped);
		cleanup_tcp_handler(data);
		return;
	}

#ifdef BIND8_STATS
	if (RCODE(data->query->packet) == RCODE_OK
	    && !AA(data->query->packet))
	{
		STATUP(data->nsd, nona);
		ZTATUP(data->nsd, data->query->zone, nona);
	}
#endif /* BIND8_STATS */

#ifdef USE_ZONE_STATS
#ifndef INET6
	ZTATUP(data->nsd, data->query->zone, ctcp);
#else
	if (data->query->addr.ss_family == AF_INET) {
		ZTATUP(data->nsd, data->query->zone, ctcp);
	} else if (data->query->addr.ss_family == AF_INET6) {
		ZTATUP(data->nsd, data->query->zone, ctcp6);
	}
#endif
#endif /* USE_ZONE_STATS */

	query_add_optional(data->query, data->nsd);

	/* Switch to the tcp write handler.  */
	buffer_flip(data->query->packet);
	data->query->tcplen = buffer_remaining(data->query->packet);
#ifdef BIND8_STATS
	/* Account the rcode & TC... */
	STATUP2(data->nsd, rcode, RCODE(data->query->packet));
	ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
	if (TC(data->query->packet)) {
		STATUP(data->nsd, truncated);
		ZTATUP(data->nsd, data->query->zone, truncated);
	}
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
	/*
	 * sending TCP-response with found (earlier) address (local) and client address to dnstap process
	 */
	log_addr("from server (local)", &data->socket->addr.ai_addr, data->query->addr.ss_family);
	log_addr("response to client", &data->query->addr, data->query->addr.ss_family);
	dt_collector_submit_auth_response(data->nsd, &data->socket->addr.ai_addr, &data->query->addr,
		data->query->addrlen, data->query->tcp, data->query->packet,
		data->query->zone);
#endif /* USE_DNSTAP */
	data->bytes_transmitted = 0;

	timeout.tv_sec = data->tcp_timeout / 1000;
	timeout.tv_usec = (data->tcp_timeout % 1000)*1000;

	ev_base = data->event.ev_base;
	event_del(&data->event);
	memset(&data->event, 0, sizeof(data->event));
	event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
		handle_tcp_reading, data);
	if(event_base_set(ev_base, &data->event) != 0)
		log_msg(LOG_ERR, "event base set tcpr failed");
	if(event_add(&data->event, &timeout) != 0)
		log_msg(LOG_ERR, "event add tcpr failed");
	/* see if we can write the answer right away(usually so,EAGAIN ifnot)*/
	handle_tcp_writing(fd, EV_WRITE, data);
}
3770 
/*
 * Handle a writable TCP socket (libevent callback; also fires on
 * EV_TIMEOUT, which closes the connection).  Writes the two-byte
 * length prefix and the response packet (in one writev where
 * available), continues AXFR generation while more data is pending,
 * and finally re-installs handle_tcp_reading for the next query on
 * this connection.
 */
static void
handle_tcp_writing(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t sent;
	struct query *q = data->query;
	struct timeval timeout;
	struct event_base* ev_base;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out.  */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_WRITE));

	if (data->bytes_transmitted < sizeof(q->tcplen)) {
		/* Writing the response packet length.  */
		uint16_t n_tcplen = htons(q->tcplen);
#ifdef HAVE_WRITEV
		/* gather the length prefix and the packet into a single
		 * writev call to avoid two small writes */
		struct iovec iov[2];
		iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted;
		iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted;
		iov[1].iov_base = buffer_begin(q->packet);
		iov[1].iov_len = buffer_limit(q->packet);
		sent = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		sent = write(fd,
			     (const char *) &n_tcplen + data->bytes_transmitted,
			     sizeof(n_tcplen) - data->bytes_transmitted);
#endif /* HAVE_WRITEV */
		if (sent == -1) {
			if (errno == EAGAIN || errno == EINTR) {
				/*
				 * Write would block, wait until
				 * socket becomes writable again.
				 */
				return;
			} else {
#ifdef ECONNRESET
				if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
				  if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
				    log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
				cleanup_tcp_handler(data);
				return;
			}
		}

		data->bytes_transmitted += sent;
		if (data->bytes_transmitted < sizeof(q->tcplen)) {
			/*
			 * Writing not complete, wait until socket
			 * becomes writable again.
			 */
			return;
		}

#ifdef HAVE_WRITEV
		/* from here on, sent counts packet payload bytes only */
		sent -= sizeof(n_tcplen);
		/* handle potential 'packet done' code */
		goto packet_could_be_done;
#endif
 	}

	sent = write(fd,
		     buffer_current(q->packet),
		     buffer_remaining(q->packet));
	if (sent == -1) {
		if (errno == EAGAIN || errno == EINTR) {
			/*
			 * Write would block, wait until
			 * socket becomes writable again.
			 */
			return;
		} else {
#ifdef ECONNRESET
			if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
				  if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
			log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
			cleanup_tcp_handler(data);
			return;
		}
	}

	data->bytes_transmitted += sent;
#ifdef HAVE_WRITEV
  packet_could_be_done:
#endif
	buffer_skip(q->packet, sent);
	if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
		/*
		 * Still more data to write when socket becomes
		 * writable again.
		 */
		return;
	}

	assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

	if (data->query_state == QUERY_IN_AXFR) {
		/* Continue processing AXFR and writing back results.  */
		buffer_clear(q->packet);
		data->query_state = query_axfr(data->nsd, q);
		if (data->query_state != QUERY_PROCESSED) {
			query_add_optional(data->query, data->nsd);

			/* Reset data. */
			buffer_flip(q->packet);
			q->tcplen = buffer_remaining(q->packet);
			data->bytes_transmitted = 0;
			/* Reset timeout.  */
			timeout.tv_sec = data->tcp_timeout / 1000;
			timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
			ev_base = data->event.ev_base;
			event_del(&data->event);
			memset(&data->event, 0, sizeof(data->event));
			event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
				handle_tcp_writing, data);
			if(event_base_set(ev_base, &data->event) != 0)
				log_msg(LOG_ERR, "event base set tcpw failed");
			if(event_add(&data->event, &timeout) != 0)
				log_msg(LOG_ERR, "event add tcpw failed");

			/*
			 * Write data if/when the socket is writable
			 * again.
			 */
			return;
		}
	}

	/*
	 * Done sending, wait for the next request to arrive on the
	 * TCP socket by installing the TCP read handler.
	 */
	if ((data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) ||
		data->tcp_no_more_queries) {
		/* half-close: we will accept no further queries here */
		(void) shutdown(fd, SHUT_WR);
	}

	data->bytes_transmitted = 0;

	timeout.tv_sec = data->tcp_timeout / 1000;
	timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
	ev_base = data->event.ev_base;
	event_del(&data->event);
	memset(&data->event, 0, sizeof(data->event));
	event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
		handle_tcp_reading, data);
	if(event_base_set(ev_base, &data->event) != 0)
		log_msg(LOG_ERR, "event base set tcpw failed");
	if(event_add(&data->event, &timeout) != 0)
		log_msg(LOG_ERR, "event add tcpw failed");
}
3934 
3935 #ifdef HAVE_SSL
3936 /** create SSL object and associate fd */
3937 static SSL*
3938 incoming_ssl_fd(SSL_CTX* ctx, int fd)
3939 {
3940 	SSL* ssl = SSL_new((SSL_CTX*)ctx);
3941 	if(!ssl) {
3942 		log_crypto_err("could not SSL_new");
3943 		return NULL;
3944 	}
3945 	SSL_set_accept_state(ssl);
3946 	(void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
3947 	if(!SSL_set_fd(ssl, fd)) {
3948 		log_crypto_err("could not SSL_set_fd");
3949 		SSL_free(ssl);
3950 		return NULL;
3951 	}
3952 	return ssl;
3953 }
3954 
/** TLS handshake to upgrade TCP connection.
 * Drives (or resumes) SSL_do_handshake() on data->tls and keeps the
 * libevent registration in sync with the readiness OpenSSL asks for
 * next (read or write).  fd is the connection socket; writing is
 * nonzero when the caller is in the response-writing phase.
 * Returns 0 when the connection was closed or failed (handler already
 * cleaned up), 1 otherwise (handshake done, or waiting for an event). */
static int
tls_handshake(struct tcp_handler_data* data, int fd, int writing)
{
	int r;
	if(data->shake_state == tls_hs_read_event) {
		/* read condition satisfied back to writing */
		tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
		data->shake_state = tls_hs_none;
		return 1;
	}
	if(data->shake_state == tls_hs_write_event) {
		/* write condition satisfied back to reading */
		tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
		data->shake_state = tls_hs_none;
		return 1;
	}

	/* (continue to) setup the TLS connection */
	ERR_clear_error();
	r = SSL_do_handshake(data->tls);

	if(r != 1) {
		int want = SSL_get_error(data->tls, r);
		if(want == SSL_ERROR_WANT_READ) {
			if(data->shake_state == tls_hs_read) {
				/* try again later */
				return 1;
			}
			data->shake_state = tls_hs_read;
			/* switch back to reading mode */
			tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
			return 1;
		} else if(want == SSL_ERROR_WANT_WRITE) {
			if(data->shake_state == tls_hs_write) {
				/* try again later */
				return 1;
			}
			data->shake_state = tls_hs_write;
			/* switch back to writing mode */
			tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
			return 1;
		} else {
			/* fatal: r==0 means orderly close mid-handshake,
			 * otherwise log the OpenSSL error unless squelched */
			if(r == 0)
				VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely"));
			else {
				unsigned long err = ERR_get_error();
				if(!squelch_err_ssl_handshake(err)) {
					char a[64], s[256];
					addr2str(&data->query->addr, a, sizeof(a));
					snprintf(s, sizeof(s), "TLS handshake failed from %s", a);
					log_crypto_from_err(s, err);
				}
			}
			cleanup_tcp_handler(data);
			return 0;
		}
	}

	/* Use to log successful upgrade for testing - could be removed*/
	VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded."));
	/* set back to the event we need to have when reading (or writing) */
	if(data->shake_state == tls_hs_read && writing) {
		tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
	} else if(data->shake_state == tls_hs_write && !writing) {
		tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
	}
	data->shake_state = tls_hs_none;
	return 1;
}
4025 
/** handle TLS reading of incoming query.
 * libevent callback for a TLS connection in reading mode: resumes the
 * handshake if one is pending, reads the 2-byte length prefix and then
 * the DNS message, and once the message is complete processes the query
 * and hands the connection to handle_tls_writing to send the response. */
static void
handle_tls_reading(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t received;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out.  */
		cleanup_tcp_handler(data);
		return;
	}

	if ((data->nsd->tcp_query_count > 0 &&
	    data->query_count >= data->nsd->tcp_query_count) ||
	    data->tcp_no_more_queries) {
		/* No more queries allowed on this tcp connection. */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_READ));

	if (data->bytes_transmitted == 0) {
		/* start of a new request: reset the query structure */
		query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
	}

	if(data->shake_state != tls_hs_none) {
		/* handshake still in progress; drive it before reading data */
		if(!tls_handshake(data, fd, 0))
			return;
		if(data->shake_state != tls_hs_none)
			return;
	}

	/*
	 * Check if we received the leading packet length bytes yet.
	 */
	if(data->bytes_transmitted < sizeof(uint16_t)) {
		ERR_clear_error();
		if((received=SSL_read(data->tls, (char *) &data->query->tcplen
		    + data->bytes_transmitted,
		    sizeof(uint16_t) - data->bytes_transmitted)) <= 0) {
			int want = SSL_get_error(data->tls, received);
			if(want == SSL_ERROR_ZERO_RETURN) {
				cleanup_tcp_handler(data);
				return; /* shutdown, closed */
			} else if(want == SSL_ERROR_WANT_READ) {
				/* wants to be called again */
				return;
			}
			else if(want == SSL_ERROR_WANT_WRITE) {
				/* switch to writing */
				data->shake_state = tls_hs_write_event;
				tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
				return;
			}
			cleanup_tcp_handler(data);
			log_crypto_err("could not SSL_read");
			return;
		}

		data->bytes_transmitted += received;
		if (data->bytes_transmitted < sizeof(uint16_t)) {
			/*
			 * Not done with the tcplen yet, wait for more
			 * data to become available.
			 */
			return;
		}

		assert(data->bytes_transmitted == sizeof(uint16_t));

		/* the length prefix arrives in network byte order */
		data->query->tcplen = ntohs(data->query->tcplen);

		/*
		 * Minimum query size is:
		 *
		 *     Size of the header (12)
		 *   + Root domain name   (1)
		 *   + Query class        (2)
		 *   + Query type         (2)
		 */
		if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
			VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
			cleanup_tcp_handler(data);
			return;
		}

		if (data->query->tcplen > data->query->maxlen) {
			VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
			cleanup_tcp_handler(data);
			return;
		}

		/* cap the packet buffer at the announced message length */
		buffer_set_limit(data->query->packet, data->query->tcplen);
	}

	assert(buffer_remaining(data->query->packet) > 0);

	/* Read the (remaining) query data.  */
	ERR_clear_error();
	received = SSL_read(data->tls, (void*)buffer_current(data->query->packet),
			    (int)buffer_remaining(data->query->packet));
	if(received <= 0) {
		int want = SSL_get_error(data->tls, received);
		if(want == SSL_ERROR_ZERO_RETURN) {
			cleanup_tcp_handler(data);
			return; /* shutdown, closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			/* wants to be called again */
			return;
		}
		else if(want == SSL_ERROR_WANT_WRITE) {
			/* switch back writing */
			data->shake_state = tls_hs_write_event;
			tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
			return;
		}
		cleanup_tcp_handler(data);
		log_crypto_err("could not SSL_read");
		return;
	}

	data->bytes_transmitted += received;
	buffer_skip(data->query->packet, received);
	if (buffer_remaining(data->query->packet) > 0) {
		/*
		 * Message not yet complete, wait for more data to
		 * become available.
		 */
		return;
	}

	assert(buffer_position(data->query->packet) == data->query->tcplen);

	/* Account... (TLS-over-TCP query counters, per address family) */
#ifndef INET6
	STATUP(data->nsd, ctls);
#else
	if (data->query->addr.ss_family == AF_INET) {
		STATUP(data->nsd, ctls);
	} else if (data->query->addr.ss_family == AF_INET6) {
		STATUP(data->nsd, ctls6);
	}
#endif

	/* We have a complete query, process it.  */

	/* tcp-query-count: handle query counter ++ */
	data->query_count++;

	buffer_flip(data->query->packet);
#ifdef USE_DNSTAP
	/*
	 * and send TCP-query with found address (local) and client address to dnstap process
	 */
	log_addr("query from client", &data->query->addr, data->query->addr.ss_family);
	log_addr("to server (local)", &data->socket->addr.ai_addr, data->query->addr.ss_family);
	dt_collector_submit_auth_query(data->nsd, &data->socket->addr.ai_addr, &data->query->addr,
		data->query->addrlen, data->query->tcp, data->query->packet);
#endif /* USE_DNSTAP */
	data->query_state = server_process_query(data->nsd, data->query);
	if (data->query_state == QUERY_DISCARDED) {
		/* Drop the packet and the entire connection... */
		STATUP(data->nsd, dropped);
		ZTATUP(data->nsd, data->query->zone, dropped);
		cleanup_tcp_handler(data);
		return;
	}

#ifdef BIND8_STATS
	if (RCODE(data->query->packet) == RCODE_OK
	    && !AA(data->query->packet))
	{
		/* count answers given without the AA bit */
		STATUP(data->nsd, nona);
		ZTATUP(data->nsd, data->query->zone, nona);
	}
#endif /* BIND8_STATS */

#ifdef USE_ZONE_STATS
#ifndef INET6
	ZTATUP(data->nsd, data->query->zone, ctls);
#else
	if (data->query->addr.ss_family == AF_INET) {
		ZTATUP(data->nsd, data->query->zone, ctls);
	} else if (data->query->addr.ss_family == AF_INET6) {
		ZTATUP(data->nsd, data->query->zone, ctls6);
	}
#endif
#endif /* USE_ZONE_STATS */

	query_add_optional(data->query, data->nsd);

	/* Switch to the tcp write handler.  */
	buffer_flip(data->query->packet);
	data->query->tcplen = buffer_remaining(data->query->packet);
#ifdef BIND8_STATS
	/* Account the rcode & TC... */
	STATUP2(data->nsd, rcode, RCODE(data->query->packet));
	ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
	if (TC(data->query->packet)) {
		STATUP(data->nsd, truncated);
		ZTATUP(data->nsd, data->query->zone, truncated);
	}
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
	/*
	 * sending TCP-response with found (earlier) address (local) and client address to dnstap process
	 */
	log_addr("from server (local)", &data->socket->addr.ai_addr, data->query->addr.ss_family);
	log_addr("response to client", &data->query->addr, data->query->addr.ss_family);
	dt_collector_submit_auth_response(data->nsd, &data->socket->addr.ai_addr, &data->query->addr,
		data->query->addrlen, data->query->tcp, data->query->packet,
		data->query->zone);
#endif /* USE_DNSTAP */
	data->bytes_transmitted = 0;

	tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);

	/* try to write the answer right away (usually possible; if the
	 * socket is not writable we return and wait for the event) */
	handle_tls_writing(fd, EV_WRITE, data);
}
4248 
/** handle TLS writing of outgoing response.
 * libevent callback for a TLS connection in writing mode: resumes the
 * handshake if one is pending, writes the 2-byte length prefix plus the
 * response over SSL, continues a running AXFR, and otherwise switches
 * the connection back to handle_tls_reading for the next request. */
static void
handle_tls_writing(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t sent;
	struct query *q = data->query;
	/* static variable that holds reassembly buffer used to put the
	 * TCP length in front of the packet, like writev. */
	static buffer_type* global_tls_temp_buffer = NULL;
	buffer_type* write_buffer;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out.  */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_WRITE));

	if(data->shake_state != tls_hs_none) {
		/* handshake still in progress; drive it before writing data */
		if(!tls_handshake(data, fd, 1))
			return;
		if(data->shake_state != tls_hs_none)
			return;
	}

	/* partial writes report the bytes actually written by SSL_write */
	(void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE);

	/* If we are writing the start of a message, we must include the length
	 * this is done with a copy into write_buffer. */
	write_buffer = NULL;
	if (data->bytes_transmitted == 0) {
		if(!global_tls_temp_buffer) {
			/* gets deallocated when nsd shuts down from
			 * nsd.region */
			global_tls_temp_buffer = buffer_create(nsd.region,
				QIOBUFSZ + sizeof(q->tcplen));
			if (!global_tls_temp_buffer) {
				return;
			}
		}
		write_buffer = global_tls_temp_buffer;
		buffer_clear(write_buffer);
		buffer_write_u16(write_buffer, q->tcplen);
		buffer_write(write_buffer, buffer_current(q->packet),
			(int)buffer_remaining(q->packet));
		buffer_flip(write_buffer);
	} else {
		/* length prefix already sent; write from the packet itself */
		write_buffer = q->packet;
	}

	/* Write the response */
	ERR_clear_error();
	sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer));
	if(sent <= 0) {
		int want = SSL_get_error(data->tls, sent);
		if(want == SSL_ERROR_ZERO_RETURN) {
			cleanup_tcp_handler(data);
			/* closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			/* switch back to reading */
			data->shake_state = tls_hs_read_event;
			tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
		} else if(want != SSL_ERROR_WANT_WRITE) {
			cleanup_tcp_handler(data);
			log_crypto_err("could not SSL_write");
		}
		return;
	}

	buffer_skip(write_buffer, sent);
	if(buffer_remaining(write_buffer) != 0) {
		/* If not all sent, sync up the real buffer if it wasn't used.*/
		if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) {
			buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen));
		}
	}

	data->bytes_transmitted += sent;
	if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
		/*
		 * Still more data to write when socket becomes
		 * writable again.
		 */
		return;
	}

	assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

	if (data->query_state == QUERY_IN_AXFR) {
		/* Continue processing AXFR and writing back results.  */
		buffer_clear(q->packet);
		data->query_state = query_axfr(data->nsd, q);
		if (data->query_state != QUERY_PROCESSED) {
			query_add_optional(data->query, data->nsd);

			/* Reset data. */
			buffer_flip(q->packet);
			q->tcplen = buffer_remaining(q->packet);
			data->bytes_transmitted = 0;
			/* Reset to writing mode.  */
			tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);

			/*
			 * Write data if/when the socket is writable
			 * again.
			 */
			return;
		}
	}

	/*
	 * Done sending, wait for the next request to arrive on the
	 * TCP socket by installing the TCP read handler.
	 */
	if ((data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) ||
		data->tcp_no_more_queries) {

		(void) shutdown(fd, SHUT_WR);
	}

	data->bytes_transmitted = 0;

	tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
}
4376 #endif
4377 
4378 static void
4379 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event),
4380 	void* ATTR_UNUSED(arg))
4381 {
4382 	if(slowaccept) {
4383 		configure_handler_event_types(EV_PERSIST | EV_READ);
4384 		slowaccept = 0;
4385 	}
4386 }
4387 
4388 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
4389 {
4390 #ifndef HAVE_ACCEPT4
4391 	int s = accept(fd, addr, addrlen);
4392 	if (s != -1) {
4393 		if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
4394 			log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
4395 			close(s);
4396 			s = -1;
4397 			errno=EINTR; /* stop error printout as error in accept4
4398 				by setting this errno, it omits printout, in
4399 				later code that calls nsd_accept4 */
4400 		}
4401 	}
4402 	return s;
4403 #else
4404 	return accept4(fd, addr, addrlen, SOCK_NONBLOCK);
4405 #endif /* HAVE_ACCEPT4 */
4406 }
4407 
4408 /*
4409  * Handle an incoming TCP connection.  The connection is accepted and
4410  * a new TCP reader event handler is added.  The TCP handler
4411  * is responsible for cleanup when the connection is closed.
4412  */
4413 static void
4414 handle_tcp_accept(int fd, short event, void* arg)
4415 {
4416 	struct tcp_accept_handler_data *data
4417 		= (struct tcp_accept_handler_data *) arg;
4418 	int s;
4419 	int reject = 0;
4420 	struct tcp_handler_data *tcp_data;
4421 	region_type *tcp_region;
4422 #ifdef INET6
4423 	struct sockaddr_storage addr;
4424 #else
4425 	struct sockaddr_in addr;
4426 #endif
4427 	socklen_t addrlen;
4428 	struct timeval timeout;
4429 
4430 	if (!(event & EV_READ)) {
4431 		return;
4432 	}
4433 
4434 	if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) {
4435 		reject = data->nsd->options->tcp_reject_overflow;
4436 		if (!reject) {
4437 			return;
4438 		}
4439 	}
4440 
4441 	/* Accept it... */
4442 	addrlen = sizeof(addr);
4443 	s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen);
4444 	if (s == -1) {
4445 		/**
4446 		 * EMFILE and ENFILE is a signal that the limit of open
4447 		 * file descriptors has been reached. Pause accept().
4448 		 * EINTR is a signal interrupt. The others are various OS ways
4449 		 * of saying that the client has closed the connection.
4450 		 */
4451 		if (errno == EMFILE || errno == ENFILE) {
4452 			if (!slowaccept) {
4453 				/* disable accept events */
4454 				struct timeval tv;
4455 				configure_handler_event_types(0);
4456 				tv.tv_sec = SLOW_ACCEPT_TIMEOUT;
4457 				tv.tv_usec = 0L;
4458 				memset(&slowaccept_event, 0,
4459 					sizeof(slowaccept_event));
4460 				event_set(&slowaccept_event, -1, EV_TIMEOUT,
4461 					handle_slowaccept_timeout, NULL);
4462 				(void)event_base_set(data->event.ev_base,
4463 					&slowaccept_event);
4464 				(void)event_add(&slowaccept_event, &tv);
4465 				slowaccept = 1;
4466 				/* We don't want to spam the logs here */
4467 			}
4468 		} else if (errno != EINTR
4469 			&& errno != EWOULDBLOCK
4470 #ifdef ECONNABORTED
4471 			&& errno != ECONNABORTED
4472 #endif /* ECONNABORTED */
4473 #ifdef EPROTO
4474 			&& errno != EPROTO
4475 #endif /* EPROTO */
4476 			) {
4477 			log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
4478 		}
4479 		return;
4480 	}
4481 
4482 	if (reject) {
4483 		shutdown(s, SHUT_RDWR);
4484 		close(s);
4485 		return;
4486 	}
4487 
4488 	/*
4489 	 * This region is deallocated when the TCP connection is
4490 	 * closed by the TCP handler.
4491 	 */
4492 	tcp_region = region_create(xalloc, free);
4493 	tcp_data = (struct tcp_handler_data *) region_alloc(
4494 		tcp_region, sizeof(struct tcp_handler_data));
4495 	tcp_data->region = tcp_region;
4496 	tcp_data->query = query_create(tcp_region, compressed_dname_offsets,
4497 		compression_table_size, compressed_dnames);
4498 	tcp_data->nsd = data->nsd;
4499 	tcp_data->query_count = 0;
4500 #ifdef HAVE_SSL
4501 	tcp_data->shake_state = tls_hs_none;
4502 	tcp_data->tls = NULL;
4503 #endif
4504 	tcp_data->prev = NULL;
4505 	tcp_data->next = NULL;
4506 
4507 	tcp_data->query_state = QUERY_PROCESSED;
4508 	tcp_data->bytes_transmitted = 0;
4509 	memcpy(&tcp_data->query->addr, &addr, addrlen);
4510 	tcp_data->query->addrlen = addrlen;
4511 
4512 	tcp_data->tcp_no_more_queries = 0;
4513 	tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000;
4514 	if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) {
4515 		/* very busy, give smaller timeout */
4516 		tcp_data->tcp_timeout = 200;
4517 	}
4518 	memset(&tcp_data->event, 0, sizeof(tcp_data->event));
4519 	timeout.tv_sec = tcp_data->tcp_timeout / 1000;
4520 	timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000;
4521 
4522 #ifdef USE_DNSTAP
4523 	/* save the address of the connection */
4524 	tcp_data->socket = data->socket;
4525 #endif /* USE_DNSTAP */
4526 
4527 #ifdef HAVE_SSL
4528 	if (data->tls_accept) {
4529 		tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s);
4530 		if(!tcp_data->tls) {
4531 			close(s);
4532 			return;
4533 		}
4534 		tcp_data->shake_state = tls_hs_read;
4535 		memset(&tcp_data->event, 0, sizeof(tcp_data->event));
4536 		event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
4537 			  handle_tls_reading, tcp_data);
4538 	} else {
4539 #endif
4540 		memset(&tcp_data->event, 0, sizeof(tcp_data->event));
4541 		event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
4542 			  handle_tcp_reading, tcp_data);
4543 #ifdef HAVE_SSL
4544 	}
4545 #endif
4546 	if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) {
4547 		log_msg(LOG_ERR, "cannot set tcp event base");
4548 		close(s);
4549 		region_destroy(tcp_region);
4550 		return;
4551 	}
4552 	if(event_add(&tcp_data->event, &timeout) != 0) {
4553 		log_msg(LOG_ERR, "cannot add tcp to event base");
4554 		close(s);
4555 		region_destroy(tcp_region);
4556 		return;
4557 	}
4558 	if(tcp_active_list) {
4559 		tcp_active_list->prev = tcp_data;
4560 		tcp_data->next = tcp_active_list;
4561 	}
4562 	tcp_active_list = tcp_data;
4563 
4564 	/*
4565 	 * Keep track of the total number of TCP handlers installed so
4566 	 * we can stop accepting connections when the maximum number
4567 	 * of simultaneous TCP connections is reached.
4568 	 *
4569 	 * If tcp-reject-overflow is enabled, however, then we do not
4570 	 * change the handler event type; we keep it as-is and accept
4571 	 * overflow TCP connections only so that we can forcibly kill
4572 	 * them off.
4573 	 */
4574 	++data->nsd->current_tcp_count;
4575 	if (!data->nsd->options->tcp_reject_overflow &&
4576 	     data->nsd->current_tcp_count == data->nsd->maximum_tcp_count)
4577 	{
4578 		configure_handler_event_types(0);
4579 	}
4580 }
4581 
4582 static void
4583 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout)
4584 {
4585 	size_t i;
4586 	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
4587 	for (i = 0; i < nsd->child_count; ++i) {
4588 		if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) {
4589 			if (write(nsd->children[i].child_fd,
4590 				&command,
4591 				sizeof(command)) == -1)
4592 			{
4593 				if(errno != EAGAIN && errno != EINTR)
4594 					log_msg(LOG_ERR, "problems sending command %d to server %d: %s",
4595 					(int) command,
4596 					(int) nsd->children[i].pid,
4597 					strerror(errno));
4598 			} else if (timeout > 0) {
4599 				(void)block_read(NULL,
4600 					nsd->children[i].child_fd,
4601 					&command, sizeof(command), timeout);
4602 			}
4603 			fsync(nsd->children[i].child_fd);
4604 			close(nsd->children[i].child_fd);
4605 			nsd->children[i].child_fd = -1;
4606 		}
4607 	}
4608 }
4609 
4610 static void
4611 send_children_quit(struct nsd* nsd)
4612 {
4613 	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit"));
4614 	send_children_command(nsd, NSD_QUIT, 0);
4615 }
4616 
4617 static void
4618 send_children_quit_and_wait(struct nsd* nsd)
4619 {
4620 	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait"));
4621 	send_children_command(nsd, NSD_QUIT_CHILD, 3);
4622 }
4623 
4624 #ifdef BIND8_STATS
4625 static void
4626 set_children_stats(struct nsd* nsd)
4627 {
4628 	size_t i;
4629 	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
4630 	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children"));
4631 	for (i = 0; i < nsd->child_count; ++i) {
4632 		nsd->children[i].need_to_send_STATS = 1;
4633 		nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE;
4634 	}
4635 }
4636 #endif /* BIND8_STATS */
4637 
4638 static void
4639 configure_handler_event_types(short event_types)
4640 {
4641 	size_t i;
4642 
4643 	for (i = 0; i < tcp_accept_handler_count; ++i) {
4644 		struct event* handler = &tcp_accept_handlers[i].event;
4645 		if(event_types) {
4646 			/* reassign */
4647 			int fd = handler->ev_fd;
4648 			struct event_base* base = handler->ev_base;
4649 			if(tcp_accept_handlers[i].event_added)
4650 				event_del(handler);
4651 			memset(handler, 0, sizeof(*handler));
4652 			event_set(handler, fd, event_types,
4653 				handle_tcp_accept, &tcp_accept_handlers[i]);
4654 			if(event_base_set(base, handler) != 0)
4655 				log_msg(LOG_ERR, "conhand: cannot event_base");
4656 			if(event_add(handler, NULL) != 0)
4657 				log_msg(LOG_ERR, "conhand: cannot event_add");
4658 			tcp_accept_handlers[i].event_added = 1;
4659 		} else {
4660 			/* remove */
4661 			if(tcp_accept_handlers[i].event_added) {
4662 				event_del(handler);
4663 				tcp_accept_handlers[i].event_added = 0;
4664 			}
4665 		}
4666 	}
4667 }
4668