xref: /openbsd-src/usr.sbin/nsd/server.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*
2  * server.c -- nsd(8) network input/output
3  *
4  * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
5  *
6  * See LICENSE for the license.
7  *
8  */
9 
10 #include "config.h"
11 
12 #include <sys/types.h>
13 #include <sys/param.h>
14 #include <sys/socket.h>
15 #include <sys/wait.h>
16 
17 #include <netinet/in.h>
18 #include <arpa/inet.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <stddef.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <time.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <netdb.h>
32 #ifndef SHUT_WR
33 #define SHUT_WR 1
34 #endif
35 
36 #include "axfr.h"
37 #include "namedb.h"
38 #include "netio.h"
39 #include "xfrd.h"
40 #include "xfrd-tcp.h"
41 #include "difffile.h"
42 #include "nsec3.h"
43 #include "ipc.h"
44 
/*
 * Data for the UDP handlers.
 */
struct udp_handler_data
{
	struct nsd        *nsd;	/* global server state */
	struct nsd_socket *socket;	/* the UDP server socket this handler reads */
	query_type        *query;	/* query processing state for this socket */
};
54 
/*
 * Data for the TCP accept handlers.  Most data is simply passed along
 * to the TCP connection handler.
 */
struct tcp_accept_handler_data {
	struct nsd         *nsd;	/* global server state */
	struct nsd_socket  *socket;	/* the listening TCP socket */
	/* the full set of accept handlers, so they can all be enabled or
	 * disabled together when the connection limit is reached/cleared */
	size_t              tcp_accept_handler_count;
	netio_handler_type *tcp_accept_handlers;
};
65 
/* Slow-accept throttling state: presumably set when accept() hits a
 * transient resource shortage, with the timeout saying when to retry.
 * NOTE(review): set/consumed by the TCP accept code outside this view;
 * confirm against handle_tcp_accept. */
int slowaccept;
struct timespec slowaccept_timeout;
68 
69 /*
70  * Data for the TCP connection handlers.
71  *
72  * The TCP handlers use non-blocking I/O.  This is necessary to avoid
73  * blocking the entire server on a slow TCP connection, but does make
74  * reading from and writing to the socket more complicated.
75  *
76  * Basically, whenever a read/write would block (indicated by the
77  * EAGAIN errno variable) we remember the position we were reading
78  * from/writing to and return from the TCP reading/writing event
79  * handler.  When the socket becomes readable/writable again we
80  * continue from the same position.
81  */
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure.  This region is destroyed
	 * when the connection is closed.
	 */
	region_type     *region;

	/*
	 * The global nsd structure.
	 */
	struct nsd      *nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type      *query;

	/*
	 * These fields are used to enable the TCP accept handlers
	 * when the number of TCP connection drops below the maximum
	 * number of TCP connections.
	 */
	size_t              tcp_accept_handler_count;
	netio_handler_type *tcp_accept_handlers;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type query_state;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet.  The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t           bytes_transmitted;

	/*
	 * The number of queries handled by this specific TCP connection.
	 * NOTE(review): presumably checked against a per-connection query
	 * limit in the TCP read/write handlers -- confirm there.
	 */
	int					query_count;
};
129 
130 /*
131  * Handle incoming queries on the UDP server sockets.
132  */
133 static void handle_udp(netio_type *netio,
134 		       netio_handler_type *handler,
135 		       netio_event_types_type event_types);
136 
137 /*
138  * Handle incoming connections on the TCP sockets.  These handlers
139  * usually wait for the NETIO_EVENT_READ event (indicating an incoming
140  * connection) but are disabled when the number of current TCP
141  * connections is equal to the maximum number of TCP connections.
142  * Disabling is done by changing the handler to wait for the
143  * NETIO_EVENT_NONE type.  This is done using the function
144  * configure_tcp_accept_handlers.
145  */
146 static void handle_tcp_accept(netio_type *netio,
147 			      netio_handler_type *handler,
148 			      netio_event_types_type event_types);
149 
150 /*
151  * Handle incoming queries on a TCP connection.  The TCP connections
152  * are configured to be non-blocking and the handler may be called
153  * multiple times before a complete query is received.
154  */
155 static void handle_tcp_reading(netio_type *netio,
156 			       netio_handler_type *handler,
157 			       netio_event_types_type event_types);
158 
159 /*
160  * Handle outgoing responses on a TCP connection.  The TCP connections
161  * are configured to be non-blocking and the handler may be called
162  * multiple times before a complete response is sent.
163  */
164 static void handle_tcp_writing(netio_type *netio,
165 			       netio_handler_type *handler,
166 			       netio_event_types_type event_types);
167 
168 /*
169  * Send all children the quit nonblocking, then close pipe.
170  */
171 static void send_children_quit(struct nsd* nsd);
172 
173 /* set childrens flags to send NSD_STATS to them */
174 #ifdef BIND8_STATS
175 static void set_children_stats(struct nsd* nsd);
176 #endif /* BIND8_STATS */
177 
178 /*
179  * Change the event types the HANDLERS are interested in to
180  * EVENT_TYPES.
181  */
182 static void configure_handler_event_types(size_t count,
183 					  netio_handler_type *handlers,
184 					  netio_event_types_type event_types);
185 
186 /*
187  * start xfrdaemon (again).
188  */
189 static pid_t
190 server_start_xfrd(struct nsd *nsd, netio_handler_type* handler);
191 
192 static uint16_t *compressed_dname_offsets = 0;
193 static uint32_t compression_table_capacity = 0;
194 static uint32_t compression_table_size = 0;
195 
196 /*
197  * Remove the specified pid from the list of child pids.  Returns -1 if
198  * the pid is not in the list, child_num otherwise.  The field is set to 0.
199  */
200 static int
201 delete_child_pid(struct nsd *nsd, pid_t pid)
202 {
203 	size_t i;
204 	for (i = 0; i < nsd->child_count; ++i) {
205 		if (nsd->children[i].pid == pid) {
206 			nsd->children[i].pid = 0;
207 			if(!nsd->children[i].need_to_exit) {
208 				if(nsd->children[i].child_fd != -1)
209 					close(nsd->children[i].child_fd);
210 				nsd->children[i].child_fd = -1;
211 				if(nsd->children[i].handler)
212 					nsd->children[i].handler->fd = -1;
213 			}
214 			return i;
215 		}
216 	}
217 	return -1;
218 }
219 
/*
 * Restart child servers if necessary.
 *
 * Every child whose pid is <= 0 (never started, or reaped by
 * delete_child_pid) gets a fresh IPC socketpair and is forked again.
 * Returns 0 on success, -1 if socketpair() or fork() fails (in which
 * case some children may already have been restarted).
 */
static int
restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	struct main_ipc_handler_data *ipc_data;
	size_t i;
	int sv[2];

	/* Fork the child processes... */
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid <= 0) {
			/* discard the stale IPC fd of the dead child */
			if (nsd->children[i].child_fd != -1)
				close(nsd->children[i].child_fd);
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
				log_msg(LOG_ERR, "socketpair: %s",
					strerror(errno));
				return -1;
			}
			/* sv[0] stays in the parent, sv[1] goes to the child */
			nsd->children[i].child_fd = sv[0];
			nsd->children[i].parent_fd = sv[1];
			nsd->children[i].pid = fork();
			switch (nsd->children[i].pid) {
			default: /* SERVER MAIN */
				/* parent does not use the child's end */
				close(nsd->children[i].parent_fd);
				nsd->children[i].parent_fd = -1;
				/* first start: allocate the (permanent) netio
				 * handler and its IPC state for this slot */
				if(!nsd->children[i].handler)
				{
					ipc_data = (struct main_ipc_handler_data*) region_alloc(
						region, sizeof(struct main_ipc_handler_data));
					ipc_data->nsd = nsd;
					ipc_data->child = &nsd->children[i];
					ipc_data->child_num = i;
					ipc_data->xfrd_sock = xfrd_sock_p;
					ipc_data->packet = buffer_create(region, QIOBUFSZ);
					ipc_data->forward_mode = 0;
					ipc_data->got_bytes = 0;
					ipc_data->total_bytes = 0;
					ipc_data->acl_num = 0;
					ipc_data->busy_writing_zone_state = 0;
					ipc_data->write_conn = xfrd_tcp_create(region);
					nsd->children[i].handler = (struct netio_handler*) region_alloc(
						region, sizeof(struct netio_handler));
					nsd->children[i].handler->fd = nsd->children[i].child_fd;
					nsd->children[i].handler->timeout = NULL;
					nsd->children[i].handler->user_data = ipc_data;
					nsd->children[i].handler->event_types = NETIO_EVENT_READ;
					nsd->children[i].handler->event_handler = parent_handle_child_command;
					netio_add_handler(netio, nsd->children[i].handler);
				}
				/* clear any ongoing ipc */
				ipc_data = (struct main_ipc_handler_data*)
					nsd->children[i].handler->user_data;
				ipc_data->forward_mode = 0;
				ipc_data->busy_writing_zone_state = 0;
				/* restart - update fd */
				nsd->children[i].handler->fd = nsd->children[i].child_fd;
				break;
			case 0: /* CHILD */
				nsd->pid = 0;
				nsd->child_count = 0;
				nsd->server_kind = nsd->children[i].kind;
				nsd->this_child = &nsd->children[i];
				/* remove signal flags inherited from parent
				   the parent will handle them. */
				nsd->signal_hint_reload = 0;
				nsd->signal_hint_child = 0;
				nsd->signal_hint_quit = 0;
				nsd->signal_hint_shutdown = 0;
				nsd->signal_hint_stats = 0;
				nsd->signal_hint_statsusr = 0;
				/* the child only keeps parent_fd; close the
				 * server-main end of the socketpair */
				close(nsd->this_child->child_fd);
				nsd->this_child->child_fd = -1;
				server_child(nsd);
				/* NOTREACH */
				exit(0);
			case -1:
				log_msg(LOG_ERR, "fork failed: %s",
					strerror(errno));
				return -1;
			}
		}
	}
	return 0;
}
307 
#ifdef BIND8_STATS
/*
 * Arm SIGALRM for the BIND8 statistics dump so that it fires on the
 * next whole multiple of the configured period.
 */
static void set_bind8_alarm(struct nsd* nsd)
{
	if(nsd->st.period <= 0)
		return;	/* stats period disabled; also avoids % by zero */
	alarm(nsd->st.period - (time(NULL) % nsd->st.period));
}
#endif
316 
/*
 * Region cleanup callback: free the global dname compression offset
 * table.  Registered on nsd->db->region by
 * initialize_dname_compression_tables so the table is released when the
 * database region is destroyed.
 */
static void
cleanup_dname_compression_tables(void *ptr)
{
	free(ptr);
	/* reset the globals so a later (re)initialization reallocates */
	compressed_dname_offsets = NULL;
	compression_table_capacity = 0;
}
324 
/*
 * (Re)allocate and zero the global dname compression offset table so it
 * can hold one entry per domain in the database plus the reserved
 * EXTRA_DOMAIN_NUMBERS.  The table only ever grows; a table that is
 * already large enough is merely cleared.
 */
static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	/* +1: domain numbers appear to be 1-based here -- the extra slot
	 * covers index [0], which is reserved for the query name below */
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			/* deregister the old cleanup before freeing so the
			 * region does not later free a stale pointer */
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xalloc(
			needed * sizeof(uint16_t));
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}
347 
/*
 * Initialize the server, create and bind the sockets.
 *
 * Creates one UDP and one TCP socket per configured interface
 * (nsd->ifs), applies the per-family socket options, makes the sockets
 * non-blocking, binds them, and puts the TCP sockets into listen mode.
 * Returns 0 on success, -1 on any fatal socket error (non-fatal
 * setsockopt/fcntl failures are only logged).
 */
int
server_init(struct nsd *nsd)
{
	size_t i;
#if defined(SO_REUSEADDR) || (defined(INET6) && (defined(IPV6_V6ONLY) || defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU)))
	int on = 1;
#endif

	/* UDP */

	/* Make a socket... */
	for (i = 0; i < nsd->ifs; i++) {
		/* slots without an address are disabled */
		if (!nsd->udp[i].addr) {
			nsd->udp[i].s = -1;
			continue;
		}
		if ((nsd->udp[i].s = socket(nsd->udp[i].addr->ai_family, nsd->udp[i].addr->ai_socktype, 0)) == -1) {
#if defined(INET6)
			/* tolerate a missing IPv6 stack when configured to */
			if (nsd->udp[i].addr->ai_family == AF_INET6 &&
				errno == EAFNOSUPPORT && nsd->grab_ip6_optional) {
				log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: not supported");
				continue;
			}
#endif /* INET6 */
			log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
			return -1;
		}

#if defined(INET6)
		if (nsd->udp[i].addr->ai_family == AF_INET6) {
# if defined(IPV6_V6ONLY)
			/* keep v6 sockets v6-only; v4 is served by its own socket */
			if (setsockopt(nsd->udp[i].s,
				       IPPROTO_IPV6, IPV6_V6ONLY,
				       &on, sizeof(on)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
					strerror(errno));
				return -1;
			}
# endif
# if defined(IPV6_USE_MIN_MTU)
			/*
			 * There is no fragmentation of IPv6 datagrams
			 * during forwarding in the network. Therefore
			 * we do not send UDP datagrams larger than
			 * the minimum IPv6 MTU of 1280 octets. The
			 * EDNS0 message length can be larger if the
			 * network stack supports IPV6_USE_MIN_MTU.
			 */
			if (setsockopt(nsd->udp[i].s,
				       IPPROTO_IPV6, IPV6_USE_MIN_MTU,
				       &on, sizeof(on)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s",
					strerror(errno));
				return -1;
			}
# elif defined(IPV6_MTU)
			/*
			 * On Linux, PMTUD is disabled by default for datagrams
			 * so set the MTU equal to the MIN MTU to get the same.
			 */
			on = IPV6_MIN_MTU;
			if (setsockopt(nsd->udp[i].s, IPPROTO_IPV6, IPV6_MTU,
				&on, sizeof(on)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s",
					strerror(errno));
				return -1;
			}
			/* restore the generic boolean option value */
			on = 1;
# endif
		}
#endif
#if defined(AF_INET)
		if (nsd->udp[i].addr->ai_family == AF_INET) {
#  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
			/* disable path-MTU discovery for v4 UDP responses */
			int action = IP_PMTUDISC_DONT;
			if (setsockopt(nsd->udp[i].s, IPPROTO_IP,
				IP_MTU_DISCOVER, &action, sizeof(action)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
					strerror(errno));
				return -1;
			}
#  elif defined(IP_DONTFRAG)
			int off = 0;
			if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_DONTFRAG,
				&off, sizeof(off)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s",
					strerror(errno));
				return -1;
			}
#  endif
		}
#endif
		/* set it nonblocking */
		/* otherwise, on OSes with thundering herd problems, the
		   UDP recv could block NSD after select returns readable. */
		if (fcntl(nsd->udp[i].s, F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl udp: %s", strerror(errno));
		}

		/* Bind it... */
		if (bind(nsd->udp[i].s, (struct sockaddr *) nsd->udp[i].addr->ai_addr, nsd->udp[i].addr->ai_addrlen) != 0) {
			log_msg(LOG_ERR, "can't bind udp socket: %s", strerror(errno));
			return -1;
		}
	}

	/* TCP */

	/* Make a socket... */
	for (i = 0; i < nsd->ifs; i++) {
		/* slots without an address are disabled */
		if (!nsd->tcp[i].addr) {
			nsd->tcp[i].s = -1;
			continue;
		}
		if ((nsd->tcp[i].s = socket(nsd->tcp[i].addr->ai_family, nsd->tcp[i].addr->ai_socktype, 0)) == -1) {
#if defined(INET6)
			/* tolerate a missing IPv6 stack when configured to */
			if (nsd->tcp[i].addr->ai_family == AF_INET6 &&
				errno == EAFNOSUPPORT && nsd->grab_ip6_optional) {
				log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: not supported");
				continue;
			}
#endif /* INET6 */
			log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
			return -1;
		}

#ifdef	SO_REUSEADDR
		/* allow quick restarts while old connections linger */
		if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
			log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", strerror(errno));
		}
#endif /* SO_REUSEADDR */

#if defined(INET6) && defined(IPV6_V6ONLY)
		if (nsd->tcp[i].addr->ai_family == AF_INET6 &&
		    setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0)
		{
			log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", strerror(errno));
			return -1;
		}
#endif
		/* set it nonblocking */
		/* (StevensUNP p463), if tcp listening socket is blocking, then
		   it may block in accept, even if select() says readable. */
		if (fcntl(nsd->tcp[i].s, F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl tcp: %s", strerror(errno));
		}

		/* Bind it... */
		if (bind(nsd->tcp[i].s, (struct sockaddr *) nsd->tcp[i].addr->ai_addr, nsd->tcp[i].addr->ai_addrlen) != 0) {
			log_msg(LOG_ERR, "can't bind tcp socket: %s", strerror(errno));
			return -1;
		}

		/* Listen to it... */
		if (listen(nsd->tcp[i].s, TCP_BACKLOG) == -1) {
			log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
			return -1;
		}
	}

	return 0;
}
519 
/*
 * Prepare the server for take off.
 *
 * Opens the database, applies the diff file on top of it, prehashes
 * NSEC3 data when compiled in, and sets up the dname compression table.
 * Returns 0 on success, -1 if the database cannot be opened (a broken
 * diff file is only logged, not fatal).
 */
int
server_prepare(struct nsd *nsd)
{
	/* Open the database... */
	if ((nsd->db = namedb_open(nsd->dbfile, nsd->options, nsd->child_count)) == NULL) {
		log_msg(LOG_ERR, "unable to open the database %s: %s",
			nsd->dbfile, strerror(errno));
		return -1;
	}

	/* Read diff file */
	if(!diff_read_file(nsd->db, nsd->options, NULL, nsd->child_count)) {
		log_msg(LOG_ERR, "The diff file contains errors. Will continue "
						 "without it");
	}

#ifdef NSEC3
	prehash(nsd->db, 0);
#endif

	/* force (re)allocation of the compression table for the new db */
	compression_table_capacity = 0;
	initialize_dname_compression_tables(nsd);

#ifdef	BIND8_STATS
	/* Initialize times... */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif /* BIND8_STATS */

	return 0;
}
555 
556 /*
557  * Fork the required number of servers.
558  */
559 static int
560 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
561 	int* xfrd_sock_p)
562 {
563 	size_t i;
564 
565 	/* Start all child servers initially.  */
566 	for (i = 0; i < nsd->child_count; ++i) {
567 		nsd->children[i].pid = 0;
568 	}
569 
570 	return restart_child_servers(nsd, region, netio, xfrd_sock_p);
571 }
572 
573 static void
574 close_all_sockets(struct nsd_socket sockets[], size_t n)
575 {
576 	size_t i;
577 
578 	/* Close all the sockets... */
579 	for (i = 0; i < n; ++i) {
580 		if (sockets[i].s != -1) {
581 			close(sockets[i].s);
582 			freeaddrinfo(sockets[i].addr);
583 			sockets[i].s = -1;
584 		}
585 	}
586 }
587 
/*
 * Close the sockets, shutdown the server and exit.
 * Does not return.
 *
 * Runs in both the main server and child processes: a child closes its
 * command channel to the parent, the main server closes its channels to
 * all children, then shared resources are torn down and exit(0) is
 * called.
 */
static void
server_shutdown(struct nsd *nsd)
{
	size_t i;

	close_all_sockets(nsd->udp, nsd->ifs);
	close_all_sockets(nsd->tcp, nsd->ifs);
	/* CHILD: close command channel to parent */
	if(nsd->this_child && nsd->this_child->parent_fd != -1)
	{
		close(nsd->this_child->parent_fd);
		nsd->this_child->parent_fd = -1;
	}
	/* SERVER: close command channels to children */
	if(!nsd->this_child)
	{
		for(i=0; i < nsd->child_count; ++i)
			if(nsd->children[i].child_fd != -1)
			{
				close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
			}
	}

	log_finalize();
	tsig_finalize();

	nsd_options_destroy(nsd->options);
	region_destroy(nsd->region);

	exit(0);
}
625 
/*
 * Start (or restart) the xfrd zone-transfer daemon and hook its IPC
 * socket into the given netio handler.  Returns the xfrd pid, or -1 if
 * socketpair() or fork() failed.  Does not return in the forked child
 * (xfrd_init takes over; see the ENOTREACH comment).
 */
static pid_t
server_start_xfrd(struct nsd *nsd, netio_handler_type* handler)
{
	pid_t pid;
	int sockets[2] = {0,0};
	zone_type* zone;
	struct ipc_handler_conn_data *data;
	/* no need to send updates for zones, because xfrd will read from fork-memory */
	for(zone = nsd->db->zones; zone; zone=zone->next) {
		zone->updated = 0;
	}

	/* drop the connection to a previous xfrd instance, if any */
	if(handler->fd != -1)
		close(handler->fd);
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
		log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
		return -1;
	}
	pid = fork();
	switch (pid) {
	case -1:
		/* NOTE(review): on fork failure both socketpair fds stay
		 * open and handler->fd is not updated, yet the parent-only
		 * setup below still runs -- confirm this is intended. */
		log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
		break;
	case 0:
		/* CHILD: close first socket, use second one */
		close(sockets[0]);
		xfrd_init(sockets[1], nsd);
		/* ENOTREACH */
		break;
	default:
		/* PARENT: close second socket, use first one */
		close(sockets[1]);
		handler->fd = sockets[0];
		break;
	}
	/* PARENT only */
	handler->timeout = NULL;
	handler->event_types = NETIO_EVENT_READ;
	handler->event_handler = parent_handle_xfrd_command;
	/* clear ongoing ipc reads */
	data = (struct ipc_handler_conn_data *) handler->user_data;
	data->conn->is_reading = 0;
	return pid;
}
670 
/* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */
/*
 * Read exactly sz bytes from socket s into p, waiting with select()
 * between reads.  With timeout >= 0 each select() call waits at most
 * that many seconds; -2 is returned as soon as one wait expires.
 * EAGAIN and (non-shutdown) EINTR are retried; a quit/shutdown signal
 * hint aborts with -1.  Returns 0 if the peer closed the connection.
 * NOTE(review): FD_SET on s >= FD_SETSIZE would be undefined -- the
 * callers presumably only pass low-numbered IPC fds; confirm.
 */
static ssize_t
block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout)
{
	uint8_t* buf = (uint8_t*) p;
	ssize_t total = 0;
	fd_set rfds;
	struct timeval tv;
	FD_ZERO(&rfds);

	while( total < sz) {
		ssize_t ret;
		/* re-arm fd set and timeout; select() may modify both */
		FD_SET(s, &rfds);
		tv.tv_sec = timeout;
		tv.tv_usec = 0;
		ret = select(s+1, &rfds, NULL, NULL, timeout==-1?NULL:&tv);
		if(ret == -1) {
			if(errno == EAGAIN)
				/* blocking read */
				continue;
			if(errno == EINTR) {
				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
					return -1;
				/* other signals can be handled later */
				continue;
			}
			/* some error */
			return -1;
		}
		if(ret == 0) {
			/* operation timed out */
			return -2;
		}
		ret = read(s, buf+total, sz-total);
		if(ret == -1) {
			if(errno == EAGAIN)
				/* blocking read */
				continue;
			if(errno == EINTR) {
				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
					return -1;
				/* other signals can be handled later */
				continue;
			}
			/* some error */
			return -1;
		}
		if(ret == 0) {
			/* closed connection! */
			return 0;
		}
		total += ret;
	}
	return total;
}
726 
727 /*
728  * Reload the database, stop parent, re-fork children and continue.
729  * as server_main.
730  */
731 static void
732 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
733 	int cmdsocket, int* xfrd_sock_p)
734 {
735 	pid_t old_pid;
736 	sig_atomic_t cmd = NSD_QUIT_SYNC;
737 	zone_type* zone;
738 	int xfrd_sock = *xfrd_sock_p;
739 	int ret;
740 
741 	if(db_crc_different(nsd->db) == 0) {
742 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
743 			"CRC the same. skipping %s.", nsd->db->filename));
744 	} else {
745 		DEBUG(DEBUG_XFRD,1, (LOG_INFO,
746 			"CRC different. reread of %s.", nsd->db->filename));
747 		namedb_close(nsd->db);
748 		if ((nsd->db = namedb_open(nsd->dbfile, nsd->options,
749 			nsd->child_count)) == NULL) {
750 			log_msg(LOG_ERR, "unable to reload the database: %s", strerror(errno));
751 			exit(1);
752 		}
753 	}
754 	if(!diff_read_file(nsd->db, nsd->options, NULL, nsd->child_count)) {
755 		log_msg(LOG_ERR, "unable to load the diff file: %s", nsd->options->difffile);
756 		exit(1);
757 	}
758 	log_msg(LOG_INFO, "memory recyclebin holds %lu bytes", (unsigned long)
759 		region_get_recycle_size(nsd->db->region));
760 #ifndef NDEBUG
761 	if(nsd_debug_level >= 1)
762 		region_log_stats(nsd->db->region);
763 #endif /* NDEBUG */
764 #ifdef NSEC3
765 #ifdef FULL_PREHASH
766 	prehash(nsd->db, 1);
767 #endif /* FULL_PREHASH */
768 #endif /* NSEC3 */
769 
770 	initialize_dname_compression_tables(nsd);
771 
772 	/* Get our new process id */
773 	old_pid = nsd->pid;
774 	nsd->pid = getpid();
775 
776 #ifdef BIND8_STATS
777 	/* Restart dumping stats if required.  */
778 	time(&nsd->st.boot);
779 	set_bind8_alarm(nsd);
780 #endif
781 
782 	/* Start new child processes */
783 	if (server_start_children(nsd, server_region, netio, xfrd_sock_p) != 0) {
784 		send_children_quit(nsd);
785 		exit(1);
786 	}
787 
788 	/* if the parent has quit, we must quit too, poll the fd for cmds */
789 	if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
790 		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", cmd));
791 		if(cmd == NSD_QUIT) {
792 			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
793 			send_children_quit(nsd);
794 			exit(0);
795 		}
796 	}
797 
798 	/* Overwrite pid before closing old parent, to avoid race condition:
799 	 * - parent process already closed
800 	 * - pidfile still contains old_pid
801 	 * - control script contacts parent process, using contents of pidfile
802 	 */
803 	if (writepid(nsd) == -1) {
804 		log_msg(LOG_ERR, "cannot overwrite the pidfile %s: %s", nsd->pidfile, strerror(errno));
805 	}
806 
807 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */
808 	/* Send quit command to parent: blocking, wait for receipt. */
809 	do {
810 		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
811 		if (write_socket(cmdsocket, &cmd, sizeof(cmd)) == -1)
812 		{
813 			log_msg(LOG_ERR, "problems sending command from reload %d to oldnsd %d: %s",
814 				(int)nsd->pid, (int)old_pid, strerror(errno));
815 		}
816 		/* blocking: wait for parent to really quit. (it sends RELOAD as ack) */
817 		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main"));
818 		ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd),
819 			RELOAD_SYNC_TIMEOUT);
820 		if(ret == -2) {
821 			DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry"));
822 		}
823 	} while (ret == -2);
824 	if(ret == -1) {
825 		log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s",
826 			strerror(errno));
827 	}
828 	DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, cmd));
829 	if(cmd == NSD_QUIT) {
830 		/* small race condition possible here, parent got quit cmd. */
831 		send_children_quit(nsd);
832 		unlinkpid(nsd->pidfile);
833 		exit(1);
834 	}
835 	assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD);
836 
837 	/* inform xfrd of new SOAs */
838 	cmd = NSD_SOA_BEGIN;
839 	if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
840 		log_msg(LOG_ERR, "problems sending soa begin from reload %d to xfrd: %s",
841 			(int)nsd->pid, strerror(errno));
842 	}
843 	for(zone= nsd->db->zones; zone; zone = zone->next) {
844 		uint16_t sz;
845 		const dname_type *dname_ns=0, *dname_em=0;
846 		if(zone->updated == 0)
847 			continue;
848 		DEBUG(DEBUG_IPC,1, (LOG_INFO, "nsd: sending soa info for zone %s",
849 			dname_to_string(domain_dname(zone->apex),0)));
850 		cmd = NSD_SOA_INFO;
851 		sz = dname_total_size(domain_dname(zone->apex));
852 		if(zone->soa_rrset) {
853 			dname_ns = domain_dname(
854 				rdata_atom_domain(zone->soa_rrset->rrs[0].rdatas[0]));
855 			dname_em = domain_dname(
856 				rdata_atom_domain(zone->soa_rrset->rrs[0].rdatas[1]));
857 			sz += sizeof(uint32_t)*6 + sizeof(uint8_t)*2
858 				+ dname_ns->name_size + dname_em->name_size;
859 		}
860 		sz = htons(sz);
861 		/* use blocking writes */
862 		if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd)) ||
863 			!write_socket(xfrd_sock, &sz, sizeof(sz)) ||
864 			!write_socket(xfrd_sock, domain_dname(zone->apex),
865 				dname_total_size(domain_dname(zone->apex))))
866 		{
867 			log_msg(LOG_ERR, "problems sending soa info from reload %d to xfrd: %s",
868 				(int)nsd->pid, strerror(errno));
869 		}
870 		if(zone->soa_rrset) {
871 			uint32_t ttl = htonl(zone->soa_rrset->rrs[0].ttl);
872 			assert(dname_ns && dname_em);
873 			assert(zone->soa_rrset->rr_count > 0);
874 			assert(rrset_rrtype(zone->soa_rrset) == TYPE_SOA);
875 			assert(zone->soa_rrset->rrs[0].rdata_count == 7);
876 			if(!write_socket(xfrd_sock, &ttl, sizeof(uint32_t))
877 			   || !write_socket(xfrd_sock, &dname_ns->name_size, sizeof(uint8_t))
878 			   || !write_socket(xfrd_sock, dname_name(dname_ns), dname_ns->name_size)
879 			   || !write_socket(xfrd_sock, &dname_em->name_size, sizeof(uint8_t))
880 			   || !write_socket(xfrd_sock, dname_name(dname_em), dname_em->name_size)
881 			   || !write_socket(xfrd_sock, rdata_atom_data(
882 				zone->soa_rrset->rrs[0].rdatas[2]), sizeof(uint32_t))
883 			   || !write_socket(xfrd_sock, rdata_atom_data(
884 				zone->soa_rrset->rrs[0].rdatas[3]), sizeof(uint32_t))
885 			   || !write_socket(xfrd_sock, rdata_atom_data(
886 				zone->soa_rrset->rrs[0].rdatas[4]), sizeof(uint32_t))
887 			   || !write_socket(xfrd_sock, rdata_atom_data(
888 				zone->soa_rrset->rrs[0].rdatas[5]), sizeof(uint32_t))
889 			   || !write_socket(xfrd_sock, rdata_atom_data(
890 				zone->soa_rrset->rrs[0].rdatas[6]), sizeof(uint32_t)))
891 			{
892 				log_msg(LOG_ERR, "problems sending soa info from reload %d to xfrd: %s",
893 				(int)nsd->pid, strerror(errno));
894 			}
895 		}
896 		zone->updated = 0;
897 	}
898 	cmd = NSD_SOA_END;
899 	if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
900 		log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
901 			(int)nsd->pid, strerror(errno));
902 	}
903 
904 	/* try to reopen file */
905 	if (nsd->file_rotation_ok)
906 		log_reopen(nsd->log_filename, 1);
907 	/* exit reload, continue as new server_main */
908 }
909 
910 /*
911  * Get the mode depending on the signal hints that have been received.
912  * Multiple signal hints can be received and will be handled in turn.
913  */
914 static sig_atomic_t
915 server_signal_mode(struct nsd *nsd)
916 {
917 	if(nsd->signal_hint_quit) {
918 		nsd->signal_hint_quit = 0;
919 		return NSD_QUIT;
920 	}
921 	else if(nsd->signal_hint_shutdown) {
922 		nsd->signal_hint_shutdown = 0;
923 		return NSD_SHUTDOWN;
924 	}
925 	else if(nsd->signal_hint_child) {
926 		nsd->signal_hint_child = 0;
927 		return NSD_REAP_CHILDREN;
928 	}
929 	else if(nsd->signal_hint_reload) {
930 		nsd->signal_hint_reload = 0;
931 		return NSD_RELOAD;
932 	}
933 	else if(nsd->signal_hint_stats) {
934 		nsd->signal_hint_stats = 0;
935 #ifdef BIND8_STATS
936 		set_bind8_alarm(nsd);
937 #endif
938 		return NSD_STATS;
939 	}
940 	else if(nsd->signal_hint_statsusr) {
941 		nsd->signal_hint_statsusr = 0;
942 		return NSD_STATS;
943 	}
944 	return NSD_RUN;
945 }
946 
947 /*
948  * The main server simply waits for signals and child processes to
949  * terminate.  Child processes are restarted as necessary.
950  */
951 void
952 server_main(struct nsd *nsd)
953 {
954 	region_type *server_region = region_create(xalloc, free);
955 	netio_type *netio = netio_create(server_region);
956 	netio_handler_type reload_listener;
957 	netio_handler_type xfrd_listener;
958 	int reload_sockets[2] = {-1, -1};
959 	struct timespec timeout_spec;
960 	int fd;
961 	int status;
962 	pid_t child_pid;
963 	pid_t reload_pid = -1;
964 	pid_t xfrd_pid = -1;
965 	sig_atomic_t mode;
966 
967 	/* Ensure we are the main process */
968 	assert(nsd->server_kind == NSD_SERVER_MAIN);
969 
970 	xfrd_listener.user_data = (struct ipc_handler_conn_data*)region_alloc(
971 		server_region, sizeof(struct ipc_handler_conn_data));
972 	xfrd_listener.fd = -1;
973 	((struct ipc_handler_conn_data*)xfrd_listener.user_data)->nsd = nsd;
974 	((struct ipc_handler_conn_data*)xfrd_listener.user_data)->conn =
975 		xfrd_tcp_create(server_region);
976 
977 	/* Start the XFRD process */
978 	xfrd_pid = server_start_xfrd(nsd, &xfrd_listener);
979 	netio_add_handler(netio, &xfrd_listener);
980 
981 	/* Start the child processes that handle incoming queries */
982 	if (server_start_children(nsd, server_region, netio, &xfrd_listener.fd) != 0) {
983 		send_children_quit(nsd);
984 		exit(1);
985 	}
986 	reload_listener.fd = -1;
987 
988 	/* This_child MUST be 0, because this is the parent process */
989 	assert(nsd->this_child == 0);
990 
991 	/* Run the server until we get a shutdown signal */
992 	while ((mode = nsd->mode) != NSD_SHUTDOWN) {
993 		/* Did we receive a signal that changes our mode? */
994 		if(mode == NSD_RUN) {
995 			nsd->mode = mode = server_signal_mode(nsd);
996 		}
997 
998 		switch (mode) {
999 		case NSD_RUN:
1000 			/* see if any child processes terminated */
1001 			while((child_pid = waitpid(0, &status, WNOHANG)) != -1 && child_pid != 0) {
1002 				int is_child = delete_child_pid(nsd, child_pid);
1003 				if (is_child != -1 && nsd->children[is_child].need_to_exit) {
1004 					if(nsd->children[is_child].child_fd == -1)
1005 						nsd->children[is_child].has_exited = 1;
1006 					parent_check_all_children_exited(nsd);
1007 				} else if(is_child != -1) {
1008 					log_msg(LOG_WARNING,
1009 					       "server %d died unexpectedly with status %d, restarting",
1010 					       (int) child_pid, status);
1011 					restart_child_servers(nsd, server_region, netio,
1012 						&xfrd_listener.fd);
1013 				} else if (child_pid == reload_pid) {
1014 					sig_atomic_t cmd = NSD_SOA_END;
1015 					log_msg(LOG_WARNING,
1016 					       "Reload process %d failed with status %d, continuing with old database",
1017 					       (int) child_pid, status);
1018 					reload_pid = -1;
1019 					if(reload_listener.fd > 0) close(reload_listener.fd);
1020 					reload_listener.fd = -1;
1021 					reload_listener.event_types = NETIO_EVENT_NONE;
1022 					/* inform xfrd reload attempt ended */
1023 					if(!write_socket(xfrd_listener.fd,
1024 						&cmd, sizeof(cmd)) == -1) {
1025 						log_msg(LOG_ERR, "problems "
1026 						  "sending SOAEND to xfrd: %s",
1027 						  strerror(errno));
1028 					}
1029 				} else if (child_pid == xfrd_pid) {
1030 					log_msg(LOG_WARNING,
1031 					       "xfrd process %d failed with status %d, restarting ",
1032 					       (int) child_pid, status);
1033 					xfrd_pid = server_start_xfrd(nsd, &xfrd_listener);
1034 				} else {
1035 					log_msg(LOG_WARNING,
1036 					       "Unknown child %d terminated with status %d",
1037 					       (int) child_pid, status);
1038 				}
1039 			}
1040 			if (child_pid == -1) {
1041 				if (errno == EINTR) {
1042 					continue;
1043 				}
1044 				log_msg(LOG_WARNING, "wait failed: %s", strerror(errno));
1045 			}
1046 			if (nsd->mode != NSD_RUN)
1047 				break;
1048 
1049 			/* timeout to collect processes. In case no sigchild happens. */
1050 			timeout_spec.tv_sec = 60;
1051 			timeout_spec.tv_nsec = 0;
1052 
1053 			/* listen on ports, timeout for collecting terminated children */
1054 			if(netio_dispatch(netio, &timeout_spec, 0) == -1) {
1055 				if (errno != EINTR) {
1056 					log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
1057 				}
1058 			}
1059 
1060 			break;
1061 		case NSD_RELOAD:
1062 			/* Continue to run nsd after reload */
1063 			nsd->mode = NSD_RUN;
1064 
1065 			if (reload_pid != -1) {
1066 				log_msg(LOG_WARNING, "Reload already in progress (pid = %d)",
1067 				       (int) reload_pid);
1068 				break;
1069 			}
1070 
1071 			log_msg(LOG_WARNING, "signal received, reloading...");
1072 
1073 			if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) {
1074 				log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno));
1075 				reload_pid = -1;
1076 				break;
1077 			}
1078 
1079 			/* Do actual reload */
1080 			reload_pid = fork();
1081 			switch (reload_pid) {
1082 			case -1:
1083 				log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
1084 				break;
1085 			case 0:
1086 				/* CHILD */
1087 				close(reload_sockets[0]);
1088 				server_reload(nsd, server_region, netio,
1089 					reload_sockets[1], &xfrd_listener.fd);
1090 				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main"));
1091 				close(reload_sockets[1]);
1092 				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed"));
1093 				/* drop stale xfrd ipc data */
1094 				((struct ipc_handler_conn_data*)xfrd_listener.user_data)
1095 					->conn->is_reading = 0;
1096 				reload_pid = -1;
1097 				reload_listener.fd = -1;
1098 				reload_listener.event_types = NETIO_EVENT_NONE;
1099 				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run"));
1100 				break;
1101 			default:
1102 				/* PARENT, keep running until NSD_QUIT_SYNC
1103 				 * received from CHILD.
1104 				 */
1105 				close(reload_sockets[1]);
1106 				reload_listener.fd = reload_sockets[0];
1107 				reload_listener.timeout = NULL;
1108 				reload_listener.user_data = nsd;
1109 				reload_listener.event_types = NETIO_EVENT_READ;
1110 				reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */
1111 				netio_add_handler(netio, &reload_listener);
1112 				break;
1113 			}
1114 			break;
1115 		case NSD_QUIT_SYNC:
1116 			/* synchronisation of xfrd, parent and reload */
1117 			if(!nsd->quit_sync_done && reload_listener.fd != -1) {
1118 				sig_atomic_t cmd = NSD_RELOAD;
1119 				/* stop xfrd ipc writes in progress */
1120 				DEBUG(DEBUG_IPC,1, (LOG_INFO,
1121 					"main: ipc send indication reload"));
1122 				if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) {
1123 					log_msg(LOG_ERR, "server_main: could not send reload "
1124 					"indication to xfrd: %s", strerror(errno));
1125 				}
1126 				/* wait for ACK from xfrd */
1127 				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd"));
1128 				nsd->quit_sync_done = 1;
1129 			}
1130 			nsd->mode = NSD_RUN;
1131 			break;
1132 		case NSD_QUIT:
1133 			/* silent shutdown during reload */
1134 			if(reload_listener.fd != -1) {
1135 				/* acknowledge the quit, to sync reload that we will really quit now */
1136 				sig_atomic_t cmd = NSD_RELOAD;
1137 				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload"));
1138 				if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
1139 					log_msg(LOG_ERR, "server_main: "
1140 						"could not ack quit: %s", strerror(errno));
1141 				}
1142 				close(reload_listener.fd);
1143 			}
1144 			/* only quit children after xfrd has acked */
1145 			send_children_quit(nsd);
1146 
1147 			namedb_fd_close(nsd->db);
1148 			region_destroy(server_region);
1149 			server_shutdown(nsd);
1150 
1151 			/* ENOTREACH */
1152 			break;
1153 		case NSD_SHUTDOWN:
1154 			send_children_quit(nsd);
1155 			log_msg(LOG_WARNING, "signal received, shutting down...");
1156 			break;
1157 		case NSD_REAP_CHILDREN:
1158 			/* continue; wait for child in run loop */
1159 			nsd->mode = NSD_RUN;
1160 			break;
1161 		case NSD_STATS:
1162 #ifdef BIND8_STATS
1163 			set_children_stats(nsd);
1164 #endif
1165 			nsd->mode = NSD_RUN;
1166 			break;
1167 		default:
1168 			log_msg(LOG_WARNING, "NSD main server mode invalid: %d", nsd->mode);
1169 			nsd->mode = NSD_RUN;
1170 			break;
1171 		}
1172 	}
1173 
1174 	/* Truncate the pid file.  */
1175 	if ((fd = open(nsd->pidfile, O_WRONLY | O_TRUNC, 0644)) == -1) {
1176 		log_msg(LOG_ERR, "can not truncate the pid file %s: %s", nsd->pidfile, strerror(errno));
1177 	}
1178 	close(fd);
1179 
1180 	/* Unlink it if possible... */
1181 	unlinkpid(nsd->pidfile);
1182 
1183 	if(reload_listener.fd != -1) {
1184 		sig_atomic_t cmd = NSD_QUIT;
1185 		DEBUG(DEBUG_IPC,1, (LOG_INFO,
1186 			"main: ipc send quit to reload-process"));
1187 		if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
1188 			log_msg(LOG_ERR, "server_main: could not send quit to reload: %s",
1189 				strerror(errno));
1190 		}
1191 		fsync(reload_listener.fd);
1192 		close(reload_listener.fd);
1193 	}
1194 	if(xfrd_listener.fd != -1) {
1195 		/* complete quit, stop xfrd */
1196 		sig_atomic_t cmd = NSD_QUIT;
1197 		DEBUG(DEBUG_IPC,1, (LOG_INFO,
1198 			"main: ipc send quit to xfrd"));
1199 		if(!write_socket(xfrd_listener.fd, &cmd, sizeof(cmd))) {
1200 			log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s",
1201 				strerror(errno));
1202 		}
1203 		fsync(xfrd_listener.fd);
1204 		close(xfrd_listener.fd);
1205 		(void)kill(xfrd_pid, SIGTERM);
1206 	}
1207 
1208 	namedb_fd_close(nsd->db);
1209 	region_destroy(server_region);
1210 	server_shutdown(nsd);
1211 }
1212 
/*
 * Process a single DNS query.  Thin wrapper around query_process()
 * used as the common entry point by the UDP and TCP handlers below.
 */
static query_state_type
server_process_query(struct nsd *nsd, struct query *query)
{
	return query_process(query, nsd);
}
1218 
1219 
1220 /*
1221  * Serve DNS requests.
1222  */
void
server_child(struct nsd *nsd)
{
	size_t i;
	region_type *server_region = region_create(xalloc, free);
	netio_type *netio = netio_create(server_region);
	netio_handler_type *tcp_accept_handlers;
	query_type *udp_query;
	sig_atomic_t mode;

	assert(nsd->server_kind != NSD_SERVER_MAIN);
	DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started"));

	/* Close the sockets this child does not serve. */
	if (!(nsd->server_kind & NSD_SERVER_TCP)) {
		close_all_sockets(nsd->tcp, nsd->ifs);
	}
	if (!(nsd->server_kind & NSD_SERVER_UDP)) {
		close_all_sockets(nsd->udp, nsd->ifs);
	}

	/* Install a handler for IPC commands arriving from the parent. */
	if (nsd->this_child && nsd->this_child->parent_fd != -1) {
		netio_handler_type *handler;

		handler = (netio_handler_type *) region_alloc(
			server_region, sizeof(netio_handler_type));
		handler->fd = nsd->this_child->parent_fd;
		handler->timeout = NULL;
		handler->user_data = (struct ipc_handler_conn_data*)region_alloc(
			server_region, sizeof(struct ipc_handler_conn_data));
		((struct ipc_handler_conn_data*)handler->user_data)->nsd = nsd;
		((struct ipc_handler_conn_data*)handler->user_data)->conn =
			xfrd_tcp_create(server_region);
		handler->event_types = NETIO_EVENT_READ;
		handler->event_handler = child_handle_parent_command;
		netio_add_handler(netio, handler);
	}

	/* One read handler per UDP interface; all of them share one
	 * query structure since UDP queries are handled to completion
	 * within a single event callback. */
	if (nsd->server_kind & NSD_SERVER_UDP) {
		udp_query = query_create(server_region,
			compressed_dname_offsets, compression_table_size);

		for (i = 0; i < nsd->ifs; ++i) {
			struct udp_handler_data *data;
			netio_handler_type *handler;

			data = (struct udp_handler_data *) region_alloc(
				server_region,
				sizeof(struct udp_handler_data));
			data->query = udp_query;
			data->nsd = nsd;
			data->socket = &nsd->udp[i];

			handler = (netio_handler_type *) region_alloc(
				server_region, sizeof(netio_handler_type));
			handler->fd = nsd->udp[i].s;
			handler->timeout = NULL;
			handler->user_data = data;
			handler->event_types = NETIO_EVENT_READ;
			handler->event_handler = handle_udp;
			netio_add_handler(netio, handler);
		}
	}

	/*
	 * Keep track of all the TCP accept handlers so we can enable
	 * and disable them based on the current number of active TCP
	 * connections.
	 */
	tcp_accept_handlers = (netio_handler_type *) region_alloc(
		server_region, nsd->ifs * sizeof(netio_handler_type));
	if (nsd->server_kind & NSD_SERVER_TCP) {
		for (i = 0; i < nsd->ifs; ++i) {
			struct tcp_accept_handler_data *data;
			netio_handler_type *handler;

			data = (struct tcp_accept_handler_data *) region_alloc(
				server_region,
				sizeof(struct tcp_accept_handler_data));
			data->nsd = nsd;
			data->socket = &nsd->tcp[i];
			data->tcp_accept_handler_count = nsd->ifs;
			data->tcp_accept_handlers = tcp_accept_handlers;

			handler = &tcp_accept_handlers[i];
			handler->fd = nsd->tcp[i].s;
			handler->timeout = NULL;
			handler->user_data = data;
			handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_ACCEPT;
			handler->event_handler = handle_tcp_accept;
			netio_add_handler(netio, handler);
		}
	}

	/* The main loop... */
	while ((mode = nsd->mode) != NSD_QUIT) {
		if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd);

		/* Do we need to do the statistics... */
		if (mode == NSD_STATS) {
#ifdef BIND8_STATS
			/* Dump the statistics */
			bind8_stats(nsd);
#else /* !BIND8_STATS */
			log_msg(LOG_NOTICE, "Statistics support not enabled at compile time.");
#endif /* BIND8_STATS */

			nsd->mode = NSD_RUN;
		}
		else if (mode == NSD_REAP_CHILDREN) {
			/* got signal, notify parent. parent reaps terminated children. */
			if (nsd->this_child->parent_fd != -1) {
				sig_atomic_t parent_notify = NSD_REAP_CHILDREN;
				if (write(nsd->this_child->parent_fd,
				    &parent_notify,
				    sizeof(parent_notify)) == -1)
				{
					log_msg(LOG_ERR, "problems sending command from %d to parent: %s",
						(int) nsd->this_child->pid, strerror(errno));
				}
			} else /* no parent, so reap 'em */
				while (waitpid(0, NULL, WNOHANG) > 0) ;
			nsd->mode = NSD_RUN;
		}
		else if(mode == NSD_RUN) {
			/* Wait for a query... */
			if (netio_dispatch(netio, NULL, NULL) == -1) {
				if (errno != EINTR) {
					log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
					break;
				}
			}
		} else if(mode == NSD_QUIT) {
			/* ignore here, quit */
		} else {
			log_msg(LOG_ERR, "mode bad value %d, back to service.",
				mode);
			nsd->mode = NSD_RUN;
		}
	}

#ifdef	BIND8_STATS
	bind8_stats(nsd);
#endif /* BIND8_STATS */

	namedb_fd_close(nsd->db);
	region_destroy(server_region);
	server_shutdown(nsd);
}
1371 
1372 
/*
 * Handle a single datagram on a UDP socket: receive it, run it through
 * the query processor and, unless the query was discarded, send the
 * answer back to the peer.  Statistics counters are updated when
 * compiled with BIND8_STATS.
 */
static void
handle_udp(netio_type *ATTR_UNUSED(netio),
	   netio_handler_type *handler,
	   netio_event_types_type event_types)
{
	struct udp_handler_data *data
		= (struct udp_handler_data *) handler->user_data;
	int received, sent;
	struct query *q = data->query;

	if (!(event_types & NETIO_EVENT_READ)) {
		return;
	}

	/* Account... */
#ifdef BIND8_STATS
	if (data->socket->addr->ai_family == AF_INET) {
		STATUP(data->nsd, qudp);
	} else if (data->socket->addr->ai_family == AF_INET6) {
		STATUP(data->nsd, qudp6);
	}
#endif

	/* Initialize the query... */
	query_reset(q, UDP_MAX_MESSAGE_LEN, 0);

	received = recvfrom(handler->fd,
			    buffer_begin(q->packet),
			    buffer_remaining(q->packet),
			    0,
			    (struct sockaddr *)&q->addr,
			    &q->addrlen);
	if (received == -1) {
		/* EAGAIN/EINTR are expected on a non-blocking socket;
		 * only real errors are logged and counted. */
		if (errno != EAGAIN && errno != EINTR) {
			log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno));
			STATUP(data->nsd, rxerr);
			/* No zone statup */
		}
	} else {
		buffer_skip(q->packet, received);
		buffer_flip(q->packet);

		/* Process and answer the query... */
		if (server_process_query(data->nsd, q) != QUERY_DISCARDED) {
#ifdef BIND8_STATS
			if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
				STATUP(data->nsd, nona);
				ZTATUP(q->zone, nona);
			}

# ifdef USE_ZONE_STATS
			if (data->socket->addr->ai_family == AF_INET) {
				ZTATUP(q->zone, qudp);
			} else if (data->socket->addr->ai_family == AF_INET6) {
				ZTATUP(q->zone, qudp6);
			}
# endif
#endif

			/* Add EDNS0 and TSIG info if necessary.  */
			query_add_optional(q, data->nsd);

			buffer_flip(q->packet);

			sent = sendto(handler->fd,
				      buffer_begin(q->packet),
				      buffer_remaining(q->packet),
				      0,
				      (struct sockaddr *) &q->addr,
				      q->addrlen);
			if (sent == -1) {
				log_msg(LOG_ERR, "sendto failed: %s", strerror(errno));
				STATUP(data->nsd, txerr);
				ZTATUP(q->zone, txerr);
			} else if ((size_t) sent != buffer_remaining(q->packet)) {
				/* Short send: the reply was truncated on the wire. */
				log_msg(LOG_ERR, "sent %d in place of %d bytes", sent, (int) buffer_remaining(q->packet));
#ifdef BIND8_STATS
			} else {
				/* Account the rcode & TC... */
				STATUP2(data->nsd, rcode, RCODE(q->packet));
				ZTATUP2(q->zone, rcode, RCODE(q->packet));
				if (TC(q->packet)) {
					STATUP(data->nsd, truncated);
					ZTATUP(q->zone, truncated);
				}
#endif /* BIND8_STATS */
			}
#ifdef BIND8_STATS
		} else {
			STATUP(data->nsd, dropped);
# ifdef USE_ZONE_STATS
			if (q->zone) {
				ZTATUP(q->zone, dropped);
			}
# endif
#endif
		}
	}
}
1472 
1473 
1474 static void
1475 cleanup_tcp_handler(netio_type *netio, netio_handler_type *handler)
1476 {
1477 	struct tcp_handler_data *data
1478 		= (struct tcp_handler_data *) handler->user_data;
1479 	netio_remove_handler(netio, handler);
1480 	close(handler->fd);
1481 	slowaccept = 0;
1482 
1483 	/*
1484 	 * Enable the TCP accept handlers when the current number of
1485 	 * TCP connections is about to drop below the maximum number
1486 	 * of TCP connections.
1487 	 */
1488 	if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
1489 		configure_handler_event_types(data->tcp_accept_handler_count,
1490 					      data->tcp_accept_handlers,
1491 					      NETIO_EVENT_READ);
1492 	}
1493 	--data->nsd->current_tcp_count;
1494 	assert(data->nsd->current_tcp_count >= 0);
1495 
1496 	region_destroy(data->region);
1497 }
1498 
/*
 * Read a DNS query from a TCP connection using non-blocking I/O.
 * First the two-byte length prefix is collected, then the message
 * itself; the handler returns early whenever a read would block and
 * resumes on the next READ event, tracking progress in
 * data->bytes_transmitted.  Once a complete query has been processed
 * the handler is switched to handle_tcp_writing to send the answer.
 */
static void
handle_tcp_reading(netio_type *netio,
		   netio_handler_type *handler,
		   netio_event_types_type event_types)
{
	struct tcp_handler_data *data
		= (struct tcp_handler_data *) handler->user_data;
	ssize_t received;

	if (event_types & NETIO_EVENT_TIMEOUT) {
		/* Connection timed out.  */
		cleanup_tcp_handler(netio, handler);
		return;
	}

	if (data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) {
		/* No more queries allowed on this tcp connection.  */
		cleanup_tcp_handler(netio, handler);
		return;
	}

	assert(event_types & NETIO_EVENT_READ);

	/* A fresh query starts; reset the query buffer for TCP sizes. */
	if (data->bytes_transmitted == 0) {
		query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
	}

	/*
	 * Check if we received the leading packet length bytes yet.
	 */
	if (data->bytes_transmitted < sizeof(uint16_t)) {
		received = read(handler->fd,
				(char *) &data->query->tcplen
				+ data->bytes_transmitted,
				sizeof(uint16_t) - data->bytes_transmitted);
		if (received == -1) {
			if (errno == EAGAIN || errno == EINTR) {
				/*
				 * Read would block, wait until more
				 * data is available.
				 */
				return;
			} else {
#ifdef ECONNRESET
				if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
				log_msg(LOG_ERR, "failed reading from tcp: %s", strerror(errno));
				cleanup_tcp_handler(netio, handler);
				return;
			}
		} else if (received == 0) {
			/* EOF */
			cleanup_tcp_handler(netio, handler);
			return;
		}

		data->bytes_transmitted += received;
		if (data->bytes_transmitted < sizeof(uint16_t)) {
			/*
			 * Not done with the tcplen yet, wait for more
			 * data to become available.
			 */
			return;
		}

		assert(data->bytes_transmitted == sizeof(uint16_t));

		/* Length prefix arrives in network byte order. */
		data->query->tcplen = ntohs(data->query->tcplen);

		/*
		 * Minimum query size is:
		 *
		 *     Size of the header (12)
		 *   + Root domain name   (1)
		 *   + Query class        (2)
		 *   + Query type         (2)
		 */
		if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
			VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
			cleanup_tcp_handler(netio, handler);
			return;
		}

		if (data->query->tcplen > data->query->maxlen) {
			VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
			cleanup_tcp_handler(netio, handler);
			return;
		}

		buffer_set_limit(data->query->packet, data->query->tcplen);
	}

	assert(buffer_remaining(data->query->packet) > 0);

	/* Read the (remaining) query data.  */
	received = read(handler->fd,
			buffer_current(data->query->packet),
			buffer_remaining(data->query->packet));
	if (received == -1) {
		if (errno == EAGAIN || errno == EINTR) {
			/*
			 * Read would block, wait until more data is
			 * available.
			 */
			return;
		} else {
#ifdef ECONNRESET
			if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
			log_msg(LOG_ERR, "failed reading from tcp: %s", strerror(errno));
			cleanup_tcp_handler(netio, handler);
			return;
		}
	} else if (received == 0) {
		/* EOF */
		cleanup_tcp_handler(netio, handler);
		return;
	}

	data->bytes_transmitted += received;
	buffer_skip(data->query->packet, received);
	if (buffer_remaining(data->query->packet) > 0) {
		/*
		 * Message not yet complete, wait for more data to
		 * become available.
		 */
		return;
	}

	assert(buffer_position(data->query->packet) == data->query->tcplen);

	/* Account... */
#ifdef BIND8_STATS
# ifndef INET6
	STATUP(data->nsd, ctcp);
# else
	if (data->query->addr.ss_family == AF_INET) {
		STATUP(data->nsd, ctcp);
	} else if (data->query->addr.ss_family == AF_INET6) {
		STATUP(data->nsd, ctcp6);
	}
# endif
#endif /* BIND8_STATS */

	/* We have a complete query, process it.  */

	/* tcp-query-count: handle query counter ++ */
	data->query_count++;

	buffer_flip(data->query->packet);
	data->query_state = server_process_query(data->nsd, data->query);
	if (data->query_state == QUERY_DISCARDED) {
		/* Drop the packet and the entire connection... */
		STATUP(data->nsd, dropped);
#if defined(BIND8_STATS) && defined(USE_ZONE_STATS)
		if (data->query->zone) {
			ZTATUP(data->query->zone, dropped);
		}
#endif
		cleanup_tcp_handler(netio, handler);
		return;
	}

#ifdef BIND8_STATS
	if (RCODE(data->query->packet) == RCODE_OK
	    && !AA(data->query->packet))
	{
		STATUP(data->nsd, nona);
		ZTATUP(data->query->zone, nona);
	}

# ifdef USE_ZONE_STATS
#  ifndef INET6
	ZTATUP(data->query->zone, ctcp);
#  else
	if (data->query->addr.ss_family == AF_INET) {
		ZTATUP(data->query->zone, ctcp);
	} else if (data->query->addr.ss_family == AF_INET6) {
		ZTATUP(data->query->zone, ctcp6);
	}
#  endif
# endif /* USE_ZONE_STATS */

#endif /* BIND8_STATS */

	query_add_optional(data->query, data->nsd);

	/* Switch to the tcp write handler.  */
	buffer_flip(data->query->packet);
	data->query->tcplen = buffer_remaining(data->query->packet);
	data->bytes_transmitted = 0;

	/* Restart the idle timeout for the write phase. */
	handler->timeout->tv_sec = data->nsd->tcp_timeout;
	handler->timeout->tv_nsec = 0L;
	timespec_add(handler->timeout, netio_current_time(netio));

	handler->event_types = NETIO_EVENT_WRITE | NETIO_EVENT_TIMEOUT;
	handler->event_handler = handle_tcp_writing;
}
1699 
/*
 * Write a DNS answer to a TCP connection using non-blocking I/O.
 * First the two-byte length prefix is written, then the message
 * itself; the handler returns early whenever a write would block and
 * resumes on the next WRITE event, tracking progress in
 * data->bytes_transmitted.  When the answer (or an AXFR batch) is
 * fully sent, the handler is switched back to handle_tcp_reading.
 */
static void
handle_tcp_writing(netio_type *netio,
		   netio_handler_type *handler,
		   netio_event_types_type event_types)
{
	struct tcp_handler_data *data
		= (struct tcp_handler_data *) handler->user_data;
	ssize_t sent;
	struct query *q = data->query;

	if (event_types & NETIO_EVENT_TIMEOUT) {
		/* Connection timed out.  */
		cleanup_tcp_handler(netio, handler);
		return;
	}

	assert(event_types & NETIO_EVENT_WRITE);

	if (data->bytes_transmitted < sizeof(q->tcplen)) {
		/* Writing the response packet length.  */
		uint16_t n_tcplen = htons(q->tcplen);
		sent = write(handler->fd,
			     (const char *) &n_tcplen + data->bytes_transmitted,
			     sizeof(n_tcplen) - data->bytes_transmitted);
		if (sent == -1) {
			if (errno == EAGAIN || errno == EINTR) {
				/*
				 * Write would block, wait until
				 * socket becomes writable again.
				 */
				return;
			} else {
#ifdef ECONNRESET
				if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
					if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
				log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
				cleanup_tcp_handler(netio, handler);
				return;
			}
		}

		data->bytes_transmitted += sent;
		if (data->bytes_transmitted < sizeof(q->tcplen)) {
			/*
			 * Writing not complete, wait until socket
			 * becomes writable again.
			 */
			return;
		}

		assert(data->bytes_transmitted == sizeof(q->tcplen));
	}

	assert(data->bytes_transmitted < q->tcplen + sizeof(q->tcplen));

	/* Write as much of the message body as the socket accepts. */
	sent = write(handler->fd,
		     buffer_current(q->packet),
		     buffer_remaining(q->packet));
	if (sent == -1) {
		if (errno == EAGAIN || errno == EINTR) {
			/*
			 * Write would block, wait until
			 * socket becomes writable again.
			 */
			return;
		} else {
#ifdef ECONNRESET
			if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
					if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
			log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
			cleanup_tcp_handler(netio, handler);
			return;
		}
	}

	buffer_skip(q->packet, sent);
	data->bytes_transmitted += sent;
	if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
		/*
		 * Still more data to write when socket becomes
		 * writable again.
		 */
		return;
	}

	assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

	if (data->query_state == QUERY_IN_AXFR) {
		/* Continue processing AXFR and writing back results.  */
		buffer_clear(q->packet);
		data->query_state = query_axfr(data->nsd, q);
		if (data->query_state != QUERY_PROCESSED) {
			query_add_optional(data->query, data->nsd);

			/* Reset data. */
			buffer_flip(q->packet);
			q->tcplen = buffer_remaining(q->packet);
			data->bytes_transmitted = 0;
			/* Reset timeout.  */
			handler->timeout->tv_sec = data->nsd->tcp_timeout;
			handler->timeout->tv_nsec = 0;
			timespec_add(handler->timeout, netio_current_time(netio));

			/*
			 * Write data if/when the socket is writable
			 * again.
			 */
			return;
		}
	}

	/*
	 * Done sending, wait for the next request to arrive on the
	 * TCP socket by installing the TCP read handler.
	 */
	if (data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) {

		/* Half-close: tell the client no more answers will come. */
		(void) shutdown(handler->fd, SHUT_WR);
	}

	data->bytes_transmitted = 0;

	handler->timeout->tv_sec = data->nsd->tcp_timeout;
	handler->timeout->tv_nsec = 0;
	timespec_add(handler->timeout, netio_current_time(netio));

	handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_TIMEOUT;
	handler->event_handler = handle_tcp_reading;
}
1836 
1837 
1838 /*
1839  * Handle an incoming TCP connection.  The connection is accepted and
1840  * a new TCP reader event handler is added to NETIO.  The TCP handler
1841  * is responsible for cleanup when the connection is closed.
1842  */
static void
handle_tcp_accept(netio_type *netio,
		  netio_handler_type *handler,
		  netio_event_types_type event_types)
{
	struct tcp_accept_handler_data *data
		= (struct tcp_accept_handler_data *) handler->user_data;
	int s;
	struct tcp_handler_data *tcp_data;
	region_type *tcp_region;
	netio_handler_type *tcp_handler;
#ifdef INET6
	struct sockaddr_storage addr;
#else
	struct sockaddr_in addr;
#endif
	socklen_t addrlen;

	if (!(event_types & NETIO_EVENT_READ)) {
		return;
	}

	/* Refuse new connections while at the configured maximum. */
	if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) {
		return;
	}

	/* Accept it... */
	addrlen = sizeof(addr);
	s = accept(handler->fd, (struct sockaddr *) &addr, &addrlen);
	if (s == -1) {
		/**
		 * EMFILE and ENFILE is a signal that the limit of open
		 * file descriptors has been reached. Pause accept().
		 * EINTR is a signal interrupt. The others are various OS ways
		 * of saying that the client has closed the connection.
		 */
		if (errno == EMFILE || errno == ENFILE) {
			if (!slowaccept) {
				slowaccept_timeout.tv_sec = NETIO_SLOW_ACCEPT_TIMEOUT;
				slowaccept_timeout.tv_nsec = 0L;
				timespec_add(&slowaccept_timeout, netio_current_time(netio));
				slowaccept = 1;
				/* We don't want to spam the logs here */
			}
		} else if (errno != EINTR
			&& errno != EWOULDBLOCK
#ifdef ECONNABORTED
			&& errno != ECONNABORTED
#endif /* ECONNABORTED */
#ifdef EPROTO
			&& errno != EPROTO
#endif /* EPROTO */
			) {
			log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
		}
		return;
	}

	/* Non-blocking I/O is required by the TCP read/write handlers. */
	if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
		log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
		close(s);
		return;
	}

	/*
	 * This region is deallocated when the TCP connection is
	 * closed by the TCP handler.
	 */
	tcp_region = region_create(xalloc, free);
	tcp_data = (struct tcp_handler_data *) region_alloc(
		tcp_region, sizeof(struct tcp_handler_data));
	tcp_data->region = tcp_region;
	tcp_data->query = query_create(tcp_region, compressed_dname_offsets,
		compression_table_size);
	tcp_data->nsd = data->nsd;
	tcp_data->query_count = 0;

	tcp_data->tcp_accept_handler_count = data->tcp_accept_handler_count;
	tcp_data->tcp_accept_handlers = data->tcp_accept_handlers;

	tcp_data->query_state = QUERY_PROCESSED;
	tcp_data->bytes_transmitted = 0;
	memcpy(&tcp_data->query->addr, &addr, addrlen);
	tcp_data->query->addrlen = addrlen;

	/* The per-connection handler lives in the same region. */
	tcp_handler = (netio_handler_type *) region_alloc(
		tcp_region, sizeof(netio_handler_type));
	tcp_handler->fd = s;
	tcp_handler->timeout = (struct timespec *) region_alloc(
		tcp_region, sizeof(struct timespec));
	tcp_handler->timeout->tv_sec = data->nsd->tcp_timeout;
	tcp_handler->timeout->tv_nsec = 0L;
	timespec_add(tcp_handler->timeout, netio_current_time(netio));

	tcp_handler->user_data = tcp_data;
	tcp_handler->event_types = NETIO_EVENT_READ | NETIO_EVENT_TIMEOUT;
	tcp_handler->event_handler = handle_tcp_reading;

	netio_add_handler(netio, tcp_handler);

	/*
	 * Keep track of the total number of TCP handlers installed so
	 * we can stop accepting connections when the maximum number
	 * of simultaneous TCP connections is reached.
	 */
	++data->nsd->current_tcp_count;
	if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
		configure_handler_event_types(data->tcp_accept_handler_count,
					      data->tcp_accept_handlers,
					      NETIO_EVENT_NONE);
	}
}
1955 
1956 static void
1957 send_children_quit(struct nsd* nsd)
1958 {
1959 	sig_atomic_t command = NSD_QUIT;
1960 	size_t i;
1961 	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
1962 	for (i = 0; i < nsd->child_count; ++i) {
1963 		if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) {
1964 			if (write(nsd->children[i].child_fd,
1965 				&command,
1966 				sizeof(command)) == -1)
1967 			{
1968 				if(errno != EAGAIN && errno != EINTR)
1969 					log_msg(LOG_ERR, "problems sending command %d to server %d: %s",
1970 					(int) command,
1971 					(int) nsd->children[i].pid,
1972 					strerror(errno));
1973 			}
1974 			fsync(nsd->children[i].child_fd);
1975 			close(nsd->children[i].child_fd);
1976 			nsd->children[i].child_fd = -1;
1977 		}
1978 	}
1979 }
1980 
#ifdef BIND8_STATS
/*
 * Flag every child so it transmits its statistics to the parent: mark
 * the child as needing a STATS send and enable WRITE events on its
 * IPC handler so the send actually happens.  Main process only.
 */
static void
set_children_stats(struct nsd* nsd)
{
	size_t idx;

	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children"));

	for (idx = 0; idx < nsd->child_count; ++idx) {
		struct nsd_child *child = &nsd->children[idx];
		child->need_to_send_STATS = 1;
		child->handler->event_types |= NETIO_EVENT_WRITE;
	}
}
#endif /* BIND8_STATS */
1994 
1995 static void
1996 configure_handler_event_types(size_t count,
1997 			      netio_handler_type *handlers,
1998 			      netio_event_types_type event_types)
1999 {
2000 	size_t i;
2001 
2002 	assert(handlers);
2003 
2004 	for (i = 0; i < count; ++i) {
2005 		handlers[i].event_types = event_types;
2006 	}
2007 }
2008