/*
 * server.c -- nsd(8) network input/output
 *
 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
 *
 * See LICENSE for the license.
 *
 */

#include "config.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/wait.h>

#include <netinet/in.h>
#include <arpa/inet.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>
#include <netdb.h>
#ifndef SHUT_WR
#define SHUT_WR 1
#endif
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif /* HAVE_MMAP */
#include <openssl/rand.h>
#ifndef USE_MINI_EVENT
#  ifdef HAVE_EVENT_H
#    include <event.h>
#  else
#    include <event2/event.h>
#    include "event2/event_struct.h"
#    include "event2/event_compat.h"
#  endif
#else
#  include "mini_event.h"
#endif

#include "axfr.h"
#include "namedb.h"
#include "netio.h"
#include "xfrd.h"
#include "xfrd-tcp.h"
#include "xfrd-disk.h"
#include "difffile.h"
#include "nsec3.h"
#include "ipc.h"
#include "udb.h"
#include "remote.h"
#include "lookup3.h"
#include "rrl.h"

#define RELOAD_SYNC_TIMEOUT 25 /* seconds */

/*
 * Data for the UDP handlers.
 */
struct udp_handler_data
{
	struct nsd        *nsd;
	struct nsd_socket *socket;
	query_type        *query;
};

struct tcp_accept_handler_data {
	struct nsd         *nsd;
	struct nsd_socket  *socket;
	int event_added;
	struct event       event;
};

/*
 * These globals are used to enable the TCP accept handlers
 * when the number of TCP connections drops below the maximum
 * number of TCP connections.
 */
static size_t		tcp_accept_handler_count;
static struct tcp_accept_handler_data*	tcp_accept_handlers;

static struct event slowaccept_event;
static int slowaccept;

#ifndef NONBLOCKING_IS_BROKEN
#  define NUM_RECV_PER_SELECT 100
#endif

#if (!defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG))
struct mmsghdr msgs[NUM_RECV_PER_SELECT];
struct iovec iovecs[NUM_RECV_PER_SELECT];
struct query *queries[NUM_RECV_PER_SELECT];
#endif
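
/*
 * Illustrative sketch, disabled: how arrays like the ones above are
 * typically wired up for a Linux recvmmsg(2) batch.  The buffers and
 * the helper name are hypothetical, for illustration only.
 */
#if 0
static void
example_recv_batch(int fd)
{
	static uint8_t bufs[NUM_RECV_PER_SELECT][512];
	int i, n;
	for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
		iovecs[i].iov_base = bufs[i];
		iovecs[i].iov_len = sizeof(bufs[i]);
		memset(&msgs[i], 0, sizeof(msgs[i]));
		msgs[i].msg_hdr.msg_iov = &iovecs[i];
		msgs[i].msg_hdr.msg_iovlen = 1;
	}
	/* one syscall can return up to NUM_RECV_PER_SELECT datagrams */
	n = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
	for (i = 0; i < n; i++) {
		/* msgs[i].msg_len holds the size of datagram i */
	}
}
#endif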

/*
 * Data for the TCP connection handlers.
 *
 * The TCP handlers use non-blocking I/O.  This is necessary to avoid
 * blocking the entire server on a slow TCP connection, but does make
 * reading from and writing to the socket more complicated.
 *
 * Basically, whenever a read/write would block (indicated by the
 * EAGAIN errno variable) we remember the position we were reading
 * from/writing to and return from the TCP reading/writing event
 * handler.  When the socket becomes readable/writable again we
 * continue from the same position.
 */
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure.  This region is destroyed
	 * when the connection is closed.
	 */
	region_type*		region;

	/*
	 * The global nsd structure.
	 */
	struct nsd*			nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type*			query;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type	query_state;

	/*
	 * The event for the file descriptor and tcp timeout
	 */
	struct event event;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet.  The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t				bytes_transmitted;

	/*
	 * The number of queries handled by this specific TCP connection.
	 */
	int					query_count;
};
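
/*
 * Illustrative sketch, disabled: the resume-after-EAGAIN pattern the
 * comment above describes, for sending a DNS message preceded by its
 * two-byte length on a non-blocking TCP socket.  This is not the NSD
 * handler itself; the helper name and return convention are
 * hypothetical.
 */
#if 0
static int
example_partial_send(int fd, struct tcp_handler_data *data,
	const uint8_t *wire, uint16_t wirelen)
{
	uint8_t lenbuf[2] = { wirelen >> 8, wirelen & 0xff };
	ssize_t sent;

	while (data->bytes_transmitted < (size_t)wirelen + 2) {
		if (data->bytes_transmitted < 2)
			/* still sending the two length bytes */
			sent = send(fd, lenbuf + data->bytes_transmitted,
				2 - data->bytes_transmitted, 0);
		else
			/* resume the packet from the remembered position */
			sent = send(fd, wire + data->bytes_transmitted - 2,
				wirelen - (data->bytes_transmitted - 2), 0);
		if (sent == -1) {
			if (errno == EAGAIN || errno == EINTR)
				return 0; /* wait for the next writable event */
			return -1; /* real error: drop the connection */
		}
		data->bytes_transmitted += sent;
	}
	return 1; /* complete response sent */
}
#endif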

/*
 * Handle incoming queries on the UDP server sockets.
 */
static void handle_udp(int fd, short event, void* arg);

/*
 * Handle incoming connections on the TCP sockets.  These handlers
 * usually wait for the NETIO_EVENT_READ event (indicating an incoming
 * connection) but are disabled when the number of current TCP
 * connections is equal to the maximum number of TCP connections.
 * Disabling is done by changing the handler to wait for the
 * NETIO_EVENT_NONE type.  This is done using the function
 * configure_tcp_accept_handlers.
 */
static void handle_tcp_accept(int fd, short event, void* arg);

/*
 * Handle incoming queries on a TCP connection.  The TCP connections
 * are configured to be non-blocking and the handler may be called
 * multiple times before a complete query is received.
 */
static void handle_tcp_reading(int fd, short event, void* arg);

/*
 * Handle outgoing responses on a TCP connection.  The TCP connections
 * are configured to be non-blocking and the handler may be called
 * multiple times before a complete response is sent.
 */
static void handle_tcp_writing(int fd, short event, void* arg);

/*
 * Send the quit command to all children (non-blocking), then close the pipe.
 */
static void send_children_quit(struct nsd* nsd);
/* same, for shutdown time; waits for the children to exit to avoid restart issues */
static void send_children_quit_and_wait(struct nsd* nsd);

/* set children's flags to send NSD_STATS to them */
#ifdef BIND8_STATS
static void set_children_stats(struct nsd* nsd);
#endif /* BIND8_STATS */

/*
 * Change the event types the HANDLERS are interested in to EVENT_TYPES.
 */
static void configure_handler_event_types(short event_types);

static uint16_t *compressed_dname_offsets = 0;
static uint32_t compression_table_capacity = 0;
static uint32_t compression_table_size = 0;

/*
 * Remove the specified pid from the list of child pids.  Returns -1 if
 * the pid is not in the list, the child index otherwise.  The pid field
 * is set to 0.
 */
static int
delete_child_pid(struct nsd *nsd, pid_t pid)
{
	size_t i;
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid == pid) {
			nsd->children[i].pid = 0;
			if(!nsd->children[i].need_to_exit) {
				if(nsd->children[i].child_fd != -1)
					close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
				if(nsd->children[i].handler)
					nsd->children[i].handler->fd = -1;
			}
			return i;
		}
	}
	return -1;
}

/*
 * Restart child servers if necessary.
 */
static int
restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	struct main_ipc_handler_data *ipc_data;
	size_t i;
	int sv[2];

	/* Fork the child processes... */
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid <= 0) {
			if (nsd->children[i].child_fd != -1)
				close(nsd->children[i].child_fd);
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
				log_msg(LOG_ERR, "socketpair: %s",
					strerror(errno));
				return -1;
			}
			nsd->children[i].child_fd = sv[0];
			nsd->children[i].parent_fd = sv[1];
			nsd->children[i].pid = fork();
			switch (nsd->children[i].pid) {
			default: /* SERVER MAIN */
				close(nsd->children[i].parent_fd);
				nsd->children[i].parent_fd = -1;
				if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				if(!nsd->children[i].handler)
				{
					ipc_data = (struct main_ipc_handler_data*) region_alloc(
						region, sizeof(struct main_ipc_handler_data));
					ipc_data->nsd = nsd;
					ipc_data->child = &nsd->children[i];
					ipc_data->child_num = i;
					ipc_data->xfrd_sock = xfrd_sock_p;
					ipc_data->packet = buffer_create(region, QIOBUFSZ);
					ipc_data->forward_mode = 0;
					ipc_data->got_bytes = 0;
					ipc_data->total_bytes = 0;
					ipc_data->acl_num = 0;
					nsd->children[i].handler = (struct netio_handler*) region_alloc(
						region, sizeof(struct netio_handler));
					nsd->children[i].handler->fd = nsd->children[i].child_fd;
					nsd->children[i].handler->timeout = NULL;
					nsd->children[i].handler->user_data = ipc_data;
					nsd->children[i].handler->event_types = NETIO_EVENT_READ;
					nsd->children[i].handler->event_handler = parent_handle_child_command;
					netio_add_handler(netio, nsd->children[i].handler);
				}
				/* clear any ongoing ipc */
				ipc_data = (struct main_ipc_handler_data*)
					nsd->children[i].handler->user_data;
				ipc_data->forward_mode = 0;
				/* restart - update fd */
				nsd->children[i].handler->fd = nsd->children[i].child_fd;
				break;
			case 0: /* CHILD */
				/* the child need not be able to access the
				 * nsd.db file */
				namedb_close_udb(nsd->db);
				nsd->pid = 0;
				nsd->child_count = 0;
				nsd->server_kind = nsd->children[i].kind;
				nsd->this_child = &nsd->children[i];
				/* remove signal flags inherited from the
				   parent; the parent will handle them. */
				nsd->signal_hint_reload_hup = 0;
				nsd->signal_hint_reload = 0;
				nsd->signal_hint_child = 0;
				nsd->signal_hint_quit = 0;
				nsd->signal_hint_shutdown = 0;
				nsd->signal_hint_stats = 0;
				nsd->signal_hint_statsusr = 0;
				close(*xfrd_sock_p);
				close(nsd->this_child->child_fd);
				nsd->this_child->child_fd = -1;
				if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				server_child(nsd);
				/* NOTREACH */
				exit(0);
			case -1:
				log_msg(LOG_ERR, "fork failed: %s",
					strerror(errno));
				return -1;
			}
		}
	}
	return 0;
}

#ifdef BIND8_STATS
static void set_bind8_alarm(struct nsd* nsd)
{
	/* resync so that the next alarm is on the next whole minute */
	if(nsd->st.period > 0) /* % by 0 gives divbyzero error */
		alarm(nsd->st.period - (time(NULL) % nsd->st.period));
}
#endif
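
/*
 * Worked example of the resync above: with st.period = 60 and
 * time(NULL) = 10000, 10000 % 60 = 40, so alarm(60 - 40) fires in 20
 * seconds, at time 10020, exactly on the next whole period boundary.
 */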

/* set zone stat ids for zones initially read in */
static void
zonestatid_tree_set(struct nsd* nsd)
{
	struct radnode* n;
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		zone_type* zone = (zone_type*)n->elem;
		zone->zonestatid = getzonestatid(nsd->options, zone->opts);
	}
}

#ifdef USE_ZONE_STATS
void
server_zonestat_alloc(struct nsd* nsd)
{
	size_t num = (nsd->options->zonestatnames->count==0?1:
			nsd->options->zonestatnames->count);
	size_t sz = sizeof(struct nsdst)*num;
	char tmpfile[256];
	uint8_t z = 0;

	/* file names */
	nsd->zonestatfname[0] = 0;
	nsd->zonestatfname[1] = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile);
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile);

	/* file descriptors */
	nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600);
	if(nsd->zonestatfd[0] == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0],
			strerror(errno));
		exit(1);
	}
	nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600);
	if(nsd->zonestatfd[1] == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1],
			strerror(errno));
		close(nsd->zonestatfd[0]);
		unlink(nsd->zonestatfname[0]);
		exit(1);
	}

#ifdef HAVE_MMAP
	if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[0], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[0], strerror(errno));
		exit(1);
	}
	if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[1], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[1], strerror(errno));
		exit(1);
	}
	nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[0], 0);
	if(nsd->zonestat[0] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[1], 0);
	if(nsd->zonestat[1] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	memset(nsd->zonestat[0], 0, sz);
	memset(nsd->zonestat[1], 0, sz);
	nsd->zonestatsize[0] = num;
	nsd->zonestatsize[1] = num;
	nsd->zonestatdesired = num;
	nsd->zonestatsizenow = num;
	nsd->zonestatnow = nsd->zonestat[0];
#endif /* HAVE_MMAP */
}

void
zonestat_remap(struct nsd* nsd, int idx, size_t sz)
{
#ifdef HAVE_MMAP
#ifdef MREMAP_MAYMOVE
	nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], sz,
		MREMAP_MAYMOVE);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap failed: %s", strerror(errno));
		exit(1);
	}
#else /* !MREMAP_MAYMOVE */
	if(msync(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0)
		log_msg(LOG_ERR, "msync failed: %s", strerror(errno));
	if(munmap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0)
		log_msg(LOG_ERR, "munmap failed: %s", strerror(errno));
	nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz,
		PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		exit(1);
	}
#endif /* MREMAP_MAYMOVE */
#endif /* HAVE_MMAP */
}

/* realloc the zonestat array for the one that is not currently in use,
 * to match the desired new size of the array (if applicable) */
void
server_zonestat_realloc(struct nsd* nsd)
{
#ifdef HAVE_MMAP
	uint8_t z = 0;
	size_t sz;
	int idx = 0; /* index of the zonestat array that is not in use */
	if(nsd->zonestatnow == nsd->zonestat[0])
		idx = 1;
	if(nsd->zonestatsize[idx] == nsd->zonestatdesired)
		return;
	sz = sizeof(struct nsdst)*nsd->zonestatdesired;
	if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[idx], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[idx], strerror(errno));
		exit(1);
	}
	zonestat_remap(nsd, idx, sz);
	/* zero the newly allocated region */
	if(nsd->zonestatdesired > nsd->zonestatsize[idx]) {
		memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) *
			nsd->zonestatsize[idx], 0, sizeof(struct nsdst) *
			(nsd->zonestatdesired - nsd->zonestatsize[idx]));
	}
	nsd->zonestatsize[idx] = nsd->zonestatdesired;
#endif /* HAVE_MMAP */
}

/* switch over to the other array for the new children, which briefly
 * coexist with the old children, so that old and new children do not
 * write to the same statistics array. */
void
server_zonestat_switch(struct nsd* nsd)
{
	if(nsd->zonestatnow == nsd->zonestat[0]) {
		nsd->zonestatnow = nsd->zonestat[1];
		nsd->zonestatsizenow = nsd->zonestatsize[1];
	} else {
		nsd->zonestatnow = nsd->zonestat[0];
		nsd->zonestatsizenow = nsd->zonestatsize[0];
	}
}
#endif /* USE_ZONE_STATS */

static void
cleanup_dname_compression_tables(void *ptr)
{
	free(ptr);
	compressed_dname_offsets = NULL;
	compression_table_capacity = 0;
}

static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xmallocarray(
			needed, sizeof(uint16_t));
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}
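
/*
 * Example of how the offsets above are used: entry 0 is preset to
 * QHEADERSZ (12, the fixed DNS header size) because the query name
 * starts right after the header; a later occurrence of the same domain
 * can then emit a 14-bit compression pointer to that stored packet
 * offset instead of repeating the name (RFC 1035 section 4.1.4).
 */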

/*
 * Initialize the server, create and bind the sockets.
 *
 */
int
server_init(struct nsd *nsd)
{
	size_t i;
#if defined(SO_REUSEADDR) || (defined(INET6) && (defined(IPV6_V6ONLY) || defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU) || defined(IP_TRANSPARENT)))
	int on = 1;
#endif

	/* UDP */

	/* Make a socket... */
	for (i = 0; i < nsd->ifs; i++) {
		if (!nsd->udp[i].addr) {
			nsd->udp[i].s = -1;
			continue;
		}
		if ((nsd->udp[i].s = socket(nsd->udp[i].addr->ai_family, nsd->udp[i].addr->ai_socktype, 0)) == -1) {
#if defined(INET6)
			if (nsd->udp[i].addr->ai_family == AF_INET6 &&
				errno == EAFNOSUPPORT && nsd->grab_ip6_optional) {
				log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: not supported");
				continue;
			}
#endif /* INET6 */
			log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
			return -1;
		}

#if defined(SO_RCVBUF) || defined(SO_SNDBUF)
	if(1) {
	int rcv = 1*1024*1024;
	int snd = 1*1024*1024;

#ifdef SO_RCVBUF
#  ifdef SO_RCVBUFFORCE
	if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
		(socklen_t)sizeof(rcv)) < 0) {
		if(errno != EPERM && errno != ENOBUFS) {
			log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, "
                                        "...) failed: %s", strerror(errno));
			return -1;
		}
#  else
	if(1) {
#  endif /* SO_RCVBUFFORCE */
		if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
			 (socklen_t)sizeof(rcv)) < 0) {
			if(errno != ENOBUFS && errno != ENOSYS) {
				log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, "
                                        "...) failed: %s", strerror(errno));
				return -1;
			}
		}
	}
#endif /* SO_RCVBUF */

#ifdef SO_SNDBUF
#  ifdef SO_SNDBUFFORCE
	if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
		(socklen_t)sizeof(snd)) < 0) {
		if(errno != EPERM && errno != ENOBUFS) {
			log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, "
                                        "...) failed: %s", strerror(errno));
			return -1;
		}
#  else
	if(1) {
#  endif /* SO_SNDBUFFORCE */
		if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
			 (socklen_t)sizeof(snd)) < 0) {
			if(errno != ENOBUFS && errno != ENOSYS) {
				log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, "
                                        "...) failed: %s", strerror(errno));
				return -1;
			}
		}
	}
#endif /* SO_SNDBUF */

	}
#endif /* defined(SO_RCVBUF) || defined(SO_SNDBUF) */

#if defined(INET6)
		if (nsd->udp[i].addr->ai_family == AF_INET6) {
# if defined(IPV6_V6ONLY)
			if (setsockopt(nsd->udp[i].s,
				       IPPROTO_IPV6, IPV6_V6ONLY,
				       &on, sizeof(on)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
					strerror(errno));
				return -1;
			}
# endif
# if defined(IPV6_USE_MIN_MTU)
			/*
			 * There is no fragmentation of IPv6 datagrams
			 * during forwarding in the network. Therefore
			 * we do not send UDP datagrams larger than
			 * the minimum IPv6 MTU of 1280 octets. The
			 * EDNS0 message length can be larger if the
			 * network stack supports IPV6_USE_MIN_MTU.
			 */
			if (setsockopt(nsd->udp[i].s,
				       IPPROTO_IPV6, IPV6_USE_MIN_MTU,
				       &on, sizeof(on)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s",
					strerror(errno));
				return -1;
			}
# elif defined(IPV6_MTU)
			/*
			 * On Linux, PMTUD is disabled by default for datagrams
			 * so set the MTU equal to the MIN MTU to get the same.
			 */
			on = IPV6_MIN_MTU;
			if (setsockopt(nsd->udp[i].s, IPPROTO_IPV6, IPV6_MTU,
				&on, sizeof(on)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s",
					strerror(errno));
				return -1;
			}
			on = 1;
# endif
		}
#endif
#if defined(AF_INET)
		if (nsd->udp[i].addr->ai_family == AF_INET) {
#  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
			int action = IP_PMTUDISC_DONT;
			if (setsockopt(nsd->udp[i].s, IPPROTO_IP,
				IP_MTU_DISCOVER, &action, sizeof(action)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
					strerror(errno));
				return -1;
			}
#  elif defined(IP_DONTFRAG)
			int off = 0;
			if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_DONTFRAG,
				&off, sizeof(off)) < 0)
			{
				log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s",
					strerror(errno));
				return -1;
			}
#  endif
		}
#endif
		/* set it nonblocking */
		/* otherwise, on OSes with thundering herd problems, the
		   UDP recv could block NSD after select returns readable. */
		if (fcntl(nsd->udp[i].s, F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl udp: %s", strerror(errno));
		}

		/* Bind it... */
		if (nsd->options->ip_transparent) {
#ifdef IP_TRANSPARENT
			if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(...,IP_TRANSPARENT, ...) failed for udp: %s",
					strerror(errno));
			}
#endif /* IP_TRANSPARENT */
		}

		if (bind(nsd->udp[i].s, (struct sockaddr *) nsd->udp[i].addr->ai_addr, nsd->udp[i].addr->ai_addrlen) != 0) {
			log_msg(LOG_ERR, "can't bind udp socket: %s", strerror(errno));
			return -1;
		}
	}

	/* TCP */

	/* Make a socket... */
	for (i = 0; i < nsd->ifs; i++) {
		if (!nsd->tcp[i].addr) {
			nsd->tcp[i].s = -1;
			continue;
		}
		if ((nsd->tcp[i].s = socket(nsd->tcp[i].addr->ai_family, nsd->tcp[i].addr->ai_socktype, 0)) == -1) {
#if defined(INET6)
			if (nsd->tcp[i].addr->ai_family == AF_INET6 &&
				errno == EAFNOSUPPORT && nsd->grab_ip6_optional) {
				log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: not supported");
				continue;
			}
#endif /* INET6 */
			log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
			return -1;
		}

#ifdef	SO_REUSEADDR
		if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
			log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", strerror(errno));
		}
#endif /* SO_REUSEADDR */

#if defined(INET6)
		if (nsd->tcp[i].addr->ai_family == AF_INET6) {
# if defined(IPV6_V6ONLY)
			if (setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_V6ONLY,
				&on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", strerror(errno));
				return -1;
			}
# endif
# if defined(IPV6_USE_MIN_MTU)
			/*
			 * Use minimum MTU to minimize delays learning working
			 * PMTU when communicating through a tunnel.
			 */
			if (setsockopt(nsd->tcp[i].s,
				       IPPROTO_IPV6, IPV6_USE_MIN_MTU,
				       &on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", strerror(errno));
				return -1;
			}
# elif defined(IPV6_MTU)
			/*
			 * On Linux, PMTUD is disabled by default for datagrams
			 * so set the MTU equal to the MIN MTU to get the same.
			 */
			on = IPV6_MIN_MTU;
			if (setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_MTU,
				&on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", strerror(errno));
				return -1;
			}
			on = 1;
# endif
		}
#endif
		/* set it nonblocking */
		/* (StevensUNP p463), if tcp listening socket is blocking, then
		   it may block in accept, even if select() says readable. */
		if (fcntl(nsd->tcp[i].s, F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl tcp: %s", strerror(errno));
		}

		/* Bind it... */
		if (nsd->options->ip_transparent) {
#ifdef IP_TRANSPARENT
			if (setsockopt(nsd->tcp[i].s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(...,IP_TRANSPARENT, ...) failed for tcp: %s",
					strerror(errno));
			}
#endif /* IP_TRANSPARENT */
		}

		if (bind(nsd->tcp[i].s, (struct sockaddr *) nsd->tcp[i].addr->ai_addr, nsd->tcp[i].addr->ai_addrlen) != 0) {
			log_msg(LOG_ERR, "can't bind tcp socket: %s", strerror(errno));
			return -1;
		}

		/* Listen to it... */
		if (listen(nsd->tcp[i].s, TCP_BACKLOG) == -1) {
			log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
			return -1;
		}
	}

	return 0;
}
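
/*
 * Illustrative sketch, disabled: the per-socket sequence server_init()
 * performs, condensed for a single UDP address and without the socket
 * options or error logging.  The helper name is hypothetical.
 */
#if 0
static int
example_udp_socket(struct addrinfo *addr)
{
	int s = socket(addr->ai_family, addr->ai_socktype, 0);
	if (s == -1)
		return -1;
	/* never block in recv after select/poll reports readable */
	(void)fcntl(s, F_SETFL, O_NONBLOCK);
	if (bind(s, (struct sockaddr *)addr->ai_addr, addr->ai_addrlen) != 0) {
		close(s);
		return -1;
	}
	return s;
}
#endif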

/*
 * Prepare the server for take off.
 *
 */
int
server_prepare(struct nsd *nsd)
{
#ifdef RATELIMIT
	/* set secret modifier for hashing (udb ptr buckets and rate limits) */
#ifdef HAVE_ARC4RANDOM
	hash_set_raninit(arc4random());
#else
	uint32_t v = getpid() ^ time(NULL);
	srandom((unsigned long)v);
	if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0)
		hash_set_raninit(v);
	else	hash_set_raninit(random());
#endif
	rrl_mmap_init(nsd->child_count, nsd->options->rrl_size,
		nsd->options->rrl_ratelimit,
		nsd->options->rrl_whitelist_ratelimit,
		nsd->options->rrl_slip,
		nsd->options->rrl_ipv4_prefix_length,
		nsd->options->rrl_ipv6_prefix_length);
#endif /* RATELIMIT */

	/* Open the database... */
	if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) {
		log_msg(LOG_ERR, "unable to open the database %s: %s",
			nsd->dbfile, strerror(errno));
		unlink(nsd->task[0]->fname);
		unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		return -1;
	}
	/* check if zone files have been modified */
	/* NULL for taskudb because we send soainfo in a moment, batched up,
	 * for all zones */
	if(nsd->options->zonefiles_check || (nsd->options->database == NULL ||
		nsd->options->database[0] == 0))
		namedb_check_zonefiles(nsd, nsd->options, NULL, NULL);
	zonestatid_tree_set(nsd);

	compression_table_capacity = 0;
	initialize_dname_compression_tables(nsd);

#ifdef	BIND8_STATS
	/* Initialize times... */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif /* BIND8_STATS */

	return 0;
}

/*
 * Fork the required number of servers.
 */
static int
server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	size_t i;

	/* Start all child servers initially.  */
	for (i = 0; i < nsd->child_count; ++i) {
		nsd->children[i].pid = 0;
	}

	return restart_child_servers(nsd, region, netio, xfrd_sock_p);
}

void
server_close_all_sockets(struct nsd_socket sockets[], size_t n)
{
	size_t i;

	/* Close all the sockets... */
	for (i = 0; i < n; ++i) {
		if (sockets[i].s != -1) {
			close(sockets[i].s);
			freeaddrinfo(sockets[i].addr);
			sockets[i].s = -1;
		}
	}
}

/*
 * Close the sockets, shutdown the server and exit.
 * Does not return.
 *
 */
void
server_shutdown(struct nsd *nsd)
{
	size_t i;

	server_close_all_sockets(nsd->udp, nsd->ifs);
	server_close_all_sockets(nsd->tcp, nsd->ifs);
	/* CHILD: close command channel to parent */
	if(nsd->this_child && nsd->this_child->parent_fd != -1)
	{
		close(nsd->this_child->parent_fd);
		nsd->this_child->parent_fd = -1;
	}
	/* SERVER: close command channels to children */
	if(!nsd->this_child)
	{
		for(i=0; i < nsd->child_count; ++i)
			if(nsd->children[i].child_fd != -1)
			{
				close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
			}
	}

	tsig_finalize();
#ifdef HAVE_SSL
	daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */
#endif

#if 0 /* OS collects memory pages */
	nsd_options_destroy(nsd->options);
	region_destroy(nsd->region);
#endif
	log_finalize();
	exit(0);
}

void
server_prepare_xfrd(struct nsd* nsd)
{
	char tmpfile[256];
	/* create task mmaps */
	nsd->mytask = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[0] = task_file_create(tmpfile);
	if(!nsd->task[0]) {
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[1] = task_file_create(tmpfile);
	if(!nsd->task[1]) {
		unlink(nsd->task[0]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	assert(udb_base_get_userdata(nsd->task[0])->data == 0);
	assert(udb_base_get_userdata(nsd->task[1])->data == 0);
	/* create xfrd listener structure */
	nsd->xfrd_listener = region_alloc(nsd->region,
		sizeof(netio_handler_type));
	nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*)
		region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data));
	nsd->xfrd_listener->fd = -1;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd =
		nsd;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn =
		xfrd_tcp_create(nsd->region, QIOBUFSZ);
}


void
server_start_xfrd(struct nsd *nsd, int del_db, int reload_active)
{
	pid_t pid;
	int sockets[2] = {0,0};
	struct ipc_handler_conn_data *data;

	if(nsd->xfrd_listener->fd != -1)
		close(nsd->xfrd_listener->fd);
	if(del_db) {
		/* recreate taskdb that xfrd was using, it may be corrupt */
		/* we (or reload) use nsd->mytask, and xfrd uses the other */
		char* tmpfile = nsd->task[1-nsd->mytask]->fname;
		nsd->task[1-nsd->mytask]->fname = NULL;
		/* free alloc already, so udb does not shrink itself */
		udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc);
		nsd->task[1-nsd->mytask]->alloc = NULL;
		udb_base_free(nsd->task[1-nsd->mytask]);
		/* create new file, overwrite the old one */
		nsd->task[1-nsd->mytask] = task_file_create(tmpfile);
		free(tmpfile);
	}
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
		log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
		return;
	}
	pid = fork();
	switch (pid) {
	case -1:
		log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
		break;
	default:
		/* PARENT: close first socket, use second one */
		close(sockets[0]);
		if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		if(del_db) xfrd_free_namedb(nsd);
		/* use other task than I am using, since if xfrd died and is
		 * restarted, the reload is using nsd->mytask */
		nsd->mytask = 1 - nsd->mytask;
		xfrd_init(sockets[1], nsd, del_db, reload_active, pid);
		/* ENOTREACH */
		break;
	case 0:
		/* CHILD: close second socket, use first one */
		close(sockets[1]);
		if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		nsd->xfrd_listener->fd = sockets[0];
		break;
	}
	/* server-parent only */
	nsd->xfrd_listener->timeout = NULL;
	nsd->xfrd_listener->event_types = NETIO_EVENT_READ;
	nsd->xfrd_listener->event_handler = parent_handle_xfrd_command;
	/* clear ongoing ipc reads */
	data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data;
	data->conn->is_reading = 0;
}
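
/*
 * Illustrative sketch, disabled: the socketpair(2)/fork(2) IPC pattern
 * used above.  Each side closes the end it does not use and keeps a
 * non-blocking fd to its peer; the caller can tell it is the child
 * when *child == 0.  The helper name is hypothetical.
 */
#if 0
static int
example_fork_with_channel(pid_t *child)
{
	int sv[2];
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1)
		return -1;
	switch ((*child = fork())) {
	case -1:
		return -1;
	case 0: /* child: keep sv[0] */
		close(sv[1]);
		(void)fcntl(sv[0], F_SETFL, O_NONBLOCK);
		return sv[0];
	default: /* parent: keep sv[1] */
		close(sv[0]);
		(void)fcntl(sv[1], F_SETFL, O_NONBLOCK);
		return sv[1];
	}
}
#endif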

/** add all soainfo to taskdb */
static void
add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb)
{
	struct radnode* n;
	udb_ptr task_last; /* last task, mytask is empty so NULL */
	/* add all SOA INFO to mytask */
	udb_ptr_init(&task_last, taskudb);
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0);
	}
	udb_ptr_unlink(&task_last, taskudb);
}

void
server_send_soa_xfrd(struct nsd* nsd, int shortsoa)
{
	/* normally this exchanges the SOA from nsd->xfrd and the expire back.
	 *   parent fills one taskdb with soas, xfrd fills other with expires.
	 *   then they exchange and process.
	 * shortsoa: xfrd crashes and needs to be restarted and one taskdb
	 *   may be in use by reload.  Fill SOA in taskdb and give to xfrd.
	 *   expire notifications can be sent back via a normal reload later
	 *   (xfrd will wait for current running reload to finish if any).
	 */
	sig_atomic_t cmd = 0;
	pid_t mypid;
	int xfrd_sock = nsd->xfrd_listener->fd;
	struct udb_base* taskudb = nsd->task[nsd->mytask];
	udb_ptr t;
	if(!shortsoa) {
		if(nsd->signal_hint_shutdown) {
		shutdown:
			log_msg(LOG_WARNING, "signal received, shutting down...");
			server_close_all_sockets(nsd->udp, nsd->ifs);
			server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
			daemon_remote_close(nsd->rc);
#endif
			/* Unlink it if possible... */
			unlinkpid(nsd->pidfile);
			unlink(nsd->task[0]->fname);
			unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
			unlink(nsd->zonestatfname[0]);
			unlink(nsd->zonestatfname[1]);
#endif
			/* write the nsd.db to disk, wait for it to complete */
			udb_base_sync(nsd->db->udb, 1);
			udb_base_close(nsd->db->udb);
			server_shutdown(nsd);
			exit(0);
		}
	}
	if(shortsoa) {
		/* put SOA in xfrd task because mytask may be in use */
		taskudb = nsd->task[1-nsd->mytask];
	}

	add_all_soa_to_task(nsd, taskudb);
	if(!shortsoa) {
		/* wait for xfrd to signal task is ready, RELOAD signal */
		if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) ||
			cmd != NSD_RELOAD) {
			log_msg(LOG_ERR, "did not get start signal from xfrd");
			exit(1);
		}
		if(nsd->signal_hint_shutdown) {
			goto shutdown;
		}
	}
	/* give xfrd our task, signal it with RELOAD_DONE */
	task_process_sync(taskudb);
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
			(int)nsd->pid, strerror(errno));
	}
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	if(!shortsoa) {
		/* process the xfrd task list (expiry data) */
		nsd->mytask = 1 - nsd->mytask;
		taskudb = nsd->task[nsd->mytask];
		task_remap(taskudb);
		udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
		while(!udb_ptr_is_null(&t)) {
			task_process_expire(nsd->db, TASKLIST(&t));
			udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next);
		}
		udb_ptr_unlink(&t, taskudb);
		task_clear(taskudb);

		/* tell xfrd that the task is emptied, signal with RELOAD_DONE */
		cmd = NSD_RELOAD_DONE;
		if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
			log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
				(int)nsd->pid, strerror(errno));
		}
	}
}

/* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */
ssize_t
block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout)
{
	uint8_t* buf = (uint8_t*) p;
	ssize_t total = 0;
	fd_set rfds;
	struct timeval tv;
	FD_ZERO(&rfds);

	while( total < sz) {
		ssize_t ret;
		FD_SET(s, &rfds);
		tv.tv_sec = timeout;
		tv.tv_usec = 0;
		ret = select(s+1, &rfds, NULL, NULL, timeout==-1?NULL:&tv);
		if(ret == -1) {
			if(errno == EAGAIN)
				/* blocking read */
				continue;
			if(errno == EINTR) {
				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
					return -1;
				/* other signals can be handled later */
				continue;
			}
			/* some error */
			return -1;
		}
		if(ret == 0) {
			/* operation timed out */
			return -2;
		}
		ret = read(s, buf+total, sz-total);
		if(ret == -1) {
			if(errno == EAGAIN)
				/* blocking read */
				continue;
			if(errno == EINTR) {
				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
					return -1;
				/* other signals can be handled later */
				continue;
			}
			/* some error */
			return -1;
		}
		if(ret == 0) {
			/* closed connection! */
			return 0;
		}
		total += ret;
	}
	return total;
}
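
/*
 * Illustrative usage of block_read() above, disabled: read one IPC
 * command with a bounded wait and handle each return case.  The helper
 * name is hypothetical.
 */
#if 0
static void
example_block_read_usage(struct nsd* nsd, int fd)
{
	sig_atomic_t cmd;
	ssize_t r = block_read(nsd, fd, &cmd, sizeof(cmd), RELOAD_SYNC_TIMEOUT);
	if (r == sizeof(cmd)) {
		/* got a complete command, act on cmd */
	} else if (r == -2) {
		/* timed out; peer not ready yet, retry or give up */
	} else if (r == 0) {
		/* peer closed the channel */
	} else {
		/* r == -1: read error, or a quit/shutdown signal arrived */
	}
}
#endif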

static void
reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket)
{
	sig_atomic_t cmd = NSD_QUIT_SYNC;
	udb_ptr t, next;
	udb_base* u = nsd->task[nsd->mytask];
	udb_ptr_init(&next, u);
	udb_ptr_new(&t, u, udb_base_get_userdata(u));
	udb_base_set_userdata(u, 0);
	while(!udb_ptr_is_null(&t)) {
		/* store next in list so this one can be deleted or reused */
		udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next);
		udb_rptr_zero(&TASKLIST(&t)->next, u);

		/* process task t */
		/* append results for task t and update last_task */
		task_process_in_reload(nsd, u, last_task, &t);

		/* go to next */
		udb_ptr_set_ptr(&t, u, &next);

		/* if the parent has quit, we must quit too, poll the fd for cmds */
		if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
			if(cmd == NSD_QUIT) {
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
				/* sync to disk (if needed) */
				udb_base_sync(nsd->db->udb, 0);
				/* unlink files of remainder of tasks */
				while(!udb_ptr_is_null(&t)) {
					if(TASKLIST(&t)->task_type == task_apply_xfr) {
						xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno);
					}
					udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next);
				}
				udb_ptr_unlink(&t, u);
				udb_ptr_unlink(&next, u);
				exit(0);
			}
		}

	}
	udb_ptr_unlink(&t, u);
	udb_ptr_unlink(&next, u);
}

#ifdef BIND8_STATS
static void
parent_send_stats(struct nsd* nsd, int cmdfd)
{
	size_t i;
	if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) {
		log_msg(LOG_ERR, "could not write stats to reload");
		return;
	}
	for(i=0; i<nsd->child_count; i++)
		if(!write_socket(cmdfd, &nsd->children[i].query_count,
			sizeof(stc_t))) {
			log_msg(LOG_ERR, "could not write stats to reload");
			return;
		}
}

static void
reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last)
{
	struct nsdst s;
	stc_t* p;
	size_t i;
	if(block_read(nsd, cmdfd, &s, sizeof(s),
		RELOAD_SYNC_TIMEOUT) != sizeof(s)) {
		log_msg(LOG_ERR, "could not read stats from oldpar");
		return;
	}
	s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0);
	s.db_mem = region_get_mem(nsd->db->region);
	p = (stc_t*)task_new_stat_info(nsd->task[nsd->mytask], last, &s,
		nsd->child_count);
	if(!p) return;
	for(i=0; i<nsd->child_count; i++) {
		if(block_read(nsd, cmdfd, p++, sizeof(stc_t), 1)!=sizeof(stc_t))
			return;
	}
}
#endif /* BIND8_STATS */

/*
 * Reload the database, stop the parent, re-fork the children and
 * continue as server_main.
 */
static void
server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
	int cmdsocket)
{
	pid_t mypid;
	sig_atomic_t cmd = NSD_QUIT_SYNC;
	int ret;
	udb_ptr last_task;
	struct sigaction old_sigchld, ign_sigchld;
	/* ignore SIGCHLD from the previous server_main that used this pid */
	memset(&ign_sigchld, 0, sizeof(ign_sigchld));
	ign_sigchld.sa_handler = SIG_IGN;
	sigaction(SIGCHLD, &ign_sigchld, &old_sigchld);

	/* see what tasks we got from xfrd */
	task_remap(nsd->task[nsd->mytask]);
	udb_ptr_init(&last_task, nsd->task[nsd->mytask]);
	udb_compact_inhibited(nsd->db->udb, 1);
	reload_process_tasks(nsd, &last_task, cmdsocket);
	udb_compact_inhibited(nsd->db->udb, 0);
	udb_compact(nsd->db->udb);

#ifndef NDEBUG
	if(nsd_debug_level >= 1)
		region_log_stats(nsd->db->region);
#endif /* NDEBUG */
	/* sync to disk (if needed) */
	udb_base_sync(nsd->db->udb, 0);

	initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
	/* Restart dumping stats if required.  */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif
#ifdef USE_ZONE_STATS
	server_zonestat_realloc(nsd); /* realloc for new children */
	server_zonestat_switch(nsd);
#endif

	/* listen for the signals of failed children again */
	sigaction(SIGCHLD, &old_sigchld, NULL);
	/* Start new child processes */
	if (server_start_children(nsd, server_region, netio, &nsd->
		xfrd_listener->fd) != 0) {
		send_children_quit(nsd);
		exit(1);
	}

	/* if the parent has quit, we must quit too, poll the fd for cmds */
	if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
		if(cmd == NSD_QUIT) {
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
			send_children_quit(nsd);
			exit(0);
		}
	}

	/* Send quit command to parent: blocking, wait for receipt. */
	do {
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
		if (!write_socket(cmdsocket, &cmd, sizeof(cmd)))
		{
			log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s",
				strerror(errno));
		}
		/* blocking: wait for parent to really quit. (it sends RELOAD as ack) */
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main"));
		ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd),
			RELOAD_SYNC_TIMEOUT);
		if(ret == -2) {
			DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry"));
		}
	} while (ret == -2);
	if(ret == -1) {
		log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s",
			strerror(errno));
	}
	DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd));
	if(cmd == NSD_QUIT) {
		/* small race condition possible here, parent got quit cmd. */
		send_children_quit(nsd);
		exit(1);
	}
	assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD);
#ifdef BIND8_STATS
	reload_do_stats(cmdsocket, nsd, &last_task);
#endif
	udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
	task_process_sync(nsd->task[nsd->mytask]);
#ifdef USE_ZONE_STATS
	server_zonestat_realloc(nsd); /* realloc for next children */
#endif

	/* send soainfo to the xfrd process, signal it that reload is done,
	 * it picks up the taskudb */
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(nsd->xfrd_listener->fd, &cmd,  sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending reload_done xfrd: %s",
			strerror(errno));
	}
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	/* try to reopen file */
	if (nsd->file_rotation_ok)
		log_reopen(nsd->log_filename, 1);
	/* exit reload, continue as new server_main */
}
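
/*
 * Rough summary of the handover implemented above: reload sends
 * NSD_QUIT_SYNC to the old main over cmdsocket and blocks for an ack;
 * the old main syncs with xfrd and, once it really quits, acks with
 * NSD_RELOAD; reload then signals xfrd with NSD_RELOAD_DONE plus its
 * pid, and returns to continue as the new server_main.
 */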

/*
 * Get the mode depending on the signal hints that have been received.
 * Multiple signal hints can be received and will be handled in turn.
 */
static sig_atomic_t
server_signal_mode(struct nsd *nsd)
{
	if(nsd->signal_hint_quit) {
		nsd->signal_hint_quit = 0;
		return NSD_QUIT;
	}
	else if(nsd->signal_hint_shutdown) {
		nsd->signal_hint_shutdown = 0;
		return NSD_SHUTDOWN;
	}
	else if(nsd->signal_hint_child) {
		nsd->signal_hint_child = 0;
		return NSD_REAP_CHILDREN;
	}
	else if(nsd->signal_hint_reload) {
		nsd->signal_hint_reload = 0;
		return NSD_RELOAD;
	}
	else if(nsd->signal_hint_reload_hup) {
		nsd->signal_hint_reload_hup = 0;
		return NSD_RELOAD_REQ;
	}
	else if(nsd->signal_hint_stats) {
		nsd->signal_hint_stats = 0;
#ifdef BIND8_STATS
		set_bind8_alarm(nsd);
#endif
		return NSD_STATS;
	}
	else if(nsd->signal_hint_statsusr) {
		nsd->signal_hint_statsusr = 0;
		return NSD_STATS;
	}
	return NSD_RUN;
}

/*
 * The main server simply waits for signals and child processes to
 * terminate.  Child processes are restarted as necessary.
 */
void
server_main(struct nsd *nsd)
{
	region_type *server_region = region_create(xalloc, free);
	netio_type *netio = netio_create(server_region);
	netio_handler_type reload_listener;
	int reload_sockets[2] = {-1, -1};
	struct timespec timeout_spec;
	int status;
	pid_t child_pid;
	pid_t reload_pid = -1;
	sig_atomic_t mode;

	/* Ensure we are the main process */
	assert(nsd->server_kind == NSD_SERVER_MAIN);

	/* Add listener for the XFRD process */
	netio_add_handler(netio, nsd->xfrd_listener);

	/* Start the child processes that handle incoming queries */
	if (server_start_children(nsd, server_region, netio,
		&nsd->xfrd_listener->fd) != 0) {
		send_children_quit(nsd);
		exit(1);
	}
	reload_listener.fd = -1;

	/* This_child MUST be 0, because this is the parent process */
	assert(nsd->this_child == 0);

	/* Run the server until we get a shutdown signal */
	while ((mode = nsd->mode) != NSD_SHUTDOWN) {
		/* Did we receive a signal that changes our mode? */
		if(mode == NSD_RUN) {
			nsd->mode = mode = server_signal_mode(nsd);
		}

		switch (mode) {
		case NSD_RUN:
			/* see if any child processes terminated */
			while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) {
				int is_child = delete_child_pid(nsd, child_pid);
				if (is_child != -1 && nsd->children[is_child].need_to_exit) {
					if(nsd->children[is_child].child_fd == -1)
						nsd->children[is_child].has_exited = 1;
					parent_check_all_children_exited(nsd);
				} else if(is_child != -1) {
					log_msg(LOG_WARNING,
					       "server %d died unexpectedly with status %d, restarting",
					       (int) child_pid, status);
					restart_child_servers(nsd, server_region, netio,
						&nsd->xfrd_listener->fd);
				} else if (child_pid == reload_pid) {
					sig_atomic_t cmd = NSD_RELOAD_DONE;
					pid_t mypid;
					log_msg(LOG_WARNING,
					       "Reload process %d failed with status %d, continuing with old database",
					       (int) child_pid, status);
					reload_pid = -1;
					if(reload_listener.fd != -1) close(reload_listener.fd);
					reload_listener.fd = -1;
					reload_listener.event_types = NETIO_EVENT_NONE;
					task_process_sync(nsd->task[nsd->mytask]);
					/* inform xfrd reload attempt ended */
					if(!write_socket(nsd->xfrd_listener->fd,
						&cmd, sizeof(cmd))) {
						log_msg(LOG_ERR, "problems "
						  "sending SOAEND to xfrd: %s",
						  strerror(errno));
					}
					mypid = getpid();
					if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
						log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
							strerror(errno));
					}
				} else if(status != 0) {
					/* check the status, because we can
					 * also reap the old server main
					 * (reload is the process parent of
					 * the old main) and older server
					 * processes exiting after a reload */
					log_msg(LOG_WARNING,
					       "process %d terminated with status %d",
					       (int) child_pid, status);
				}
			}
			if (child_pid == -1) {
				if (errno == EINTR) {
					continue;
				}
				if (errno != ECHILD)
					log_msg(LOG_WARNING, "wait failed: %s", strerror(errno));
			}
			if (nsd->mode != NSD_RUN)
				break;

			/* timeout to collect processes, in case no SIGCHLD happens */
			timeout_spec.tv_sec = 60;
			timeout_spec.tv_nsec = 0;

			/* listen on ports, timeout for collecting terminated children */
			if(netio_dispatch(netio, &timeout_spec, 0) == -1) {
				if (errno != EINTR) {
					log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
				}
			}
			if(nsd->restart_children) {
				restart_child_servers(nsd, server_region, netio,
					&nsd->xfrd_listener->fd);
				nsd->restart_children = 0;
			}
			if(nsd->reload_failed) {
				sig_atomic_t cmd = NSD_RELOAD_DONE;
				pid_t mypid;
				nsd->reload_failed = 0;
				log_msg(LOG_WARNING,
				       "Reload process %d failed, continuing with old database",
				       (int) reload_pid);
				reload_pid = -1;
				if(reload_listener.fd != -1) close(reload_listener.fd);
				reload_listener.fd = -1;
				reload_listener.event_types = NETIO_EVENT_NONE;
				task_process_sync(nsd->task[nsd->mytask]);
				/* inform xfrd reload attempt ended */
				if(!write_socket(nsd->xfrd_listener->fd,
					&cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "problems "
					  "sending SOAEND to xfrd: %s",
					  strerror(errno));
				}
				mypid = getpid();
				if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
					log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
						strerror(errno));
				}
			}

			break;
		case NSD_RELOAD_REQ: {
			sig_atomic_t cmd = NSD_RELOAD_REQ;
			log_msg(LOG_WARNING, "SIGHUP received, reloading...");
			DEBUG(DEBUG_IPC,1, (LOG_INFO,
				"main: ipc send reload_req to xfrd"));
			if(!write_socket(nsd->xfrd_listener->fd,
				&cmd, sizeof(cmd))) {
				log_msg(LOG_ERR, "server_main: could not send "
				"reload_req to xfrd: %s", strerror(errno));
			}
			nsd->mode = NSD_RUN;
			} break;
		case NSD_RELOAD:
			/* Continue to run nsd after reload */
			nsd->mode = NSD_RUN;
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading..."));
			if (reload_pid != -1) {
				log_msg(LOG_WARNING, "Reload already in progress (pid = %d)",
				       (int) reload_pid);
				break;
			}

			/* switch mytask to keep track of who owns the task file */
			nsd->mytask = 1 - nsd->mytask;
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) {
				log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno));
				reload_pid = -1;
				break;
			}

			/* Do actual reload */
			reload_pid = fork();
			switch (reload_pid) {
			case -1:
				log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
				break;
			default:
				/* PARENT */
				close(reload_sockets[0]);
				server_reload(nsd, server_region, netio,
					reload_sockets[1]);
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main"));
				close(reload_sockets[1]);
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed"));
				/* drop stale xfrd ipc data */
				((struct ipc_handler_conn_data*)nsd->
					xfrd_listener->user_data)
					->conn->is_reading = 0;
				reload_pid = -1;
				reload_listener.fd = -1;
				reload_listener.event_types = NETIO_EVENT_NONE;
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run"));
				break;
			case 0:
				/* CHILD */
				/* server_main keeps running until NSD_QUIT_SYNC
				 * is received from reload. */
1666 				close(reload_sockets[1]);
1667 				reload_listener.fd = reload_sockets[0];
1668 				reload_listener.timeout = NULL;
1669 				reload_listener.user_data = nsd;
1670 				reload_listener.event_types = NETIO_EVENT_READ;
1671 				reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */
1672 				netio_add_handler(netio, &reload_listener);
1673 				reload_pid = getppid();
1674 				break;
1675 			}
1676 			break;
1677 		case NSD_QUIT_SYNC:
1678 			/* synchronisation of xfrd, parent and reload */
1679 			if(!nsd->quit_sync_done && reload_listener.fd != -1) {
1680 				sig_atomic_t cmd = NSD_RELOAD;
1681 				/* stop xfrd ipc writes in progress */
1682 				DEBUG(DEBUG_IPC,1, (LOG_INFO,
1683 					"main: ipc send indication reload"));
1684 				if(!write_socket(nsd->xfrd_listener->fd,
1685 					&cmd, sizeof(cmd))) {
1686 					log_msg(LOG_ERR, "server_main: could not send reload "
1687 					"indication to xfrd: %s", strerror(errno));
1688 				}
1689 				/* wait for ACK from xfrd */
1690 				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd"));
1691 				nsd->quit_sync_done = 1;
1692 			}
1693 			nsd->mode = NSD_RUN;
1694 			break;
1695 		case NSD_QUIT:
1696 			/* silent shutdown during reload */
1697 			if(reload_listener.fd != -1) {
1698 				/* acknowledge the quit, to sync reload that we will really quit now */
1699 				sig_atomic_t cmd = NSD_RELOAD;
1700 				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload"));
1701 				if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
1702 					log_msg(LOG_ERR, "server_main: "
1703 						"could not ack quit: %s", strerror(errno));
1704 				}
1705 #ifdef BIND8_STATS
1706 				parent_send_stats(nsd, reload_listener.fd);
1707 #endif /* BIND8_STATS */
1708 				close(reload_listener.fd);
1709 			}
1710 			DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence"));
1711 			/* only quit children after xfrd has acked */
1712 			send_children_quit(nsd);
1713 
1714 #if 0 /* OS collects memory pages */
1715 			region_destroy(server_region);
1716 #endif
1717 			server_shutdown(nsd);
1718 
1719 			/* NOTREACHED */
1720 			break;
1721 		case NSD_SHUTDOWN:
1722 			break;
1723 		case NSD_REAP_CHILDREN:
1724 			/* continue; wait for child in run loop */
1725 			nsd->mode = NSD_RUN;
1726 			break;
1727 		case NSD_STATS:
1728 #ifdef BIND8_STATS
1729 			set_children_stats(nsd);
1730 #endif
1731 			nsd->mode = NSD_RUN;
1732 			break;
1733 		default:
1734 			log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode);
1735 			nsd->mode = NSD_RUN;
1736 			break;
1737 		}
1738 	}
1739 	log_msg(LOG_WARNING, "signal received, shutting down...");
1740 
1741 	/* close opened ports to avoid race with restart of nsd */
1742 	server_close_all_sockets(nsd->udp, nsd->ifs);
1743 	server_close_all_sockets(nsd->tcp, nsd->ifs);
1744 #ifdef HAVE_SSL
1745 	daemon_remote_close(nsd->rc);
1746 #endif
1747 	send_children_quit_and_wait(nsd);
1748 
1749 	/* Unlink it if possible... */
1750 	unlinkpid(nsd->pidfile);
1751 	unlink(nsd->task[0]->fname);
1752 	unlink(nsd->task[1]->fname);
1753 #ifdef USE_ZONE_STATS
1754 	unlink(nsd->zonestatfname[0]);
1755 	unlink(nsd->zonestatfname[1]);
1756 #endif
1757 
1758 	if(reload_listener.fd != -1) {
1759 		sig_atomic_t cmd = NSD_QUIT;
1760 		DEBUG(DEBUG_IPC,1, (LOG_INFO,
1761 			"main: ipc send quit to reload-process"));
1762 		if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
1763 			log_msg(LOG_ERR, "server_main: could not send quit to reload: %s",
1764 				strerror(errno));
1765 		}
1766 		fsync(reload_listener.fd);
1767 		close(reload_listener.fd);
1768 		/* wait for reload to finish processing */
1769 		while(1) {
1770 			if(waitpid(reload_pid, NULL, 0) == -1) {
1771 				if(errno == EINTR) continue;
1772 				if(errno == ECHILD) break;
1773 				log_msg(LOG_ERR, "waitpid(reload %d): %s",
1774 					(int)reload_pid, strerror(errno));
1775 			}
1776 			break;
1777 		}
1778 	}
1779 	if(nsd->xfrd_listener->fd != -1) {
1780 		/* complete quit, stop xfrd */
1781 		sig_atomic_t cmd = NSD_QUIT;
1782 		DEBUG(DEBUG_IPC,1, (LOG_INFO,
1783 			"main: ipc send quit to xfrd"));
1784 		if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
1785 			log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s",
1786 				strerror(errno));
1787 		}
1788 		fsync(nsd->xfrd_listener->fd);
1789 		close(nsd->xfrd_listener->fd);
1790 		(void)kill(nsd->pid, SIGTERM);
1791 	}
1792 
1793 #if 0 /* OS collects memory pages */
1794 	region_destroy(server_region);
1795 #endif
1796 	/* write the nsd.db to disk, wait for it to complete */
1797 	udb_base_sync(nsd->db->udb, 1);
1798 	udb_base_close(nsd->db->udb);
1799 	server_shutdown(nsd);
1800 }
1801 
1802 static query_state_type
1803 server_process_query(struct nsd *nsd, struct query *query)
1804 {
1805 	return query_process(query, nsd);
1806 }
1807 
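/*
 * UDP variant: with RATELIMIT compiled in, answers also pass through
 * response rate limiting; rrl_process_query() decides whether the
 * source has exceeded its budget, and rrl_slip() then either discards
 * the answer or lets a truncated reply "slip" through so legitimate
 * clients can retry over TCP.
 */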
1808 static query_state_type
1809 server_process_query_udp(struct nsd *nsd, struct query *query)
1810 {
1811 #ifdef RATELIMIT
1812 	if(query_process(query, nsd) != QUERY_DISCARDED) {
1813 		if(rrl_process_query(query))
1814 			return rrl_slip(query);
1815 		else	return QUERY_PROCESSED;
1816 	}
1817 	return QUERY_DISCARDED;
1818 #else
1819 	return query_process(query, nsd);
1820 #endif
1821 }
1822 
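/*
 * Create the event base for a child server, using whichever event
 * mechanism was selected at compile time: the built-in mini_event,
 * libev, or libevent.
 */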
1823 struct event_base*
1824 nsd_child_event_base(void)
1825 {
1826 	struct event_base* base;
1827 #ifdef USE_MINI_EVENT
1828 	static time_t secs;
1829 	static struct timeval now;
1830 	base = event_init(&secs, &now);
1831 #else
1832 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
1833 	/* libev */
1834 	base = (struct event_base *)ev_default_loop(EVFLAG_AUTO);
1835 #  else
1836 	/* libevent */
1837 #    ifdef HAVE_EVENT_BASE_NEW
1838 	base = event_base_new();
1839 #    else
1840 	base = event_init();
1841 #    endif
1842 #  endif
1843 #endif
1844 	return base;
1845 }
1846 
1847 /*
1848  * Serve DNS requests.
1849  */
1850 void
1851 server_child(struct nsd *nsd)
1852 {
1853 	size_t i;
1854 	region_type *server_region = region_create(xalloc, free);
1855 	struct event_base* event_base = nsd_child_event_base();
1856 	query_type *udp_query;
1857 	sig_atomic_t mode;
1858 
1859 	if(!event_base) {
1860 		log_msg(LOG_ERR, "nsd server could not create event base");
1861 		exit(1);
1862 	}
1863 
1864 #ifdef RATELIMIT
1865 	rrl_init(nsd->this_child - nsd->children); /* the pointer difference is already the child index */
1866 #endif
1867 
1868 	assert(nsd->server_kind != NSD_SERVER_MAIN);
1869 	DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started"));
1870 
1871 	if (!(nsd->server_kind & NSD_SERVER_TCP)) {
1872 		server_close_all_sockets(nsd->tcp, nsd->ifs);
1873 	}
1874 	if (!(nsd->server_kind & NSD_SERVER_UDP)) {
1875 		server_close_all_sockets(nsd->udp, nsd->ifs);
1876 	}
1877 
1878 	if (nsd->this_child && nsd->this_child->parent_fd != -1) {
1879 		struct event *handler;
1880 		struct ipc_handler_conn_data* user_data =
1881 			(struct ipc_handler_conn_data*)region_alloc(
1882 			server_region, sizeof(struct ipc_handler_conn_data));
1883 		user_data->nsd = nsd;
1884 		user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ);
1885 
1886 		handler = (struct event*) region_alloc(
1887 			server_region, sizeof(*handler));
1888 		event_set(handler, nsd->this_child->parent_fd, EV_PERSIST|
1889 			EV_READ, child_handle_parent_command, user_data);
1890 		if(event_base_set(event_base, handler) != 0)
1891 			log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed");
1892 		if(event_add(handler, NULL) != 0)
1893 			log_msg(LOG_ERR, "nsd ipcchild: event_add failed");
1894 	}
1895 
1896 	if (nsd->server_kind & NSD_SERVER_UDP) {
1897 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG))
1898 		udp_query = query_create(server_region,
1899 			compressed_dname_offsets, compression_table_size);
1900 #else
1901 		udp_query = NULL;
1902 		memset(msgs, 0, sizeof(msgs));
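		/*
		 * msgs, iovecs and queries are parallel arrays: msgs[i]
		 * receives through iovecs[i], which points into
		 * queries[i]->packet, so one recvmmsg() call in
		 * handle_udp() can fill up to NUM_RECV_PER_SELECT
		 * query buffers at once.
		 */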
1903 		for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
1904 			queries[i] = query_create(server_region,
1905 				compressed_dname_offsets, compression_table_size);
1906 			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
1907 			iovecs[i].iov_base          = buffer_begin(queries[i]->packet);
1908 			iovecs[i].iov_len           = buffer_remaining(queries[i]->packet);
1909 			msgs[i].msg_hdr.msg_iov     = &iovecs[i];
1910 			msgs[i].msg_hdr.msg_iovlen  = 1;
1911 			msgs[i].msg_hdr.msg_name    = &queries[i]->addr;
1912 			msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
1913 		}
1914 #endif
1915 		for (i = 0; i < nsd->ifs; ++i) {
1916 			struct udp_handler_data *data;
1917 			struct event *handler;
1918 
1919 			data = (struct udp_handler_data *) region_alloc(
1920 				server_region,
1921 				sizeof(struct udp_handler_data));
1922 			data->query = udp_query;
1923 			data->nsd = nsd;
1924 			data->socket = &nsd->udp[i];
1925 
1926 			handler = (struct event*) region_alloc(
1927 				server_region, sizeof(*handler));
1928 			event_set(handler, nsd->udp[i].s, EV_PERSIST|EV_READ,
1929 				handle_udp, data);
1930 			if(event_base_set(event_base, handler) != 0)
1931 				log_msg(LOG_ERR, "nsd udp: event_base_set failed");
1932 			if(event_add(handler, NULL) != 0)
1933 				log_msg(LOG_ERR, "nsd udp: event_add failed");
1934 		}
1935 	}
1936 
1937 	/*
1938 	 * Keep track of all the TCP accept handlers so we can enable
1939 	 * and disable them based on the current number of active TCP
1940 	 * connections.
1941 	 */
1942 	tcp_accept_handler_count = nsd->ifs;
1943 	tcp_accept_handlers = (struct tcp_accept_handler_data*)
1944 		region_alloc_array(server_region,
1945 		nsd->ifs, sizeof(*tcp_accept_handlers));
1946 	if (nsd->server_kind & NSD_SERVER_TCP) {
1947 		for (i = 0; i < nsd->ifs; ++i) {
1948 			struct event *handler = &tcp_accept_handlers[i].event;
1949 			struct tcp_accept_handler_data* data =
1950 				&tcp_accept_handlers[i];
1951 			data->nsd = nsd;
1952 			data->socket = &nsd->tcp[i];
1953 			event_set(handler, nsd->tcp[i].s, EV_PERSIST|EV_READ,
1954 				handle_tcp_accept, data);
1955 			if(event_base_set(event_base, handler) != 0)
1956 				log_msg(LOG_ERR, "nsd tcp: event_base_set failed");
1957 			if(event_add(handler, NULL) != 0)
1958 				log_msg(LOG_ERR, "nsd tcp: event_add failed");
1959 			data->event_added = 1;
1960 		}
1961 	} else tcp_accept_handler_count = 0;
1962 
1963 	/* The main loop... */
1964 	while ((mode = nsd->mode) != NSD_QUIT) {
1965 		if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd);
1966 
1967 		/* Do we need to do the statistics... */
1968 		if (mode == NSD_STATS) {
1969 #ifdef BIND8_STATS
1970 			int p = nsd->st.period;
1971 			nsd->st.period = 1; /* force stats printout */
1972 			/* Dump the statistics */
1973 			bind8_stats(nsd);
1974 			nsd->st.period = p;
1975 #else /* !BIND8_STATS */
1976 			log_msg(LOG_NOTICE, "Statistics support not enabled at compile time.");
1977 #endif /* BIND8_STATS */
1978 
1979 			nsd->mode = NSD_RUN;
1980 		}
1981 		else if (mode == NSD_REAP_CHILDREN) {
1982 			/* got signal, notify parent. parent reaps terminated children. */
1983 			if (nsd->this_child->parent_fd != -1) {
1984 				sig_atomic_t parent_notify = NSD_REAP_CHILDREN;
1985 				if (write(nsd->this_child->parent_fd,
1986 				    &parent_notify,
1987 				    sizeof(parent_notify)) == -1)
1988 				{
1989 					log_msg(LOG_ERR, "problems sending command from %d to parent: %s",
1990 						(int) nsd->this_child->pid, strerror(errno));
1991 				}
1992 			} else /* no parent, so reap 'em */
1993 				while (waitpid(-1, NULL, WNOHANG) > 0) ;
1994 			nsd->mode = NSD_RUN;
1995 		}
1996 		else if(mode == NSD_RUN) {
1997 			/* Wait for a query... */
1998 			if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
1999 				if (errno != EINTR) {
2000 					log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
2001 					break;
2002 				}
2003 			}
2004 		} else if(mode == NSD_QUIT) {
2005 			/* ignore here, quit */
2006 		} else {
2007 			log_msg(LOG_ERR, "mode bad value %d, back to service.",
2008 				(int)mode);
2009 			nsd->mode = NSD_RUN;
2010 		}
2011 	}
2012 
2013 #ifdef	BIND8_STATS
2014 	bind8_stats(nsd);
2015 #endif /* BIND8_STATS */
2016 
2017 #if 0 /* OS collects memory pages */
2018 	event_base_free(event_base);
2019 	region_destroy(server_region);
2020 #endif
2021 	server_shutdown(nsd);
2022 }
2023 
2024 #if defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG)
2025 static void
2026 handle_udp(int fd, short event, void* arg)
2027 {
2028 	struct udp_handler_data *data = (struct udp_handler_data *) arg;
2029 	int received, sent, recvcount, i;
2030 	struct query *q;
2031 
2032 	if (!(event & EV_READ)) {
2033 		return;
2034 	}
2035 	recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
2036 	/* this printf strangely gave a performance increase on Linux */
2037 	/* printf("recvcount %d \n", recvcount); */
2038 	if (recvcount == -1) {
2039 		if (errno != EAGAIN && errno != EINTR) {
2040 			log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
2041 			STATUP(data->nsd, rxerr);
2042 			/* No zone statup */
2043 		}
2044 		/* Simply no data available */
2045 		return;
2046 	}
2047 	for (i = 0; i < recvcount; i++) {
2048 	loopstart:
2049 		received = msgs[i].msg_len;
2050 		q = queries[i];
2051 		if (received == -1) {
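			/* the per-datagram error is reported via
			 * msg_hdr.msg_flags, hence the strerror() below */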
2052 			log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror(
2053 				msgs[i].msg_hdr.msg_flags));
2054 			STATUP(data->nsd, rxerr);
2055 			/* No zone statup */
2056 			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
2057 			iovecs[i].iov_len = buffer_remaining(q->packet);
2058 			goto swap_drop;
2059 		}
2060 
2061 		/* Account... */
2062 #ifdef BIND8_STATS
2063 		if (data->socket->addr->ai_family == AF_INET) {
2064 			STATUP(data->nsd, qudp);
2065 		} else if (data->socket->addr->ai_family == AF_INET6) {
2066 			STATUP(data->nsd, qudp6);
2067 		}
2068 #endif
2069 
2070 		buffer_skip(q->packet, received);
2071 		buffer_flip(q->packet);
2072 
2073 		/* Process and answer the query... */
2074 		if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) {
2075 			if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
2076 				STATUP(data->nsd, nona);
2077 				ZTATUP(data->nsd, q->zone, nona);
2078 			}
2079 
2080 #ifdef USE_ZONE_STATS
2081 			if (data->socket->addr->ai_family == AF_INET) {
2082 				ZTATUP(data->nsd, q->zone, qudp);
2083 			} else if (data->socket->addr->ai_family == AF_INET6) {
2084 				ZTATUP(data->nsd, q->zone, qudp6);
2085 			}
2086 #endif
2087 
2088 			/* Add EDNS0 and TSIG info if necessary.  */
2089 			query_add_optional(q, data->nsd);
2090 
2091 			buffer_flip(q->packet);
2092 			iovecs[i].iov_len = buffer_remaining(q->packet);
2093 #ifdef BIND8_STATS
2094 			/* Account the rcode & TC... */
2095 			STATUP2(data->nsd, rcode, RCODE(q->packet));
2096 			ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
2097 			if (TC(q->packet)) {
2098 				STATUP(data->nsd, truncated);
2099 				ZTATUP(data->nsd, q->zone, truncated);
2100 			}
2101 #endif /* BIND8_STATS */
2102 		} else {
2103 			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
2104 			iovecs[i].iov_len = buffer_remaining(q->packet);
2105 		swap_drop:
2106 			STATUP(data->nsd, dropped);
2107 			ZTATUP(data->nsd, q->zone, dropped);
2108 			if(i != recvcount-1) {
2109 				/* swap with last and decrease recvcount */
2110 				struct mmsghdr mtmp = msgs[i];
2111 				struct iovec iotmp = iovecs[i];
2112 				recvcount--;
2113 				msgs[i] = msgs[recvcount];
2114 				iovecs[i] = iovecs[recvcount];
2115 				queries[i] = queries[recvcount];
2116 				msgs[recvcount] = mtmp;
2117 				iovecs[recvcount] = iotmp;
2118 				queries[recvcount] = q;
2119 				msgs[i].msg_hdr.msg_iov = &iovecs[i];
2120 				msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount];
2121 				goto loopstart;
2122 			} else { recvcount --; }
2123 		}
2124 	}
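	/*
	 * msgs[0..recvcount-1] now hold only the queries that produced
	 * an answer; dropped queries were swapped past the end, so the
	 * sendmmsg() batch below is contiguous.
	 */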
2125 
2126 	/* send until all are sent */
2127 	i = 0;
2128 	while(i<recvcount) {
2129 		sent = sendmmsg(fd, &msgs[i], recvcount-i, 0);
2130 		if(sent == -1) {
2131 			const char* es = strerror(errno);
2132 			char a[48];
2133 			addr2str(&queries[i]->addr, a, sizeof(a));
2134 			log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
2135 #ifdef BIND8_STATS
2136 			data->nsd->st.txerr += recvcount-i;
2137 #endif /* BIND8_STATS */
2138 			break;
2139 		}
2140 		i += sent;
2141 	}
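	/* reset all queries and iovecs for the next receive batch */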
2142 	for(i=0; i<recvcount; i++) {
2143 		query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
2144 		iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
2145 	}
2146 }
2147 
2148 #else /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */
2149 
2150 static void
2151 handle_udp(int fd, short event, void* arg)
2152 {
2153 	struct udp_handler_data *data = (struct udp_handler_data *) arg;
2154 	int received, sent;
2155 #ifndef NONBLOCKING_IS_BROKEN
2156 #ifdef HAVE_RECVMMSG
2157 	int recvcount;
2158 #endif /* HAVE_RECVMMSG */
2159 	int i;
2160 #endif /* NONBLOCKING_IS_BROKEN */
2161 	struct query *q;
2162 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG))
2163 	q = data->query;
2164 #endif
2165 
2166 	if (!(event & EV_READ)) {
2167 		return;
2168 	}
2169 #ifndef NONBLOCKING_IS_BROKEN
2170 #ifdef HAVE_RECVMMSG
2171 	recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
2172 	/* this printf strangely gave a performance increase on Linux */
2173 	/* printf("recvcount %d \n", recvcount); */
2174 	if (recvcount == -1) {
2175 		if (errno != EAGAIN && errno != EINTR) {
2176 			log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
2177 			STATUP(data->nsd, rxerr);
2178 			/* No zone statup */
2179 		}
2180 		/* Simply no data available */
2181 		return;
2182 	}
2183 	for (i = 0; i < recvcount; i++) {
2184 		received = msgs[i].msg_len;
2185 		msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
2186 		if (received == -1) {
2187 			log_msg(LOG_ERR, "recvmmsg failed");
2188 			STATUP(data->nsd, rxerr);
2189 			/* No zone statup */
2190 			/* the error can be found in msgs[i].msg_hdr.msg_flags */
2191 			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
2192 			continue;
2193 		}
2194 		q = queries[i];
2195 #else
2196 	for(i=0; i<NUM_RECV_PER_SELECT; i++) {
2197 #endif /* HAVE_RECVMMSG */
2198 #endif /* NONBLOCKING_IS_BROKEN */
2199 
2200 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG))
2201 		/* Initialize the query... */
2202 		query_reset(q, UDP_MAX_MESSAGE_LEN, 0);
2203 
2204 		received = recvfrom(fd,
2205 				    buffer_begin(q->packet),
2206 				    buffer_remaining(q->packet),
2207 				    0,
2208 				    (struct sockaddr *)&q->addr,
2209 				    &q->addrlen);
2210 		if (received == -1) {
2211 			if (errno != EAGAIN && errno != EINTR) {
2212 				log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno));
2213 				STATUP(data->nsd, rxerr);
2214 				/* No zone statup */
2215 			}
2216 			return;
2217 		}
2218 #endif /* NONBLOCKING_IS_BROKEN || !HAVE_RECVMMSG */
2219 
2220 		/* Account... */
2221 		if (data->socket->addr->ai_family == AF_INET) {
2222 			STATUP(data->nsd, qudp);
2223 		} else if (data->socket->addr->ai_family == AF_INET6) {
2224 			STATUP(data->nsd, qudp6);
2225 		}
2226 
2227 		buffer_skip(q->packet, received);
2228 		buffer_flip(q->packet);
2229 
2230 		/* Process and answer the query... */
2231 		if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) {
2232 			if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
2233 				STATUP(data->nsd, nona);
2234 				ZTATUP(data->nsd, q->zone, nona);
2235 			}
2236 
2237 #ifdef USE_ZONE_STATS
2238 			if (data->socket->addr->ai_family == AF_INET) {
2239 				ZTATUP(data->nsd, q->zone, qudp);
2240 			} else if (data->socket->addr->ai_family == AF_INET6) {
2241 				ZTATUP(data->nsd, q->zone, qudp6);
2242 			}
2243 #endif
2244 
2245 			/* Add EDNS0 and TSIG info if necessary.  */
2246 			query_add_optional(q, data->nsd);
2247 
2248 			buffer_flip(q->packet);
2249 
2250 			sent = sendto(fd,
2251 				      buffer_begin(q->packet),
2252 				      buffer_remaining(q->packet),
2253 				      0,
2254 				      (struct sockaddr *) &q->addr,
2255 				      q->addrlen);
2256 			if (sent == -1) {
2257 				const char* es = strerror(errno);
2258 				char a[48];
2259 				addr2str(&q->addr, a, sizeof(a));
2260 				log_msg(LOG_ERR, "sendto %s failed: %s", a, es);
2261 				STATUP(data->nsd, txerr);
2262 				ZTATUP(data->nsd, q->zone, txerr);
2263 			} else if ((size_t) sent != buffer_remaining(q->packet)) {
2264 				log_msg(LOG_ERR, "sent %d in place of %d bytes", sent, (int) buffer_remaining(q->packet));
2265 			} else {
2266 #ifdef BIND8_STATS
2267 				/* Account the rcode & TC... */
2268 				STATUP2(data->nsd, rcode, RCODE(q->packet));
2269 				ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
2270 				if (TC(q->packet)) {
2271 					STATUP(data->nsd, truncated);
2272 					ZTATUP(data->nsd, q->zone, truncated);
2273 				}
2274 #endif /* BIND8_STATS */
2275 			}
2276 		} else {
2277 			STATUP(data->nsd, dropped);
2278 			ZTATUP(data->nsd, q->zone, dropped);
2279 		}
2280 #ifndef NONBLOCKING_IS_BROKEN
2281 #ifdef HAVE_RECVMMSG
2282 		query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
2283 #endif
2284 	}
2285 #endif
2286 }
2287 #endif /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */
2288 
2289 
2290 static void
2291 cleanup_tcp_handler(struct tcp_handler_data* data)
2292 {
2293 	event_del(&data->event);
2294 	close(data->event.ev_fd);
2295 
2296 	/*
2297 	 * Enable the TCP accept handlers when the current number of
2298 	 * TCP connections is about to drop below the maximum number
2299 	 * of TCP connections.
2300 	 */
2301 	if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
2302 		configure_handler_event_types(EV_READ|EV_PERSIST);
2303 		slowaccept = 0;
2304 	}
2305 	--data->nsd->current_tcp_count;
2306 	assert(data->nsd->current_tcp_count >= 0);
2307 
2308 	region_destroy(data->region);
2309 }
2310 
2311 static void
2312 handle_tcp_reading(int fd, short event, void* arg)
2313 {
2314 	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
2315 	ssize_t received;
2316 	struct event_base* ev_base;
2317 	struct timeval timeout;
2318 
2319 	if ((event & EV_TIMEOUT)) {
2320 		/* Connection timed out.  */
2321 		cleanup_tcp_handler(data);
2322 		return;
2323 	}
2324 
2325 	if (data->nsd->tcp_query_count > 0 &&
2326 		data->query_count >= data->nsd->tcp_query_count) {
2327 		/* No more queries allowed on this tcp connection.  */
2328 		cleanup_tcp_handler(data);
2329 		return;
2330 	}
2331 
2332 	assert((event & EV_READ));
2333 
2334 	if (data->bytes_transmitted == 0) {
2335 		query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
2336 	}
2337 
2338 	/*
2339 	 * Check if we received the leading packet length bytes yet.
2340 	 */
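	/*
	 * DNS over TCP prefixes each message with a two-octet length
	 * field in network byte order (RFC 1035, section 4.2.2), so a
	 * 16-byte query arrives on the wire as 00 10 followed by the 16
	 * message octets, possibly split across several reads.
	 */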
2341 	if (data->bytes_transmitted < sizeof(uint16_t)) {
2342 		received = read(fd,
2343 				(char *) &data->query->tcplen
2344 				+ data->bytes_transmitted,
2345 				sizeof(uint16_t) - data->bytes_transmitted);
2346 		if (received == -1) {
2347 			if (errno == EAGAIN || errno == EINTR) {
2348 				/*
2349 				 * Read would block, wait until more
2350 				 * data is available.
2351 				 */
2352 				return;
2353 			} else {
2354 				char buf[48];
2355 				addr2str(&data->query->addr, buf, sizeof(buf));
2356 #ifdef ECONNRESET
2357 				if (verbosity >= 2 || errno != ECONNRESET)
2358 #endif /* ECONNRESET */
2359 				log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
2360 				cleanup_tcp_handler(data);
2361 				return;
2362 			}
2363 		} else if (received == 0) {
2364 			/* EOF */
2365 			cleanup_tcp_handler(data);
2366 			return;
2367 		}
2368 
2369 		data->bytes_transmitted += received;
2370 		if (data->bytes_transmitted < sizeof(uint16_t)) {
2371 			/*
2372 			 * Not done with the tcplen yet, wait for more
2373 			 * data to become available.
2374 			 */
2375 			return;
2376 		}
2377 
2378 		assert(data->bytes_transmitted == sizeof(uint16_t));
2379 
2380 		data->query->tcplen = ntohs(data->query->tcplen);
2381 
2382 		/*
2383 		 * Minimum query size is:
2384 		 *
2385 		 *     Size of the header (12)
2386 		 *   + Root domain name   (1)
2387 		 *   + Query class        (2)
2388 		 *   + Query type         (2)
		 *   = 17 octets in total
2389 		 */
2390 		if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
2391 			VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
2392 			cleanup_tcp_handler(data);
2393 			return;
2394 		}
2395 
2396 		if (data->query->tcplen > data->query->maxlen) {
2397 			VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
2398 			cleanup_tcp_handler(data);
2399 			return;
2400 		}
2401 
2402 		buffer_set_limit(data->query->packet, data->query->tcplen);
2403 	}
2404 
2405 	assert(buffer_remaining(data->query->packet) > 0);
2406 
2407 	/* Read the (remaining) query data.  */
2408 	received = read(fd,
2409 			buffer_current(data->query->packet),
2410 			buffer_remaining(data->query->packet));
2411 	if (received == -1) {
2412 		if (errno == EAGAIN || errno == EINTR) {
2413 			/*
2414 			 * Read would block, wait until more data is
2415 			 * available.
2416 			 */
2417 			return;
2418 		} else {
2419 			char buf[48];
2420 			addr2str(&data->query->addr, buf, sizeof(buf));
2421 #ifdef ECONNRESET
2422 			if (verbosity >= 2 || errno != ECONNRESET)
2423 #endif /* ECONNRESET */
2424 			log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
2425 			cleanup_tcp_handler(data);
2426 			return;
2427 		}
2428 	} else if (received == 0) {
2429 		/* EOF */
2430 		cleanup_tcp_handler(data);
2431 		return;
2432 	}
2433 
2434 	data->bytes_transmitted += received;
2435 	buffer_skip(data->query->packet, received);
2436 	if (buffer_remaining(data->query->packet) > 0) {
2437 		/*
2438 		 * Message not yet complete, wait for more data to
2439 		 * become available.
2440 		 */
2441 		return;
2442 	}
2443 
2444 	assert(buffer_position(data->query->packet) == data->query->tcplen);
2445 
2446 	/* Account... */
2447 #ifdef BIND8_STATS
2448 #ifndef INET6
2449 	STATUP(data->nsd, ctcp);
2450 #else
2451 	if (data->query->addr.ss_family == AF_INET) {
2452 		STATUP(data->nsd, ctcp);
2453 	} else if (data->query->addr.ss_family == AF_INET6) {
2454 		STATUP(data->nsd, ctcp6);
2455 	}
2456 #endif
2457 #endif /* BIND8_STATS */
2458 
2459 	/* We have a complete query, process it.  */
2460 
2461 	/* tcp-query-count: increment the per-connection query counter */
2462 	data->query_count++;
2463 
2464 	buffer_flip(data->query->packet);
2465 	data->query_state = server_process_query(data->nsd, data->query);
2466 	if (data->query_state == QUERY_DISCARDED) {
2467 		/* Drop the packet and the entire connection... */
2468 		STATUP(data->nsd, dropped);
2469 		ZTATUP(data->nsd, data->query->zone, dropped);
2470 		cleanup_tcp_handler(data);
2471 		return;
2472 	}
2473 
2474 #ifdef BIND8_STATS
2475 	if (RCODE(data->query->packet) == RCODE_OK
2476 	    && !AA(data->query->packet))
2477 	{
2478 		STATUP(data->nsd, nona);
2479 		ZTATUP(data->nsd, data->query->zone, nona);
2480 	}
2481 #endif /* BIND8_STATS */
2482 
2483 #ifdef USE_ZONE_STATS
2484 #ifndef INET6
2485 	ZTATUP(data->nsd, data->query->zone, ctcp);
2486 #else
2487 	if (data->query->addr.ss_family == AF_INET) {
2488 		ZTATUP(data->nsd, data->query->zone, ctcp);
2489 	} else if (data->query->addr.ss_family == AF_INET6) {
2490 		ZTATUP(data->nsd, data->query->zone, ctcp6);
2491 	}
2492 #endif
2493 #endif /* USE_ZONE_STATS */
2494 
2495 	query_add_optional(data->query, data->nsd);
2496 
2497 	/* Switch to the tcp write handler.  */
2498 	buffer_flip(data->query->packet);
2499 	data->query->tcplen = buffer_remaining(data->query->packet);
2500 	data->bytes_transmitted = 0;
2501 
2502 	timeout.tv_sec = data->nsd->tcp_timeout;
2503 	timeout.tv_usec = 0L;
2504 
2505 	ev_base = data->event.ev_base;
2506 	event_del(&data->event);
2507 	event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
2508 		handle_tcp_writing, data);
2509 	if(event_base_set(ev_base, &data->event) != 0)
2510 		log_msg(LOG_ERR, "event base set tcpr failed");
2511 	if(event_add(&data->event, &timeout) != 0)
2512 		log_msg(LOG_ERR, "event add tcpr failed");
2513 	/* see if we can write the answer right away (usually so; EAGAIN if not) */
2514 	handle_tcp_writing(fd, EV_WRITE, data);
2515 }
2516 
2517 static void
2518 handle_tcp_writing(int fd, short event, void* arg)
2519 {
2520 	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
2521 	ssize_t sent;
2522 	struct query *q = data->query;
2523 	struct timeval timeout;
2524 	struct event_base* ev_base;
2525 
2526 	if ((event & EV_TIMEOUT)) {
2527 		/* Connection timed out.  */
2528 		cleanup_tcp_handler(data);
2529 		return;
2530 	}
2531 
2532 	assert((event & EV_WRITE));
2533 
2534 	if (data->bytes_transmitted < sizeof(q->tcplen)) {
2535 		/* Writing the response packet length.  */
2536 		uint16_t n_tcplen = htons(q->tcplen);
2537 #ifdef HAVE_WRITEV
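		/*
		 * Gather-write the remaining length-prefix bytes and the
		 * whole packet body in a single writev() call.
		 */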
2538 		struct iovec iov[2];
2539 		iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted;
2540 		iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted;
2541 		iov[1].iov_base = buffer_begin(q->packet);
2542 		iov[1].iov_len = buffer_limit(q->packet);
2543 		sent = writev(fd, iov, 2);
2544 #else /* HAVE_WRITEV */
2545 		sent = write(fd,
2546 			     (const char *) &n_tcplen + data->bytes_transmitted,
2547 			     sizeof(n_tcplen) - data->bytes_transmitted);
2548 #endif /* HAVE_WRITEV */
2549 		if (sent == -1) {
2550 			if (errno == EAGAIN || errno == EINTR) {
2551 				/*
2552 				 * Write would block, wait until
2553 				 * socket becomes writable again.
2554 				 */
2555 				return;
2556 			} else {
2557 #ifdef ECONNRESET
2558 				if(verbosity >= 2 || errno != ECONNRESET)
2559 #endif /* ECONNRESET */
2560 #ifdef EPIPE
2561 				  if(verbosity >= 2 || errno != EPIPE)
2562 #endif /* EPIPE 'broken pipe' */
2563 				    log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
2564 				cleanup_tcp_handler(data);
2565 				return;
2566 			}
2567 		}
2568 
2569 		data->bytes_transmitted += sent;
2570 		if (data->bytes_transmitted < sizeof(q->tcplen)) {
2571 			/*
2572 			 * Writing not complete, wait until socket
2573 			 * becomes writable again.
2574 			 */
2575 			return;
2576 		}
2577 
2578 #ifdef HAVE_WRITEV
2579 		sent = data->bytes_transmitted - sizeof(n_tcplen); /* payload bytes only; the prefix may have been split across calls */
2580 		/* handle the potential 'packet done' case */
2581 		goto packet_could_be_done;
2582 #endif
2583  	}
2584 
2585 	sent = write(fd,
2586 		     buffer_current(q->packet),
2587 		     buffer_remaining(q->packet));
2588 	if (sent == -1) {
2589 		if (errno == EAGAIN || errno == EINTR) {
2590 			/*
2591 			 * Write would block, wait until
2592 			 * socket becomes writable again.
2593 			 */
2594 			return;
2595 		} else {
2596 #ifdef ECONNRESET
2597 			if(verbosity >= 2 || errno != ECONNRESET)
2598 #endif /* ECONNRESET */
2599 #ifdef EPIPE
2600 				  if(verbosity >= 2 || errno != EPIPE)
2601 #endif /* EPIPE 'broken pipe' */
2602 			log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
2603 			cleanup_tcp_handler(data);
2604 			return;
2605 		}
2606 	}
2607 
2608 	data->bytes_transmitted += sent;
2609 #ifdef HAVE_WRITEV
2610   packet_could_be_done:
2611 #endif
2612 	buffer_skip(q->packet, sent);
2613 	if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
2614 		/*
2615 		 * Still more data to write when socket becomes
2616 		 * writable again.
2617 		 */
2618 		return;
2619 	}
2620 
2621 	assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));
2622 
2623 	if (data->query_state == QUERY_IN_AXFR) {
2624 		/* Continue processing AXFR and writing back results.  */
2625 		buffer_clear(q->packet);
2626 		data->query_state = query_axfr(data->nsd, q);
2627 		if (data->query_state != QUERY_PROCESSED) {
2628 			query_add_optional(data->query, data->nsd);
2629 
2630 			/* Reset data. */
2631 			buffer_flip(q->packet);
2632 			q->tcplen = buffer_remaining(q->packet);
2633 			data->bytes_transmitted = 0;
2634 			/* Reset timeout.  */
2635 			timeout.tv_sec = data->nsd->tcp_timeout;
2636 			timeout.tv_usec = 0L;
2637 			ev_base = data->event.ev_base;
2638 			event_del(&data->event);
2639 			event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
2640 				handle_tcp_writing, data);
2641 			if(event_base_set(ev_base, &data->event) != 0)
2642 				log_msg(LOG_ERR, "event base set tcpw failed");
2643 			if(event_add(&data->event, &timeout) != 0)
2644 				log_msg(LOG_ERR, "event add tcpw failed");
2645 
2646 			/*
2647 			 * Write data if/when the socket is writable
2648 			 * again.
2649 			 */
2650 			return;
2651 		}
2652 	}
2653 
2654 	/*
2655 	 * Done sending, wait for the next request to arrive on the
2656 	 * TCP socket by installing the TCP read handler.
2657 	 */
2658 	if (data->nsd->tcp_query_count > 0 &&
2659 		data->query_count >= data->nsd->tcp_query_count) {
2660 
2661 		(void) shutdown(fd, SHUT_WR);
2662 	}
2663 
2664 	data->bytes_transmitted = 0;
2665 
2666 	timeout.tv_sec = data->nsd->tcp_timeout;
2667 	timeout.tv_usec = 0L;
2668 	ev_base = data->event.ev_base;
2669 	event_del(&data->event);
2670 	event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
2671 		handle_tcp_reading, data);
2672 	if(event_base_set(ev_base, &data->event) != 0)
2673 		log_msg(LOG_ERR, "event base set tcpw failed");
2674 	if(event_add(&data->event, &timeout) != 0)
2675 		log_msg(LOG_ERR, "event add tcpw failed");
2676 }
2677 
2678 
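/*
 * Timer callback: re-enable the TCP accept handlers once the
 * SLOW_ACCEPT_TIMEOUT pause, set after accept() failed with
 * EMFILE/ENFILE, has elapsed.
 */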
2679 static void
2680 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event),
2681 	void* ATTR_UNUSED(arg))
2682 {
2683 	if(slowaccept) {
2684 		configure_handler_event_types(EV_PERSIST | EV_READ);
2685 		slowaccept = 0;
2686 	}
2687 }
2688 
2689 /*
2690  * Handle an incoming TCP connection.  The connection is accepted and
2691  * a new TCP reader event handler is added.  The TCP handler
2692  * is responsible for cleanup when the connection is closed.
2693  */
2694 static void
2695 handle_tcp_accept(int fd, short event, void* arg)
2696 {
2697 	struct tcp_accept_handler_data *data
2698 		= (struct tcp_accept_handler_data *) arg;
2699 	int s;
2700 	struct tcp_handler_data *tcp_data;
2701 	region_type *tcp_region;
2702 #ifdef INET6
2703 	struct sockaddr_storage addr;
2704 #else
2705 	struct sockaddr_in addr;
2706 #endif
2707 	socklen_t addrlen;
2708 	struct timeval timeout;
2709 
2710 	if (!(event & EV_READ)) {
2711 		return;
2712 	}
2713 
2714 	if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) {
2715 		return;
2716 	}
2717 
2718 	/* Accept it... */
2719 	addrlen = sizeof(addr);
2720 	s = accept(fd, (struct sockaddr *) &addr, &addrlen);
2721 	if (s == -1) {
2722 		/**
2723 		 * EMFILE and ENFILE indicate that the limit of open file
2724 		 * descriptors has been reached; pause accept().  EINTR is
2725 		 * a signal interrupt.  The other errno values are various
2726 		 * OS ways of saying that the client has closed the connection.
2727 		 */
2728 		if (errno == EMFILE || errno == ENFILE) {
2729 			if (!slowaccept) {
2730 				/* disable accept events */
2731 				struct timeval tv;
2732 				configure_handler_event_types(0);
2733 				tv.tv_sec = SLOW_ACCEPT_TIMEOUT;
2734 				tv.tv_usec = 0L;
2735 				event_set(&slowaccept_event, -1, EV_TIMEOUT,
2736 					handle_slowaccept_timeout, NULL);
2737 				(void)event_base_set(data->event.ev_base,
2738 					&slowaccept_event);
2739 				(void)event_add(&slowaccept_event, &tv);
2740 				slowaccept = 1;
2741 				/* We don't want to spam the logs here */
2742 			}
2743 		} else if (errno != EINTR
2744 			&& errno != EWOULDBLOCK
2745 #ifdef ECONNABORTED
2746 			&& errno != ECONNABORTED
2747 #endif /* ECONNABORTED */
2748 #ifdef EPROTO
2749 			&& errno != EPROTO
2750 #endif /* EPROTO */
2751 			) {
2752 			log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
2753 		}
2754 		return;
2755 	}
2756 
2757 	if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
2758 		log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
2759 		close(s);
2760 		return;
2761 	}
2762 
2763 	/*
2764 	 * This region is deallocated when the TCP connection is
2765 	 * closed by the TCP handler.
2766 	 */
2767 	tcp_region = region_create(xalloc, free);
2768 	tcp_data = (struct tcp_handler_data *) region_alloc(
2769 		tcp_region, sizeof(struct tcp_handler_data));
2770 	tcp_data->region = tcp_region;
2771 	tcp_data->query = query_create(tcp_region, compressed_dname_offsets,
2772 		compression_table_size);
2773 	tcp_data->nsd = data->nsd;
2774 	tcp_data->query_count = 0;
2775 
2776 	tcp_data->query_state = QUERY_PROCESSED;
2777 	tcp_data->bytes_transmitted = 0;
2778 	memcpy(&tcp_data->query->addr, &addr, addrlen);
2779 	tcp_data->query->addrlen = addrlen;
2780 
2781 	timeout.tv_sec = data->nsd->tcp_timeout;
2782 	timeout.tv_usec = 0;
2783 
2784 	event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
2785 		handle_tcp_reading, tcp_data);
2786 	if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) {
2787 		log_msg(LOG_ERR, "cannot set tcp event base");
2788 		close(s);
2789 		region_destroy(tcp_region);
2790 		return;
2791 	}
2792 	if(event_add(&tcp_data->event, &timeout) != 0) {
2793 		log_msg(LOG_ERR, "cannot add tcp to event base");
2794 		close(s);
2795 		region_destroy(tcp_region);
2796 		return;
2797 	}
2798 
2799 	/*
2800 	 * Keep track of the total number of TCP handlers installed so
2801 	 * we can stop accepting connections when the maximum number
2802 	 * of simultaneous TCP connections is reached.
2803 	 */
2804 	++data->nsd->current_tcp_count;
2805 	if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
2806 		configure_handler_event_types(0);
2807 	}
2808 }
2809 
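/*
 * Send 'command' to every child server over its IPC channel and then
 * close the channel; with a nonzero timeout, first wait up to that
 * many seconds for a reply from each child.
 */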
2810 static void
2811 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout)
2812 {
2813 	size_t i;
2814 	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
2815 	for (i = 0; i < nsd->child_count; ++i) {
2816 		if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) {
2817 			if (write(nsd->children[i].child_fd,
2818 				&command,
2819 				sizeof(command)) == -1)
2820 			{
2821 				if(errno != EAGAIN && errno != EINTR)
2822 					log_msg(LOG_ERR, "problems sending command %d to server %d: %s",
2823 					(int) command,
2824 					(int) nsd->children[i].pid,
2825 					strerror(errno));
2826 			} else if (timeout > 0) {
2827 				(void)block_read(NULL,
2828 					nsd->children[i].child_fd,
2829 					&command, sizeof(command), timeout);
2830 			}
2831 			fsync(nsd->children[i].child_fd);
2832 			close(nsd->children[i].child_fd);
2833 			nsd->children[i].child_fd = -1;
2834 		}
2835 	}
2836 }
2837 
2838 static void
2839 send_children_quit(struct nsd* nsd)
2840 {
2841 	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit"));
2842 	send_children_command(nsd, NSD_QUIT, 0);
2843 }
2844 
2845 static void
2846 send_children_quit_and_wait(struct nsd* nsd)
2847 {
2848 	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait"));
2849 	send_children_command(nsd, NSD_QUIT_CHILD, 3);
2850 }
2851 
2852 #ifdef BIND8_STATS
2853 static void
2854 set_children_stats(struct nsd* nsd)
2855 {
2856 	size_t i;
2857 	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
2858 	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children"));
2859 	for (i = 0; i < nsd->child_count; ++i) {
2860 		nsd->children[i].need_to_send_STATS = 1;
2861 		nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE;
2862 	}
2863 }
2864 #endif /* BIND8_STATS */
2865 
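/*
 * (Re)register every TCP accept handler with the given event types;
 * an event_types of 0 removes the handlers instead, pausing accept()
 * while the connection limit or the file descriptor limit is hit.
 */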
2866 static void
2867 configure_handler_event_types(short event_types)
2868 {
2869 	size_t i;
2870 
2871 	for (i = 0; i < tcp_accept_handler_count; ++i) {
2872 		struct event* handler = &tcp_accept_handlers[i].event;
2873 		if(event_types) {
2874 			/* reassign */
2875 			int fd = handler->ev_fd;
2876 			struct event_base* base = handler->ev_base;
2877 			if(tcp_accept_handlers[i].event_added)
2878 				event_del(handler);
2879 			event_set(handler, fd, event_types,
2880 				handle_tcp_accept, &tcp_accept_handlers[i]);
2881 			if(event_base_set(base, handler) != 0)
2882 				log_msg(LOG_ERR, "conhand: cannot event_base");
2883 			if(event_add(handler, NULL) != 0)
2884 				log_msg(LOG_ERR, "conhand: cannot event_add");
2885 			tcp_accept_handlers[i].event_added = 1;
2886 		} else {
2887 			/* remove */
2888 			if(tcp_accept_handlers[i].event_added) {
2889 				event_del(handler);
2890 				tcp_accept_handlers[i].event_added = 0;
2891 			}
2892 		}
2893 	}
2894 }
2895