1 /* 2 * server.c -- nsd(8) network input/output 3 * 4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/param.h> 14 #include <sys/socket.h> 15 #include <sys/uio.h> 16 #include <sys/wait.h> 17 18 #include <netinet/in.h> 19 #include <arpa/inet.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <stddef.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <time.h> 30 #include <unistd.h> 31 #include <signal.h> 32 #include <netdb.h> 33 #include <poll.h> 34 #ifndef SHUT_WR 35 #define SHUT_WR 1 36 #endif 37 #ifdef HAVE_MMAP 38 #include <sys/mman.h> 39 #endif /* HAVE_MMAP */ 40 #ifdef HAVE_OPENSSL_RAND_H 41 #include <openssl/rand.h> 42 #endif 43 #ifndef USE_MINI_EVENT 44 # ifdef HAVE_EVENT_H 45 # include <event.h> 46 # else 47 # include <event2/event.h> 48 # include "event2/event_struct.h" 49 # include "event2/event_compat.h" 50 # endif 51 #else 52 # include "mini_event.h" 53 #endif 54 55 #include "axfr.h" 56 #include "namedb.h" 57 #include "netio.h" 58 #include "xfrd.h" 59 #include "xfrd-tcp.h" 60 #include "xfrd-disk.h" 61 #include "difffile.h" 62 #include "nsec3.h" 63 #include "ipc.h" 64 #include "udb.h" 65 #include "remote.h" 66 #include "lookup3.h" 67 #include "rrl.h" 68 #ifdef USE_DNSTAP 69 #include "dnstap/dnstap_collector.h" 70 #endif 71 72 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */ 73 74 /* 75 * Data for the UDP handlers. 76 */ 77 struct udp_handler_data 78 { 79 struct nsd *nsd; 80 struct nsd_socket *socket; 81 query_type *query; 82 }; 83 84 struct tcp_accept_handler_data { 85 struct nsd *nsd; 86 struct nsd_socket *socket; 87 int event_added; 88 struct event event; 89 }; 90 91 /* 92 * These globals are used to enable the TCP accept handlers 93 * when the number of TCP connection drops below the maximum 94 * number of TCP connections. 95 */ 96 static size_t tcp_accept_handler_count; 97 static struct tcp_accept_handler_data* tcp_accept_handlers; 98 99 static struct event slowaccept_event; 100 static int slowaccept; 101 102 #ifndef NONBLOCKING_IS_BROKEN 103 # define NUM_RECV_PER_SELECT 100 104 #endif 105 106 #if (!defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG)) 107 struct mmsghdr msgs[NUM_RECV_PER_SELECT]; 108 struct iovec iovecs[NUM_RECV_PER_SELECT]; 109 struct query *queries[NUM_RECV_PER_SELECT]; 110 #endif 111 112 /* 113 * Data for the TCP connection handlers. 114 * 115 * The TCP handlers use non-blocking I/O. This is necessary to avoid 116 * blocking the entire server on a slow TCP connection, but does make 117 * reading from and writing to the socket more complicated. 118 * 119 * Basically, whenever a read/write would block (indicated by the 120 * EAGAIN errno variable) we remember the position we were reading 121 * from/writing to and return from the TCP reading/writing event 122 * handler. When the socket becomes readable/writable again we 123 * continue from the same position. 124 */ 125 struct tcp_handler_data 126 { 127 /* 128 * The region used to allocate all TCP connection related 129 * data, including this structure. This region is destroyed 130 * when the connection is closed. 131 */ 132 region_type* region; 133 134 /* 135 * The global nsd structure. 136 */ 137 struct nsd* nsd; 138 139 /* 140 * The current query data for this TCP connection. 141 */ 142 query_type* query; 143 144 /* 145 * The query_state is used to remember if we are performing an 146 * AXFR, if we're done processing, or if we should discard the 147 * query and connection. 148 */ 149 query_state_type query_state; 150 151 /* 152 * The event for the file descriptor and tcp timeout 153 */ 154 struct event event; 155 156 /* 157 * The bytes_transmitted field is used to remember the number 158 * of bytes transmitted when receiving or sending a DNS 159 * packet. The count includes the two additional bytes used 160 * to specify the packet length on a TCP connection. 161 */ 162 size_t bytes_transmitted; 163 164 /* 165 * The number of queries handled by this specific TCP connection. 166 */ 167 int query_count; 168 169 /* 170 * The timeout in msec for this tcp connection 171 */ 172 int tcp_timeout; 173 }; 174 175 /* 176 * Handle incoming queries on the UDP server sockets. 177 */ 178 static void handle_udp(int fd, short event, void* arg); 179 180 /* 181 * Handle incoming connections on the TCP sockets. These handlers 182 * usually wait for the NETIO_EVENT_READ event (indicating an incoming 183 * connection) but are disabled when the number of current TCP 184 * connections is equal to the maximum number of TCP connections. 185 * Disabling is done by changing the handler to wait for the 186 * NETIO_EVENT_NONE type. This is done using the function 187 * configure_tcp_accept_handlers. 188 */ 189 static void handle_tcp_accept(int fd, short event, void* arg); 190 191 /* 192 * Handle incoming queries on a TCP connection. The TCP connections 193 * are configured to be non-blocking and the handler may be called 194 * multiple times before a complete query is received. 195 */ 196 static void handle_tcp_reading(int fd, short event, void* arg); 197 198 /* 199 * Handle outgoing responses on a TCP connection. The TCP connections 200 * are configured to be non-blocking and the handler may be called 201 * multiple times before a complete response is sent. 202 */ 203 static void handle_tcp_writing(int fd, short event, void* arg); 204 205 /* 206 * Send all children the quit nonblocking, then close pipe. 207 */ 208 static void send_children_quit(struct nsd* nsd); 209 /* same, for shutdown time, waits for child to exit to avoid restart issues */ 210 static void send_children_quit_and_wait(struct nsd* nsd); 211 212 /* set childrens flags to send NSD_STATS to them */ 213 #ifdef BIND8_STATS 214 static void set_children_stats(struct nsd* nsd); 215 #endif /* BIND8_STATS */ 216 217 /* 218 * Change the event types the HANDLERS are interested in to EVENT_TYPES. 219 */ 220 static void configure_handler_event_types(short event_types); 221 222 static uint16_t *compressed_dname_offsets = 0; 223 static uint32_t compression_table_capacity = 0; 224 static uint32_t compression_table_size = 0; 225 static domain_type* compressed_dnames[MAXRRSPP]; 226 227 /* 228 * Remove the specified pid from the list of child pids. Returns -1 if 229 * the pid is not in the list, child_num otherwise. The field is set to 0. 230 */ 231 static int 232 delete_child_pid(struct nsd *nsd, pid_t pid) 233 { 234 size_t i; 235 for (i = 0; i < nsd->child_count; ++i) { 236 if (nsd->children[i].pid == pid) { 237 nsd->children[i].pid = 0; 238 if(!nsd->children[i].need_to_exit) { 239 if(nsd->children[i].child_fd != -1) 240 close(nsd->children[i].child_fd); 241 nsd->children[i].child_fd = -1; 242 if(nsd->children[i].handler) 243 nsd->children[i].handler->fd = -1; 244 } 245 return i; 246 } 247 } 248 return -1; 249 } 250 251 /* 252 * Restart child servers if necessary. 253 */ 254 static int 255 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 256 int* xfrd_sock_p) 257 { 258 struct main_ipc_handler_data *ipc_data; 259 size_t i; 260 int sv[2]; 261 262 /* Fork the child processes... */ 263 for (i = 0; i < nsd->child_count; ++i) { 264 if (nsd->children[i].pid <= 0) { 265 if (nsd->children[i].child_fd != -1) 266 close(nsd->children[i].child_fd); 267 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 268 log_msg(LOG_ERR, "socketpair: %s", 269 strerror(errno)); 270 return -1; 271 } 272 nsd->children[i].child_fd = sv[0]; 273 nsd->children[i].parent_fd = sv[1]; 274 nsd->children[i].pid = fork(); 275 switch (nsd->children[i].pid) { 276 default: /* SERVER MAIN */ 277 close(nsd->children[i].parent_fd); 278 nsd->children[i].parent_fd = -1; 279 if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { 280 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 281 } 282 if(!nsd->children[i].handler) 283 { 284 ipc_data = (struct main_ipc_handler_data*) region_alloc( 285 region, sizeof(struct main_ipc_handler_data)); 286 ipc_data->nsd = nsd; 287 ipc_data->child = &nsd->children[i]; 288 ipc_data->child_num = i; 289 ipc_data->xfrd_sock = xfrd_sock_p; 290 ipc_data->packet = buffer_create(region, QIOBUFSZ); 291 ipc_data->forward_mode = 0; 292 ipc_data->got_bytes = 0; 293 ipc_data->total_bytes = 0; 294 ipc_data->acl_num = 0; 295 nsd->children[i].handler = (struct netio_handler*) region_alloc( 296 region, sizeof(struct netio_handler)); 297 nsd->children[i].handler->fd = nsd->children[i].child_fd; 298 nsd->children[i].handler->timeout = NULL; 299 nsd->children[i].handler->user_data = ipc_data; 300 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 301 nsd->children[i].handler->event_handler = parent_handle_child_command; 302 netio_add_handler(netio, nsd->children[i].handler); 303 } 304 /* clear any ongoing ipc */ 305 ipc_data = (struct main_ipc_handler_data*) 306 nsd->children[i].handler->user_data; 307 ipc_data->forward_mode = 0; 308 /* restart - update fd */ 309 nsd->children[i].handler->fd = nsd->children[i].child_fd; 310 break; 311 case 0: /* CHILD */ 312 /* the child need not be able to access the 313 * nsd.db file */ 314 namedb_close_udb(nsd->db); 315 #ifdef MEMCLEAN /* OS collects memory pages */ 316 region_destroy(region); 317 #endif 318 319 if (pledge("stdio rpath inet", NULL) == -1) { 320 log_msg(LOG_ERR, "pledge"); 321 exit(1); 322 } 323 324 nsd->pid = 0; 325 nsd->child_count = 0; 326 nsd->server_kind = nsd->children[i].kind; 327 nsd->this_child = &nsd->children[i]; 328 nsd->this_child->child_num = i; 329 /* remove signal flags inherited from parent 330 the parent will handle them. */ 331 nsd->signal_hint_reload_hup = 0; 332 nsd->signal_hint_reload = 0; 333 nsd->signal_hint_child = 0; 334 nsd->signal_hint_quit = 0; 335 nsd->signal_hint_shutdown = 0; 336 nsd->signal_hint_stats = 0; 337 nsd->signal_hint_statsusr = 0; 338 close(*xfrd_sock_p); 339 close(nsd->this_child->child_fd); 340 nsd->this_child->child_fd = -1; 341 if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { 342 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 343 } 344 server_child(nsd); 345 /* NOTREACH */ 346 exit(0); 347 case -1: 348 log_msg(LOG_ERR, "fork failed: %s", 349 strerror(errno)); 350 return -1; 351 } 352 } 353 } 354 return 0; 355 } 356 357 #ifdef BIND8_STATS 358 static void set_bind8_alarm(struct nsd* nsd) 359 { 360 /* resync so that the next alarm is on the next whole minute */ 361 if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ 362 alarm(nsd->st.period - (time(NULL) % nsd->st.period)); 363 } 364 #endif 365 366 /* set zone stat ids for zones initially read in */ 367 static void 368 zonestatid_tree_set(struct nsd* nsd) 369 { 370 struct radnode* n; 371 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 372 zone_type* zone = (zone_type*)n->elem; 373 zone->zonestatid = getzonestatid(nsd->options, zone->opts); 374 } 375 } 376 377 #ifdef USE_ZONE_STATS 378 void 379 server_zonestat_alloc(struct nsd* nsd) 380 { 381 size_t num = (nsd->options->zonestatnames->count==0?1: 382 nsd->options->zonestatnames->count); 383 size_t sz = sizeof(struct nsdst)*num; 384 char tmpfile[256]; 385 uint8_t z = 0; 386 387 /* file names */ 388 nsd->zonestatfname[0] = 0; 389 nsd->zonestatfname[1] = 0; 390 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0", 391 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 392 nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile); 393 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1", 394 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 395 nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile); 396 397 /* file descriptors */ 398 nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600); 399 if(nsd->zonestatfd[0] == -1) { 400 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0], 401 strerror(errno)); 402 exit(1); 403 } 404 nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600); 405 if(nsd->zonestatfd[0] == -1) { 406 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1], 407 strerror(errno)); 408 close(nsd->zonestatfd[0]); 409 unlink(nsd->zonestatfname[0]); 410 exit(1); 411 } 412 413 #ifdef HAVE_MMAP 414 if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) { 415 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0], 416 strerror(errno)); 417 exit(1); 418 } 419 if(write(nsd->zonestatfd[0], &z, 1) == -1) { 420 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 421 nsd->zonestatfname[0], strerror(errno)); 422 exit(1); 423 } 424 if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) { 425 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1], 426 strerror(errno)); 427 exit(1); 428 } 429 if(write(nsd->zonestatfd[1], &z, 1) == -1) { 430 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 431 nsd->zonestatfname[1], strerror(errno)); 432 exit(1); 433 } 434 nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 435 MAP_SHARED, nsd->zonestatfd[0], 0); 436 if(nsd->zonestat[0] == MAP_FAILED) { 437 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 438 unlink(nsd->zonestatfname[0]); 439 unlink(nsd->zonestatfname[1]); 440 exit(1); 441 } 442 nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 443 MAP_SHARED, nsd->zonestatfd[1], 0); 444 if(nsd->zonestat[1] == MAP_FAILED) { 445 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 446 unlink(nsd->zonestatfname[0]); 447 unlink(nsd->zonestatfname[1]); 448 exit(1); 449 } 450 memset(nsd->zonestat[0], 0, sz); 451 memset(nsd->zonestat[1], 0, sz); 452 nsd->zonestatsize[0] = num; 453 nsd->zonestatsize[1] = num; 454 nsd->zonestatdesired = num; 455 nsd->zonestatsizenow = num; 456 nsd->zonestatnow = nsd->zonestat[0]; 457 #endif /* HAVE_MMAP */ 458 } 459 460 void 461 zonestat_remap(struct nsd* nsd, int idx, size_t sz) 462 { 463 #ifdef HAVE_MMAP 464 #ifdef MREMAP_MAYMOVE 465 nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx], 466 sizeof(struct nsdst)*nsd->zonestatsize[idx], sz, 467 MREMAP_MAYMOVE); 468 if(nsd->zonestat[idx] == MAP_FAILED) { 469 log_msg(LOG_ERR, "mremap failed: %s", strerror(errno)); 470 exit(1); 471 } 472 #else /* !HAVE MREMAP */ 473 if(msync(nsd->zonestat[idx], 474 sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0) 475 log_msg(LOG_ERR, "msync failed: %s", strerror(errno)); 476 if(munmap(nsd->zonestat[idx], 477 sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0) 478 log_msg(LOG_ERR, "munmap failed: %s", strerror(errno)); 479 nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz, 480 PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0); 481 if(nsd->zonestat[idx] == MAP_FAILED) { 482 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 483 exit(1); 484 } 485 #endif /* MREMAP */ 486 #endif /* HAVE_MMAP */ 487 } 488 489 /* realloc the zonestat array for the one that is not currently in use, 490 * to match the desired new size of the array (if applicable) */ 491 void 492 server_zonestat_realloc(struct nsd* nsd) 493 { 494 #ifdef HAVE_MMAP 495 uint8_t z = 0; 496 size_t sz; 497 int idx = 0; /* index of the zonestat array that is not in use */ 498 if(nsd->zonestatnow == nsd->zonestat[0]) 499 idx = 1; 500 if(nsd->zonestatsize[idx] == nsd->zonestatdesired) 501 return; 502 sz = sizeof(struct nsdst)*nsd->zonestatdesired; 503 if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) { 504 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx], 505 strerror(errno)); 506 exit(1); 507 } 508 if(write(nsd->zonestatfd[idx], &z, 1) == -1) { 509 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 510 nsd->zonestatfname[idx], strerror(errno)); 511 exit(1); 512 } 513 zonestat_remap(nsd, idx, sz); 514 /* zero the newly allocated region */ 515 if(nsd->zonestatdesired > nsd->zonestatsize[idx]) { 516 memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) * 517 nsd->zonestatsize[idx], 0, sizeof(struct nsdst) * 518 (nsd->zonestatdesired - nsd->zonestatsize[idx])); 519 } 520 nsd->zonestatsize[idx] = nsd->zonestatdesired; 521 #endif /* HAVE_MMAP */ 522 } 523 524 /* switchover to use the other array for the new children, that 525 * briefly coexist with the old children. And we want to avoid them 526 * both writing to the same statistics arrays. */ 527 void 528 server_zonestat_switch(struct nsd* nsd) 529 { 530 if(nsd->zonestatnow == nsd->zonestat[0]) { 531 nsd->zonestatnow = nsd->zonestat[1]; 532 nsd->zonestatsizenow = nsd->zonestatsize[1]; 533 } else { 534 nsd->zonestatnow = nsd->zonestat[0]; 535 nsd->zonestatsizenow = nsd->zonestatsize[0]; 536 } 537 } 538 #endif /* USE_ZONE_STATS */ 539 540 static void 541 cleanup_dname_compression_tables(void *ptr) 542 { 543 free(ptr); 544 compressed_dname_offsets = NULL; 545 compression_table_capacity = 0; 546 } 547 548 static void 549 initialize_dname_compression_tables(struct nsd *nsd) 550 { 551 size_t needed = domain_table_count(nsd->db->domains) + 1; 552 needed += EXTRA_DOMAIN_NUMBERS; 553 if(compression_table_capacity < needed) { 554 if(compressed_dname_offsets) { 555 region_remove_cleanup(nsd->db->region, 556 cleanup_dname_compression_tables, 557 compressed_dname_offsets); 558 free(compressed_dname_offsets); 559 } 560 compressed_dname_offsets = (uint16_t *) xmallocarray( 561 needed, sizeof(uint16_t)); 562 region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, 563 compressed_dname_offsets); 564 compression_table_capacity = needed; 565 compression_table_size=domain_table_count(nsd->db->domains)+1; 566 } 567 memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); 568 compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ 569 } 570 571 /* create and bind sockets. */ 572 static int 573 server_init_ifs(struct nsd *nsd, size_t from, size_t to, int* reuseport_works) 574 { 575 struct addrinfo* addr; 576 size_t i; 577 #if defined(SO_REUSEPORT) || defined(SO_REUSEADDR) || (defined(INET6) && (defined(IPV6_V6ONLY) || defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU) || defined(IP_TRANSPARENT)) || defined(IP_FREEBIND) || defined(SO_BINDANY)) 578 int on = 1; 579 #endif 580 581 /* UDP */ 582 583 /* Make a socket... */ 584 for (i = from; i < to; i++) { 585 /* for reuseports copy socket specs of first entries */ 586 addr = nsd->udp[i%nsd->ifs].addr; 587 if (!addr) { 588 nsd->udp[i].s = -1; 589 continue; 590 } 591 nsd->udp[i].fam = (int)addr->ai_family; 592 if ((nsd->udp[i].s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 593 #if defined(INET6) 594 if (addr->ai_family == AF_INET6 && 595 errno == EAFNOSUPPORT && nsd->grab_ip6_optional) { 596 log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: not supported"); 597 continue; 598 } 599 #endif /* INET6 */ 600 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 601 return -1; 602 } 603 604 #ifdef SO_REUSEPORT 605 # ifdef SO_REUSEPORT_LB 606 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance 607 * like SO_REUSEPORT on Linux. This is what the users want 608 * with the config option in nsd.conf; if we actually 609 * need local address and port reuse they'll also need to 610 * have SO_REUSEPORT set for them, assume it was _LB they want. 611 */ 612 if(nsd->reuseport && *reuseport_works && 613 setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_REUSEPORT_LB, 614 (void*)&on, (socklen_t)sizeof(on)) < 0) { 615 if(verbosity >= 3 616 #ifdef ENOPROTOOPT 617 || errno != ENOPROTOOPT 618 #endif 619 ) 620 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEPORT_LB, " 621 "...) failed: %s", strerror(errno)); 622 *reuseport_works = 0; 623 } 624 # else /* SO_REUSEPORT_LB */ 625 if(nsd->reuseport && *reuseport_works && 626 setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_REUSEPORT, 627 (void*)&on, (socklen_t)sizeof(on)) < 0) { 628 if(verbosity >= 3 629 #ifdef ENOPROTOOPT 630 || errno != ENOPROTOOPT 631 #endif 632 ) 633 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEPORT, " 634 "...) failed: %s", strerror(errno)); 635 *reuseport_works = 0; 636 } 637 # endif /* SO_REUSEPORT_LB */ 638 #else 639 (void)reuseport_works; 640 #endif /* SO_REUSEPORT */ 641 #if defined(SO_RCVBUF) || defined(SO_SNDBUF) 642 if(1) { 643 int rcv = 1*1024*1024; 644 int snd = 1*1024*1024; 645 646 #ifdef SO_RCVBUF 647 # ifdef SO_RCVBUFFORCE 648 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 649 (socklen_t)sizeof(rcv)) < 0) { 650 if(errno != EPERM && errno != ENOBUFS) { 651 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, " 652 "...) failed: %s", strerror(errno)); 653 return -1; 654 } 655 # else 656 if(1) { 657 # endif /* SO_RCVBUFFORCE */ 658 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 659 (socklen_t)sizeof(rcv)) < 0) { 660 if(errno != ENOBUFS && errno != ENOSYS) { 661 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, " 662 "...) failed: %s", strerror(errno)); 663 return -1; 664 } 665 } 666 } 667 #endif /* SO_RCVBUF */ 668 669 #ifdef SO_SNDBUF 670 # ifdef SO_SNDBUFFORCE 671 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 672 (socklen_t)sizeof(snd)) < 0) { 673 if(errno != EPERM && errno != ENOBUFS) { 674 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, " 675 "...) failed: %s", strerror(errno)); 676 return -1; 677 } 678 # else 679 if(1) { 680 # endif /* SO_SNDBUFFORCE */ 681 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 682 (socklen_t)sizeof(snd)) < 0) { 683 if(errno != ENOBUFS && errno != ENOSYS) { 684 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, " 685 "...) failed: %s", strerror(errno)); 686 return -1; 687 } 688 } 689 } 690 #endif /* SO_SNDBUF */ 691 692 } 693 #endif /* defined(SO_RCVBUF) || defined(SO_SNDBUF) */ 694 695 #if defined(INET6) 696 if (addr->ai_family == AF_INET6) { 697 # if defined(IPV6_V6ONLY) 698 if (setsockopt(nsd->udp[i].s, 699 IPPROTO_IPV6, IPV6_V6ONLY, 700 &on, sizeof(on)) < 0) 701 { 702 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 703 strerror(errno)); 704 return -1; 705 } 706 # endif 707 # if defined(IPV6_USE_MIN_MTU) 708 /* 709 * There is no fragmentation of IPv6 datagrams 710 * during forwarding in the network. Therefore 711 * we do not send UDP datagrams larger than 712 * the minimum IPv6 MTU of 1280 octets. The 713 * EDNS0 message length can be larger if the 714 * network stack supports IPV6_USE_MIN_MTU. 715 */ 716 if (setsockopt(nsd->udp[i].s, 717 IPPROTO_IPV6, IPV6_USE_MIN_MTU, 718 &on, sizeof(on)) < 0) 719 { 720 log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", 721 strerror(errno)); 722 return -1; 723 } 724 # elif defined(IPV6_MTU) 725 /* 726 * On Linux, PMTUD is disabled by default for datagrams 727 * so set the MTU equal to the MIN MTU to get the same. 728 */ 729 on = IPV6_MIN_MTU; 730 if (setsockopt(nsd->udp[i].s, IPPROTO_IPV6, IPV6_MTU, 731 &on, sizeof(on)) < 0) 732 { 733 log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", 734 strerror(errno)); 735 return -1; 736 } 737 on = 1; 738 # endif 739 } 740 #endif 741 #if defined(AF_INET) 742 if (addr->ai_family == AF_INET) { 743 # if defined(IP_MTU_DISCOVER) 744 int mtudisc_disabled = 0; 745 # if defined(IP_PMTUDISC_OMIT) 746 /* Try IP_PMTUDISC_OMIT first */ 747 748 /* 749 * Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets 750 * ignore PMTU information and send packets with DF=0. 751 * Fragmentation is allowed if and only if the packet 752 * size exceeds the outgoing interface MTU or the packet 753 * encounters smaller MTU link in network. 754 * This mitigates DNS fragmentation attacks by preventing 755 * forged PMTU information. 756 * FreeBSD already has same semantics without setting 757 * the option. 758 */ 759 int action_omit = IP_PMTUDISC_OMIT; 760 if (!mtudisc_disabled) { 761 if(setsockopt(nsd->udp[i].s, IPPROTO_IP, 762 IP_MTU_DISCOVER, &action_omit, 763 sizeof(action_omit)) < 0) 764 { 765 log_msg(LOG_ERR, "setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", 766 strerror(errno)); 767 } else { 768 mtudisc_disabled = 1; 769 } 770 } 771 # endif /* IP_PMTUDISC_OMIT */ 772 # if defined(IP_PMTUDISC_DONT) 773 /* 774 * Use IP_PMTUDISC_DONT 775 * if IP_PMTUDISC_OMIT failed / undefined 776 */ 777 if (!mtudisc_disabled) { 778 int action_dont = IP_PMTUDISC_DONT; 779 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, 780 IP_MTU_DISCOVER, &action_dont, 781 sizeof(action_dont)) < 0) 782 { 783 log_msg(LOG_ERR, "setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 784 strerror(errno)); 785 } else { 786 mtudisc_disabled = 1; 787 } 788 } 789 # endif /* IP_PMTUDISC_DONT */ 790 /* exit if all methods to disable PMTUD failed */ 791 if(!mtudisc_disabled) { 792 return -1; 793 } 794 # elif defined(IP_DONTFRAG) 795 int off = 0; 796 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_DONTFRAG, 797 &off, sizeof(off)) < 0) 798 { 799 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 800 strerror(errno)); 801 return -1; 802 } 803 # endif 804 } 805 #endif 806 /* set it nonblocking */ 807 /* otherwise, on OSes with thundering herd problems, the 808 UDP recv could block NSD after select returns readable. */ 809 if (fcntl(nsd->udp[i].s, F_SETFL, O_NONBLOCK) == -1) { 810 log_msg(LOG_ERR, "cannot fcntl udp: %s", strerror(errno)); 811 } 812 813 /* Bind it... */ 814 if (nsd->options->ip_freebind) { 815 #ifdef IP_FREEBIND 816 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) < 0) { 817 log_msg(LOG_ERR, "setsockopt(...,IP_FREEBIND, ...) failed for udp: %s", 818 strerror(errno)); 819 } 820 #endif /* IP_FREEBIND */ 821 } 822 823 if (nsd->options->ip_transparent) { 824 #ifdef IP_TRANSPARENT 825 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) < 0) { 826 log_msg(LOG_ERR, "setsockopt(...,IP_TRANSPARENT, ...) failed for udp: %s", 827 strerror(errno)); 828 } 829 #endif /* IP_TRANSPARENT */ 830 #ifdef SO_BINDANY 831 if (setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_BINDANY, &on, sizeof(on)) < 0) { 832 log_msg(LOG_ERR, "setsockopt(...,SO_BINDANY, ...) failed for udp: %s", 833 strerror(errno)); 834 } 835 #endif /* SO_BINDANY */ 836 } 837 838 if ( 839 bind(nsd->udp[i].s, (struct sockaddr *) addr->ai_addr, addr->ai_addrlen) != 0) { 840 log_msg(LOG_ERR, "can't bind udp socket: %s", strerror(errno)); 841 return -1; 842 } 843 } 844 845 /* TCP */ 846 847 /* Make a socket... */ 848 for (i = from; i < to; i++) { 849 /* for reuseports copy socket specs of first entries */ 850 addr = nsd->tcp[i%nsd->ifs].addr; 851 if (!addr) { 852 nsd->tcp[i].s = -1; 853 continue; 854 } 855 nsd->tcp[i].fam = (int)addr->ai_family; 856 /* turn off REUSEPORT for TCP by copying the socket fd */ 857 if(i >= nsd->ifs) { 858 nsd->tcp[i].s = nsd->tcp[i%nsd->ifs].s; 859 continue; 860 } 861 if ((nsd->tcp[i].s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 862 #if defined(INET6) 863 if (addr->ai_family == AF_INET6 && 864 errno == EAFNOSUPPORT && nsd->grab_ip6_optional) { 865 log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: not supported"); 866 continue; 867 } 868 #endif /* INET6 */ 869 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 870 return -1; 871 } 872 873 #ifdef SO_REUSEPORT 874 if(nsd->reuseport && *reuseport_works && 875 setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEPORT, 876 (void*)&on, (socklen_t)sizeof(on)) < 0) { 877 if(verbosity >= 3 878 #ifdef ENOPROTOOPT 879 || errno != ENOPROTOOPT 880 #endif 881 ) 882 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEPORT, " 883 "...) failed: %s", strerror(errno)); 884 *reuseport_works = 0; 885 } 886 #endif /* SO_REUSEPORT */ 887 #ifdef SO_REUSEADDR 888 if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) { 889 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", strerror(errno)); 890 } 891 #endif /* SO_REUSEADDR */ 892 893 #if defined(INET6) 894 if (addr->ai_family == AF_INET6) { 895 # if defined(IPV6_V6ONLY) 896 if (setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_V6ONLY, 897 &on, sizeof(on)) < 0) { 898 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", strerror(errno)); 899 return -1; 900 } 901 # endif 902 # if defined(IPV6_USE_MIN_MTU) 903 /* 904 * Use minimum MTU to minimize delays learning working 905 * PMTU when communicating through a tunnel. 906 */ 907 if (setsockopt(nsd->tcp[i].s, 908 IPPROTO_IPV6, IPV6_USE_MIN_MTU, 909 &on, sizeof(on)) < 0) { 910 log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", strerror(errno)); 911 return -1; 912 } 913 # elif defined(IPV6_MTU) 914 /* 915 * On Linux, PMTUD is disabled by default for datagrams 916 * so set the MTU equal to the MIN MTU to get the same. 917 */ 918 on = IPV6_MIN_MTU; 919 if (setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_MTU, 920 &on, sizeof(on)) < 0) { 921 log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", strerror(errno)); 922 return -1; 923 } 924 on = 1; 925 # endif 926 } 927 #endif 928 /* set maximum segment size to tcp socket */ 929 if(nsd->tcp_mss > 0) { 930 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 931 if(setsockopt(nsd->tcp[i].s, IPPROTO_TCP, TCP_MAXSEG, 932 (void*)&nsd->tcp_mss, 933 sizeof(nsd->tcp_mss)) < 0) { 934 log_msg(LOG_ERR, 935 "setsockopt(...,TCP_MAXSEG,...)" 936 " failed for tcp: %s", strerror(errno)); 937 } 938 #else 939 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported"); 940 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */ 941 } 942 943 /* set it nonblocking */ 944 /* (StevensUNP p463), if tcp listening socket is blocking, then 945 it may block in accept, even if select() says readable. */ 946 if (fcntl(nsd->tcp[i].s, F_SETFL, O_NONBLOCK) == -1) { 947 log_msg(LOG_ERR, "cannot fcntl tcp: %s", strerror(errno)); 948 } 949 950 /* Bind it... */ 951 if (nsd->options->ip_freebind) { 952 #ifdef IP_FREEBIND 953 if (setsockopt(nsd->tcp[i].s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) < 0) { 954 log_msg(LOG_ERR, "setsockopt(...,IP_FREEBIND, ...) failed for tcp: %s", 955 strerror(errno)); 956 } 957 #endif /* IP_FREEBIND */ 958 } 959 960 if (nsd->options->ip_transparent) { 961 #ifdef IP_TRANSPARENT 962 if (setsockopt(nsd->tcp[i].s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) < 0) { 963 log_msg(LOG_ERR, "setsockopt(...,IP_TRANSPARENT, ...) failed for tcp: %s", 964 strerror(errno)); 965 } 966 #endif /* IP_TRANSPARENT */ 967 #ifdef SO_BINDANY 968 if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_BINDANY, &on, sizeof(on)) < 0) { 969 log_msg(LOG_ERR, "setsockopt(...,SO_BINDANY, ...) failed for tcp: %s", 970 strerror(errno)); 971 } 972 #endif /* SO_BINDANY */ 973 } 974 975 if( 976 bind(nsd->tcp[i].s, (struct sockaddr *) addr->ai_addr, addr->ai_addrlen) != 0) { 977 log_msg(LOG_ERR, "can't bind tcp socket: %s", strerror(errno)); 978 return -1; 979 } 980 981 /* Listen to it... */ 982 if (listen(nsd->tcp[i].s, TCP_BACKLOG) == -1) { 983 log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); 984 return -1; 985 } 986 } 987 988 return 0; 989 } 990 991 /* 992 * Initialize the server, reuseport, create and bind the sockets. 993 */ 994 int 995 server_init(struct nsd *nsd) 996 { 997 int reuseport_successful = 1; /* see if reuseport works in OS */ 998 if(nsd->reuseport) { 999 /* increase the size of the udp and tcp interface arrays, 1000 * there are going to be separate interface file descriptors 1001 * for every server instance */ 1002 nsd->udp = xrealloc(nsd->udp, (nsd->ifs*nsd->reuseport)* 1003 sizeof(*nsd->udp)); 1004 nsd->tcp = xrealloc(nsd->tcp, (nsd->ifs*nsd->reuseport)* 1005 sizeof(*nsd->tcp)); 1006 memset(&nsd->udp[nsd->ifs], 0, sizeof(*nsd->udp)* 1007 (nsd->ifs*(nsd->reuseport-1))); 1008 memset(&nsd->tcp[nsd->ifs], 0, sizeof(*nsd->tcp)* 1009 (nsd->ifs*(nsd->reuseport-1))); 1010 } 1011 1012 /* open the server interface ports */ 1013 if(server_init_ifs(nsd, 0, nsd->ifs, &reuseport_successful) == -1) 1014 return -1; 1015 1016 /* continue to open the remaining reuseport ports */ 1017 if(nsd->reuseport && reuseport_successful) { 1018 if(server_init_ifs(nsd, nsd->ifs, nsd->ifs*nsd->reuseport, 1019 &reuseport_successful) == -1) 1020 return -1; 1021 nsd->ifs *= nsd->reuseport; 1022 } else { 1023 nsd->reuseport = 0; 1024 } 1025 return 0; 1026 } 1027 1028 /* 1029 * Prepare the server for take off. 1030 * 1031 */ 1032 int 1033 server_prepare(struct nsd *nsd) 1034 { 1035 #ifdef RATELIMIT 1036 /* set secret modifier for hashing (udb ptr buckets and rate limits) */ 1037 #ifdef HAVE_ARC4RANDOM 1038 hash_set_raninit(arc4random()); 1039 #else 1040 uint32_t v = getpid() ^ time(NULL); 1041 srandom((unsigned long)v); 1042 if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0) 1043 hash_set_raninit(v); 1044 else hash_set_raninit(random()); 1045 #endif 1046 rrl_mmap_init(nsd->child_count, nsd->options->rrl_size, 1047 nsd->options->rrl_ratelimit, 1048 nsd->options->rrl_whitelist_ratelimit, 1049 nsd->options->rrl_slip, 1050 nsd->options->rrl_ipv4_prefix_length, 1051 nsd->options->rrl_ipv6_prefix_length); 1052 #endif /* RATELIMIT */ 1053 1054 /* Open the database... */ 1055 if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) { 1056 log_msg(LOG_ERR, "unable to open the database %s: %s", 1057 nsd->dbfile, strerror(errno)); 1058 unlink(nsd->task[0]->fname); 1059 unlink(nsd->task[1]->fname); 1060 #ifdef USE_ZONE_STATS 1061 unlink(nsd->zonestatfname[0]); 1062 unlink(nsd->zonestatfname[1]); 1063 #endif 1064 xfrd_del_tempdir(nsd); 1065 return -1; 1066 } 1067 /* check if zone files have been modified */ 1068 /* NULL for taskudb because we send soainfo in a moment, batched up, 1069 * for all zones */ 1070 if(nsd->options->zonefiles_check || (nsd->options->database == NULL || 1071 nsd->options->database[0] == 0)) 1072 namedb_check_zonefiles(nsd, nsd->options, NULL, NULL); 1073 zonestatid_tree_set(nsd); 1074 1075 compression_table_capacity = 0; 1076 initialize_dname_compression_tables(nsd); 1077 1078 #ifdef BIND8_STATS 1079 /* Initialize times... */ 1080 time(&nsd->st.boot); 1081 set_bind8_alarm(nsd); 1082 #endif /* BIND8_STATS */ 1083 1084 return 0; 1085 } 1086 1087 /* 1088 * Fork the required number of servers. 1089 */ 1090 static int 1091 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, 1092 int* xfrd_sock_p) 1093 { 1094 size_t i; 1095 1096 /* Start all child servers initially. */ 1097 for (i = 0; i < nsd->child_count; ++i) { 1098 nsd->children[i].pid = 0; 1099 } 1100 1101 return restart_child_servers(nsd, region, netio, xfrd_sock_p); 1102 } 1103 1104 void 1105 server_close_all_sockets(struct nsd_socket sockets[], size_t n) 1106 { 1107 size_t i; 1108 1109 /* Close all the sockets... */ 1110 for (i = 0; i < n; ++i) { 1111 if (sockets[i].s != -1) { 1112 close(sockets[i].s); 1113 if(sockets[i].addr) 1114 freeaddrinfo(sockets[i].addr); 1115 sockets[i].s = -1; 1116 } 1117 } 1118 } 1119 1120 /* 1121 * Close the sockets, shutdown the server and exit. 1122 * Does not return. 1123 * 1124 */ 1125 void 1126 server_shutdown(struct nsd *nsd) 1127 { 1128 size_t i; 1129 1130 server_close_all_sockets(nsd->udp, nsd->ifs); 1131 server_close_all_sockets(nsd->tcp, nsd->ifs); 1132 /* CHILD: close command channel to parent */ 1133 if(nsd->this_child && nsd->this_child->parent_fd != -1) 1134 { 1135 close(nsd->this_child->parent_fd); 1136 nsd->this_child->parent_fd = -1; 1137 } 1138 /* SERVER: close command channels to children */ 1139 if(!nsd->this_child) 1140 { 1141 for(i=0; i < nsd->child_count; ++i) 1142 if(nsd->children[i].child_fd != -1) 1143 { 1144 close(nsd->children[i].child_fd); 1145 nsd->children[i].child_fd = -1; 1146 } 1147 } 1148 1149 tsig_finalize(); 1150 #ifdef HAVE_SSL 1151 daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */ 1152 #endif 1153 1154 #ifdef MEMCLEAN /* OS collects memory pages */ 1155 #ifdef RATELIMIT 1156 rrl_mmap_deinit_keep_mmap(); 1157 #endif 1158 #ifdef USE_DNSTAP 1159 dt_collector_destroy(nsd->dt_collector, nsd); 1160 #endif 1161 udb_base_free_keep_mmap(nsd->task[0]); 1162 udb_base_free_keep_mmap(nsd->task[1]); 1163 namedb_close_udb(nsd->db); /* keeps mmap */ 1164 namedb_close(nsd->db); 1165 nsd_options_destroy(nsd->options); 1166 region_destroy(nsd->region); 1167 #endif 1168 log_finalize(); 1169 exit(0); 1170 } 1171 1172 void 1173 server_prepare_xfrd(struct nsd* nsd) 1174 { 1175 char tmpfile[256]; 1176 /* create task mmaps */ 1177 nsd->mytask = 0; 1178 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0", 1179 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1180 nsd->task[0] = task_file_create(tmpfile); 1181 if(!nsd->task[0]) { 1182 #ifdef USE_ZONE_STATS 1183 unlink(nsd->zonestatfname[0]); 1184 unlink(nsd->zonestatfname[1]); 1185 #endif 1186 xfrd_del_tempdir(nsd); 1187 exit(1); 1188 } 1189 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1", 1190 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1191 nsd->task[1] = task_file_create(tmpfile); 1192 if(!nsd->task[1]) { 1193 unlink(nsd->task[0]->fname); 1194 #ifdef USE_ZONE_STATS 1195 unlink(nsd->zonestatfname[0]); 1196 unlink(nsd->zonestatfname[1]); 1197 #endif 1198 xfrd_del_tempdir(nsd); 1199 exit(1); 1200 } 1201 assert(udb_base_get_userdata(nsd->task[0])->data == 0); 1202 assert(udb_base_get_userdata(nsd->task[1])->data == 0); 1203 /* create xfrd listener structure */ 1204 nsd->xfrd_listener = region_alloc(nsd->region, 1205 sizeof(netio_handler_type)); 1206 nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*) 1207 region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data)); 1208 nsd->xfrd_listener->fd = -1; 1209 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd = 1210 nsd; 1211 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn = 1212 xfrd_tcp_create(nsd->region, QIOBUFSZ); 1213 } 1214 1215 1216 void 1217 server_start_xfrd(struct nsd *nsd, int del_db, int reload_active) 1218 { 1219 pid_t pid; 1220 int sockets[2] = {0,0}; 1221 struct ipc_handler_conn_data *data; 1222 1223 if(nsd->xfrd_listener->fd != -1) 1224 close(nsd->xfrd_listener->fd); 1225 if(del_db) { 1226 /* recreate taskdb that xfrd was using, it may be corrupt */ 1227 /* we (or reload) use nsd->mytask, and xfrd uses the other */ 1228 char* tmpfile = nsd->task[1-nsd->mytask]->fname; 1229 nsd->task[1-nsd->mytask]->fname = NULL; 1230 /* free alloc already, so udb does not shrink itself */ 1231 udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc); 1232 nsd->task[1-nsd->mytask]->alloc = NULL; 1233 udb_base_free(nsd->task[1-nsd->mytask]); 1234 /* create new file, overwrite the old one */ 1235 nsd->task[1-nsd->mytask] = task_file_create(tmpfile); 1236 free(tmpfile); 1237 } 1238 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { 1239 log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); 1240 return; 1241 } 1242 pid = fork(); 1243 switch (pid) { 1244 case -1: 1245 log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); 1246 break; 1247 default: 1248 /* PARENT: close first socket, use second one */ 1249 close(sockets[0]); 1250 if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) { 1251 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1252 } 1253 if(del_db) xfrd_free_namedb(nsd); 1254 /* use other task than I am using, since if xfrd died and is 1255 * restarted, the reload is using nsd->mytask */ 1256 nsd->mytask = 1 - nsd->mytask; 1257 xfrd_init(sockets[1], nsd, del_db, reload_active, pid); 1258 /* ENOTREACH */ 1259 break; 1260 case 0: 1261 /* CHILD: close second socket, use first one */ 1262 close(sockets[1]); 1263 if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) { 1264 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1265 } 1266 nsd->xfrd_listener->fd = sockets[0]; 1267 break; 1268 } 1269 /* server-parent only */ 1270 nsd->xfrd_listener->timeout = NULL; 1271 nsd->xfrd_listener->event_types = NETIO_EVENT_READ; 1272 nsd->xfrd_listener->event_handler = parent_handle_xfrd_command; 1273 /* clear ongoing ipc reads */ 1274 data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data; 1275 data->conn->is_reading = 0; 1276 } 1277 1278 /** add all soainfo to taskdb */ 1279 static void 1280 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb) 1281 { 1282 struct radnode* n; 1283 udb_ptr task_last; /* last task, mytask is empty so NULL */ 1284 /* add all SOA INFO to mytask */ 1285 udb_ptr_init(&task_last, taskudb); 1286 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 1287 task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0); 1288 } 1289 udb_ptr_unlink(&task_last, taskudb); 1290 } 1291 1292 void 1293 server_send_soa_xfrd(struct nsd* nsd, int shortsoa) 1294 { 1295 /* normally this exchanges the SOA from nsd->xfrd and the expire back. 1296 * parent fills one taskdb with soas, xfrd fills other with expires. 1297 * then they exchange and process. 1298 * shortsoa: xfrd crashes and needs to be restarted and one taskdb 1299 * may be in use by reload. Fill SOA in taskdb and give to xfrd. 1300 * expire notifications can be sent back via a normal reload later 1301 * (xfrd will wait for current running reload to finish if any). 1302 */ 1303 sig_atomic_t cmd = 0; 1304 pid_t mypid; 1305 int xfrd_sock = nsd->xfrd_listener->fd; 1306 struct udb_base* taskudb = nsd->task[nsd->mytask]; 1307 udb_ptr t; 1308 if(!shortsoa) { 1309 if(nsd->signal_hint_shutdown) { 1310 shutdown: 1311 log_msg(LOG_WARNING, "signal received, shutting down..."); 1312 server_close_all_sockets(nsd->udp, nsd->ifs); 1313 server_close_all_sockets(nsd->tcp, nsd->ifs); 1314 #ifdef HAVE_SSL 1315 daemon_remote_close(nsd->rc); 1316 #endif 1317 /* Unlink it if possible... */ 1318 unlinkpid(nsd->pidfile); 1319 unlink(nsd->task[0]->fname); 1320 unlink(nsd->task[1]->fname); 1321 #ifdef USE_ZONE_STATS 1322 unlink(nsd->zonestatfname[0]); 1323 unlink(nsd->zonestatfname[1]); 1324 #endif 1325 /* write the nsd.db to disk, wait for it to complete */ 1326 udb_base_sync(nsd->db->udb, 1); 1327 udb_base_close(nsd->db->udb); 1328 server_shutdown(nsd); 1329 exit(0); 1330 } 1331 } 1332 if(shortsoa) { 1333 /* put SOA in xfrd task because mytask may be in use */ 1334 taskudb = nsd->task[1-nsd->mytask]; 1335 } 1336 1337 add_all_soa_to_task(nsd, taskudb); 1338 if(!shortsoa) { 1339 /* wait for xfrd to signal task is ready, RELOAD signal */ 1340 if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) || 1341 cmd != NSD_RELOAD) { 1342 log_msg(LOG_ERR, "did not get start signal from xfrd"); 1343 exit(1); 1344 } 1345 if(nsd->signal_hint_shutdown) { 1346 goto shutdown; 1347 } 1348 } 1349 /* give xfrd our task, signal it with RELOAD_DONE */ 1350 task_process_sync(taskudb); 1351 cmd = NSD_RELOAD_DONE; 1352 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1353 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1354 (int)nsd->pid, strerror(errno)); 1355 } 1356 mypid = getpid(); 1357 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1358 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1359 strerror(errno)); 1360 } 1361 1362 if(!shortsoa) { 1363 /* process the xfrd task works (expiry data) */ 1364 nsd->mytask = 1 - nsd->mytask; 1365 taskudb = nsd->task[nsd->mytask]; 1366 task_remap(taskudb); 1367 udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb)); 1368 while(!udb_ptr_is_null(&t)) { 1369 task_process_expire(nsd->db, TASKLIST(&t)); 1370 udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next); 1371 } 1372 udb_ptr_unlink(&t, taskudb); 1373 task_clear(taskudb); 1374 1375 /* tell xfrd that the task is emptied, signal with RELOAD_DONE */ 1376 cmd = NSD_RELOAD_DONE; 1377 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1378 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1379 (int)nsd->pid, strerror(errno)); 1380 } 1381 } 1382 } 1383 1384 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ 1385 ssize_t 1386 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 1387 { 1388 uint8_t* buf = (uint8_t*) p; 1389 ssize_t total = 0; 1390 struct pollfd fd; 1391 memset(&fd, 0, sizeof(fd)); 1392 fd.fd = s; 1393 fd.events = POLLIN; 1394 1395 while( total < sz) { 1396 ssize_t ret; 1397 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000); 1398 if(ret == -1) { 1399 if(errno == EAGAIN) 1400 /* blocking read */ 1401 continue; 1402 if(errno == EINTR) { 1403 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 1404 return -1; 1405 /* other signals can be handled later */ 1406 continue; 1407 } 1408 /* some error */ 1409 return -1; 1410 } 1411 if(ret == 0) { 1412 /* operation timed out */ 1413 return -2; 1414 } 1415 ret = read(s, buf+total, sz-total); 1416 if(ret == -1) { 1417 if(errno == EAGAIN) 1418 /* blocking read */ 1419 continue; 1420 if(errno == EINTR) { 1421 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 1422 return -1; 1423 /* other signals can be handled later */ 1424 continue; 1425 } 1426 /* some error */ 1427 return -1; 1428 } 1429 if(ret == 0) { 1430 /* closed connection! */ 1431 return 0; 1432 } 1433 total += ret; 1434 } 1435 return total; 1436 } 1437 1438 static void 1439 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) 1440 { 1441 sig_atomic_t cmd = NSD_QUIT_SYNC; 1442 udb_ptr t, next; 1443 udb_base* u = nsd->task[nsd->mytask]; 1444 udb_ptr_init(&next, u); 1445 udb_ptr_new(&t, u, udb_base_get_userdata(u)); 1446 udb_base_set_userdata(u, 0); 1447 while(!udb_ptr_is_null(&t)) { 1448 /* store next in list so this one can be deleted or reused */ 1449 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); 1450 udb_rptr_zero(&TASKLIST(&t)->next, u); 1451 1452 /* process task t */ 1453 /* append results for task t and update last_task */ 1454 task_process_in_reload(nsd, u, last_task, &t); 1455 1456 /* go to next */ 1457 udb_ptr_set_ptr(&t, u, &next); 1458 1459 /* if the parent has quit, we must quit too, poll the fd for cmds */ 1460 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 1461 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 1462 if(cmd == NSD_QUIT) { 1463 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 1464 /* sync to disk (if needed) */ 1465 udb_base_sync(nsd->db->udb, 0); 1466 /* unlink files of remainder of tasks */ 1467 while(!udb_ptr_is_null(&t)) { 1468 if(TASKLIST(&t)->task_type == task_apply_xfr) { 1469 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); 1470 } 1471 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); 1472 } 1473 udb_ptr_unlink(&t, u); 1474 udb_ptr_unlink(&next, u); 1475 exit(0); 1476 } 1477 } 1478 1479 } 1480 udb_ptr_unlink(&t, u); 1481 udb_ptr_unlink(&next, u); 1482 } 1483 1484 #ifdef BIND8_STATS 1485 static void 1486 parent_send_stats(struct nsd* nsd, int cmdfd) 1487 { 1488 size_t i; 1489 if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) { 1490 log_msg(LOG_ERR, "could not write stats to reload"); 1491 return; 1492 } 1493 for(i=0; i<nsd->child_count; i++) 1494 if(!write_socket(cmdfd, &nsd->children[i].query_count, 1495 sizeof(stc_type))) { 1496 log_msg(LOG_ERR, "could not write stats to reload"); 1497 return; 1498 } 1499 } 1500 1501 static void 1502 reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last) 1503 { 1504 struct nsdst s; 1505 stc_type* p; 1506 size_t i; 1507 if(block_read(nsd, cmdfd, &s, sizeof(s), 1508 RELOAD_SYNC_TIMEOUT) != sizeof(s)) { 1509 log_msg(LOG_ERR, "could not read stats from oldpar"); 1510 return; 1511 } 1512 s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0); 1513 s.db_mem = region_get_mem(nsd->db->region); 1514 p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s, 1515 nsd->child_count); 1516 if(!p) return; 1517 for(i=0; i<nsd->child_count; i++) { 1518 if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!= 1519 sizeof(stc_type)) 1520 return; 1521 } 1522 } 1523 #endif /* BIND8_STATS */ 1524 1525 /* 1526 * Reload the database, stop parent, re-fork children and continue. 1527 * as server_main. 1528 */ 1529 static void 1530 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 1531 int cmdsocket) 1532 { 1533 pid_t mypid; 1534 sig_atomic_t cmd = NSD_QUIT_SYNC; 1535 int ret; 1536 udb_ptr last_task; 1537 struct sigaction old_sigchld, ign_sigchld; 1538 /* ignore SIGCHLD from the previous server_main that used this pid */ 1539 memset(&ign_sigchld, 0, sizeof(ign_sigchld)); 1540 ign_sigchld.sa_handler = SIG_IGN; 1541 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld); 1542 1543 /* see what tasks we got from xfrd */ 1544 task_remap(nsd->task[nsd->mytask]); 1545 udb_ptr_init(&last_task, nsd->task[nsd->mytask]); 1546 udb_compact_inhibited(nsd->db->udb, 1); 1547 reload_process_tasks(nsd, &last_task, cmdsocket); 1548 udb_compact_inhibited(nsd->db->udb, 0); 1549 udb_compact(nsd->db->udb); 1550 1551 #ifndef NDEBUG 1552 if(nsd_debug_level >= 1) 1553 region_log_stats(nsd->db->region); 1554 #endif /* NDEBUG */ 1555 /* sync to disk (if needed) */ 1556 udb_base_sync(nsd->db->udb, 0); 1557 1558 initialize_dname_compression_tables(nsd); 1559 1560 #ifdef BIND8_STATS 1561 /* Restart dumping stats if required. */ 1562 time(&nsd->st.boot); 1563 set_bind8_alarm(nsd); 1564 #endif 1565 #ifdef USE_ZONE_STATS 1566 server_zonestat_realloc(nsd); /* realloc for new children */ 1567 server_zonestat_switch(nsd); 1568 #endif 1569 1570 /* listen for the signals of failed children again */ 1571 sigaction(SIGCHLD, &old_sigchld, NULL); 1572 /* Start new child processes */ 1573 if (server_start_children(nsd, server_region, netio, &nsd-> 1574 xfrd_listener->fd) != 0) { 1575 send_children_quit(nsd); 1576 exit(1); 1577 } 1578 1579 /* if the parent has quit, we must quit too, poll the fd for cmds */ 1580 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 1581 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 1582 if(cmd == NSD_QUIT) { 1583 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 1584 send_children_quit(nsd); 1585 exit(0); 1586 } 1587 } 1588 1589 /* Send quit command to parent: blocking, wait for receipt. */ 1590 do { 1591 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 1592 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) 1593 { 1594 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s", 1595 strerror(errno)); 1596 } 1597 /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ 1598 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); 1599 ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 1600 RELOAD_SYNC_TIMEOUT); 1601 if(ret == -2) { 1602 DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry")); 1603 } 1604 } while (ret == -2); 1605 if(ret == -1) { 1606 log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s", 1607 strerror(errno)); 1608 } 1609 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd)); 1610 if(cmd == NSD_QUIT) { 1611 /* small race condition possible here, parent got quit cmd. */ 1612 send_children_quit(nsd); 1613 exit(1); 1614 } 1615 assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); 1616 #ifdef BIND8_STATS 1617 reload_do_stats(cmdsocket, nsd, &last_task); 1618 #endif 1619 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); 1620 task_process_sync(nsd->task[nsd->mytask]); 1621 #ifdef USE_ZONE_STATS 1622 server_zonestat_realloc(nsd); /* realloc for next children */ 1623 #endif 1624 1625 /* send soainfo to the xfrd process, signal it that reload is done, 1626 * it picks up the taskudb */ 1627 cmd = NSD_RELOAD_DONE; 1628 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 1629 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", 1630 strerror(errno)); 1631 } 1632 mypid = getpid(); 1633 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1634 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1635 strerror(errno)); 1636 } 1637 1638 /* try to reopen file */ 1639 if (nsd->file_rotation_ok) 1640 log_reopen(nsd->log_filename, 1); 1641 /* exit reload, continue as new server_main */ 1642 } 1643 1644 /* 1645 * Get the mode depending on the signal hints that have been received. 1646 * Multiple signal hints can be received and will be handled in turn. 1647 */ 1648 static sig_atomic_t 1649 server_signal_mode(struct nsd *nsd) 1650 { 1651 if(nsd->signal_hint_quit) { 1652 nsd->signal_hint_quit = 0; 1653 return NSD_QUIT; 1654 } 1655 else if(nsd->signal_hint_shutdown) { 1656 nsd->signal_hint_shutdown = 0; 1657 return NSD_SHUTDOWN; 1658 } 1659 else if(nsd->signal_hint_child) { 1660 nsd->signal_hint_child = 0; 1661 return NSD_REAP_CHILDREN; 1662 } 1663 else if(nsd->signal_hint_reload) { 1664 nsd->signal_hint_reload = 0; 1665 return NSD_RELOAD; 1666 } 1667 else if(nsd->signal_hint_reload_hup) { 1668 nsd->signal_hint_reload_hup = 0; 1669 return NSD_RELOAD_REQ; 1670 } 1671 else if(nsd->signal_hint_stats) { 1672 nsd->signal_hint_stats = 0; 1673 #ifdef BIND8_STATS 1674 set_bind8_alarm(nsd); 1675 #endif 1676 return NSD_STATS; 1677 } 1678 else if(nsd->signal_hint_statsusr) { 1679 nsd->signal_hint_statsusr = 0; 1680 return NSD_STATS; 1681 } 1682 return NSD_RUN; 1683 } 1684 1685 /* 1686 * The main server simply waits for signals and child processes to 1687 * terminate. Child processes are restarted as necessary. 1688 */ 1689 void 1690 server_main(struct nsd *nsd) 1691 { 1692 region_type *server_region = region_create(xalloc, free); 1693 netio_type *netio = netio_create(server_region); 1694 netio_handler_type reload_listener; 1695 int reload_sockets[2] = {-1, -1}; 1696 struct timespec timeout_spec; 1697 int status; 1698 pid_t child_pid; 1699 pid_t reload_pid = -1; 1700 sig_atomic_t mode; 1701 1702 /* Ensure we are the main process */ 1703 assert(nsd->server_kind == NSD_SERVER_MAIN); 1704 1705 /* Add listener for the XFRD process */ 1706 netio_add_handler(netio, nsd->xfrd_listener); 1707 1708 /* Start the child processes that handle incoming queries */ 1709 if (server_start_children(nsd, server_region, netio, 1710 &nsd->xfrd_listener->fd) != 0) { 1711 send_children_quit(nsd); 1712 exit(1); 1713 } 1714 reload_listener.fd = -1; 1715 1716 /* This_child MUST be 0, because this is the parent process */ 1717 assert(nsd->this_child == 0); 1718 1719 /* Run the server until we get a shutdown signal */ 1720 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 1721 /* Did we receive a signal that changes our mode? */ 1722 if(mode == NSD_RUN) { 1723 nsd->mode = mode = server_signal_mode(nsd); 1724 } 1725 1726 switch (mode) { 1727 case NSD_RUN: 1728 /* see if any child processes terminated */ 1729 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) { 1730 int is_child = delete_child_pid(nsd, child_pid); 1731 if (is_child != -1 && nsd->children[is_child].need_to_exit) { 1732 if(nsd->children[is_child].child_fd == -1) 1733 nsd->children[is_child].has_exited = 1; 1734 parent_check_all_children_exited(nsd); 1735 } else if(is_child != -1) { 1736 log_msg(LOG_WARNING, 1737 "server %d died unexpectedly with status %d, restarting", 1738 (int) child_pid, status); 1739 restart_child_servers(nsd, server_region, netio, 1740 &nsd->xfrd_listener->fd); 1741 } else if (child_pid == reload_pid) { 1742 sig_atomic_t cmd = NSD_RELOAD_DONE; 1743 pid_t mypid; 1744 log_msg(LOG_WARNING, 1745 "Reload process %d failed with status %d, continuing with old database", 1746 (int) child_pid, status); 1747 reload_pid = -1; 1748 if(reload_listener.fd != -1) close(reload_listener.fd); 1749 reload_listener.fd = -1; 1750 reload_listener.event_types = NETIO_EVENT_NONE; 1751 task_process_sync(nsd->task[nsd->mytask]); 1752 /* inform xfrd reload attempt ended */ 1753 if(!write_socket(nsd->xfrd_listener->fd, 1754 &cmd, sizeof(cmd))) { 1755 log_msg(LOG_ERR, "problems " 1756 "sending SOAEND to xfrd: %s", 1757 strerror(errno)); 1758 } 1759 mypid = getpid(); 1760 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1761 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1762 strerror(errno)); 1763 } 1764 } else if(status != 0) { 1765 /* check for status, because we get 1766 * the old-servermain because reload 1767 * is the process-parent of old-main, 1768 * and we get older server-processes 1769 * that are exiting after a reload */ 1770 log_msg(LOG_WARNING, 1771 "process %d terminated with status %d", 1772 (int) child_pid, status); 1773 } 1774 } 1775 if (child_pid == -1) { 1776 if (errno == EINTR) { 1777 continue; 1778 } 1779 if (errno != ECHILD) 1780 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 1781 } 1782 if (nsd->mode != NSD_RUN) 1783 break; 1784 1785 /* timeout to collect processes. In case no sigchild happens. */ 1786 timeout_spec.tv_sec = 60; 1787 timeout_spec.tv_nsec = 0; 1788 1789 /* listen on ports, timeout for collecting terminated children */ 1790 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 1791 if (errno != EINTR) { 1792 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 1793 } 1794 } 1795 if(nsd->restart_children) { 1796 restart_child_servers(nsd, server_region, netio, 1797 &nsd->xfrd_listener->fd); 1798 nsd->restart_children = 0; 1799 } 1800 if(nsd->reload_failed) { 1801 sig_atomic_t cmd = NSD_RELOAD_DONE; 1802 pid_t mypid; 1803 nsd->reload_failed = 0; 1804 log_msg(LOG_WARNING, 1805 "Reload process %d failed, continuing with old database", 1806 (int) reload_pid); 1807 reload_pid = -1; 1808 if(reload_listener.fd != -1) close(reload_listener.fd); 1809 reload_listener.fd = -1; 1810 reload_listener.event_types = NETIO_EVENT_NONE; 1811 task_process_sync(nsd->task[nsd->mytask]); 1812 /* inform xfrd reload attempt ended */ 1813 if(!write_socket(nsd->xfrd_listener->fd, 1814 &cmd, sizeof(cmd))) { 1815 log_msg(LOG_ERR, "problems " 1816 "sending SOAEND to xfrd: %s", 1817 strerror(errno)); 1818 } 1819 mypid = getpid(); 1820 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1821 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1822 strerror(errno)); 1823 } 1824 } 1825 1826 break; 1827 case NSD_RELOAD_REQ: { 1828 sig_atomic_t cmd = NSD_RELOAD_REQ; 1829 log_msg(LOG_WARNING, "SIGHUP received, reloading..."); 1830 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1831 "main: ipc send reload_req to xfrd")); 1832 if(!write_socket(nsd->xfrd_listener->fd, 1833 &cmd, sizeof(cmd))) { 1834 log_msg(LOG_ERR, "server_main: could not send " 1835 "reload_req to xfrd: %s", strerror(errno)); 1836 } 1837 nsd->mode = NSD_RUN; 1838 } break; 1839 case NSD_RELOAD: 1840 /* Continue to run nsd after reload */ 1841 nsd->mode = NSD_RUN; 1842 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); 1843 if (reload_pid != -1) { 1844 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 1845 (int) reload_pid); 1846 break; 1847 } 1848 1849 /* switch the mytask to keep track of who owns task*/ 1850 nsd->mytask = 1 - nsd->mytask; 1851 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 1852 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 1853 reload_pid = -1; 1854 break; 1855 } 1856 1857 /* Do actual reload */ 1858 reload_pid = fork(); 1859 switch (reload_pid) { 1860 case -1: 1861 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 1862 break; 1863 default: 1864 /* PARENT */ 1865 close(reload_sockets[0]); 1866 server_reload(nsd, server_region, netio, 1867 reload_sockets[1]); 1868 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 1869 close(reload_sockets[1]); 1870 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 1871 /* drop stale xfrd ipc data */ 1872 ((struct ipc_handler_conn_data*)nsd-> 1873 xfrd_listener->user_data) 1874 ->conn->is_reading = 0; 1875 reload_pid = -1; 1876 reload_listener.fd = -1; 1877 reload_listener.event_types = NETIO_EVENT_NONE; 1878 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 1879 break; 1880 case 0: 1881 /* CHILD */ 1882 /* server_main keep running until NSD_QUIT_SYNC 1883 * received from reload. */ 1884 close(reload_sockets[1]); 1885 reload_listener.fd = reload_sockets[0]; 1886 reload_listener.timeout = NULL; 1887 reload_listener.user_data = nsd; 1888 reload_listener.event_types = NETIO_EVENT_READ; 1889 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 1890 netio_add_handler(netio, &reload_listener); 1891 reload_pid = getppid(); 1892 break; 1893 } 1894 break; 1895 case NSD_QUIT_SYNC: 1896 /* synchronisation of xfrd, parent and reload */ 1897 if(!nsd->quit_sync_done && reload_listener.fd != -1) { 1898 sig_atomic_t cmd = NSD_RELOAD; 1899 /* stop xfrd ipc writes in progress */ 1900 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1901 "main: ipc send indication reload")); 1902 if(!write_socket(nsd->xfrd_listener->fd, 1903 &cmd, sizeof(cmd))) { 1904 log_msg(LOG_ERR, "server_main: could not send reload " 1905 "indication to xfrd: %s", strerror(errno)); 1906 } 1907 /* wait for ACK from xfrd */ 1908 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 1909 nsd->quit_sync_done = 1; 1910 } 1911 nsd->mode = NSD_RUN; 1912 break; 1913 case NSD_QUIT: 1914 /* silent shutdown during reload */ 1915 if(reload_listener.fd != -1) { 1916 /* acknowledge the quit, to sync reload that we will really quit now */ 1917 sig_atomic_t cmd = NSD_RELOAD; 1918 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 1919 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 1920 log_msg(LOG_ERR, "server_main: " 1921 "could not ack quit: %s", strerror(errno)); 1922 } 1923 #ifdef BIND8_STATS 1924 parent_send_stats(nsd, reload_listener.fd); 1925 #endif /* BIND8_STATS */ 1926 close(reload_listener.fd); 1927 } 1928 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); 1929 /* only quit children after xfrd has acked */ 1930 send_children_quit(nsd); 1931 1932 #ifdef MEMCLEAN /* OS collects memory pages */ 1933 region_destroy(server_region); 1934 #endif 1935 server_shutdown(nsd); 1936 1937 /* ENOTREACH */ 1938 break; 1939 case NSD_SHUTDOWN: 1940 break; 1941 case NSD_REAP_CHILDREN: 1942 /* continue; wait for child in run loop */ 1943 nsd->mode = NSD_RUN; 1944 break; 1945 case NSD_STATS: 1946 #ifdef BIND8_STATS 1947 set_children_stats(nsd); 1948 #endif 1949 nsd->mode = NSD_RUN; 1950 break; 1951 default: 1952 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); 1953 nsd->mode = NSD_RUN; 1954 break; 1955 } 1956 } 1957 log_msg(LOG_WARNING, "signal received, shutting down..."); 1958 1959 /* close opened ports to avoid race with restart of nsd */ 1960 server_close_all_sockets(nsd->udp, nsd->ifs); 1961 server_close_all_sockets(nsd->tcp, nsd->ifs); 1962 #ifdef HAVE_SSL 1963 daemon_remote_close(nsd->rc); 1964 #endif 1965 send_children_quit_and_wait(nsd); 1966 1967 /* Unlink it if possible... */ 1968 unlinkpid(nsd->pidfile); 1969 unlink(nsd->task[0]->fname); 1970 unlink(nsd->task[1]->fname); 1971 #ifdef USE_ZONE_STATS 1972 unlink(nsd->zonestatfname[0]); 1973 unlink(nsd->zonestatfname[1]); 1974 #endif 1975 #ifdef USE_DNSTAP 1976 dt_collector_close(nsd->dt_collector, nsd); 1977 #endif 1978 1979 if(reload_listener.fd != -1) { 1980 sig_atomic_t cmd = NSD_QUIT; 1981 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1982 "main: ipc send quit to reload-process")); 1983 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 1984 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 1985 strerror(errno)); 1986 } 1987 fsync(reload_listener.fd); 1988 close(reload_listener.fd); 1989 /* wait for reload to finish processing */ 1990 while(1) { 1991 if(waitpid(reload_pid, NULL, 0) == -1) { 1992 if(errno == EINTR) continue; 1993 if(errno == ECHILD) break; 1994 log_msg(LOG_ERR, "waitpid(reload %d): %s", 1995 (int)reload_pid, strerror(errno)); 1996 } 1997 break; 1998 } 1999 } 2000 if(nsd->xfrd_listener->fd != -1) { 2001 /* complete quit, stop xfrd */ 2002 sig_atomic_t cmd = NSD_QUIT; 2003 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2004 "main: ipc send quit to xfrd")); 2005 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2006 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 2007 strerror(errno)); 2008 } 2009 fsync(nsd->xfrd_listener->fd); 2010 close(nsd->xfrd_listener->fd); 2011 (void)kill(nsd->pid, SIGTERM); 2012 } 2013 2014 #ifdef MEMCLEAN /* OS collects memory pages */ 2015 region_destroy(server_region); 2016 #endif 2017 /* write the nsd.db to disk, wait for it to complete */ 2018 udb_base_sync(nsd->db->udb, 1); 2019 udb_base_close(nsd->db->udb); 2020 server_shutdown(nsd); 2021 } 2022 2023 static query_state_type 2024 server_process_query(struct nsd *nsd, struct query *query) 2025 { 2026 return query_process(query, nsd); 2027 } 2028 2029 static query_state_type 2030 server_process_query_udp(struct nsd *nsd, struct query *query) 2031 { 2032 #ifdef RATELIMIT 2033 if(query_process(query, nsd) != QUERY_DISCARDED) { 2034 if(rrl_process_query(query)) 2035 return rrl_slip(query); 2036 else return QUERY_PROCESSED; 2037 } 2038 return QUERY_DISCARDED; 2039 #else 2040 return query_process(query, nsd); 2041 #endif 2042 } 2043 2044 struct event_base* 2045 nsd_child_event_base(void) 2046 { 2047 struct event_base* base; 2048 #ifdef USE_MINI_EVENT 2049 static time_t secs; 2050 static struct timeval now; 2051 base = event_init(&secs, &now); 2052 #else 2053 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) 2054 /* libev */ 2055 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); 2056 # else 2057 /* libevent */ 2058 # ifdef HAVE_EVENT_BASE_NEW 2059 base = event_base_new(); 2060 # else 2061 base = event_init(); 2062 # endif 2063 # endif 2064 #endif 2065 return base; 2066 } 2067 2068 /* 2069 * Serve DNS requests. 2070 */ 2071 void 2072 server_child(struct nsd *nsd) 2073 { 2074 size_t i, from, numifs; 2075 region_type *server_region = region_create(xalloc, free); 2076 struct event_base* event_base = nsd_child_event_base(); 2077 query_type *udp_query; 2078 sig_atomic_t mode; 2079 2080 if(!event_base) { 2081 log_msg(LOG_ERR, "nsd server could not create event base"); 2082 exit(1); 2083 } 2084 nsd->event_base = event_base; 2085 nsd->server_region = server_region; 2086 2087 #ifdef RATELIMIT 2088 rrl_init(nsd->this_child->child_num); 2089 #endif 2090 2091 assert(nsd->server_kind != NSD_SERVER_MAIN); 2092 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 2093 2094 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 2095 server_close_all_sockets(nsd->tcp, nsd->ifs); 2096 } 2097 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 2098 server_close_all_sockets(nsd->udp, nsd->ifs); 2099 } 2100 2101 if (nsd->this_child->parent_fd != -1) { 2102 struct event *handler; 2103 struct ipc_handler_conn_data* user_data = 2104 (struct ipc_handler_conn_data*)region_alloc( 2105 server_region, sizeof(struct ipc_handler_conn_data)); 2106 user_data->nsd = nsd; 2107 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); 2108 2109 handler = (struct event*) region_alloc( 2110 server_region, sizeof(*handler)); 2111 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| 2112 EV_READ, child_handle_parent_command, user_data); 2113 if(event_base_set(event_base, handler) != 0) 2114 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); 2115 if(event_add(handler, NULL) != 0) 2116 log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); 2117 } 2118 2119 if(nsd->reuseport) { 2120 numifs = nsd->ifs / nsd->reuseport; 2121 from = numifs * nsd->this_child->child_num; 2122 if(from+numifs > nsd->ifs) { /* should not happen */ 2123 from = 0; 2124 numifs = nsd->ifs; 2125 } 2126 } else { 2127 from = 0; 2128 numifs = nsd->ifs; 2129 } 2130 2131 if (nsd->server_kind & NSD_SERVER_UDP) { 2132 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) 2133 udp_query = query_create(server_region, 2134 compressed_dname_offsets, compression_table_size, 2135 compressed_dnames); 2136 #else 2137 udp_query = NULL; 2138 memset(msgs, 0, sizeof(msgs)); 2139 for (i = 0; i < NUM_RECV_PER_SELECT; i++) { 2140 queries[i] = query_create(server_region, 2141 compressed_dname_offsets, 2142 compression_table_size, compressed_dnames); 2143 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2144 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 2145 iovecs[i].iov_len = buffer_remaining(queries[i]->packet);; 2146 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 2147 msgs[i].msg_hdr.msg_iovlen = 1; 2148 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 2149 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2150 } 2151 #endif 2152 for (i = from; i < from+numifs; ++i) { 2153 struct udp_handler_data *data; 2154 struct event *handler; 2155 2156 data = (struct udp_handler_data *) region_alloc( 2157 server_region, 2158 sizeof(struct udp_handler_data)); 2159 data->query = udp_query; 2160 data->nsd = nsd; 2161 data->socket = &nsd->udp[i]; 2162 2163 handler = (struct event*) region_alloc( 2164 server_region, sizeof(*handler)); 2165 event_set(handler, nsd->udp[i].s, EV_PERSIST|EV_READ, 2166 handle_udp, data); 2167 if(event_base_set(event_base, handler) != 0) 2168 log_msg(LOG_ERR, "nsd udp: event_base_set failed"); 2169 if(event_add(handler, NULL) != 0) 2170 log_msg(LOG_ERR, "nsd udp: event_add failed"); 2171 } 2172 } 2173 2174 /* 2175 * Keep track of all the TCP accept handlers so we can enable 2176 * and disable them based on the current number of active TCP 2177 * connections. 2178 */ 2179 tcp_accept_handler_count = numifs; 2180 tcp_accept_handlers = (struct tcp_accept_handler_data*) 2181 region_alloc_array(server_region, 2182 numifs, sizeof(*tcp_accept_handlers)); 2183 if (nsd->server_kind & NSD_SERVER_TCP) { 2184 for (i = from; i < numifs; ++i) { 2185 struct event *handler = &tcp_accept_handlers[i-from].event; 2186 struct tcp_accept_handler_data* data = 2187 &tcp_accept_handlers[i-from]; 2188 data->nsd = nsd; 2189 data->socket = &nsd->tcp[i]; 2190 event_set(handler, nsd->tcp[i].s, EV_PERSIST|EV_READ, 2191 handle_tcp_accept, data); 2192 if(event_base_set(event_base, handler) != 0) 2193 log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); 2194 if(event_add(handler, NULL) != 0) 2195 log_msg(LOG_ERR, "nsd tcp: event_add failed"); 2196 data->event_added = 1; 2197 } 2198 } else tcp_accept_handler_count = 0; 2199 2200 /* The main loop... */ 2201 while ((mode = nsd->mode) != NSD_QUIT) { 2202 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 2203 2204 /* Do we need to do the statistics... */ 2205 if (mode == NSD_STATS) { 2206 #ifdef BIND8_STATS 2207 int p = nsd->st.period; 2208 nsd->st.period = 1; /* force stats printout */ 2209 /* Dump the statistics */ 2210 bind8_stats(nsd); 2211 nsd->st.period = p; 2212 #else /* !BIND8_STATS */ 2213 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 2214 #endif /* BIND8_STATS */ 2215 2216 nsd->mode = NSD_RUN; 2217 } 2218 else if (mode == NSD_REAP_CHILDREN) { 2219 /* got signal, notify parent. parent reaps terminated children. */ 2220 if (nsd->this_child->parent_fd != -1) { 2221 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 2222 if (write(nsd->this_child->parent_fd, 2223 &parent_notify, 2224 sizeof(parent_notify)) == -1) 2225 { 2226 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 2227 (int) nsd->this_child->pid, strerror(errno)); 2228 } 2229 } else /* no parent, so reap 'em */ 2230 while (waitpid(-1, NULL, WNOHANG) > 0) ; 2231 nsd->mode = NSD_RUN; 2232 } 2233 else if(mode == NSD_RUN) { 2234 /* Wait for a query... */ 2235 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 2236 if (errno != EINTR) { 2237 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 2238 break; 2239 } 2240 } 2241 } else if(mode == NSD_QUIT) { 2242 /* ignore here, quit */ 2243 } else { 2244 log_msg(LOG_ERR, "mode bad value %d, back to service.", 2245 (int)mode); 2246 nsd->mode = NSD_RUN; 2247 } 2248 } 2249 2250 #ifdef BIND8_STATS 2251 bind8_stats(nsd); 2252 #endif /* BIND8_STATS */ 2253 2254 #ifdef MEMCLEAN /* OS collects memory pages */ 2255 #ifdef RATELIMIT 2256 rrl_deinit(nsd->this_child->child_num); 2257 #endif 2258 event_base_free(event_base); 2259 region_destroy(server_region); 2260 #endif 2261 server_shutdown(nsd); 2262 } 2263 2264 #if defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) 2265 static void 2266 handle_udp(int fd, short event, void* arg) 2267 { 2268 struct udp_handler_data *data = (struct udp_handler_data *) arg; 2269 int received, sent, recvcount, i; 2270 struct query *q; 2271 2272 if (!(event & EV_READ)) { 2273 return; 2274 } 2275 recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 2276 /* this printf strangely gave a performance increase on Linux */ 2277 /* printf("recvcount %d \n", recvcount); */ 2278 if (recvcount == -1) { 2279 if (errno != EAGAIN && errno != EINTR) { 2280 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 2281 STATUP(data->nsd, rxerr); 2282 /* No zone statup */ 2283 } 2284 /* Simply no data available */ 2285 return; 2286 } 2287 for (i = 0; i < recvcount; i++) { 2288 loopstart: 2289 received = msgs[i].msg_len; 2290 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 2291 q = queries[i]; 2292 if (received == -1) { 2293 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( 2294 msgs[i].msg_hdr.msg_flags)); 2295 STATUP(data->nsd, rxerr); 2296 /* No zone statup */ 2297 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2298 iovecs[i].iov_len = buffer_remaining(q->packet); 2299 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2300 goto swap_drop; 2301 } 2302 2303 /* Account... */ 2304 #ifdef BIND8_STATS 2305 if (data->socket->fam == AF_INET) { 2306 STATUP(data->nsd, qudp); 2307 } else if (data->socket->fam == AF_INET6) { 2308 STATUP(data->nsd, qudp6); 2309 } 2310 #endif 2311 2312 buffer_skip(q->packet, received); 2313 buffer_flip(q->packet); 2314 #ifdef USE_DNSTAP 2315 dt_collector_submit_auth_query(data->nsd, &q->addr, q->addrlen, 2316 q->tcp, q->packet); 2317 #endif /* USE_DNSTAP */ 2318 2319 /* Process and answer the query... */ 2320 if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) { 2321 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 2322 STATUP(data->nsd, nona); 2323 ZTATUP(data->nsd, q->zone, nona); 2324 } 2325 2326 #ifdef USE_ZONE_STATS 2327 if (data->socket->fam == AF_INET) { 2328 ZTATUP(data->nsd, q->zone, qudp); 2329 } else if (data->socket->fam == AF_INET6) { 2330 ZTATUP(data->nsd, q->zone, qudp6); 2331 } 2332 #endif 2333 2334 /* Add EDNS0 and TSIG info if necessary. */ 2335 query_add_optional(q, data->nsd); 2336 2337 buffer_flip(q->packet); 2338 iovecs[i].iov_len = buffer_remaining(q->packet); 2339 #ifdef BIND8_STATS 2340 /* Account the rcode & TC... */ 2341 STATUP2(data->nsd, rcode, RCODE(q->packet)); 2342 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 2343 if (TC(q->packet)) { 2344 STATUP(data->nsd, truncated); 2345 ZTATUP(data->nsd, q->zone, truncated); 2346 } 2347 #endif /* BIND8_STATS */ 2348 #ifdef USE_DNSTAP 2349 dt_collector_submit_auth_response(data->nsd, 2350 &q->addr, q->addrlen, q->tcp, q->packet, 2351 q->zone); 2352 #endif /* USE_DNSTAP */ 2353 } else { 2354 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2355 iovecs[i].iov_len = buffer_remaining(q->packet); 2356 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2357 swap_drop: 2358 STATUP(data->nsd, dropped); 2359 ZTATUP(data->nsd, q->zone, dropped); 2360 if(i != recvcount-1) { 2361 /* swap with last and decrease recvcount */ 2362 struct mmsghdr mtmp = msgs[i]; 2363 struct iovec iotmp = iovecs[i]; 2364 recvcount--; 2365 msgs[i] = msgs[recvcount]; 2366 iovecs[i] = iovecs[recvcount]; 2367 queries[i] = queries[recvcount]; 2368 msgs[recvcount] = mtmp; 2369 iovecs[recvcount] = iotmp; 2370 queries[recvcount] = q; 2371 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 2372 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; 2373 goto loopstart; 2374 } else { recvcount --; } 2375 } 2376 } 2377 2378 /* send until all are sent */ 2379 i = 0; 2380 while(i<recvcount) { 2381 sent = sendmmsg(fd, &msgs[i], recvcount-i, 0); 2382 if(sent == -1) { 2383 const char* es = strerror(errno); 2384 char a[48]; 2385 addr2str(&queries[i]->addr, a, sizeof(a)); 2386 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 2387 #ifdef BIND8_STATS 2388 data->nsd->st.txerr += recvcount-i; 2389 #endif /* BIND8_STATS */ 2390 break; 2391 } 2392 i += sent; 2393 } 2394 for(i=0; i<recvcount; i++) { 2395 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2396 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 2397 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2398 } 2399 } 2400 2401 #else /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */ 2402 2403 static void 2404 handle_udp(int fd, short event, void* arg) 2405 { 2406 struct udp_handler_data *data = (struct udp_handler_data *) arg; 2407 int received, sent; 2408 #ifndef NONBLOCKING_IS_BROKEN 2409 #ifdef HAVE_RECVMMSG 2410 int recvcount; 2411 #endif /* HAVE_RECVMMSG */ 2412 int i; 2413 #endif /* NONBLOCKING_IS_BROKEN */ 2414 struct query *q; 2415 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) 2416 q = data->query; 2417 #endif 2418 2419 if (!(event & EV_READ)) { 2420 return; 2421 } 2422 #ifndef NONBLOCKING_IS_BROKEN 2423 #ifdef HAVE_RECVMMSG 2424 recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 2425 /* this printf strangely gave a performance increase on Linux */ 2426 /* printf("recvcount %d \n", recvcount); */ 2427 if (recvcount == -1) { 2428 if (errno != EAGAIN && errno != EINTR) { 2429 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 2430 STATUP(data->nsd, rxerr); 2431 /* No zone statup */ 2432 } 2433 /* Simply no data available */ 2434 return; 2435 } 2436 for (i = 0; i < recvcount; i++) { 2437 received = msgs[i].msg_len; 2438 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 2439 if (received == -1) { 2440 log_msg(LOG_ERR, "recvmmsg failed"); 2441 STATUP(data->nsd, rxerr); 2442 /* No zone statup */ 2443 /* the error can be found in msgs[i].msg_hdr.msg_flags */ 2444 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2445 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 2446 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2447 continue; 2448 } 2449 q = queries[i]; 2450 #else 2451 for(i=0; i<NUM_RECV_PER_SELECT; i++) { 2452 #endif /* HAVE_RECVMMSG */ 2453 #endif /* NONBLOCKING_IS_BROKEN */ 2454 2455 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) 2456 /* Initialize the query... */ 2457 query_reset(q, UDP_MAX_MESSAGE_LEN, 0); 2458 2459 received = recvfrom(fd, 2460 buffer_begin(q->packet), 2461 buffer_remaining(q->packet), 2462 0, 2463 (struct sockaddr *)&q->addr, 2464 &q->addrlen); 2465 if (received == -1) { 2466 if (errno != EAGAIN && errno != EINTR) { 2467 log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno)); 2468 STATUP(data->nsd, rxerr); 2469 /* No zone statup */ 2470 } 2471 return; 2472 } 2473 #endif /* NONBLOCKING_IS_BROKEN || !HAVE_RECVMMSG */ 2474 2475 /* Account... */ 2476 if (data->socket->fam == AF_INET) { 2477 STATUP(data->nsd, qudp); 2478 } else if (data->socket->fam == AF_INET6) { 2479 STATUP(data->nsd, qudp6); 2480 } 2481 2482 buffer_skip(q->packet, received); 2483 buffer_flip(q->packet); 2484 #ifdef USE_DNSTAP 2485 dt_collector_submit_auth_query(data->nsd, &q->addr, q->addrlen, 2486 q->tcp, q->packet); 2487 #endif /* USE_DNSTAP */ 2488 2489 /* Process and answer the query... */ 2490 if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) { 2491 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 2492 STATUP(data->nsd, nona); 2493 ZTATUP(data->nsd, q->zone, nona); 2494 } 2495 2496 #ifdef USE_ZONE_STATS 2497 if (data->socket->fam == AF_INET) { 2498 ZTATUP(data->nsd, q->zone, qudp); 2499 } else if (data->socket->fam == AF_INET6) { 2500 ZTATUP(data->nsd, q->zone, qudp6); 2501 } 2502 #endif 2503 2504 /* Add EDNS0 and TSIG info if necessary. */ 2505 query_add_optional(q, data->nsd); 2506 2507 buffer_flip(q->packet); 2508 2509 sent = sendto(fd, 2510 buffer_begin(q->packet), 2511 buffer_remaining(q->packet), 2512 0, 2513 (struct sockaddr *) &q->addr, 2514 q->addrlen); 2515 if (sent == -1) { 2516 const char* es = strerror(errno); 2517 char a[48]; 2518 addr2str(&q->addr, a, sizeof(a)); 2519 log_msg(LOG_ERR, "sendto %s failed: %s", a, es); 2520 STATUP(data->nsd, txerr); 2521 ZTATUP(data->nsd, q->zone, txerr); 2522 } else if ((size_t) sent != buffer_remaining(q->packet)) { 2523 log_msg(LOG_ERR, "sent %d in place of %d bytes", sent, (int) buffer_remaining(q->packet)); 2524 } else { 2525 #ifdef BIND8_STATS 2526 /* Account the rcode & TC... */ 2527 STATUP2(data->nsd, rcode, RCODE(q->packet)); 2528 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 2529 if (TC(q->packet)) { 2530 STATUP(data->nsd, truncated); 2531 ZTATUP(data->nsd, q->zone, truncated); 2532 } 2533 #endif /* BIND8_STATS */ 2534 #ifdef USE_DNSTAP 2535 dt_collector_submit_auth_response(data->nsd, 2536 &q->addr, q->addrlen, q->tcp, 2537 q->packet, q->zone); 2538 #endif /* USE_DNSTAP */ 2539 } 2540 } else { 2541 STATUP(data->nsd, dropped); 2542 ZTATUP(data->nsd, q->zone, dropped); 2543 } 2544 #ifndef NONBLOCKING_IS_BROKEN 2545 #ifdef HAVE_RECVMMSG 2546 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2547 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 2548 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2549 #endif 2550 } 2551 #endif 2552 } 2553 #endif /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */ 2554 2555 2556 static void 2557 cleanup_tcp_handler(struct tcp_handler_data* data) 2558 { 2559 event_del(&data->event); 2560 close(data->event.ev_fd); 2561 2562 /* 2563 * Enable the TCP accept handlers when the current number of 2564 * TCP connections is about to drop below the maximum number 2565 * of TCP connections. 2566 */ 2567 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 2568 configure_handler_event_types(EV_READ|EV_PERSIST); 2569 if(slowaccept) { 2570 event_del(&slowaccept_event); 2571 slowaccept = 0; 2572 } 2573 } 2574 --data->nsd->current_tcp_count; 2575 assert(data->nsd->current_tcp_count >= 0); 2576 2577 region_destroy(data->region); 2578 } 2579 2580 static void 2581 handle_tcp_reading(int fd, short event, void* arg) 2582 { 2583 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 2584 ssize_t received; 2585 struct event_base* ev_base; 2586 struct timeval timeout; 2587 2588 if ((event & EV_TIMEOUT)) { 2589 /* Connection timed out. */ 2590 cleanup_tcp_handler(data); 2591 return; 2592 } 2593 2594 if (data->nsd->tcp_query_count > 0 && 2595 data->query_count >= data->nsd->tcp_query_count) { 2596 /* No more queries allowed on this tcp connection. */ 2597 cleanup_tcp_handler(data); 2598 return; 2599 } 2600 2601 assert((event & EV_READ)); 2602 2603 if (data->bytes_transmitted == 0) { 2604 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 2605 } 2606 2607 /* 2608 * Check if we received the leading packet length bytes yet. 2609 */ 2610 if (data->bytes_transmitted < sizeof(uint16_t)) { 2611 received = read(fd, 2612 (char *) &data->query->tcplen 2613 + data->bytes_transmitted, 2614 sizeof(uint16_t) - data->bytes_transmitted); 2615 if (received == -1) { 2616 if (errno == EAGAIN || errno == EINTR) { 2617 /* 2618 * Read would block, wait until more 2619 * data is available. 2620 */ 2621 return; 2622 } else { 2623 char buf[48]; 2624 addr2str(&data->query->addr, buf, sizeof(buf)); 2625 #ifdef ECONNRESET 2626 if (verbosity >= 2 || errno != ECONNRESET) 2627 #endif /* ECONNRESET */ 2628 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 2629 cleanup_tcp_handler(data); 2630 return; 2631 } 2632 } else if (received == 0) { 2633 /* EOF */ 2634 cleanup_tcp_handler(data); 2635 return; 2636 } 2637 2638 data->bytes_transmitted += received; 2639 if (data->bytes_transmitted < sizeof(uint16_t)) { 2640 /* 2641 * Not done with the tcplen yet, wait for more 2642 * data to become available. 2643 */ 2644 return; 2645 } 2646 2647 assert(data->bytes_transmitted == sizeof(uint16_t)); 2648 2649 data->query->tcplen = ntohs(data->query->tcplen); 2650 2651 /* 2652 * Minimum query size is: 2653 * 2654 * Size of the header (12) 2655 * + Root domain name (1) 2656 * + Query class (2) 2657 * + Query type (2) 2658 */ 2659 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 2660 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 2661 cleanup_tcp_handler(data); 2662 return; 2663 } 2664 2665 if (data->query->tcplen > data->query->maxlen) { 2666 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 2667 cleanup_tcp_handler(data); 2668 return; 2669 } 2670 2671 buffer_set_limit(data->query->packet, data->query->tcplen); 2672 } 2673 2674 assert(buffer_remaining(data->query->packet) > 0); 2675 2676 /* Read the (remaining) query data. */ 2677 received = read(fd, 2678 buffer_current(data->query->packet), 2679 buffer_remaining(data->query->packet)); 2680 if (received == -1) { 2681 if (errno == EAGAIN || errno == EINTR) { 2682 /* 2683 * Read would block, wait until more data is 2684 * available. 2685 */ 2686 return; 2687 } else { 2688 char buf[48]; 2689 addr2str(&data->query->addr, buf, sizeof(buf)); 2690 #ifdef ECONNRESET 2691 if (verbosity >= 2 || errno != ECONNRESET) 2692 #endif /* ECONNRESET */ 2693 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 2694 cleanup_tcp_handler(data); 2695 return; 2696 } 2697 } else if (received == 0) { 2698 /* EOF */ 2699 cleanup_tcp_handler(data); 2700 return; 2701 } 2702 2703 data->bytes_transmitted += received; 2704 buffer_skip(data->query->packet, received); 2705 if (buffer_remaining(data->query->packet) > 0) { 2706 /* 2707 * Message not yet complete, wait for more data to 2708 * become available. 2709 */ 2710 return; 2711 } 2712 2713 assert(buffer_position(data->query->packet) == data->query->tcplen); 2714 2715 /* Account... */ 2716 #ifdef BIND8_STATS 2717 #ifndef INET6 2718 STATUP(data->nsd, ctcp); 2719 #else 2720 if (data->query->addr.ss_family == AF_INET) { 2721 STATUP(data->nsd, ctcp); 2722 } else if (data->query->addr.ss_family == AF_INET6) { 2723 STATUP(data->nsd, ctcp6); 2724 } 2725 #endif 2726 #endif /* BIND8_STATS */ 2727 2728 /* We have a complete query, process it. */ 2729 2730 /* tcp-query-count: handle query counter ++ */ 2731 data->query_count++; 2732 2733 buffer_flip(data->query->packet); 2734 #ifdef USE_DNSTAP 2735 dt_collector_submit_auth_query(data->nsd, &data->query->addr, 2736 data->query->addrlen, data->query->tcp, data->query->packet); 2737 #endif /* USE_DNSTAP */ 2738 data->query_state = server_process_query(data->nsd, data->query); 2739 if (data->query_state == QUERY_DISCARDED) { 2740 /* Drop the packet and the entire connection... */ 2741 STATUP(data->nsd, dropped); 2742 ZTATUP(data->nsd, data->query->zone, dropped); 2743 cleanup_tcp_handler(data); 2744 return; 2745 } 2746 2747 #ifdef BIND8_STATS 2748 if (RCODE(data->query->packet) == RCODE_OK 2749 && !AA(data->query->packet)) 2750 { 2751 STATUP(data->nsd, nona); 2752 ZTATUP(data->nsd, data->query->zone, nona); 2753 } 2754 #endif /* BIND8_STATS */ 2755 2756 #ifdef USE_ZONE_STATS 2757 #ifndef INET6 2758 ZTATUP(data->nsd, data->query->zone, ctcp); 2759 #else 2760 if (data->query->addr.ss_family == AF_INET) { 2761 ZTATUP(data->nsd, data->query->zone, ctcp); 2762 } else if (data->query->addr.ss_family == AF_INET6) { 2763 ZTATUP(data->nsd, data->query->zone, ctcp6); 2764 } 2765 #endif 2766 #endif /* USE_ZONE_STATS */ 2767 2768 query_add_optional(data->query, data->nsd); 2769 2770 /* Switch to the tcp write handler. */ 2771 buffer_flip(data->query->packet); 2772 data->query->tcplen = buffer_remaining(data->query->packet); 2773 #ifdef USE_DNSTAP 2774 dt_collector_submit_auth_response(data->nsd, &data->query->addr, 2775 data->query->addrlen, data->query->tcp, data->query->packet, 2776 data->query->zone); 2777 #endif /* USE_DNSTAP */ 2778 data->bytes_transmitted = 0; 2779 2780 timeout.tv_sec = data->tcp_timeout / 1000; 2781 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 2782 2783 ev_base = data->event.ev_base; 2784 event_del(&data->event); 2785 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 2786 handle_tcp_writing, data); 2787 if(event_base_set(ev_base, &data->event) != 0) 2788 log_msg(LOG_ERR, "event base set tcpr failed"); 2789 if(event_add(&data->event, &timeout) != 0) 2790 log_msg(LOG_ERR, "event add tcpr failed"); 2791 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 2792 handle_tcp_writing(fd, EV_WRITE, data); 2793 } 2794 2795 static void 2796 handle_tcp_writing(int fd, short event, void* arg) 2797 { 2798 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 2799 ssize_t sent; 2800 struct query *q = data->query; 2801 struct timeval timeout; 2802 struct event_base* ev_base; 2803 2804 if ((event & EV_TIMEOUT)) { 2805 /* Connection timed out. */ 2806 cleanup_tcp_handler(data); 2807 return; 2808 } 2809 2810 assert((event & EV_WRITE)); 2811 2812 if (data->bytes_transmitted < sizeof(q->tcplen)) { 2813 /* Writing the response packet length. */ 2814 uint16_t n_tcplen = htons(q->tcplen); 2815 #ifdef HAVE_WRITEV 2816 struct iovec iov[2]; 2817 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted; 2818 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 2819 iov[1].iov_base = buffer_begin(q->packet); 2820 iov[1].iov_len = buffer_limit(q->packet); 2821 sent = writev(fd, iov, 2); 2822 #else /* HAVE_WRITEV */ 2823 sent = write(fd, 2824 (const char *) &n_tcplen + data->bytes_transmitted, 2825 sizeof(n_tcplen) - data->bytes_transmitted); 2826 #endif /* HAVE_WRITEV */ 2827 if (sent == -1) { 2828 if (errno == EAGAIN || errno == EINTR) { 2829 /* 2830 * Write would block, wait until 2831 * socket becomes writable again. 2832 */ 2833 return; 2834 } else { 2835 #ifdef ECONNRESET 2836 if(verbosity >= 2 || errno != ECONNRESET) 2837 #endif /* ECONNRESET */ 2838 #ifdef EPIPE 2839 if(verbosity >= 2 || errno != EPIPE) 2840 #endif /* EPIPE 'broken pipe' */ 2841 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 2842 cleanup_tcp_handler(data); 2843 return; 2844 } 2845 } 2846 2847 data->bytes_transmitted += sent; 2848 if (data->bytes_transmitted < sizeof(q->tcplen)) { 2849 /* 2850 * Writing not complete, wait until socket 2851 * becomes writable again. 2852 */ 2853 return; 2854 } 2855 2856 #ifdef HAVE_WRITEV 2857 sent -= sizeof(n_tcplen); 2858 /* handle potential 'packet done' code */ 2859 goto packet_could_be_done; 2860 #endif 2861 } 2862 2863 sent = write(fd, 2864 buffer_current(q->packet), 2865 buffer_remaining(q->packet)); 2866 if (sent == -1) { 2867 if (errno == EAGAIN || errno == EINTR) { 2868 /* 2869 * Write would block, wait until 2870 * socket becomes writable again. 2871 */ 2872 return; 2873 } else { 2874 #ifdef ECONNRESET 2875 if(verbosity >= 2 || errno != ECONNRESET) 2876 #endif /* ECONNRESET */ 2877 #ifdef EPIPE 2878 if(verbosity >= 2 || errno != EPIPE) 2879 #endif /* EPIPE 'broken pipe' */ 2880 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 2881 cleanup_tcp_handler(data); 2882 return; 2883 } 2884 } 2885 2886 data->bytes_transmitted += sent; 2887 #ifdef HAVE_WRITEV 2888 packet_could_be_done: 2889 #endif 2890 buffer_skip(q->packet, sent); 2891 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 2892 /* 2893 * Still more data to write when socket becomes 2894 * writable again. 2895 */ 2896 return; 2897 } 2898 2899 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 2900 2901 if (data->query_state == QUERY_IN_AXFR) { 2902 /* Continue processing AXFR and writing back results. */ 2903 buffer_clear(q->packet); 2904 data->query_state = query_axfr(data->nsd, q); 2905 if (data->query_state != QUERY_PROCESSED) { 2906 query_add_optional(data->query, data->nsd); 2907 2908 /* Reset data. */ 2909 buffer_flip(q->packet); 2910 q->tcplen = buffer_remaining(q->packet); 2911 data->bytes_transmitted = 0; 2912 /* Reset timeout. */ 2913 timeout.tv_sec = data->tcp_timeout / 1000; 2914 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 2915 ev_base = data->event.ev_base; 2916 event_del(&data->event); 2917 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 2918 handle_tcp_writing, data); 2919 if(event_base_set(ev_base, &data->event) != 0) 2920 log_msg(LOG_ERR, "event base set tcpw failed"); 2921 if(event_add(&data->event, &timeout) != 0) 2922 log_msg(LOG_ERR, "event add tcpw failed"); 2923 2924 /* 2925 * Write data if/when the socket is writable 2926 * again. 2927 */ 2928 return; 2929 } 2930 } 2931 2932 /* 2933 * Done sending, wait for the next request to arrive on the 2934 * TCP socket by installing the TCP read handler. 2935 */ 2936 if (data->nsd->tcp_query_count > 0 && 2937 data->query_count >= data->nsd->tcp_query_count) { 2938 2939 (void) shutdown(fd, SHUT_WR); 2940 } 2941 2942 data->bytes_transmitted = 0; 2943 2944 timeout.tv_sec = data->tcp_timeout / 1000; 2945 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 2946 ev_base = data->event.ev_base; 2947 event_del(&data->event); 2948 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 2949 handle_tcp_reading, data); 2950 if(event_base_set(ev_base, &data->event) != 0) 2951 log_msg(LOG_ERR, "event base set tcpw failed"); 2952 if(event_add(&data->event, &timeout) != 0) 2953 log_msg(LOG_ERR, "event add tcpw failed"); 2954 } 2955 2956 2957 static void 2958 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), 2959 void* ATTR_UNUSED(arg)) 2960 { 2961 if(slowaccept) { 2962 configure_handler_event_types(EV_PERSIST | EV_READ); 2963 slowaccept = 0; 2964 } 2965 } 2966 2967 /* 2968 * Handle an incoming TCP connection. The connection is accepted and 2969 * a new TCP reader event handler is added. The TCP handler 2970 * is responsible for cleanup when the connection is closed. 2971 */ 2972 static void 2973 handle_tcp_accept(int fd, short event, void* arg) 2974 { 2975 struct tcp_accept_handler_data *data 2976 = (struct tcp_accept_handler_data *) arg; 2977 int s; 2978 struct tcp_handler_data *tcp_data; 2979 region_type *tcp_region; 2980 #ifdef INET6 2981 struct sockaddr_storage addr; 2982 #else 2983 struct sockaddr_in addr; 2984 #endif 2985 socklen_t addrlen; 2986 struct timeval timeout; 2987 2988 if (!(event & EV_READ)) { 2989 return; 2990 } 2991 2992 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 2993 return; 2994 } 2995 2996 /* Accept it... */ 2997 addrlen = sizeof(addr); 2998 #ifndef HAVE_ACCEPT4 2999 s = accept(fd, (struct sockaddr *) &addr, &addrlen); 3000 #else 3001 s = accept4(fd, (struct sockaddr *) &addr, &addrlen, SOCK_NONBLOCK); 3002 #endif 3003 if (s == -1) { 3004 /** 3005 * EMFILE and ENFILE is a signal that the limit of open 3006 * file descriptors has been reached. Pause accept(). 3007 * EINTR is a signal interrupt. The others are various OS ways 3008 * of saying that the client has closed the connection. 3009 */ 3010 if (errno == EMFILE || errno == ENFILE) { 3011 if (!slowaccept) { 3012 /* disable accept events */ 3013 struct timeval tv; 3014 configure_handler_event_types(0); 3015 tv.tv_sec = SLOW_ACCEPT_TIMEOUT; 3016 tv.tv_usec = 0L; 3017 event_set(&slowaccept_event, -1, EV_TIMEOUT, 3018 handle_slowaccept_timeout, NULL); 3019 (void)event_base_set(data->event.ev_base, 3020 &slowaccept_event); 3021 (void)event_add(&slowaccept_event, &tv); 3022 slowaccept = 1; 3023 /* We don't want to spam the logs here */ 3024 } 3025 } else if (errno != EINTR 3026 && errno != EWOULDBLOCK 3027 #ifdef ECONNABORTED 3028 && errno != ECONNABORTED 3029 #endif /* ECONNABORTED */ 3030 #ifdef EPROTO 3031 && errno != EPROTO 3032 #endif /* EPROTO */ 3033 ) { 3034 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 3035 } 3036 return; 3037 } 3038 3039 #ifndef HAVE_ACCEPT4 3040 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 3041 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 3042 close(s); 3043 return; 3044 } 3045 #endif 3046 3047 /* 3048 * This region is deallocated when the TCP connection is 3049 * closed by the TCP handler. 3050 */ 3051 tcp_region = region_create(xalloc, free); 3052 tcp_data = (struct tcp_handler_data *) region_alloc( 3053 tcp_region, sizeof(struct tcp_handler_data)); 3054 tcp_data->region = tcp_region; 3055 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 3056 compression_table_size, compressed_dnames); 3057 tcp_data->nsd = data->nsd; 3058 tcp_data->query_count = 0; 3059 3060 tcp_data->query_state = QUERY_PROCESSED; 3061 tcp_data->bytes_transmitted = 0; 3062 memcpy(&tcp_data->query->addr, &addr, addrlen); 3063 tcp_data->query->addrlen = addrlen; 3064 3065 tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000; 3066 if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) { 3067 /* very busy, give smaller timeout */ 3068 tcp_data->tcp_timeout = 200; 3069 } 3070 timeout.tv_sec = tcp_data->tcp_timeout / 1000; 3071 timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000; 3072 3073 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 3074 handle_tcp_reading, tcp_data); 3075 if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) { 3076 log_msg(LOG_ERR, "cannot set tcp event base"); 3077 close(s); 3078 region_destroy(tcp_region); 3079 return; 3080 } 3081 if(event_add(&tcp_data->event, &timeout) != 0) { 3082 log_msg(LOG_ERR, "cannot add tcp to event base"); 3083 close(s); 3084 region_destroy(tcp_region); 3085 return; 3086 } 3087 3088 /* 3089 * Keep track of the total number of TCP handlers installed so 3090 * we can stop accepting connections when the maximum number 3091 * of simultaneous TCP connections is reached. 3092 */ 3093 ++data->nsd->current_tcp_count; 3094 if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 3095 configure_handler_event_types(0); 3096 } 3097 } 3098 3099 static void 3100 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) 3101 { 3102 size_t i; 3103 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 3104 for (i = 0; i < nsd->child_count; ++i) { 3105 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) { 3106 if (write(nsd->children[i].child_fd, 3107 &command, 3108 sizeof(command)) == -1) 3109 { 3110 if(errno != EAGAIN && errno != EINTR) 3111 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 3112 (int) command, 3113 (int) nsd->children[i].pid, 3114 strerror(errno)); 3115 } else if (timeout > 0) { 3116 (void)block_read(NULL, 3117 nsd->children[i].child_fd, 3118 &command, sizeof(command), timeout); 3119 } 3120 fsync(nsd->children[i].child_fd); 3121 close(nsd->children[i].child_fd); 3122 nsd->children[i].child_fd = -1; 3123 } 3124 } 3125 } 3126 3127 static void 3128 send_children_quit(struct nsd* nsd) 3129 { 3130 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); 3131 send_children_command(nsd, NSD_QUIT, 0); 3132 } 3133 3134 static void 3135 send_children_quit_and_wait(struct nsd* nsd) 3136 { 3137 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait")); 3138 send_children_command(nsd, NSD_QUIT_CHILD, 3); 3139 } 3140 3141 #ifdef BIND8_STATS 3142 static void 3143 set_children_stats(struct nsd* nsd) 3144 { 3145 size_t i; 3146 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 3147 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 3148 for (i = 0; i < nsd->child_count; ++i) { 3149 nsd->children[i].need_to_send_STATS = 1; 3150 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 3151 } 3152 } 3153 #endif /* BIND8_STATS */ 3154 3155 static void 3156 configure_handler_event_types(short event_types) 3157 { 3158 size_t i; 3159 3160 for (i = 0; i < tcp_accept_handler_count; ++i) { 3161 struct event* handler = &tcp_accept_handlers[i].event; 3162 if(event_types) { 3163 /* reassign */ 3164 int fd = handler->ev_fd; 3165 struct event_base* base = handler->ev_base; 3166 if(tcp_accept_handlers[i].event_added) 3167 event_del(handler); 3168 event_set(handler, fd, event_types, 3169 handle_tcp_accept, &tcp_accept_handlers[i]); 3170 if(event_base_set(base, handler) != 0) 3171 log_msg(LOG_ERR, "conhand: cannot event_base"); 3172 if(event_add(handler, NULL) != 0) 3173 log_msg(LOG_ERR, "conhand: cannot event_add"); 3174 tcp_accept_handlers[i].event_added = 1; 3175 } else { 3176 /* remove */ 3177 if(tcp_accept_handlers[i].event_added) { 3178 event_del(handler); 3179 tcp_accept_handlers[i].event_added = 0; 3180 } 3181 } 3182 } 3183 } 3184