1 /* 2 * server.c -- nsd(8) network input/output 3 * 4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/param.h> 14 #include <sys/socket.h> 15 #include <sys/uio.h> 16 #include <sys/wait.h> 17 18 #include <netinet/in.h> 19 #include <arpa/inet.h> 20 21 #include <assert.h> 22 #include <ctype.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <stddef.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <time.h> 30 #include <unistd.h> 31 #include <signal.h> 32 #include <netdb.h> 33 #include <poll.h> 34 #ifndef SHUT_WR 35 #define SHUT_WR 1 36 #endif 37 #ifdef HAVE_MMAP 38 #include <sys/mman.h> 39 #endif /* HAVE_MMAP */ 40 #ifdef HAVE_OPENSSL_RAND_H 41 #include <openssl/rand.h> 42 #endif 43 #ifndef USE_MINI_EVENT 44 # ifdef HAVE_EVENT_H 45 # include <event.h> 46 # else 47 # include <event2/event.h> 48 # include "event2/event_struct.h" 49 # include "event2/event_compat.h" 50 # endif 51 #else 52 # include "mini_event.h" 53 #endif 54 55 #include "axfr.h" 56 #include "namedb.h" 57 #include "netio.h" 58 #include "xfrd.h" 59 #include "xfrd-tcp.h" 60 #include "xfrd-disk.h" 61 #include "difffile.h" 62 #include "nsec3.h" 63 #include "ipc.h" 64 #include "udb.h" 65 #include "remote.h" 66 #include "lookup3.h" 67 #include "rrl.h" 68 #ifdef USE_DNSTAP 69 #include "dnstap/dnstap_collector.h" 70 #endif 71 72 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */ 73 74 /* 75 * Data for the UDP handlers. 76 */ 77 struct udp_handler_data 78 { 79 struct nsd *nsd; 80 struct nsd_socket *socket; 81 query_type *query; 82 }; 83 84 struct tcp_accept_handler_data { 85 struct nsd *nsd; 86 struct nsd_socket *socket; 87 int event_added; 88 struct event event; 89 }; 90 91 /* 92 * These globals are used to enable the TCP accept handlers 93 * when the number of TCP connection drops below the maximum 94 * number of TCP connections. 95 */ 96 static size_t tcp_accept_handler_count; 97 static struct tcp_accept_handler_data* tcp_accept_handlers; 98 99 static struct event slowaccept_event; 100 static int slowaccept; 101 102 #ifndef NONBLOCKING_IS_BROKEN 103 # define NUM_RECV_PER_SELECT 100 104 #endif 105 106 #if (!defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG)) 107 struct mmsghdr msgs[NUM_RECV_PER_SELECT]; 108 struct iovec iovecs[NUM_RECV_PER_SELECT]; 109 struct query *queries[NUM_RECV_PER_SELECT]; 110 #endif 111 112 /* 113 * Data for the TCP connection handlers. 114 * 115 * The TCP handlers use non-blocking I/O. This is necessary to avoid 116 * blocking the entire server on a slow TCP connection, but does make 117 * reading from and writing to the socket more complicated. 118 * 119 * Basically, whenever a read/write would block (indicated by the 120 * EAGAIN errno variable) we remember the position we were reading 121 * from/writing to and return from the TCP reading/writing event 122 * handler. When the socket becomes readable/writable again we 123 * continue from the same position. 124 */ 125 struct tcp_handler_data 126 { 127 /* 128 * The region used to allocate all TCP connection related 129 * data, including this structure. This region is destroyed 130 * when the connection is closed. 131 */ 132 region_type* region; 133 134 /* 135 * The global nsd structure. 136 */ 137 struct nsd* nsd; 138 139 /* 140 * The current query data for this TCP connection. 141 */ 142 query_type* query; 143 144 /* 145 * The query_state is used to remember if we are performing an 146 * AXFR, if we're done processing, or if we should discard the 147 * query and connection. 148 */ 149 query_state_type query_state; 150 151 /* 152 * The event for the file descriptor and tcp timeout 153 */ 154 struct event event; 155 156 /* 157 * The bytes_transmitted field is used to remember the number 158 * of bytes transmitted when receiving or sending a DNS 159 * packet. The count includes the two additional bytes used 160 * to specify the packet length on a TCP connection. 161 */ 162 size_t bytes_transmitted; 163 164 /* 165 * The number of queries handled by this specific TCP connection. 166 */ 167 int query_count; 168 169 /* 170 * The timeout in msec for this tcp connection 171 */ 172 int tcp_timeout; 173 }; 174 175 /* 176 * Handle incoming queries on the UDP server sockets. 177 */ 178 static void handle_udp(int fd, short event, void* arg); 179 180 /* 181 * Handle incoming connections on the TCP sockets. These handlers 182 * usually wait for the NETIO_EVENT_READ event (indicating an incoming 183 * connection) but are disabled when the number of current TCP 184 * connections is equal to the maximum number of TCP connections. 185 * Disabling is done by changing the handler to wait for the 186 * NETIO_EVENT_NONE type. This is done using the function 187 * configure_tcp_accept_handlers. 188 */ 189 static void handle_tcp_accept(int fd, short event, void* arg); 190 191 /* 192 * Handle incoming queries on a TCP connection. The TCP connections 193 * are configured to be non-blocking and the handler may be called 194 * multiple times before a complete query is received. 195 */ 196 static void handle_tcp_reading(int fd, short event, void* arg); 197 198 /* 199 * Handle outgoing responses on a TCP connection. The TCP connections 200 * are configured to be non-blocking and the handler may be called 201 * multiple times before a complete response is sent. 202 */ 203 static void handle_tcp_writing(int fd, short event, void* arg); 204 205 /* 206 * Send all children the quit nonblocking, then close pipe. 207 */ 208 static void send_children_quit(struct nsd* nsd); 209 /* same, for shutdown time, waits for child to exit to avoid restart issues */ 210 static void send_children_quit_and_wait(struct nsd* nsd); 211 212 /* set childrens flags to send NSD_STATS to them */ 213 #ifdef BIND8_STATS 214 static void set_children_stats(struct nsd* nsd); 215 #endif /* BIND8_STATS */ 216 217 /* 218 * Change the event types the HANDLERS are interested in to EVENT_TYPES. 219 */ 220 static void configure_handler_event_types(short event_types); 221 222 static uint16_t *compressed_dname_offsets = 0; 223 static uint32_t compression_table_capacity = 0; 224 static uint32_t compression_table_size = 0; 225 static domain_type* compressed_dnames[MAXRRSPP]; 226 227 /* 228 * Remove the specified pid from the list of child pids. Returns -1 if 229 * the pid is not in the list, child_num otherwise. The field is set to 0. 230 */ 231 static int 232 delete_child_pid(struct nsd *nsd, pid_t pid) 233 { 234 size_t i; 235 for (i = 0; i < nsd->child_count; ++i) { 236 if (nsd->children[i].pid == pid) { 237 nsd->children[i].pid = 0; 238 if(!nsd->children[i].need_to_exit) { 239 if(nsd->children[i].child_fd != -1) 240 close(nsd->children[i].child_fd); 241 nsd->children[i].child_fd = -1; 242 if(nsd->children[i].handler) 243 nsd->children[i].handler->fd = -1; 244 } 245 return i; 246 } 247 } 248 return -1; 249 } 250 251 /* 252 * Restart child servers if necessary. 253 */ 254 static int 255 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 256 int* xfrd_sock_p) 257 { 258 struct main_ipc_handler_data *ipc_data; 259 size_t i; 260 int sv[2]; 261 262 /* Fork the child processes... */ 263 for (i = 0; i < nsd->child_count; ++i) { 264 if (nsd->children[i].pid <= 0) { 265 if (nsd->children[i].child_fd != -1) 266 close(nsd->children[i].child_fd); 267 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 268 log_msg(LOG_ERR, "socketpair: %s", 269 strerror(errno)); 270 return -1; 271 } 272 nsd->children[i].child_fd = sv[0]; 273 nsd->children[i].parent_fd = sv[1]; 274 nsd->children[i].pid = fork(); 275 switch (nsd->children[i].pid) { 276 default: /* SERVER MAIN */ 277 close(nsd->children[i].parent_fd); 278 nsd->children[i].parent_fd = -1; 279 if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { 280 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 281 } 282 if(!nsd->children[i].handler) 283 { 284 ipc_data = (struct main_ipc_handler_data*) region_alloc( 285 region, sizeof(struct main_ipc_handler_data)); 286 ipc_data->nsd = nsd; 287 ipc_data->child = &nsd->children[i]; 288 ipc_data->child_num = i; 289 ipc_data->xfrd_sock = xfrd_sock_p; 290 ipc_data->packet = buffer_create(region, QIOBUFSZ); 291 ipc_data->forward_mode = 0; 292 ipc_data->got_bytes = 0; 293 ipc_data->total_bytes = 0; 294 ipc_data->acl_num = 0; 295 nsd->children[i].handler = (struct netio_handler*) region_alloc( 296 region, sizeof(struct netio_handler)); 297 nsd->children[i].handler->fd = nsd->children[i].child_fd; 298 nsd->children[i].handler->timeout = NULL; 299 nsd->children[i].handler->user_data = ipc_data; 300 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 301 nsd->children[i].handler->event_handler = parent_handle_child_command; 302 netio_add_handler(netio, nsd->children[i].handler); 303 } 304 /* clear any ongoing ipc */ 305 ipc_data = (struct main_ipc_handler_data*) 306 nsd->children[i].handler->user_data; 307 ipc_data->forward_mode = 0; 308 /* restart - update fd */ 309 nsd->children[i].handler->fd = nsd->children[i].child_fd; 310 break; 311 case 0: /* CHILD */ 312 /* the child need not be able to access the 313 * nsd.db file */ 314 namedb_close_udb(nsd->db); 315 #ifdef MEMCLEAN /* OS collects memory pages */ 316 region_destroy(region); 317 #endif 318 319 if (pledge("stdio rpath inet", NULL) == -1) { 320 log_msg(LOG_ERR, "pledge"); 321 exit(1); 322 } 323 324 nsd->pid = 0; 325 nsd->child_count = 0; 326 nsd->server_kind = nsd->children[i].kind; 327 nsd->this_child = &nsd->children[i]; 328 nsd->this_child->child_num = i; 329 /* remove signal flags inherited from parent 330 the parent will handle them. */ 331 nsd->signal_hint_reload_hup = 0; 332 nsd->signal_hint_reload = 0; 333 nsd->signal_hint_child = 0; 334 nsd->signal_hint_quit = 0; 335 nsd->signal_hint_shutdown = 0; 336 nsd->signal_hint_stats = 0; 337 nsd->signal_hint_statsusr = 0; 338 close(*xfrd_sock_p); 339 close(nsd->this_child->child_fd); 340 nsd->this_child->child_fd = -1; 341 if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { 342 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 343 } 344 server_child(nsd); 345 /* NOTREACH */ 346 exit(0); 347 case -1: 348 log_msg(LOG_ERR, "fork failed: %s", 349 strerror(errno)); 350 return -1; 351 } 352 } 353 } 354 return 0; 355 } 356 357 #ifdef BIND8_STATS 358 static void set_bind8_alarm(struct nsd* nsd) 359 { 360 /* resync so that the next alarm is on the next whole minute */ 361 if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ 362 alarm(nsd->st.period - (time(NULL) % nsd->st.period)); 363 } 364 #endif 365 366 /* set zone stat ids for zones initially read in */ 367 static void 368 zonestatid_tree_set(struct nsd* nsd) 369 { 370 struct radnode* n; 371 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 372 zone_type* zone = (zone_type*)n->elem; 373 zone->zonestatid = getzonestatid(nsd->options, zone->opts); 374 } 375 } 376 377 #ifdef USE_ZONE_STATS 378 void 379 server_zonestat_alloc(struct nsd* nsd) 380 { 381 size_t num = (nsd->options->zonestatnames->count==0?1: 382 nsd->options->zonestatnames->count); 383 size_t sz = sizeof(struct nsdst)*num; 384 char tmpfile[256]; 385 uint8_t z = 0; 386 387 /* file names */ 388 nsd->zonestatfname[0] = 0; 389 nsd->zonestatfname[1] = 0; 390 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0", 391 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 392 nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile); 393 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1", 394 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 395 nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile); 396 397 /* file descriptors */ 398 nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600); 399 if(nsd->zonestatfd[0] == -1) { 400 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0], 401 strerror(errno)); 402 exit(1); 403 } 404 nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600); 405 if(nsd->zonestatfd[0] == -1) { 406 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1], 407 strerror(errno)); 408 close(nsd->zonestatfd[0]); 409 unlink(nsd->zonestatfname[0]); 410 exit(1); 411 } 412 413 #ifdef HAVE_MMAP 414 if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) { 415 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0], 416 strerror(errno)); 417 exit(1); 418 } 419 if(write(nsd->zonestatfd[0], &z, 1) == -1) { 420 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 421 nsd->zonestatfname[0], strerror(errno)); 422 exit(1); 423 } 424 if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) { 425 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1], 426 strerror(errno)); 427 exit(1); 428 } 429 if(write(nsd->zonestatfd[1], &z, 1) == -1) { 430 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 431 nsd->zonestatfname[1], strerror(errno)); 432 exit(1); 433 } 434 nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 435 MAP_SHARED, nsd->zonestatfd[0], 0); 436 if(nsd->zonestat[0] == MAP_FAILED) { 437 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 438 unlink(nsd->zonestatfname[0]); 439 unlink(nsd->zonestatfname[1]); 440 exit(1); 441 } 442 nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 443 MAP_SHARED, nsd->zonestatfd[1], 0); 444 if(nsd->zonestat[1] == MAP_FAILED) { 445 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 446 unlink(nsd->zonestatfname[0]); 447 unlink(nsd->zonestatfname[1]); 448 exit(1); 449 } 450 memset(nsd->zonestat[0], 0, sz); 451 memset(nsd->zonestat[1], 0, sz); 452 nsd->zonestatsize[0] = num; 453 nsd->zonestatsize[1] = num; 454 nsd->zonestatdesired = num; 455 nsd->zonestatsizenow = num; 456 nsd->zonestatnow = nsd->zonestat[0]; 457 #endif /* HAVE_MMAP */ 458 } 459 460 void 461 zonestat_remap(struct nsd* nsd, int idx, size_t sz) 462 { 463 #ifdef HAVE_MMAP 464 #ifdef MREMAP_MAYMOVE 465 nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx], 466 sizeof(struct nsdst)*nsd->zonestatsize[idx], sz, 467 MREMAP_MAYMOVE); 468 if(nsd->zonestat[idx] == MAP_FAILED) { 469 log_msg(LOG_ERR, "mremap failed: %s", strerror(errno)); 470 exit(1); 471 } 472 #else /* !HAVE MREMAP */ 473 if(msync(nsd->zonestat[idx], 474 sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0) 475 log_msg(LOG_ERR, "msync failed: %s", strerror(errno)); 476 if(munmap(nsd->zonestat[idx], 477 sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0) 478 log_msg(LOG_ERR, "munmap failed: %s", strerror(errno)); 479 nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz, 480 PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0); 481 if(nsd->zonestat[idx] == MAP_FAILED) { 482 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 483 exit(1); 484 } 485 #endif /* MREMAP */ 486 #endif /* HAVE_MMAP */ 487 } 488 489 /* realloc the zonestat array for the one that is not currently in use, 490 * to match the desired new size of the array (if applicable) */ 491 void 492 server_zonestat_realloc(struct nsd* nsd) 493 { 494 #ifdef HAVE_MMAP 495 uint8_t z = 0; 496 size_t sz; 497 int idx = 0; /* index of the zonestat array that is not in use */ 498 if(nsd->zonestatnow == nsd->zonestat[0]) 499 idx = 1; 500 if(nsd->zonestatsize[idx] == nsd->zonestatdesired) 501 return; 502 sz = sizeof(struct nsdst)*nsd->zonestatdesired; 503 if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) { 504 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx], 505 strerror(errno)); 506 exit(1); 507 } 508 if(write(nsd->zonestatfd[idx], &z, 1) == -1) { 509 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 510 nsd->zonestatfname[idx], strerror(errno)); 511 exit(1); 512 } 513 zonestat_remap(nsd, idx, sz); 514 /* zero the newly allocated region */ 515 if(nsd->zonestatdesired > nsd->zonestatsize[idx]) { 516 memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) * 517 nsd->zonestatsize[idx], 0, sizeof(struct nsdst) * 518 (nsd->zonestatdesired - nsd->zonestatsize[idx])); 519 } 520 nsd->zonestatsize[idx] = nsd->zonestatdesired; 521 #endif /* HAVE_MMAP */ 522 } 523 524 /* switchover to use the other array for the new children, that 525 * briefly coexist with the old children. And we want to avoid them 526 * both writing to the same statistics arrays. */ 527 void 528 server_zonestat_switch(struct nsd* nsd) 529 { 530 if(nsd->zonestatnow == nsd->zonestat[0]) { 531 nsd->zonestatnow = nsd->zonestat[1]; 532 nsd->zonestatsizenow = nsd->zonestatsize[1]; 533 } else { 534 nsd->zonestatnow = nsd->zonestat[0]; 535 nsd->zonestatsizenow = nsd->zonestatsize[0]; 536 } 537 } 538 #endif /* USE_ZONE_STATS */ 539 540 static void 541 cleanup_dname_compression_tables(void *ptr) 542 { 543 free(ptr); 544 compressed_dname_offsets = NULL; 545 compression_table_capacity = 0; 546 } 547 548 static void 549 initialize_dname_compression_tables(struct nsd *nsd) 550 { 551 size_t needed = domain_table_count(nsd->db->domains) + 1; 552 needed += EXTRA_DOMAIN_NUMBERS; 553 if(compression_table_capacity < needed) { 554 if(compressed_dname_offsets) { 555 region_remove_cleanup(nsd->db->region, 556 cleanup_dname_compression_tables, 557 compressed_dname_offsets); 558 free(compressed_dname_offsets); 559 } 560 compressed_dname_offsets = (uint16_t *) xmallocarray( 561 needed, sizeof(uint16_t)); 562 region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, 563 compressed_dname_offsets); 564 compression_table_capacity = needed; 565 compression_table_size=domain_table_count(nsd->db->domains)+1; 566 } 567 memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); 568 compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ 569 } 570 571 /* create and bind sockets. */ 572 static int 573 server_init_ifs(struct nsd *nsd, size_t from, size_t to, int* reuseport_works) 574 { 575 struct addrinfo* addr; 576 size_t i; 577 #if defined(SO_REUSEPORT) || defined(SO_REUSEADDR) || (defined(INET6) && (defined(IPV6_V6ONLY) || defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU) || defined(IP_TRANSPARENT)) || defined(IP_FREEBIND) || defined(SO_BINDANY)) 578 int on = 1; 579 #endif 580 581 /* UDP */ 582 583 /* Make a socket... */ 584 for (i = from; i < to; i++) { 585 /* for reuseports copy socket specs of first entries */ 586 addr = nsd->udp[i%nsd->ifs].addr; 587 if (!addr) { 588 nsd->udp[i].s = -1; 589 continue; 590 } 591 nsd->udp[i].fam = (int)addr->ai_family; 592 if ((nsd->udp[i].s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 593 #if defined(INET6) 594 if (addr->ai_family == AF_INET6 && 595 errno == EAFNOSUPPORT && nsd->grab_ip6_optional) { 596 log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: not supported"); 597 continue; 598 } 599 #endif /* INET6 */ 600 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 601 return -1; 602 } 603 604 #ifdef SO_REUSEPORT 605 # ifdef SO_REUSEPORT_LB 606 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance 607 * like SO_REUSEPORT on Linux. This is what the users want 608 * with the config option in nsd.conf; if we actually 609 * need local address and port reuse they'll also need to 610 * have SO_REUSEPORT set for them, assume it was _LB they want. 611 */ 612 if(nsd->reuseport && *reuseport_works && 613 setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_REUSEPORT_LB, 614 (void*)&on, (socklen_t)sizeof(on)) < 0) { 615 if(verbosity >= 3 616 #ifdef ENOPROTOOPT 617 || errno != ENOPROTOOPT 618 #endif 619 ) 620 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEPORT_LB, " 621 "...) failed: %s", strerror(errno)); 622 *reuseport_works = 0; 623 } 624 # else /* SO_REUSEPORT_LB */ 625 if(nsd->reuseport && *reuseport_works && 626 setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_REUSEPORT, 627 (void*)&on, (socklen_t)sizeof(on)) < 0) { 628 if(verbosity >= 3 629 #ifdef ENOPROTOOPT 630 || errno != ENOPROTOOPT 631 #endif 632 ) 633 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEPORT, " 634 "...) failed: %s", strerror(errno)); 635 *reuseport_works = 0; 636 } 637 # endif /* SO_REUSEPORT_LB */ 638 #else 639 (void)reuseport_works; 640 #endif /* SO_REUSEPORT */ 641 #if defined(SO_RCVBUF) || defined(SO_SNDBUF) 642 if(1) { 643 int rcv = 1*1024*1024; 644 int snd = 1*1024*1024; 645 646 #ifdef SO_RCVBUF 647 # ifdef SO_RCVBUFFORCE 648 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 649 (socklen_t)sizeof(rcv)) < 0) { 650 if(errno != EPERM && errno != ENOBUFS) { 651 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, " 652 "...) failed: %s", strerror(errno)); 653 return -1; 654 } 655 # else 656 if(1) { 657 # endif /* SO_RCVBUFFORCE */ 658 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 659 (socklen_t)sizeof(rcv)) < 0) { 660 if(errno != ENOBUFS && errno != ENOSYS) { 661 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, " 662 "...) failed: %s", strerror(errno)); 663 return -1; 664 } 665 } 666 } 667 #endif /* SO_RCVBUF */ 668 669 #ifdef SO_SNDBUF 670 # ifdef SO_SNDBUFFORCE 671 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 672 (socklen_t)sizeof(snd)) < 0) { 673 if(errno != EPERM && errno != ENOBUFS) { 674 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, " 675 "...) failed: %s", strerror(errno)); 676 return -1; 677 } 678 # else 679 if(1) { 680 # endif /* SO_SNDBUFFORCE */ 681 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 682 (socklen_t)sizeof(snd)) < 0) { 683 if(errno != ENOBUFS && errno != ENOSYS) { 684 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, " 685 "...) failed: %s", strerror(errno)); 686 return -1; 687 } 688 } 689 } 690 #endif /* SO_SNDBUF */ 691 692 } 693 #endif /* defined(SO_RCVBUF) || defined(SO_SNDBUF) */ 694 695 #if defined(INET6) 696 if (addr->ai_family == AF_INET6) { 697 # if defined(IPV6_V6ONLY) 698 if (setsockopt(nsd->udp[i].s, 699 IPPROTO_IPV6, IPV6_V6ONLY, 700 &on, sizeof(on)) < 0) 701 { 702 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 703 strerror(errno)); 704 return -1; 705 } 706 # endif 707 # if defined(IPV6_USE_MIN_MTU) 708 /* 709 * There is no fragmentation of IPv6 datagrams 710 * during forwarding in the network. Therefore 711 * we do not send UDP datagrams larger than 712 * the minimum IPv6 MTU of 1280 octets. The 713 * EDNS0 message length can be larger if the 714 * network stack supports IPV6_USE_MIN_MTU. 715 */ 716 if (setsockopt(nsd->udp[i].s, 717 IPPROTO_IPV6, IPV6_USE_MIN_MTU, 718 &on, sizeof(on)) < 0) 719 { 720 log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", 721 strerror(errno)); 722 return -1; 723 } 724 # elif defined(IPV6_MTU) 725 /* 726 * On Linux, PMTUD is disabled by default for datagrams 727 * so set the MTU equal to the MIN MTU to get the same. 728 */ 729 on = IPV6_MIN_MTU; 730 if (setsockopt(nsd->udp[i].s, IPPROTO_IPV6, IPV6_MTU, 731 &on, sizeof(on)) < 0) 732 { 733 log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", 734 strerror(errno)); 735 return -1; 736 } 737 on = 1; 738 # endif 739 } 740 #endif 741 #if defined(AF_INET) 742 if (addr->ai_family == AF_INET) { 743 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 744 int action = IP_PMTUDISC_DONT; 745 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, 746 IP_MTU_DISCOVER, &action, sizeof(action)) < 0) 747 { 748 log_msg(LOG_ERR, "setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 749 strerror(errno)); 750 return -1; 751 } 752 # elif defined(IP_DONTFRAG) 753 int off = 0; 754 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_DONTFRAG, 755 &off, sizeof(off)) < 0) 756 { 757 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 758 strerror(errno)); 759 return -1; 760 } 761 # endif 762 } 763 #endif 764 /* set it nonblocking */ 765 /* otherwise, on OSes with thundering herd problems, the 766 UDP recv could block NSD after select returns readable. */ 767 if (fcntl(nsd->udp[i].s, F_SETFL, O_NONBLOCK) == -1) { 768 log_msg(LOG_ERR, "cannot fcntl udp: %s", strerror(errno)); 769 } 770 771 /* Bind it... */ 772 if (nsd->options->ip_freebind) { 773 #ifdef IP_FREEBIND 774 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) < 0) { 775 log_msg(LOG_ERR, "setsockopt(...,IP_FREEBIND, ...) failed for udp: %s", 776 strerror(errno)); 777 } 778 #endif /* IP_FREEBIND */ 779 } 780 781 if (nsd->options->ip_transparent) { 782 #ifdef IP_TRANSPARENT 783 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) < 0) { 784 log_msg(LOG_ERR, "setsockopt(...,IP_TRANSPARENT, ...) failed for udp: %s", 785 strerror(errno)); 786 } 787 #endif /* IP_TRANSPARENT */ 788 #ifdef SO_BINDANY 789 if (setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_BINDANY, &on, sizeof(on)) < 0) { 790 log_msg(LOG_ERR, "setsockopt(...,SO_BINDANY, ...) failed for udp: %s", 791 strerror(errno)); 792 } 793 #endif /* SO_BINDANY */ 794 } 795 796 if ( 797 bind(nsd->udp[i].s, (struct sockaddr *) addr->ai_addr, addr->ai_addrlen) != 0) { 798 log_msg(LOG_ERR, "can't bind udp socket: %s", strerror(errno)); 799 return -1; 800 } 801 } 802 803 /* TCP */ 804 805 /* Make a socket... */ 806 for (i = from; i < to; i++) { 807 /* for reuseports copy socket specs of first entries */ 808 addr = nsd->tcp[i%nsd->ifs].addr; 809 if (!addr) { 810 nsd->tcp[i].s = -1; 811 continue; 812 } 813 nsd->tcp[i].fam = (int)addr->ai_family; 814 /* turn off REUSEPORT for TCP by copying the socket fd */ 815 if(i >= nsd->ifs) { 816 nsd->tcp[i].s = nsd->tcp[i%nsd->ifs].s; 817 continue; 818 } 819 if ((nsd->tcp[i].s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) { 820 #if defined(INET6) 821 if (addr->ai_family == AF_INET6 && 822 errno == EAFNOSUPPORT && nsd->grab_ip6_optional) { 823 log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: not supported"); 824 continue; 825 } 826 #endif /* INET6 */ 827 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 828 return -1; 829 } 830 831 #ifdef SO_REUSEPORT 832 if(nsd->reuseport && *reuseport_works && 833 setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEPORT, 834 (void*)&on, (socklen_t)sizeof(on)) < 0) { 835 if(verbosity >= 3 836 #ifdef ENOPROTOOPT 837 || errno != ENOPROTOOPT 838 #endif 839 ) 840 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEPORT, " 841 "...) failed: %s", strerror(errno)); 842 *reuseport_works = 0; 843 } 844 #endif /* SO_REUSEPORT */ 845 #ifdef SO_REUSEADDR 846 if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) { 847 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", strerror(errno)); 848 } 849 #endif /* SO_REUSEADDR */ 850 851 #if defined(INET6) 852 if (addr->ai_family == AF_INET6) { 853 # if defined(IPV6_V6ONLY) 854 if (setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_V6ONLY, 855 &on, sizeof(on)) < 0) { 856 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", strerror(errno)); 857 return -1; 858 } 859 # endif 860 # if defined(IPV6_USE_MIN_MTU) 861 /* 862 * Use minimum MTU to minimize delays learning working 863 * PMTU when communicating through a tunnel. 864 */ 865 if (setsockopt(nsd->tcp[i].s, 866 IPPROTO_IPV6, IPV6_USE_MIN_MTU, 867 &on, sizeof(on)) < 0) { 868 log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", strerror(errno)); 869 return -1; 870 } 871 # elif defined(IPV6_MTU) 872 /* 873 * On Linux, PMTUD is disabled by default for datagrams 874 * so set the MTU equal to the MIN MTU to get the same. 875 */ 876 on = IPV6_MIN_MTU; 877 if (setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_MTU, 878 &on, sizeof(on)) < 0) { 879 log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", strerror(errno)); 880 return -1; 881 } 882 on = 1; 883 # endif 884 } 885 #endif 886 /* set maximum segment size to tcp socket */ 887 if(nsd->tcp_mss > 0) { 888 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 889 if(setsockopt(nsd->tcp[i].s, IPPROTO_TCP, TCP_MAXSEG, 890 (void*)&nsd->tcp_mss, 891 sizeof(nsd->tcp_mss)) < 0) { 892 log_msg(LOG_ERR, 893 "setsockopt(...,TCP_MAXSEG,...)" 894 " failed for tcp: %s", strerror(errno)); 895 } 896 #else 897 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported"); 898 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */ 899 } 900 901 /* set it nonblocking */ 902 /* (StevensUNP p463), if tcp listening socket is blocking, then 903 it may block in accept, even if select() says readable. */ 904 if (fcntl(nsd->tcp[i].s, F_SETFL, O_NONBLOCK) == -1) { 905 log_msg(LOG_ERR, "cannot fcntl tcp: %s", strerror(errno)); 906 } 907 908 /* Bind it... */ 909 if (nsd->options->ip_freebind) { 910 #ifdef IP_FREEBIND 911 if (setsockopt(nsd->tcp[i].s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) < 0) { 912 log_msg(LOG_ERR, "setsockopt(...,IP_FREEBIND, ...) failed for tcp: %s", 913 strerror(errno)); 914 } 915 #endif /* IP_FREEBIND */ 916 } 917 918 if (nsd->options->ip_transparent) { 919 #ifdef IP_TRANSPARENT 920 if (setsockopt(nsd->tcp[i].s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) < 0) { 921 log_msg(LOG_ERR, "setsockopt(...,IP_TRANSPARENT, ...) failed for tcp: %s", 922 strerror(errno)); 923 } 924 #endif /* IP_TRANSPARENT */ 925 #ifdef SO_BINDANY 926 if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_BINDANY, &on, sizeof(on)) < 0) { 927 log_msg(LOG_ERR, "setsockopt(...,SO_BINDANY, ...) failed for tcp: %s", 928 strerror(errno)); 929 } 930 #endif /* SO_BINDANY */ 931 } 932 933 if( 934 bind(nsd->tcp[i].s, (struct sockaddr *) addr->ai_addr, addr->ai_addrlen) != 0) { 935 log_msg(LOG_ERR, "can't bind tcp socket: %s", strerror(errno)); 936 return -1; 937 } 938 939 /* Listen to it... */ 940 if (listen(nsd->tcp[i].s, TCP_BACKLOG) == -1) { 941 log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); 942 return -1; 943 } 944 } 945 946 return 0; 947 } 948 949 /* 950 * Initialize the server, reuseport, create and bind the sockets. 951 */ 952 int 953 server_init(struct nsd *nsd) 954 { 955 int reuseport_successful = 1; /* see if reuseport works in OS */ 956 if(nsd->reuseport) { 957 /* increase the size of the udp and tcp interface arrays, 958 * there are going to be separate interface file descriptors 959 * for every server instance */ 960 nsd->udp = xrealloc(nsd->udp, (nsd->ifs*nsd->reuseport)* 961 sizeof(*nsd->udp)); 962 nsd->tcp = xrealloc(nsd->tcp, (nsd->ifs*nsd->reuseport)* 963 sizeof(*nsd->tcp)); 964 memset(&nsd->udp[nsd->ifs], 0, sizeof(*nsd->udp)* 965 (nsd->ifs*(nsd->reuseport-1))); 966 memset(&nsd->tcp[nsd->ifs], 0, sizeof(*nsd->tcp)* 967 (nsd->ifs*(nsd->reuseport-1))); 968 } 969 970 /* open the server interface ports */ 971 if(server_init_ifs(nsd, 0, nsd->ifs, &reuseport_successful) == -1) 972 return -1; 973 974 /* continue to open the remaining reuseport ports */ 975 if(nsd->reuseport && reuseport_successful) { 976 if(server_init_ifs(nsd, nsd->ifs, nsd->ifs*nsd->reuseport, 977 &reuseport_successful) == -1) 978 return -1; 979 nsd->ifs *= nsd->reuseport; 980 } else { 981 nsd->reuseport = 0; 982 } 983 return 0; 984 } 985 986 /* 987 * Prepare the server for take off. 988 * 989 */ 990 int 991 server_prepare(struct nsd *nsd) 992 { 993 #ifdef RATELIMIT 994 /* set secret modifier for hashing (udb ptr buckets and rate limits) */ 995 #ifdef HAVE_ARC4RANDOM 996 hash_set_raninit(arc4random()); 997 #else 998 uint32_t v = getpid() ^ time(NULL); 999 srandom((unsigned long)v); 1000 if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0) 1001 hash_set_raninit(v); 1002 else hash_set_raninit(random()); 1003 #endif 1004 rrl_mmap_init(nsd->child_count, nsd->options->rrl_size, 1005 nsd->options->rrl_ratelimit, 1006 nsd->options->rrl_whitelist_ratelimit, 1007 nsd->options->rrl_slip, 1008 nsd->options->rrl_ipv4_prefix_length, 1009 nsd->options->rrl_ipv6_prefix_length); 1010 #endif /* RATELIMIT */ 1011 1012 /* Open the database... */ 1013 if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) { 1014 log_msg(LOG_ERR, "unable to open the database %s: %s", 1015 nsd->dbfile, strerror(errno)); 1016 unlink(nsd->task[0]->fname); 1017 unlink(nsd->task[1]->fname); 1018 #ifdef USE_ZONE_STATS 1019 unlink(nsd->zonestatfname[0]); 1020 unlink(nsd->zonestatfname[1]); 1021 #endif 1022 xfrd_del_tempdir(nsd); 1023 return -1; 1024 } 1025 /* check if zone files have been modified */ 1026 /* NULL for taskudb because we send soainfo in a moment, batched up, 1027 * for all zones */ 1028 if(nsd->options->zonefiles_check || (nsd->options->database == NULL || 1029 nsd->options->database[0] == 0)) 1030 namedb_check_zonefiles(nsd, nsd->options, NULL, NULL); 1031 zonestatid_tree_set(nsd); 1032 1033 compression_table_capacity = 0; 1034 initialize_dname_compression_tables(nsd); 1035 1036 #ifdef BIND8_STATS 1037 /* Initialize times... */ 1038 time(&nsd->st.boot); 1039 set_bind8_alarm(nsd); 1040 #endif /* BIND8_STATS */ 1041 1042 return 0; 1043 } 1044 1045 /* 1046 * Fork the required number of servers. 1047 */ 1048 static int 1049 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, 1050 int* xfrd_sock_p) 1051 { 1052 size_t i; 1053 1054 /* Start all child servers initially. */ 1055 for (i = 0; i < nsd->child_count; ++i) { 1056 nsd->children[i].pid = 0; 1057 } 1058 1059 return restart_child_servers(nsd, region, netio, xfrd_sock_p); 1060 } 1061 1062 void 1063 server_close_all_sockets(struct nsd_socket sockets[], size_t n) 1064 { 1065 size_t i; 1066 1067 /* Close all the sockets... */ 1068 for (i = 0; i < n; ++i) { 1069 if (sockets[i].s != -1) { 1070 close(sockets[i].s); 1071 if(sockets[i].addr) 1072 freeaddrinfo(sockets[i].addr); 1073 sockets[i].s = -1; 1074 } 1075 } 1076 } 1077 1078 /* 1079 * Close the sockets, shutdown the server and exit. 1080 * Does not return. 1081 * 1082 */ 1083 void 1084 server_shutdown(struct nsd *nsd) 1085 { 1086 size_t i; 1087 1088 server_close_all_sockets(nsd->udp, nsd->ifs); 1089 server_close_all_sockets(nsd->tcp, nsd->ifs); 1090 /* CHILD: close command channel to parent */ 1091 if(nsd->this_child && nsd->this_child->parent_fd != -1) 1092 { 1093 close(nsd->this_child->parent_fd); 1094 nsd->this_child->parent_fd = -1; 1095 } 1096 /* SERVER: close command channels to children */ 1097 if(!nsd->this_child) 1098 { 1099 for(i=0; i < nsd->child_count; ++i) 1100 if(nsd->children[i].child_fd != -1) 1101 { 1102 close(nsd->children[i].child_fd); 1103 nsd->children[i].child_fd = -1; 1104 } 1105 } 1106 1107 tsig_finalize(); 1108 #ifdef HAVE_SSL 1109 daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */ 1110 #endif 1111 1112 #ifdef MEMCLEAN /* OS collects memory pages */ 1113 #ifdef RATELIMIT 1114 rrl_mmap_deinit_keep_mmap(); 1115 #endif 1116 #ifdef USE_DNSTAP 1117 dt_collector_destroy(nsd->dt_collector, nsd); 1118 #endif 1119 udb_base_free_keep_mmap(nsd->task[0]); 1120 udb_base_free_keep_mmap(nsd->task[1]); 1121 namedb_close_udb(nsd->db); /* keeps mmap */ 1122 namedb_close(nsd->db); 1123 nsd_options_destroy(nsd->options); 1124 region_destroy(nsd->region); 1125 #endif 1126 log_finalize(); 1127 exit(0); 1128 } 1129 1130 void 1131 server_prepare_xfrd(struct nsd* nsd) 1132 { 1133 char tmpfile[256]; 1134 /* create task mmaps */ 1135 nsd->mytask = 0; 1136 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0", 1137 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1138 nsd->task[0] = task_file_create(tmpfile); 1139 if(!nsd->task[0]) { 1140 #ifdef USE_ZONE_STATS 1141 unlink(nsd->zonestatfname[0]); 1142 unlink(nsd->zonestatfname[1]); 1143 #endif 1144 xfrd_del_tempdir(nsd); 1145 exit(1); 1146 } 1147 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1", 1148 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1149 nsd->task[1] = task_file_create(tmpfile); 1150 if(!nsd->task[1]) { 1151 unlink(nsd->task[0]->fname); 1152 #ifdef USE_ZONE_STATS 1153 unlink(nsd->zonestatfname[0]); 1154 unlink(nsd->zonestatfname[1]); 1155 #endif 1156 xfrd_del_tempdir(nsd); 1157 exit(1); 1158 } 1159 assert(udb_base_get_userdata(nsd->task[0])->data == 0); 1160 assert(udb_base_get_userdata(nsd->task[1])->data == 0); 1161 /* create xfrd listener structure */ 1162 nsd->xfrd_listener = region_alloc(nsd->region, 1163 sizeof(netio_handler_type)); 1164 nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*) 1165 region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data)); 1166 nsd->xfrd_listener->fd = -1; 1167 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd = 1168 nsd; 1169 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn = 1170 xfrd_tcp_create(nsd->region, QIOBUFSZ); 1171 } 1172 1173 1174 void 1175 server_start_xfrd(struct nsd *nsd, int del_db, int reload_active) 1176 { 1177 pid_t pid; 1178 int sockets[2] = {0,0}; 1179 struct ipc_handler_conn_data *data; 1180 1181 if(nsd->xfrd_listener->fd != -1) 1182 close(nsd->xfrd_listener->fd); 1183 if(del_db) { 1184 /* recreate taskdb that xfrd was using, it may be corrupt */ 1185 /* we (or reload) use nsd->mytask, and xfrd uses the other */ 1186 char* tmpfile = nsd->task[1-nsd->mytask]->fname; 1187 nsd->task[1-nsd->mytask]->fname = NULL; 1188 /* free alloc already, so udb does not shrink itself */ 1189 udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc); 1190 nsd->task[1-nsd->mytask]->alloc = NULL; 1191 udb_base_free(nsd->task[1-nsd->mytask]); 1192 /* create new file, overwrite the old one */ 1193 nsd->task[1-nsd->mytask] = task_file_create(tmpfile); 1194 free(tmpfile); 1195 } 1196 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { 1197 log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); 1198 return; 1199 } 1200 pid = fork(); 1201 switch (pid) { 1202 case -1: 1203 log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); 1204 break; 1205 default: 1206 /* PARENT: close first socket, use second one */ 1207 close(sockets[0]); 1208 if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) { 1209 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1210 } 1211 if(del_db) xfrd_free_namedb(nsd); 1212 /* use other task than I am using, since if xfrd died and is 1213 * restarted, the reload is using nsd->mytask */ 1214 nsd->mytask = 1 - nsd->mytask; 1215 xfrd_init(sockets[1], nsd, del_db, reload_active, pid); 1216 /* ENOTREACH */ 1217 break; 1218 case 0: 1219 /* CHILD: close second socket, use first one */ 1220 close(sockets[1]); 1221 if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) { 1222 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1223 } 1224 nsd->xfrd_listener->fd = sockets[0]; 1225 break; 1226 } 1227 /* server-parent only */ 1228 nsd->xfrd_listener->timeout = NULL; 1229 nsd->xfrd_listener->event_types = NETIO_EVENT_READ; 1230 nsd->xfrd_listener->event_handler = parent_handle_xfrd_command; 1231 /* clear ongoing ipc reads */ 1232 data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data; 1233 data->conn->is_reading = 0; 1234 } 1235 1236 /** add all soainfo to taskdb */ 1237 static void 1238 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb) 1239 { 1240 struct radnode* n; 1241 udb_ptr task_last; /* last task, mytask is empty so NULL */ 1242 /* add all SOA INFO to mytask */ 1243 udb_ptr_init(&task_last, taskudb); 1244 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 1245 task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0); 1246 } 1247 udb_ptr_unlink(&task_last, taskudb); 1248 } 1249 1250 void 1251 server_send_soa_xfrd(struct nsd* nsd, int shortsoa) 1252 { 1253 /* normally this exchanges the SOA from nsd->xfrd and the expire back. 1254 * parent fills one taskdb with soas, xfrd fills other with expires. 1255 * then they exchange and process. 1256 * shortsoa: xfrd crashes and needs to be restarted and one taskdb 1257 * may be in use by reload. Fill SOA in taskdb and give to xfrd. 1258 * expire notifications can be sent back via a normal reload later 1259 * (xfrd will wait for current running reload to finish if any). 1260 */ 1261 sig_atomic_t cmd = 0; 1262 pid_t mypid; 1263 int xfrd_sock = nsd->xfrd_listener->fd; 1264 struct udb_base* taskudb = nsd->task[nsd->mytask]; 1265 udb_ptr t; 1266 if(!shortsoa) { 1267 if(nsd->signal_hint_shutdown) { 1268 shutdown: 1269 log_msg(LOG_WARNING, "signal received, shutting down..."); 1270 server_close_all_sockets(nsd->udp, nsd->ifs); 1271 server_close_all_sockets(nsd->tcp, nsd->ifs); 1272 #ifdef HAVE_SSL 1273 daemon_remote_close(nsd->rc); 1274 #endif 1275 /* Unlink it if possible... */ 1276 unlinkpid(nsd->pidfile); 1277 unlink(nsd->task[0]->fname); 1278 unlink(nsd->task[1]->fname); 1279 #ifdef USE_ZONE_STATS 1280 unlink(nsd->zonestatfname[0]); 1281 unlink(nsd->zonestatfname[1]); 1282 #endif 1283 /* write the nsd.db to disk, wait for it to complete */ 1284 udb_base_sync(nsd->db->udb, 1); 1285 udb_base_close(nsd->db->udb); 1286 server_shutdown(nsd); 1287 exit(0); 1288 } 1289 } 1290 if(shortsoa) { 1291 /* put SOA in xfrd task because mytask may be in use */ 1292 taskudb = nsd->task[1-nsd->mytask]; 1293 } 1294 1295 add_all_soa_to_task(nsd, taskudb); 1296 if(!shortsoa) { 1297 /* wait for xfrd to signal task is ready, RELOAD signal */ 1298 if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) || 1299 cmd != NSD_RELOAD) { 1300 log_msg(LOG_ERR, "did not get start signal from xfrd"); 1301 exit(1); 1302 } 1303 if(nsd->signal_hint_shutdown) { 1304 goto shutdown; 1305 } 1306 } 1307 /* give xfrd our task, signal it with RELOAD_DONE */ 1308 task_process_sync(taskudb); 1309 cmd = NSD_RELOAD_DONE; 1310 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1311 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1312 (int)nsd->pid, strerror(errno)); 1313 } 1314 mypid = getpid(); 1315 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1316 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1317 strerror(errno)); 1318 } 1319 1320 if(!shortsoa) { 1321 /* process the xfrd task works (expiry data) */ 1322 nsd->mytask = 1 - nsd->mytask; 1323 taskudb = nsd->task[nsd->mytask]; 1324 task_remap(taskudb); 1325 udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb)); 1326 while(!udb_ptr_is_null(&t)) { 1327 task_process_expire(nsd->db, TASKLIST(&t)); 1328 udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next); 1329 } 1330 udb_ptr_unlink(&t, taskudb); 1331 task_clear(taskudb); 1332 1333 /* tell xfrd that the task is emptied, signal with RELOAD_DONE */ 1334 cmd = NSD_RELOAD_DONE; 1335 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1336 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1337 (int)nsd->pid, strerror(errno)); 1338 } 1339 } 1340 } 1341 1342 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ 1343 ssize_t 1344 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 1345 { 1346 uint8_t* buf = (uint8_t*) p; 1347 ssize_t total = 0; 1348 struct pollfd fd; 1349 memset(&fd, 0, sizeof(fd)); 1350 fd.fd = s; 1351 fd.events = POLLIN; 1352 1353 while( total < sz) { 1354 ssize_t ret; 1355 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000); 1356 if(ret == -1) { 1357 if(errno == EAGAIN) 1358 /* blocking read */ 1359 continue; 1360 if(errno == EINTR) { 1361 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 1362 return -1; 1363 /* other signals can be handled later */ 1364 continue; 1365 } 1366 /* some error */ 1367 return -1; 1368 } 1369 if(ret == 0) { 1370 /* operation timed out */ 1371 return -2; 1372 } 1373 ret = read(s, buf+total, sz-total); 1374 if(ret == -1) { 1375 if(errno == EAGAIN) 1376 /* blocking read */ 1377 continue; 1378 if(errno == EINTR) { 1379 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 1380 return -1; 1381 /* other signals can be handled later */ 1382 continue; 1383 } 1384 /* some error */ 1385 return -1; 1386 } 1387 if(ret == 0) { 1388 /* closed connection! */ 1389 return 0; 1390 } 1391 total += ret; 1392 } 1393 return total; 1394 } 1395 1396 static void 1397 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) 1398 { 1399 sig_atomic_t cmd = NSD_QUIT_SYNC; 1400 udb_ptr t, next; 1401 udb_base* u = nsd->task[nsd->mytask]; 1402 udb_ptr_init(&next, u); 1403 udb_ptr_new(&t, u, udb_base_get_userdata(u)); 1404 udb_base_set_userdata(u, 0); 1405 while(!udb_ptr_is_null(&t)) { 1406 /* store next in list so this one can be deleted or reused */ 1407 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); 1408 udb_rptr_zero(&TASKLIST(&t)->next, u); 1409 1410 /* process task t */ 1411 /* append results for task t and update last_task */ 1412 task_process_in_reload(nsd, u, last_task, &t); 1413 1414 /* go to next */ 1415 udb_ptr_set_ptr(&t, u, &next); 1416 1417 /* if the parent has quit, we must quit too, poll the fd for cmds */ 1418 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 1419 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 1420 if(cmd == NSD_QUIT) { 1421 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 1422 /* sync to disk (if needed) */ 1423 udb_base_sync(nsd->db->udb, 0); 1424 /* unlink files of remainder of tasks */ 1425 while(!udb_ptr_is_null(&t)) { 1426 if(TASKLIST(&t)->task_type == task_apply_xfr) { 1427 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); 1428 } 1429 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); 1430 } 1431 udb_ptr_unlink(&t, u); 1432 udb_ptr_unlink(&next, u); 1433 exit(0); 1434 } 1435 } 1436 1437 } 1438 udb_ptr_unlink(&t, u); 1439 udb_ptr_unlink(&next, u); 1440 } 1441 1442 #ifdef BIND8_STATS 1443 static void 1444 parent_send_stats(struct nsd* nsd, int cmdfd) 1445 { 1446 size_t i; 1447 if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) { 1448 log_msg(LOG_ERR, "could not write stats to reload"); 1449 return; 1450 } 1451 for(i=0; i<nsd->child_count; i++) 1452 if(!write_socket(cmdfd, &nsd->children[i].query_count, 1453 sizeof(stc_type))) { 1454 log_msg(LOG_ERR, "could not write stats to reload"); 1455 return; 1456 } 1457 } 1458 1459 static void 1460 reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last) 1461 { 1462 struct nsdst s; 1463 stc_type* p; 1464 size_t i; 1465 if(block_read(nsd, cmdfd, &s, sizeof(s), 1466 RELOAD_SYNC_TIMEOUT) != sizeof(s)) { 1467 log_msg(LOG_ERR, "could not read stats from oldpar"); 1468 return; 1469 } 1470 s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0); 1471 s.db_mem = region_get_mem(nsd->db->region); 1472 p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s, 1473 nsd->child_count); 1474 if(!p) return; 1475 for(i=0; i<nsd->child_count; i++) { 1476 if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!= 1477 sizeof(stc_type)) 1478 return; 1479 } 1480 } 1481 #endif /* BIND8_STATS */ 1482 1483 /* 1484 * Reload the database, stop parent, re-fork children and continue. 1485 * as server_main. 1486 */ 1487 static void 1488 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 1489 int cmdsocket) 1490 { 1491 pid_t mypid; 1492 sig_atomic_t cmd = NSD_QUIT_SYNC; 1493 int ret; 1494 udb_ptr last_task; 1495 struct sigaction old_sigchld, ign_sigchld; 1496 /* ignore SIGCHLD from the previous server_main that used this pid */ 1497 memset(&ign_sigchld, 0, sizeof(ign_sigchld)); 1498 ign_sigchld.sa_handler = SIG_IGN; 1499 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld); 1500 1501 /* see what tasks we got from xfrd */ 1502 task_remap(nsd->task[nsd->mytask]); 1503 udb_ptr_init(&last_task, nsd->task[nsd->mytask]); 1504 udb_compact_inhibited(nsd->db->udb, 1); 1505 reload_process_tasks(nsd, &last_task, cmdsocket); 1506 udb_compact_inhibited(nsd->db->udb, 0); 1507 udb_compact(nsd->db->udb); 1508 1509 #ifndef NDEBUG 1510 if(nsd_debug_level >= 1) 1511 region_log_stats(nsd->db->region); 1512 #endif /* NDEBUG */ 1513 /* sync to disk (if needed) */ 1514 udb_base_sync(nsd->db->udb, 0); 1515 1516 initialize_dname_compression_tables(nsd); 1517 1518 #ifdef BIND8_STATS 1519 /* Restart dumping stats if required. */ 1520 time(&nsd->st.boot); 1521 set_bind8_alarm(nsd); 1522 #endif 1523 #ifdef USE_ZONE_STATS 1524 server_zonestat_realloc(nsd); /* realloc for new children */ 1525 server_zonestat_switch(nsd); 1526 #endif 1527 1528 /* listen for the signals of failed children again */ 1529 sigaction(SIGCHLD, &old_sigchld, NULL); 1530 /* Start new child processes */ 1531 if (server_start_children(nsd, server_region, netio, &nsd-> 1532 xfrd_listener->fd) != 0) { 1533 send_children_quit(nsd); 1534 exit(1); 1535 } 1536 1537 /* if the parent has quit, we must quit too, poll the fd for cmds */ 1538 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 1539 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 1540 if(cmd == NSD_QUIT) { 1541 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 1542 send_children_quit(nsd); 1543 exit(0); 1544 } 1545 } 1546 1547 /* Send quit command to parent: blocking, wait for receipt. */ 1548 do { 1549 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 1550 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) 1551 { 1552 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s", 1553 strerror(errno)); 1554 } 1555 /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ 1556 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); 1557 ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 1558 RELOAD_SYNC_TIMEOUT); 1559 if(ret == -2) { 1560 DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry")); 1561 } 1562 } while (ret == -2); 1563 if(ret == -1) { 1564 log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s", 1565 strerror(errno)); 1566 } 1567 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd)); 1568 if(cmd == NSD_QUIT) { 1569 /* small race condition possible here, parent got quit cmd. */ 1570 send_children_quit(nsd); 1571 exit(1); 1572 } 1573 assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); 1574 #ifdef BIND8_STATS 1575 reload_do_stats(cmdsocket, nsd, &last_task); 1576 #endif 1577 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); 1578 task_process_sync(nsd->task[nsd->mytask]); 1579 #ifdef USE_ZONE_STATS 1580 server_zonestat_realloc(nsd); /* realloc for next children */ 1581 #endif 1582 1583 /* send soainfo to the xfrd process, signal it that reload is done, 1584 * it picks up the taskudb */ 1585 cmd = NSD_RELOAD_DONE; 1586 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 1587 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", 1588 strerror(errno)); 1589 } 1590 mypid = getpid(); 1591 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1592 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1593 strerror(errno)); 1594 } 1595 1596 /* try to reopen file */ 1597 if (nsd->file_rotation_ok) 1598 log_reopen(nsd->log_filename, 1); 1599 /* exit reload, continue as new server_main */ 1600 } 1601 1602 /* 1603 * Get the mode depending on the signal hints that have been received. 1604 * Multiple signal hints can be received and will be handled in turn. 1605 */ 1606 static sig_atomic_t 1607 server_signal_mode(struct nsd *nsd) 1608 { 1609 if(nsd->signal_hint_quit) { 1610 nsd->signal_hint_quit = 0; 1611 return NSD_QUIT; 1612 } 1613 else if(nsd->signal_hint_shutdown) { 1614 nsd->signal_hint_shutdown = 0; 1615 return NSD_SHUTDOWN; 1616 } 1617 else if(nsd->signal_hint_child) { 1618 nsd->signal_hint_child = 0; 1619 return NSD_REAP_CHILDREN; 1620 } 1621 else if(nsd->signal_hint_reload) { 1622 nsd->signal_hint_reload = 0; 1623 return NSD_RELOAD; 1624 } 1625 else if(nsd->signal_hint_reload_hup) { 1626 nsd->signal_hint_reload_hup = 0; 1627 return NSD_RELOAD_REQ; 1628 } 1629 else if(nsd->signal_hint_stats) { 1630 nsd->signal_hint_stats = 0; 1631 #ifdef BIND8_STATS 1632 set_bind8_alarm(nsd); 1633 #endif 1634 return NSD_STATS; 1635 } 1636 else if(nsd->signal_hint_statsusr) { 1637 nsd->signal_hint_statsusr = 0; 1638 return NSD_STATS; 1639 } 1640 return NSD_RUN; 1641 } 1642 1643 /* 1644 * The main server simply waits for signals and child processes to 1645 * terminate. Child processes are restarted as necessary. 1646 */ 1647 void 1648 server_main(struct nsd *nsd) 1649 { 1650 region_type *server_region = region_create(xalloc, free); 1651 netio_type *netio = netio_create(server_region); 1652 netio_handler_type reload_listener; 1653 int reload_sockets[2] = {-1, -1}; 1654 struct timespec timeout_spec; 1655 int status; 1656 pid_t child_pid; 1657 pid_t reload_pid = -1; 1658 sig_atomic_t mode; 1659 1660 /* Ensure we are the main process */ 1661 assert(nsd->server_kind == NSD_SERVER_MAIN); 1662 1663 /* Add listener for the XFRD process */ 1664 netio_add_handler(netio, nsd->xfrd_listener); 1665 1666 /* Start the child processes that handle incoming queries */ 1667 if (server_start_children(nsd, server_region, netio, 1668 &nsd->xfrd_listener->fd) != 0) { 1669 send_children_quit(nsd); 1670 exit(1); 1671 } 1672 reload_listener.fd = -1; 1673 1674 /* This_child MUST be 0, because this is the parent process */ 1675 assert(nsd->this_child == 0); 1676 1677 /* Run the server until we get a shutdown signal */ 1678 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 1679 /* Did we receive a signal that changes our mode? */ 1680 if(mode == NSD_RUN) { 1681 nsd->mode = mode = server_signal_mode(nsd); 1682 } 1683 1684 switch (mode) { 1685 case NSD_RUN: 1686 /* see if any child processes terminated */ 1687 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) { 1688 int is_child = delete_child_pid(nsd, child_pid); 1689 if (is_child != -1 && nsd->children[is_child].need_to_exit) { 1690 if(nsd->children[is_child].child_fd == -1) 1691 nsd->children[is_child].has_exited = 1; 1692 parent_check_all_children_exited(nsd); 1693 } else if(is_child != -1) { 1694 log_msg(LOG_WARNING, 1695 "server %d died unexpectedly with status %d, restarting", 1696 (int) child_pid, status); 1697 restart_child_servers(nsd, server_region, netio, 1698 &nsd->xfrd_listener->fd); 1699 } else if (child_pid == reload_pid) { 1700 sig_atomic_t cmd = NSD_RELOAD_DONE; 1701 pid_t mypid; 1702 log_msg(LOG_WARNING, 1703 "Reload process %d failed with status %d, continuing with old database", 1704 (int) child_pid, status); 1705 reload_pid = -1; 1706 if(reload_listener.fd != -1) close(reload_listener.fd); 1707 reload_listener.fd = -1; 1708 reload_listener.event_types = NETIO_EVENT_NONE; 1709 task_process_sync(nsd->task[nsd->mytask]); 1710 /* inform xfrd reload attempt ended */ 1711 if(!write_socket(nsd->xfrd_listener->fd, 1712 &cmd, sizeof(cmd))) { 1713 log_msg(LOG_ERR, "problems " 1714 "sending SOAEND to xfrd: %s", 1715 strerror(errno)); 1716 } 1717 mypid = getpid(); 1718 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1719 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1720 strerror(errno)); 1721 } 1722 } else if(status != 0) { 1723 /* check for status, because we get 1724 * the old-servermain because reload 1725 * is the process-parent of old-main, 1726 * and we get older server-processes 1727 * that are exiting after a reload */ 1728 log_msg(LOG_WARNING, 1729 "process %d terminated with status %d", 1730 (int) child_pid, status); 1731 } 1732 } 1733 if (child_pid == -1) { 1734 if (errno == EINTR) { 1735 continue; 1736 } 1737 if (errno != ECHILD) 1738 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 1739 } 1740 if (nsd->mode != NSD_RUN) 1741 break; 1742 1743 /* timeout to collect processes. In case no sigchild happens. */ 1744 timeout_spec.tv_sec = 60; 1745 timeout_spec.tv_nsec = 0; 1746 1747 /* listen on ports, timeout for collecting terminated children */ 1748 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 1749 if (errno != EINTR) { 1750 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 1751 } 1752 } 1753 if(nsd->restart_children) { 1754 restart_child_servers(nsd, server_region, netio, 1755 &nsd->xfrd_listener->fd); 1756 nsd->restart_children = 0; 1757 } 1758 if(nsd->reload_failed) { 1759 sig_atomic_t cmd = NSD_RELOAD_DONE; 1760 pid_t mypid; 1761 nsd->reload_failed = 0; 1762 log_msg(LOG_WARNING, 1763 "Reload process %d failed, continuing with old database", 1764 (int) reload_pid); 1765 reload_pid = -1; 1766 if(reload_listener.fd != -1) close(reload_listener.fd); 1767 reload_listener.fd = -1; 1768 reload_listener.event_types = NETIO_EVENT_NONE; 1769 task_process_sync(nsd->task[nsd->mytask]); 1770 /* inform xfrd reload attempt ended */ 1771 if(!write_socket(nsd->xfrd_listener->fd, 1772 &cmd, sizeof(cmd))) { 1773 log_msg(LOG_ERR, "problems " 1774 "sending SOAEND to xfrd: %s", 1775 strerror(errno)); 1776 } 1777 mypid = getpid(); 1778 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1779 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1780 strerror(errno)); 1781 } 1782 } 1783 1784 break; 1785 case NSD_RELOAD_REQ: { 1786 sig_atomic_t cmd = NSD_RELOAD_REQ; 1787 log_msg(LOG_WARNING, "SIGHUP received, reloading..."); 1788 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1789 "main: ipc send reload_req to xfrd")); 1790 if(!write_socket(nsd->xfrd_listener->fd, 1791 &cmd, sizeof(cmd))) { 1792 log_msg(LOG_ERR, "server_main: could not send " 1793 "reload_req to xfrd: %s", strerror(errno)); 1794 } 1795 nsd->mode = NSD_RUN; 1796 } break; 1797 case NSD_RELOAD: 1798 /* Continue to run nsd after reload */ 1799 nsd->mode = NSD_RUN; 1800 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); 1801 if (reload_pid != -1) { 1802 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 1803 (int) reload_pid); 1804 break; 1805 } 1806 1807 /* switch the mytask to keep track of who owns task*/ 1808 nsd->mytask = 1 - nsd->mytask; 1809 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 1810 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 1811 reload_pid = -1; 1812 break; 1813 } 1814 1815 /* Do actual reload */ 1816 reload_pid = fork(); 1817 switch (reload_pid) { 1818 case -1: 1819 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 1820 break; 1821 default: 1822 /* PARENT */ 1823 close(reload_sockets[0]); 1824 server_reload(nsd, server_region, netio, 1825 reload_sockets[1]); 1826 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 1827 close(reload_sockets[1]); 1828 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 1829 /* drop stale xfrd ipc data */ 1830 ((struct ipc_handler_conn_data*)nsd-> 1831 xfrd_listener->user_data) 1832 ->conn->is_reading = 0; 1833 reload_pid = -1; 1834 reload_listener.fd = -1; 1835 reload_listener.event_types = NETIO_EVENT_NONE; 1836 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 1837 break; 1838 case 0: 1839 /* CHILD */ 1840 /* server_main keep running until NSD_QUIT_SYNC 1841 * received from reload. */ 1842 close(reload_sockets[1]); 1843 reload_listener.fd = reload_sockets[0]; 1844 reload_listener.timeout = NULL; 1845 reload_listener.user_data = nsd; 1846 reload_listener.event_types = NETIO_EVENT_READ; 1847 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 1848 netio_add_handler(netio, &reload_listener); 1849 reload_pid = getppid(); 1850 break; 1851 } 1852 break; 1853 case NSD_QUIT_SYNC: 1854 /* synchronisation of xfrd, parent and reload */ 1855 if(!nsd->quit_sync_done && reload_listener.fd != -1) { 1856 sig_atomic_t cmd = NSD_RELOAD; 1857 /* stop xfrd ipc writes in progress */ 1858 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1859 "main: ipc send indication reload")); 1860 if(!write_socket(nsd->xfrd_listener->fd, 1861 &cmd, sizeof(cmd))) { 1862 log_msg(LOG_ERR, "server_main: could not send reload " 1863 "indication to xfrd: %s", strerror(errno)); 1864 } 1865 /* wait for ACK from xfrd */ 1866 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 1867 nsd->quit_sync_done = 1; 1868 } 1869 nsd->mode = NSD_RUN; 1870 break; 1871 case NSD_QUIT: 1872 /* silent shutdown during reload */ 1873 if(reload_listener.fd != -1) { 1874 /* acknowledge the quit, to sync reload that we will really quit now */ 1875 sig_atomic_t cmd = NSD_RELOAD; 1876 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 1877 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 1878 log_msg(LOG_ERR, "server_main: " 1879 "could not ack quit: %s", strerror(errno)); 1880 } 1881 #ifdef BIND8_STATS 1882 parent_send_stats(nsd, reload_listener.fd); 1883 #endif /* BIND8_STATS */ 1884 close(reload_listener.fd); 1885 } 1886 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); 1887 /* only quit children after xfrd has acked */ 1888 send_children_quit(nsd); 1889 1890 #ifdef MEMCLEAN /* OS collects memory pages */ 1891 region_destroy(server_region); 1892 #endif 1893 server_shutdown(nsd); 1894 1895 /* ENOTREACH */ 1896 break; 1897 case NSD_SHUTDOWN: 1898 break; 1899 case NSD_REAP_CHILDREN: 1900 /* continue; wait for child in run loop */ 1901 nsd->mode = NSD_RUN; 1902 break; 1903 case NSD_STATS: 1904 #ifdef BIND8_STATS 1905 set_children_stats(nsd); 1906 #endif 1907 nsd->mode = NSD_RUN; 1908 break; 1909 default: 1910 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); 1911 nsd->mode = NSD_RUN; 1912 break; 1913 } 1914 } 1915 log_msg(LOG_WARNING, "signal received, shutting down..."); 1916 1917 /* close opened ports to avoid race with restart of nsd */ 1918 server_close_all_sockets(nsd->udp, nsd->ifs); 1919 server_close_all_sockets(nsd->tcp, nsd->ifs); 1920 #ifdef HAVE_SSL 1921 daemon_remote_close(nsd->rc); 1922 #endif 1923 send_children_quit_and_wait(nsd); 1924 1925 /* Unlink it if possible... */ 1926 unlinkpid(nsd->pidfile); 1927 unlink(nsd->task[0]->fname); 1928 unlink(nsd->task[1]->fname); 1929 #ifdef USE_ZONE_STATS 1930 unlink(nsd->zonestatfname[0]); 1931 unlink(nsd->zonestatfname[1]); 1932 #endif 1933 #ifdef USE_DNSTAP 1934 dt_collector_close(nsd->dt_collector, nsd); 1935 #endif 1936 1937 if(reload_listener.fd != -1) { 1938 sig_atomic_t cmd = NSD_QUIT; 1939 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1940 "main: ipc send quit to reload-process")); 1941 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 1942 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 1943 strerror(errno)); 1944 } 1945 fsync(reload_listener.fd); 1946 close(reload_listener.fd); 1947 /* wait for reload to finish processing */ 1948 while(1) { 1949 if(waitpid(reload_pid, NULL, 0) == -1) { 1950 if(errno == EINTR) continue; 1951 if(errno == ECHILD) break; 1952 log_msg(LOG_ERR, "waitpid(reload %d): %s", 1953 (int)reload_pid, strerror(errno)); 1954 } 1955 break; 1956 } 1957 } 1958 if(nsd->xfrd_listener->fd != -1) { 1959 /* complete quit, stop xfrd */ 1960 sig_atomic_t cmd = NSD_QUIT; 1961 DEBUG(DEBUG_IPC,1, (LOG_INFO, 1962 "main: ipc send quit to xfrd")); 1963 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 1964 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 1965 strerror(errno)); 1966 } 1967 fsync(nsd->xfrd_listener->fd); 1968 close(nsd->xfrd_listener->fd); 1969 (void)kill(nsd->pid, SIGTERM); 1970 } 1971 1972 #ifdef MEMCLEAN /* OS collects memory pages */ 1973 region_destroy(server_region); 1974 #endif 1975 /* write the nsd.db to disk, wait for it to complete */ 1976 udb_base_sync(nsd->db->udb, 1); 1977 udb_base_close(nsd->db->udb); 1978 server_shutdown(nsd); 1979 } 1980 1981 static query_state_type 1982 server_process_query(struct nsd *nsd, struct query *query) 1983 { 1984 return query_process(query, nsd); 1985 } 1986 1987 static query_state_type 1988 server_process_query_udp(struct nsd *nsd, struct query *query) 1989 { 1990 #ifdef RATELIMIT 1991 if(query_process(query, nsd) != QUERY_DISCARDED) { 1992 if(rrl_process_query(query)) 1993 return rrl_slip(query); 1994 else return QUERY_PROCESSED; 1995 } 1996 return QUERY_DISCARDED; 1997 #else 1998 return query_process(query, nsd); 1999 #endif 2000 } 2001 2002 struct event_base* 2003 nsd_child_event_base(void) 2004 { 2005 struct event_base* base; 2006 #ifdef USE_MINI_EVENT 2007 static time_t secs; 2008 static struct timeval now; 2009 base = event_init(&secs, &now); 2010 #else 2011 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) 2012 /* libev */ 2013 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); 2014 # else 2015 /* libevent */ 2016 # ifdef HAVE_EVENT_BASE_NEW 2017 base = event_base_new(); 2018 # else 2019 base = event_init(); 2020 # endif 2021 # endif 2022 #endif 2023 return base; 2024 } 2025 2026 /* 2027 * Serve DNS requests. 2028 */ 2029 void 2030 server_child(struct nsd *nsd) 2031 { 2032 size_t i, from, numifs; 2033 region_type *server_region = region_create(xalloc, free); 2034 struct event_base* event_base = nsd_child_event_base(); 2035 query_type *udp_query; 2036 sig_atomic_t mode; 2037 2038 if(!event_base) { 2039 log_msg(LOG_ERR, "nsd server could not create event base"); 2040 exit(1); 2041 } 2042 nsd->event_base = event_base; 2043 nsd->server_region = server_region; 2044 2045 #ifdef RATELIMIT 2046 rrl_init(nsd->this_child->child_num); 2047 #endif 2048 2049 assert(nsd->server_kind != NSD_SERVER_MAIN); 2050 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 2051 2052 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 2053 server_close_all_sockets(nsd->tcp, nsd->ifs); 2054 } 2055 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 2056 server_close_all_sockets(nsd->udp, nsd->ifs); 2057 } 2058 2059 if (nsd->this_child->parent_fd != -1) { 2060 struct event *handler; 2061 struct ipc_handler_conn_data* user_data = 2062 (struct ipc_handler_conn_data*)region_alloc( 2063 server_region, sizeof(struct ipc_handler_conn_data)); 2064 user_data->nsd = nsd; 2065 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); 2066 2067 handler = (struct event*) region_alloc( 2068 server_region, sizeof(*handler)); 2069 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| 2070 EV_READ, child_handle_parent_command, user_data); 2071 if(event_base_set(event_base, handler) != 0) 2072 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); 2073 if(event_add(handler, NULL) != 0) 2074 log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); 2075 } 2076 2077 if(nsd->reuseport) { 2078 numifs = nsd->ifs / nsd->reuseport; 2079 from = numifs * nsd->this_child->child_num; 2080 if(from+numifs > nsd->ifs) { /* should not happen */ 2081 from = 0; 2082 numifs = nsd->ifs; 2083 } 2084 } else { 2085 from = 0; 2086 numifs = nsd->ifs; 2087 } 2088 2089 if (nsd->server_kind & NSD_SERVER_UDP) { 2090 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) 2091 udp_query = query_create(server_region, 2092 compressed_dname_offsets, compression_table_size, 2093 compressed_dnames); 2094 #else 2095 udp_query = NULL; 2096 memset(msgs, 0, sizeof(msgs)); 2097 for (i = 0; i < NUM_RECV_PER_SELECT; i++) { 2098 queries[i] = query_create(server_region, 2099 compressed_dname_offsets, 2100 compression_table_size, compressed_dnames); 2101 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2102 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 2103 iovecs[i].iov_len = buffer_remaining(queries[i]->packet);; 2104 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 2105 msgs[i].msg_hdr.msg_iovlen = 1; 2106 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 2107 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2108 } 2109 #endif 2110 for (i = from; i < from+numifs; ++i) { 2111 struct udp_handler_data *data; 2112 struct event *handler; 2113 2114 data = (struct udp_handler_data *) region_alloc( 2115 server_region, 2116 sizeof(struct udp_handler_data)); 2117 data->query = udp_query; 2118 data->nsd = nsd; 2119 data->socket = &nsd->udp[i]; 2120 2121 handler = (struct event*) region_alloc( 2122 server_region, sizeof(*handler)); 2123 event_set(handler, nsd->udp[i].s, EV_PERSIST|EV_READ, 2124 handle_udp, data); 2125 if(event_base_set(event_base, handler) != 0) 2126 log_msg(LOG_ERR, "nsd udp: event_base_set failed"); 2127 if(event_add(handler, NULL) != 0) 2128 log_msg(LOG_ERR, "nsd udp: event_add failed"); 2129 } 2130 } 2131 2132 /* 2133 * Keep track of all the TCP accept handlers so we can enable 2134 * and disable them based on the current number of active TCP 2135 * connections. 2136 */ 2137 tcp_accept_handler_count = numifs; 2138 tcp_accept_handlers = (struct tcp_accept_handler_data*) 2139 region_alloc_array(server_region, 2140 numifs, sizeof(*tcp_accept_handlers)); 2141 if (nsd->server_kind & NSD_SERVER_TCP) { 2142 for (i = from; i < numifs; ++i) { 2143 struct event *handler = &tcp_accept_handlers[i-from].event; 2144 struct tcp_accept_handler_data* data = 2145 &tcp_accept_handlers[i-from]; 2146 data->nsd = nsd; 2147 data->socket = &nsd->tcp[i]; 2148 event_set(handler, nsd->tcp[i].s, EV_PERSIST|EV_READ, 2149 handle_tcp_accept, data); 2150 if(event_base_set(event_base, handler) != 0) 2151 log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); 2152 if(event_add(handler, NULL) != 0) 2153 log_msg(LOG_ERR, "nsd tcp: event_add failed"); 2154 data->event_added = 1; 2155 } 2156 } else tcp_accept_handler_count = 0; 2157 2158 /* The main loop... */ 2159 while ((mode = nsd->mode) != NSD_QUIT) { 2160 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 2161 2162 /* Do we need to do the statistics... */ 2163 if (mode == NSD_STATS) { 2164 #ifdef BIND8_STATS 2165 int p = nsd->st.period; 2166 nsd->st.period = 1; /* force stats printout */ 2167 /* Dump the statistics */ 2168 bind8_stats(nsd); 2169 nsd->st.period = p; 2170 #else /* !BIND8_STATS */ 2171 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 2172 #endif /* BIND8_STATS */ 2173 2174 nsd->mode = NSD_RUN; 2175 } 2176 else if (mode == NSD_REAP_CHILDREN) { 2177 /* got signal, notify parent. parent reaps terminated children. */ 2178 if (nsd->this_child->parent_fd != -1) { 2179 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 2180 if (write(nsd->this_child->parent_fd, 2181 &parent_notify, 2182 sizeof(parent_notify)) == -1) 2183 { 2184 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 2185 (int) nsd->this_child->pid, strerror(errno)); 2186 } 2187 } else /* no parent, so reap 'em */ 2188 while (waitpid(-1, NULL, WNOHANG) > 0) ; 2189 nsd->mode = NSD_RUN; 2190 } 2191 else if(mode == NSD_RUN) { 2192 /* Wait for a query... */ 2193 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 2194 if (errno != EINTR) { 2195 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 2196 break; 2197 } 2198 } 2199 } else if(mode == NSD_QUIT) { 2200 /* ignore here, quit */ 2201 } else { 2202 log_msg(LOG_ERR, "mode bad value %d, back to service.", 2203 (int)mode); 2204 nsd->mode = NSD_RUN; 2205 } 2206 } 2207 2208 #ifdef BIND8_STATS 2209 bind8_stats(nsd); 2210 #endif /* BIND8_STATS */ 2211 2212 #ifdef MEMCLEAN /* OS collects memory pages */ 2213 #ifdef RATELIMIT 2214 rrl_deinit(nsd->this_child->child_num); 2215 #endif 2216 event_base_free(event_base); 2217 region_destroy(server_region); 2218 #endif 2219 server_shutdown(nsd); 2220 } 2221 2222 #if defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) 2223 static void 2224 handle_udp(int fd, short event, void* arg) 2225 { 2226 struct udp_handler_data *data = (struct udp_handler_data *) arg; 2227 int received, sent, recvcount, i; 2228 struct query *q; 2229 2230 if (!(event & EV_READ)) { 2231 return; 2232 } 2233 recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 2234 /* this printf strangely gave a performance increase on Linux */ 2235 /* printf("recvcount %d \n", recvcount); */ 2236 if (recvcount == -1) { 2237 if (errno != EAGAIN && errno != EINTR) { 2238 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 2239 STATUP(data->nsd, rxerr); 2240 /* No zone statup */ 2241 } 2242 /* Simply no data available */ 2243 return; 2244 } 2245 for (i = 0; i < recvcount; i++) { 2246 loopstart: 2247 received = msgs[i].msg_len; 2248 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 2249 q = queries[i]; 2250 if (received == -1) { 2251 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( 2252 msgs[i].msg_hdr.msg_flags)); 2253 STATUP(data->nsd, rxerr); 2254 /* No zone statup */ 2255 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2256 iovecs[i].iov_len = buffer_remaining(q->packet); 2257 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2258 goto swap_drop; 2259 } 2260 2261 /* Account... */ 2262 #ifdef BIND8_STATS 2263 if (data->socket->fam == AF_INET) { 2264 STATUP(data->nsd, qudp); 2265 } else if (data->socket->fam == AF_INET6) { 2266 STATUP(data->nsd, qudp6); 2267 } 2268 #endif 2269 2270 buffer_skip(q->packet, received); 2271 buffer_flip(q->packet); 2272 #ifdef USE_DNSTAP 2273 dt_collector_submit_auth_query(data->nsd, &q->addr, q->addrlen, 2274 q->tcp, q->packet); 2275 #endif /* USE_DNSTAP */ 2276 2277 /* Process and answer the query... */ 2278 if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) { 2279 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 2280 STATUP(data->nsd, nona); 2281 ZTATUP(data->nsd, q->zone, nona); 2282 } 2283 2284 #ifdef USE_ZONE_STATS 2285 if (data->socket->fam == AF_INET) { 2286 ZTATUP(data->nsd, q->zone, qudp); 2287 } else if (data->socket->fam == AF_INET6) { 2288 ZTATUP(data->nsd, q->zone, qudp6); 2289 } 2290 #endif 2291 2292 /* Add EDNS0 and TSIG info if necessary. */ 2293 query_add_optional(q, data->nsd); 2294 2295 buffer_flip(q->packet); 2296 iovecs[i].iov_len = buffer_remaining(q->packet); 2297 #ifdef BIND8_STATS 2298 /* Account the rcode & TC... */ 2299 STATUP2(data->nsd, rcode, RCODE(q->packet)); 2300 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 2301 if (TC(q->packet)) { 2302 STATUP(data->nsd, truncated); 2303 ZTATUP(data->nsd, q->zone, truncated); 2304 } 2305 #endif /* BIND8_STATS */ 2306 #ifdef USE_DNSTAP 2307 dt_collector_submit_auth_response(data->nsd, 2308 &q->addr, q->addrlen, q->tcp, q->packet, 2309 q->zone); 2310 #endif /* USE_DNSTAP */ 2311 } else { 2312 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2313 iovecs[i].iov_len = buffer_remaining(q->packet); 2314 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2315 swap_drop: 2316 STATUP(data->nsd, dropped); 2317 ZTATUP(data->nsd, q->zone, dropped); 2318 if(i != recvcount-1) { 2319 /* swap with last and decrease recvcount */ 2320 struct mmsghdr mtmp = msgs[i]; 2321 struct iovec iotmp = iovecs[i]; 2322 recvcount--; 2323 msgs[i] = msgs[recvcount]; 2324 iovecs[i] = iovecs[recvcount]; 2325 queries[i] = queries[recvcount]; 2326 msgs[recvcount] = mtmp; 2327 iovecs[recvcount] = iotmp; 2328 queries[recvcount] = q; 2329 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 2330 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; 2331 goto loopstart; 2332 } else { recvcount --; } 2333 } 2334 } 2335 2336 /* send until all are sent */ 2337 i = 0; 2338 while(i<recvcount) { 2339 sent = sendmmsg(fd, &msgs[i], recvcount-i, 0); 2340 if(sent == -1) { 2341 const char* es = strerror(errno); 2342 char a[48]; 2343 addr2str(&queries[i]->addr, a, sizeof(a)); 2344 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 2345 #ifdef BIND8_STATS 2346 data->nsd->st.txerr += recvcount-i; 2347 #endif /* BIND8_STATS */ 2348 break; 2349 } 2350 i += sent; 2351 } 2352 for(i=0; i<recvcount; i++) { 2353 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2354 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 2355 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2356 } 2357 } 2358 2359 #else /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */ 2360 2361 static void 2362 handle_udp(int fd, short event, void* arg) 2363 { 2364 struct udp_handler_data *data = (struct udp_handler_data *) arg; 2365 int received, sent; 2366 #ifndef NONBLOCKING_IS_BROKEN 2367 #ifdef HAVE_RECVMMSG 2368 int recvcount; 2369 #endif /* HAVE_RECVMMSG */ 2370 int i; 2371 #endif /* NONBLOCKING_IS_BROKEN */ 2372 struct query *q; 2373 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) 2374 q = data->query; 2375 #endif 2376 2377 if (!(event & EV_READ)) { 2378 return; 2379 } 2380 #ifndef NONBLOCKING_IS_BROKEN 2381 #ifdef HAVE_RECVMMSG 2382 recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 2383 /* this printf strangely gave a performance increase on Linux */ 2384 /* printf("recvcount %d \n", recvcount); */ 2385 if (recvcount == -1) { 2386 if (errno != EAGAIN && errno != EINTR) { 2387 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 2388 STATUP(data->nsd, rxerr); 2389 /* No zone statup */ 2390 } 2391 /* Simply no data available */ 2392 return; 2393 } 2394 for (i = 0; i < recvcount; i++) { 2395 received = msgs[i].msg_len; 2396 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 2397 if (received == -1) { 2398 log_msg(LOG_ERR, "recvmmsg failed"); 2399 STATUP(data->nsd, rxerr); 2400 /* No zone statup */ 2401 /* the error can be found in msgs[i].msg_hdr.msg_flags */ 2402 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2403 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 2404 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2405 continue; 2406 } 2407 q = queries[i]; 2408 #else 2409 for(i=0; i<NUM_RECV_PER_SELECT; i++) { 2410 #endif /* HAVE_RECVMMSG */ 2411 #endif /* NONBLOCKING_IS_BROKEN */ 2412 2413 #if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG)) 2414 /* Initialize the query... */ 2415 query_reset(q, UDP_MAX_MESSAGE_LEN, 0); 2416 2417 received = recvfrom(fd, 2418 buffer_begin(q->packet), 2419 buffer_remaining(q->packet), 2420 0, 2421 (struct sockaddr *)&q->addr, 2422 &q->addrlen); 2423 if (received == -1) { 2424 if (errno != EAGAIN && errno != EINTR) { 2425 log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno)); 2426 STATUP(data->nsd, rxerr); 2427 /* No zone statup */ 2428 } 2429 return; 2430 } 2431 #endif /* NONBLOCKING_IS_BROKEN || !HAVE_RECVMMSG */ 2432 2433 /* Account... */ 2434 if (data->socket->fam == AF_INET) { 2435 STATUP(data->nsd, qudp); 2436 } else if (data->socket->fam == AF_INET6) { 2437 STATUP(data->nsd, qudp6); 2438 } 2439 2440 buffer_skip(q->packet, received); 2441 buffer_flip(q->packet); 2442 #ifdef USE_DNSTAP 2443 dt_collector_submit_auth_query(data->nsd, &q->addr, q->addrlen, 2444 q->tcp, q->packet); 2445 #endif /* USE_DNSTAP */ 2446 2447 /* Process and answer the query... */ 2448 if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) { 2449 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 2450 STATUP(data->nsd, nona); 2451 ZTATUP(data->nsd, q->zone, nona); 2452 } 2453 2454 #ifdef USE_ZONE_STATS 2455 if (data->socket->fam == AF_INET) { 2456 ZTATUP(data->nsd, q->zone, qudp); 2457 } else if (data->socket->fam == AF_INET6) { 2458 ZTATUP(data->nsd, q->zone, qudp6); 2459 } 2460 #endif 2461 2462 /* Add EDNS0 and TSIG info if necessary. */ 2463 query_add_optional(q, data->nsd); 2464 2465 buffer_flip(q->packet); 2466 2467 sent = sendto(fd, 2468 buffer_begin(q->packet), 2469 buffer_remaining(q->packet), 2470 0, 2471 (struct sockaddr *) &q->addr, 2472 q->addrlen); 2473 if (sent == -1) { 2474 const char* es = strerror(errno); 2475 char a[48]; 2476 addr2str(&q->addr, a, sizeof(a)); 2477 log_msg(LOG_ERR, "sendto %s failed: %s", a, es); 2478 STATUP(data->nsd, txerr); 2479 ZTATUP(data->nsd, q->zone, txerr); 2480 } else if ((size_t) sent != buffer_remaining(q->packet)) { 2481 log_msg(LOG_ERR, "sent %d in place of %d bytes", sent, (int) buffer_remaining(q->packet)); 2482 } else { 2483 #ifdef BIND8_STATS 2484 /* Account the rcode & TC... */ 2485 STATUP2(data->nsd, rcode, RCODE(q->packet)); 2486 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 2487 if (TC(q->packet)) { 2488 STATUP(data->nsd, truncated); 2489 ZTATUP(data->nsd, q->zone, truncated); 2490 } 2491 #endif /* BIND8_STATS */ 2492 #ifdef USE_DNSTAP 2493 dt_collector_submit_auth_response(data->nsd, 2494 &q->addr, q->addrlen, q->tcp, 2495 q->packet, q->zone); 2496 #endif /* USE_DNSTAP */ 2497 } 2498 } else { 2499 STATUP(data->nsd, dropped); 2500 ZTATUP(data->nsd, q->zone, dropped); 2501 } 2502 #ifndef NONBLOCKING_IS_BROKEN 2503 #ifdef HAVE_RECVMMSG 2504 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2505 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 2506 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2507 #endif 2508 } 2509 #endif 2510 } 2511 #endif /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */ 2512 2513 2514 static void 2515 cleanup_tcp_handler(struct tcp_handler_data* data) 2516 { 2517 event_del(&data->event); 2518 close(data->event.ev_fd); 2519 2520 /* 2521 * Enable the TCP accept handlers when the current number of 2522 * TCP connections is about to drop below the maximum number 2523 * of TCP connections. 2524 */ 2525 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 2526 configure_handler_event_types(EV_READ|EV_PERSIST); 2527 if(slowaccept) { 2528 event_del(&slowaccept_event); 2529 slowaccept = 0; 2530 } 2531 } 2532 --data->nsd->current_tcp_count; 2533 assert(data->nsd->current_tcp_count >= 0); 2534 2535 region_destroy(data->region); 2536 } 2537 2538 static void 2539 handle_tcp_reading(int fd, short event, void* arg) 2540 { 2541 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 2542 ssize_t received; 2543 struct event_base* ev_base; 2544 struct timeval timeout; 2545 2546 if ((event & EV_TIMEOUT)) { 2547 /* Connection timed out. */ 2548 cleanup_tcp_handler(data); 2549 return; 2550 } 2551 2552 if (data->nsd->tcp_query_count > 0 && 2553 data->query_count >= data->nsd->tcp_query_count) { 2554 /* No more queries allowed on this tcp connection. */ 2555 cleanup_tcp_handler(data); 2556 return; 2557 } 2558 2559 assert((event & EV_READ)); 2560 2561 if (data->bytes_transmitted == 0) { 2562 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 2563 } 2564 2565 /* 2566 * Check if we received the leading packet length bytes yet. 2567 */ 2568 if (data->bytes_transmitted < sizeof(uint16_t)) { 2569 received = read(fd, 2570 (char *) &data->query->tcplen 2571 + data->bytes_transmitted, 2572 sizeof(uint16_t) - data->bytes_transmitted); 2573 if (received == -1) { 2574 if (errno == EAGAIN || errno == EINTR) { 2575 /* 2576 * Read would block, wait until more 2577 * data is available. 2578 */ 2579 return; 2580 } else { 2581 char buf[48]; 2582 addr2str(&data->query->addr, buf, sizeof(buf)); 2583 #ifdef ECONNRESET 2584 if (verbosity >= 2 || errno != ECONNRESET) 2585 #endif /* ECONNRESET */ 2586 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 2587 cleanup_tcp_handler(data); 2588 return; 2589 } 2590 } else if (received == 0) { 2591 /* EOF */ 2592 cleanup_tcp_handler(data); 2593 return; 2594 } 2595 2596 data->bytes_transmitted += received; 2597 if (data->bytes_transmitted < sizeof(uint16_t)) { 2598 /* 2599 * Not done with the tcplen yet, wait for more 2600 * data to become available. 2601 */ 2602 return; 2603 } 2604 2605 assert(data->bytes_transmitted == sizeof(uint16_t)); 2606 2607 data->query->tcplen = ntohs(data->query->tcplen); 2608 2609 /* 2610 * Minimum query size is: 2611 * 2612 * Size of the header (12) 2613 * + Root domain name (1) 2614 * + Query class (2) 2615 * + Query type (2) 2616 */ 2617 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 2618 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 2619 cleanup_tcp_handler(data); 2620 return; 2621 } 2622 2623 if (data->query->tcplen > data->query->maxlen) { 2624 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 2625 cleanup_tcp_handler(data); 2626 return; 2627 } 2628 2629 buffer_set_limit(data->query->packet, data->query->tcplen); 2630 } 2631 2632 assert(buffer_remaining(data->query->packet) > 0); 2633 2634 /* Read the (remaining) query data. */ 2635 received = read(fd, 2636 buffer_current(data->query->packet), 2637 buffer_remaining(data->query->packet)); 2638 if (received == -1) { 2639 if (errno == EAGAIN || errno == EINTR) { 2640 /* 2641 * Read would block, wait until more data is 2642 * available. 2643 */ 2644 return; 2645 } else { 2646 char buf[48]; 2647 addr2str(&data->query->addr, buf, sizeof(buf)); 2648 #ifdef ECONNRESET 2649 if (verbosity >= 2 || errno != ECONNRESET) 2650 #endif /* ECONNRESET */ 2651 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 2652 cleanup_tcp_handler(data); 2653 return; 2654 } 2655 } else if (received == 0) { 2656 /* EOF */ 2657 cleanup_tcp_handler(data); 2658 return; 2659 } 2660 2661 data->bytes_transmitted += received; 2662 buffer_skip(data->query->packet, received); 2663 if (buffer_remaining(data->query->packet) > 0) { 2664 /* 2665 * Message not yet complete, wait for more data to 2666 * become available. 2667 */ 2668 return; 2669 } 2670 2671 assert(buffer_position(data->query->packet) == data->query->tcplen); 2672 2673 /* Account... */ 2674 #ifdef BIND8_STATS 2675 #ifndef INET6 2676 STATUP(data->nsd, ctcp); 2677 #else 2678 if (data->query->addr.ss_family == AF_INET) { 2679 STATUP(data->nsd, ctcp); 2680 } else if (data->query->addr.ss_family == AF_INET6) { 2681 STATUP(data->nsd, ctcp6); 2682 } 2683 #endif 2684 #endif /* BIND8_STATS */ 2685 2686 /* We have a complete query, process it. */ 2687 2688 /* tcp-query-count: handle query counter ++ */ 2689 data->query_count++; 2690 2691 buffer_flip(data->query->packet); 2692 #ifdef USE_DNSTAP 2693 dt_collector_submit_auth_query(data->nsd, &data->query->addr, 2694 data->query->addrlen, data->query->tcp, data->query->packet); 2695 #endif /* USE_DNSTAP */ 2696 data->query_state = server_process_query(data->nsd, data->query); 2697 if (data->query_state == QUERY_DISCARDED) { 2698 /* Drop the packet and the entire connection... */ 2699 STATUP(data->nsd, dropped); 2700 ZTATUP(data->nsd, data->query->zone, dropped); 2701 cleanup_tcp_handler(data); 2702 return; 2703 } 2704 2705 #ifdef BIND8_STATS 2706 if (RCODE(data->query->packet) == RCODE_OK 2707 && !AA(data->query->packet)) 2708 { 2709 STATUP(data->nsd, nona); 2710 ZTATUP(data->nsd, data->query->zone, nona); 2711 } 2712 #endif /* BIND8_STATS */ 2713 2714 #ifdef USE_ZONE_STATS 2715 #ifndef INET6 2716 ZTATUP(data->nsd, data->query->zone, ctcp); 2717 #else 2718 if (data->query->addr.ss_family == AF_INET) { 2719 ZTATUP(data->nsd, data->query->zone, ctcp); 2720 } else if (data->query->addr.ss_family == AF_INET6) { 2721 ZTATUP(data->nsd, data->query->zone, ctcp6); 2722 } 2723 #endif 2724 #endif /* USE_ZONE_STATS */ 2725 2726 query_add_optional(data->query, data->nsd); 2727 2728 /* Switch to the tcp write handler. */ 2729 buffer_flip(data->query->packet); 2730 data->query->tcplen = buffer_remaining(data->query->packet); 2731 #ifdef USE_DNSTAP 2732 dt_collector_submit_auth_response(data->nsd, &data->query->addr, 2733 data->query->addrlen, data->query->tcp, data->query->packet, 2734 data->query->zone); 2735 #endif /* USE_DNSTAP */ 2736 data->bytes_transmitted = 0; 2737 2738 timeout.tv_sec = data->tcp_timeout / 1000; 2739 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 2740 2741 ev_base = data->event.ev_base; 2742 event_del(&data->event); 2743 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 2744 handle_tcp_writing, data); 2745 if(event_base_set(ev_base, &data->event) != 0) 2746 log_msg(LOG_ERR, "event base set tcpr failed"); 2747 if(event_add(&data->event, &timeout) != 0) 2748 log_msg(LOG_ERR, "event add tcpr failed"); 2749 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 2750 handle_tcp_writing(fd, EV_WRITE, data); 2751 } 2752 2753 static void 2754 handle_tcp_writing(int fd, short event, void* arg) 2755 { 2756 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 2757 ssize_t sent; 2758 struct query *q = data->query; 2759 struct timeval timeout; 2760 struct event_base* ev_base; 2761 2762 if ((event & EV_TIMEOUT)) { 2763 /* Connection timed out. */ 2764 cleanup_tcp_handler(data); 2765 return; 2766 } 2767 2768 assert((event & EV_WRITE)); 2769 2770 if (data->bytes_transmitted < sizeof(q->tcplen)) { 2771 /* Writing the response packet length. */ 2772 uint16_t n_tcplen = htons(q->tcplen); 2773 #ifdef HAVE_WRITEV 2774 struct iovec iov[2]; 2775 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted; 2776 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 2777 iov[1].iov_base = buffer_begin(q->packet); 2778 iov[1].iov_len = buffer_limit(q->packet); 2779 sent = writev(fd, iov, 2); 2780 #else /* HAVE_WRITEV */ 2781 sent = write(fd, 2782 (const char *) &n_tcplen + data->bytes_transmitted, 2783 sizeof(n_tcplen) - data->bytes_transmitted); 2784 #endif /* HAVE_WRITEV */ 2785 if (sent == -1) { 2786 if (errno == EAGAIN || errno == EINTR) { 2787 /* 2788 * Write would block, wait until 2789 * socket becomes writable again. 2790 */ 2791 return; 2792 } else { 2793 #ifdef ECONNRESET 2794 if(verbosity >= 2 || errno != ECONNRESET) 2795 #endif /* ECONNRESET */ 2796 #ifdef EPIPE 2797 if(verbosity >= 2 || errno != EPIPE) 2798 #endif /* EPIPE 'broken pipe' */ 2799 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 2800 cleanup_tcp_handler(data); 2801 return; 2802 } 2803 } 2804 2805 data->bytes_transmitted += sent; 2806 if (data->bytes_transmitted < sizeof(q->tcplen)) { 2807 /* 2808 * Writing not complete, wait until socket 2809 * becomes writable again. 2810 */ 2811 return; 2812 } 2813 2814 #ifdef HAVE_WRITEV 2815 sent -= sizeof(n_tcplen); 2816 /* handle potential 'packet done' code */ 2817 goto packet_could_be_done; 2818 #endif 2819 } 2820 2821 sent = write(fd, 2822 buffer_current(q->packet), 2823 buffer_remaining(q->packet)); 2824 if (sent == -1) { 2825 if (errno == EAGAIN || errno == EINTR) { 2826 /* 2827 * Write would block, wait until 2828 * socket becomes writable again. 2829 */ 2830 return; 2831 } else { 2832 #ifdef ECONNRESET 2833 if(verbosity >= 2 || errno != ECONNRESET) 2834 #endif /* ECONNRESET */ 2835 #ifdef EPIPE 2836 if(verbosity >= 2 || errno != EPIPE) 2837 #endif /* EPIPE 'broken pipe' */ 2838 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 2839 cleanup_tcp_handler(data); 2840 return; 2841 } 2842 } 2843 2844 data->bytes_transmitted += sent; 2845 #ifdef HAVE_WRITEV 2846 packet_could_be_done: 2847 #endif 2848 buffer_skip(q->packet, sent); 2849 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 2850 /* 2851 * Still more data to write when socket becomes 2852 * writable again. 2853 */ 2854 return; 2855 } 2856 2857 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 2858 2859 if (data->query_state == QUERY_IN_AXFR) { 2860 /* Continue processing AXFR and writing back results. */ 2861 buffer_clear(q->packet); 2862 data->query_state = query_axfr(data->nsd, q); 2863 if (data->query_state != QUERY_PROCESSED) { 2864 query_add_optional(data->query, data->nsd); 2865 2866 /* Reset data. */ 2867 buffer_flip(q->packet); 2868 q->tcplen = buffer_remaining(q->packet); 2869 data->bytes_transmitted = 0; 2870 /* Reset timeout. */ 2871 timeout.tv_sec = data->tcp_timeout / 1000; 2872 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 2873 ev_base = data->event.ev_base; 2874 event_del(&data->event); 2875 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 2876 handle_tcp_writing, data); 2877 if(event_base_set(ev_base, &data->event) != 0) 2878 log_msg(LOG_ERR, "event base set tcpw failed"); 2879 if(event_add(&data->event, &timeout) != 0) 2880 log_msg(LOG_ERR, "event add tcpw failed"); 2881 2882 /* 2883 * Write data if/when the socket is writable 2884 * again. 2885 */ 2886 return; 2887 } 2888 } 2889 2890 /* 2891 * Done sending, wait for the next request to arrive on the 2892 * TCP socket by installing the TCP read handler. 2893 */ 2894 if (data->nsd->tcp_query_count > 0 && 2895 data->query_count >= data->nsd->tcp_query_count) { 2896 2897 (void) shutdown(fd, SHUT_WR); 2898 } 2899 2900 data->bytes_transmitted = 0; 2901 2902 timeout.tv_sec = data->tcp_timeout / 1000; 2903 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 2904 ev_base = data->event.ev_base; 2905 event_del(&data->event); 2906 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 2907 handle_tcp_reading, data); 2908 if(event_base_set(ev_base, &data->event) != 0) 2909 log_msg(LOG_ERR, "event base set tcpw failed"); 2910 if(event_add(&data->event, &timeout) != 0) 2911 log_msg(LOG_ERR, "event add tcpw failed"); 2912 } 2913 2914 2915 static void 2916 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), 2917 void* ATTR_UNUSED(arg)) 2918 { 2919 if(slowaccept) { 2920 configure_handler_event_types(EV_PERSIST | EV_READ); 2921 slowaccept = 0; 2922 } 2923 } 2924 2925 /* 2926 * Handle an incoming TCP connection. The connection is accepted and 2927 * a new TCP reader event handler is added. The TCP handler 2928 * is responsible for cleanup when the connection is closed. 2929 */ 2930 static void 2931 handle_tcp_accept(int fd, short event, void* arg) 2932 { 2933 struct tcp_accept_handler_data *data 2934 = (struct tcp_accept_handler_data *) arg; 2935 int s; 2936 struct tcp_handler_data *tcp_data; 2937 region_type *tcp_region; 2938 #ifdef INET6 2939 struct sockaddr_storage addr; 2940 #else 2941 struct sockaddr_in addr; 2942 #endif 2943 socklen_t addrlen; 2944 struct timeval timeout; 2945 2946 if (!(event & EV_READ)) { 2947 return; 2948 } 2949 2950 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 2951 return; 2952 } 2953 2954 /* Accept it... */ 2955 addrlen = sizeof(addr); 2956 #ifndef HAVE_ACCEPT4 2957 s = accept(fd, (struct sockaddr *) &addr, &addrlen); 2958 #else 2959 s = accept4(fd, (struct sockaddr *) &addr, &addrlen, SOCK_NONBLOCK); 2960 #endif 2961 if (s == -1) { 2962 /** 2963 * EMFILE and ENFILE is a signal that the limit of open 2964 * file descriptors has been reached. Pause accept(). 2965 * EINTR is a signal interrupt. The others are various OS ways 2966 * of saying that the client has closed the connection. 2967 */ 2968 if (errno == EMFILE || errno == ENFILE) { 2969 if (!slowaccept) { 2970 /* disable accept events */ 2971 struct timeval tv; 2972 configure_handler_event_types(0); 2973 tv.tv_sec = SLOW_ACCEPT_TIMEOUT; 2974 tv.tv_usec = 0L; 2975 event_set(&slowaccept_event, -1, EV_TIMEOUT, 2976 handle_slowaccept_timeout, NULL); 2977 (void)event_base_set(data->event.ev_base, 2978 &slowaccept_event); 2979 (void)event_add(&slowaccept_event, &tv); 2980 slowaccept = 1; 2981 /* We don't want to spam the logs here */ 2982 } 2983 } else if (errno != EINTR 2984 && errno != EWOULDBLOCK 2985 #ifdef ECONNABORTED 2986 && errno != ECONNABORTED 2987 #endif /* ECONNABORTED */ 2988 #ifdef EPROTO 2989 && errno != EPROTO 2990 #endif /* EPROTO */ 2991 ) { 2992 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 2993 } 2994 return; 2995 } 2996 2997 #ifndef HAVE_ACCEPT4 2998 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 2999 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 3000 close(s); 3001 return; 3002 } 3003 #endif 3004 3005 /* 3006 * This region is deallocated when the TCP connection is 3007 * closed by the TCP handler. 3008 */ 3009 tcp_region = region_create(xalloc, free); 3010 tcp_data = (struct tcp_handler_data *) region_alloc( 3011 tcp_region, sizeof(struct tcp_handler_data)); 3012 tcp_data->region = tcp_region; 3013 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 3014 compression_table_size, compressed_dnames); 3015 tcp_data->nsd = data->nsd; 3016 tcp_data->query_count = 0; 3017 3018 tcp_data->query_state = QUERY_PROCESSED; 3019 tcp_data->bytes_transmitted = 0; 3020 memcpy(&tcp_data->query->addr, &addr, addrlen); 3021 tcp_data->query->addrlen = addrlen; 3022 3023 tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000; 3024 if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) { 3025 /* very busy, give smaller timeout */ 3026 tcp_data->tcp_timeout = 200; 3027 } 3028 timeout.tv_sec = tcp_data->tcp_timeout / 1000; 3029 timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000; 3030 3031 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 3032 handle_tcp_reading, tcp_data); 3033 if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) { 3034 log_msg(LOG_ERR, "cannot set tcp event base"); 3035 close(s); 3036 region_destroy(tcp_region); 3037 return; 3038 } 3039 if(event_add(&tcp_data->event, &timeout) != 0) { 3040 log_msg(LOG_ERR, "cannot add tcp to event base"); 3041 close(s); 3042 region_destroy(tcp_region); 3043 return; 3044 } 3045 3046 /* 3047 * Keep track of the total number of TCP handlers installed so 3048 * we can stop accepting connections when the maximum number 3049 * of simultaneous TCP connections is reached. 3050 */ 3051 ++data->nsd->current_tcp_count; 3052 if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 3053 configure_handler_event_types(0); 3054 } 3055 } 3056 3057 static void 3058 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) 3059 { 3060 size_t i; 3061 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 3062 for (i = 0; i < nsd->child_count; ++i) { 3063 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) { 3064 if (write(nsd->children[i].child_fd, 3065 &command, 3066 sizeof(command)) == -1) 3067 { 3068 if(errno != EAGAIN && errno != EINTR) 3069 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 3070 (int) command, 3071 (int) nsd->children[i].pid, 3072 strerror(errno)); 3073 } else if (timeout > 0) { 3074 (void)block_read(NULL, 3075 nsd->children[i].child_fd, 3076 &command, sizeof(command), timeout); 3077 } 3078 fsync(nsd->children[i].child_fd); 3079 close(nsd->children[i].child_fd); 3080 nsd->children[i].child_fd = -1; 3081 } 3082 } 3083 } 3084 3085 static void 3086 send_children_quit(struct nsd* nsd) 3087 { 3088 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); 3089 send_children_command(nsd, NSD_QUIT, 0); 3090 } 3091 3092 static void 3093 send_children_quit_and_wait(struct nsd* nsd) 3094 { 3095 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait")); 3096 send_children_command(nsd, NSD_QUIT_CHILD, 3); 3097 } 3098 3099 #ifdef BIND8_STATS 3100 static void 3101 set_children_stats(struct nsd* nsd) 3102 { 3103 size_t i; 3104 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 3105 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 3106 for (i = 0; i < nsd->child_count; ++i) { 3107 nsd->children[i].need_to_send_STATS = 1; 3108 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 3109 } 3110 } 3111 #endif /* BIND8_STATS */ 3112 3113 static void 3114 configure_handler_event_types(short event_types) 3115 { 3116 size_t i; 3117 3118 for (i = 0; i < tcp_accept_handler_count; ++i) { 3119 struct event* handler = &tcp_accept_handlers[i].event; 3120 if(event_types) { 3121 /* reassign */ 3122 int fd = handler->ev_fd; 3123 struct event_base* base = handler->ev_base; 3124 if(tcp_accept_handlers[i].event_added) 3125 event_del(handler); 3126 event_set(handler, fd, event_types, 3127 handle_tcp_accept, &tcp_accept_handlers[i]); 3128 if(event_base_set(base, handler) != 0) 3129 log_msg(LOG_ERR, "conhand: cannot event_base"); 3130 if(event_add(handler, NULL) != 0) 3131 log_msg(LOG_ERR, "conhand: cannot event_add"); 3132 tcp_accept_handlers[i].event_added = 1; 3133 } else { 3134 /* remove */ 3135 if(tcp_accept_handlers[i].event_added) { 3136 event_del(handler); 3137 tcp_accept_handlers[i].event_added = 0; 3138 } 3139 } 3140 } 3141 } 3142