/*
 * server.c -- nsd(8) network input/output
 *
 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
 *
 * See LICENSE for the license.
 *
 */

#include "config.h"

#include <sys/types.h>
#include <sys/param.h>
#include <limits.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/wait.h>

#include <netinet/in.h>
#ifdef USE_TCP_FASTOPEN
#include <netinet/tcp.h>
#endif
#include <arpa/inet.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>
#include <netdb.h>
#include <poll.h>
#ifdef HAVE_SYS_RANDOM_H
#include <sys/random.h>
#endif
#ifndef SHUT_WR
#define SHUT_WR 1
#endif
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL_RAND_H
#include <openssl/rand.h>
#endif
#ifdef HAVE_OPENSSL_SSL_H
#include <openssl/ssl.h>
#endif
#ifdef HAVE_OPENSSL_ERR_H
#include <openssl/err.h>
#endif
#ifdef HAVE_OPENSSL_OCSP_H
#include <openssl/ocsp.h>
#endif
#ifndef USE_MINI_EVENT
#  ifdef HAVE_EVENT_H
#    include <event.h>
#  else
#    include <event2/event.h>
#    include "event2/event_struct.h"
#    include "event2/event_compat.h"
#  endif
#else
#  include "mini_event.h"
#endif

#include "axfr.h"
#include "namedb.h"
#include "netio.h"
#include "xfrd.h"
#include "xfrd-tcp.h"
#include "xfrd-disk.h"
#include "difffile.h"
#include "nsec3.h"
#include "ipc.h"
#include "udb.h"
#include "remote.h"
#include "lookup3.h"
#include "rrl.h"
#include "ixfr.h"
#ifdef USE_DNSTAP
#include "dnstap/dnstap_collector.h"
#endif
#include "verify.h"

#define RELOAD_SYNC_TIMEOUT 25 /* seconds */

#ifdef USE_DNSTAP
/*
 * log_addr() - print the contents of a sockaddr_in/sockaddr_in6 structure,
 * just as it is done in Unbound via the same
 * log_addr(VERB_LEVEL, const char*, sockaddr_storage*)
 */
static void
log_addr(const char* descr,
#ifdef INET6
	struct sockaddr_storage* addr
#else
	struct sockaddr_in* addr
#endif
	)
{
	char str_buf[64];
	if(verbosity < 6)
		return;
	if(
#ifdef INET6
		addr->ss_family == AF_INET
#else
		addr->sin_family == AF_INET
#endif
		) {
		struct sockaddr_in* s = (struct sockaddr_in*)addr;
		inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port)));
#ifdef INET6
	} else {
		struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr;
		inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port)));
#endif
	}
}
#endif /* USE_DNSTAP */

#ifdef USE_TCP_FASTOPEN
#define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen"
#define TCP_FASTOPEN_SERVER_BIT_MASK 0x2
#endif

/*
 * Data for the UDP handlers.
 */
struct udp_handler_data
{
	struct nsd *nsd;
	struct nsd_socket *socket;
	struct event event;
};

struct tcp_accept_handler_data {
	struct nsd *nsd;
	struct nsd_socket *socket;
	int event_added;
	struct event event;
#ifdef HAVE_SSL
	/* handler accepts TLS connections on the dedicated port */
	int tls_accept;
#endif
};

/*
 * These globals are used to enable the TCP accept handlers
 * when the number of TCP connections drops below the maximum
 * number of TCP connections.
 */
static size_t tcp_accept_handler_count;
static struct tcp_accept_handler_data *tcp_accept_handlers;

static struct event slowaccept_event;
static int slowaccept;

#ifdef HAVE_SSL
static unsigned char *ocspdata = NULL;
static long ocspdata_len = 0;
#endif

#ifdef NONBLOCKING_IS_BROKEN
/* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to
   read multiple times from a socket when reported ready by select. */
# define NUM_RECV_PER_SELECT (1)
#else /* !NONBLOCKING_IS_BROKEN */
# define NUM_RECV_PER_SELECT (100)
#endif /* NONBLOCKING_IS_BROKEN */

#ifndef HAVE_MMSGHDR
struct mmsghdr {
	struct msghdr msg_hdr;
	unsigned int msg_len;
};
#endif

static struct mmsghdr msgs[NUM_RECV_PER_SELECT];
static struct iovec iovecs[NUM_RECV_PER_SELECT];
static struct query *queries[NUM_RECV_PER_SELECT];
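
/*
 * Illustrative sketch, not part of the original source and not compiled:
 * how the msgs[]/iovecs[]/queries[] arrays above can serve batched datagram
 * reception with recvmmsg(2) on systems that provide it (the struct mmsghdr
 * fallback above exists only for systems that lack the type). The real
 * setup and receive loop live in the UDP handlers further down; buffer
 * bookkeeping is elided here.
 */
#if 0
static void example_batched_recv(int fd)
{
	int i, recvcount;
	/* assumes each msgs[i].msg_hdr was pointed at iovecs[i], which in
	 * turn points at the packet buffer of queries[i] */
	recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
	if(recvcount == -1) {
		if(errno != EAGAIN && errno != EINTR)
			log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
		return;
	}
	for(i = 0; i < recvcount; i++) {
		/* msgs[i].msg_len holds the size of the i-th datagram;
		 * process queries[i] accordingly */
	}
}
#endif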

/*
 * Data for the TCP connection handlers.
 *
 * The TCP handlers use non-blocking I/O. This is necessary to avoid
 * blocking the entire server on a slow TCP connection, but does make
 * reading from and writing to the socket more complicated.
 *
 * Basically, whenever a read/write would block (indicated by the
 * EAGAIN errno variable) we remember the position we were reading
 * from/writing to and return from the TCP reading/writing event
 * handler. When the socket becomes readable/writable again we
 * continue from the same position.
 */
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure. This region is destroyed
	 * when the connection is closed.
	 */
	region_type* region;

	/*
	 * The global nsd structure.
	 */
	struct nsd* nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type* query;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type query_state;

	/*
	 * The event for the file descriptor and tcp timeout
	 */
	struct event event;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet. The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t bytes_transmitted;

	/*
	 * The number of queries handled by this specific TCP connection.
	 */
	int query_count;

	/*
	 * The timeout in msec for this tcp connection
	 */
	int tcp_timeout;

	/*
	 * If the connection is allowed to have further queries on it.
	 */
	int tcp_no_more_queries;

#ifdef USE_DNSTAP
	/* the accept socket, used to find the proper service (local)
	 * address this socket is bound to */
	struct nsd_socket *socket;
#endif /* USE_DNSTAP */

#ifdef HAVE_SSL
	/*
	 * TLS object.
	 */
	SSL* tls;

	/*
	 * TLS handshake state.
	 */
	enum { tls_hs_none, tls_hs_read, tls_hs_write,
		tls_hs_read_event, tls_hs_write_event } shake_state;
#endif
	/* list of connections, for service of remaining tcp channels */
	struct tcp_handler_data *prev, *next;
};
/* global that is the list of active tcp channels */
static struct tcp_handler_data *tcp_active_list = NULL;
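
/*
 * Illustrative sketch, not part of the original source and not compiled:
 * the resume-on-EAGAIN pattern described above. bytes_transmitted records
 * progress across event callbacks; the same loop is applied first to the
 * two-byte length prefix and then to the message body. The real handlers,
 * handle_tcp_reading()/handle_tcp_writing(), add timeouts and query
 * processing on top of this. The buf/total arguments are assumed to
 * persist per connection (in NSD they live in the query buffer).
 */
#if 0
static int example_read_with_resume(struct tcp_handler_data *data, int fd,
	uint8_t *buf, size_t total)
{
	while(data->bytes_transmitted < total) {
		ssize_t received = read(fd, buf + data->bytes_transmitted,
			total - data->bytes_transmitted);
		if(received == -1 && (errno == EAGAIN || errno == EINTR))
			return 0; /* would block: resume at the next readable event */
		if(received <= 0)
			return -1; /* error, or connection closed by the peer */
		data->bytes_transmitted += received;
	}
	return 1; /* message complete */
}
#endif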
347 */ 348 static void configure_handler_event_types(short event_types); 349 350 static uint16_t *compressed_dname_offsets = 0; 351 static uint32_t compression_table_capacity = 0; 352 static uint32_t compression_table_size = 0; 353 static domain_type* compressed_dnames[MAXRRSPP]; 354 355 #ifdef USE_TCP_FASTOPEN 356 /* Checks to see if the kernel value must be manually changed in order for 357 TCP Fast Open to support server mode */ 358 static void report_tcp_fastopen_config() { 359 360 int tcp_fastopen_fp; 361 uint8_t tcp_fastopen_value; 362 363 if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) { 364 log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 365 } 366 if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) { 367 log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 368 close(tcp_fastopen_fp); 369 } 370 if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) { 371 log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n"); 372 log_msg(LOG_WARNING, "However the kernel parameters are not configured to support TCP_FASTOPEN in server mode.\n"); 373 log_msg(LOG_WARNING, "To enable TFO use the command:"); 374 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n"); 375 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n"); 376 log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n"); 377 close(tcp_fastopen_fp); 378 } 379 close(tcp_fastopen_fp); 380 } 381 #endif 382 383 /* 384 * Remove the specified pid from the list of child pids. Returns -1 if 385 * the pid is not in the list, child_num otherwise. The field is set to 0. 386 */ 387 static int 388 delete_child_pid(struct nsd *nsd, pid_t pid) 389 { 390 size_t i; 391 for (i = 0; i < nsd->child_count; ++i) { 392 if (nsd->children[i].pid == pid) { 393 nsd->children[i].pid = 0; 394 if(!nsd->children[i].need_to_exit) { 395 if(nsd->children[i].child_fd != -1) 396 close(nsd->children[i].child_fd); 397 nsd->children[i].child_fd = -1; 398 if(nsd->children[i].handler) 399 nsd->children[i].handler->fd = -1; 400 } 401 return i; 402 } 403 } 404 return -1; 405 } 406 407 /* 408 * Restart child servers if necessary. 409 */ 410 static int 411 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 412 int* xfrd_sock_p) 413 { 414 struct main_ipc_handler_data *ipc_data; 415 size_t i; 416 int sv[2]; 417 418 /* Fork the child processes... 
*/ 419 for (i = 0; i < nsd->child_count; ++i) { 420 if (nsd->children[i].pid <= 0) { 421 if (nsd->children[i].child_fd != -1) 422 close(nsd->children[i].child_fd); 423 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 424 log_msg(LOG_ERR, "socketpair: %s", 425 strerror(errno)); 426 return -1; 427 } 428 nsd->children[i].child_fd = sv[0]; 429 nsd->children[i].parent_fd = sv[1]; 430 nsd->children[i].pid = fork(); 431 switch (nsd->children[i].pid) { 432 default: /* SERVER MAIN */ 433 close(nsd->children[i].parent_fd); 434 nsd->children[i].parent_fd = -1; 435 if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { 436 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 437 } 438 if(!nsd->children[i].handler) 439 { 440 ipc_data = (struct main_ipc_handler_data*) region_alloc( 441 region, sizeof(struct main_ipc_handler_data)); 442 ipc_data->nsd = nsd; 443 ipc_data->child = &nsd->children[i]; 444 ipc_data->child_num = i; 445 ipc_data->xfrd_sock = xfrd_sock_p; 446 ipc_data->packet = buffer_create(region, QIOBUFSZ); 447 ipc_data->forward_mode = 0; 448 ipc_data->got_bytes = 0; 449 ipc_data->total_bytes = 0; 450 ipc_data->acl_num = 0; 451 nsd->children[i].handler = (struct netio_handler*) region_alloc( 452 region, sizeof(struct netio_handler)); 453 nsd->children[i].handler->fd = nsd->children[i].child_fd; 454 nsd->children[i].handler->timeout = NULL; 455 nsd->children[i].handler->user_data = ipc_data; 456 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 457 nsd->children[i].handler->event_handler = parent_handle_child_command; 458 netio_add_handler(netio, nsd->children[i].handler); 459 } 460 /* clear any ongoing ipc */ 461 ipc_data = (struct main_ipc_handler_data*) 462 nsd->children[i].handler->user_data; 463 ipc_data->forward_mode = 0; 464 /* restart - update fd */ 465 nsd->children[i].handler->fd = nsd->children[i].child_fd; 466 break; 467 case 0: /* CHILD */ 468 /* the child need not be able to access the 469 * nsd.db file */ 470 namedb_close_udb(nsd->db); 471 #ifdef MEMCLEAN /* OS collects memory pages */ 472 region_destroy(region); 473 #endif 474 475 if (pledge("stdio rpath inet", NULL) == -1) { 476 log_msg(LOG_ERR, "pledge"); 477 exit(1); 478 } 479 480 nsd->pid = 0; 481 nsd->child_count = 0; 482 nsd->server_kind = nsd->children[i].kind; 483 nsd->this_child = &nsd->children[i]; 484 nsd->this_child->child_num = i; 485 /* remove signal flags inherited from parent 486 the parent will handle them. 
*/ 487 nsd->signal_hint_reload_hup = 0; 488 nsd->signal_hint_reload = 0; 489 nsd->signal_hint_child = 0; 490 nsd->signal_hint_quit = 0; 491 nsd->signal_hint_shutdown = 0; 492 nsd->signal_hint_stats = 0; 493 nsd->signal_hint_statsusr = 0; 494 close(*xfrd_sock_p); 495 close(nsd->this_child->child_fd); 496 nsd->this_child->child_fd = -1; 497 if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { 498 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 499 } 500 server_child(nsd); 501 /* NOTREACH */ 502 exit(0); 503 case -1: 504 log_msg(LOG_ERR, "fork failed: %s", 505 strerror(errno)); 506 return -1; 507 } 508 } 509 } 510 return 0; 511 } 512 513 #ifdef BIND8_STATS 514 static void set_bind8_alarm(struct nsd* nsd) 515 { 516 /* resync so that the next alarm is on the next whole minute */ 517 if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ 518 alarm(nsd->st.period - (time(NULL) % nsd->st.period)); 519 } 520 #endif 521 522 /* set zone stat ids for zones initially read in */ 523 static void 524 zonestatid_tree_set(struct nsd* nsd) 525 { 526 struct radnode* n; 527 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 528 zone_type* zone = (zone_type*)n->elem; 529 zone->zonestatid = getzonestatid(nsd->options, zone->opts); 530 } 531 } 532 533 #ifdef USE_ZONE_STATS 534 void 535 server_zonestat_alloc(struct nsd* nsd) 536 { 537 size_t num = (nsd->options->zonestatnames->count==0?1: 538 nsd->options->zonestatnames->count); 539 size_t sz = sizeof(struct nsdst)*num; 540 char tmpfile[256]; 541 uint8_t z = 0; 542 543 /* file names */ 544 nsd->zonestatfname[0] = 0; 545 nsd->zonestatfname[1] = 0; 546 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0", 547 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 548 nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile); 549 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1", 550 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 551 nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile); 552 553 /* file descriptors */ 554 nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600); 555 if(nsd->zonestatfd[0] == -1) { 556 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0], 557 strerror(errno)); 558 exit(1); 559 } 560 nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600); 561 if(nsd->zonestatfd[0] == -1) { 562 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1], 563 strerror(errno)); 564 close(nsd->zonestatfd[0]); 565 unlink(nsd->zonestatfname[0]); 566 exit(1); 567 } 568 569 #ifdef HAVE_MMAP 570 if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) { 571 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0], 572 strerror(errno)); 573 exit(1); 574 } 575 if(write(nsd->zonestatfd[0], &z, 1) == -1) { 576 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 577 nsd->zonestatfname[0], strerror(errno)); 578 exit(1); 579 } 580 if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) { 581 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1], 582 strerror(errno)); 583 exit(1); 584 } 585 if(write(nsd->zonestatfd[1], &z, 1) == -1) { 586 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 587 nsd->zonestatfname[1], strerror(errno)); 588 exit(1); 589 } 590 nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 591 MAP_SHARED, nsd->zonestatfd[0], 0); 592 if(nsd->zonestat[0] == MAP_FAILED) { 593 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 594 unlink(nsd->zonestatfname[0]); 595 unlink(nsd->zonestatfname[1]); 596 
		exit(1);
	}
	nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[1], 0);
	if(nsd->zonestat[1] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	memset(nsd->zonestat[0], 0, sz);
	memset(nsd->zonestat[1], 0, sz);
	nsd->zonestatsize[0] = num;
	nsd->zonestatsize[1] = num;
	nsd->zonestatdesired = num;
	nsd->zonestatsizenow = num;
	nsd->zonestatnow = nsd->zonestat[0];
#endif /* HAVE_MMAP */
}

void
zonestat_remap(struct nsd* nsd, int idx, size_t sz)
{
#ifdef HAVE_MMAP
#ifdef MREMAP_MAYMOVE
	nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], sz,
		MREMAP_MAYMOVE);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap failed: %s", strerror(errno));
		exit(1);
	}
#else /* !MREMAP_MAYMOVE */
	if(msync(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0)
		log_msg(LOG_ERR, "msync failed: %s", strerror(errno));
	if(munmap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0)
		log_msg(LOG_ERR, "munmap failed: %s", strerror(errno));
	nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz,
		PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		exit(1);
	}
#endif /* MREMAP_MAYMOVE */
#endif /* HAVE_MMAP */
}

/* realloc the zonestat array for the one that is not currently in use,
 * to match the desired new size of the array (if applicable) */
void
server_zonestat_realloc(struct nsd* nsd)
{
#ifdef HAVE_MMAP
	uint8_t z = 0;
	size_t sz;
	int idx = 0; /* index of the zonestat array that is not in use */
	if(nsd->zonestatnow == nsd->zonestat[0])
		idx = 1;
	if(nsd->zonestatsize[idx] == nsd->zonestatdesired)
		return;
	sz = sizeof(struct nsdst)*nsd->zonestatdesired;
	if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[idx], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[idx], strerror(errno));
		exit(1);
	}
	zonestat_remap(nsd, idx, sz);
	/* zero the newly allocated region */
	if(nsd->zonestatdesired > nsd->zonestatsize[idx]) {
		memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) *
			nsd->zonestatsize[idx], 0, sizeof(struct nsdst) *
			(nsd->zonestatdesired - nsd->zonestatsize[idx]));
	}
	nsd->zonestatsize[idx] = nsd->zonestatdesired;
#endif /* HAVE_MMAP */
}

/* switch over to the other array for the new children, which briefly
 * coexist with the old children. We want to avoid both generations
 * writing to the same statistics arrays. */
void
server_zonestat_switch(struct nsd* nsd)
{
	if(nsd->zonestatnow == nsd->zonestat[0]) {
		nsd->zonestatnow = nsd->zonestat[1];
		nsd->zonestatsizenow = nsd->zonestatsize[1];
	} else {
		nsd->zonestatnow = nsd->zonestat[0];
		nsd->zonestatsizenow = nsd->zonestatsize[0];
	}
}
#endif /* USE_ZONE_STATS */

static void
cleanup_dname_compression_tables(void *ptr)
{
	free(ptr);
	compressed_dname_offsets = NULL;
	compression_table_capacity = 0;
}

static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xmallocarray(
			needed, sizeof(uint16_t));
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}

static int
set_cloexec(struct nsd_socket *sock)
{
	assert(sock != NULL);

	if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) {
		const char *socktype =
			sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
		log_msg(LOG_ERR, "fcntl(..., FD_CLOEXEC) failed for %s: %s",
			socktype, strerror(errno));
		return -1;
	}

	return 1;
}

static int
set_reuseport(struct nsd_socket *sock)
{
#ifdef SO_REUSEPORT
	int on = 1;
#ifdef SO_REUSEPORT_LB
	/* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like
	 * SO_REUSEPORT on Linux. This is what the users want with the config
	 * option in nsd.conf; if we actually need local address and port reuse
	 * they'll also need to have SO_REUSEPORT set for them, assume it was
	 * _LB they want.
	 */
	int opt = SO_REUSEPORT_LB;
	static const char optname[] = "SO_REUSEPORT_LB";
#else /* !SO_REUSEPORT_LB */
	int opt = SO_REUSEPORT;
	static const char optname[] = "SO_REUSEPORT";
#endif /* SO_REUSEPORT_LB */

	if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) {
		return 1;
	} else if(verbosity >= 3 || errno != ENOPROTOOPT) {
		log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
			optname, strerror(errno));
	}
	return -1;
#else
	(void)sock;
#endif /* SO_REUSEPORT */

	return 0;
}

static int
set_reuseaddr(struct nsd_socket *sock)
{
#ifdef SO_REUSEADDR
	int on = 1;
	if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) {
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_REUSEADDR */
	return 0;
}

static int
set_rcvbuf(struct nsd_socket *sock, int rcv)
{
#ifdef SO_RCVBUF
#ifdef SO_RCVBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_RCVBUFFORCE */
	if (0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_RCVBUFFORCE */
#endif /* SO_RCVBUF */

	return 0;
}

static int
set_sndbuf(struct nsd_socket *sock, int snd)
{
#ifdef SO_SNDBUF
#ifdef SO_SNDBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_SNDBUFFORCE */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_SNDBUFFORCE */
#endif /* SO_SNDBUF */

	return 0;
}

static int
set_nonblock(struct nsd_socket *sock)
{
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";

	if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) {
		log_msg(LOG_ERR, "fcntl(..., O_NONBLOCK) failed for %s: %s",
			socktype, strerror(errno));
		return -1;
	}

	return 1;
}

#ifdef INET6
static int
set_ipv6_v6only(struct nsd_socket *sock)
{
#ifdef IPV6_V6ONLY
	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";

	if(0 == setsockopt(
		sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s",
		socktype, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IPV6_V6ONLY */

	return 0;
}
#endif /* INET6 */

#ifdef INET6
static int
set_ipv6_use_min_mtu(struct nsd_socket *sock)
{
#if defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU)
#if defined(IPV6_USE_MIN_MTU)
	/* There is no fragmentation of IPv6 datagrams during forwarding in the
	 * network. Therefore we do not send UDP datagrams larger than the
	 * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be
	 * larger if the network stack supports IPV6_USE_MIN_MTU.
	 */
	int opt = IPV6_USE_MIN_MTU;
	int optval = 1;
	static const char optname[] = "IPV6_USE_MIN_MTU";
#elif defined(IPV6_MTU)
	/* On Linux, PMTUD is disabled by default for datagrams so set the MTU
	 * to the MIN MTU to get the same.
	 */
	int opt = IPV6_MTU;
	int optval = IPV6_MIN_MTU;
	static const char optname[] = "IPV6_MTU";
#endif
	if(0 == setsockopt(
		sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
		optname, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IPV6_USE_MIN_MTU || IPV6_MTU */

	return 0;
}
#endif /* INET6 */

static int
set_ipv4_no_pmtu_disc(struct nsd_socket *sock)
{
	int ret = 0;

#if defined(IP_MTU_DISCOVER)
	int opt = IP_MTU_DISCOVER;
	int optval;
# if defined(IP_PMTUDISC_OMIT)
	/* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU
	 * information and send packets with DF=0. Fragmentation is allowed
	 * if and only if the packet size exceeds the outgoing interface MTU
	 * or the packet encounters a smaller MTU link in the network. This
	 * mitigates DNS fragmentation attacks by preventing forged PMTU
	 * information. FreeBSD already has the same semantics without
	 * setting the option.
	 */
	optval = IP_PMTUDISC_OMIT;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		"IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno));
# endif /* IP_PMTUDISC_OMIT */
# if defined(IP_PMTUDISC_DONT)
	/* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */
	optval = IP_PMTUDISC_DONT;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		"IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno));
# endif
	ret = -1;
#elif defined(IP_DONTFRAG)
	int off = 0;
	if (0 == setsockopt(
		sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s",
		strerror(errno));
	ret = -1;
#else
	(void)sock;
#endif

	return ret;
}

static int
set_ip_freebind(struct nsd_socket *sock)
{
#ifdef IP_FREEBIND
	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
	if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0)
	{
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s",
		socktype, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IP_FREEBIND */

	return 0;
}

static int
set_ip_transparent(struct nsd_socket *sock)
{
	/*
	 * The scandalous preprocessor blob here calls for some explanation :)
	 * POSIX does not specify an option to bind non-local IPs, so
	 * platforms developed several implementation-specific options,
	 * all set in the same way, but with different names.
	 * For additional complexity, some platforms manage this setting
	 * differently for different address families (IPv4 vs IPv6).
	 * The scandalous preprocessor blob below abstracts such variability
	 * in a way that leaves the C code as lean and clear as possible.
	 */

#if defined(IP_TRANSPARENT)
#	define NSD_SOCKET_OPTION_TRANSPARENT IP_TRANSPARENT
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_TRANSPARENT"
	/* as of 2020-01, Linux does not support this on IPv6 programmatically */
#elif defined(SO_BINDANY)
#	define NSD_SOCKET_OPTION_TRANSPARENT SO_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL SOL_SOCKET
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME "SO_BINDANY"
#elif defined(IP_BINDANY)
#	define NSD_SOCKET_OPTION_TRANSPARENT IP_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT6 IPV6_BINDANY
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP
#	define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 IPPROTO_IPV6
#	define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_BINDANY"
#endif

#ifndef NSD_SOCKET_OPTION_TRANSPARENT
	(void)sock;
#else
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT6
#		define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT
#	endif
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6
#		define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL
#	endif
#	ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6
#		define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME
#	endif

	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
	const int is_ip6 = (sock->addr.ai_family == AF_INET6);

	if(0 == setsockopt(
		sock->s,
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL,
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT,
		&on, sizeof(on)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s",
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME, socktype, strerror(errno));
	return -1;
#endif

	return 0;
}

static int
set_tcp_maxseg(struct nsd_socket *sock, int mss)
{
#if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
	if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) {
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s",
		strerror(errno));
	return -1;
#else
	log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
#endif
	return 0;
}

#ifdef USE_TCP_FASTOPEN
static int
set_tcp_fastopen(struct nsd_socket *sock)
{
	/* qlen specifies how many outstanding TFO requests to allow. Limit is
	 * a defense against IP spoofing attacks as suggested in RFC 7413.
	 */
	int qlen;

#ifdef __APPLE__
	/* The macOS implementation only supports qlen of 1 via this call. The
	 * actual value is configured by the net.inet.tcp.fastopen_backlog
	 * kernel parameter.
	 */
	qlen = 1;
#else
	/* 5 is recommended on Linux. */
	qlen = 5;
#endif
	if (0 == setsockopt(
		sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)))
	{
		return 1;
	}

	if (errno == EPERM) {
		log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s "
				 "; this could likely be because sysctl "
				 "net.inet.tcp.fastopen.enabled, "
				 "net.inet.tcp.fastopen.server_enable, or "
				 "net.ipv4.tcp_fastopen is disabled",
			strerror(errno));
	/* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support
	 * disabled, except when verbosity enabled for debugging
	 */
	} else if(errno != ENOPROTOOPT || verbosity >= 3) {
		log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s",
			strerror(errno));
	}

	return (errno == ENOPROTOOPT ? 0 : -1);
}
#endif /* USE_TCP_FASTOPEN */

static int
set_bindtodevice(struct nsd_socket *sock)
{
#if defined(SO_BINDTODEVICE)
	if(setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE,
		sock->device, strlen(sock->device)) == -1)
	{
		log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
			"SO_BINDTODEVICE", sock->device, strerror(errno));
		return -1;
	}

	return 1;
#else
	(void)sock;
	return 0;
#endif
}

static int
set_setfib(struct nsd_socket *sock)
{
#if defined(SO_SETFIB)
	if(setsockopt(sock->s, SOL_SOCKET, SO_SETFIB,
		(const void *)&sock->fib, sizeof(sock->fib)) == -1)
	{
		log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s",
			"SO_SETFIB", sock->fib, strerror(errno));
		return -1;
	}

	return 1;
#else
	(void)sock;
	return 0;
#endif
}

static int
open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
	int rcv = 1*1024*1024, snd = 1*1024*1024;

	if(-1 == (sock->s = socket(
		sock->addr.ai_family, sock->addr.ai_socktype, 0)))
	{
#ifdef INET6
		if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
		   (sock->addr.ai_family == AF_INET6) &&
		   (errno == EAFNOSUPPORT))
		{
			log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: "
				"not supported");
			return 0;
		}
#endif
		log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
		return -1;
	}

	set_cloexec(sock);

	if(nsd->reuseport && reuseport_works && *reuseport_works)
		*reuseport_works = (set_reuseport(sock) == 1);

	if(nsd->options->receive_buffer_size > 0)
		rcv = nsd->options->receive_buffer_size;
	if(set_rcvbuf(sock, rcv) == -1)
		return -1;

	if(nsd->options->send_buffer_size > 0)
		snd = nsd->options->send_buffer_size;
	if(set_sndbuf(sock, snd) == -1)
		return -1;
#ifdef INET6
	if(sock->addr.ai_family == AF_INET6) {
		if(set_ipv6_v6only(sock) == -1 ||
		   set_ipv6_use_min_mtu(sock) == -1)
			return -1;
	} else
#endif /* INET6 */
	if(sock->addr.ai_family == AF_INET) {
		if(set_ipv4_no_pmtu_disc(sock) == -1)
			return -1;
	}

	/* Set socket to non-blocking. Otherwise, on operating systems
	 * with thundering herd problems, the UDP recv could block
	 * after select returns readable. */
	set_nonblock(sock);

	if(nsd->options->ip_freebind)
		(void)set_ip_freebind(sock);
	if(nsd->options->ip_transparent)
		(void)set_ip_transparent(sock);
	if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
		return -1;
	if(sock->fib != -1 && set_setfib(sock) == -1)
		return -1;

	if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
		char buf[256];
		addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
		log_msg(LOG_ERR, "can't bind udp socket %s: %s",
			buf, strerror(errno));
		return -1;
	}

	return 1;
}

static int
open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
#ifdef USE_TCP_FASTOPEN
	report_tcp_fastopen_config();
#endif

	(void)reuseport_works;

	if(-1 == (sock->s = socket(
		sock->addr.ai_family, sock->addr.ai_socktype, 0)))
	{
#ifdef INET6
		if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
		   (sock->addr.ai_family == AF_INET6) &&
		   (errno == EAFNOSUPPORT))
		{
			log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: "
				"not supported");
			return 0;
		}
#endif /* INET6 */
		log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
		return -1;
	}

	set_cloexec(sock);

	if(nsd->reuseport && reuseport_works && *reuseport_works)
		*reuseport_works = (set_reuseport(sock) == 1);

	(void)set_reuseaddr(sock);

#ifdef INET6
	if(sock->addr.ai_family == AF_INET6) {
		if (set_ipv6_v6only(sock) == -1 ||
		    set_ipv6_use_min_mtu(sock) == -1)
			return -1;
	}
#endif

	if(nsd->tcp_mss > 0)
		set_tcp_maxseg(sock, nsd->tcp_mss);
	/* (StevensUNP p463), if the TCP listening socket is blocking, then
	   it may block in accept, even if select() says readable. */
	(void)set_nonblock(sock);
	if(nsd->options->ip_freebind)
		(void)set_ip_freebind(sock);
	if(nsd->options->ip_transparent)
		(void)set_ip_transparent(sock);
	if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
		return -1;
	if(sock->fib != -1 && set_setfib(sock) == -1)
		return -1;

	if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
		char buf[256];
		addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
		log_msg(LOG_ERR, "can't bind tcp socket %s: %s",
			buf, strerror(errno));
		return -1;
	}

#ifdef USE_TCP_FASTOPEN
	(void)set_tcp_fastopen(sock);
#endif

	if(listen(sock->s, TCP_BACKLOG) == -1) {
		log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
		return -1;
	}

	return 1;
}

/*
 * Initialize the server, reuseport, create and bind the sockets.
 */
int
server_init(struct nsd *nsd)
{
	size_t i;
	int reuseport = 1; /* Determine if REUSEPORT works. */

	/* open server interface ports */
	for(i = 0; i < nsd->ifs; i++) {
		if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 ||
		   open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1)
		{
			return -1;
		}
	}

	if(nsd->reuseport && reuseport) {
		size_t ifs = nsd->ifs * nsd->reuseport;

		/* increase the size of the interface arrays, there are going
		 * to be separate interface file descriptors for every server
		 * instance */
		region_remove_cleanup(nsd->region, free, nsd->udp);
		region_remove_cleanup(nsd->region, free, nsd->tcp);

		nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp));
		nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp));
		region_add_cleanup(nsd->region, free, nsd->udp);
		region_add_cleanup(nsd->region, free, nsd->tcp);
		if(ifs > nsd->ifs) {
			memset(&nsd->udp[nsd->ifs], 0,
				(ifs-nsd->ifs)*sizeof(*nsd->udp));
			memset(&nsd->tcp[nsd->ifs], 0,
				(ifs-nsd->ifs)*sizeof(*nsd->tcp));
		}

		for(i = nsd->ifs; i < ifs; i++) {
			nsd->udp[i] = nsd->udp[i%nsd->ifs];
			nsd->udp[i].s = -1;
			if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) {
				return -1;
			}
			/* Turn off REUSEPORT for TCP by copying the socket
			 * file descriptor.
			 * This means we should not close TCP used by
			 * other servers in reuseport enabled mode, in
			 * server_child().
			 */
			nsd->tcp[i] = nsd->tcp[i%nsd->ifs];
		}

		nsd->ifs = ifs;
	} else {
		nsd->reuseport = 0;
	}

	/* open server interface ports for verifiers */
	for(i = 0; i < nsd->verify_ifs; i++) {
		if(open_udp_socket(nsd, &nsd->verify_udp[i], NULL) == -1 ||
		   open_tcp_socket(nsd, &nsd->verify_tcp[i], NULL) == -1)
		{
			return -1;
		}
	}

	return 0;
}
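
/*
 * Illustrative note, not part of the original source: a worked example of
 * the array growth in server_init() above. With 2 listening interfaces and
 * reuse-port across 4 server processes (nsd->reuseport == 4), nsd->udp
 * grows from 2 to 8 entries; each entry i >= 2 is a copy of entry i % 2
 * with a freshly opened SO_REUSEPORT socket, while nsd->tcp[i] shares the
 * original listening file descriptor (REUSEPORT is off for TCP).
 */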

/*
 * Prepare the server for take off.
 *
 */
int
server_prepare(struct nsd *nsd)
{
#ifdef RATELIMIT
	/* set secret modifier for hashing (udb ptr buckets and rate limits) */
#ifdef HAVE_GETRANDOM
	uint32_t v;
	if(getrandom(&v, sizeof(v), 0) == -1) {
		log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno));
		exit(1);
	}
	hash_set_raninit(v);
#elif defined(HAVE_ARC4RANDOM)
	hash_set_raninit(arc4random());
#else
	uint32_t v = getpid() ^ time(NULL);
	srandom((unsigned long)v);
# ifdef HAVE_SSL
	if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0)
		hash_set_raninit(v);
	else
# endif
		hash_set_raninit(random());
#endif
	rrl_mmap_init(nsd->child_count, nsd->options->rrl_size,
		nsd->options->rrl_ratelimit,
		nsd->options->rrl_whitelist_ratelimit,
		nsd->options->rrl_slip,
		nsd->options->rrl_ipv4_prefix_length,
		nsd->options->rrl_ipv6_prefix_length);
#endif /* RATELIMIT */

	/* Open the database... */
	if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) {
		log_msg(LOG_ERR, "unable to open the database %s: %s",
			nsd->dbfile, strerror(errno));
		unlink(nsd->task[0]->fname);
		unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		return -1;
	}
	/* check if zone files have been modified */
	/* NULL for taskudb because we send soainfo in a moment, batched up,
	 * for all zones */
	if(nsd->options->zonefiles_check || (nsd->options->database == NULL ||
		nsd->options->database[0] == 0))
		namedb_check_zonefiles(nsd, nsd->options, NULL, NULL);
	zonestatid_tree_set(nsd);

	compression_table_capacity = 0;
	initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
	/* Initialize times... */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif /* BIND8_STATS */

	return 0;
}

/*
 * Fork the required number of servers.
 */
static int
server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	size_t i;

	/* Start all child servers initially. */
	for (i = 0; i < nsd->child_count; ++i) {
		nsd->children[i].pid = 0;
	}

	return restart_child_servers(nsd, region, netio, xfrd_sock_p);
}

static void
server_close_socket(struct nsd_socket *sock)
{
	if(sock->s != -1) {
		close(sock->s);
		sock->s = -1;
	}
}

void
server_close_all_sockets(struct nsd_socket sockets[], size_t n)
{
	size_t i;

	/* Close all the sockets... */
	for (i = 0; i < n; ++i) {
		server_close_socket(&sockets[i]);
	}
}

/*
 * Close the sockets, shutdown the server and exit.
 * Does not return.
 */
void
server_shutdown(struct nsd *nsd)
{
	size_t i;

	server_close_all_sockets(nsd->udp, nsd->ifs);
	server_close_all_sockets(nsd->tcp, nsd->ifs);
	/* CHILD: close command channel to parent */
	if(nsd->this_child && nsd->this_child->parent_fd != -1)
	{
		close(nsd->this_child->parent_fd);
		nsd->this_child->parent_fd = -1;
	}
	/* SERVER: close command channels to children */
	if(!nsd->this_child)
	{
		for(i=0; i < nsd->child_count; ++i)
			if(nsd->children[i].child_fd != -1)
			{
				close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
			}
	}

	tsig_finalize();
#ifdef HAVE_SSL
	daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */
	if (nsd->tls_ctx)
		SSL_CTX_free(nsd->tls_ctx);
#endif

#ifdef MEMCLEAN /* OS collects memory pages */
#ifdef RATELIMIT
	rrl_mmap_deinit_keep_mmap();
#endif
#ifdef USE_DNSTAP
	dt_collector_destroy(nsd->dt_collector, nsd);
#endif
	udb_base_free_keep_mmap(nsd->task[0]);
	udb_base_free_keep_mmap(nsd->task[1]);
	namedb_free_ixfr(nsd->db);
	namedb_close_udb(nsd->db); /* keeps mmap */
	namedb_close(nsd->db);
	nsd_options_destroy(nsd->options);
	region_destroy(nsd->region);
#endif
	log_finalize();
	exit(0);
}

void
server_prepare_xfrd(struct nsd* nsd)
{
	char tmpfile[256];
	/* create task mmaps */
	nsd->mytask = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[0] = task_file_create(tmpfile);
	if(!nsd->task[0]) {
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[1] = task_file_create(tmpfile);
	if(!nsd->task[1]) {
		unlink(nsd->task[0]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	assert(udb_base_get_userdata(nsd->task[0])->data == 0);
	assert(udb_base_get_userdata(nsd->task[1])->data == 0);
	/* create xfrd listener structure */
	nsd->xfrd_listener = region_alloc(nsd->region,
		sizeof(netio_handler_type));
	nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*)
		region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data));
	nsd->xfrd_listener->fd = -1;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd =
		nsd;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn =
		xfrd_tcp_create(nsd->region, QIOBUFSZ);
}


void
server_start_xfrd(struct nsd *nsd, int del_db, int reload_active)
{
	pid_t pid;
	int sockets[2] = {0,0};
	struct ipc_handler_conn_data *data;

	if(nsd->xfrd_listener->fd != -1)
		close(nsd->xfrd_listener->fd);
	if(del_db) {
		/* recreate taskdb that xfrd was using, it may be corrupt */
		/* we (or reload) use nsd->mytask, and xfrd uses the other */
		char* tmpfile = nsd->task[1-nsd->mytask]->fname;
		nsd->task[1-nsd->mytask]->fname = NULL;
		/* free alloc already, so udb does not shrink itself */
		udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc);
		nsd->task[1-nsd->mytask]->alloc = NULL;
		udb_base_free(nsd->task[1-nsd->mytask]);
		/* create new file, overwrite the old one */
		nsd->task[1-nsd->mytask] = task_file_create(tmpfile);
		free(tmpfile);
	}
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
		log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
		return;
	}
	pid = fork();
	switch (pid) {
	case -1:
		log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
		break;
	default:
		/* PARENT: close first socket, use second one */
		close(sockets[0]);
		if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		if(del_db) xfrd_free_namedb(nsd);
		/* use other task than I am using, since if xfrd died and is
		 * restarted, the reload is using nsd->mytask */
		nsd->mytask = 1 - nsd->mytask;

#ifdef HAVE_SETPROCTITLE
		setproctitle("xfrd");
#endif
#ifdef HAVE_CPUSET_T
		if(nsd->use_cpu_affinity) {
			set_cpu_affinity(nsd->xfrd_cpuset);
		}
#endif

		xfrd_init(sockets[1], nsd, del_db, reload_active, pid);
		/* ENOTREACH */
		break;
	case 0:
		/* CHILD: close second socket, use first one */
		close(sockets[1]);
		if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		nsd->xfrd_listener->fd = sockets[0];
		break;
	}
	/* server-parent only */
	nsd->xfrd_listener->timeout = NULL;
	nsd->xfrd_listener->event_types = NETIO_EVENT_READ;
	nsd->xfrd_listener->event_handler = parent_handle_xfrd_command;
	/* clear ongoing ipc reads */
	data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data;
	data->conn->is_reading = 0;
}

/** add all soainfo to taskdb */
static void
add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb)
{
	struct radnode* n;
	udb_ptr task_last; /* last task, mytask is empty so NULL */
	/* add all SOA INFO to mytask */
	udb_ptr_init(&task_last, taskudb);
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0);
	}
	udb_ptr_unlink(&task_last, taskudb);
}

void
server_send_soa_xfrd(struct nsd* nsd, int shortsoa)
{
	/* normally this exchanges the SOA from nsd->xfrd and the expire back.
	 * parent fills one taskdb with soas, xfrd fills other with expires.
	 * then they exchange and process.
	 * shortsoa: xfrd crashes and needs to be restarted and one taskdb
	 * may be in use by reload. Fill SOA in taskdb and give to xfrd.
	 * expire notifications can be sent back via a normal reload later
	 * (xfrd will wait for current running reload to finish if any).
	 */
	sig_atomic_t cmd = 0;
	pid_t mypid;
	int xfrd_sock = nsd->xfrd_listener->fd;
	struct udb_base* taskudb = nsd->task[nsd->mytask];
	udb_ptr t;
	if(!shortsoa) {
		if(nsd->signal_hint_shutdown) {
		shutdown:
			log_msg(LOG_WARNING, "signal received, shutting down...");
			server_close_all_sockets(nsd->udp, nsd->ifs);
			server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
			daemon_remote_close(nsd->rc);
#endif
			/* Unlink it if possible... */
			unlinkpid(nsd->pidfile);
			unlink(nsd->task[0]->fname);
			unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
			unlink(nsd->zonestatfname[0]);
			unlink(nsd->zonestatfname[1]);
#endif
			/* write the nsd.db to disk, wait for it to complete */
			udb_base_sync(nsd->db->udb, 1);
			udb_base_close(nsd->db->udb);
			server_shutdown(nsd);
			/* ENOTREACH */
			exit(0);
		}
	}
	if(shortsoa) {
		/* put SOA in xfrd task because mytask may be in use */
		taskudb = nsd->task[1-nsd->mytask];
	}

	add_all_soa_to_task(nsd, taskudb);
	if(!shortsoa) {
		/* wait for xfrd to signal task is ready, RELOAD signal */
		if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) ||
			cmd != NSD_RELOAD) {
			log_msg(LOG_ERR, "did not get start signal from xfrd");
			exit(1);
		}
		if(nsd->signal_hint_shutdown) {
			goto shutdown;
		}
	}
	/* give xfrd our task, signal it with RELOAD_DONE */
	task_process_sync(taskudb);
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
			(int)nsd->pid, strerror(errno));
	}
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	if(!shortsoa) {
		/* process the xfrd task list (expiry data) */
		nsd->mytask = 1 - nsd->mytask;
		taskudb = nsd->task[nsd->mytask];
		task_remap(taskudb);
		udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
		while(!udb_ptr_is_null(&t)) {
			task_process_expire(nsd->db, TASKLIST(&t));
			udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next);
		}
		udb_ptr_unlink(&t, taskudb);
		task_clear(taskudb);

		/* tell xfrd that the task is emptied, signal with RELOAD_DONE */
		cmd = NSD_RELOAD_DONE;
		if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
				(int)nsd->pid, strerror(errno));
		}
	}
}

#ifdef HAVE_SSL
static void
log_crypto_from_err(const char* str, unsigned long err)
{
	/* error:[error code]:[library name]:[function name]:[reason string] */
	char buf[128];
	unsigned long e;
	ERR_error_string_n(err, buf, sizeof(buf));
	log_msg(LOG_ERR, "%s crypto %s", str, buf);
	while( (e=ERR_get_error()) ) {
		ERR_error_string_n(e, buf, sizeof(buf));
		log_msg(LOG_ERR, "and additionally crypto %s", buf);
	}
}

void
log_crypto_err(const char* str)
{
	log_crypto_from_err(str, ERR_get_error());
}

/** true if the ssl handshake error has to be squelched from the logs */
static int
squelch_err_ssl_handshake(unsigned long err)
{
	if(verbosity >= 3)
		return 0; /* only squelch on low verbosity */
	/* this is very specific, we could filter on ERR_GET_REASON()
	 * (the third element in ERR_PACK) */
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE)
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER)
#endif
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL)
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL)
# ifdef SSL_R_VERSION_TOO_LOW
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW)
# endif
#endif
		)
		return 1;
	return 0;
}

void
perform_openssl_init(void)
{
	/* init SSL library */
#ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS
	ERR_load_crypto_strings();
#endif
#if defined(HAVE_ERR_LOAD_SSL_STRINGS) && !defined(DEPRECATED_ERR_LOAD_SSL_STRINGS)
	ERR_load_SSL_strings();
#endif
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO)
	OpenSSL_add_all_algorithms();
#else
	OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS
		| OPENSSL_INIT_ADD_ALL_DIGESTS
		| OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL);
#endif
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL)
	(void)SSL_library_init();
#else
	OPENSSL_init_ssl(0, NULL);
#endif

	if(!RAND_status()) {
		/* try to seed it */
		unsigned char buf[256];
		unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid();
		size_t i;
		v = seed;
		for(i=0; i<256/sizeof(v); i++) {
			memmove(buf+i*sizeof(v), &v, sizeof(v));
			v = v*seed + (unsigned int)i;
		}
		RAND_seed(buf, 256);
		log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time");
	}
}

static int
get_ocsp(char *filename, unsigned char **ocsp)
{
	BIO *bio;
	OCSP_RESPONSE *response;
	int len = -1;
	unsigned char *p, *buf;
	assert(filename);

	if ((bio = BIO_new_file(filename, "r")) == NULL) {
		log_crypto_err("get_ocsp: BIO_new_file failed");
		return -1;
	}

	if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) {
		log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed");
		BIO_free(bio);
		return -1;
	}

	if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) {
		log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed");
		OCSP_RESPONSE_free(response);
		BIO_free(bio);
		return -1;
	}

	if ((buf = malloc((size_t) len)) == NULL) {
		log_msg(LOG_ERR, "get_ocsp: malloc failed");
		OCSP_RESPONSE_free(response);
		BIO_free(bio);
		return -1;
	}

	p = buf;
	if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) {
		log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed");
		free(buf);
		OCSP_RESPONSE_free(response);
		BIO_free(bio);
		return -1;
	}

	OCSP_RESPONSE_free(response);
	BIO_free(bio);

	*ocsp = buf;
	return len;
}
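
/*
 * Illustrative note, not part of the original source: the file parsed by
 * get_ocsp() above must contain a single DER-encoded OCSP response. Such a
 * file can, for example, be fetched with the OpenSSL command line tool
 * (file names and URL here are placeholders):
 *
 *   openssl ocsp -issuer ca.pem -cert server.pem \
 *       -url http://ocsp.example.net/ -respout ocsp.resp
 */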
log_crypto_err("could not find p256, not enabling ECDHE");
1935 } else {
1936 if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) {
1937 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE");
1938 }
1939 EC_KEY_free (ecdh);
1940 }
1941 }
1942 #endif
1943 }
1944
1945 static int
1946 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg))
1947 {
1948 if(ocspdata) {
1949 unsigned char *p;
1950 if ((p=malloc(ocspdata_len)) == NULL) {
1951 log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure");
1952 return SSL_TLSEXT_ERR_NOACK;
1953 }
1954 memcpy(p, ocspdata, ocspdata_len);
1955 if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) {
1956 log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp");
1957 free(p);
1958 return SSL_TLSEXT_ERR_NOACK;
1959 }
1960 return SSL_TLSEXT_ERR_OK;
1961 } else {
1962 return SSL_TLSEXT_ERR_NOACK;
1963 }
1964 }
1965
1966 SSL_CTX*
1967 server_tls_ctx_setup(char* key, char* pem, char* verifypem)
1968 {
1969 SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method());
1970 if(!ctx) {
1971 log_crypto_err("could not SSL_CTX_new");
1972 return NULL;
1973 }
1974 /* no SSLv2, SSLv3 because they have defects */
1975 #if SSL_OP_NO_SSLv2 != 0
1976 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){
1977 log_crypto_err("could not set SSL_OP_NO_SSLv2");
1978 SSL_CTX_free(ctx);
1979 return NULL;
1980 }
1981 #endif
1982 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3)
1983 != SSL_OP_NO_SSLv3){
1984 log_crypto_err("could not set SSL_OP_NO_SSLv3");
1985 SSL_CTX_free(ctx);
1986 return NULL;
1987 }
1988 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1)
1989 /* if we have tls 1.1 disable 1.0 */
1990 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1)
1991 != SSL_OP_NO_TLSv1){
1992 log_crypto_err("could not set SSL_OP_NO_TLSv1");
1993 SSL_CTX_free(ctx);
1994 return NULL;
1995 }
1996 #endif
1997 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2)
1998 /* if we have tls 1.2 disable 1.1 */
1999 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1)
2000 != SSL_OP_NO_TLSv1_1){
2001 log_crypto_err("could not set SSL_OP_NO_TLSv1_1");
2002 SSL_CTX_free(ctx);
2003 return NULL;
2004 }
2005 #endif
2006 #if defined(SSL_OP_NO_RENEGOTIATION)
2007 /* disable client renegotiation */
2008 if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) &
2009 SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) {
2010 log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION");
2011 SSL_CTX_free(ctx);
2012 return NULL;
2013 }
2014 #endif
2015 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20)
2016 /* if system-wide crypto policies are detected, use those; otherwise set our own cipher list */
2017 if (access( "/etc/crypto-policies/config", F_OK ) != 0 ) {
2018 /* if we have sha256, set the cipher list to have no known vulns */
2019 if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20"))
2020 log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list");
2021 }
2022 #endif
2023 if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) &
2024 SSL_OP_CIPHER_SERVER_PREFERENCE) !=
2025 SSL_OP_CIPHER_SERVER_PREFERENCE) {
2026 log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE");
2027 SSL_CTX_free(ctx);
2028 return NULL;
2029 }
2030 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL
2031 SSL_CTX_set_security_level(ctx, 0);
2032 #endif
2033 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) {
2034 log_msg(LOG_ERR, "error for cert file: %s", pem);
2035 log_crypto_err("error in SSL_CTX use_certificate_chain_file");
2036 SSL_CTX_free(ctx);
2037 return NULL;
2038 }
2039 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) {
2040 log_msg(LOG_ERR, "error for private key file: %s", key);
2041 log_crypto_err("Error in SSL_CTX use_PrivateKey_file");
2042 SSL_CTX_free(ctx);
2043 return NULL;
2044 }
2045 if(!SSL_CTX_check_private_key(ctx)) {
2046 log_msg(LOG_ERR, "error for key file: %s", key);
2047 log_crypto_err("Error in SSL_CTX check_private_key");
2048 SSL_CTX_free(ctx);
2049 return NULL;
2050 }
2051 listen_sslctx_setup_2(ctx);
2052 if(verifypem && verifypem[0]) {
2053 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) {
2054 log_crypto_err("Error in SSL_CTX verify locations");
2055 SSL_CTX_free(ctx);
2056 return NULL;
2057 }
2058 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem));
2059 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL);
2060 }
2061 return ctx;
2062 }
2063
2064 SSL_CTX*
2065 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile)
2066 {
2067 char *key, *pem;
2068 SSL_CTX *ctx;
2069
2070 key = nsd->options->tls_service_key;
2071 pem = nsd->options->tls_service_pem;
2072 if(!key || key[0] == 0) {
2073 log_msg(LOG_ERR, "error: no tls-service-key file specified");
2074 return NULL;
2075 }
2076 if(!pem || pem[0] == 0) {
2077 log_msg(LOG_ERR, "error: no tls-service-pem file specified");
2078 return NULL;
2079 }
2080
2081 /* NOTE: This mimics the existing code in Unbound 1.5.1 by supporting SSL, but
2082 * draft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2 */
2083 ctx = server_tls_ctx_setup(key, pem, verifypem);
2084 if(!ctx) {
2085 log_msg(LOG_ERR, "could not setup server TLS context");
2086 return NULL;
2087 }
2088 if(ocspfile && ocspfile[0]) {
2089 if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) {
2090 log_crypto_err("Error reading OCSPfile");
2091 SSL_CTX_free(ctx);
2092 return NULL;
2093 } else {
2094 VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile));
2095 if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) {
2096 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb");
2097 SSL_CTX_free(ctx);
2098 return NULL;
2099 }
2100 }
2101 }
2102 return ctx;
2103 }
2104
2105 /* check if tcp_handler_accept_data created for TLS dedicated port */
2106 int
2107 using_tls_port(struct sockaddr* addr, const char* tls_port)
2108 {
2109 in_port_t port = 0;
2110
2111 if (addr->sa_family == AF_INET)
2112 port = ((struct sockaddr_in*)addr)->sin_port;
2113 #ifdef HAVE_STRUCT_SOCKADDR_IN6
2114 else
2115 port = ((struct sockaddr_in6*)addr)->sin6_port;
2116 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */
2117 if (atoi(tls_port) == ntohs(port))
2118 return 1;
2119
2120 return 0;
2121 }
2122 #endif
2123
2124 /* pass timeout=-1 for blocking.
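   Otherwise the timeout is in whole seconds. A usage sketch, with the
   same names as the callers in this file:
     sig_atomic_t cmd;
     if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd))
             ... handle short read, EOF, error or timeout ...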
Returns size, 0, -1(err), or -2(timeout) */ 2125 ssize_t 2126 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 2127 { 2128 uint8_t* buf = (uint8_t*) p; 2129 ssize_t total = 0; 2130 struct pollfd fd; 2131 memset(&fd, 0, sizeof(fd)); 2132 fd.fd = s; 2133 fd.events = POLLIN; 2134 2135 while( total < sz) { 2136 ssize_t ret; 2137 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000); 2138 if(ret == -1) { 2139 if(errno == EAGAIN) 2140 /* blocking read */ 2141 continue; 2142 if(errno == EINTR) { 2143 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 2144 return -1; 2145 /* other signals can be handled later */ 2146 continue; 2147 } 2148 /* some error */ 2149 return -1; 2150 } 2151 if(ret == 0) { 2152 /* operation timed out */ 2153 return -2; 2154 } 2155 ret = read(s, buf+total, sz-total); 2156 if(ret == -1) { 2157 if(errno == EAGAIN) 2158 /* blocking read */ 2159 continue; 2160 if(errno == EINTR) { 2161 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 2162 return -1; 2163 /* other signals can be handled later */ 2164 continue; 2165 } 2166 /* some error */ 2167 return -1; 2168 } 2169 if(ret == 0) { 2170 /* closed connection! */ 2171 return 0; 2172 } 2173 total += ret; 2174 } 2175 return total; 2176 } 2177 2178 static void 2179 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) 2180 { 2181 sig_atomic_t cmd = NSD_QUIT_SYNC; 2182 udb_ptr t, next; 2183 udb_base* u = nsd->task[nsd->mytask]; 2184 udb_ptr_init(&next, u); 2185 udb_ptr_new(&t, u, udb_base_get_userdata(u)); 2186 udb_base_set_userdata(u, 0); 2187 while(!udb_ptr_is_null(&t)) { 2188 /* store next in list so this one can be deleted or reused */ 2189 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); 2190 udb_rptr_zero(&TASKLIST(&t)->next, u); 2191 2192 /* process task t */ 2193 /* append results for task t and update last_task */ 2194 task_process_in_reload(nsd, u, last_task, &t); 2195 2196 /* go to next */ 2197 udb_ptr_set_ptr(&t, u, &next); 2198 2199 /* if the parent has quit, we must quit too, poll the fd for cmds */ 2200 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2201 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2202 if(cmd == NSD_QUIT) { 2203 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2204 /* sync to disk (if needed) */ 2205 udb_base_sync(nsd->db->udb, 0); 2206 /* unlink files of remainder of tasks */ 2207 while(!udb_ptr_is_null(&t)) { 2208 if(TASKLIST(&t)->task_type == task_apply_xfr) { 2209 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); 2210 } 2211 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); 2212 } 2213 udb_ptr_unlink(&t, u); 2214 udb_ptr_unlink(&next, u); 2215 exit(0); 2216 } 2217 } 2218 2219 } 2220 udb_ptr_unlink(&t, u); 2221 udb_ptr_unlink(&next, u); 2222 } 2223 2224 #ifdef BIND8_STATS 2225 static void 2226 parent_send_stats(struct nsd* nsd, int cmdfd) 2227 { 2228 size_t i; 2229 if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) { 2230 log_msg(LOG_ERR, "could not write stats to reload"); 2231 return; 2232 } 2233 for(i=0; i<nsd->child_count; i++) 2234 if(!write_socket(cmdfd, &nsd->children[i].query_count, 2235 sizeof(stc_type))) { 2236 log_msg(LOG_ERR, "could not write stats to reload"); 2237 return; 2238 } 2239 } 2240 2241 static void 2242 reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last) 2243 { 2244 struct nsdst s; 2245 stc_type* p; 2246 size_t i; 2247 if(block_read(nsd, cmdfd, &s, sizeof(s), 2248 RELOAD_SYNC_TIMEOUT) != sizeof(s)) { 2249 log_msg(LOG_ERR, "could not read stats 
from oldpar");
2250 return;
2251 }
2252 s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0);
2253 s.db_mem = region_get_mem(nsd->db->region);
2254 p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s,
2255 nsd->child_count);
2256 if(!p) return;
2257 for(i=0; i<nsd->child_count; i++) {
2258 if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!=
2259 sizeof(stc_type))
2260 return;
2261 }
2262 }
2263 #endif /* BIND8_STATS */
2264
2265 void server_verify(struct nsd *nsd, int cmdsocket);
2266
2267 /*
2268 * Reload the database, stop parent, re-fork children and continue
2269 * as server_main.
2270 */
2271 static void
2272 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
2273 int cmdsocket)
2274 {
2275 pid_t mypid;
2276 sig_atomic_t cmd = NSD_QUIT_SYNC;
2277 int ret;
2278 udb_ptr last_task;
2279 struct sigaction old_sigchld, ign_sigchld;
2280 struct radnode* node;
2281 zone_type* zone;
2282 enum soainfo_hint hint;
2283 /* ignore SIGCHLD from the previous server_main that used this pid */
2284 memset(&ign_sigchld, 0, sizeof(ign_sigchld));
2285 ign_sigchld.sa_handler = SIG_IGN;
2286 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld);
2287
2288 #ifdef HAVE_SETPROCTITLE
2289 setproctitle("main");
2290 #endif
2291 #ifdef HAVE_CPUSET_T
2292 if(nsd->use_cpu_affinity) {
2293 set_cpu_affinity(nsd->cpuset);
2294 }
2295 #endif
2296
2297 /* see what tasks we got from xfrd */
2298 task_remap(nsd->task[nsd->mytask]);
2299 udb_ptr_init(&last_task, nsd->task[nsd->mytask]);
2300 udb_compact_inhibited(nsd->db->udb, 1);
2301 reload_process_tasks(nsd, &last_task, cmdsocket);
2302 udb_compact_inhibited(nsd->db->udb, 0);
2303 udb_compact(nsd->db->udb);
2304
2305 #ifndef NDEBUG
2306 if(nsd_debug_level >= 1)
2307 region_log_stats(nsd->db->region);
2308 #endif /* NDEBUG */
2309 /* sync to disk (if needed) */
2310 udb_base_sync(nsd->db->udb, 0);
2311
2312 initialize_dname_compression_tables(nsd);
2313
2314 #ifdef BIND8_STATS
2315 /* Restart dumping stats if required. */
2316 time(&nsd->st.boot);
2317 set_bind8_alarm(nsd);
2318 #endif
2319 #ifdef USE_ZONE_STATS
2320 server_zonestat_realloc(nsd); /* realloc for new children */
2321 server_zonestat_switch(nsd);
2322 #endif
2323
2324 if(nsd->options->verify_enable) {
2325 #ifdef RATELIMIT
2326 /* allocate resources for rate limiting. Use a slot that is guaranteed
2327 not mapped to a file so no persistent data is overwritten */
2328 rrl_init(nsd->child_count + 1);
2329 #endif
2330
2331 /* spin up server and execute verifiers for each zone */
2332 server_verify(nsd, cmdsocket);
2333 #ifdef RATELIMIT
2334 /* deallocate rate limiting resources */
2335 rrl_deinit(nsd->child_count + 1);
2336 #endif
2337 }
2338
2339 for(node = radix_first(nsd->db->zonetree);
2340 node != NULL;
2341 node = radix_next(node))
2342 {
2343 zone = (zone_type *)node->elem;
2344 if(zone->is_updated) {
2345 if(zone->is_bad) {
2346 nsd->mode = NSD_RELOAD_FAILED;
2347 hint = soainfo_bad;
2348 } else {
2349 hint = soainfo_ok;
2350 }
2351 /* update(s), verified or not, possibly with subsequent
2352 skipped update(s).
Skipped update(s) are picked up
2353 by the failed-update check in xfrd */
2354 task_new_soainfo(nsd->task[nsd->mytask], &last_task,
2355 zone, hint);
2356 } else if(zone->is_skipped) {
2357 /* corrupt or inconsistent update without preceding
2358 update(s), communicate soainfo_gone */
2359 task_new_soainfo(nsd->task[nsd->mytask], &last_task,
2360 zone, soainfo_gone);
2361 }
2362 zone->is_updated = 0;
2363 zone->is_skipped = 0;
2364 }
2365
2366 if(nsd->mode == NSD_RELOAD_FAILED) {
2367 exit(NSD_RELOAD_FAILED);
2368 }
2369
2370 /* listen for the signals of failed children again */
2371 sigaction(SIGCHLD, &old_sigchld, NULL);
2372 #ifdef USE_DNSTAP
2373 if (nsd->dt_collector) {
2374 int *swap_fd_send;
2375 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: swap dnstap collector pipes"));
2376 /* Swap fd_send with fd_swap so the old serve children and the new
2377 * serve children will not write to the same pipe ends simultaneously */
2378 swap_fd_send = nsd->dt_collector_fd_send;
2379 nsd->dt_collector_fd_send = nsd->dt_collector_fd_swap;
2380 nsd->dt_collector_fd_swap = swap_fd_send;
2381
2382 }
2383 #endif
2384 /* Start new child processes */
2385 if (server_start_children(nsd, server_region, netio, &nsd->
2386 xfrd_listener->fd) != 0) {
2387 send_children_quit(nsd);
2388 exit(1);
2389 }
2390
2391 /* if the parent has quit, we must quit too, poll the fd for cmds */
2392 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
2393 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
2394 if(cmd == NSD_QUIT) {
2395 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
2396 send_children_quit(nsd);
2397 exit(0);
2398 }
2399 }
2400
2401 /* Send quit command to parent: blocking, wait for receipt. */
2402 do {
2403 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
2404 if (!write_socket(cmdsocket, &cmd, sizeof(cmd)))
2405 {
2406 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s",
2407 strerror(errno));
2408 }
2409 /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */
2410 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main"));
2411 ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd),
2412 RELOAD_SYNC_TIMEOUT);
2413 if(ret == -2) {
2414 DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry"));
2415 }
2416 } while (ret == -2);
2417 if(ret == -1) {
2418 log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s",
2419 strerror(errno));
2420 }
2421 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd));
2422 if(cmd == NSD_QUIT) {
2423 /* small race condition possible here, parent got quit cmd.
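	 * The old main received NSD_QUIT and is shutting down on its own;
	 * follow it by stopping the children and exiting, instead of taking
	 * over as the new main.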
*/ 2424 send_children_quit(nsd); 2425 exit(1); 2426 } 2427 assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); 2428 #ifdef BIND8_STATS 2429 reload_do_stats(cmdsocket, nsd, &last_task); 2430 #endif 2431 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); 2432 task_process_sync(nsd->task[nsd->mytask]); 2433 #ifdef USE_ZONE_STATS 2434 server_zonestat_realloc(nsd); /* realloc for next children */ 2435 #endif 2436 2437 /* send soainfo to the xfrd process, signal it that reload is done, 2438 * it picks up the taskudb */ 2439 cmd = NSD_RELOAD_DONE; 2440 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2441 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", 2442 strerror(errno)); 2443 } 2444 mypid = getpid(); 2445 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2446 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2447 strerror(errno)); 2448 } 2449 2450 /* try to reopen file */ 2451 if (nsd->file_rotation_ok) 2452 log_reopen(nsd->log_filename, 1); 2453 /* exit reload, continue as new server_main */ 2454 } 2455 2456 /* 2457 * Get the mode depending on the signal hints that have been received. 2458 * Multiple signal hints can be received and will be handled in turn. 2459 */ 2460 static sig_atomic_t 2461 server_signal_mode(struct nsd *nsd) 2462 { 2463 if(nsd->signal_hint_quit) { 2464 nsd->signal_hint_quit = 0; 2465 return NSD_QUIT; 2466 } 2467 else if(nsd->signal_hint_shutdown) { 2468 nsd->signal_hint_shutdown = 0; 2469 return NSD_SHUTDOWN; 2470 } 2471 else if(nsd->signal_hint_child) { 2472 nsd->signal_hint_child = 0; 2473 return NSD_REAP_CHILDREN; 2474 } 2475 else if(nsd->signal_hint_reload) { 2476 nsd->signal_hint_reload = 0; 2477 return NSD_RELOAD; 2478 } 2479 else if(nsd->signal_hint_reload_hup) { 2480 nsd->signal_hint_reload_hup = 0; 2481 return NSD_RELOAD_REQ; 2482 } 2483 else if(nsd->signal_hint_stats) { 2484 nsd->signal_hint_stats = 0; 2485 #ifdef BIND8_STATS 2486 set_bind8_alarm(nsd); 2487 #endif 2488 return NSD_STATS; 2489 } 2490 else if(nsd->signal_hint_statsusr) { 2491 nsd->signal_hint_statsusr = 0; 2492 return NSD_STATS; 2493 } 2494 return NSD_RUN; 2495 } 2496 2497 /* 2498 * The main server simply waits for signals and child processes to 2499 * terminate. Child processes are restarted as necessary. 2500 */ 2501 void 2502 server_main(struct nsd *nsd) 2503 { 2504 region_type *server_region = region_create(xalloc, free); 2505 netio_type *netio = netio_create(server_region); 2506 netio_handler_type reload_listener; 2507 int reload_sockets[2] = {-1, -1}; 2508 struct timespec timeout_spec; 2509 int status; 2510 pid_t child_pid; 2511 pid_t reload_pid = -1; 2512 sig_atomic_t mode; 2513 2514 /* Ensure we are the main process */ 2515 assert(nsd->server_kind == NSD_SERVER_MAIN); 2516 2517 /* Add listener for the XFRD process */ 2518 netio_add_handler(netio, nsd->xfrd_listener); 2519 2520 /* Start the child processes that handle incoming queries */ 2521 if (server_start_children(nsd, server_region, netio, 2522 &nsd->xfrd_listener->fd) != 0) { 2523 send_children_quit(nsd); 2524 exit(1); 2525 } 2526 reload_listener.fd = -1; 2527 2528 /* This_child MUST be 0, because this is the parent process */ 2529 assert(nsd->this_child == 0); 2530 2531 /* Run the server until we get a shutdown signal */ 2532 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 2533 /* Did we receive a signal that changes our mode? 
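		 * server_signal_mode() consumes at most one pending signal
		 * hint per call; remaining hints are picked up on later
		 * iterations of this loop.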
*/
2534 if(mode == NSD_RUN) {
2535 nsd->mode = mode = server_signal_mode(nsd);
2536 }
2537
2538 switch (mode) {
2539 case NSD_RUN:
2540 /* see if any child processes terminated */
2541 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) {
2542 int is_child = delete_child_pid(nsd, child_pid);
2543 if (is_child != -1 && nsd->children[is_child].need_to_exit) {
2544 if(nsd->children[is_child].child_fd == -1)
2545 nsd->children[is_child].has_exited = 1;
2546 parent_check_all_children_exited(nsd);
2547 } else if(is_child != -1) {
2548 log_msg(LOG_WARNING,
2549 "server %d died unexpectedly with status %d, restarting",
2550 (int) child_pid, status);
2551 restart_child_servers(nsd, server_region, netio,
2552 &nsd->xfrd_listener->fd);
2553 } else if (child_pid == reload_pid) {
2554 sig_atomic_t cmd = NSD_RELOAD_FAILED;
2555 pid_t mypid;
2556 log_msg(LOG_WARNING,
2557 "Reload process %d failed with status %d, continuing with old database",
2558 (int) child_pid, status);
2559 reload_pid = -1;
2560 if(reload_listener.fd != -1) close(reload_listener.fd);
2561 netio_remove_handler(netio, &reload_listener);
2562 reload_listener.fd = -1;
2563 reload_listener.event_types = NETIO_EVENT_NONE;
2564 task_process_sync(nsd->task[nsd->mytask]);
2565 /* inform xfrd reload attempt ended */
2566 if(!write_socket(nsd->xfrd_listener->fd,
2567 &cmd, sizeof(cmd))) {
2568 log_msg(LOG_ERR, "problems "
2569 "sending SOAEND to xfrd: %s",
2570 strerror(errno));
2571 }
2572 mypid = getpid();
2573 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
2574 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
2575 strerror(errno));
2576 }
2577 #ifdef USE_DNSTAP
2578 } else if(nsd->dt_collector && child_pid == nsd->dt_collector->dt_pid) {
2579 log_msg(LOG_WARNING,
2580 "dnstap-collector %d terminated with status %d",
2581 (int) child_pid, status);
2582 if(nsd->dt_collector) {
2583 dt_collector_close(nsd->dt_collector, nsd);
2584 dt_collector_destroy(nsd->dt_collector, nsd);
2585 nsd->dt_collector = NULL;
2586 }
2587 /* Only respawn a crashed (or exited)
2588 * dnstap-collector when not reloading,
2589 * so as not to induce a reload during a
2590 * reload (which would seriously
2591 * disrupt nsd procedures and lead to
2592 * unpredictable results)!
2593 *
2594 * This will *leave* a dnstap-collector
2595 * process terminated, but because
2596 * having the reload process signal the
2597 * main process to respawn in this
2598 * situation would be cumbersome, and
2599 * because this situation is so
2600 * specific (and therefore hopefully
2601 * extremely rare or altogether
2602 * non-existent), plus the fact that we are left
2603 * with a perfectly functional NSD
2604 * (besides not logging dnstap
2605 * messages), I consider it acceptable
2606 * to leave this unresolved.
2607 */ 2608 if(reload_pid == -1 && nsd->options->dnstap_enable) { 2609 nsd->dt_collector = dt_collector_create(nsd); 2610 dt_collector_start(nsd->dt_collector, nsd); 2611 nsd->mode = NSD_RELOAD_REQ; 2612 } 2613 #endif 2614 } else if(status != 0) { 2615 /* check for status, because we get 2616 * the old-servermain because reload 2617 * is the process-parent of old-main, 2618 * and we get older server-processes 2619 * that are exiting after a reload */ 2620 log_msg(LOG_WARNING, 2621 "process %d terminated with status %d", 2622 (int) child_pid, status); 2623 } 2624 } 2625 if (child_pid == -1) { 2626 if (errno == EINTR) { 2627 continue; 2628 } 2629 if (errno != ECHILD) 2630 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 2631 } 2632 if (nsd->mode != NSD_RUN) 2633 break; 2634 2635 /* timeout to collect processes. In case no sigchild happens. */ 2636 timeout_spec.tv_sec = 60; 2637 timeout_spec.tv_nsec = 0; 2638 2639 /* listen on ports, timeout for collecting terminated children */ 2640 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 2641 if (errno != EINTR) { 2642 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 2643 } 2644 } 2645 if(nsd->restart_children) { 2646 restart_child_servers(nsd, server_region, netio, 2647 &nsd->xfrd_listener->fd); 2648 nsd->restart_children = 0; 2649 } 2650 if(nsd->reload_failed) { 2651 sig_atomic_t cmd = NSD_RELOAD_FAILED; 2652 pid_t mypid; 2653 nsd->reload_failed = 0; 2654 log_msg(LOG_WARNING, 2655 "Reload process %d failed, continuing with old database", 2656 (int) reload_pid); 2657 reload_pid = -1; 2658 if(reload_listener.fd != -1) close(reload_listener.fd); 2659 netio_remove_handler(netio, &reload_listener); 2660 reload_listener.fd = -1; 2661 reload_listener.event_types = NETIO_EVENT_NONE; 2662 task_process_sync(nsd->task[nsd->mytask]); 2663 /* inform xfrd reload attempt ended */ 2664 if(!write_socket(nsd->xfrd_listener->fd, 2665 &cmd, sizeof(cmd))) { 2666 log_msg(LOG_ERR, "problems " 2667 "sending SOAEND to xfrd: %s", 2668 strerror(errno)); 2669 } 2670 mypid = getpid(); 2671 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2672 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2673 strerror(errno)); 2674 } 2675 } 2676 2677 break; 2678 case NSD_RELOAD_REQ: { 2679 sig_atomic_t cmd = NSD_RELOAD_REQ; 2680 log_msg(LOG_WARNING, "SIGHUP received, reloading..."); 2681 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2682 "main: ipc send reload_req to xfrd")); 2683 if(!write_socket(nsd->xfrd_listener->fd, 2684 &cmd, sizeof(cmd))) { 2685 log_msg(LOG_ERR, "server_main: could not send " 2686 "reload_req to xfrd: %s", strerror(errno)); 2687 } 2688 nsd->mode = NSD_RUN; 2689 } break; 2690 case NSD_RELOAD: 2691 /* Continue to run nsd after reload */ 2692 nsd->mode = NSD_RUN; 2693 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); 2694 if (reload_pid != -1) { 2695 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 2696 (int) reload_pid); 2697 break; 2698 } 2699 2700 /* switch the mytask to keep track of who owns task*/ 2701 nsd->mytask = 1 - nsd->mytask; 2702 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 2703 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 2704 reload_pid = -1; 2705 break; 2706 } 2707 2708 /* Do actual reload */ 2709 reload_pid = fork(); 2710 switch (reload_pid) { 2711 case -1: 2712 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 2713 break; 2714 default: 2715 /* PARENT */ 2716 close(reload_sockets[0]); 2717 server_reload(nsd, server_region, netio, 2718 
reload_sockets[1]); 2719 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 2720 close(reload_sockets[1]); 2721 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 2722 /* drop stale xfrd ipc data */ 2723 ((struct ipc_handler_conn_data*)nsd-> 2724 xfrd_listener->user_data) 2725 ->conn->is_reading = 0; 2726 reload_pid = -1; 2727 reload_listener.fd = -1; 2728 reload_listener.event_types = NETIO_EVENT_NONE; 2729 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 2730 break; 2731 case 0: 2732 /* CHILD */ 2733 /* server_main keep running until NSD_QUIT_SYNC 2734 * received from reload. */ 2735 close(reload_sockets[1]); 2736 reload_listener.fd = reload_sockets[0]; 2737 reload_listener.timeout = NULL; 2738 reload_listener.user_data = nsd; 2739 reload_listener.event_types = NETIO_EVENT_READ; 2740 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 2741 netio_add_handler(netio, &reload_listener); 2742 reload_pid = getppid(); 2743 break; 2744 } 2745 break; 2746 case NSD_QUIT_SYNC: 2747 /* synchronisation of xfrd, parent and reload */ 2748 if(!nsd->quit_sync_done && reload_listener.fd != -1) { 2749 sig_atomic_t cmd = NSD_RELOAD; 2750 /* stop xfrd ipc writes in progress */ 2751 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2752 "main: ipc send indication reload")); 2753 if(!write_socket(nsd->xfrd_listener->fd, 2754 &cmd, sizeof(cmd))) { 2755 log_msg(LOG_ERR, "server_main: could not send reload " 2756 "indication to xfrd: %s", strerror(errno)); 2757 } 2758 /* wait for ACK from xfrd */ 2759 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 2760 nsd->quit_sync_done = 1; 2761 } 2762 nsd->mode = NSD_RUN; 2763 break; 2764 case NSD_QUIT: 2765 /* silent shutdown during reload */ 2766 if(reload_listener.fd != -1) { 2767 /* acknowledge the quit, to sync reload that we will really quit now */ 2768 sig_atomic_t cmd = NSD_RELOAD; 2769 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 2770 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2771 log_msg(LOG_ERR, "server_main: " 2772 "could not ack quit: %s", strerror(errno)); 2773 } 2774 #ifdef BIND8_STATS 2775 parent_send_stats(nsd, reload_listener.fd); 2776 #endif /* BIND8_STATS */ 2777 close(reload_listener.fd); 2778 } 2779 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); 2780 /* only quit children after xfrd has acked */ 2781 send_children_quit(nsd); 2782 2783 #ifdef MEMCLEAN /* OS collects memory pages */ 2784 region_destroy(server_region); 2785 #endif 2786 server_shutdown(nsd); 2787 2788 /* ENOTREACH */ 2789 break; 2790 case NSD_SHUTDOWN: 2791 break; 2792 case NSD_REAP_CHILDREN: 2793 /* continue; wait for child in run loop */ 2794 nsd->mode = NSD_RUN; 2795 break; 2796 case NSD_STATS: 2797 #ifdef BIND8_STATS 2798 set_children_stats(nsd); 2799 #endif 2800 nsd->mode = NSD_RUN; 2801 break; 2802 default: 2803 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); 2804 nsd->mode = NSD_RUN; 2805 break; 2806 } 2807 } 2808 log_msg(LOG_WARNING, "signal received, shutting down..."); 2809 2810 /* close opened ports to avoid race with restart of nsd */ 2811 server_close_all_sockets(nsd->udp, nsd->ifs); 2812 server_close_all_sockets(nsd->tcp, nsd->ifs); 2813 #ifdef HAVE_SSL 2814 daemon_remote_close(nsd->rc); 2815 #endif 2816 send_children_quit_and_wait(nsd); 2817 2818 /* Unlink it if possible... 
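	   (the pidfile and the two task files; with USE_ZONE_STATS also
	   the zone statistics files)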
*/ 2819 unlinkpid(nsd->pidfile); 2820 unlink(nsd->task[0]->fname); 2821 unlink(nsd->task[1]->fname); 2822 #ifdef USE_ZONE_STATS 2823 unlink(nsd->zonestatfname[0]); 2824 unlink(nsd->zonestatfname[1]); 2825 #endif 2826 #ifdef USE_DNSTAP 2827 dt_collector_close(nsd->dt_collector, nsd); 2828 #endif 2829 2830 if(reload_listener.fd != -1) { 2831 sig_atomic_t cmd = NSD_QUIT; 2832 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2833 "main: ipc send quit to reload-process")); 2834 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2835 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 2836 strerror(errno)); 2837 } 2838 fsync(reload_listener.fd); 2839 close(reload_listener.fd); 2840 /* wait for reload to finish processing */ 2841 while(1) { 2842 if(waitpid(reload_pid, NULL, 0) == -1) { 2843 if(errno == EINTR) continue; 2844 if(errno == ECHILD) break; 2845 log_msg(LOG_ERR, "waitpid(reload %d): %s", 2846 (int)reload_pid, strerror(errno)); 2847 } 2848 break; 2849 } 2850 } 2851 if(nsd->xfrd_listener->fd != -1) { 2852 /* complete quit, stop xfrd */ 2853 sig_atomic_t cmd = NSD_QUIT; 2854 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2855 "main: ipc send quit to xfrd")); 2856 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2857 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 2858 strerror(errno)); 2859 } 2860 fsync(nsd->xfrd_listener->fd); 2861 close(nsd->xfrd_listener->fd); 2862 (void)kill(nsd->pid, SIGTERM); 2863 } 2864 2865 #ifdef MEMCLEAN /* OS collects memory pages */ 2866 region_destroy(server_region); 2867 #endif 2868 /* write the nsd.db to disk, wait for it to complete */ 2869 udb_base_sync(nsd->db->udb, 1); 2870 udb_base_close(nsd->db->udb); 2871 server_shutdown(nsd); 2872 } 2873 2874 static query_state_type 2875 server_process_query(struct nsd *nsd, struct query *query, uint32_t *now_p) 2876 { 2877 return query_process(query, nsd, now_p); 2878 } 2879 2880 static query_state_type 2881 server_process_query_udp(struct nsd *nsd, struct query *query, uint32_t *now_p) 2882 { 2883 #ifdef RATELIMIT 2884 if(query_process(query, nsd, now_p) != QUERY_DISCARDED) { 2885 if(query->edns.cookie_status != COOKIE_VALID 2886 && query->edns.cookie_status != COOKIE_VALID_REUSE 2887 && rrl_process_query(query)) 2888 return rrl_slip(query); 2889 else return QUERY_PROCESSED; 2890 } 2891 return QUERY_DISCARDED; 2892 #else 2893 return query_process(query, nsd, now_p); 2894 #endif 2895 } 2896 2897 const char* 2898 nsd_event_vs(void) 2899 { 2900 #ifdef USE_MINI_EVENT 2901 return ""; 2902 #else 2903 return event_get_version(); 2904 #endif 2905 } 2906 2907 #if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS) 2908 static const char* ub_ev_backend2str(int b) 2909 { 2910 switch(b) { 2911 case EVBACKEND_SELECT: return "select"; 2912 case EVBACKEND_POLL: return "poll"; 2913 case EVBACKEND_EPOLL: return "epoll"; 2914 case EVBACKEND_KQUEUE: return "kqueue"; 2915 case EVBACKEND_DEVPOLL: return "devpoll"; 2916 case EVBACKEND_PORT: return "evport"; 2917 } 2918 return "unknown"; 2919 } 2920 #endif 2921 2922 const char* 2923 nsd_event_method(void) 2924 { 2925 #ifdef USE_MINI_EVENT 2926 return "select"; 2927 #else 2928 struct event_base* b = nsd_child_event_base(); 2929 const char* m = "?"; 2930 # ifdef EV_FEATURE_BACKENDS 2931 m = ub_ev_backend2str(ev_backend((struct ev_loop*)b)); 2932 # elif defined(HAVE_EVENT_BASE_GET_METHOD) 2933 m = event_base_get_method(b); 2934 # endif 2935 # ifdef MEMCLEAN 2936 event_base_free(b); 2937 # endif 2938 return m; 2939 #endif 2940 } 2941 2942 struct event_base* 2943 
nsd_child_event_base(void) 2944 { 2945 struct event_base* base; 2946 #ifdef USE_MINI_EVENT 2947 static time_t secs; 2948 static struct timeval now; 2949 base = event_init(&secs, &now); 2950 #else 2951 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) 2952 /* libev */ 2953 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); 2954 # else 2955 /* libevent */ 2956 # ifdef HAVE_EVENT_BASE_NEW 2957 base = event_base_new(); 2958 # else 2959 base = event_init(); 2960 # endif 2961 # endif 2962 #endif 2963 return base; 2964 } 2965 2966 static void 2967 add_udp_handler( 2968 struct nsd *nsd, 2969 struct nsd_socket *sock, 2970 struct udp_handler_data *data) 2971 { 2972 struct event *handler = &data->event; 2973 2974 data->nsd = nsd; 2975 data->socket = sock; 2976 2977 memset(handler, 0, sizeof(*handler)); 2978 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data); 2979 if(event_base_set(nsd->event_base, handler) != 0) 2980 log_msg(LOG_ERR, "nsd udp: event_base_set failed"); 2981 if(event_add(handler, NULL) != 0) 2982 log_msg(LOG_ERR, "nsd udp: event_add failed"); 2983 } 2984 2985 void 2986 add_tcp_handler( 2987 struct nsd *nsd, 2988 struct nsd_socket *sock, 2989 struct tcp_accept_handler_data *data) 2990 { 2991 struct event *handler = &data->event; 2992 2993 data->nsd = nsd; 2994 data->socket = sock; 2995 2996 #ifdef HAVE_SSL 2997 if (nsd->tls_ctx && 2998 nsd->options->tls_port && 2999 using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port)) 3000 { 3001 data->tls_accept = 1; 3002 if(verbosity >= 2) { 3003 char buf[48]; 3004 addrport2str((void*)(struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf)); 3005 VERBOSITY(4, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf)); 3006 } 3007 } else { 3008 data->tls_accept = 0; 3009 } 3010 #endif 3011 3012 memset(handler, 0, sizeof(*handler)); 3013 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data); 3014 if(event_base_set(nsd->event_base, handler) != 0) 3015 log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); 3016 if(event_add(handler, NULL) != 0) 3017 log_msg(LOG_ERR, "nsd tcp: event_add failed"); 3018 data->event_added = 1; 3019 } 3020 3021 /* 3022 * Serve DNS request to verifiers (short-lived) 3023 */ 3024 void server_verify(struct nsd *nsd, int cmdsocket) 3025 { 3026 size_t size = 0; 3027 struct event cmd_event, signal_event, exit_event; 3028 struct zone *zone; 3029 3030 assert(nsd != NULL); 3031 3032 zone = verify_next_zone(nsd, NULL); 3033 if(zone == NULL) 3034 return; 3035 3036 nsd->server_region = region_create(xalloc, free); 3037 nsd->event_base = nsd_child_event_base(); 3038 3039 nsd->next_zone_to_verify = zone; 3040 nsd->verifier_count = 0; 3041 nsd->verifier_limit = nsd->options->verifier_count; 3042 size = sizeof(struct verifier) * nsd->verifier_limit; 3043 pipe(nsd->verifier_pipe); 3044 fcntl(nsd->verifier_pipe[0], F_SETFD, FD_CLOEXEC); 3045 fcntl(nsd->verifier_pipe[1], F_SETFD, FD_CLOEXEC); 3046 nsd->verifiers = region_alloc_zero(nsd->server_region, size); 3047 3048 for(size_t i = 0; i < nsd->verifier_limit; i++) { 3049 nsd->verifiers[i].nsd = nsd; 3050 nsd->verifiers[i].zone = NULL; 3051 nsd->verifiers[i].pid = -1; 3052 nsd->verifiers[i].output_stream.fd = -1; 3053 nsd->verifiers[i].output_stream.priority = LOG_INFO; 3054 nsd->verifiers[i].error_stream.fd = -1; 3055 nsd->verifiers[i].error_stream.priority = LOG_ERR; 3056 } 3057 3058 event_set(&cmd_event, cmdsocket, EV_READ|EV_PERSIST, verify_handle_command, nsd); 3059 if(event_base_set(nsd->event_base, 
&cmd_event) != 0 || 3060 event_add(&cmd_event, NULL) != 0) 3061 { 3062 log_msg(LOG_ERR, "verify: could not add command event"); 3063 goto fail; 3064 } 3065 3066 event_set(&signal_event, SIGCHLD, EV_SIGNAL|EV_PERSIST, verify_handle_signal, nsd); 3067 if(event_base_set(nsd->event_base, &signal_event) != 0 || 3068 signal_add(&signal_event, NULL) != 0) 3069 { 3070 log_msg(LOG_ERR, "verify: could not add signal event"); 3071 goto fail; 3072 } 3073 3074 event_set(&exit_event, nsd->verifier_pipe[0], EV_READ|EV_PERSIST, verify_handle_exit, nsd); 3075 if(event_base_set(nsd->event_base, &exit_event) != 0 || 3076 event_add(&exit_event, NULL) != 0) 3077 { 3078 log_msg(LOG_ERR, "verify: could not add exit event"); 3079 goto fail; 3080 } 3081 3082 memset(msgs, 0, sizeof(msgs)); 3083 for (int i = 0; i < NUM_RECV_PER_SELECT; i++) { 3084 queries[i] = query_create(nsd->server_region, 3085 compressed_dname_offsets, 3086 compression_table_size, compressed_dnames); 3087 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3088 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 3089 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3090 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3091 msgs[i].msg_hdr.msg_iovlen = 1; 3092 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 3093 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3094 } 3095 3096 for (size_t i = 0; i < nsd->verify_ifs; i++) { 3097 struct udp_handler_data *data; 3098 data = region_alloc_zero( 3099 nsd->server_region, sizeof(*data)); 3100 add_udp_handler(nsd, &nsd->verify_udp[i], data); 3101 } 3102 3103 tcp_accept_handler_count = nsd->verify_ifs; 3104 tcp_accept_handlers = region_alloc_array(nsd->server_region, 3105 nsd->verify_ifs, sizeof(*tcp_accept_handlers)); 3106 3107 for (size_t i = 0; i < nsd->verify_ifs; i++) { 3108 struct tcp_accept_handler_data *data; 3109 data = &tcp_accept_handlers[i]; 3110 memset(data, 0, sizeof(*data)); 3111 add_tcp_handler(nsd, &nsd->verify_tcp[i], data); 3112 } 3113 3114 while(nsd->next_zone_to_verify != NULL && 3115 nsd->verifier_count < nsd->verifier_limit) 3116 { 3117 verify_zone(nsd, nsd->next_zone_to_verify); 3118 nsd->next_zone_to_verify 3119 = verify_next_zone(nsd, nsd->next_zone_to_verify); 3120 } 3121 3122 /* short-lived main loop */ 3123 event_base_dispatch(nsd->event_base); 3124 3125 /* remove command and exit event handlers */ 3126 event_del(&exit_event); 3127 event_del(&signal_event); 3128 event_del(&cmd_event); 3129 3130 assert(nsd->next_zone_to_verify == NULL || nsd->mode == NSD_QUIT); 3131 assert(nsd->verifier_count == 0 || nsd->mode == NSD_QUIT); 3132 fail: 3133 event_base_free(nsd->event_base); 3134 close(nsd->verifier_pipe[0]); 3135 close(nsd->verifier_pipe[1]); 3136 region_destroy(nsd->server_region); 3137 3138 nsd->event_base = NULL; 3139 nsd->server_region = NULL; 3140 nsd->verifier_limit = 0; 3141 nsd->verifier_pipe[0] = -1; 3142 nsd->verifier_pipe[1] = -1; 3143 nsd->verifiers = NULL; 3144 } 3145 3146 /* 3147 * Serve DNS requests. 
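 * Each child sets up handlers for its share of the UDP and/or TCP
 * sockets and runs the event loop until told to quit; TCP connections
 * still open at that point are drained by service_remaining_tcp().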
3148 */ 3149 void 3150 server_child(struct nsd *nsd) 3151 { 3152 size_t i, from, numifs; 3153 region_type *server_region = region_create(xalloc, free); 3154 struct event_base* event_base = nsd_child_event_base(); 3155 sig_atomic_t mode; 3156 3157 if(!event_base) { 3158 log_msg(LOG_ERR, "nsd server could not create event base"); 3159 exit(1); 3160 } 3161 nsd->event_base = event_base; 3162 nsd->server_region = server_region; 3163 3164 #ifdef RATELIMIT 3165 rrl_init(nsd->this_child->child_num); 3166 #endif 3167 3168 assert(nsd->server_kind != NSD_SERVER_MAIN); 3169 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 3170 3171 #ifdef HAVE_SETPROCTITLE 3172 setproctitle("server %d", nsd->this_child->child_num + 1); 3173 #endif 3174 #ifdef HAVE_CPUSET_T 3175 if(nsd->use_cpu_affinity) { 3176 set_cpu_affinity(nsd->this_child->cpuset); 3177 } 3178 #endif 3179 3180 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 3181 server_close_all_sockets(nsd->tcp, nsd->ifs); 3182 } 3183 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 3184 server_close_all_sockets(nsd->udp, nsd->ifs); 3185 } 3186 3187 if (nsd->this_child->parent_fd != -1) { 3188 struct event *handler; 3189 struct ipc_handler_conn_data* user_data = 3190 (struct ipc_handler_conn_data*)region_alloc( 3191 server_region, sizeof(struct ipc_handler_conn_data)); 3192 user_data->nsd = nsd; 3193 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); 3194 3195 handler = (struct event*) region_alloc( 3196 server_region, sizeof(*handler)); 3197 memset(handler, 0, sizeof(*handler)); 3198 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| 3199 EV_READ, child_handle_parent_command, user_data); 3200 if(event_base_set(event_base, handler) != 0) 3201 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); 3202 if(event_add(handler, NULL) != 0) 3203 log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); 3204 } 3205 3206 if(nsd->reuseport) { 3207 numifs = nsd->ifs / nsd->reuseport; 3208 from = numifs * nsd->this_child->child_num; 3209 if(from+numifs > nsd->ifs) { /* should not happen */ 3210 from = 0; 3211 numifs = nsd->ifs; 3212 } 3213 } else { 3214 from = 0; 3215 numifs = nsd->ifs; 3216 } 3217 3218 if (nsd->server_kind & NSD_SERVER_UDP) { 3219 int child = nsd->this_child->child_num; 3220 memset(msgs, 0, sizeof(msgs)); 3221 for (i = 0; i < NUM_RECV_PER_SELECT; i++) { 3222 queries[i] = query_create(server_region, 3223 compressed_dname_offsets, 3224 compression_table_size, compressed_dnames); 3225 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3226 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 3227 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3228 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3229 msgs[i].msg_hdr.msg_iovlen = 1; 3230 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 3231 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3232 } 3233 3234 for (i = 0; i < nsd->ifs; i++) { 3235 int listen; 3236 struct udp_handler_data *data; 3237 3238 listen = nsd_bitset_isset(nsd->udp[i].servers, child); 3239 3240 if(i >= from && i < (from + numifs) && listen) { 3241 data = region_alloc_zero( 3242 nsd->server_region, sizeof(*data)); 3243 add_udp_handler(nsd, &nsd->udp[i], data); 3244 } else { 3245 /* close sockets intended for other servers */ 3246 server_close_socket(&nsd->udp[i]); 3247 } 3248 } 3249 } 3250 3251 /* 3252 * Keep track of all the TCP accept handlers so we can enable 3253 * and disable them based on the current number of active TCP 3254 * connections. 
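 * The accept handlers are re-enabled, via configure_handler_event_types(),
 * in cleanup_tcp_handler() when a closing connection takes the count
 * below the maximum again.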
3255 */ 3256 if (nsd->server_kind & NSD_SERVER_TCP) { 3257 int child = nsd->this_child->child_num; 3258 tcp_accept_handler_count = numifs; 3259 tcp_accept_handlers = region_alloc_array(server_region, 3260 numifs, sizeof(*tcp_accept_handlers)); 3261 3262 for (i = 0; i < nsd->ifs; i++) { 3263 int listen; 3264 struct tcp_accept_handler_data *data; 3265 3266 listen = nsd_bitset_isset(nsd->tcp[i].servers, child); 3267 3268 if(i >= from && i < (from + numifs) && listen) { 3269 data = &tcp_accept_handlers[i-from]; 3270 memset(data, 0, sizeof(*data)); 3271 add_tcp_handler(nsd, &nsd->tcp[i], data); 3272 } else { 3273 /* close sockets intended for other servers */ 3274 /* 3275 * uncomment this once tcp servers are no 3276 * longer copied in the tcp fd copy line 3277 * in server_init(). 3278 server_close_socket(&nsd->tcp[i]); 3279 */ 3280 /* close sockets not meant for this server*/ 3281 if(!listen) 3282 server_close_socket(&nsd->tcp[i]); 3283 } 3284 } 3285 } else { 3286 tcp_accept_handler_count = 0; 3287 } 3288 3289 /* The main loop... */ 3290 while ((mode = nsd->mode) != NSD_QUIT) { 3291 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 3292 3293 /* Do we need to do the statistics... */ 3294 if (mode == NSD_STATS) { 3295 #ifdef BIND8_STATS 3296 int p = nsd->st.period; 3297 nsd->st.period = 1; /* force stats printout */ 3298 /* Dump the statistics */ 3299 bind8_stats(nsd); 3300 nsd->st.period = p; 3301 #else /* !BIND8_STATS */ 3302 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 3303 #endif /* BIND8_STATS */ 3304 3305 nsd->mode = NSD_RUN; 3306 } 3307 else if (mode == NSD_REAP_CHILDREN) { 3308 /* got signal, notify parent. parent reaps terminated children. */ 3309 if (nsd->this_child->parent_fd != -1) { 3310 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 3311 if (write(nsd->this_child->parent_fd, 3312 &parent_notify, 3313 sizeof(parent_notify)) == -1) 3314 { 3315 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 3316 (int) nsd->this_child->pid, strerror(errno)); 3317 } 3318 } else /* no parent, so reap 'em */ 3319 while (waitpid(-1, NULL, WNOHANG) > 0) ; 3320 nsd->mode = NSD_RUN; 3321 } 3322 else if(mode == NSD_RUN) { 3323 /* Wait for a query... 
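			 * One pass through the event loop; EVLOOP_ONCE makes
			 * event_base_loop() return after pending events have
			 * been handled, so the mode switch above is re-checked
			 * regularly.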
*/ 3324 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 3325 if (errno != EINTR) { 3326 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 3327 break; 3328 } 3329 } 3330 } else if(mode == NSD_QUIT) { 3331 /* ignore here, quit */ 3332 } else { 3333 log_msg(LOG_ERR, "mode bad value %d, back to service.", 3334 (int)mode); 3335 nsd->mode = NSD_RUN; 3336 } 3337 } 3338 3339 service_remaining_tcp(nsd); 3340 #ifdef BIND8_STATS 3341 bind8_stats(nsd); 3342 #endif /* BIND8_STATS */ 3343 3344 #ifdef MEMCLEAN /* OS collects memory pages */ 3345 #ifdef RATELIMIT 3346 rrl_deinit(nsd->this_child->child_num); 3347 #endif 3348 event_base_free(event_base); 3349 region_destroy(server_region); 3350 #endif 3351 server_shutdown(nsd); 3352 } 3353 3354 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg) 3355 { 3356 int* timed_out = (int*)arg; 3357 assert(event & EV_TIMEOUT); (void)event; 3358 /* wake up the service tcp thread, note event is no longer 3359 * registered */ 3360 *timed_out = 1; 3361 } 3362 3363 void 3364 service_remaining_tcp(struct nsd* nsd) 3365 { 3366 struct tcp_handler_data* p; 3367 struct event_base* event_base; 3368 /* check if it is needed */ 3369 if(nsd->current_tcp_count == 0 || tcp_active_list == NULL) 3370 return; 3371 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections")); 3372 #ifdef USE_DNSTAP 3373 /* remove dnstap collector, we cannot write there because the new 3374 * child process is using the file descriptor, or the child 3375 * process after that. */ 3376 dt_collector_destroy(nsd->dt_collector, nsd); 3377 nsd->dt_collector = NULL; 3378 #endif 3379 /* setup event base */ 3380 event_base = nsd_child_event_base(); 3381 if(!event_base) { 3382 log_msg(LOG_ERR, "nsd remain tcp could not create event base"); 3383 return; 3384 } 3385 /* register tcp connections */ 3386 for(p = tcp_active_list; p != NULL; p = p->next) { 3387 struct timeval timeout; 3388 int fd = p->event.ev_fd; 3389 #ifdef USE_MINI_EVENT 3390 short event = p->event.ev_flags & (EV_READ|EV_WRITE); 3391 #else 3392 short event = p->event.ev_events & (EV_READ|EV_WRITE); 3393 #endif 3394 void (*fn)(int, short, void*); 3395 #ifdef HAVE_SSL 3396 if(p->tls) { 3397 if((event&EV_READ)) 3398 fn = handle_tls_reading; 3399 else fn = handle_tls_writing; 3400 } else { 3401 #endif 3402 if((event&EV_READ)) 3403 fn = handle_tcp_reading; 3404 else fn = handle_tcp_writing; 3405 #ifdef HAVE_SSL 3406 } 3407 #endif 3408 3409 p->tcp_no_more_queries = 1; 3410 /* set timeout to 1/10 second */ 3411 if(p->tcp_timeout > 100) 3412 p->tcp_timeout = 100; 3413 timeout.tv_sec = p->tcp_timeout / 1000; 3414 timeout.tv_usec = (p->tcp_timeout % 1000)*1000; 3415 event_del(&p->event); 3416 memset(&p->event, 0, sizeof(p->event)); 3417 event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT, 3418 fn, p); 3419 if(event_base_set(event_base, &p->event) != 0) 3420 log_msg(LOG_ERR, "event base set failed"); 3421 if(event_add(&p->event, &timeout) != 0) 3422 log_msg(LOG_ERR, "event add failed"); 3423 } 3424 3425 /* handle it */ 3426 while(nsd->current_tcp_count > 0) { 3427 mode_t m = server_signal_mode(nsd); 3428 struct event timeout; 3429 struct timeval tv; 3430 int timed_out = 0; 3431 if(m == NSD_QUIT || m == NSD_SHUTDOWN || 3432 m == NSD_REAP_CHILDREN) { 3433 /* quit */ 3434 break; 3435 } 3436 /* timer */ 3437 /* have to do something every second */ 3438 tv.tv_sec = 1; 3439 tv.tv_usec = 0; 3440 memset(&timeout, 0, sizeof(timeout)); 3441 event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout, 3442 &timed_out); 3443 
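		/* fd -1 makes this a pure timeout event: remaining_tcp_timeout()
		 * sets timed_out, so this loop wakes up at least once a second
		 * and notices signal hints. */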
if(event_base_set(event_base, &timeout) != 0) 3444 log_msg(LOG_ERR, "remaintcp timer: event_base_set failed"); 3445 if(event_add(&timeout, &tv) != 0) 3446 log_msg(LOG_ERR, "remaintcp timer: event_add failed"); 3447 3448 /* service loop */ 3449 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 3450 if (errno != EINTR) { 3451 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 3452 break; 3453 } 3454 } 3455 if(!timed_out) { 3456 event_del(&timeout); 3457 } else { 3458 /* timed out, quit */ 3459 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit")); 3460 break; 3461 } 3462 } 3463 #ifdef MEMCLEAN 3464 event_base_free(event_base); 3465 #endif 3466 /* continue to quit after return */ 3467 } 3468 3469 /* Implement recvmmsg and sendmmsg if the platform does not. These functions 3470 * are always used, even if nonblocking operations are broken, in which case 3471 * NUM_RECV_PER_SELECT is defined to 1 (one). 3472 */ 3473 #if defined(HAVE_RECVMMSG) 3474 #define nsd_recvmmsg recvmmsg 3475 #else /* !HAVE_RECVMMSG */ 3476 3477 static int 3478 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, 3479 int flags, struct timespec *timeout) 3480 { 3481 unsigned int vpos = 0; 3482 ssize_t rcvd; 3483 3484 /* timeout is ignored, ensure caller does not expect it to work */ 3485 assert(timeout == NULL); (void)timeout; 3486 3487 while(vpos < vlen) { 3488 rcvd = recvfrom(sockfd, 3489 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3490 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3491 flags, 3492 msgvec[vpos].msg_hdr.msg_name, 3493 &msgvec[vpos].msg_hdr.msg_namelen); 3494 if(rcvd < 0) { 3495 break; 3496 } else { 3497 assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX); 3498 msgvec[vpos].msg_len = (unsigned int)rcvd; 3499 vpos++; 3500 } 3501 } 3502 3503 if(vpos) { 3504 /* error will be picked up next time */ 3505 return (int)vpos; 3506 } else if(errno == 0) { 3507 return 0; 3508 } else if(errno == EAGAIN) { 3509 return 0; 3510 } 3511 3512 return -1; 3513 } 3514 #endif /* HAVE_RECVMMSG */ 3515 3516 #ifdef HAVE_SENDMMSG 3517 #define nsd_sendmmsg(...) 
sendmmsg(__VA_ARGS__) 3518 #else /* !HAVE_SENDMMSG */ 3519 3520 static int 3521 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags) 3522 { 3523 unsigned int vpos = 0; 3524 ssize_t snd; 3525 3526 while(vpos < vlen) { 3527 assert(msgvec[vpos].msg_hdr.msg_iovlen == 1); 3528 snd = sendto(sockfd, 3529 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3530 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3531 flags, 3532 msgvec[vpos].msg_hdr.msg_name, 3533 msgvec[vpos].msg_hdr.msg_namelen); 3534 if(snd < 0) { 3535 break; 3536 } else { 3537 msgvec[vpos].msg_len = (unsigned int)snd; 3538 vpos++; 3539 } 3540 } 3541 3542 if(vpos) { 3543 return (int)vpos; 3544 } else if(errno == 0) { 3545 return 0; 3546 } 3547 3548 return -1; 3549 } 3550 #endif /* HAVE_SENDMMSG */ 3551 3552 static int 3553 port_is_zero( 3554 #ifdef INET6 3555 struct sockaddr_storage *addr 3556 #else 3557 struct sockaddr_in *addr 3558 #endif 3559 ) 3560 { 3561 #ifdef INET6 3562 if(addr->ss_family == AF_INET6) { 3563 return (((struct sockaddr_in6 *)addr)->sin6_port) == 0; 3564 } else if(addr->ss_family == AF_INET) { 3565 return (((struct sockaddr_in *)addr)->sin_port) == 0; 3566 } 3567 return 0; 3568 #else 3569 if(addr->sin_family == AF_INET) { 3570 return addr->sin_port == 0; 3571 } 3572 return 0; 3573 #endif 3574 } 3575 3576 static void 3577 handle_udp(int fd, short event, void* arg) 3578 { 3579 struct udp_handler_data *data = (struct udp_handler_data *) arg; 3580 int received, sent, recvcount, i; 3581 struct query *q; 3582 uint32_t now = 0; 3583 3584 if (!(event & EV_READ)) { 3585 return; 3586 } 3587 recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 3588 /* this printf strangely gave a performance increase on Linux */ 3589 /* printf("recvcount %d \n", recvcount); */ 3590 if (recvcount == -1) { 3591 if (errno != EAGAIN && errno != EINTR) { 3592 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 3593 STATUP(data->nsd, rxerr); 3594 /* No zone statup */ 3595 } 3596 /* Simply no data available */ 3597 return; 3598 } 3599 for (i = 0; i < recvcount; i++) { 3600 loopstart: 3601 received = msgs[i].msg_len; 3602 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 3603 q = queries[i]; 3604 if (received == -1) { 3605 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( 3606 #if defined(HAVE_RECVMMSG) 3607 msgs[i].msg_hdr.msg_flags 3608 #else 3609 errno 3610 #endif 3611 )); 3612 STATUP(data->nsd, rxerr); 3613 /* No zone statup */ 3614 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3615 iovecs[i].iov_len = buffer_remaining(q->packet); 3616 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3617 goto swap_drop; 3618 } 3619 3620 /* Account... */ 3621 #ifdef BIND8_STATS 3622 if (data->socket->addr.ai_family == AF_INET) { 3623 STATUP(data->nsd, qudp); 3624 } else if (data->socket->addr.ai_family == AF_INET6) { 3625 STATUP(data->nsd, qudp6); 3626 } 3627 #endif 3628 3629 buffer_skip(q->packet, received); 3630 buffer_flip(q->packet); 3631 #ifdef USE_DNSTAP 3632 /* 3633 * sending UDP-query with server address (local) and client address to dnstap process 3634 */ 3635 log_addr("query from client", &q->addr); 3636 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 3637 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &q->addr, q->addrlen, 3638 q->tcp, q->packet); 3639 #endif /* USE_DNSTAP */ 3640 3641 /* Process and answer the query... 
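	 * When compiled with RATELIMIT, server_process_query_udp() above
	 * also applies response rate limiting; queries carrying a valid
	 * DNS cookie bypass the rate limiter.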
*/ 3642 if (server_process_query_udp(data->nsd, q, &now) != QUERY_DISCARDED) { 3643 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 3644 STATUP(data->nsd, nona); 3645 ZTATUP(data->nsd, q->zone, nona); 3646 } 3647 3648 #ifdef USE_ZONE_STATS 3649 if (data->socket->addr.ai_family == AF_INET) { 3650 ZTATUP(data->nsd, q->zone, qudp); 3651 } else if (data->socket->addr.ai_family == AF_INET6) { 3652 ZTATUP(data->nsd, q->zone, qudp6); 3653 } 3654 #endif 3655 3656 /* Add EDNS0 and TSIG info if necessary. */ 3657 query_add_optional(q, data->nsd, &now); 3658 3659 buffer_flip(q->packet); 3660 iovecs[i].iov_len = buffer_remaining(q->packet); 3661 #ifdef BIND8_STATS 3662 /* Account the rcode & TC... */ 3663 STATUP2(data->nsd, rcode, RCODE(q->packet)); 3664 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 3665 if (TC(q->packet)) { 3666 STATUP(data->nsd, truncated); 3667 ZTATUP(data->nsd, q->zone, truncated); 3668 } 3669 #endif /* BIND8_STATS */ 3670 #ifdef USE_DNSTAP 3671 /* 3672 * sending UDP-response with server address (local) and client address to dnstap process 3673 */ 3674 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 3675 log_addr("response to client", &q->addr); 3676 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, 3677 &q->addr, q->addrlen, q->tcp, q->packet, 3678 q->zone); 3679 #endif /* USE_DNSTAP */ 3680 } else { 3681 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3682 iovecs[i].iov_len = buffer_remaining(q->packet); 3683 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3684 swap_drop: 3685 STATUP(data->nsd, dropped); 3686 ZTATUP(data->nsd, q->zone, dropped); 3687 if(i != recvcount-1) { 3688 /* swap with last and decrease recvcount */ 3689 struct mmsghdr mtmp = msgs[i]; 3690 struct iovec iotmp = iovecs[i]; 3691 recvcount--; 3692 msgs[i] = msgs[recvcount]; 3693 iovecs[i] = iovecs[recvcount]; 3694 queries[i] = queries[recvcount]; 3695 msgs[recvcount] = mtmp; 3696 iovecs[recvcount] = iotmp; 3697 queries[recvcount] = q; 3698 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3699 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; 3700 goto loopstart; 3701 } else { recvcount --; } 3702 } 3703 } 3704 3705 /* send until all are sent */ 3706 i = 0; 3707 while(i<recvcount) { 3708 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 3709 if(sent == -1) { 3710 if(errno == ENOBUFS || 3711 #ifdef EWOULDBLOCK 3712 errno == EWOULDBLOCK || 3713 #endif 3714 errno == EAGAIN) { 3715 /* block to wait until send buffer avail */ 3716 int flag, errstore; 3717 if((flag = fcntl(fd, F_GETFL)) == -1) { 3718 log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno)); 3719 flag = 0; 3720 } 3721 flag &= ~O_NONBLOCK; 3722 if(fcntl(fd, F_SETFL, flag) == -1) 3723 log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno)); 3724 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 3725 errstore = errno; 3726 flag |= O_NONBLOCK; 3727 if(fcntl(fd, F_SETFL, flag) == -1) 3728 log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno)); 3729 if(sent != -1) { 3730 i += sent; 3731 continue; 3732 } 3733 errno = errstore; 3734 } 3735 if(errno == EINVAL) { 3736 /* skip the invalid argument entry, 3737 * send the remaining packets in the list */ 3738 if(!(port_is_zero((void*)&queries[i]->addr) && 3739 verbosity < 3)) { 3740 const char* es = strerror(errno); 3741 char a[64]; 3742 addrport2str((void*)&queries[i]->addr, a, sizeof(a)); 3743 log_msg(LOG_ERR, "sendmmsg skip invalid argument [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 3744 } 3745 i += 1; 
3746 continue; 3747 } 3748 /* don't log transient network full errors, unless 3749 * on higher verbosity */ 3750 if(!(errno == ENOBUFS && verbosity < 1) && 3751 #ifdef EWOULDBLOCK 3752 errno != EWOULDBLOCK && 3753 #endif 3754 errno != EAGAIN) { 3755 const char* es = strerror(errno); 3756 char a[64]; 3757 addrport2str((void*)&queries[i]->addr, a, sizeof(a)); 3758 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 3759 } 3760 #ifdef BIND8_STATS 3761 data->nsd->st.txerr += recvcount-i; 3762 #endif /* BIND8_STATS */ 3763 break; 3764 } 3765 i += sent; 3766 } 3767 for(i=0; i<recvcount; i++) { 3768 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3769 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3770 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3771 } 3772 } 3773 3774 #ifdef HAVE_SSL 3775 /* 3776 * Setup an event for the tcp handler. 3777 */ 3778 static void 3779 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *), 3780 int fd, short event) 3781 { 3782 struct timeval timeout; 3783 struct event_base* ev_base; 3784 3785 timeout.tv_sec = data->nsd->tcp_timeout; 3786 timeout.tv_usec = 0L; 3787 3788 ev_base = data->event.ev_base; 3789 event_del(&data->event); 3790 memset(&data->event, 0, sizeof(data->event)); 3791 event_set(&data->event, fd, event, fn, data); 3792 if(event_base_set(ev_base, &data->event) != 0) 3793 log_msg(LOG_ERR, "event base set failed"); 3794 if(event_add(&data->event, &timeout) != 0) 3795 log_msg(LOG_ERR, "event add failed"); 3796 } 3797 #endif /* HAVE_SSL */ 3798 3799 static void 3800 cleanup_tcp_handler(struct tcp_handler_data* data) 3801 { 3802 event_del(&data->event); 3803 #ifdef HAVE_SSL 3804 if(data->tls) { 3805 SSL_shutdown(data->tls); 3806 SSL_free(data->tls); 3807 data->tls = NULL; 3808 } 3809 #endif 3810 close(data->event.ev_fd); 3811 if(data->prev) 3812 data->prev->next = data->next; 3813 else tcp_active_list = data->next; 3814 if(data->next) 3815 data->next->prev = data->prev; 3816 3817 /* 3818 * Enable the TCP accept handlers when the current number of 3819 * TCP connections is about to drop below the maximum number 3820 * of TCP connections. 3821 */ 3822 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 3823 configure_handler_event_types(EV_READ|EV_PERSIST); 3824 if(slowaccept) { 3825 event_del(&slowaccept_event); 3826 slowaccept = 0; 3827 } 3828 } 3829 --data->nsd->current_tcp_count; 3830 assert(data->nsd->current_tcp_count >= 0); 3831 3832 region_destroy(data->region); 3833 } 3834 3835 static void 3836 handle_tcp_reading(int fd, short event, void* arg) 3837 { 3838 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3839 ssize_t received; 3840 struct event_base* ev_base; 3841 struct timeval timeout; 3842 uint32_t now = 0; 3843 3844 if ((event & EV_TIMEOUT)) { 3845 /* Connection timed out. */ 3846 cleanup_tcp_handler(data); 3847 return; 3848 } 3849 3850 if ((data->nsd->tcp_query_count > 0 && 3851 data->query_count >= data->nsd->tcp_query_count) || 3852 data->tcp_no_more_queries) { 3853 /* No more queries allowed on this tcp connection. */ 3854 cleanup_tcp_handler(data); 3855 return; 3856 } 3857 3858 assert((event & EV_READ)); 3859 3860 if (data->bytes_transmitted == 0) { 3861 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 3862 } 3863 3864 /* 3865 * Check if we received the leading packet length bytes yet. 
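* DNS over TCP (RFC 1035, section 4.2.2) prefixes every message with a
* two-octet length field in network byte order; a 29-octet query, for
* example, arrives as 0x00 0x1d followed by the 29 message octets.
* Those two bytes are assembled into data->query->tcplen below.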
3866 */ 3867 if (data->bytes_transmitted < sizeof(uint16_t)) { 3868 received = read(fd, 3869 (char *) &data->query->tcplen 3870 + data->bytes_transmitted, 3871 sizeof(uint16_t) - data->bytes_transmitted); 3872 if (received == -1) { 3873 if (errno == EAGAIN || errno == EINTR) { 3874 /* 3875 * Read would block, wait until more 3876 * data is available. 3877 */ 3878 return; 3879 } else { 3880 char buf[48]; 3881 addr2str(&data->query->addr, buf, sizeof(buf)); 3882 #ifdef ECONNRESET 3883 if (verbosity >= 2 || errno != ECONNRESET) 3884 #endif /* ECONNRESET */ 3885 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3886 cleanup_tcp_handler(data); 3887 return; 3888 } 3889 } else if (received == 0) { 3890 /* EOF */ 3891 cleanup_tcp_handler(data); 3892 return; 3893 } 3894 3895 data->bytes_transmitted += received; 3896 if (data->bytes_transmitted < sizeof(uint16_t)) { 3897 /* 3898 * Not done with the tcplen yet, wait for more 3899 * data to become available. 3900 */ 3901 return; 3902 } 3903 3904 assert(data->bytes_transmitted == sizeof(uint16_t)); 3905 3906 data->query->tcplen = ntohs(data->query->tcplen); 3907 3908 /* 3909 * Minimum query size is: 3910 * 3911 * Size of the header (12) 3912 * + Root domain name (1) 3913 * + Query class (2) 3914 * + Query type (2) 3915 */ 3916 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 3917 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 3918 cleanup_tcp_handler(data); 3919 return; 3920 } 3921 3922 if (data->query->tcplen > data->query->maxlen) { 3923 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 3924 cleanup_tcp_handler(data); 3925 return; 3926 } 3927 3928 buffer_set_limit(data->query->packet, data->query->tcplen); 3929 } 3930 3931 assert(buffer_remaining(data->query->packet) > 0); 3932 3933 /* Read the (remaining) query data. */ 3934 received = read(fd, 3935 buffer_current(data->query->packet), 3936 buffer_remaining(data->query->packet)); 3937 if (received == -1) { 3938 if (errno == EAGAIN || errno == EINTR) { 3939 /* 3940 * Read would block, wait until more data is 3941 * available. 3942 */ 3943 return; 3944 } else { 3945 char buf[48]; 3946 addr2str(&data->query->addr, buf, sizeof(buf)); 3947 #ifdef ECONNRESET 3948 if (verbosity >= 2 || errno != ECONNRESET) 3949 #endif /* ECONNRESET */ 3950 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3951 cleanup_tcp_handler(data); 3952 return; 3953 } 3954 } else if (received == 0) { 3955 /* EOF */ 3956 cleanup_tcp_handler(data); 3957 return; 3958 } 3959 3960 data->bytes_transmitted += received; 3961 buffer_skip(data->query->packet, received); 3962 if (buffer_remaining(data->query->packet) > 0) { 3963 /* 3964 * Message not yet complete, wait for more data to 3965 * become available. 3966 */ 3967 return; 3968 } 3969 3970 assert(buffer_position(data->query->packet) == data->query->tcplen); 3971 3972 /* Account... */ 3973 #ifdef BIND8_STATS 3974 #ifndef INET6 3975 STATUP(data->nsd, ctcp); 3976 #else 3977 if (data->query->addr.ss_family == AF_INET) { 3978 STATUP(data->nsd, ctcp); 3979 } else if (data->query->addr.ss_family == AF_INET6) { 3980 STATUP(data->nsd, ctcp6); 3981 } 3982 #endif 3983 #endif /* BIND8_STATS */ 3984 3985 /* We have a complete query, process it. 
*/ 3986 3987 /* tcp-query-count: handle query counter ++ */ 3988 data->query_count++; 3989 3990 buffer_flip(data->query->packet); 3991 #ifdef USE_DNSTAP 3992 /* 3993 * and send TCP-query with found address (local) and client address to dnstap process 3994 */ 3995 log_addr("query from client", &data->query->addr); 3996 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 3997 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 3998 data->query->addrlen, data->query->tcp, data->query->packet); 3999 #endif /* USE_DNSTAP */ 4000 data->query_state = server_process_query(data->nsd, data->query, &now); 4001 if (data->query_state == QUERY_DISCARDED) { 4002 /* Drop the packet and the entire connection... */ 4003 STATUP(data->nsd, dropped); 4004 ZTATUP(data->nsd, data->query->zone, dropped); 4005 cleanup_tcp_handler(data); 4006 return; 4007 } 4008 4009 #ifdef BIND8_STATS 4010 if (RCODE(data->query->packet) == RCODE_OK 4011 && !AA(data->query->packet)) 4012 { 4013 STATUP(data->nsd, nona); 4014 ZTATUP(data->nsd, data->query->zone, nona); 4015 } 4016 #endif /* BIND8_STATS */ 4017 4018 #ifdef USE_ZONE_STATS 4019 #ifndef INET6 4020 ZTATUP(data->nsd, data->query->zone, ctcp); 4021 #else 4022 if (data->query->addr.ss_family == AF_INET) { 4023 ZTATUP(data->nsd, data->query->zone, ctcp); 4024 } else if (data->query->addr.ss_family == AF_INET6) { 4025 ZTATUP(data->nsd, data->query->zone, ctcp6); 4026 } 4027 #endif 4028 #endif /* USE_ZONE_STATS */ 4029 4030 query_add_optional(data->query, data->nsd, &now); 4031 4032 /* Switch to the tcp write handler. */ 4033 buffer_flip(data->query->packet); 4034 data->query->tcplen = buffer_remaining(data->query->packet); 4035 #ifdef BIND8_STATS 4036 /* Account the rcode & TC... 
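* STATUP2/ZTATUP2 record the response code in the BIND8-style
* statistics (per server and, via ZTATUP2, per zone), and TC() tests
* the truncation bit in the packet header.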
*/ 4037 STATUP2(data->nsd, rcode, RCODE(data->query->packet));
4038 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
4039 if (TC(data->query->packet)) {
4040 STATUP(data->nsd, truncated);
4041 ZTATUP(data->nsd, data->query->zone, truncated);
4042 }
4043 #endif /* BIND8_STATS */
4044 #ifdef USE_DNSTAP
4045 /*
4046 * send the TCP response, with the server (local) address and the client address, to the dnstap collector process
4047 */
4048 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
4049 log_addr("response to client", &data->query->addr);
4050 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
4051 data->query->addrlen, data->query->tcp, data->query->packet,
4052 data->query->zone);
4053 #endif /* USE_DNSTAP */
4054 data->bytes_transmitted = 0;
4055
4056 timeout.tv_sec = data->tcp_timeout / 1000;
4057 timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
4058
4059 ev_base = data->event.ev_base;
4060 event_del(&data->event);
4061 memset(&data->event, 0, sizeof(data->event));
4062 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
4063 handle_tcp_reading, data);
4064 if(event_base_set(ev_base, &data->event) != 0)
4065 log_msg(LOG_ERR, "event base set tcpr failed");
4066 if(event_add(&data->event, &timeout) != 0)
4067 log_msg(LOG_ERR, "event add tcpr failed");
4068 /* see if we can write the answer right away (usually we can; EAGAIN if not) */
4069 handle_tcp_writing(fd, EV_WRITE, data);
4070 }
4071
4072 static void
4073 handle_tcp_writing(int fd, short event, void* arg)
4074 {
4075 struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
4076 ssize_t sent;
4077 struct query *q = data->query;
4078 struct timeval timeout;
4079 struct event_base* ev_base;
4080 uint32_t now = 0;
4081
4082 if ((event & EV_TIMEOUT)) {
4083 /* Connection timed out. */
4084 cleanup_tcp_handler(data);
4085 return;
4086 }
4087
4088 assert((event & EV_WRITE));
4089
4090 if (data->bytes_transmitted < sizeof(q->tcplen)) {
4091 /* Writing the response packet length. */
4092 uint16_t n_tcplen = htons(q->tcplen);
4093 #ifdef HAVE_WRITEV
4094 struct iovec iov[2];
4095 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted;
4096 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted;
4097 iov[1].iov_base = buffer_begin(q->packet);
4098 iov[1].iov_len = buffer_limit(q->packet);
4099 sent = writev(fd, iov, 2);
4100 #else /* HAVE_WRITEV */
4101 sent = write(fd,
4102 (const char *) &n_tcplen + data->bytes_transmitted,
4103 sizeof(n_tcplen) - data->bytes_transmitted);
4104 #endif /* HAVE_WRITEV */
4105 if (sent == -1) {
4106 if (errno == EAGAIN || errno == EINTR) {
4107 /*
4108 * Write would block, wait until
4109 * socket becomes writable again.
4110 */
4111 return;
4112 } else {
4113 #ifdef ECONNRESET
4114 if(verbosity >= 2 || errno != ECONNRESET)
4115 #endif /* ECONNRESET */
4116 #ifdef EPIPE
4117 if(verbosity >= 2 || errno != EPIPE)
4118 #endif /* EPIPE 'broken pipe' */
4119 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
4120 cleanup_tcp_handler(data);
4121 return;
4122 }
4123 }
4124
4125 data->bytes_transmitted += sent;
4126 if (data->bytes_transmitted < sizeof(q->tcplen)) {
4127 /*
4128 * Writing not complete, wait until socket
4129 * becomes writable again.
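* (With writev() above, a single call may also send part of the
* packet itself; once the length prefix is complete, the HAVE_WRITEV
* code below subtracts sizeof(n_tcplen) from sent and joins the common
* path that advances q->packet.)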
4130 */ 4131 return; 4132 } 4133 4134 #ifdef HAVE_WRITEV 4135 sent -= sizeof(n_tcplen); 4136 /* handle potential 'packet done' code */ 4137 goto packet_could_be_done; 4138 #endif 4139 } 4140 4141 sent = write(fd, 4142 buffer_current(q->packet), 4143 buffer_remaining(q->packet)); 4144 if (sent == -1) { 4145 if (errno == EAGAIN || errno == EINTR) { 4146 /* 4147 * Write would block, wait until 4148 * socket becomes writable again. 4149 */ 4150 return; 4151 } else { 4152 #ifdef ECONNRESET 4153 if(verbosity >= 2 || errno != ECONNRESET) 4154 #endif /* ECONNRESET */ 4155 #ifdef EPIPE 4156 if(verbosity >= 2 || errno != EPIPE) 4157 #endif /* EPIPE 'broken pipe' */ 4158 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 4159 cleanup_tcp_handler(data); 4160 return; 4161 } 4162 } 4163 4164 data->bytes_transmitted += sent; 4165 #ifdef HAVE_WRITEV 4166 packet_could_be_done: 4167 #endif 4168 buffer_skip(q->packet, sent); 4169 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 4170 /* 4171 * Still more data to write when socket becomes 4172 * writable again. 4173 */ 4174 return; 4175 } 4176 4177 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 4178 4179 if (data->query_state == QUERY_IN_AXFR || 4180 data->query_state == QUERY_IN_IXFR) { 4181 /* Continue processing AXFR and writing back results. */ 4182 buffer_clear(q->packet); 4183 if(data->query_state == QUERY_IN_AXFR) 4184 data->query_state = query_axfr(data->nsd, q, 0); 4185 else data->query_state = query_ixfr(data->nsd, q); 4186 if (data->query_state != QUERY_PROCESSED) { 4187 query_add_optional(data->query, data->nsd, &now); 4188 4189 /* Reset data. */ 4190 buffer_flip(q->packet); 4191 q->tcplen = buffer_remaining(q->packet); 4192 data->bytes_transmitted = 0; 4193 /* Reset timeout. */ 4194 timeout.tv_sec = data->tcp_timeout / 1000; 4195 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4196 ev_base = data->event.ev_base; 4197 event_del(&data->event); 4198 memset(&data->event, 0, sizeof(data->event)); 4199 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 4200 handle_tcp_writing, data); 4201 if(event_base_set(ev_base, &data->event) != 0) 4202 log_msg(LOG_ERR, "event base set tcpw failed"); 4203 if(event_add(&data->event, &timeout) != 0) 4204 log_msg(LOG_ERR, "event add tcpw failed"); 4205 4206 /* 4207 * Write data if/when the socket is writable 4208 * again. 4209 */ 4210 return; 4211 } 4212 } 4213 4214 /* 4215 * Done sending, wait for the next request to arrive on the 4216 * TCP socket by installing the TCP read handler. 
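* If the connection has reached its query limit, the write side is
* half-closed first (shutdown(fd, SHUT_WR) below), so the client sees
* EOF after the final answer has been delivered.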
4217 */
4218 if ((data->nsd->tcp_query_count > 0 &&
4219 data->query_count >= data->nsd->tcp_query_count) ||
4220 data->tcp_no_more_queries) {
4221
4222 (void) shutdown(fd, SHUT_WR);
4223 }
4224
4225 data->bytes_transmitted = 0;
4226
4227 timeout.tv_sec = data->tcp_timeout / 1000;
4228 timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
4229 ev_base = data->event.ev_base;
4230 event_del(&data->event);
4231 memset(&data->event, 0, sizeof(data->event));
4232 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
4233 handle_tcp_reading, data);
4234 if(event_base_set(ev_base, &data->event) != 0)
4235 log_msg(LOG_ERR, "event base set tcpw failed");
4236 if(event_add(&data->event, &timeout) != 0)
4237 log_msg(LOG_ERR, "event add tcpw failed");
4238 }
4239
4240 #ifdef HAVE_SSL
4241 /** create SSL object and associate fd */
4242 static SSL*
4243 incoming_ssl_fd(SSL_CTX* ctx, int fd)
4244 {
4245 SSL* ssl = SSL_new((SSL_CTX*)ctx);
4246 if(!ssl) {
4247 log_crypto_err("could not SSL_new");
4248 return NULL;
4249 }
4250 SSL_set_accept_state(ssl);
4251 (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
4252 if(!SSL_set_fd(ssl, fd)) {
4253 log_crypto_err("could not SSL_set_fd");
4254 SSL_free(ssl);
4255 return NULL;
4256 }
4257 return ssl;
4258 }
4259
4260 /** TLS handshake to upgrade TCP connection */
4261 static int
4262 tls_handshake(struct tcp_handler_data* data, int fd, int writing)
4263 {
4264 int r;
4265 if(data->shake_state == tls_hs_read_event) {
4266 /* the read condition is satisfied; switch back to writing */
4267 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
4268 data->shake_state = tls_hs_none;
4269 return 1;
4270 }
4271 if(data->shake_state == tls_hs_write_event) {
4272 /* the write condition is satisfied; switch back to reading */
4273 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
4274 data->shake_state = tls_hs_none;
4275 return 1;
4276 }
4277
4278 /* (continue to) setup the TLS connection */
4279 ERR_clear_error();
4280 r = SSL_do_handshake(data->tls);
4281
4282 if(r != 1) {
4283 int want = SSL_get_error(data->tls, r);
4284 if(want == SSL_ERROR_WANT_READ) {
4285 if(data->shake_state == tls_hs_read) {
4286 /* try again later */
4287 return 1;
4288 }
4289 data->shake_state = tls_hs_read;
4290 /* switch back to reading mode */
4291 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
4292 return 1;
4293 } else if(want == SSL_ERROR_WANT_WRITE) {
4294 if(data->shake_state == tls_hs_write) {
4295 /* try again later */
4296 return 1;
4297 }
4298 data->shake_state = tls_hs_write;
4299 /* switch back to writing mode */
4300 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
4301 return 1;
4302 } else {
4303 if(r == 0)
4304 VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely"));
4305 else {
4306 unsigned long err = ERR_get_error();
4307 if(!squelch_err_ssl_handshake(err)) {
4308 char a[64], s[256];
4309 addr2str(&data->query->addr, a, sizeof(a));
4310 snprintf(s, sizeof(s), "TLS handshake failed from %s", a);
4311 log_crypto_from_err(s, err);
4312 }
4313 }
4314 cleanup_tcp_handler(data);
4315 return 0;
4316 }
4317 }
4318
4319 /* log the successful TLS upgrade, for testing; this could be removed */
4320 VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded."));
4321 /* set back to the event we need to have when reading (or writing) */
4322 if(data->shake_state == tls_hs_read && writing) {
4323 tcp_handler_setup_event(data, handle_tls_writing, fd,
EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4324 } else if(data->shake_state == tls_hs_write && !writing) { 4325 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4326 } 4327 data->shake_state = tls_hs_none; 4328 return 1; 4329 } 4330 4331 /** handle TLS reading of incoming query */ 4332 static void 4333 handle_tls_reading(int fd, short event, void* arg) 4334 { 4335 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4336 ssize_t received; 4337 uint32_t now = 0; 4338 4339 if ((event & EV_TIMEOUT)) { 4340 /* Connection timed out. */ 4341 cleanup_tcp_handler(data); 4342 return; 4343 } 4344 4345 if ((data->nsd->tcp_query_count > 0 && 4346 data->query_count >= data->nsd->tcp_query_count) || 4347 data->tcp_no_more_queries) { 4348 /* No more queries allowed on this tcp connection. */ 4349 cleanup_tcp_handler(data); 4350 return; 4351 } 4352 4353 assert((event & EV_READ)); 4354 4355 if (data->bytes_transmitted == 0) { 4356 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 4357 } 4358 4359 if(data->shake_state != tls_hs_none) { 4360 if(!tls_handshake(data, fd, 0)) 4361 return; 4362 if(data->shake_state != tls_hs_none) 4363 return; 4364 } 4365 4366 /* 4367 * Check if we received the leading packet length bytes yet. 4368 */ 4369 if(data->bytes_transmitted < sizeof(uint16_t)) { 4370 ERR_clear_error(); 4371 if((received=SSL_read(data->tls, (char *) &data->query->tcplen 4372 + data->bytes_transmitted, 4373 sizeof(uint16_t) - data->bytes_transmitted)) <= 0) { 4374 int want = SSL_get_error(data->tls, received); 4375 if(want == SSL_ERROR_ZERO_RETURN) { 4376 cleanup_tcp_handler(data); 4377 return; /* shutdown, closed */ 4378 } else if(want == SSL_ERROR_WANT_READ) { 4379 /* wants to be called again */ 4380 return; 4381 } 4382 else if(want == SSL_ERROR_WANT_WRITE) { 4383 /* switch to writing */ 4384 data->shake_state = tls_hs_write_event; 4385 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4386 return; 4387 } 4388 cleanup_tcp_handler(data); 4389 log_crypto_err("could not SSL_read"); 4390 return; 4391 } 4392 4393 data->bytes_transmitted += received; 4394 if (data->bytes_transmitted < sizeof(uint16_t)) { 4395 /* 4396 * Not done with the tcplen yet, wait for more 4397 * data to become available. 4398 */ 4399 return; 4400 } 4401 4402 assert(data->bytes_transmitted == sizeof(uint16_t)); 4403 4404 data->query->tcplen = ntohs(data->query->tcplen); 4405 4406 /* 4407 * Minimum query size is: 4408 * 4409 * Size of the header (12) 4410 * + Root domain name (1) 4411 * + Query class (2) 4412 * + Query type (2) 4413 */ 4414 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 4415 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 4416 cleanup_tcp_handler(data); 4417 return; 4418 } 4419 4420 if (data->query->tcplen > data->query->maxlen) { 4421 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 4422 cleanup_tcp_handler(data); 4423 return; 4424 } 4425 4426 buffer_set_limit(data->query->packet, data->query->tcplen); 4427 } 4428 4429 assert(buffer_remaining(data->query->packet) > 0); 4430 4431 /* Read the (remaining) query data. 
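* Note that SSL_read() can also need to write (for instance during a
* renegotiation or a TLS 1.3 key update), which is why the
* SSL_ERROR_WANT_WRITE cases below switch the event to EV_WRITE and
* record the pending direction in shake_state.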
*/ 4432 ERR_clear_error();
4433 received = SSL_read(data->tls, (void*)buffer_current(data->query->packet),
4434 (int)buffer_remaining(data->query->packet));
4435 if(received <= 0) {
4436 int want = SSL_get_error(data->tls, received);
4437 if(want == SSL_ERROR_ZERO_RETURN) {
4438 cleanup_tcp_handler(data);
4439 return; /* shutdown, closed */
4440 } else if(want == SSL_ERROR_WANT_READ) {
4441 /* wants to be called again */
4442 return;
4443 }
4444 else if(want == SSL_ERROR_WANT_WRITE) {
4445 /* switch back to writing */
4446 data->shake_state = tls_hs_write_event;
4447 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
4448 return;
4449 }
4450 cleanup_tcp_handler(data);
4451 log_crypto_err("could not SSL_read");
4452 return;
4453 }
4454
4455 data->bytes_transmitted += received;
4456 buffer_skip(data->query->packet, received);
4457 if (buffer_remaining(data->query->packet) > 0) {
4458 /*
4459 * Message not yet complete, wait for more data to
4460 * become available.
4461 */
4462 return;
4463 }
4464
4465 assert(buffer_position(data->query->packet) == data->query->tcplen);
4466
4467 /* Account... */
4468 #ifndef INET6
4469 STATUP(data->nsd, ctls);
4470 #else
4471 if (data->query->addr.ss_family == AF_INET) {
4472 STATUP(data->nsd, ctls);
4473 } else if (data->query->addr.ss_family == AF_INET6) {
4474 STATUP(data->nsd, ctls6);
4475 }
4476 #endif
4477
4478 /* We have a complete query, process it. */
4479
4480 /* tcp-query-count: handle query counter ++ */
4481 data->query_count++;
4482
4483 buffer_flip(data->query->packet);
4484 #ifdef USE_DNSTAP
4485 /*
4486 * send the TCP query, with the server (local) address and the client address, to the dnstap collector process
4487 */
4488 log_addr("query from client", &data->query->addr);
4489 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
4490 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
4491 data->query->addrlen, data->query->tcp, data->query->packet);
4492 #endif /* USE_DNSTAP */
4493 data->query_state = server_process_query(data->nsd, data->query, &now);
4494 if (data->query_state == QUERY_DISCARDED) {
4495 /* Drop the packet and the entire connection... */
4496 STATUP(data->nsd, dropped);
4497 ZTATUP(data->nsd, data->query->zone, dropped);
4498 cleanup_tcp_handler(data);
4499 return;
4500 }
4501
4502 #ifdef BIND8_STATS
4503 if (RCODE(data->query->packet) == RCODE_OK
4504 && !AA(data->query->packet))
4505 {
4506 STATUP(data->nsd, nona);
4507 ZTATUP(data->nsd, data->query->zone, nona);
4508 }
4509 #endif /* BIND8_STATS */
4510
4511 #ifdef USE_ZONE_STATS
4512 #ifndef INET6
4513 ZTATUP(data->nsd, data->query->zone, ctls);
4514 #else
4515 if (data->query->addr.ss_family == AF_INET) {
4516 ZTATUP(data->nsd, data->query->zone, ctls);
4517 } else if (data->query->addr.ss_family == AF_INET6) {
4518 ZTATUP(data->nsd, data->query->zone, ctls6);
4519 }
4520 #endif
4521 #endif /* USE_ZONE_STATS */
4522
4523 query_add_optional(data->query, data->nsd, &now);
4524
4525 /* Switch to the tcp write handler. */
4526 buffer_flip(data->query->packet);
4527 data->query->tcplen = buffer_remaining(data->query->packet);
4528 #ifdef BIND8_STATS
4529 /* Account the rcode & TC...
*/ 4530 STATUP2(data->nsd, rcode, RCODE(data->query->packet));
4531 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
4532 if (TC(data->query->packet)) {
4533 STATUP(data->nsd, truncated);
4534 ZTATUP(data->nsd, data->query->zone, truncated);
4535 }
4536 #endif /* BIND8_STATS */
4537 #ifdef USE_DNSTAP
4538 /*
4539 * send the TCP response, with the server (local) address and the client address, to the dnstap collector process
4540 */
4541 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
4542 log_addr("response to client", &data->query->addr);
4543 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
4544 data->query->addrlen, data->query->tcp, data->query->packet,
4545 data->query->zone);
4546 #endif /* USE_DNSTAP */
4547 data->bytes_transmitted = 0;
4548
4549 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
4550
4551 /* see if we can write the answer right away (usually we can; EAGAIN if not) */
4552 handle_tls_writing(fd, EV_WRITE, data);
4553 }
4554
4555 /** handle TLS writing of outgoing response */
4556 static void
4557 handle_tls_writing(int fd, short event, void* arg)
4558 {
4559 struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
4560 ssize_t sent;
4561 struct query *q = data->query;
4562 /* static variable that holds the reassembly buffer used to put the
4563 * TCP length in front of the packet, like writev. */
4564 static buffer_type* global_tls_temp_buffer = NULL;
4565 buffer_type* write_buffer;
4566 uint32_t now = 0;
4567
4568 if ((event & EV_TIMEOUT)) {
4569 /* Connection timed out. */
4570 cleanup_tcp_handler(data);
4571 return;
4572 }
4573
4574 assert((event & EV_WRITE));
4575
4576 if(data->shake_state != tls_hs_none) {
4577 if(!tls_handshake(data, fd, 1))
4578 return;
4579 if(data->shake_state != tls_hs_none)
4580 return;
4581 }
4582
4583 (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE);
4584
4585 /* If we are writing the start of a message, we must include the length;
4586 * this is done by copying it into write_buffer along with the packet.
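* SSL_write() has no writev()-style gather interface, so the two
* length octets and the DNS payload are staged contiguously in one
* temporary buffer; for a 29-octet response (sizes chosen only for
* illustration) a single SSL_write() then covers
* [0x00 0x1d][29 payload octets].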
*/ 4587 write_buffer = NULL;
4588 if (data->bytes_transmitted == 0) {
4589 if(!global_tls_temp_buffer) {
4590 /* gets deallocated when nsd shuts down from
4591 * nsd.region */
4592 global_tls_temp_buffer = buffer_create(nsd.region,
4593 QIOBUFSZ + sizeof(q->tcplen));
4594 if (!global_tls_temp_buffer) {
4595 return;
4596 }
4597 }
4598 write_buffer = global_tls_temp_buffer;
4599 buffer_clear(write_buffer);
4600 buffer_write_u16(write_buffer, q->tcplen);
4601 buffer_write(write_buffer, buffer_current(q->packet),
4602 (int)buffer_remaining(q->packet));
4603 buffer_flip(write_buffer);
4604 } else {
4605 write_buffer = q->packet;
4606 }
4607
4608 /* Write the response */
4609 ERR_clear_error();
4610 sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer));
4611 if(sent <= 0) {
4612 int want = SSL_get_error(data->tls, sent);
4613 if(want == SSL_ERROR_ZERO_RETURN) {
4614 cleanup_tcp_handler(data);
4615 /* closed */
4616 } else if(want == SSL_ERROR_WANT_READ) {
4617 /* switch back to reading */
4618 data->shake_state = tls_hs_read_event;
4619 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
4620 } else if(want != SSL_ERROR_WANT_WRITE) {
4621 cleanup_tcp_handler(data);
4622 log_crypto_err("could not SSL_write");
4623 }
4624 return;
4625 }
4626
4627 buffer_skip(write_buffer, sent);
4628 if(buffer_remaining(write_buffer) != 0) {
4629 /* Not everything was sent. If this round wrote from the temporary buffer, advance q->packet past the payload bytes that did go out, so the two buffers stay in sync. */
4630 if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) {
4631 buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen));
4632 }
4633 }
4634
4635 data->bytes_transmitted += sent;
4636 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
4637 /*
4638 * Still more data to write when socket becomes
4639 * writable again.
4640 */
4641 return;
4642 }
4643
4644 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));
4645
4646 if (data->query_state == QUERY_IN_AXFR ||
4647 data->query_state == QUERY_IN_IXFR) {
4648 /* Continue processing AXFR and writing back results. */
4649 buffer_clear(q->packet);
4650 if(data->query_state == QUERY_IN_AXFR)
4651 data->query_state = query_axfr(data->nsd, q, 0);
4652 else data->query_state = query_ixfr(data->nsd, q);
4653 if (data->query_state != QUERY_PROCESSED) {
4654 query_add_optional(data->query, data->nsd, &now);
4655
4656 /* Reset data. */
4657 buffer_flip(q->packet);
4658 q->tcplen = buffer_remaining(q->packet);
4659 data->bytes_transmitted = 0;
4660 /* Reset to writing mode. */
4661 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
4662
4663 /*
4664 * Write data if/when the socket is writable
4665 * again.
4666 */
4667 return;
4668 }
4669 }
4670
4671 /*
4672 * Done sending, wait for the next request to arrive on the
4673 * TCP socket by installing the TCP read handler.
4674 */ 4675 if ((data->nsd->tcp_query_count > 0 && 4676 data->query_count >= data->nsd->tcp_query_count) || 4677 data->tcp_no_more_queries) { 4678 4679 (void) shutdown(fd, SHUT_WR); 4680 } 4681 4682 data->bytes_transmitted = 0; 4683 4684 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 4685 } 4686 #endif 4687 4688 static void 4689 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), 4690 void* ATTR_UNUSED(arg)) 4691 { 4692 if(slowaccept) { 4693 configure_handler_event_types(EV_PERSIST | EV_READ); 4694 slowaccept = 0; 4695 } 4696 } 4697 4698 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) 4699 { 4700 #ifndef HAVE_ACCEPT4 4701 int s = accept(fd, addr, addrlen); 4702 if (s != -1) { 4703 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 4704 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 4705 close(s); 4706 s = -1; 4707 errno=EINTR; /* stop error printout as error in accept4 4708 by setting this errno, it omits printout, in 4709 later code that calls nsd_accept4 */ 4710 } 4711 } 4712 return s; 4713 #else 4714 return accept4(fd, addr, addrlen, SOCK_NONBLOCK); 4715 #endif /* HAVE_ACCEPT4 */ 4716 } 4717 4718 /* 4719 * Handle an incoming TCP connection. The connection is accepted and 4720 * a new TCP reader event handler is added. The TCP handler 4721 * is responsible for cleanup when the connection is closed. 4722 */ 4723 static void 4724 handle_tcp_accept(int fd, short event, void* arg) 4725 { 4726 struct tcp_accept_handler_data *data 4727 = (struct tcp_accept_handler_data *) arg; 4728 int s; 4729 int reject = 0; 4730 struct tcp_handler_data *tcp_data; 4731 region_type *tcp_region; 4732 #ifdef INET6 4733 struct sockaddr_storage addr; 4734 #else 4735 struct sockaddr_in addr; 4736 #endif 4737 socklen_t addrlen; 4738 struct timeval timeout; 4739 4740 if (!(event & EV_READ)) { 4741 return; 4742 } 4743 4744 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 4745 reject = data->nsd->options->tcp_reject_overflow; 4746 if (!reject) { 4747 return; 4748 } 4749 } 4750 4751 /* Accept it... */ 4752 addrlen = sizeof(addr); 4753 s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen); 4754 if (s == -1) { 4755 /** 4756 * EMFILE and ENFILE is a signal that the limit of open 4757 * file descriptors has been reached. Pause accept(). 4758 * EINTR is a signal interrupt. The others are various OS ways 4759 * of saying that the client has closed the connection. 
4760 */ 4761 if (errno == EMFILE || errno == ENFILE) { 4762 if (!slowaccept) { 4763 /* disable accept events */ 4764 struct timeval tv; 4765 configure_handler_event_types(0); 4766 tv.tv_sec = SLOW_ACCEPT_TIMEOUT; 4767 tv.tv_usec = 0L; 4768 memset(&slowaccept_event, 0, 4769 sizeof(slowaccept_event)); 4770 event_set(&slowaccept_event, -1, EV_TIMEOUT, 4771 handle_slowaccept_timeout, NULL); 4772 (void)event_base_set(data->event.ev_base, 4773 &slowaccept_event); 4774 (void)event_add(&slowaccept_event, &tv); 4775 slowaccept = 1; 4776 /* We don't want to spam the logs here */ 4777 } 4778 } else if (errno != EINTR 4779 && errno != EWOULDBLOCK 4780 #ifdef ECONNABORTED 4781 && errno != ECONNABORTED 4782 #endif /* ECONNABORTED */ 4783 #ifdef EPROTO 4784 && errno != EPROTO 4785 #endif /* EPROTO */ 4786 ) { 4787 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 4788 } 4789 return; 4790 } 4791 4792 if (reject) { 4793 shutdown(s, SHUT_RDWR); 4794 close(s); 4795 return; 4796 } 4797 4798 /* 4799 * This region is deallocated when the TCP connection is 4800 * closed by the TCP handler. 4801 */ 4802 tcp_region = region_create(xalloc, free); 4803 tcp_data = (struct tcp_handler_data *) region_alloc( 4804 tcp_region, sizeof(struct tcp_handler_data)); 4805 tcp_data->region = tcp_region; 4806 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 4807 compression_table_size, compressed_dnames); 4808 tcp_data->nsd = data->nsd; 4809 tcp_data->query_count = 0; 4810 #ifdef HAVE_SSL 4811 tcp_data->shake_state = tls_hs_none; 4812 tcp_data->tls = NULL; 4813 #endif 4814 tcp_data->prev = NULL; 4815 tcp_data->next = NULL; 4816 4817 tcp_data->query_state = QUERY_PROCESSED; 4818 tcp_data->bytes_transmitted = 0; 4819 memcpy(&tcp_data->query->addr, &addr, addrlen); 4820 tcp_data->query->addrlen = addrlen; 4821 4822 tcp_data->tcp_no_more_queries = 0; 4823 tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000; 4824 if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) { 4825 /* very busy, give smaller timeout */ 4826 tcp_data->tcp_timeout = 200; 4827 } 4828 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4829 timeout.tv_sec = tcp_data->tcp_timeout / 1000; 4830 timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000; 4831 4832 #ifdef USE_DNSTAP 4833 /* save the address of the connection */ 4834 tcp_data->socket = data->socket; 4835 #endif /* USE_DNSTAP */ 4836 4837 #ifdef HAVE_SSL 4838 if (data->tls_accept) { 4839 tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s); 4840 if(!tcp_data->tls) { 4841 close(s); 4842 return; 4843 } 4844 tcp_data->shake_state = tls_hs_read; 4845 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4846 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4847 handle_tls_reading, tcp_data); 4848 } else { 4849 #endif 4850 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4851 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4852 handle_tcp_reading, tcp_data); 4853 #ifdef HAVE_SSL 4854 } 4855 #endif 4856 if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) { 4857 log_msg(LOG_ERR, "cannot set tcp event base"); 4858 close(s); 4859 region_destroy(tcp_region); 4860 return; 4861 } 4862 if(event_add(&tcp_data->event, &timeout) != 0) { 4863 log_msg(LOG_ERR, "cannot add tcp to event base"); 4864 close(s); 4865 region_destroy(tcp_region); 4866 return; 4867 } 4868 if(tcp_active_list) { 4869 tcp_active_list->prev = tcp_data; 4870 tcp_data->next = tcp_active_list; 4871 } 4872 tcp_active_list = tcp_data; 4873 4874 /* 4875 * Keep 
track of the total number of TCP handlers installed so 4876 * we can stop accepting connections when the maximum number 4877 * of simultaneous TCP connections is reached. 4878 * 4879 * If tcp-reject-overflow is enabled, however, then we do not 4880 * change the handler event type; we keep it as-is and accept 4881 * overflow TCP connections only so that we can forcibly kill 4882 * them off. 4883 */ 4884 ++data->nsd->current_tcp_count; 4885 if (!data->nsd->options->tcp_reject_overflow && 4886 data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) 4887 { 4888 configure_handler_event_types(0); 4889 } 4890 } 4891 4892 static void 4893 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) 4894 { 4895 size_t i; 4896 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4897 for (i = 0; i < nsd->child_count; ++i) { 4898 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) { 4899 if (write(nsd->children[i].child_fd, 4900 &command, 4901 sizeof(command)) == -1) 4902 { 4903 if(errno != EAGAIN && errno != EINTR) 4904 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 4905 (int) command, 4906 (int) nsd->children[i].pid, 4907 strerror(errno)); 4908 } else if (timeout > 0) { 4909 (void)block_read(NULL, 4910 nsd->children[i].child_fd, 4911 &command, sizeof(command), timeout); 4912 } 4913 fsync(nsd->children[i].child_fd); 4914 close(nsd->children[i].child_fd); 4915 nsd->children[i].child_fd = -1; 4916 } 4917 } 4918 } 4919 4920 static void 4921 send_children_quit(struct nsd* nsd) 4922 { 4923 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); 4924 send_children_command(nsd, NSD_QUIT, 0); 4925 } 4926 4927 static void 4928 send_children_quit_and_wait(struct nsd* nsd) 4929 { 4930 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait")); 4931 send_children_command(nsd, NSD_QUIT_CHILD, 3); 4932 } 4933 4934 #ifdef BIND8_STATS 4935 static void 4936 set_children_stats(struct nsd* nsd) 4937 { 4938 size_t i; 4939 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4940 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 4941 for (i = 0; i < nsd->child_count; ++i) { 4942 nsd->children[i].need_to_send_STATS = 1; 4943 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 4944 } 4945 } 4946 #endif /* BIND8_STATS */ 4947 4948 static void 4949 configure_handler_event_types(short event_types) 4950 { 4951 size_t i; 4952 4953 for (i = 0; i < tcp_accept_handler_count; ++i) { 4954 struct event* handler = &tcp_accept_handlers[i].event; 4955 if(event_types) { 4956 /* reassign */ 4957 int fd = handler->ev_fd; 4958 struct event_base* base = handler->ev_base; 4959 if(tcp_accept_handlers[i].event_added) 4960 event_del(handler); 4961 memset(handler, 0, sizeof(*handler)); 4962 event_set(handler, fd, event_types, 4963 handle_tcp_accept, &tcp_accept_handlers[i]); 4964 if(event_base_set(base, handler) != 0) 4965 log_msg(LOG_ERR, "conhand: cannot event_base"); 4966 if(event_add(handler, NULL) != 0) 4967 log_msg(LOG_ERR, "conhand: cannot event_add"); 4968 tcp_accept_handlers[i].event_added = 1; 4969 } else { 4970 /* remove */ 4971 if(tcp_accept_handlers[i].event_added) { 4972 event_del(handler); 4973 tcp_accept_handlers[i].event_added = 0; 4974 } 4975 } 4976 } 4977 } 4978
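/*
 * configure_handler_event_types() is the single switch used to pause and
 * resume all TCP accept sockets: callers pass EV_READ|EV_PERSIST to
 * (re)install the accept callbacks and 0 to remove them, as
 * cleanup_tcp_handler(), handle_slowaccept_timeout() and
 * handle_tcp_accept() do above. A minimal sketch of the calling pattern
 * (the condition names here are hypothetical):
 *
 *   if (reached_connection_limit)
 *       configure_handler_event_types(0);
 *   else if (dropped_below_limit)
 *       configure_handler_event_types(EV_READ|EV_PERSIST);
 */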