/*
 * server.c -- nsd(8) network input/output
 *
 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
 *
 * See LICENSE for the license.
 *
 */

#include "config.h"

#include <sys/types.h>
#include <sys/param.h>
#include <limits.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/wait.h>

#include <netinet/in.h>
#ifdef USE_TCP_FASTOPEN
#include <netinet/tcp.h>
#endif
#include <arpa/inet.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>
#include <netdb.h>
#include <poll.h>
#ifdef HAVE_SYS_RANDOM_H
#include <sys/random.h>
#endif
#ifndef SHUT_WR
#define SHUT_WR 1
#endif
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL_RAND_H
#include <openssl/rand.h>
#endif
#ifdef HAVE_OPENSSL_SSL_H
#include <openssl/ssl.h>
#endif
#ifdef HAVE_OPENSSL_ERR_H
#include <openssl/err.h>
#endif
#ifdef HAVE_OPENSSL_OCSP_H
#include <openssl/ocsp.h>
#endif
#ifndef USE_MINI_EVENT
#  ifdef HAVE_EVENT_H
#    include <event.h>
#  else
#    include <event2/event.h>
#    include "event2/event_struct.h"
#    include "event2/event_compat.h"
#  endif
#else
#  include "mini_event.h"
#endif

#include "axfr.h"
#include "namedb.h"
#include "netio.h"
#include "xfrd.h"
#include "xfrd-tcp.h"
#include "xfrd-disk.h"
#include "difffile.h"
#include "nsec3.h"
#include "ipc.h"
#include "udb.h"
#include "remote.h"
#include "lookup3.h"
#include "rrl.h"
#include "ixfr.h"
#ifdef USE_DNSTAP
#include "dnstap/dnstap_collector.h"
#endif
#include "verify.h"

#define RELOAD_SYNC_TIMEOUT 25 /* seconds */

#ifdef USE_DNSTAP
/*
 * log_addr() - prints the content of sockaddr_in/sockaddr_in6 structures,
 * just like it's done in Unbound via the same
 * log_addr(VERB_LEVEL, const char*, sockaddr_storage*)
 */
static void
log_addr(const char* descr,
#ifdef INET6
	struct sockaddr_storage* addr
#else
	struct sockaddr_in* addr
#endif
	)
{
	char str_buf[64];
	if(verbosity < 6)
		return;
	if(
#ifdef INET6
		addr->ss_family == AF_INET
#else
		addr->sin_family == AF_INET
#endif
		) {
		struct sockaddr_in* s = (struct sockaddr_in*)addr;
		inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port)));
#ifdef INET6
	} else {
		struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr;
		inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port)));
#endif
	}
}
#endif /* USE_DNSTAP */

#ifdef USE_TCP_FASTOPEN
#define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen"
#define TCP_FASTOPEN_SERVER_BIT_MASK 0x2
#endif
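/*
 * For context: on Linux, net.ipv4.tcp_fastopen is a bitmask where 0x1
 * enables TCP Fast Open for outgoing (client) connections and 0x2 enables
 * it for listening (server) sockets; TCP_FASTOPEN_SERVER_BIT_MASK above
 * tests the server bit.  For example, a kernel configured with
 *   sysctl -w net.ipv4.tcp_fastopen=3
 * supports both client and server mode.
 */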

/*
 * Data for the UDP handlers.
 */
struct udp_handler_data
{
	struct nsd *nsd;
	struct nsd_socket *socket;
	struct event event;
};

struct tcp_accept_handler_data {
	struct nsd *nsd;
	struct nsd_socket *socket;
	int event_added;
	struct event event;
#ifdef HAVE_SSL
	/* handler accepts TLS connections on the dedicated port */
	int tls_accept;
#endif
};

/*
 * These globals are used to enable the TCP accept handlers
 * when the number of TCP connections drops below the maximum
 * number of TCP connections.
 */
static size_t tcp_accept_handler_count;
static struct tcp_accept_handler_data *tcp_accept_handlers;

static struct event slowaccept_event;
static int slowaccept;

#ifdef HAVE_SSL
static unsigned char *ocspdata = NULL;
static long ocspdata_len = 0;
#endif

#ifdef NONBLOCKING_IS_BROKEN
/* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to
   read multiple times from a socket when reported ready by select. */
# define NUM_RECV_PER_SELECT (1)
#else /* !NONBLOCKING_IS_BROKEN */
# define NUM_RECV_PER_SELECT (100)
#endif /* NONBLOCKING_IS_BROKEN */

#ifndef HAVE_MMSGHDR
struct mmsghdr {
	struct msghdr msg_hdr;
	unsigned int  msg_len;
};
#endif

static struct mmsghdr msgs[NUM_RECV_PER_SELECT];
static struct iovec iovecs[NUM_RECV_PER_SELECT];
static struct query *queries[NUM_RECV_PER_SELECT];

/*
 * Data for the TCP connection handlers.
 *
 * The TCP handlers use non-blocking I/O.  This is necessary to avoid
 * blocking the entire server on a slow TCP connection, but does make
 * reading from and writing to the socket more complicated.
 *
 * Basically, whenever a read/write would block (indicated by the
 * EAGAIN errno variable) we remember the position we were reading
 * from/writing to and return from the TCP reading/writing event
 * handler.  When the socket becomes readable/writable again we
 * continue from the same position.
 */
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure.  This region is destroyed
	 * when the connection is closed.
	 */
	region_type* region;

	/*
	 * The global nsd structure.
	 */
	struct nsd* nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type* query;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type query_state;

	/*
	 * The event for the file descriptor and tcp timeout
	 */
	struct event event;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet.  The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t bytes_transmitted;

	/*
	 * The number of queries handled by this specific TCP connection.
	 */
	int query_count;

	/*
	 * The timeout in msec for this tcp connection
	 */
	int tcp_timeout;

	/*
	 * If the connection is allowed to have further queries on it.
	 */
	int tcp_no_more_queries;

#ifdef USE_DNSTAP
	/* the socket of the accept socket to find the proper service
	 * (local) address the socket is bound to. */
	struct nsd_socket *socket;
#endif /* USE_DNSTAP */

#ifdef HAVE_SSL
	/*
	 * TLS object.
	 */
	SSL* tls;

	/*
	 * TLS handshake state.
	 */
	enum { tls_hs_none, tls_hs_read, tls_hs_write,
		tls_hs_read_event, tls_hs_write_event } shake_state;
#endif
	/* list of connections, for service of remaining tcp channels */
	struct tcp_handler_data *prev, *next;
};
/* global that is the list of active tcp channels */
static struct tcp_handler_data *tcp_active_list = NULL;

/*
 * Handle incoming queries on the UDP server sockets.
 */
static void handle_udp(int fd, short event, void* arg);

/*
 * Handle incoming connections on the TCP sockets.  These handlers
 * usually wait for the NETIO_EVENT_READ event (indicating an incoming
 * connection) but are disabled when the number of current TCP
 * connections is equal to the maximum number of TCP connections.
 * Disabling is done by changing the handler to wait for the
 * NETIO_EVENT_NONE type.  This is done using the function
 * configure_tcp_accept_handlers.
 */
static void handle_tcp_accept(int fd, short event, void* arg);

/*
 * Handle incoming queries on a TCP connection.  The TCP connections
 * are configured to be non-blocking and the handler may be called
 * multiple times before a complete query is received.
 */
static void handle_tcp_reading(int fd, short event, void* arg);

/*
 * Handle outgoing responses on a TCP connection.  The TCP connections
 * are configured to be non-blocking and the handler may be called
 * multiple times before a complete response is sent.
 */
static void handle_tcp_writing(int fd, short event, void* arg);

#ifdef HAVE_SSL
/* Create SSL object and associate fd */
static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd);
/*
 * Handle TLS handshake. May be called multiple times if incomplete.
 */
static int tls_handshake(struct tcp_handler_data* data, int fd, int writing);

/*
 * Handle incoming queries on a TLS over TCP connection.  The TLS
 * connections are configured to be non-blocking and the handler may
 * be called multiple times before a complete query is received.
 */
static void handle_tls_reading(int fd, short event, void* arg);

/*
 * Handle outgoing responses on a TLS over TCP connection.  The TLS
 * connections are configured to be non-blocking and the handler may
 * be called multiple times before a complete response is sent.
 */
static void handle_tls_writing(int fd, short event, void* arg);
#endif

/*
 * Send all children the quit nonblocking, then close pipe.
 */
static void send_children_quit(struct nsd* nsd);
/* same, for shutdown time, waits for child to exit to avoid restart issues */
static void send_children_quit_and_wait(struct nsd* nsd);

/* set children's flags to send NSD_STATS to them */
#ifdef BIND8_STATS
static void set_children_stats(struct nsd* nsd);
#endif /* BIND8_STATS */
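/*
 * Illustration (a sketch, assuming the libevent EV_READ|EV_PERSIST flags
 * that the event loop uses elsewhere; not taken verbatim from the
 * sources): the TCP accept handlers declared above are muted and
 * re-armed along the lines of
 *
 *   configure_handler_event_types(0);                   connection limit hit
 *   configure_handler_event_types(EV_READ|EV_PERSIST);  a slot came free
 */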

/*
 * Change the event types the HANDLERS are interested in to EVENT_TYPES.
 */
static void configure_handler_event_types(short event_types);

static uint16_t *compressed_dname_offsets = 0;
static uint32_t compression_table_capacity = 0;
static uint32_t compression_table_size = 0;
static domain_type* compressed_dnames[MAXRRSPP];

#ifdef USE_TCP_FASTOPEN
/* Checks to see if the kernel value must be manually changed in order for
   TCP Fast Open to support server mode */
static void report_tcp_fastopen_config() {

	int tcp_fastopen_fp;
	uint8_t tcp_fastopen_value;

	if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) {
		log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n",
			strerror(errno));
		return;
	}
	if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) {
		log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n",
			strerror(errno));
		close(tcp_fastopen_fp);
		return;
	}
	if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) {
		log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n");
		log_msg(LOG_WARNING, "However the kernel parameters are not configured to support TCP_FASTOPEN in server mode.\n");
		log_msg(LOG_WARNING, "To enable TFO use the command:");
		log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n");
		log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n");
		log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n");
	}
	close(tcp_fastopen_fp);
}
#endif

/*
 * Remove the specified pid from the list of child pids.  Returns -1 if
 * the pid is not in the list, child_num otherwise.  The field is set to 0.
 */
static int
delete_child_pid(struct nsd *nsd, pid_t pid)
{
	size_t i;
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid == pid) {
			nsd->children[i].pid = 0;
			if(!nsd->children[i].need_to_exit) {
				if(nsd->children[i].child_fd != -1)
					close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
				if(nsd->children[i].handler)
					nsd->children[i].handler->fd = -1;
			}
			return i;
		}
	}
	return -1;
}

/*
 * Restart child servers if necessary.
 */
static int
restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	struct main_ipc_handler_data *ipc_data;
	size_t i;
	int sv[2];

	/* Fork the child processes... */
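	/* Overview of the wiring below: each child gets a UNIX-domain
	 * socketpair as its IPC command channel; the parent keeps sv[0]
	 * (children[i].child_fd) and the child keeps sv[1] (parent_fd),
	 * with both ends switched to O_NONBLOCK. */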
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid <= 0) {
			if (nsd->children[i].child_fd != -1)
				close(nsd->children[i].child_fd);
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
				log_msg(LOG_ERR, "socketpair: %s",
					strerror(errno));
				return -1;
			}
			nsd->children[i].child_fd = sv[0];
			nsd->children[i].parent_fd = sv[1];
			nsd->children[i].pid = fork();
			switch (nsd->children[i].pid) {
			default: /* SERVER MAIN */
				close(nsd->children[i].parent_fd);
				nsd->children[i].parent_fd = -1;
				if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				if(!nsd->children[i].handler)
				{
					ipc_data = (struct main_ipc_handler_data*) region_alloc(
						region, sizeof(struct main_ipc_handler_data));
					ipc_data->nsd = nsd;
					ipc_data->child = &nsd->children[i];
					ipc_data->child_num = i;
					ipc_data->xfrd_sock = xfrd_sock_p;
					ipc_data->packet = buffer_create(region, QIOBUFSZ);
					ipc_data->forward_mode = 0;
					ipc_data->got_bytes = 0;
					ipc_data->total_bytes = 0;
					ipc_data->acl_num = 0;
					nsd->children[i].handler = (struct netio_handler*) region_alloc(
						region, sizeof(struct netio_handler));
					nsd->children[i].handler->fd = nsd->children[i].child_fd;
					nsd->children[i].handler->timeout = NULL;
					nsd->children[i].handler->user_data = ipc_data;
					nsd->children[i].handler->event_types = NETIO_EVENT_READ;
					nsd->children[i].handler->event_handler = parent_handle_child_command;
					netio_add_handler(netio, nsd->children[i].handler);
				}
				/* clear any ongoing ipc */
				ipc_data = (struct main_ipc_handler_data*)
					nsd->children[i].handler->user_data;
				ipc_data->forward_mode = 0;
				/* restart - update fd */
				nsd->children[i].handler->fd = nsd->children[i].child_fd;
				break;
			case 0: /* CHILD */
				/* the child need not be able to access the
				 * nsd.db file */
				namedb_close_udb(nsd->db);
#ifdef MEMCLEAN /* OS collects memory pages */
				region_destroy(region);
#endif

				if (pledge("stdio rpath inet", NULL) == -1) {
					log_msg(LOG_ERR, "pledge");
					exit(1);
				}

				nsd->pid = 0;
				nsd->child_count = 0;
				nsd->server_kind = nsd->children[i].kind;
				nsd->this_child = &nsd->children[i];
				nsd->this_child->child_num = i;
				/* remove signal flags inherited from parent
				   the parent will handle them. */
				nsd->signal_hint_reload_hup = 0;
				nsd->signal_hint_reload = 0;
				nsd->signal_hint_child = 0;
				nsd->signal_hint_quit = 0;
				nsd->signal_hint_shutdown = 0;
				nsd->signal_hint_stats = 0;
				nsd->signal_hint_statsusr = 0;
				close(*xfrd_sock_p);
				close(nsd->this_child->child_fd);
				nsd->this_child->child_fd = -1;
				if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				server_child(nsd);
				/* NOTREACH */
				exit(0);
			case -1:
				log_msg(LOG_ERR, "fork failed: %s",
					strerror(errno));
				return -1;
			}
		}
	}
	return 0;
}

#ifdef BIND8_STATS
static void set_bind8_alarm(struct nsd* nsd)
{
	/* resync so that the next alarm is on the next whole minute */
	if(nsd->st.period > 0) /* % by 0 gives divbyzero error */
		alarm(nsd->st.period - (time(NULL) % nsd->st.period));
}
#endif

/* set zone stat ids for zones initially read in */
static void
zonestatid_tree_set(struct nsd* nsd)
{
	struct radnode* n;
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		zone_type* zone = (zone_type*)n->elem;
		zone->zonestatid = getzonestatid(nsd->options, zone->opts);
	}
}

#ifdef USE_ZONE_STATS
void
server_zonestat_alloc(struct nsd* nsd)
{
	size_t num = (nsd->options->zonestatnames->count==0?1:
			nsd->options->zonestatnames->count);
	size_t sz = sizeof(struct nsdst)*num;
	char tmpfile[256];
	uint8_t z = 0;

	/* file names */
	nsd->zonestatfname[0] = 0;
	nsd->zonestatfname[1] = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile);
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile);

	/* file descriptors */
	nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600);
	if(nsd->zonestatfd[0] == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0],
			strerror(errno));
		exit(1);
	}
	nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600);
	if(nsd->zonestatfd[1] == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1],
			strerror(errno));
		close(nsd->zonestatfd[0]);
		unlink(nsd->zonestatfname[0]);
		exit(1);
	}

#ifdef HAVE_MMAP
	if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[0], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[0], strerror(errno));
		exit(1);
	}
	if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[1], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[1], strerror(errno));
		exit(1);
	}
	nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[0], 0);
	if(nsd->zonestat[0] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[1], 0);
	if(nsd->zonestat[1] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	memset(nsd->zonestat[0], 0, sz);
	memset(nsd->zonestat[1], 0, sz);
	nsd->zonestatsize[0] = num;
	nsd->zonestatsize[1] = num;
	nsd->zonestatdesired = num;
	nsd->zonestatsizenow = num;
	nsd->zonestatnow = nsd->zonestat[0];
#endif /* HAVE_MMAP */
}

void
zonestat_remap(struct nsd* nsd, int idx, size_t sz)
{
#ifdef HAVE_MMAP
#ifdef MREMAP_MAYMOVE
	nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], sz,
		MREMAP_MAYMOVE);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap failed: %s", strerror(errno));
		exit(1);
	}
#else /* !HAVE MREMAP */
	if(msync(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0)
		log_msg(LOG_ERR, "msync failed: %s", strerror(errno));
	if(munmap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0)
		log_msg(LOG_ERR, "munmap failed: %s", strerror(errno));
	nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz,
		PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		exit(1);
	}
#endif /* MREMAP */
#endif /* HAVE_MMAP */
}

/* realloc the zonestat array for the one that is not currently in use,
 * to match the desired new size of the array (if applicable) */
void
server_zonestat_realloc(struct nsd* nsd)
{
#ifdef HAVE_MMAP
	uint8_t z = 0;
	size_t sz;
	int idx = 0; /* index of the zonestat array that is not in use */
	if(nsd->zonestatnow == nsd->zonestat[0])
		idx = 1;
	if(nsd->zonestatsize[idx] == nsd->zonestatdesired)
		return;
	sz = sizeof(struct nsdst)*nsd->zonestatdesired;
	if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[idx], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[idx], strerror(errno));
		exit(1);
	}
	zonestat_remap(nsd, idx, sz);
	/* zero the newly allocated region */
	if(nsd->zonestatdesired > nsd->zonestatsize[idx]) {
		memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) *
			nsd->zonestatsize[idx], 0, sizeof(struct nsdst) *
			(nsd->zonestatdesired - nsd->zonestatsize[idx]));
	}
	nsd->zonestatsize[idx] = nsd->zonestatdesired;
#endif /* HAVE_MMAP */
}
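/*
 * Why the grow pattern above works: the stat file is extended by seeking
 * to sz-1 and writing a single zero byte before (re)mapping, because
 * touching mmap()ed pages that lie beyond the end of the backing file
 * raises SIGBUS; the lseek()+write() pair makes the file cover the whole
 * mapping first.
 */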

/* switchover to use the other array for the new children, that
 * briefly coexist with the old children.  And we want to avoid them
 * both writing to the same statistics arrays. */
void
server_zonestat_switch(struct nsd* nsd)
{
	if(nsd->zonestatnow == nsd->zonestat[0]) {
		nsd->zonestatnow = nsd->zonestat[1];
		nsd->zonestatsizenow = nsd->zonestatsize[1];
	} else {
		nsd->zonestatnow = nsd->zonestat[0];
		nsd->zonestatsizenow = nsd->zonestatsize[0];
	}
}
#endif /* USE_ZONE_STATS */

static void
cleanup_dname_compression_tables(void *ptr)
{
	free(ptr);
	compressed_dname_offsets = NULL;
	compression_table_capacity = 0;
}

static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xmallocarray(
			needed, sizeof(uint16_t));
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}

static int
set_cloexec(struct nsd_socket *sock)
{
	assert(sock != NULL);

	if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) {
		const char *socktype =
			sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
		log_msg(LOG_ERR, "fcntl(..., FD_CLOEXEC) failed for %s: %s",
			socktype, strerror(errno));
		return -1;
	}

	return 1;
}

static int
set_reuseport(struct nsd_socket *sock)
{
#ifdef SO_REUSEPORT
	int on = 1;
#ifdef SO_REUSEPORT_LB
	/* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like
	 * SO_REUSEPORT on Linux.  This is what the users want with the config
	 * option in nsd.conf; if we actually need local address and port reuse
	 * they'll also need to have SO_REUSEPORT set for them, assume it was
	 * _LB they want.
	 */
	int opt = SO_REUSEPORT_LB;
	static const char optname[] = "SO_REUSEPORT_LB";
#else /* !SO_REUSEPORT_LB */
	int opt = SO_REUSEPORT;
	static const char optname[] = "SO_REUSEPORT";
#endif /* SO_REUSEPORT_LB */

	if(0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) {
		return 1;
	} else if(verbosity >= 3 || errno != ENOPROTOOPT) {
		log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
			optname, strerror(errno));
	}
	return -1;
#else
	(void)sock;
#endif /* SO_REUSEPORT */

	return 0;
}

static int
set_reuseaddr(struct nsd_socket *sock)
{
#ifdef SO_REUSEADDR
	int on = 1;
	if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) {
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_REUSEADDR */
	return 0;
}

static int
set_rcvbuf(struct nsd_socket *sock, int rcv)
{
#ifdef SO_RCVBUF
#ifdef SO_RCVBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_RCVBUFFORCE */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_RCVBUFFORCE */
#endif /* SO_RCVBUF */

	return 0;
}

static int
set_sndbuf(struct nsd_socket *sock, int snd)
{
#ifdef SO_SNDBUF
#ifdef SO_SNDBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_SNDBUFFORCE */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_SNDBUFFORCE */
#endif /* SO_SNDBUF */

	return 0;
}

static int
set_nonblock(struct nsd_socket *sock)
{
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";

	if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) {
		log_msg(LOG_ERR, "fcntl(..., O_NONBLOCK) failed for %s: %s",
			socktype, strerror(errno));
		return -1;
	}

	return 1;
}

#ifdef INET6
static int
set_ipv6_v6only(struct nsd_socket *sock)
{
#ifdef IPV6_V6ONLY
	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";

	if(0 == setsockopt(
		sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s",
		socktype, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IPV6_V6ONLY */

	return 0;
}
#endif /* INET6 */

#ifdef INET6
static int
set_ipv6_use_min_mtu(struct nsd_socket *sock)
{
#if defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU)
#if defined(IPV6_USE_MIN_MTU)
	/* There is no fragmentation of IPv6 datagrams during forwarding in the
	 * network.  Therefore we do not send UDP datagrams larger than the
	 * minimum IPv6 MTU of 1280 octets.  The EDNS0 message length can be
	 * larger if the network stack supports IPV6_USE_MIN_MTU.
	 */
	int opt = IPV6_USE_MIN_MTU;
	int optval = 1;
	static const char optname[] = "IPV6_USE_MIN_MTU";
#elif defined(IPV6_MTU)
	/* On Linux, PMTUD is disabled by default for datagrams so set the MTU
	 * to the MIN MTU to get the same.
	 */
	int opt = IPV6_MTU;
	int optval = IPV6_MIN_MTU;
	static const char optname[] = "IPV6_MTU";
#endif
	if(0 == setsockopt(
		sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
		optname, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IPV6_USE_MIN_MTU || IPV6_MTU */

	return 0;
}
#endif /* INET6 */

static int
set_ipv4_no_pmtu_disc(struct nsd_socket *sock)
{
	int ret = 0;

#if defined(IP_MTU_DISCOVER)
	int opt = IP_MTU_DISCOVER;
	int optval;
# if defined(IP_PMTUDISC_OMIT)
	/* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU
	 * information and send packets with DF=0.  Fragmentation is allowed
	 * if and only if the packet size exceeds the outgoing interface MTU
	 * or the packet encounters a smaller MTU link in the network.  This
	 * mitigates DNS fragmentation attacks by preventing forged PMTU
	 * information.  FreeBSD already has the same semantics without
	 * setting the option.
	 */
	optval = IP_PMTUDISC_OMIT;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		"IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno));
# endif /* IP_PMTUDISC_OMIT */
# if defined(IP_PMTUDISC_DONT)
	/* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */
	optval = IP_PMTUDISC_DONT;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
		"IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno));
# endif
	ret = -1;
#elif defined(IP_DONTFRAG)
	int off = 0;
	if(0 == setsockopt(
		sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s",
		strerror(errno));
	ret = -1;
#else
	(void)sock;
#endif

	return ret;
}

static int
set_ip_freebind(struct nsd_socket *sock)
{
#ifdef IP_FREEBIND
	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
	if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0)
	{
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s",
		socktype, strerror(errno));
	return -1;
#else
	(void)sock;
#endif /* IP_FREEBIND */

	return 0;
}

static int
set_ip_transparent(struct nsd_socket *sock)
{
	/*
	  The scandalous preprocessor blob here calls for some explanation :)
	  POSIX does not specify an option to bind non-local IPs, so
	  platforms developed several implementation-specific options,
	  all set in the same way, but with different names.
	  For additional complexity, some platforms manage this setting
	  differently for different address families (IPv4 vs IPv6).
	  The scandalous preprocessor blob below abstracts such variability
	  in a way which leaves the C code as lean and clear as possible.
	*/
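	/* For reference: the names below map to platforms roughly as
	 * IP_TRANSPARENT (Linux), SO_BINDANY (OpenBSD) and IP_BINDANY
	 * (FreeBSD), all variations of the same facility. */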

#if defined(IP_TRANSPARENT)
# define NSD_SOCKET_OPTION_TRANSPARENT IP_TRANSPARENT
# define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP
# define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_TRANSPARENT"
// as of 2020-01, Linux does not support this on IPv6 programmatically
#elif defined(SO_BINDANY)
# define NSD_SOCKET_OPTION_TRANSPARENT SO_BINDANY
# define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL SOL_SOCKET
# define NSD_SOCKET_OPTION_TRANSPARENT_NAME "SO_BINDANY"
#elif defined(IP_BINDANY)
# define NSD_SOCKET_OPTION_TRANSPARENT IP_BINDANY
# define NSD_SOCKET_OPTION_TRANSPARENT6 IPV6_BINDANY
# define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP
# define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 IPPROTO_IPV6
# define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_BINDANY"
#endif

#ifndef NSD_SOCKET_OPTION_TRANSPARENT
	(void)sock;
#else
# ifndef NSD_SOCKET_OPTION_TRANSPARENT6
#  define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT
# endif
# ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6
#  define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL
# endif
# ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6
#  define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME
# endif

	int on = 1;
	const char *socktype =
		sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
	const int is_ip6 = (sock->addr.ai_family == AF_INET6);

	if(0 == setsockopt(
		sock->s,
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL,
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT,
		&on, sizeof(on)))
	{
		return 1;
	}

	log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s",
		is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME, socktype, strerror(errno));
	return -1;
#endif

	return 0;
}

static int
set_tcp_maxseg(struct nsd_socket *sock, int mss)
{
#if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
	if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) {
		return 1;
	}
	log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s",
		strerror(errno));
	return -1;
#else
	log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
#endif
	return 0;
}

#ifdef USE_TCP_FASTOPEN
static int
set_tcp_fastopen(struct nsd_socket *sock)
{
	/* qlen specifies how many outstanding TFO requests to allow.  Limit
	 * is a defense against IP spoofing attacks as suggested in RFC 7413.
	 */
	int qlen;

#ifdef __APPLE__
	/* macOS X implementation only supports qlen of 1 via this call.  The
	 * actual value is configured by the net.inet.tcp.fastopen_backlog
	 * kernel parameter.
	 */
	qlen = 1;
#else
	/* 5 is recommended on Linux. */
	qlen = 5;
#endif
	if(0 == setsockopt(
		sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)))
	{
		return 1;
	}

	if (errno == EPERM) {
		log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s "
				"; this could likely be because sysctl "
				"net.inet.tcp.fastopen.enabled, "
				"net.inet.tcp.fastopen.server_enable, or "
				"net.ipv4.tcp_fastopen is disabled",
			strerror(errno));
	/* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support
	 * disabled, except when verbosity enabled for debugging
	 */
	} else if(errno != ENOPROTOOPT || verbosity >= 3) {
		log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s",
			strerror(errno));
	}

	return (errno == ENOPROTOOPT ? 0 : -1);
}
#endif /* USE_TCP_FASTOPEN */

static int
set_bindtodevice(struct nsd_socket *sock)
{
#if defined(SO_BINDTODEVICE)
	if(setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE,
		sock->device, strlen(sock->device)) == -1)
	{
		log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
			"SO_BINDTODEVICE", sock->device, strerror(errno));
		return -1;
	}

	return 1;
#else
	(void)sock;
	return 0;
#endif
}

static int
set_setfib(struct nsd_socket *sock)
{
#if defined(SO_SETFIB)
	if(setsockopt(sock->s, SOL_SOCKET, SO_SETFIB,
		(const void *)&sock->fib, sizeof(sock->fib)) == -1)
	{
		log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s",
			"SO_SETFIB", sock->fib, strerror(errno));
		return -1;
	}

	return 1;
#else
	(void)sock;
	return 0;
#endif
}

static int
open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
	int rcv = 1*1024*1024, snd = 1*1024*1024;

	if(-1 == (sock->s = socket(
		sock->addr.ai_family, sock->addr.ai_socktype, 0)))
	{
#ifdef INET6
		if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
		   (sock->addr.ai_family == AF_INET6) &&
		   (errno == EAFNOSUPPORT))
		{
			log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: "
				"not supported");
			return 0;
		}
#endif
		log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
		return -1;
	}

	set_cloexec(sock);

	if(nsd->reuseport && reuseport_works && *reuseport_works)
		*reuseport_works = (set_reuseport(sock) == 1);

	if(nsd->options->receive_buffer_size > 0)
		rcv = nsd->options->receive_buffer_size;
	if(set_rcvbuf(sock, rcv) == -1)
		return -1;

	if(nsd->options->send_buffer_size > 0)
		snd = nsd->options->send_buffer_size;
	if(set_sndbuf(sock, snd) == -1)
		return -1;
#ifdef INET6
	if(sock->addr.ai_family == AF_INET6) {
		if(set_ipv6_v6only(sock) == -1 ||
		   set_ipv6_use_min_mtu(sock) == -1)
			return -1;
	} else
#endif /* INET6 */
	if(sock->addr.ai_family == AF_INET) {
		if(set_ipv4_no_pmtu_disc(sock) == -1)
			return -1;
	}

	/* Set socket to non-blocking.  Otherwise, on operating systems
	 * with thundering herd problems, the UDP recv could block
	 * after select returns readable. */
	set_nonblock(sock);

	if(nsd->options->ip_freebind)
		(void)set_ip_freebind(sock);
	if(nsd->options->ip_transparent)
		(void)set_ip_transparent(sock);
	if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
		return -1;
	if(sock->fib != -1 && set_setfib(sock) == -1)
		return -1;

	if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
		char buf[256];
		addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
		log_msg(LOG_ERR, "can't bind udp socket %s: %s",
			buf, strerror(errno));
		return -1;
	}

	return 1;
}

static int
open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
#ifdef USE_TCP_FASTOPEN
	report_tcp_fastopen_config();
#endif

	(void)reuseport_works;

	if(-1 == (sock->s = socket(
		sock->addr.ai_family, sock->addr.ai_socktype, 0)))
	{
#ifdef INET6
		if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
		   (sock->addr.ai_family == AF_INET6) &&
		   (errno == EAFNOSUPPORT))
		{
			log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: "
				"not supported");
			return 0;
		}
#endif /* INET6 */
		log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
		return -1;
	}

	set_cloexec(sock);

	if(nsd->reuseport && reuseport_works && *reuseport_works)
		*reuseport_works = (set_reuseport(sock) == 1);

	(void)set_reuseaddr(sock);

#ifdef INET6
	if(sock->addr.ai_family == AF_INET6) {
		if(set_ipv6_v6only(sock) == -1 ||
		   set_ipv6_use_min_mtu(sock) == -1)
			return -1;
	}
#endif

	if(nsd->tcp_mss > 0)
		set_tcp_maxseg(sock, nsd->tcp_mss);
	/* (StevensUNP p463), if TCP listening socket is blocking, then
	   it may block in accept, even if select() says readable. */
	(void)set_nonblock(sock);
	if(nsd->options->ip_freebind)
		(void)set_ip_freebind(sock);
	if(nsd->options->ip_transparent)
		(void)set_ip_transparent(sock);
	if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
		return -1;
	if(sock->fib != -1 && set_setfib(sock) == -1)
		return -1;

	if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
		char buf[256];
		addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
		log_msg(LOG_ERR, "can't bind tcp socket %s: %s",
			buf, strerror(errno));
		return -1;
	}

#ifdef USE_TCP_FASTOPEN
	(void)set_tcp_fastopen(sock);
#endif

	if(listen(sock->s, TCP_BACKLOG) == -1) {
		log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
		return -1;
	}

	return 1;
}

/*
 * Initialize the server, reuseport, create and bind the sockets.
 */
int
server_init(struct nsd *nsd)
{
	size_t i;
	int reuseport = 1; /* Determine if REUSEPORT works. */
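	/* Background: with SO_REUSEPORT every server process can bind its
	 * own descriptor for the same address and the kernel load-balances
	 * incoming traffic across them; when the option works, the code
	 * below widens the socket arrays to nsd->ifs * nsd->reuseport
	 * entries. */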

	/* open server interface ports */
	for(i = 0; i < nsd->ifs; i++) {
		if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 ||
		   open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1)
		{
			return -1;
		}
	}

	if(nsd->reuseport && reuseport) {
		size_t ifs = nsd->ifs * nsd->reuseport;

		/* increase the size of the interface arrays, there are going
		 * to be separate interface file descriptors for every server
		 * instance */
		region_remove_cleanup(nsd->region, free, nsd->udp);
		region_remove_cleanup(nsd->region, free, nsd->tcp);

		nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp));
		nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp));
		region_add_cleanup(nsd->region, free, nsd->udp);
		region_add_cleanup(nsd->region, free, nsd->tcp);
		if(ifs > nsd->ifs) {
			memset(&nsd->udp[nsd->ifs], 0,
				(ifs-nsd->ifs)*sizeof(*nsd->udp));
			memset(&nsd->tcp[nsd->ifs], 0,
				(ifs-nsd->ifs)*sizeof(*nsd->tcp));
		}

		for(i = nsd->ifs; i < ifs; i++) {
			nsd->udp[i] = nsd->udp[i%nsd->ifs];
			nsd->udp[i].s = -1;
			if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) {
				return -1;
			}
			/* Turn off REUSEPORT for TCP by copying the socket
			 * file descriptor.
			 * This means we should not close TCP used by
			 * other servers in reuseport enabled mode, in
			 * server_child().
			 */
			nsd->tcp[i] = nsd->tcp[i%nsd->ifs];
		}

		nsd->ifs = ifs;
	} else {
		nsd->reuseport = 0;
	}

	/* open server interface ports for verifiers */
	for(i = 0; i < nsd->verify_ifs; i++) {
		if(open_udp_socket(nsd, &nsd->verify_udp[i], NULL) == -1 ||
		   open_tcp_socket(nsd, &nsd->verify_tcp[i], NULL) == -1)
		{
			return -1;
		}
	}

	return 0;
}

/*
 * Prepare the server for take off.
 *
 */
int
server_prepare(struct nsd *nsd)
{
#ifdef RATELIMIT
	/* set secret modifier for hashing (udb ptr buckets and rate limits) */
#ifdef HAVE_GETRANDOM
	uint32_t v;
	if(getrandom(&v, sizeof(v), 0) == -1) {
		log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno));
		exit(1);
	}
	hash_set_raninit(v);
#elif defined(HAVE_ARC4RANDOM)
	hash_set_raninit(arc4random());
#else
	uint32_t v = getpid() ^ time(NULL);
	srandom((unsigned long)v);
# ifdef HAVE_SSL
	if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0)
		hash_set_raninit(v);
	else
# endif
		hash_set_raninit(random());
#endif
	rrl_mmap_init(nsd->child_count, nsd->options->rrl_size,
		nsd->options->rrl_ratelimit,
		nsd->options->rrl_whitelist_ratelimit,
		nsd->options->rrl_slip,
		nsd->options->rrl_ipv4_prefix_length,
		nsd->options->rrl_ipv6_prefix_length);
#endif /* RATELIMIT */

	/* Open the database... */
	if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) {
		log_msg(LOG_ERR, "unable to open the database %s: %s",
			nsd->dbfile, strerror(errno));
		unlink(nsd->task[0]->fname);
		unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		return -1;
	}
	/* check if zone files have been modified */
	/* NULL for taskudb because we send soainfo in a moment, batched up,
	 * for all zones */
	if(nsd->options->zonefiles_check || (nsd->options->database == NULL ||
		nsd->options->database[0] == 0))
		namedb_check_zonefiles(nsd, nsd->options, NULL, NULL);
	zonestatid_tree_set(nsd);

	compression_table_capacity = 0;
	initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
	/* Initialize times... */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif /* BIND8_STATS */

	return 0;
}

/*
 * Fork the required number of servers.
 */
static int
server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	size_t i;

	/* Start all child servers initially. */
	for (i = 0; i < nsd->child_count; ++i) {
		nsd->children[i].pid = 0;
	}

	return restart_child_servers(nsd, region, netio, xfrd_sock_p);
}

static void
server_close_socket(struct nsd_socket *sock)
{
	if(sock->s != -1) {
		close(sock->s);
		sock->s = -1;
	}
}

void
server_close_all_sockets(struct nsd_socket sockets[], size_t n)
{
	size_t i;

	/* Close all the sockets... */
	for (i = 0; i < n; ++i) {
		server_close_socket(&sockets[i]);
	}
}
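/*
 * Usage sketch: each listener array is torn down with a single call, e.g.
 *
 *   server_close_all_sockets(nsd->udp, nsd->ifs);
 *   server_close_all_sockets(nsd->tcp, nsd->ifs);
 *
 * which is exactly how server_shutdown() below begins.
 */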

/*
 * Close the sockets, shutdown the server and exit.
 * Does not return.
 */
void
server_shutdown(struct nsd *nsd)
{
	size_t i;

	server_close_all_sockets(nsd->udp, nsd->ifs);
	server_close_all_sockets(nsd->tcp, nsd->ifs);
	/* CHILD: close command channel to parent */
	if(nsd->this_child && nsd->this_child->parent_fd != -1)
	{
		close(nsd->this_child->parent_fd);
		nsd->this_child->parent_fd = -1;
	}
	/* SERVER: close command channels to children */
	if(!nsd->this_child)
	{
		for(i=0; i < nsd->child_count; ++i)
			if(nsd->children[i].child_fd != -1)
			{
				close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
			}
	}

	tsig_finalize();
#ifdef HAVE_SSL
	daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */
	if (nsd->tls_ctx)
		SSL_CTX_free(nsd->tls_ctx);
#endif

#ifdef MEMCLEAN /* OS collects memory pages */
#ifdef RATELIMIT
	rrl_mmap_deinit_keep_mmap();
#endif
#ifdef USE_DNSTAP
	dt_collector_destroy(nsd->dt_collector, nsd);
#endif
	udb_base_free_keep_mmap(nsd->task[0]);
	udb_base_free_keep_mmap(nsd->task[1]);
	namedb_free_ixfr(nsd->db);
	namedb_close_udb(nsd->db); /* keeps mmap */
	namedb_close(nsd->db);
	nsd_options_destroy(nsd->options);
	region_destroy(nsd->region);
#endif
	log_finalize();
	exit(0);
}

void
server_prepare_xfrd(struct nsd* nsd)
{
	char tmpfile[256];
	/* create task mmaps */
	nsd->mytask = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[0] = task_file_create(tmpfile);
	if(!nsd->task[0]) {
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[1] = task_file_create(tmpfile);
	if(!nsd->task[1]) {
		unlink(nsd->task[0]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	assert(udb_base_get_userdata(nsd->task[0])->data == 0);
	assert(udb_base_get_userdata(nsd->task[1])->data == 0);
	/* create xfrd listener structure */
	nsd->xfrd_listener = region_alloc(nsd->region,
		sizeof(netio_handler_type));
	nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*)
		region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data));
	nsd->xfrd_listener->fd = -1;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd =
		nsd;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn =
		xfrd_tcp_create(nsd->region, QIOBUFSZ);
}


void
server_start_xfrd(struct nsd *nsd, int del_db, int reload_active)
{
	pid_t pid;
	int sockets[2] = {0,0};
	struct ipc_handler_conn_data *data;

	if(nsd->xfrd_listener->fd != -1)
		close(nsd->xfrd_listener->fd);
	if(del_db) {
		/* recreate taskdb that xfrd was using, it may be corrupt */
		/* we (or reload) use nsd->mytask, and xfrd uses the other */
		char* tmpfile = nsd->task[1-nsd->mytask]->fname;
		nsd->task[1-nsd->mytask]->fname = NULL;
		/* free alloc already, so udb does not shrink itself */
		udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc);
		nsd->task[1-nsd->mytask]->alloc = NULL;
		udb_base_free(nsd->task[1-nsd->mytask]);
		/* create new file, overwrite the old one */
		nsd->task[1-nsd->mytask] = task_file_create(tmpfile);
		free(tmpfile);
	}
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
		log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
		return;
	}
	pid = fork();
	switch (pid) {
	case -1:
		log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
		break;
	default:
		/* PARENT: close first socket, use second one */
		close(sockets[0]);
		if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		if(del_db) xfrd_free_namedb(nsd);
		/* use other task than I am using, since if xfrd died and is
		 * restarted, the reload is using nsd->mytask */
		nsd->mytask = 1 - nsd->mytask;

#ifdef HAVE_SETPROCTITLE
		setproctitle("xfrd");
#endif
#ifdef HAVE_CPUSET_T
		if(nsd->use_cpu_affinity) {
			set_cpu_affinity(nsd->xfrd_cpuset);
		}
#endif

		xfrd_init(sockets[1], nsd, del_db, reload_active, pid);
		/* ENOTREACH */
		break;
	case 0:
		/* CHILD: close second socket, use first one */
		close(sockets[1]);
		if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		nsd->xfrd_listener->fd = sockets[0];
		break;
	}
	/* server-parent only */
	nsd->xfrd_listener->timeout = NULL;
	nsd->xfrd_listener->event_types = NETIO_EVENT_READ;
	nsd->xfrd_listener->event_handler = parent_handle_xfrd_command;
	/* clear ongoing ipc reads */
	data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data;
	data->conn->is_reading = 0;
}

/** add all soainfo to taskdb */
static void
add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb)
{
	struct radnode* n;
	udb_ptr task_last; /* last task, mytask is empty so NULL */
	/* add all SOA INFO to mytask */
	udb_ptr_init(&task_last, taskudb);
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0);
	}
	udb_ptr_unlink(&task_last, taskudb);
}

void
server_send_soa_xfrd(struct nsd* nsd, int shortsoa)
{
	/* normally this exchanges the SOA from nsd->xfrd and the expire back.
	 *   parent fills one taskdb with soas, xfrd fills other with expires.
	 *   then they exchange and process.
	 * shortsoa: xfrd crashes and needs to be restarted and one taskdb
	 *   may be in use by reload.  Fill SOA in taskdb and give to xfrd.
	 *   expire notifications can be sent back via a normal reload later
	 *   (xfrd will wait for current running reload to finish if any).
	 */
	sig_atomic_t cmd = 0;
	pid_t mypid;
	int xfrd_sock = nsd->xfrd_listener->fd;
	struct udb_base* taskudb = nsd->task[nsd->mytask];
	udb_ptr t;
	if(!shortsoa) {
		if(nsd->signal_hint_shutdown) {
		shutdown:
			log_msg(LOG_WARNING, "signal received, shutting down...");
			server_close_all_sockets(nsd->udp, nsd->ifs);
			server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
			daemon_remote_close(nsd->rc);
#endif
			/* Unlink it if possible... */
			unlinkpid(nsd->pidfile);
			unlink(nsd->task[0]->fname);
			unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
			unlink(nsd->zonestatfname[0]);
			unlink(nsd->zonestatfname[1]);
#endif
			/* write the nsd.db to disk, wait for it to complete */
			udb_base_sync(nsd->db->udb, 1);
			udb_base_close(nsd->db->udb);
			server_shutdown(nsd);
			/* ENOTREACH */
			exit(0);
		}
	}
	if(shortsoa) {
		/* put SOA in xfrd task because mytask may be in use */
		taskudb = nsd->task[1-nsd->mytask];
	}

	add_all_soa_to_task(nsd, taskudb);
	if(!shortsoa) {
		/* wait for xfrd to signal task is ready, RELOAD signal */
		if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) ||
			cmd != NSD_RELOAD) {
			log_msg(LOG_ERR, "did not get start signal from xfrd");
			exit(1);
		}
		if(nsd->signal_hint_shutdown) {
			goto shutdown;
		}
	}
	/* give xfrd our task, signal it with RELOAD_DONE */
	task_process_sync(taskudb);
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
			(int)nsd->pid, strerror(errno));
	}
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	if(!shortsoa) {
		/* process the xfrd task works (expiry data) */
		nsd->mytask = 1 - nsd->mytask;
		taskudb = nsd->task[nsd->mytask];
		task_remap(taskudb);
		udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
		while(!udb_ptr_is_null(&t)) {
			task_process_expire(nsd->db, TASKLIST(&t));
			udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next);
		}
		udb_ptr_unlink(&t, taskudb);
		task_clear(taskudb);

		/* tell xfrd that the task is emptied, signal with RELOAD_DONE */
		cmd = NSD_RELOAD_DONE;
		if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
				(int)nsd->pid, strerror(errno));
		}
	}
}

#ifdef HAVE_SSL
static void
log_crypto_from_err(const char* str, unsigned long err)
{
	/* error:[error code]:[library name]:[function name]:[reason string] */
	char buf[128];
	unsigned long e;
	ERR_error_string_n(err, buf, sizeof(buf));
	log_msg(LOG_ERR, "%s crypto %s", str, buf);
	while( (e=ERR_get_error()) ) {
		ERR_error_string_n(e, buf, sizeof(buf));
		log_msg(LOG_ERR, "and additionally crypto %s", buf);
	}
}

void
log_crypto_err(const char* str)
{
	log_crypto_from_err(str, ERR_get_error());
}

/** true if the ssl handshake error has to be squelched from the logs */
static int
squelch_err_ssl_handshake(unsigned long err)
{
	if(verbosity >= 3)
		return 0; /* only squelch on low verbosity */
	/* this is very specific, we could filter on ERR_GET_REASON()
	 * (the third element in ERR_PACK) */
	if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) ||
		err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE)
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
		|| err == ERR_PACK(ERR_LIB_SSL,
			SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER)
#endif
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL)
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL)
# ifdef SSL_R_VERSION_TOO_LOW
		|| err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW)
# endif
#endif
		)
		return 1;
	return 0;
}

void
perform_openssl_init(void)
{
	/* init SSL library */
#ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS
	ERR_load_crypto_strings();
#endif
#if defined(HAVE_ERR_LOAD_SSL_STRINGS) && !defined(DEPRECATED_ERR_LOAD_SSL_STRINGS)
	ERR_load_SSL_strings();
#endif
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO)
	OpenSSL_add_all_algorithms();
#else
	OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS
		| OPENSSL_INIT_ADD_ALL_DIGESTS
		| OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL);
#endif
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL)
	(void)SSL_library_init();
#else
	OPENSSL_init_ssl(0, NULL);
#endif

	if(!RAND_status()) {
		/* try to seed it */
		unsigned char buf[256];
		unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid();
		size_t i;
		v = seed;
		for(i=0; i<256/sizeof(v); i++) {
			memmove(buf+i*sizeof(v), &v, sizeof(v));
			v = v*seed + (unsigned int)i;
		}
		RAND_seed(buf, 256);
		log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time");
	}
}

static int
get_ocsp(char *filename, unsigned char **ocsp)
{
	BIO *bio;
	OCSP_RESPONSE *response;
	int len = -1;
	unsigned char *p, *buf;
	assert(filename);

	if ((bio = BIO_new_file(filename, "r")) == NULL) {
		log_crypto_err("get_ocsp: BIO_new_file failed");
		return -1;
	}

	if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) {
		log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed");
		BIO_free(bio);
		return -1;
	}

	if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) {
		log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed");
		OCSP_RESPONSE_free(response);
		BIO_free(bio);
		return -1;
	}

	if ((buf = malloc((size_t) len)) == NULL) {
		log_msg(LOG_ERR, "get_ocsp: malloc failed");
		OCSP_RESPONSE_free(response);
		BIO_free(bio);
		return -1;
	}

	p = buf;
	if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) {
		log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed");
		free(buf);
		OCSP_RESPONSE_free(response);
		BIO_free(bio);
		return -1;
	}

	OCSP_RESPONSE_free(response);
	BIO_free(bio);

	*ocsp = buf;
	return len;
}

/* further setup ssl ctx after the keys are loaded */
static void
listen_sslctx_setup_2(void* ctxt)
{
	SSL_CTX* ctx = (SSL_CTX*)ctxt;
	(void)ctx;
#if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO
	if(!SSL_CTX_set_ecdh_auto(ctx,1)) {
		/* ENOTREACH */
		log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE");
	}
#elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME)
	if(1) {
		EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1);
		if (!ecdh) {
log_crypto_err("could not find p256, not enabling ECDHE"); 1935 } else { 1936 if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) { 1937 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE"); 1938 } 1939 EC_KEY_free (ecdh); 1940 } 1941 } 1942 #endif 1943 } 1944 1945 static int 1946 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg)) 1947 { 1948 if(ocspdata) { 1949 unsigned char *p; 1950 if ((p=malloc(ocspdata_len)) == NULL) { 1951 log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure"); 1952 return SSL_TLSEXT_ERR_NOACK; 1953 } 1954 memcpy(p, ocspdata, ocspdata_len); 1955 if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) { 1956 log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp"); 1957 free(p); 1958 return SSL_TLSEXT_ERR_NOACK; 1959 } 1960 return SSL_TLSEXT_ERR_OK; 1961 } else { 1962 return SSL_TLSEXT_ERR_NOACK; 1963 } 1964 } 1965 1966 SSL_CTX* 1967 server_tls_ctx_setup(char* key, char* pem, char* verifypem) 1968 { 1969 SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method()); 1970 if(!ctx) { 1971 log_crypto_err("could not SSL_CTX_new"); 1972 return NULL; 1973 } 1974 /* no SSLv2, SSLv3 because has defects */ 1975 #if SSL_OP_NO_SSLv2 != 0 1976 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){ 1977 log_crypto_err("could not set SSL_OP_NO_SSLv2"); 1978 SSL_CTX_free(ctx); 1979 return NULL; 1980 } 1981 #endif 1982 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3) 1983 != SSL_OP_NO_SSLv3){ 1984 log_crypto_err("could not set SSL_OP_NO_SSLv3"); 1985 SSL_CTX_free(ctx); 1986 return 0; 1987 } 1988 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1) 1989 /* if we have tls 1.1 disable 1.0 */ 1990 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1) 1991 != SSL_OP_NO_TLSv1){ 1992 log_crypto_err("could not set SSL_OP_NO_TLSv1"); 1993 SSL_CTX_free(ctx); 1994 return 0; 1995 } 1996 #endif 1997 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2) 1998 /* if we have tls 1.2 disable 1.1 */ 1999 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1) 2000 != SSL_OP_NO_TLSv1_1){ 2001 log_crypto_err("could not set SSL_OP_NO_TLSv1_1"); 2002 SSL_CTX_free(ctx); 2003 return 0; 2004 } 2005 #endif 2006 #if defined(SSL_OP_NO_RENEGOTIATION) 2007 /* disable client renegotiation */ 2008 if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) & 2009 SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) { 2010 log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION"); 2011 SSL_CTX_free(ctx); 2012 return 0; 2013 } 2014 #endif 2015 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20) 2016 /* if we detect system-wide crypto policies, use those */ 2017 if (access( "/etc/crypto-policies/config", F_OK ) != 0 ) { 2018 /* if we have sha256, set the cipher list to have no known vulns */ 2019 if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20")) 2020 log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list"); 2021 } 2022 #endif 2023 if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) & 2024 SSL_OP_CIPHER_SERVER_PREFERENCE) != 2025 SSL_OP_CIPHER_SERVER_PREFERENCE) { 2026 log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE"); 2027 SSL_CTX_free(ctx); 2028 return 0; 2029 } 2030 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL 2031 SSL_CTX_set_security_level(ctx, 0); 2032 #endif 2033 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) { 2034 log_msg(LOG_ERR, "error for cert file: %s", pem); 2035 log_crypto_err("error in SSL_CTX use_certificate_chain_file"); 2036 SSL_CTX_free(ctx); 2037 return NULL; 
SSL_CTX*
server_tls_ctx_setup(char* key, char* pem, char* verifypem)
{
	SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method());
	if(!ctx) {
		log_crypto_err("could not SSL_CTX_new");
		return NULL;
	}
	/* no SSLv2, SSLv3 because of their defects */
#if SSL_OP_NO_SSLv2 != 0
	if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){
		log_crypto_err("could not set SSL_OP_NO_SSLv2");
		SSL_CTX_free(ctx);
		return NULL;
	}
#endif
	if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3)
		!= SSL_OP_NO_SSLv3){
		log_crypto_err("could not set SSL_OP_NO_SSLv3");
		SSL_CTX_free(ctx);
		return NULL;
	}
#if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1)
	/* if we have tls 1.1 disable 1.0 */
	if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1)
		!= SSL_OP_NO_TLSv1){
		log_crypto_err("could not set SSL_OP_NO_TLSv1");
		SSL_CTX_free(ctx);
		return NULL;
	}
#endif
#if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2)
	/* if we have tls 1.2 disable 1.1 */
	if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1)
		!= SSL_OP_NO_TLSv1_1){
		log_crypto_err("could not set SSL_OP_NO_TLSv1_1");
		SSL_CTX_free(ctx);
		return NULL;
	}
#endif
#if defined(SSL_OP_NO_RENEGOTIATION)
	/* disable client renegotiation */
	if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) &
		SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) {
		log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION");
		SSL_CTX_free(ctx);
		return NULL;
	}
#endif
#if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20)
	/* if we detect system-wide crypto policies, use those */
	if (access( "/etc/crypto-policies/config", F_OK ) != 0 ) {
		/* if we have sha256, set the cipher list to have no known vulns */
		if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20"))
			log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list");
	}
#endif
	if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) &
		SSL_OP_CIPHER_SERVER_PREFERENCE) !=
		SSL_OP_CIPHER_SERVER_PREFERENCE) {
		log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE");
		SSL_CTX_free(ctx);
		return NULL;
	}
#ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL
	SSL_CTX_set_security_level(ctx, 0);
#endif
	if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) {
		log_msg(LOG_ERR, "error for cert file: %s", pem);
		log_crypto_err("error in SSL_CTX use_certificate_chain_file");
		SSL_CTX_free(ctx);
		return NULL;
	}
	if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) {
		log_msg(LOG_ERR, "error for private key file: %s", key);
		log_crypto_err("Error in SSL_CTX use_PrivateKey_file");
		SSL_CTX_free(ctx);
		return NULL;
	}
	if(!SSL_CTX_check_private_key(ctx)) {
		log_msg(LOG_ERR, "error for key file: %s", key);
		log_crypto_err("Error in SSL_CTX check_private_key");
		SSL_CTX_free(ctx);
		return NULL;
	}
	listen_sslctx_setup_2(ctx);
	if(verifypem && verifypem[0]) {
		if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) {
			log_crypto_err("Error in SSL_CTX verify locations");
			SSL_CTX_free(ctx);
			return NULL;
		}
		SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem));
		SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL);
	}
	return ctx;
}

SSL_CTX*
server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile)
{
	char *key, *pem;
	SSL_CTX *ctx;

	key = nsd->options->tls_service_key;
	pem = nsd->options->tls_service_pem;
	if(!key || key[0] == 0) {
		log_msg(LOG_ERR, "error: no tls-service-key file specified");
		return NULL;
	}
	if(!pem || pem[0] == 0) {
		log_msg(LOG_ERR, "error: no tls-service-pem file specified");
		return NULL;
	}

	/* NOTE: this mimics the existing code in Unbound 1.5.1 by supporting
	 * SSL, but draft-ietf-uta-tls-bcp-08 recommends using only TLSv1.2 */
	ctx = server_tls_ctx_setup(key, pem, verifypem);
	if(!ctx) {
		log_msg(LOG_ERR, "could not setup server TLS context");
		return NULL;
	}
	if(ocspfile && ocspfile[0]) {
		if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) {
			log_crypto_err("Error reading OCSPfile");
			SSL_CTX_free(ctx);
			return NULL;
		} else {
			VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile));
			if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) {
				log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb");
				SSL_CTX_free(ctx);
				return NULL;
			}
		}
	}
	return ctx;
}

/* check if tcp_handler_accept_data created for TLS dedicated port */
int
using_tls_port(struct sockaddr* addr, const char* tls_port)
{
	in_port_t port = 0;

	if (addr->sa_family == AF_INET)
		port = ((struct sockaddr_in*)addr)->sin_port;
#ifdef HAVE_STRUCT_SOCKADDR_IN6
	else
		port = ((struct sockaddr_in6*)addr)->sin6_port;
#endif /* HAVE_STRUCT_SOCKADDR_IN6 */
	if (atoi(tls_port) == ntohs(port))
		return 1;

	return 0;
}
#endif
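/*
 * A minimal usage sketch for block_read() below (illustrative only,
 * with a hypothetical socket variable), matching how the reload
 * synchronisation code uses it: read a fixed-size command, retrying
 * while the poll times out.
 *
 *	sig_atomic_t cmd;
 *	ssize_t r;
 *	do {
 *		r = block_read(nsd, sock, &cmd, sizeof(cmd),
 *			RELOAD_SYNC_TIMEOUT);
 *	} while(r == -2);
 *	if(r != sizeof(cmd))
 *		handle error (-1) or closed connection (0)
 */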
/*
 * Pass timeout=-1 for a blocking read. Returns the number of bytes
 * read, 0 on a closed connection, -1 on error and -2 on timeout.
 */
ssize_t
block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout)
{
	uint8_t* buf = (uint8_t*) p;
	ssize_t total = 0;
	struct pollfd fd;
	memset(&fd, 0, sizeof(fd));
	fd.fd = s;
	fd.events = POLLIN;

	while( total < sz) {
		ssize_t ret;
		ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000);
		if(ret == -1) {
			if(errno == EAGAIN)
				/* blocking read */
				continue;
			if(errno == EINTR) {
				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
					return -1;
				/* other signals can be handled later */
				continue;
			}
			/* some error */
			return -1;
		}
		if(ret == 0) {
			/* operation timed out */
			return -2;
		}
		ret = read(s, buf+total, sz-total);
		if(ret == -1) {
			if(errno == EAGAIN)
				/* blocking read */
				continue;
			if(errno == EINTR) {
				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
					return -1;
				/* other signals can be handled later */
				continue;
			}
			/* some error */
			return -1;
		}
		if(ret == 0) {
			/* closed connection! */
			return 0;
		}
		total += ret;
	}
	return total;
}

static void
reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket)
{
	sig_atomic_t cmd = NSD_QUIT_SYNC;
	udb_ptr t, next;
	udb_base* u = nsd->task[nsd->mytask];
	udb_ptr_init(&next, u);
	udb_ptr_new(&t, u, udb_base_get_userdata(u));
	udb_base_set_userdata(u, 0);
	while(!udb_ptr_is_null(&t)) {
		/* store next in list so this one can be deleted or reused */
		udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next);
		udb_rptr_zero(&TASKLIST(&t)->next, u);

		/* process task t */
		/* append results for task t and update last_task */
		task_process_in_reload(nsd, u, last_task, &t);

		/* go to next */
		udb_ptr_set_ptr(&t, u, &next);

		/* if the parent has quit, we must quit too, poll the fd for cmds */
		if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
			if(cmd == NSD_QUIT) {
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
				/* sync to disk (if needed) */
				udb_base_sync(nsd->db->udb, 0);
				/* unlink files of remainder of tasks */
				while(!udb_ptr_is_null(&t)) {
					if(TASKLIST(&t)->task_type == task_apply_xfr) {
						xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno);
					}
					udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next);
				}
				udb_ptr_unlink(&t, u);
				udb_ptr_unlink(&next, u);
				exit(0);
			}
		}

	}
	udb_ptr_unlink(&t, u);
	udb_ptr_unlink(&next, u);
}
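/*
 * Stats handoff between the old main process and the reload process:
 * parent_send_stats() writes the parent's nsdst block followed by one
 * stc_type query counter per child over the command socket, and
 * reload_do_stats() reads them back in the same order and appends them
 * to the task list for xfrd. Both sides must agree on nsd->child_count.
 */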
from oldpar"); 2250 return; 2251 } 2252 s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0); 2253 s.db_mem = region_get_mem(nsd->db->region); 2254 p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s, 2255 nsd->child_count); 2256 if(!p) return; 2257 for(i=0; i<nsd->child_count; i++) { 2258 if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!= 2259 sizeof(stc_type)) 2260 return; 2261 } 2262 } 2263 #endif /* BIND8_STATS */ 2264 2265 void server_verify(struct nsd *nsd, int cmdsocket); 2266 2267 /* 2268 * Reload the database, stop parent, re-fork children and continue. 2269 * as server_main. 2270 */ 2271 static void 2272 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 2273 int cmdsocket) 2274 { 2275 pid_t mypid; 2276 sig_atomic_t cmd = NSD_QUIT_SYNC; 2277 int ret; 2278 udb_ptr last_task; 2279 struct sigaction old_sigchld, ign_sigchld; 2280 struct radnode* node; 2281 zone_type* zone; 2282 enum soainfo_hint hint; 2283 /* ignore SIGCHLD from the previous server_main that used this pid */ 2284 memset(&ign_sigchld, 0, sizeof(ign_sigchld)); 2285 ign_sigchld.sa_handler = SIG_IGN; 2286 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld); 2287 2288 #ifdef HAVE_SETPROCTITLE 2289 setproctitle("main"); 2290 #endif 2291 #ifdef HAVE_CPUSET_T 2292 if(nsd->use_cpu_affinity) { 2293 set_cpu_affinity(nsd->cpuset); 2294 } 2295 #endif 2296 2297 /* see what tasks we got from xfrd */ 2298 task_remap(nsd->task[nsd->mytask]); 2299 udb_ptr_init(&last_task, nsd->task[nsd->mytask]); 2300 udb_compact_inhibited(nsd->db->udb, 1); 2301 reload_process_tasks(nsd, &last_task, cmdsocket); 2302 udb_compact_inhibited(nsd->db->udb, 0); 2303 udb_compact(nsd->db->udb); 2304 2305 #ifndef NDEBUG 2306 if(nsd_debug_level >= 1) 2307 region_log_stats(nsd->db->region); 2308 #endif /* NDEBUG */ 2309 /* sync to disk (if needed) */ 2310 udb_base_sync(nsd->db->udb, 0); 2311 2312 initialize_dname_compression_tables(nsd); 2313 2314 #ifdef BIND8_STATS 2315 /* Restart dumping stats if required. */ 2316 time(&nsd->st.boot); 2317 set_bind8_alarm(nsd); 2318 #endif 2319 #ifdef USE_ZONE_STATS 2320 server_zonestat_realloc(nsd); /* realloc for new children */ 2321 server_zonestat_switch(nsd); 2322 #endif 2323 2324 if(nsd->options->verify_enable) { 2325 #ifdef RATELIMIT 2326 /* allocate resources for rate limiting. use a slot that is guaranteed 2327 not mapped to a file so no persistent data is overwritten */ 2328 rrl_init(nsd->child_count + 1); 2329 #endif 2330 2331 /* spin-up server and execute verifiers for each zone */ 2332 server_verify(nsd, cmdsocket); 2333 #ifdef RATELIMIT 2334 /* deallocate rate limiting resources */ 2335 rrl_deinit(nsd->child_count + 1); 2336 #endif 2337 } 2338 2339 for(node = radix_first(nsd->db->zonetree); 2340 node != NULL; 2341 node = radix_next(node)) 2342 { 2343 zone = (zone_type *)node->elem; 2344 if(zone->is_updated) { 2345 if(zone->is_bad) { 2346 nsd->mode = NSD_RELOAD_FAILED; 2347 hint = soainfo_bad; 2348 } else { 2349 hint = soainfo_ok; 2350 } 2351 /* update(s), verified or not, possibly with subsequent 2352 skipped update(s). 
/*
 * Reload the database, stop the parent, re-fork the children and
 * continue as server_main.
 */
static void
server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
	int cmdsocket)
{
	pid_t mypid;
	sig_atomic_t cmd = NSD_QUIT_SYNC;
	int ret;
	udb_ptr last_task;
	struct sigaction old_sigchld, ign_sigchld;
	struct radnode* node;
	zone_type* zone;
	enum soainfo_hint hint;
	/* ignore SIGCHLD from the previous server_main that used this pid */
	memset(&ign_sigchld, 0, sizeof(ign_sigchld));
	ign_sigchld.sa_handler = SIG_IGN;
	sigaction(SIGCHLD, &ign_sigchld, &old_sigchld);

#ifdef HAVE_SETPROCTITLE
	setproctitle("main");
#endif
#ifdef HAVE_CPUSET_T
	if(nsd->use_cpu_affinity) {
		set_cpu_affinity(nsd->cpuset);
	}
#endif

	/* see what tasks we got from xfrd */
	task_remap(nsd->task[nsd->mytask]);
	udb_ptr_init(&last_task, nsd->task[nsd->mytask]);
	udb_compact_inhibited(nsd->db->udb, 1);
	reload_process_tasks(nsd, &last_task, cmdsocket);
	udb_compact_inhibited(nsd->db->udb, 0);
	udb_compact(nsd->db->udb);

#ifndef NDEBUG
	if(nsd_debug_level >= 1)
		region_log_stats(nsd->db->region);
#endif /* NDEBUG */
	/* sync to disk (if needed) */
	udb_base_sync(nsd->db->udb, 0);

	initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
	/* Restart dumping stats if required. */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif
#ifdef USE_ZONE_STATS
	server_zonestat_realloc(nsd); /* realloc for new children */
	server_zonestat_switch(nsd);
#endif

	if(nsd->options->verify_enable) {
#ifdef RATELIMIT
		/* allocate resources for rate limiting. use a slot that is guaranteed
		   not mapped to a file so no persistent data is overwritten */
		rrl_init(nsd->child_count + 1);
#endif

		/* spin-up server and execute verifiers for each zone */
		server_verify(nsd, cmdsocket);
#ifdef RATELIMIT
		/* deallocate rate limiting resources */
		rrl_deinit(nsd->child_count + 1);
#endif
	}

	for(node = radix_first(nsd->db->zonetree);
	    node != NULL;
	    node = radix_next(node))
	{
		zone = (zone_type *)node->elem;
		if(zone->is_updated) {
			if(zone->is_bad) {
				nsd->mode = NSD_RELOAD_FAILED;
				hint = soainfo_bad;
			} else {
				hint = soainfo_ok;
			}
			/* update(s), verified or not, possibly with subsequent
			   skipped update(s). Skipped update(s) are picked up
			   by the failed update check in xfrd */
			task_new_soainfo(nsd->task[nsd->mytask], &last_task,
				zone, hint);
		} else if(zone->is_skipped) {
			/* corrupt or inconsistent update without preceding
			   update(s), communicate soainfo_gone */
			task_new_soainfo(nsd->task[nsd->mytask], &last_task,
				zone, soainfo_gone);
		}
		zone->is_updated = 0;
		zone->is_skipped = 0;
	}

	if(nsd->mode == NSD_RELOAD_FAILED) {
		exit(NSD_RELOAD_FAILED);
	}

	/* listen for the signals of failed children again */
	sigaction(SIGCHLD, &old_sigchld, NULL);
#ifdef USE_DNSTAP
	if (nsd->dt_collector) {
		int *swap_fd_send;
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: swap dnstap collector pipes"));
		/* Swap fd_send with fd_swap so the old and new serve children
		 * will not write to the same pipe ends simultaneously */
		swap_fd_send = nsd->dt_collector_fd_send;
		nsd->dt_collector_fd_send = nsd->dt_collector_fd_swap;
		nsd->dt_collector_fd_swap = swap_fd_send;

	}
#endif
	/* Start new child processes */
	if (server_start_children(nsd, server_region, netio,
		&nsd->xfrd_listener->fd) != 0) {
		send_children_quit(nsd);
		exit(1);
	}

	/* if the parent has quit, we must quit too, poll the fd for cmds */
	if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
		if(cmd == NSD_QUIT) {
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
			send_children_quit(nsd);
			exit(0);
		}
	}

	/* Send quit command to parent: blocking, wait for receipt. */
	do {
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
		if (!write_socket(cmdsocket, &cmd, sizeof(cmd)))
		{
			log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s",
				strerror(errno));
		}
		/* blocking: wait for parent to really quit. (it sends RELOAD as ack) */
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main"));
		ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd),
			RELOAD_SYNC_TIMEOUT);
		if(ret == -2) {
			DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry"));
		}
	} while (ret == -2);
	if(ret == -1) {
		log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s",
			strerror(errno));
	}
	DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd));
	if(cmd == NSD_QUIT) {
		/* small race condition possible here, parent got quit cmd. */
		send_children_quit(nsd);
		exit(1);
	}
	assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD);
#ifdef BIND8_STATS
	reload_do_stats(cmdsocket, nsd, &last_task);
#endif
	udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
	task_process_sync(nsd->task[nsd->mytask]);
#ifdef USE_ZONE_STATS
	server_zonestat_realloc(nsd); /* realloc for next children */
#endif

	/* send soainfo to the xfrd process, signal it that reload is done,
	 * it picks up the taskudb */
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending reload_done xfrd: %s",
			strerror(errno));
	}
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	/* try to reopen file */
	if (nsd->file_rotation_ok)
		log_reopen(nsd->log_filename, 1);
	/* exit reload, continue as new server_main */
}
/*
 * Get the mode depending on the signal hints that have been received.
 * Multiple signal hints can be received and will be handled in turn.
 */
static sig_atomic_t
server_signal_mode(struct nsd *nsd)
{
	if(nsd->signal_hint_quit) {
		nsd->signal_hint_quit = 0;
		return NSD_QUIT;
	}
	else if(nsd->signal_hint_shutdown) {
		nsd->signal_hint_shutdown = 0;
		return NSD_SHUTDOWN;
	}
	else if(nsd->signal_hint_child) {
		nsd->signal_hint_child = 0;
		return NSD_REAP_CHILDREN;
	}
	else if(nsd->signal_hint_reload) {
		nsd->signal_hint_reload = 0;
		return NSD_RELOAD;
	}
	else if(nsd->signal_hint_reload_hup) {
		nsd->signal_hint_reload_hup = 0;
		return NSD_RELOAD_REQ;
	}
	else if(nsd->signal_hint_stats) {
		nsd->signal_hint_stats = 0;
#ifdef BIND8_STATS
		set_bind8_alarm(nsd);
#endif
		return NSD_STATS;
	}
	else if(nsd->signal_hint_statsusr) {
		nsd->signal_hint_statsusr = 0;
		return NSD_STATS;
	}
	return NSD_RUN;
}
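/*
 * Note on the main loop below: nsd->mode changes either through the
 * signal hints mapped by server_signal_mode() above (highest priority
 * first: quit, shutdown, reap children, reload, reload request, stats)
 * or through IPC commands from the xfrd and reload processes.
 */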
/*
 * The main server simply waits for signals and child processes to
 * terminate. Child processes are restarted as necessary.
 */
void
server_main(struct nsd *nsd)
{
	region_type *server_region = region_create(xalloc, free);
	netio_type *netio = netio_create(server_region);
	netio_handler_type reload_listener;
	int reload_sockets[2] = {-1, -1};
	struct timespec timeout_spec;
	int status;
	pid_t child_pid;
	pid_t reload_pid = -1;
	sig_atomic_t mode;

	/* Ensure we are the main process */
	assert(nsd->server_kind == NSD_SERVER_MAIN);

	/* Add listener for the XFRD process */
	netio_add_handler(netio, nsd->xfrd_listener);

	/* Start the child processes that handle incoming queries */
	if (server_start_children(nsd, server_region, netio,
		&nsd->xfrd_listener->fd) != 0) {
		send_children_quit(nsd);
		exit(1);
	}
	reload_listener.fd = -1;

	/* This_child MUST be 0, because this is the parent process */
	assert(nsd->this_child == 0);

	/* Run the server until we get a shutdown signal */
	while ((mode = nsd->mode) != NSD_SHUTDOWN) {
		/* Did we receive a signal that changes our mode? */
		if(mode == NSD_RUN) {
			nsd->mode = mode = server_signal_mode(nsd);
		}

		switch (mode) {
		case NSD_RUN:
			/* see if any child processes terminated */
			while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) {
				int is_child = delete_child_pid(nsd, child_pid);
				if (is_child != -1 && nsd->children[is_child].need_to_exit) {
					if(nsd->children[is_child].child_fd == -1)
						nsd->children[is_child].has_exited = 1;
					parent_check_all_children_exited(nsd);
				} else if(is_child != -1) {
					log_msg(LOG_WARNING,
						"server %d died unexpectedly with status %d, restarting",
						(int) child_pid, status);
					restart_child_servers(nsd, server_region, netio,
						&nsd->xfrd_listener->fd);
				} else if (child_pid == reload_pid) {
					sig_atomic_t cmd = NSD_RELOAD_FAILED;
					pid_t mypid;
					log_msg(LOG_WARNING,
						"Reload process %d failed with status %d, continuing with old database",
						(int) child_pid, status);
					reload_pid = -1;
					if(reload_listener.fd != -1) close(reload_listener.fd);
					netio_remove_handler(netio, &reload_listener);
					reload_listener.fd = -1;
					reload_listener.event_types = NETIO_EVENT_NONE;
					task_process_sync(nsd->task[nsd->mytask]);
					/* inform xfrd reload attempt ended */
					if(!write_socket(nsd->xfrd_listener->fd,
						&cmd, sizeof(cmd))) {
						log_msg(LOG_ERR, "problems "
							"sending SOAEND to xfrd: %s",
							strerror(errno));
					}
					mypid = getpid();
					if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
						log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
							strerror(errno));
					}
#ifdef USE_DNSTAP
				} else if(nsd->dt_collector && child_pid == nsd->dt_collector->dt_pid) {
					log_msg(LOG_WARNING,
						"dnstap-collector %d terminated with status %d",
						(int) child_pid, status);
					if(nsd->dt_collector) {
						dt_collector_close(nsd->dt_collector, nsd);
						dt_collector_destroy(nsd->dt_collector, nsd);
						nsd->dt_collector = NULL;
					}
					/* Only respawn a crashed (or exited)
					 * dnstap-collector when not reloading,
					 * to not induce a reload during a
					 * reload (which would seriously
					 * disrupt nsd procedures and lead to
					 * unpredictable results)!
					 *
					 * This will *leave* a dnstap-collector
					 * process terminated, but because
					 * signalling the reload process and
					 * the main process to respawn in this
					 * situation would be cumbersome, and
					 * because this situation is so
					 * specific (and therefore hopefully
					 * extremely rare or non-existent),
					 * plus the fact that we are left
					 * with a perfectly functioning NSD
					 * (besides not logging dnstap
					 * messages), I consider it acceptable
					 * to leave this unresolved. */
					if(reload_pid == -1 && nsd->options->dnstap_enable) {
						nsd->dt_collector = dt_collector_create(nsd);
						dt_collector_start(nsd->dt_collector, nsd);
						nsd->mode = NSD_RELOAD_REQ;
					}
#endif
				} else if(status != 0) {
					/* check the status, because we can see
					 * the old server main here (reload is
					 * the process parent of the old main),
					 * and older server processes that are
					 * exiting after a reload */
					log_msg(LOG_WARNING,
						"process %d terminated with status %d",
						(int) child_pid, status);
				}
			}
			if (child_pid == -1) {
				if (errno == EINTR) {
					continue;
				}
				if (errno != ECHILD)
					log_msg(LOG_WARNING, "wait failed: %s", strerror(errno));
			}
			if (nsd->mode != NSD_RUN)
				break;

			/* timeout to collect processes. In case no sigchild happens. */
			timeout_spec.tv_sec = 60;
			timeout_spec.tv_nsec = 0;

			/* listen on ports, timeout for collecting terminated children */
			if(netio_dispatch(netio, &timeout_spec, 0) == -1) {
				if (errno != EINTR) {
					log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
				}
			}
			if(nsd->restart_children) {
				restart_child_servers(nsd, server_region, netio,
					&nsd->xfrd_listener->fd);
				nsd->restart_children = 0;
			}
			if(nsd->reload_failed) {
				sig_atomic_t cmd = NSD_RELOAD_FAILED;
				pid_t mypid;
				nsd->reload_failed = 0;
				log_msg(LOG_WARNING,
					"Reload process %d failed, continuing with old database",
					(int) reload_pid);
				reload_pid = -1;
				if(reload_listener.fd != -1) close(reload_listener.fd);
				netio_remove_handler(netio, &reload_listener);
				reload_listener.fd = -1;
				reload_listener.event_types = NETIO_EVENT_NONE;
				task_process_sync(nsd->task[nsd->mytask]);
				/* inform xfrd reload attempt ended */
				if(!write_socket(nsd->xfrd_listener->fd,
					&cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "problems "
						"sending SOAEND to xfrd: %s",
						strerror(errno));
				}
				mypid = getpid();
				if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
					log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
						strerror(errno));
				}
			}

			break;
		case NSD_RELOAD_REQ: {
			sig_atomic_t cmd = NSD_RELOAD_REQ;
			log_msg(LOG_WARNING, "SIGHUP received, reloading...");
			DEBUG(DEBUG_IPC,1, (LOG_INFO,
				"main: ipc send reload_req to xfrd"));
			if(!write_socket(nsd->xfrd_listener->fd,
				&cmd, sizeof(cmd))) {
				log_msg(LOG_ERR, "server_main: could not send "
					"reload_req to xfrd: %s", strerror(errno));
			}
			nsd->mode = NSD_RUN;
		} break;
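		/*
		 * On NSD_RELOAD the process forks: the parent branch runs
		 * server_reload() and, on success, takes over as the new
		 * main process; the child branch keeps running this
		 * server_main() as the old main until the reload sends
		 * NSD_QUIT_SYNC, so service continues during the reload.
		 */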
		case NSD_RELOAD:
			/* Continue to run nsd after reload */
			nsd->mode = NSD_RUN;
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading..."));
			if (reload_pid != -1) {
				log_msg(LOG_WARNING, "Reload already in progress (pid = %d)",
					(int) reload_pid);
				break;
			}

			/* switch the mytask to keep track of who owns task*/
			nsd->mytask = 1 - nsd->mytask;
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) {
				log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno));
				reload_pid = -1;
				break;
			}

			/* Do actual reload */
			reload_pid = fork();
			switch (reload_pid) {
			case -1:
				log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
				break;
			default:
				/* PARENT */
				close(reload_sockets[0]);
				server_reload(nsd, server_region, netio,
					reload_sockets[1]);
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main"));
				close(reload_sockets[1]);
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed"));
				/* drop stale xfrd ipc data */
				((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)
					->conn->is_reading = 0;
				reload_pid = -1;
				reload_listener.fd = -1;
				reload_listener.event_types = NETIO_EVENT_NONE;
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run"));
				break;
			case 0:
				/* CHILD */
				/* server_main keeps running until NSD_QUIT_SYNC
				 * is received from the reload process. */
				close(reload_sockets[1]);
				reload_listener.fd = reload_sockets[0];
				reload_listener.timeout = NULL;
				reload_listener.user_data = nsd;
				reload_listener.event_types = NETIO_EVENT_READ;
				reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */
				netio_add_handler(netio, &reload_listener);
				reload_pid = getppid();
				break;
			}
			break;
		case NSD_QUIT_SYNC:
			/* synchronisation of xfrd, parent and reload */
			if(!nsd->quit_sync_done && reload_listener.fd != -1) {
				sig_atomic_t cmd = NSD_RELOAD;
				/* stop xfrd ipc writes in progress */
				DEBUG(DEBUG_IPC,1, (LOG_INFO,
					"main: ipc send indication reload"));
				if(!write_socket(nsd->xfrd_listener->fd,
					&cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "server_main: could not send reload "
						"indication to xfrd: %s", strerror(errno));
				}
				/* wait for ACK from xfrd */
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd"));
				nsd->quit_sync_done = 1;
			}
			nsd->mode = NSD_RUN;
			break;
		case NSD_QUIT:
			/* silent shutdown during reload */
			if(reload_listener.fd != -1) {
				/* acknowledge the quit, to sync reload that we will really quit now */
				sig_atomic_t cmd = NSD_RELOAD;
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload"));
				if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "server_main: "
						"could not ack quit: %s", strerror(errno));
				}
#ifdef BIND8_STATS
				parent_send_stats(nsd, reload_listener.fd);
#endif /* BIND8_STATS */
				close(reload_listener.fd);
			}
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence"));
			/* only quit children after xfrd has acked */
			send_children_quit(nsd);

#ifdef MEMCLEAN /* OS collects memory pages */
			region_destroy(server_region);
#endif
			server_shutdown(nsd);

			/* ENOTREACH */
			break;
		case NSD_SHUTDOWN:
			break;
		case NSD_REAP_CHILDREN:
			/* continue; wait for child in run loop */
			nsd->mode = NSD_RUN;
			break;
		case NSD_STATS:
#ifdef BIND8_STATS
			set_children_stats(nsd);
#endif
			nsd->mode = NSD_RUN;
			break;
		default:
			log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode);
			nsd->mode = NSD_RUN;
			break;
		}
	}
	log_msg(LOG_WARNING, "signal received, shutting down...");

	/* close opened ports to avoid race with restart of nsd */
	server_close_all_sockets(nsd->udp, nsd->ifs);
	server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
	daemon_remote_close(nsd->rc);
#endif
	send_children_quit_and_wait(nsd);

	/* Unlink it if possible... */
	unlinkpid(nsd->pidfile);
	unlink(nsd->task[0]->fname);
	unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
	unlink(nsd->zonestatfname[0]);
	unlink(nsd->zonestatfname[1]);
#endif
#ifdef USE_DNSTAP
	dt_collector_close(nsd->dt_collector, nsd);
#endif

	if(reload_listener.fd != -1) {
		sig_atomic_t cmd = NSD_QUIT;
		DEBUG(DEBUG_IPC,1, (LOG_INFO,
			"main: ipc send quit to reload-process"));
		if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "server_main: could not send quit to reload: %s",
				strerror(errno));
		}
		fsync(reload_listener.fd);
		close(reload_listener.fd);
		/* wait for reload to finish processing */
		while(1) {
			if(waitpid(reload_pid, NULL, 0) == -1) {
				if(errno == EINTR) continue;
				if(errno == ECHILD) break;
				log_msg(LOG_ERR, "waitpid(reload %d): %s",
					(int)reload_pid, strerror(errno));
			}
			break;
		}
	}
	if(nsd->xfrd_listener->fd != -1) {
		/* complete quit, stop xfrd */
		sig_atomic_t cmd = NSD_QUIT;
		DEBUG(DEBUG_IPC,1, (LOG_INFO,
			"main: ipc send quit to xfrd"));
		if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s",
				strerror(errno));
		}
		fsync(nsd->xfrd_listener->fd);
		close(nsd->xfrd_listener->fd);
		(void)kill(nsd->pid, SIGTERM);
	}

#ifdef MEMCLEAN /* OS collects memory pages */
	region_destroy(server_region);
#endif
	/* write the nsd.db to disk, wait for it to complete */
	udb_base_sync(nsd->db->udb, 1);
	udb_base_close(nsd->db->udb);
	server_shutdown(nsd);
}
static query_state_type
server_process_query(struct nsd *nsd, struct query *query, uint32_t *now_p)
{
	return query_process(query, nsd, now_p);
}

static query_state_type
server_process_query_udp(struct nsd *nsd, struct query *query, uint32_t *now_p)
{
#ifdef RATELIMIT
	if(query_process(query, nsd, now_p) != QUERY_DISCARDED) {
		if(query->edns.cookie_status != COOKIE_VALID
			&& query->edns.cookie_status != COOKIE_VALID_REUSE
			&& rrl_process_query(query))
			return rrl_slip(query);
		else return QUERY_PROCESSED;
	}
	return QUERY_DISCARDED;
#else
	return query_process(query, nsd, now_p);
#endif
}

const char*
nsd_event_vs(void)
{
#ifdef USE_MINI_EVENT
	return "";
#else
	return event_get_version();
#endif
}

#if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS)
static const char* ub_ev_backend2str(int b)
{
	switch(b) {
	case EVBACKEND_SELECT: return "select";
	case EVBACKEND_POLL: return "poll";
	case EVBACKEND_EPOLL: return "epoll";
	case EVBACKEND_KQUEUE: return "kqueue";
	case EVBACKEND_DEVPOLL: return "devpoll";
	case EVBACKEND_PORT: return "evport";
	}
	return "unknown";
}
#endif

const char*
nsd_event_method(void)
{
#ifdef USE_MINI_EVENT
	return "select";
#else
	struct event_base* b = nsd_child_event_base();
	const char* m = "?";
# ifdef EV_FEATURE_BACKENDS
	m = ub_ev_backend2str(ev_backend((struct ev_loop*)b));
# elif defined(HAVE_EVENT_BASE_GET_METHOD)
	m = event_base_get_method(b);
# endif
# ifdef MEMCLEAN
	event_base_free(b);
# endif
	return m;
#endif
}
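/*
 * Create the event base for this process: the default loop for libev,
 * a fresh base for libevent, or the builtin select-based mini_event
 * (which shares the static time cache with the event loop).
 */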
struct event_base*
nsd_child_event_base(void)
{
	struct event_base* base;
#ifdef USE_MINI_EVENT
	static time_t secs;
	static struct timeval now;
	base = event_init(&secs, &now);
#else
# if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
	/* libev */
	base = (struct event_base *)ev_default_loop(EVFLAG_AUTO);
# else
	/* libevent */
#  ifdef HAVE_EVENT_BASE_NEW
	base = event_base_new();
#  else
	base = event_init();
#  endif
# endif
#endif
	return base;
}

static void
add_udp_handler(
	struct nsd *nsd,
	struct nsd_socket *sock,
	struct udp_handler_data *data)
{
	struct event *handler = &data->event;

	data->nsd = nsd;
	data->socket = sock;

	memset(handler, 0, sizeof(*handler));
	event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data);
	if(event_base_set(nsd->event_base, handler) != 0)
		log_msg(LOG_ERR, "nsd udp: event_base_set failed");
	if(event_add(handler, NULL) != 0)
		log_msg(LOG_ERR, "nsd udp: event_add failed");
}

void
add_tcp_handler(
	struct nsd *nsd,
	struct nsd_socket *sock,
	struct tcp_accept_handler_data *data)
{
	struct event *handler = &data->event;

	data->nsd = nsd;
	data->socket = sock;

#ifdef HAVE_SSL
	if (nsd->tls_ctx &&
	    nsd->options->tls_port &&
	    using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port))
	{
		data->tls_accept = 1;
		if(verbosity >= 2) {
			char buf[48];
			addrport2str((void*)(struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf));
			VERBOSITY(4, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf));
		}
	} else {
		data->tls_accept = 0;
	}
#endif

	memset(handler, 0, sizeof(*handler));
	event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data);
	if(event_base_set(nsd->event_base, handler) != 0)
		log_msg(LOG_ERR, "nsd tcp: event_base_set failed");
	if(event_add(handler, NULL) != 0)
		log_msg(LOG_ERR, "nsd tcp: event_add failed");
	data->event_added = 1;
}

/*
 * Serve DNS requests to verifiers (short-lived)
 */
void server_verify(struct nsd *nsd, int cmdsocket)
{
	size_t size = 0;
	struct event cmd_event, signal_event, exit_event;
	struct zone *zone;
	size_t i;

	assert(nsd != NULL);

	zone = verify_next_zone(nsd, NULL);
	if(zone == NULL)
		return;

	nsd->server_region = region_create(xalloc, free);
	nsd->event_base = nsd_child_event_base();

	nsd->next_zone_to_verify = zone;
	nsd->verifier_count = 0;
	nsd->verifier_limit = nsd->options->verifier_count;
	size = sizeof(struct verifier) * nsd->verifier_limit;
	pipe(nsd->verifier_pipe);
	fcntl(nsd->verifier_pipe[0], F_SETFD, FD_CLOEXEC);
	fcntl(nsd->verifier_pipe[1], F_SETFD, FD_CLOEXEC);
	nsd->verifiers = region_alloc_zero(nsd->server_region, size);

	for(i = 0; i < nsd->verifier_limit; i++) {
		nsd->verifiers[i].nsd = nsd;
		nsd->verifiers[i].zone = NULL;
		nsd->verifiers[i].pid = -1;
		nsd->verifiers[i].output_stream.fd = -1;
		nsd->verifiers[i].output_stream.priority = LOG_INFO;
		nsd->verifiers[i].error_stream.fd = -1;
		nsd->verifiers[i].error_stream.priority = LOG_ERR;
	}

	event_set(&cmd_event, cmdsocket, EV_READ|EV_PERSIST, verify_handle_command, nsd);
	if(event_base_set(nsd->event_base, &cmd_event) != 0 ||
	   event_add(&cmd_event, NULL) != 0)
	{
		log_msg(LOG_ERR, "verify: could not add command event");
		goto fail;
	}

	event_set(&signal_event, SIGCHLD, EV_SIGNAL|EV_PERSIST, verify_handle_signal, nsd);
	if(event_base_set(nsd->event_base, &signal_event) != 0 ||
	   signal_add(&signal_event, NULL) != 0)
	{
		log_msg(LOG_ERR, "verify: could not add signal event");
		goto fail;
	}

	event_set(&exit_event, nsd->verifier_pipe[0], EV_READ|EV_PERSIST, verify_handle_exit, nsd);
	if(event_base_set(nsd->event_base, &exit_event) != 0 ||
	   event_add(&exit_event, NULL) != 0)
	{
		log_msg(LOG_ERR, "verify: could not add exit event");
		goto fail;
	}
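	/* The global msgs/iovecs/queries arrays are reused here: the
	 * verify server is short-lived and runs instead of, not next to,
	 * the regular serve children in this process. */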
	memset(msgs, 0, sizeof(msgs));
	for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
		queries[i] = query_create(nsd->server_region,
			compressed_dname_offsets,
			compression_table_size, compressed_dnames);
		query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
		iovecs[i].iov_base = buffer_begin(queries[i]->packet);
		iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
		msgs[i].msg_hdr.msg_iov = &iovecs[i];
		msgs[i].msg_hdr.msg_iovlen = 1;
		msgs[i].msg_hdr.msg_name = &queries[i]->addr;
		msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
	}

	for (i = 0; i < nsd->verify_ifs; i++) {
		struct udp_handler_data *data;
		data = region_alloc_zero(
			nsd->server_region, sizeof(*data));
		add_udp_handler(nsd, &nsd->verify_udp[i], data);
	}

	tcp_accept_handler_count = nsd->verify_ifs;
	tcp_accept_handlers = region_alloc_array(nsd->server_region,
		nsd->verify_ifs, sizeof(*tcp_accept_handlers));

	for (i = 0; i < nsd->verify_ifs; i++) {
		struct tcp_accept_handler_data *data;
		data = &tcp_accept_handlers[i];
		memset(data, 0, sizeof(*data));
		add_tcp_handler(nsd, &nsd->verify_tcp[i], data);
	}

	while(nsd->next_zone_to_verify != NULL &&
	      nsd->verifier_count < nsd->verifier_limit)
	{
		verify_zone(nsd, nsd->next_zone_to_verify);
		nsd->next_zone_to_verify
			= verify_next_zone(nsd, nsd->next_zone_to_verify);
	}

	/* short-lived main loop */
	event_base_dispatch(nsd->event_base);

	/* remove command and exit event handlers */
	event_del(&exit_event);
	event_del(&signal_event);
	event_del(&cmd_event);

	assert(nsd->next_zone_to_verify == NULL || nsd->mode == NSD_QUIT);
	assert(nsd->verifier_count == 0 || nsd->mode == NSD_QUIT);
fail:
	event_base_free(nsd->event_base);
	close(nsd->verifier_pipe[0]);
	close(nsd->verifier_pipe[1]);
	region_destroy(nsd->server_region);

	nsd->event_base = NULL;
	nsd->server_region = NULL;
	nsd->verifier_limit = 0;
	nsd->verifier_pipe[0] = -1;
	nsd->verifier_pipe[1] = -1;
	nsd->verifiers = NULL;
}
/*
 * Serve DNS requests.
 */
void
server_child(struct nsd *nsd)
{
	size_t i, from, numifs;
	region_type *server_region = region_create(xalloc, free);
	struct event_base* event_base = nsd_child_event_base();
	sig_atomic_t mode;

	if(!event_base) {
		log_msg(LOG_ERR, "nsd server could not create event base");
		exit(1);
	}
	nsd->event_base = event_base;
	nsd->server_region = server_region;

#ifdef RATELIMIT
	rrl_init(nsd->this_child->child_num);
#endif

	assert(nsd->server_kind != NSD_SERVER_MAIN);
	DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started"));

#ifdef HAVE_SETPROCTITLE
	setproctitle("server %d", nsd->this_child->child_num + 1);
#endif
#ifdef HAVE_CPUSET_T
	if(nsd->use_cpu_affinity) {
		set_cpu_affinity(nsd->this_child->cpuset);
	}
#endif

	if (!(nsd->server_kind & NSD_SERVER_TCP)) {
		server_close_all_sockets(nsd->tcp, nsd->ifs);
	}
	if (!(nsd->server_kind & NSD_SERVER_UDP)) {
		server_close_all_sockets(nsd->udp, nsd->ifs);
	}

	if (nsd->this_child->parent_fd != -1) {
		struct event *handler;
		struct ipc_handler_conn_data* user_data =
			(struct ipc_handler_conn_data*)region_alloc(
				server_region, sizeof(struct ipc_handler_conn_data));
		user_data->nsd = nsd;
		user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ);

		handler = (struct event*) region_alloc(
			server_region, sizeof(*handler));
		memset(handler, 0, sizeof(*handler));
		event_set(handler, nsd->this_child->parent_fd, EV_PERSIST|
			EV_READ, child_handle_parent_command, user_data);
		if(event_base_set(event_base, handler) != 0)
			log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed");
		if(event_add(handler, NULL) != 0)
			log_msg(LOG_ERR, "nsd ipcchild: event_add failed");
	}

	if(nsd->reuseport) {
		numifs = nsd->ifs / nsd->reuseport;
		from = numifs * nsd->this_child->child_num;
		if(from+numifs > nsd->ifs) { /* should not happen */
			from = 0;
			numifs = nsd->ifs;
		}
	} else {
		from = 0;
		numifs = nsd->ifs;
	}

	if (nsd->server_kind & NSD_SERVER_UDP) {
		int child = nsd->this_child->child_num;
		memset(msgs, 0, sizeof(msgs));
		for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
			queries[i] = query_create(server_region,
				compressed_dname_offsets,
				compression_table_size, compressed_dnames);
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			iovecs[i].iov_base = buffer_begin(queries[i]->packet);
			iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
			msgs[i].msg_hdr.msg_iov = &iovecs[i];
			msgs[i].msg_hdr.msg_iovlen = 1;
			msgs[i].msg_hdr.msg_name = &queries[i]->addr;
			msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
		}

		for (i = 0; i < nsd->ifs; i++) {
			int listen;
			struct udp_handler_data *data;

			listen = nsd_bitset_isset(nsd->udp[i].servers, child);

			if(i >= from && i < (from + numifs) && listen) {
				data = region_alloc_zero(
					nsd->server_region, sizeof(*data));
				add_udp_handler(nsd, &nsd->udp[i], data);
			} else {
				/* close sockets intended for other servers */
				server_close_socket(&nsd->udp[i]);
			}
		}
	}
	/*
	 * Keep track of all the TCP accept handlers so we can enable
	 * and disable them based on the current number of active TCP
	 * connections.
	 */
	if (nsd->server_kind & NSD_SERVER_TCP) {
		int child = nsd->this_child->child_num;
		tcp_accept_handler_count = numifs;
		tcp_accept_handlers = region_alloc_array(server_region,
			numifs, sizeof(*tcp_accept_handlers));

		for (i = 0; i < nsd->ifs; i++) {
			int listen;
			struct tcp_accept_handler_data *data;

			listen = nsd_bitset_isset(nsd->tcp[i].servers, child);

			if(i >= from && i < (from + numifs) && listen) {
				data = &tcp_accept_handlers[i-from];
				memset(data, 0, sizeof(*data));
				add_tcp_handler(nsd, &nsd->tcp[i], data);
			} else {
				/* close sockets intended for other servers */
				/*
				 * uncomment this once tcp servers are no
				 * longer copied in the tcp fd copy line
				 * in server_init().
				server_close_socket(&nsd->tcp[i]);
				*/
				/* close sockets not meant for this server*/
				if(!listen)
					server_close_socket(&nsd->tcp[i]);
			}
		}
	} else {
		tcp_accept_handler_count = 0;
	}

	/* The main loop... */
	while ((mode = nsd->mode) != NSD_QUIT) {
		if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd);

		/* Do we need to do the statistics... */
		if (mode == NSD_STATS) {
#ifdef BIND8_STATS
			int p = nsd->st.period;
			nsd->st.period = 1; /* force stats printout */
			/* Dump the statistics */
			bind8_stats(nsd);
			nsd->st.period = p;
#else /* !BIND8_STATS */
			log_msg(LOG_NOTICE, "Statistics support not enabled at compile time.");
#endif /* BIND8_STATS */

			nsd->mode = NSD_RUN;
		}
		else if (mode == NSD_REAP_CHILDREN) {
			/* got signal, notify parent. parent reaps terminated children. */
			if (nsd->this_child->parent_fd != -1) {
				sig_atomic_t parent_notify = NSD_REAP_CHILDREN;
				if (write(nsd->this_child->parent_fd,
					&parent_notify,
					sizeof(parent_notify)) == -1)
				{
					log_msg(LOG_ERR, "problems sending command from %d to parent: %s",
						(int) nsd->this_child->pid, strerror(errno));
				}
			} else /* no parent, so reap 'em */
				while (waitpid(-1, NULL, WNOHANG) > 0) ;
			nsd->mode = NSD_RUN;
		}
		else if(mode == NSD_RUN) {
			/* Wait for a query... */
			if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
				if (errno != EINTR) {
					log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
					break;
				}
			}
		} else if(mode == NSD_QUIT) {
			/* ignore here, quit */
		} else {
			log_msg(LOG_ERR, "mode bad value %d, back to service.",
				(int)mode);
			nsd->mode = NSD_RUN;
		}
	}

	service_remaining_tcp(nsd);
#ifdef BIND8_STATS
	bind8_stats(nsd);
#endif /* BIND8_STATS */

#ifdef MEMCLEAN /* OS collects memory pages */
#ifdef RATELIMIT
	rrl_deinit(nsd->this_child->child_num);
#endif
	event_base_free(event_base);
	region_destroy(server_region);
#endif
	server_shutdown(nsd);
}

static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg)
{
	int* timed_out = (int*)arg;
	assert(event & EV_TIMEOUT); (void)event;
	/* wake up the service tcp thread, note event is no longer
	 * registered */
	*timed_out = 1;
}

void
service_remaining_tcp(struct nsd* nsd)
{
	struct tcp_handler_data* p;
	struct event_base* event_base;
	/* check if it is needed */
	if(nsd->current_tcp_count == 0 || tcp_active_list == NULL)
		return;
	VERBOSITY(4, (LOG_INFO, "service remaining TCP connections"));
#ifdef USE_DNSTAP
	/* remove dnstap collector, we cannot write there because the new
	 * child process is using the file descriptor, or the child
	 * process after that. */
	dt_collector_destroy(nsd->dt_collector, nsd);
	nsd->dt_collector = NULL;
#endif
	/* setup event base */
	event_base = nsd_child_event_base();
	if(!event_base) {
		log_msg(LOG_ERR, "nsd remain tcp could not create event base");
		return;
	}
	/* register tcp connections */
	for(p = tcp_active_list; p != NULL; p = p->next) {
		struct timeval timeout;
		int fd = p->event.ev_fd;
#ifdef USE_MINI_EVENT
		short event = p->event.ev_flags & (EV_READ|EV_WRITE);
#else
		short event = p->event.ev_events & (EV_READ|EV_WRITE);
#endif
		void (*fn)(int, short, void*);
#ifdef HAVE_SSL
		if(p->tls) {
			if((event&EV_READ))
				fn = handle_tls_reading;
			else fn = handle_tls_writing;
		} else {
#endif
			if((event&EV_READ))
				fn = handle_tcp_reading;
			else fn = handle_tcp_writing;
#ifdef HAVE_SSL
		}
#endif

		p->tcp_no_more_queries = 1;
		/* set timeout to 1/10 second */
		if(p->tcp_timeout > 100)
			p->tcp_timeout = 100;
		timeout.tv_sec = p->tcp_timeout / 1000;
		timeout.tv_usec = (p->tcp_timeout % 1000)*1000;
		event_del(&p->event);
		memset(&p->event, 0, sizeof(p->event));
		event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT,
			fn, p);
		if(event_base_set(event_base, &p->event) != 0)
			log_msg(LOG_ERR, "event base set failed");
		if(event_add(&p->event, &timeout) != 0)
			log_msg(LOG_ERR, "event add failed");
	}
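	/* Drain the remaining connections: each loop iteration arms a
	 * one second wakeup timer so signal hints are polled at least
	 * once per second; remaining_tcp_timeout() only sets timed_out,
	 * because the timer event is not persistent. */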
	/* handle it */
	while(nsd->current_tcp_count > 0) {
		sig_atomic_t m = server_signal_mode(nsd);
		struct event timeout;
		struct timeval tv;
		int timed_out = 0;
		if(m == NSD_QUIT || m == NSD_SHUTDOWN ||
			m == NSD_REAP_CHILDREN) {
			/* quit */
			break;
		}
		/* timer */
		/* have to do something every second */
		tv.tv_sec = 1;
		tv.tv_usec = 0;
		memset(&timeout, 0, sizeof(timeout));
		event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout,
			&timed_out);
		if(event_base_set(event_base, &timeout) != 0)
			log_msg(LOG_ERR, "remaintcp timer: event_base_set failed");
		if(event_add(&timeout, &tv) != 0)
			log_msg(LOG_ERR, "remaintcp timer: event_add failed");

		/* service loop */
		if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
			if (errno != EINTR) {
				log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
				break;
			}
		}
		if(!timed_out) {
			event_del(&timeout);
		} else {
			/* timed out, quit */
			VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit"));
			break;
		}
	}
#ifdef MEMCLEAN
	event_base_free(event_base);
#endif
	/* continue to quit after return */
}

/* Implement recvmmsg and sendmmsg if the platform does not. These functions
 * are always used, even if nonblocking operations are broken, in which case
 * NUM_RECV_PER_SELECT is defined to 1 (one).
 */
#if defined(HAVE_RECVMMSG)
#define nsd_recvmmsg recvmmsg
#else /* !HAVE_RECVMMSG */

static int
nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen,
	int flags, struct timespec *timeout)
{
	unsigned int vpos = 0;
	ssize_t rcvd;

	/* timeout is ignored, ensure caller does not expect it to work */
	assert(timeout == NULL); (void)timeout;

	while(vpos < vlen) {
		rcvd = recvfrom(sockfd,
			msgvec[vpos].msg_hdr.msg_iov->iov_base,
			msgvec[vpos].msg_hdr.msg_iov->iov_len,
			flags,
			msgvec[vpos].msg_hdr.msg_name,
			&msgvec[vpos].msg_hdr.msg_namelen);
		if(rcvd < 0) {
			break;
		} else {
			assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX);
			msgvec[vpos].msg_len = (unsigned int)rcvd;
			vpos++;
		}
	}

	if(vpos) {
		/* error will be picked up next time */
		return (int)vpos;
	} else if(errno == 0) {
		return 0;
	} else if(errno == EAGAIN) {
		return 0;
	}

	return -1;
}
#endif /* HAVE_RECVMMSG */

#ifdef HAVE_SENDMMSG
#define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__)
#else /* !HAVE_SENDMMSG */

static int
nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags)
{
	unsigned int vpos = 0;
	ssize_t snd;

	while(vpos < vlen) {
		assert(msgvec[vpos].msg_hdr.msg_iovlen == 1);
		snd = sendto(sockfd,
			msgvec[vpos].msg_hdr.msg_iov->iov_base,
			msgvec[vpos].msg_hdr.msg_iov->iov_len,
			flags,
			msgvec[vpos].msg_hdr.msg_name,
			msgvec[vpos].msg_hdr.msg_namelen);
		if(snd < 0) {
			break;
		} else {
			msgvec[vpos].msg_len = (unsigned int)snd;
			vpos++;
		}
	}

	if(vpos) {
		return (int)vpos;
	} else if(errno == 0) {
		return 0;
	}

	return -1;
}
#endif /* HAVE_SENDMMSG */
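/*
 * Queries can arrive with a source port of zero, which cannot be
 * replied to (sendto gives EINVAL for such addresses); port_is_zero()
 * lets handle_udp() below suppress the log message for that expected
 * case at low verbosity.
 */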
static int
port_is_zero(
#ifdef INET6
	struct sockaddr_storage *addr
#else
	struct sockaddr_in *addr
#endif
	)
{
#ifdef INET6
	if(addr->ss_family == AF_INET6) {
		return (((struct sockaddr_in6 *)addr)->sin6_port) == 0;
	} else if(addr->ss_family == AF_INET) {
		return (((struct sockaddr_in *)addr)->sin_port) == 0;
	}
	return 0;
#else
	if(addr->sin_family == AF_INET) {
		return addr->sin_port == 0;
	}
	return 0;
#endif
}

static void
handle_udp(int fd, short event, void* arg)
{
	struct udp_handler_data *data = (struct udp_handler_data *) arg;
	int received, sent, recvcount, i;
	struct query *q;
	uint32_t now = 0;

	if (!(event & EV_READ)) {
		return;
	}
	recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
	/* this printf strangely gave a performance increase on Linux */
	/* printf("recvcount %d \n", recvcount); */
	if (recvcount == -1) {
		if (errno != EAGAIN && errno != EINTR) {
			log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
			STATUP(data->nsd, rxerr);
			/* No zone statup */
		}
		/* Simply no data available */
		return;
	}
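	/* Process the batch in place: each query is answered into its
	 * own buffer; dropped queries are swapped to the tail of the
	 * arrays and recvcount is decreased, so that the send loop below
	 * transmits only the first recvcount responses. */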
	for (i = 0; i < recvcount; i++) {
	loopstart:
		received = msgs[i].msg_len;
		queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen;
		q = queries[i];
		if (received == -1) {
			log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror(
#if defined(HAVE_RECVMMSG)
				msgs[i].msg_hdr.msg_flags
#else
				errno
#endif
				));
			STATUP(data->nsd, rxerr);
			/* No zone statup */
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			iovecs[i].iov_len = buffer_remaining(q->packet);
			msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
			goto swap_drop;
		}

		/* Account... */
#ifdef BIND8_STATS
		if (data->socket->addr.ai_family == AF_INET) {
			STATUP(data->nsd, qudp);
		} else if (data->socket->addr.ai_family == AF_INET6) {
			STATUP(data->nsd, qudp6);
		}
#endif

		buffer_skip(q->packet, received);
		buffer_flip(q->packet);
#ifdef USE_DNSTAP
		/*
		 * sending UDP-query with server address (local) and client address to dnstap process
		 */
		log_addr("query from client", &q->addr);
		log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
		dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &q->addr, q->addrlen,
			q->tcp, q->packet);
#endif /* USE_DNSTAP */

		/* Process and answer the query... */
		if (server_process_query_udp(data->nsd, q, &now) != QUERY_DISCARDED) {
			if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
				STATUP(data->nsd, nona);
				ZTATUP(data->nsd, q->zone, nona);
			}

#ifdef USE_ZONE_STATS
			if (data->socket->addr.ai_family == AF_INET) {
				ZTATUP(data->nsd, q->zone, qudp);
			} else if (data->socket->addr.ai_family == AF_INET6) {
				ZTATUP(data->nsd, q->zone, qudp6);
			}
#endif

			/* Add EDNS0 and TSIG info if necessary. */
			query_add_optional(q, data->nsd, &now);

			buffer_flip(q->packet);
			iovecs[i].iov_len = buffer_remaining(q->packet);
#ifdef BIND8_STATS
			/* Account the rcode & TC... */
			STATUP2(data->nsd, rcode, RCODE(q->packet));
			ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
			if (TC(q->packet)) {
				STATUP(data->nsd, truncated);
				ZTATUP(data->nsd, q->zone, truncated);
			}
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
			/*
			 * sending UDP-response with server address (local) and client address to dnstap process
			 */
			log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
			log_addr("response to client", &q->addr);
			dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr,
				&q->addr, q->addrlen, q->tcp, q->packet,
				q->zone);
#endif /* USE_DNSTAP */
		} else {
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			iovecs[i].iov_len = buffer_remaining(q->packet);
			msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
		swap_drop:
			STATUP(data->nsd, dropped);
			ZTATUP(data->nsd, q->zone, dropped);
			if(i != recvcount-1) {
				/* swap with last and decrease recvcount */
				struct mmsghdr mtmp = msgs[i];
				struct iovec iotmp = iovecs[i];
				recvcount--;
				msgs[i] = msgs[recvcount];
				iovecs[i] = iovecs[recvcount];
				queries[i] = queries[recvcount];
				msgs[recvcount] = mtmp;
				iovecs[recvcount] = iotmp;
				queries[recvcount] = q;
				msgs[i].msg_hdr.msg_iov = &iovecs[i];
				msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount];
				goto loopstart;
			} else { recvcount --; }
		}
	}

	/* send until all are sent */
	i = 0;
	while(i<recvcount) {
		sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
		if(sent == -1) {
			if(errno == ENOBUFS ||
#ifdef EWOULDBLOCK
				errno == EWOULDBLOCK ||
#endif
				errno == EAGAIN) {
				/* block to wait until send buffer avail */
				int flag, errstore;
				if((flag = fcntl(fd, F_GETFL)) == -1) {
					log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno));
					flag = 0;
				}
				flag &= ~O_NONBLOCK;
				if(fcntl(fd, F_SETFL, flag) == -1)
					log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno));
				sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
				errstore = errno;
				flag |= O_NONBLOCK;
				if(fcntl(fd, F_SETFL, flag) == -1)
					log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno));
				if(sent != -1) {
					i += sent;
					continue;
				}
				errno = errstore;
			}
			if(errno == EINVAL) {
				/* skip the invalid argument entry,
				 * send the remaining packets in the list */
				if(!(port_is_zero((void*)&queries[i]->addr) &&
					verbosity < 3)) {
					const char* es = strerror(errno);
					char a[64];
					addrport2str((void*)&queries[i]->addr, a, sizeof(a));
					log_msg(LOG_ERR, "sendmmsg skip invalid argument [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
				}
				i += 1;
				continue;
			}
			/* don't log transient network full errors, unless
			 * on higher verbosity */
			if(!(errno == ENOBUFS && verbosity < 1) &&
#ifdef EWOULDBLOCK
				errno != EWOULDBLOCK &&
#endif
				errno != EAGAIN) {
				const char* es = strerror(errno);
				char a[64];
				addrport2str((void*)&queries[i]->addr, a, sizeof(a));
				log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
			}
#ifdef BIND8_STATS
			data->nsd->st.txerr += recvcount-i;
#endif /* BIND8_STATS */
			break;
		}
		i += sent;
	}
	for(i=0; i<recvcount; i++) {
		query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
		iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
		msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
	}
}
				continue;
			}
			/* don't log transient network full errors, unless
			 * on higher verbosity */
			if(!(errno == ENOBUFS && verbosity < 1) &&
#ifdef EWOULDBLOCK
				errno != EWOULDBLOCK &&
#endif
				errno != EAGAIN) {
				const char* es = strerror(errno);
				char a[64];
				addrport2str((void*)&queries[i]->addr, a, sizeof(a));
				log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
			}
#ifdef BIND8_STATS
			data->nsd->st.txerr += recvcount-i;
#endif /* BIND8_STATS */
			break;
		}
		i += sent;
	}
	for(i=0; i<recvcount; i++) {
		query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
		iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
		msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
	}
}

#ifdef HAVE_SSL
/*
 * Setup an event for the tcp handler.
 */
static void
tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *),
	int fd, short event)
{
	struct timeval timeout;
	struct event_base* ev_base;

	timeout.tv_sec = data->nsd->tcp_timeout;
	timeout.tv_usec = 0L;

	ev_base = data->event.ev_base;
	event_del(&data->event);
	memset(&data->event, 0, sizeof(data->event));
	event_set(&data->event, fd, event, fn, data);
	if(event_base_set(ev_base, &data->event) != 0)
		log_msg(LOG_ERR, "event base set failed");
	if(event_add(&data->event, &timeout) != 0)
		log_msg(LOG_ERR, "event add failed");
}
#endif /* HAVE_SSL */

static void
cleanup_tcp_handler(struct tcp_handler_data* data)
{
	event_del(&data->event);
#ifdef HAVE_SSL
	if(data->tls) {
		SSL_shutdown(data->tls);
		SSL_free(data->tls);
		data->tls = NULL;
	}
#endif
	close(data->event.ev_fd);
	if(data->prev)
		data->prev->next = data->next;
	else	tcp_active_list = data->next;
	if(data->next)
		data->next->prev = data->prev;

	/*
	 * Enable the TCP accept handlers when the current number of
	 * TCP connections is about to drop below the maximum number
	 * of TCP connections.
	 */
	if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
		configure_handler_event_types(EV_READ|EV_PERSIST);
		if(slowaccept) {
			event_del(&slowaccept_event);
			slowaccept = 0;
		}
	}
	--data->nsd->current_tcp_count;
	assert(data->nsd->current_tcp_count >= 0);

	region_destroy(data->region);
}

static void
handle_tcp_reading(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t received;
	struct event_base* ev_base;
	struct timeval timeout;
	uint32_t now = 0;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out. */
		cleanup_tcp_handler(data);
		return;
	}

	if ((data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) ||
		data->tcp_no_more_queries) {
		/* No more queries allowed on this tcp connection. */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_READ));

	if (data->bytes_transmitted == 0) {
		query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
	}

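	/*
	 * For illustration only (not part of nsd): DNS over TCP frames every
	 * message with a two-octet length field in network byte order
	 * (RFC 1035, section 4.2.2); that is the tcplen being read below.
	 * A standalone sketch of sending one framed message on a blocking
	 * socket 'fd' (error handling omitted):
	 *
	 *	uint16_t len = htons((uint16_t)msglen);
	 *	write(fd, &len, sizeof(len));   // two-octet length prefix
	 *	write(fd, msg, msglen);         // then the DNS message itself
	 */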
	/*
	 * Check if we received the leading packet length bytes yet.
	 */
	if (data->bytes_transmitted < sizeof(uint16_t)) {
		received = read(fd,
			(char *) &data->query->tcplen
			+ data->bytes_transmitted,
			sizeof(uint16_t) - data->bytes_transmitted);
		if (received == -1) {
			if (errno == EAGAIN || errno == EINTR) {
				/*
				 * Read would block, wait until more
				 * data is available.
				 */
				return;
			} else {
				char buf[48];
				addr2str(&data->query->addr, buf, sizeof(buf));
#ifdef ECONNRESET
				if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
				log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
				cleanup_tcp_handler(data);
				return;
			}
		} else if (received == 0) {
			/* EOF */
			cleanup_tcp_handler(data);
			return;
		}

		data->bytes_transmitted += received;
		if (data->bytes_transmitted < sizeof(uint16_t)) {
			/*
			 * Not done with the tcplen yet, wait for more
			 * data to become available.
			 */
			return;
		}

		assert(data->bytes_transmitted == sizeof(uint16_t));

		data->query->tcplen = ntohs(data->query->tcplen);

		/*
		 * Minimum query size is:
		 *
		 *     Size of the header (12)
		 *   + Root domain name (1)
		 *   + Query class (2)
		 *   + Query type (2)
		 */
		if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
			VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
			cleanup_tcp_handler(data);
			return;
		}

		if (data->query->tcplen > data->query->maxlen) {
			VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
			cleanup_tcp_handler(data);
			return;
		}

		buffer_set_limit(data->query->packet, data->query->tcplen);
	}

	assert(buffer_remaining(data->query->packet) > 0);

	/* Read the (remaining) query data. */
	received = read(fd,
		buffer_current(data->query->packet),
		buffer_remaining(data->query->packet));
	if (received == -1) {
		if (errno == EAGAIN || errno == EINTR) {
			/*
			 * Read would block, wait until more data is
			 * available.
			 */
			return;
		} else {
			char buf[48];
			addr2str(&data->query->addr, buf, sizeof(buf));
#ifdef ECONNRESET
			if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
			log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
			cleanup_tcp_handler(data);
			return;
		}
	} else if (received == 0) {
		/* EOF */
		cleanup_tcp_handler(data);
		return;
	}

	data->bytes_transmitted += received;
	buffer_skip(data->query->packet, received);
	if (buffer_remaining(data->query->packet) > 0) {
		/*
		 * Message not yet complete, wait for more data to
		 * become available.
		 */
		return;
	}

	assert(buffer_position(data->query->packet) == data->query->tcplen);

	/* Account... */
#ifdef BIND8_STATS
#ifndef INET6
	STATUP(data->nsd, ctcp);
#else
	if (data->query->addr.ss_family == AF_INET) {
		STATUP(data->nsd, ctcp);
	} else if (data->query->addr.ss_family == AF_INET6) {
		STATUP(data->nsd, ctcp6);
	}
#endif
#endif /* BIND8_STATS */

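	/*
	 * Note for illustration: query_count is incremented once per
	 * completed query below; the checks at the top of the read and write
	 * handlers compare it against the configured tcp-query-count and
	 * close the connection once the limit is reached, after the current
	 * answer has been written.
	 */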
	/* We have a complete query, process it. */

	/* tcp-query-count: handle query counter ++ */
	data->query_count++;

	buffer_flip(data->query->packet);
#ifdef USE_DNSTAP
	/*
	 * Send the TCP query, with the server (local) address and the
	 * client address, to the dnstap process.
	 */
	log_addr("query from client", &data->query->addr);
	log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
	dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
		data->query->addrlen, data->query->tcp, data->query->packet);
#endif /* USE_DNSTAP */
	data->query_state = server_process_query(data->nsd, data->query, &now);
	if (data->query_state == QUERY_DISCARDED) {
		/* Drop the packet and the entire connection... */
		STATUP(data->nsd, dropped);
		ZTATUP(data->nsd, data->query->zone, dropped);
		cleanup_tcp_handler(data);
		return;
	}

#ifdef BIND8_STATS
	if (RCODE(data->query->packet) == RCODE_OK
		&& !AA(data->query->packet))
	{
		STATUP(data->nsd, nona);
		ZTATUP(data->nsd, data->query->zone, nona);
	}
#endif /* BIND8_STATS */

#ifdef USE_ZONE_STATS
#ifndef INET6
	ZTATUP(data->nsd, data->query->zone, ctcp);
#else
	if (data->query->addr.ss_family == AF_INET) {
		ZTATUP(data->nsd, data->query->zone, ctcp);
	} else if (data->query->addr.ss_family == AF_INET6) {
		ZTATUP(data->nsd, data->query->zone, ctcp6);
	}
#endif
#endif /* USE_ZONE_STATS */

	query_add_optional(data->query, data->nsd, &now);

	/* Switch to the tcp write handler. */
	buffer_flip(data->query->packet);
	data->query->tcplen = buffer_remaining(data->query->packet);
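	/*
	 * Note for illustration: from here on bytes_transmitted counts the
	 * two length-prefix octets plus the payload, so the response is
	 * complete once bytes_transmitted == tcplen + sizeof(tcplen).
	 * Worked example: a 29-octet answer is fully sent after
	 * 2 + 29 = 31 octets on the wire.
	 */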
#ifdef BIND8_STATS
	/* Account the rcode & TC... */
	STATUP2(data->nsd, rcode, RCODE(data->query->packet));
	ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
	if (TC(data->query->packet)) {
		STATUP(data->nsd, truncated);
		ZTATUP(data->nsd, data->query->zone, truncated);
	}
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
	/*
	 * Send the TCP response, with the server (local) address found
	 * earlier and the client address, to the dnstap process.
	 */
	log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
	log_addr("response to client", &data->query->addr);
	dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
		data->query->addrlen, data->query->tcp, data->query->packet,
		data->query->zone);
#endif /* USE_DNSTAP */
	data->bytes_transmitted = 0;

	timeout.tv_sec = data->tcp_timeout / 1000;
	timeout.tv_usec = (data->tcp_timeout % 1000)*1000;

	ev_base = data->event.ev_base;
	event_del(&data->event);
	memset(&data->event, 0, sizeof(data->event));
	event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
		handle_tcp_reading, data);
	if(event_base_set(ev_base, &data->event) != 0)
		log_msg(LOG_ERR, "event base set tcpr failed");
	if(event_add(&data->event, &timeout) != 0)
		log_msg(LOG_ERR, "event add tcpr failed");
	/* see if we can write the answer right away (usually so, EAGAIN if not) */
	handle_tcp_writing(fd, EV_WRITE, data);
}

static void
handle_tcp_writing(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t sent;
	struct query *q = data->query;
	struct timeval timeout;
	struct event_base* ev_base;
	uint32_t now = 0;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out. */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_WRITE));

	if (data->bytes_transmitted < sizeof(q->tcplen)) {
		/* Writing the response packet length. */
		uint16_t n_tcplen = htons(q->tcplen);
#ifdef HAVE_WRITEV
		struct iovec iov[2];
		iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted;
		iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted;
		iov[1].iov_base = buffer_begin(q->packet);
		iov[1].iov_len = buffer_limit(q->packet);
		sent = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		sent = write(fd,
			(const char *) &n_tcplen + data->bytes_transmitted,
			sizeof(n_tcplen) - data->bytes_transmitted);
#endif /* HAVE_WRITEV */
		if (sent == -1) {
			if (errno == EAGAIN || errno == EINTR) {
				/*
				 * Write would block, wait until
				 * socket becomes writable again.
				 */
				return;
			} else {
#ifdef ECONNRESET
				if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
				if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
				log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
				cleanup_tcp_handler(data);
				return;
			}
		}

		data->bytes_transmitted += sent;
		if (data->bytes_transmitted < sizeof(q->tcplen)) {
			/*
			 * Writing not complete, wait until socket
			 * becomes writable again.
			 */
			return;
		}

#ifdef HAVE_WRITEV
		sent -= sizeof(n_tcplen);
		/* handle potential 'packet done' code */
		goto packet_could_be_done;
#endif
	}

	sent = write(fd,
		buffer_current(q->packet),
		buffer_remaining(q->packet));
	if (sent == -1) {
		if (errno == EAGAIN || errno == EINTR) {
			/*
			 * Write would block, wait until
			 * socket becomes writable again.
			 */
			return;
		} else {
#ifdef ECONNRESET
			if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
			if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
			log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
			cleanup_tcp_handler(data);
			return;
		}
	}

	data->bytes_transmitted += sent;
#ifdef HAVE_WRITEV
packet_could_be_done:
#endif
	buffer_skip(q->packet, sent);
	if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
		/*
		 * Still more data to write when socket becomes
		 * writable again.
		 */
		return;
	}

	assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

	if (data->query_state == QUERY_IN_AXFR ||
		data->query_state == QUERY_IN_IXFR) {
		/* Continue processing AXFR and writing back results. */
		buffer_clear(q->packet);
		if(data->query_state == QUERY_IN_AXFR)
			data->query_state = query_axfr(data->nsd, q, 0);
		else	data->query_state = query_ixfr(data->nsd, q);
		if (data->query_state != QUERY_PROCESSED) {
			query_add_optional(data->query, data->nsd, &now);

			/* Reset data. */
			buffer_flip(q->packet);
			q->tcplen = buffer_remaining(q->packet);
			data->bytes_transmitted = 0;
			/* Reset timeout. */
			timeout.tv_sec = data->tcp_timeout / 1000;
			timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
			ev_base = data->event.ev_base;
			event_del(&data->event);
			memset(&data->event, 0, sizeof(data->event));
			event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
				handle_tcp_writing, data);
			if(event_base_set(ev_base, &data->event) != 0)
				log_msg(LOG_ERR, "event base set tcpw failed");
			if(event_add(&data->event, &timeout) != 0)
				log_msg(LOG_ERR, "event add tcpw failed");

			/*
			 * Write data if/when the socket is writable
			 * again.
			 */
			return;
		}
	}

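	/*
	 * Note for illustration on the block above: AXFR and IXFR responses
	 * may span several DNS messages on one connection, so after each
	 * message is flushed the handler asks query_axfr()/query_ixfr() for
	 * the next one and stays in writing mode until QUERY_PROCESSED is
	 * returned.
	 */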
	/*
	 * Done sending, wait for the next request to arrive on the
	 * TCP socket by installing the TCP read handler.
	 */
	if ((data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) ||
		data->tcp_no_more_queries) {

		(void) shutdown(fd, SHUT_WR);
	}

	data->bytes_transmitted = 0;

	timeout.tv_sec = data->tcp_timeout / 1000;
	timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
	ev_base = data->event.ev_base;
	event_del(&data->event);
	memset(&data->event, 0, sizeof(data->event));
	event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
		handle_tcp_reading, data);
	if(event_base_set(ev_base, &data->event) != 0)
		log_msg(LOG_ERR, "event base set tcpw failed");
	if(event_add(&data->event, &timeout) != 0)
		log_msg(LOG_ERR, "event add tcpw failed");
}

#ifdef HAVE_SSL
/** create SSL object and associate fd */
static SSL*
incoming_ssl_fd(SSL_CTX* ctx, int fd)
{
	SSL* ssl = SSL_new((SSL_CTX*)ctx);
	if(!ssl) {
		log_crypto_err("could not SSL_new");
		return NULL;
	}
	SSL_set_accept_state(ssl);
	(void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
	if(!SSL_set_fd(ssl, fd)) {
		log_crypto_err("could not SSL_set_fd");
		SSL_free(ssl);
		return NULL;
	}
	return ssl;
}

/** TLS handshake to upgrade TCP connection */
static int
tls_handshake(struct tcp_handler_data* data, int fd, int writing)
{
	int r;
	if(data->shake_state == tls_hs_read_event) {
		/* read condition satisfied; back to writing */
		tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
		data->shake_state = tls_hs_none;
		return 1;
	}
	if(data->shake_state == tls_hs_write_event) {
		/* write condition satisfied; back to reading */
		tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
		data->shake_state = tls_hs_none;
		return 1;
	}

	/* (continue to) setup the TLS connection */
	ERR_clear_error();
	r = SSL_do_handshake(data->tls);

	if(r != 1) {
		int want = SSL_get_error(data->tls, r);
		if(want == SSL_ERROR_WANT_READ) {
			if(data->shake_state == tls_hs_read) {
				/* try again later */
				return 1;
			}
			data->shake_state = tls_hs_read;
			/* switch back to reading mode */
			tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
			return 1;
		} else if(want == SSL_ERROR_WANT_WRITE) {
			if(data->shake_state == tls_hs_write) {
				/* try again later */
				return 1;
			}
			data->shake_state = tls_hs_write;
			/* switch back to writing mode */
			tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
			return 1;
		} else {
			if(r == 0)
				VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely"));
			else {
				unsigned long err = ERR_get_error();
				if(!squelch_err_ssl_handshake(err)) {
					char a[64], s[256];
					addr2str(&data->query->addr, a, sizeof(a));
					snprintf(s, sizeof(s), "TLS handshake failed from %s", a);
					log_crypto_from_err(s, err);
				}
			}
			cleanup_tcp_handler(data);
			return 0;
		}
	}

	/* Log the successful upgrade, for testing; could be removed. */
	VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded."));
	/* set back to the event we need to have when reading (or writing) */
	if(data->shake_state == tls_hs_read && writing) {
		tcp_handler_setup_event(data, handle_tls_writing, fd,
			EV_PERSIST|EV_TIMEOUT|EV_WRITE);
	} else if(data->shake_state == tls_hs_write && !writing) {
		tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
	}
	data->shake_state = tls_hs_none;
	return 1;
}

/** handle TLS reading of incoming query */
static void
handle_tls_reading(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t received;
	uint32_t now = 0;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out. */
		cleanup_tcp_handler(data);
		return;
	}

	if ((data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) ||
		data->tcp_no_more_queries) {
		/* No more queries allowed on this tcp connection. */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_READ));

	if (data->bytes_transmitted == 0) {
		query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
	}

	if(data->shake_state != tls_hs_none) {
		if(!tls_handshake(data, fd, 0))
			return;
		if(data->shake_state != tls_hs_none)
			return;
	}

	/*
	 * Check if we received the leading packet length bytes yet.
	 */
	if(data->bytes_transmitted < sizeof(uint16_t)) {
		ERR_clear_error();
		if((received=SSL_read(data->tls, (char *) &data->query->tcplen
			+ data->bytes_transmitted,
			sizeof(uint16_t) - data->bytes_transmitted)) <= 0) {
			int want = SSL_get_error(data->tls, received);
			if(want == SSL_ERROR_ZERO_RETURN) {
				cleanup_tcp_handler(data);
				return; /* shutdown, closed */
			} else if(want == SSL_ERROR_WANT_READ) {
				/* wants to be called again */
				return;
			}
			else if(want == SSL_ERROR_WANT_WRITE) {
				/* switch to writing */
				data->shake_state = tls_hs_write_event;
				tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
				return;
			}
			cleanup_tcp_handler(data);
			log_crypto_err("could not SSL_read");
			return;
		}

		data->bytes_transmitted += received;
		if (data->bytes_transmitted < sizeof(uint16_t)) {
			/*
			 * Not done with the tcplen yet, wait for more
			 * data to become available.
			 */
			return;
		}

		assert(data->bytes_transmitted == sizeof(uint16_t));

		data->query->tcplen = ntohs(data->query->tcplen);

		/*
		 * Minimum query size is:
		 *
		 *     Size of the header (12)
		 *   + Root domain name (1)
		 *   + Query class (2)
		 *   + Query type (2)
		 */
		if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
			VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
			cleanup_tcp_handler(data);
			return;
		}

		if (data->query->tcplen > data->query->maxlen) {
			VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
			cleanup_tcp_handler(data);
			return;
		}

		buffer_set_limit(data->query->packet, data->query->tcplen);
	}

	assert(buffer_remaining(data->query->packet) > 0);

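	/*
	 * For illustration only (not part of nsd): the non-blocking
	 * SSL_read() pattern used here classifies a return value <= 0 with
	 * SSL_get_error(); only the WANT_READ/WANT_WRITE results mean "retry
	 * when the socket is ready", anything else is fatal for the
	 * connection. Sketch, assuming an SSL* 'ssl' and a buffer 'buf':
	 *
	 *	int n = SSL_read(ssl, buf, (int)sizeof(buf));
	 *	if(n <= 0) {
	 *		switch(SSL_get_error(ssl, n)) {
	 *		case SSL_ERROR_WANT_READ:  break; // wait for EV_READ
	 *		case SSL_ERROR_WANT_WRITE: break; // wait for EV_WRITE
	 *		default: break;           // close the connection
	 *		}
	 *	}
	 */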
	/* Read the (remaining) query data. */
	ERR_clear_error();
	received = SSL_read(data->tls, (void*)buffer_current(data->query->packet),
		(int)buffer_remaining(data->query->packet));
	if(received <= 0) {
		int want = SSL_get_error(data->tls, received);
		if(want == SSL_ERROR_ZERO_RETURN) {
			cleanup_tcp_handler(data);
			return; /* shutdown, closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			/* wants to be called again */
			return;
		}
		else if(want == SSL_ERROR_WANT_WRITE) {
			/* switch back to writing */
			data->shake_state = tls_hs_write_event;
			tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
			return;
		}
		cleanup_tcp_handler(data);
		log_crypto_err("could not SSL_read");
		return;
	}

	data->bytes_transmitted += received;
	buffer_skip(data->query->packet, received);
	if (buffer_remaining(data->query->packet) > 0) {
		/*
		 * Message not yet complete, wait for more data to
		 * become available.
		 */
		return;
	}

	assert(buffer_position(data->query->packet) == data->query->tcplen);

	/* Account... */
#ifndef INET6
	STATUP(data->nsd, ctls);
#else
	if (data->query->addr.ss_family == AF_INET) {
		STATUP(data->nsd, ctls);
	} else if (data->query->addr.ss_family == AF_INET6) {
		STATUP(data->nsd, ctls6);
	}
#endif

	/* We have a complete query, process it. */

	/* tcp-query-count: handle query counter ++ */
	data->query_count++;

	buffer_flip(data->query->packet);
#ifdef USE_DNSTAP
	/*
	 * Send the TCP query, with the server (local) address and the
	 * client address, to the dnstap process.
	 */
	log_addr("query from client", &data->query->addr);
	log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
	dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
		data->query->addrlen, data->query->tcp, data->query->packet);
#endif /* USE_DNSTAP */
	data->query_state = server_process_query(data->nsd, data->query, &now);
	if (data->query_state == QUERY_DISCARDED) {
		/* Drop the packet and the entire connection... */
		STATUP(data->nsd, dropped);
		ZTATUP(data->nsd, data->query->zone, dropped);
		cleanup_tcp_handler(data);
		return;
	}

#ifdef BIND8_STATS
	if (RCODE(data->query->packet) == RCODE_OK
		&& !AA(data->query->packet))
	{
		STATUP(data->nsd, nona);
		ZTATUP(data->nsd, data->query->zone, nona);
	}
#endif /* BIND8_STATS */

#ifdef USE_ZONE_STATS
#ifndef INET6
	ZTATUP(data->nsd, data->query->zone, ctls);
#else
	if (data->query->addr.ss_family == AF_INET) {
		ZTATUP(data->nsd, data->query->zone, ctls);
	} else if (data->query->addr.ss_family == AF_INET6) {
		ZTATUP(data->nsd, data->query->zone, ctls6);
	}
#endif
#endif /* USE_ZONE_STATS */

	query_add_optional(data->query, data->nsd, &now);

	/* Switch to the tcp write handler. */
	buffer_flip(data->query->packet);
	data->query->tcplen = buffer_remaining(data->query->packet);
#ifdef BIND8_STATS
	/* Account the rcode & TC... */
	STATUP2(data->nsd, rcode, RCODE(data->query->packet));
	ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
	if (TC(data->query->packet)) {
		STATUP(data->nsd, truncated);
		ZTATUP(data->nsd, data->query->zone, truncated);
	}
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
	/*
	 * Send the TCP response, with the server (local) address found
	 * earlier and the client address, to the dnstap process.
	 */
	log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
	log_addr("response to client", &data->query->addr);
	dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
		data->query->addrlen, data->query->tcp, data->query->packet,
		data->query->zone);
#endif /* USE_DNSTAP */
	data->bytes_transmitted = 0;

	tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);

	/* see if we can write the answer right away (usually so, EAGAIN if not) */
	handle_tls_writing(fd, EV_WRITE, data);
}

/** handle TLS writing of outgoing response */
static void
handle_tls_writing(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t sent;
	struct query *q = data->query;
	/* static variable that holds the reassembly buffer used to put the
	 * TCP length in front of the packet, as writev() would. */
	static buffer_type* global_tls_temp_buffer = NULL;
	buffer_type* write_buffer;
	uint32_t now = 0;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out. */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_WRITE));

	if(data->shake_state != tls_hs_none) {
		if(!tls_handshake(data, fd, 1))
			return;
		if(data->shake_state != tls_hs_none)
			return;
	}

	(void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE);

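	/*
	 * For illustration only (not part of nsd): with
	 * SSL_MODE_ENABLE_PARTIAL_WRITE set above, SSL_write() may succeed
	 * after writing only part of the buffer, mirroring write(2), instead
	 * of the all-or-nothing default. Sketch, assuming an SSL* 'ssl', a
	 * cursor 'p' and remaining length 'len':
	 *
	 *	(void)SSL_set_mode(ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	 *	int n = SSL_write(ssl, p, (int)len);  // n may be < len
	 *	if(n > 0) { p += n; len -= n; }       // resume from p later
	 */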
	/* If we are writing the start of a message, we must include the
	 * length; this is done with a copy into write_buffer. */
	write_buffer = NULL;
	if (data->bytes_transmitted == 0) {
		if(!global_tls_temp_buffer) {
			/* gets deallocated when nsd shuts down from
			 * nsd.region */
			global_tls_temp_buffer = buffer_create(nsd.region,
				QIOBUFSZ + sizeof(q->tcplen));
			if (!global_tls_temp_buffer) {
				return;
			}
		}
		write_buffer = global_tls_temp_buffer;
		buffer_clear(write_buffer);
		buffer_write_u16(write_buffer, q->tcplen);
		buffer_write(write_buffer, buffer_current(q->packet),
			(int)buffer_remaining(q->packet));
		buffer_flip(write_buffer);
	} else {
		write_buffer = q->packet;
	}

	/* Write the response */
	ERR_clear_error();
	sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer));
	if(sent <= 0) {
		int want = SSL_get_error(data->tls, sent);
		if(want == SSL_ERROR_ZERO_RETURN) {
			cleanup_tcp_handler(data);
			/* closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			/* switch back to reading */
			data->shake_state = tls_hs_read_event;
			tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
		} else if(want != SSL_ERROR_WANT_WRITE) {
			cleanup_tcp_handler(data);
			log_crypto_err("could not SSL_write");
		}
		return;
	}

	buffer_skip(write_buffer, sent);
	if(buffer_remaining(write_buffer) != 0) {
		/* If not all was sent, sync up the real packet buffer,
		 * in case the temporary buffer was used instead of it. */
		if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) {
			buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen));
		}
	}

	data->bytes_transmitted += sent;
	if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
		/*
		 * Still more data to write when socket becomes
		 * writable again.
		 */
		return;
	}

	assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

	if (data->query_state == QUERY_IN_AXFR ||
		data->query_state == QUERY_IN_IXFR) {
		/* Continue processing AXFR and writing back results. */
		buffer_clear(q->packet);
		if(data->query_state == QUERY_IN_AXFR)
			data->query_state = query_axfr(data->nsd, q, 0);
		else	data->query_state = query_ixfr(data->nsd, q);
		if (data->query_state != QUERY_PROCESSED) {
			query_add_optional(data->query, data->nsd, &now);

			/* Reset data. */
			buffer_flip(q->packet);
			q->tcplen = buffer_remaining(q->packet);
			data->bytes_transmitted = 0;
			/* Reset to writing mode. */
			tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);

			/*
			 * Write data if/when the socket is writable
			 * again.
			 */
			return;
		}
	}

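	/*
	 * Note for illustration: the shutdown(fd, SHUT_WR) below performs a
	 * TCP half-close; this side sends FIN and stops writing, but keeps
	 * reading, so the peer can still drain data in flight, unlike a
	 * full close(fd).
	 */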
	/*
	 * Done sending, wait for the next request to arrive on the
	 * TCP socket by installing the TCP read handler.
	 */
	if ((data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) ||
		data->tcp_no_more_queries) {

		(void) shutdown(fd, SHUT_WR);
	}

	data->bytes_transmitted = 0;

	tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
}
#endif

static void
handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event),
	void* ATTR_UNUSED(arg))
{
	if(slowaccept) {
		configure_handler_event_types(EV_PERSIST | EV_READ);
		slowaccept = 0;
	}
}

static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
{
#ifndef HAVE_ACCEPT4
	int s = accept(fd, addr, addrlen);
	if (s != -1) {
		if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
			close(s);
			s = -1;
			errno = EINTR; /* set errno to EINTR so the caller
				skips its error printout, just as it would
				for an interrupted accept4() */
		}
	}
	return s;
#else
	return accept4(fd, addr, addrlen, SOCK_NONBLOCK);
#endif /* HAVE_ACCEPT4 */
}

/*
 * Handle an incoming TCP connection. The connection is accepted and
 * a new TCP reader event handler is added. The TCP handler
 * is responsible for cleanup when the connection is closed.
 */
static void
handle_tcp_accept(int fd, short event, void* arg)
{
	struct tcp_accept_handler_data *data
		= (struct tcp_accept_handler_data *) arg;
	int s;
	int reject = 0;
	struct tcp_handler_data *tcp_data;
	region_type *tcp_region;
#ifdef INET6
	struct sockaddr_storage addr;
#else
	struct sockaddr_in addr;
#endif
	socklen_t addrlen;
	struct timeval timeout;

	if (!(event & EV_READ)) {
		return;
	}

	if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) {
		reject = data->nsd->options->tcp_reject_overflow;
		if (!reject) {
			return;
		}
	}

	/* Accept it... */
	addrlen = sizeof(addr);
	s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen);
	if (s == -1) {
		/**
		 * EMFILE and ENFILE are signals that the limit of open
		 * file descriptors has been reached. Pause accept().
		 * EINTR is a signal interrupt. The others are various OS ways
		 * of saying that the client has closed the connection.
		 */
		if (errno == EMFILE || errno == ENFILE) {
			if (!slowaccept) {
				/* disable accept events */
				struct timeval tv;
				configure_handler_event_types(0);
				tv.tv_sec = SLOW_ACCEPT_TIMEOUT;
				tv.tv_usec = 0L;
				memset(&slowaccept_event, 0,
					sizeof(slowaccept_event));
				event_set(&slowaccept_event, -1, EV_TIMEOUT,
					handle_slowaccept_timeout, NULL);
				(void)event_base_set(data->event.ev_base,
					&slowaccept_event);
				(void)event_add(&slowaccept_event, &tv);
				slowaccept = 1;
				/* We don't want to spam the logs here */
			}
		} else if (errno != EINTR
			&& errno != EWOULDBLOCK
#ifdef ECONNABORTED
			&& errno != ECONNABORTED
#endif /* ECONNABORTED */
#ifdef EPROTO
			&& errno != EPROTO
#endif /* EPROTO */
			) {
			log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
		}
		return;
	}

	if (reject) {
		shutdown(s, SHUT_RDWR);
		close(s);
		return;
	}

	/*
	 * This region is deallocated when the TCP connection is
	 * closed by the TCP handler.
	 */
	tcp_region = region_create(xalloc, free);
	tcp_data = (struct tcp_handler_data *) region_alloc(
		tcp_region, sizeof(struct tcp_handler_data));
	tcp_data->region = tcp_region;
	tcp_data->query = query_create(tcp_region, compressed_dname_offsets,
		compression_table_size, compressed_dnames);
	tcp_data->nsd = data->nsd;
	tcp_data->query_count = 0;
#ifdef HAVE_SSL
	tcp_data->shake_state = tls_hs_none;
	tcp_data->tls = NULL;
#endif
	tcp_data->prev = NULL;
	tcp_data->next = NULL;

	tcp_data->query_state = QUERY_PROCESSED;
	tcp_data->bytes_transmitted = 0;
	memcpy(&tcp_data->query->addr, &addr, addrlen);
	tcp_data->query->addrlen = addrlen;

	tcp_data->tcp_no_more_queries = 0;
	tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000;
	if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) {
		/* very busy, give smaller timeout */
		tcp_data->tcp_timeout = 200;
	}
	memset(&tcp_data->event, 0, sizeof(tcp_data->event));
	timeout.tv_sec = tcp_data->tcp_timeout / 1000;
	timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000;

#ifdef USE_DNSTAP
	/* save the address of the connection */
	tcp_data->socket = data->socket;
#endif /* USE_DNSTAP */

#ifdef HAVE_SSL
	if (data->tls_accept) {
		tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s);
		if(!tcp_data->tls) {
			close(s);
			return;
		}
		tcp_data->shake_state = tls_hs_read;
		memset(&tcp_data->event, 0, sizeof(tcp_data->event));
		event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
			handle_tls_reading, tcp_data);
	} else {
#endif
		memset(&tcp_data->event, 0, sizeof(tcp_data->event));
		event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
			handle_tcp_reading, tcp_data);
#ifdef HAVE_SSL
	}
#endif
	if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) {
		log_msg(LOG_ERR, "cannot set tcp event base");
		close(s);
		region_destroy(tcp_region);
		return;
	}
	if(event_add(&tcp_data->event, &timeout) != 0) {
		log_msg(LOG_ERR, "cannot add tcp to event base");
		close(s);
		region_destroy(tcp_region);
		return;
	}
	if(tcp_active_list) {
		tcp_active_list->prev = tcp_data;
		tcp_data->next = tcp_active_list;
	}
	tcp_active_list = tcp_data;

	/*
	 * Keep track of the total number of TCP handlers installed so
	 * we can stop accepting connections when the maximum number
	 * of simultaneous TCP connections is reached.
	 *
	 * If tcp-reject-overflow is enabled, however, then we do not
	 * change the handler event type; we keep it as-is and accept
	 * overflow TCP connections only so that we can forcibly kill
	 * them off.
	 */
	++data->nsd->current_tcp_count;
	if (!data->nsd->options->tcp_reject_overflow &&
		data->nsd->current_tcp_count == data->nsd->maximum_tcp_count)
	{
		configure_handler_event_types(0);
	}
}

static void
send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout)
{
	size_t i;
	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) {
			if (write(nsd->children[i].child_fd,
				&command,
				sizeof(command)) == -1)
			{
				if(errno != EAGAIN && errno != EINTR)
					log_msg(LOG_ERR, "problems sending command %d to server %d: %s",
						(int) command,
						(int) nsd->children[i].pid,
						strerror(errno));
			} else if (timeout > 0) {
				(void)block_read(NULL,
					nsd->children[i].child_fd,
					&command, sizeof(command), timeout);
			}
			fsync(nsd->children[i].child_fd);
			close(nsd->children[i].child_fd);
			nsd->children[i].child_fd = -1;
		}
	}
}

static void
send_children_quit(struct nsd* nsd)
{
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit"));
	send_children_command(nsd, NSD_QUIT, 0);
}

static void
send_children_quit_and_wait(struct nsd* nsd)
{
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait"));
	send_children_command(nsd, NSD_QUIT_CHILD, 3);
}

#ifdef BIND8_STATS
static void
set_children_stats(struct nsd* nsd)
{
	size_t i;
	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children"));
	for (i = 0; i < nsd->child_count; ++i) {
		nsd->children[i].need_to_send_STATS = 1;
		nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE;
	}
}
#endif /* BIND8_STATS */

static void
configure_handler_event_types(short event_types)
{
	size_t i;

	for (i = 0; i < tcp_accept_handler_count; ++i) {
		struct event* handler = &tcp_accept_handlers[i].event;
		if(event_types) {
			/* reassign */
			int fd = handler->ev_fd;
			struct event_base* base = handler->ev_base;
			if(tcp_accept_handlers[i].event_added)
				event_del(handler);
			memset(handler, 0, sizeof(*handler));
			event_set(handler, fd, event_types,
				handle_tcp_accept, &tcp_accept_handlers[i]);
			if(event_base_set(base, handler) != 0)
				log_msg(LOG_ERR, "conhand: cannot event_base");
			if(event_add(handler, NULL) != 0)
				log_msg(LOG_ERR, "conhand: cannot event_add");
			tcp_accept_handlers[i].event_added = 1;
		} else {
			/* remove */
			if(tcp_accept_handlers[i].event_added) {
				event_del(handler);
				tcp_accept_handlers[i].event_added = 0;
			}
		}
	}
}
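/*
 * For illustration only (not part of nsd): configure_handler_event_types()
 * above follows the usual libevent pattern for re-arming an event with
 * different flags; an event cannot be modified in place, so it is deleted,
 * re-initialised and added again. Minimal standalone sketch, assuming an
 * event base 'base', a descriptor 'fd' and a callback 'cb':
 *
 *	struct event ev;
 *	event_set(&ev, fd, EV_READ|EV_PERSIST, cb, NULL);
 *	event_base_set(base, &ev);
 *	event_add(&ev, NULL);
 *	...
 *	event_del(&ev);                  // disable while overloaded
 *	event_set(&ev, fd, EV_READ|EV_PERSIST, cb, NULL);
 *	event_base_set(base, &ev);
 *	event_add(&ev, NULL);            // re-enable later
 */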