1 /* 2 * server.c -- nsd(8) network input/output 3 * 4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/param.h> 14 #include <limits.h> 15 #include <sys/socket.h> 16 #include <sys/uio.h> 17 #include <sys/wait.h> 18 19 #include <netinet/in.h> 20 #ifdef USE_TCP_FASTOPEN 21 #include <netinet/tcp.h> 22 #endif 23 #include <arpa/inet.h> 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <stddef.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <time.h> 34 #include <unistd.h> 35 #include <signal.h> 36 #include <netdb.h> 37 #include <poll.h> 38 #ifdef HAVE_SYS_RANDOM_H 39 #include <sys/random.h> 40 #endif 41 #ifndef SHUT_WR 42 #define SHUT_WR 1 43 #endif 44 #ifdef HAVE_MMAP 45 #include <sys/mman.h> 46 #endif /* HAVE_MMAP */ 47 #ifdef HAVE_OPENSSL_RAND_H 48 #include <openssl/rand.h> 49 #endif 50 #ifdef HAVE_OPENSSL_SSL_H 51 #include <openssl/ssl.h> 52 #endif 53 #ifdef HAVE_OPENSSL_ERR_H 54 #include <openssl/err.h> 55 #endif 56 #ifdef HAVE_OPENSSL_OCSP_H 57 #include <openssl/ocsp.h> 58 #endif 59 #ifndef USE_MINI_EVENT 60 # ifdef HAVE_EVENT_H 61 # include <event.h> 62 # else 63 # include <event2/event.h> 64 # include "event2/event_struct.h" 65 # include "event2/event_compat.h" 66 # endif 67 #else 68 # include "mini_event.h" 69 #endif 70 71 #include "axfr.h" 72 #include "namedb.h" 73 #include "netio.h" 74 #include "xfrd.h" 75 #include "xfrd-tcp.h" 76 #include "xfrd-disk.h" 77 #include "difffile.h" 78 #include "nsec3.h" 79 #include "ipc.h" 80 #include "udb.h" 81 #include "remote.h" 82 #include "lookup3.h" 83 #include "rrl.h" 84 #include "ixfr.h" 85 #ifdef USE_DNSTAP 86 #include "dnstap/dnstap_collector.h" 87 #endif 88 #include "verify.h" 89 90 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */ 91 92 #ifdef USE_DNSTAP 93 /* 94 * log_addr() - the function to print sockaddr_in/sockaddr_in6 structures content 95 * just like its done in Unbound via the same log_addr(VERB_LEVEL, const char*, sockaddr_storage*) 96 */ 97 static void 98 log_addr(const char* descr, 99 #ifdef INET6 100 struct sockaddr_storage* addr 101 #else 102 struct sockaddr_in* addr 103 #endif 104 ) 105 { 106 char str_buf[64]; 107 if(verbosity < 6) 108 return; 109 if( 110 #ifdef INET6 111 addr->ss_family == AF_INET 112 #else 113 addr->sin_family == AF_INET 114 #endif 115 ) { 116 struct sockaddr_in* s = (struct sockaddr_in*)addr; 117 inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf)); 118 VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port))); 119 #ifdef INET6 120 } else { 121 struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr; 122 inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf)); 123 VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port))); 124 #endif 125 } 126 } 127 #endif /* USE_DNSTAP */ 128 129 #ifdef USE_TCP_FASTOPEN 130 #define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen" 131 #define TCP_FASTOPEN_SERVER_BIT_MASK 0x2 132 #endif 133 134 /* 135 * Data for the UDP handlers. 136 */ 137 struct udp_handler_data 138 { 139 struct nsd *nsd; 140 struct nsd_socket *socket; 141 struct event event; 142 }; 143 144 struct tcp_accept_handler_data { 145 struct nsd *nsd; 146 struct nsd_socket *socket; 147 int event_added; 148 struct event event; 149 #ifdef HAVE_SSL 150 /* handler accepts TLS connections on the dedicated port */ 151 int tls_accept; 152 #endif 153 }; 154 155 /* 156 * These globals are used to enable the TCP accept handlers 157 * when the number of TCP connection drops below the maximum 158 * number of TCP connections. 159 */ 160 static size_t tcp_accept_handler_count; 161 static struct tcp_accept_handler_data *tcp_accept_handlers; 162 163 static struct event slowaccept_event; 164 static int slowaccept; 165 166 #ifdef HAVE_SSL 167 static unsigned char *ocspdata = NULL; 168 static long ocspdata_len = 0; 169 #endif 170 171 #ifdef NONBLOCKING_IS_BROKEN 172 /* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to 173 read multiple times from a socket when reported ready by select. */ 174 # define NUM_RECV_PER_SELECT (1) 175 #else /* !NONBLOCKING_IS_BROKEN */ 176 # define NUM_RECV_PER_SELECT (100) 177 #endif /* NONBLOCKING_IS_BROKEN */ 178 179 #ifndef HAVE_MMSGHDR 180 struct mmsghdr { 181 struct msghdr msg_hdr; 182 unsigned int msg_len; 183 }; 184 #endif 185 186 static struct mmsghdr msgs[NUM_RECV_PER_SELECT]; 187 static struct iovec iovecs[NUM_RECV_PER_SELECT]; 188 static struct query *queries[NUM_RECV_PER_SELECT]; 189 190 /* 191 * Data for the TCP connection handlers. 192 * 193 * The TCP handlers use non-blocking I/O. This is necessary to avoid 194 * blocking the entire server on a slow TCP connection, but does make 195 * reading from and writing to the socket more complicated. 196 * 197 * Basically, whenever a read/write would block (indicated by the 198 * EAGAIN errno variable) we remember the position we were reading 199 * from/writing to and return from the TCP reading/writing event 200 * handler. When the socket becomes readable/writable again we 201 * continue from the same position. 202 */ 203 struct tcp_handler_data 204 { 205 /* 206 * The region used to allocate all TCP connection related 207 * data, including this structure. This region is destroyed 208 * when the connection is closed. 209 */ 210 region_type* region; 211 212 /* 213 * The global nsd structure. 214 */ 215 struct nsd* nsd; 216 217 /* 218 * The current query data for this TCP connection. 219 */ 220 query_type* query; 221 222 /* 223 * The query_state is used to remember if we are performing an 224 * AXFR, if we're done processing, or if we should discard the 225 * query and connection. 226 */ 227 query_state_type query_state; 228 229 /* 230 * The event for the file descriptor and tcp timeout 231 */ 232 struct event event; 233 234 /* 235 * The bytes_transmitted field is used to remember the number 236 * of bytes transmitted when receiving or sending a DNS 237 * packet. The count includes the two additional bytes used 238 * to specify the packet length on a TCP connection. 239 */ 240 size_t bytes_transmitted; 241 242 /* 243 * The number of queries handled by this specific TCP connection. 244 */ 245 int query_count; 246 247 /* 248 * The timeout in msec for this tcp connection 249 */ 250 int tcp_timeout; 251 252 /* 253 * If the connection is allowed to have further queries on it. 254 */ 255 int tcp_no_more_queries; 256 257 #ifdef USE_DNSTAP 258 /* the socket of the accept socket to find proper service (local) address the socket is bound to. */ 259 struct nsd_socket *socket; 260 #endif /* USE_DNSTAP */ 261 262 #ifdef HAVE_SSL 263 /* 264 * TLS object. 265 */ 266 SSL* tls; 267 268 /* 269 * TLS handshake state. 270 */ 271 enum { tls_hs_none, tls_hs_read, tls_hs_write, 272 tls_hs_read_event, tls_hs_write_event } shake_state; 273 #endif 274 /* list of connections, for service of remaining tcp channels */ 275 struct tcp_handler_data *prev, *next; 276 }; 277 /* global that is the list of active tcp channels */ 278 static struct tcp_handler_data *tcp_active_list = NULL; 279 280 /* 281 * Handle incoming queries on the UDP server sockets. 282 */ 283 static void handle_udp(int fd, short event, void* arg); 284 285 /* 286 * Handle incoming connections on the TCP sockets. These handlers 287 * usually wait for the NETIO_EVENT_READ event (indicating an incoming 288 * connection) but are disabled when the number of current TCP 289 * connections is equal to the maximum number of TCP connections. 290 * Disabling is done by changing the handler to wait for the 291 * NETIO_EVENT_NONE type. This is done using the function 292 * configure_tcp_accept_handlers. 293 */ 294 static void handle_tcp_accept(int fd, short event, void* arg); 295 296 /* 297 * Handle incoming queries on a TCP connection. The TCP connections 298 * are configured to be non-blocking and the handler may be called 299 * multiple times before a complete query is received. 300 */ 301 static void handle_tcp_reading(int fd, short event, void* arg); 302 303 /* 304 * Handle outgoing responses on a TCP connection. The TCP connections 305 * are configured to be non-blocking and the handler may be called 306 * multiple times before a complete response is sent. 307 */ 308 static void handle_tcp_writing(int fd, short event, void* arg); 309 310 #ifdef HAVE_SSL 311 /* Create SSL object and associate fd */ 312 static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd); 313 /* 314 * Handle TLS handshake. May be called multiple times if incomplete. 315 */ 316 static int tls_handshake(struct tcp_handler_data* data, int fd, int writing); 317 318 /* 319 * Handle incoming queries on a TLS over TCP connection. The TLS 320 * connections are configured to be non-blocking and the handler may 321 * be called multiple times before a complete query is received. 322 */ 323 static void handle_tls_reading(int fd, short event, void* arg); 324 325 /* 326 * Handle outgoing responses on a TLS over TCP connection. The TLS 327 * connections are configured to be non-blocking and the handler may 328 * be called multiple times before a complete response is sent. 329 */ 330 static void handle_tls_writing(int fd, short event, void* arg); 331 #endif 332 333 /* 334 * Send all children the quit nonblocking, then close pipe. 335 */ 336 static void send_children_quit(struct nsd* nsd); 337 /* same, for shutdown time, waits for child to exit to avoid restart issues */ 338 static void send_children_quit_and_wait(struct nsd* nsd); 339 340 /* set childrens flags to send NSD_STATS to them */ 341 #ifdef BIND8_STATS 342 static void set_children_stats(struct nsd* nsd); 343 #endif /* BIND8_STATS */ 344 345 /* 346 * Change the event types the HANDLERS are interested in to EVENT_TYPES. 347 */ 348 static void configure_handler_event_types(short event_types); 349 350 static uint16_t *compressed_dname_offsets = 0; 351 static uint32_t compression_table_capacity = 0; 352 static uint32_t compression_table_size = 0; 353 static domain_type* compressed_dnames[MAXRRSPP]; 354 355 #ifdef USE_TCP_FASTOPEN 356 /* Checks to see if the kernel value must be manually changed in order for 357 TCP Fast Open to support server mode */ 358 static void report_tcp_fastopen_config() { 359 360 int tcp_fastopen_fp; 361 uint8_t tcp_fastopen_value; 362 363 if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) { 364 log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 365 } 366 if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) { 367 log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 368 close(tcp_fastopen_fp); 369 } 370 if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) { 371 log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n"); 372 log_msg(LOG_WARNING, "However the kernel parameters are not configured to support TCP_FASTOPEN in server mode.\n"); 373 log_msg(LOG_WARNING, "To enable TFO use the command:"); 374 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n"); 375 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n"); 376 log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n"); 377 close(tcp_fastopen_fp); 378 } 379 close(tcp_fastopen_fp); 380 } 381 #endif 382 383 /* 384 * Remove the specified pid from the list of child pids. Returns -1 if 385 * the pid is not in the list, child_num otherwise. The field is set to 0. 386 */ 387 static int 388 delete_child_pid(struct nsd *nsd, pid_t pid) 389 { 390 size_t i; 391 for (i = 0; i < nsd->child_count; ++i) { 392 if (nsd->children[i].pid == pid) { 393 nsd->children[i].pid = 0; 394 if(!nsd->children[i].need_to_exit) { 395 if(nsd->children[i].child_fd != -1) 396 close(nsd->children[i].child_fd); 397 nsd->children[i].child_fd = -1; 398 if(nsd->children[i].handler) 399 nsd->children[i].handler->fd = -1; 400 } 401 return i; 402 } 403 } 404 return -1; 405 } 406 407 /* 408 * Restart child servers if necessary. 409 */ 410 static int 411 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 412 int* xfrd_sock_p) 413 { 414 struct main_ipc_handler_data *ipc_data; 415 size_t i; 416 int sv[2]; 417 418 /* Fork the child processes... */ 419 for (i = 0; i < nsd->child_count; ++i) { 420 if (nsd->children[i].pid <= 0) { 421 if (nsd->children[i].child_fd != -1) 422 close(nsd->children[i].child_fd); 423 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 424 log_msg(LOG_ERR, "socketpair: %s", 425 strerror(errno)); 426 return -1; 427 } 428 nsd->children[i].child_fd = sv[0]; 429 nsd->children[i].parent_fd = sv[1]; 430 nsd->children[i].pid = fork(); 431 switch (nsd->children[i].pid) { 432 default: /* SERVER MAIN */ 433 close(nsd->children[i].parent_fd); 434 nsd->children[i].parent_fd = -1; 435 if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { 436 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 437 } 438 if(!nsd->children[i].handler) 439 { 440 ipc_data = (struct main_ipc_handler_data*) region_alloc( 441 region, sizeof(struct main_ipc_handler_data)); 442 ipc_data->nsd = nsd; 443 ipc_data->child = &nsd->children[i]; 444 ipc_data->child_num = i; 445 ipc_data->xfrd_sock = xfrd_sock_p; 446 ipc_data->packet = buffer_create(region, QIOBUFSZ); 447 ipc_data->forward_mode = 0; 448 ipc_data->got_bytes = 0; 449 ipc_data->total_bytes = 0; 450 ipc_data->acl_num = 0; 451 nsd->children[i].handler = (struct netio_handler*) region_alloc( 452 region, sizeof(struct netio_handler)); 453 nsd->children[i].handler->fd = nsd->children[i].child_fd; 454 nsd->children[i].handler->timeout = NULL; 455 nsd->children[i].handler->user_data = ipc_data; 456 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 457 nsd->children[i].handler->event_handler = parent_handle_child_command; 458 netio_add_handler(netio, nsd->children[i].handler); 459 } 460 /* clear any ongoing ipc */ 461 ipc_data = (struct main_ipc_handler_data*) 462 nsd->children[i].handler->user_data; 463 ipc_data->forward_mode = 0; 464 /* restart - update fd */ 465 nsd->children[i].handler->fd = nsd->children[i].child_fd; 466 break; 467 case 0: /* CHILD */ 468 /* the child need not be able to access the 469 * nsd.db file */ 470 namedb_close_udb(nsd->db); 471 #ifdef MEMCLEAN /* OS collects memory pages */ 472 region_destroy(region); 473 #endif 474 475 if (pledge("stdio rpath inet", NULL) == -1) { 476 log_msg(LOG_ERR, "pledge"); 477 exit(1); 478 } 479 480 nsd->pid = 0; 481 nsd->child_count = 0; 482 nsd->server_kind = nsd->children[i].kind; 483 nsd->this_child = &nsd->children[i]; 484 nsd->this_child->child_num = i; 485 /* remove signal flags inherited from parent 486 the parent will handle them. */ 487 nsd->signal_hint_reload_hup = 0; 488 nsd->signal_hint_reload = 0; 489 nsd->signal_hint_child = 0; 490 nsd->signal_hint_quit = 0; 491 nsd->signal_hint_shutdown = 0; 492 nsd->signal_hint_stats = 0; 493 nsd->signal_hint_statsusr = 0; 494 close(*xfrd_sock_p); 495 close(nsd->this_child->child_fd); 496 nsd->this_child->child_fd = -1; 497 if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { 498 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 499 } 500 server_child(nsd); 501 /* NOTREACH */ 502 exit(0); 503 case -1: 504 log_msg(LOG_ERR, "fork failed: %s", 505 strerror(errno)); 506 return -1; 507 } 508 } 509 } 510 return 0; 511 } 512 513 #ifdef BIND8_STATS 514 static void set_bind8_alarm(struct nsd* nsd) 515 { 516 /* resync so that the next alarm is on the next whole minute */ 517 if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ 518 alarm(nsd->st.period - (time(NULL) % nsd->st.period)); 519 } 520 #endif 521 522 /* set zone stat ids for zones initially read in */ 523 static void 524 zonestatid_tree_set(struct nsd* nsd) 525 { 526 struct radnode* n; 527 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 528 zone_type* zone = (zone_type*)n->elem; 529 zone->zonestatid = getzonestatid(nsd->options, zone->opts); 530 } 531 } 532 533 #ifdef USE_ZONE_STATS 534 void 535 server_zonestat_alloc(struct nsd* nsd) 536 { 537 size_t num = (nsd->options->zonestatnames->count==0?1: 538 nsd->options->zonestatnames->count); 539 size_t sz = sizeof(struct nsdst)*num; 540 char tmpfile[256]; 541 uint8_t z = 0; 542 543 /* file names */ 544 nsd->zonestatfname[0] = 0; 545 nsd->zonestatfname[1] = 0; 546 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0", 547 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 548 nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile); 549 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1", 550 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 551 nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile); 552 553 /* file descriptors */ 554 nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600); 555 if(nsd->zonestatfd[0] == -1) { 556 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0], 557 strerror(errno)); 558 exit(1); 559 } 560 nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600); 561 if(nsd->zonestatfd[0] == -1) { 562 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1], 563 strerror(errno)); 564 close(nsd->zonestatfd[0]); 565 unlink(nsd->zonestatfname[0]); 566 exit(1); 567 } 568 569 #ifdef HAVE_MMAP 570 if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) { 571 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0], 572 strerror(errno)); 573 exit(1); 574 } 575 if(write(nsd->zonestatfd[0], &z, 1) == -1) { 576 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 577 nsd->zonestatfname[0], strerror(errno)); 578 exit(1); 579 } 580 if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) { 581 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1], 582 strerror(errno)); 583 exit(1); 584 } 585 if(write(nsd->zonestatfd[1], &z, 1) == -1) { 586 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 587 nsd->zonestatfname[1], strerror(errno)); 588 exit(1); 589 } 590 nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 591 MAP_SHARED, nsd->zonestatfd[0], 0); 592 if(nsd->zonestat[0] == MAP_FAILED) { 593 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 594 unlink(nsd->zonestatfname[0]); 595 unlink(nsd->zonestatfname[1]); 596 exit(1); 597 } 598 nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 599 MAP_SHARED, nsd->zonestatfd[1], 0); 600 if(nsd->zonestat[1] == MAP_FAILED) { 601 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 602 unlink(nsd->zonestatfname[0]); 603 unlink(nsd->zonestatfname[1]); 604 exit(1); 605 } 606 memset(nsd->zonestat[0], 0, sz); 607 memset(nsd->zonestat[1], 0, sz); 608 nsd->zonestatsize[0] = num; 609 nsd->zonestatsize[1] = num; 610 nsd->zonestatdesired = num; 611 nsd->zonestatsizenow = num; 612 nsd->zonestatnow = nsd->zonestat[0]; 613 #endif /* HAVE_MMAP */ 614 } 615 616 void 617 zonestat_remap(struct nsd* nsd, int idx, size_t sz) 618 { 619 #ifdef HAVE_MMAP 620 #ifdef MREMAP_MAYMOVE 621 nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx], 622 sizeof(struct nsdst)*nsd->zonestatsize[idx], sz, 623 MREMAP_MAYMOVE); 624 if(nsd->zonestat[idx] == MAP_FAILED) { 625 log_msg(LOG_ERR, "mremap failed: %s", strerror(errno)); 626 exit(1); 627 } 628 #else /* !HAVE MREMAP */ 629 if(msync(nsd->zonestat[idx], 630 sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0) 631 log_msg(LOG_ERR, "msync failed: %s", strerror(errno)); 632 if(munmap(nsd->zonestat[idx], 633 sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0) 634 log_msg(LOG_ERR, "munmap failed: %s", strerror(errno)); 635 nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz, 636 PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0); 637 if(nsd->zonestat[idx] == MAP_FAILED) { 638 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 639 exit(1); 640 } 641 #endif /* MREMAP */ 642 #endif /* HAVE_MMAP */ 643 } 644 645 /* realloc the zonestat array for the one that is not currently in use, 646 * to match the desired new size of the array (if applicable) */ 647 void 648 server_zonestat_realloc(struct nsd* nsd) 649 { 650 #ifdef HAVE_MMAP 651 uint8_t z = 0; 652 size_t sz; 653 int idx = 0; /* index of the zonestat array that is not in use */ 654 if(nsd->zonestatnow == nsd->zonestat[0]) 655 idx = 1; 656 if(nsd->zonestatsize[idx] == nsd->zonestatdesired) 657 return; 658 sz = sizeof(struct nsdst)*nsd->zonestatdesired; 659 if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) { 660 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx], 661 strerror(errno)); 662 exit(1); 663 } 664 if(write(nsd->zonestatfd[idx], &z, 1) == -1) { 665 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 666 nsd->zonestatfname[idx], strerror(errno)); 667 exit(1); 668 } 669 zonestat_remap(nsd, idx, sz); 670 /* zero the newly allocated region */ 671 if(nsd->zonestatdesired > nsd->zonestatsize[idx]) { 672 memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) * 673 nsd->zonestatsize[idx], 0, sizeof(struct nsdst) * 674 (nsd->zonestatdesired - nsd->zonestatsize[idx])); 675 } 676 nsd->zonestatsize[idx] = nsd->zonestatdesired; 677 #endif /* HAVE_MMAP */ 678 } 679 680 /* switchover to use the other array for the new children, that 681 * briefly coexist with the old children. And we want to avoid them 682 * both writing to the same statistics arrays. */ 683 void 684 server_zonestat_switch(struct nsd* nsd) 685 { 686 if(nsd->zonestatnow == nsd->zonestat[0]) { 687 nsd->zonestatnow = nsd->zonestat[1]; 688 nsd->zonestatsizenow = nsd->zonestatsize[1]; 689 } else { 690 nsd->zonestatnow = nsd->zonestat[0]; 691 nsd->zonestatsizenow = nsd->zonestatsize[0]; 692 } 693 } 694 #endif /* USE_ZONE_STATS */ 695 696 static void 697 cleanup_dname_compression_tables(void *ptr) 698 { 699 free(ptr); 700 compressed_dname_offsets = NULL; 701 compression_table_capacity = 0; 702 } 703 704 static void 705 initialize_dname_compression_tables(struct nsd *nsd) 706 { 707 size_t needed = domain_table_count(nsd->db->domains) + 1; 708 needed += EXTRA_DOMAIN_NUMBERS; 709 if(compression_table_capacity < needed) { 710 if(compressed_dname_offsets) { 711 region_remove_cleanup(nsd->db->region, 712 cleanup_dname_compression_tables, 713 compressed_dname_offsets); 714 free(compressed_dname_offsets); 715 } 716 compressed_dname_offsets = (uint16_t *) xmallocarray( 717 needed, sizeof(uint16_t)); 718 region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, 719 compressed_dname_offsets); 720 compression_table_capacity = needed; 721 compression_table_size=domain_table_count(nsd->db->domains)+1; 722 } 723 memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); 724 compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ 725 } 726 727 static int 728 set_cloexec(struct nsd_socket *sock) 729 { 730 assert(sock != NULL); 731 732 if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) { 733 const char *socktype = 734 sock->addr.ai_family == SOCK_DGRAM ? "udp" : "tcp"; 735 log_msg(LOG_ERR, "fcntl(..., O_CLOEXEC) failed for %s: %s", 736 socktype, strerror(errno)); 737 return -1; 738 } 739 740 return 1; 741 } 742 743 static int 744 set_reuseport(struct nsd_socket *sock) 745 { 746 #ifdef SO_REUSEPORT 747 int on = 1; 748 #ifdef SO_REUSEPORT_LB 749 /* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like 750 * SO_REUSEPORT on Linux. This is what the users want with the config 751 * option in nsd.conf; if we actually need local address and port reuse 752 * they'll also need to have SO_REUSEPORT set for them, assume it was 753 * _LB they want. 754 */ 755 int opt = SO_REUSEPORT_LB; 756 static const char optname[] = "SO_REUSEPORT_LB"; 757 #else /* !SO_REUSEPORT_LB */ 758 int opt = SO_REUSEPORT; 759 static const char optname[] = "SO_REUSEPORT"; 760 #endif /* SO_REUSEPORT_LB */ 761 762 if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) { 763 return 1; 764 } else if(verbosity >= 3 || errno != ENOPROTOOPT) { 765 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 766 optname, strerror(errno)); 767 } 768 return -1; 769 #else 770 (void)sock; 771 #endif /* SO_REUSEPORT */ 772 773 return 0; 774 } 775 776 static int 777 set_reuseaddr(struct nsd_socket *sock) 778 { 779 #ifdef SO_REUSEADDR 780 int on = 1; 781 if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) { 782 return 1; 783 } 784 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", 785 strerror(errno)); 786 return -1; 787 #endif /* SO_REUSEADDR */ 788 return 0; 789 } 790 791 static int 792 set_rcvbuf(struct nsd_socket *sock, int rcv) 793 { 794 #ifdef SO_RCVBUF 795 #ifdef SO_RCVBUFFORCE 796 if(0 == setsockopt( 797 sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv))) 798 { 799 return 1; 800 } 801 if(errno == EPERM || errno == ENOBUFS) { 802 return 0; 803 } 804 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s", 805 strerror(errno)); 806 return -1; 807 #else /* !SO_RCVBUFFORCE */ 808 if (0 == setsockopt( 809 sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv))) 810 { 811 return 1; 812 } 813 if(errno == ENOSYS || errno == ENOBUFS) { 814 return 0; 815 } 816 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s", 817 strerror(errno)); 818 return -1; 819 #endif /* SO_RCVBUFFORCE */ 820 #endif /* SO_RCVBUF */ 821 822 return 0; 823 } 824 825 static int 826 set_sndbuf(struct nsd_socket *sock, int snd) 827 { 828 #ifdef SO_SNDBUF 829 #ifdef SO_SNDBUFFORCE 830 if(0 == setsockopt( 831 sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd))) 832 { 833 return 1; 834 } 835 if(errno == EPERM || errno == ENOBUFS) { 836 return 0; 837 } 838 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s", 839 strerror(errno)); 840 return -1; 841 #else /* !SO_SNDBUFFORCE */ 842 if(0 == setsockopt( 843 sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd))) 844 { 845 return 1; 846 } 847 if(errno == ENOSYS || errno == ENOBUFS) { 848 return 0; 849 } 850 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s", 851 strerror(errno)); 852 return -1; 853 #endif /* SO_SNDBUFFORCE */ 854 #endif /* SO_SNDBUF */ 855 856 return 0; 857 } 858 859 static int 860 set_nonblock(struct nsd_socket *sock) 861 { 862 const char *socktype = 863 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 864 865 if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) { 866 log_msg(LOG_ERR, "fctnl(..., O_NONBLOCK) failed for %s: %s", 867 socktype, strerror(errno)); 868 return -1; 869 } 870 871 return 1; 872 } 873 874 #ifdef INET6 875 static int 876 set_ipv6_v6only(struct nsd_socket *sock) 877 { 878 #ifdef IPV6_V6ONLY 879 int on = 1; 880 const char *socktype = 881 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 882 883 if(0 == setsockopt( 884 sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on))) 885 { 886 return 1; 887 } 888 889 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s", 890 socktype, strerror(errno)); 891 return -1; 892 #else 893 (void)sock; 894 #endif /* IPV6_V6ONLY */ 895 896 return 0; 897 } 898 #endif /* INET6 */ 899 900 #ifdef INET6 901 static int 902 set_ipv6_use_min_mtu(struct nsd_socket *sock) 903 { 904 #if defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU) 905 #if defined(IPV6_USE_MIN_MTU) 906 /* There is no fragmentation of IPv6 datagrams during forwarding in the 907 * network. Therefore we do not send UDP datagrams larger than the 908 * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be 909 * larger if the network stack supports IPV6_USE_MIN_MTU. 910 */ 911 int opt = IPV6_USE_MIN_MTU; 912 int optval = 1; 913 static const char optname[] = "IPV6_USE_MIN_MTU"; 914 #elif defined(IPV6_MTU) 915 /* On Linux, PMTUD is disabled by default for datagrams so set the MTU 916 * to the MIN MTU to get the same. 917 */ 918 int opt = IPV6_MTU; 919 int optval = IPV6_MIN_MTU; 920 static const char optname[] = "IPV6_MTU"; 921 #endif 922 if(0 == setsockopt( 923 sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval))) 924 { 925 return 1; 926 } 927 928 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 929 optname, strerror(errno)); 930 return -1; 931 #else 932 (void)sock; 933 #endif /* INET6 */ 934 935 return 0; 936 } 937 #endif /* INET6 */ 938 939 static int 940 set_ipv4_no_pmtu_disc(struct nsd_socket *sock) 941 { 942 int ret = 0; 943 944 #if defined(IP_MTU_DISCOVER) 945 int opt = IP_MTU_DISCOVER; 946 int optval; 947 # if defined(IP_PMTUDISC_OMIT) 948 /* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU 949 * information and send packets with DF=0. Fragmentation is allowed if 950 * and only if the packet size exceeds the outgoing interface MTU or 951 * the packet encounters smaller MTU link in network. This mitigates 952 * DNS fragmentation attacks by preventing forged PMTU information. 953 * FreeBSD already has same semantics without setting the option. 954 */ 955 optval = IP_PMTUDISC_OMIT; 956 if(0 == setsockopt( 957 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 958 { 959 return 1; 960 } 961 962 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 963 "IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno)); 964 # endif /* IP_PMTUDISC_OMIT */ 965 # if defined(IP_PMTUDISC_DONT) 966 /* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */ 967 optval = IP_PMTUDISC_DONT; 968 if(0 == setsockopt( 969 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 970 { 971 return 1; 972 } 973 974 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 975 "IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno)); 976 # endif 977 ret = -1; 978 #elif defined(IP_DONTFRAG) 979 int off = 0; 980 if (0 == setsockopt( 981 sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off))) 982 { 983 return 1; 984 } 985 986 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 987 strerror(errno)); 988 ret = -1; 989 #else 990 (void)sock; 991 #endif 992 993 return ret; 994 } 995 996 static int 997 set_ip_freebind(struct nsd_socket *sock) 998 { 999 #ifdef IP_FREEBIND 1000 int on = 1; 1001 const char *socktype = 1002 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 1003 if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0) 1004 { 1005 return 1; 1006 } 1007 log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s", 1008 socktype, strerror(errno)); 1009 return -1; 1010 #else 1011 (void)sock; 1012 #endif /* IP_FREEBIND */ 1013 1014 return 0; 1015 } 1016 1017 static int 1018 set_ip_transparent(struct nsd_socket *sock) 1019 { 1020 /* 1021 The scandalous preprocessor blob here calls for some explanation :) 1022 POSIX does not specify an option to bind non-local IPs, so 1023 platforms developed several implementation-specific options, 1024 all set in the same way, but with different names. 1025 For additional complexity, some platform manage this setting 1026 differently for different address families (IPv4 vs IPv6). 1027 This scandalous preprocessor blob below abstracts such variability 1028 in the way which leaves the C code as lean and clear as possible. 1029 */ 1030 1031 #if defined(IP_TRANSPARENT) 1032 # define NSD_SOCKET_OPTION_TRANSPARENT IP_TRANSPARENT 1033 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP 1034 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_TRANSPARENT" 1035 // as of 2020-01, Linux does not support this on IPv6 programmatically 1036 #elif defined(SO_BINDANY) 1037 # define NSD_SOCKET_OPTION_TRANSPARENT SO_BINDANY 1038 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL SOL_SOCKET 1039 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "SO_BINDANY" 1040 #elif defined(IP_BINDANY) 1041 # define NSD_SOCKET_OPTION_TRANSPARENT IP_BINDANY 1042 # define NSD_SOCKET_OPTION_TRANSPARENT6 IPV6_BINDANY 1043 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP 1044 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 IPPROTO_IPV6 1045 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_BINDANY" 1046 #endif 1047 1048 #ifndef NSD_SOCKET_OPTION_TRANSPARENT 1049 (void)sock; 1050 #else 1051 # ifndef NSD_SOCKET_OPTION_TRANSPARENT6 1052 # define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT 1053 # endif 1054 # ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 1055 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL 1056 # endif 1057 # ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6 1058 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME 1059 # endif 1060 1061 int on = 1; 1062 const char *socktype = 1063 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 1064 const int is_ip6 = (sock->addr.ai_family == AF_INET6); 1065 1066 if(0 == setsockopt( 1067 sock->s, 1068 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL, 1069 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT, 1070 &on, sizeof(on))) 1071 { 1072 return 1; 1073 } 1074 1075 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s", 1076 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME, socktype, strerror(errno)); 1077 return -1; 1078 #endif 1079 1080 return 0; 1081 } 1082 1083 static int 1084 set_tcp_maxseg(struct nsd_socket *sock, int mss) 1085 { 1086 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 1087 if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) { 1088 return 1; 1089 } 1090 log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s", 1091 strerror(errno)); 1092 return -1; 1093 #else 1094 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported"); 1095 #endif 1096 return 0; 1097 } 1098 1099 #ifdef USE_TCP_FASTOPEN 1100 static int 1101 set_tcp_fastopen(struct nsd_socket *sock) 1102 { 1103 /* qlen specifies how many outstanding TFO requests to allow. Limit is 1104 * a defense against IP spoofing attacks as suggested in RFC7413. 1105 */ 1106 int qlen; 1107 1108 #ifdef __APPLE__ 1109 /* macOS X implementation only supports qlen of 1 via this call. The 1110 * actual value is configured by the net.inet.tcp.fastopen_backlog 1111 * kernel parameter. 1112 */ 1113 qlen = 1; 1114 #else 1115 /* 5 is recommended on Linux. */ 1116 qlen = 5; 1117 #endif 1118 if (0 == setsockopt( 1119 sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen))) 1120 { 1121 return 1; 1122 } 1123 1124 if (errno == EPERM) { 1125 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s " 1126 "; this could likely be because sysctl " 1127 "net.inet.tcp.fastopen.enabled, " 1128 "net.inet.tcp.fastopen.server_enable, or " 1129 "net.ipv4.tcp_fastopen is disabled", 1130 strerror(errno)); 1131 /* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support 1132 * disabled, except when verbosity enabled for debugging 1133 */ 1134 } else if(errno != ENOPROTOOPT || verbosity >= 3) { 1135 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s", 1136 strerror(errno)); 1137 } 1138 1139 return (errno == ENOPROTOOPT ? 0 : -1); 1140 } 1141 #endif /* USE_TCP_FASTOPEN */ 1142 1143 static int 1144 set_bindtodevice(struct nsd_socket *sock) 1145 { 1146 #if defined(SO_BINDTODEVICE) 1147 if(setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE, 1148 sock->device, strlen(sock->device)) == -1) 1149 { 1150 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 1151 "SO_BINDTODEVICE", sock->device, strerror(errno)); 1152 return -1; 1153 } 1154 1155 return 1; 1156 #else 1157 (void)sock; 1158 return 0; 1159 #endif 1160 } 1161 1162 static int 1163 set_setfib(struct nsd_socket *sock) 1164 { 1165 #if defined(SO_SETFIB) 1166 if(setsockopt(sock->s, SOL_SOCKET, SO_SETFIB, 1167 (const void *)&sock->fib, sizeof(sock->fib)) == -1) 1168 { 1169 log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s", 1170 "SO_SETFIB", sock->fib, strerror(errno)); 1171 return -1; 1172 } 1173 1174 return 1; 1175 #else 1176 (void)sock; 1177 return 0; 1178 #endif 1179 } 1180 1181 static int 1182 open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1183 { 1184 int rcv = 1*1024*1024, snd = 1*1024*1024; 1185 1186 if(-1 == (sock->s = socket( 1187 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1188 { 1189 #ifdef INET6 1190 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1191 (sock->addr.ai_family == AF_INET6) && 1192 (errno == EAFNOSUPPORT)) 1193 { 1194 log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: " 1195 "not supported"); 1196 return 0; 1197 } 1198 #endif 1199 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1200 return -1; 1201 } 1202 1203 set_cloexec(sock); 1204 1205 if(nsd->reuseport && reuseport_works && *reuseport_works) 1206 *reuseport_works = (set_reuseport(sock) == 1); 1207 1208 if(nsd->options->receive_buffer_size > 0) 1209 rcv = nsd->options->receive_buffer_size; 1210 if(set_rcvbuf(sock, rcv) == -1) 1211 return -1; 1212 1213 if(nsd->options->send_buffer_size > 0) 1214 snd = nsd->options->send_buffer_size; 1215 if(set_sndbuf(sock, snd) == -1) 1216 return -1; 1217 #ifdef INET6 1218 if(sock->addr.ai_family == AF_INET6) { 1219 if(set_ipv6_v6only(sock) == -1 || 1220 set_ipv6_use_min_mtu(sock) == -1) 1221 return -1; 1222 } else 1223 #endif /* INET6 */ 1224 if(sock->addr.ai_family == AF_INET) { 1225 if(set_ipv4_no_pmtu_disc(sock) == -1) 1226 return -1; 1227 } 1228 1229 /* Set socket to non-blocking. Otherwise, on operating systems 1230 * with thundering herd problems, the UDP recv could block 1231 * after select returns readable. 1232 */ 1233 set_nonblock(sock); 1234 1235 if(nsd->options->ip_freebind) 1236 (void)set_ip_freebind(sock); 1237 if(nsd->options->ip_transparent) 1238 (void)set_ip_transparent(sock); 1239 if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1) 1240 return -1; 1241 if(sock->fib != -1 && set_setfib(sock) == -1) 1242 return -1; 1243 1244 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1245 char buf[256]; 1246 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1247 log_msg(LOG_ERR, "can't bind udp socket %s: %s", 1248 buf, strerror(errno)); 1249 return -1; 1250 } 1251 1252 return 1; 1253 } 1254 1255 static int 1256 open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1257 { 1258 #ifdef USE_TCP_FASTOPEN 1259 report_tcp_fastopen_config(); 1260 #endif 1261 1262 (void)reuseport_works; 1263 1264 if(-1 == (sock->s = socket( 1265 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1266 { 1267 #ifdef INET6 1268 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1269 (sock->addr.ai_family == AF_INET6) && 1270 (errno == EAFNOSUPPORT)) 1271 { 1272 log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: " 1273 "not supported"); 1274 return 0; 1275 } 1276 #endif /* INET6 */ 1277 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1278 return -1; 1279 } 1280 1281 set_cloexec(sock); 1282 1283 if(nsd->reuseport && reuseport_works && *reuseport_works) 1284 *reuseport_works = (set_reuseport(sock) == 1); 1285 1286 (void)set_reuseaddr(sock); 1287 1288 #ifdef INET6 1289 if(sock->addr.ai_family == AF_INET6) { 1290 if (set_ipv6_v6only(sock) == -1 || 1291 set_ipv6_use_min_mtu(sock) == -1) 1292 return -1; 1293 } 1294 #endif 1295 1296 if(nsd->tcp_mss > 0) 1297 set_tcp_maxseg(sock, nsd->tcp_mss); 1298 /* (StevensUNP p463), if TCP listening socket is blocking, then 1299 it may block in accept, even if select() says readable. */ 1300 (void)set_nonblock(sock); 1301 if(nsd->options->ip_freebind) 1302 (void)set_ip_freebind(sock); 1303 if(nsd->options->ip_transparent) 1304 (void)set_ip_transparent(sock); 1305 if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1) 1306 return -1; 1307 if(sock->fib != -1 && set_setfib(sock) == -1) 1308 return -1; 1309 1310 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1311 char buf[256]; 1312 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1313 log_msg(LOG_ERR, "can't bind tcp socket %s: %s", 1314 buf, strerror(errno)); 1315 return -1; 1316 } 1317 1318 #ifdef USE_TCP_FASTOPEN 1319 (void)set_tcp_fastopen(sock); 1320 #endif 1321 1322 if(listen(sock->s, TCP_BACKLOG) == -1) { 1323 log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); 1324 return -1; 1325 } 1326 1327 return 1; 1328 } 1329 1330 /* 1331 * Initialize the server, reuseport, create and bind the sockets. 1332 */ 1333 int 1334 server_init(struct nsd *nsd) 1335 { 1336 size_t i; 1337 int reuseport = 1; /* Determine if REUSEPORT works. */ 1338 1339 /* open server interface ports */ 1340 for(i = 0; i < nsd->ifs; i++) { 1341 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 || 1342 open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1) 1343 { 1344 return -1; 1345 } 1346 } 1347 1348 if(nsd->reuseport && reuseport) { 1349 size_t ifs = nsd->ifs * nsd->reuseport; 1350 1351 /* increase the size of the interface arrays, there are going 1352 * to be separate interface file descriptors for every server 1353 * instance */ 1354 region_remove_cleanup(nsd->region, free, nsd->udp); 1355 region_remove_cleanup(nsd->region, free, nsd->tcp); 1356 1357 nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp)); 1358 nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp)); 1359 region_add_cleanup(nsd->region, free, nsd->udp); 1360 region_add_cleanup(nsd->region, free, nsd->tcp); 1361 if(ifs > nsd->ifs) { 1362 memset(&nsd->udp[nsd->ifs], 0, 1363 (ifs-nsd->ifs)*sizeof(*nsd->udp)); 1364 memset(&nsd->tcp[nsd->ifs], 0, 1365 (ifs-nsd->ifs)*sizeof(*nsd->tcp)); 1366 } 1367 1368 for(i = nsd->ifs; i < ifs; i++) { 1369 nsd->udp[i] = nsd->udp[i%nsd->ifs]; 1370 nsd->udp[i].s = -1; 1371 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) { 1372 return -1; 1373 } 1374 /* Turn off REUSEPORT for TCP by copying the socket 1375 * file descriptor. 1376 * This means we should not close TCP used by 1377 * other servers in reuseport enabled mode, in 1378 * server_child(). 1379 */ 1380 nsd->tcp[i] = nsd->tcp[i%nsd->ifs]; 1381 } 1382 1383 nsd->ifs = ifs; 1384 } else { 1385 nsd->reuseport = 0; 1386 } 1387 1388 /* open server interface ports for verifiers */ 1389 for(i = 0; i < nsd->verify_ifs; i++) { 1390 if(open_udp_socket(nsd, &nsd->verify_udp[i], NULL) == -1 || 1391 open_tcp_socket(nsd, &nsd->verify_tcp[i], NULL) == -1) 1392 { 1393 return -1; 1394 } 1395 } 1396 1397 return 0; 1398 } 1399 1400 /* 1401 * Prepare the server for take off. 1402 * 1403 */ 1404 int 1405 server_prepare(struct nsd *nsd) 1406 { 1407 #ifdef RATELIMIT 1408 /* set secret modifier for hashing (udb ptr buckets and rate limits) */ 1409 #ifdef HAVE_GETRANDOM 1410 uint32_t v; 1411 if(getrandom(&v, sizeof(v), 0) == -1) { 1412 log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno)); 1413 exit(1); 1414 } 1415 hash_set_raninit(v); 1416 #elif defined(HAVE_ARC4RANDOM) 1417 hash_set_raninit(arc4random()); 1418 #else 1419 uint32_t v = getpid() ^ time(NULL); 1420 srandom((unsigned long)v); 1421 # ifdef HAVE_SSL 1422 if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0) 1423 hash_set_raninit(v); 1424 else 1425 # endif 1426 hash_set_raninit(random()); 1427 #endif 1428 rrl_mmap_init(nsd->child_count, nsd->options->rrl_size, 1429 nsd->options->rrl_ratelimit, 1430 nsd->options->rrl_whitelist_ratelimit, 1431 nsd->options->rrl_slip, 1432 nsd->options->rrl_ipv4_prefix_length, 1433 nsd->options->rrl_ipv6_prefix_length); 1434 #endif /* RATELIMIT */ 1435 1436 /* Open the database... */ 1437 if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) { 1438 log_msg(LOG_ERR, "unable to open the database %s: %s", 1439 nsd->dbfile, strerror(errno)); 1440 unlink(nsd->task[0]->fname); 1441 unlink(nsd->task[1]->fname); 1442 #ifdef USE_ZONE_STATS 1443 unlink(nsd->zonestatfname[0]); 1444 unlink(nsd->zonestatfname[1]); 1445 #endif 1446 xfrd_del_tempdir(nsd); 1447 return -1; 1448 } 1449 /* check if zone files have been modified */ 1450 /* NULL for taskudb because we send soainfo in a moment, batched up, 1451 * for all zones */ 1452 if(nsd->options->zonefiles_check || (nsd->options->database == NULL || 1453 nsd->options->database[0] == 0)) 1454 namedb_check_zonefiles(nsd, nsd->options, NULL, NULL); 1455 zonestatid_tree_set(nsd); 1456 1457 compression_table_capacity = 0; 1458 initialize_dname_compression_tables(nsd); 1459 1460 #ifdef BIND8_STATS 1461 /* Initialize times... */ 1462 time(&nsd->st.boot); 1463 set_bind8_alarm(nsd); 1464 #endif /* BIND8_STATS */ 1465 1466 return 0; 1467 } 1468 1469 /* 1470 * Fork the required number of servers. 1471 */ 1472 static int 1473 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, 1474 int* xfrd_sock_p) 1475 { 1476 size_t i; 1477 1478 /* Start all child servers initially. */ 1479 for (i = 0; i < nsd->child_count; ++i) { 1480 nsd->children[i].pid = 0; 1481 } 1482 1483 return restart_child_servers(nsd, region, netio, xfrd_sock_p); 1484 } 1485 1486 static void 1487 server_close_socket(struct nsd_socket *sock) 1488 { 1489 if(sock->s != -1) { 1490 close(sock->s); 1491 sock->s = -1; 1492 } 1493 } 1494 1495 void 1496 server_close_all_sockets(struct nsd_socket sockets[], size_t n) 1497 { 1498 size_t i; 1499 1500 /* Close all the sockets... */ 1501 for (i = 0; i < n; ++i) { 1502 server_close_socket(&sockets[i]); 1503 } 1504 } 1505 1506 /* 1507 * Close the sockets, shutdown the server and exit. 1508 * Does not return. 1509 */ 1510 void 1511 server_shutdown(struct nsd *nsd) 1512 { 1513 size_t i; 1514 1515 server_close_all_sockets(nsd->udp, nsd->ifs); 1516 server_close_all_sockets(nsd->tcp, nsd->ifs); 1517 /* CHILD: close command channel to parent */ 1518 if(nsd->this_child && nsd->this_child->parent_fd != -1) 1519 { 1520 close(nsd->this_child->parent_fd); 1521 nsd->this_child->parent_fd = -1; 1522 } 1523 /* SERVER: close command channels to children */ 1524 if(!nsd->this_child) 1525 { 1526 for(i=0; i < nsd->child_count; ++i) 1527 if(nsd->children[i].child_fd != -1) 1528 { 1529 close(nsd->children[i].child_fd); 1530 nsd->children[i].child_fd = -1; 1531 } 1532 } 1533 1534 tsig_finalize(); 1535 daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */ 1536 #ifdef HAVE_SSL 1537 if (nsd->tls_ctx) 1538 SSL_CTX_free(nsd->tls_ctx); 1539 #endif 1540 1541 #ifdef MEMCLEAN /* OS collects memory pages */ 1542 #ifdef RATELIMIT 1543 rrl_mmap_deinit_keep_mmap(); 1544 #endif 1545 #ifdef USE_DNSTAP 1546 dt_collector_destroy(nsd->dt_collector, nsd); 1547 #endif 1548 udb_base_free_keep_mmap(nsd->task[0]); 1549 udb_base_free_keep_mmap(nsd->task[1]); 1550 namedb_free_ixfr(nsd->db); 1551 namedb_close_udb(nsd->db); /* keeps mmap */ 1552 namedb_close(nsd->db); 1553 nsd_options_destroy(nsd->options); 1554 region_destroy(nsd->region); 1555 #endif 1556 log_finalize(); 1557 exit(0); 1558 } 1559 1560 void 1561 server_prepare_xfrd(struct nsd* nsd) 1562 { 1563 char tmpfile[256]; 1564 /* create task mmaps */ 1565 nsd->mytask = 0; 1566 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0", 1567 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1568 nsd->task[0] = task_file_create(tmpfile); 1569 if(!nsd->task[0]) { 1570 #ifdef USE_ZONE_STATS 1571 unlink(nsd->zonestatfname[0]); 1572 unlink(nsd->zonestatfname[1]); 1573 #endif 1574 xfrd_del_tempdir(nsd); 1575 exit(1); 1576 } 1577 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1", 1578 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1579 nsd->task[1] = task_file_create(tmpfile); 1580 if(!nsd->task[1]) { 1581 unlink(nsd->task[0]->fname); 1582 #ifdef USE_ZONE_STATS 1583 unlink(nsd->zonestatfname[0]); 1584 unlink(nsd->zonestatfname[1]); 1585 #endif 1586 xfrd_del_tempdir(nsd); 1587 exit(1); 1588 } 1589 assert(udb_base_get_userdata(nsd->task[0])->data == 0); 1590 assert(udb_base_get_userdata(nsd->task[1])->data == 0); 1591 /* create xfrd listener structure */ 1592 nsd->xfrd_listener = region_alloc(nsd->region, 1593 sizeof(netio_handler_type)); 1594 nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*) 1595 region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data)); 1596 nsd->xfrd_listener->fd = -1; 1597 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd = 1598 nsd; 1599 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn = 1600 xfrd_tcp_create(nsd->region, QIOBUFSZ); 1601 } 1602 1603 1604 void 1605 server_start_xfrd(struct nsd *nsd, int del_db, int reload_active) 1606 { 1607 pid_t pid; 1608 int sockets[2] = {0,0}; 1609 struct ipc_handler_conn_data *data; 1610 1611 if(nsd->xfrd_listener->fd != -1) 1612 close(nsd->xfrd_listener->fd); 1613 if(del_db) { 1614 /* recreate taskdb that xfrd was using, it may be corrupt */ 1615 /* we (or reload) use nsd->mytask, and xfrd uses the other */ 1616 char* tmpfile = nsd->task[1-nsd->mytask]->fname; 1617 nsd->task[1-nsd->mytask]->fname = NULL; 1618 /* free alloc already, so udb does not shrink itself */ 1619 udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc); 1620 nsd->task[1-nsd->mytask]->alloc = NULL; 1621 udb_base_free(nsd->task[1-nsd->mytask]); 1622 /* create new file, overwrite the old one */ 1623 nsd->task[1-nsd->mytask] = task_file_create(tmpfile); 1624 free(tmpfile); 1625 } 1626 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { 1627 log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); 1628 return; 1629 } 1630 pid = fork(); 1631 switch (pid) { 1632 case -1: 1633 log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); 1634 break; 1635 default: 1636 /* PARENT: close first socket, use second one */ 1637 close(sockets[0]); 1638 if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) { 1639 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1640 } 1641 if(del_db) xfrd_free_namedb(nsd); 1642 /* use other task than I am using, since if xfrd died and is 1643 * restarted, the reload is using nsd->mytask */ 1644 nsd->mytask = 1 - nsd->mytask; 1645 1646 #ifdef HAVE_SETPROCTITLE 1647 setproctitle("xfrd"); 1648 #endif 1649 #ifdef HAVE_CPUSET_T 1650 if(nsd->use_cpu_affinity) { 1651 set_cpu_affinity(nsd->xfrd_cpuset); 1652 } 1653 #endif 1654 1655 xfrd_init(sockets[1], nsd, del_db, reload_active, pid); 1656 /* ENOTREACH */ 1657 break; 1658 case 0: 1659 /* CHILD: close second socket, use first one */ 1660 close(sockets[1]); 1661 if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) { 1662 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1663 } 1664 nsd->xfrd_listener->fd = sockets[0]; 1665 break; 1666 } 1667 /* server-parent only */ 1668 nsd->xfrd_listener->timeout = NULL; 1669 nsd->xfrd_listener->event_types = NETIO_EVENT_READ; 1670 nsd->xfrd_listener->event_handler = parent_handle_xfrd_command; 1671 /* clear ongoing ipc reads */ 1672 data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data; 1673 data->conn->is_reading = 0; 1674 } 1675 1676 /** add all soainfo to taskdb */ 1677 static void 1678 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb) 1679 { 1680 struct radnode* n; 1681 udb_ptr task_last; /* last task, mytask is empty so NULL */ 1682 /* add all SOA INFO to mytask */ 1683 udb_ptr_init(&task_last, taskudb); 1684 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 1685 task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0); 1686 } 1687 udb_ptr_unlink(&task_last, taskudb); 1688 } 1689 1690 void 1691 server_send_soa_xfrd(struct nsd* nsd, int shortsoa) 1692 { 1693 /* normally this exchanges the SOA from nsd->xfrd and the expire back. 1694 * parent fills one taskdb with soas, xfrd fills other with expires. 1695 * then they exchange and process. 1696 * shortsoa: xfrd crashes and needs to be restarted and one taskdb 1697 * may be in use by reload. Fill SOA in taskdb and give to xfrd. 1698 * expire notifications can be sent back via a normal reload later 1699 * (xfrd will wait for current running reload to finish if any). 1700 */ 1701 sig_atomic_t cmd = 0; 1702 pid_t mypid; 1703 int xfrd_sock = nsd->xfrd_listener->fd; 1704 struct udb_base* taskudb = nsd->task[nsd->mytask]; 1705 udb_ptr t; 1706 if(!shortsoa) { 1707 if(nsd->signal_hint_shutdown) { 1708 shutdown: 1709 log_msg(LOG_WARNING, "signal received, shutting down..."); 1710 server_close_all_sockets(nsd->udp, nsd->ifs); 1711 server_close_all_sockets(nsd->tcp, nsd->ifs); 1712 daemon_remote_close(nsd->rc); 1713 /* Unlink it if possible... */ 1714 unlinkpid(nsd->pidfile); 1715 unlink(nsd->task[0]->fname); 1716 unlink(nsd->task[1]->fname); 1717 #ifdef USE_ZONE_STATS 1718 unlink(nsd->zonestatfname[0]); 1719 unlink(nsd->zonestatfname[1]); 1720 #endif 1721 /* write the nsd.db to disk, wait for it to complete */ 1722 udb_base_sync(nsd->db->udb, 1); 1723 udb_base_close(nsd->db->udb); 1724 server_shutdown(nsd); 1725 /* ENOTREACH */ 1726 exit(0); 1727 } 1728 } 1729 if(shortsoa) { 1730 /* put SOA in xfrd task because mytask may be in use */ 1731 taskudb = nsd->task[1-nsd->mytask]; 1732 } 1733 1734 add_all_soa_to_task(nsd, taskudb); 1735 if(!shortsoa) { 1736 /* wait for xfrd to signal task is ready, RELOAD signal */ 1737 if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) || 1738 cmd != NSD_RELOAD) { 1739 log_msg(LOG_ERR, "did not get start signal from xfrd"); 1740 exit(1); 1741 } 1742 if(nsd->signal_hint_shutdown) { 1743 goto shutdown; 1744 } 1745 } 1746 /* give xfrd our task, signal it with RELOAD_DONE */ 1747 task_process_sync(taskudb); 1748 cmd = NSD_RELOAD_DONE; 1749 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1750 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1751 (int)nsd->pid, strerror(errno)); 1752 } 1753 mypid = getpid(); 1754 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1755 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1756 strerror(errno)); 1757 } 1758 1759 if(!shortsoa) { 1760 /* process the xfrd task works (expiry data) */ 1761 nsd->mytask = 1 - nsd->mytask; 1762 taskudb = nsd->task[nsd->mytask]; 1763 task_remap(taskudb); 1764 udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb)); 1765 while(!udb_ptr_is_null(&t)) { 1766 task_process_expire(nsd->db, TASKLIST(&t)); 1767 udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next); 1768 } 1769 udb_ptr_unlink(&t, taskudb); 1770 task_clear(taskudb); 1771 1772 /* tell xfrd that the task is emptied, signal with RELOAD_DONE */ 1773 cmd = NSD_RELOAD_DONE; 1774 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1775 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1776 (int)nsd->pid, strerror(errno)); 1777 } 1778 } 1779 } 1780 1781 #ifdef HAVE_SSL 1782 static void 1783 log_crypto_from_err(const char* str, unsigned long err) 1784 { 1785 /* error:[error code]:[library name]:[function name]:[reason string] */ 1786 char buf[128]; 1787 unsigned long e; 1788 ERR_error_string_n(err, buf, sizeof(buf)); 1789 log_msg(LOG_ERR, "%s crypto %s", str, buf); 1790 while( (e=ERR_get_error()) ) { 1791 ERR_error_string_n(e, buf, sizeof(buf)); 1792 log_msg(LOG_ERR, "and additionally crypto %s", buf); 1793 } 1794 } 1795 1796 void 1797 log_crypto_err(const char* str) 1798 { 1799 log_crypto_from_err(str, ERR_get_error()); 1800 } 1801 1802 /** true if the ssl handshake error has to be squelched from the logs */ 1803 static int 1804 squelch_err_ssl_handshake(unsigned long err) 1805 { 1806 if(verbosity >= 3) 1807 return 0; /* only squelch on low verbosity */ 1808 /* this is very specific, we could filter on ERR_GET_REASON() 1809 * (the third element in ERR_PACK) */ 1810 if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) || 1811 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) || 1812 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) || 1813 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE) 1814 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1815 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER) 1816 #endif 1817 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1818 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL) 1819 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL) 1820 # ifdef SSL_R_VERSION_TOO_LOW 1821 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW) 1822 # endif 1823 #endif 1824 ) 1825 return 1; 1826 return 0; 1827 } 1828 1829 void 1830 perform_openssl_init(void) 1831 { 1832 /* init SSL library */ 1833 #ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS 1834 ERR_load_crypto_strings(); 1835 #endif 1836 #if defined(HAVE_ERR_LOAD_SSL_STRINGS) && !defined(DEPRECATED_ERR_LOAD_SSL_STRINGS) 1837 ERR_load_SSL_strings(); 1838 #endif 1839 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO) 1840 OpenSSL_add_all_algorithms(); 1841 #else 1842 OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS 1843 | OPENSSL_INIT_ADD_ALL_DIGESTS 1844 | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); 1845 #endif 1846 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL) 1847 (void)SSL_library_init(); 1848 #else 1849 OPENSSL_init_ssl(0, NULL); 1850 #endif 1851 1852 if(!RAND_status()) { 1853 /* try to seed it */ 1854 unsigned char buf[256]; 1855 unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid(); 1856 size_t i; 1857 v = seed; 1858 for(i=0; i<256/sizeof(v); i++) { 1859 memmove(buf+i*sizeof(v), &v, sizeof(v)); 1860 v = v*seed + (unsigned int)i; 1861 } 1862 RAND_seed(buf, 256); 1863 log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time"); 1864 } 1865 } 1866 1867 static int 1868 get_ocsp(char *filename, unsigned char **ocsp) 1869 { 1870 BIO *bio; 1871 OCSP_RESPONSE *response; 1872 int len = -1; 1873 unsigned char *p, *buf; 1874 assert(filename); 1875 1876 if ((bio = BIO_new_file(filename, "r")) == NULL) { 1877 log_crypto_err("get_ocsp: BIO_new_file failed"); 1878 return -1; 1879 } 1880 1881 if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) { 1882 log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed"); 1883 BIO_free(bio); 1884 return -1; 1885 } 1886 1887 if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) { 1888 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed"); 1889 OCSP_RESPONSE_free(response); 1890 BIO_free(bio); 1891 return -1; 1892 } 1893 1894 if ((buf = malloc((size_t) len)) == NULL) { 1895 log_msg(LOG_ERR, "get_ocsp: malloc failed"); 1896 OCSP_RESPONSE_free(response); 1897 BIO_free(bio); 1898 return -1; 1899 } 1900 1901 p = buf; 1902 if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) { 1903 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed"); 1904 free(buf); 1905 OCSP_RESPONSE_free(response); 1906 BIO_free(bio); 1907 return -1; 1908 } 1909 1910 OCSP_RESPONSE_free(response); 1911 BIO_free(bio); 1912 1913 *ocsp = buf; 1914 return len; 1915 } 1916 1917 /* further setup ssl ctx after the keys are loaded */ 1918 static void 1919 listen_sslctx_setup_2(void* ctxt) 1920 { 1921 SSL_CTX* ctx = (SSL_CTX*)ctxt; 1922 (void)ctx; 1923 #if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO 1924 if(!SSL_CTX_set_ecdh_auto(ctx,1)) { 1925 /* ENOTREACH */ 1926 log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE"); 1927 } 1928 #elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME) 1929 if(1) { 1930 EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1); 1931 if (!ecdh) { 1932 log_crypto_err("could not find p256, not enabling ECDHE"); 1933 } else { 1934 if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) { 1935 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE"); 1936 } 1937 EC_KEY_free (ecdh); 1938 } 1939 } 1940 #endif 1941 } 1942 1943 static int 1944 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg)) 1945 { 1946 if(ocspdata) { 1947 unsigned char *p; 1948 if ((p=malloc(ocspdata_len)) == NULL) { 1949 log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure"); 1950 return SSL_TLSEXT_ERR_NOACK; 1951 } 1952 memcpy(p, ocspdata, ocspdata_len); 1953 if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) { 1954 log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp"); 1955 free(p); 1956 return SSL_TLSEXT_ERR_NOACK; 1957 } 1958 return SSL_TLSEXT_ERR_OK; 1959 } else { 1960 return SSL_TLSEXT_ERR_NOACK; 1961 } 1962 } 1963 1964 SSL_CTX* 1965 server_tls_ctx_setup(char* key, char* pem, char* verifypem) 1966 { 1967 SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method()); 1968 if(!ctx) { 1969 log_crypto_err("could not SSL_CTX_new"); 1970 return NULL; 1971 } 1972 /* no SSLv2, SSLv3 because has defects */ 1973 #if SSL_OP_NO_SSLv2 != 0 1974 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){ 1975 log_crypto_err("could not set SSL_OP_NO_SSLv2"); 1976 SSL_CTX_free(ctx); 1977 return NULL; 1978 } 1979 #endif 1980 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3) 1981 != SSL_OP_NO_SSLv3){ 1982 log_crypto_err("could not set SSL_OP_NO_SSLv3"); 1983 SSL_CTX_free(ctx); 1984 return 0; 1985 } 1986 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1) 1987 /* if we have tls 1.1 disable 1.0 */ 1988 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1) 1989 != SSL_OP_NO_TLSv1){ 1990 log_crypto_err("could not set SSL_OP_NO_TLSv1"); 1991 SSL_CTX_free(ctx); 1992 return 0; 1993 } 1994 #endif 1995 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2) 1996 /* if we have tls 1.2 disable 1.1 */ 1997 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1) 1998 != SSL_OP_NO_TLSv1_1){ 1999 log_crypto_err("could not set SSL_OP_NO_TLSv1_1"); 2000 SSL_CTX_free(ctx); 2001 return 0; 2002 } 2003 #endif 2004 #if defined(SSL_OP_NO_RENEGOTIATION) 2005 /* disable client renegotiation */ 2006 if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) & 2007 SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) { 2008 log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION"); 2009 SSL_CTX_free(ctx); 2010 return 0; 2011 } 2012 #endif 2013 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20) 2014 /* if we detect system-wide crypto policies, use those */ 2015 if (access( "/etc/crypto-policies/config", F_OK ) != 0 ) { 2016 /* if we have sha256, set the cipher list to have no known vulns */ 2017 if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20")) 2018 log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list"); 2019 } 2020 #endif 2021 if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) & 2022 SSL_OP_CIPHER_SERVER_PREFERENCE) != 2023 SSL_OP_CIPHER_SERVER_PREFERENCE) { 2024 log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE"); 2025 SSL_CTX_free(ctx); 2026 return 0; 2027 } 2028 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL 2029 SSL_CTX_set_security_level(ctx, 0); 2030 #endif 2031 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) { 2032 log_msg(LOG_ERR, "error for cert file: %s", pem); 2033 log_crypto_err("error in SSL_CTX use_certificate_chain_file"); 2034 SSL_CTX_free(ctx); 2035 return NULL; 2036 } 2037 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) { 2038 log_msg(LOG_ERR, "error for private key file: %s", key); 2039 log_crypto_err("Error in SSL_CTX use_PrivateKey_file"); 2040 SSL_CTX_free(ctx); 2041 return NULL; 2042 } 2043 if(!SSL_CTX_check_private_key(ctx)) { 2044 log_msg(LOG_ERR, "error for key file: %s", key); 2045 log_crypto_err("Error in SSL_CTX check_private_key"); 2046 SSL_CTX_free(ctx); 2047 return NULL; 2048 } 2049 listen_sslctx_setup_2(ctx); 2050 if(verifypem && verifypem[0]) { 2051 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) { 2052 log_crypto_err("Error in SSL_CTX verify locations"); 2053 SSL_CTX_free(ctx); 2054 return NULL; 2055 } 2056 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem)); 2057 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL); 2058 } 2059 return ctx; 2060 } 2061 2062 SSL_CTX* 2063 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile) 2064 { 2065 char *key, *pem; 2066 SSL_CTX *ctx; 2067 2068 key = nsd->options->tls_service_key; 2069 pem = nsd->options->tls_service_pem; 2070 if(!key || key[0] == 0) { 2071 log_msg(LOG_ERR, "error: no tls-service-key file specified"); 2072 return NULL; 2073 } 2074 if(!pem || pem[0] == 0) { 2075 log_msg(LOG_ERR, "error: no tls-service-pem file specified"); 2076 return NULL; 2077 } 2078 2079 /* NOTE:This mimics the existing code in Unbound 1.5.1 by supporting SSL but 2080 * raft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2*/ 2081 ctx = server_tls_ctx_setup(key, pem, verifypem); 2082 if(!ctx) { 2083 log_msg(LOG_ERR, "could not setup server TLS context"); 2084 return NULL; 2085 } 2086 if(ocspfile && ocspfile[0]) { 2087 if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) { 2088 log_crypto_err("Error reading OCSPfile"); 2089 SSL_CTX_free(ctx); 2090 return NULL; 2091 } else { 2092 VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile)); 2093 if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) { 2094 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb"); 2095 SSL_CTX_free(ctx); 2096 return NULL; 2097 } 2098 } 2099 } 2100 return ctx; 2101 } 2102 2103 /* check if tcp_handler_accept_data created for TLS dedicated port */ 2104 int 2105 using_tls_port(struct sockaddr* addr, const char* tls_port) 2106 { 2107 in_port_t port = 0; 2108 2109 if (addr->sa_family == AF_INET) 2110 port = ((struct sockaddr_in*)addr)->sin_port; 2111 #ifndef HAVE_STRUCT_SOCKADDR_IN6 2112 else 2113 port = ((struct sockaddr_in6*)addr)->sin6_port; 2114 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */ 2115 if (atoi(tls_port) == ntohs(port)) 2116 return 1; 2117 2118 return 0; 2119 } 2120 #endif 2121 2122 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ 2123 ssize_t 2124 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 2125 { 2126 uint8_t* buf = (uint8_t*) p; 2127 ssize_t total = 0; 2128 struct pollfd fd; 2129 memset(&fd, 0, sizeof(fd)); 2130 fd.fd = s; 2131 fd.events = POLLIN; 2132 2133 while( total < sz) { 2134 ssize_t ret; 2135 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000); 2136 if(ret == -1) { 2137 if(errno == EAGAIN) 2138 /* blocking read */ 2139 continue; 2140 if(errno == EINTR) { 2141 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 2142 return -1; 2143 /* other signals can be handled later */ 2144 continue; 2145 } 2146 /* some error */ 2147 return -1; 2148 } 2149 if(ret == 0) { 2150 /* operation timed out */ 2151 return -2; 2152 } 2153 ret = read(s, buf+total, sz-total); 2154 if(ret == -1) { 2155 if(errno == EAGAIN) 2156 /* blocking read */ 2157 continue; 2158 if(errno == EINTR) { 2159 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 2160 return -1; 2161 /* other signals can be handled later */ 2162 continue; 2163 } 2164 /* some error */ 2165 return -1; 2166 } 2167 if(ret == 0) { 2168 /* closed connection! */ 2169 return 0; 2170 } 2171 total += ret; 2172 } 2173 return total; 2174 } 2175 2176 static void 2177 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) 2178 { 2179 sig_atomic_t cmd = NSD_QUIT_SYNC; 2180 udb_ptr t, next; 2181 udb_base* u = nsd->task[nsd->mytask]; 2182 udb_ptr_init(&next, u); 2183 udb_ptr_new(&t, u, udb_base_get_userdata(u)); 2184 udb_base_set_userdata(u, 0); 2185 while(!udb_ptr_is_null(&t)) { 2186 /* store next in list so this one can be deleted or reused */ 2187 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); 2188 udb_rptr_zero(&TASKLIST(&t)->next, u); 2189 2190 /* process task t */ 2191 /* append results for task t and update last_task */ 2192 task_process_in_reload(nsd, u, last_task, &t); 2193 2194 /* go to next */ 2195 udb_ptr_set_ptr(&t, u, &next); 2196 2197 /* if the parent has quit, we must quit too, poll the fd for cmds */ 2198 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2199 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2200 if(cmd == NSD_QUIT) { 2201 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2202 /* sync to disk (if needed) */ 2203 udb_base_sync(nsd->db->udb, 0); 2204 /* unlink files of remainder of tasks */ 2205 while(!udb_ptr_is_null(&t)) { 2206 if(TASKLIST(&t)->task_type == task_apply_xfr) { 2207 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); 2208 } 2209 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); 2210 } 2211 udb_ptr_unlink(&t, u); 2212 udb_ptr_unlink(&next, u); 2213 exit(0); 2214 } 2215 } 2216 2217 } 2218 udb_ptr_unlink(&t, u); 2219 udb_ptr_unlink(&next, u); 2220 } 2221 2222 #ifdef BIND8_STATS 2223 static void 2224 parent_send_stats(struct nsd* nsd, int cmdfd) 2225 { 2226 size_t i; 2227 if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) { 2228 log_msg(LOG_ERR, "could not write stats to reload"); 2229 return; 2230 } 2231 for(i=0; i<nsd->child_count; i++) 2232 if(!write_socket(cmdfd, &nsd->children[i].query_count, 2233 sizeof(stc_type))) { 2234 log_msg(LOG_ERR, "could not write stats to reload"); 2235 return; 2236 } 2237 } 2238 2239 static void 2240 reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last) 2241 { 2242 struct nsdst s; 2243 stc_type* p; 2244 size_t i; 2245 if(block_read(nsd, cmdfd, &s, sizeof(s), 2246 RELOAD_SYNC_TIMEOUT) != sizeof(s)) { 2247 log_msg(LOG_ERR, "could not read stats from oldpar"); 2248 return; 2249 } 2250 s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0); 2251 s.db_mem = region_get_mem(nsd->db->region); 2252 p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s, 2253 nsd->child_count); 2254 if(!p) return; 2255 for(i=0; i<nsd->child_count; i++) { 2256 if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!= 2257 sizeof(stc_type)) 2258 return; 2259 } 2260 } 2261 #endif /* BIND8_STATS */ 2262 2263 void server_verify(struct nsd *nsd, int cmdsocket); 2264 2265 /* 2266 * Reload the database, stop parent, re-fork children and continue. 2267 * as server_main. 2268 */ 2269 static void 2270 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 2271 int cmdsocket) 2272 { 2273 pid_t mypid; 2274 sig_atomic_t cmd = NSD_QUIT_SYNC; 2275 int ret; 2276 udb_ptr last_task; 2277 struct sigaction old_sigchld, ign_sigchld; 2278 struct radnode* node; 2279 zone_type* zone; 2280 enum soainfo_hint hint; 2281 /* ignore SIGCHLD from the previous server_main that used this pid */ 2282 memset(&ign_sigchld, 0, sizeof(ign_sigchld)); 2283 ign_sigchld.sa_handler = SIG_IGN; 2284 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld); 2285 2286 #ifdef HAVE_SETPROCTITLE 2287 setproctitle("main"); 2288 #endif 2289 #ifdef HAVE_CPUSET_T 2290 if(nsd->use_cpu_affinity) { 2291 set_cpu_affinity(nsd->cpuset); 2292 } 2293 #endif 2294 2295 /* see what tasks we got from xfrd */ 2296 task_remap(nsd->task[nsd->mytask]); 2297 udb_ptr_init(&last_task, nsd->task[nsd->mytask]); 2298 udb_compact_inhibited(nsd->db->udb, 1); 2299 reload_process_tasks(nsd, &last_task, cmdsocket); 2300 udb_compact_inhibited(nsd->db->udb, 0); 2301 udb_compact(nsd->db->udb); 2302 2303 #ifndef NDEBUG 2304 if(nsd_debug_level >= 1) 2305 region_log_stats(nsd->db->region); 2306 #endif /* NDEBUG */ 2307 /* sync to disk (if needed) */ 2308 udb_base_sync(nsd->db->udb, 0); 2309 2310 initialize_dname_compression_tables(nsd); 2311 2312 #ifdef BIND8_STATS 2313 /* Restart dumping stats if required. */ 2314 time(&nsd->st.boot); 2315 set_bind8_alarm(nsd); 2316 #endif 2317 #ifdef USE_ZONE_STATS 2318 server_zonestat_realloc(nsd); /* realloc for new children */ 2319 server_zonestat_switch(nsd); 2320 #endif 2321 2322 if(nsd->options->verify_enable) { 2323 #ifdef RATELIMIT 2324 /* allocate resources for rate limiting. use a slot that is guaranteed 2325 not mapped to a file so no persistent data is overwritten */ 2326 rrl_init(nsd->child_count + 1); 2327 #endif 2328 2329 /* spin-up server and execute verifiers for each zone */ 2330 server_verify(nsd, cmdsocket); 2331 #ifdef RATELIMIT 2332 /* deallocate rate limiting resources */ 2333 rrl_deinit(nsd->child_count + 1); 2334 #endif 2335 } 2336 2337 for(node = radix_first(nsd->db->zonetree); 2338 node != NULL; 2339 node = radix_next(node)) 2340 { 2341 zone = (zone_type *)node->elem; 2342 if(zone->is_updated) { 2343 if(zone->is_bad) { 2344 nsd->mode = NSD_RELOAD_FAILED; 2345 hint = soainfo_bad; 2346 } else { 2347 hint = soainfo_ok; 2348 } 2349 /* update(s), verified or not, possibly with subsequent 2350 skipped update(s). skipped update(s) are picked up 2351 by failed update check in xfrd */ 2352 task_new_soainfo(nsd->task[nsd->mytask], &last_task, 2353 zone, hint); 2354 } else if(zone->is_skipped) { 2355 /* corrupt or inconsistent update without preceding 2356 update(s), communicate soainfo_gone */ 2357 task_new_soainfo(nsd->task[nsd->mytask], &last_task, 2358 zone, soainfo_gone); 2359 } 2360 zone->is_updated = 0; 2361 zone->is_skipped = 0; 2362 } 2363 2364 if(nsd->mode == NSD_RELOAD_FAILED) { 2365 exit(NSD_RELOAD_FAILED); 2366 } 2367 2368 /* listen for the signals of failed children again */ 2369 sigaction(SIGCHLD, &old_sigchld, NULL); 2370 #ifdef USE_DNSTAP 2371 if (nsd->dt_collector) { 2372 int *swap_fd_send; 2373 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: swap dnstap collector pipes")); 2374 /* Swap fd_send with fd_swap so old serve child and new serve 2375 * childs will not write to the same pipe ends simultaneously */ 2376 swap_fd_send = nsd->dt_collector_fd_send; 2377 nsd->dt_collector_fd_send = nsd->dt_collector_fd_swap; 2378 nsd->dt_collector_fd_swap = swap_fd_send; 2379 2380 } 2381 #endif 2382 /* Start new child processes */ 2383 if (server_start_children(nsd, server_region, netio, &nsd-> 2384 xfrd_listener->fd) != 0) { 2385 send_children_quit(nsd); 2386 exit(1); 2387 } 2388 2389 /* if the parent has quit, we must quit too, poll the fd for cmds */ 2390 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2391 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2392 if(cmd == NSD_QUIT) { 2393 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2394 send_children_quit(nsd); 2395 exit(0); 2396 } 2397 } 2398 2399 /* Send quit command to parent: blocking, wait for receipt. */ 2400 do { 2401 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 2402 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) 2403 { 2404 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s", 2405 strerror(errno)); 2406 } 2407 /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ 2408 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); 2409 ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 2410 RELOAD_SYNC_TIMEOUT); 2411 if(ret == -2) { 2412 DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry")); 2413 } 2414 } while (ret == -2); 2415 if(ret == -1) { 2416 log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s", 2417 strerror(errno)); 2418 } 2419 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd)); 2420 if(cmd == NSD_QUIT) { 2421 /* small race condition possible here, parent got quit cmd. */ 2422 send_children_quit(nsd); 2423 exit(1); 2424 } 2425 assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); 2426 #ifdef BIND8_STATS 2427 reload_do_stats(cmdsocket, nsd, &last_task); 2428 #endif 2429 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); 2430 task_process_sync(nsd->task[nsd->mytask]); 2431 #ifdef USE_ZONE_STATS 2432 server_zonestat_realloc(nsd); /* realloc for next children */ 2433 #endif 2434 2435 /* send soainfo to the xfrd process, signal it that reload is done, 2436 * it picks up the taskudb */ 2437 cmd = NSD_RELOAD_DONE; 2438 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2439 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", 2440 strerror(errno)); 2441 } 2442 mypid = getpid(); 2443 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2444 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2445 strerror(errno)); 2446 } 2447 2448 /* try to reopen file */ 2449 if (nsd->file_rotation_ok) 2450 log_reopen(nsd->log_filename, 1); 2451 /* exit reload, continue as new server_main */ 2452 } 2453 2454 /* 2455 * Get the mode depending on the signal hints that have been received. 2456 * Multiple signal hints can be received and will be handled in turn. 2457 */ 2458 static sig_atomic_t 2459 server_signal_mode(struct nsd *nsd) 2460 { 2461 if(nsd->signal_hint_quit) { 2462 nsd->signal_hint_quit = 0; 2463 return NSD_QUIT; 2464 } 2465 else if(nsd->signal_hint_shutdown) { 2466 nsd->signal_hint_shutdown = 0; 2467 return NSD_SHUTDOWN; 2468 } 2469 else if(nsd->signal_hint_child) { 2470 nsd->signal_hint_child = 0; 2471 return NSD_REAP_CHILDREN; 2472 } 2473 else if(nsd->signal_hint_reload) { 2474 nsd->signal_hint_reload = 0; 2475 return NSD_RELOAD; 2476 } 2477 else if(nsd->signal_hint_reload_hup) { 2478 nsd->signal_hint_reload_hup = 0; 2479 return NSD_RELOAD_REQ; 2480 } 2481 else if(nsd->signal_hint_stats) { 2482 nsd->signal_hint_stats = 0; 2483 #ifdef BIND8_STATS 2484 set_bind8_alarm(nsd); 2485 #endif 2486 return NSD_STATS; 2487 } 2488 else if(nsd->signal_hint_statsusr) { 2489 nsd->signal_hint_statsusr = 0; 2490 return NSD_STATS; 2491 } 2492 return NSD_RUN; 2493 } 2494 2495 /* 2496 * The main server simply waits for signals and child processes to 2497 * terminate. Child processes are restarted as necessary. 2498 */ 2499 void 2500 server_main(struct nsd *nsd) 2501 { 2502 region_type *server_region = region_create(xalloc, free); 2503 netio_type *netio = netio_create(server_region); 2504 netio_handler_type reload_listener; 2505 int reload_sockets[2] = {-1, -1}; 2506 struct timespec timeout_spec; 2507 int status; 2508 pid_t child_pid; 2509 pid_t reload_pid = -1; 2510 sig_atomic_t mode; 2511 2512 /* Ensure we are the main process */ 2513 assert(nsd->server_kind == NSD_SERVER_MAIN); 2514 2515 /* Add listener for the XFRD process */ 2516 netio_add_handler(netio, nsd->xfrd_listener); 2517 2518 /* Start the child processes that handle incoming queries */ 2519 if (server_start_children(nsd, server_region, netio, 2520 &nsd->xfrd_listener->fd) != 0) { 2521 send_children_quit(nsd); 2522 exit(1); 2523 } 2524 reload_listener.fd = -1; 2525 2526 /* This_child MUST be 0, because this is the parent process */ 2527 assert(nsd->this_child == 0); 2528 2529 /* Run the server until we get a shutdown signal */ 2530 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 2531 /* Did we receive a signal that changes our mode? */ 2532 if(mode == NSD_RUN) { 2533 nsd->mode = mode = server_signal_mode(nsd); 2534 } 2535 2536 switch (mode) { 2537 case NSD_RUN: 2538 /* see if any child processes terminated */ 2539 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) { 2540 int is_child = delete_child_pid(nsd, child_pid); 2541 if (is_child != -1 && nsd->children[is_child].need_to_exit) { 2542 if(nsd->children[is_child].child_fd == -1) 2543 nsd->children[is_child].has_exited = 1; 2544 parent_check_all_children_exited(nsd); 2545 } else if(is_child != -1) { 2546 log_msg(LOG_WARNING, 2547 "server %d died unexpectedly with status %d, restarting", 2548 (int) child_pid, status); 2549 restart_child_servers(nsd, server_region, netio, 2550 &nsd->xfrd_listener->fd); 2551 } else if (child_pid == reload_pid) { 2552 sig_atomic_t cmd = NSD_RELOAD_FAILED; 2553 pid_t mypid; 2554 log_msg(LOG_WARNING, 2555 "Reload process %d failed with status %d, continuing with old database", 2556 (int) child_pid, status); 2557 reload_pid = -1; 2558 if(reload_listener.fd != -1) close(reload_listener.fd); 2559 netio_remove_handler(netio, &reload_listener); 2560 reload_listener.fd = -1; 2561 reload_listener.event_types = NETIO_EVENT_NONE; 2562 task_process_sync(nsd->task[nsd->mytask]); 2563 /* inform xfrd reload attempt ended */ 2564 if(!write_socket(nsd->xfrd_listener->fd, 2565 &cmd, sizeof(cmd))) { 2566 log_msg(LOG_ERR, "problems " 2567 "sending SOAEND to xfrd: %s", 2568 strerror(errno)); 2569 } 2570 mypid = getpid(); 2571 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2572 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2573 strerror(errno)); 2574 } 2575 #ifdef USE_DNSTAP 2576 } else if(nsd->dt_collector && child_pid == nsd->dt_collector->dt_pid) { 2577 log_msg(LOG_WARNING, 2578 "dnstap-collector %d terminated with status %d", 2579 (int) child_pid, status); 2580 if(nsd->dt_collector) { 2581 dt_collector_close(nsd->dt_collector, nsd); 2582 dt_collector_destroy(nsd->dt_collector, nsd); 2583 nsd->dt_collector = NULL; 2584 } 2585 /* Only respawn a crashed (or exited) 2586 * dnstap-collector when not reloading, 2587 * to not induce a reload during a 2588 * reload (which would seriously 2589 * disrupt nsd procedures and lead to 2590 * unpredictable results)! 2591 * 2592 * This will *leave* a dnstap-collector 2593 * process terminated, but because 2594 * signalling of the reload process to 2595 * the main process to respawn in this 2596 * situation will be cumbersome, and 2597 * because this situation is so 2598 * specific (and therefore hopefully 2599 * extremely rare or non-existing at 2600 * all), plus the fact that we are left 2601 * with a perfectly function NSD 2602 * (besides not logging dnstap 2603 * messages), I consider it acceptable 2604 * to leave this unresolved. 2605 */ 2606 if(reload_pid == -1 && nsd->options->dnstap_enable) { 2607 nsd->dt_collector = dt_collector_create(nsd); 2608 dt_collector_start(nsd->dt_collector, nsd); 2609 nsd->mode = NSD_RELOAD_REQ; 2610 } 2611 #endif 2612 } else if(status != 0) { 2613 /* check for status, because we get 2614 * the old-servermain because reload 2615 * is the process-parent of old-main, 2616 * and we get older server-processes 2617 * that are exiting after a reload */ 2618 log_msg(LOG_WARNING, 2619 "process %d terminated with status %d", 2620 (int) child_pid, status); 2621 } 2622 } 2623 if (child_pid == -1) { 2624 if (errno == EINTR) { 2625 continue; 2626 } 2627 if (errno != ECHILD) 2628 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 2629 } 2630 if (nsd->mode != NSD_RUN) 2631 break; 2632 2633 /* timeout to collect processes. In case no sigchild happens. */ 2634 timeout_spec.tv_sec = 60; 2635 timeout_spec.tv_nsec = 0; 2636 2637 /* listen on ports, timeout for collecting terminated children */ 2638 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 2639 if (errno != EINTR) { 2640 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 2641 } 2642 } 2643 if(nsd->restart_children) { 2644 restart_child_servers(nsd, server_region, netio, 2645 &nsd->xfrd_listener->fd); 2646 nsd->restart_children = 0; 2647 } 2648 if(nsd->reload_failed) { 2649 sig_atomic_t cmd = NSD_RELOAD_FAILED; 2650 pid_t mypid; 2651 nsd->reload_failed = 0; 2652 log_msg(LOG_WARNING, 2653 "Reload process %d failed, continuing with old database", 2654 (int) reload_pid); 2655 reload_pid = -1; 2656 if(reload_listener.fd != -1) close(reload_listener.fd); 2657 netio_remove_handler(netio, &reload_listener); 2658 reload_listener.fd = -1; 2659 reload_listener.event_types = NETIO_EVENT_NONE; 2660 task_process_sync(nsd->task[nsd->mytask]); 2661 /* inform xfrd reload attempt ended */ 2662 if(!write_socket(nsd->xfrd_listener->fd, 2663 &cmd, sizeof(cmd))) { 2664 log_msg(LOG_ERR, "problems " 2665 "sending SOAEND to xfrd: %s", 2666 strerror(errno)); 2667 } 2668 mypid = getpid(); 2669 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2670 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2671 strerror(errno)); 2672 } 2673 } 2674 2675 break; 2676 case NSD_RELOAD_REQ: { 2677 sig_atomic_t cmd = NSD_RELOAD_REQ; 2678 log_msg(LOG_WARNING, "SIGHUP received, reloading..."); 2679 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2680 "main: ipc send reload_req to xfrd")); 2681 if(!write_socket(nsd->xfrd_listener->fd, 2682 &cmd, sizeof(cmd))) { 2683 log_msg(LOG_ERR, "server_main: could not send " 2684 "reload_req to xfrd: %s", strerror(errno)); 2685 } 2686 nsd->mode = NSD_RUN; 2687 } break; 2688 case NSD_RELOAD: 2689 /* Continue to run nsd after reload */ 2690 nsd->mode = NSD_RUN; 2691 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); 2692 if (reload_pid != -1) { 2693 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 2694 (int) reload_pid); 2695 break; 2696 } 2697 2698 /* switch the mytask to keep track of who owns task*/ 2699 nsd->mytask = 1 - nsd->mytask; 2700 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 2701 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 2702 reload_pid = -1; 2703 break; 2704 } 2705 2706 /* Do actual reload */ 2707 reload_pid = fork(); 2708 switch (reload_pid) { 2709 case -1: 2710 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 2711 break; 2712 default: 2713 /* PARENT */ 2714 close(reload_sockets[0]); 2715 server_reload(nsd, server_region, netio, 2716 reload_sockets[1]); 2717 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 2718 close(reload_sockets[1]); 2719 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 2720 /* drop stale xfrd ipc data */ 2721 ((struct ipc_handler_conn_data*)nsd-> 2722 xfrd_listener->user_data) 2723 ->conn->is_reading = 0; 2724 reload_pid = -1; 2725 reload_listener.fd = -1; 2726 reload_listener.event_types = NETIO_EVENT_NONE; 2727 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 2728 break; 2729 case 0: 2730 /* CHILD */ 2731 /* server_main keep running until NSD_QUIT_SYNC 2732 * received from reload. */ 2733 close(reload_sockets[1]); 2734 reload_listener.fd = reload_sockets[0]; 2735 reload_listener.timeout = NULL; 2736 reload_listener.user_data = nsd; 2737 reload_listener.event_types = NETIO_EVENT_READ; 2738 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 2739 netio_add_handler(netio, &reload_listener); 2740 reload_pid = getppid(); 2741 break; 2742 } 2743 break; 2744 case NSD_QUIT_SYNC: 2745 /* synchronisation of xfrd, parent and reload */ 2746 if(!nsd->quit_sync_done && reload_listener.fd != -1) { 2747 sig_atomic_t cmd = NSD_RELOAD; 2748 /* stop xfrd ipc writes in progress */ 2749 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2750 "main: ipc send indication reload")); 2751 if(!write_socket(nsd->xfrd_listener->fd, 2752 &cmd, sizeof(cmd))) { 2753 log_msg(LOG_ERR, "server_main: could not send reload " 2754 "indication to xfrd: %s", strerror(errno)); 2755 } 2756 /* wait for ACK from xfrd */ 2757 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 2758 nsd->quit_sync_done = 1; 2759 } 2760 nsd->mode = NSD_RUN; 2761 break; 2762 case NSD_QUIT: 2763 /* silent shutdown during reload */ 2764 if(reload_listener.fd != -1) { 2765 /* acknowledge the quit, to sync reload that we will really quit now */ 2766 sig_atomic_t cmd = NSD_RELOAD; 2767 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 2768 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2769 log_msg(LOG_ERR, "server_main: " 2770 "could not ack quit: %s", strerror(errno)); 2771 } 2772 #ifdef BIND8_STATS 2773 parent_send_stats(nsd, reload_listener.fd); 2774 #endif /* BIND8_STATS */ 2775 close(reload_listener.fd); 2776 } 2777 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); 2778 /* only quit children after xfrd has acked */ 2779 send_children_quit(nsd); 2780 2781 #ifdef MEMCLEAN /* OS collects memory pages */ 2782 region_destroy(server_region); 2783 #endif 2784 server_shutdown(nsd); 2785 2786 /* ENOTREACH */ 2787 break; 2788 case NSD_SHUTDOWN: 2789 break; 2790 case NSD_REAP_CHILDREN: 2791 /* continue; wait for child in run loop */ 2792 nsd->mode = NSD_RUN; 2793 break; 2794 case NSD_STATS: 2795 #ifdef BIND8_STATS 2796 set_children_stats(nsd); 2797 #endif 2798 nsd->mode = NSD_RUN; 2799 break; 2800 default: 2801 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); 2802 nsd->mode = NSD_RUN; 2803 break; 2804 } 2805 } 2806 log_msg(LOG_WARNING, "signal received, shutting down..."); 2807 2808 /* close opened ports to avoid race with restart of nsd */ 2809 server_close_all_sockets(nsd->udp, nsd->ifs); 2810 server_close_all_sockets(nsd->tcp, nsd->ifs); 2811 daemon_remote_close(nsd->rc); 2812 send_children_quit_and_wait(nsd); 2813 2814 /* Unlink it if possible... */ 2815 unlinkpid(nsd->pidfile); 2816 unlink(nsd->task[0]->fname); 2817 unlink(nsd->task[1]->fname); 2818 #ifdef USE_ZONE_STATS 2819 unlink(nsd->zonestatfname[0]); 2820 unlink(nsd->zonestatfname[1]); 2821 #endif 2822 #ifdef USE_DNSTAP 2823 dt_collector_close(nsd->dt_collector, nsd); 2824 #endif 2825 2826 if(reload_listener.fd != -1) { 2827 sig_atomic_t cmd = NSD_QUIT; 2828 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2829 "main: ipc send quit to reload-process")); 2830 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2831 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 2832 strerror(errno)); 2833 } 2834 fsync(reload_listener.fd); 2835 close(reload_listener.fd); 2836 /* wait for reload to finish processing */ 2837 while(1) { 2838 if(waitpid(reload_pid, NULL, 0) == -1) { 2839 if(errno == EINTR) continue; 2840 if(errno == ECHILD) break; 2841 log_msg(LOG_ERR, "waitpid(reload %d): %s", 2842 (int)reload_pid, strerror(errno)); 2843 } 2844 break; 2845 } 2846 } 2847 if(nsd->xfrd_listener->fd != -1) { 2848 /* complete quit, stop xfrd */ 2849 sig_atomic_t cmd = NSD_QUIT; 2850 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2851 "main: ipc send quit to xfrd")); 2852 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2853 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 2854 strerror(errno)); 2855 } 2856 fsync(nsd->xfrd_listener->fd); 2857 close(nsd->xfrd_listener->fd); 2858 (void)kill(nsd->pid, SIGTERM); 2859 } 2860 2861 #ifdef MEMCLEAN /* OS collects memory pages */ 2862 region_destroy(server_region); 2863 #endif 2864 /* write the nsd.db to disk, wait for it to complete */ 2865 udb_base_sync(nsd->db->udb, 1); 2866 udb_base_close(nsd->db->udb); 2867 server_shutdown(nsd); 2868 } 2869 2870 static query_state_type 2871 server_process_query(struct nsd *nsd, struct query *query, uint32_t *now_p) 2872 { 2873 return query_process(query, nsd, now_p); 2874 } 2875 2876 static query_state_type 2877 server_process_query_udp(struct nsd *nsd, struct query *query, uint32_t *now_p) 2878 { 2879 #ifdef RATELIMIT 2880 if(query_process(query, nsd, now_p) != QUERY_DISCARDED) { 2881 if(query->edns.cookie_status != COOKIE_VALID 2882 && query->edns.cookie_status != COOKIE_VALID_REUSE 2883 && rrl_process_query(query)) 2884 return rrl_slip(query); 2885 else return QUERY_PROCESSED; 2886 } 2887 return QUERY_DISCARDED; 2888 #else 2889 return query_process(query, nsd, now_p); 2890 #endif 2891 } 2892 2893 const char* 2894 nsd_event_vs(void) 2895 { 2896 #ifdef USE_MINI_EVENT 2897 return ""; 2898 #else 2899 return event_get_version(); 2900 #endif 2901 } 2902 2903 #if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS) 2904 static const char* ub_ev_backend2str(int b) 2905 { 2906 switch(b) { 2907 case EVBACKEND_SELECT: return "select"; 2908 case EVBACKEND_POLL: return "poll"; 2909 case EVBACKEND_EPOLL: return "epoll"; 2910 case EVBACKEND_KQUEUE: return "kqueue"; 2911 case EVBACKEND_DEVPOLL: return "devpoll"; 2912 case EVBACKEND_PORT: return "evport"; 2913 } 2914 return "unknown"; 2915 } 2916 #endif 2917 2918 const char* 2919 nsd_event_method(void) 2920 { 2921 #ifdef USE_MINI_EVENT 2922 return "select"; 2923 #else 2924 struct event_base* b = nsd_child_event_base(); 2925 const char* m; 2926 # ifdef EV_FEATURE_BACKENDS 2927 m = ub_ev_backend2str(ev_backend((struct ev_loop*)b)); 2928 # elif defined(HAVE_EVENT_BASE_GET_METHOD) 2929 m = event_base_get_method(b); 2930 # else 2931 m = "?"; 2932 # endif 2933 # ifdef MEMCLEAN 2934 event_base_free(b); 2935 # endif 2936 return m; 2937 #endif 2938 } 2939 2940 struct event_base* 2941 nsd_child_event_base(void) 2942 { 2943 struct event_base* base; 2944 #ifdef USE_MINI_EVENT 2945 static time_t secs; 2946 static struct timeval now; 2947 base = event_init(&secs, &now); 2948 #else 2949 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) 2950 /* libev */ 2951 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); 2952 # else 2953 /* libevent */ 2954 # ifdef HAVE_EVENT_BASE_NEW 2955 base = event_base_new(); 2956 # else 2957 base = event_init(); 2958 # endif 2959 # endif 2960 #endif 2961 return base; 2962 } 2963 2964 static void 2965 add_udp_handler( 2966 struct nsd *nsd, 2967 struct nsd_socket *sock, 2968 struct udp_handler_data *data) 2969 { 2970 struct event *handler = &data->event; 2971 2972 data->nsd = nsd; 2973 data->socket = sock; 2974 2975 memset(handler, 0, sizeof(*handler)); 2976 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data); 2977 if(event_base_set(nsd->event_base, handler) != 0) 2978 log_msg(LOG_ERR, "nsd udp: event_base_set failed"); 2979 if(event_add(handler, NULL) != 0) 2980 log_msg(LOG_ERR, "nsd udp: event_add failed"); 2981 } 2982 2983 void 2984 add_tcp_handler( 2985 struct nsd *nsd, 2986 struct nsd_socket *sock, 2987 struct tcp_accept_handler_data *data) 2988 { 2989 struct event *handler = &data->event; 2990 2991 data->nsd = nsd; 2992 data->socket = sock; 2993 2994 #ifdef HAVE_SSL 2995 if (nsd->tls_ctx && 2996 nsd->options->tls_port && 2997 using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port)) 2998 { 2999 data->tls_accept = 1; 3000 if(verbosity >= 2) { 3001 char buf[48]; 3002 addrport2str((void*)(struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf)); 3003 VERBOSITY(4, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf)); 3004 } 3005 } else { 3006 data->tls_accept = 0; 3007 } 3008 #endif 3009 3010 memset(handler, 0, sizeof(*handler)); 3011 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data); 3012 if(event_base_set(nsd->event_base, handler) != 0) 3013 log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); 3014 if(event_add(handler, NULL) != 0) 3015 log_msg(LOG_ERR, "nsd tcp: event_add failed"); 3016 data->event_added = 1; 3017 } 3018 3019 /* 3020 * Serve DNS request to verifiers (short-lived) 3021 */ 3022 void server_verify(struct nsd *nsd, int cmdsocket) 3023 { 3024 size_t size = 0; 3025 struct event cmd_event, signal_event, exit_event; 3026 struct zone *zone; 3027 3028 assert(nsd != NULL); 3029 3030 zone = verify_next_zone(nsd, NULL); 3031 if(zone == NULL) 3032 return; 3033 3034 nsd->server_region = region_create(xalloc, free); 3035 nsd->event_base = nsd_child_event_base(); 3036 3037 nsd->next_zone_to_verify = zone; 3038 nsd->verifier_count = 0; 3039 nsd->verifier_limit = nsd->options->verifier_count; 3040 size = sizeof(struct verifier) * nsd->verifier_limit; 3041 if(pipe(nsd->verifier_pipe) == -1) { 3042 log_msg(LOG_ERR, "verify: could not create pipe: %s", 3043 strerror(errno)); 3044 goto fail_pipe; 3045 } 3046 fcntl(nsd->verifier_pipe[0], F_SETFD, FD_CLOEXEC); 3047 fcntl(nsd->verifier_pipe[1], F_SETFD, FD_CLOEXEC); 3048 nsd->verifiers = region_alloc_zero(nsd->server_region, size); 3049 3050 for(size_t i = 0; i < nsd->verifier_limit; i++) { 3051 nsd->verifiers[i].nsd = nsd; 3052 nsd->verifiers[i].zone = NULL; 3053 nsd->verifiers[i].pid = -1; 3054 nsd->verifiers[i].output_stream.fd = -1; 3055 nsd->verifiers[i].output_stream.priority = LOG_INFO; 3056 nsd->verifiers[i].error_stream.fd = -1; 3057 nsd->verifiers[i].error_stream.priority = LOG_ERR; 3058 } 3059 3060 event_set(&cmd_event, cmdsocket, EV_READ|EV_PERSIST, verify_handle_command, nsd); 3061 if(event_base_set(nsd->event_base, &cmd_event) != 0 || 3062 event_add(&cmd_event, NULL) != 0) 3063 { 3064 log_msg(LOG_ERR, "verify: could not add command event"); 3065 goto fail; 3066 } 3067 3068 event_set(&signal_event, SIGCHLD, EV_SIGNAL|EV_PERSIST, verify_handle_signal, nsd); 3069 if(event_base_set(nsd->event_base, &signal_event) != 0 || 3070 signal_add(&signal_event, NULL) != 0) 3071 { 3072 log_msg(LOG_ERR, "verify: could not add signal event"); 3073 goto fail; 3074 } 3075 3076 event_set(&exit_event, nsd->verifier_pipe[0], EV_READ|EV_PERSIST, verify_handle_exit, nsd); 3077 if(event_base_set(nsd->event_base, &exit_event) != 0 || 3078 event_add(&exit_event, NULL) != 0) 3079 { 3080 log_msg(LOG_ERR, "verify: could not add exit event"); 3081 goto fail; 3082 } 3083 3084 memset(msgs, 0, sizeof(msgs)); 3085 for (int i = 0; i < NUM_RECV_PER_SELECT; i++) { 3086 queries[i] = query_create(nsd->server_region, 3087 compressed_dname_offsets, 3088 compression_table_size, compressed_dnames); 3089 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3090 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 3091 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3092 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3093 msgs[i].msg_hdr.msg_iovlen = 1; 3094 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 3095 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3096 } 3097 3098 for (size_t i = 0; i < nsd->verify_ifs; i++) { 3099 struct udp_handler_data *data; 3100 data = region_alloc_zero( 3101 nsd->server_region, sizeof(*data)); 3102 add_udp_handler(nsd, &nsd->verify_udp[i], data); 3103 } 3104 3105 tcp_accept_handler_count = nsd->verify_ifs; 3106 tcp_accept_handlers = region_alloc_array(nsd->server_region, 3107 nsd->verify_ifs, sizeof(*tcp_accept_handlers)); 3108 3109 for (size_t i = 0; i < nsd->verify_ifs; i++) { 3110 struct tcp_accept_handler_data *data; 3111 data = &tcp_accept_handlers[i]; 3112 memset(data, 0, sizeof(*data)); 3113 add_tcp_handler(nsd, &nsd->verify_tcp[i], data); 3114 } 3115 3116 while(nsd->next_zone_to_verify != NULL && 3117 nsd->verifier_count < nsd->verifier_limit) 3118 { 3119 verify_zone(nsd, nsd->next_zone_to_verify); 3120 nsd->next_zone_to_verify 3121 = verify_next_zone(nsd, nsd->next_zone_to_verify); 3122 } 3123 3124 /* short-lived main loop */ 3125 event_base_dispatch(nsd->event_base); 3126 3127 /* remove command and exit event handlers */ 3128 event_del(&exit_event); 3129 event_del(&signal_event); 3130 event_del(&cmd_event); 3131 3132 assert(nsd->next_zone_to_verify == NULL || nsd->mode == NSD_QUIT); 3133 assert(nsd->verifier_count == 0 || nsd->mode == NSD_QUIT); 3134 fail: 3135 close(nsd->verifier_pipe[0]); 3136 close(nsd->verifier_pipe[1]); 3137 fail_pipe: 3138 event_base_free(nsd->event_base); 3139 region_destroy(nsd->server_region); 3140 3141 nsd->event_base = NULL; 3142 nsd->server_region = NULL; 3143 nsd->verifier_limit = 0; 3144 nsd->verifier_pipe[0] = -1; 3145 nsd->verifier_pipe[1] = -1; 3146 nsd->verifiers = NULL; 3147 } 3148 3149 /* 3150 * Serve DNS requests. 3151 */ 3152 void 3153 server_child(struct nsd *nsd) 3154 { 3155 size_t i, from, numifs; 3156 region_type *server_region = region_create(xalloc, free); 3157 struct event_base* event_base = nsd_child_event_base(); 3158 sig_atomic_t mode; 3159 3160 if(!event_base) { 3161 log_msg(LOG_ERR, "nsd server could not create event base"); 3162 exit(1); 3163 } 3164 nsd->event_base = event_base; 3165 nsd->server_region = server_region; 3166 3167 #ifdef RATELIMIT 3168 rrl_init(nsd->this_child->child_num); 3169 #endif 3170 3171 assert(nsd->server_kind != NSD_SERVER_MAIN); 3172 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 3173 3174 #ifdef HAVE_SETPROCTITLE 3175 setproctitle("server %d", nsd->this_child->child_num + 1); 3176 #endif 3177 #ifdef HAVE_CPUSET_T 3178 if(nsd->use_cpu_affinity) { 3179 set_cpu_affinity(nsd->this_child->cpuset); 3180 } 3181 #endif 3182 3183 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 3184 server_close_all_sockets(nsd->tcp, nsd->ifs); 3185 } 3186 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 3187 server_close_all_sockets(nsd->udp, nsd->ifs); 3188 } 3189 3190 if (nsd->this_child->parent_fd != -1) { 3191 struct event *handler; 3192 struct ipc_handler_conn_data* user_data = 3193 (struct ipc_handler_conn_data*)region_alloc( 3194 server_region, sizeof(struct ipc_handler_conn_data)); 3195 user_data->nsd = nsd; 3196 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); 3197 3198 handler = (struct event*) region_alloc( 3199 server_region, sizeof(*handler)); 3200 memset(handler, 0, sizeof(*handler)); 3201 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| 3202 EV_READ, child_handle_parent_command, user_data); 3203 if(event_base_set(event_base, handler) != 0) 3204 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); 3205 if(event_add(handler, NULL) != 0) 3206 log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); 3207 } 3208 3209 if(nsd->reuseport) { 3210 numifs = nsd->ifs / nsd->reuseport; 3211 from = numifs * nsd->this_child->child_num; 3212 if(from+numifs > nsd->ifs) { /* should not happen */ 3213 from = 0; 3214 numifs = nsd->ifs; 3215 } 3216 } else { 3217 from = 0; 3218 numifs = nsd->ifs; 3219 } 3220 3221 if (nsd->server_kind & NSD_SERVER_UDP) { 3222 int child = nsd->this_child->child_num; 3223 memset(msgs, 0, sizeof(msgs)); 3224 for (i = 0; i < NUM_RECV_PER_SELECT; i++) { 3225 queries[i] = query_create(server_region, 3226 compressed_dname_offsets, 3227 compression_table_size, compressed_dnames); 3228 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3229 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 3230 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3231 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3232 msgs[i].msg_hdr.msg_iovlen = 1; 3233 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 3234 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3235 } 3236 3237 for (i = 0; i < nsd->ifs; i++) { 3238 int listen; 3239 struct udp_handler_data *data; 3240 3241 listen = nsd_bitset_isset(nsd->udp[i].servers, child); 3242 3243 if(i >= from && i < (from + numifs) && listen) { 3244 data = region_alloc_zero( 3245 nsd->server_region, sizeof(*data)); 3246 add_udp_handler(nsd, &nsd->udp[i], data); 3247 } else { 3248 /* close sockets intended for other servers */ 3249 server_close_socket(&nsd->udp[i]); 3250 } 3251 } 3252 } 3253 3254 /* 3255 * Keep track of all the TCP accept handlers so we can enable 3256 * and disable them based on the current number of active TCP 3257 * connections. 3258 */ 3259 if (nsd->server_kind & NSD_SERVER_TCP) { 3260 int child = nsd->this_child->child_num; 3261 tcp_accept_handler_count = numifs; 3262 tcp_accept_handlers = region_alloc_array(server_region, 3263 numifs, sizeof(*tcp_accept_handlers)); 3264 3265 for (i = 0; i < nsd->ifs; i++) { 3266 int listen; 3267 struct tcp_accept_handler_data *data; 3268 3269 listen = nsd_bitset_isset(nsd->tcp[i].servers, child); 3270 3271 if(i >= from && i < (from + numifs) && listen) { 3272 data = &tcp_accept_handlers[i-from]; 3273 memset(data, 0, sizeof(*data)); 3274 add_tcp_handler(nsd, &nsd->tcp[i], data); 3275 } else { 3276 /* close sockets intended for other servers */ 3277 /* 3278 * uncomment this once tcp servers are no 3279 * longer copied in the tcp fd copy line 3280 * in server_init(). 3281 server_close_socket(&nsd->tcp[i]); 3282 */ 3283 /* close sockets not meant for this server*/ 3284 if(!listen) 3285 server_close_socket(&nsd->tcp[i]); 3286 } 3287 } 3288 } else { 3289 tcp_accept_handler_count = 0; 3290 } 3291 3292 /* The main loop... */ 3293 while ((mode = nsd->mode) != NSD_QUIT) { 3294 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 3295 3296 /* Do we need to do the statistics... */ 3297 if (mode == NSD_STATS) { 3298 #ifdef BIND8_STATS 3299 int p = nsd->st.period; 3300 nsd->st.period = 1; /* force stats printout */ 3301 /* Dump the statistics */ 3302 bind8_stats(nsd); 3303 nsd->st.period = p; 3304 #else /* !BIND8_STATS */ 3305 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 3306 #endif /* BIND8_STATS */ 3307 3308 nsd->mode = NSD_RUN; 3309 } 3310 else if (mode == NSD_REAP_CHILDREN) { 3311 /* got signal, notify parent. parent reaps terminated children. */ 3312 if (nsd->this_child->parent_fd != -1) { 3313 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 3314 if (write(nsd->this_child->parent_fd, 3315 &parent_notify, 3316 sizeof(parent_notify)) == -1) 3317 { 3318 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 3319 (int) nsd->this_child->pid, strerror(errno)); 3320 } 3321 } else /* no parent, so reap 'em */ 3322 while (waitpid(-1, NULL, WNOHANG) > 0) ; 3323 nsd->mode = NSD_RUN; 3324 } 3325 else if(mode == NSD_RUN) { 3326 /* Wait for a query... */ 3327 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 3328 if (errno != EINTR) { 3329 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 3330 break; 3331 } 3332 } 3333 } else if(mode == NSD_QUIT) { 3334 /* ignore here, quit */ 3335 } else { 3336 log_msg(LOG_ERR, "mode bad value %d, back to service.", 3337 (int)mode); 3338 nsd->mode = NSD_RUN; 3339 } 3340 } 3341 3342 service_remaining_tcp(nsd); 3343 #ifdef BIND8_STATS 3344 bind8_stats(nsd); 3345 #endif /* BIND8_STATS */ 3346 3347 #ifdef MEMCLEAN /* OS collects memory pages */ 3348 #ifdef RATELIMIT 3349 rrl_deinit(nsd->this_child->child_num); 3350 #endif 3351 event_base_free(event_base); 3352 region_destroy(server_region); 3353 #endif 3354 server_shutdown(nsd); 3355 } 3356 3357 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg) 3358 { 3359 int* timed_out = (int*)arg; 3360 assert(event & EV_TIMEOUT); (void)event; 3361 /* wake up the service tcp thread, note event is no longer 3362 * registered */ 3363 *timed_out = 1; 3364 } 3365 3366 void 3367 service_remaining_tcp(struct nsd* nsd) 3368 { 3369 struct tcp_handler_data* p; 3370 struct event_base* event_base; 3371 /* check if it is needed */ 3372 if(nsd->current_tcp_count == 0 || tcp_active_list == NULL) 3373 return; 3374 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections")); 3375 #ifdef USE_DNSTAP 3376 /* remove dnstap collector, we cannot write there because the new 3377 * child process is using the file descriptor, or the child 3378 * process after that. */ 3379 dt_collector_destroy(nsd->dt_collector, nsd); 3380 nsd->dt_collector = NULL; 3381 #endif 3382 /* setup event base */ 3383 event_base = nsd_child_event_base(); 3384 if(!event_base) { 3385 log_msg(LOG_ERR, "nsd remain tcp could not create event base"); 3386 return; 3387 } 3388 /* register tcp connections */ 3389 for(p = tcp_active_list; p != NULL; p = p->next) { 3390 struct timeval timeout; 3391 int fd = p->event.ev_fd; 3392 #ifdef USE_MINI_EVENT 3393 short event = p->event.ev_flags & (EV_READ|EV_WRITE); 3394 #else 3395 short event = p->event.ev_events & (EV_READ|EV_WRITE); 3396 #endif 3397 void (*fn)(int, short, void*); 3398 #ifdef HAVE_SSL 3399 if(p->tls) { 3400 if((event&EV_READ)) 3401 fn = handle_tls_reading; 3402 else fn = handle_tls_writing; 3403 } else { 3404 #endif 3405 if((event&EV_READ)) 3406 fn = handle_tcp_reading; 3407 else fn = handle_tcp_writing; 3408 #ifdef HAVE_SSL 3409 } 3410 #endif 3411 3412 p->tcp_no_more_queries = 1; 3413 /* set timeout to 1/10 second */ 3414 if(p->tcp_timeout > 100) 3415 p->tcp_timeout = 100; 3416 timeout.tv_sec = p->tcp_timeout / 1000; 3417 timeout.tv_usec = (p->tcp_timeout % 1000)*1000; 3418 event_del(&p->event); 3419 memset(&p->event, 0, sizeof(p->event)); 3420 event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT, 3421 fn, p); 3422 if(event_base_set(event_base, &p->event) != 0) 3423 log_msg(LOG_ERR, "event base set failed"); 3424 if(event_add(&p->event, &timeout) != 0) 3425 log_msg(LOG_ERR, "event add failed"); 3426 } 3427 3428 /* handle it */ 3429 while(nsd->current_tcp_count > 0) { 3430 mode_t m = server_signal_mode(nsd); 3431 struct event timeout; 3432 struct timeval tv; 3433 int timed_out = 0; 3434 if(m == NSD_QUIT || m == NSD_SHUTDOWN || 3435 m == NSD_REAP_CHILDREN) { 3436 /* quit */ 3437 break; 3438 } 3439 /* timer */ 3440 /* have to do something every second */ 3441 tv.tv_sec = 1; 3442 tv.tv_usec = 0; 3443 memset(&timeout, 0, sizeof(timeout)); 3444 event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout, 3445 &timed_out); 3446 if(event_base_set(event_base, &timeout) != 0) 3447 log_msg(LOG_ERR, "remaintcp timer: event_base_set failed"); 3448 if(event_add(&timeout, &tv) != 0) 3449 log_msg(LOG_ERR, "remaintcp timer: event_add failed"); 3450 3451 /* service loop */ 3452 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 3453 if (errno != EINTR) { 3454 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 3455 break; 3456 } 3457 } 3458 if(!timed_out) { 3459 event_del(&timeout); 3460 } else { 3461 /* timed out, quit */ 3462 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit")); 3463 break; 3464 } 3465 } 3466 #ifdef MEMCLEAN 3467 event_base_free(event_base); 3468 #endif 3469 /* continue to quit after return */ 3470 } 3471 3472 /* Implement recvmmsg and sendmmsg if the platform does not. These functions 3473 * are always used, even if nonblocking operations are broken, in which case 3474 * NUM_RECV_PER_SELECT is defined to 1 (one). 3475 */ 3476 #if defined(HAVE_RECVMMSG) 3477 #define nsd_recvmmsg recvmmsg 3478 #else /* !HAVE_RECVMMSG */ 3479 3480 static int 3481 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, 3482 int flags, struct timespec *timeout) 3483 { 3484 unsigned int vpos = 0; 3485 ssize_t rcvd; 3486 3487 /* timeout is ignored, ensure caller does not expect it to work */ 3488 assert(timeout == NULL); (void)timeout; 3489 3490 while(vpos < vlen) { 3491 rcvd = recvfrom(sockfd, 3492 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3493 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3494 flags, 3495 msgvec[vpos].msg_hdr.msg_name, 3496 &msgvec[vpos].msg_hdr.msg_namelen); 3497 if(rcvd < 0) { 3498 break; 3499 } else { 3500 assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX); 3501 msgvec[vpos].msg_len = (unsigned int)rcvd; 3502 vpos++; 3503 } 3504 } 3505 3506 if(vpos) { 3507 /* error will be picked up next time */ 3508 return (int)vpos; 3509 } else if(errno == 0) { 3510 return 0; 3511 } else if(errno == EAGAIN) { 3512 return 0; 3513 } 3514 3515 return -1; 3516 } 3517 #endif /* HAVE_RECVMMSG */ 3518 3519 #ifdef HAVE_SENDMMSG 3520 #define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__) 3521 #else /* !HAVE_SENDMMSG */ 3522 3523 static int 3524 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags) 3525 { 3526 unsigned int vpos = 0; 3527 ssize_t snd; 3528 3529 while(vpos < vlen) { 3530 assert(msgvec[vpos].msg_hdr.msg_iovlen == 1); 3531 snd = sendto(sockfd, 3532 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3533 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3534 flags, 3535 msgvec[vpos].msg_hdr.msg_name, 3536 msgvec[vpos].msg_hdr.msg_namelen); 3537 if(snd < 0) { 3538 break; 3539 } else { 3540 msgvec[vpos].msg_len = (unsigned int)snd; 3541 vpos++; 3542 } 3543 } 3544 3545 if(vpos) { 3546 return (int)vpos; 3547 } else if(errno == 0) { 3548 return 0; 3549 } 3550 3551 return -1; 3552 } 3553 #endif /* HAVE_SENDMMSG */ 3554 3555 static int 3556 port_is_zero( 3557 #ifdef INET6 3558 struct sockaddr_storage *addr 3559 #else 3560 struct sockaddr_in *addr 3561 #endif 3562 ) 3563 { 3564 #ifdef INET6 3565 if(addr->ss_family == AF_INET6) { 3566 return (((struct sockaddr_in6 *)addr)->sin6_port) == 0; 3567 } else if(addr->ss_family == AF_INET) { 3568 return (((struct sockaddr_in *)addr)->sin_port) == 0; 3569 } 3570 return 0; 3571 #else 3572 if(addr->sin_family == AF_INET) { 3573 return addr->sin_port == 0; 3574 } 3575 return 0; 3576 #endif 3577 } 3578 3579 static void 3580 handle_udp(int fd, short event, void* arg) 3581 { 3582 struct udp_handler_data *data = (struct udp_handler_data *) arg; 3583 int received, sent, recvcount, i; 3584 struct query *q; 3585 uint32_t now = 0; 3586 3587 if (!(event & EV_READ)) { 3588 return; 3589 } 3590 recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 3591 /* this printf strangely gave a performance increase on Linux */ 3592 /* printf("recvcount %d \n", recvcount); */ 3593 if (recvcount == -1) { 3594 if (errno != EAGAIN && errno != EINTR) { 3595 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 3596 STATUP(data->nsd, rxerr); 3597 /* No zone statup */ 3598 } 3599 /* Simply no data available */ 3600 return; 3601 } 3602 for (i = 0; i < recvcount; i++) { 3603 loopstart: 3604 received = msgs[i].msg_len; 3605 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 3606 q = queries[i]; 3607 if (received == -1) { 3608 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( 3609 #if defined(HAVE_RECVMMSG) 3610 msgs[i].msg_hdr.msg_flags 3611 #else 3612 errno 3613 #endif 3614 )); 3615 STATUP(data->nsd, rxerr); 3616 /* No zone statup */ 3617 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3618 iovecs[i].iov_len = buffer_remaining(q->packet); 3619 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3620 goto swap_drop; 3621 } 3622 3623 /* Account... */ 3624 #ifdef BIND8_STATS 3625 if (data->socket->addr.ai_family == AF_INET) { 3626 STATUP(data->nsd, qudp); 3627 } else if (data->socket->addr.ai_family == AF_INET6) { 3628 STATUP(data->nsd, qudp6); 3629 } 3630 #endif 3631 3632 buffer_skip(q->packet, received); 3633 buffer_flip(q->packet); 3634 #ifdef USE_DNSTAP 3635 /* 3636 * sending UDP-query with server address (local) and client address to dnstap process 3637 */ 3638 log_addr("query from client", &q->addr); 3639 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 3640 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &q->addr, q->addrlen, 3641 q->tcp, q->packet); 3642 #endif /* USE_DNSTAP */ 3643 3644 /* Process and answer the query... */ 3645 if (server_process_query_udp(data->nsd, q, &now) != QUERY_DISCARDED) { 3646 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 3647 STATUP(data->nsd, nona); 3648 ZTATUP(data->nsd, q->zone, nona); 3649 } 3650 3651 #ifdef USE_ZONE_STATS 3652 if (data->socket->addr.ai_family == AF_INET) { 3653 ZTATUP(data->nsd, q->zone, qudp); 3654 } else if (data->socket->addr.ai_family == AF_INET6) { 3655 ZTATUP(data->nsd, q->zone, qudp6); 3656 } 3657 #endif 3658 3659 /* Add EDNS0 and TSIG info if necessary. */ 3660 query_add_optional(q, data->nsd, &now); 3661 3662 buffer_flip(q->packet); 3663 iovecs[i].iov_len = buffer_remaining(q->packet); 3664 #ifdef BIND8_STATS 3665 /* Account the rcode & TC... */ 3666 STATUP2(data->nsd, rcode, RCODE(q->packet)); 3667 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 3668 if (TC(q->packet)) { 3669 STATUP(data->nsd, truncated); 3670 ZTATUP(data->nsd, q->zone, truncated); 3671 } 3672 #endif /* BIND8_STATS */ 3673 #ifdef USE_DNSTAP 3674 /* 3675 * sending UDP-response with server address (local) and client address to dnstap process 3676 */ 3677 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 3678 log_addr("response to client", &q->addr); 3679 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, 3680 &q->addr, q->addrlen, q->tcp, q->packet, 3681 q->zone); 3682 #endif /* USE_DNSTAP */ 3683 } else { 3684 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3685 iovecs[i].iov_len = buffer_remaining(q->packet); 3686 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3687 swap_drop: 3688 STATUP(data->nsd, dropped); 3689 ZTATUP(data->nsd, q->zone, dropped); 3690 if(i != recvcount-1) { 3691 /* swap with last and decrease recvcount */ 3692 struct mmsghdr mtmp = msgs[i]; 3693 struct iovec iotmp = iovecs[i]; 3694 recvcount--; 3695 msgs[i] = msgs[recvcount]; 3696 iovecs[i] = iovecs[recvcount]; 3697 queries[i] = queries[recvcount]; 3698 msgs[recvcount] = mtmp; 3699 iovecs[recvcount] = iotmp; 3700 queries[recvcount] = q; 3701 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3702 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; 3703 goto loopstart; 3704 } else { recvcount --; } 3705 } 3706 } 3707 3708 /* send until all are sent */ 3709 i = 0; 3710 while(i<recvcount) { 3711 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 3712 if(sent == -1) { 3713 if(errno == ENOBUFS || 3714 #ifdef EWOULDBLOCK 3715 errno == EWOULDBLOCK || 3716 #endif 3717 errno == EAGAIN) { 3718 /* block to wait until send buffer avail */ 3719 int flag, errstore; 3720 if((flag = fcntl(fd, F_GETFL)) == -1) { 3721 log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno)); 3722 flag = 0; 3723 } 3724 flag &= ~O_NONBLOCK; 3725 if(fcntl(fd, F_SETFL, flag) == -1) 3726 log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno)); 3727 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 3728 errstore = errno; 3729 flag |= O_NONBLOCK; 3730 if(fcntl(fd, F_SETFL, flag) == -1) 3731 log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno)); 3732 if(sent != -1) { 3733 i += sent; 3734 continue; 3735 } 3736 errno = errstore; 3737 } 3738 if(errno == EINVAL) { 3739 /* skip the invalid argument entry, 3740 * send the remaining packets in the list */ 3741 if(!(port_is_zero((void*)&queries[i]->addr) && 3742 verbosity < 3)) { 3743 const char* es = strerror(errno); 3744 char a[64]; 3745 addrport2str((void*)&queries[i]->addr, a, sizeof(a)); 3746 log_msg(LOG_ERR, "sendmmsg skip invalid argument [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 3747 } 3748 i += 1; 3749 continue; 3750 } 3751 /* don't log transient network full errors, unless 3752 * on higher verbosity */ 3753 if(!(errno == ENOBUFS && verbosity < 1) && 3754 #ifdef EWOULDBLOCK 3755 errno != EWOULDBLOCK && 3756 #endif 3757 errno != EAGAIN) { 3758 const char* es = strerror(errno); 3759 char a[64]; 3760 addrport2str((void*)&queries[i]->addr, a, sizeof(a)); 3761 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 3762 } 3763 #ifdef BIND8_STATS 3764 data->nsd->st.txerr += recvcount-i; 3765 #endif /* BIND8_STATS */ 3766 break; 3767 } 3768 i += sent; 3769 } 3770 for(i=0; i<recvcount; i++) { 3771 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3772 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3773 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3774 } 3775 } 3776 3777 #ifdef HAVE_SSL 3778 /* 3779 * Setup an event for the tcp handler. 3780 */ 3781 static void 3782 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *), 3783 int fd, short event) 3784 { 3785 struct timeval timeout; 3786 struct event_base* ev_base; 3787 3788 timeout.tv_sec = data->nsd->tcp_timeout; 3789 timeout.tv_usec = 0L; 3790 3791 ev_base = data->event.ev_base; 3792 event_del(&data->event); 3793 memset(&data->event, 0, sizeof(data->event)); 3794 event_set(&data->event, fd, event, fn, data); 3795 if(event_base_set(ev_base, &data->event) != 0) 3796 log_msg(LOG_ERR, "event base set failed"); 3797 if(event_add(&data->event, &timeout) != 0) 3798 log_msg(LOG_ERR, "event add failed"); 3799 } 3800 #endif /* HAVE_SSL */ 3801 3802 static void 3803 cleanup_tcp_handler(struct tcp_handler_data* data) 3804 { 3805 event_del(&data->event); 3806 #ifdef HAVE_SSL 3807 if(data->tls) { 3808 SSL_shutdown(data->tls); 3809 SSL_free(data->tls); 3810 data->tls = NULL; 3811 } 3812 #endif 3813 close(data->event.ev_fd); 3814 if(data->prev) 3815 data->prev->next = data->next; 3816 else tcp_active_list = data->next; 3817 if(data->next) 3818 data->next->prev = data->prev; 3819 3820 /* 3821 * Enable the TCP accept handlers when the current number of 3822 * TCP connections is about to drop below the maximum number 3823 * of TCP connections. 3824 */ 3825 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 3826 configure_handler_event_types(EV_READ|EV_PERSIST); 3827 if(slowaccept) { 3828 event_del(&slowaccept_event); 3829 slowaccept = 0; 3830 } 3831 } 3832 --data->nsd->current_tcp_count; 3833 assert(data->nsd->current_tcp_count >= 0); 3834 3835 region_destroy(data->region); 3836 } 3837 3838 static void 3839 handle_tcp_reading(int fd, short event, void* arg) 3840 { 3841 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3842 ssize_t received; 3843 struct event_base* ev_base; 3844 struct timeval timeout; 3845 uint32_t now = 0; 3846 3847 if ((event & EV_TIMEOUT)) { 3848 /* Connection timed out. */ 3849 cleanup_tcp_handler(data); 3850 return; 3851 } 3852 3853 if ((data->nsd->tcp_query_count > 0 && 3854 data->query_count >= data->nsd->tcp_query_count) || 3855 data->tcp_no_more_queries) { 3856 /* No more queries allowed on this tcp connection. */ 3857 cleanup_tcp_handler(data); 3858 return; 3859 } 3860 3861 assert((event & EV_READ)); 3862 3863 if (data->bytes_transmitted == 0) { 3864 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 3865 } 3866 3867 /* 3868 * Check if we received the leading packet length bytes yet. 3869 */ 3870 if (data->bytes_transmitted < sizeof(uint16_t)) { 3871 received = read(fd, 3872 (char *) &data->query->tcplen 3873 + data->bytes_transmitted, 3874 sizeof(uint16_t) - data->bytes_transmitted); 3875 if (received == -1) { 3876 if (errno == EAGAIN || errno == EINTR) { 3877 /* 3878 * Read would block, wait until more 3879 * data is available. 3880 */ 3881 return; 3882 } else { 3883 char buf[48]; 3884 addr2str(&data->query->addr, buf, sizeof(buf)); 3885 #ifdef ECONNRESET 3886 if (verbosity >= 2 || errno != ECONNRESET) 3887 #endif /* ECONNRESET */ 3888 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3889 cleanup_tcp_handler(data); 3890 return; 3891 } 3892 } else if (received == 0) { 3893 /* EOF */ 3894 cleanup_tcp_handler(data); 3895 return; 3896 } 3897 3898 data->bytes_transmitted += received; 3899 if (data->bytes_transmitted < sizeof(uint16_t)) { 3900 /* 3901 * Not done with the tcplen yet, wait for more 3902 * data to become available. 3903 */ 3904 return; 3905 } 3906 3907 assert(data->bytes_transmitted == sizeof(uint16_t)); 3908 3909 data->query->tcplen = ntohs(data->query->tcplen); 3910 3911 /* 3912 * Minimum query size is: 3913 * 3914 * Size of the header (12) 3915 * + Root domain name (1) 3916 * + Query class (2) 3917 * + Query type (2) 3918 */ 3919 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 3920 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 3921 cleanup_tcp_handler(data); 3922 return; 3923 } 3924 3925 if (data->query->tcplen > data->query->maxlen) { 3926 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 3927 cleanup_tcp_handler(data); 3928 return; 3929 } 3930 3931 buffer_set_limit(data->query->packet, data->query->tcplen); 3932 } 3933 3934 assert(buffer_remaining(data->query->packet) > 0); 3935 3936 /* Read the (remaining) query data. */ 3937 received = read(fd, 3938 buffer_current(data->query->packet), 3939 buffer_remaining(data->query->packet)); 3940 if (received == -1) { 3941 if (errno == EAGAIN || errno == EINTR) { 3942 /* 3943 * Read would block, wait until more data is 3944 * available. 3945 */ 3946 return; 3947 } else { 3948 char buf[48]; 3949 addr2str(&data->query->addr, buf, sizeof(buf)); 3950 #ifdef ECONNRESET 3951 if (verbosity >= 2 || errno != ECONNRESET) 3952 #endif /* ECONNRESET */ 3953 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3954 cleanup_tcp_handler(data); 3955 return; 3956 } 3957 } else if (received == 0) { 3958 /* EOF */ 3959 cleanup_tcp_handler(data); 3960 return; 3961 } 3962 3963 data->bytes_transmitted += received; 3964 buffer_skip(data->query->packet, received); 3965 if (buffer_remaining(data->query->packet) > 0) { 3966 /* 3967 * Message not yet complete, wait for more data to 3968 * become available. 3969 */ 3970 return; 3971 } 3972 3973 assert(buffer_position(data->query->packet) == data->query->tcplen); 3974 3975 /* Account... */ 3976 #ifdef BIND8_STATS 3977 #ifndef INET6 3978 STATUP(data->nsd, ctcp); 3979 #else 3980 if (data->query->addr.ss_family == AF_INET) { 3981 STATUP(data->nsd, ctcp); 3982 } else if (data->query->addr.ss_family == AF_INET6) { 3983 STATUP(data->nsd, ctcp6); 3984 } 3985 #endif 3986 #endif /* BIND8_STATS */ 3987 3988 /* We have a complete query, process it. */ 3989 3990 /* tcp-query-count: handle query counter ++ */ 3991 data->query_count++; 3992 3993 buffer_flip(data->query->packet); 3994 #ifdef USE_DNSTAP 3995 /* 3996 * and send TCP-query with found address (local) and client address to dnstap process 3997 */ 3998 log_addr("query from client", &data->query->addr); 3999 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 4000 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 4001 data->query->addrlen, data->query->tcp, data->query->packet); 4002 #endif /* USE_DNSTAP */ 4003 data->query_state = server_process_query(data->nsd, data->query, &now); 4004 if (data->query_state == QUERY_DISCARDED) { 4005 /* Drop the packet and the entire connection... */ 4006 STATUP(data->nsd, dropped); 4007 ZTATUP(data->nsd, data->query->zone, dropped); 4008 cleanup_tcp_handler(data); 4009 return; 4010 } 4011 4012 #ifdef BIND8_STATS 4013 if (RCODE(data->query->packet) == RCODE_OK 4014 && !AA(data->query->packet)) 4015 { 4016 STATUP(data->nsd, nona); 4017 ZTATUP(data->nsd, data->query->zone, nona); 4018 } 4019 #endif /* BIND8_STATS */ 4020 4021 #ifdef USE_ZONE_STATS 4022 #ifndef INET6 4023 ZTATUP(data->nsd, data->query->zone, ctcp); 4024 #else 4025 if (data->query->addr.ss_family == AF_INET) { 4026 ZTATUP(data->nsd, data->query->zone, ctcp); 4027 } else if (data->query->addr.ss_family == AF_INET6) { 4028 ZTATUP(data->nsd, data->query->zone, ctcp6); 4029 } 4030 #endif 4031 #endif /* USE_ZONE_STATS */ 4032 4033 query_add_optional(data->query, data->nsd, &now); 4034 4035 /* Switch to the tcp write handler. */ 4036 buffer_flip(data->query->packet); 4037 data->query->tcplen = buffer_remaining(data->query->packet); 4038 #ifdef BIND8_STATS 4039 /* Account the rcode & TC... */ 4040 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 4041 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 4042 if (TC(data->query->packet)) { 4043 STATUP(data->nsd, truncated); 4044 ZTATUP(data->nsd, data->query->zone, truncated); 4045 } 4046 #endif /* BIND8_STATS */ 4047 #ifdef USE_DNSTAP 4048 /* 4049 * sending TCP-response with found (earlier) address (local) and client address to dnstap process 4050 */ 4051 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 4052 log_addr("response to client", &data->query->addr); 4053 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 4054 data->query->addrlen, data->query->tcp, data->query->packet, 4055 data->query->zone); 4056 #endif /* USE_DNSTAP */ 4057 data->bytes_transmitted = 0; 4058 4059 timeout.tv_sec = data->tcp_timeout / 1000; 4060 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4061 4062 ev_base = data->event.ev_base; 4063 event_del(&data->event); 4064 memset(&data->event, 0, sizeof(data->event)); 4065 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 4066 handle_tcp_reading, data); 4067 if(event_base_set(ev_base, &data->event) != 0) 4068 log_msg(LOG_ERR, "event base set tcpr failed"); 4069 if(event_add(&data->event, &timeout) != 0) 4070 log_msg(LOG_ERR, "event add tcpr failed"); 4071 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 4072 handle_tcp_writing(fd, EV_WRITE, data); 4073 } 4074 4075 static void 4076 handle_tcp_writing(int fd, short event, void* arg) 4077 { 4078 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4079 ssize_t sent; 4080 struct query *q = data->query; 4081 struct timeval timeout; 4082 struct event_base* ev_base; 4083 uint32_t now = 0; 4084 4085 if ((event & EV_TIMEOUT)) { 4086 /* Connection timed out. */ 4087 cleanup_tcp_handler(data); 4088 return; 4089 } 4090 4091 assert((event & EV_WRITE)); 4092 4093 if (data->bytes_transmitted < sizeof(q->tcplen)) { 4094 /* Writing the response packet length. */ 4095 uint16_t n_tcplen = htons(q->tcplen); 4096 #ifdef HAVE_WRITEV 4097 struct iovec iov[2]; 4098 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted; 4099 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 4100 iov[1].iov_base = buffer_begin(q->packet); 4101 iov[1].iov_len = buffer_limit(q->packet); 4102 sent = writev(fd, iov, 2); 4103 #else /* HAVE_WRITEV */ 4104 sent = write(fd, 4105 (const char *) &n_tcplen + data->bytes_transmitted, 4106 sizeof(n_tcplen) - data->bytes_transmitted); 4107 #endif /* HAVE_WRITEV */ 4108 if (sent == -1) { 4109 if (errno == EAGAIN || errno == EINTR) { 4110 /* 4111 * Write would block, wait until 4112 * socket becomes writable again. 4113 */ 4114 return; 4115 } else { 4116 #ifdef ECONNRESET 4117 if(verbosity >= 2 || errno != ECONNRESET) 4118 #endif /* ECONNRESET */ 4119 #ifdef EPIPE 4120 if(verbosity >= 2 || errno != EPIPE) 4121 #endif /* EPIPE 'broken pipe' */ 4122 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 4123 cleanup_tcp_handler(data); 4124 return; 4125 } 4126 } 4127 4128 data->bytes_transmitted += sent; 4129 if (data->bytes_transmitted < sizeof(q->tcplen)) { 4130 /* 4131 * Writing not complete, wait until socket 4132 * becomes writable again. 4133 */ 4134 return; 4135 } 4136 4137 #ifdef HAVE_WRITEV 4138 sent -= sizeof(n_tcplen); 4139 /* handle potential 'packet done' code */ 4140 goto packet_could_be_done; 4141 #endif 4142 } 4143 4144 sent = write(fd, 4145 buffer_current(q->packet), 4146 buffer_remaining(q->packet)); 4147 if (sent == -1) { 4148 if (errno == EAGAIN || errno == EINTR) { 4149 /* 4150 * Write would block, wait until 4151 * socket becomes writable again. 4152 */ 4153 return; 4154 } else { 4155 #ifdef ECONNRESET 4156 if(verbosity >= 2 || errno != ECONNRESET) 4157 #endif /* ECONNRESET */ 4158 #ifdef EPIPE 4159 if(verbosity >= 2 || errno != EPIPE) 4160 #endif /* EPIPE 'broken pipe' */ 4161 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 4162 cleanup_tcp_handler(data); 4163 return; 4164 } 4165 } 4166 4167 data->bytes_transmitted += sent; 4168 #ifdef HAVE_WRITEV 4169 packet_could_be_done: 4170 #endif 4171 buffer_skip(q->packet, sent); 4172 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 4173 /* 4174 * Still more data to write when socket becomes 4175 * writable again. 4176 */ 4177 return; 4178 } 4179 4180 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 4181 4182 if (data->query_state == QUERY_IN_AXFR || 4183 data->query_state == QUERY_IN_IXFR) { 4184 /* Continue processing AXFR and writing back results. */ 4185 buffer_clear(q->packet); 4186 if(data->query_state == QUERY_IN_AXFR) 4187 data->query_state = query_axfr(data->nsd, q, 0); 4188 else data->query_state = query_ixfr(data->nsd, q); 4189 if (data->query_state != QUERY_PROCESSED) { 4190 query_add_optional(data->query, data->nsd, &now); 4191 4192 /* Reset data. */ 4193 buffer_flip(q->packet); 4194 q->tcplen = buffer_remaining(q->packet); 4195 data->bytes_transmitted = 0; 4196 /* Reset timeout. */ 4197 timeout.tv_sec = data->tcp_timeout / 1000; 4198 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4199 ev_base = data->event.ev_base; 4200 event_del(&data->event); 4201 memset(&data->event, 0, sizeof(data->event)); 4202 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 4203 handle_tcp_writing, data); 4204 if(event_base_set(ev_base, &data->event) != 0) 4205 log_msg(LOG_ERR, "event base set tcpw failed"); 4206 if(event_add(&data->event, &timeout) != 0) 4207 log_msg(LOG_ERR, "event add tcpw failed"); 4208 4209 /* 4210 * Write data if/when the socket is writable 4211 * again. 4212 */ 4213 return; 4214 } 4215 } 4216 4217 /* 4218 * Done sending, wait for the next request to arrive on the 4219 * TCP socket by installing the TCP read handler. 4220 */ 4221 if ((data->nsd->tcp_query_count > 0 && 4222 data->query_count >= data->nsd->tcp_query_count) || 4223 data->tcp_no_more_queries) { 4224 4225 (void) shutdown(fd, SHUT_WR); 4226 } 4227 4228 data->bytes_transmitted = 0; 4229 4230 timeout.tv_sec = data->tcp_timeout / 1000; 4231 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4232 ev_base = data->event.ev_base; 4233 event_del(&data->event); 4234 memset(&data->event, 0, sizeof(data->event)); 4235 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 4236 handle_tcp_reading, data); 4237 if(event_base_set(ev_base, &data->event) != 0) 4238 log_msg(LOG_ERR, "event base set tcpw failed"); 4239 if(event_add(&data->event, &timeout) != 0) 4240 log_msg(LOG_ERR, "event add tcpw failed"); 4241 } 4242 4243 #ifdef HAVE_SSL 4244 /** create SSL object and associate fd */ 4245 static SSL* 4246 incoming_ssl_fd(SSL_CTX* ctx, int fd) 4247 { 4248 SSL* ssl = SSL_new((SSL_CTX*)ctx); 4249 if(!ssl) { 4250 log_crypto_err("could not SSL_new"); 4251 return NULL; 4252 } 4253 SSL_set_accept_state(ssl); 4254 (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY); 4255 if(!SSL_set_fd(ssl, fd)) { 4256 log_crypto_err("could not SSL_set_fd"); 4257 SSL_free(ssl); 4258 return NULL; 4259 } 4260 return ssl; 4261 } 4262 4263 /** TLS handshake to upgrade TCP connection */ 4264 static int 4265 tls_handshake(struct tcp_handler_data* data, int fd, int writing) 4266 { 4267 int r; 4268 if(data->shake_state == tls_hs_read_event) { 4269 /* read condition satisfied back to writing */ 4270 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4271 data->shake_state = tls_hs_none; 4272 return 1; 4273 } 4274 if(data->shake_state == tls_hs_write_event) { 4275 /* write condition satisfied back to reading */ 4276 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4277 data->shake_state = tls_hs_none; 4278 return 1; 4279 } 4280 4281 /* (continue to) setup the TLS connection */ 4282 ERR_clear_error(); 4283 r = SSL_do_handshake(data->tls); 4284 4285 if(r != 1) { 4286 int want = SSL_get_error(data->tls, r); 4287 if(want == SSL_ERROR_WANT_READ) { 4288 if(data->shake_state == tls_hs_read) { 4289 /* try again later */ 4290 return 1; 4291 } 4292 data->shake_state = tls_hs_read; 4293 /* switch back to reading mode */ 4294 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4295 return 1; 4296 } else if(want == SSL_ERROR_WANT_WRITE) { 4297 if(data->shake_state == tls_hs_write) { 4298 /* try again later */ 4299 return 1; 4300 } 4301 data->shake_state = tls_hs_write; 4302 /* switch back to writing mode */ 4303 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4304 return 1; 4305 } else { 4306 if(r == 0) 4307 VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely")); 4308 else { 4309 unsigned long err = ERR_get_error(); 4310 if(!squelch_err_ssl_handshake(err)) { 4311 char a[64], s[256]; 4312 addr2str(&data->query->addr, a, sizeof(a)); 4313 snprintf(s, sizeof(s), "TLS handshake failed from %s", a); 4314 log_crypto_from_err(s, err); 4315 } 4316 } 4317 cleanup_tcp_handler(data); 4318 return 0; 4319 } 4320 } 4321 4322 /* Use to log successful upgrade for testing - could be removed*/ 4323 VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded.")); 4324 /* set back to the event we need to have when reading (or writing) */ 4325 if(data->shake_state == tls_hs_read && writing) { 4326 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4327 } else if(data->shake_state == tls_hs_write && !writing) { 4328 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4329 } 4330 data->shake_state = tls_hs_none; 4331 return 1; 4332 } 4333 4334 /** handle TLS reading of incoming query */ 4335 static void 4336 handle_tls_reading(int fd, short event, void* arg) 4337 { 4338 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4339 ssize_t received; 4340 uint32_t now = 0; 4341 4342 if ((event & EV_TIMEOUT)) { 4343 /* Connection timed out. */ 4344 cleanup_tcp_handler(data); 4345 return; 4346 } 4347 4348 if ((data->nsd->tcp_query_count > 0 && 4349 data->query_count >= data->nsd->tcp_query_count) || 4350 data->tcp_no_more_queries) { 4351 /* No more queries allowed on this tcp connection. */ 4352 cleanup_tcp_handler(data); 4353 return; 4354 } 4355 4356 assert((event & EV_READ)); 4357 4358 if (data->bytes_transmitted == 0) { 4359 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 4360 } 4361 4362 if(data->shake_state != tls_hs_none) { 4363 if(!tls_handshake(data, fd, 0)) 4364 return; 4365 if(data->shake_state != tls_hs_none) 4366 return; 4367 } 4368 4369 /* 4370 * Check if we received the leading packet length bytes yet. 4371 */ 4372 if(data->bytes_transmitted < sizeof(uint16_t)) { 4373 ERR_clear_error(); 4374 if((received=SSL_read(data->tls, (char *) &data->query->tcplen 4375 + data->bytes_transmitted, 4376 sizeof(uint16_t) - data->bytes_transmitted)) <= 0) { 4377 int want = SSL_get_error(data->tls, received); 4378 if(want == SSL_ERROR_ZERO_RETURN) { 4379 cleanup_tcp_handler(data); 4380 return; /* shutdown, closed */ 4381 } else if(want == SSL_ERROR_WANT_READ) { 4382 /* wants to be called again */ 4383 return; 4384 } 4385 else if(want == SSL_ERROR_WANT_WRITE) { 4386 /* switch to writing */ 4387 data->shake_state = tls_hs_write_event; 4388 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4389 return; 4390 } 4391 cleanup_tcp_handler(data); 4392 log_crypto_err("could not SSL_read"); 4393 return; 4394 } 4395 4396 data->bytes_transmitted += received; 4397 if (data->bytes_transmitted < sizeof(uint16_t)) { 4398 /* 4399 * Not done with the tcplen yet, wait for more 4400 * data to become available. 4401 */ 4402 return; 4403 } 4404 4405 assert(data->bytes_transmitted == sizeof(uint16_t)); 4406 4407 data->query->tcplen = ntohs(data->query->tcplen); 4408 4409 /* 4410 * Minimum query size is: 4411 * 4412 * Size of the header (12) 4413 * + Root domain name (1) 4414 * + Query class (2) 4415 * + Query type (2) 4416 */ 4417 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 4418 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 4419 cleanup_tcp_handler(data); 4420 return; 4421 } 4422 4423 if (data->query->tcplen > data->query->maxlen) { 4424 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 4425 cleanup_tcp_handler(data); 4426 return; 4427 } 4428 4429 buffer_set_limit(data->query->packet, data->query->tcplen); 4430 } 4431 4432 assert(buffer_remaining(data->query->packet) > 0); 4433 4434 /* Read the (remaining) query data. */ 4435 ERR_clear_error(); 4436 received = SSL_read(data->tls, (void*)buffer_current(data->query->packet), 4437 (int)buffer_remaining(data->query->packet)); 4438 if(received <= 0) { 4439 int want = SSL_get_error(data->tls, received); 4440 if(want == SSL_ERROR_ZERO_RETURN) { 4441 cleanup_tcp_handler(data); 4442 return; /* shutdown, closed */ 4443 } else if(want == SSL_ERROR_WANT_READ) { 4444 /* wants to be called again */ 4445 return; 4446 } 4447 else if(want == SSL_ERROR_WANT_WRITE) { 4448 /* switch back writing */ 4449 data->shake_state = tls_hs_write_event; 4450 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4451 return; 4452 } 4453 cleanup_tcp_handler(data); 4454 log_crypto_err("could not SSL_read"); 4455 return; 4456 } 4457 4458 data->bytes_transmitted += received; 4459 buffer_skip(data->query->packet, received); 4460 if (buffer_remaining(data->query->packet) > 0) { 4461 /* 4462 * Message not yet complete, wait for more data to 4463 * become available. 4464 */ 4465 return; 4466 } 4467 4468 assert(buffer_position(data->query->packet) == data->query->tcplen); 4469 4470 /* Account... */ 4471 #ifndef INET6 4472 STATUP(data->nsd, ctls); 4473 #else 4474 if (data->query->addr.ss_family == AF_INET) { 4475 STATUP(data->nsd, ctls); 4476 } else if (data->query->addr.ss_family == AF_INET6) { 4477 STATUP(data->nsd, ctls6); 4478 } 4479 #endif 4480 4481 /* We have a complete query, process it. */ 4482 4483 /* tcp-query-count: handle query counter ++ */ 4484 data->query_count++; 4485 4486 buffer_flip(data->query->packet); 4487 #ifdef USE_DNSTAP 4488 /* 4489 * and send TCP-query with found address (local) and client address to dnstap process 4490 */ 4491 log_addr("query from client", &data->query->addr); 4492 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 4493 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 4494 data->query->addrlen, data->query->tcp, data->query->packet); 4495 #endif /* USE_DNSTAP */ 4496 data->query_state = server_process_query(data->nsd, data->query, &now); 4497 if (data->query_state == QUERY_DISCARDED) { 4498 /* Drop the packet and the entire connection... */ 4499 STATUP(data->nsd, dropped); 4500 ZTATUP(data->nsd, data->query->zone, dropped); 4501 cleanup_tcp_handler(data); 4502 return; 4503 } 4504 4505 #ifdef BIND8_STATS 4506 if (RCODE(data->query->packet) == RCODE_OK 4507 && !AA(data->query->packet)) 4508 { 4509 STATUP(data->nsd, nona); 4510 ZTATUP(data->nsd, data->query->zone, nona); 4511 } 4512 #endif /* BIND8_STATS */ 4513 4514 #ifdef USE_ZONE_STATS 4515 #ifndef INET6 4516 ZTATUP(data->nsd, data->query->zone, ctls); 4517 #else 4518 if (data->query->addr.ss_family == AF_INET) { 4519 ZTATUP(data->nsd, data->query->zone, ctls); 4520 } else if (data->query->addr.ss_family == AF_INET6) { 4521 ZTATUP(data->nsd, data->query->zone, ctls6); 4522 } 4523 #endif 4524 #endif /* USE_ZONE_STATS */ 4525 4526 query_add_optional(data->query, data->nsd, &now); 4527 4528 /* Switch to the tcp write handler. */ 4529 buffer_flip(data->query->packet); 4530 data->query->tcplen = buffer_remaining(data->query->packet); 4531 #ifdef BIND8_STATS 4532 /* Account the rcode & TC... */ 4533 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 4534 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 4535 if (TC(data->query->packet)) { 4536 STATUP(data->nsd, truncated); 4537 ZTATUP(data->nsd, data->query->zone, truncated); 4538 } 4539 #endif /* BIND8_STATS */ 4540 #ifdef USE_DNSTAP 4541 /* 4542 * sending TCP-response with found (earlier) address (local) and client address to dnstap process 4543 */ 4544 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 4545 log_addr("response to client", &data->query->addr); 4546 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 4547 data->query->addrlen, data->query->tcp, data->query->packet, 4548 data->query->zone); 4549 #endif /* USE_DNSTAP */ 4550 data->bytes_transmitted = 0; 4551 4552 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4553 4554 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 4555 handle_tls_writing(fd, EV_WRITE, data); 4556 } 4557 4558 /** handle TLS writing of outgoing response */ 4559 static void 4560 handle_tls_writing(int fd, short event, void* arg) 4561 { 4562 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4563 ssize_t sent; 4564 struct query *q = data->query; 4565 /* static variable that holds reassembly buffer used to put the 4566 * TCP length in front of the packet, like writev. */ 4567 static buffer_type* global_tls_temp_buffer = NULL; 4568 buffer_type* write_buffer; 4569 uint32_t now = 0; 4570 4571 if ((event & EV_TIMEOUT)) { 4572 /* Connection timed out. */ 4573 cleanup_tcp_handler(data); 4574 return; 4575 } 4576 4577 assert((event & EV_WRITE)); 4578 4579 if(data->shake_state != tls_hs_none) { 4580 if(!tls_handshake(data, fd, 1)) 4581 return; 4582 if(data->shake_state != tls_hs_none) 4583 return; 4584 } 4585 4586 (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE); 4587 4588 /* If we are writing the start of a message, we must include the length 4589 * this is done with a copy into write_buffer. */ 4590 write_buffer = NULL; 4591 if (data->bytes_transmitted == 0) { 4592 if(!global_tls_temp_buffer) { 4593 /* gets deallocated when nsd shuts down from 4594 * nsd.region */ 4595 global_tls_temp_buffer = buffer_create(nsd.region, 4596 QIOBUFSZ + sizeof(q->tcplen)); 4597 if (!global_tls_temp_buffer) { 4598 return; 4599 } 4600 } 4601 write_buffer = global_tls_temp_buffer; 4602 buffer_clear(write_buffer); 4603 buffer_write_u16(write_buffer, q->tcplen); 4604 buffer_write(write_buffer, buffer_current(q->packet), 4605 (int)buffer_remaining(q->packet)); 4606 buffer_flip(write_buffer); 4607 } else { 4608 write_buffer = q->packet; 4609 } 4610 4611 /* Write the response */ 4612 ERR_clear_error(); 4613 sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer)); 4614 if(sent <= 0) { 4615 int want = SSL_get_error(data->tls, sent); 4616 if(want == SSL_ERROR_ZERO_RETURN) { 4617 cleanup_tcp_handler(data); 4618 /* closed */ 4619 } else if(want == SSL_ERROR_WANT_READ) { 4620 /* switch back to reading */ 4621 data->shake_state = tls_hs_read_event; 4622 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 4623 } else if(want != SSL_ERROR_WANT_WRITE) { 4624 cleanup_tcp_handler(data); 4625 log_crypto_err("could not SSL_write"); 4626 } 4627 return; 4628 } 4629 4630 buffer_skip(write_buffer, sent); 4631 if(buffer_remaining(write_buffer) != 0) { 4632 /* If not all sent, sync up the real buffer if it wasn't used.*/ 4633 if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) { 4634 buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen)); 4635 } 4636 } 4637 4638 data->bytes_transmitted += sent; 4639 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 4640 /* 4641 * Still more data to write when socket becomes 4642 * writable again. 4643 */ 4644 return; 4645 } 4646 4647 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 4648 4649 if (data->query_state == QUERY_IN_AXFR || 4650 data->query_state == QUERY_IN_IXFR) { 4651 /* Continue processing AXFR and writing back results. */ 4652 buffer_clear(q->packet); 4653 if(data->query_state == QUERY_IN_AXFR) 4654 data->query_state = query_axfr(data->nsd, q, 0); 4655 else data->query_state = query_ixfr(data->nsd, q); 4656 if (data->query_state != QUERY_PROCESSED) { 4657 query_add_optional(data->query, data->nsd, &now); 4658 4659 /* Reset data. */ 4660 buffer_flip(q->packet); 4661 q->tcplen = buffer_remaining(q->packet); 4662 data->bytes_transmitted = 0; 4663 /* Reset to writing mode. */ 4664 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4665 4666 /* 4667 * Write data if/when the socket is writable 4668 * again. 4669 */ 4670 return; 4671 } 4672 } 4673 4674 /* 4675 * Done sending, wait for the next request to arrive on the 4676 * TCP socket by installing the TCP read handler. 4677 */ 4678 if ((data->nsd->tcp_query_count > 0 && 4679 data->query_count >= data->nsd->tcp_query_count) || 4680 data->tcp_no_more_queries) { 4681 4682 (void) shutdown(fd, SHUT_WR); 4683 } 4684 4685 data->bytes_transmitted = 0; 4686 4687 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 4688 } 4689 #endif 4690 4691 static void 4692 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), 4693 void* ATTR_UNUSED(arg)) 4694 { 4695 if(slowaccept) { 4696 configure_handler_event_types(EV_PERSIST | EV_READ); 4697 slowaccept = 0; 4698 } 4699 } 4700 4701 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) 4702 { 4703 #ifndef HAVE_ACCEPT4 4704 int s = accept(fd, addr, addrlen); 4705 if (s != -1) { 4706 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 4707 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 4708 close(s); 4709 s = -1; 4710 errno=EINTR; /* stop error printout as error in accept4 4711 by setting this errno, it omits printout, in 4712 later code that calls nsd_accept4 */ 4713 } 4714 } 4715 return s; 4716 #else 4717 return accept4(fd, addr, addrlen, SOCK_NONBLOCK); 4718 #endif /* HAVE_ACCEPT4 */ 4719 } 4720 4721 /* 4722 * Handle an incoming TCP connection. The connection is accepted and 4723 * a new TCP reader event handler is added. The TCP handler 4724 * is responsible for cleanup when the connection is closed. 4725 */ 4726 static void 4727 handle_tcp_accept(int fd, short event, void* arg) 4728 { 4729 struct tcp_accept_handler_data *data 4730 = (struct tcp_accept_handler_data *) arg; 4731 int s; 4732 int reject = 0; 4733 struct tcp_handler_data *tcp_data; 4734 region_type *tcp_region; 4735 #ifdef INET6 4736 struct sockaddr_storage addr; 4737 #else 4738 struct sockaddr_in addr; 4739 #endif 4740 socklen_t addrlen; 4741 struct timeval timeout; 4742 4743 if (!(event & EV_READ)) { 4744 return; 4745 } 4746 4747 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 4748 reject = data->nsd->options->tcp_reject_overflow; 4749 if (!reject) { 4750 return; 4751 } 4752 } 4753 4754 /* Accept it... */ 4755 addrlen = sizeof(addr); 4756 s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen); 4757 if (s == -1) { 4758 /** 4759 * EMFILE and ENFILE is a signal that the limit of open 4760 * file descriptors has been reached. Pause accept(). 4761 * EINTR is a signal interrupt. The others are various OS ways 4762 * of saying that the client has closed the connection. 4763 */ 4764 if (errno == EMFILE || errno == ENFILE) { 4765 if (!slowaccept) { 4766 /* disable accept events */ 4767 struct timeval tv; 4768 configure_handler_event_types(0); 4769 tv.tv_sec = SLOW_ACCEPT_TIMEOUT; 4770 tv.tv_usec = 0L; 4771 memset(&slowaccept_event, 0, 4772 sizeof(slowaccept_event)); 4773 event_set(&slowaccept_event, -1, EV_TIMEOUT, 4774 handle_slowaccept_timeout, NULL); 4775 (void)event_base_set(data->event.ev_base, 4776 &slowaccept_event); 4777 (void)event_add(&slowaccept_event, &tv); 4778 slowaccept = 1; 4779 /* We don't want to spam the logs here */ 4780 } 4781 } else if (errno != EINTR 4782 && errno != EWOULDBLOCK 4783 #ifdef ECONNABORTED 4784 && errno != ECONNABORTED 4785 #endif /* ECONNABORTED */ 4786 #ifdef EPROTO 4787 && errno != EPROTO 4788 #endif /* EPROTO */ 4789 ) { 4790 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 4791 } 4792 return; 4793 } 4794 4795 if (reject) { 4796 shutdown(s, SHUT_RDWR); 4797 close(s); 4798 return; 4799 } 4800 4801 /* 4802 * This region is deallocated when the TCP connection is 4803 * closed by the TCP handler. 4804 */ 4805 tcp_region = region_create(xalloc, free); 4806 tcp_data = (struct tcp_handler_data *) region_alloc( 4807 tcp_region, sizeof(struct tcp_handler_data)); 4808 tcp_data->region = tcp_region; 4809 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 4810 compression_table_size, compressed_dnames); 4811 tcp_data->nsd = data->nsd; 4812 tcp_data->query_count = 0; 4813 #ifdef HAVE_SSL 4814 tcp_data->shake_state = tls_hs_none; 4815 tcp_data->tls = NULL; 4816 #endif 4817 tcp_data->prev = NULL; 4818 tcp_data->next = NULL; 4819 4820 tcp_data->query_state = QUERY_PROCESSED; 4821 tcp_data->bytes_transmitted = 0; 4822 memcpy(&tcp_data->query->addr, &addr, addrlen); 4823 tcp_data->query->addrlen = addrlen; 4824 4825 tcp_data->tcp_no_more_queries = 0; 4826 tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000; 4827 if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) { 4828 /* very busy, give smaller timeout */ 4829 tcp_data->tcp_timeout = 200; 4830 } 4831 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4832 timeout.tv_sec = tcp_data->tcp_timeout / 1000; 4833 timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000; 4834 4835 #ifdef USE_DNSTAP 4836 /* save the address of the connection */ 4837 tcp_data->socket = data->socket; 4838 #endif /* USE_DNSTAP */ 4839 4840 #ifdef HAVE_SSL 4841 if (data->tls_accept) { 4842 tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s); 4843 if(!tcp_data->tls) { 4844 close(s); 4845 return; 4846 } 4847 tcp_data->shake_state = tls_hs_read; 4848 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4849 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4850 handle_tls_reading, tcp_data); 4851 } else { 4852 #endif 4853 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4854 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4855 handle_tcp_reading, tcp_data); 4856 #ifdef HAVE_SSL 4857 } 4858 #endif 4859 if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) { 4860 log_msg(LOG_ERR, "cannot set tcp event base"); 4861 close(s); 4862 region_destroy(tcp_region); 4863 return; 4864 } 4865 if(event_add(&tcp_data->event, &timeout) != 0) { 4866 log_msg(LOG_ERR, "cannot add tcp to event base"); 4867 close(s); 4868 region_destroy(tcp_region); 4869 return; 4870 } 4871 if(tcp_active_list) { 4872 tcp_active_list->prev = tcp_data; 4873 tcp_data->next = tcp_active_list; 4874 } 4875 tcp_active_list = tcp_data; 4876 4877 /* 4878 * Keep track of the total number of TCP handlers installed so 4879 * we can stop accepting connections when the maximum number 4880 * of simultaneous TCP connections is reached. 4881 * 4882 * If tcp-reject-overflow is enabled, however, then we do not 4883 * change the handler event type; we keep it as-is and accept 4884 * overflow TCP connections only so that we can forcibly kill 4885 * them off. 4886 */ 4887 ++data->nsd->current_tcp_count; 4888 if (!data->nsd->options->tcp_reject_overflow && 4889 data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) 4890 { 4891 configure_handler_event_types(0); 4892 } 4893 } 4894 4895 static void 4896 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) 4897 { 4898 size_t i; 4899 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4900 for (i = 0; i < nsd->child_count; ++i) { 4901 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) { 4902 if (write(nsd->children[i].child_fd, 4903 &command, 4904 sizeof(command)) == -1) 4905 { 4906 if(errno != EAGAIN && errno != EINTR) 4907 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 4908 (int) command, 4909 (int) nsd->children[i].pid, 4910 strerror(errno)); 4911 } else if (timeout > 0) { 4912 (void)block_read(NULL, 4913 nsd->children[i].child_fd, 4914 &command, sizeof(command), timeout); 4915 } 4916 fsync(nsd->children[i].child_fd); 4917 close(nsd->children[i].child_fd); 4918 nsd->children[i].child_fd = -1; 4919 } 4920 } 4921 } 4922 4923 static void 4924 send_children_quit(struct nsd* nsd) 4925 { 4926 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); 4927 send_children_command(nsd, NSD_QUIT, 0); 4928 } 4929 4930 static void 4931 send_children_quit_and_wait(struct nsd* nsd) 4932 { 4933 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait")); 4934 send_children_command(nsd, NSD_QUIT_CHILD, 3); 4935 } 4936 4937 #ifdef BIND8_STATS 4938 static void 4939 set_children_stats(struct nsd* nsd) 4940 { 4941 size_t i; 4942 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4943 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 4944 for (i = 0; i < nsd->child_count; ++i) { 4945 nsd->children[i].need_to_send_STATS = 1; 4946 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 4947 } 4948 } 4949 #endif /* BIND8_STATS */ 4950 4951 static void 4952 configure_handler_event_types(short event_types) 4953 { 4954 size_t i; 4955 4956 for (i = 0; i < tcp_accept_handler_count; ++i) { 4957 struct event* handler = &tcp_accept_handlers[i].event; 4958 if(event_types) { 4959 /* reassign */ 4960 int fd = handler->ev_fd; 4961 struct event_base* base = handler->ev_base; 4962 if(tcp_accept_handlers[i].event_added) 4963 event_del(handler); 4964 memset(handler, 0, sizeof(*handler)); 4965 event_set(handler, fd, event_types, 4966 handle_tcp_accept, &tcp_accept_handlers[i]); 4967 if(event_base_set(base, handler) != 0) 4968 log_msg(LOG_ERR, "conhand: cannot event_base"); 4969 if(event_add(handler, NULL) != 0) 4970 log_msg(LOG_ERR, "conhand: cannot event_add"); 4971 tcp_accept_handlers[i].event_added = 1; 4972 } else { 4973 /* remove */ 4974 if(tcp_accept_handlers[i].event_added) { 4975 event_del(handler); 4976 tcp_accept_handlers[i].event_added = 0; 4977 } 4978 } 4979 } 4980 } 4981