1 /* 2 * server.c -- nsd(8) network input/output 3 * 4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/param.h> 14 #include <limits.h> 15 #include <sys/socket.h> 16 #include <sys/uio.h> 17 #include <sys/wait.h> 18 19 #include <netinet/in.h> 20 #ifdef USE_TCP_FASTOPEN 21 #include <netinet/tcp.h> 22 #endif 23 #include <arpa/inet.h> 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <stddef.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <time.h> 34 #include <unistd.h> 35 #include <signal.h> 36 #include <netdb.h> 37 #include <poll.h> 38 #ifdef HAVE_SYS_RANDOM_H 39 #include <sys/random.h> 40 #endif 41 #ifndef SHUT_WR 42 #define SHUT_WR 1 43 #endif 44 #ifdef HAVE_MMAP 45 #include <sys/mman.h> 46 #endif /* HAVE_MMAP */ 47 #ifdef HAVE_OPENSSL_RAND_H 48 #include <openssl/rand.h> 49 #endif 50 #ifdef HAVE_OPENSSL_SSL_H 51 #include <openssl/ssl.h> 52 #endif 53 #ifdef HAVE_OPENSSL_ERR_H 54 #include <openssl/err.h> 55 #endif 56 #ifdef HAVE_OPENSSL_OCSP_H 57 #include <openssl/ocsp.h> 58 #endif 59 #ifndef USE_MINI_EVENT 60 # ifdef HAVE_EVENT_H 61 # include <event.h> 62 # else 63 # include <event2/event.h> 64 # include "event2/event_struct.h" 65 # include "event2/event_compat.h" 66 # endif 67 #else 68 # include "mini_event.h" 69 #endif 70 71 #include "axfr.h" 72 #include "namedb.h" 73 #include "netio.h" 74 #include "xfrd.h" 75 #include "xfrd-tcp.h" 76 #include "xfrd-disk.h" 77 #include "difffile.h" 78 #include "nsec3.h" 79 #include "ipc.h" 80 #include "udb.h" 81 #include "remote.h" 82 #include "lookup3.h" 83 #include "rrl.h" 84 #include "ixfr.h" 85 #ifdef USE_DNSTAP 86 #include "dnstap/dnstap_collector.h" 87 #endif 88 #include "verify.h" 89 90 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */ 91 92 #ifdef USE_DNSTAP 93 /* 94 * log_addr() - the function to print sockaddr_in/sockaddr_in6 structures content 95 * just like its done in Unbound via the same log_addr(VERB_LEVEL, const char*, sockaddr_storage*) 96 */ 97 static void 98 log_addr(const char* descr, 99 #ifdef INET6 100 struct sockaddr_storage* addr 101 #else 102 struct sockaddr_in* addr 103 #endif 104 ) 105 { 106 char str_buf[64]; 107 if(verbosity < 6) 108 return; 109 if( 110 #ifdef INET6 111 addr->ss_family == AF_INET 112 #else 113 addr->sin_family == AF_INET 114 #endif 115 ) { 116 struct sockaddr_in* s = (struct sockaddr_in*)addr; 117 inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf)); 118 VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port))); 119 #ifdef INET6 120 } else { 121 struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr; 122 inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf)); 123 VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port))); 124 #endif 125 } 126 } 127 #endif /* USE_DNSTAP */ 128 129 #ifdef USE_TCP_FASTOPEN 130 #define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen" 131 #define TCP_FASTOPEN_SERVER_BIT_MASK 0x2 132 #endif 133 134 /* 135 * Data for the UDP handlers. 136 */ 137 struct udp_handler_data 138 { 139 struct nsd *nsd; 140 struct nsd_socket *socket; 141 struct event event; 142 }; 143 144 struct tcp_accept_handler_data { 145 struct nsd *nsd; 146 struct nsd_socket *socket; 147 int event_added; 148 struct event event; 149 #ifdef HAVE_SSL 150 /* handler accepts TLS connections on the dedicated port */ 151 int tls_accept; 152 #endif 153 }; 154 155 /* 156 * These globals are used to enable the TCP accept handlers 157 * when the number of TCP connection drops below the maximum 158 * number of TCP connections. 159 */ 160 static size_t tcp_accept_handler_count; 161 static struct tcp_accept_handler_data *tcp_accept_handlers; 162 163 static struct event slowaccept_event; 164 static int slowaccept; 165 166 #ifdef HAVE_SSL 167 static unsigned char *ocspdata = NULL; 168 static long ocspdata_len = 0; 169 #endif 170 171 #ifdef NONBLOCKING_IS_BROKEN 172 /* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to 173 read multiple times from a socket when reported ready by select. */ 174 # define NUM_RECV_PER_SELECT (1) 175 #else /* !NONBLOCKING_IS_BROKEN */ 176 # define NUM_RECV_PER_SELECT (100) 177 #endif /* NONBLOCKING_IS_BROKEN */ 178 179 #ifndef HAVE_MMSGHDR 180 struct mmsghdr { 181 struct msghdr msg_hdr; 182 unsigned int msg_len; 183 }; 184 #endif 185 186 static struct mmsghdr msgs[NUM_RECV_PER_SELECT]; 187 static struct iovec iovecs[NUM_RECV_PER_SELECT]; 188 static struct query *queries[NUM_RECV_PER_SELECT]; 189 190 /* 191 * Data for the TCP connection handlers. 192 * 193 * The TCP handlers use non-blocking I/O. This is necessary to avoid 194 * blocking the entire server on a slow TCP connection, but does make 195 * reading from and writing to the socket more complicated. 196 * 197 * Basically, whenever a read/write would block (indicated by the 198 * EAGAIN errno variable) we remember the position we were reading 199 * from/writing to and return from the TCP reading/writing event 200 * handler. When the socket becomes readable/writable again we 201 * continue from the same position. 202 */ 203 struct tcp_handler_data 204 { 205 /* 206 * The region used to allocate all TCP connection related 207 * data, including this structure. This region is destroyed 208 * when the connection is closed. 209 */ 210 region_type* region; 211 212 /* 213 * The global nsd structure. 214 */ 215 struct nsd* nsd; 216 217 /* 218 * The current query data for this TCP connection. 219 */ 220 query_type* query; 221 222 /* 223 * The query_state is used to remember if we are performing an 224 * AXFR, if we're done processing, or if we should discard the 225 * query and connection. 226 */ 227 query_state_type query_state; 228 229 /* 230 * The event for the file descriptor and tcp timeout 231 */ 232 struct event event; 233 234 /* 235 * The bytes_transmitted field is used to remember the number 236 * of bytes transmitted when receiving or sending a DNS 237 * packet. The count includes the two additional bytes used 238 * to specify the packet length on a TCP connection. 239 */ 240 size_t bytes_transmitted; 241 242 /* 243 * The number of queries handled by this specific TCP connection. 244 */ 245 int query_count; 246 247 /* 248 * The timeout in msec for this tcp connection 249 */ 250 int tcp_timeout; 251 252 /* 253 * If the connection is allowed to have further queries on it. 254 */ 255 int tcp_no_more_queries; 256 257 #ifdef USE_DNSTAP 258 /* the socket of the accept socket to find proper service (local) address the socket is bound to. */ 259 struct nsd_socket *socket; 260 #endif /* USE_DNSTAP */ 261 262 #ifdef HAVE_SSL 263 /* 264 * TLS object. 265 */ 266 SSL* tls; 267 268 /* 269 * TLS handshake state. 270 */ 271 enum { tls_hs_none, tls_hs_read, tls_hs_write, 272 tls_hs_read_event, tls_hs_write_event } shake_state; 273 #endif 274 /* list of connections, for service of remaining tcp channels */ 275 struct tcp_handler_data *prev, *next; 276 }; 277 /* global that is the list of active tcp channels */ 278 static struct tcp_handler_data *tcp_active_list = NULL; 279 280 /* 281 * Handle incoming queries on the UDP server sockets. 282 */ 283 static void handle_udp(int fd, short event, void* arg); 284 285 /* 286 * Handle incoming connections on the TCP sockets. These handlers 287 * usually wait for the NETIO_EVENT_READ event (indicating an incoming 288 * connection) but are disabled when the number of current TCP 289 * connections is equal to the maximum number of TCP connections. 290 * Disabling is done by changing the handler to wait for the 291 * NETIO_EVENT_NONE type. This is done using the function 292 * configure_tcp_accept_handlers. 293 */ 294 static void handle_tcp_accept(int fd, short event, void* arg); 295 296 /* 297 * Handle incoming queries on a TCP connection. The TCP connections 298 * are configured to be non-blocking and the handler may be called 299 * multiple times before a complete query is received. 300 */ 301 static void handle_tcp_reading(int fd, short event, void* arg); 302 303 /* 304 * Handle outgoing responses on a TCP connection. The TCP connections 305 * are configured to be non-blocking and the handler may be called 306 * multiple times before a complete response is sent. 307 */ 308 static void handle_tcp_writing(int fd, short event, void* arg); 309 310 #ifdef HAVE_SSL 311 /* Create SSL object and associate fd */ 312 static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd); 313 /* 314 * Handle TLS handshake. May be called multiple times if incomplete. 315 */ 316 static int tls_handshake(struct tcp_handler_data* data, int fd, int writing); 317 318 /* 319 * Handle incoming queries on a TLS over TCP connection. The TLS 320 * connections are configured to be non-blocking and the handler may 321 * be called multiple times before a complete query is received. 322 */ 323 static void handle_tls_reading(int fd, short event, void* arg); 324 325 /* 326 * Handle outgoing responses on a TLS over TCP connection. The TLS 327 * connections are configured to be non-blocking and the handler may 328 * be called multiple times before a complete response is sent. 329 */ 330 static void handle_tls_writing(int fd, short event, void* arg); 331 #endif 332 333 /* 334 * Send all children the quit nonblocking, then close pipe. 335 */ 336 static void send_children_quit(struct nsd* nsd); 337 /* same, for shutdown time, waits for child to exit to avoid restart issues */ 338 static void send_children_quit_and_wait(struct nsd* nsd); 339 340 /* set childrens flags to send NSD_STATS to them */ 341 #ifdef BIND8_STATS 342 static void set_children_stats(struct nsd* nsd); 343 #endif /* BIND8_STATS */ 344 345 /* 346 * Change the event types the HANDLERS are interested in to EVENT_TYPES. 347 */ 348 static void configure_handler_event_types(short event_types); 349 350 static uint16_t *compressed_dname_offsets = 0; 351 static uint32_t compression_table_capacity = 0; 352 static uint32_t compression_table_size = 0; 353 static domain_type* compressed_dnames[MAXRRSPP]; 354 355 #ifdef USE_TCP_FASTOPEN 356 /* Checks to see if the kernel value must be manually changed in order for 357 TCP Fast Open to support server mode */ 358 static void report_tcp_fastopen_config() { 359 360 int tcp_fastopen_fp; 361 uint8_t tcp_fastopen_value; 362 363 if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) { 364 log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 365 } 366 if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) { 367 log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 368 close(tcp_fastopen_fp); 369 } 370 if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) { 371 log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n"); 372 log_msg(LOG_WARNING, "However the kernel parameters are not configured to support TCP_FASTOPEN in server mode.\n"); 373 log_msg(LOG_WARNING, "To enable TFO use the command:"); 374 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n"); 375 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n"); 376 log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n"); 377 close(tcp_fastopen_fp); 378 } 379 close(tcp_fastopen_fp); 380 } 381 #endif 382 383 /* 384 * Remove the specified pid from the list of child pids. Returns -1 if 385 * the pid is not in the list, child_num otherwise. The field is set to 0. 386 */ 387 static int 388 delete_child_pid(struct nsd *nsd, pid_t pid) 389 { 390 size_t i; 391 for (i = 0; i < nsd->child_count; ++i) { 392 if (nsd->children[i].pid == pid) { 393 nsd->children[i].pid = 0; 394 if(!nsd->children[i].need_to_exit) { 395 if(nsd->children[i].child_fd != -1) 396 close(nsd->children[i].child_fd); 397 nsd->children[i].child_fd = -1; 398 if(nsd->children[i].handler) 399 nsd->children[i].handler->fd = -1; 400 } 401 return i; 402 } 403 } 404 return -1; 405 } 406 407 /* 408 * Restart child servers if necessary. 409 */ 410 static int 411 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 412 int* xfrd_sock_p) 413 { 414 struct main_ipc_handler_data *ipc_data; 415 size_t i; 416 int sv[2]; 417 418 /* Fork the child processes... */ 419 for (i = 0; i < nsd->child_count; ++i) { 420 if (nsd->children[i].pid <= 0) { 421 if (nsd->children[i].child_fd != -1) 422 close(nsd->children[i].child_fd); 423 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 424 log_msg(LOG_ERR, "socketpair: %s", 425 strerror(errno)); 426 return -1; 427 } 428 nsd->children[i].child_fd = sv[0]; 429 nsd->children[i].parent_fd = sv[1]; 430 nsd->children[i].pid = fork(); 431 switch (nsd->children[i].pid) { 432 default: /* SERVER MAIN */ 433 close(nsd->children[i].parent_fd); 434 nsd->children[i].parent_fd = -1; 435 if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { 436 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 437 } 438 if(!nsd->children[i].handler) 439 { 440 ipc_data = (struct main_ipc_handler_data*) region_alloc( 441 region, sizeof(struct main_ipc_handler_data)); 442 ipc_data->nsd = nsd; 443 ipc_data->child = &nsd->children[i]; 444 ipc_data->child_num = i; 445 ipc_data->xfrd_sock = xfrd_sock_p; 446 ipc_data->packet = buffer_create(region, QIOBUFSZ); 447 ipc_data->forward_mode = 0; 448 ipc_data->got_bytes = 0; 449 ipc_data->total_bytes = 0; 450 ipc_data->acl_num = 0; 451 nsd->children[i].handler = (struct netio_handler*) region_alloc( 452 region, sizeof(struct netio_handler)); 453 nsd->children[i].handler->fd = nsd->children[i].child_fd; 454 nsd->children[i].handler->timeout = NULL; 455 nsd->children[i].handler->user_data = ipc_data; 456 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 457 nsd->children[i].handler->event_handler = parent_handle_child_command; 458 netio_add_handler(netio, nsd->children[i].handler); 459 } 460 /* clear any ongoing ipc */ 461 ipc_data = (struct main_ipc_handler_data*) 462 nsd->children[i].handler->user_data; 463 ipc_data->forward_mode = 0; 464 /* restart - update fd */ 465 nsd->children[i].handler->fd = nsd->children[i].child_fd; 466 break; 467 case 0: /* CHILD */ 468 /* the child need not be able to access the 469 * nsd.db file */ 470 namedb_close_udb(nsd->db); 471 #ifdef MEMCLEAN /* OS collects memory pages */ 472 region_destroy(region); 473 #endif 474 nsd->pid = 0; 475 nsd->child_count = 0; 476 nsd->server_kind = nsd->children[i].kind; 477 nsd->this_child = &nsd->children[i]; 478 nsd->this_child->child_num = i; 479 /* remove signal flags inherited from parent 480 the parent will handle them. */ 481 nsd->signal_hint_reload_hup = 0; 482 nsd->signal_hint_reload = 0; 483 nsd->signal_hint_child = 0; 484 nsd->signal_hint_quit = 0; 485 nsd->signal_hint_shutdown = 0; 486 nsd->signal_hint_stats = 0; 487 nsd->signal_hint_statsusr = 0; 488 close(*xfrd_sock_p); 489 close(nsd->this_child->child_fd); 490 nsd->this_child->child_fd = -1; 491 if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { 492 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 493 } 494 server_child(nsd); 495 /* NOTREACH */ 496 exit(0); 497 case -1: 498 log_msg(LOG_ERR, "fork failed: %s", 499 strerror(errno)); 500 return -1; 501 } 502 } 503 } 504 return 0; 505 } 506 507 #ifdef BIND8_STATS 508 static void set_bind8_alarm(struct nsd* nsd) 509 { 510 /* resync so that the next alarm is on the next whole minute */ 511 if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ 512 alarm(nsd->st.period - (time(NULL) % nsd->st.period)); 513 } 514 #endif 515 516 /* set zone stat ids for zones initially read in */ 517 static void 518 zonestatid_tree_set(struct nsd* nsd) 519 { 520 struct radnode* n; 521 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 522 zone_type* zone = (zone_type*)n->elem; 523 zone->zonestatid = getzonestatid(nsd->options, zone->opts); 524 } 525 } 526 527 #ifdef USE_ZONE_STATS 528 void 529 server_zonestat_alloc(struct nsd* nsd) 530 { 531 size_t num = (nsd->options->zonestatnames->count==0?1: 532 nsd->options->zonestatnames->count); 533 size_t sz = sizeof(struct nsdst)*num; 534 char tmpfile[256]; 535 uint8_t z = 0; 536 537 /* file names */ 538 nsd->zonestatfname[0] = 0; 539 nsd->zonestatfname[1] = 0; 540 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0", 541 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 542 nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile); 543 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1", 544 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 545 nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile); 546 547 /* file descriptors */ 548 nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600); 549 if(nsd->zonestatfd[0] == -1) { 550 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0], 551 strerror(errno)); 552 exit(1); 553 } 554 nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600); 555 if(nsd->zonestatfd[0] == -1) { 556 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1], 557 strerror(errno)); 558 close(nsd->zonestatfd[0]); 559 unlink(nsd->zonestatfname[0]); 560 exit(1); 561 } 562 563 #ifdef HAVE_MMAP 564 if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) { 565 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0], 566 strerror(errno)); 567 exit(1); 568 } 569 if(write(nsd->zonestatfd[0], &z, 1) == -1) { 570 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 571 nsd->zonestatfname[0], strerror(errno)); 572 exit(1); 573 } 574 if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) { 575 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1], 576 strerror(errno)); 577 exit(1); 578 } 579 if(write(nsd->zonestatfd[1], &z, 1) == -1) { 580 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 581 nsd->zonestatfname[1], strerror(errno)); 582 exit(1); 583 } 584 nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 585 MAP_SHARED, nsd->zonestatfd[0], 0); 586 if(nsd->zonestat[0] == MAP_FAILED) { 587 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 588 unlink(nsd->zonestatfname[0]); 589 unlink(nsd->zonestatfname[1]); 590 exit(1); 591 } 592 nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 593 MAP_SHARED, nsd->zonestatfd[1], 0); 594 if(nsd->zonestat[1] == MAP_FAILED) { 595 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 596 unlink(nsd->zonestatfname[0]); 597 unlink(nsd->zonestatfname[1]); 598 exit(1); 599 } 600 memset(nsd->zonestat[0], 0, sz); 601 memset(nsd->zonestat[1], 0, sz); 602 nsd->zonestatsize[0] = num; 603 nsd->zonestatsize[1] = num; 604 nsd->zonestatdesired = num; 605 nsd->zonestatsizenow = num; 606 nsd->zonestatnow = nsd->zonestat[0]; 607 #endif /* HAVE_MMAP */ 608 } 609 610 void 611 zonestat_remap(struct nsd* nsd, int idx, size_t sz) 612 { 613 #ifdef HAVE_MMAP 614 #ifdef MREMAP_MAYMOVE 615 nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx], 616 sizeof(struct nsdst)*nsd->zonestatsize[idx], sz, 617 MREMAP_MAYMOVE); 618 if(nsd->zonestat[idx] == MAP_FAILED) { 619 log_msg(LOG_ERR, "mremap failed: %s", strerror(errno)); 620 exit(1); 621 } 622 #else /* !HAVE MREMAP */ 623 if(msync(nsd->zonestat[idx], 624 sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0) 625 log_msg(LOG_ERR, "msync failed: %s", strerror(errno)); 626 if(munmap(nsd->zonestat[idx], 627 sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0) 628 log_msg(LOG_ERR, "munmap failed: %s", strerror(errno)); 629 nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz, 630 PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0); 631 if(nsd->zonestat[idx] == MAP_FAILED) { 632 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 633 exit(1); 634 } 635 #endif /* MREMAP */ 636 #endif /* HAVE_MMAP */ 637 } 638 639 /* realloc the zonestat array for the one that is not currently in use, 640 * to match the desired new size of the array (if applicable) */ 641 void 642 server_zonestat_realloc(struct nsd* nsd) 643 { 644 #ifdef HAVE_MMAP 645 uint8_t z = 0; 646 size_t sz; 647 int idx = 0; /* index of the zonestat array that is not in use */ 648 if(nsd->zonestatnow == nsd->zonestat[0]) 649 idx = 1; 650 if(nsd->zonestatsize[idx] == nsd->zonestatdesired) 651 return; 652 sz = sizeof(struct nsdst)*nsd->zonestatdesired; 653 if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) { 654 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx], 655 strerror(errno)); 656 exit(1); 657 } 658 if(write(nsd->zonestatfd[idx], &z, 1) == -1) { 659 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 660 nsd->zonestatfname[idx], strerror(errno)); 661 exit(1); 662 } 663 zonestat_remap(nsd, idx, sz); 664 /* zero the newly allocated region */ 665 if(nsd->zonestatdesired > nsd->zonestatsize[idx]) { 666 memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) * 667 nsd->zonestatsize[idx], 0, sizeof(struct nsdst) * 668 (nsd->zonestatdesired - nsd->zonestatsize[idx])); 669 } 670 nsd->zonestatsize[idx] = nsd->zonestatdesired; 671 #endif /* HAVE_MMAP */ 672 } 673 674 /* switchover to use the other array for the new children, that 675 * briefly coexist with the old children. And we want to avoid them 676 * both writing to the same statistics arrays. */ 677 void 678 server_zonestat_switch(struct nsd* nsd) 679 { 680 if(nsd->zonestatnow == nsd->zonestat[0]) { 681 nsd->zonestatnow = nsd->zonestat[1]; 682 nsd->zonestatsizenow = nsd->zonestatsize[1]; 683 } else { 684 nsd->zonestatnow = nsd->zonestat[0]; 685 nsd->zonestatsizenow = nsd->zonestatsize[0]; 686 } 687 } 688 #endif /* USE_ZONE_STATS */ 689 690 static void 691 cleanup_dname_compression_tables(void *ptr) 692 { 693 free(ptr); 694 compressed_dname_offsets = NULL; 695 compression_table_capacity = 0; 696 } 697 698 static void 699 initialize_dname_compression_tables(struct nsd *nsd) 700 { 701 size_t needed = domain_table_count(nsd->db->domains) + 1; 702 needed += EXTRA_DOMAIN_NUMBERS; 703 if(compression_table_capacity < needed) { 704 if(compressed_dname_offsets) { 705 region_remove_cleanup(nsd->db->region, 706 cleanup_dname_compression_tables, 707 compressed_dname_offsets); 708 free(compressed_dname_offsets); 709 } 710 compressed_dname_offsets = (uint16_t *) xmallocarray( 711 needed, sizeof(uint16_t)); 712 region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, 713 compressed_dname_offsets); 714 compression_table_capacity = needed; 715 compression_table_size=domain_table_count(nsd->db->domains)+1; 716 } 717 memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); 718 compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ 719 } 720 721 static int 722 set_cloexec(struct nsd_socket *sock) 723 { 724 assert(sock != NULL); 725 726 if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) { 727 const char *socktype = 728 sock->addr.ai_family == SOCK_DGRAM ? "udp" : "tcp"; 729 log_msg(LOG_ERR, "fcntl(..., O_CLOEXEC) failed for %s: %s", 730 socktype, strerror(errno)); 731 return -1; 732 } 733 734 return 1; 735 } 736 737 static int 738 set_reuseport(struct nsd_socket *sock) 739 { 740 #ifdef SO_REUSEPORT 741 int on = 1; 742 #ifdef SO_REUSEPORT_LB 743 /* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like 744 * SO_REUSEPORT on Linux. This is what the users want with the config 745 * option in nsd.conf; if we actually need local address and port reuse 746 * they'll also need to have SO_REUSEPORT set for them, assume it was 747 * _LB they want. 748 */ 749 int opt = SO_REUSEPORT_LB; 750 static const char optname[] = "SO_REUSEPORT_LB"; 751 #else /* !SO_REUSEPORT_LB */ 752 int opt = SO_REUSEPORT; 753 static const char optname[] = "SO_REUSEPORT"; 754 #endif /* SO_REUSEPORT_LB */ 755 756 if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) { 757 return 1; 758 } else if(verbosity >= 3 || errno != ENOPROTOOPT) { 759 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 760 optname, strerror(errno)); 761 } 762 return -1; 763 #else 764 (void)sock; 765 #endif /* SO_REUSEPORT */ 766 767 return 0; 768 } 769 770 static int 771 set_reuseaddr(struct nsd_socket *sock) 772 { 773 #ifdef SO_REUSEADDR 774 int on = 1; 775 if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) { 776 return 1; 777 } 778 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", 779 strerror(errno)); 780 return -1; 781 #endif /* SO_REUSEADDR */ 782 return 0; 783 } 784 785 static int 786 set_rcvbuf(struct nsd_socket *sock, int rcv) 787 { 788 #ifdef SO_RCVBUF 789 #ifdef SO_RCVBUFFORCE 790 if(0 == setsockopt( 791 sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv))) 792 { 793 return 1; 794 } 795 if(errno == EPERM || errno == ENOBUFS) { 796 return 0; 797 } 798 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s", 799 strerror(errno)); 800 return -1; 801 #else /* !SO_RCVBUFFORCE */ 802 if (0 == setsockopt( 803 sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv))) 804 { 805 return 1; 806 } 807 if(errno == ENOSYS || errno == ENOBUFS) { 808 return 0; 809 } 810 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s", 811 strerror(errno)); 812 return -1; 813 #endif /* SO_RCVBUFFORCE */ 814 #endif /* SO_RCVBUF */ 815 816 return 0; 817 } 818 819 static int 820 set_sndbuf(struct nsd_socket *sock, int snd) 821 { 822 #ifdef SO_SNDBUF 823 #ifdef SO_SNDBUFFORCE 824 if(0 == setsockopt( 825 sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd))) 826 { 827 return 1; 828 } 829 if(errno == EPERM || errno == ENOBUFS) { 830 return 0; 831 } 832 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s", 833 strerror(errno)); 834 return -1; 835 #else /* !SO_SNDBUFFORCE */ 836 if(0 == setsockopt( 837 sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd))) 838 { 839 return 1; 840 } 841 if(errno == ENOSYS || errno == ENOBUFS) { 842 return 0; 843 } 844 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s", 845 strerror(errno)); 846 return -1; 847 #endif /* SO_SNDBUFFORCE */ 848 #endif /* SO_SNDBUF */ 849 850 return 0; 851 } 852 853 static int 854 set_nonblock(struct nsd_socket *sock) 855 { 856 const char *socktype = 857 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 858 859 if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) { 860 log_msg(LOG_ERR, "fctnl(..., O_NONBLOCK) failed for %s: %s", 861 socktype, strerror(errno)); 862 return -1; 863 } 864 865 return 1; 866 } 867 868 #ifdef INET6 869 static int 870 set_ipv6_v6only(struct nsd_socket *sock) 871 { 872 #ifdef IPV6_V6ONLY 873 int on = 1; 874 const char *socktype = 875 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 876 877 if(0 == setsockopt( 878 sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on))) 879 { 880 return 1; 881 } 882 883 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s", 884 socktype, strerror(errno)); 885 return -1; 886 #else 887 (void)sock; 888 #endif /* IPV6_V6ONLY */ 889 890 return 0; 891 } 892 #endif /* INET6 */ 893 894 #ifdef INET6 895 static int 896 set_ipv6_use_min_mtu(struct nsd_socket *sock) 897 { 898 #if defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU) 899 #if defined(IPV6_USE_MIN_MTU) 900 /* There is no fragmentation of IPv6 datagrams during forwarding in the 901 * network. Therefore we do not send UDP datagrams larger than the 902 * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be 903 * larger if the network stack supports IPV6_USE_MIN_MTU. 904 */ 905 int opt = IPV6_USE_MIN_MTU; 906 int optval = 1; 907 static const char optname[] = "IPV6_USE_MIN_MTU"; 908 #elif defined(IPV6_MTU) 909 /* On Linux, PMTUD is disabled by default for datagrams so set the MTU 910 * to the MIN MTU to get the same. 911 */ 912 int opt = IPV6_MTU; 913 int optval = IPV6_MIN_MTU; 914 static const char optname[] = "IPV6_MTU"; 915 #endif 916 if(0 == setsockopt( 917 sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval))) 918 { 919 return 1; 920 } 921 922 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 923 optname, strerror(errno)); 924 return -1; 925 #else 926 (void)sock; 927 #endif /* INET6 */ 928 929 return 0; 930 } 931 #endif /* INET6 */ 932 933 static int 934 set_ipv4_no_pmtu_disc(struct nsd_socket *sock) 935 { 936 int ret = 0; 937 938 #if defined(IP_MTU_DISCOVER) 939 int opt = IP_MTU_DISCOVER; 940 int optval; 941 # if defined(IP_PMTUDISC_OMIT) 942 /* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU 943 * information and send packets with DF=0. Fragmentation is allowed if 944 * and only if the packet size exceeds the outgoing interface MTU or 945 * the packet encounters smaller MTU link in network. This mitigates 946 * DNS fragmentation attacks by preventing forged PMTU information. 947 * FreeBSD already has same semantics without setting the option. 948 */ 949 optval = IP_PMTUDISC_OMIT; 950 if(0 == setsockopt( 951 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 952 { 953 return 1; 954 } 955 956 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 957 "IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno)); 958 # endif /* IP_PMTUDISC_OMIT */ 959 # if defined(IP_PMTUDISC_DONT) 960 /* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */ 961 optval = IP_PMTUDISC_DONT; 962 if(0 == setsockopt( 963 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 964 { 965 return 1; 966 } 967 968 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 969 "IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno)); 970 # endif 971 ret = -1; 972 #elif defined(IP_DONTFRAG) 973 int off = 0; 974 if (0 == setsockopt( 975 sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off))) 976 { 977 return 1; 978 } 979 980 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 981 strerror(errno)); 982 ret = -1; 983 #else 984 (void)sock; 985 #endif 986 987 return ret; 988 } 989 990 static int 991 set_ip_freebind(struct nsd_socket *sock) 992 { 993 #ifdef IP_FREEBIND 994 int on = 1; 995 const char *socktype = 996 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 997 if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0) 998 { 999 return 1; 1000 } 1001 log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s", 1002 socktype, strerror(errno)); 1003 return -1; 1004 #else 1005 (void)sock; 1006 #endif /* IP_FREEBIND */ 1007 1008 return 0; 1009 } 1010 1011 static int 1012 set_ip_transparent(struct nsd_socket *sock) 1013 { 1014 /* 1015 The scandalous preprocessor blob here calls for some explanation :) 1016 POSIX does not specify an option to bind non-local IPs, so 1017 platforms developed several implementation-specific options, 1018 all set in the same way, but with different names. 1019 For additional complexity, some platform manage this setting 1020 differently for different address families (IPv4 vs IPv6). 1021 This scandalous preprocessor blob below abstracts such variability 1022 in the way which leaves the C code as lean and clear as possible. 1023 */ 1024 1025 #if defined(IP_TRANSPARENT) 1026 # define NSD_SOCKET_OPTION_TRANSPARENT IP_TRANSPARENT 1027 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP 1028 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_TRANSPARENT" 1029 // as of 2020-01, Linux does not support this on IPv6 programmatically 1030 #elif defined(SO_BINDANY) 1031 # define NSD_SOCKET_OPTION_TRANSPARENT SO_BINDANY 1032 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL SOL_SOCKET 1033 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "SO_BINDANY" 1034 #elif defined(IP_BINDANY) 1035 # define NSD_SOCKET_OPTION_TRANSPARENT IP_BINDANY 1036 # define NSD_SOCKET_OPTION_TRANSPARENT6 IPV6_BINDANY 1037 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL IPPROTO_IP 1038 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 IPPROTO_IPV6 1039 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME "IP_BINDANY" 1040 #endif 1041 1042 #ifndef NSD_SOCKET_OPTION_TRANSPARENT 1043 (void)sock; 1044 #else 1045 # ifndef NSD_SOCKET_OPTION_TRANSPARENT6 1046 # define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT 1047 # endif 1048 # ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 1049 # define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL 1050 # endif 1051 # ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6 1052 # define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME 1053 # endif 1054 1055 int on = 1; 1056 const char *socktype = 1057 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 1058 const int is_ip6 = (sock->addr.ai_family == AF_INET6); 1059 1060 if(0 == setsockopt( 1061 sock->s, 1062 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL, 1063 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT, 1064 &on, sizeof(on))) 1065 { 1066 return 1; 1067 } 1068 1069 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s", 1070 is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME, socktype, strerror(errno)); 1071 return -1; 1072 #endif 1073 1074 return 0; 1075 } 1076 1077 static int 1078 set_tcp_maxseg(struct nsd_socket *sock, int mss) 1079 { 1080 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 1081 if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) { 1082 return 1; 1083 } 1084 log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s", 1085 strerror(errno)); 1086 return -1; 1087 #else 1088 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported"); 1089 #endif 1090 return 0; 1091 } 1092 1093 #ifdef USE_TCP_FASTOPEN 1094 static int 1095 set_tcp_fastopen(struct nsd_socket *sock) 1096 { 1097 /* qlen specifies how many outstanding TFO requests to allow. Limit is 1098 * a defense against IP spoofing attacks as suggested in RFC7413. 1099 */ 1100 int qlen; 1101 1102 #ifdef __APPLE__ 1103 /* macOS X implementation only supports qlen of 1 via this call. The 1104 * actual value is configured by the net.inet.tcp.fastopen_backlog 1105 * kernel parameter. 1106 */ 1107 qlen = 1; 1108 #else 1109 /* 5 is recommended on Linux. */ 1110 qlen = 5; 1111 #endif 1112 if (0 == setsockopt( 1113 sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen))) 1114 { 1115 return 1; 1116 } 1117 1118 if (errno == EPERM) { 1119 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s " 1120 "; this could likely be because sysctl " 1121 "net.inet.tcp.fastopen.enabled, " 1122 "net.inet.tcp.fastopen.server_enable, or " 1123 "net.ipv4.tcp_fastopen is disabled", 1124 strerror(errno)); 1125 /* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support 1126 * disabled, except when verbosity enabled for debugging 1127 */ 1128 } else if(errno != ENOPROTOOPT || verbosity >= 3) { 1129 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s", 1130 strerror(errno)); 1131 } 1132 1133 return (errno == ENOPROTOOPT ? 0 : -1); 1134 } 1135 #endif /* USE_TCP_FASTOPEN */ 1136 1137 static int 1138 set_bindtodevice(struct nsd_socket *sock) 1139 { 1140 #if defined(SO_BINDTODEVICE) 1141 if(setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE, 1142 sock->device, strlen(sock->device)) == -1) 1143 { 1144 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 1145 "SO_BINDTODEVICE", sock->device, strerror(errno)); 1146 return -1; 1147 } 1148 1149 return 1; 1150 #else 1151 (void)sock; 1152 return 0; 1153 #endif 1154 } 1155 1156 static int 1157 set_setfib(struct nsd_socket *sock) 1158 { 1159 #if defined(SO_SETFIB) 1160 if(setsockopt(sock->s, SOL_SOCKET, SO_SETFIB, 1161 (const void *)&sock->fib, sizeof(sock->fib)) == -1) 1162 { 1163 log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s", 1164 "SO_SETFIB", sock->fib, strerror(errno)); 1165 return -1; 1166 } 1167 1168 return 1; 1169 #else 1170 (void)sock; 1171 return 0; 1172 #endif 1173 } 1174 1175 static int 1176 open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1177 { 1178 int rcv = 1*1024*1024, snd = 1*1024*1024; 1179 1180 if(-1 == (sock->s = socket( 1181 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1182 { 1183 #ifdef INET6 1184 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1185 (sock->addr.ai_family == AF_INET6) && 1186 (errno == EAFNOSUPPORT)) 1187 { 1188 log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: " 1189 "not supported"); 1190 return 0; 1191 } 1192 #endif 1193 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1194 return -1; 1195 } 1196 1197 set_cloexec(sock); 1198 1199 if(nsd->reuseport && reuseport_works && *reuseport_works) 1200 *reuseport_works = (set_reuseport(sock) == 1); 1201 1202 if(nsd->options->receive_buffer_size > 0) 1203 rcv = nsd->options->receive_buffer_size; 1204 if(set_rcvbuf(sock, rcv) == -1) 1205 return -1; 1206 1207 if(nsd->options->send_buffer_size > 0) 1208 snd = nsd->options->send_buffer_size; 1209 if(set_sndbuf(sock, snd) == -1) 1210 return -1; 1211 #ifdef INET6 1212 if(sock->addr.ai_family == AF_INET6) { 1213 if(set_ipv6_v6only(sock) == -1 || 1214 set_ipv6_use_min_mtu(sock) == -1) 1215 return -1; 1216 } else 1217 #endif /* INET6 */ 1218 if(sock->addr.ai_family == AF_INET) { 1219 if(set_ipv4_no_pmtu_disc(sock) == -1) 1220 return -1; 1221 } 1222 1223 /* Set socket to non-blocking. Otherwise, on operating systems 1224 * with thundering herd problems, the UDP recv could block 1225 * after select returns readable. 1226 */ 1227 set_nonblock(sock); 1228 1229 if(nsd->options->ip_freebind) 1230 (void)set_ip_freebind(sock); 1231 if(nsd->options->ip_transparent) 1232 (void)set_ip_transparent(sock); 1233 if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1) 1234 return -1; 1235 if(sock->fib != -1 && set_setfib(sock) == -1) 1236 return -1; 1237 1238 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1239 char buf[256]; 1240 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1241 log_msg(LOG_ERR, "can't bind udp socket %s: %s", 1242 buf, strerror(errno)); 1243 return -1; 1244 } 1245 1246 return 1; 1247 } 1248 1249 static int 1250 open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1251 { 1252 #ifdef USE_TCP_FASTOPEN 1253 report_tcp_fastopen_config(); 1254 #endif 1255 1256 (void)reuseport_works; 1257 1258 if(-1 == (sock->s = socket( 1259 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1260 { 1261 #ifdef INET6 1262 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1263 (sock->addr.ai_family == AF_INET6) && 1264 (errno == EAFNOSUPPORT)) 1265 { 1266 log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: " 1267 "not supported"); 1268 return 0; 1269 } 1270 #endif /* INET6 */ 1271 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1272 return -1; 1273 } 1274 1275 set_cloexec(sock); 1276 1277 if(nsd->reuseport && reuseport_works && *reuseport_works) 1278 *reuseport_works = (set_reuseport(sock) == 1); 1279 1280 (void)set_reuseaddr(sock); 1281 1282 #ifdef INET6 1283 if(sock->addr.ai_family == AF_INET6) { 1284 if (set_ipv6_v6only(sock) == -1 || 1285 set_ipv6_use_min_mtu(sock) == -1) 1286 return -1; 1287 } 1288 #endif 1289 1290 if(nsd->tcp_mss > 0) 1291 set_tcp_maxseg(sock, nsd->tcp_mss); 1292 /* (StevensUNP p463), if TCP listening socket is blocking, then 1293 it may block in accept, even if select() says readable. */ 1294 (void)set_nonblock(sock); 1295 if(nsd->options->ip_freebind) 1296 (void)set_ip_freebind(sock); 1297 if(nsd->options->ip_transparent) 1298 (void)set_ip_transparent(sock); 1299 if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1) 1300 return -1; 1301 if(sock->fib != -1 && set_setfib(sock) == -1) 1302 return -1; 1303 1304 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1305 char buf[256]; 1306 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1307 log_msg(LOG_ERR, "can't bind tcp socket %s: %s", 1308 buf, strerror(errno)); 1309 return -1; 1310 } 1311 1312 #ifdef USE_TCP_FASTOPEN 1313 (void)set_tcp_fastopen(sock); 1314 #endif 1315 1316 if(listen(sock->s, TCP_BACKLOG) == -1) { 1317 log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); 1318 return -1; 1319 } 1320 1321 return 1; 1322 } 1323 1324 /* 1325 * Initialize the server, reuseport, create and bind the sockets. 1326 */ 1327 int 1328 server_init(struct nsd *nsd) 1329 { 1330 size_t i; 1331 int reuseport = 1; /* Determine if REUSEPORT works. */ 1332 1333 /* open server interface ports */ 1334 for(i = 0; i < nsd->ifs; i++) { 1335 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 || 1336 open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1) 1337 { 1338 return -1; 1339 } 1340 } 1341 1342 if(nsd->reuseport && reuseport) { 1343 size_t ifs = nsd->ifs * nsd->reuseport; 1344 1345 /* increase the size of the interface arrays, there are going 1346 * to be separate interface file descriptors for every server 1347 * instance */ 1348 region_remove_cleanup(nsd->region, free, nsd->udp); 1349 region_remove_cleanup(nsd->region, free, nsd->tcp); 1350 1351 nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp)); 1352 nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp)); 1353 region_add_cleanup(nsd->region, free, nsd->udp); 1354 region_add_cleanup(nsd->region, free, nsd->tcp); 1355 if(ifs > nsd->ifs) { 1356 memset(&nsd->udp[nsd->ifs], 0, 1357 (ifs-nsd->ifs)*sizeof(*nsd->udp)); 1358 memset(&nsd->tcp[nsd->ifs], 0, 1359 (ifs-nsd->ifs)*sizeof(*nsd->tcp)); 1360 } 1361 1362 for(i = nsd->ifs; i < ifs; i++) { 1363 nsd->udp[i] = nsd->udp[i%nsd->ifs]; 1364 nsd->udp[i].s = -1; 1365 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) { 1366 return -1; 1367 } 1368 /* Turn off REUSEPORT for TCP by copying the socket 1369 * file descriptor. 1370 * This means we should not close TCP used by 1371 * other servers in reuseport enabled mode, in 1372 * server_child(). 1373 */ 1374 nsd->tcp[i] = nsd->tcp[i%nsd->ifs]; 1375 } 1376 1377 nsd->ifs = ifs; 1378 } else { 1379 nsd->reuseport = 0; 1380 } 1381 1382 /* open server interface ports for verifiers */ 1383 for(i = 0; i < nsd->verify_ifs; i++) { 1384 if(open_udp_socket(nsd, &nsd->verify_udp[i], NULL) == -1 || 1385 open_tcp_socket(nsd, &nsd->verify_tcp[i], NULL) == -1) 1386 { 1387 return -1; 1388 } 1389 } 1390 1391 return 0; 1392 } 1393 1394 /* 1395 * Prepare the server for take off. 1396 * 1397 */ 1398 int 1399 server_prepare(struct nsd *nsd) 1400 { 1401 #ifdef RATELIMIT 1402 /* set secret modifier for hashing (udb ptr buckets and rate limits) */ 1403 #ifdef HAVE_GETRANDOM 1404 uint32_t v; 1405 if(getrandom(&v, sizeof(v), 0) == -1) { 1406 log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno)); 1407 exit(1); 1408 } 1409 hash_set_raninit(v); 1410 #elif defined(HAVE_ARC4RANDOM) 1411 hash_set_raninit(arc4random()); 1412 #else 1413 uint32_t v = getpid() ^ time(NULL); 1414 srandom((unsigned long)v); 1415 # ifdef HAVE_SSL 1416 if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0) 1417 hash_set_raninit(v); 1418 else 1419 # endif 1420 hash_set_raninit(random()); 1421 #endif 1422 rrl_mmap_init(nsd->child_count, nsd->options->rrl_size, 1423 nsd->options->rrl_ratelimit, 1424 nsd->options->rrl_whitelist_ratelimit, 1425 nsd->options->rrl_slip, 1426 nsd->options->rrl_ipv4_prefix_length, 1427 nsd->options->rrl_ipv6_prefix_length); 1428 #endif /* RATELIMIT */ 1429 1430 /* Open the database... */ 1431 if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) { 1432 log_msg(LOG_ERR, "unable to open the database %s: %s", 1433 nsd->dbfile, strerror(errno)); 1434 unlink(nsd->task[0]->fname); 1435 unlink(nsd->task[1]->fname); 1436 #ifdef USE_ZONE_STATS 1437 unlink(nsd->zonestatfname[0]); 1438 unlink(nsd->zonestatfname[1]); 1439 #endif 1440 xfrd_del_tempdir(nsd); 1441 return -1; 1442 } 1443 /* check if zone files have been modified */ 1444 /* NULL for taskudb because we send soainfo in a moment, batched up, 1445 * for all zones */ 1446 if(nsd->options->zonefiles_check || (nsd->options->database == NULL || 1447 nsd->options->database[0] == 0)) 1448 namedb_check_zonefiles(nsd, nsd->options, NULL, NULL); 1449 zonestatid_tree_set(nsd); 1450 1451 compression_table_capacity = 0; 1452 initialize_dname_compression_tables(nsd); 1453 1454 #ifdef BIND8_STATS 1455 /* Initialize times... */ 1456 time(&nsd->st.boot); 1457 set_bind8_alarm(nsd); 1458 #endif /* BIND8_STATS */ 1459 1460 return 0; 1461 } 1462 1463 /* 1464 * Fork the required number of servers. 1465 */ 1466 static int 1467 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, 1468 int* xfrd_sock_p) 1469 { 1470 size_t i; 1471 1472 /* Start all child servers initially. */ 1473 for (i = 0; i < nsd->child_count; ++i) { 1474 nsd->children[i].pid = 0; 1475 } 1476 1477 return restart_child_servers(nsd, region, netio, xfrd_sock_p); 1478 } 1479 1480 static void 1481 server_close_socket(struct nsd_socket *sock) 1482 { 1483 if(sock->s != -1) { 1484 close(sock->s); 1485 sock->s = -1; 1486 } 1487 } 1488 1489 void 1490 server_close_all_sockets(struct nsd_socket sockets[], size_t n) 1491 { 1492 size_t i; 1493 1494 /* Close all the sockets... */ 1495 for (i = 0; i < n; ++i) { 1496 server_close_socket(&sockets[i]); 1497 } 1498 } 1499 1500 /* 1501 * Close the sockets, shutdown the server and exit. 1502 * Does not return. 1503 */ 1504 void 1505 server_shutdown(struct nsd *nsd) 1506 { 1507 size_t i; 1508 1509 server_close_all_sockets(nsd->udp, nsd->ifs); 1510 server_close_all_sockets(nsd->tcp, nsd->ifs); 1511 /* CHILD: close command channel to parent */ 1512 if(nsd->this_child && nsd->this_child->parent_fd != -1) 1513 { 1514 close(nsd->this_child->parent_fd); 1515 nsd->this_child->parent_fd = -1; 1516 } 1517 /* SERVER: close command channels to children */ 1518 if(!nsd->this_child) 1519 { 1520 for(i=0; i < nsd->child_count; ++i) 1521 if(nsd->children[i].child_fd != -1) 1522 { 1523 close(nsd->children[i].child_fd); 1524 nsd->children[i].child_fd = -1; 1525 } 1526 } 1527 1528 tsig_finalize(); 1529 #ifdef HAVE_SSL 1530 daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */ 1531 if (nsd->tls_ctx) 1532 SSL_CTX_free(nsd->tls_ctx); 1533 #endif 1534 1535 #ifdef MEMCLEAN /* OS collects memory pages */ 1536 #ifdef RATELIMIT 1537 rrl_mmap_deinit_keep_mmap(); 1538 #endif 1539 #ifdef USE_DNSTAP 1540 dt_collector_destroy(nsd->dt_collector, nsd); 1541 #endif 1542 udb_base_free_keep_mmap(nsd->task[0]); 1543 udb_base_free_keep_mmap(nsd->task[1]); 1544 namedb_free_ixfr(nsd->db); 1545 namedb_close_udb(nsd->db); /* keeps mmap */ 1546 namedb_close(nsd->db); 1547 nsd_options_destroy(nsd->options); 1548 region_destroy(nsd->region); 1549 #endif 1550 log_finalize(); 1551 exit(0); 1552 } 1553 1554 void 1555 server_prepare_xfrd(struct nsd* nsd) 1556 { 1557 char tmpfile[256]; 1558 /* create task mmaps */ 1559 nsd->mytask = 0; 1560 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0", 1561 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1562 nsd->task[0] = task_file_create(tmpfile); 1563 if(!nsd->task[0]) { 1564 #ifdef USE_ZONE_STATS 1565 unlink(nsd->zonestatfname[0]); 1566 unlink(nsd->zonestatfname[1]); 1567 #endif 1568 xfrd_del_tempdir(nsd); 1569 exit(1); 1570 } 1571 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1", 1572 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1573 nsd->task[1] = task_file_create(tmpfile); 1574 if(!nsd->task[1]) { 1575 unlink(nsd->task[0]->fname); 1576 #ifdef USE_ZONE_STATS 1577 unlink(nsd->zonestatfname[0]); 1578 unlink(nsd->zonestatfname[1]); 1579 #endif 1580 xfrd_del_tempdir(nsd); 1581 exit(1); 1582 } 1583 assert(udb_base_get_userdata(nsd->task[0])->data == 0); 1584 assert(udb_base_get_userdata(nsd->task[1])->data == 0); 1585 /* create xfrd listener structure */ 1586 nsd->xfrd_listener = region_alloc(nsd->region, 1587 sizeof(netio_handler_type)); 1588 nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*) 1589 region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data)); 1590 nsd->xfrd_listener->fd = -1; 1591 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd = 1592 nsd; 1593 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn = 1594 xfrd_tcp_create(nsd->region, QIOBUFSZ); 1595 } 1596 1597 1598 void 1599 server_start_xfrd(struct nsd *nsd, int del_db, int reload_active) 1600 { 1601 pid_t pid; 1602 int sockets[2] = {0,0}; 1603 struct ipc_handler_conn_data *data; 1604 1605 if(nsd->xfrd_listener->fd != -1) 1606 close(nsd->xfrd_listener->fd); 1607 if(del_db) { 1608 /* recreate taskdb that xfrd was using, it may be corrupt */ 1609 /* we (or reload) use nsd->mytask, and xfrd uses the other */ 1610 char* tmpfile = nsd->task[1-nsd->mytask]->fname; 1611 nsd->task[1-nsd->mytask]->fname = NULL; 1612 /* free alloc already, so udb does not shrink itself */ 1613 udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc); 1614 nsd->task[1-nsd->mytask]->alloc = NULL; 1615 udb_base_free(nsd->task[1-nsd->mytask]); 1616 /* create new file, overwrite the old one */ 1617 nsd->task[1-nsd->mytask] = task_file_create(tmpfile); 1618 free(tmpfile); 1619 } 1620 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { 1621 log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); 1622 return; 1623 } 1624 pid = fork(); 1625 switch (pid) { 1626 case -1: 1627 log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); 1628 break; 1629 default: 1630 /* PARENT: close first socket, use second one */ 1631 close(sockets[0]); 1632 if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) { 1633 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1634 } 1635 if(del_db) xfrd_free_namedb(nsd); 1636 /* use other task than I am using, since if xfrd died and is 1637 * restarted, the reload is using nsd->mytask */ 1638 nsd->mytask = 1 - nsd->mytask; 1639 1640 #ifdef HAVE_SETPROCTITLE 1641 setproctitle("xfrd"); 1642 #endif 1643 #ifdef HAVE_CPUSET_T 1644 if(nsd->use_cpu_affinity) { 1645 set_cpu_affinity(nsd->xfrd_cpuset); 1646 } 1647 #endif 1648 1649 xfrd_init(sockets[1], nsd, del_db, reload_active, pid); 1650 /* ENOTREACH */ 1651 break; 1652 case 0: 1653 /* CHILD: close second socket, use first one */ 1654 close(sockets[1]); 1655 if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) { 1656 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1657 } 1658 nsd->xfrd_listener->fd = sockets[0]; 1659 break; 1660 } 1661 /* server-parent only */ 1662 nsd->xfrd_listener->timeout = NULL; 1663 nsd->xfrd_listener->event_types = NETIO_EVENT_READ; 1664 nsd->xfrd_listener->event_handler = parent_handle_xfrd_command; 1665 /* clear ongoing ipc reads */ 1666 data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data; 1667 data->conn->is_reading = 0; 1668 } 1669 1670 /** add all soainfo to taskdb */ 1671 static void 1672 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb) 1673 { 1674 struct radnode* n; 1675 udb_ptr task_last; /* last task, mytask is empty so NULL */ 1676 /* add all SOA INFO to mytask */ 1677 udb_ptr_init(&task_last, taskudb); 1678 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 1679 task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0); 1680 } 1681 udb_ptr_unlink(&task_last, taskudb); 1682 } 1683 1684 void 1685 server_send_soa_xfrd(struct nsd* nsd, int shortsoa) 1686 { 1687 /* normally this exchanges the SOA from nsd->xfrd and the expire back. 1688 * parent fills one taskdb with soas, xfrd fills other with expires. 1689 * then they exchange and process. 1690 * shortsoa: xfrd crashes and needs to be restarted and one taskdb 1691 * may be in use by reload. Fill SOA in taskdb and give to xfrd. 1692 * expire notifications can be sent back via a normal reload later 1693 * (xfrd will wait for current running reload to finish if any). 1694 */ 1695 sig_atomic_t cmd = 0; 1696 pid_t mypid; 1697 int xfrd_sock = nsd->xfrd_listener->fd; 1698 struct udb_base* taskudb = nsd->task[nsd->mytask]; 1699 udb_ptr t; 1700 if(!shortsoa) { 1701 if(nsd->signal_hint_shutdown) { 1702 shutdown: 1703 log_msg(LOG_WARNING, "signal received, shutting down..."); 1704 server_close_all_sockets(nsd->udp, nsd->ifs); 1705 server_close_all_sockets(nsd->tcp, nsd->ifs); 1706 #ifdef HAVE_SSL 1707 daemon_remote_close(nsd->rc); 1708 #endif 1709 /* Unlink it if possible... */ 1710 unlinkpid(nsd->pidfile); 1711 unlink(nsd->task[0]->fname); 1712 unlink(nsd->task[1]->fname); 1713 #ifdef USE_ZONE_STATS 1714 unlink(nsd->zonestatfname[0]); 1715 unlink(nsd->zonestatfname[1]); 1716 #endif 1717 /* write the nsd.db to disk, wait for it to complete */ 1718 udb_base_sync(nsd->db->udb, 1); 1719 udb_base_close(nsd->db->udb); 1720 server_shutdown(nsd); 1721 /* ENOTREACH */ 1722 exit(0); 1723 } 1724 } 1725 if(shortsoa) { 1726 /* put SOA in xfrd task because mytask may be in use */ 1727 taskudb = nsd->task[1-nsd->mytask]; 1728 } 1729 1730 add_all_soa_to_task(nsd, taskudb); 1731 if(!shortsoa) { 1732 /* wait for xfrd to signal task is ready, RELOAD signal */ 1733 if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) || 1734 cmd != NSD_RELOAD) { 1735 log_msg(LOG_ERR, "did not get start signal from xfrd"); 1736 exit(1); 1737 } 1738 if(nsd->signal_hint_shutdown) { 1739 goto shutdown; 1740 } 1741 } 1742 /* give xfrd our task, signal it with RELOAD_DONE */ 1743 task_process_sync(taskudb); 1744 cmd = NSD_RELOAD_DONE; 1745 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1746 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1747 (int)nsd->pid, strerror(errno)); 1748 } 1749 mypid = getpid(); 1750 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1751 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1752 strerror(errno)); 1753 } 1754 1755 if(!shortsoa) { 1756 /* process the xfrd task works (expiry data) */ 1757 nsd->mytask = 1 - nsd->mytask; 1758 taskudb = nsd->task[nsd->mytask]; 1759 task_remap(taskudb); 1760 udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb)); 1761 while(!udb_ptr_is_null(&t)) { 1762 task_process_expire(nsd->db, TASKLIST(&t)); 1763 udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next); 1764 } 1765 udb_ptr_unlink(&t, taskudb); 1766 task_clear(taskudb); 1767 1768 /* tell xfrd that the task is emptied, signal with RELOAD_DONE */ 1769 cmd = NSD_RELOAD_DONE; 1770 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1771 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1772 (int)nsd->pid, strerror(errno)); 1773 } 1774 } 1775 } 1776 1777 #ifdef HAVE_SSL 1778 static void 1779 log_crypto_from_err(const char* str, unsigned long err) 1780 { 1781 /* error:[error code]:[library name]:[function name]:[reason string] */ 1782 char buf[128]; 1783 unsigned long e; 1784 ERR_error_string_n(err, buf, sizeof(buf)); 1785 log_msg(LOG_ERR, "%s crypto %s", str, buf); 1786 while( (e=ERR_get_error()) ) { 1787 ERR_error_string_n(e, buf, sizeof(buf)); 1788 log_msg(LOG_ERR, "and additionally crypto %s", buf); 1789 } 1790 } 1791 1792 void 1793 log_crypto_err(const char* str) 1794 { 1795 log_crypto_from_err(str, ERR_get_error()); 1796 } 1797 1798 /** true if the ssl handshake error has to be squelched from the logs */ 1799 static int 1800 squelch_err_ssl_handshake(unsigned long err) 1801 { 1802 if(verbosity >= 3) 1803 return 0; /* only squelch on low verbosity */ 1804 /* this is very specific, we could filter on ERR_GET_REASON() 1805 * (the third element in ERR_PACK) */ 1806 if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) || 1807 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) || 1808 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) || 1809 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE) 1810 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1811 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER) 1812 #endif 1813 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1814 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL) 1815 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL) 1816 # ifdef SSL_R_VERSION_TOO_LOW 1817 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW) 1818 # endif 1819 #endif 1820 ) 1821 return 1; 1822 return 0; 1823 } 1824 1825 void 1826 perform_openssl_init(void) 1827 { 1828 /* init SSL library */ 1829 #ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS 1830 ERR_load_crypto_strings(); 1831 #endif 1832 #if defined(HAVE_ERR_LOAD_SSL_STRINGS) && !defined(DEPRECATED_ERR_LOAD_SSL_STRINGS) 1833 ERR_load_SSL_strings(); 1834 #endif 1835 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO) 1836 OpenSSL_add_all_algorithms(); 1837 #else 1838 OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS 1839 | OPENSSL_INIT_ADD_ALL_DIGESTS 1840 | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); 1841 #endif 1842 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL) 1843 (void)SSL_library_init(); 1844 #else 1845 OPENSSL_init_ssl(0, NULL); 1846 #endif 1847 1848 if(!RAND_status()) { 1849 /* try to seed it */ 1850 unsigned char buf[256]; 1851 unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid(); 1852 size_t i; 1853 v = seed; 1854 for(i=0; i<256/sizeof(v); i++) { 1855 memmove(buf+i*sizeof(v), &v, sizeof(v)); 1856 v = v*seed + (unsigned int)i; 1857 } 1858 RAND_seed(buf, 256); 1859 log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time"); 1860 } 1861 } 1862 1863 static int 1864 get_ocsp(char *filename, unsigned char **ocsp) 1865 { 1866 BIO *bio; 1867 OCSP_RESPONSE *response; 1868 int len = -1; 1869 unsigned char *p, *buf; 1870 assert(filename); 1871 1872 if ((bio = BIO_new_file(filename, "r")) == NULL) { 1873 log_crypto_err("get_ocsp: BIO_new_file failed"); 1874 return -1; 1875 } 1876 1877 if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) { 1878 log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed"); 1879 BIO_free(bio); 1880 return -1; 1881 } 1882 1883 if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) { 1884 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed"); 1885 OCSP_RESPONSE_free(response); 1886 BIO_free(bio); 1887 return -1; 1888 } 1889 1890 if ((buf = malloc((size_t) len)) == NULL) { 1891 log_msg(LOG_ERR, "get_ocsp: malloc failed"); 1892 OCSP_RESPONSE_free(response); 1893 BIO_free(bio); 1894 return -1; 1895 } 1896 1897 p = buf; 1898 if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) { 1899 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed"); 1900 free(buf); 1901 OCSP_RESPONSE_free(response); 1902 BIO_free(bio); 1903 return -1; 1904 } 1905 1906 OCSP_RESPONSE_free(response); 1907 BIO_free(bio); 1908 1909 *ocsp = buf; 1910 return len; 1911 } 1912 1913 /* further setup ssl ctx after the keys are loaded */ 1914 static void 1915 listen_sslctx_setup_2(void* ctxt) 1916 { 1917 SSL_CTX* ctx = (SSL_CTX*)ctxt; 1918 (void)ctx; 1919 #if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO 1920 if(!SSL_CTX_set_ecdh_auto(ctx,1)) { 1921 /* ENOTREACH */ 1922 log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE"); 1923 } 1924 #elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME) 1925 if(1) { 1926 EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1); 1927 if (!ecdh) { 1928 log_crypto_err("could not find p256, not enabling ECDHE"); 1929 } else { 1930 if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) { 1931 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE"); 1932 } 1933 EC_KEY_free (ecdh); 1934 } 1935 } 1936 #endif 1937 } 1938 1939 static int 1940 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg)) 1941 { 1942 if(ocspdata) { 1943 unsigned char *p; 1944 if ((p=malloc(ocspdata_len)) == NULL) { 1945 log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure"); 1946 return SSL_TLSEXT_ERR_NOACK; 1947 } 1948 memcpy(p, ocspdata, ocspdata_len); 1949 if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) { 1950 log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp"); 1951 free(p); 1952 return SSL_TLSEXT_ERR_NOACK; 1953 } 1954 return SSL_TLSEXT_ERR_OK; 1955 } else { 1956 return SSL_TLSEXT_ERR_NOACK; 1957 } 1958 } 1959 1960 SSL_CTX* 1961 server_tls_ctx_setup(char* key, char* pem, char* verifypem) 1962 { 1963 SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method()); 1964 if(!ctx) { 1965 log_crypto_err("could not SSL_CTX_new"); 1966 return NULL; 1967 } 1968 /* no SSLv2, SSLv3 because has defects */ 1969 #if SSL_OP_NO_SSLv2 != 0 1970 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){ 1971 log_crypto_err("could not set SSL_OP_NO_SSLv2"); 1972 SSL_CTX_free(ctx); 1973 return NULL; 1974 } 1975 #endif 1976 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3) 1977 != SSL_OP_NO_SSLv3){ 1978 log_crypto_err("could not set SSL_OP_NO_SSLv3"); 1979 SSL_CTX_free(ctx); 1980 return 0; 1981 } 1982 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1) 1983 /* if we have tls 1.1 disable 1.0 */ 1984 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1) 1985 != SSL_OP_NO_TLSv1){ 1986 log_crypto_err("could not set SSL_OP_NO_TLSv1"); 1987 SSL_CTX_free(ctx); 1988 return 0; 1989 } 1990 #endif 1991 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2) 1992 /* if we have tls 1.2 disable 1.1 */ 1993 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1) 1994 != SSL_OP_NO_TLSv1_1){ 1995 log_crypto_err("could not set SSL_OP_NO_TLSv1_1"); 1996 SSL_CTX_free(ctx); 1997 return 0; 1998 } 1999 #endif 2000 #if defined(SSL_OP_NO_RENEGOTIATION) 2001 /* disable client renegotiation */ 2002 if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) & 2003 SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) { 2004 log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION"); 2005 SSL_CTX_free(ctx); 2006 return 0; 2007 } 2008 #endif 2009 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20) 2010 /* if we detect system-wide crypto policies, use those */ 2011 if (access( "/etc/crypto-policies/config", F_OK ) != 0 ) { 2012 /* if we have sha256, set the cipher list to have no known vulns */ 2013 if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20")) 2014 log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list"); 2015 } 2016 #endif 2017 if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) & 2018 SSL_OP_CIPHER_SERVER_PREFERENCE) != 2019 SSL_OP_CIPHER_SERVER_PREFERENCE) { 2020 log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE"); 2021 SSL_CTX_free(ctx); 2022 return 0; 2023 } 2024 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL 2025 SSL_CTX_set_security_level(ctx, 0); 2026 #endif 2027 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) { 2028 log_msg(LOG_ERR, "error for cert file: %s", pem); 2029 log_crypto_err("error in SSL_CTX use_certificate_chain_file"); 2030 SSL_CTX_free(ctx); 2031 return NULL; 2032 } 2033 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) { 2034 log_msg(LOG_ERR, "error for private key file: %s", key); 2035 log_crypto_err("Error in SSL_CTX use_PrivateKey_file"); 2036 SSL_CTX_free(ctx); 2037 return NULL; 2038 } 2039 if(!SSL_CTX_check_private_key(ctx)) { 2040 log_msg(LOG_ERR, "error for key file: %s", key); 2041 log_crypto_err("Error in SSL_CTX check_private_key"); 2042 SSL_CTX_free(ctx); 2043 return NULL; 2044 } 2045 listen_sslctx_setup_2(ctx); 2046 if(verifypem && verifypem[0]) { 2047 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) { 2048 log_crypto_err("Error in SSL_CTX verify locations"); 2049 SSL_CTX_free(ctx); 2050 return NULL; 2051 } 2052 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem)); 2053 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL); 2054 } 2055 return ctx; 2056 } 2057 2058 SSL_CTX* 2059 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile) 2060 { 2061 char *key, *pem; 2062 SSL_CTX *ctx; 2063 2064 key = nsd->options->tls_service_key; 2065 pem = nsd->options->tls_service_pem; 2066 if(!key || key[0] == 0) { 2067 log_msg(LOG_ERR, "error: no tls-service-key file specified"); 2068 return NULL; 2069 } 2070 if(!pem || pem[0] == 0) { 2071 log_msg(LOG_ERR, "error: no tls-service-pem file specified"); 2072 return NULL; 2073 } 2074 2075 /* NOTE:This mimics the existing code in Unbound 1.5.1 by supporting SSL but 2076 * raft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2*/ 2077 ctx = server_tls_ctx_setup(key, pem, verifypem); 2078 if(!ctx) { 2079 log_msg(LOG_ERR, "could not setup server TLS context"); 2080 return NULL; 2081 } 2082 if(ocspfile && ocspfile[0]) { 2083 if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) { 2084 log_crypto_err("Error reading OCSPfile"); 2085 SSL_CTX_free(ctx); 2086 return NULL; 2087 } else { 2088 VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile)); 2089 if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) { 2090 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb"); 2091 SSL_CTX_free(ctx); 2092 return NULL; 2093 } 2094 } 2095 } 2096 return ctx; 2097 } 2098 2099 /* check if tcp_handler_accept_data created for TLS dedicated port */ 2100 int 2101 using_tls_port(struct sockaddr* addr, const char* tls_port) 2102 { 2103 in_port_t port = 0; 2104 2105 if (addr->sa_family == AF_INET) 2106 port = ((struct sockaddr_in*)addr)->sin_port; 2107 #ifndef HAVE_STRUCT_SOCKADDR_IN6 2108 else 2109 port = ((struct sockaddr_in6*)addr)->sin6_port; 2110 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */ 2111 if (atoi(tls_port) == ntohs(port)) 2112 return 1; 2113 2114 return 0; 2115 } 2116 #endif 2117 2118 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ 2119 ssize_t 2120 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 2121 { 2122 uint8_t* buf = (uint8_t*) p; 2123 ssize_t total = 0; 2124 struct pollfd fd; 2125 memset(&fd, 0, sizeof(fd)); 2126 fd.fd = s; 2127 fd.events = POLLIN; 2128 2129 while( total < sz) { 2130 ssize_t ret; 2131 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000); 2132 if(ret == -1) { 2133 if(errno == EAGAIN) 2134 /* blocking read */ 2135 continue; 2136 if(errno == EINTR) { 2137 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 2138 return -1; 2139 /* other signals can be handled later */ 2140 continue; 2141 } 2142 /* some error */ 2143 return -1; 2144 } 2145 if(ret == 0) { 2146 /* operation timed out */ 2147 return -2; 2148 } 2149 ret = read(s, buf+total, sz-total); 2150 if(ret == -1) { 2151 if(errno == EAGAIN) 2152 /* blocking read */ 2153 continue; 2154 if(errno == EINTR) { 2155 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 2156 return -1; 2157 /* other signals can be handled later */ 2158 continue; 2159 } 2160 /* some error */ 2161 return -1; 2162 } 2163 if(ret == 0) { 2164 /* closed connection! */ 2165 return 0; 2166 } 2167 total += ret; 2168 } 2169 return total; 2170 } 2171 2172 static void 2173 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) 2174 { 2175 sig_atomic_t cmd = NSD_QUIT_SYNC; 2176 udb_ptr t, next; 2177 udb_base* u = nsd->task[nsd->mytask]; 2178 udb_ptr_init(&next, u); 2179 udb_ptr_new(&t, u, udb_base_get_userdata(u)); 2180 udb_base_set_userdata(u, 0); 2181 while(!udb_ptr_is_null(&t)) { 2182 /* store next in list so this one can be deleted or reused */ 2183 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); 2184 udb_rptr_zero(&TASKLIST(&t)->next, u); 2185 2186 /* process task t */ 2187 /* append results for task t and update last_task */ 2188 task_process_in_reload(nsd, u, last_task, &t); 2189 2190 /* go to next */ 2191 udb_ptr_set_ptr(&t, u, &next); 2192 2193 /* if the parent has quit, we must quit too, poll the fd for cmds */ 2194 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2195 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2196 if(cmd == NSD_QUIT) { 2197 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2198 /* sync to disk (if needed) */ 2199 udb_base_sync(nsd->db->udb, 0); 2200 /* unlink files of remainder of tasks */ 2201 while(!udb_ptr_is_null(&t)) { 2202 if(TASKLIST(&t)->task_type == task_apply_xfr) { 2203 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); 2204 } 2205 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); 2206 } 2207 udb_ptr_unlink(&t, u); 2208 udb_ptr_unlink(&next, u); 2209 exit(0); 2210 } 2211 } 2212 2213 } 2214 udb_ptr_unlink(&t, u); 2215 udb_ptr_unlink(&next, u); 2216 } 2217 2218 #ifdef BIND8_STATS 2219 static void 2220 parent_send_stats(struct nsd* nsd, int cmdfd) 2221 { 2222 size_t i; 2223 if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) { 2224 log_msg(LOG_ERR, "could not write stats to reload"); 2225 return; 2226 } 2227 for(i=0; i<nsd->child_count; i++) 2228 if(!write_socket(cmdfd, &nsd->children[i].query_count, 2229 sizeof(stc_type))) { 2230 log_msg(LOG_ERR, "could not write stats to reload"); 2231 return; 2232 } 2233 } 2234 2235 static void 2236 reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last) 2237 { 2238 struct nsdst s; 2239 stc_type* p; 2240 size_t i; 2241 if(block_read(nsd, cmdfd, &s, sizeof(s), 2242 RELOAD_SYNC_TIMEOUT) != sizeof(s)) { 2243 log_msg(LOG_ERR, "could not read stats from oldpar"); 2244 return; 2245 } 2246 s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0); 2247 s.db_mem = region_get_mem(nsd->db->region); 2248 p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s, 2249 nsd->child_count); 2250 if(!p) return; 2251 for(i=0; i<nsd->child_count; i++) { 2252 if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!= 2253 sizeof(stc_type)) 2254 return; 2255 } 2256 } 2257 #endif /* BIND8_STATS */ 2258 2259 void server_verify(struct nsd *nsd, int cmdsocket); 2260 2261 /* 2262 * Reload the database, stop parent, re-fork children and continue. 2263 * as server_main. 2264 */ 2265 static void 2266 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 2267 int cmdsocket) 2268 { 2269 pid_t mypid; 2270 sig_atomic_t cmd = NSD_QUIT_SYNC; 2271 int ret; 2272 udb_ptr last_task; 2273 struct sigaction old_sigchld, ign_sigchld; 2274 struct radnode* node; 2275 zone_type* zone; 2276 enum soainfo_hint hint; 2277 /* ignore SIGCHLD from the previous server_main that used this pid */ 2278 memset(&ign_sigchld, 0, sizeof(ign_sigchld)); 2279 ign_sigchld.sa_handler = SIG_IGN; 2280 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld); 2281 2282 #ifdef HAVE_SETPROCTITLE 2283 setproctitle("main"); 2284 #endif 2285 #ifdef HAVE_CPUSET_T 2286 if(nsd->use_cpu_affinity) { 2287 set_cpu_affinity(nsd->cpuset); 2288 } 2289 #endif 2290 2291 /* see what tasks we got from xfrd */ 2292 task_remap(nsd->task[nsd->mytask]); 2293 udb_ptr_init(&last_task, nsd->task[nsd->mytask]); 2294 udb_compact_inhibited(nsd->db->udb, 1); 2295 reload_process_tasks(nsd, &last_task, cmdsocket); 2296 udb_compact_inhibited(nsd->db->udb, 0); 2297 udb_compact(nsd->db->udb); 2298 2299 #ifndef NDEBUG 2300 if(nsd_debug_level >= 1) 2301 region_log_stats(nsd->db->region); 2302 #endif /* NDEBUG */ 2303 /* sync to disk (if needed) */ 2304 udb_base_sync(nsd->db->udb, 0); 2305 2306 initialize_dname_compression_tables(nsd); 2307 2308 #ifdef BIND8_STATS 2309 /* Restart dumping stats if required. */ 2310 time(&nsd->st.boot); 2311 set_bind8_alarm(nsd); 2312 #endif 2313 #ifdef USE_ZONE_STATS 2314 server_zonestat_realloc(nsd); /* realloc for new children */ 2315 server_zonestat_switch(nsd); 2316 #endif 2317 2318 if(nsd->options->verify_enable) { 2319 #ifdef RATELIMIT 2320 /* allocate resources for rate limiting. use a slot that is guaranteed 2321 not mapped to a file so no persistent data is overwritten */ 2322 rrl_init(nsd->child_count + 1); 2323 #endif 2324 2325 /* spin-up server and execute verifiers for each zone */ 2326 server_verify(nsd, cmdsocket); 2327 #ifdef RATELIMIT 2328 /* deallocate rate limiting resources */ 2329 rrl_deinit(nsd->child_count + 1); 2330 #endif 2331 } 2332 2333 for(node = radix_first(nsd->db->zonetree); 2334 node != NULL; 2335 node = radix_next(node)) 2336 { 2337 zone = (zone_type *)node->elem; 2338 if(zone->is_updated) { 2339 if(zone->is_bad) { 2340 nsd->mode = NSD_RELOAD_FAILED; 2341 hint = soainfo_bad; 2342 } else { 2343 hint = soainfo_ok; 2344 } 2345 /* update(s), verified or not, possibly with subsequent 2346 skipped update(s). skipped update(s) are picked up 2347 by failed update check in xfrd */ 2348 task_new_soainfo(nsd->task[nsd->mytask], &last_task, 2349 zone, hint); 2350 } else if(zone->is_skipped) { 2351 /* corrupt or inconsistent update without preceding 2352 update(s), communicate soainfo_gone */ 2353 task_new_soainfo(nsd->task[nsd->mytask], &last_task, 2354 zone, soainfo_gone); 2355 } 2356 zone->is_updated = 0; 2357 zone->is_skipped = 0; 2358 } 2359 2360 if(nsd->mode == NSD_RELOAD_FAILED) { 2361 exit(NSD_RELOAD_FAILED); 2362 } 2363 2364 /* listen for the signals of failed children again */ 2365 sigaction(SIGCHLD, &old_sigchld, NULL); 2366 #ifdef USE_DNSTAP 2367 if (nsd->dt_collector) { 2368 int *swap_fd_send; 2369 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: swap dnstap collector pipes")); 2370 /* Swap fd_send with fd_swap so old serve child and new serve 2371 * childs will not write to the same pipe ends simultaneously */ 2372 swap_fd_send = nsd->dt_collector_fd_send; 2373 nsd->dt_collector_fd_send = nsd->dt_collector_fd_swap; 2374 nsd->dt_collector_fd_swap = swap_fd_send; 2375 2376 } 2377 #endif 2378 /* Start new child processes */ 2379 if (server_start_children(nsd, server_region, netio, &nsd-> 2380 xfrd_listener->fd) != 0) { 2381 send_children_quit(nsd); 2382 exit(1); 2383 } 2384 2385 /* if the parent has quit, we must quit too, poll the fd for cmds */ 2386 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2387 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2388 if(cmd == NSD_QUIT) { 2389 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2390 send_children_quit(nsd); 2391 exit(0); 2392 } 2393 } 2394 2395 /* Send quit command to parent: blocking, wait for receipt. */ 2396 do { 2397 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 2398 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) 2399 { 2400 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s", 2401 strerror(errno)); 2402 } 2403 /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ 2404 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); 2405 ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 2406 RELOAD_SYNC_TIMEOUT); 2407 if(ret == -2) { 2408 DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry")); 2409 } 2410 } while (ret == -2); 2411 if(ret == -1) { 2412 log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s", 2413 strerror(errno)); 2414 } 2415 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd)); 2416 if(cmd == NSD_QUIT) { 2417 /* small race condition possible here, parent got quit cmd. */ 2418 send_children_quit(nsd); 2419 exit(1); 2420 } 2421 assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); 2422 #ifdef BIND8_STATS 2423 reload_do_stats(cmdsocket, nsd, &last_task); 2424 #endif 2425 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); 2426 task_process_sync(nsd->task[nsd->mytask]); 2427 #ifdef USE_ZONE_STATS 2428 server_zonestat_realloc(nsd); /* realloc for next children */ 2429 #endif 2430 2431 /* send soainfo to the xfrd process, signal it that reload is done, 2432 * it picks up the taskudb */ 2433 cmd = NSD_RELOAD_DONE; 2434 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2435 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", 2436 strerror(errno)); 2437 } 2438 mypid = getpid(); 2439 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2440 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2441 strerror(errno)); 2442 } 2443 2444 /* try to reopen file */ 2445 if (nsd->file_rotation_ok) 2446 log_reopen(nsd->log_filename, 1); 2447 /* exit reload, continue as new server_main */ 2448 } 2449 2450 /* 2451 * Get the mode depending on the signal hints that have been received. 2452 * Multiple signal hints can be received and will be handled in turn. 2453 */ 2454 static sig_atomic_t 2455 server_signal_mode(struct nsd *nsd) 2456 { 2457 if(nsd->signal_hint_quit) { 2458 nsd->signal_hint_quit = 0; 2459 return NSD_QUIT; 2460 } 2461 else if(nsd->signal_hint_shutdown) { 2462 nsd->signal_hint_shutdown = 0; 2463 return NSD_SHUTDOWN; 2464 } 2465 else if(nsd->signal_hint_child) { 2466 nsd->signal_hint_child = 0; 2467 return NSD_REAP_CHILDREN; 2468 } 2469 else if(nsd->signal_hint_reload) { 2470 nsd->signal_hint_reload = 0; 2471 return NSD_RELOAD; 2472 } 2473 else if(nsd->signal_hint_reload_hup) { 2474 nsd->signal_hint_reload_hup = 0; 2475 return NSD_RELOAD_REQ; 2476 } 2477 else if(nsd->signal_hint_stats) { 2478 nsd->signal_hint_stats = 0; 2479 #ifdef BIND8_STATS 2480 set_bind8_alarm(nsd); 2481 #endif 2482 return NSD_STATS; 2483 } 2484 else if(nsd->signal_hint_statsusr) { 2485 nsd->signal_hint_statsusr = 0; 2486 return NSD_STATS; 2487 } 2488 return NSD_RUN; 2489 } 2490 2491 /* 2492 * The main server simply waits for signals and child processes to 2493 * terminate. Child processes are restarted as necessary. 2494 */ 2495 void 2496 server_main(struct nsd *nsd) 2497 { 2498 region_type *server_region = region_create(xalloc, free); 2499 netio_type *netio = netio_create(server_region); 2500 netio_handler_type reload_listener; 2501 int reload_sockets[2] = {-1, -1}; 2502 struct timespec timeout_spec; 2503 int status; 2504 pid_t child_pid; 2505 pid_t reload_pid = -1; 2506 sig_atomic_t mode; 2507 2508 /* Ensure we are the main process */ 2509 assert(nsd->server_kind == NSD_SERVER_MAIN); 2510 2511 /* Add listener for the XFRD process */ 2512 netio_add_handler(netio, nsd->xfrd_listener); 2513 2514 /* Start the child processes that handle incoming queries */ 2515 if (server_start_children(nsd, server_region, netio, 2516 &nsd->xfrd_listener->fd) != 0) { 2517 send_children_quit(nsd); 2518 exit(1); 2519 } 2520 reload_listener.fd = -1; 2521 2522 /* This_child MUST be 0, because this is the parent process */ 2523 assert(nsd->this_child == 0); 2524 2525 /* Run the server until we get a shutdown signal */ 2526 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 2527 /* Did we receive a signal that changes our mode? */ 2528 if(mode == NSD_RUN) { 2529 nsd->mode = mode = server_signal_mode(nsd); 2530 } 2531 2532 switch (mode) { 2533 case NSD_RUN: 2534 /* see if any child processes terminated */ 2535 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) { 2536 int is_child = delete_child_pid(nsd, child_pid); 2537 if (is_child != -1 && nsd->children[is_child].need_to_exit) { 2538 if(nsd->children[is_child].child_fd == -1) 2539 nsd->children[is_child].has_exited = 1; 2540 parent_check_all_children_exited(nsd); 2541 } else if(is_child != -1) { 2542 log_msg(LOG_WARNING, 2543 "server %d died unexpectedly with status %d, restarting", 2544 (int) child_pid, status); 2545 restart_child_servers(nsd, server_region, netio, 2546 &nsd->xfrd_listener->fd); 2547 } else if (child_pid == reload_pid) { 2548 sig_atomic_t cmd = NSD_RELOAD_FAILED; 2549 pid_t mypid; 2550 log_msg(LOG_WARNING, 2551 "Reload process %d failed with status %d, continuing with old database", 2552 (int) child_pid, status); 2553 reload_pid = -1; 2554 if(reload_listener.fd != -1) close(reload_listener.fd); 2555 netio_remove_handler(netio, &reload_listener); 2556 reload_listener.fd = -1; 2557 reload_listener.event_types = NETIO_EVENT_NONE; 2558 task_process_sync(nsd->task[nsd->mytask]); 2559 /* inform xfrd reload attempt ended */ 2560 if(!write_socket(nsd->xfrd_listener->fd, 2561 &cmd, sizeof(cmd))) { 2562 log_msg(LOG_ERR, "problems " 2563 "sending SOAEND to xfrd: %s", 2564 strerror(errno)); 2565 } 2566 mypid = getpid(); 2567 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2568 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2569 strerror(errno)); 2570 } 2571 #ifdef USE_DNSTAP 2572 } else if(nsd->dt_collector && child_pid == nsd->dt_collector->dt_pid) { 2573 log_msg(LOG_WARNING, 2574 "dnstap-collector %d terminated with status %d", 2575 (int) child_pid, status); 2576 if(nsd->dt_collector) { 2577 dt_collector_close(nsd->dt_collector, nsd); 2578 dt_collector_destroy(nsd->dt_collector, nsd); 2579 nsd->dt_collector = NULL; 2580 } 2581 /* Only respawn a crashed (or exited) 2582 * dnstap-collector when not reloading, 2583 * to not induce a reload during a 2584 * reload (which would seriously 2585 * disrupt nsd procedures and lead to 2586 * unpredictable results)! 2587 * 2588 * This will *leave* a dnstap-collector 2589 * process terminated, but because 2590 * signalling of the reload process to 2591 * the main process to respawn in this 2592 * situation will be cumbersome, and 2593 * because this situation is so 2594 * specific (and therefore hopefully 2595 * extremely rare or non-existing at 2596 * all), plus the fact that we are left 2597 * with a perfectly function NSD 2598 * (besides not logging dnstap 2599 * messages), I consider it acceptable 2600 * to leave this unresolved. 2601 */ 2602 if(reload_pid == -1 && nsd->options->dnstap_enable) { 2603 nsd->dt_collector = dt_collector_create(nsd); 2604 dt_collector_start(nsd->dt_collector, nsd); 2605 nsd->mode = NSD_RELOAD_REQ; 2606 } 2607 #endif 2608 } else if(status != 0) { 2609 /* check for status, because we get 2610 * the old-servermain because reload 2611 * is the process-parent of old-main, 2612 * and we get older server-processes 2613 * that are exiting after a reload */ 2614 log_msg(LOG_WARNING, 2615 "process %d terminated with status %d", 2616 (int) child_pid, status); 2617 } 2618 } 2619 if (child_pid == -1) { 2620 if (errno == EINTR) { 2621 continue; 2622 } 2623 if (errno != ECHILD) 2624 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 2625 } 2626 if (nsd->mode != NSD_RUN) 2627 break; 2628 2629 /* timeout to collect processes. In case no sigchild happens. */ 2630 timeout_spec.tv_sec = 60; 2631 timeout_spec.tv_nsec = 0; 2632 2633 /* listen on ports, timeout for collecting terminated children */ 2634 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 2635 if (errno != EINTR) { 2636 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 2637 } 2638 } 2639 if(nsd->restart_children) { 2640 restart_child_servers(nsd, server_region, netio, 2641 &nsd->xfrd_listener->fd); 2642 nsd->restart_children = 0; 2643 } 2644 if(nsd->reload_failed) { 2645 sig_atomic_t cmd = NSD_RELOAD_FAILED; 2646 pid_t mypid; 2647 nsd->reload_failed = 0; 2648 log_msg(LOG_WARNING, 2649 "Reload process %d failed, continuing with old database", 2650 (int) reload_pid); 2651 reload_pid = -1; 2652 if(reload_listener.fd != -1) close(reload_listener.fd); 2653 netio_remove_handler(netio, &reload_listener); 2654 reload_listener.fd = -1; 2655 reload_listener.event_types = NETIO_EVENT_NONE; 2656 task_process_sync(nsd->task[nsd->mytask]); 2657 /* inform xfrd reload attempt ended */ 2658 if(!write_socket(nsd->xfrd_listener->fd, 2659 &cmd, sizeof(cmd))) { 2660 log_msg(LOG_ERR, "problems " 2661 "sending SOAEND to xfrd: %s", 2662 strerror(errno)); 2663 } 2664 mypid = getpid(); 2665 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2666 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2667 strerror(errno)); 2668 } 2669 } 2670 2671 break; 2672 case NSD_RELOAD_REQ: { 2673 sig_atomic_t cmd = NSD_RELOAD_REQ; 2674 log_msg(LOG_WARNING, "SIGHUP received, reloading..."); 2675 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2676 "main: ipc send reload_req to xfrd")); 2677 if(!write_socket(nsd->xfrd_listener->fd, 2678 &cmd, sizeof(cmd))) { 2679 log_msg(LOG_ERR, "server_main: could not send " 2680 "reload_req to xfrd: %s", strerror(errno)); 2681 } 2682 nsd->mode = NSD_RUN; 2683 } break; 2684 case NSD_RELOAD: 2685 /* Continue to run nsd after reload */ 2686 nsd->mode = NSD_RUN; 2687 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); 2688 if (reload_pid != -1) { 2689 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 2690 (int) reload_pid); 2691 break; 2692 } 2693 2694 /* switch the mytask to keep track of who owns task*/ 2695 nsd->mytask = 1 - nsd->mytask; 2696 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 2697 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 2698 reload_pid = -1; 2699 break; 2700 } 2701 2702 /* Do actual reload */ 2703 reload_pid = fork(); 2704 switch (reload_pid) { 2705 case -1: 2706 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 2707 break; 2708 default: 2709 /* PARENT */ 2710 close(reload_sockets[0]); 2711 server_reload(nsd, server_region, netio, 2712 reload_sockets[1]); 2713 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 2714 close(reload_sockets[1]); 2715 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 2716 /* drop stale xfrd ipc data */ 2717 ((struct ipc_handler_conn_data*)nsd-> 2718 xfrd_listener->user_data) 2719 ->conn->is_reading = 0; 2720 reload_pid = -1; 2721 reload_listener.fd = -1; 2722 reload_listener.event_types = NETIO_EVENT_NONE; 2723 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 2724 break; 2725 case 0: 2726 /* CHILD */ 2727 /* server_main keep running until NSD_QUIT_SYNC 2728 * received from reload. */ 2729 close(reload_sockets[1]); 2730 reload_listener.fd = reload_sockets[0]; 2731 reload_listener.timeout = NULL; 2732 reload_listener.user_data = nsd; 2733 reload_listener.event_types = NETIO_EVENT_READ; 2734 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 2735 netio_add_handler(netio, &reload_listener); 2736 reload_pid = getppid(); 2737 break; 2738 } 2739 break; 2740 case NSD_QUIT_SYNC: 2741 /* synchronisation of xfrd, parent and reload */ 2742 if(!nsd->quit_sync_done && reload_listener.fd != -1) { 2743 sig_atomic_t cmd = NSD_RELOAD; 2744 /* stop xfrd ipc writes in progress */ 2745 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2746 "main: ipc send indication reload")); 2747 if(!write_socket(nsd->xfrd_listener->fd, 2748 &cmd, sizeof(cmd))) { 2749 log_msg(LOG_ERR, "server_main: could not send reload " 2750 "indication to xfrd: %s", strerror(errno)); 2751 } 2752 /* wait for ACK from xfrd */ 2753 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 2754 nsd->quit_sync_done = 1; 2755 } 2756 nsd->mode = NSD_RUN; 2757 break; 2758 case NSD_QUIT: 2759 /* silent shutdown during reload */ 2760 if(reload_listener.fd != -1) { 2761 /* acknowledge the quit, to sync reload that we will really quit now */ 2762 sig_atomic_t cmd = NSD_RELOAD; 2763 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 2764 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2765 log_msg(LOG_ERR, "server_main: " 2766 "could not ack quit: %s", strerror(errno)); 2767 } 2768 #ifdef BIND8_STATS 2769 parent_send_stats(nsd, reload_listener.fd); 2770 #endif /* BIND8_STATS */ 2771 close(reload_listener.fd); 2772 } 2773 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); 2774 /* only quit children after xfrd has acked */ 2775 send_children_quit(nsd); 2776 2777 #ifdef MEMCLEAN /* OS collects memory pages */ 2778 region_destroy(server_region); 2779 #endif 2780 server_shutdown(nsd); 2781 2782 /* ENOTREACH */ 2783 break; 2784 case NSD_SHUTDOWN: 2785 break; 2786 case NSD_REAP_CHILDREN: 2787 /* continue; wait for child in run loop */ 2788 nsd->mode = NSD_RUN; 2789 break; 2790 case NSD_STATS: 2791 #ifdef BIND8_STATS 2792 set_children_stats(nsd); 2793 #endif 2794 nsd->mode = NSD_RUN; 2795 break; 2796 default: 2797 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); 2798 nsd->mode = NSD_RUN; 2799 break; 2800 } 2801 } 2802 log_msg(LOG_WARNING, "signal received, shutting down..."); 2803 2804 /* close opened ports to avoid race with restart of nsd */ 2805 server_close_all_sockets(nsd->udp, nsd->ifs); 2806 server_close_all_sockets(nsd->tcp, nsd->ifs); 2807 #ifdef HAVE_SSL 2808 daemon_remote_close(nsd->rc); 2809 #endif 2810 send_children_quit_and_wait(nsd); 2811 2812 /* Unlink it if possible... */ 2813 unlinkpid(nsd->pidfile); 2814 unlink(nsd->task[0]->fname); 2815 unlink(nsd->task[1]->fname); 2816 #ifdef USE_ZONE_STATS 2817 unlink(nsd->zonestatfname[0]); 2818 unlink(nsd->zonestatfname[1]); 2819 #endif 2820 #ifdef USE_DNSTAP 2821 dt_collector_close(nsd->dt_collector, nsd); 2822 #endif 2823 2824 if(reload_listener.fd != -1) { 2825 sig_atomic_t cmd = NSD_QUIT; 2826 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2827 "main: ipc send quit to reload-process")); 2828 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2829 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 2830 strerror(errno)); 2831 } 2832 fsync(reload_listener.fd); 2833 close(reload_listener.fd); 2834 /* wait for reload to finish processing */ 2835 while(1) { 2836 if(waitpid(reload_pid, NULL, 0) == -1) { 2837 if(errno == EINTR) continue; 2838 if(errno == ECHILD) break; 2839 log_msg(LOG_ERR, "waitpid(reload %d): %s", 2840 (int)reload_pid, strerror(errno)); 2841 } 2842 break; 2843 } 2844 } 2845 if(nsd->xfrd_listener->fd != -1) { 2846 /* complete quit, stop xfrd */ 2847 sig_atomic_t cmd = NSD_QUIT; 2848 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2849 "main: ipc send quit to xfrd")); 2850 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2851 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 2852 strerror(errno)); 2853 } 2854 fsync(nsd->xfrd_listener->fd); 2855 close(nsd->xfrd_listener->fd); 2856 (void)kill(nsd->pid, SIGTERM); 2857 } 2858 2859 #ifdef MEMCLEAN /* OS collects memory pages */ 2860 region_destroy(server_region); 2861 #endif 2862 /* write the nsd.db to disk, wait for it to complete */ 2863 udb_base_sync(nsd->db->udb, 1); 2864 udb_base_close(nsd->db->udb); 2865 server_shutdown(nsd); 2866 } 2867 2868 static query_state_type 2869 server_process_query(struct nsd *nsd, struct query *query, uint32_t *now_p) 2870 { 2871 return query_process(query, nsd, now_p); 2872 } 2873 2874 static query_state_type 2875 server_process_query_udp(struct nsd *nsd, struct query *query, uint32_t *now_p) 2876 { 2877 #ifdef RATELIMIT 2878 if(query_process(query, nsd, now_p) != QUERY_DISCARDED) { 2879 if(query->edns.cookie_status != COOKIE_VALID 2880 && query->edns.cookie_status != COOKIE_VALID_REUSE 2881 && rrl_process_query(query)) 2882 return rrl_slip(query); 2883 else return QUERY_PROCESSED; 2884 } 2885 return QUERY_DISCARDED; 2886 #else 2887 return query_process(query, nsd, now_p); 2888 #endif 2889 } 2890 2891 const char* 2892 nsd_event_vs(void) 2893 { 2894 #ifdef USE_MINI_EVENT 2895 return ""; 2896 #else 2897 return event_get_version(); 2898 #endif 2899 } 2900 2901 #if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS) 2902 static const char* ub_ev_backend2str(int b) 2903 { 2904 switch(b) { 2905 case EVBACKEND_SELECT: return "select"; 2906 case EVBACKEND_POLL: return "poll"; 2907 case EVBACKEND_EPOLL: return "epoll"; 2908 case EVBACKEND_KQUEUE: return "kqueue"; 2909 case EVBACKEND_DEVPOLL: return "devpoll"; 2910 case EVBACKEND_PORT: return "evport"; 2911 } 2912 return "unknown"; 2913 } 2914 #endif 2915 2916 const char* 2917 nsd_event_method(void) 2918 { 2919 #ifdef USE_MINI_EVENT 2920 return "select"; 2921 #else 2922 struct event_base* b = nsd_child_event_base(); 2923 const char* m = "?"; 2924 # ifdef EV_FEATURE_BACKENDS 2925 m = ub_ev_backend2str(ev_backend((struct ev_loop*)b)); 2926 # elif defined(HAVE_EVENT_BASE_GET_METHOD) 2927 m = event_base_get_method(b); 2928 # endif 2929 # ifdef MEMCLEAN 2930 event_base_free(b); 2931 # endif 2932 return m; 2933 #endif 2934 } 2935 2936 struct event_base* 2937 nsd_child_event_base(void) 2938 { 2939 struct event_base* base; 2940 #ifdef USE_MINI_EVENT 2941 static time_t secs; 2942 static struct timeval now; 2943 base = event_init(&secs, &now); 2944 #else 2945 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) 2946 /* libev */ 2947 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); 2948 # else 2949 /* libevent */ 2950 # ifdef HAVE_EVENT_BASE_NEW 2951 base = event_base_new(); 2952 # else 2953 base = event_init(); 2954 # endif 2955 # endif 2956 #endif 2957 return base; 2958 } 2959 2960 static void 2961 add_udp_handler( 2962 struct nsd *nsd, 2963 struct nsd_socket *sock, 2964 struct udp_handler_data *data) 2965 { 2966 struct event *handler = &data->event; 2967 2968 data->nsd = nsd; 2969 data->socket = sock; 2970 2971 memset(handler, 0, sizeof(*handler)); 2972 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data); 2973 if(event_base_set(nsd->event_base, handler) != 0) 2974 log_msg(LOG_ERR, "nsd udp: event_base_set failed"); 2975 if(event_add(handler, NULL) != 0) 2976 log_msg(LOG_ERR, "nsd udp: event_add failed"); 2977 } 2978 2979 void 2980 add_tcp_handler( 2981 struct nsd *nsd, 2982 struct nsd_socket *sock, 2983 struct tcp_accept_handler_data *data) 2984 { 2985 struct event *handler = &data->event; 2986 2987 data->nsd = nsd; 2988 data->socket = sock; 2989 2990 #ifdef HAVE_SSL 2991 if (nsd->tls_ctx && 2992 nsd->options->tls_port && 2993 using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port)) 2994 { 2995 data->tls_accept = 1; 2996 if(verbosity >= 2) { 2997 char buf[48]; 2998 addrport2str((void*)(struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf)); 2999 VERBOSITY(4, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf)); 3000 } 3001 } else { 3002 data->tls_accept = 0; 3003 } 3004 #endif 3005 3006 memset(handler, 0, sizeof(*handler)); 3007 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data); 3008 if(event_base_set(nsd->event_base, handler) != 0) 3009 log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); 3010 if(event_add(handler, NULL) != 0) 3011 log_msg(LOG_ERR, "nsd tcp: event_add failed"); 3012 data->event_added = 1; 3013 } 3014 3015 /* 3016 * Serve DNS request to verifiers (short-lived) 3017 */ 3018 void server_verify(struct nsd *nsd, int cmdsocket) 3019 { 3020 size_t size = 0; 3021 struct event cmd_event, signal_event, exit_event; 3022 struct zone *zone; 3023 3024 assert(nsd != NULL); 3025 3026 zone = verify_next_zone(nsd, NULL); 3027 if(zone == NULL) 3028 return; 3029 3030 nsd->server_region = region_create(xalloc, free); 3031 nsd->event_base = nsd_child_event_base(); 3032 3033 nsd->next_zone_to_verify = zone; 3034 nsd->verifier_count = 0; 3035 nsd->verifier_limit = nsd->options->verifier_count; 3036 size = sizeof(struct verifier) * nsd->verifier_limit; 3037 pipe(nsd->verifier_pipe); 3038 fcntl(nsd->verifier_pipe[0], F_SETFD, FD_CLOEXEC); 3039 fcntl(nsd->verifier_pipe[1], F_SETFD, FD_CLOEXEC); 3040 nsd->verifiers = region_alloc_zero(nsd->server_region, size); 3041 3042 for(size_t i = 0; i < nsd->verifier_limit; i++) { 3043 nsd->verifiers[i].nsd = nsd; 3044 nsd->verifiers[i].zone = NULL; 3045 nsd->verifiers[i].pid = -1; 3046 nsd->verifiers[i].output_stream.fd = -1; 3047 nsd->verifiers[i].output_stream.priority = LOG_INFO; 3048 nsd->verifiers[i].error_stream.fd = -1; 3049 nsd->verifiers[i].error_stream.priority = LOG_ERR; 3050 } 3051 3052 event_set(&cmd_event, cmdsocket, EV_READ|EV_PERSIST, verify_handle_command, nsd); 3053 if(event_base_set(nsd->event_base, &cmd_event) != 0 || 3054 event_add(&cmd_event, NULL) != 0) 3055 { 3056 log_msg(LOG_ERR, "verify: could not add command event"); 3057 goto fail; 3058 } 3059 3060 event_set(&signal_event, SIGCHLD, EV_SIGNAL|EV_PERSIST, verify_handle_signal, nsd); 3061 if(event_base_set(nsd->event_base, &signal_event) != 0 || 3062 signal_add(&signal_event, NULL) != 0) 3063 { 3064 log_msg(LOG_ERR, "verify: could not add signal event"); 3065 goto fail; 3066 } 3067 3068 event_set(&exit_event, nsd->verifier_pipe[0], EV_READ|EV_PERSIST, verify_handle_exit, nsd); 3069 if(event_base_set(nsd->event_base, &exit_event) != 0 || 3070 event_add(&exit_event, NULL) != 0) 3071 { 3072 log_msg(LOG_ERR, "verify: could not add exit event"); 3073 goto fail; 3074 } 3075 3076 memset(msgs, 0, sizeof(msgs)); 3077 for (int i = 0; i < NUM_RECV_PER_SELECT; i++) { 3078 queries[i] = query_create(nsd->server_region, 3079 compressed_dname_offsets, 3080 compression_table_size, compressed_dnames); 3081 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3082 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 3083 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3084 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3085 msgs[i].msg_hdr.msg_iovlen = 1; 3086 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 3087 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3088 } 3089 3090 for (size_t i = 0; i < nsd->verify_ifs; i++) { 3091 struct udp_handler_data *data; 3092 data = region_alloc_zero( 3093 nsd->server_region, sizeof(*data)); 3094 add_udp_handler(nsd, &nsd->verify_udp[i], data); 3095 } 3096 3097 tcp_accept_handler_count = nsd->verify_ifs; 3098 tcp_accept_handlers = region_alloc_array(nsd->server_region, 3099 nsd->verify_ifs, sizeof(*tcp_accept_handlers)); 3100 3101 for (size_t i = 0; i < nsd->verify_ifs; i++) { 3102 struct tcp_accept_handler_data *data; 3103 data = &tcp_accept_handlers[i]; 3104 memset(data, 0, sizeof(*data)); 3105 add_tcp_handler(nsd, &nsd->verify_tcp[i], data); 3106 } 3107 3108 while(nsd->next_zone_to_verify != NULL && 3109 nsd->verifier_count < nsd->verifier_limit) 3110 { 3111 verify_zone(nsd, nsd->next_zone_to_verify); 3112 nsd->next_zone_to_verify 3113 = verify_next_zone(nsd, nsd->next_zone_to_verify); 3114 } 3115 3116 /* short-lived main loop */ 3117 event_base_dispatch(nsd->event_base); 3118 3119 /* remove command and exit event handlers */ 3120 event_del(&exit_event); 3121 event_del(&signal_event); 3122 event_del(&cmd_event); 3123 3124 assert(nsd->next_zone_to_verify == NULL || nsd->mode == NSD_QUIT); 3125 assert(nsd->verifier_count == 0 || nsd->mode == NSD_QUIT); 3126 fail: 3127 event_base_free(nsd->event_base); 3128 close(nsd->verifier_pipe[0]); 3129 close(nsd->verifier_pipe[1]); 3130 region_destroy(nsd->server_region); 3131 3132 nsd->event_base = NULL; 3133 nsd->server_region = NULL; 3134 nsd->verifier_limit = 0; 3135 nsd->verifier_pipe[0] = -1; 3136 nsd->verifier_pipe[1] = -1; 3137 nsd->verifiers = NULL; 3138 } 3139 3140 /* 3141 * Serve DNS requests. 3142 */ 3143 void 3144 server_child(struct nsd *nsd) 3145 { 3146 size_t i, from, numifs; 3147 region_type *server_region = region_create(xalloc, free); 3148 struct event_base* event_base = nsd_child_event_base(); 3149 sig_atomic_t mode; 3150 3151 if(!event_base) { 3152 log_msg(LOG_ERR, "nsd server could not create event base"); 3153 exit(1); 3154 } 3155 nsd->event_base = event_base; 3156 nsd->server_region = server_region; 3157 3158 #ifdef RATELIMIT 3159 rrl_init(nsd->this_child->child_num); 3160 #endif 3161 3162 assert(nsd->server_kind != NSD_SERVER_MAIN); 3163 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 3164 3165 #ifdef HAVE_SETPROCTITLE 3166 setproctitle("server %d", nsd->this_child->child_num + 1); 3167 #endif 3168 #ifdef HAVE_CPUSET_T 3169 if(nsd->use_cpu_affinity) { 3170 set_cpu_affinity(nsd->this_child->cpuset); 3171 } 3172 #endif 3173 3174 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 3175 server_close_all_sockets(nsd->tcp, nsd->ifs); 3176 } 3177 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 3178 server_close_all_sockets(nsd->udp, nsd->ifs); 3179 } 3180 3181 if (nsd->this_child->parent_fd != -1) { 3182 struct event *handler; 3183 struct ipc_handler_conn_data* user_data = 3184 (struct ipc_handler_conn_data*)region_alloc( 3185 server_region, sizeof(struct ipc_handler_conn_data)); 3186 user_data->nsd = nsd; 3187 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); 3188 3189 handler = (struct event*) region_alloc( 3190 server_region, sizeof(*handler)); 3191 memset(handler, 0, sizeof(*handler)); 3192 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| 3193 EV_READ, child_handle_parent_command, user_data); 3194 if(event_base_set(event_base, handler) != 0) 3195 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); 3196 if(event_add(handler, NULL) != 0) 3197 log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); 3198 } 3199 3200 if(nsd->reuseport) { 3201 numifs = nsd->ifs / nsd->reuseport; 3202 from = numifs * nsd->this_child->child_num; 3203 if(from+numifs > nsd->ifs) { /* should not happen */ 3204 from = 0; 3205 numifs = nsd->ifs; 3206 } 3207 } else { 3208 from = 0; 3209 numifs = nsd->ifs; 3210 } 3211 3212 if (nsd->server_kind & NSD_SERVER_UDP) { 3213 int child = nsd->this_child->child_num; 3214 memset(msgs, 0, sizeof(msgs)); 3215 for (i = 0; i < NUM_RECV_PER_SELECT; i++) { 3216 queries[i] = query_create(server_region, 3217 compressed_dname_offsets, 3218 compression_table_size, compressed_dnames); 3219 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3220 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 3221 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3222 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3223 msgs[i].msg_hdr.msg_iovlen = 1; 3224 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 3225 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3226 } 3227 3228 for (i = 0; i < nsd->ifs; i++) { 3229 int listen; 3230 struct udp_handler_data *data; 3231 3232 listen = nsd_bitset_isset(nsd->udp[i].servers, child); 3233 3234 if(i >= from && i < (from + numifs) && listen) { 3235 data = region_alloc_zero( 3236 nsd->server_region, sizeof(*data)); 3237 add_udp_handler(nsd, &nsd->udp[i], data); 3238 } else { 3239 /* close sockets intended for other servers */ 3240 server_close_socket(&nsd->udp[i]); 3241 } 3242 } 3243 } 3244 3245 /* 3246 * Keep track of all the TCP accept handlers so we can enable 3247 * and disable them based on the current number of active TCP 3248 * connections. 3249 */ 3250 if (nsd->server_kind & NSD_SERVER_TCP) { 3251 int child = nsd->this_child->child_num; 3252 tcp_accept_handler_count = numifs; 3253 tcp_accept_handlers = region_alloc_array(server_region, 3254 numifs, sizeof(*tcp_accept_handlers)); 3255 3256 for (i = 0; i < nsd->ifs; i++) { 3257 int listen; 3258 struct tcp_accept_handler_data *data; 3259 3260 listen = nsd_bitset_isset(nsd->tcp[i].servers, child); 3261 3262 if(i >= from && i < (from + numifs) && listen) { 3263 data = &tcp_accept_handlers[i-from]; 3264 memset(data, 0, sizeof(*data)); 3265 add_tcp_handler(nsd, &nsd->tcp[i], data); 3266 } else { 3267 /* close sockets intended for other servers */ 3268 /* 3269 * uncomment this once tcp servers are no 3270 * longer copied in the tcp fd copy line 3271 * in server_init(). 3272 server_close_socket(&nsd->tcp[i]); 3273 */ 3274 /* close sockets not meant for this server*/ 3275 if(!listen) 3276 server_close_socket(&nsd->tcp[i]); 3277 } 3278 } 3279 } else { 3280 tcp_accept_handler_count = 0; 3281 } 3282 3283 /* The main loop... */ 3284 while ((mode = nsd->mode) != NSD_QUIT) { 3285 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 3286 3287 /* Do we need to do the statistics... */ 3288 if (mode == NSD_STATS) { 3289 #ifdef BIND8_STATS 3290 int p = nsd->st.period; 3291 nsd->st.period = 1; /* force stats printout */ 3292 /* Dump the statistics */ 3293 bind8_stats(nsd); 3294 nsd->st.period = p; 3295 #else /* !BIND8_STATS */ 3296 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 3297 #endif /* BIND8_STATS */ 3298 3299 nsd->mode = NSD_RUN; 3300 } 3301 else if (mode == NSD_REAP_CHILDREN) { 3302 /* got signal, notify parent. parent reaps terminated children. */ 3303 if (nsd->this_child->parent_fd != -1) { 3304 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 3305 if (write(nsd->this_child->parent_fd, 3306 &parent_notify, 3307 sizeof(parent_notify)) == -1) 3308 { 3309 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 3310 (int) nsd->this_child->pid, strerror(errno)); 3311 } 3312 } else /* no parent, so reap 'em */ 3313 while (waitpid(-1, NULL, WNOHANG) > 0) ; 3314 nsd->mode = NSD_RUN; 3315 } 3316 else if(mode == NSD_RUN) { 3317 /* Wait for a query... */ 3318 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 3319 if (errno != EINTR) { 3320 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 3321 break; 3322 } 3323 } 3324 } else if(mode == NSD_QUIT) { 3325 /* ignore here, quit */ 3326 } else { 3327 log_msg(LOG_ERR, "mode bad value %d, back to service.", 3328 (int)mode); 3329 nsd->mode = NSD_RUN; 3330 } 3331 } 3332 3333 service_remaining_tcp(nsd); 3334 #ifdef BIND8_STATS 3335 bind8_stats(nsd); 3336 #endif /* BIND8_STATS */ 3337 3338 #ifdef MEMCLEAN /* OS collects memory pages */ 3339 #ifdef RATELIMIT 3340 rrl_deinit(nsd->this_child->child_num); 3341 #endif 3342 event_base_free(event_base); 3343 region_destroy(server_region); 3344 #endif 3345 server_shutdown(nsd); 3346 } 3347 3348 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg) 3349 { 3350 int* timed_out = (int*)arg; 3351 assert(event & EV_TIMEOUT); (void)event; 3352 /* wake up the service tcp thread, note event is no longer 3353 * registered */ 3354 *timed_out = 1; 3355 } 3356 3357 void 3358 service_remaining_tcp(struct nsd* nsd) 3359 { 3360 struct tcp_handler_data* p; 3361 struct event_base* event_base; 3362 /* check if it is needed */ 3363 if(nsd->current_tcp_count == 0 || tcp_active_list == NULL) 3364 return; 3365 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections")); 3366 #ifdef USE_DNSTAP 3367 /* remove dnstap collector, we cannot write there because the new 3368 * child process is using the file descriptor, or the child 3369 * process after that. */ 3370 dt_collector_destroy(nsd->dt_collector, nsd); 3371 nsd->dt_collector = NULL; 3372 #endif 3373 /* setup event base */ 3374 event_base = nsd_child_event_base(); 3375 if(!event_base) { 3376 log_msg(LOG_ERR, "nsd remain tcp could not create event base"); 3377 return; 3378 } 3379 /* register tcp connections */ 3380 for(p = tcp_active_list; p != NULL; p = p->next) { 3381 struct timeval timeout; 3382 int fd = p->event.ev_fd; 3383 #ifdef USE_MINI_EVENT 3384 short event = p->event.ev_flags & (EV_READ|EV_WRITE); 3385 #else 3386 short event = p->event.ev_events & (EV_READ|EV_WRITE); 3387 #endif 3388 void (*fn)(int, short, void*); 3389 #ifdef HAVE_SSL 3390 if(p->tls) { 3391 if((event&EV_READ)) 3392 fn = handle_tls_reading; 3393 else fn = handle_tls_writing; 3394 } else { 3395 #endif 3396 if((event&EV_READ)) 3397 fn = handle_tcp_reading; 3398 else fn = handle_tcp_writing; 3399 #ifdef HAVE_SSL 3400 } 3401 #endif 3402 3403 p->tcp_no_more_queries = 1; 3404 /* set timeout to 1/10 second */ 3405 if(p->tcp_timeout > 100) 3406 p->tcp_timeout = 100; 3407 timeout.tv_sec = p->tcp_timeout / 1000; 3408 timeout.tv_usec = (p->tcp_timeout % 1000)*1000; 3409 event_del(&p->event); 3410 memset(&p->event, 0, sizeof(p->event)); 3411 event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT, 3412 fn, p); 3413 if(event_base_set(event_base, &p->event) != 0) 3414 log_msg(LOG_ERR, "event base set failed"); 3415 if(event_add(&p->event, &timeout) != 0) 3416 log_msg(LOG_ERR, "event add failed"); 3417 } 3418 3419 /* handle it */ 3420 while(nsd->current_tcp_count > 0) { 3421 mode_t m = server_signal_mode(nsd); 3422 struct event timeout; 3423 struct timeval tv; 3424 int timed_out = 0; 3425 if(m == NSD_QUIT || m == NSD_SHUTDOWN || 3426 m == NSD_REAP_CHILDREN) { 3427 /* quit */ 3428 break; 3429 } 3430 /* timer */ 3431 /* have to do something every second */ 3432 tv.tv_sec = 1; 3433 tv.tv_usec = 0; 3434 memset(&timeout, 0, sizeof(timeout)); 3435 event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout, 3436 &timed_out); 3437 if(event_base_set(event_base, &timeout) != 0) 3438 log_msg(LOG_ERR, "remaintcp timer: event_base_set failed"); 3439 if(event_add(&timeout, &tv) != 0) 3440 log_msg(LOG_ERR, "remaintcp timer: event_add failed"); 3441 3442 /* service loop */ 3443 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 3444 if (errno != EINTR) { 3445 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 3446 break; 3447 } 3448 } 3449 if(!timed_out) { 3450 event_del(&timeout); 3451 } else { 3452 /* timed out, quit */ 3453 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit")); 3454 break; 3455 } 3456 } 3457 #ifdef MEMCLEAN 3458 event_base_free(event_base); 3459 #endif 3460 /* continue to quit after return */ 3461 } 3462 3463 /* Implement recvmmsg and sendmmsg if the platform does not. These functions 3464 * are always used, even if nonblocking operations are broken, in which case 3465 * NUM_RECV_PER_SELECT is defined to 1 (one). 3466 */ 3467 #if defined(HAVE_RECVMMSG) 3468 #define nsd_recvmmsg recvmmsg 3469 #else /* !HAVE_RECVMMSG */ 3470 3471 static int 3472 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, 3473 int flags, struct timespec *timeout) 3474 { 3475 unsigned int vpos = 0; 3476 ssize_t rcvd; 3477 3478 /* timeout is ignored, ensure caller does not expect it to work */ 3479 assert(timeout == NULL); (void)timeout; 3480 3481 while(vpos < vlen) { 3482 rcvd = recvfrom(sockfd, 3483 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3484 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3485 flags, 3486 msgvec[vpos].msg_hdr.msg_name, 3487 &msgvec[vpos].msg_hdr.msg_namelen); 3488 if(rcvd < 0) { 3489 break; 3490 } else { 3491 assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX); 3492 msgvec[vpos].msg_len = (unsigned int)rcvd; 3493 vpos++; 3494 } 3495 } 3496 3497 if(vpos) { 3498 /* error will be picked up next time */ 3499 return (int)vpos; 3500 } else if(errno == 0) { 3501 return 0; 3502 } else if(errno == EAGAIN) { 3503 return 0; 3504 } 3505 3506 return -1; 3507 } 3508 #endif /* HAVE_RECVMMSG */ 3509 3510 #ifdef HAVE_SENDMMSG 3511 #define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__) 3512 #else /* !HAVE_SENDMMSG */ 3513 3514 static int 3515 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags) 3516 { 3517 unsigned int vpos = 0; 3518 ssize_t snd; 3519 3520 while(vpos < vlen) { 3521 assert(msgvec[vpos].msg_hdr.msg_iovlen == 1); 3522 snd = sendto(sockfd, 3523 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3524 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3525 flags, 3526 msgvec[vpos].msg_hdr.msg_name, 3527 msgvec[vpos].msg_hdr.msg_namelen); 3528 if(snd < 0) { 3529 break; 3530 } else { 3531 msgvec[vpos].msg_len = (unsigned int)snd; 3532 vpos++; 3533 } 3534 } 3535 3536 if(vpos) { 3537 return (int)vpos; 3538 } else if(errno == 0) { 3539 return 0; 3540 } 3541 3542 return -1; 3543 } 3544 #endif /* HAVE_SENDMMSG */ 3545 3546 static int 3547 port_is_zero( 3548 #ifdef INET6 3549 struct sockaddr_storage *addr 3550 #else 3551 struct sockaddr_in *addr 3552 #endif 3553 ) 3554 { 3555 #ifdef INET6 3556 if(addr->ss_family == AF_INET6) { 3557 return (((struct sockaddr_in6 *)addr)->sin6_port) == 0; 3558 } else if(addr->ss_family == AF_INET) { 3559 return (((struct sockaddr_in *)addr)->sin_port) == 0; 3560 } 3561 return 0; 3562 #else 3563 if(addr->sin_family == AF_INET) { 3564 return addr->sin_port == 0; 3565 } 3566 return 0; 3567 #endif 3568 } 3569 3570 static void 3571 handle_udp(int fd, short event, void* arg) 3572 { 3573 struct udp_handler_data *data = (struct udp_handler_data *) arg; 3574 int received, sent, recvcount, i; 3575 struct query *q; 3576 uint32_t now = 0; 3577 3578 if (!(event & EV_READ)) { 3579 return; 3580 } 3581 recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 3582 /* this printf strangely gave a performance increase on Linux */ 3583 /* printf("recvcount %d \n", recvcount); */ 3584 if (recvcount == -1) { 3585 if (errno != EAGAIN && errno != EINTR) { 3586 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 3587 STATUP(data->nsd, rxerr); 3588 /* No zone statup */ 3589 } 3590 /* Simply no data available */ 3591 return; 3592 } 3593 for (i = 0; i < recvcount; i++) { 3594 loopstart: 3595 received = msgs[i].msg_len; 3596 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 3597 q = queries[i]; 3598 if (received == -1) { 3599 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( 3600 #if defined(HAVE_RECVMMSG) 3601 msgs[i].msg_hdr.msg_flags 3602 #else 3603 errno 3604 #endif 3605 )); 3606 STATUP(data->nsd, rxerr); 3607 /* No zone statup */ 3608 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3609 iovecs[i].iov_len = buffer_remaining(q->packet); 3610 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3611 goto swap_drop; 3612 } 3613 3614 /* Account... */ 3615 #ifdef BIND8_STATS 3616 if (data->socket->addr.ai_family == AF_INET) { 3617 STATUP(data->nsd, qudp); 3618 } else if (data->socket->addr.ai_family == AF_INET6) { 3619 STATUP(data->nsd, qudp6); 3620 } 3621 #endif 3622 3623 buffer_skip(q->packet, received); 3624 buffer_flip(q->packet); 3625 #ifdef USE_DNSTAP 3626 /* 3627 * sending UDP-query with server address (local) and client address to dnstap process 3628 */ 3629 log_addr("query from client", &q->addr); 3630 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 3631 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &q->addr, q->addrlen, 3632 q->tcp, q->packet); 3633 #endif /* USE_DNSTAP */ 3634 3635 /* Process and answer the query... */ 3636 if (server_process_query_udp(data->nsd, q, &now) != QUERY_DISCARDED) { 3637 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 3638 STATUP(data->nsd, nona); 3639 ZTATUP(data->nsd, q->zone, nona); 3640 } 3641 3642 #ifdef USE_ZONE_STATS 3643 if (data->socket->addr.ai_family == AF_INET) { 3644 ZTATUP(data->nsd, q->zone, qudp); 3645 } else if (data->socket->addr.ai_family == AF_INET6) { 3646 ZTATUP(data->nsd, q->zone, qudp6); 3647 } 3648 #endif 3649 3650 /* Add EDNS0 and TSIG info if necessary. */ 3651 query_add_optional(q, data->nsd, &now); 3652 3653 buffer_flip(q->packet); 3654 iovecs[i].iov_len = buffer_remaining(q->packet); 3655 #ifdef BIND8_STATS 3656 /* Account the rcode & TC... */ 3657 STATUP2(data->nsd, rcode, RCODE(q->packet)); 3658 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 3659 if (TC(q->packet)) { 3660 STATUP(data->nsd, truncated); 3661 ZTATUP(data->nsd, q->zone, truncated); 3662 } 3663 #endif /* BIND8_STATS */ 3664 #ifdef USE_DNSTAP 3665 /* 3666 * sending UDP-response with server address (local) and client address to dnstap process 3667 */ 3668 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 3669 log_addr("response to client", &q->addr); 3670 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, 3671 &q->addr, q->addrlen, q->tcp, q->packet, 3672 q->zone); 3673 #endif /* USE_DNSTAP */ 3674 } else { 3675 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3676 iovecs[i].iov_len = buffer_remaining(q->packet); 3677 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3678 swap_drop: 3679 STATUP(data->nsd, dropped); 3680 ZTATUP(data->nsd, q->zone, dropped); 3681 if(i != recvcount-1) { 3682 /* swap with last and decrease recvcount */ 3683 struct mmsghdr mtmp = msgs[i]; 3684 struct iovec iotmp = iovecs[i]; 3685 recvcount--; 3686 msgs[i] = msgs[recvcount]; 3687 iovecs[i] = iovecs[recvcount]; 3688 queries[i] = queries[recvcount]; 3689 msgs[recvcount] = mtmp; 3690 iovecs[recvcount] = iotmp; 3691 queries[recvcount] = q; 3692 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3693 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; 3694 goto loopstart; 3695 } else { recvcount --; } 3696 } 3697 } 3698 3699 /* send until all are sent */ 3700 i = 0; 3701 while(i<recvcount) { 3702 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 3703 if(sent == -1) { 3704 if(errno == ENOBUFS || 3705 #ifdef EWOULDBLOCK 3706 errno == EWOULDBLOCK || 3707 #endif 3708 errno == EAGAIN) { 3709 /* block to wait until send buffer avail */ 3710 int flag, errstore; 3711 if((flag = fcntl(fd, F_GETFL)) == -1) { 3712 log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno)); 3713 flag = 0; 3714 } 3715 flag &= ~O_NONBLOCK; 3716 if(fcntl(fd, F_SETFL, flag) == -1) 3717 log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno)); 3718 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 3719 errstore = errno; 3720 flag |= O_NONBLOCK; 3721 if(fcntl(fd, F_SETFL, flag) == -1) 3722 log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno)); 3723 if(sent != -1) { 3724 i += sent; 3725 continue; 3726 } 3727 errno = errstore; 3728 } 3729 if(errno == EINVAL) { 3730 /* skip the invalid argument entry, 3731 * send the remaining packets in the list */ 3732 if(!(port_is_zero((void*)&queries[i]->addr) && 3733 verbosity < 3)) { 3734 const char* es = strerror(errno); 3735 char a[64]; 3736 addrport2str((void*)&queries[i]->addr, a, sizeof(a)); 3737 log_msg(LOG_ERR, "sendmmsg skip invalid argument [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 3738 } 3739 i += 1; 3740 continue; 3741 } 3742 /* don't log transient network full errors, unless 3743 * on higher verbosity */ 3744 if(!(errno == ENOBUFS && verbosity < 1) && 3745 #ifdef EWOULDBLOCK 3746 errno != EWOULDBLOCK && 3747 #endif 3748 errno != EAGAIN) { 3749 const char* es = strerror(errno); 3750 char a[64]; 3751 addrport2str((void*)&queries[i]->addr, a, sizeof(a)); 3752 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 3753 } 3754 #ifdef BIND8_STATS 3755 data->nsd->st.txerr += recvcount-i; 3756 #endif /* BIND8_STATS */ 3757 break; 3758 } 3759 i += sent; 3760 } 3761 for(i=0; i<recvcount; i++) { 3762 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3763 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3764 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3765 } 3766 } 3767 3768 #ifdef HAVE_SSL 3769 /* 3770 * Setup an event for the tcp handler. 3771 */ 3772 static void 3773 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *), 3774 int fd, short event) 3775 { 3776 struct timeval timeout; 3777 struct event_base* ev_base; 3778 3779 timeout.tv_sec = data->nsd->tcp_timeout; 3780 timeout.tv_usec = 0L; 3781 3782 ev_base = data->event.ev_base; 3783 event_del(&data->event); 3784 memset(&data->event, 0, sizeof(data->event)); 3785 event_set(&data->event, fd, event, fn, data); 3786 if(event_base_set(ev_base, &data->event) != 0) 3787 log_msg(LOG_ERR, "event base set failed"); 3788 if(event_add(&data->event, &timeout) != 0) 3789 log_msg(LOG_ERR, "event add failed"); 3790 } 3791 #endif /* HAVE_SSL */ 3792 3793 static void 3794 cleanup_tcp_handler(struct tcp_handler_data* data) 3795 { 3796 event_del(&data->event); 3797 #ifdef HAVE_SSL 3798 if(data->tls) { 3799 SSL_shutdown(data->tls); 3800 SSL_free(data->tls); 3801 data->tls = NULL; 3802 } 3803 #endif 3804 close(data->event.ev_fd); 3805 if(data->prev) 3806 data->prev->next = data->next; 3807 else tcp_active_list = data->next; 3808 if(data->next) 3809 data->next->prev = data->prev; 3810 3811 /* 3812 * Enable the TCP accept handlers when the current number of 3813 * TCP connections is about to drop below the maximum number 3814 * of TCP connections. 3815 */ 3816 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 3817 configure_handler_event_types(EV_READ|EV_PERSIST); 3818 if(slowaccept) { 3819 event_del(&slowaccept_event); 3820 slowaccept = 0; 3821 } 3822 } 3823 --data->nsd->current_tcp_count; 3824 assert(data->nsd->current_tcp_count >= 0); 3825 3826 region_destroy(data->region); 3827 } 3828 3829 static void 3830 handle_tcp_reading(int fd, short event, void* arg) 3831 { 3832 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3833 ssize_t received; 3834 struct event_base* ev_base; 3835 struct timeval timeout; 3836 uint32_t now = 0; 3837 3838 if ((event & EV_TIMEOUT)) { 3839 /* Connection timed out. */ 3840 cleanup_tcp_handler(data); 3841 return; 3842 } 3843 3844 if ((data->nsd->tcp_query_count > 0 && 3845 data->query_count >= data->nsd->tcp_query_count) || 3846 data->tcp_no_more_queries) { 3847 /* No more queries allowed on this tcp connection. */ 3848 cleanup_tcp_handler(data); 3849 return; 3850 } 3851 3852 assert((event & EV_READ)); 3853 3854 if (data->bytes_transmitted == 0) { 3855 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 3856 } 3857 3858 /* 3859 * Check if we received the leading packet length bytes yet. 3860 */ 3861 if (data->bytes_transmitted < sizeof(uint16_t)) { 3862 received = read(fd, 3863 (char *) &data->query->tcplen 3864 + data->bytes_transmitted, 3865 sizeof(uint16_t) - data->bytes_transmitted); 3866 if (received == -1) { 3867 if (errno == EAGAIN || errno == EINTR) { 3868 /* 3869 * Read would block, wait until more 3870 * data is available. 3871 */ 3872 return; 3873 } else { 3874 char buf[48]; 3875 addr2str(&data->query->addr, buf, sizeof(buf)); 3876 #ifdef ECONNRESET 3877 if (verbosity >= 2 || errno != ECONNRESET) 3878 #endif /* ECONNRESET */ 3879 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3880 cleanup_tcp_handler(data); 3881 return; 3882 } 3883 } else if (received == 0) { 3884 /* EOF */ 3885 cleanup_tcp_handler(data); 3886 return; 3887 } 3888 3889 data->bytes_transmitted += received; 3890 if (data->bytes_transmitted < sizeof(uint16_t)) { 3891 /* 3892 * Not done with the tcplen yet, wait for more 3893 * data to become available. 3894 */ 3895 return; 3896 } 3897 3898 assert(data->bytes_transmitted == sizeof(uint16_t)); 3899 3900 data->query->tcplen = ntohs(data->query->tcplen); 3901 3902 /* 3903 * Minimum query size is: 3904 * 3905 * Size of the header (12) 3906 * + Root domain name (1) 3907 * + Query class (2) 3908 * + Query type (2) 3909 */ 3910 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 3911 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 3912 cleanup_tcp_handler(data); 3913 return; 3914 } 3915 3916 if (data->query->tcplen > data->query->maxlen) { 3917 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 3918 cleanup_tcp_handler(data); 3919 return; 3920 } 3921 3922 buffer_set_limit(data->query->packet, data->query->tcplen); 3923 } 3924 3925 assert(buffer_remaining(data->query->packet) > 0); 3926 3927 /* Read the (remaining) query data. */ 3928 received = read(fd, 3929 buffer_current(data->query->packet), 3930 buffer_remaining(data->query->packet)); 3931 if (received == -1) { 3932 if (errno == EAGAIN || errno == EINTR) { 3933 /* 3934 * Read would block, wait until more data is 3935 * available. 3936 */ 3937 return; 3938 } else { 3939 char buf[48]; 3940 addr2str(&data->query->addr, buf, sizeof(buf)); 3941 #ifdef ECONNRESET 3942 if (verbosity >= 2 || errno != ECONNRESET) 3943 #endif /* ECONNRESET */ 3944 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3945 cleanup_tcp_handler(data); 3946 return; 3947 } 3948 } else if (received == 0) { 3949 /* EOF */ 3950 cleanup_tcp_handler(data); 3951 return; 3952 } 3953 3954 data->bytes_transmitted += received; 3955 buffer_skip(data->query->packet, received); 3956 if (buffer_remaining(data->query->packet) > 0) { 3957 /* 3958 * Message not yet complete, wait for more data to 3959 * become available. 3960 */ 3961 return; 3962 } 3963 3964 assert(buffer_position(data->query->packet) == data->query->tcplen); 3965 3966 /* Account... */ 3967 #ifdef BIND8_STATS 3968 #ifndef INET6 3969 STATUP(data->nsd, ctcp); 3970 #else 3971 if (data->query->addr.ss_family == AF_INET) { 3972 STATUP(data->nsd, ctcp); 3973 } else if (data->query->addr.ss_family == AF_INET6) { 3974 STATUP(data->nsd, ctcp6); 3975 } 3976 #endif 3977 #endif /* BIND8_STATS */ 3978 3979 /* We have a complete query, process it. */ 3980 3981 /* tcp-query-count: handle query counter ++ */ 3982 data->query_count++; 3983 3984 buffer_flip(data->query->packet); 3985 #ifdef USE_DNSTAP 3986 /* 3987 * and send TCP-query with found address (local) and client address to dnstap process 3988 */ 3989 log_addr("query from client", &data->query->addr); 3990 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 3991 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 3992 data->query->addrlen, data->query->tcp, data->query->packet); 3993 #endif /* USE_DNSTAP */ 3994 data->query_state = server_process_query(data->nsd, data->query, &now); 3995 if (data->query_state == QUERY_DISCARDED) { 3996 /* Drop the packet and the entire connection... */ 3997 STATUP(data->nsd, dropped); 3998 ZTATUP(data->nsd, data->query->zone, dropped); 3999 cleanup_tcp_handler(data); 4000 return; 4001 } 4002 4003 #ifdef BIND8_STATS 4004 if (RCODE(data->query->packet) == RCODE_OK 4005 && !AA(data->query->packet)) 4006 { 4007 STATUP(data->nsd, nona); 4008 ZTATUP(data->nsd, data->query->zone, nona); 4009 } 4010 #endif /* BIND8_STATS */ 4011 4012 #ifdef USE_ZONE_STATS 4013 #ifndef INET6 4014 ZTATUP(data->nsd, data->query->zone, ctcp); 4015 #else 4016 if (data->query->addr.ss_family == AF_INET) { 4017 ZTATUP(data->nsd, data->query->zone, ctcp); 4018 } else if (data->query->addr.ss_family == AF_INET6) { 4019 ZTATUP(data->nsd, data->query->zone, ctcp6); 4020 } 4021 #endif 4022 #endif /* USE_ZONE_STATS */ 4023 4024 query_add_optional(data->query, data->nsd, &now); 4025 4026 /* Switch to the tcp write handler. */ 4027 buffer_flip(data->query->packet); 4028 data->query->tcplen = buffer_remaining(data->query->packet); 4029 #ifdef BIND8_STATS 4030 /* Account the rcode & TC... */ 4031 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 4032 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 4033 if (TC(data->query->packet)) { 4034 STATUP(data->nsd, truncated); 4035 ZTATUP(data->nsd, data->query->zone, truncated); 4036 } 4037 #endif /* BIND8_STATS */ 4038 #ifdef USE_DNSTAP 4039 /* 4040 * sending TCP-response with found (earlier) address (local) and client address to dnstap process 4041 */ 4042 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 4043 log_addr("response to client", &data->query->addr); 4044 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 4045 data->query->addrlen, data->query->tcp, data->query->packet, 4046 data->query->zone); 4047 #endif /* USE_DNSTAP */ 4048 data->bytes_transmitted = 0; 4049 4050 timeout.tv_sec = data->tcp_timeout / 1000; 4051 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4052 4053 ev_base = data->event.ev_base; 4054 event_del(&data->event); 4055 memset(&data->event, 0, sizeof(data->event)); 4056 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 4057 handle_tcp_reading, data); 4058 if(event_base_set(ev_base, &data->event) != 0) 4059 log_msg(LOG_ERR, "event base set tcpr failed"); 4060 if(event_add(&data->event, &timeout) != 0) 4061 log_msg(LOG_ERR, "event add tcpr failed"); 4062 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 4063 handle_tcp_writing(fd, EV_WRITE, data); 4064 } 4065 4066 static void 4067 handle_tcp_writing(int fd, short event, void* arg) 4068 { 4069 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4070 ssize_t sent; 4071 struct query *q = data->query; 4072 struct timeval timeout; 4073 struct event_base* ev_base; 4074 uint32_t now = 0; 4075 4076 if ((event & EV_TIMEOUT)) { 4077 /* Connection timed out. */ 4078 cleanup_tcp_handler(data); 4079 return; 4080 } 4081 4082 assert((event & EV_WRITE)); 4083 4084 if (data->bytes_transmitted < sizeof(q->tcplen)) { 4085 /* Writing the response packet length. */ 4086 uint16_t n_tcplen = htons(q->tcplen); 4087 #ifdef HAVE_WRITEV 4088 struct iovec iov[2]; 4089 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted; 4090 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 4091 iov[1].iov_base = buffer_begin(q->packet); 4092 iov[1].iov_len = buffer_limit(q->packet); 4093 sent = writev(fd, iov, 2); 4094 #else /* HAVE_WRITEV */ 4095 sent = write(fd, 4096 (const char *) &n_tcplen + data->bytes_transmitted, 4097 sizeof(n_tcplen) - data->bytes_transmitted); 4098 #endif /* HAVE_WRITEV */ 4099 if (sent == -1) { 4100 if (errno == EAGAIN || errno == EINTR) { 4101 /* 4102 * Write would block, wait until 4103 * socket becomes writable again. 4104 */ 4105 return; 4106 } else { 4107 #ifdef ECONNRESET 4108 if(verbosity >= 2 || errno != ECONNRESET) 4109 #endif /* ECONNRESET */ 4110 #ifdef EPIPE 4111 if(verbosity >= 2 || errno != EPIPE) 4112 #endif /* EPIPE 'broken pipe' */ 4113 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 4114 cleanup_tcp_handler(data); 4115 return; 4116 } 4117 } 4118 4119 data->bytes_transmitted += sent; 4120 if (data->bytes_transmitted < sizeof(q->tcplen)) { 4121 /* 4122 * Writing not complete, wait until socket 4123 * becomes writable again. 4124 */ 4125 return; 4126 } 4127 4128 #ifdef HAVE_WRITEV 4129 sent -= sizeof(n_tcplen); 4130 /* handle potential 'packet done' code */ 4131 goto packet_could_be_done; 4132 #endif 4133 } 4134 4135 sent = write(fd, 4136 buffer_current(q->packet), 4137 buffer_remaining(q->packet)); 4138 if (sent == -1) { 4139 if (errno == EAGAIN || errno == EINTR) { 4140 /* 4141 * Write would block, wait until 4142 * socket becomes writable again. 4143 */ 4144 return; 4145 } else { 4146 #ifdef ECONNRESET 4147 if(verbosity >= 2 || errno != ECONNRESET) 4148 #endif /* ECONNRESET */ 4149 #ifdef EPIPE 4150 if(verbosity >= 2 || errno != EPIPE) 4151 #endif /* EPIPE 'broken pipe' */ 4152 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 4153 cleanup_tcp_handler(data); 4154 return; 4155 } 4156 } 4157 4158 data->bytes_transmitted += sent; 4159 #ifdef HAVE_WRITEV 4160 packet_could_be_done: 4161 #endif 4162 buffer_skip(q->packet, sent); 4163 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 4164 /* 4165 * Still more data to write when socket becomes 4166 * writable again. 4167 */ 4168 return; 4169 } 4170 4171 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 4172 4173 if (data->query_state == QUERY_IN_AXFR || 4174 data->query_state == QUERY_IN_IXFR) { 4175 /* Continue processing AXFR and writing back results. */ 4176 buffer_clear(q->packet); 4177 if(data->query_state == QUERY_IN_AXFR) 4178 data->query_state = query_axfr(data->nsd, q, 0); 4179 else data->query_state = query_ixfr(data->nsd, q); 4180 if (data->query_state != QUERY_PROCESSED) { 4181 query_add_optional(data->query, data->nsd, &now); 4182 4183 /* Reset data. */ 4184 buffer_flip(q->packet); 4185 q->tcplen = buffer_remaining(q->packet); 4186 data->bytes_transmitted = 0; 4187 /* Reset timeout. */ 4188 timeout.tv_sec = data->tcp_timeout / 1000; 4189 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4190 ev_base = data->event.ev_base; 4191 event_del(&data->event); 4192 memset(&data->event, 0, sizeof(data->event)); 4193 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 4194 handle_tcp_writing, data); 4195 if(event_base_set(ev_base, &data->event) != 0) 4196 log_msg(LOG_ERR, "event base set tcpw failed"); 4197 if(event_add(&data->event, &timeout) != 0) 4198 log_msg(LOG_ERR, "event add tcpw failed"); 4199 4200 /* 4201 * Write data if/when the socket is writable 4202 * again. 4203 */ 4204 return; 4205 } 4206 } 4207 4208 /* 4209 * Done sending, wait for the next request to arrive on the 4210 * TCP socket by installing the TCP read handler. 4211 */ 4212 if ((data->nsd->tcp_query_count > 0 && 4213 data->query_count >= data->nsd->tcp_query_count) || 4214 data->tcp_no_more_queries) { 4215 4216 (void) shutdown(fd, SHUT_WR); 4217 } 4218 4219 data->bytes_transmitted = 0; 4220 4221 timeout.tv_sec = data->tcp_timeout / 1000; 4222 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 4223 ev_base = data->event.ev_base; 4224 event_del(&data->event); 4225 memset(&data->event, 0, sizeof(data->event)); 4226 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 4227 handle_tcp_reading, data); 4228 if(event_base_set(ev_base, &data->event) != 0) 4229 log_msg(LOG_ERR, "event base set tcpw failed"); 4230 if(event_add(&data->event, &timeout) != 0) 4231 log_msg(LOG_ERR, "event add tcpw failed"); 4232 } 4233 4234 #ifdef HAVE_SSL 4235 /** create SSL object and associate fd */ 4236 static SSL* 4237 incoming_ssl_fd(SSL_CTX* ctx, int fd) 4238 { 4239 SSL* ssl = SSL_new((SSL_CTX*)ctx); 4240 if(!ssl) { 4241 log_crypto_err("could not SSL_new"); 4242 return NULL; 4243 } 4244 SSL_set_accept_state(ssl); 4245 (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY); 4246 if(!SSL_set_fd(ssl, fd)) { 4247 log_crypto_err("could not SSL_set_fd"); 4248 SSL_free(ssl); 4249 return NULL; 4250 } 4251 return ssl; 4252 } 4253 4254 /** TLS handshake to upgrade TCP connection */ 4255 static int 4256 tls_handshake(struct tcp_handler_data* data, int fd, int writing) 4257 { 4258 int r; 4259 if(data->shake_state == tls_hs_read_event) { 4260 /* read condition satisfied back to writing */ 4261 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4262 data->shake_state = tls_hs_none; 4263 return 1; 4264 } 4265 if(data->shake_state == tls_hs_write_event) { 4266 /* write condition satisfied back to reading */ 4267 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4268 data->shake_state = tls_hs_none; 4269 return 1; 4270 } 4271 4272 /* (continue to) setup the TLS connection */ 4273 ERR_clear_error(); 4274 r = SSL_do_handshake(data->tls); 4275 4276 if(r != 1) { 4277 int want = SSL_get_error(data->tls, r); 4278 if(want == SSL_ERROR_WANT_READ) { 4279 if(data->shake_state == tls_hs_read) { 4280 /* try again later */ 4281 return 1; 4282 } 4283 data->shake_state = tls_hs_read; 4284 /* switch back to reading mode */ 4285 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4286 return 1; 4287 } else if(want == SSL_ERROR_WANT_WRITE) { 4288 if(data->shake_state == tls_hs_write) { 4289 /* try again later */ 4290 return 1; 4291 } 4292 data->shake_state = tls_hs_write; 4293 /* switch back to writing mode */ 4294 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4295 return 1; 4296 } else { 4297 if(r == 0) 4298 VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely")); 4299 else { 4300 unsigned long err = ERR_get_error(); 4301 if(!squelch_err_ssl_handshake(err)) { 4302 char a[64], s[256]; 4303 addr2str(&data->query->addr, a, sizeof(a)); 4304 snprintf(s, sizeof(s), "TLS handshake failed from %s", a); 4305 log_crypto_from_err(s, err); 4306 } 4307 } 4308 cleanup_tcp_handler(data); 4309 return 0; 4310 } 4311 } 4312 4313 /* Use to log successful upgrade for testing - could be removed*/ 4314 VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded.")); 4315 /* set back to the event we need to have when reading (or writing) */ 4316 if(data->shake_state == tls_hs_read && writing) { 4317 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 4318 } else if(data->shake_state == tls_hs_write && !writing) { 4319 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 4320 } 4321 data->shake_state = tls_hs_none; 4322 return 1; 4323 } 4324 4325 /** handle TLS reading of incoming query */ 4326 static void 4327 handle_tls_reading(int fd, short event, void* arg) 4328 { 4329 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4330 ssize_t received; 4331 uint32_t now = 0; 4332 4333 if ((event & EV_TIMEOUT)) { 4334 /* Connection timed out. */ 4335 cleanup_tcp_handler(data); 4336 return; 4337 } 4338 4339 if ((data->nsd->tcp_query_count > 0 && 4340 data->query_count >= data->nsd->tcp_query_count) || 4341 data->tcp_no_more_queries) { 4342 /* No more queries allowed on this tcp connection. */ 4343 cleanup_tcp_handler(data); 4344 return; 4345 } 4346 4347 assert((event & EV_READ)); 4348 4349 if (data->bytes_transmitted == 0) { 4350 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 4351 } 4352 4353 if(data->shake_state != tls_hs_none) { 4354 if(!tls_handshake(data, fd, 0)) 4355 return; 4356 if(data->shake_state != tls_hs_none) 4357 return; 4358 } 4359 4360 /* 4361 * Check if we received the leading packet length bytes yet. 4362 */ 4363 if(data->bytes_transmitted < sizeof(uint16_t)) { 4364 ERR_clear_error(); 4365 if((received=SSL_read(data->tls, (char *) &data->query->tcplen 4366 + data->bytes_transmitted, 4367 sizeof(uint16_t) - data->bytes_transmitted)) <= 0) { 4368 int want = SSL_get_error(data->tls, received); 4369 if(want == SSL_ERROR_ZERO_RETURN) { 4370 cleanup_tcp_handler(data); 4371 return; /* shutdown, closed */ 4372 } else if(want == SSL_ERROR_WANT_READ) { 4373 /* wants to be called again */ 4374 return; 4375 } 4376 else if(want == SSL_ERROR_WANT_WRITE) { 4377 /* switch to writing */ 4378 data->shake_state = tls_hs_write_event; 4379 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4380 return; 4381 } 4382 cleanup_tcp_handler(data); 4383 log_crypto_err("could not SSL_read"); 4384 return; 4385 } 4386 4387 data->bytes_transmitted += received; 4388 if (data->bytes_transmitted < sizeof(uint16_t)) { 4389 /* 4390 * Not done with the tcplen yet, wait for more 4391 * data to become available. 4392 */ 4393 return; 4394 } 4395 4396 assert(data->bytes_transmitted == sizeof(uint16_t)); 4397 4398 data->query->tcplen = ntohs(data->query->tcplen); 4399 4400 /* 4401 * Minimum query size is: 4402 * 4403 * Size of the header (12) 4404 * + Root domain name (1) 4405 * + Query class (2) 4406 * + Query type (2) 4407 */ 4408 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 4409 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 4410 cleanup_tcp_handler(data); 4411 return; 4412 } 4413 4414 if (data->query->tcplen > data->query->maxlen) { 4415 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 4416 cleanup_tcp_handler(data); 4417 return; 4418 } 4419 4420 buffer_set_limit(data->query->packet, data->query->tcplen); 4421 } 4422 4423 assert(buffer_remaining(data->query->packet) > 0); 4424 4425 /* Read the (remaining) query data. */ 4426 ERR_clear_error(); 4427 received = SSL_read(data->tls, (void*)buffer_current(data->query->packet), 4428 (int)buffer_remaining(data->query->packet)); 4429 if(received <= 0) { 4430 int want = SSL_get_error(data->tls, received); 4431 if(want == SSL_ERROR_ZERO_RETURN) { 4432 cleanup_tcp_handler(data); 4433 return; /* shutdown, closed */ 4434 } else if(want == SSL_ERROR_WANT_READ) { 4435 /* wants to be called again */ 4436 return; 4437 } 4438 else if(want == SSL_ERROR_WANT_WRITE) { 4439 /* switch back writing */ 4440 data->shake_state = tls_hs_write_event; 4441 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4442 return; 4443 } 4444 cleanup_tcp_handler(data); 4445 log_crypto_err("could not SSL_read"); 4446 return; 4447 } 4448 4449 data->bytes_transmitted += received; 4450 buffer_skip(data->query->packet, received); 4451 if (buffer_remaining(data->query->packet) > 0) { 4452 /* 4453 * Message not yet complete, wait for more data to 4454 * become available. 4455 */ 4456 return; 4457 } 4458 4459 assert(buffer_position(data->query->packet) == data->query->tcplen); 4460 4461 /* Account... */ 4462 #ifndef INET6 4463 STATUP(data->nsd, ctls); 4464 #else 4465 if (data->query->addr.ss_family == AF_INET) { 4466 STATUP(data->nsd, ctls); 4467 } else if (data->query->addr.ss_family == AF_INET6) { 4468 STATUP(data->nsd, ctls6); 4469 } 4470 #endif 4471 4472 /* We have a complete query, process it. */ 4473 4474 /* tcp-query-count: handle query counter ++ */ 4475 data->query_count++; 4476 4477 buffer_flip(data->query->packet); 4478 #ifdef USE_DNSTAP 4479 /* 4480 * and send TCP-query with found address (local) and client address to dnstap process 4481 */ 4482 log_addr("query from client", &data->query->addr); 4483 log_addr("to server (local)", (void*)&data->socket->addr.ai_addr); 4484 dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 4485 data->query->addrlen, data->query->tcp, data->query->packet); 4486 #endif /* USE_DNSTAP */ 4487 data->query_state = server_process_query(data->nsd, data->query, &now); 4488 if (data->query_state == QUERY_DISCARDED) { 4489 /* Drop the packet and the entire connection... */ 4490 STATUP(data->nsd, dropped); 4491 ZTATUP(data->nsd, data->query->zone, dropped); 4492 cleanup_tcp_handler(data); 4493 return; 4494 } 4495 4496 #ifdef BIND8_STATS 4497 if (RCODE(data->query->packet) == RCODE_OK 4498 && !AA(data->query->packet)) 4499 { 4500 STATUP(data->nsd, nona); 4501 ZTATUP(data->nsd, data->query->zone, nona); 4502 } 4503 #endif /* BIND8_STATS */ 4504 4505 #ifdef USE_ZONE_STATS 4506 #ifndef INET6 4507 ZTATUP(data->nsd, data->query->zone, ctls); 4508 #else 4509 if (data->query->addr.ss_family == AF_INET) { 4510 ZTATUP(data->nsd, data->query->zone, ctls); 4511 } else if (data->query->addr.ss_family == AF_INET6) { 4512 ZTATUP(data->nsd, data->query->zone, ctls6); 4513 } 4514 #endif 4515 #endif /* USE_ZONE_STATS */ 4516 4517 query_add_optional(data->query, data->nsd, &now); 4518 4519 /* Switch to the tcp write handler. */ 4520 buffer_flip(data->query->packet); 4521 data->query->tcplen = buffer_remaining(data->query->packet); 4522 #ifdef BIND8_STATS 4523 /* Account the rcode & TC... */ 4524 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 4525 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 4526 if (TC(data->query->packet)) { 4527 STATUP(data->nsd, truncated); 4528 ZTATUP(data->nsd, data->query->zone, truncated); 4529 } 4530 #endif /* BIND8_STATS */ 4531 #ifdef USE_DNSTAP 4532 /* 4533 * sending TCP-response with found (earlier) address (local) and client address to dnstap process 4534 */ 4535 log_addr("from server (local)", (void*)&data->socket->addr.ai_addr); 4536 log_addr("response to client", &data->query->addr); 4537 dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr, 4538 data->query->addrlen, data->query->tcp, data->query->packet, 4539 data->query->zone); 4540 #endif /* USE_DNSTAP */ 4541 data->bytes_transmitted = 0; 4542 4543 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4544 4545 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 4546 handle_tls_writing(fd, EV_WRITE, data); 4547 } 4548 4549 /** handle TLS writing of outgoing response */ 4550 static void 4551 handle_tls_writing(int fd, short event, void* arg) 4552 { 4553 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 4554 ssize_t sent; 4555 struct query *q = data->query; 4556 /* static variable that holds reassembly buffer used to put the 4557 * TCP length in front of the packet, like writev. */ 4558 static buffer_type* global_tls_temp_buffer = NULL; 4559 buffer_type* write_buffer; 4560 uint32_t now = 0; 4561 4562 if ((event & EV_TIMEOUT)) { 4563 /* Connection timed out. */ 4564 cleanup_tcp_handler(data); 4565 return; 4566 } 4567 4568 assert((event & EV_WRITE)); 4569 4570 if(data->shake_state != tls_hs_none) { 4571 if(!tls_handshake(data, fd, 1)) 4572 return; 4573 if(data->shake_state != tls_hs_none) 4574 return; 4575 } 4576 4577 (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE); 4578 4579 /* If we are writing the start of a message, we must include the length 4580 * this is done with a copy into write_buffer. */ 4581 write_buffer = NULL; 4582 if (data->bytes_transmitted == 0) { 4583 if(!global_tls_temp_buffer) { 4584 /* gets deallocated when nsd shuts down from 4585 * nsd.region */ 4586 global_tls_temp_buffer = buffer_create(nsd.region, 4587 QIOBUFSZ + sizeof(q->tcplen)); 4588 if (!global_tls_temp_buffer) { 4589 return; 4590 } 4591 } 4592 write_buffer = global_tls_temp_buffer; 4593 buffer_clear(write_buffer); 4594 buffer_write_u16(write_buffer, q->tcplen); 4595 buffer_write(write_buffer, buffer_current(q->packet), 4596 (int)buffer_remaining(q->packet)); 4597 buffer_flip(write_buffer); 4598 } else { 4599 write_buffer = q->packet; 4600 } 4601 4602 /* Write the response */ 4603 ERR_clear_error(); 4604 sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer)); 4605 if(sent <= 0) { 4606 int want = SSL_get_error(data->tls, sent); 4607 if(want == SSL_ERROR_ZERO_RETURN) { 4608 cleanup_tcp_handler(data); 4609 /* closed */ 4610 } else if(want == SSL_ERROR_WANT_READ) { 4611 /* switch back to reading */ 4612 data->shake_state = tls_hs_read_event; 4613 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 4614 } else if(want != SSL_ERROR_WANT_WRITE) { 4615 cleanup_tcp_handler(data); 4616 log_crypto_err("could not SSL_write"); 4617 } 4618 return; 4619 } 4620 4621 buffer_skip(write_buffer, sent); 4622 if(buffer_remaining(write_buffer) != 0) { 4623 /* If not all sent, sync up the real buffer if it wasn't used.*/ 4624 if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) { 4625 buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen)); 4626 } 4627 } 4628 4629 data->bytes_transmitted += sent; 4630 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 4631 /* 4632 * Still more data to write when socket becomes 4633 * writable again. 4634 */ 4635 return; 4636 } 4637 4638 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 4639 4640 if (data->query_state == QUERY_IN_AXFR || 4641 data->query_state == QUERY_IN_IXFR) { 4642 /* Continue processing AXFR and writing back results. */ 4643 buffer_clear(q->packet); 4644 if(data->query_state == QUERY_IN_AXFR) 4645 data->query_state = query_axfr(data->nsd, q, 0); 4646 else data->query_state = query_ixfr(data->nsd, q); 4647 if (data->query_state != QUERY_PROCESSED) { 4648 query_add_optional(data->query, data->nsd, &now); 4649 4650 /* Reset data. */ 4651 buffer_flip(q->packet); 4652 q->tcplen = buffer_remaining(q->packet); 4653 data->bytes_transmitted = 0; 4654 /* Reset to writing mode. */ 4655 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4656 4657 /* 4658 * Write data if/when the socket is writable 4659 * again. 4660 */ 4661 return; 4662 } 4663 } 4664 4665 /* 4666 * Done sending, wait for the next request to arrive on the 4667 * TCP socket by installing the TCP read handler. 4668 */ 4669 if ((data->nsd->tcp_query_count > 0 && 4670 data->query_count >= data->nsd->tcp_query_count) || 4671 data->tcp_no_more_queries) { 4672 4673 (void) shutdown(fd, SHUT_WR); 4674 } 4675 4676 data->bytes_transmitted = 0; 4677 4678 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 4679 } 4680 #endif 4681 4682 static void 4683 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), 4684 void* ATTR_UNUSED(arg)) 4685 { 4686 if(slowaccept) { 4687 configure_handler_event_types(EV_PERSIST | EV_READ); 4688 slowaccept = 0; 4689 } 4690 } 4691 4692 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) 4693 { 4694 #ifndef HAVE_ACCEPT4 4695 int s = accept(fd, addr, addrlen); 4696 if (s != -1) { 4697 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 4698 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 4699 close(s); 4700 s = -1; 4701 errno=EINTR; /* stop error printout as error in accept4 4702 by setting this errno, it omits printout, in 4703 later code that calls nsd_accept4 */ 4704 } 4705 } 4706 return s; 4707 #else 4708 return accept4(fd, addr, addrlen, SOCK_NONBLOCK); 4709 #endif /* HAVE_ACCEPT4 */ 4710 } 4711 4712 /* 4713 * Handle an incoming TCP connection. The connection is accepted and 4714 * a new TCP reader event handler is added. The TCP handler 4715 * is responsible for cleanup when the connection is closed. 4716 */ 4717 static void 4718 handle_tcp_accept(int fd, short event, void* arg) 4719 { 4720 struct tcp_accept_handler_data *data 4721 = (struct tcp_accept_handler_data *) arg; 4722 int s; 4723 int reject = 0; 4724 struct tcp_handler_data *tcp_data; 4725 region_type *tcp_region; 4726 #ifdef INET6 4727 struct sockaddr_storage addr; 4728 #else 4729 struct sockaddr_in addr; 4730 #endif 4731 socklen_t addrlen; 4732 struct timeval timeout; 4733 4734 if (!(event & EV_READ)) { 4735 return; 4736 } 4737 4738 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 4739 reject = data->nsd->options->tcp_reject_overflow; 4740 if (!reject) { 4741 return; 4742 } 4743 } 4744 4745 /* Accept it... */ 4746 addrlen = sizeof(addr); 4747 s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen); 4748 if (s == -1) { 4749 /** 4750 * EMFILE and ENFILE is a signal that the limit of open 4751 * file descriptors has been reached. Pause accept(). 4752 * EINTR is a signal interrupt. The others are various OS ways 4753 * of saying that the client has closed the connection. 4754 */ 4755 if (errno == EMFILE || errno == ENFILE) { 4756 if (!slowaccept) { 4757 /* disable accept events */ 4758 struct timeval tv; 4759 configure_handler_event_types(0); 4760 tv.tv_sec = SLOW_ACCEPT_TIMEOUT; 4761 tv.tv_usec = 0L; 4762 memset(&slowaccept_event, 0, 4763 sizeof(slowaccept_event)); 4764 event_set(&slowaccept_event, -1, EV_TIMEOUT, 4765 handle_slowaccept_timeout, NULL); 4766 (void)event_base_set(data->event.ev_base, 4767 &slowaccept_event); 4768 (void)event_add(&slowaccept_event, &tv); 4769 slowaccept = 1; 4770 /* We don't want to spam the logs here */ 4771 } 4772 } else if (errno != EINTR 4773 && errno != EWOULDBLOCK 4774 #ifdef ECONNABORTED 4775 && errno != ECONNABORTED 4776 #endif /* ECONNABORTED */ 4777 #ifdef EPROTO 4778 && errno != EPROTO 4779 #endif /* EPROTO */ 4780 ) { 4781 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 4782 } 4783 return; 4784 } 4785 4786 if (reject) { 4787 shutdown(s, SHUT_RDWR); 4788 close(s); 4789 return; 4790 } 4791 4792 /* 4793 * This region is deallocated when the TCP connection is 4794 * closed by the TCP handler. 4795 */ 4796 tcp_region = region_create(xalloc, free); 4797 tcp_data = (struct tcp_handler_data *) region_alloc( 4798 tcp_region, sizeof(struct tcp_handler_data)); 4799 tcp_data->region = tcp_region; 4800 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 4801 compression_table_size, compressed_dnames); 4802 tcp_data->nsd = data->nsd; 4803 tcp_data->query_count = 0; 4804 #ifdef HAVE_SSL 4805 tcp_data->shake_state = tls_hs_none; 4806 tcp_data->tls = NULL; 4807 #endif 4808 tcp_data->prev = NULL; 4809 tcp_data->next = NULL; 4810 4811 tcp_data->query_state = QUERY_PROCESSED; 4812 tcp_data->bytes_transmitted = 0; 4813 memcpy(&tcp_data->query->addr, &addr, addrlen); 4814 tcp_data->query->addrlen = addrlen; 4815 4816 tcp_data->tcp_no_more_queries = 0; 4817 tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000; 4818 if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) { 4819 /* very busy, give smaller timeout */ 4820 tcp_data->tcp_timeout = 200; 4821 } 4822 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4823 timeout.tv_sec = tcp_data->tcp_timeout / 1000; 4824 timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000; 4825 4826 #ifdef USE_DNSTAP 4827 /* save the address of the connection */ 4828 tcp_data->socket = data->socket; 4829 #endif /* USE_DNSTAP */ 4830 4831 #ifdef HAVE_SSL 4832 if (data->tls_accept) { 4833 tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s); 4834 if(!tcp_data->tls) { 4835 close(s); 4836 return; 4837 } 4838 tcp_data->shake_state = tls_hs_read; 4839 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4840 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4841 handle_tls_reading, tcp_data); 4842 } else { 4843 #endif 4844 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4845 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4846 handle_tcp_reading, tcp_data); 4847 #ifdef HAVE_SSL 4848 } 4849 #endif 4850 if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) { 4851 log_msg(LOG_ERR, "cannot set tcp event base"); 4852 close(s); 4853 region_destroy(tcp_region); 4854 return; 4855 } 4856 if(event_add(&tcp_data->event, &timeout) != 0) { 4857 log_msg(LOG_ERR, "cannot add tcp to event base"); 4858 close(s); 4859 region_destroy(tcp_region); 4860 return; 4861 } 4862 if(tcp_active_list) { 4863 tcp_active_list->prev = tcp_data; 4864 tcp_data->next = tcp_active_list; 4865 } 4866 tcp_active_list = tcp_data; 4867 4868 /* 4869 * Keep track of the total number of TCP handlers installed so 4870 * we can stop accepting connections when the maximum number 4871 * of simultaneous TCP connections is reached. 4872 * 4873 * If tcp-reject-overflow is enabled, however, then we do not 4874 * change the handler event type; we keep it as-is and accept 4875 * overflow TCP connections only so that we can forcibly kill 4876 * them off. 4877 */ 4878 ++data->nsd->current_tcp_count; 4879 if (!data->nsd->options->tcp_reject_overflow && 4880 data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) 4881 { 4882 configure_handler_event_types(0); 4883 } 4884 } 4885 4886 static void 4887 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) 4888 { 4889 size_t i; 4890 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4891 for (i = 0; i < nsd->child_count; ++i) { 4892 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) { 4893 if (write(nsd->children[i].child_fd, 4894 &command, 4895 sizeof(command)) == -1) 4896 { 4897 if(errno != EAGAIN && errno != EINTR) 4898 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 4899 (int) command, 4900 (int) nsd->children[i].pid, 4901 strerror(errno)); 4902 } else if (timeout > 0) { 4903 (void)block_read(NULL, 4904 nsd->children[i].child_fd, 4905 &command, sizeof(command), timeout); 4906 } 4907 fsync(nsd->children[i].child_fd); 4908 close(nsd->children[i].child_fd); 4909 nsd->children[i].child_fd = -1; 4910 } 4911 } 4912 } 4913 4914 static void 4915 send_children_quit(struct nsd* nsd) 4916 { 4917 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); 4918 send_children_command(nsd, NSD_QUIT, 0); 4919 } 4920 4921 static void 4922 send_children_quit_and_wait(struct nsd* nsd) 4923 { 4924 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait")); 4925 send_children_command(nsd, NSD_QUIT_CHILD, 3); 4926 } 4927 4928 #ifdef BIND8_STATS 4929 static void 4930 set_children_stats(struct nsd* nsd) 4931 { 4932 size_t i; 4933 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4934 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 4935 for (i = 0; i < nsd->child_count; ++i) { 4936 nsd->children[i].need_to_send_STATS = 1; 4937 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 4938 } 4939 } 4940 #endif /* BIND8_STATS */ 4941 4942 static void 4943 configure_handler_event_types(short event_types) 4944 { 4945 size_t i; 4946 4947 for (i = 0; i < tcp_accept_handler_count; ++i) { 4948 struct event* handler = &tcp_accept_handlers[i].event; 4949 if(event_types) { 4950 /* reassign */ 4951 int fd = handler->ev_fd; 4952 struct event_base* base = handler->ev_base; 4953 if(tcp_accept_handlers[i].event_added) 4954 event_del(handler); 4955 memset(handler, 0, sizeof(*handler)); 4956 event_set(handler, fd, event_types, 4957 handle_tcp_accept, &tcp_accept_handlers[i]); 4958 if(event_base_set(base, handler) != 0) 4959 log_msg(LOG_ERR, "conhand: cannot event_base"); 4960 if(event_add(handler, NULL) != 0) 4961 log_msg(LOG_ERR, "conhand: cannot event_add"); 4962 tcp_accept_handlers[i].event_added = 1; 4963 } else { 4964 /* remove */ 4965 if(tcp_accept_handlers[i].event_added) { 4966 event_del(handler); 4967 tcp_accept_handlers[i].event_added = 0; 4968 } 4969 } 4970 } 4971 } 4972