1 /* 2 * server.c -- nsd(8) network input/output 3 * 4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/param.h> 14 #include <limits.h> 15 #include <sys/socket.h> 16 #include <sys/uio.h> 17 #include <sys/wait.h> 18 19 #include <netinet/in.h> 20 #ifdef USE_TCP_FASTOPEN 21 #include <netinet/tcp.h> 22 #endif 23 #include <arpa/inet.h> 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <stddef.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <time.h> 34 #include <unistd.h> 35 #include <signal.h> 36 #include <netdb.h> 37 #include <poll.h> 38 #ifndef SHUT_WR 39 #define SHUT_WR 1 40 #endif 41 #ifdef HAVE_MMAP 42 #include <sys/mman.h> 43 #endif /* HAVE_MMAP */ 44 #ifdef HAVE_OPENSSL_RAND_H 45 #include <openssl/rand.h> 46 #endif 47 #ifdef HAVE_OPENSSL_SSL_H 48 #include <openssl/ssl.h> 49 #endif 50 #ifdef HAVE_OPENSSL_ERR_H 51 #include <openssl/err.h> 52 #endif 53 #ifdef HAVE_OPENSSL_OCSP_H 54 #include <openssl/ocsp.h> 55 #endif 56 #ifndef USE_MINI_EVENT 57 # ifdef HAVE_EVENT_H 58 # include <event.h> 59 # else 60 # include <event2/event.h> 61 # include "event2/event_struct.h" 62 # include "event2/event_compat.h" 63 # endif 64 #else 65 # include "mini_event.h" 66 #endif 67 68 #include "axfr.h" 69 #include "namedb.h" 70 #include "netio.h" 71 #include "xfrd.h" 72 #include "xfrd-tcp.h" 73 #include "xfrd-disk.h" 74 #include "difffile.h" 75 #include "nsec3.h" 76 #include "ipc.h" 77 #include "udb.h" 78 #include "remote.h" 79 #include "lookup3.h" 80 #include "rrl.h" 81 #ifdef USE_DNSTAP 82 #include "dnstap/dnstap_collector.h" 83 #endif 84 85 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */ 86 87 #ifdef USE_TCP_FASTOPEN 88 #define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen" 89 #define TCP_FASTOPEN_SERVER_BIT_MASK 0x2 90 #endif 91 92 /* 93 * Data for the UDP handlers. 94 */ 95 struct udp_handler_data 96 { 97 struct nsd *nsd; 98 struct nsd_socket *socket; 99 struct event event; 100 }; 101 102 struct tcp_accept_handler_data { 103 struct nsd *nsd; 104 struct nsd_socket *socket; 105 int event_added; 106 struct event event; 107 #ifdef HAVE_SSL 108 /* handler accepts TLS connections on the dedicated port */ 109 int tls_accept; 110 #endif 111 }; 112 113 /* 114 * These globals are used to enable the TCP accept handlers 115 * when the number of TCP connection drops below the maximum 116 * number of TCP connections. 117 */ 118 static size_t tcp_accept_handler_count; 119 static struct tcp_accept_handler_data *tcp_accept_handlers; 120 121 static struct event slowaccept_event; 122 static int slowaccept; 123 124 #ifdef HAVE_SSL 125 static unsigned char *ocspdata = NULL; 126 static long ocspdata_len = 0; 127 #endif 128 129 #ifdef NONBLOCKING_IS_BROKEN 130 /* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to 131 read multiple times from a socket when reported ready by select. */ 132 # define NUM_RECV_PER_SELECT (1) 133 #else /* !NONBLOCKING_IS_BROKEN */ 134 # define NUM_RECV_PER_SELECT (100) 135 #endif /* NONBLOCKING_IS_BROKEN */ 136 137 #ifndef HAVE_MMSGHDR 138 struct mmsghdr { 139 struct msghdr msg_hdr; 140 unsigned int msg_len; 141 }; 142 #endif 143 144 static struct mmsghdr msgs[NUM_RECV_PER_SELECT]; 145 static struct iovec iovecs[NUM_RECV_PER_SELECT]; 146 static struct query *queries[NUM_RECV_PER_SELECT]; 147 148 /* 149 * Data for the TCP connection handlers. 150 * 151 * The TCP handlers use non-blocking I/O. This is necessary to avoid 152 * blocking the entire server on a slow TCP connection, but does make 153 * reading from and writing to the socket more complicated. 154 * 155 * Basically, whenever a read/write would block (indicated by the 156 * EAGAIN errno variable) we remember the position we were reading 157 * from/writing to and return from the TCP reading/writing event 158 * handler. When the socket becomes readable/writable again we 159 * continue from the same position. 160 */ 161 struct tcp_handler_data 162 { 163 /* 164 * The region used to allocate all TCP connection related 165 * data, including this structure. This region is destroyed 166 * when the connection is closed. 167 */ 168 region_type* region; 169 170 /* 171 * The global nsd structure. 172 */ 173 struct nsd* nsd; 174 175 /* 176 * The current query data for this TCP connection. 177 */ 178 query_type* query; 179 180 /* 181 * The query_state is used to remember if we are performing an 182 * AXFR, if we're done processing, or if we should discard the 183 * query and connection. 184 */ 185 query_state_type query_state; 186 187 /* 188 * The event for the file descriptor and tcp timeout 189 */ 190 struct event event; 191 192 /* 193 * The bytes_transmitted field is used to remember the number 194 * of bytes transmitted when receiving or sending a DNS 195 * packet. The count includes the two additional bytes used 196 * to specify the packet length on a TCP connection. 197 */ 198 size_t bytes_transmitted; 199 200 /* 201 * The number of queries handled by this specific TCP connection. 202 */ 203 int query_count; 204 205 /* 206 * The timeout in msec for this tcp connection 207 */ 208 int tcp_timeout; 209 #ifdef HAVE_SSL 210 /* 211 * TLS object. 212 */ 213 SSL* tls; 214 215 /* 216 * TLS handshake state. 217 */ 218 enum { tls_hs_none, tls_hs_read, tls_hs_write, 219 tls_hs_read_event, tls_hs_write_event } shake_state; 220 #endif 221 /* list of connections, for service of remaining tcp channels */ 222 struct tcp_handler_data *prev, *next; 223 }; 224 /* global that is the list of active tcp channels */ 225 static struct tcp_handler_data *tcp_active_list = NULL; 226 227 /* 228 * Handle incoming queries on the UDP server sockets. 229 */ 230 static void handle_udp(int fd, short event, void* arg); 231 232 /* 233 * Handle incoming connections on the TCP sockets. These handlers 234 * usually wait for the NETIO_EVENT_READ event (indicating an incoming 235 * connection) but are disabled when the number of current TCP 236 * connections is equal to the maximum number of TCP connections. 237 * Disabling is done by changing the handler to wait for the 238 * NETIO_EVENT_NONE type. This is done using the function 239 * configure_tcp_accept_handlers. 240 */ 241 static void handle_tcp_accept(int fd, short event, void* arg); 242 243 /* 244 * Handle incoming queries on a TCP connection. The TCP connections 245 * are configured to be non-blocking and the handler may be called 246 * multiple times before a complete query is received. 247 */ 248 static void handle_tcp_reading(int fd, short event, void* arg); 249 250 /* 251 * Handle outgoing responses on a TCP connection. The TCP connections 252 * are configured to be non-blocking and the handler may be called 253 * multiple times before a complete response is sent. 254 */ 255 static void handle_tcp_writing(int fd, short event, void* arg); 256 257 #ifdef HAVE_SSL 258 /* Create SSL object and associate fd */ 259 static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd); 260 /* 261 * Handle TLS handshake. May be called multiple times if incomplete. 262 */ 263 static int tls_handshake(struct tcp_handler_data* data, int fd, int writing); 264 265 /* 266 * Handle incoming queries on a TLS over TCP connection. The TLS 267 * connections are configured to be non-blocking and the handler may 268 * be called multiple times before a complete query is received. 269 */ 270 static void handle_tls_reading(int fd, short event, void* arg); 271 272 /* 273 * Handle outgoing responses on a TLS over TCP connection. The TLS 274 * connections are configured to be non-blocking and the handler may 275 * be called multiple times before a complete response is sent. 276 */ 277 static void handle_tls_writing(int fd, short event, void* arg); 278 #endif 279 280 /* 281 * Send all children the quit nonblocking, then close pipe. 282 */ 283 static void send_children_quit(struct nsd* nsd); 284 /* same, for shutdown time, waits for child to exit to avoid restart issues */ 285 static void send_children_quit_and_wait(struct nsd* nsd); 286 287 /* set childrens flags to send NSD_STATS to them */ 288 #ifdef BIND8_STATS 289 static void set_children_stats(struct nsd* nsd); 290 #endif /* BIND8_STATS */ 291 292 /* 293 * Change the event types the HANDLERS are interested in to EVENT_TYPES. 294 */ 295 static void configure_handler_event_types(short event_types); 296 297 static uint16_t *compressed_dname_offsets = 0; 298 static uint32_t compression_table_capacity = 0; 299 static uint32_t compression_table_size = 0; 300 static domain_type* compressed_dnames[MAXRRSPP]; 301 302 #ifdef USE_TCP_FASTOPEN 303 /* Checks to see if the kernel value must be manually changed in order for 304 TCP Fast Open to support server mode */ 305 static void report_tcp_fastopen_config() { 306 307 int tcp_fastopen_fp; 308 uint8_t tcp_fastopen_value; 309 310 if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) { 311 log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 312 } 313 if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) { 314 log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 315 close(tcp_fastopen_fp); 316 } 317 if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) { 318 log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n"); 319 log_msg(LOG_WARNING, "However the kernel paramenters are not configured to support TCP_FASTOPEN in server mode.\n"); 320 log_msg(LOG_WARNING, "To enable TFO use the command:"); 321 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n"); 322 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n"); 323 log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n"); 324 close(tcp_fastopen_fp); 325 } 326 close(tcp_fastopen_fp); 327 } 328 #endif 329 330 /* 331 * Remove the specified pid from the list of child pids. Returns -1 if 332 * the pid is not in the list, child_num otherwise. The field is set to 0. 333 */ 334 static int 335 delete_child_pid(struct nsd *nsd, pid_t pid) 336 { 337 size_t i; 338 for (i = 0; i < nsd->child_count; ++i) { 339 if (nsd->children[i].pid == pid) { 340 nsd->children[i].pid = 0; 341 if(!nsd->children[i].need_to_exit) { 342 if(nsd->children[i].child_fd != -1) 343 close(nsd->children[i].child_fd); 344 nsd->children[i].child_fd = -1; 345 if(nsd->children[i].handler) 346 nsd->children[i].handler->fd = -1; 347 } 348 return i; 349 } 350 } 351 return -1; 352 } 353 354 /* 355 * Restart child servers if necessary. 356 */ 357 static int 358 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 359 int* xfrd_sock_p) 360 { 361 struct main_ipc_handler_data *ipc_data; 362 size_t i; 363 int sv[2]; 364 365 /* Fork the child processes... */ 366 for (i = 0; i < nsd->child_count; ++i) { 367 if (nsd->children[i].pid <= 0) { 368 if (nsd->children[i].child_fd != -1) 369 close(nsd->children[i].child_fd); 370 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 371 log_msg(LOG_ERR, "socketpair: %s", 372 strerror(errno)); 373 return -1; 374 } 375 nsd->children[i].child_fd = sv[0]; 376 nsd->children[i].parent_fd = sv[1]; 377 nsd->children[i].pid = fork(); 378 switch (nsd->children[i].pid) { 379 default: /* SERVER MAIN */ 380 close(nsd->children[i].parent_fd); 381 nsd->children[i].parent_fd = -1; 382 if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { 383 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 384 } 385 if(!nsd->children[i].handler) 386 { 387 ipc_data = (struct main_ipc_handler_data*) region_alloc( 388 region, sizeof(struct main_ipc_handler_data)); 389 ipc_data->nsd = nsd; 390 ipc_data->child = &nsd->children[i]; 391 ipc_data->child_num = i; 392 ipc_data->xfrd_sock = xfrd_sock_p; 393 ipc_data->packet = buffer_create(region, QIOBUFSZ); 394 ipc_data->forward_mode = 0; 395 ipc_data->got_bytes = 0; 396 ipc_data->total_bytes = 0; 397 ipc_data->acl_num = 0; 398 nsd->children[i].handler = (struct netio_handler*) region_alloc( 399 region, sizeof(struct netio_handler)); 400 nsd->children[i].handler->fd = nsd->children[i].child_fd; 401 nsd->children[i].handler->timeout = NULL; 402 nsd->children[i].handler->user_data = ipc_data; 403 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 404 nsd->children[i].handler->event_handler = parent_handle_child_command; 405 netio_add_handler(netio, nsd->children[i].handler); 406 } 407 /* clear any ongoing ipc */ 408 ipc_data = (struct main_ipc_handler_data*) 409 nsd->children[i].handler->user_data; 410 ipc_data->forward_mode = 0; 411 /* restart - update fd */ 412 nsd->children[i].handler->fd = nsd->children[i].child_fd; 413 break; 414 case 0: /* CHILD */ 415 /* the child need not be able to access the 416 * nsd.db file */ 417 namedb_close_udb(nsd->db); 418 #ifdef MEMCLEAN /* OS collects memory pages */ 419 region_destroy(region); 420 #endif 421 422 if (pledge("stdio rpath inet", NULL) == -1) { 423 log_msg(LOG_ERR, "pledge"); 424 exit(1); 425 } 426 427 nsd->pid = 0; 428 nsd->child_count = 0; 429 nsd->server_kind = nsd->children[i].kind; 430 nsd->this_child = &nsd->children[i]; 431 nsd->this_child->child_num = i; 432 /* remove signal flags inherited from parent 433 the parent will handle them. */ 434 nsd->signal_hint_reload_hup = 0; 435 nsd->signal_hint_reload = 0; 436 nsd->signal_hint_child = 0; 437 nsd->signal_hint_quit = 0; 438 nsd->signal_hint_shutdown = 0; 439 nsd->signal_hint_stats = 0; 440 nsd->signal_hint_statsusr = 0; 441 close(*xfrd_sock_p); 442 close(nsd->this_child->child_fd); 443 nsd->this_child->child_fd = -1; 444 if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { 445 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 446 } 447 server_child(nsd); 448 /* NOTREACH */ 449 exit(0); 450 case -1: 451 log_msg(LOG_ERR, "fork failed: %s", 452 strerror(errno)); 453 return -1; 454 } 455 } 456 } 457 return 0; 458 } 459 460 #ifdef BIND8_STATS 461 static void set_bind8_alarm(struct nsd* nsd) 462 { 463 /* resync so that the next alarm is on the next whole minute */ 464 if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ 465 alarm(nsd->st.period - (time(NULL) % nsd->st.period)); 466 } 467 #endif 468 469 /* set zone stat ids for zones initially read in */ 470 static void 471 zonestatid_tree_set(struct nsd* nsd) 472 { 473 struct radnode* n; 474 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 475 zone_type* zone = (zone_type*)n->elem; 476 zone->zonestatid = getzonestatid(nsd->options, zone->opts); 477 } 478 } 479 480 #ifdef USE_ZONE_STATS 481 void 482 server_zonestat_alloc(struct nsd* nsd) 483 { 484 size_t num = (nsd->options->zonestatnames->count==0?1: 485 nsd->options->zonestatnames->count); 486 size_t sz = sizeof(struct nsdst)*num; 487 char tmpfile[256]; 488 uint8_t z = 0; 489 490 /* file names */ 491 nsd->zonestatfname[0] = 0; 492 nsd->zonestatfname[1] = 0; 493 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0", 494 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 495 nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile); 496 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1", 497 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 498 nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile); 499 500 /* file descriptors */ 501 nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600); 502 if(nsd->zonestatfd[0] == -1) { 503 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0], 504 strerror(errno)); 505 exit(1); 506 } 507 nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600); 508 if(nsd->zonestatfd[0] == -1) { 509 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1], 510 strerror(errno)); 511 close(nsd->zonestatfd[0]); 512 unlink(nsd->zonestatfname[0]); 513 exit(1); 514 } 515 516 #ifdef HAVE_MMAP 517 if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) { 518 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0], 519 strerror(errno)); 520 exit(1); 521 } 522 if(write(nsd->zonestatfd[0], &z, 1) == -1) { 523 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 524 nsd->zonestatfname[0], strerror(errno)); 525 exit(1); 526 } 527 if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) { 528 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1], 529 strerror(errno)); 530 exit(1); 531 } 532 if(write(nsd->zonestatfd[1], &z, 1) == -1) { 533 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 534 nsd->zonestatfname[1], strerror(errno)); 535 exit(1); 536 } 537 nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 538 MAP_SHARED, nsd->zonestatfd[0], 0); 539 if(nsd->zonestat[0] == MAP_FAILED) { 540 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 541 unlink(nsd->zonestatfname[0]); 542 unlink(nsd->zonestatfname[1]); 543 exit(1); 544 } 545 nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 546 MAP_SHARED, nsd->zonestatfd[1], 0); 547 if(nsd->zonestat[1] == MAP_FAILED) { 548 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 549 unlink(nsd->zonestatfname[0]); 550 unlink(nsd->zonestatfname[1]); 551 exit(1); 552 } 553 memset(nsd->zonestat[0], 0, sz); 554 memset(nsd->zonestat[1], 0, sz); 555 nsd->zonestatsize[0] = num; 556 nsd->zonestatsize[1] = num; 557 nsd->zonestatdesired = num; 558 nsd->zonestatsizenow = num; 559 nsd->zonestatnow = nsd->zonestat[0]; 560 #endif /* HAVE_MMAP */ 561 } 562 563 void 564 zonestat_remap(struct nsd* nsd, int idx, size_t sz) 565 { 566 #ifdef HAVE_MMAP 567 #ifdef MREMAP_MAYMOVE 568 nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx], 569 sizeof(struct nsdst)*nsd->zonestatsize[idx], sz, 570 MREMAP_MAYMOVE); 571 if(nsd->zonestat[idx] == MAP_FAILED) { 572 log_msg(LOG_ERR, "mremap failed: %s", strerror(errno)); 573 exit(1); 574 } 575 #else /* !HAVE MREMAP */ 576 if(msync(nsd->zonestat[idx], 577 sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0) 578 log_msg(LOG_ERR, "msync failed: %s", strerror(errno)); 579 if(munmap(nsd->zonestat[idx], 580 sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0) 581 log_msg(LOG_ERR, "munmap failed: %s", strerror(errno)); 582 nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz, 583 PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0); 584 if(nsd->zonestat[idx] == MAP_FAILED) { 585 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 586 exit(1); 587 } 588 #endif /* MREMAP */ 589 #endif /* HAVE_MMAP */ 590 } 591 592 /* realloc the zonestat array for the one that is not currently in use, 593 * to match the desired new size of the array (if applicable) */ 594 void 595 server_zonestat_realloc(struct nsd* nsd) 596 { 597 #ifdef HAVE_MMAP 598 uint8_t z = 0; 599 size_t sz; 600 int idx = 0; /* index of the zonestat array that is not in use */ 601 if(nsd->zonestatnow == nsd->zonestat[0]) 602 idx = 1; 603 if(nsd->zonestatsize[idx] == nsd->zonestatdesired) 604 return; 605 sz = sizeof(struct nsdst)*nsd->zonestatdesired; 606 if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) { 607 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx], 608 strerror(errno)); 609 exit(1); 610 } 611 if(write(nsd->zonestatfd[idx], &z, 1) == -1) { 612 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 613 nsd->zonestatfname[idx], strerror(errno)); 614 exit(1); 615 } 616 zonestat_remap(nsd, idx, sz); 617 /* zero the newly allocated region */ 618 if(nsd->zonestatdesired > nsd->zonestatsize[idx]) { 619 memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) * 620 nsd->zonestatsize[idx], 0, sizeof(struct nsdst) * 621 (nsd->zonestatdesired - nsd->zonestatsize[idx])); 622 } 623 nsd->zonestatsize[idx] = nsd->zonestatdesired; 624 #endif /* HAVE_MMAP */ 625 } 626 627 /* switchover to use the other array for the new children, that 628 * briefly coexist with the old children. And we want to avoid them 629 * both writing to the same statistics arrays. */ 630 void 631 server_zonestat_switch(struct nsd* nsd) 632 { 633 if(nsd->zonestatnow == nsd->zonestat[0]) { 634 nsd->zonestatnow = nsd->zonestat[1]; 635 nsd->zonestatsizenow = nsd->zonestatsize[1]; 636 } else { 637 nsd->zonestatnow = nsd->zonestat[0]; 638 nsd->zonestatsizenow = nsd->zonestatsize[0]; 639 } 640 } 641 #endif /* USE_ZONE_STATS */ 642 643 static void 644 cleanup_dname_compression_tables(void *ptr) 645 { 646 free(ptr); 647 compressed_dname_offsets = NULL; 648 compression_table_capacity = 0; 649 } 650 651 static void 652 initialize_dname_compression_tables(struct nsd *nsd) 653 { 654 size_t needed = domain_table_count(nsd->db->domains) + 1; 655 needed += EXTRA_DOMAIN_NUMBERS; 656 if(compression_table_capacity < needed) { 657 if(compressed_dname_offsets) { 658 region_remove_cleanup(nsd->db->region, 659 cleanup_dname_compression_tables, 660 compressed_dname_offsets); 661 free(compressed_dname_offsets); 662 } 663 compressed_dname_offsets = (uint16_t *) xmallocarray( 664 needed, sizeof(uint16_t)); 665 region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, 666 compressed_dname_offsets); 667 compression_table_capacity = needed; 668 compression_table_size=domain_table_count(nsd->db->domains)+1; 669 } 670 memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); 671 compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ 672 } 673 674 static int 675 set_reuseport(struct nsd_socket *sock) 676 { 677 #ifdef SO_REUSEPORT 678 int on = 1; 679 #ifdef SO_REUSEPORT_LB 680 /* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like 681 * SO_REUSEPORT on Linux. This is what the users want with the config 682 * option in nsd.conf; if we actually need local address and port reuse 683 * they'll also need to have SO_REUSEPORT set for them, assume it was 684 * _LB they want. 685 */ 686 int opt = SO_REUSEPORT_LB; 687 static const char optname[] = "SO_REUSEPORT_LB"; 688 #else /* !SO_REUSEPORT_LB */ 689 int opt = SO_REUSEPORT; 690 static const char optname[] = "SO_REUSEPORT"; 691 #endif /* SO_REUSEPORT_LB */ 692 693 if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) { 694 return 1; 695 } else if(verbosity >= 3 || errno != ENOPROTOOPT) { 696 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 697 optname, strerror(errno)); 698 } 699 return -1; 700 #else 701 (void)sock; 702 #endif /* SO_REUSEPORT */ 703 704 return 0; 705 } 706 707 static int 708 set_reuseaddr(struct nsd_socket *sock) 709 { 710 #ifdef SO_REUSEADDR 711 int on = 1; 712 if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) { 713 return 1; 714 } 715 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", 716 strerror(errno)); 717 return -1; 718 #endif /* SO_REUSEADDR */ 719 return 0; 720 } 721 722 static int 723 set_rcvbuf(struct nsd_socket *sock, int rcv) 724 { 725 #ifdef SO_RCVBUF 726 #ifdef SO_RCVBUFFORCE 727 if(0 == setsockopt( 728 sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv))) 729 { 730 return 1; 731 } 732 if(errno == EPERM || errno == ENOBUFS) { 733 return 0; 734 } 735 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s", 736 strerror(errno)); 737 return -1; 738 #else /* !SO_RCVBUFFORCE */ 739 if (0 == setsockopt( 740 sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv))) 741 { 742 return 1; 743 } 744 if(errno == ENOSYS || errno == ENOBUFS) { 745 return 0; 746 } 747 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s", 748 strerror(errno)); 749 return -1; 750 #endif /* SO_RCVBUFFORCE */ 751 #endif /* SO_RCVBUF */ 752 753 return 0; 754 } 755 756 static int 757 set_sndbuf(struct nsd_socket *sock, int snd) 758 { 759 #ifdef SO_SNDBUF 760 #ifdef SO_SNDBUFFORCE 761 if(0 == setsockopt( 762 sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd))) 763 { 764 return 1; 765 } 766 if(errno == EPERM || errno == ENOBUFS) { 767 return 0; 768 } 769 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s", 770 strerror(errno)); 771 return -1; 772 #else /* !SO_SNDBUFFORCE */ 773 if(0 == setsockopt( 774 sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd))) 775 { 776 return 1; 777 } 778 if(errno == ENOSYS || errno == ENOBUFS) { 779 return 0; 780 } 781 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s", 782 strerror(errno)); 783 return -1; 784 #endif /* SO_SNDBUFFORCE */ 785 #endif /* SO_SNDBUF */ 786 787 return 0; 788 } 789 790 static int 791 set_nonblock(struct nsd_socket *sock) 792 { 793 const char *socktype = 794 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 795 796 if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) { 797 log_msg(LOG_ERR, "fctnl(..., O_NONBLOCK) failed for %s: %s", 798 socktype, strerror(errno)); 799 return -1; 800 } 801 802 return 1; 803 } 804 805 static int 806 set_ipv6_v6only(struct nsd_socket *sock) 807 { 808 #ifdef INET6 809 #ifdef IPV6_V6ONLY 810 int on = 1; 811 const char *socktype = 812 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 813 814 if(0 == setsockopt( 815 sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on))) 816 { 817 return 1; 818 } 819 820 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s", 821 socktype, strerror(errno)); 822 return -1; 823 #endif /* IPV6_V6ONLY */ 824 #endif /* INET6 */ 825 826 return 0; 827 } 828 829 static int 830 set_ipv6_use_min_mtu(struct nsd_socket *sock) 831 { 832 #if defined(INET6) && (defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU)) 833 #if defined(IPV6_USE_MIN_MTU) 834 /* There is no fragmentation of IPv6 datagrams during forwarding in the 835 * network. Therefore we do not send UDP datagrams larger than the 836 * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be 837 * larger if the network stack supports IPV6_USE_MIN_MTU. 838 */ 839 int opt = IPV6_USE_MIN_MTU; 840 int optval = 1; 841 static const char optname[] = "IPV6_USE_MIN_MTU"; 842 #elif defined(IPV6_MTU) 843 /* On Linux, PMTUD is disabled by default for datagrams so set the MTU 844 * to the MIN MTU to get the same. 845 */ 846 int opt = IPV6_MTU; 847 int optval = IPV6_MIN_MTU; 848 static const char optname[] = "IPV6_MTU"; 849 #endif 850 if(0 == setsockopt( 851 sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval))) 852 { 853 return 1; 854 } 855 856 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 857 optname, strerror(errno)); 858 return -1; 859 #else 860 (void)sock; 861 #endif /* INET6 */ 862 863 return 0; 864 } 865 866 static int 867 set_ipv4_no_pmtu_disc(struct nsd_socket *sock) 868 { 869 int ret = 0; 870 871 #if defined(IP_MTU_DISCOVER) 872 int opt = IP_MTU_DISCOVER; 873 int optval; 874 # if defined(IP_PMTUDISC_OMIT) 875 /* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU 876 * information and send packets with DF=0. Fragmentation is allowed if 877 * and only if the packet size exceeds the outgoing interface MTU or 878 * the packet encounters smaller MTU link in network. This mitigates 879 * DNS fragmentation attacks by preventing forged PMTU information. 880 * FreeBSD already has same semantics without setting the option. 881 */ 882 optval = IP_PMTUDISC_OMIT; 883 if(0 == setsockopt( 884 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 885 { 886 return 1; 887 } 888 889 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 890 "IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno)); 891 # endif /* IP_PMTUDISC_OMIT */ 892 # if defined(IP_PMTUDISC_DONT) 893 /* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */ 894 optval = IP_PMTUDISC_DONT; 895 if(0 == setsockopt( 896 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 897 { 898 return 1; 899 } 900 901 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 902 "IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno)); 903 # endif 904 ret = -1; 905 #elif defined(IP_DONTFRAG) 906 int off = 0; 907 if (0 == setsockopt( 908 sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off))) 909 { 910 return 1; 911 } 912 913 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 914 strerror(errno)); 915 ret = -1; 916 #else 917 (void)sock; 918 #endif 919 920 return ret; 921 } 922 923 static int 924 set_ip_freebind(struct nsd_socket *sock) 925 { 926 #ifdef IP_FREEBIND 927 int on = 1; 928 const char *socktype = 929 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 930 if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0) 931 { 932 return 1; 933 } 934 log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s", 935 socktype, strerror(errno)); 936 return -1; 937 #else 938 (void)sock; 939 #endif /* IP_FREEBIND */ 940 941 return 0; 942 } 943 944 static int 945 set_ip_transparent(struct nsd_socket *sock) 946 { 947 #if defined(IP_TRANSPARENT) 948 int on = 1; 949 const char *socktype = 950 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 951 if(0 == setsockopt( 952 sock->s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on))) 953 { 954 return 1; 955 } 956 957 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s", 958 "IP_TRANSPARENT", socktype, strerror(errno)); 959 return -1; 960 #elif defined(SO_BINDANY) 961 int on = 1; 962 const char *socktype = 963 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 964 if(0 == setsockopt( 965 sock->s, SOL_SOCKET, SO_BINDANY, &on, sizeof(on))) 966 { 967 return 1; 968 } 969 970 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s", 971 "SO_BINDANY", socktype, strerror(errno)); 972 return -1; 973 #else 974 (void)sock; 975 #endif 976 977 return 0; 978 } 979 980 static int 981 set_tcp_maxseg(struct nsd_socket *sock, int mss) 982 { 983 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 984 if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) { 985 return 1; 986 } 987 log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s", 988 strerror(errno)); 989 return -1; 990 #else 991 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported"); 992 #endif 993 return 0; 994 } 995 996 #ifdef USE_TCP_FASTOPEN 997 static int 998 set_tcp_fastopen(struct nsd_socket *sock) 999 { 1000 /* qlen specifies how many outstanding TFO requests to allow. Limit is 1001 * a defense against IP spoofing attacks as suggested in RFC7413. 1002 */ 1003 int qlen; 1004 1005 #ifdef __APPLE__ 1006 /* macOS X implementation only supports qlen of 1 via this call. The 1007 * actual value is configured by the net.inet.tcp.fastopen_backlog 1008 * kernel parameter. 1009 */ 1010 qlen = 1; 1011 #else 1012 /* 5 is recommended on Linux. */ 1013 qlen = 5; 1014 #endif 1015 if (0 == setsockopt( 1016 sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen))) 1017 { 1018 return 1; 1019 } 1020 1021 if (errno == EPERM) { 1022 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s " 1023 "; this could likely be because sysctl " 1024 "net.inet.tcp.fastopen.enabled, " 1025 "net.inet.tcp.fastopen.server_enable, or " 1026 "net.ipv4.tcp_fastopen is disabled", 1027 strerror(errno)); 1028 /* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support 1029 * disabled, except when verbosity enabled for debugging 1030 */ 1031 } else if(errno != ENOPROTOOPT || verbosity >= 3) { 1032 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s", 1033 strerror(errno)); 1034 } 1035 1036 return (errno == ENOPROTOOPT ? 0 : -1); 1037 } 1038 #endif /* USE_TCP_FASTOPEN */ 1039 1040 static int 1041 open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1042 { 1043 int rcv = 1*1024*1024, snd = 1*1024*1024; 1044 1045 if(-1 == (sock->s = socket( 1046 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1047 { 1048 #ifdef INET6 1049 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1050 (sock->addr.ai_family == AF_INET6) && 1051 (errno == EAFNOSUPPORT)) 1052 { 1053 log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: " 1054 "not supported"); 1055 return 0; 1056 } 1057 #endif 1058 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1059 return -1; 1060 } 1061 1062 if(nsd->reuseport && reuseport_works && *reuseport_works) 1063 *reuseport_works = (set_reuseport(sock) == 1); 1064 1065 if(nsd->options->receive_buffer_size > 0) 1066 rcv = nsd->options->receive_buffer_size; 1067 if(set_rcvbuf(sock, rcv) == -1) 1068 return -1; 1069 1070 if(nsd->options->send_buffer_size > 0) 1071 snd = nsd->options->send_buffer_size; 1072 if(set_sndbuf(sock, snd) == -1) 1073 return -1; 1074 #ifdef INET6 1075 if(sock->addr.ai_family == AF_INET6) { 1076 if(set_ipv6_v6only(sock) == -1 || 1077 set_ipv6_use_min_mtu(sock) == -1) 1078 return -1; 1079 } else 1080 #endif /* INET6 */ 1081 if(sock->addr.ai_family == AF_INET) { 1082 if(set_ipv4_no_pmtu_disc(sock) == -1) 1083 return -1; 1084 } 1085 1086 /* Set socket to non-blocking. Otherwise, on operating systems 1087 * with thundering herd problems, the UDP recv could block 1088 * after select returns readable. 1089 */ 1090 set_nonblock(sock); 1091 1092 if(nsd->options->ip_freebind) 1093 (void)set_ip_freebind(sock); 1094 if(nsd->options->ip_transparent) 1095 (void)set_ip_transparent(sock); 1096 1097 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1098 char buf[256]; 1099 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1100 log_msg(LOG_ERR, "can't bind udp socket %s: %s", 1101 buf, strerror(errno)); 1102 return -1; 1103 } 1104 1105 return 1; 1106 } 1107 1108 static int 1109 open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1110 { 1111 #ifdef USE_TCP_FASTOPEN 1112 report_tcp_fastopen_config(); 1113 #endif 1114 1115 (void)reuseport_works; 1116 1117 if(-1 == (sock->s = socket( 1118 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1119 { 1120 #ifdef INET6 1121 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1122 (sock->addr.ai_family == AF_INET6) && 1123 (errno == EAFNOSUPPORT)) 1124 { 1125 log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: " 1126 "not supported"); 1127 return 0; 1128 } 1129 #endif /* INET6 */ 1130 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1131 return -1; 1132 } 1133 1134 if(nsd->reuseport && reuseport_works && *reuseport_works) 1135 *reuseport_works = (set_reuseport(sock) == 1); 1136 1137 (void)set_reuseaddr(sock); 1138 1139 #ifdef INET6 1140 if(sock->addr.ai_family == AF_INET6) { 1141 if (set_ipv6_v6only(sock) == -1 || 1142 set_ipv6_use_min_mtu(sock) == -1) 1143 return -1; 1144 } 1145 #endif 1146 1147 if(nsd->tcp_mss > 0) 1148 set_tcp_maxseg(sock, nsd->tcp_mss); 1149 /* (StevensUNP p463), if TCP listening socket is blocking, then 1150 it may block in accept, even if select() says readable. */ 1151 (void)set_nonblock(sock); 1152 if(nsd->options->ip_freebind) 1153 (void)set_ip_freebind(sock); 1154 if(nsd->options->ip_transparent) 1155 (void)set_ip_transparent(sock); 1156 1157 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1158 char buf[256]; 1159 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1160 log_msg(LOG_ERR, "can't bind tcp socket %s: %s", 1161 buf, strerror(errno)); 1162 return -1; 1163 } 1164 1165 #ifdef USE_TCP_FASTOPEN 1166 (void)set_tcp_fastopen(sock); 1167 #endif 1168 1169 if(listen(sock->s, TCP_BACKLOG) == -1) { 1170 log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); 1171 return -1; 1172 } 1173 1174 return 1; 1175 } 1176 1177 /* 1178 * Initialize the server, reuseport, create and bind the sockets. 1179 */ 1180 int 1181 server_init(struct nsd *nsd) 1182 { 1183 size_t i; 1184 int reuseport = 1; /* Determine if REUSEPORT works. */ 1185 1186 /* open server interface ports */ 1187 for(i = 0; i < nsd->ifs; i++) { 1188 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 || 1189 open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1) 1190 { 1191 return -1; 1192 } 1193 } 1194 1195 if(nsd->reuseport && reuseport) { 1196 size_t ifs = nsd->ifs * nsd->reuseport; 1197 1198 /* increase the size of the interface arrays, there are going 1199 * to be separate interface file descriptors for every server 1200 * instance */ 1201 region_remove_cleanup(nsd->region, free, nsd->udp); 1202 region_remove_cleanup(nsd->region, free, nsd->tcp); 1203 nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp)); 1204 nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp)); 1205 region_add_cleanup(nsd->region, free, nsd->udp); 1206 region_add_cleanup(nsd->region, free, nsd->tcp); 1207 1208 for(i = nsd->ifs; i < ifs; i++) { 1209 nsd->udp[i].addr = nsd->udp[i%nsd->ifs].addr; 1210 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) { 1211 return -1; 1212 } 1213 /* Turn off REUSEPORT for TCP by copying the socket 1214 * file descriptor. 1215 */ 1216 nsd->tcp[i] = nsd->tcp[i%nsd->ifs]; 1217 } 1218 1219 nsd->ifs = ifs; 1220 } else { 1221 nsd->reuseport = 0; 1222 } 1223 1224 return 0; 1225 } 1226 1227 /* 1228 * Prepare the server for take off. 1229 * 1230 */ 1231 int 1232 server_prepare(struct nsd *nsd) 1233 { 1234 #ifdef RATELIMIT 1235 /* set secret modifier for hashing (udb ptr buckets and rate limits) */ 1236 #ifdef HAVE_ARC4RANDOM 1237 hash_set_raninit(arc4random()); 1238 #else 1239 uint32_t v = getpid() ^ time(NULL); 1240 srandom((unsigned long)v); 1241 # ifdef HAVE_SSL 1242 if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0) 1243 hash_set_raninit(v); 1244 else 1245 # endif 1246 hash_set_raninit(random()); 1247 #endif 1248 rrl_mmap_init(nsd->child_count, nsd->options->rrl_size, 1249 nsd->options->rrl_ratelimit, 1250 nsd->options->rrl_whitelist_ratelimit, 1251 nsd->options->rrl_slip, 1252 nsd->options->rrl_ipv4_prefix_length, 1253 nsd->options->rrl_ipv6_prefix_length); 1254 #endif /* RATELIMIT */ 1255 1256 /* Open the database... */ 1257 if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) { 1258 log_msg(LOG_ERR, "unable to open the database %s: %s", 1259 nsd->dbfile, strerror(errno)); 1260 unlink(nsd->task[0]->fname); 1261 unlink(nsd->task[1]->fname); 1262 #ifdef USE_ZONE_STATS 1263 unlink(nsd->zonestatfname[0]); 1264 unlink(nsd->zonestatfname[1]); 1265 #endif 1266 xfrd_del_tempdir(nsd); 1267 return -1; 1268 } 1269 /* check if zone files have been modified */ 1270 /* NULL for taskudb because we send soainfo in a moment, batched up, 1271 * for all zones */ 1272 if(nsd->options->zonefiles_check || (nsd->options->database == NULL || 1273 nsd->options->database[0] == 0)) 1274 namedb_check_zonefiles(nsd, nsd->options, NULL, NULL); 1275 zonestatid_tree_set(nsd); 1276 1277 compression_table_capacity = 0; 1278 initialize_dname_compression_tables(nsd); 1279 1280 #ifdef BIND8_STATS 1281 /* Initialize times... */ 1282 time(&nsd->st.boot); 1283 set_bind8_alarm(nsd); 1284 #endif /* BIND8_STATS */ 1285 1286 return 0; 1287 } 1288 1289 /* 1290 * Fork the required number of servers. 1291 */ 1292 static int 1293 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, 1294 int* xfrd_sock_p) 1295 { 1296 size_t i; 1297 1298 /* Start all child servers initially. */ 1299 for (i = 0; i < nsd->child_count; ++i) { 1300 nsd->children[i].pid = 0; 1301 } 1302 1303 return restart_child_servers(nsd, region, netio, xfrd_sock_p); 1304 } 1305 1306 void 1307 server_close_all_sockets(struct nsd_socket sockets[], size_t n) 1308 { 1309 size_t i; 1310 1311 /* Close all the sockets... */ 1312 for (i = 0; i < n; ++i) { 1313 if (sockets[i].s != -1) { 1314 close(sockets[i].s); 1315 sockets[i].s = -1; 1316 } 1317 } 1318 } 1319 1320 /* 1321 * Close the sockets, shutdown the server and exit. 1322 * Does not return. 1323 */ 1324 void 1325 server_shutdown(struct nsd *nsd) 1326 { 1327 size_t i; 1328 1329 server_close_all_sockets(nsd->udp, nsd->ifs); 1330 server_close_all_sockets(nsd->tcp, nsd->ifs); 1331 /* CHILD: close command channel to parent */ 1332 if(nsd->this_child && nsd->this_child->parent_fd != -1) 1333 { 1334 close(nsd->this_child->parent_fd); 1335 nsd->this_child->parent_fd = -1; 1336 } 1337 /* SERVER: close command channels to children */ 1338 if(!nsd->this_child) 1339 { 1340 for(i=0; i < nsd->child_count; ++i) 1341 if(nsd->children[i].child_fd != -1) 1342 { 1343 close(nsd->children[i].child_fd); 1344 nsd->children[i].child_fd = -1; 1345 } 1346 } 1347 1348 tsig_finalize(); 1349 #ifdef HAVE_SSL 1350 daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */ 1351 if (nsd->tls_ctx) 1352 SSL_CTX_free(nsd->tls_ctx); 1353 #endif 1354 1355 #ifdef MEMCLEAN /* OS collects memory pages */ 1356 #ifdef RATELIMIT 1357 rrl_mmap_deinit_keep_mmap(); 1358 #endif 1359 #ifdef USE_DNSTAP 1360 dt_collector_destroy(nsd->dt_collector, nsd); 1361 #endif 1362 udb_base_free_keep_mmap(nsd->task[0]); 1363 udb_base_free_keep_mmap(nsd->task[1]); 1364 namedb_close_udb(nsd->db); /* keeps mmap */ 1365 namedb_close(nsd->db); 1366 nsd_options_destroy(nsd->options); 1367 region_destroy(nsd->region); 1368 #endif 1369 log_finalize(); 1370 exit(0); 1371 } 1372 1373 void 1374 server_prepare_xfrd(struct nsd* nsd) 1375 { 1376 char tmpfile[256]; 1377 /* create task mmaps */ 1378 nsd->mytask = 0; 1379 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0", 1380 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1381 nsd->task[0] = task_file_create(tmpfile); 1382 if(!nsd->task[0]) { 1383 #ifdef USE_ZONE_STATS 1384 unlink(nsd->zonestatfname[0]); 1385 unlink(nsd->zonestatfname[1]); 1386 #endif 1387 xfrd_del_tempdir(nsd); 1388 exit(1); 1389 } 1390 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1", 1391 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1392 nsd->task[1] = task_file_create(tmpfile); 1393 if(!nsd->task[1]) { 1394 unlink(nsd->task[0]->fname); 1395 #ifdef USE_ZONE_STATS 1396 unlink(nsd->zonestatfname[0]); 1397 unlink(nsd->zonestatfname[1]); 1398 #endif 1399 xfrd_del_tempdir(nsd); 1400 exit(1); 1401 } 1402 assert(udb_base_get_userdata(nsd->task[0])->data == 0); 1403 assert(udb_base_get_userdata(nsd->task[1])->data == 0); 1404 /* create xfrd listener structure */ 1405 nsd->xfrd_listener = region_alloc(nsd->region, 1406 sizeof(netio_handler_type)); 1407 nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*) 1408 region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data)); 1409 nsd->xfrd_listener->fd = -1; 1410 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd = 1411 nsd; 1412 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn = 1413 xfrd_tcp_create(nsd->region, QIOBUFSZ); 1414 } 1415 1416 1417 void 1418 server_start_xfrd(struct nsd *nsd, int del_db, int reload_active) 1419 { 1420 pid_t pid; 1421 int sockets[2] = {0,0}; 1422 struct ipc_handler_conn_data *data; 1423 1424 if(nsd->xfrd_listener->fd != -1) 1425 close(nsd->xfrd_listener->fd); 1426 if(del_db) { 1427 /* recreate taskdb that xfrd was using, it may be corrupt */ 1428 /* we (or reload) use nsd->mytask, and xfrd uses the other */ 1429 char* tmpfile = nsd->task[1-nsd->mytask]->fname; 1430 nsd->task[1-nsd->mytask]->fname = NULL; 1431 /* free alloc already, so udb does not shrink itself */ 1432 udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc); 1433 nsd->task[1-nsd->mytask]->alloc = NULL; 1434 udb_base_free(nsd->task[1-nsd->mytask]); 1435 /* create new file, overwrite the old one */ 1436 nsd->task[1-nsd->mytask] = task_file_create(tmpfile); 1437 free(tmpfile); 1438 } 1439 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { 1440 log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); 1441 return; 1442 } 1443 pid = fork(); 1444 switch (pid) { 1445 case -1: 1446 log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); 1447 break; 1448 default: 1449 /* PARENT: close first socket, use second one */ 1450 close(sockets[0]); 1451 if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) { 1452 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1453 } 1454 if(del_db) xfrd_free_namedb(nsd); 1455 /* use other task than I am using, since if xfrd died and is 1456 * restarted, the reload is using nsd->mytask */ 1457 nsd->mytask = 1 - nsd->mytask; 1458 xfrd_init(sockets[1], nsd, del_db, reload_active, pid); 1459 /* ENOTREACH */ 1460 break; 1461 case 0: 1462 /* CHILD: close second socket, use first one */ 1463 close(sockets[1]); 1464 if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) { 1465 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1466 } 1467 nsd->xfrd_listener->fd = sockets[0]; 1468 break; 1469 } 1470 /* server-parent only */ 1471 nsd->xfrd_listener->timeout = NULL; 1472 nsd->xfrd_listener->event_types = NETIO_EVENT_READ; 1473 nsd->xfrd_listener->event_handler = parent_handle_xfrd_command; 1474 /* clear ongoing ipc reads */ 1475 data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data; 1476 data->conn->is_reading = 0; 1477 } 1478 1479 /** add all soainfo to taskdb */ 1480 static void 1481 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb) 1482 { 1483 struct radnode* n; 1484 udb_ptr task_last; /* last task, mytask is empty so NULL */ 1485 /* add all SOA INFO to mytask */ 1486 udb_ptr_init(&task_last, taskudb); 1487 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 1488 task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0); 1489 } 1490 udb_ptr_unlink(&task_last, taskudb); 1491 } 1492 1493 void 1494 server_send_soa_xfrd(struct nsd* nsd, int shortsoa) 1495 { 1496 /* normally this exchanges the SOA from nsd->xfrd and the expire back. 1497 * parent fills one taskdb with soas, xfrd fills other with expires. 1498 * then they exchange and process. 1499 * shortsoa: xfrd crashes and needs to be restarted and one taskdb 1500 * may be in use by reload. Fill SOA in taskdb and give to xfrd. 1501 * expire notifications can be sent back via a normal reload later 1502 * (xfrd will wait for current running reload to finish if any). 1503 */ 1504 sig_atomic_t cmd = 0; 1505 pid_t mypid; 1506 int xfrd_sock = nsd->xfrd_listener->fd; 1507 struct udb_base* taskudb = nsd->task[nsd->mytask]; 1508 udb_ptr t; 1509 if(!shortsoa) { 1510 if(nsd->signal_hint_shutdown) { 1511 shutdown: 1512 log_msg(LOG_WARNING, "signal received, shutting down..."); 1513 server_close_all_sockets(nsd->udp, nsd->ifs); 1514 server_close_all_sockets(nsd->tcp, nsd->ifs); 1515 #ifdef HAVE_SSL 1516 daemon_remote_close(nsd->rc); 1517 #endif 1518 /* Unlink it if possible... */ 1519 unlinkpid(nsd->pidfile); 1520 unlink(nsd->task[0]->fname); 1521 unlink(nsd->task[1]->fname); 1522 #ifdef USE_ZONE_STATS 1523 unlink(nsd->zonestatfname[0]); 1524 unlink(nsd->zonestatfname[1]); 1525 #endif 1526 /* write the nsd.db to disk, wait for it to complete */ 1527 udb_base_sync(nsd->db->udb, 1); 1528 udb_base_close(nsd->db->udb); 1529 server_shutdown(nsd); 1530 exit(0); 1531 } 1532 } 1533 if(shortsoa) { 1534 /* put SOA in xfrd task because mytask may be in use */ 1535 taskudb = nsd->task[1-nsd->mytask]; 1536 } 1537 1538 add_all_soa_to_task(nsd, taskudb); 1539 if(!shortsoa) { 1540 /* wait for xfrd to signal task is ready, RELOAD signal */ 1541 if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) || 1542 cmd != NSD_RELOAD) { 1543 log_msg(LOG_ERR, "did not get start signal from xfrd"); 1544 exit(1); 1545 } 1546 if(nsd->signal_hint_shutdown) { 1547 goto shutdown; 1548 } 1549 } 1550 /* give xfrd our task, signal it with RELOAD_DONE */ 1551 task_process_sync(taskudb); 1552 cmd = NSD_RELOAD_DONE; 1553 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1554 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1555 (int)nsd->pid, strerror(errno)); 1556 } 1557 mypid = getpid(); 1558 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1559 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1560 strerror(errno)); 1561 } 1562 1563 if(!shortsoa) { 1564 /* process the xfrd task works (expiry data) */ 1565 nsd->mytask = 1 - nsd->mytask; 1566 taskudb = nsd->task[nsd->mytask]; 1567 task_remap(taskudb); 1568 udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb)); 1569 while(!udb_ptr_is_null(&t)) { 1570 task_process_expire(nsd->db, TASKLIST(&t)); 1571 udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next); 1572 } 1573 udb_ptr_unlink(&t, taskudb); 1574 task_clear(taskudb); 1575 1576 /* tell xfrd that the task is emptied, signal with RELOAD_DONE */ 1577 cmd = NSD_RELOAD_DONE; 1578 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1579 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1580 (int)nsd->pid, strerror(errno)); 1581 } 1582 } 1583 } 1584 1585 #ifdef HAVE_SSL 1586 static void 1587 log_crypto_from_err(const char* str, unsigned long err) 1588 { 1589 /* error:[error code]:[library name]:[function name]:[reason string] */ 1590 char buf[128]; 1591 unsigned long e; 1592 ERR_error_string_n(err, buf, sizeof(buf)); 1593 log_msg(LOG_ERR, "%s crypto %s", str, buf); 1594 while( (e=ERR_get_error()) ) { 1595 ERR_error_string_n(e, buf, sizeof(buf)); 1596 log_msg(LOG_ERR, "and additionally crypto %s", buf); 1597 } 1598 } 1599 1600 void 1601 log_crypto_err(const char* str) 1602 { 1603 log_crypto_from_err(str, ERR_get_error()); 1604 } 1605 1606 /** true if the ssl handshake error has to be squelched from the logs */ 1607 static int 1608 squelch_err_ssl_handshake(unsigned long err) 1609 { 1610 if(verbosity >= 3) 1611 return 0; /* only squelch on low verbosity */ 1612 /* this is very specific, we could filter on ERR_GET_REASON() 1613 * (the third element in ERR_PACK) */ 1614 if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) || 1615 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) || 1616 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) || 1617 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE) 1618 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1619 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER) 1620 #endif 1621 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1622 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL) 1623 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL) 1624 # ifdef SSL_R_VERSION_TOO_LOW 1625 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW) 1626 # endif 1627 #endif 1628 ) 1629 return 1; 1630 return 0; 1631 } 1632 1633 void 1634 perform_openssl_init(void) 1635 { 1636 /* init SSL library */ 1637 #ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS 1638 ERR_load_crypto_strings(); 1639 #endif 1640 ERR_load_SSL_strings(); 1641 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO) 1642 OpenSSL_add_all_algorithms(); 1643 #else 1644 OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS 1645 | OPENSSL_INIT_ADD_ALL_DIGESTS 1646 | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); 1647 #endif 1648 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL) 1649 (void)SSL_library_init(); 1650 #else 1651 OPENSSL_init_ssl(0, NULL); 1652 #endif 1653 1654 if(!RAND_status()) { 1655 /* try to seed it */ 1656 unsigned char buf[256]; 1657 unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid(); 1658 size_t i; 1659 v = seed; 1660 for(i=0; i<256/sizeof(v); i++) { 1661 memmove(buf+i*sizeof(v), &v, sizeof(v)); 1662 v = v*seed + (unsigned int)i; 1663 } 1664 RAND_seed(buf, 256); 1665 log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time"); 1666 } 1667 } 1668 1669 static int 1670 get_ocsp(char *filename, unsigned char **ocsp) 1671 { 1672 BIO *bio; 1673 OCSP_RESPONSE *response; 1674 int len = -1; 1675 unsigned char *p, *buf; 1676 assert(filename); 1677 1678 if ((bio = BIO_new_file(filename, "r")) == NULL) { 1679 log_crypto_err("get_ocsp: BIO_new_file failed"); 1680 return -1; 1681 } 1682 1683 if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) { 1684 log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed"); 1685 BIO_free(bio); 1686 return -1; 1687 } 1688 1689 if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) { 1690 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed"); 1691 OCSP_RESPONSE_free(response); 1692 BIO_free(bio); 1693 return -1; 1694 } 1695 1696 if ((buf = malloc((size_t) len)) == NULL) { 1697 log_msg(LOG_ERR, "get_ocsp: malloc failed"); 1698 OCSP_RESPONSE_free(response); 1699 BIO_free(bio); 1700 return -1; 1701 } 1702 1703 p = buf; 1704 if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) { 1705 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed"); 1706 free(buf); 1707 OCSP_RESPONSE_free(response); 1708 BIO_free(bio); 1709 return -1; 1710 } 1711 1712 OCSP_RESPONSE_free(response); 1713 BIO_free(bio); 1714 1715 *ocsp = buf; 1716 return len; 1717 } 1718 1719 /* further setup ssl ctx after the keys are loaded */ 1720 static void 1721 listen_sslctx_setup_2(void* ctxt) 1722 { 1723 SSL_CTX* ctx = (SSL_CTX*)ctxt; 1724 (void)ctx; 1725 #if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO 1726 if(!SSL_CTX_set_ecdh_auto(ctx,1)) { 1727 log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE"); 1728 } 1729 #elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME) 1730 if(1) { 1731 EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1); 1732 if (!ecdh) { 1733 log_crypto_err("could not find p256, not enabling ECDHE"); 1734 } else { 1735 if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) { 1736 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE"); 1737 } 1738 EC_KEY_free (ecdh); 1739 } 1740 } 1741 #endif 1742 } 1743 1744 static int 1745 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg)) 1746 { 1747 if(ocspdata) { 1748 unsigned char *p; 1749 if ((p=malloc(ocspdata_len)) == NULL) { 1750 log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure"); 1751 return SSL_TLSEXT_ERR_NOACK; 1752 } 1753 memcpy(p, ocspdata, ocspdata_len); 1754 if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) { 1755 log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp"); 1756 free(p); 1757 return SSL_TLSEXT_ERR_NOACK; 1758 } 1759 return SSL_TLSEXT_ERR_OK; 1760 } else { 1761 return SSL_TLSEXT_ERR_NOACK; 1762 } 1763 } 1764 1765 SSL_CTX* 1766 server_tls_ctx_setup(char* key, char* pem, char* verifypem) 1767 { 1768 SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method()); 1769 if(!ctx) { 1770 log_crypto_err("could not SSL_CTX_new"); 1771 return NULL; 1772 } 1773 /* no SSLv2, SSLv3 because has defects */ 1774 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){ 1775 log_crypto_err("could not set SSL_OP_NO_SSLv2"); 1776 SSL_CTX_free(ctx); 1777 return NULL; 1778 } 1779 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3) 1780 != SSL_OP_NO_SSLv3){ 1781 log_crypto_err("could not set SSL_OP_NO_SSLv3"); 1782 SSL_CTX_free(ctx); 1783 return 0; 1784 } 1785 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1) 1786 /* if we have tls 1.1 disable 1.0 */ 1787 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1) 1788 != SSL_OP_NO_TLSv1){ 1789 log_crypto_err("could not set SSL_OP_NO_TLSv1"); 1790 SSL_CTX_free(ctx); 1791 return 0; 1792 } 1793 #endif 1794 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2) 1795 /* if we have tls 1.2 disable 1.1 */ 1796 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1) 1797 != SSL_OP_NO_TLSv1_1){ 1798 log_crypto_err("could not set SSL_OP_NO_TLSv1_1"); 1799 SSL_CTX_free(ctx); 1800 return 0; 1801 } 1802 #endif 1803 #if defined(SSL_OP_NO_RENEGOTIATION) 1804 /* disable client renegotiation */ 1805 if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) & 1806 SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) { 1807 log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION"); 1808 SSL_CTX_free(ctx); 1809 return 0; 1810 } 1811 #endif 1812 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20) 1813 /* if we have sha256, set the cipher list to have no known vulns */ 1814 if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20")) 1815 log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list"); 1816 #endif 1817 if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) & 1818 SSL_OP_CIPHER_SERVER_PREFERENCE) != 1819 SSL_OP_CIPHER_SERVER_PREFERENCE) { 1820 log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE"); 1821 SSL_CTX_free(ctx); 1822 return 0; 1823 } 1824 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL 1825 SSL_CTX_set_security_level(ctx, 0); 1826 #endif 1827 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) { 1828 log_msg(LOG_ERR, "error for cert file: %s", pem); 1829 log_crypto_err("error in SSL_CTX use_certificate_chain_file"); 1830 SSL_CTX_free(ctx); 1831 return NULL; 1832 } 1833 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) { 1834 log_msg(LOG_ERR, "error for private key file: %s", key); 1835 log_crypto_err("Error in SSL_CTX use_PrivateKey_file"); 1836 SSL_CTX_free(ctx); 1837 return NULL; 1838 } 1839 if(!SSL_CTX_check_private_key(ctx)) { 1840 log_msg(LOG_ERR, "error for key file: %s", key); 1841 log_crypto_err("Error in SSL_CTX check_private_key"); 1842 SSL_CTX_free(ctx); 1843 return NULL; 1844 } 1845 listen_sslctx_setup_2(ctx); 1846 if(verifypem && verifypem[0]) { 1847 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) { 1848 log_crypto_err("Error in SSL_CTX verify locations"); 1849 SSL_CTX_free(ctx); 1850 return NULL; 1851 } 1852 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem)); 1853 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL); 1854 } 1855 return ctx; 1856 } 1857 1858 SSL_CTX* 1859 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile) 1860 { 1861 char *key, *pem; 1862 SSL_CTX *ctx; 1863 1864 key = nsd->options->tls_service_key; 1865 pem = nsd->options->tls_service_pem; 1866 if(!key || key[0] == 0) { 1867 log_msg(LOG_ERR, "error: no tls-service-key file specified"); 1868 return NULL; 1869 } 1870 if(!pem || pem[0] == 0) { 1871 log_msg(LOG_ERR, "error: no tls-service-pem file specified"); 1872 return NULL; 1873 } 1874 1875 /* NOTE:This mimics the existing code in Unbound 1.5.1 by supporting SSL but 1876 * raft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2*/ 1877 ctx = server_tls_ctx_setup(key, pem, verifypem); 1878 if(!ctx) { 1879 log_msg(LOG_ERR, "could not setup server TLS context"); 1880 return NULL; 1881 } 1882 if(ocspfile && ocspfile[0]) { 1883 if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) { 1884 log_crypto_err("Error reading OCSPfile"); 1885 SSL_CTX_free(ctx); 1886 return NULL; 1887 } else { 1888 VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile)); 1889 if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) { 1890 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb"); 1891 SSL_CTX_free(ctx); 1892 return NULL; 1893 } 1894 } 1895 } 1896 return ctx; 1897 } 1898 1899 /* check if tcp_handler_accept_data created for TLS dedicated port */ 1900 int 1901 using_tls_port(struct sockaddr* addr, const char* tls_port) 1902 { 1903 in_port_t port = 0; 1904 1905 if (addr->sa_family == AF_INET) 1906 port = ((struct sockaddr_in*)addr)->sin_port; 1907 #ifndef HAVE_STRUCT_SOCKADDR_IN6 1908 else 1909 port = ((struct sockaddr_in6*)addr)->sin6_port; 1910 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */ 1911 if (atoi(tls_port) == ntohs(port)) 1912 return 1; 1913 1914 return 0; 1915 } 1916 #endif 1917 1918 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ 1919 ssize_t 1920 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 1921 { 1922 uint8_t* buf = (uint8_t*) p; 1923 ssize_t total = 0; 1924 struct pollfd fd; 1925 memset(&fd, 0, sizeof(fd)); 1926 fd.fd = s; 1927 fd.events = POLLIN; 1928 1929 while( total < sz) { 1930 ssize_t ret; 1931 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000); 1932 if(ret == -1) { 1933 if(errno == EAGAIN) 1934 /* blocking read */ 1935 continue; 1936 if(errno == EINTR) { 1937 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 1938 return -1; 1939 /* other signals can be handled later */ 1940 continue; 1941 } 1942 /* some error */ 1943 return -1; 1944 } 1945 if(ret == 0) { 1946 /* operation timed out */ 1947 return -2; 1948 } 1949 ret = read(s, buf+total, sz-total); 1950 if(ret == -1) { 1951 if(errno == EAGAIN) 1952 /* blocking read */ 1953 continue; 1954 if(errno == EINTR) { 1955 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 1956 return -1; 1957 /* other signals can be handled later */ 1958 continue; 1959 } 1960 /* some error */ 1961 return -1; 1962 } 1963 if(ret == 0) { 1964 /* closed connection! */ 1965 return 0; 1966 } 1967 total += ret; 1968 } 1969 return total; 1970 } 1971 1972 static void 1973 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) 1974 { 1975 sig_atomic_t cmd = NSD_QUIT_SYNC; 1976 udb_ptr t, next; 1977 udb_base* u = nsd->task[nsd->mytask]; 1978 udb_ptr_init(&next, u); 1979 udb_ptr_new(&t, u, udb_base_get_userdata(u)); 1980 udb_base_set_userdata(u, 0); 1981 while(!udb_ptr_is_null(&t)) { 1982 /* store next in list so this one can be deleted or reused */ 1983 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); 1984 udb_rptr_zero(&TASKLIST(&t)->next, u); 1985 1986 /* process task t */ 1987 /* append results for task t and update last_task */ 1988 task_process_in_reload(nsd, u, last_task, &t); 1989 1990 /* go to next */ 1991 udb_ptr_set_ptr(&t, u, &next); 1992 1993 /* if the parent has quit, we must quit too, poll the fd for cmds */ 1994 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 1995 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 1996 if(cmd == NSD_QUIT) { 1997 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 1998 /* sync to disk (if needed) */ 1999 udb_base_sync(nsd->db->udb, 0); 2000 /* unlink files of remainder of tasks */ 2001 while(!udb_ptr_is_null(&t)) { 2002 if(TASKLIST(&t)->task_type == task_apply_xfr) { 2003 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); 2004 } 2005 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); 2006 } 2007 udb_ptr_unlink(&t, u); 2008 udb_ptr_unlink(&next, u); 2009 exit(0); 2010 } 2011 } 2012 2013 } 2014 udb_ptr_unlink(&t, u); 2015 udb_ptr_unlink(&next, u); 2016 } 2017 2018 #ifdef BIND8_STATS 2019 static void 2020 parent_send_stats(struct nsd* nsd, int cmdfd) 2021 { 2022 size_t i; 2023 if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) { 2024 log_msg(LOG_ERR, "could not write stats to reload"); 2025 return; 2026 } 2027 for(i=0; i<nsd->child_count; i++) 2028 if(!write_socket(cmdfd, &nsd->children[i].query_count, 2029 sizeof(stc_type))) { 2030 log_msg(LOG_ERR, "could not write stats to reload"); 2031 return; 2032 } 2033 } 2034 2035 static void 2036 reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last) 2037 { 2038 struct nsdst s; 2039 stc_type* p; 2040 size_t i; 2041 if(block_read(nsd, cmdfd, &s, sizeof(s), 2042 RELOAD_SYNC_TIMEOUT) != sizeof(s)) { 2043 log_msg(LOG_ERR, "could not read stats from oldpar"); 2044 return; 2045 } 2046 s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0); 2047 s.db_mem = region_get_mem(nsd->db->region); 2048 p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s, 2049 nsd->child_count); 2050 if(!p) return; 2051 for(i=0; i<nsd->child_count; i++) { 2052 if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!= 2053 sizeof(stc_type)) 2054 return; 2055 } 2056 } 2057 #endif /* BIND8_STATS */ 2058 2059 /* 2060 * Reload the database, stop parent, re-fork children and continue. 2061 * as server_main. 2062 */ 2063 static void 2064 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 2065 int cmdsocket) 2066 { 2067 pid_t mypid; 2068 sig_atomic_t cmd = NSD_QUIT_SYNC; 2069 int ret; 2070 udb_ptr last_task; 2071 struct sigaction old_sigchld, ign_sigchld; 2072 /* ignore SIGCHLD from the previous server_main that used this pid */ 2073 memset(&ign_sigchld, 0, sizeof(ign_sigchld)); 2074 ign_sigchld.sa_handler = SIG_IGN; 2075 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld); 2076 2077 /* see what tasks we got from xfrd */ 2078 task_remap(nsd->task[nsd->mytask]); 2079 udb_ptr_init(&last_task, nsd->task[nsd->mytask]); 2080 udb_compact_inhibited(nsd->db->udb, 1); 2081 reload_process_tasks(nsd, &last_task, cmdsocket); 2082 udb_compact_inhibited(nsd->db->udb, 0); 2083 udb_compact(nsd->db->udb); 2084 2085 #ifndef NDEBUG 2086 if(nsd_debug_level >= 1) 2087 region_log_stats(nsd->db->region); 2088 #endif /* NDEBUG */ 2089 /* sync to disk (if needed) */ 2090 udb_base_sync(nsd->db->udb, 0); 2091 2092 initialize_dname_compression_tables(nsd); 2093 2094 #ifdef BIND8_STATS 2095 /* Restart dumping stats if required. */ 2096 time(&nsd->st.boot); 2097 set_bind8_alarm(nsd); 2098 #endif 2099 #ifdef USE_ZONE_STATS 2100 server_zonestat_realloc(nsd); /* realloc for new children */ 2101 server_zonestat_switch(nsd); 2102 #endif 2103 2104 /* listen for the signals of failed children again */ 2105 sigaction(SIGCHLD, &old_sigchld, NULL); 2106 /* Start new child processes */ 2107 if (server_start_children(nsd, server_region, netio, &nsd-> 2108 xfrd_listener->fd) != 0) { 2109 send_children_quit(nsd); 2110 exit(1); 2111 } 2112 2113 /* if the parent has quit, we must quit too, poll the fd for cmds */ 2114 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2115 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2116 if(cmd == NSD_QUIT) { 2117 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2118 send_children_quit(nsd); 2119 exit(0); 2120 } 2121 } 2122 2123 /* Send quit command to parent: blocking, wait for receipt. */ 2124 do { 2125 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 2126 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) 2127 { 2128 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s", 2129 strerror(errno)); 2130 } 2131 /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ 2132 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); 2133 ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 2134 RELOAD_SYNC_TIMEOUT); 2135 if(ret == -2) { 2136 DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry")); 2137 } 2138 } while (ret == -2); 2139 if(ret == -1) { 2140 log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s", 2141 strerror(errno)); 2142 } 2143 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd)); 2144 if(cmd == NSD_QUIT) { 2145 /* small race condition possible here, parent got quit cmd. */ 2146 send_children_quit(nsd); 2147 exit(1); 2148 } 2149 assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); 2150 #ifdef BIND8_STATS 2151 reload_do_stats(cmdsocket, nsd, &last_task); 2152 #endif 2153 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); 2154 task_process_sync(nsd->task[nsd->mytask]); 2155 #ifdef USE_ZONE_STATS 2156 server_zonestat_realloc(nsd); /* realloc for next children */ 2157 #endif 2158 2159 /* send soainfo to the xfrd process, signal it that reload is done, 2160 * it picks up the taskudb */ 2161 cmd = NSD_RELOAD_DONE; 2162 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2163 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", 2164 strerror(errno)); 2165 } 2166 mypid = getpid(); 2167 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2168 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2169 strerror(errno)); 2170 } 2171 2172 /* try to reopen file */ 2173 if (nsd->file_rotation_ok) 2174 log_reopen(nsd->log_filename, 1); 2175 /* exit reload, continue as new server_main */ 2176 } 2177 2178 /* 2179 * Get the mode depending on the signal hints that have been received. 2180 * Multiple signal hints can be received and will be handled in turn. 2181 */ 2182 static sig_atomic_t 2183 server_signal_mode(struct nsd *nsd) 2184 { 2185 if(nsd->signal_hint_quit) { 2186 nsd->signal_hint_quit = 0; 2187 return NSD_QUIT; 2188 } 2189 else if(nsd->signal_hint_shutdown) { 2190 nsd->signal_hint_shutdown = 0; 2191 return NSD_SHUTDOWN; 2192 } 2193 else if(nsd->signal_hint_child) { 2194 nsd->signal_hint_child = 0; 2195 return NSD_REAP_CHILDREN; 2196 } 2197 else if(nsd->signal_hint_reload) { 2198 nsd->signal_hint_reload = 0; 2199 return NSD_RELOAD; 2200 } 2201 else if(nsd->signal_hint_reload_hup) { 2202 nsd->signal_hint_reload_hup = 0; 2203 return NSD_RELOAD_REQ; 2204 } 2205 else if(nsd->signal_hint_stats) { 2206 nsd->signal_hint_stats = 0; 2207 #ifdef BIND8_STATS 2208 set_bind8_alarm(nsd); 2209 #endif 2210 return NSD_STATS; 2211 } 2212 else if(nsd->signal_hint_statsusr) { 2213 nsd->signal_hint_statsusr = 0; 2214 return NSD_STATS; 2215 } 2216 return NSD_RUN; 2217 } 2218 2219 /* 2220 * The main server simply waits for signals and child processes to 2221 * terminate. Child processes are restarted as necessary. 2222 */ 2223 void 2224 server_main(struct nsd *nsd) 2225 { 2226 region_type *server_region = region_create(xalloc, free); 2227 netio_type *netio = netio_create(server_region); 2228 netio_handler_type reload_listener; 2229 int reload_sockets[2] = {-1, -1}; 2230 struct timespec timeout_spec; 2231 int status; 2232 pid_t child_pid; 2233 pid_t reload_pid = -1; 2234 sig_atomic_t mode; 2235 2236 /* Ensure we are the main process */ 2237 assert(nsd->server_kind == NSD_SERVER_MAIN); 2238 2239 /* Add listener for the XFRD process */ 2240 netio_add_handler(netio, nsd->xfrd_listener); 2241 2242 /* Start the child processes that handle incoming queries */ 2243 if (server_start_children(nsd, server_region, netio, 2244 &nsd->xfrd_listener->fd) != 0) { 2245 send_children_quit(nsd); 2246 exit(1); 2247 } 2248 reload_listener.fd = -1; 2249 2250 /* This_child MUST be 0, because this is the parent process */ 2251 assert(nsd->this_child == 0); 2252 2253 /* Run the server until we get a shutdown signal */ 2254 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 2255 /* Did we receive a signal that changes our mode? */ 2256 if(mode == NSD_RUN) { 2257 nsd->mode = mode = server_signal_mode(nsd); 2258 } 2259 2260 switch (mode) { 2261 case NSD_RUN: 2262 /* see if any child processes terminated */ 2263 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) { 2264 int is_child = delete_child_pid(nsd, child_pid); 2265 if (is_child != -1 && nsd->children[is_child].need_to_exit) { 2266 if(nsd->children[is_child].child_fd == -1) 2267 nsd->children[is_child].has_exited = 1; 2268 parent_check_all_children_exited(nsd); 2269 } else if(is_child != -1) { 2270 log_msg(LOG_WARNING, 2271 "server %d died unexpectedly with status %d, restarting", 2272 (int) child_pid, status); 2273 restart_child_servers(nsd, server_region, netio, 2274 &nsd->xfrd_listener->fd); 2275 } else if (child_pid == reload_pid) { 2276 sig_atomic_t cmd = NSD_RELOAD_DONE; 2277 pid_t mypid; 2278 log_msg(LOG_WARNING, 2279 "Reload process %d failed with status %d, continuing with old database", 2280 (int) child_pid, status); 2281 reload_pid = -1; 2282 if(reload_listener.fd != -1) close(reload_listener.fd); 2283 reload_listener.fd = -1; 2284 reload_listener.event_types = NETIO_EVENT_NONE; 2285 task_process_sync(nsd->task[nsd->mytask]); 2286 /* inform xfrd reload attempt ended */ 2287 if(!write_socket(nsd->xfrd_listener->fd, 2288 &cmd, sizeof(cmd))) { 2289 log_msg(LOG_ERR, "problems " 2290 "sending SOAEND to xfrd: %s", 2291 strerror(errno)); 2292 } 2293 mypid = getpid(); 2294 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2295 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2296 strerror(errno)); 2297 } 2298 } else if(status != 0) { 2299 /* check for status, because we get 2300 * the old-servermain because reload 2301 * is the process-parent of old-main, 2302 * and we get older server-processes 2303 * that are exiting after a reload */ 2304 log_msg(LOG_WARNING, 2305 "process %d terminated with status %d", 2306 (int) child_pid, status); 2307 } 2308 } 2309 if (child_pid == -1) { 2310 if (errno == EINTR) { 2311 continue; 2312 } 2313 if (errno != ECHILD) 2314 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 2315 } 2316 if (nsd->mode != NSD_RUN) 2317 break; 2318 2319 /* timeout to collect processes. In case no sigchild happens. */ 2320 timeout_spec.tv_sec = 60; 2321 timeout_spec.tv_nsec = 0; 2322 2323 /* listen on ports, timeout for collecting terminated children */ 2324 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 2325 if (errno != EINTR) { 2326 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 2327 } 2328 } 2329 if(nsd->restart_children) { 2330 restart_child_servers(nsd, server_region, netio, 2331 &nsd->xfrd_listener->fd); 2332 nsd->restart_children = 0; 2333 } 2334 if(nsd->reload_failed) { 2335 sig_atomic_t cmd = NSD_RELOAD_DONE; 2336 pid_t mypid; 2337 nsd->reload_failed = 0; 2338 log_msg(LOG_WARNING, 2339 "Reload process %d failed, continuing with old database", 2340 (int) reload_pid); 2341 reload_pid = -1; 2342 if(reload_listener.fd != -1) close(reload_listener.fd); 2343 reload_listener.fd = -1; 2344 reload_listener.event_types = NETIO_EVENT_NONE; 2345 task_process_sync(nsd->task[nsd->mytask]); 2346 /* inform xfrd reload attempt ended */ 2347 if(!write_socket(nsd->xfrd_listener->fd, 2348 &cmd, sizeof(cmd))) { 2349 log_msg(LOG_ERR, "problems " 2350 "sending SOAEND to xfrd: %s", 2351 strerror(errno)); 2352 } 2353 mypid = getpid(); 2354 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2355 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2356 strerror(errno)); 2357 } 2358 } 2359 2360 break; 2361 case NSD_RELOAD_REQ: { 2362 sig_atomic_t cmd = NSD_RELOAD_REQ; 2363 log_msg(LOG_WARNING, "SIGHUP received, reloading..."); 2364 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2365 "main: ipc send reload_req to xfrd")); 2366 if(!write_socket(nsd->xfrd_listener->fd, 2367 &cmd, sizeof(cmd))) { 2368 log_msg(LOG_ERR, "server_main: could not send " 2369 "reload_req to xfrd: %s", strerror(errno)); 2370 } 2371 nsd->mode = NSD_RUN; 2372 } break; 2373 case NSD_RELOAD: 2374 /* Continue to run nsd after reload */ 2375 nsd->mode = NSD_RUN; 2376 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); 2377 if (reload_pid != -1) { 2378 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 2379 (int) reload_pid); 2380 break; 2381 } 2382 2383 /* switch the mytask to keep track of who owns task*/ 2384 nsd->mytask = 1 - nsd->mytask; 2385 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 2386 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 2387 reload_pid = -1; 2388 break; 2389 } 2390 2391 /* Do actual reload */ 2392 reload_pid = fork(); 2393 switch (reload_pid) { 2394 case -1: 2395 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 2396 break; 2397 default: 2398 /* PARENT */ 2399 close(reload_sockets[0]); 2400 server_reload(nsd, server_region, netio, 2401 reload_sockets[1]); 2402 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 2403 close(reload_sockets[1]); 2404 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 2405 /* drop stale xfrd ipc data */ 2406 ((struct ipc_handler_conn_data*)nsd-> 2407 xfrd_listener->user_data) 2408 ->conn->is_reading = 0; 2409 reload_pid = -1; 2410 reload_listener.fd = -1; 2411 reload_listener.event_types = NETIO_EVENT_NONE; 2412 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 2413 break; 2414 case 0: 2415 /* CHILD */ 2416 /* server_main keep running until NSD_QUIT_SYNC 2417 * received from reload. */ 2418 close(reload_sockets[1]); 2419 reload_listener.fd = reload_sockets[0]; 2420 reload_listener.timeout = NULL; 2421 reload_listener.user_data = nsd; 2422 reload_listener.event_types = NETIO_EVENT_READ; 2423 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 2424 netio_add_handler(netio, &reload_listener); 2425 reload_pid = getppid(); 2426 break; 2427 } 2428 break; 2429 case NSD_QUIT_SYNC: 2430 /* synchronisation of xfrd, parent and reload */ 2431 if(!nsd->quit_sync_done && reload_listener.fd != -1) { 2432 sig_atomic_t cmd = NSD_RELOAD; 2433 /* stop xfrd ipc writes in progress */ 2434 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2435 "main: ipc send indication reload")); 2436 if(!write_socket(nsd->xfrd_listener->fd, 2437 &cmd, sizeof(cmd))) { 2438 log_msg(LOG_ERR, "server_main: could not send reload " 2439 "indication to xfrd: %s", strerror(errno)); 2440 } 2441 /* wait for ACK from xfrd */ 2442 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 2443 nsd->quit_sync_done = 1; 2444 } 2445 nsd->mode = NSD_RUN; 2446 break; 2447 case NSD_QUIT: 2448 /* silent shutdown during reload */ 2449 if(reload_listener.fd != -1) { 2450 /* acknowledge the quit, to sync reload that we will really quit now */ 2451 sig_atomic_t cmd = NSD_RELOAD; 2452 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 2453 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2454 log_msg(LOG_ERR, "server_main: " 2455 "could not ack quit: %s", strerror(errno)); 2456 } 2457 #ifdef BIND8_STATS 2458 parent_send_stats(nsd, reload_listener.fd); 2459 #endif /* BIND8_STATS */ 2460 close(reload_listener.fd); 2461 } 2462 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); 2463 /* only quit children after xfrd has acked */ 2464 send_children_quit(nsd); 2465 2466 #ifdef MEMCLEAN /* OS collects memory pages */ 2467 region_destroy(server_region); 2468 #endif 2469 server_shutdown(nsd); 2470 2471 /* ENOTREACH */ 2472 break; 2473 case NSD_SHUTDOWN: 2474 break; 2475 case NSD_REAP_CHILDREN: 2476 /* continue; wait for child in run loop */ 2477 nsd->mode = NSD_RUN; 2478 break; 2479 case NSD_STATS: 2480 #ifdef BIND8_STATS 2481 set_children_stats(nsd); 2482 #endif 2483 nsd->mode = NSD_RUN; 2484 break; 2485 default: 2486 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); 2487 nsd->mode = NSD_RUN; 2488 break; 2489 } 2490 } 2491 log_msg(LOG_WARNING, "signal received, shutting down..."); 2492 2493 /* close opened ports to avoid race with restart of nsd */ 2494 server_close_all_sockets(nsd->udp, nsd->ifs); 2495 server_close_all_sockets(nsd->tcp, nsd->ifs); 2496 #ifdef HAVE_SSL 2497 daemon_remote_close(nsd->rc); 2498 #endif 2499 send_children_quit_and_wait(nsd); 2500 2501 /* Unlink it if possible... */ 2502 unlinkpid(nsd->pidfile); 2503 unlink(nsd->task[0]->fname); 2504 unlink(nsd->task[1]->fname); 2505 #ifdef USE_ZONE_STATS 2506 unlink(nsd->zonestatfname[0]); 2507 unlink(nsd->zonestatfname[1]); 2508 #endif 2509 #ifdef USE_DNSTAP 2510 dt_collector_close(nsd->dt_collector, nsd); 2511 #endif 2512 2513 if(reload_listener.fd != -1) { 2514 sig_atomic_t cmd = NSD_QUIT; 2515 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2516 "main: ipc send quit to reload-process")); 2517 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2518 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 2519 strerror(errno)); 2520 } 2521 fsync(reload_listener.fd); 2522 close(reload_listener.fd); 2523 /* wait for reload to finish processing */ 2524 while(1) { 2525 if(waitpid(reload_pid, NULL, 0) == -1) { 2526 if(errno == EINTR) continue; 2527 if(errno == ECHILD) break; 2528 log_msg(LOG_ERR, "waitpid(reload %d): %s", 2529 (int)reload_pid, strerror(errno)); 2530 } 2531 break; 2532 } 2533 } 2534 if(nsd->xfrd_listener->fd != -1) { 2535 /* complete quit, stop xfrd */ 2536 sig_atomic_t cmd = NSD_QUIT; 2537 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2538 "main: ipc send quit to xfrd")); 2539 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2540 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 2541 strerror(errno)); 2542 } 2543 fsync(nsd->xfrd_listener->fd); 2544 close(nsd->xfrd_listener->fd); 2545 (void)kill(nsd->pid, SIGTERM); 2546 } 2547 2548 #ifdef MEMCLEAN /* OS collects memory pages */ 2549 region_destroy(server_region); 2550 #endif 2551 /* write the nsd.db to disk, wait for it to complete */ 2552 udb_base_sync(nsd->db->udb, 1); 2553 udb_base_close(nsd->db->udb); 2554 server_shutdown(nsd); 2555 } 2556 2557 static query_state_type 2558 server_process_query(struct nsd *nsd, struct query *query) 2559 { 2560 return query_process(query, nsd); 2561 } 2562 2563 static query_state_type 2564 server_process_query_udp(struct nsd *nsd, struct query *query) 2565 { 2566 #ifdef RATELIMIT 2567 if(query_process(query, nsd) != QUERY_DISCARDED) { 2568 if(rrl_process_query(query)) 2569 return rrl_slip(query); 2570 else return QUERY_PROCESSED; 2571 } 2572 return QUERY_DISCARDED; 2573 #else 2574 return query_process(query, nsd); 2575 #endif 2576 } 2577 2578 struct event_base* 2579 nsd_child_event_base(void) 2580 { 2581 struct event_base* base; 2582 #ifdef USE_MINI_EVENT 2583 static time_t secs; 2584 static struct timeval now; 2585 base = event_init(&secs, &now); 2586 #else 2587 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) 2588 /* libev */ 2589 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); 2590 # else 2591 /* libevent */ 2592 # ifdef HAVE_EVENT_BASE_NEW 2593 base = event_base_new(); 2594 # else 2595 base = event_init(); 2596 # endif 2597 # endif 2598 #endif 2599 return base; 2600 } 2601 2602 static void 2603 add_udp_handler( 2604 struct nsd *nsd, 2605 struct nsd_socket *sock, 2606 struct udp_handler_data *data) 2607 { 2608 struct event *handler = &data->event; 2609 2610 data->nsd = nsd; 2611 data->socket = sock; 2612 2613 memset(handler, 0, sizeof(*handler)); 2614 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data); 2615 if(event_base_set(nsd->event_base, handler) != 0) 2616 log_msg(LOG_ERR, "nsd udp: event_base_set failed"); 2617 if(event_add(handler, NULL) != 0) 2618 log_msg(LOG_ERR, "nsd udp: event_add failed"); 2619 } 2620 2621 void 2622 add_tcp_handler( 2623 struct nsd *nsd, 2624 struct nsd_socket *sock, 2625 struct tcp_accept_handler_data *data) 2626 { 2627 struct event *handler = &data->event; 2628 2629 data->nsd = nsd; 2630 data->socket = sock; 2631 2632 #ifdef HAVE_SSL 2633 if (nsd->tls_ctx && 2634 nsd->options->tls_port && 2635 using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port)) 2636 { 2637 data->tls_accept = 1; 2638 if(verbosity >= 2) { 2639 char buf[48]; 2640 addrport2str((struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf)); 2641 VERBOSITY(2, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf)); 2642 } 2643 } else { 2644 data->tls_accept = 0; 2645 } 2646 #endif 2647 2648 memset(handler, 0, sizeof(*handler)); 2649 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data); 2650 if(event_base_set(nsd->event_base, handler) != 0) 2651 log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); 2652 if(event_add(handler, NULL) != 0) 2653 log_msg(LOG_ERR, "nsd tcp: event_add failed"); 2654 data->event_added = 1; 2655 } 2656 2657 /* 2658 * Serve DNS requests. 2659 */ 2660 void 2661 server_child(struct nsd *nsd) 2662 { 2663 size_t i, from, numifs; 2664 region_type *server_region = region_create(xalloc, free); 2665 struct event_base* event_base = nsd_child_event_base(); 2666 sig_atomic_t mode; 2667 2668 if(!event_base) { 2669 log_msg(LOG_ERR, "nsd server could not create event base"); 2670 exit(1); 2671 } 2672 nsd->event_base = event_base; 2673 nsd->server_region = server_region; 2674 2675 #ifdef RATELIMIT 2676 rrl_init(nsd->this_child->child_num); 2677 #endif 2678 2679 assert(nsd->server_kind != NSD_SERVER_MAIN); 2680 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 2681 2682 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 2683 server_close_all_sockets(nsd->tcp, nsd->ifs); 2684 } 2685 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 2686 server_close_all_sockets(nsd->udp, nsd->ifs); 2687 } 2688 2689 if (nsd->this_child->parent_fd != -1) { 2690 struct event *handler; 2691 struct ipc_handler_conn_data* user_data = 2692 (struct ipc_handler_conn_data*)region_alloc( 2693 server_region, sizeof(struct ipc_handler_conn_data)); 2694 user_data->nsd = nsd; 2695 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); 2696 2697 handler = (struct event*) region_alloc( 2698 server_region, sizeof(*handler)); 2699 memset(handler, 0, sizeof(*handler)); 2700 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| 2701 EV_READ, child_handle_parent_command, user_data); 2702 if(event_base_set(event_base, handler) != 0) 2703 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); 2704 if(event_add(handler, NULL) != 0) 2705 log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); 2706 } 2707 2708 if(nsd->reuseport) { 2709 numifs = nsd->ifs / nsd->reuseport; 2710 from = numifs * nsd->this_child->child_num; 2711 if(from+numifs > nsd->ifs) { /* should not happen */ 2712 from = 0; 2713 numifs = nsd->ifs; 2714 } 2715 } else { 2716 from = 0; 2717 numifs = nsd->ifs; 2718 } 2719 2720 if (nsd->server_kind & NSD_SERVER_UDP) { 2721 memset(msgs, 0, sizeof(msgs)); 2722 for (i = 0; i < NUM_RECV_PER_SELECT; i++) { 2723 queries[i] = query_create(server_region, 2724 compressed_dname_offsets, 2725 compression_table_size, compressed_dnames); 2726 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2727 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 2728 iovecs[i].iov_len = buffer_remaining(queries[i]->packet);; 2729 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 2730 msgs[i].msg_hdr.msg_iovlen = 1; 2731 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 2732 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2733 } 2734 2735 for (i = from; i < from+numifs; ++i) { 2736 struct udp_handler_data *data = region_alloc_zero( 2737 nsd->server_region, sizeof(*data)); 2738 add_udp_handler(nsd, &nsd->udp[i], data); 2739 } 2740 } 2741 2742 /* 2743 * Keep track of all the TCP accept handlers so we can enable 2744 * and disable them based on the current number of active TCP 2745 * connections. 2746 */ 2747 if (nsd->server_kind & NSD_SERVER_TCP) { 2748 tcp_accept_handler_count = numifs; 2749 tcp_accept_handlers = region_alloc_array(server_region, 2750 numifs, sizeof(*tcp_accept_handlers)); 2751 2752 for (i = from; i < numifs; i++) { 2753 struct tcp_accept_handler_data *data = 2754 &tcp_accept_handlers[i-from]; 2755 memset(data, 0, sizeof(*data)); 2756 add_tcp_handler(nsd, &nsd->tcp[i], data); 2757 } 2758 } else { 2759 tcp_accept_handler_count = 0; 2760 } 2761 2762 /* The main loop... */ 2763 while ((mode = nsd->mode) != NSD_QUIT) { 2764 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 2765 2766 /* Do we need to do the statistics... */ 2767 if (mode == NSD_STATS) { 2768 #ifdef BIND8_STATS 2769 int p = nsd->st.period; 2770 nsd->st.period = 1; /* force stats printout */ 2771 /* Dump the statistics */ 2772 bind8_stats(nsd); 2773 nsd->st.period = p; 2774 #else /* !BIND8_STATS */ 2775 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 2776 #endif /* BIND8_STATS */ 2777 2778 nsd->mode = NSD_RUN; 2779 } 2780 else if (mode == NSD_REAP_CHILDREN) { 2781 /* got signal, notify parent. parent reaps terminated children. */ 2782 if (nsd->this_child->parent_fd != -1) { 2783 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 2784 if (write(nsd->this_child->parent_fd, 2785 &parent_notify, 2786 sizeof(parent_notify)) == -1) 2787 { 2788 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 2789 (int) nsd->this_child->pid, strerror(errno)); 2790 } 2791 } else /* no parent, so reap 'em */ 2792 while (waitpid(-1, NULL, WNOHANG) > 0) ; 2793 nsd->mode = NSD_RUN; 2794 } 2795 else if(mode == NSD_RUN) { 2796 /* Wait for a query... */ 2797 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 2798 if (errno != EINTR) { 2799 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 2800 break; 2801 } 2802 } 2803 } else if(mode == NSD_QUIT) { 2804 /* ignore here, quit */ 2805 } else { 2806 log_msg(LOG_ERR, "mode bad value %d, back to service.", 2807 (int)mode); 2808 nsd->mode = NSD_RUN; 2809 } 2810 } 2811 2812 service_remaining_tcp(nsd); 2813 #ifdef BIND8_STATS 2814 bind8_stats(nsd); 2815 #endif /* BIND8_STATS */ 2816 2817 #ifdef MEMCLEAN /* OS collects memory pages */ 2818 #ifdef RATELIMIT 2819 rrl_deinit(nsd->this_child->child_num); 2820 #endif 2821 event_base_free(event_base); 2822 region_destroy(server_region); 2823 #endif 2824 server_shutdown(nsd); 2825 } 2826 2827 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg) 2828 { 2829 int* timed_out = (int*)arg; 2830 assert(event & EV_TIMEOUT); 2831 /* wake up the service tcp thread, note event is no longer 2832 * registered */ 2833 *timed_out = 1; 2834 } 2835 2836 void 2837 service_remaining_tcp(struct nsd* nsd) 2838 { 2839 struct tcp_handler_data* p; 2840 struct event_base* event_base; 2841 /* check if it is needed */ 2842 if(nsd->current_tcp_count == 0 || tcp_active_list == NULL) 2843 return; 2844 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections")); 2845 2846 /* setup event base */ 2847 event_base = nsd_child_event_base(); 2848 if(!event_base) { 2849 log_msg(LOG_ERR, "nsd remain tcp could not create event base"); 2850 return; 2851 } 2852 /* register tcp connections */ 2853 for(p = tcp_active_list; p != NULL; p = p->next) { 2854 struct timeval timeout; 2855 int fd = p->event.ev_fd; 2856 #ifdef USE_MINI_EVENT 2857 short event = p->event.ev_flags & (EV_READ|EV_WRITE); 2858 #else 2859 short event = p->event.ev_events & (EV_READ|EV_WRITE); 2860 #endif 2861 void (*fn)(int, short, void*); 2862 #ifdef HAVE_SSL 2863 if(p->tls) { 2864 if((event&EV_READ)) 2865 fn = handle_tls_reading; 2866 else fn = handle_tls_writing; 2867 } else { 2868 #endif 2869 if((event&EV_READ)) 2870 fn = handle_tcp_reading; 2871 else fn = handle_tcp_writing; 2872 #ifdef HAVE_SSL 2873 } 2874 #endif 2875 2876 /* set timeout to 1/10 second */ 2877 if(p->tcp_timeout > 100) 2878 p->tcp_timeout = 100; 2879 timeout.tv_sec = p->tcp_timeout / 1000; 2880 timeout.tv_usec = (p->tcp_timeout % 1000)*1000; 2881 event_del(&p->event); 2882 memset(&p->event, 0, sizeof(p->event)); 2883 event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT, 2884 fn, p); 2885 if(event_base_set(event_base, &p->event) != 0) 2886 log_msg(LOG_ERR, "event base set failed"); 2887 if(event_add(&p->event, &timeout) != 0) 2888 log_msg(LOG_ERR, "event add failed"); 2889 } 2890 2891 /* handle it */ 2892 while(nsd->current_tcp_count > 0) { 2893 mode_t m = server_signal_mode(nsd); 2894 struct event timeout; 2895 struct timeval tv; 2896 int timed_out = 0; 2897 if(m == NSD_QUIT || m == NSD_SHUTDOWN || 2898 m == NSD_REAP_CHILDREN) { 2899 /* quit */ 2900 break; 2901 } 2902 /* timer */ 2903 /* have to do something every second */ 2904 tv.tv_sec = 1; 2905 tv.tv_usec = 0; 2906 memset(&timeout, 0, sizeof(timeout)); 2907 event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout, 2908 &timed_out); 2909 if(event_base_set(event_base, &timeout) != 0) 2910 log_msg(LOG_ERR, "remaintcp timer: event_base_set failed"); 2911 if(event_add(&timeout, &tv) != 0) 2912 log_msg(LOG_ERR, "remaintcp timer: event_add failed"); 2913 2914 /* service loop */ 2915 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 2916 if (errno != EINTR) { 2917 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 2918 break; 2919 } 2920 } 2921 if(!timed_out) { 2922 event_del(&timeout); 2923 } else { 2924 /* timed out, quit */ 2925 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit")); 2926 break; 2927 } 2928 } 2929 #ifdef MEMCLEAN 2930 event_base_free(event_base); 2931 #endif 2932 /* continue to quit after return */ 2933 } 2934 2935 /* Implement recvmmsg and sendmmsg if the platform does not. These functions 2936 * are always used, even if nonblocking operations are broken, in which case 2937 * NUM_RECV_PER_SELECT is defined to 1 (one). 2938 */ 2939 #if defined(HAVE_RECVMMSG) 2940 #define nsd_recvmmsg recvmmsg 2941 #else /* !HAVE_RECVMMSG */ 2942 2943 static int 2944 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, 2945 int flags, struct timespec *timeout) 2946 { 2947 int orig_errno; 2948 unsigned int vpos = 0; 2949 ssize_t rcvd; 2950 2951 /* timeout is ignored, ensure caller does not expect it to work */ 2952 assert(timeout == NULL); 2953 2954 orig_errno = errno; 2955 errno = 0; 2956 while(vpos < vlen) { 2957 rcvd = recvfrom(sockfd, 2958 msgvec[vpos].msg_hdr.msg_iov->iov_base, 2959 msgvec[vpos].msg_hdr.msg_iov->iov_len, 2960 flags, 2961 msgvec[vpos].msg_hdr.msg_name, 2962 &msgvec[vpos].msg_hdr.msg_namelen); 2963 if(rcvd < 0) { 2964 break; 2965 } else { 2966 assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX); 2967 msgvec[vpos].msg_len = (unsigned int)rcvd; 2968 vpos++; 2969 } 2970 } 2971 2972 if(vpos) { 2973 /* error will be picked up next time */ 2974 return (int)vpos; 2975 } else if(errno == 0) { 2976 errno = orig_errno; 2977 return 0; 2978 } else if(errno == EAGAIN) { 2979 return 0; 2980 } 2981 2982 return -1; 2983 } 2984 #endif /* HAVE_RECVMMSG */ 2985 2986 #ifdef HAVE_SENDMMSG 2987 #define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__) 2988 #else /* !HAVE_SENDMMSG */ 2989 2990 static int 2991 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags) 2992 { 2993 int orig_errno; 2994 unsigned int vpos = 0; 2995 ssize_t snd; 2996 2997 orig_errno = errno; 2998 errno = 0; 2999 while(vpos < vlen) { 3000 assert(msgvec[vpos].msg_hdr.msg_iovlen == 1); 3001 snd = sendto(sockfd, 3002 msgvec[vpos].msg_hdr.msg_iov->iov_base, 3003 msgvec[vpos].msg_hdr.msg_iov->iov_len, 3004 flags, 3005 msgvec[vpos].msg_hdr.msg_name, 3006 msgvec[vpos].msg_hdr.msg_namelen); 3007 if(snd < 0) { 3008 break; 3009 } else { 3010 msgvec[vpos].msg_len = (unsigned int)snd; 3011 vpos++; 3012 } 3013 } 3014 3015 if(vpos) { 3016 return (int)vpos; 3017 } else if(errno == 0) { 3018 errno = orig_errno; 3019 return 0; 3020 } 3021 3022 return -1; 3023 } 3024 #endif /* HAVE_SENDMMSG */ 3025 3026 static void 3027 handle_udp(int fd, short event, void* arg) 3028 { 3029 struct udp_handler_data *data = (struct udp_handler_data *) arg; 3030 int received, sent, recvcount, i; 3031 struct query *q; 3032 3033 if (!(event & EV_READ)) { 3034 return; 3035 } 3036 recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 3037 /* this printf strangely gave a performance increase on Linux */ 3038 /* printf("recvcount %d \n", recvcount); */ 3039 if (recvcount == -1) { 3040 if (errno != EAGAIN && errno != EINTR) { 3041 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 3042 STATUP(data->nsd, rxerr); 3043 /* No zone statup */ 3044 } 3045 /* Simply no data available */ 3046 return; 3047 } 3048 for (i = 0; i < recvcount; i++) { 3049 loopstart: 3050 received = msgs[i].msg_len; 3051 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 3052 q = queries[i]; 3053 if (received == -1) { 3054 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( 3055 #if defined(HAVE_RECVMMSG) 3056 msgs[i].msg_hdr.msg_flags 3057 #else 3058 errno 3059 #endif 3060 )); 3061 STATUP(data->nsd, rxerr); 3062 /* No zone statup */ 3063 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3064 iovecs[i].iov_len = buffer_remaining(q->packet); 3065 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3066 goto swap_drop; 3067 } 3068 3069 /* Account... */ 3070 #ifdef BIND8_STATS 3071 if (data->socket->addr.ai_family == AF_INET) { 3072 STATUP(data->nsd, qudp); 3073 } else if (data->socket->addr.ai_family == AF_INET6) { 3074 STATUP(data->nsd, qudp6); 3075 } 3076 #endif 3077 3078 buffer_skip(q->packet, received); 3079 buffer_flip(q->packet); 3080 #ifdef USE_DNSTAP 3081 dt_collector_submit_auth_query(data->nsd, &q->addr, q->addrlen, 3082 q->tcp, q->packet); 3083 #endif /* USE_DNSTAP */ 3084 3085 /* Process and answer the query... */ 3086 if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) { 3087 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 3088 STATUP(data->nsd, nona); 3089 ZTATUP(data->nsd, q->zone, nona); 3090 } 3091 3092 #ifdef USE_ZONE_STATS 3093 if (data->socket->addr.ai_family == AF_INET) { 3094 ZTATUP(data->nsd, q->zone, qudp); 3095 } else if (data->socket->addr.ai_family == AF_INET6) { 3096 ZTATUP(data->nsd, q->zone, qudp6); 3097 } 3098 #endif 3099 3100 /* Add EDNS0 and TSIG info if necessary. */ 3101 query_add_optional(q, data->nsd); 3102 3103 buffer_flip(q->packet); 3104 iovecs[i].iov_len = buffer_remaining(q->packet); 3105 #ifdef BIND8_STATS 3106 /* Account the rcode & TC... */ 3107 STATUP2(data->nsd, rcode, RCODE(q->packet)); 3108 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 3109 if (TC(q->packet)) { 3110 STATUP(data->nsd, truncated); 3111 ZTATUP(data->nsd, q->zone, truncated); 3112 } 3113 #endif /* BIND8_STATS */ 3114 #ifdef USE_DNSTAP 3115 dt_collector_submit_auth_response(data->nsd, 3116 &q->addr, q->addrlen, q->tcp, q->packet, 3117 q->zone); 3118 #endif /* USE_DNSTAP */ 3119 } else { 3120 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3121 iovecs[i].iov_len = buffer_remaining(q->packet); 3122 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3123 swap_drop: 3124 STATUP(data->nsd, dropped); 3125 ZTATUP(data->nsd, q->zone, dropped); 3126 if(i != recvcount-1) { 3127 /* swap with last and decrease recvcount */ 3128 struct mmsghdr mtmp = msgs[i]; 3129 struct iovec iotmp = iovecs[i]; 3130 recvcount--; 3131 msgs[i] = msgs[recvcount]; 3132 iovecs[i] = iovecs[recvcount]; 3133 queries[i] = queries[recvcount]; 3134 msgs[recvcount] = mtmp; 3135 iovecs[recvcount] = iotmp; 3136 queries[recvcount] = q; 3137 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3138 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; 3139 goto loopstart; 3140 } else { recvcount --; } 3141 } 3142 } 3143 3144 /* send until all are sent */ 3145 i = 0; 3146 while(i<recvcount) { 3147 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 3148 if(sent == -1) { 3149 /* don't log transient network full errors, unless 3150 * on higher verbosity */ 3151 if(!(errno == ENOBUFS && verbosity < 1) && 3152 #ifdef EWOULDBLOCK 3153 !(errno == EWOULDBLOCK && verbosity < 1) && 3154 #endif 3155 !(errno == EAGAIN && verbosity < 1)) { 3156 const char* es = strerror(errno); 3157 char a[48]; 3158 addr2str(&queries[i]->addr, a, sizeof(a)); 3159 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 3160 } 3161 #ifdef BIND8_STATS 3162 data->nsd->st.txerr += recvcount-i; 3163 #endif /* BIND8_STATS */ 3164 break; 3165 } 3166 i += sent; 3167 } 3168 for(i=0; i<recvcount; i++) { 3169 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3170 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3171 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3172 } 3173 } 3174 3175 #ifdef HAVE_SSL 3176 /* 3177 * Setup an event for the tcp handler. 3178 */ 3179 static void 3180 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *), 3181 int fd, short event) 3182 { 3183 struct timeval timeout; 3184 struct event_base* ev_base; 3185 3186 timeout.tv_sec = data->nsd->tcp_timeout; 3187 timeout.tv_usec = 0L; 3188 3189 ev_base = data->event.ev_base; 3190 event_del(&data->event); 3191 memset(&data->event, 0, sizeof(data->event)); 3192 event_set(&data->event, fd, event, fn, data); 3193 if(event_base_set(ev_base, &data->event) != 0) 3194 log_msg(LOG_ERR, "event base set failed"); 3195 if(event_add(&data->event, &timeout) != 0) 3196 log_msg(LOG_ERR, "event add failed"); 3197 } 3198 #endif /* HAVE_SSL */ 3199 3200 static void 3201 cleanup_tcp_handler(struct tcp_handler_data* data) 3202 { 3203 event_del(&data->event); 3204 #ifdef HAVE_SSL 3205 if(data->tls) { 3206 SSL_shutdown(data->tls); 3207 SSL_free(data->tls); 3208 data->tls = NULL; 3209 } 3210 #endif 3211 close(data->event.ev_fd); 3212 if(data->prev) 3213 data->prev->next = data->next; 3214 else tcp_active_list = data->next; 3215 if(data->next) 3216 data->next->prev = data->prev; 3217 3218 /* 3219 * Enable the TCP accept handlers when the current number of 3220 * TCP connections is about to drop below the maximum number 3221 * of TCP connections. 3222 */ 3223 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 3224 configure_handler_event_types(EV_READ|EV_PERSIST); 3225 if(slowaccept) { 3226 event_del(&slowaccept_event); 3227 slowaccept = 0; 3228 } 3229 } 3230 --data->nsd->current_tcp_count; 3231 assert(data->nsd->current_tcp_count >= 0); 3232 3233 region_destroy(data->region); 3234 } 3235 3236 static void 3237 handle_tcp_reading(int fd, short event, void* arg) 3238 { 3239 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3240 ssize_t received; 3241 struct event_base* ev_base; 3242 struct timeval timeout; 3243 3244 if ((event & EV_TIMEOUT)) { 3245 /* Connection timed out. */ 3246 cleanup_tcp_handler(data); 3247 return; 3248 } 3249 3250 if (data->nsd->tcp_query_count > 0 && 3251 data->query_count >= data->nsd->tcp_query_count) { 3252 /* No more queries allowed on this tcp connection. */ 3253 cleanup_tcp_handler(data); 3254 return; 3255 } 3256 3257 assert((event & EV_READ)); 3258 3259 if (data->bytes_transmitted == 0) { 3260 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 3261 } 3262 3263 /* 3264 * Check if we received the leading packet length bytes yet. 3265 */ 3266 if (data->bytes_transmitted < sizeof(uint16_t)) { 3267 received = read(fd, 3268 (char *) &data->query->tcplen 3269 + data->bytes_transmitted, 3270 sizeof(uint16_t) - data->bytes_transmitted); 3271 if (received == -1) { 3272 if (errno == EAGAIN || errno == EINTR) { 3273 /* 3274 * Read would block, wait until more 3275 * data is available. 3276 */ 3277 return; 3278 } else { 3279 char buf[48]; 3280 addr2str(&data->query->addr, buf, sizeof(buf)); 3281 #ifdef ECONNRESET 3282 if (verbosity >= 2 || errno != ECONNRESET) 3283 #endif /* ECONNRESET */ 3284 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3285 cleanup_tcp_handler(data); 3286 return; 3287 } 3288 } else if (received == 0) { 3289 /* EOF */ 3290 cleanup_tcp_handler(data); 3291 return; 3292 } 3293 3294 data->bytes_transmitted += received; 3295 if (data->bytes_transmitted < sizeof(uint16_t)) { 3296 /* 3297 * Not done with the tcplen yet, wait for more 3298 * data to become available. 3299 */ 3300 return; 3301 } 3302 3303 assert(data->bytes_transmitted == sizeof(uint16_t)); 3304 3305 data->query->tcplen = ntohs(data->query->tcplen); 3306 3307 /* 3308 * Minimum query size is: 3309 * 3310 * Size of the header (12) 3311 * + Root domain name (1) 3312 * + Query class (2) 3313 * + Query type (2) 3314 */ 3315 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 3316 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 3317 cleanup_tcp_handler(data); 3318 return; 3319 } 3320 3321 if (data->query->tcplen > data->query->maxlen) { 3322 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 3323 cleanup_tcp_handler(data); 3324 return; 3325 } 3326 3327 buffer_set_limit(data->query->packet, data->query->tcplen); 3328 } 3329 3330 assert(buffer_remaining(data->query->packet) > 0); 3331 3332 /* Read the (remaining) query data. */ 3333 received = read(fd, 3334 buffer_current(data->query->packet), 3335 buffer_remaining(data->query->packet)); 3336 if (received == -1) { 3337 if (errno == EAGAIN || errno == EINTR) { 3338 /* 3339 * Read would block, wait until more data is 3340 * available. 3341 */ 3342 return; 3343 } else { 3344 char buf[48]; 3345 addr2str(&data->query->addr, buf, sizeof(buf)); 3346 #ifdef ECONNRESET 3347 if (verbosity >= 2 || errno != ECONNRESET) 3348 #endif /* ECONNRESET */ 3349 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3350 cleanup_tcp_handler(data); 3351 return; 3352 } 3353 } else if (received == 0) { 3354 /* EOF */ 3355 cleanup_tcp_handler(data); 3356 return; 3357 } 3358 3359 data->bytes_transmitted += received; 3360 buffer_skip(data->query->packet, received); 3361 if (buffer_remaining(data->query->packet) > 0) { 3362 /* 3363 * Message not yet complete, wait for more data to 3364 * become available. 3365 */ 3366 return; 3367 } 3368 3369 assert(buffer_position(data->query->packet) == data->query->tcplen); 3370 3371 /* Account... */ 3372 #ifdef BIND8_STATS 3373 #ifndef INET6 3374 STATUP(data->nsd, ctcp); 3375 #else 3376 if (data->query->addr.ss_family == AF_INET) { 3377 STATUP(data->nsd, ctcp); 3378 } else if (data->query->addr.ss_family == AF_INET6) { 3379 STATUP(data->nsd, ctcp6); 3380 } 3381 #endif 3382 #endif /* BIND8_STATS */ 3383 3384 /* We have a complete query, process it. */ 3385 3386 /* tcp-query-count: handle query counter ++ */ 3387 data->query_count++; 3388 3389 buffer_flip(data->query->packet); 3390 #ifdef USE_DNSTAP 3391 dt_collector_submit_auth_query(data->nsd, &data->query->addr, 3392 data->query->addrlen, data->query->tcp, data->query->packet); 3393 #endif /* USE_DNSTAP */ 3394 data->query_state = server_process_query(data->nsd, data->query); 3395 if (data->query_state == QUERY_DISCARDED) { 3396 /* Drop the packet and the entire connection... */ 3397 STATUP(data->nsd, dropped); 3398 ZTATUP(data->nsd, data->query->zone, dropped); 3399 cleanup_tcp_handler(data); 3400 return; 3401 } 3402 3403 #ifdef BIND8_STATS 3404 if (RCODE(data->query->packet) == RCODE_OK 3405 && !AA(data->query->packet)) 3406 { 3407 STATUP(data->nsd, nona); 3408 ZTATUP(data->nsd, data->query->zone, nona); 3409 } 3410 #endif /* BIND8_STATS */ 3411 3412 #ifdef USE_ZONE_STATS 3413 #ifndef INET6 3414 ZTATUP(data->nsd, data->query->zone, ctcp); 3415 #else 3416 if (data->query->addr.ss_family == AF_INET) { 3417 ZTATUP(data->nsd, data->query->zone, ctcp); 3418 } else if (data->query->addr.ss_family == AF_INET6) { 3419 ZTATUP(data->nsd, data->query->zone, ctcp6); 3420 } 3421 #endif 3422 #endif /* USE_ZONE_STATS */ 3423 3424 query_add_optional(data->query, data->nsd); 3425 3426 /* Switch to the tcp write handler. */ 3427 buffer_flip(data->query->packet); 3428 data->query->tcplen = buffer_remaining(data->query->packet); 3429 #ifdef BIND8_STATS 3430 /* Account the rcode & TC... */ 3431 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 3432 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 3433 if (TC(data->query->packet)) { 3434 STATUP(data->nsd, truncated); 3435 ZTATUP(data->nsd, data->query->zone, truncated); 3436 } 3437 #endif /* BIND8_STATS */ 3438 #ifdef USE_DNSTAP 3439 dt_collector_submit_auth_response(data->nsd, &data->query->addr, 3440 data->query->addrlen, data->query->tcp, data->query->packet, 3441 data->query->zone); 3442 #endif /* USE_DNSTAP */ 3443 data->bytes_transmitted = 0; 3444 3445 timeout.tv_sec = data->tcp_timeout / 1000; 3446 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 3447 3448 ev_base = data->event.ev_base; 3449 event_del(&data->event); 3450 memset(&data->event, 0, sizeof(data->event)); 3451 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 3452 handle_tcp_reading, data); 3453 if(event_base_set(ev_base, &data->event) != 0) 3454 log_msg(LOG_ERR, "event base set tcpr failed"); 3455 if(event_add(&data->event, &timeout) != 0) 3456 log_msg(LOG_ERR, "event add tcpr failed"); 3457 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 3458 handle_tcp_writing(fd, EV_WRITE, data); 3459 } 3460 3461 static void 3462 handle_tcp_writing(int fd, short event, void* arg) 3463 { 3464 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3465 ssize_t sent; 3466 struct query *q = data->query; 3467 struct timeval timeout; 3468 struct event_base* ev_base; 3469 3470 if ((event & EV_TIMEOUT)) { 3471 /* Connection timed out. */ 3472 cleanup_tcp_handler(data); 3473 return; 3474 } 3475 3476 assert((event & EV_WRITE)); 3477 3478 if (data->bytes_transmitted < sizeof(q->tcplen)) { 3479 /* Writing the response packet length. */ 3480 uint16_t n_tcplen = htons(q->tcplen); 3481 #ifdef HAVE_WRITEV 3482 struct iovec iov[2]; 3483 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted; 3484 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 3485 iov[1].iov_base = buffer_begin(q->packet); 3486 iov[1].iov_len = buffer_limit(q->packet); 3487 sent = writev(fd, iov, 2); 3488 #else /* HAVE_WRITEV */ 3489 sent = write(fd, 3490 (const char *) &n_tcplen + data->bytes_transmitted, 3491 sizeof(n_tcplen) - data->bytes_transmitted); 3492 #endif /* HAVE_WRITEV */ 3493 if (sent == -1) { 3494 if (errno == EAGAIN || errno == EINTR) { 3495 /* 3496 * Write would block, wait until 3497 * socket becomes writable again. 3498 */ 3499 return; 3500 } else { 3501 #ifdef ECONNRESET 3502 if(verbosity >= 2 || errno != ECONNRESET) 3503 #endif /* ECONNRESET */ 3504 #ifdef EPIPE 3505 if(verbosity >= 2 || errno != EPIPE) 3506 #endif /* EPIPE 'broken pipe' */ 3507 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 3508 cleanup_tcp_handler(data); 3509 return; 3510 } 3511 } 3512 3513 data->bytes_transmitted += sent; 3514 if (data->bytes_transmitted < sizeof(q->tcplen)) { 3515 /* 3516 * Writing not complete, wait until socket 3517 * becomes writable again. 3518 */ 3519 return; 3520 } 3521 3522 #ifdef HAVE_WRITEV 3523 sent -= sizeof(n_tcplen); 3524 /* handle potential 'packet done' code */ 3525 goto packet_could_be_done; 3526 #endif 3527 } 3528 3529 sent = write(fd, 3530 buffer_current(q->packet), 3531 buffer_remaining(q->packet)); 3532 if (sent == -1) { 3533 if (errno == EAGAIN || errno == EINTR) { 3534 /* 3535 * Write would block, wait until 3536 * socket becomes writable again. 3537 */ 3538 return; 3539 } else { 3540 #ifdef ECONNRESET 3541 if(verbosity >= 2 || errno != ECONNRESET) 3542 #endif /* ECONNRESET */ 3543 #ifdef EPIPE 3544 if(verbosity >= 2 || errno != EPIPE) 3545 #endif /* EPIPE 'broken pipe' */ 3546 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 3547 cleanup_tcp_handler(data); 3548 return; 3549 } 3550 } 3551 3552 data->bytes_transmitted += sent; 3553 #ifdef HAVE_WRITEV 3554 packet_could_be_done: 3555 #endif 3556 buffer_skip(q->packet, sent); 3557 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 3558 /* 3559 * Still more data to write when socket becomes 3560 * writable again. 3561 */ 3562 return; 3563 } 3564 3565 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 3566 3567 if (data->query_state == QUERY_IN_AXFR) { 3568 /* Continue processing AXFR and writing back results. */ 3569 buffer_clear(q->packet); 3570 data->query_state = query_axfr(data->nsd, q); 3571 if (data->query_state != QUERY_PROCESSED) { 3572 query_add_optional(data->query, data->nsd); 3573 3574 /* Reset data. */ 3575 buffer_flip(q->packet); 3576 q->tcplen = buffer_remaining(q->packet); 3577 data->bytes_transmitted = 0; 3578 /* Reset timeout. */ 3579 timeout.tv_sec = data->tcp_timeout / 1000; 3580 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 3581 ev_base = data->event.ev_base; 3582 event_del(&data->event); 3583 memset(&data->event, 0, sizeof(data->event)); 3584 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 3585 handle_tcp_writing, data); 3586 if(event_base_set(ev_base, &data->event) != 0) 3587 log_msg(LOG_ERR, "event base set tcpw failed"); 3588 if(event_add(&data->event, &timeout) != 0) 3589 log_msg(LOG_ERR, "event add tcpw failed"); 3590 3591 /* 3592 * Write data if/when the socket is writable 3593 * again. 3594 */ 3595 return; 3596 } 3597 } 3598 3599 /* 3600 * Done sending, wait for the next request to arrive on the 3601 * TCP socket by installing the TCP read handler. 3602 */ 3603 if (data->nsd->tcp_query_count > 0 && 3604 data->query_count >= data->nsd->tcp_query_count) { 3605 3606 (void) shutdown(fd, SHUT_WR); 3607 } 3608 3609 data->bytes_transmitted = 0; 3610 3611 timeout.tv_sec = data->tcp_timeout / 1000; 3612 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 3613 ev_base = data->event.ev_base; 3614 event_del(&data->event); 3615 memset(&data->event, 0, sizeof(data->event)); 3616 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 3617 handle_tcp_reading, data); 3618 if(event_base_set(ev_base, &data->event) != 0) 3619 log_msg(LOG_ERR, "event base set tcpw failed"); 3620 if(event_add(&data->event, &timeout) != 0) 3621 log_msg(LOG_ERR, "event add tcpw failed"); 3622 } 3623 3624 #ifdef HAVE_SSL 3625 /** create SSL object and associate fd */ 3626 static SSL* 3627 incoming_ssl_fd(SSL_CTX* ctx, int fd) 3628 { 3629 SSL* ssl = SSL_new((SSL_CTX*)ctx); 3630 if(!ssl) { 3631 log_crypto_err("could not SSL_new"); 3632 return NULL; 3633 } 3634 SSL_set_accept_state(ssl); 3635 (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY); 3636 if(!SSL_set_fd(ssl, fd)) { 3637 log_crypto_err("could not SSL_set_fd"); 3638 SSL_free(ssl); 3639 return NULL; 3640 } 3641 return ssl; 3642 } 3643 3644 /** TLS handshake to upgrade TCP connection */ 3645 static int 3646 tls_handshake(struct tcp_handler_data* data, int fd, int writing) 3647 { 3648 int r; 3649 if(data->shake_state == tls_hs_read_event) { 3650 /* read condition satisfied back to writing */ 3651 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 3652 data->shake_state = tls_hs_none; 3653 return 1; 3654 } 3655 if(data->shake_state == tls_hs_write_event) { 3656 /* write condition satisfied back to reading */ 3657 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 3658 data->shake_state = tls_hs_none; 3659 return 1; 3660 } 3661 3662 /* (continue to) setup the TLS connection */ 3663 ERR_clear_error(); 3664 r = SSL_do_handshake(data->tls); 3665 3666 if(r != 1) { 3667 int want = SSL_get_error(data->tls, r); 3668 if(want == SSL_ERROR_WANT_READ) { 3669 if(data->shake_state == tls_hs_read) { 3670 /* try again later */ 3671 return 1; 3672 } 3673 data->shake_state = tls_hs_read; 3674 /* switch back to reading mode */ 3675 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 3676 return 1; 3677 } else if(want == SSL_ERROR_WANT_WRITE) { 3678 if(data->shake_state == tls_hs_write) { 3679 /* try again later */ 3680 return 1; 3681 } 3682 data->shake_state = tls_hs_write; 3683 /* switch back to writing mode */ 3684 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 3685 return 1; 3686 } else { 3687 if(r == 0) 3688 VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely")); 3689 else { 3690 unsigned long err = ERR_get_error(); 3691 if(!squelch_err_ssl_handshake(err)) { 3692 char a[64], s[256]; 3693 addr2str(&data->query->addr, a, sizeof(a)); 3694 snprintf(s, sizeof(s), "TLS handshake failed from %s", a); 3695 log_crypto_from_err(s, err); 3696 } 3697 } 3698 cleanup_tcp_handler(data); 3699 return 0; 3700 } 3701 } 3702 3703 /* Use to log successful upgrade for testing - could be removed*/ 3704 VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded.")); 3705 /* set back to the event we need to have when reading (or writing) */ 3706 if(data->shake_state == tls_hs_read && writing) { 3707 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 3708 } else if(data->shake_state == tls_hs_write && !writing) { 3709 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 3710 } 3711 data->shake_state = tls_hs_none; 3712 return 1; 3713 } 3714 3715 /** handle TLS reading of incoming query */ 3716 static void 3717 handle_tls_reading(int fd, short event, void* arg) 3718 { 3719 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3720 ssize_t received; 3721 3722 if ((event & EV_TIMEOUT)) { 3723 /* Connection timed out. */ 3724 cleanup_tcp_handler(data); 3725 return; 3726 } 3727 3728 if (data->nsd->tcp_query_count > 0 && 3729 data->query_count >= data->nsd->tcp_query_count) { 3730 /* No more queries allowed on this tcp connection. */ 3731 cleanup_tcp_handler(data); 3732 return; 3733 } 3734 3735 assert((event & EV_READ)); 3736 3737 if (data->bytes_transmitted == 0) { 3738 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 3739 } 3740 3741 if(data->shake_state != tls_hs_none) { 3742 if(!tls_handshake(data, fd, 0)) 3743 return; 3744 if(data->shake_state != tls_hs_none) 3745 return; 3746 } 3747 3748 /* 3749 * Check if we received the leading packet length bytes yet. 3750 */ 3751 if(data->bytes_transmitted < sizeof(uint16_t)) { 3752 ERR_clear_error(); 3753 if((received=SSL_read(data->tls, (char *) &data->query->tcplen 3754 + data->bytes_transmitted, 3755 sizeof(uint16_t) - data->bytes_transmitted)) <= 0) { 3756 int want = SSL_get_error(data->tls, received); 3757 if(want == SSL_ERROR_ZERO_RETURN) { 3758 cleanup_tcp_handler(data); 3759 return; /* shutdown, closed */ 3760 } else if(want == SSL_ERROR_WANT_READ) { 3761 /* wants to be called again */ 3762 return; 3763 } 3764 else if(want == SSL_ERROR_WANT_WRITE) { 3765 /* switch to writing */ 3766 data->shake_state = tls_hs_write_event; 3767 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 3768 return; 3769 } 3770 cleanup_tcp_handler(data); 3771 log_crypto_err("could not SSL_read"); 3772 return; 3773 } 3774 3775 data->bytes_transmitted += received; 3776 if (data->bytes_transmitted < sizeof(uint16_t)) { 3777 /* 3778 * Not done with the tcplen yet, wait for more 3779 * data to become available. 3780 */ 3781 return; 3782 } 3783 3784 assert(data->bytes_transmitted == sizeof(uint16_t)); 3785 3786 data->query->tcplen = ntohs(data->query->tcplen); 3787 3788 /* 3789 * Minimum query size is: 3790 * 3791 * Size of the header (12) 3792 * + Root domain name (1) 3793 * + Query class (2) 3794 * + Query type (2) 3795 */ 3796 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 3797 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 3798 cleanup_tcp_handler(data); 3799 return; 3800 } 3801 3802 if (data->query->tcplen > data->query->maxlen) { 3803 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 3804 cleanup_tcp_handler(data); 3805 return; 3806 } 3807 3808 buffer_set_limit(data->query->packet, data->query->tcplen); 3809 } 3810 3811 assert(buffer_remaining(data->query->packet) > 0); 3812 3813 /* Read the (remaining) query data. */ 3814 ERR_clear_error(); 3815 received = SSL_read(data->tls, (void*)buffer_current(data->query->packet), 3816 (int)buffer_remaining(data->query->packet)); 3817 if(received <= 0) { 3818 int want = SSL_get_error(data->tls, received); 3819 if(want == SSL_ERROR_ZERO_RETURN) { 3820 cleanup_tcp_handler(data); 3821 return; /* shutdown, closed */ 3822 } else if(want == SSL_ERROR_WANT_READ) { 3823 /* wants to be called again */ 3824 return; 3825 } 3826 else if(want == SSL_ERROR_WANT_WRITE) { 3827 /* switch back writing */ 3828 data->shake_state = tls_hs_write_event; 3829 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 3830 return; 3831 } 3832 cleanup_tcp_handler(data); 3833 log_crypto_err("could not SSL_read"); 3834 return; 3835 } 3836 3837 data->bytes_transmitted += received; 3838 buffer_skip(data->query->packet, received); 3839 if (buffer_remaining(data->query->packet) > 0) { 3840 /* 3841 * Message not yet complete, wait for more data to 3842 * become available. 3843 */ 3844 return; 3845 } 3846 3847 assert(buffer_position(data->query->packet) == data->query->tcplen); 3848 3849 /* Account... */ 3850 #ifndef INET6 3851 STATUP(data->nsd, ctls); 3852 #else 3853 if (data->query->addr.ss_family == AF_INET) { 3854 STATUP(data->nsd, ctls); 3855 } else if (data->query->addr.ss_family == AF_INET6) { 3856 STATUP(data->nsd, ctls6); 3857 } 3858 #endif 3859 3860 /* We have a complete query, process it. */ 3861 3862 /* tcp-query-count: handle query counter ++ */ 3863 data->query_count++; 3864 3865 buffer_flip(data->query->packet); 3866 #ifdef USE_DNSTAP 3867 dt_collector_submit_auth_query(data->nsd, &data->query->addr, 3868 data->query->addrlen, data->query->tcp, data->query->packet); 3869 #endif /* USE_DNSTAP */ 3870 data->query_state = server_process_query(data->nsd, data->query); 3871 if (data->query_state == QUERY_DISCARDED) { 3872 /* Drop the packet and the entire connection... */ 3873 STATUP(data->nsd, dropped); 3874 ZTATUP(data->nsd, data->query->zone, dropped); 3875 cleanup_tcp_handler(data); 3876 return; 3877 } 3878 3879 #ifdef BIND8_STATS 3880 if (RCODE(data->query->packet) == RCODE_OK 3881 && !AA(data->query->packet)) 3882 { 3883 STATUP(data->nsd, nona); 3884 ZTATUP(data->nsd, data->query->zone, nona); 3885 } 3886 #endif /* BIND8_STATS */ 3887 3888 #ifdef USE_ZONE_STATS 3889 #ifndef INET6 3890 ZTATUP(data->nsd, data->query->zone, ctls); 3891 #else 3892 if (data->query->addr.ss_family == AF_INET) { 3893 ZTATUP(data->nsd, data->query->zone, ctls); 3894 } else if (data->query->addr.ss_family == AF_INET6) { 3895 ZTATUP(data->nsd, data->query->zone, ctls6); 3896 } 3897 #endif 3898 #endif /* USE_ZONE_STATS */ 3899 3900 query_add_optional(data->query, data->nsd); 3901 3902 /* Switch to the tcp write handler. */ 3903 buffer_flip(data->query->packet); 3904 data->query->tcplen = buffer_remaining(data->query->packet); 3905 #ifdef BIND8_STATS 3906 /* Account the rcode & TC... */ 3907 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 3908 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 3909 if (TC(data->query->packet)) { 3910 STATUP(data->nsd, truncated); 3911 ZTATUP(data->nsd, data->query->zone, truncated); 3912 } 3913 #endif /* BIND8_STATS */ 3914 #ifdef USE_DNSTAP 3915 dt_collector_submit_auth_response(data->nsd, &data->query->addr, 3916 data->query->addrlen, data->query->tcp, data->query->packet, 3917 data->query->zone); 3918 #endif /* USE_DNSTAP */ 3919 data->bytes_transmitted = 0; 3920 3921 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 3922 3923 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 3924 handle_tls_writing(fd, EV_WRITE, data); 3925 } 3926 3927 /** handle TLS writing of outgoing response */ 3928 static void 3929 handle_tls_writing(int fd, short event, void* arg) 3930 { 3931 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3932 ssize_t sent; 3933 struct query *q = data->query; 3934 /* static variable that holds reassembly buffer used to put the 3935 * TCP length in front of the packet, like writev. */ 3936 static buffer_type* global_tls_temp_buffer = NULL; 3937 buffer_type* write_buffer; 3938 3939 if ((event & EV_TIMEOUT)) { 3940 /* Connection timed out. */ 3941 cleanup_tcp_handler(data); 3942 return; 3943 } 3944 3945 assert((event & EV_WRITE)); 3946 3947 if(data->shake_state != tls_hs_none) { 3948 if(!tls_handshake(data, fd, 1)) 3949 return; 3950 if(data->shake_state != tls_hs_none) 3951 return; 3952 } 3953 3954 (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE); 3955 3956 /* If we are writing the start of a message, we must include the length 3957 * this is done with a copy into write_buffer. */ 3958 write_buffer = NULL; 3959 if (data->bytes_transmitted == 0) { 3960 if(!global_tls_temp_buffer) { 3961 /* gets deallocated when nsd shuts down from 3962 * nsd.region */ 3963 global_tls_temp_buffer = buffer_create(nsd.region, 3964 QIOBUFSZ + sizeof(q->tcplen)); 3965 if (!global_tls_temp_buffer) { 3966 return; 3967 } 3968 } 3969 write_buffer = global_tls_temp_buffer; 3970 buffer_clear(write_buffer); 3971 buffer_write_u16(write_buffer, q->tcplen); 3972 buffer_write(write_buffer, buffer_current(q->packet), 3973 (int)buffer_remaining(q->packet)); 3974 buffer_flip(write_buffer); 3975 } else { 3976 write_buffer = q->packet; 3977 } 3978 3979 /* Write the response */ 3980 ERR_clear_error(); 3981 sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer)); 3982 if(sent <= 0) { 3983 int want = SSL_get_error(data->tls, sent); 3984 if(want == SSL_ERROR_ZERO_RETURN) { 3985 cleanup_tcp_handler(data); 3986 /* closed */ 3987 } else if(want == SSL_ERROR_WANT_READ) { 3988 /* switch back to reading */ 3989 data->shake_state = tls_hs_read_event; 3990 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 3991 } else if(want != SSL_ERROR_WANT_WRITE) { 3992 cleanup_tcp_handler(data); 3993 log_crypto_err("could not SSL_write"); 3994 } 3995 return; 3996 } 3997 3998 buffer_skip(write_buffer, sent); 3999 if(buffer_remaining(write_buffer) != 0) { 4000 /* If not all sent, sync up the real buffer if it wasn't used.*/ 4001 if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) { 4002 buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen)); 4003 } 4004 } 4005 4006 data->bytes_transmitted += sent; 4007 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 4008 /* 4009 * Still more data to write when socket becomes 4010 * writable again. 4011 */ 4012 return; 4013 } 4014 4015 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 4016 4017 if (data->query_state == QUERY_IN_AXFR) { 4018 /* Continue processing AXFR and writing back results. */ 4019 buffer_clear(q->packet); 4020 data->query_state = query_axfr(data->nsd, q); 4021 if (data->query_state != QUERY_PROCESSED) { 4022 query_add_optional(data->query, data->nsd); 4023 4024 /* Reset data. */ 4025 buffer_flip(q->packet); 4026 q->tcplen = buffer_remaining(q->packet); 4027 data->bytes_transmitted = 0; 4028 /* Reset to writing mode. */ 4029 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4030 4031 /* 4032 * Write data if/when the socket is writable 4033 * again. 4034 */ 4035 return; 4036 } 4037 } 4038 4039 /* 4040 * Done sending, wait for the next request to arrive on the 4041 * TCP socket by installing the TCP read handler. 4042 */ 4043 if (data->nsd->tcp_query_count > 0 && 4044 data->query_count >= data->nsd->tcp_query_count) { 4045 4046 (void) shutdown(fd, SHUT_WR); 4047 } 4048 4049 data->bytes_transmitted = 0; 4050 4051 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 4052 } 4053 #endif 4054 4055 static void 4056 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), 4057 void* ATTR_UNUSED(arg)) 4058 { 4059 if(slowaccept) { 4060 configure_handler_event_types(EV_PERSIST | EV_READ); 4061 slowaccept = 0; 4062 } 4063 } 4064 4065 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) 4066 { 4067 #ifndef HAVE_ACCEPT4 4068 int s = accept(fd, addr, addrlen); 4069 if (s != -1) { 4070 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 4071 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 4072 close(s); 4073 s = -1; 4074 errno=EINTR; /* stop error printout as error in accept4 4075 by setting this errno, it omits printout, in 4076 later code that calls nsd_accept4 */ 4077 } 4078 } 4079 return s; 4080 #else 4081 return accept4(fd, addr, addrlen, SOCK_NONBLOCK); 4082 #endif /* HAVE_ACCEPT4 */ 4083 } 4084 4085 /* 4086 * Handle an incoming TCP connection. The connection is accepted and 4087 * a new TCP reader event handler is added. The TCP handler 4088 * is responsible for cleanup when the connection is closed. 4089 */ 4090 static void 4091 handle_tcp_accept(int fd, short event, void* arg) 4092 { 4093 struct tcp_accept_handler_data *data 4094 = (struct tcp_accept_handler_data *) arg; 4095 int s; 4096 int reject = 0; 4097 struct tcp_handler_data *tcp_data; 4098 region_type *tcp_region; 4099 #ifdef INET6 4100 struct sockaddr_storage addr; 4101 #else 4102 struct sockaddr_in addr; 4103 #endif 4104 socklen_t addrlen; 4105 struct timeval timeout; 4106 4107 if (!(event & EV_READ)) { 4108 return; 4109 } 4110 4111 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 4112 reject = data->nsd->options->tcp_reject_overflow; 4113 if (!reject) { 4114 return; 4115 } 4116 } 4117 4118 /* Accept it... */ 4119 addrlen = sizeof(addr); 4120 s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen); 4121 if (s == -1) { 4122 /** 4123 * EMFILE and ENFILE is a signal that the limit of open 4124 * file descriptors has been reached. Pause accept(). 4125 * EINTR is a signal interrupt. The others are various OS ways 4126 * of saying that the client has closed the connection. 4127 */ 4128 if (errno == EMFILE || errno == ENFILE) { 4129 if (!slowaccept) { 4130 /* disable accept events */ 4131 struct timeval tv; 4132 configure_handler_event_types(0); 4133 tv.tv_sec = SLOW_ACCEPT_TIMEOUT; 4134 tv.tv_usec = 0L; 4135 memset(&slowaccept_event, 0, 4136 sizeof(slowaccept_event)); 4137 event_set(&slowaccept_event, -1, EV_TIMEOUT, 4138 handle_slowaccept_timeout, NULL); 4139 (void)event_base_set(data->event.ev_base, 4140 &slowaccept_event); 4141 (void)event_add(&slowaccept_event, &tv); 4142 slowaccept = 1; 4143 /* We don't want to spam the logs here */ 4144 } 4145 } else if (errno != EINTR 4146 && errno != EWOULDBLOCK 4147 #ifdef ECONNABORTED 4148 && errno != ECONNABORTED 4149 #endif /* ECONNABORTED */ 4150 #ifdef EPROTO 4151 && errno != EPROTO 4152 #endif /* EPROTO */ 4153 ) { 4154 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 4155 } 4156 return; 4157 } 4158 4159 if (reject) { 4160 shutdown(s, SHUT_RDWR); 4161 close(s); 4162 return; 4163 } 4164 4165 /* 4166 * This region is deallocated when the TCP connection is 4167 * closed by the TCP handler. 4168 */ 4169 tcp_region = region_create(xalloc, free); 4170 tcp_data = (struct tcp_handler_data *) region_alloc( 4171 tcp_region, sizeof(struct tcp_handler_data)); 4172 tcp_data->region = tcp_region; 4173 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 4174 compression_table_size, compressed_dnames); 4175 tcp_data->nsd = data->nsd; 4176 tcp_data->query_count = 0; 4177 #ifdef HAVE_SSL 4178 tcp_data->shake_state = tls_hs_none; 4179 tcp_data->tls = NULL; 4180 #endif 4181 tcp_data->prev = NULL; 4182 tcp_data->next = NULL; 4183 4184 tcp_data->query_state = QUERY_PROCESSED; 4185 tcp_data->bytes_transmitted = 0; 4186 memcpy(&tcp_data->query->addr, &addr, addrlen); 4187 tcp_data->query->addrlen = addrlen; 4188 4189 tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000; 4190 if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) { 4191 /* very busy, give smaller timeout */ 4192 tcp_data->tcp_timeout = 200; 4193 } 4194 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4195 timeout.tv_sec = tcp_data->tcp_timeout / 1000; 4196 timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000; 4197 4198 #ifdef HAVE_SSL 4199 if (data->tls_accept) { 4200 tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s); 4201 if(!tcp_data->tls) { 4202 close(s); 4203 return; 4204 } 4205 tcp_data->shake_state = tls_hs_read; 4206 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4207 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4208 handle_tls_reading, tcp_data); 4209 } else { 4210 #endif 4211 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4212 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4213 handle_tcp_reading, tcp_data); 4214 #ifdef HAVE_SSL 4215 } 4216 #endif 4217 if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) { 4218 log_msg(LOG_ERR, "cannot set tcp event base"); 4219 close(s); 4220 region_destroy(tcp_region); 4221 return; 4222 } 4223 if(event_add(&tcp_data->event, &timeout) != 0) { 4224 log_msg(LOG_ERR, "cannot add tcp to event base"); 4225 close(s); 4226 region_destroy(tcp_region); 4227 return; 4228 } 4229 if(tcp_active_list) { 4230 tcp_active_list->prev = tcp_data; 4231 tcp_data->next = tcp_active_list; 4232 } 4233 tcp_active_list = tcp_data; 4234 4235 /* 4236 * Keep track of the total number of TCP handlers installed so 4237 * we can stop accepting connections when the maximum number 4238 * of simultaneous TCP connections is reached. 4239 * 4240 * If tcp-reject-overflow is enabled, however, then we do not 4241 * change the handler event type; we keep it as-is and accept 4242 * overflow TCP connections only so that we can forcibly kill 4243 * them off. 4244 */ 4245 ++data->nsd->current_tcp_count; 4246 if (!data->nsd->options->tcp_reject_overflow && 4247 data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) 4248 { 4249 configure_handler_event_types(0); 4250 } 4251 } 4252 4253 static void 4254 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) 4255 { 4256 size_t i; 4257 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4258 for (i = 0; i < nsd->child_count; ++i) { 4259 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) { 4260 if (write(nsd->children[i].child_fd, 4261 &command, 4262 sizeof(command)) == -1) 4263 { 4264 if(errno != EAGAIN && errno != EINTR) 4265 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 4266 (int) command, 4267 (int) nsd->children[i].pid, 4268 strerror(errno)); 4269 } else if (timeout > 0) { 4270 (void)block_read(NULL, 4271 nsd->children[i].child_fd, 4272 &command, sizeof(command), timeout); 4273 } 4274 fsync(nsd->children[i].child_fd); 4275 close(nsd->children[i].child_fd); 4276 nsd->children[i].child_fd = -1; 4277 } 4278 } 4279 } 4280 4281 static void 4282 send_children_quit(struct nsd* nsd) 4283 { 4284 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); 4285 send_children_command(nsd, NSD_QUIT, 0); 4286 } 4287 4288 static void 4289 send_children_quit_and_wait(struct nsd* nsd) 4290 { 4291 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait")); 4292 send_children_command(nsd, NSD_QUIT_CHILD, 3); 4293 } 4294 4295 #ifdef BIND8_STATS 4296 static void 4297 set_children_stats(struct nsd* nsd) 4298 { 4299 size_t i; 4300 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4301 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 4302 for (i = 0; i < nsd->child_count; ++i) { 4303 nsd->children[i].need_to_send_STATS = 1; 4304 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 4305 } 4306 } 4307 #endif /* BIND8_STATS */ 4308 4309 static void 4310 configure_handler_event_types(short event_types) 4311 { 4312 size_t i; 4313 4314 for (i = 0; i < tcp_accept_handler_count; ++i) { 4315 struct event* handler = &tcp_accept_handlers[i].event; 4316 if(event_types) { 4317 /* reassign */ 4318 int fd = handler->ev_fd; 4319 struct event_base* base = handler->ev_base; 4320 if(tcp_accept_handlers[i].event_added) 4321 event_del(handler); 4322 memset(handler, 0, sizeof(*handler)); 4323 event_set(handler, fd, event_types, 4324 handle_tcp_accept, &tcp_accept_handlers[i]); 4325 if(event_base_set(base, handler) != 0) 4326 log_msg(LOG_ERR, "conhand: cannot event_base"); 4327 if(event_add(handler, NULL) != 0) 4328 log_msg(LOG_ERR, "conhand: cannot event_add"); 4329 tcp_accept_handlers[i].event_added = 1; 4330 } else { 4331 /* remove */ 4332 if(tcp_accept_handlers[i].event_added) { 4333 event_del(handler); 4334 tcp_accept_handlers[i].event_added = 0; 4335 } 4336 } 4337 } 4338 } 4339