1 /* 2 * server.c -- nsd(8) network input/output 3 * 4 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved. 5 * 6 * See LICENSE for the license. 7 * 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/param.h> 14 #include <limits.h> 15 #include <sys/socket.h> 16 #include <sys/uio.h> 17 #include <sys/wait.h> 18 19 #include <netinet/in.h> 20 #ifdef USE_TCP_FASTOPEN 21 #include <netinet/tcp.h> 22 #endif 23 #include <arpa/inet.h> 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <stddef.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <time.h> 34 #include <unistd.h> 35 #include <signal.h> 36 #include <netdb.h> 37 #include <poll.h> 38 #ifndef SHUT_WR 39 #define SHUT_WR 1 40 #endif 41 #ifdef HAVE_MMAP 42 #include <sys/mman.h> 43 #endif /* HAVE_MMAP */ 44 #ifdef HAVE_OPENSSL_RAND_H 45 #include <openssl/rand.h> 46 #endif 47 #ifdef HAVE_OPENSSL_SSL_H 48 #include <openssl/ssl.h> 49 #endif 50 #ifdef HAVE_OPENSSL_ERR_H 51 #include <openssl/err.h> 52 #endif 53 #ifdef HAVE_OPENSSL_OCSP_H 54 #include <openssl/ocsp.h> 55 #endif 56 #ifndef USE_MINI_EVENT 57 # ifdef HAVE_EVENT_H 58 # include <event.h> 59 # else 60 # include <event2/event.h> 61 # include "event2/event_struct.h" 62 # include "event2/event_compat.h" 63 # endif 64 #else 65 # include "mini_event.h" 66 #endif 67 68 #include "axfr.h" 69 #include "namedb.h" 70 #include "netio.h" 71 #include "xfrd.h" 72 #include "xfrd-tcp.h" 73 #include "xfrd-disk.h" 74 #include "difffile.h" 75 #include "nsec3.h" 76 #include "ipc.h" 77 #include "udb.h" 78 #include "remote.h" 79 #include "lookup3.h" 80 #include "rrl.h" 81 #ifdef USE_DNSTAP 82 #include "dnstap/dnstap_collector.h" 83 #endif 84 85 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */ 86 87 #ifdef USE_TCP_FASTOPEN 88 #define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen" 89 #define TCP_FASTOPEN_SERVER_BIT_MASK 0x2 90 #endif 91 92 /* 93 * Data for the UDP handlers. 94 */ 95 struct udp_handler_data 96 { 97 struct nsd *nsd; 98 struct nsd_socket *socket; 99 struct event event; 100 }; 101 102 struct tcp_accept_handler_data { 103 struct nsd *nsd; 104 struct nsd_socket *socket; 105 int event_added; 106 struct event event; 107 #ifdef HAVE_SSL 108 /* handler accepts TLS connections on the dedicated port */ 109 int tls_accept; 110 #endif 111 }; 112 113 /* 114 * These globals are used to enable the TCP accept handlers 115 * when the number of TCP connection drops below the maximum 116 * number of TCP connections. 117 */ 118 static size_t tcp_accept_handler_count; 119 static struct tcp_accept_handler_data *tcp_accept_handlers; 120 121 static struct event slowaccept_event; 122 static int slowaccept; 123 124 #ifdef HAVE_SSL 125 static unsigned char *ocspdata = NULL; 126 static long ocspdata_len = 0; 127 #endif 128 129 #ifdef NONBLOCKING_IS_BROKEN 130 /* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to 131 read multiple times from a socket when reported ready by select. */ 132 # define NUM_RECV_PER_SELECT (1) 133 #else /* !NONBLOCKING_IS_BROKEN */ 134 # define NUM_RECV_PER_SELECT (100) 135 #endif /* NONBLOCKING_IS_BROKEN */ 136 137 #ifndef HAVE_MMSGHDR 138 struct mmsghdr { 139 struct msghdr msg_hdr; 140 unsigned int msg_len; 141 }; 142 #endif 143 144 static struct mmsghdr msgs[NUM_RECV_PER_SELECT]; 145 static struct iovec iovecs[NUM_RECV_PER_SELECT]; 146 static struct query *queries[NUM_RECV_PER_SELECT]; 147 148 /* 149 * Data for the TCP connection handlers. 150 * 151 * The TCP handlers use non-blocking I/O. This is necessary to avoid 152 * blocking the entire server on a slow TCP connection, but does make 153 * reading from and writing to the socket more complicated. 154 * 155 * Basically, whenever a read/write would block (indicated by the 156 * EAGAIN errno variable) we remember the position we were reading 157 * from/writing to and return from the TCP reading/writing event 158 * handler. When the socket becomes readable/writable again we 159 * continue from the same position. 160 */ 161 struct tcp_handler_data 162 { 163 /* 164 * The region used to allocate all TCP connection related 165 * data, including this structure. This region is destroyed 166 * when the connection is closed. 167 */ 168 region_type* region; 169 170 /* 171 * The global nsd structure. 172 */ 173 struct nsd* nsd; 174 175 /* 176 * The current query data for this TCP connection. 177 */ 178 query_type* query; 179 180 /* 181 * The query_state is used to remember if we are performing an 182 * AXFR, if we're done processing, or if we should discard the 183 * query and connection. 184 */ 185 query_state_type query_state; 186 187 /* 188 * The event for the file descriptor and tcp timeout 189 */ 190 struct event event; 191 192 /* 193 * The bytes_transmitted field is used to remember the number 194 * of bytes transmitted when receiving or sending a DNS 195 * packet. The count includes the two additional bytes used 196 * to specify the packet length on a TCP connection. 197 */ 198 size_t bytes_transmitted; 199 200 /* 201 * The number of queries handled by this specific TCP connection. 202 */ 203 int query_count; 204 205 /* 206 * The timeout in msec for this tcp connection 207 */ 208 int tcp_timeout; 209 #ifdef HAVE_SSL 210 /* 211 * TLS object. 212 */ 213 SSL* tls; 214 215 /* 216 * TLS handshake state. 217 */ 218 enum { tls_hs_none, tls_hs_read, tls_hs_write, 219 tls_hs_read_event, tls_hs_write_event } shake_state; 220 #endif 221 /* list of connections, for service of remaining tcp channels */ 222 struct tcp_handler_data *prev, *next; 223 }; 224 /* global that is the list of active tcp channels */ 225 static struct tcp_handler_data *tcp_active_list = NULL; 226 227 /* 228 * Handle incoming queries on the UDP server sockets. 229 */ 230 static void handle_udp(int fd, short event, void* arg); 231 232 /* 233 * Handle incoming connections on the TCP sockets. These handlers 234 * usually wait for the NETIO_EVENT_READ event (indicating an incoming 235 * connection) but are disabled when the number of current TCP 236 * connections is equal to the maximum number of TCP connections. 237 * Disabling is done by changing the handler to wait for the 238 * NETIO_EVENT_NONE type. This is done using the function 239 * configure_tcp_accept_handlers. 240 */ 241 static void handle_tcp_accept(int fd, short event, void* arg); 242 243 /* 244 * Handle incoming queries on a TCP connection. The TCP connections 245 * are configured to be non-blocking and the handler may be called 246 * multiple times before a complete query is received. 247 */ 248 static void handle_tcp_reading(int fd, short event, void* arg); 249 250 /* 251 * Handle outgoing responses on a TCP connection. The TCP connections 252 * are configured to be non-blocking and the handler may be called 253 * multiple times before a complete response is sent. 254 */ 255 static void handle_tcp_writing(int fd, short event, void* arg); 256 257 #ifdef HAVE_SSL 258 /* Create SSL object and associate fd */ 259 static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd); 260 /* 261 * Handle TLS handshake. May be called multiple times if incomplete. 262 */ 263 static int tls_handshake(struct tcp_handler_data* data, int fd, int writing); 264 265 /* 266 * Handle incoming queries on a TLS over TCP connection. The TLS 267 * connections are configured to be non-blocking and the handler may 268 * be called multiple times before a complete query is received. 269 */ 270 static void handle_tls_reading(int fd, short event, void* arg); 271 272 /* 273 * Handle outgoing responses on a TLS over TCP connection. The TLS 274 * connections are configured to be non-blocking and the handler may 275 * be called multiple times before a complete response is sent. 276 */ 277 static void handle_tls_writing(int fd, short event, void* arg); 278 #endif 279 280 /* 281 * Send all children the quit nonblocking, then close pipe. 282 */ 283 static void send_children_quit(struct nsd* nsd); 284 /* same, for shutdown time, waits for child to exit to avoid restart issues */ 285 static void send_children_quit_and_wait(struct nsd* nsd); 286 287 /* set childrens flags to send NSD_STATS to them */ 288 #ifdef BIND8_STATS 289 static void set_children_stats(struct nsd* nsd); 290 #endif /* BIND8_STATS */ 291 292 /* 293 * Change the event types the HANDLERS are interested in to EVENT_TYPES. 294 */ 295 static void configure_handler_event_types(short event_types); 296 297 static uint16_t *compressed_dname_offsets = 0; 298 static uint32_t compression_table_capacity = 0; 299 static uint32_t compression_table_size = 0; 300 static domain_type* compressed_dnames[MAXRRSPP]; 301 302 #ifdef USE_TCP_FASTOPEN 303 /* Checks to see if the kernel value must be manually changed in order for 304 TCP Fast Open to support server mode */ 305 static void report_tcp_fastopen_config() { 306 307 int tcp_fastopen_fp; 308 uint8_t tcp_fastopen_value; 309 310 if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) { 311 log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 312 } 313 if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) { 314 log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno)); 315 close(tcp_fastopen_fp); 316 } 317 if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) { 318 log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n"); 319 log_msg(LOG_WARNING, "However the kernel paramenters are not configured to support TCP_FASTOPEN in server mode.\n"); 320 log_msg(LOG_WARNING, "To enable TFO use the command:"); 321 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n"); 322 log_msg(LOG_WARNING, " 'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n"); 323 log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n"); 324 close(tcp_fastopen_fp); 325 } 326 close(tcp_fastopen_fp); 327 } 328 #endif 329 330 /* 331 * Remove the specified pid from the list of child pids. Returns -1 if 332 * the pid is not in the list, child_num otherwise. The field is set to 0. 333 */ 334 static int 335 delete_child_pid(struct nsd *nsd, pid_t pid) 336 { 337 size_t i; 338 for (i = 0; i < nsd->child_count; ++i) { 339 if (nsd->children[i].pid == pid) { 340 nsd->children[i].pid = 0; 341 if(!nsd->children[i].need_to_exit) { 342 if(nsd->children[i].child_fd != -1) 343 close(nsd->children[i].child_fd); 344 nsd->children[i].child_fd = -1; 345 if(nsd->children[i].handler) 346 nsd->children[i].handler->fd = -1; 347 } 348 return i; 349 } 350 } 351 return -1; 352 } 353 354 /* 355 * Restart child servers if necessary. 356 */ 357 static int 358 restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio, 359 int* xfrd_sock_p) 360 { 361 struct main_ipc_handler_data *ipc_data; 362 size_t i; 363 int sv[2]; 364 365 /* Fork the child processes... */ 366 for (i = 0; i < nsd->child_count; ++i) { 367 if (nsd->children[i].pid <= 0) { 368 if (nsd->children[i].child_fd != -1) 369 close(nsd->children[i].child_fd); 370 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 371 log_msg(LOG_ERR, "socketpair: %s", 372 strerror(errno)); 373 return -1; 374 } 375 nsd->children[i].child_fd = sv[0]; 376 nsd->children[i].parent_fd = sv[1]; 377 nsd->children[i].pid = fork(); 378 switch (nsd->children[i].pid) { 379 default: /* SERVER MAIN */ 380 close(nsd->children[i].parent_fd); 381 nsd->children[i].parent_fd = -1; 382 if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) { 383 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 384 } 385 if(!nsd->children[i].handler) 386 { 387 ipc_data = (struct main_ipc_handler_data*) region_alloc( 388 region, sizeof(struct main_ipc_handler_data)); 389 ipc_data->nsd = nsd; 390 ipc_data->child = &nsd->children[i]; 391 ipc_data->child_num = i; 392 ipc_data->xfrd_sock = xfrd_sock_p; 393 ipc_data->packet = buffer_create(region, QIOBUFSZ); 394 ipc_data->forward_mode = 0; 395 ipc_data->got_bytes = 0; 396 ipc_data->total_bytes = 0; 397 ipc_data->acl_num = 0; 398 nsd->children[i].handler = (struct netio_handler*) region_alloc( 399 region, sizeof(struct netio_handler)); 400 nsd->children[i].handler->fd = nsd->children[i].child_fd; 401 nsd->children[i].handler->timeout = NULL; 402 nsd->children[i].handler->user_data = ipc_data; 403 nsd->children[i].handler->event_types = NETIO_EVENT_READ; 404 nsd->children[i].handler->event_handler = parent_handle_child_command; 405 netio_add_handler(netio, nsd->children[i].handler); 406 } 407 /* clear any ongoing ipc */ 408 ipc_data = (struct main_ipc_handler_data*) 409 nsd->children[i].handler->user_data; 410 ipc_data->forward_mode = 0; 411 /* restart - update fd */ 412 nsd->children[i].handler->fd = nsd->children[i].child_fd; 413 break; 414 case 0: /* CHILD */ 415 /* the child need not be able to access the 416 * nsd.db file */ 417 namedb_close_udb(nsd->db); 418 #ifdef MEMCLEAN /* OS collects memory pages */ 419 region_destroy(region); 420 #endif 421 nsd->pid = 0; 422 nsd->child_count = 0; 423 nsd->server_kind = nsd->children[i].kind; 424 nsd->this_child = &nsd->children[i]; 425 nsd->this_child->child_num = i; 426 /* remove signal flags inherited from parent 427 the parent will handle them. */ 428 nsd->signal_hint_reload_hup = 0; 429 nsd->signal_hint_reload = 0; 430 nsd->signal_hint_child = 0; 431 nsd->signal_hint_quit = 0; 432 nsd->signal_hint_shutdown = 0; 433 nsd->signal_hint_stats = 0; 434 nsd->signal_hint_statsusr = 0; 435 close(*xfrd_sock_p); 436 close(nsd->this_child->child_fd); 437 nsd->this_child->child_fd = -1; 438 if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) { 439 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 440 } 441 server_child(nsd); 442 /* NOTREACH */ 443 exit(0); 444 case -1: 445 log_msg(LOG_ERR, "fork failed: %s", 446 strerror(errno)); 447 return -1; 448 } 449 } 450 } 451 return 0; 452 } 453 454 #ifdef BIND8_STATS 455 static void set_bind8_alarm(struct nsd* nsd) 456 { 457 /* resync so that the next alarm is on the next whole minute */ 458 if(nsd->st.period > 0) /* % by 0 gives divbyzero error */ 459 alarm(nsd->st.period - (time(NULL) % nsd->st.period)); 460 } 461 #endif 462 463 /* set zone stat ids for zones initially read in */ 464 static void 465 zonestatid_tree_set(struct nsd* nsd) 466 { 467 struct radnode* n; 468 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 469 zone_type* zone = (zone_type*)n->elem; 470 zone->zonestatid = getzonestatid(nsd->options, zone->opts); 471 } 472 } 473 474 #ifdef USE_ZONE_STATS 475 void 476 server_zonestat_alloc(struct nsd* nsd) 477 { 478 size_t num = (nsd->options->zonestatnames->count==0?1: 479 nsd->options->zonestatnames->count); 480 size_t sz = sizeof(struct nsdst)*num; 481 char tmpfile[256]; 482 uint8_t z = 0; 483 484 /* file names */ 485 nsd->zonestatfname[0] = 0; 486 nsd->zonestatfname[1] = 0; 487 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0", 488 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 489 nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile); 490 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1", 491 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 492 nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile); 493 494 /* file descriptors */ 495 nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600); 496 if(nsd->zonestatfd[0] == -1) { 497 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0], 498 strerror(errno)); 499 exit(1); 500 } 501 nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600); 502 if(nsd->zonestatfd[0] == -1) { 503 log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1], 504 strerror(errno)); 505 close(nsd->zonestatfd[0]); 506 unlink(nsd->zonestatfname[0]); 507 exit(1); 508 } 509 510 #ifdef HAVE_MMAP 511 if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) { 512 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0], 513 strerror(errno)); 514 exit(1); 515 } 516 if(write(nsd->zonestatfd[0], &z, 1) == -1) { 517 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 518 nsd->zonestatfname[0], strerror(errno)); 519 exit(1); 520 } 521 if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) { 522 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1], 523 strerror(errno)); 524 exit(1); 525 } 526 if(write(nsd->zonestatfd[1], &z, 1) == -1) { 527 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 528 nsd->zonestatfname[1], strerror(errno)); 529 exit(1); 530 } 531 nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 532 MAP_SHARED, nsd->zonestatfd[0], 0); 533 if(nsd->zonestat[0] == MAP_FAILED) { 534 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 535 unlink(nsd->zonestatfname[0]); 536 unlink(nsd->zonestatfname[1]); 537 exit(1); 538 } 539 nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE, 540 MAP_SHARED, nsd->zonestatfd[1], 0); 541 if(nsd->zonestat[1] == MAP_FAILED) { 542 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 543 unlink(nsd->zonestatfname[0]); 544 unlink(nsd->zonestatfname[1]); 545 exit(1); 546 } 547 memset(nsd->zonestat[0], 0, sz); 548 memset(nsd->zonestat[1], 0, sz); 549 nsd->zonestatsize[0] = num; 550 nsd->zonestatsize[1] = num; 551 nsd->zonestatdesired = num; 552 nsd->zonestatsizenow = num; 553 nsd->zonestatnow = nsd->zonestat[0]; 554 #endif /* HAVE_MMAP */ 555 } 556 557 void 558 zonestat_remap(struct nsd* nsd, int idx, size_t sz) 559 { 560 #ifdef HAVE_MMAP 561 #ifdef MREMAP_MAYMOVE 562 nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx], 563 sizeof(struct nsdst)*nsd->zonestatsize[idx], sz, 564 MREMAP_MAYMOVE); 565 if(nsd->zonestat[idx] == MAP_FAILED) { 566 log_msg(LOG_ERR, "mremap failed: %s", strerror(errno)); 567 exit(1); 568 } 569 #else /* !HAVE MREMAP */ 570 if(msync(nsd->zonestat[idx], 571 sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0) 572 log_msg(LOG_ERR, "msync failed: %s", strerror(errno)); 573 if(munmap(nsd->zonestat[idx], 574 sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0) 575 log_msg(LOG_ERR, "munmap failed: %s", strerror(errno)); 576 nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz, 577 PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0); 578 if(nsd->zonestat[idx] == MAP_FAILED) { 579 log_msg(LOG_ERR, "mmap failed: %s", strerror(errno)); 580 exit(1); 581 } 582 #endif /* MREMAP */ 583 #endif /* HAVE_MMAP */ 584 } 585 586 /* realloc the zonestat array for the one that is not currently in use, 587 * to match the desired new size of the array (if applicable) */ 588 void 589 server_zonestat_realloc(struct nsd* nsd) 590 { 591 #ifdef HAVE_MMAP 592 uint8_t z = 0; 593 size_t sz; 594 int idx = 0; /* index of the zonestat array that is not in use */ 595 if(nsd->zonestatnow == nsd->zonestat[0]) 596 idx = 1; 597 if(nsd->zonestatsize[idx] == nsd->zonestatdesired) 598 return; 599 sz = sizeof(struct nsdst)*nsd->zonestatdesired; 600 if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) { 601 log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx], 602 strerror(errno)); 603 exit(1); 604 } 605 if(write(nsd->zonestatfd[idx], &z, 1) == -1) { 606 log_msg(LOG_ERR, "cannot extend stat file %s (%s)", 607 nsd->zonestatfname[idx], strerror(errno)); 608 exit(1); 609 } 610 zonestat_remap(nsd, idx, sz); 611 /* zero the newly allocated region */ 612 if(nsd->zonestatdesired > nsd->zonestatsize[idx]) { 613 memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) * 614 nsd->zonestatsize[idx], 0, sizeof(struct nsdst) * 615 (nsd->zonestatdesired - nsd->zonestatsize[idx])); 616 } 617 nsd->zonestatsize[idx] = nsd->zonestatdesired; 618 #endif /* HAVE_MMAP */ 619 } 620 621 /* switchover to use the other array for the new children, that 622 * briefly coexist with the old children. And we want to avoid them 623 * both writing to the same statistics arrays. */ 624 void 625 server_zonestat_switch(struct nsd* nsd) 626 { 627 if(nsd->zonestatnow == nsd->zonestat[0]) { 628 nsd->zonestatnow = nsd->zonestat[1]; 629 nsd->zonestatsizenow = nsd->zonestatsize[1]; 630 } else { 631 nsd->zonestatnow = nsd->zonestat[0]; 632 nsd->zonestatsizenow = nsd->zonestatsize[0]; 633 } 634 } 635 #endif /* USE_ZONE_STATS */ 636 637 static void 638 cleanup_dname_compression_tables(void *ptr) 639 { 640 free(ptr); 641 compressed_dname_offsets = NULL; 642 compression_table_capacity = 0; 643 } 644 645 static void 646 initialize_dname_compression_tables(struct nsd *nsd) 647 { 648 size_t needed = domain_table_count(nsd->db->domains) + 1; 649 needed += EXTRA_DOMAIN_NUMBERS; 650 if(compression_table_capacity < needed) { 651 if(compressed_dname_offsets) { 652 region_remove_cleanup(nsd->db->region, 653 cleanup_dname_compression_tables, 654 compressed_dname_offsets); 655 free(compressed_dname_offsets); 656 } 657 compressed_dname_offsets = (uint16_t *) xmallocarray( 658 needed, sizeof(uint16_t)); 659 region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables, 660 compressed_dname_offsets); 661 compression_table_capacity = needed; 662 compression_table_size=domain_table_count(nsd->db->domains)+1; 663 } 664 memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t)); 665 compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */ 666 } 667 668 static int 669 set_reuseport(struct nsd_socket *sock) 670 { 671 #ifdef SO_REUSEPORT 672 int on = 1; 673 #ifdef SO_REUSEPORT_LB 674 /* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like 675 * SO_REUSEPORT on Linux. This is what the users want with the config 676 * option in nsd.conf; if we actually need local address and port reuse 677 * they'll also need to have SO_REUSEPORT set for them, assume it was 678 * _LB they want. 679 */ 680 int opt = SO_REUSEPORT_LB; 681 static const char optname[] = "SO_REUSEPORT_LB"; 682 #else /* !SO_REUSEPORT_LB */ 683 int opt = SO_REUSEPORT; 684 static const char optname[] = "SO_REUSEPORT"; 685 #endif /* SO_REUSEPORT_LB */ 686 687 if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) { 688 return 1; 689 } else if(verbosity >= 3 || errno != ENOPROTOOPT) { 690 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 691 optname, strerror(errno)); 692 } 693 return -1; 694 #else 695 (void)sock; 696 #endif /* SO_REUSEPORT */ 697 698 return 0; 699 } 700 701 static int 702 set_reuseaddr(struct nsd_socket *sock) 703 { 704 #ifdef SO_REUSEADDR 705 int on = 1; 706 if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) { 707 return 1; 708 } 709 log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", 710 strerror(errno)); 711 return -1; 712 #endif /* SO_REUSEADDR */ 713 return 0; 714 } 715 716 static int 717 set_rcvbuf(struct nsd_socket *sock, int rcv) 718 { 719 #ifdef SO_RCVBUF 720 #ifdef SO_RCVBUFFORCE 721 if(0 == setsockopt( 722 sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv))) 723 { 724 return 1; 725 } 726 if(errno == EPERM || errno == ENOBUFS) { 727 return 0; 728 } 729 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s", 730 strerror(errno)); 731 return -1; 732 #else /* !SO_RCVBUFFORCE */ 733 if (0 == setsockopt( 734 sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv))) 735 { 736 return 1; 737 } 738 if(errno == ENOSYS || errno == ENOBUFS) { 739 return 0; 740 } 741 log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s", 742 strerror(errno)); 743 return -1; 744 #endif /* SO_RCVBUFFORCE */ 745 #endif /* SO_RCVBUF */ 746 747 return 0; 748 } 749 750 static int 751 set_sndbuf(struct nsd_socket *sock, int snd) 752 { 753 #ifdef SO_SNDBUF 754 #ifdef SO_SNDBUFFORCE 755 if(0 == setsockopt( 756 sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd))) 757 { 758 return 1; 759 } 760 if(errno == EPERM || errno == ENOBUFS) { 761 return 0; 762 } 763 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s", 764 strerror(errno)); 765 return -1; 766 #else /* !SO_SNDBUFFORCE */ 767 if(0 == setsockopt( 768 sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd))) 769 { 770 return 1; 771 } 772 if(errno == ENOSYS || errno == ENOBUFS) { 773 return 0; 774 } 775 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s", 776 strerror(errno)); 777 return -1; 778 #endif /* SO_SNDBUFFORCE */ 779 #endif /* SO_SNDBUF */ 780 781 return 0; 782 } 783 784 static int 785 set_nonblock(struct nsd_socket *sock) 786 { 787 const char *socktype = 788 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 789 790 if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) { 791 log_msg(LOG_ERR, "fctnl(..., O_NONBLOCK) failed for %s: %s", 792 socktype, strerror(errno)); 793 return -1; 794 } 795 796 return 1; 797 } 798 799 static int 800 set_ipv6_v6only(struct nsd_socket *sock) 801 { 802 #ifdef INET6 803 #ifdef IPV6_V6ONLY 804 int on = 1; 805 const char *socktype = 806 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 807 808 if(0 == setsockopt( 809 sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on))) 810 { 811 return 1; 812 } 813 814 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s", 815 socktype, strerror(errno)); 816 return -1; 817 #endif /* IPV6_V6ONLY */ 818 #endif /* INET6 */ 819 820 return 0; 821 } 822 823 static int 824 set_ipv6_use_min_mtu(struct nsd_socket *sock) 825 { 826 #if defined(INET6) && (defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU)) 827 #if defined(IPV6_USE_MIN_MTU) 828 /* There is no fragmentation of IPv6 datagrams during forwarding in the 829 * network. Therefore we do not send UDP datagrams larger than the 830 * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be 831 * larger if the network stack supports IPV6_USE_MIN_MTU. 832 */ 833 int opt = IPV6_USE_MIN_MTU; 834 int optval = 1; 835 static const char optname[] = "IPV6_USE_MIN_MTU"; 836 #elif defined(IPV6_MTU) 837 /* On Linux, PMTUD is disabled by default for datagrams so set the MTU 838 * to the MIN MTU to get the same. 839 */ 840 int opt = IPV6_MTU; 841 int optval = IPV6_MIN_MTU; 842 static const char optname[] = "IPV6_MTU"; 843 #endif 844 if(0 == setsockopt( 845 sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval))) 846 { 847 return 1; 848 } 849 850 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s", 851 optname, strerror(errno)); 852 return -1; 853 #else 854 (void)sock; 855 #endif /* INET6 */ 856 857 return 0; 858 } 859 860 static int 861 set_ipv4_no_pmtu_disc(struct nsd_socket *sock) 862 { 863 int ret = 0; 864 865 #if defined(IP_MTU_DISCOVER) 866 int opt = IP_MTU_DISCOVER; 867 int optval; 868 # if defined(IP_PMTUDISC_OMIT) 869 /* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU 870 * information and send packets with DF=0. Fragmentation is allowed if 871 * and only if the packet size exceeds the outgoing interface MTU or 872 * the packet encounters smaller MTU link in network. This mitigates 873 * DNS fragmentation attacks by preventing forged PMTU information. 874 * FreeBSD already has same semantics without setting the option. 875 */ 876 optval = IP_PMTUDISC_OMIT; 877 if(0 == setsockopt( 878 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 879 { 880 return 1; 881 } 882 883 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 884 "IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno)); 885 # endif /* IP_PMTUDISC_OMIT */ 886 # if defined(IP_PMTUDISC_DONT) 887 /* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */ 888 optval = IP_PMTUDISC_DONT; 889 if(0 == setsockopt( 890 sock->s, IPPROTO_IP, opt, &optval, sizeof(optval))) 891 { 892 return 1; 893 } 894 895 log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s", 896 "IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno)); 897 # endif 898 ret = -1; 899 #elif defined(IP_DONTFRAG) 900 int off = 0; 901 if (0 == setsockopt( 902 sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off))) 903 { 904 return 1; 905 } 906 907 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 908 strerror(errno)); 909 ret = -1; 910 #else 911 (void)sock; 912 #endif 913 914 return ret; 915 } 916 917 static int 918 set_ip_freebind(struct nsd_socket *sock) 919 { 920 #ifdef IP_FREEBIND 921 int on = 1; 922 const char *socktype = 923 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 924 if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0) 925 { 926 return 1; 927 } 928 log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s", 929 socktype, strerror(errno)); 930 return -1; 931 #else 932 (void)sock; 933 #endif /* IP_FREEBIND */ 934 935 return 0; 936 } 937 938 static int 939 set_ip_transparent(struct nsd_socket *sock) 940 { 941 #if defined(IP_TRANSPARENT) 942 int on = 1; 943 const char *socktype = 944 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 945 if(0 == setsockopt( 946 sock->s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on))) 947 { 948 return 1; 949 } 950 951 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s", 952 "IP_TRANSPARENT", socktype, strerror(errno)); 953 return -1; 954 #elif defined(SO_BINDANY) 955 int on = 1; 956 const char *socktype = 957 sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp"; 958 if(0 == setsockopt( 959 sock->s, SOL_SOCKET, SO_BINDANY, &on, sizeof(on))) 960 { 961 return 1; 962 } 963 964 log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s", 965 "SO_BINDANY", socktype, strerror(errno)); 966 return -1; 967 #else 968 (void)sock; 969 #endif 970 971 return 0; 972 } 973 974 static int 975 set_tcp_maxseg(struct nsd_socket *sock, int mss) 976 { 977 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG) 978 if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) { 979 return 1; 980 } 981 log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s", 982 strerror(errno)); 983 return -1; 984 #else 985 log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported"); 986 #endif 987 return 0; 988 } 989 990 #ifdef USE_TCP_FASTOPEN 991 static int 992 set_tcp_fastopen(struct nsd_socket *sock) 993 { 994 /* qlen specifies how many outstanding TFO requests to allow. Limit is 995 * a defense against IP spoofing attacks as suggested in RFC7413. 996 */ 997 int qlen; 998 999 #ifdef __APPLE__ 1000 /* macOS X implementation only supports qlen of 1 via this call. The 1001 * actual value is configured by the net.inet.tcp.fastopen_backlog 1002 * kernel parameter. 1003 */ 1004 qlen = 1; 1005 #else 1006 /* 5 is recommended on Linux. */ 1007 qlen = 5; 1008 #endif 1009 if (0 == setsockopt( 1010 sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen))) 1011 { 1012 return 1; 1013 } 1014 1015 if (errno == EPERM) { 1016 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s " 1017 "; this could likely be because sysctl " 1018 "net.inet.tcp.fastopen.enabled, " 1019 "net.inet.tcp.fastopen.server_enable, or " 1020 "net.ipv4.tcp_fastopen is disabled", 1021 strerror(errno)); 1022 /* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support 1023 * disabled, except when verbosity enabled for debugging 1024 */ 1025 } else if(errno != ENOPROTOOPT || verbosity >= 3) { 1026 log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s", 1027 strerror(errno)); 1028 } 1029 1030 return (errno == ENOPROTOOPT ? 0 : -1); 1031 } 1032 #endif /* USE_TCP_FASTOPEN */ 1033 1034 static int 1035 open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1036 { 1037 int rcv = 1*1024*1024, snd = 1*1024*1024; 1038 1039 if(-1 == (sock->s = socket( 1040 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1041 { 1042 #ifdef INET6 1043 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1044 (sock->addr.ai_family == AF_INET6) && 1045 (errno == EAFNOSUPPORT)) 1046 { 1047 log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: " 1048 "not supported"); 1049 return 0; 1050 } 1051 #endif 1052 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1053 return -1; 1054 } 1055 1056 if(nsd->reuseport && reuseport_works && *reuseport_works) 1057 *reuseport_works = (set_reuseport(sock) == 1); 1058 1059 if(nsd->options->receive_buffer_size > 0) 1060 rcv = nsd->options->receive_buffer_size; 1061 if(set_rcvbuf(sock, rcv) == -1) 1062 return -1; 1063 1064 if(nsd->options->send_buffer_size > 0) 1065 snd = nsd->options->send_buffer_size; 1066 if(set_sndbuf(sock, snd) == -1) 1067 return -1; 1068 #ifdef INET6 1069 if(sock->addr.ai_family == AF_INET6) { 1070 if(set_ipv6_v6only(sock) == -1 || 1071 set_ipv6_use_min_mtu(sock) == -1) 1072 return -1; 1073 } else 1074 #endif /* INET6 */ 1075 if(sock->addr.ai_family == AF_INET) { 1076 if(set_ipv4_no_pmtu_disc(sock) == -1) 1077 return -1; 1078 } 1079 1080 /* Set socket to non-blocking. Otherwise, on operating systems 1081 * with thundering herd problems, the UDP recv could block 1082 * after select returns readable. 1083 */ 1084 set_nonblock(sock); 1085 1086 if(nsd->options->ip_freebind) 1087 (void)set_ip_freebind(sock); 1088 if(nsd->options->ip_transparent) 1089 (void)set_ip_transparent(sock); 1090 1091 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1092 char buf[256]; 1093 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1094 log_msg(LOG_ERR, "can't bind udp socket %s: %s", 1095 buf, strerror(errno)); 1096 return -1; 1097 } 1098 1099 return 1; 1100 } 1101 1102 static int 1103 open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works) 1104 { 1105 #ifdef USE_TCP_FASTOPEN 1106 report_tcp_fastopen_config(); 1107 #endif 1108 1109 (void)reuseport_works; 1110 1111 if(-1 == (sock->s = socket( 1112 sock->addr.ai_family, sock->addr.ai_socktype, 0))) 1113 { 1114 #ifdef INET6 1115 if((sock->flags & NSD_SOCKET_IS_OPTIONAL) && 1116 (sock->addr.ai_family == AF_INET6) && 1117 (errno == EAFNOSUPPORT)) 1118 { 1119 log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: " 1120 "not supported"); 1121 return 0; 1122 } 1123 #endif /* INET6 */ 1124 log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno)); 1125 return -1; 1126 } 1127 1128 if(nsd->reuseport && reuseport_works && *reuseport_works) 1129 *reuseport_works = (set_reuseport(sock) == 1); 1130 1131 (void)set_reuseaddr(sock); 1132 1133 #ifdef INET6 1134 if(sock->addr.ai_family == AF_INET6) { 1135 if (set_ipv6_v6only(sock) == -1 || 1136 set_ipv6_use_min_mtu(sock) == -1) 1137 return -1; 1138 } 1139 #endif 1140 1141 if(nsd->tcp_mss > 0) 1142 set_tcp_maxseg(sock, nsd->tcp_mss); 1143 /* (StevensUNP p463), if TCP listening socket is blocking, then 1144 it may block in accept, even if select() says readable. */ 1145 (void)set_nonblock(sock); 1146 if(nsd->options->ip_freebind) 1147 (void)set_ip_freebind(sock); 1148 if(nsd->options->ip_transparent) 1149 (void)set_ip_transparent(sock); 1150 1151 if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) { 1152 char buf[256]; 1153 addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf)); 1154 log_msg(LOG_ERR, "can't bind tcp socket %s: %s", 1155 buf, strerror(errno)); 1156 return -1; 1157 } 1158 1159 #ifdef USE_TCP_FASTOPEN 1160 (void)set_tcp_fastopen(sock); 1161 #endif 1162 1163 if(listen(sock->s, TCP_BACKLOG) == -1) { 1164 log_msg(LOG_ERR, "can't listen: %s", strerror(errno)); 1165 return -1; 1166 } 1167 1168 return 1; 1169 } 1170 1171 /* 1172 * Initialize the server, reuseport, create and bind the sockets. 1173 */ 1174 int 1175 server_init(struct nsd *nsd) 1176 { 1177 size_t i; 1178 int reuseport = 1; /* Determine if REUSEPORT works. */ 1179 1180 /* open server interface ports */ 1181 for(i = 0; i < nsd->ifs; i++) { 1182 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 || 1183 open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1) 1184 { 1185 return -1; 1186 } 1187 } 1188 1189 if(nsd->reuseport && reuseport) { 1190 size_t ifs = nsd->ifs * nsd->reuseport; 1191 1192 /* increase the size of the interface arrays, there are going 1193 * to be separate interface file descriptors for every server 1194 * instance */ 1195 region_remove_cleanup(nsd->region, free, nsd->udp); 1196 region_remove_cleanup(nsd->region, free, nsd->tcp); 1197 nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp)); 1198 nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp)); 1199 region_add_cleanup(nsd->region, free, nsd->udp); 1200 region_add_cleanup(nsd->region, free, nsd->tcp); 1201 1202 for(i = nsd->ifs; i < ifs; i++) { 1203 nsd->udp[i].addr = nsd->udp[i%nsd->ifs].addr; 1204 if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) { 1205 return -1; 1206 } 1207 /* Turn off REUSEPORT for TCP by copying the socket 1208 * file descriptor. 1209 */ 1210 nsd->tcp[i] = nsd->tcp[i%nsd->ifs]; 1211 } 1212 1213 nsd->ifs = ifs; 1214 } else { 1215 nsd->reuseport = 0; 1216 } 1217 1218 return 0; 1219 } 1220 1221 /* 1222 * Prepare the server for take off. 1223 * 1224 */ 1225 int 1226 server_prepare(struct nsd *nsd) 1227 { 1228 #ifdef RATELIMIT 1229 /* set secret modifier for hashing (udb ptr buckets and rate limits) */ 1230 #ifdef HAVE_ARC4RANDOM 1231 hash_set_raninit(arc4random()); 1232 #else 1233 uint32_t v = getpid() ^ time(NULL); 1234 srandom((unsigned long)v); 1235 # ifdef HAVE_SSL 1236 if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0) 1237 hash_set_raninit(v); 1238 else 1239 # endif 1240 hash_set_raninit(random()); 1241 #endif 1242 rrl_mmap_init(nsd->child_count, nsd->options->rrl_size, 1243 nsd->options->rrl_ratelimit, 1244 nsd->options->rrl_whitelist_ratelimit, 1245 nsd->options->rrl_slip, 1246 nsd->options->rrl_ipv4_prefix_length, 1247 nsd->options->rrl_ipv6_prefix_length); 1248 #endif /* RATELIMIT */ 1249 1250 /* Open the database... */ 1251 if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) { 1252 log_msg(LOG_ERR, "unable to open the database %s: %s", 1253 nsd->dbfile, strerror(errno)); 1254 unlink(nsd->task[0]->fname); 1255 unlink(nsd->task[1]->fname); 1256 #ifdef USE_ZONE_STATS 1257 unlink(nsd->zonestatfname[0]); 1258 unlink(nsd->zonestatfname[1]); 1259 #endif 1260 xfrd_del_tempdir(nsd); 1261 return -1; 1262 } 1263 /* check if zone files have been modified */ 1264 /* NULL for taskudb because we send soainfo in a moment, batched up, 1265 * for all zones */ 1266 if(nsd->options->zonefiles_check || (nsd->options->database == NULL || 1267 nsd->options->database[0] == 0)) 1268 namedb_check_zonefiles(nsd, nsd->options, NULL, NULL); 1269 zonestatid_tree_set(nsd); 1270 1271 compression_table_capacity = 0; 1272 initialize_dname_compression_tables(nsd); 1273 1274 #ifdef BIND8_STATS 1275 /* Initialize times... */ 1276 time(&nsd->st.boot); 1277 set_bind8_alarm(nsd); 1278 #endif /* BIND8_STATS */ 1279 1280 return 0; 1281 } 1282 1283 /* 1284 * Fork the required number of servers. 1285 */ 1286 static int 1287 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio, 1288 int* xfrd_sock_p) 1289 { 1290 size_t i; 1291 1292 /* Start all child servers initially. */ 1293 for (i = 0; i < nsd->child_count; ++i) { 1294 nsd->children[i].pid = 0; 1295 } 1296 1297 return restart_child_servers(nsd, region, netio, xfrd_sock_p); 1298 } 1299 1300 void 1301 server_close_all_sockets(struct nsd_socket sockets[], size_t n) 1302 { 1303 size_t i; 1304 1305 /* Close all the sockets... */ 1306 for (i = 0; i < n; ++i) { 1307 if (sockets[i].s != -1) { 1308 close(sockets[i].s); 1309 sockets[i].s = -1; 1310 } 1311 } 1312 } 1313 1314 /* 1315 * Close the sockets, shutdown the server and exit. 1316 * Does not return. 1317 */ 1318 void 1319 server_shutdown(struct nsd *nsd) 1320 { 1321 size_t i; 1322 1323 server_close_all_sockets(nsd->udp, nsd->ifs); 1324 server_close_all_sockets(nsd->tcp, nsd->ifs); 1325 /* CHILD: close command channel to parent */ 1326 if(nsd->this_child && nsd->this_child->parent_fd != -1) 1327 { 1328 close(nsd->this_child->parent_fd); 1329 nsd->this_child->parent_fd = -1; 1330 } 1331 /* SERVER: close command channels to children */ 1332 if(!nsd->this_child) 1333 { 1334 for(i=0; i < nsd->child_count; ++i) 1335 if(nsd->children[i].child_fd != -1) 1336 { 1337 close(nsd->children[i].child_fd); 1338 nsd->children[i].child_fd = -1; 1339 } 1340 } 1341 1342 tsig_finalize(); 1343 #ifdef HAVE_SSL 1344 daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */ 1345 if (nsd->tls_ctx) 1346 SSL_CTX_free(nsd->tls_ctx); 1347 #endif 1348 1349 #ifdef MEMCLEAN /* OS collects memory pages */ 1350 #ifdef RATELIMIT 1351 rrl_mmap_deinit_keep_mmap(); 1352 #endif 1353 #ifdef USE_DNSTAP 1354 dt_collector_destroy(nsd->dt_collector, nsd); 1355 #endif 1356 udb_base_free_keep_mmap(nsd->task[0]); 1357 udb_base_free_keep_mmap(nsd->task[1]); 1358 namedb_close_udb(nsd->db); /* keeps mmap */ 1359 namedb_close(nsd->db); 1360 nsd_options_destroy(nsd->options); 1361 region_destroy(nsd->region); 1362 #endif 1363 log_finalize(); 1364 exit(0); 1365 } 1366 1367 void 1368 server_prepare_xfrd(struct nsd* nsd) 1369 { 1370 char tmpfile[256]; 1371 /* create task mmaps */ 1372 nsd->mytask = 0; 1373 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0", 1374 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1375 nsd->task[0] = task_file_create(tmpfile); 1376 if(!nsd->task[0]) { 1377 #ifdef USE_ZONE_STATS 1378 unlink(nsd->zonestatfname[0]); 1379 unlink(nsd->zonestatfname[1]); 1380 #endif 1381 xfrd_del_tempdir(nsd); 1382 exit(1); 1383 } 1384 snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1", 1385 nsd->options->xfrdir, (int)getpid(), (unsigned)getpid()); 1386 nsd->task[1] = task_file_create(tmpfile); 1387 if(!nsd->task[1]) { 1388 unlink(nsd->task[0]->fname); 1389 #ifdef USE_ZONE_STATS 1390 unlink(nsd->zonestatfname[0]); 1391 unlink(nsd->zonestatfname[1]); 1392 #endif 1393 xfrd_del_tempdir(nsd); 1394 exit(1); 1395 } 1396 assert(udb_base_get_userdata(nsd->task[0])->data == 0); 1397 assert(udb_base_get_userdata(nsd->task[1])->data == 0); 1398 /* create xfrd listener structure */ 1399 nsd->xfrd_listener = region_alloc(nsd->region, 1400 sizeof(netio_handler_type)); 1401 nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*) 1402 region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data)); 1403 nsd->xfrd_listener->fd = -1; 1404 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd = 1405 nsd; 1406 ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn = 1407 xfrd_tcp_create(nsd->region, QIOBUFSZ); 1408 } 1409 1410 1411 void 1412 server_start_xfrd(struct nsd *nsd, int del_db, int reload_active) 1413 { 1414 pid_t pid; 1415 int sockets[2] = {0,0}; 1416 struct ipc_handler_conn_data *data; 1417 1418 if(nsd->xfrd_listener->fd != -1) 1419 close(nsd->xfrd_listener->fd); 1420 if(del_db) { 1421 /* recreate taskdb that xfrd was using, it may be corrupt */ 1422 /* we (or reload) use nsd->mytask, and xfrd uses the other */ 1423 char* tmpfile = nsd->task[1-nsd->mytask]->fname; 1424 nsd->task[1-nsd->mytask]->fname = NULL; 1425 /* free alloc already, so udb does not shrink itself */ 1426 udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc); 1427 nsd->task[1-nsd->mytask]->alloc = NULL; 1428 udb_base_free(nsd->task[1-nsd->mytask]); 1429 /* create new file, overwrite the old one */ 1430 nsd->task[1-nsd->mytask] = task_file_create(tmpfile); 1431 free(tmpfile); 1432 } 1433 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) { 1434 log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno)); 1435 return; 1436 } 1437 pid = fork(); 1438 switch (pid) { 1439 case -1: 1440 log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno)); 1441 break; 1442 default: 1443 /* PARENT: close first socket, use second one */ 1444 close(sockets[0]); 1445 if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) { 1446 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1447 } 1448 if(del_db) xfrd_free_namedb(nsd); 1449 /* use other task than I am using, since if xfrd died and is 1450 * restarted, the reload is using nsd->mytask */ 1451 nsd->mytask = 1 - nsd->mytask; 1452 xfrd_init(sockets[1], nsd, del_db, reload_active, pid); 1453 /* ENOTREACH */ 1454 break; 1455 case 0: 1456 /* CHILD: close second socket, use first one */ 1457 close(sockets[1]); 1458 if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) { 1459 log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno)); 1460 } 1461 nsd->xfrd_listener->fd = sockets[0]; 1462 break; 1463 } 1464 /* server-parent only */ 1465 nsd->xfrd_listener->timeout = NULL; 1466 nsd->xfrd_listener->event_types = NETIO_EVENT_READ; 1467 nsd->xfrd_listener->event_handler = parent_handle_xfrd_command; 1468 /* clear ongoing ipc reads */ 1469 data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data; 1470 data->conn->is_reading = 0; 1471 } 1472 1473 /** add all soainfo to taskdb */ 1474 static void 1475 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb) 1476 { 1477 struct radnode* n; 1478 udb_ptr task_last; /* last task, mytask is empty so NULL */ 1479 /* add all SOA INFO to mytask */ 1480 udb_ptr_init(&task_last, taskudb); 1481 for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) { 1482 task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0); 1483 } 1484 udb_ptr_unlink(&task_last, taskudb); 1485 } 1486 1487 void 1488 server_send_soa_xfrd(struct nsd* nsd, int shortsoa) 1489 { 1490 /* normally this exchanges the SOA from nsd->xfrd and the expire back. 1491 * parent fills one taskdb with soas, xfrd fills other with expires. 1492 * then they exchange and process. 1493 * shortsoa: xfrd crashes and needs to be restarted and one taskdb 1494 * may be in use by reload. Fill SOA in taskdb and give to xfrd. 1495 * expire notifications can be sent back via a normal reload later 1496 * (xfrd will wait for current running reload to finish if any). 1497 */ 1498 sig_atomic_t cmd = 0; 1499 pid_t mypid; 1500 int xfrd_sock = nsd->xfrd_listener->fd; 1501 struct udb_base* taskudb = nsd->task[nsd->mytask]; 1502 udb_ptr t; 1503 if(!shortsoa) { 1504 if(nsd->signal_hint_shutdown) { 1505 shutdown: 1506 log_msg(LOG_WARNING, "signal received, shutting down..."); 1507 server_close_all_sockets(nsd->udp, nsd->ifs); 1508 server_close_all_sockets(nsd->tcp, nsd->ifs); 1509 #ifdef HAVE_SSL 1510 daemon_remote_close(nsd->rc); 1511 #endif 1512 /* Unlink it if possible... */ 1513 unlinkpid(nsd->pidfile); 1514 unlink(nsd->task[0]->fname); 1515 unlink(nsd->task[1]->fname); 1516 #ifdef USE_ZONE_STATS 1517 unlink(nsd->zonestatfname[0]); 1518 unlink(nsd->zonestatfname[1]); 1519 #endif 1520 /* write the nsd.db to disk, wait for it to complete */ 1521 udb_base_sync(nsd->db->udb, 1); 1522 udb_base_close(nsd->db->udb); 1523 server_shutdown(nsd); 1524 exit(0); 1525 } 1526 } 1527 if(shortsoa) { 1528 /* put SOA in xfrd task because mytask may be in use */ 1529 taskudb = nsd->task[1-nsd->mytask]; 1530 } 1531 1532 add_all_soa_to_task(nsd, taskudb); 1533 if(!shortsoa) { 1534 /* wait for xfrd to signal task is ready, RELOAD signal */ 1535 if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) || 1536 cmd != NSD_RELOAD) { 1537 log_msg(LOG_ERR, "did not get start signal from xfrd"); 1538 exit(1); 1539 } 1540 if(nsd->signal_hint_shutdown) { 1541 goto shutdown; 1542 } 1543 } 1544 /* give xfrd our task, signal it with RELOAD_DONE */ 1545 task_process_sync(taskudb); 1546 cmd = NSD_RELOAD_DONE; 1547 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1548 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1549 (int)nsd->pid, strerror(errno)); 1550 } 1551 mypid = getpid(); 1552 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 1553 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 1554 strerror(errno)); 1555 } 1556 1557 if(!shortsoa) { 1558 /* process the xfrd task works (expiry data) */ 1559 nsd->mytask = 1 - nsd->mytask; 1560 taskudb = nsd->task[nsd->mytask]; 1561 task_remap(taskudb); 1562 udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb)); 1563 while(!udb_ptr_is_null(&t)) { 1564 task_process_expire(nsd->db, TASKLIST(&t)); 1565 udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next); 1566 } 1567 udb_ptr_unlink(&t, taskudb); 1568 task_clear(taskudb); 1569 1570 /* tell xfrd that the task is emptied, signal with RELOAD_DONE */ 1571 cmd = NSD_RELOAD_DONE; 1572 if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) { 1573 log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s", 1574 (int)nsd->pid, strerror(errno)); 1575 } 1576 } 1577 } 1578 1579 #ifdef HAVE_SSL 1580 static void 1581 log_crypto_from_err(const char* str, unsigned long err) 1582 { 1583 /* error:[error code]:[library name]:[function name]:[reason string] */ 1584 char buf[128]; 1585 unsigned long e; 1586 ERR_error_string_n(err, buf, sizeof(buf)); 1587 log_msg(LOG_ERR, "%s crypto %s", str, buf); 1588 while( (e=ERR_get_error()) ) { 1589 ERR_error_string_n(e, buf, sizeof(buf)); 1590 log_msg(LOG_ERR, "and additionally crypto %s", buf); 1591 } 1592 } 1593 1594 void 1595 log_crypto_err(const char* str) 1596 { 1597 log_crypto_from_err(str, ERR_get_error()); 1598 } 1599 1600 /** true if the ssl handshake error has to be squelched from the logs */ 1601 static int 1602 squelch_err_ssl_handshake(unsigned long err) 1603 { 1604 if(verbosity >= 3) 1605 return 0; /* only squelch on low verbosity */ 1606 /* this is very specific, we could filter on ERR_GET_REASON() 1607 * (the third element in ERR_PACK) */ 1608 if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) || 1609 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) || 1610 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) || 1611 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE) 1612 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO 1613 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER) 1614 #endif 1615 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO 1616 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL) 1617 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL) 1618 # ifdef SSL_R_VERSION_TOO_LOW 1619 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW) 1620 # endif 1621 #endif 1622 ) 1623 return 1; 1624 return 0; 1625 } 1626 1627 void 1628 perform_openssl_init(void) 1629 { 1630 /* init SSL library */ 1631 #ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS 1632 ERR_load_crypto_strings(); 1633 #endif 1634 ERR_load_SSL_strings(); 1635 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO) 1636 OpenSSL_add_all_algorithms(); 1637 #else 1638 OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS 1639 | OPENSSL_INIT_ADD_ALL_DIGESTS 1640 | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); 1641 #endif 1642 #if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL) 1643 (void)SSL_library_init(); 1644 #else 1645 OPENSSL_init_ssl(0, NULL); 1646 #endif 1647 1648 if(!RAND_status()) { 1649 /* try to seed it */ 1650 unsigned char buf[256]; 1651 unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid(); 1652 size_t i; 1653 v = seed; 1654 for(i=0; i<256/sizeof(v); i++) { 1655 memmove(buf+i*sizeof(v), &v, sizeof(v)); 1656 v = v*seed + (unsigned int)i; 1657 } 1658 RAND_seed(buf, 256); 1659 log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time"); 1660 } 1661 } 1662 1663 static int 1664 get_ocsp(char *filename, unsigned char **ocsp) 1665 { 1666 BIO *bio; 1667 OCSP_RESPONSE *response; 1668 int len = -1; 1669 unsigned char *p, *buf; 1670 assert(filename); 1671 1672 if ((bio = BIO_new_file(filename, "r")) == NULL) { 1673 log_crypto_err("get_ocsp: BIO_new_file failed"); 1674 return -1; 1675 } 1676 1677 if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) { 1678 log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed"); 1679 BIO_free(bio); 1680 return -1; 1681 } 1682 1683 if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) { 1684 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed"); 1685 OCSP_RESPONSE_free(response); 1686 BIO_free(bio); 1687 return -1; 1688 } 1689 1690 if ((buf = malloc((size_t) len)) == NULL) { 1691 log_msg(LOG_ERR, "get_ocsp: malloc failed"); 1692 OCSP_RESPONSE_free(response); 1693 BIO_free(bio); 1694 return -1; 1695 } 1696 1697 p = buf; 1698 if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) { 1699 log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed"); 1700 free(buf); 1701 OCSP_RESPONSE_free(response); 1702 BIO_free(bio); 1703 return -1; 1704 } 1705 1706 OCSP_RESPONSE_free(response); 1707 BIO_free(bio); 1708 1709 *ocsp = buf; 1710 return len; 1711 } 1712 1713 /* further setup ssl ctx after the keys are loaded */ 1714 static void 1715 listen_sslctx_setup_2(void* ctxt) 1716 { 1717 SSL_CTX* ctx = (SSL_CTX*)ctxt; 1718 (void)ctx; 1719 #if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO 1720 if(!SSL_CTX_set_ecdh_auto(ctx,1)) { 1721 log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE"); 1722 } 1723 #elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME) 1724 if(1) { 1725 EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1); 1726 if (!ecdh) { 1727 log_crypto_err("could not find p256, not enabling ECDHE"); 1728 } else { 1729 if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) { 1730 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE"); 1731 } 1732 EC_KEY_free (ecdh); 1733 } 1734 } 1735 #endif 1736 } 1737 1738 static int 1739 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg)) 1740 { 1741 if(ocspdata) { 1742 unsigned char *p; 1743 if ((p=malloc(ocspdata_len)) == NULL) { 1744 log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure"); 1745 return SSL_TLSEXT_ERR_NOACK; 1746 } 1747 memcpy(p, ocspdata, ocspdata_len); 1748 if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) { 1749 log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp"); 1750 free(p); 1751 return SSL_TLSEXT_ERR_NOACK; 1752 } 1753 return SSL_TLSEXT_ERR_OK; 1754 } else { 1755 return SSL_TLSEXT_ERR_NOACK; 1756 } 1757 } 1758 1759 SSL_CTX* 1760 server_tls_ctx_setup(char* key, char* pem, char* verifypem) 1761 { 1762 SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method()); 1763 if(!ctx) { 1764 log_crypto_err("could not SSL_CTX_new"); 1765 return NULL; 1766 } 1767 /* no SSLv2, SSLv3 because has defects */ 1768 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){ 1769 log_crypto_err("could not set SSL_OP_NO_SSLv2"); 1770 SSL_CTX_free(ctx); 1771 return NULL; 1772 } 1773 if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3) 1774 != SSL_OP_NO_SSLv3){ 1775 log_crypto_err("could not set SSL_OP_NO_SSLv3"); 1776 SSL_CTX_free(ctx); 1777 return 0; 1778 } 1779 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1) 1780 /* if we have tls 1.1 disable 1.0 */ 1781 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1) 1782 != SSL_OP_NO_TLSv1){ 1783 log_crypto_err("could not set SSL_OP_NO_TLSv1"); 1784 SSL_CTX_free(ctx); 1785 return 0; 1786 } 1787 #endif 1788 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2) 1789 /* if we have tls 1.2 disable 1.1 */ 1790 if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1) 1791 != SSL_OP_NO_TLSv1_1){ 1792 log_crypto_err("could not set SSL_OP_NO_TLSv1_1"); 1793 SSL_CTX_free(ctx); 1794 return 0; 1795 } 1796 #endif 1797 #if defined(SSL_OP_NO_RENEGOTIATION) 1798 /* disable client renegotiation */ 1799 if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) & 1800 SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) { 1801 log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION"); 1802 SSL_CTX_free(ctx); 1803 return 0; 1804 } 1805 #endif 1806 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20) 1807 /* if we have sha256, set the cipher list to have no known vulns */ 1808 if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20")) 1809 log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list"); 1810 #endif 1811 if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) & 1812 SSL_OP_CIPHER_SERVER_PREFERENCE) != 1813 SSL_OP_CIPHER_SERVER_PREFERENCE) { 1814 log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE"); 1815 SSL_CTX_free(ctx); 1816 return 0; 1817 } 1818 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL 1819 SSL_CTX_set_security_level(ctx, 0); 1820 #endif 1821 if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) { 1822 log_msg(LOG_ERR, "error for cert file: %s", pem); 1823 log_crypto_err("error in SSL_CTX use_certificate_chain_file"); 1824 SSL_CTX_free(ctx); 1825 return NULL; 1826 } 1827 if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) { 1828 log_msg(LOG_ERR, "error for private key file: %s", key); 1829 log_crypto_err("Error in SSL_CTX use_PrivateKey_file"); 1830 SSL_CTX_free(ctx); 1831 return NULL; 1832 } 1833 if(!SSL_CTX_check_private_key(ctx)) { 1834 log_msg(LOG_ERR, "error for key file: %s", key); 1835 log_crypto_err("Error in SSL_CTX check_private_key"); 1836 SSL_CTX_free(ctx); 1837 return NULL; 1838 } 1839 listen_sslctx_setup_2(ctx); 1840 if(verifypem && verifypem[0]) { 1841 if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) { 1842 log_crypto_err("Error in SSL_CTX verify locations"); 1843 SSL_CTX_free(ctx); 1844 return NULL; 1845 } 1846 SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem)); 1847 SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL); 1848 } 1849 return ctx; 1850 } 1851 1852 SSL_CTX* 1853 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile) 1854 { 1855 char *key, *pem; 1856 SSL_CTX *ctx; 1857 1858 key = nsd->options->tls_service_key; 1859 pem = nsd->options->tls_service_pem; 1860 if(!key || key[0] == 0) { 1861 log_msg(LOG_ERR, "error: no tls-service-key file specified"); 1862 return NULL; 1863 } 1864 if(!pem || pem[0] == 0) { 1865 log_msg(LOG_ERR, "error: no tls-service-pem file specified"); 1866 return NULL; 1867 } 1868 1869 /* NOTE:This mimics the existing code in Unbound 1.5.1 by supporting SSL but 1870 * raft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2*/ 1871 ctx = server_tls_ctx_setup(key, pem, verifypem); 1872 if(!ctx) { 1873 log_msg(LOG_ERR, "could not setup server TLS context"); 1874 return NULL; 1875 } 1876 if(ocspfile && ocspfile[0]) { 1877 if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) { 1878 log_crypto_err("Error reading OCSPfile"); 1879 SSL_CTX_free(ctx); 1880 return NULL; 1881 } else { 1882 VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile)); 1883 if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) { 1884 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb"); 1885 SSL_CTX_free(ctx); 1886 return NULL; 1887 } 1888 } 1889 } 1890 return ctx; 1891 } 1892 1893 /* check if tcp_handler_accept_data created for TLS dedicated port */ 1894 int 1895 using_tls_port(struct sockaddr* addr, const char* tls_port) 1896 { 1897 in_port_t port = 0; 1898 1899 if (addr->sa_family == AF_INET) 1900 port = ((struct sockaddr_in*)addr)->sin_port; 1901 #ifndef HAVE_STRUCT_SOCKADDR_IN6 1902 else 1903 port = ((struct sockaddr_in6*)addr)->sin6_port; 1904 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */ 1905 if (atoi(tls_port) == ntohs(port)) 1906 return 1; 1907 1908 return 0; 1909 } 1910 #endif 1911 1912 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */ 1913 ssize_t 1914 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout) 1915 { 1916 uint8_t* buf = (uint8_t*) p; 1917 ssize_t total = 0; 1918 struct pollfd fd; 1919 memset(&fd, 0, sizeof(fd)); 1920 fd.fd = s; 1921 fd.events = POLLIN; 1922 1923 while( total < sz) { 1924 ssize_t ret; 1925 ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000); 1926 if(ret == -1) { 1927 if(errno == EAGAIN) 1928 /* blocking read */ 1929 continue; 1930 if(errno == EINTR) { 1931 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 1932 return -1; 1933 /* other signals can be handled later */ 1934 continue; 1935 } 1936 /* some error */ 1937 return -1; 1938 } 1939 if(ret == 0) { 1940 /* operation timed out */ 1941 return -2; 1942 } 1943 ret = read(s, buf+total, sz-total); 1944 if(ret == -1) { 1945 if(errno == EAGAIN) 1946 /* blocking read */ 1947 continue; 1948 if(errno == EINTR) { 1949 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown)) 1950 return -1; 1951 /* other signals can be handled later */ 1952 continue; 1953 } 1954 /* some error */ 1955 return -1; 1956 } 1957 if(ret == 0) { 1958 /* closed connection! */ 1959 return 0; 1960 } 1961 total += ret; 1962 } 1963 return total; 1964 } 1965 1966 static void 1967 reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket) 1968 { 1969 sig_atomic_t cmd = NSD_QUIT_SYNC; 1970 udb_ptr t, next; 1971 udb_base* u = nsd->task[nsd->mytask]; 1972 udb_ptr_init(&next, u); 1973 udb_ptr_new(&t, u, udb_base_get_userdata(u)); 1974 udb_base_set_userdata(u, 0); 1975 while(!udb_ptr_is_null(&t)) { 1976 /* store next in list so this one can be deleted or reused */ 1977 udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next); 1978 udb_rptr_zero(&TASKLIST(&t)->next, u); 1979 1980 /* process task t */ 1981 /* append results for task t and update last_task */ 1982 task_process_in_reload(nsd, u, last_task, &t); 1983 1984 /* go to next */ 1985 udb_ptr_set_ptr(&t, u, &next); 1986 1987 /* if the parent has quit, we must quit too, poll the fd for cmds */ 1988 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 1989 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 1990 if(cmd == NSD_QUIT) { 1991 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 1992 /* sync to disk (if needed) */ 1993 udb_base_sync(nsd->db->udb, 0); 1994 /* unlink files of remainder of tasks */ 1995 while(!udb_ptr_is_null(&t)) { 1996 if(TASKLIST(&t)->task_type == task_apply_xfr) { 1997 xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno); 1998 } 1999 udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next); 2000 } 2001 udb_ptr_unlink(&t, u); 2002 udb_ptr_unlink(&next, u); 2003 exit(0); 2004 } 2005 } 2006 2007 } 2008 udb_ptr_unlink(&t, u); 2009 udb_ptr_unlink(&next, u); 2010 } 2011 2012 #ifdef BIND8_STATS 2013 static void 2014 parent_send_stats(struct nsd* nsd, int cmdfd) 2015 { 2016 size_t i; 2017 if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) { 2018 log_msg(LOG_ERR, "could not write stats to reload"); 2019 return; 2020 } 2021 for(i=0; i<nsd->child_count; i++) 2022 if(!write_socket(cmdfd, &nsd->children[i].query_count, 2023 sizeof(stc_type))) { 2024 log_msg(LOG_ERR, "could not write stats to reload"); 2025 return; 2026 } 2027 } 2028 2029 static void 2030 reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last) 2031 { 2032 struct nsdst s; 2033 stc_type* p; 2034 size_t i; 2035 if(block_read(nsd, cmdfd, &s, sizeof(s), 2036 RELOAD_SYNC_TIMEOUT) != sizeof(s)) { 2037 log_msg(LOG_ERR, "could not read stats from oldpar"); 2038 return; 2039 } 2040 s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0); 2041 s.db_mem = region_get_mem(nsd->db->region); 2042 p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s, 2043 nsd->child_count); 2044 if(!p) return; 2045 for(i=0; i<nsd->child_count; i++) { 2046 if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!= 2047 sizeof(stc_type)) 2048 return; 2049 } 2050 } 2051 #endif /* BIND8_STATS */ 2052 2053 /* 2054 * Reload the database, stop parent, re-fork children and continue. 2055 * as server_main. 2056 */ 2057 static void 2058 server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio, 2059 int cmdsocket) 2060 { 2061 pid_t mypid; 2062 sig_atomic_t cmd = NSD_QUIT_SYNC; 2063 int ret; 2064 udb_ptr last_task; 2065 struct sigaction old_sigchld, ign_sigchld; 2066 /* ignore SIGCHLD from the previous server_main that used this pid */ 2067 memset(&ign_sigchld, 0, sizeof(ign_sigchld)); 2068 ign_sigchld.sa_handler = SIG_IGN; 2069 sigaction(SIGCHLD, &ign_sigchld, &old_sigchld); 2070 2071 /* see what tasks we got from xfrd */ 2072 task_remap(nsd->task[nsd->mytask]); 2073 udb_ptr_init(&last_task, nsd->task[nsd->mytask]); 2074 udb_compact_inhibited(nsd->db->udb, 1); 2075 reload_process_tasks(nsd, &last_task, cmdsocket); 2076 udb_compact_inhibited(nsd->db->udb, 0); 2077 udb_compact(nsd->db->udb); 2078 2079 #ifndef NDEBUG 2080 if(nsd_debug_level >= 1) 2081 region_log_stats(nsd->db->region); 2082 #endif /* NDEBUG */ 2083 /* sync to disk (if needed) */ 2084 udb_base_sync(nsd->db->udb, 0); 2085 2086 initialize_dname_compression_tables(nsd); 2087 2088 #ifdef BIND8_STATS 2089 /* Restart dumping stats if required. */ 2090 time(&nsd->st.boot); 2091 set_bind8_alarm(nsd); 2092 #endif 2093 #ifdef USE_ZONE_STATS 2094 server_zonestat_realloc(nsd); /* realloc for new children */ 2095 server_zonestat_switch(nsd); 2096 #endif 2097 2098 /* listen for the signals of failed children again */ 2099 sigaction(SIGCHLD, &old_sigchld, NULL); 2100 /* Start new child processes */ 2101 if (server_start_children(nsd, server_region, netio, &nsd-> 2102 xfrd_listener->fd) != 0) { 2103 send_children_quit(nsd); 2104 exit(1); 2105 } 2106 2107 /* if the parent has quit, we must quit too, poll the fd for cmds */ 2108 if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) { 2109 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd)); 2110 if(cmd == NSD_QUIT) { 2111 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd")); 2112 send_children_quit(nsd); 2113 exit(0); 2114 } 2115 } 2116 2117 /* Send quit command to parent: blocking, wait for receipt. */ 2118 do { 2119 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main")); 2120 if (!write_socket(cmdsocket, &cmd, sizeof(cmd))) 2121 { 2122 log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s", 2123 strerror(errno)); 2124 } 2125 /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */ 2126 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main")); 2127 ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 2128 RELOAD_SYNC_TIMEOUT); 2129 if(ret == -2) { 2130 DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry")); 2131 } 2132 } while (ret == -2); 2133 if(ret == -1) { 2134 log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s", 2135 strerror(errno)); 2136 } 2137 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd)); 2138 if(cmd == NSD_QUIT) { 2139 /* small race condition possible here, parent got quit cmd. */ 2140 send_children_quit(nsd); 2141 exit(1); 2142 } 2143 assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD); 2144 #ifdef BIND8_STATS 2145 reload_do_stats(cmdsocket, nsd, &last_task); 2146 #endif 2147 udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]); 2148 task_process_sync(nsd->task[nsd->mytask]); 2149 #ifdef USE_ZONE_STATS 2150 server_zonestat_realloc(nsd); /* realloc for next children */ 2151 #endif 2152 2153 /* send soainfo to the xfrd process, signal it that reload is done, 2154 * it picks up the taskudb */ 2155 cmd = NSD_RELOAD_DONE; 2156 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2157 log_msg(LOG_ERR, "problems sending reload_done xfrd: %s", 2158 strerror(errno)); 2159 } 2160 mypid = getpid(); 2161 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2162 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2163 strerror(errno)); 2164 } 2165 2166 /* try to reopen file */ 2167 if (nsd->file_rotation_ok) 2168 log_reopen(nsd->log_filename, 1); 2169 /* exit reload, continue as new server_main */ 2170 } 2171 2172 /* 2173 * Get the mode depending on the signal hints that have been received. 2174 * Multiple signal hints can be received and will be handled in turn. 2175 */ 2176 static sig_atomic_t 2177 server_signal_mode(struct nsd *nsd) 2178 { 2179 if(nsd->signal_hint_quit) { 2180 nsd->signal_hint_quit = 0; 2181 return NSD_QUIT; 2182 } 2183 else if(nsd->signal_hint_shutdown) { 2184 nsd->signal_hint_shutdown = 0; 2185 return NSD_SHUTDOWN; 2186 } 2187 else if(nsd->signal_hint_child) { 2188 nsd->signal_hint_child = 0; 2189 return NSD_REAP_CHILDREN; 2190 } 2191 else if(nsd->signal_hint_reload) { 2192 nsd->signal_hint_reload = 0; 2193 return NSD_RELOAD; 2194 } 2195 else if(nsd->signal_hint_reload_hup) { 2196 nsd->signal_hint_reload_hup = 0; 2197 return NSD_RELOAD_REQ; 2198 } 2199 else if(nsd->signal_hint_stats) { 2200 nsd->signal_hint_stats = 0; 2201 #ifdef BIND8_STATS 2202 set_bind8_alarm(nsd); 2203 #endif 2204 return NSD_STATS; 2205 } 2206 else if(nsd->signal_hint_statsusr) { 2207 nsd->signal_hint_statsusr = 0; 2208 return NSD_STATS; 2209 } 2210 return NSD_RUN; 2211 } 2212 2213 /* 2214 * The main server simply waits for signals and child processes to 2215 * terminate. Child processes are restarted as necessary. 2216 */ 2217 void 2218 server_main(struct nsd *nsd) 2219 { 2220 region_type *server_region = region_create(xalloc, free); 2221 netio_type *netio = netio_create(server_region); 2222 netio_handler_type reload_listener; 2223 int reload_sockets[2] = {-1, -1}; 2224 struct timespec timeout_spec; 2225 int status; 2226 pid_t child_pid; 2227 pid_t reload_pid = -1; 2228 sig_atomic_t mode; 2229 2230 /* Ensure we are the main process */ 2231 assert(nsd->server_kind == NSD_SERVER_MAIN); 2232 2233 /* Add listener for the XFRD process */ 2234 netio_add_handler(netio, nsd->xfrd_listener); 2235 2236 /* Start the child processes that handle incoming queries */ 2237 if (server_start_children(nsd, server_region, netio, 2238 &nsd->xfrd_listener->fd) != 0) { 2239 send_children_quit(nsd); 2240 exit(1); 2241 } 2242 reload_listener.fd = -1; 2243 2244 /* This_child MUST be 0, because this is the parent process */ 2245 assert(nsd->this_child == 0); 2246 2247 /* Run the server until we get a shutdown signal */ 2248 while ((mode = nsd->mode) != NSD_SHUTDOWN) { 2249 /* Did we receive a signal that changes our mode? */ 2250 if(mode == NSD_RUN) { 2251 nsd->mode = mode = server_signal_mode(nsd); 2252 } 2253 2254 switch (mode) { 2255 case NSD_RUN: 2256 /* see if any child processes terminated */ 2257 while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) { 2258 int is_child = delete_child_pid(nsd, child_pid); 2259 if (is_child != -1 && nsd->children[is_child].need_to_exit) { 2260 if(nsd->children[is_child].child_fd == -1) 2261 nsd->children[is_child].has_exited = 1; 2262 parent_check_all_children_exited(nsd); 2263 } else if(is_child != -1) { 2264 log_msg(LOG_WARNING, 2265 "server %d died unexpectedly with status %d, restarting", 2266 (int) child_pid, status); 2267 restart_child_servers(nsd, server_region, netio, 2268 &nsd->xfrd_listener->fd); 2269 } else if (child_pid == reload_pid) { 2270 sig_atomic_t cmd = NSD_RELOAD_DONE; 2271 pid_t mypid; 2272 log_msg(LOG_WARNING, 2273 "Reload process %d failed with status %d, continuing with old database", 2274 (int) child_pid, status); 2275 reload_pid = -1; 2276 if(reload_listener.fd != -1) close(reload_listener.fd); 2277 reload_listener.fd = -1; 2278 reload_listener.event_types = NETIO_EVENT_NONE; 2279 task_process_sync(nsd->task[nsd->mytask]); 2280 /* inform xfrd reload attempt ended */ 2281 if(!write_socket(nsd->xfrd_listener->fd, 2282 &cmd, sizeof(cmd))) { 2283 log_msg(LOG_ERR, "problems " 2284 "sending SOAEND to xfrd: %s", 2285 strerror(errno)); 2286 } 2287 mypid = getpid(); 2288 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2289 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2290 strerror(errno)); 2291 } 2292 } else if(status != 0) { 2293 /* check for status, because we get 2294 * the old-servermain because reload 2295 * is the process-parent of old-main, 2296 * and we get older server-processes 2297 * that are exiting after a reload */ 2298 log_msg(LOG_WARNING, 2299 "process %d terminated with status %d", 2300 (int) child_pid, status); 2301 } 2302 } 2303 if (child_pid == -1) { 2304 if (errno == EINTR) { 2305 continue; 2306 } 2307 if (errno != ECHILD) 2308 log_msg(LOG_WARNING, "wait failed: %s", strerror(errno)); 2309 } 2310 if (nsd->mode != NSD_RUN) 2311 break; 2312 2313 /* timeout to collect processes. In case no sigchild happens. */ 2314 timeout_spec.tv_sec = 60; 2315 timeout_spec.tv_nsec = 0; 2316 2317 /* listen on ports, timeout for collecting terminated children */ 2318 if(netio_dispatch(netio, &timeout_spec, 0) == -1) { 2319 if (errno != EINTR) { 2320 log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno)); 2321 } 2322 } 2323 if(nsd->restart_children) { 2324 restart_child_servers(nsd, server_region, netio, 2325 &nsd->xfrd_listener->fd); 2326 nsd->restart_children = 0; 2327 } 2328 if(nsd->reload_failed) { 2329 sig_atomic_t cmd = NSD_RELOAD_DONE; 2330 pid_t mypid; 2331 nsd->reload_failed = 0; 2332 log_msg(LOG_WARNING, 2333 "Reload process %d failed, continuing with old database", 2334 (int) reload_pid); 2335 reload_pid = -1; 2336 if(reload_listener.fd != -1) close(reload_listener.fd); 2337 reload_listener.fd = -1; 2338 reload_listener.event_types = NETIO_EVENT_NONE; 2339 task_process_sync(nsd->task[nsd->mytask]); 2340 /* inform xfrd reload attempt ended */ 2341 if(!write_socket(nsd->xfrd_listener->fd, 2342 &cmd, sizeof(cmd))) { 2343 log_msg(LOG_ERR, "problems " 2344 "sending SOAEND to xfrd: %s", 2345 strerror(errno)); 2346 } 2347 mypid = getpid(); 2348 if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) { 2349 log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s", 2350 strerror(errno)); 2351 } 2352 } 2353 2354 break; 2355 case NSD_RELOAD_REQ: { 2356 sig_atomic_t cmd = NSD_RELOAD_REQ; 2357 log_msg(LOG_WARNING, "SIGHUP received, reloading..."); 2358 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2359 "main: ipc send reload_req to xfrd")); 2360 if(!write_socket(nsd->xfrd_listener->fd, 2361 &cmd, sizeof(cmd))) { 2362 log_msg(LOG_ERR, "server_main: could not send " 2363 "reload_req to xfrd: %s", strerror(errno)); 2364 } 2365 nsd->mode = NSD_RUN; 2366 } break; 2367 case NSD_RELOAD: 2368 /* Continue to run nsd after reload */ 2369 nsd->mode = NSD_RUN; 2370 DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading...")); 2371 if (reload_pid != -1) { 2372 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)", 2373 (int) reload_pid); 2374 break; 2375 } 2376 2377 /* switch the mytask to keep track of who owns task*/ 2378 nsd->mytask = 1 - nsd->mytask; 2379 if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) { 2380 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno)); 2381 reload_pid = -1; 2382 break; 2383 } 2384 2385 /* Do actual reload */ 2386 reload_pid = fork(); 2387 switch (reload_pid) { 2388 case -1: 2389 log_msg(LOG_ERR, "fork failed: %s", strerror(errno)); 2390 break; 2391 default: 2392 /* PARENT */ 2393 close(reload_sockets[0]); 2394 server_reload(nsd, server_region, netio, 2395 reload_sockets[1]); 2396 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main")); 2397 close(reload_sockets[1]); 2398 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed")); 2399 /* drop stale xfrd ipc data */ 2400 ((struct ipc_handler_conn_data*)nsd-> 2401 xfrd_listener->user_data) 2402 ->conn->is_reading = 0; 2403 reload_pid = -1; 2404 reload_listener.fd = -1; 2405 reload_listener.event_types = NETIO_EVENT_NONE; 2406 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run")); 2407 break; 2408 case 0: 2409 /* CHILD */ 2410 /* server_main keep running until NSD_QUIT_SYNC 2411 * received from reload. */ 2412 close(reload_sockets[1]); 2413 reload_listener.fd = reload_sockets[0]; 2414 reload_listener.timeout = NULL; 2415 reload_listener.user_data = nsd; 2416 reload_listener.event_types = NETIO_EVENT_READ; 2417 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */ 2418 netio_add_handler(netio, &reload_listener); 2419 reload_pid = getppid(); 2420 break; 2421 } 2422 break; 2423 case NSD_QUIT_SYNC: 2424 /* synchronisation of xfrd, parent and reload */ 2425 if(!nsd->quit_sync_done && reload_listener.fd != -1) { 2426 sig_atomic_t cmd = NSD_RELOAD; 2427 /* stop xfrd ipc writes in progress */ 2428 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2429 "main: ipc send indication reload")); 2430 if(!write_socket(nsd->xfrd_listener->fd, 2431 &cmd, sizeof(cmd))) { 2432 log_msg(LOG_ERR, "server_main: could not send reload " 2433 "indication to xfrd: %s", strerror(errno)); 2434 } 2435 /* wait for ACK from xfrd */ 2436 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd")); 2437 nsd->quit_sync_done = 1; 2438 } 2439 nsd->mode = NSD_RUN; 2440 break; 2441 case NSD_QUIT: 2442 /* silent shutdown during reload */ 2443 if(reload_listener.fd != -1) { 2444 /* acknowledge the quit, to sync reload that we will really quit now */ 2445 sig_atomic_t cmd = NSD_RELOAD; 2446 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload")); 2447 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2448 log_msg(LOG_ERR, "server_main: " 2449 "could not ack quit: %s", strerror(errno)); 2450 } 2451 #ifdef BIND8_STATS 2452 parent_send_stats(nsd, reload_listener.fd); 2453 #endif /* BIND8_STATS */ 2454 close(reload_listener.fd); 2455 } 2456 DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence")); 2457 /* only quit children after xfrd has acked */ 2458 send_children_quit(nsd); 2459 2460 #ifdef MEMCLEAN /* OS collects memory pages */ 2461 region_destroy(server_region); 2462 #endif 2463 server_shutdown(nsd); 2464 2465 /* ENOTREACH */ 2466 break; 2467 case NSD_SHUTDOWN: 2468 break; 2469 case NSD_REAP_CHILDREN: 2470 /* continue; wait for child in run loop */ 2471 nsd->mode = NSD_RUN; 2472 break; 2473 case NSD_STATS: 2474 #ifdef BIND8_STATS 2475 set_children_stats(nsd); 2476 #endif 2477 nsd->mode = NSD_RUN; 2478 break; 2479 default: 2480 log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode); 2481 nsd->mode = NSD_RUN; 2482 break; 2483 } 2484 } 2485 log_msg(LOG_WARNING, "signal received, shutting down..."); 2486 2487 /* close opened ports to avoid race with restart of nsd */ 2488 server_close_all_sockets(nsd->udp, nsd->ifs); 2489 server_close_all_sockets(nsd->tcp, nsd->ifs); 2490 #ifdef HAVE_SSL 2491 daemon_remote_close(nsd->rc); 2492 #endif 2493 send_children_quit_and_wait(nsd); 2494 2495 /* Unlink it if possible... */ 2496 unlinkpid(nsd->pidfile); 2497 unlink(nsd->task[0]->fname); 2498 unlink(nsd->task[1]->fname); 2499 #ifdef USE_ZONE_STATS 2500 unlink(nsd->zonestatfname[0]); 2501 unlink(nsd->zonestatfname[1]); 2502 #endif 2503 #ifdef USE_DNSTAP 2504 dt_collector_close(nsd->dt_collector, nsd); 2505 #endif 2506 2507 if(reload_listener.fd != -1) { 2508 sig_atomic_t cmd = NSD_QUIT; 2509 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2510 "main: ipc send quit to reload-process")); 2511 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) { 2512 log_msg(LOG_ERR, "server_main: could not send quit to reload: %s", 2513 strerror(errno)); 2514 } 2515 fsync(reload_listener.fd); 2516 close(reload_listener.fd); 2517 /* wait for reload to finish processing */ 2518 while(1) { 2519 if(waitpid(reload_pid, NULL, 0) == -1) { 2520 if(errno == EINTR) continue; 2521 if(errno == ECHILD) break; 2522 log_msg(LOG_ERR, "waitpid(reload %d): %s", 2523 (int)reload_pid, strerror(errno)); 2524 } 2525 break; 2526 } 2527 } 2528 if(nsd->xfrd_listener->fd != -1) { 2529 /* complete quit, stop xfrd */ 2530 sig_atomic_t cmd = NSD_QUIT; 2531 DEBUG(DEBUG_IPC,1, (LOG_INFO, 2532 "main: ipc send quit to xfrd")); 2533 if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) { 2534 log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s", 2535 strerror(errno)); 2536 } 2537 fsync(nsd->xfrd_listener->fd); 2538 close(nsd->xfrd_listener->fd); 2539 (void)kill(nsd->pid, SIGTERM); 2540 } 2541 2542 #ifdef MEMCLEAN /* OS collects memory pages */ 2543 region_destroy(server_region); 2544 #endif 2545 /* write the nsd.db to disk, wait for it to complete */ 2546 udb_base_sync(nsd->db->udb, 1); 2547 udb_base_close(nsd->db->udb); 2548 server_shutdown(nsd); 2549 } 2550 2551 static query_state_type 2552 server_process_query(struct nsd *nsd, struct query *query) 2553 { 2554 return query_process(query, nsd); 2555 } 2556 2557 static query_state_type 2558 server_process_query_udp(struct nsd *nsd, struct query *query) 2559 { 2560 #ifdef RATELIMIT 2561 if(query_process(query, nsd) != QUERY_DISCARDED) { 2562 if(rrl_process_query(query)) 2563 return rrl_slip(query); 2564 else return QUERY_PROCESSED; 2565 } 2566 return QUERY_DISCARDED; 2567 #else 2568 return query_process(query, nsd); 2569 #endif 2570 } 2571 2572 struct event_base* 2573 nsd_child_event_base(void) 2574 { 2575 struct event_base* base; 2576 #ifdef USE_MINI_EVENT 2577 static time_t secs; 2578 static struct timeval now; 2579 base = event_init(&secs, &now); 2580 #else 2581 # if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) 2582 /* libev */ 2583 base = (struct event_base *)ev_default_loop(EVFLAG_AUTO); 2584 # else 2585 /* libevent */ 2586 # ifdef HAVE_EVENT_BASE_NEW 2587 base = event_base_new(); 2588 # else 2589 base = event_init(); 2590 # endif 2591 # endif 2592 #endif 2593 return base; 2594 } 2595 2596 static void 2597 add_udp_handler( 2598 struct nsd *nsd, 2599 struct nsd_socket *sock, 2600 struct udp_handler_data *data) 2601 { 2602 struct event *handler = &data->event; 2603 2604 data->nsd = nsd; 2605 data->socket = sock; 2606 2607 memset(handler, 0, sizeof(*handler)); 2608 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data); 2609 if(event_base_set(nsd->event_base, handler) != 0) 2610 log_msg(LOG_ERR, "nsd udp: event_base_set failed"); 2611 if(event_add(handler, NULL) != 0) 2612 log_msg(LOG_ERR, "nsd udp: event_add failed"); 2613 } 2614 2615 void 2616 add_tcp_handler( 2617 struct nsd *nsd, 2618 struct nsd_socket *sock, 2619 struct tcp_accept_handler_data *data) 2620 { 2621 struct event *handler = &data->event; 2622 2623 data->nsd = nsd; 2624 data->socket = sock; 2625 2626 #ifdef HAVE_SSL 2627 if (nsd->tls_ctx && 2628 nsd->options->tls_port && 2629 using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port)) 2630 { 2631 data->tls_accept = 1; 2632 if(verbosity >= 2) { 2633 char buf[48]; 2634 addrport2str((struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf)); 2635 VERBOSITY(2, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf)); 2636 } 2637 } else { 2638 data->tls_accept = 0; 2639 } 2640 #endif 2641 2642 memset(handler, 0, sizeof(*handler)); 2643 event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data); 2644 if(event_base_set(nsd->event_base, handler) != 0) 2645 log_msg(LOG_ERR, "nsd tcp: event_base_set failed"); 2646 if(event_add(handler, NULL) != 0) 2647 log_msg(LOG_ERR, "nsd tcp: event_add failed"); 2648 data->event_added = 1; 2649 } 2650 2651 /* 2652 * Serve DNS requests. 2653 */ 2654 void 2655 server_child(struct nsd *nsd) 2656 { 2657 size_t i, from, numifs; 2658 region_type *server_region = region_create(xalloc, free); 2659 struct event_base* event_base = nsd_child_event_base(); 2660 sig_atomic_t mode; 2661 2662 if(!event_base) { 2663 log_msg(LOG_ERR, "nsd server could not create event base"); 2664 exit(1); 2665 } 2666 nsd->event_base = event_base; 2667 nsd->server_region = server_region; 2668 2669 #ifdef RATELIMIT 2670 rrl_init(nsd->this_child->child_num); 2671 #endif 2672 2673 assert(nsd->server_kind != NSD_SERVER_MAIN); 2674 DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started")); 2675 2676 if (!(nsd->server_kind & NSD_SERVER_TCP)) { 2677 server_close_all_sockets(nsd->tcp, nsd->ifs); 2678 } 2679 if (!(nsd->server_kind & NSD_SERVER_UDP)) { 2680 server_close_all_sockets(nsd->udp, nsd->ifs); 2681 } 2682 2683 if (nsd->this_child->parent_fd != -1) { 2684 struct event *handler; 2685 struct ipc_handler_conn_data* user_data = 2686 (struct ipc_handler_conn_data*)region_alloc( 2687 server_region, sizeof(struct ipc_handler_conn_data)); 2688 user_data->nsd = nsd; 2689 user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ); 2690 2691 handler = (struct event*) region_alloc( 2692 server_region, sizeof(*handler)); 2693 memset(handler, 0, sizeof(*handler)); 2694 event_set(handler, nsd->this_child->parent_fd, EV_PERSIST| 2695 EV_READ, child_handle_parent_command, user_data); 2696 if(event_base_set(event_base, handler) != 0) 2697 log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed"); 2698 if(event_add(handler, NULL) != 0) 2699 log_msg(LOG_ERR, "nsd ipcchild: event_add failed"); 2700 } 2701 2702 if(nsd->reuseport) { 2703 numifs = nsd->ifs / nsd->reuseport; 2704 from = numifs * nsd->this_child->child_num; 2705 if(from+numifs > nsd->ifs) { /* should not happen */ 2706 from = 0; 2707 numifs = nsd->ifs; 2708 } 2709 } else { 2710 from = 0; 2711 numifs = nsd->ifs; 2712 } 2713 2714 if (nsd->server_kind & NSD_SERVER_UDP) { 2715 memset(msgs, 0, sizeof(msgs)); 2716 for (i = 0; i < NUM_RECV_PER_SELECT; i++) { 2717 queries[i] = query_create(server_region, 2718 compressed_dname_offsets, 2719 compression_table_size, compressed_dnames); 2720 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 2721 iovecs[i].iov_base = buffer_begin(queries[i]->packet); 2722 iovecs[i].iov_len = buffer_remaining(queries[i]->packet);; 2723 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 2724 msgs[i].msg_hdr.msg_iovlen = 1; 2725 msgs[i].msg_hdr.msg_name = &queries[i]->addr; 2726 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 2727 } 2728 2729 for (i = from; i < from+numifs; ++i) { 2730 struct udp_handler_data *data = region_alloc_zero( 2731 nsd->server_region, sizeof(*data)); 2732 add_udp_handler(nsd, &nsd->udp[i], data); 2733 } 2734 } 2735 2736 /* 2737 * Keep track of all the TCP accept handlers so we can enable 2738 * and disable them based on the current number of active TCP 2739 * connections. 2740 */ 2741 if (nsd->server_kind & NSD_SERVER_TCP) { 2742 tcp_accept_handler_count = numifs; 2743 tcp_accept_handlers = region_alloc_array(server_region, 2744 numifs, sizeof(*tcp_accept_handlers)); 2745 2746 for (i = from; i < numifs; i++) { 2747 struct tcp_accept_handler_data *data = 2748 &tcp_accept_handlers[i-from]; 2749 memset(data, 0, sizeof(*data)); 2750 add_tcp_handler(nsd, &nsd->tcp[i], data); 2751 } 2752 } else { 2753 tcp_accept_handler_count = 0; 2754 } 2755 2756 /* The main loop... */ 2757 while ((mode = nsd->mode) != NSD_QUIT) { 2758 if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd); 2759 2760 /* Do we need to do the statistics... */ 2761 if (mode == NSD_STATS) { 2762 #ifdef BIND8_STATS 2763 int p = nsd->st.period; 2764 nsd->st.period = 1; /* force stats printout */ 2765 /* Dump the statistics */ 2766 bind8_stats(nsd); 2767 nsd->st.period = p; 2768 #else /* !BIND8_STATS */ 2769 log_msg(LOG_NOTICE, "Statistics support not enabled at compile time."); 2770 #endif /* BIND8_STATS */ 2771 2772 nsd->mode = NSD_RUN; 2773 } 2774 else if (mode == NSD_REAP_CHILDREN) { 2775 /* got signal, notify parent. parent reaps terminated children. */ 2776 if (nsd->this_child->parent_fd != -1) { 2777 sig_atomic_t parent_notify = NSD_REAP_CHILDREN; 2778 if (write(nsd->this_child->parent_fd, 2779 &parent_notify, 2780 sizeof(parent_notify)) == -1) 2781 { 2782 log_msg(LOG_ERR, "problems sending command from %d to parent: %s", 2783 (int) nsd->this_child->pid, strerror(errno)); 2784 } 2785 } else /* no parent, so reap 'em */ 2786 while (waitpid(-1, NULL, WNOHANG) > 0) ; 2787 nsd->mode = NSD_RUN; 2788 } 2789 else if(mode == NSD_RUN) { 2790 /* Wait for a query... */ 2791 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 2792 if (errno != EINTR) { 2793 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 2794 break; 2795 } 2796 } 2797 } else if(mode == NSD_QUIT) { 2798 /* ignore here, quit */ 2799 } else { 2800 log_msg(LOG_ERR, "mode bad value %d, back to service.", 2801 (int)mode); 2802 nsd->mode = NSD_RUN; 2803 } 2804 } 2805 2806 service_remaining_tcp(nsd); 2807 #ifdef BIND8_STATS 2808 bind8_stats(nsd); 2809 #endif /* BIND8_STATS */ 2810 2811 #ifdef MEMCLEAN /* OS collects memory pages */ 2812 #ifdef RATELIMIT 2813 rrl_deinit(nsd->this_child->child_num); 2814 #endif 2815 event_base_free(event_base); 2816 region_destroy(server_region); 2817 #endif 2818 server_shutdown(nsd); 2819 } 2820 2821 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg) 2822 { 2823 int* timed_out = (int*)arg; 2824 assert(event & EV_TIMEOUT); 2825 /* wake up the service tcp thread, note event is no longer 2826 * registered */ 2827 *timed_out = 1; 2828 } 2829 2830 void 2831 service_remaining_tcp(struct nsd* nsd) 2832 { 2833 struct tcp_handler_data* p; 2834 struct event_base* event_base; 2835 /* check if it is needed */ 2836 if(nsd->current_tcp_count == 0 || tcp_active_list == NULL) 2837 return; 2838 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections")); 2839 2840 /* setup event base */ 2841 event_base = nsd_child_event_base(); 2842 if(!event_base) { 2843 log_msg(LOG_ERR, "nsd remain tcp could not create event base"); 2844 return; 2845 } 2846 /* register tcp connections */ 2847 for(p = tcp_active_list; p != NULL; p = p->next) { 2848 struct timeval timeout; 2849 int fd = p->event.ev_fd; 2850 #ifdef USE_MINI_EVENT 2851 short event = p->event.ev_flags & (EV_READ|EV_WRITE); 2852 #else 2853 short event = p->event.ev_events & (EV_READ|EV_WRITE); 2854 #endif 2855 void (*fn)(int, short, void*); 2856 #ifdef HAVE_SSL 2857 if(p->tls) { 2858 if((event&EV_READ)) 2859 fn = handle_tls_reading; 2860 else fn = handle_tls_writing; 2861 } else { 2862 #endif 2863 if((event&EV_READ)) 2864 fn = handle_tcp_reading; 2865 else fn = handle_tcp_writing; 2866 #ifdef HAVE_SSL 2867 } 2868 #endif 2869 2870 /* set timeout to 1/10 second */ 2871 if(p->tcp_timeout > 100) 2872 p->tcp_timeout = 100; 2873 timeout.tv_sec = p->tcp_timeout / 1000; 2874 timeout.tv_usec = (p->tcp_timeout % 1000)*1000; 2875 event_del(&p->event); 2876 memset(&p->event, 0, sizeof(p->event)); 2877 event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT, 2878 fn, p); 2879 if(event_base_set(event_base, &p->event) != 0) 2880 log_msg(LOG_ERR, "event base set failed"); 2881 if(event_add(&p->event, &timeout) != 0) 2882 log_msg(LOG_ERR, "event add failed"); 2883 } 2884 2885 /* handle it */ 2886 while(nsd->current_tcp_count > 0) { 2887 mode_t m = server_signal_mode(nsd); 2888 struct event timeout; 2889 struct timeval tv; 2890 int timed_out = 0; 2891 if(m == NSD_QUIT || m == NSD_SHUTDOWN || 2892 m == NSD_REAP_CHILDREN) { 2893 /* quit */ 2894 break; 2895 } 2896 /* timer */ 2897 /* have to do something every second */ 2898 tv.tv_sec = 1; 2899 tv.tv_usec = 0; 2900 memset(&timeout, 0, sizeof(timeout)); 2901 event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout, 2902 &timed_out); 2903 if(event_base_set(event_base, &timeout) != 0) 2904 log_msg(LOG_ERR, "remaintcp timer: event_base_set failed"); 2905 if(event_add(&timeout, &tv) != 0) 2906 log_msg(LOG_ERR, "remaintcp timer: event_add failed"); 2907 2908 /* service loop */ 2909 if(event_base_loop(event_base, EVLOOP_ONCE) == -1) { 2910 if (errno != EINTR) { 2911 log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno)); 2912 break; 2913 } 2914 } 2915 if(!timed_out) { 2916 event_del(&timeout); 2917 } else { 2918 /* timed out, quit */ 2919 VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit")); 2920 break; 2921 } 2922 } 2923 #ifdef MEMCLEAN 2924 event_base_free(event_base); 2925 #endif 2926 /* continue to quit after return */ 2927 } 2928 2929 /* Implement recvmmsg and sendmmsg if the platform does not. These functions 2930 * are always used, even if nonblocking operations are broken, in which case 2931 * NUM_RECV_PER_SELECT is defined to 1 (one). 2932 */ 2933 #if defined(HAVE_RECVMMSG) 2934 #define nsd_recvmmsg recvmmsg 2935 #else /* !HAVE_RECVMMSG */ 2936 2937 static int 2938 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, 2939 int flags, struct timespec *timeout) 2940 { 2941 int orig_errno; 2942 unsigned int vpos = 0; 2943 ssize_t rcvd; 2944 2945 /* timeout is ignored, ensure caller does not expect it to work */ 2946 assert(timeout == NULL); 2947 2948 orig_errno = errno; 2949 errno = 0; 2950 while(vpos < vlen) { 2951 rcvd = recvfrom(sockfd, 2952 msgvec[vpos].msg_hdr.msg_iov->iov_base, 2953 msgvec[vpos].msg_hdr.msg_iov->iov_len, 2954 flags, 2955 msgvec[vpos].msg_hdr.msg_name, 2956 &msgvec[vpos].msg_hdr.msg_namelen); 2957 if(rcvd < 0) { 2958 break; 2959 } else { 2960 assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX); 2961 msgvec[vpos].msg_len = (unsigned int)rcvd; 2962 vpos++; 2963 } 2964 } 2965 2966 if(vpos) { 2967 /* error will be picked up next time */ 2968 return (int)vpos; 2969 } else if(errno == 0) { 2970 errno = orig_errno; 2971 return 0; 2972 } else if(errno == EAGAIN) { 2973 return 0; 2974 } 2975 2976 return -1; 2977 } 2978 #endif /* HAVE_RECVMMSG */ 2979 2980 #ifdef HAVE_SENDMMSG 2981 #define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__) 2982 #else /* !HAVE_SENDMMSG */ 2983 2984 static int 2985 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags) 2986 { 2987 int orig_errno; 2988 unsigned int vpos = 0; 2989 ssize_t snd; 2990 2991 orig_errno = errno; 2992 errno = 0; 2993 while(vpos < vlen) { 2994 assert(msgvec[vpos].msg_hdr.msg_iovlen == 1); 2995 snd = sendto(sockfd, 2996 msgvec[vpos].msg_hdr.msg_iov->iov_base, 2997 msgvec[vpos].msg_hdr.msg_iov->iov_len, 2998 flags, 2999 msgvec[vpos].msg_hdr.msg_name, 3000 msgvec[vpos].msg_hdr.msg_namelen); 3001 if(snd < 0) { 3002 break; 3003 } else { 3004 msgvec[vpos].msg_len = (unsigned int)snd; 3005 vpos++; 3006 } 3007 } 3008 3009 if(vpos) { 3010 return (int)vpos; 3011 } else if(errno == 0) { 3012 errno = orig_errno; 3013 return 0; 3014 } 3015 3016 return -1; 3017 } 3018 #endif /* HAVE_SENDMMSG */ 3019 3020 static void 3021 handle_udp(int fd, short event, void* arg) 3022 { 3023 struct udp_handler_data *data = (struct udp_handler_data *) arg; 3024 int received, sent, recvcount, i; 3025 struct query *q; 3026 3027 if (!(event & EV_READ)) { 3028 return; 3029 } 3030 recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL); 3031 /* this printf strangely gave a performance increase on Linux */ 3032 /* printf("recvcount %d \n", recvcount); */ 3033 if (recvcount == -1) { 3034 if (errno != EAGAIN && errno != EINTR) { 3035 log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno)); 3036 STATUP(data->nsd, rxerr); 3037 /* No zone statup */ 3038 } 3039 /* Simply no data available */ 3040 return; 3041 } 3042 for (i = 0; i < recvcount; i++) { 3043 loopstart: 3044 received = msgs[i].msg_len; 3045 queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen; 3046 q = queries[i]; 3047 if (received == -1) { 3048 log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror( 3049 #if defined(HAVE_RECVMMSG) 3050 msgs[i].msg_hdr.msg_flags 3051 #else 3052 errno 3053 #endif 3054 )); 3055 STATUP(data->nsd, rxerr); 3056 /* No zone statup */ 3057 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3058 iovecs[i].iov_len = buffer_remaining(q->packet); 3059 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3060 goto swap_drop; 3061 } 3062 3063 /* Account... */ 3064 #ifdef BIND8_STATS 3065 if (data->socket->addr.ai_family == AF_INET) { 3066 STATUP(data->nsd, qudp); 3067 } else if (data->socket->addr.ai_family == AF_INET6) { 3068 STATUP(data->nsd, qudp6); 3069 } 3070 #endif 3071 3072 buffer_skip(q->packet, received); 3073 buffer_flip(q->packet); 3074 #ifdef USE_DNSTAP 3075 dt_collector_submit_auth_query(data->nsd, &q->addr, q->addrlen, 3076 q->tcp, q->packet); 3077 #endif /* USE_DNSTAP */ 3078 3079 /* Process and answer the query... */ 3080 if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) { 3081 if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) { 3082 STATUP(data->nsd, nona); 3083 ZTATUP(data->nsd, q->zone, nona); 3084 } 3085 3086 #ifdef USE_ZONE_STATS 3087 if (data->socket->addr.ai_family == AF_INET) { 3088 ZTATUP(data->nsd, q->zone, qudp); 3089 } else if (data->socket->addr.ai_family == AF_INET6) { 3090 ZTATUP(data->nsd, q->zone, qudp6); 3091 } 3092 #endif 3093 3094 /* Add EDNS0 and TSIG info if necessary. */ 3095 query_add_optional(q, data->nsd); 3096 3097 buffer_flip(q->packet); 3098 iovecs[i].iov_len = buffer_remaining(q->packet); 3099 #ifdef BIND8_STATS 3100 /* Account the rcode & TC... */ 3101 STATUP2(data->nsd, rcode, RCODE(q->packet)); 3102 ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet)); 3103 if (TC(q->packet)) { 3104 STATUP(data->nsd, truncated); 3105 ZTATUP(data->nsd, q->zone, truncated); 3106 } 3107 #endif /* BIND8_STATS */ 3108 #ifdef USE_DNSTAP 3109 dt_collector_submit_auth_response(data->nsd, 3110 &q->addr, q->addrlen, q->tcp, q->packet, 3111 q->zone); 3112 #endif /* USE_DNSTAP */ 3113 } else { 3114 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3115 iovecs[i].iov_len = buffer_remaining(q->packet); 3116 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3117 swap_drop: 3118 STATUP(data->nsd, dropped); 3119 ZTATUP(data->nsd, q->zone, dropped); 3120 if(i != recvcount-1) { 3121 /* swap with last and decrease recvcount */ 3122 struct mmsghdr mtmp = msgs[i]; 3123 struct iovec iotmp = iovecs[i]; 3124 recvcount--; 3125 msgs[i] = msgs[recvcount]; 3126 iovecs[i] = iovecs[recvcount]; 3127 queries[i] = queries[recvcount]; 3128 msgs[recvcount] = mtmp; 3129 iovecs[recvcount] = iotmp; 3130 queries[recvcount] = q; 3131 msgs[i].msg_hdr.msg_iov = &iovecs[i]; 3132 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount]; 3133 goto loopstart; 3134 } else { recvcount --; } 3135 } 3136 } 3137 3138 /* send until all are sent */ 3139 i = 0; 3140 while(i<recvcount) { 3141 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0); 3142 if(sent == -1) { 3143 /* don't log transient network full errors, unless 3144 * on higher verbosity */ 3145 if(!(errno == ENOBUFS && verbosity < 1) && 3146 #ifdef EWOULDBLOCK 3147 !(errno == EWOULDBLOCK && verbosity < 1) && 3148 #endif 3149 !(errno == EAGAIN && verbosity < 1)) { 3150 const char* es = strerror(errno); 3151 char a[48]; 3152 addr2str(&queries[i]->addr, a, sizeof(a)); 3153 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es); 3154 } 3155 #ifdef BIND8_STATS 3156 data->nsd->st.txerr += recvcount-i; 3157 #endif /* BIND8_STATS */ 3158 break; 3159 } 3160 i += sent; 3161 } 3162 for(i=0; i<recvcount; i++) { 3163 query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0); 3164 iovecs[i].iov_len = buffer_remaining(queries[i]->packet); 3165 msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen; 3166 } 3167 } 3168 3169 #ifdef HAVE_SSL 3170 /* 3171 * Setup an event for the tcp handler. 3172 */ 3173 static void 3174 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *), 3175 int fd, short event) 3176 { 3177 struct timeval timeout; 3178 struct event_base* ev_base; 3179 3180 timeout.tv_sec = data->nsd->tcp_timeout; 3181 timeout.tv_usec = 0L; 3182 3183 ev_base = data->event.ev_base; 3184 event_del(&data->event); 3185 memset(&data->event, 0, sizeof(data->event)); 3186 event_set(&data->event, fd, event, fn, data); 3187 if(event_base_set(ev_base, &data->event) != 0) 3188 log_msg(LOG_ERR, "event base set failed"); 3189 if(event_add(&data->event, &timeout) != 0) 3190 log_msg(LOG_ERR, "event add failed"); 3191 } 3192 #endif /* HAVE_SSL */ 3193 3194 static void 3195 cleanup_tcp_handler(struct tcp_handler_data* data) 3196 { 3197 event_del(&data->event); 3198 #ifdef HAVE_SSL 3199 if(data->tls) { 3200 SSL_shutdown(data->tls); 3201 SSL_free(data->tls); 3202 data->tls = NULL; 3203 } 3204 #endif 3205 close(data->event.ev_fd); 3206 if(data->prev) 3207 data->prev->next = data->next; 3208 else tcp_active_list = data->next; 3209 if(data->next) 3210 data->next->prev = data->prev; 3211 3212 /* 3213 * Enable the TCP accept handlers when the current number of 3214 * TCP connections is about to drop below the maximum number 3215 * of TCP connections. 3216 */ 3217 if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) { 3218 configure_handler_event_types(EV_READ|EV_PERSIST); 3219 if(slowaccept) { 3220 event_del(&slowaccept_event); 3221 slowaccept = 0; 3222 } 3223 } 3224 --data->nsd->current_tcp_count; 3225 assert(data->nsd->current_tcp_count >= 0); 3226 3227 region_destroy(data->region); 3228 } 3229 3230 static void 3231 handle_tcp_reading(int fd, short event, void* arg) 3232 { 3233 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3234 ssize_t received; 3235 struct event_base* ev_base; 3236 struct timeval timeout; 3237 3238 if ((event & EV_TIMEOUT)) { 3239 /* Connection timed out. */ 3240 cleanup_tcp_handler(data); 3241 return; 3242 } 3243 3244 if (data->nsd->tcp_query_count > 0 && 3245 data->query_count >= data->nsd->tcp_query_count) { 3246 /* No more queries allowed on this tcp connection. */ 3247 cleanup_tcp_handler(data); 3248 return; 3249 } 3250 3251 assert((event & EV_READ)); 3252 3253 if (data->bytes_transmitted == 0) { 3254 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 3255 } 3256 3257 /* 3258 * Check if we received the leading packet length bytes yet. 3259 */ 3260 if (data->bytes_transmitted < sizeof(uint16_t)) { 3261 received = read(fd, 3262 (char *) &data->query->tcplen 3263 + data->bytes_transmitted, 3264 sizeof(uint16_t) - data->bytes_transmitted); 3265 if (received == -1) { 3266 if (errno == EAGAIN || errno == EINTR) { 3267 /* 3268 * Read would block, wait until more 3269 * data is available. 3270 */ 3271 return; 3272 } else { 3273 char buf[48]; 3274 addr2str(&data->query->addr, buf, sizeof(buf)); 3275 #ifdef ECONNRESET 3276 if (verbosity >= 2 || errno != ECONNRESET) 3277 #endif /* ECONNRESET */ 3278 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3279 cleanup_tcp_handler(data); 3280 return; 3281 } 3282 } else if (received == 0) { 3283 /* EOF */ 3284 cleanup_tcp_handler(data); 3285 return; 3286 } 3287 3288 data->bytes_transmitted += received; 3289 if (data->bytes_transmitted < sizeof(uint16_t)) { 3290 /* 3291 * Not done with the tcplen yet, wait for more 3292 * data to become available. 3293 */ 3294 return; 3295 } 3296 3297 assert(data->bytes_transmitted == sizeof(uint16_t)); 3298 3299 data->query->tcplen = ntohs(data->query->tcplen); 3300 3301 /* 3302 * Minimum query size is: 3303 * 3304 * Size of the header (12) 3305 * + Root domain name (1) 3306 * + Query class (2) 3307 * + Query type (2) 3308 */ 3309 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 3310 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 3311 cleanup_tcp_handler(data); 3312 return; 3313 } 3314 3315 if (data->query->tcplen > data->query->maxlen) { 3316 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 3317 cleanup_tcp_handler(data); 3318 return; 3319 } 3320 3321 buffer_set_limit(data->query->packet, data->query->tcplen); 3322 } 3323 3324 assert(buffer_remaining(data->query->packet) > 0); 3325 3326 /* Read the (remaining) query data. */ 3327 received = read(fd, 3328 buffer_current(data->query->packet), 3329 buffer_remaining(data->query->packet)); 3330 if (received == -1) { 3331 if (errno == EAGAIN || errno == EINTR) { 3332 /* 3333 * Read would block, wait until more data is 3334 * available. 3335 */ 3336 return; 3337 } else { 3338 char buf[48]; 3339 addr2str(&data->query->addr, buf, sizeof(buf)); 3340 #ifdef ECONNRESET 3341 if (verbosity >= 2 || errno != ECONNRESET) 3342 #endif /* ECONNRESET */ 3343 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno)); 3344 cleanup_tcp_handler(data); 3345 return; 3346 } 3347 } else if (received == 0) { 3348 /* EOF */ 3349 cleanup_tcp_handler(data); 3350 return; 3351 } 3352 3353 data->bytes_transmitted += received; 3354 buffer_skip(data->query->packet, received); 3355 if (buffer_remaining(data->query->packet) > 0) { 3356 /* 3357 * Message not yet complete, wait for more data to 3358 * become available. 3359 */ 3360 return; 3361 } 3362 3363 assert(buffer_position(data->query->packet) == data->query->tcplen); 3364 3365 /* Account... */ 3366 #ifdef BIND8_STATS 3367 #ifndef INET6 3368 STATUP(data->nsd, ctcp); 3369 #else 3370 if (data->query->addr.ss_family == AF_INET) { 3371 STATUP(data->nsd, ctcp); 3372 } else if (data->query->addr.ss_family == AF_INET6) { 3373 STATUP(data->nsd, ctcp6); 3374 } 3375 #endif 3376 #endif /* BIND8_STATS */ 3377 3378 /* We have a complete query, process it. */ 3379 3380 /* tcp-query-count: handle query counter ++ */ 3381 data->query_count++; 3382 3383 buffer_flip(data->query->packet); 3384 #ifdef USE_DNSTAP 3385 dt_collector_submit_auth_query(data->nsd, &data->query->addr, 3386 data->query->addrlen, data->query->tcp, data->query->packet); 3387 #endif /* USE_DNSTAP */ 3388 data->query_state = server_process_query(data->nsd, data->query); 3389 if (data->query_state == QUERY_DISCARDED) { 3390 /* Drop the packet and the entire connection... */ 3391 STATUP(data->nsd, dropped); 3392 ZTATUP(data->nsd, data->query->zone, dropped); 3393 cleanup_tcp_handler(data); 3394 return; 3395 } 3396 3397 #ifdef BIND8_STATS 3398 if (RCODE(data->query->packet) == RCODE_OK 3399 && !AA(data->query->packet)) 3400 { 3401 STATUP(data->nsd, nona); 3402 ZTATUP(data->nsd, data->query->zone, nona); 3403 } 3404 #endif /* BIND8_STATS */ 3405 3406 #ifdef USE_ZONE_STATS 3407 #ifndef INET6 3408 ZTATUP(data->nsd, data->query->zone, ctcp); 3409 #else 3410 if (data->query->addr.ss_family == AF_INET) { 3411 ZTATUP(data->nsd, data->query->zone, ctcp); 3412 } else if (data->query->addr.ss_family == AF_INET6) { 3413 ZTATUP(data->nsd, data->query->zone, ctcp6); 3414 } 3415 #endif 3416 #endif /* USE_ZONE_STATS */ 3417 3418 query_add_optional(data->query, data->nsd); 3419 3420 /* Switch to the tcp write handler. */ 3421 buffer_flip(data->query->packet); 3422 data->query->tcplen = buffer_remaining(data->query->packet); 3423 #ifdef BIND8_STATS 3424 /* Account the rcode & TC... */ 3425 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 3426 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 3427 if (TC(data->query->packet)) { 3428 STATUP(data->nsd, truncated); 3429 ZTATUP(data->nsd, data->query->zone, truncated); 3430 } 3431 #endif /* BIND8_STATS */ 3432 #ifdef USE_DNSTAP 3433 dt_collector_submit_auth_response(data->nsd, &data->query->addr, 3434 data->query->addrlen, data->query->tcp, data->query->packet, 3435 data->query->zone); 3436 #endif /* USE_DNSTAP */ 3437 data->bytes_transmitted = 0; 3438 3439 timeout.tv_sec = data->tcp_timeout / 1000; 3440 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 3441 3442 ev_base = data->event.ev_base; 3443 event_del(&data->event); 3444 memset(&data->event, 0, sizeof(data->event)); 3445 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 3446 handle_tcp_reading, data); 3447 if(event_base_set(ev_base, &data->event) != 0) 3448 log_msg(LOG_ERR, "event base set tcpr failed"); 3449 if(event_add(&data->event, &timeout) != 0) 3450 log_msg(LOG_ERR, "event add tcpr failed"); 3451 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 3452 handle_tcp_writing(fd, EV_WRITE, data); 3453 } 3454 3455 static void 3456 handle_tcp_writing(int fd, short event, void* arg) 3457 { 3458 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3459 ssize_t sent; 3460 struct query *q = data->query; 3461 struct timeval timeout; 3462 struct event_base* ev_base; 3463 3464 if ((event & EV_TIMEOUT)) { 3465 /* Connection timed out. */ 3466 cleanup_tcp_handler(data); 3467 return; 3468 } 3469 3470 assert((event & EV_WRITE)); 3471 3472 if (data->bytes_transmitted < sizeof(q->tcplen)) { 3473 /* Writing the response packet length. */ 3474 uint16_t n_tcplen = htons(q->tcplen); 3475 #ifdef HAVE_WRITEV 3476 struct iovec iov[2]; 3477 iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted; 3478 iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 3479 iov[1].iov_base = buffer_begin(q->packet); 3480 iov[1].iov_len = buffer_limit(q->packet); 3481 sent = writev(fd, iov, 2); 3482 #else /* HAVE_WRITEV */ 3483 sent = write(fd, 3484 (const char *) &n_tcplen + data->bytes_transmitted, 3485 sizeof(n_tcplen) - data->bytes_transmitted); 3486 #endif /* HAVE_WRITEV */ 3487 if (sent == -1) { 3488 if (errno == EAGAIN || errno == EINTR) { 3489 /* 3490 * Write would block, wait until 3491 * socket becomes writable again. 3492 */ 3493 return; 3494 } else { 3495 #ifdef ECONNRESET 3496 if(verbosity >= 2 || errno != ECONNRESET) 3497 #endif /* ECONNRESET */ 3498 #ifdef EPIPE 3499 if(verbosity >= 2 || errno != EPIPE) 3500 #endif /* EPIPE 'broken pipe' */ 3501 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 3502 cleanup_tcp_handler(data); 3503 return; 3504 } 3505 } 3506 3507 data->bytes_transmitted += sent; 3508 if (data->bytes_transmitted < sizeof(q->tcplen)) { 3509 /* 3510 * Writing not complete, wait until socket 3511 * becomes writable again. 3512 */ 3513 return; 3514 } 3515 3516 #ifdef HAVE_WRITEV 3517 sent -= sizeof(n_tcplen); 3518 /* handle potential 'packet done' code */ 3519 goto packet_could_be_done; 3520 #endif 3521 } 3522 3523 sent = write(fd, 3524 buffer_current(q->packet), 3525 buffer_remaining(q->packet)); 3526 if (sent == -1) { 3527 if (errno == EAGAIN || errno == EINTR) { 3528 /* 3529 * Write would block, wait until 3530 * socket becomes writable again. 3531 */ 3532 return; 3533 } else { 3534 #ifdef ECONNRESET 3535 if(verbosity >= 2 || errno != ECONNRESET) 3536 #endif /* ECONNRESET */ 3537 #ifdef EPIPE 3538 if(verbosity >= 2 || errno != EPIPE) 3539 #endif /* EPIPE 'broken pipe' */ 3540 log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno)); 3541 cleanup_tcp_handler(data); 3542 return; 3543 } 3544 } 3545 3546 data->bytes_transmitted += sent; 3547 #ifdef HAVE_WRITEV 3548 packet_could_be_done: 3549 #endif 3550 buffer_skip(q->packet, sent); 3551 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 3552 /* 3553 * Still more data to write when socket becomes 3554 * writable again. 3555 */ 3556 return; 3557 } 3558 3559 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 3560 3561 if (data->query_state == QUERY_IN_AXFR) { 3562 /* Continue processing AXFR and writing back results. */ 3563 buffer_clear(q->packet); 3564 data->query_state = query_axfr(data->nsd, q); 3565 if (data->query_state != QUERY_PROCESSED) { 3566 query_add_optional(data->query, data->nsd); 3567 3568 /* Reset data. */ 3569 buffer_flip(q->packet); 3570 q->tcplen = buffer_remaining(q->packet); 3571 data->bytes_transmitted = 0; 3572 /* Reset timeout. */ 3573 timeout.tv_sec = data->tcp_timeout / 1000; 3574 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 3575 ev_base = data->event.ev_base; 3576 event_del(&data->event); 3577 memset(&data->event, 0, sizeof(data->event)); 3578 event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, 3579 handle_tcp_writing, data); 3580 if(event_base_set(ev_base, &data->event) != 0) 3581 log_msg(LOG_ERR, "event base set tcpw failed"); 3582 if(event_add(&data->event, &timeout) != 0) 3583 log_msg(LOG_ERR, "event add tcpw failed"); 3584 3585 /* 3586 * Write data if/when the socket is writable 3587 * again. 3588 */ 3589 return; 3590 } 3591 } 3592 3593 /* 3594 * Done sending, wait for the next request to arrive on the 3595 * TCP socket by installing the TCP read handler. 3596 */ 3597 if (data->nsd->tcp_query_count > 0 && 3598 data->query_count >= data->nsd->tcp_query_count) { 3599 3600 (void) shutdown(fd, SHUT_WR); 3601 } 3602 3603 data->bytes_transmitted = 0; 3604 3605 timeout.tv_sec = data->tcp_timeout / 1000; 3606 timeout.tv_usec = (data->tcp_timeout % 1000)*1000; 3607 ev_base = data->event.ev_base; 3608 event_del(&data->event); 3609 memset(&data->event, 0, sizeof(data->event)); 3610 event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT, 3611 handle_tcp_reading, data); 3612 if(event_base_set(ev_base, &data->event) != 0) 3613 log_msg(LOG_ERR, "event base set tcpw failed"); 3614 if(event_add(&data->event, &timeout) != 0) 3615 log_msg(LOG_ERR, "event add tcpw failed"); 3616 } 3617 3618 #ifdef HAVE_SSL 3619 /** create SSL object and associate fd */ 3620 static SSL* 3621 incoming_ssl_fd(SSL_CTX* ctx, int fd) 3622 { 3623 SSL* ssl = SSL_new((SSL_CTX*)ctx); 3624 if(!ssl) { 3625 log_crypto_err("could not SSL_new"); 3626 return NULL; 3627 } 3628 SSL_set_accept_state(ssl); 3629 (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY); 3630 if(!SSL_set_fd(ssl, fd)) { 3631 log_crypto_err("could not SSL_set_fd"); 3632 SSL_free(ssl); 3633 return NULL; 3634 } 3635 return ssl; 3636 } 3637 3638 /** TLS handshake to upgrade TCP connection */ 3639 static int 3640 tls_handshake(struct tcp_handler_data* data, int fd, int writing) 3641 { 3642 int r; 3643 if(data->shake_state == tls_hs_read_event) { 3644 /* read condition satisfied back to writing */ 3645 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 3646 data->shake_state = tls_hs_none; 3647 return 1; 3648 } 3649 if(data->shake_state == tls_hs_write_event) { 3650 /* write condition satisfied back to reading */ 3651 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 3652 data->shake_state = tls_hs_none; 3653 return 1; 3654 } 3655 3656 /* (continue to) setup the TLS connection */ 3657 ERR_clear_error(); 3658 r = SSL_do_handshake(data->tls); 3659 3660 if(r != 1) { 3661 int want = SSL_get_error(data->tls, r); 3662 if(want == SSL_ERROR_WANT_READ) { 3663 if(data->shake_state == tls_hs_read) { 3664 /* try again later */ 3665 return 1; 3666 } 3667 data->shake_state = tls_hs_read; 3668 /* switch back to reading mode */ 3669 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 3670 return 1; 3671 } else if(want == SSL_ERROR_WANT_WRITE) { 3672 if(data->shake_state == tls_hs_write) { 3673 /* try again later */ 3674 return 1; 3675 } 3676 data->shake_state = tls_hs_write; 3677 /* switch back to writing mode */ 3678 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 3679 return 1; 3680 } else { 3681 if(r == 0) 3682 VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely")); 3683 else { 3684 unsigned long err = ERR_get_error(); 3685 if(!squelch_err_ssl_handshake(err)) { 3686 char a[64], s[256]; 3687 addr2str(&data->query->addr, a, sizeof(a)); 3688 snprintf(s, sizeof(s), "TLS handshake failed from %s", a); 3689 log_crypto_from_err(s, err); 3690 } 3691 } 3692 cleanup_tcp_handler(data); 3693 return 0; 3694 } 3695 } 3696 3697 /* Use to log successful upgrade for testing - could be removed*/ 3698 VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded.")); 3699 /* set back to the event we need to have when reading (or writing) */ 3700 if(data->shake_state == tls_hs_read && writing) { 3701 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE); 3702 } else if(data->shake_state == tls_hs_write && !writing) { 3703 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ); 3704 } 3705 data->shake_state = tls_hs_none; 3706 return 1; 3707 } 3708 3709 /** handle TLS reading of incoming query */ 3710 static void 3711 handle_tls_reading(int fd, short event, void* arg) 3712 { 3713 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3714 ssize_t received; 3715 3716 if ((event & EV_TIMEOUT)) { 3717 /* Connection timed out. */ 3718 cleanup_tcp_handler(data); 3719 return; 3720 } 3721 3722 if (data->nsd->tcp_query_count > 0 && 3723 data->query_count >= data->nsd->tcp_query_count) { 3724 /* No more queries allowed on this tcp connection. */ 3725 cleanup_tcp_handler(data); 3726 return; 3727 } 3728 3729 assert((event & EV_READ)); 3730 3731 if (data->bytes_transmitted == 0) { 3732 query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1); 3733 } 3734 3735 if(data->shake_state != tls_hs_none) { 3736 if(!tls_handshake(data, fd, 0)) 3737 return; 3738 if(data->shake_state != tls_hs_none) 3739 return; 3740 } 3741 3742 /* 3743 * Check if we received the leading packet length bytes yet. 3744 */ 3745 if(data->bytes_transmitted < sizeof(uint16_t)) { 3746 ERR_clear_error(); 3747 if((received=SSL_read(data->tls, (char *) &data->query->tcplen 3748 + data->bytes_transmitted, 3749 sizeof(uint16_t) - data->bytes_transmitted)) <= 0) { 3750 int want = SSL_get_error(data->tls, received); 3751 if(want == SSL_ERROR_ZERO_RETURN) { 3752 cleanup_tcp_handler(data); 3753 return; /* shutdown, closed */ 3754 } else if(want == SSL_ERROR_WANT_READ) { 3755 /* wants to be called again */ 3756 return; 3757 } 3758 else if(want == SSL_ERROR_WANT_WRITE) { 3759 /* switch to writing */ 3760 data->shake_state = tls_hs_write_event; 3761 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 3762 return; 3763 } 3764 cleanup_tcp_handler(data); 3765 log_crypto_err("could not SSL_read"); 3766 return; 3767 } 3768 3769 data->bytes_transmitted += received; 3770 if (data->bytes_transmitted < sizeof(uint16_t)) { 3771 /* 3772 * Not done with the tcplen yet, wait for more 3773 * data to become available. 3774 */ 3775 return; 3776 } 3777 3778 assert(data->bytes_transmitted == sizeof(uint16_t)); 3779 3780 data->query->tcplen = ntohs(data->query->tcplen); 3781 3782 /* 3783 * Minimum query size is: 3784 * 3785 * Size of the header (12) 3786 * + Root domain name (1) 3787 * + Query class (2) 3788 * + Query type (2) 3789 */ 3790 if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) { 3791 VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection")); 3792 cleanup_tcp_handler(data); 3793 return; 3794 } 3795 3796 if (data->query->tcplen > data->query->maxlen) { 3797 VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection")); 3798 cleanup_tcp_handler(data); 3799 return; 3800 } 3801 3802 buffer_set_limit(data->query->packet, data->query->tcplen); 3803 } 3804 3805 assert(buffer_remaining(data->query->packet) > 0); 3806 3807 /* Read the (remaining) query data. */ 3808 ERR_clear_error(); 3809 received = SSL_read(data->tls, (void*)buffer_current(data->query->packet), 3810 (int)buffer_remaining(data->query->packet)); 3811 if(received <= 0) { 3812 int want = SSL_get_error(data->tls, received); 3813 if(want == SSL_ERROR_ZERO_RETURN) { 3814 cleanup_tcp_handler(data); 3815 return; /* shutdown, closed */ 3816 } else if(want == SSL_ERROR_WANT_READ) { 3817 /* wants to be called again */ 3818 return; 3819 } 3820 else if(want == SSL_ERROR_WANT_WRITE) { 3821 /* switch back writing */ 3822 data->shake_state = tls_hs_write_event; 3823 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 3824 return; 3825 } 3826 cleanup_tcp_handler(data); 3827 log_crypto_err("could not SSL_read"); 3828 return; 3829 } 3830 3831 data->bytes_transmitted += received; 3832 buffer_skip(data->query->packet, received); 3833 if (buffer_remaining(data->query->packet) > 0) { 3834 /* 3835 * Message not yet complete, wait for more data to 3836 * become available. 3837 */ 3838 return; 3839 } 3840 3841 assert(buffer_position(data->query->packet) == data->query->tcplen); 3842 3843 /* Account... */ 3844 #ifndef INET6 3845 STATUP(data->nsd, ctls); 3846 #else 3847 if (data->query->addr.ss_family == AF_INET) { 3848 STATUP(data->nsd, ctls); 3849 } else if (data->query->addr.ss_family == AF_INET6) { 3850 STATUP(data->nsd, ctls6); 3851 } 3852 #endif 3853 3854 /* We have a complete query, process it. */ 3855 3856 /* tcp-query-count: handle query counter ++ */ 3857 data->query_count++; 3858 3859 buffer_flip(data->query->packet); 3860 #ifdef USE_DNSTAP 3861 dt_collector_submit_auth_query(data->nsd, &data->query->addr, 3862 data->query->addrlen, data->query->tcp, data->query->packet); 3863 #endif /* USE_DNSTAP */ 3864 data->query_state = server_process_query(data->nsd, data->query); 3865 if (data->query_state == QUERY_DISCARDED) { 3866 /* Drop the packet and the entire connection... */ 3867 STATUP(data->nsd, dropped); 3868 ZTATUP(data->nsd, data->query->zone, dropped); 3869 cleanup_tcp_handler(data); 3870 return; 3871 } 3872 3873 #ifdef BIND8_STATS 3874 if (RCODE(data->query->packet) == RCODE_OK 3875 && !AA(data->query->packet)) 3876 { 3877 STATUP(data->nsd, nona); 3878 ZTATUP(data->nsd, data->query->zone, nona); 3879 } 3880 #endif /* BIND8_STATS */ 3881 3882 #ifdef USE_ZONE_STATS 3883 #ifndef INET6 3884 ZTATUP(data->nsd, data->query->zone, ctls); 3885 #else 3886 if (data->query->addr.ss_family == AF_INET) { 3887 ZTATUP(data->nsd, data->query->zone, ctls); 3888 } else if (data->query->addr.ss_family == AF_INET6) { 3889 ZTATUP(data->nsd, data->query->zone, ctls6); 3890 } 3891 #endif 3892 #endif /* USE_ZONE_STATS */ 3893 3894 query_add_optional(data->query, data->nsd); 3895 3896 /* Switch to the tcp write handler. */ 3897 buffer_flip(data->query->packet); 3898 data->query->tcplen = buffer_remaining(data->query->packet); 3899 #ifdef BIND8_STATS 3900 /* Account the rcode & TC... */ 3901 STATUP2(data->nsd, rcode, RCODE(data->query->packet)); 3902 ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet)); 3903 if (TC(data->query->packet)) { 3904 STATUP(data->nsd, truncated); 3905 ZTATUP(data->nsd, data->query->zone, truncated); 3906 } 3907 #endif /* BIND8_STATS */ 3908 #ifdef USE_DNSTAP 3909 dt_collector_submit_auth_response(data->nsd, &data->query->addr, 3910 data->query->addrlen, data->query->tcp, data->query->packet, 3911 data->query->zone); 3912 #endif /* USE_DNSTAP */ 3913 data->bytes_transmitted = 0; 3914 3915 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 3916 3917 /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/ 3918 handle_tls_writing(fd, EV_WRITE, data); 3919 } 3920 3921 /** handle TLS writing of outgoing response */ 3922 static void 3923 handle_tls_writing(int fd, short event, void* arg) 3924 { 3925 struct tcp_handler_data *data = (struct tcp_handler_data *) arg; 3926 ssize_t sent; 3927 struct query *q = data->query; 3928 /* static variable that holds reassembly buffer used to put the 3929 * TCP length in front of the packet, like writev. */ 3930 static buffer_type* global_tls_temp_buffer = NULL; 3931 buffer_type* write_buffer; 3932 3933 if ((event & EV_TIMEOUT)) { 3934 /* Connection timed out. */ 3935 cleanup_tcp_handler(data); 3936 return; 3937 } 3938 3939 assert((event & EV_WRITE)); 3940 3941 if(data->shake_state != tls_hs_none) { 3942 if(!tls_handshake(data, fd, 1)) 3943 return; 3944 if(data->shake_state != tls_hs_none) 3945 return; 3946 } 3947 3948 (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE); 3949 3950 /* If we are writing the start of a message, we must include the length 3951 * this is done with a copy into write_buffer. */ 3952 write_buffer = NULL; 3953 if (data->bytes_transmitted == 0) { 3954 if(!global_tls_temp_buffer) { 3955 /* gets deallocated when nsd shuts down from 3956 * nsd.region */ 3957 global_tls_temp_buffer = buffer_create(nsd.region, 3958 QIOBUFSZ + sizeof(q->tcplen)); 3959 if (!global_tls_temp_buffer) { 3960 return; 3961 } 3962 } 3963 write_buffer = global_tls_temp_buffer; 3964 buffer_clear(write_buffer); 3965 buffer_write_u16(write_buffer, q->tcplen); 3966 buffer_write(write_buffer, buffer_current(q->packet), 3967 (int)buffer_remaining(q->packet)); 3968 buffer_flip(write_buffer); 3969 } else { 3970 write_buffer = q->packet; 3971 } 3972 3973 /* Write the response */ 3974 ERR_clear_error(); 3975 sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer)); 3976 if(sent <= 0) { 3977 int want = SSL_get_error(data->tls, sent); 3978 if(want == SSL_ERROR_ZERO_RETURN) { 3979 cleanup_tcp_handler(data); 3980 /* closed */ 3981 } else if(want == SSL_ERROR_WANT_READ) { 3982 /* switch back to reading */ 3983 data->shake_state = tls_hs_read_event; 3984 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 3985 } else if(want != SSL_ERROR_WANT_WRITE) { 3986 cleanup_tcp_handler(data); 3987 log_crypto_err("could not SSL_write"); 3988 } 3989 return; 3990 } 3991 3992 buffer_skip(write_buffer, sent); 3993 if(buffer_remaining(write_buffer) != 0) { 3994 /* If not all sent, sync up the real buffer if it wasn't used.*/ 3995 if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) { 3996 buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen)); 3997 } 3998 } 3999 4000 data->bytes_transmitted += sent; 4001 if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) { 4002 /* 4003 * Still more data to write when socket becomes 4004 * writable again. 4005 */ 4006 return; 4007 } 4008 4009 assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen)); 4010 4011 if (data->query_state == QUERY_IN_AXFR) { 4012 /* Continue processing AXFR and writing back results. */ 4013 buffer_clear(q->packet); 4014 data->query_state = query_axfr(data->nsd, q); 4015 if (data->query_state != QUERY_PROCESSED) { 4016 query_add_optional(data->query, data->nsd); 4017 4018 /* Reset data. */ 4019 buffer_flip(q->packet); 4020 q->tcplen = buffer_remaining(q->packet); 4021 data->bytes_transmitted = 0; 4022 /* Reset to writing mode. */ 4023 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT); 4024 4025 /* 4026 * Write data if/when the socket is writable 4027 * again. 4028 */ 4029 return; 4030 } 4031 } 4032 4033 /* 4034 * Done sending, wait for the next request to arrive on the 4035 * TCP socket by installing the TCP read handler. 4036 */ 4037 if (data->nsd->tcp_query_count > 0 && 4038 data->query_count >= data->nsd->tcp_query_count) { 4039 4040 (void) shutdown(fd, SHUT_WR); 4041 } 4042 4043 data->bytes_transmitted = 0; 4044 4045 tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT); 4046 } 4047 #endif 4048 4049 static void 4050 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), 4051 void* ATTR_UNUSED(arg)) 4052 { 4053 if(slowaccept) { 4054 configure_handler_event_types(EV_PERSIST | EV_READ); 4055 slowaccept = 0; 4056 } 4057 } 4058 4059 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen) 4060 { 4061 #ifndef HAVE_ACCEPT4 4062 int s = accept(fd, addr, addrlen); 4063 if (s != -1) { 4064 if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) { 4065 log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); 4066 close(s); 4067 s = -1; 4068 errno=EINTR; /* stop error printout as error in accept4 4069 by setting this errno, it omits printout, in 4070 later code that calls nsd_accept4 */ 4071 } 4072 } 4073 return s; 4074 #else 4075 return accept4(fd, addr, addrlen, SOCK_NONBLOCK); 4076 #endif /* HAVE_ACCEPT4 */ 4077 } 4078 4079 /* 4080 * Handle an incoming TCP connection. The connection is accepted and 4081 * a new TCP reader event handler is added. The TCP handler 4082 * is responsible for cleanup when the connection is closed. 4083 */ 4084 static void 4085 handle_tcp_accept(int fd, short event, void* arg) 4086 { 4087 struct tcp_accept_handler_data *data 4088 = (struct tcp_accept_handler_data *) arg; 4089 int s; 4090 int reject = 0; 4091 struct tcp_handler_data *tcp_data; 4092 region_type *tcp_region; 4093 #ifdef INET6 4094 struct sockaddr_storage addr; 4095 #else 4096 struct sockaddr_in addr; 4097 #endif 4098 socklen_t addrlen; 4099 struct timeval timeout; 4100 4101 if (!(event & EV_READ)) { 4102 return; 4103 } 4104 4105 if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) { 4106 reject = data->nsd->options->tcp_reject_overflow; 4107 if (!reject) { 4108 return; 4109 } 4110 } 4111 4112 /* Accept it... */ 4113 addrlen = sizeof(addr); 4114 s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen); 4115 if (s == -1) { 4116 /** 4117 * EMFILE and ENFILE is a signal that the limit of open 4118 * file descriptors has been reached. Pause accept(). 4119 * EINTR is a signal interrupt. The others are various OS ways 4120 * of saying that the client has closed the connection. 4121 */ 4122 if (errno == EMFILE || errno == ENFILE) { 4123 if (!slowaccept) { 4124 /* disable accept events */ 4125 struct timeval tv; 4126 configure_handler_event_types(0); 4127 tv.tv_sec = SLOW_ACCEPT_TIMEOUT; 4128 tv.tv_usec = 0L; 4129 memset(&slowaccept_event, 0, 4130 sizeof(slowaccept_event)); 4131 event_set(&slowaccept_event, -1, EV_TIMEOUT, 4132 handle_slowaccept_timeout, NULL); 4133 (void)event_base_set(data->event.ev_base, 4134 &slowaccept_event); 4135 (void)event_add(&slowaccept_event, &tv); 4136 slowaccept = 1; 4137 /* We don't want to spam the logs here */ 4138 } 4139 } else if (errno != EINTR 4140 && errno != EWOULDBLOCK 4141 #ifdef ECONNABORTED 4142 && errno != ECONNABORTED 4143 #endif /* ECONNABORTED */ 4144 #ifdef EPROTO 4145 && errno != EPROTO 4146 #endif /* EPROTO */ 4147 ) { 4148 log_msg(LOG_ERR, "accept failed: %s", strerror(errno)); 4149 } 4150 return; 4151 } 4152 4153 if (reject) { 4154 shutdown(s, SHUT_RDWR); 4155 close(s); 4156 return; 4157 } 4158 4159 /* 4160 * This region is deallocated when the TCP connection is 4161 * closed by the TCP handler. 4162 */ 4163 tcp_region = region_create(xalloc, free); 4164 tcp_data = (struct tcp_handler_data *) region_alloc( 4165 tcp_region, sizeof(struct tcp_handler_data)); 4166 tcp_data->region = tcp_region; 4167 tcp_data->query = query_create(tcp_region, compressed_dname_offsets, 4168 compression_table_size, compressed_dnames); 4169 tcp_data->nsd = data->nsd; 4170 tcp_data->query_count = 0; 4171 #ifdef HAVE_SSL 4172 tcp_data->shake_state = tls_hs_none; 4173 tcp_data->tls = NULL; 4174 #endif 4175 tcp_data->prev = NULL; 4176 tcp_data->next = NULL; 4177 4178 tcp_data->query_state = QUERY_PROCESSED; 4179 tcp_data->bytes_transmitted = 0; 4180 memcpy(&tcp_data->query->addr, &addr, addrlen); 4181 tcp_data->query->addrlen = addrlen; 4182 4183 tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000; 4184 if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) { 4185 /* very busy, give smaller timeout */ 4186 tcp_data->tcp_timeout = 200; 4187 } 4188 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4189 timeout.tv_sec = tcp_data->tcp_timeout / 1000; 4190 timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000; 4191 4192 #ifdef HAVE_SSL 4193 if (data->tls_accept) { 4194 tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s); 4195 if(!tcp_data->tls) { 4196 close(s); 4197 return; 4198 } 4199 tcp_data->shake_state = tls_hs_read; 4200 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4201 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4202 handle_tls_reading, tcp_data); 4203 } else { 4204 #endif 4205 memset(&tcp_data->event, 0, sizeof(tcp_data->event)); 4206 event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT, 4207 handle_tcp_reading, tcp_data); 4208 #ifdef HAVE_SSL 4209 } 4210 #endif 4211 if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) { 4212 log_msg(LOG_ERR, "cannot set tcp event base"); 4213 close(s); 4214 region_destroy(tcp_region); 4215 return; 4216 } 4217 if(event_add(&tcp_data->event, &timeout) != 0) { 4218 log_msg(LOG_ERR, "cannot add tcp to event base"); 4219 close(s); 4220 region_destroy(tcp_region); 4221 return; 4222 } 4223 if(tcp_active_list) { 4224 tcp_active_list->prev = tcp_data; 4225 tcp_data->next = tcp_active_list; 4226 } 4227 tcp_active_list = tcp_data; 4228 4229 /* 4230 * Keep track of the total number of TCP handlers installed so 4231 * we can stop accepting connections when the maximum number 4232 * of simultaneous TCP connections is reached. 4233 * 4234 * If tcp-reject-overflow is enabled, however, then we do not 4235 * change the handler event type; we keep it as-is and accept 4236 * overflow TCP connections only so that we can forcibly kill 4237 * them off. 4238 */ 4239 ++data->nsd->current_tcp_count; 4240 if (!data->nsd->options->tcp_reject_overflow && 4241 data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) 4242 { 4243 configure_handler_event_types(0); 4244 } 4245 } 4246 4247 static void 4248 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout) 4249 { 4250 size_t i; 4251 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4252 for (i = 0; i < nsd->child_count; ++i) { 4253 if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) { 4254 if (write(nsd->children[i].child_fd, 4255 &command, 4256 sizeof(command)) == -1) 4257 { 4258 if(errno != EAGAIN && errno != EINTR) 4259 log_msg(LOG_ERR, "problems sending command %d to server %d: %s", 4260 (int) command, 4261 (int) nsd->children[i].pid, 4262 strerror(errno)); 4263 } else if (timeout > 0) { 4264 (void)block_read(NULL, 4265 nsd->children[i].child_fd, 4266 &command, sizeof(command), timeout); 4267 } 4268 fsync(nsd->children[i].child_fd); 4269 close(nsd->children[i].child_fd); 4270 nsd->children[i].child_fd = -1; 4271 } 4272 } 4273 } 4274 4275 static void 4276 send_children_quit(struct nsd* nsd) 4277 { 4278 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit")); 4279 send_children_command(nsd, NSD_QUIT, 0); 4280 } 4281 4282 static void 4283 send_children_quit_and_wait(struct nsd* nsd) 4284 { 4285 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait")); 4286 send_children_command(nsd, NSD_QUIT_CHILD, 3); 4287 } 4288 4289 #ifdef BIND8_STATS 4290 static void 4291 set_children_stats(struct nsd* nsd) 4292 { 4293 size_t i; 4294 assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0); 4295 DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children")); 4296 for (i = 0; i < nsd->child_count; ++i) { 4297 nsd->children[i].need_to_send_STATS = 1; 4298 nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE; 4299 } 4300 } 4301 #endif /* BIND8_STATS */ 4302 4303 static void 4304 configure_handler_event_types(short event_types) 4305 { 4306 size_t i; 4307 4308 for (i = 0; i < tcp_accept_handler_count; ++i) { 4309 struct event* handler = &tcp_accept_handlers[i].event; 4310 if(event_types) { 4311 /* reassign */ 4312 int fd = handler->ev_fd; 4313 struct event_base* base = handler->ev_base; 4314 if(tcp_accept_handlers[i].event_added) 4315 event_del(handler); 4316 memset(handler, 0, sizeof(*handler)); 4317 event_set(handler, fd, event_types, 4318 handle_tcp_accept, &tcp_accept_handlers[i]); 4319 if(event_base_set(base, handler) != 0) 4320 log_msg(LOG_ERR, "conhand: cannot event_base"); 4321 if(event_add(handler, NULL) != 0) 4322 log_msg(LOG_ERR, "conhand: cannot event_add"); 4323 tcp_accept_handlers[i].event_added = 1; 4324 } else { 4325 /* remove */ 4326 if(tcp_accept_handlers[i].event_added) { 4327 event_del(handler); 4328 tcp_accept_handlers[i].event_added = 0; 4329 } 4330 } 4331 } 4332 } 4333