/*
 * server.c -- nsd(8) network input/output
 *
 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
 *
 * See LICENSE for the license.
 *
 */

#include "config.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/wait.h>

#include <netinet/in.h>
#include <arpa/inet.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>
#include <netdb.h>
#ifndef SHUT_WR
#define SHUT_WR 1
#endif
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif /* HAVE_MMAP */
#include <openssl/rand.h>
#ifndef USE_MINI_EVENT
# ifdef HAVE_EVENT_H
#  include <event.h>
# else
#  include <event2/event.h>
#  include "event2/event_struct.h"
#  include "event2/event_compat.h"
# endif
#else
# include "mini_event.h"
#endif

#include "axfr.h"
#include "namedb.h"
#include "netio.h"
#include "xfrd.h"
#include "xfrd-tcp.h"
#include "xfrd-disk.h"
#include "difffile.h"
#include "nsec3.h"
#include "ipc.h"
#include "udb.h"
#include "remote.h"
#include "lookup3.h"
#include "rrl.h"

#define RELOAD_SYNC_TIMEOUT 25 /* seconds */

/*
 * Data for the UDP handlers.
 */
struct udp_handler_data
{
	struct nsd *nsd;
	struct nsd_socket *socket;
	query_type *query;
};

struct tcp_accept_handler_data {
	struct nsd *nsd;
	struct nsd_socket *socket;
	int event_added;
	struct event event;
};

/*
 * These globals are used to enable the TCP accept handlers
 * when the number of TCP connections drops below the maximum
 * number of TCP connections.
 */
static size_t tcp_accept_handler_count;
static struct tcp_accept_handler_data* tcp_accept_handlers;

static struct event slowaccept_event;
static int slowaccept;

#ifndef NONBLOCKING_IS_BROKEN
# define NUM_RECV_PER_SELECT 100
#endif

#if (!defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG))
struct mmsghdr msgs[NUM_RECV_PER_SELECT];
struct iovec iovecs[NUM_RECV_PER_SELECT];
struct query *queries[NUM_RECV_PER_SELECT];
#endif

/*
 * Data for the TCP connection handlers.
 *
 * The TCP handlers use non-blocking I/O. This is necessary to avoid
 * blocking the entire server on a slow TCP connection, but does make
 * reading from and writing to the socket more complicated.
 *
 * Basically, whenever a read/write would block (indicated by the
 * EAGAIN errno variable) we remember the position we were reading
 * from/writing to and return from the TCP reading/writing event
 * handler. When the socket becomes readable/writable again we
 * continue from the same position.
 */
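/*
 * Illustrative sketch (not nsd code) of the resume-on-EAGAIN pattern
 * described above, using the bytes_transmitted cursor kept in the
 * handler data; the names buf and total_length are for illustration:
 *
 *	received = read(fd, buf + data->bytes_transmitted,
 *		total_length - data->bytes_transmitted);
 *	if (received == -1 && (errno == EAGAIN || errno == EINTR))
 *		return;		// try again on the next readable event
 *	data->bytes_transmitted += received;
 *	if (data->bytes_transmitted < total_length)
 *		return;		// partial transfer, resume later
 *	// full packet transferred, process it
 */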
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure. This region is destroyed
	 * when the connection is closed.
	 */
	region_type* region;

	/*
	 * The global nsd structure.
	 */
	struct nsd* nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type* query;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type query_state;

	/*
	 * The event for the file descriptor and tcp timeout.
	 */
	struct event event;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet. The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t bytes_transmitted;

	/*
	 * The number of queries handled by this specific TCP connection.
	 */
	int query_count;
};

/*
 * Handle incoming queries on the UDP server sockets.
 */
static void handle_udp(int fd, short event, void* arg);

/*
 * Handle incoming connections on the TCP sockets. These handlers
 * usually wait for the NETIO_EVENT_READ event (indicating an incoming
 * connection) but are disabled when the number of current TCP
 * connections is equal to the maximum number of TCP connections.
 * Disabling is done by changing the handler to wait for the
 * NETIO_EVENT_NONE type. This is done using the function
 * configure_tcp_accept_handlers.
 */
static void handle_tcp_accept(int fd, short event, void* arg);

/*
 * Handle incoming queries on a TCP connection. The TCP connections
 * are configured to be non-blocking and the handler may be called
 * multiple times before a complete query is received.
 */
static void handle_tcp_reading(int fd, short event, void* arg);

/*
 * Handle outgoing responses on a TCP connection. The TCP connections
 * are configured to be non-blocking and the handler may be called
 * multiple times before a complete response is sent.
 */
static void handle_tcp_writing(int fd, short event, void* arg);

/*
 * Send all children the quit command (non-blocking), then close the pipe.
 */
static void send_children_quit(struct nsd* nsd);
/* same, for shutdown time, waits for child to exit to avoid restart issues */
static void send_children_quit_and_wait(struct nsd* nsd);

/* set children's flags to send NSD_STATS to them */
#ifdef BIND8_STATS
static void set_children_stats(struct nsd* nsd);
#endif /* BIND8_STATS */

/*
 * Change the event types the HANDLERS are interested in to EVENT_TYPES.
 */
static void configure_handler_event_types(short event_types);

static uint16_t *compressed_dname_offsets = 0;
static uint32_t compression_table_capacity = 0;
static uint32_t compression_table_size = 0;

/*
 * Remove the specified pid from the list of child pids. Returns -1 if
 * the pid is not in the list, child_num otherwise. The field is set to 0.
 */
static int
delete_child_pid(struct nsd *nsd, pid_t pid)
{
	size_t i;
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid == pid) {
			nsd->children[i].pid = 0;
			if(!nsd->children[i].need_to_exit) {
				if(nsd->children[i].child_fd != -1)
					close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
				if(nsd->children[i].handler)
					nsd->children[i].handler->fd = -1;
			}
			return i;
		}
	}
	return -1;
}

/*
 * Restart child servers if necessary.
 */
static int
restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	struct main_ipc_handler_data *ipc_data;
	size_t i;
	int sv[2];

	/* Fork the child processes... */
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid <= 0) {
			if (nsd->children[i].child_fd != -1)
				close(nsd->children[i].child_fd);
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
				log_msg(LOG_ERR, "socketpair: %s",
					strerror(errno));
				return -1;
			}
			nsd->children[i].child_fd = sv[0];
			nsd->children[i].parent_fd = sv[1];
			nsd->children[i].pid = fork();
			switch (nsd->children[i].pid) {
			default: /* SERVER MAIN */
				close(nsd->children[i].parent_fd);
				nsd->children[i].parent_fd = -1;
				if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				if(!nsd->children[i].handler)
				{
					ipc_data = (struct main_ipc_handler_data*) region_alloc(
						region, sizeof(struct main_ipc_handler_data));
					ipc_data->nsd = nsd;
					ipc_data->child = &nsd->children[i];
					ipc_data->child_num = i;
					ipc_data->xfrd_sock = xfrd_sock_p;
					ipc_data->packet = buffer_create(region, QIOBUFSZ);
					ipc_data->forward_mode = 0;
					ipc_data->got_bytes = 0;
					ipc_data->total_bytes = 0;
					ipc_data->acl_num = 0;
					nsd->children[i].handler = (struct netio_handler*) region_alloc(
						region, sizeof(struct netio_handler));
					nsd->children[i].handler->fd = nsd->children[i].child_fd;
					nsd->children[i].handler->timeout = NULL;
					nsd->children[i].handler->user_data = ipc_data;
					nsd->children[i].handler->event_types = NETIO_EVENT_READ;
					nsd->children[i].handler->event_handler = parent_handle_child_command;
					netio_add_handler(netio, nsd->children[i].handler);
				}
				/* clear any ongoing ipc */
				ipc_data = (struct main_ipc_handler_data*)
					nsd->children[i].handler->user_data;
				ipc_data->forward_mode = 0;
				/* restart - update fd */
				nsd->children[i].handler->fd = nsd->children[i].child_fd;
				break;
			case 0: /* CHILD */
				/* the child need not be able to access the
				 * nsd.db file */
				namedb_close_udb(nsd->db);
				nsd->pid = 0;
				nsd->child_count = 0;
				nsd->server_kind = nsd->children[i].kind;
				nsd->this_child = &nsd->children[i];
				/* remove signal flags inherited from parent,
				 * the parent will handle them */
				nsd->signal_hint_reload_hup = 0;
				nsd->signal_hint_reload = 0;
				nsd->signal_hint_child = 0;
				nsd->signal_hint_quit = 0;
				nsd->signal_hint_shutdown = 0;
				nsd->signal_hint_stats = 0;
				nsd->signal_hint_statsusr = 0;
				close(*xfrd_sock_p);
				close(nsd->this_child->child_fd);
				nsd->this_child->child_fd = -1;
				if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				server_child(nsd);
				/* NOTREACH */
				exit(0);
			case -1:
				log_msg(LOG_ERR, "fork failed: %s",
					strerror(errno));
				return -1;
			}
		}
	}
	return 0;
}

#ifdef BIND8_STATS
static void set_bind8_alarm(struct nsd* nsd)
{
	/* resync so that the next alarm is on the next whole minute */
	if(nsd->st.period > 0) /* % by 0 gives divbyzero error */
		alarm(nsd->st.period - (time(NULL) % nsd->st.period));
}
#endif

/* set zone stat ids for zones initially read in */
static void
zonestatid_tree_set(struct nsd* nsd)
{
	struct radnode* n;
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		zone_type* zone = (zone_type*)n->elem;
		zone->zonestatid = getzonestatid(nsd->options, zone->opts);
	}
}

#ifdef USE_ZONE_STATS
void
server_zonestat_alloc(struct nsd* nsd)
{
	size_t num = (nsd->options->zonestatnames->count==0?1:
			nsd->options->zonestatnames->count);
	size_t sz = sizeof(struct nsdst)*num;
	char tmpfile[256];
	uint8_t z = 0;

	/* file names */
	nsd->zonestatfname[0] = 0;
	nsd->zonestatfname[1] = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile);
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile);

	/* file descriptors */
	nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600);
	if(nsd->zonestatfd[0] == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0],
			strerror(errno));
		exit(1);
	}
	nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600);
	if(nsd->zonestatfd[1] == -1) {
		log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1],
			strerror(errno));
		close(nsd->zonestatfd[0]);
		unlink(nsd->zonestatfname[0]);
		exit(1);
	}

#ifdef HAVE_MMAP
	if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[0], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[0], strerror(errno));
		exit(1);
	}
	if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[1], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[1], strerror(errno));
		exit(1);
	}
	nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[0], 0);
	if(nsd->zonestat[0] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
		MAP_SHARED, nsd->zonestatfd[1], 0);
	if(nsd->zonestat[1] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
		exit(1);
	}
	memset(nsd->zonestat[0], 0, sz);
	memset(nsd->zonestat[1], 0, sz);
	nsd->zonestatsize[0] = num;
	nsd->zonestatsize[1] = num;
	nsd->zonestatdesired = num;
	nsd->zonestatsizenow = num;
	nsd->zonestatnow = nsd->zonestat[0];
#endif /* HAVE_MMAP */
}

void
zonestat_remap(struct nsd* nsd, int idx, size_t sz)
{
#ifdef HAVE_MMAP
#ifdef MREMAP_MAYMOVE
	nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], sz,
		MREMAP_MAYMOVE);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap failed: %s", strerror(errno));
		exit(1);
	}
#else /* !HAVE MREMAP */
	if(msync(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0)
		log_msg(LOG_ERR, "msync failed: %s", strerror(errno));
	if(munmap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0)
		log_msg(LOG_ERR, "munmap failed: %s", strerror(errno));
	nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz,
		PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		exit(1);
	}
#endif /* MREMAP */
#endif /* HAVE_MMAP */
}

/* realloc the zonestat array for the one that is not currently in use,
 * to match the desired new size of the array (if applicable) */
void
server_zonestat_realloc(struct nsd* nsd)
{
#ifdef HAVE_MMAP
	uint8_t z = 0;
	size_t sz;
	int idx = 0; /* index of the zonestat array that is not in use */
	if(nsd->zonestatnow == nsd->zonestat[0])
		idx = 1;
	if(nsd->zonestatsize[idx] == nsd->zonestatdesired)
		return;
	sz = sizeof(struct nsdst)*nsd->zonestatdesired;
	if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[idx], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[idx], strerror(errno));
		exit(1);
	}
	zonestat_remap(nsd, idx, sz);
	/* zero the newly allocated region */
	if(nsd->zonestatdesired > nsd->zonestatsize[idx]) {
		memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) *
			nsd->zonestatsize[idx], 0, sizeof(struct nsdst) *
			(nsd->zonestatdesired - nsd->zonestatsize[idx]));
	}
	nsd->zonestatsize[idx] = nsd->zonestatdesired;
#endif /* HAVE_MMAP */
}
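/*
 * Sketch (derived from the code in this file): during a reload the two
 * mmap'd arrays are flipped so the old and new children never write to
 * the same statistics memory. server_reload does, in effect:
 *
 *	server_zonestat_realloc(nsd);	// grow the idle array if needed
 *	server_zonestat_switch(nsd);	// new children use the idle array
 */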
/* switchover to use the other array for the new children, that
 * briefly coexist with the old children. And we want to avoid them
 * both writing to the same statistics arrays. */
void
server_zonestat_switch(struct nsd* nsd)
{
	if(nsd->zonestatnow == nsd->zonestat[0]) {
		nsd->zonestatnow = nsd->zonestat[1];
		nsd->zonestatsizenow = nsd->zonestatsize[1];
	} else {
		nsd->zonestatnow = nsd->zonestat[0];
		nsd->zonestatsizenow = nsd->zonestatsize[0];
	}
}
#endif /* USE_ZONE_STATS */

static void
cleanup_dname_compression_tables(void *ptr)
{
	free(ptr);
	compressed_dname_offsets = NULL;
	compression_table_capacity = 0;
}

static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xmallocarray(
			needed, sizeof(uint16_t));
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}

/*
 * Initialize the server, create and bind the sockets.
 */
int
server_init(struct nsd *nsd)
{
	size_t i;
#if defined(SO_REUSEADDR) || (defined(INET6) && (defined(IPV6_V6ONLY) || defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU) || defined(IP_TRANSPARENT)))
	int on = 1;
#endif

	/* UDP */

	/* Make a socket... */
	for (i = 0; i < nsd->ifs; i++) {
		if (!nsd->udp[i].addr) {
			nsd->udp[i].s = -1;
			continue;
		}
		if ((nsd->udp[i].s = socket(nsd->udp[i].addr->ai_family, nsd->udp[i].addr->ai_socktype, 0)) == -1) {
#if defined(INET6)
			if (nsd->udp[i].addr->ai_family == AF_INET6 &&
				errno == EAFNOSUPPORT && nsd->grab_ip6_optional) {
				log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: not supported");
				continue;
			}
#endif /* INET6 */
			log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
			return -1;
		}

#if defined(SO_RCVBUF) || defined(SO_SNDBUF)
		if(1) {
			int rcv = 1*1024*1024;
			int snd = 1*1024*1024;

#ifdef SO_RCVBUF
# ifdef SO_RCVBUFFORCE
			if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
				(socklen_t)sizeof(rcv)) < 0) {
				if(errno != EPERM && errno != ENOBUFS) {
					log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, "
						"...) failed: %s", strerror(errno));
					return -1;
				}
# else
			if(1) { /* no SO_RCVBUFFORCE: try plain SO_RCVBUF */
# endif /* SO_RCVBUFFORCE */
				if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
					(socklen_t)sizeof(rcv)) < 0) {
					if(errno != ENOBUFS && errno != ENOSYS) {
						log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, "
							"...) failed: %s", strerror(errno));
						return -1;
					}
				}
			}
#endif /* SO_RCVBUF */

#ifdef SO_SNDBUF
# ifdef SO_SNDBUFFORCE
			if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
				(socklen_t)sizeof(snd)) < 0) {
				if(errno != EPERM && errno != ENOBUFS) {
					log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, "
						"...) failed: %s", strerror(errno));
failed: %s", strerror(errno)); 616 return -1; 617 } 618 # else 619 if(1) { 620 # endif /* SO_SNDBUFFORCE */ 621 if(setsockopt(nsd->udp[i].s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 622 (socklen_t)sizeof(snd)) < 0) { 623 if(errno != ENOBUFS && errno != ENOSYS) { 624 log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, " 625 "...) failed: %s", strerror(errno)); 626 return -1; 627 } 628 } 629 } 630 #endif /* SO_SNDBUF */ 631 632 } 633 #endif /* defined(SO_RCVBUF) || defined(SO_SNDBUF) */ 634 635 #if defined(INET6) 636 if (nsd->udp[i].addr->ai_family == AF_INET6) { 637 # if defined(IPV6_V6ONLY) 638 if (setsockopt(nsd->udp[i].s, 639 IPPROTO_IPV6, IPV6_V6ONLY, 640 &on, sizeof(on)) < 0) 641 { 642 log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", 643 strerror(errno)); 644 return -1; 645 } 646 # endif 647 # if defined(IPV6_USE_MIN_MTU) 648 /* 649 * There is no fragmentation of IPv6 datagrams 650 * during forwarding in the network. Therefore 651 * we do not send UDP datagrams larger than 652 * the minimum IPv6 MTU of 1280 octets. The 653 * EDNS0 message length can be larger if the 654 * network stack supports IPV6_USE_MIN_MTU. 655 */ 656 if (setsockopt(nsd->udp[i].s, 657 IPPROTO_IPV6, IPV6_USE_MIN_MTU, 658 &on, sizeof(on)) < 0) 659 { 660 log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", 661 strerror(errno)); 662 return -1; 663 } 664 # elif defined(IPV6_MTU) 665 /* 666 * On Linux, PMTUD is disabled by default for datagrams 667 * so set the MTU equal to the MIN MTU to get the same. 668 */ 669 on = IPV6_MIN_MTU; 670 if (setsockopt(nsd->udp[i].s, IPPROTO_IPV6, IPV6_MTU, 671 &on, sizeof(on)) < 0) 672 { 673 log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", 674 strerror(errno)); 675 return -1; 676 } 677 on = 1; 678 # endif 679 } 680 #endif 681 #if defined(AF_INET) 682 if (nsd->udp[i].addr->ai_family == AF_INET) { 683 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) 684 int action = IP_PMTUDISC_DONT; 685 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, 686 IP_MTU_DISCOVER, &action, sizeof(action)) < 0) 687 { 688 log_msg(LOG_ERR, "setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", 689 strerror(errno)); 690 return -1; 691 } 692 # elif defined(IP_DONTFRAG) 693 int off = 0; 694 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_DONTFRAG, 695 &off, sizeof(off)) < 0) 696 { 697 log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s", 698 strerror(errno)); 699 return -1; 700 } 701 # endif 702 } 703 #endif 704 /* set it nonblocking */ 705 /* otherwise, on OSes with thundering herd problems, the 706 UDP recv could block NSD after select returns readable. */ 707 if (fcntl(nsd->udp[i].s, F_SETFL, O_NONBLOCK) == -1) { 708 log_msg(LOG_ERR, "cannot fcntl udp: %s", strerror(errno)); 709 } 710 711 /* Bind it... */ 712 if (nsd->options->ip_transparent) { 713 #ifdef IP_TRANSPARENT 714 if (setsockopt(nsd->udp[i].s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) < 0) { 715 log_msg(LOG_ERR, "setsockopt(...,IP_TRANSPARENT, ...) failed for udp: %s", 716 strerror(errno)); 717 } 718 #endif /* IP_TRANSPARENT */ 719 } 720 721 if (bind(nsd->udp[i].s, (struct sockaddr *) nsd->udp[i].addr->ai_addr, nsd->udp[i].addr->ai_addrlen) != 0) { 722 log_msg(LOG_ERR, "can't bind udp socket: %s", strerror(errno)); 723 return -1; 724 } 725 } 726 727 /* TCP */ 728 729 /* Make a socket... 
	for (i = 0; i < nsd->ifs; i++) {
		if (!nsd->tcp[i].addr) {
			nsd->tcp[i].s = -1;
			continue;
		}
		if ((nsd->tcp[i].s = socket(nsd->tcp[i].addr->ai_family, nsd->tcp[i].addr->ai_socktype, 0)) == -1) {
#if defined(INET6)
			if (nsd->tcp[i].addr->ai_family == AF_INET6 &&
				errno == EAFNOSUPPORT && nsd->grab_ip6_optional) {
				log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: not supported");
				continue;
			}
#endif /* INET6 */
			log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
			return -1;
		}

#ifdef SO_REUSEADDR
		if (setsockopt(nsd->tcp[i].s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) {
			log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s", strerror(errno));
		}
#endif /* SO_REUSEADDR */

#if defined(INET6)
		if (nsd->tcp[i].addr->ai_family == AF_INET6) {
# if defined(IPV6_V6ONLY)
			if (setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_V6ONLY,
				&on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed: %s", strerror(errno));
				return -1;
			}
# endif
# if defined(IPV6_USE_MIN_MTU)
			/*
			 * Use minimum MTU to minimize delays learning working
			 * PMTU when communicating through a tunnel.
			 */
			if (setsockopt(nsd->tcp[i].s,
				IPPROTO_IPV6, IPV6_USE_MIN_MTU,
				&on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(..., IPV6_USE_MIN_MTU, ...) failed: %s", strerror(errno));
				return -1;
			}
# elif defined(IPV6_MTU)
			/*
			 * On Linux, PMTUD is disabled by default for datagrams
			 * so set the MTU equal to the MIN MTU to get the same.
			 */
			on = IPV6_MIN_MTU;
			if (setsockopt(nsd->tcp[i].s, IPPROTO_IPV6, IPV6_MTU,
				&on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(..., IPV6_MTU, ...) failed: %s", strerror(errno));
				return -1;
			}
			on = 1;
# endif
		}
#endif
		/* set it nonblocking */
		/* (StevensUNP p463), if tcp listening socket is blocking, then
		   it may block in accept, even if select() says readable. */
		if (fcntl(nsd->tcp[i].s, F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl tcp: %s", strerror(errno));
		}

		/* Bind it... */
		if (nsd->options->ip_transparent) {
#ifdef IP_TRANSPARENT
			if (setsockopt(nsd->tcp[i].s, IPPROTO_IP, IP_TRANSPARENT, &on, sizeof(on)) < 0) {
				log_msg(LOG_ERR, "setsockopt(...,IP_TRANSPARENT, ...) failed for tcp: %s",
					strerror(errno));
			}
#endif /* IP_TRANSPARENT */
		}

		if (bind(nsd->tcp[i].s, (struct sockaddr *) nsd->tcp[i].addr->ai_addr, nsd->tcp[i].addr->ai_addrlen) != 0) {
			log_msg(LOG_ERR, "can't bind tcp socket: %s", strerror(errno));
			return -1;
		}

		/* Listen to it... */
		if (listen(nsd->tcp[i].s, TCP_BACKLOG) == -1) {
			log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
			return -1;
		}
	}

	return 0;
}
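/*
 * Illustrative sketch (not nsd code): the per-socket setup order used in
 * server_init above, assuming a pre-resolved struct addrinfo* ai:
 *
 *	s = socket(ai->ai_family, ai->ai_socktype, 0);
 *	setsockopt(s, ...);		// buffer sizes, V6ONLY, MTU options
 *	fcntl(s, F_SETFL, O_NONBLOCK);	// never block in recv/accept
 *	bind(s, ai->ai_addr, ai->ai_addrlen);
 *	listen(s, TCP_BACKLOG);		// TCP sockets only
 */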
/*
 * Prepare the server for take off.
 */
int
server_prepare(struct nsd *nsd)
{
#ifdef RATELIMIT
	/* set secret modifier for hashing (udb ptr buckets and rate limits) */
#ifdef HAVE_ARC4RANDOM
	hash_set_raninit(arc4random());
#else
	uint32_t v = getpid() ^ time(NULL);
	srandom((unsigned long)v);
	if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0)
		hash_set_raninit(v);
	else	hash_set_raninit(random());
#endif
	rrl_mmap_init(nsd->child_count, nsd->options->rrl_size,
		nsd->options->rrl_ratelimit,
		nsd->options->rrl_whitelist_ratelimit,
		nsd->options->rrl_slip,
		nsd->options->rrl_ipv4_prefix_length,
		nsd->options->rrl_ipv6_prefix_length);
#endif /* RATELIMIT */

	/* Open the database... */
	if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) {
		log_msg(LOG_ERR, "unable to open the database %s: %s",
			nsd->dbfile, strerror(errno));
		unlink(nsd->task[0]->fname);
		unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		return -1;
	}
	/* check if zone files have been modified */
	/* NULL for taskudb because we send soainfo in a moment, batched up,
	 * for all zones */
	if(nsd->options->zonefiles_check || (nsd->options->database == NULL ||
		nsd->options->database[0] == 0))
		namedb_check_zonefiles(nsd, nsd->options, NULL, NULL);
	zonestatid_tree_set(nsd);

	compression_table_capacity = 0;
	initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
	/* Initialize times... */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif /* BIND8_STATS */

	return 0;
}

/*
 * Fork the required number of servers.
 */
static int
server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	size_t i;

	/* Start all child servers initially. */
	for (i = 0; i < nsd->child_count; ++i) {
		nsd->children[i].pid = 0;
	}

	return restart_child_servers(nsd, region, netio, xfrd_sock_p);
}

void
server_close_all_sockets(struct nsd_socket sockets[], size_t n)
{
	size_t i;

	/* Close all the sockets... */
	for (i = 0; i < n; ++i) {
		if (sockets[i].s != -1) {
			close(sockets[i].s);
			freeaddrinfo(sockets[i].addr);
			sockets[i].s = -1;
		}
	}
}

/*
 * Close the sockets, shutdown the server and exit.
 * Does not return.
 */
void
server_shutdown(struct nsd *nsd)
{
	size_t i;

	server_close_all_sockets(nsd->udp, nsd->ifs);
	server_close_all_sockets(nsd->tcp, nsd->ifs);
	/* CHILD: close command channel to parent */
	if(nsd->this_child && nsd->this_child->parent_fd != -1)
	{
		close(nsd->this_child->parent_fd);
		nsd->this_child->parent_fd = -1;
	}
	/* SERVER: close command channels to children */
	if(!nsd->this_child)
	{
		for(i=0; i < nsd->child_count; ++i)
			if(nsd->children[i].child_fd != -1)
			{
				close(nsd->children[i].child_fd);
				nsd->children[i].child_fd = -1;
			}
	}

	tsig_finalize();
#ifdef HAVE_SSL
	daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */
#endif

#if 0 /* OS collects memory pages */
	nsd_options_destroy(nsd->options);
	region_destroy(nsd->region);
#endif
	log_finalize();
	exit(0);
}

void
server_prepare_xfrd(struct nsd* nsd)
{
	char tmpfile[256];
	/* create task mmaps */
	nsd->mytask = 0;
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[0] = task_file_create(tmpfile);
	if(!nsd->task[0]) {
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1",
		nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
	nsd->task[1] = task_file_create(tmpfile);
	if(!nsd->task[1]) {
		unlink(nsd->task[0]->fname);
#ifdef USE_ZONE_STATS
		unlink(nsd->zonestatfname[0]);
		unlink(nsd->zonestatfname[1]);
#endif
		xfrd_del_tempdir(nsd);
		exit(1);
	}
	assert(udb_base_get_userdata(nsd->task[0])->data == 0);
	assert(udb_base_get_userdata(nsd->task[1])->data == 0);
	/* create xfrd listener structure */
	nsd->xfrd_listener = region_alloc(nsd->region,
		sizeof(netio_handler_type));
	nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*)
		region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data));
	nsd->xfrd_listener->fd = -1;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd =
		nsd;
	((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn =
		xfrd_tcp_create(nsd->region, QIOBUFSZ);
}


void
server_start_xfrd(struct nsd *nsd, int del_db, int reload_active)
{
	pid_t pid;
	int sockets[2] = {0,0};
	struct ipc_handler_conn_data *data;

	if(nsd->xfrd_listener->fd != -1)
		close(nsd->xfrd_listener->fd);
	if(del_db) {
		/* recreate taskdb that xfrd was using, it may be corrupt */
		/* we (or reload) use nsd->mytask, and xfrd uses the other */
		char* tmpfile = nsd->task[1-nsd->mytask]->fname;
		nsd->task[1-nsd->mytask]->fname = NULL;
		/* free alloc already, so udb does not shrink itself */
		udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc);
		nsd->task[1-nsd->mytask]->alloc = NULL;
		udb_base_free(nsd->task[1-nsd->mytask]);
		/* create new file, overwrite the old one */
		nsd->task[1-nsd->mytask] = task_file_create(tmpfile);
		free(tmpfile);
	}
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
		log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
		return;
	}
	pid = fork();
	switch (pid) {
	case -1:
		log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
		break;
	default:
		/* PARENT: close first socket, use second one */
		close(sockets[0]);
		if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		if(del_db) xfrd_free_namedb(nsd);
		/* use other task than I am using, since if xfrd died and is
		 * restarted, the reload is using nsd->mytask */
		nsd->mytask = 1 - nsd->mytask;
		xfrd_init(sockets[1], nsd, del_db, reload_active, pid);
		/* ENOTREACH */
		break;
	case 0:
		/* CHILD: close second socket, use first one */
		close(sockets[1]);
		if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) {
			log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
		}
		nsd->xfrd_listener->fd = sockets[0];
		break;
	}
	/* server-parent only */
	nsd->xfrd_listener->timeout = NULL;
	nsd->xfrd_listener->event_types = NETIO_EVENT_READ;
	nsd->xfrd_listener->event_handler = parent_handle_xfrd_command;
	/* clear ongoing ipc reads */
	data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data;
	data->conn->is_reading = 0;
}

/** add all soainfo to taskdb */
static void
add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb)
{
	struct radnode* n;
	udb_ptr task_last; /* last task, mytask is empty so NULL */
	/* add all SOA INFO to mytask */
	udb_ptr_init(&task_last, taskudb);
	for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
		task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0);
	}
	udb_ptr_unlink(&task_last, taskudb);
}

void
server_send_soa_xfrd(struct nsd* nsd, int shortsoa)
{
	/* normally this exchanges the SOA from nsd->xfrd and the expire back.
	 * parent fills one taskdb with soas, xfrd fills other with expires.
	 * then they exchange and process.
	 * shortsoa: xfrd crashes and needs to be restarted and one taskdb
	 * may be in use by reload. Fill SOA in taskdb and give to xfrd.
	 * expire notifications can be sent back via a normal reload later
	 * (xfrd will wait for current running reload to finish if any).
	 */
	sig_atomic_t cmd = 0;
	pid_t mypid;
	int xfrd_sock = nsd->xfrd_listener->fd;
	struct udb_base* taskudb = nsd->task[nsd->mytask];
	udb_ptr t;
	if(!shortsoa) {
		if(nsd->signal_hint_shutdown) {
		shutdown:
			log_msg(LOG_WARNING, "signal received, shutting down...");
			server_close_all_sockets(nsd->udp, nsd->ifs);
			server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
			daemon_remote_close(nsd->rc);
#endif
			/* Unlink it if possible... */
			unlinkpid(nsd->pidfile);
			unlink(nsd->task[0]->fname);
			unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
			unlink(nsd->zonestatfname[0]);
			unlink(nsd->zonestatfname[1]);
#endif
			/* write the nsd.db to disk, wait for it to complete */
			udb_base_sync(nsd->db->udb, 1);
			udb_base_close(nsd->db->udb);
			server_shutdown(nsd);
			exit(0);
		}
	}
	if(shortsoa) {
		/* put SOA in xfrd task because mytask may be in use */
		taskudb = nsd->task[1-nsd->mytask];
	}

	add_all_soa_to_task(nsd, taskudb);
	if(!shortsoa) {
		/* wait for xfrd to signal task is ready, RELOAD signal */
		if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) ||
			cmd != NSD_RELOAD) {
			log_msg(LOG_ERR, "did not get start signal from xfrd");
			exit(1);
		}
		if(nsd->signal_hint_shutdown) {
			goto shutdown;
		}
	}
	/* give xfrd our task, signal it with RELOAD_DONE */
	task_process_sync(taskudb);
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
			(int)nsd->pid, strerror(errno));
	}
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	if(!shortsoa) {
		/* process the xfrd task works (expiry data) */
		nsd->mytask = 1 - nsd->mytask;
		taskudb = nsd->task[nsd->mytask];
		task_remap(taskudb);
		udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
		while(!udb_ptr_is_null(&t)) {
			task_process_expire(nsd->db, TASKLIST(&t));
			udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next);
		}
		udb_ptr_unlink(&t, taskudb);
		task_clear(taskudb);

		/* tell xfrd that the task is emptied, signal with RELOAD_DONE */
		cmd = NSD_RELOAD_DONE;
		if(!write_socket(xfrd_sock, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
				(int)nsd->pid, strerror(errno));
		}
	}
}

/* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */
ssize_t
block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout)
{
	uint8_t* buf = (uint8_t*) p;
	ssize_t total = 0;
	fd_set rfds;
	struct timeval tv;
	FD_ZERO(&rfds);

	while( total < sz) {
		ssize_t ret;
		FD_SET(s, &rfds);
		tv.tv_sec = timeout;
		tv.tv_usec = 0;
		ret = select(s+1, &rfds, NULL, NULL, timeout==-1?NULL:&tv);
		if(ret == -1) {
			if(errno == EAGAIN)
				/* blocking read */
				continue;
			if(errno == EINTR) {
				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
					return -1;
				/* other signals can be handled later */
				continue;
			}
			/* some error */
			return -1;
		}
		if(ret == 0) {
			/* operation timed out */
			return -2;
		}
		ret = read(s, buf+total, sz-total);
		if(ret == -1) {
			if(errno == EAGAIN)
				/* blocking read */
				continue;
			if(errno == EINTR) {
				if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
					return -1;
				/* other signals can be handled later */
				continue;
			}
			/* some error */
			return -1;
		}
		if(ret == 0) {
			/* closed connection! */
			return 0;
		}
		total += ret;
	}
	return total;
}
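/*
 * Illustrative sketch (not nsd code): a typical block_read() call as the
 * reload code below uses it, distinguishing the four outcomes:
 *
 *	sig_atomic_t cmd;
 *	ssize_t r = block_read(nsd, fd, &cmd, sizeof(cmd),
 *		RELOAD_SYNC_TIMEOUT);
 *	if(r == -2)
 *		;	// timed out, caller may retry
 *	else if(r != sizeof(cmd))
 *		;	// -1 on error, 0 on closed connection
 *	else
 *		;	// cmd received completely
 */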
static void
reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket)
{
	sig_atomic_t cmd = NSD_QUIT_SYNC;
	udb_ptr t, next;
	udb_base* u = nsd->task[nsd->mytask];
	udb_ptr_init(&next, u);
	udb_ptr_new(&t, u, udb_base_get_userdata(u));
	udb_base_set_userdata(u, 0);
	while(!udb_ptr_is_null(&t)) {
		/* store next in list so this one can be deleted or reused */
		udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next);
		udb_rptr_zero(&TASKLIST(&t)->next, u);

		/* process task t */
		/* append results for task t and update last_task */
		task_process_in_reload(nsd, u, last_task, &t);

		/* go to next */
		udb_ptr_set_ptr(&t, u, &next);

		/* if the parent has quit, we must quit too, poll the fd for cmds */
		if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
			if(cmd == NSD_QUIT) {
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
				/* sync to disk (if needed) */
				udb_base_sync(nsd->db->udb, 0);
				/* unlink files of remainder of tasks */
				while(!udb_ptr_is_null(&t)) {
					if(TASKLIST(&t)->task_type == task_apply_xfr) {
						xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno);
					}
					udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next);
				}
				udb_ptr_unlink(&t, u);
				udb_ptr_unlink(&next, u);
				exit(0);
			}
		}

	}
	udb_ptr_unlink(&t, u);
	udb_ptr_unlink(&next, u);
}

#ifdef BIND8_STATS
static void
parent_send_stats(struct nsd* nsd, int cmdfd)
{
	size_t i;
	if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) {
		log_msg(LOG_ERR, "could not write stats to reload");
		return;
	}
	for(i=0; i<nsd->child_count; i++)
		if(!write_socket(cmdfd, &nsd->children[i].query_count,
			sizeof(stc_t))) {
			log_msg(LOG_ERR, "could not write stats to reload");
			return;
		}
}

static void
reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last)
{
	struct nsdst s;
	stc_t* p;
	size_t i;
	if(block_read(nsd, cmdfd, &s, sizeof(s),
		RELOAD_SYNC_TIMEOUT) != sizeof(s)) {
		log_msg(LOG_ERR, "could not read stats from oldpar");
		return;
	}
	s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0);
	s.db_mem = region_get_mem(nsd->db->region);
	p = (stc_t*)task_new_stat_info(nsd->task[nsd->mytask], last, &s,
		nsd->child_count);
	if(!p) return;
	for(i=0; i<nsd->child_count; i++) {
		if(block_read(nsd, cmdfd, p++, sizeof(stc_t), 1)!=sizeof(stc_t))
			return;
	}
}
#endif /* BIND8_STATS */
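/*
 * Sketch of the quit-sync handshake implemented below (derived from
 * server_reload and the NSD_QUIT case in server_main):
 *
 *	reload child                      old main (server_main)
 *	------------                      ----------------------
 *	process tasks, fork new children
 *	send NSD_QUIT_SYNC  ------------> acks with NSD_RELOAD, sends
 *	block_read the ack, retrying      stats, then quits
 *	on -2 (RELOAD_SYNC_TIMEOUT)
 *	send NSD_RELOAD_DONE to xfrd,
 *	continue as the new server_main
 */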
/*
 * Reload the database, stop parent, re-fork children and continue
 * as server_main.
 */
static void
server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
	int cmdsocket)
{
	pid_t mypid;
	sig_atomic_t cmd = NSD_QUIT_SYNC;
	int ret;
	udb_ptr last_task;
	struct sigaction old_sigchld, ign_sigchld;
	/* ignore SIGCHLD from the previous server_main that used this pid */
	memset(&ign_sigchld, 0, sizeof(ign_sigchld));
	ign_sigchld.sa_handler = SIG_IGN;
	sigaction(SIGCHLD, &ign_sigchld, &old_sigchld);

	/* see what tasks we got from xfrd */
	task_remap(nsd->task[nsd->mytask]);
	udb_ptr_init(&last_task, nsd->task[nsd->mytask]);
	udb_compact_inhibited(nsd->db->udb, 1);
	reload_process_tasks(nsd, &last_task, cmdsocket);
	udb_compact_inhibited(nsd->db->udb, 0);
	udb_compact(nsd->db->udb);

#ifndef NDEBUG
	if(nsd_debug_level >= 1)
		region_log_stats(nsd->db->region);
#endif /* NDEBUG */
	/* sync to disk (if needed) */
	udb_base_sync(nsd->db->udb, 0);

	initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
	/* Restart dumping stats if required. */
	time(&nsd->st.boot);
	set_bind8_alarm(nsd);
#endif
#ifdef USE_ZONE_STATS
	server_zonestat_realloc(nsd); /* realloc for new children */
	server_zonestat_switch(nsd);
#endif

	/* listen for the signals of failed children again */
	sigaction(SIGCHLD, &old_sigchld, NULL);
	/* Start new child processes */
	if (server_start_children(nsd, server_region, netio, &nsd->
		xfrd_listener->fd) != 0) {
		send_children_quit(nsd);
		exit(1);
	}

	/* if the parent has quit, we must quit too, poll the fd for cmds */
	if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
		if(cmd == NSD_QUIT) {
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
			send_children_quit(nsd);
			exit(0);
		}
	}

	/* Send quit command to parent: blocking, wait for receipt. */
	do {
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
		if (!write_socket(cmdsocket, &cmd, sizeof(cmd)))
		{
			log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s",
				strerror(errno));
		}
		/* blocking: wait for parent to really quit. (it sends RELOAD as ack) */
		DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main"));
		ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd),
			RELOAD_SYNC_TIMEOUT);
		if(ret == -2) {
			DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry"));
		}
	} while (ret == -2);
	if(ret == -1) {
		log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s",
			strerror(errno));
	}
	DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd));
	if(cmd == NSD_QUIT) {
		/* small race condition possible here, parent got quit cmd. */
		send_children_quit(nsd);
		exit(1);
	}
	assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD);
#ifdef BIND8_STATS
	reload_do_stats(cmdsocket, nsd, &last_task);
#endif
	udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
	task_process_sync(nsd->task[nsd->mytask]);
#ifdef USE_ZONE_STATS
	server_zonestat_realloc(nsd); /* realloc for next children */
#endif

	/* send soainfo to the xfrd process, signal it that reload is done,
	 * it picks up the taskudb */
	cmd = NSD_RELOAD_DONE;
	if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
		log_msg(LOG_ERR, "problems sending reload_done xfrd: %s",
			strerror(errno));
	}
	mypid = getpid();
	if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
		log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
			strerror(errno));
	}

	/* try to reopen file */
	if (nsd->file_rotation_ok)
		log_reopen(nsd->log_filename, 1);
	/* exit reload, continue as new server_main */
}

/*
 * Get the mode depending on the signal hints that have been received.
 * Multiple signal hints can be received and will be handled in turn.
 */
static sig_atomic_t
server_signal_mode(struct nsd *nsd)
{
	if(nsd->signal_hint_quit) {
		nsd->signal_hint_quit = 0;
		return NSD_QUIT;
	}
	else if(nsd->signal_hint_shutdown) {
		nsd->signal_hint_shutdown = 0;
		return NSD_SHUTDOWN;
	}
	else if(nsd->signal_hint_child) {
		nsd->signal_hint_child = 0;
		return NSD_REAP_CHILDREN;
	}
	else if(nsd->signal_hint_reload) {
		nsd->signal_hint_reload = 0;
		return NSD_RELOAD;
	}
	else if(nsd->signal_hint_reload_hup) {
		nsd->signal_hint_reload_hup = 0;
		return NSD_RELOAD_REQ;
	}
	else if(nsd->signal_hint_stats) {
		nsd->signal_hint_stats = 0;
#ifdef BIND8_STATS
		set_bind8_alarm(nsd);
#endif
		return NSD_STATS;
	}
	else if(nsd->signal_hint_statsusr) {
		nsd->signal_hint_statsusr = 0;
		return NSD_STATS;
	}
	return NSD_RUN;
}

/*
 * The main server simply waits for signals and child processes to
 * terminate. Child processes are restarted as necessary.
 */
void
server_main(struct nsd *nsd)
{
	region_type *server_region = region_create(xalloc, free);
	netio_type *netio = netio_create(server_region);
	netio_handler_type reload_listener;
	int reload_sockets[2] = {-1, -1};
	struct timespec timeout_spec;
	int status;
	pid_t child_pid;
	pid_t reload_pid = -1;
	sig_atomic_t mode;

	/* Ensure we are the main process */
	assert(nsd->server_kind == NSD_SERVER_MAIN);

	/* Add listener for the XFRD process */
	netio_add_handler(netio, nsd->xfrd_listener);

	/* Start the child processes that handle incoming queries */
	if (server_start_children(nsd, server_region, netio,
		&nsd->xfrd_listener->fd) != 0) {
		send_children_quit(nsd);
		exit(1);
	}
	reload_listener.fd = -1;

	/* This_child MUST be 0, because this is the parent process */
	assert(nsd->this_child == 0);

	/* Run the server until we get a shutdown signal */
	while ((mode = nsd->mode) != NSD_SHUTDOWN) {
		/* Did we receive a signal that changes our mode? */
		if(mode == NSD_RUN) {
			nsd->mode = mode = server_signal_mode(nsd);
		}

		switch (mode) {
		case NSD_RUN:
			/* see if any child processes terminated */
			while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) {
				int is_child = delete_child_pid(nsd, child_pid);
				if (is_child != -1 && nsd->children[is_child].need_to_exit) {
					if(nsd->children[is_child].child_fd == -1)
						nsd->children[is_child].has_exited = 1;
					parent_check_all_children_exited(nsd);
				} else if(is_child != -1) {
					log_msg(LOG_WARNING,
						"server %d died unexpectedly with status %d, restarting",
						(int) child_pid, status);
					restart_child_servers(nsd, server_region, netio,
						&nsd->xfrd_listener->fd);
				} else if (child_pid == reload_pid) {
					sig_atomic_t cmd = NSD_RELOAD_DONE;
					pid_t mypid;
					log_msg(LOG_WARNING,
						"Reload process %d failed with status %d, continuing with old database",
						(int) child_pid, status);
					reload_pid = -1;
					if(reload_listener.fd != -1) close(reload_listener.fd);
					reload_listener.fd = -1;
					reload_listener.event_types = NETIO_EVENT_NONE;
					task_process_sync(nsd->task[nsd->mytask]);
					/* inform xfrd reload attempt ended */
					if(!write_socket(nsd->xfrd_listener->fd,
						&cmd, sizeof(cmd))) {
						log_msg(LOG_ERR, "problems "
							"sending SOAEND to xfrd: %s",
							strerror(errno));
					}
					mypid = getpid();
					if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
						log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
							strerror(errno));
					}
				} else if(status != 0) {
					/* check the status, because we also
					 * reap the old server main here (the
					 * reload process is its process
					 * parent), and older server processes
					 * that exit after a reload */
					log_msg(LOG_WARNING,
						"process %d terminated with status %d",
						(int) child_pid, status);
				}
			}
			if (child_pid == -1) {
				if (errno == EINTR) {
					continue;
				}
				if (errno != ECHILD)
					log_msg(LOG_WARNING, "wait failed: %s", strerror(errno));
			}
			if (nsd->mode != NSD_RUN)
				break;

			/* timeout to collect processes, in case no sigchild happens */
			timeout_spec.tv_sec = 60;
			timeout_spec.tv_nsec = 0;

			/* listen on ports, timeout for collecting terminated children */
			if(netio_dispatch(netio, &timeout_spec, 0) == -1) {
				if (errno != EINTR) {
					log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
				}
			}
			if(nsd->restart_children) {
				restart_child_servers(nsd, server_region, netio,
					&nsd->xfrd_listener->fd);
				nsd->restart_children = 0;
			}
			if(nsd->reload_failed) {
				sig_atomic_t cmd = NSD_RELOAD_DONE;
				pid_t mypid;
				nsd->reload_failed = 0;
				log_msg(LOG_WARNING,
					"Reload process %d failed, continuing with old database",
					(int) reload_pid);
				reload_pid = -1;
				if(reload_listener.fd != -1) close(reload_listener.fd);
				reload_listener.fd = -1;
				reload_listener.event_types = NETIO_EVENT_NONE;
				task_process_sync(nsd->task[nsd->mytask]);
				/* inform xfrd reload attempt ended */
				if(!write_socket(nsd->xfrd_listener->fd,
					&cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "problems "
						"sending SOAEND to xfrd: %s",
						strerror(errno));
				}
				mypid = getpid();
				if(!write_socket(nsd->xfrd_listener->fd, &mypid, sizeof(mypid))) {
					log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
						strerror(errno));
				}
			}

			break;
		case NSD_RELOAD_REQ: {
			sig_atomic_t cmd = NSD_RELOAD_REQ;
			log_msg(LOG_WARNING, "SIGHUP received, reloading...");
			DEBUG(DEBUG_IPC,1, (LOG_INFO,
				"main: ipc send reload_req to xfrd"));
			if(!write_socket(nsd->xfrd_listener->fd,
				&cmd, sizeof(cmd))) {
				log_msg(LOG_ERR, "server_main: could not send "
					"reload_req to xfrd: %s", strerror(errno));
			}
			nsd->mode = NSD_RUN;
		} break;
		case NSD_RELOAD:
			/* Continue to run nsd after reload */
			nsd->mode = NSD_RUN;
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading..."));
			if (reload_pid != -1) {
				log_msg(LOG_WARNING, "Reload already in progress (pid = %d)",
					(int) reload_pid);
				break;
			}

			/* switch the mytask to keep track of who owns task */
			nsd->mytask = 1 - nsd->mytask;
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) {
				log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno));
				reload_pid = -1;
				break;
			}

			/* Do actual reload */
			reload_pid = fork();
			switch (reload_pid) {
			case -1:
				log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
				break;
			default:
				/* PARENT */
				close(reload_sockets[0]);
				server_reload(nsd, server_region, netio,
					reload_sockets[1]);
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main"));
				close(reload_sockets[1]);
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed"));
				/* drop stale xfrd ipc data */
				((struct ipc_handler_conn_data*)nsd->
					xfrd_listener->user_data)
					->conn->is_reading = 0;
				reload_pid = -1;
				reload_listener.fd = -1;
				reload_listener.event_types = NETIO_EVENT_NONE;
				DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run"));
				break;
			case 0:
				/* CHILD */
				/* server_main keeps running until NSD_QUIT_SYNC
				 * is received from the reload process */
				close(reload_sockets[1]);
				reload_listener.fd = reload_sockets[0];
				reload_listener.timeout = NULL;
				reload_listener.user_data = nsd;
				reload_listener.event_types = NETIO_EVENT_READ;
				reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */
				netio_add_handler(netio, &reload_listener);
				reload_pid = getppid();
				break;
			}
			break;
		case NSD_QUIT_SYNC:
			/* synchronisation of xfrd, parent and reload */
			if(!nsd->quit_sync_done && reload_listener.fd != -1) {
				sig_atomic_t cmd = NSD_RELOAD;
				/* stop xfrd ipc writes in progress */
				DEBUG(DEBUG_IPC,1, (LOG_INFO,
					"main: ipc send indication reload"));
				if(!write_socket(nsd->xfrd_listener->fd,
					&cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "server_main: could not send reload "
						"indication to xfrd: %s", strerror(errno));
				}
				/* wait for ACK from xfrd */
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd"));
				nsd->quit_sync_done = 1;
			}
			nsd->mode = NSD_RUN;
			break;
		case NSD_QUIT:
			/* silent shutdown during reload */
			if(reload_listener.fd != -1) {
				/* acknowledge the quit, to sync reload that we will really quit now */
				sig_atomic_t cmd = NSD_RELOAD;
				DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload"));
				if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
					log_msg(LOG_ERR, "server_main: "
						"could not ack quit: %s", strerror(errno));
				}
#ifdef BIND8_STATS
				parent_send_stats(nsd, reload_listener.fd);
#endif /* BIND8_STATS */
				close(reload_listener.fd);
			}
			DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence"));
			/* only quit children after xfrd has acked */
			send_children_quit(nsd);

#if 0 /* OS collects memory pages */
			region_destroy(server_region);
#endif
			server_shutdown(nsd);

			/* ENOTREACH */
			break;
		case NSD_SHUTDOWN:
			break;
		case NSD_REAP_CHILDREN:
			/* continue; wait for child in run loop */
			nsd->mode = NSD_RUN;
			break;
		case NSD_STATS:
#ifdef BIND8_STATS
			set_children_stats(nsd);
#endif
			nsd->mode = NSD_RUN;
			break;
		default:
			log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode);
			nsd->mode = NSD_RUN;
			break;
		}
	}
	log_msg(LOG_WARNING, "signal received, shutting down...");

	/* close opened ports to avoid race with restart of nsd */
	server_close_all_sockets(nsd->udp, nsd->ifs);
	server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
	daemon_remote_close(nsd->rc);
#endif
	send_children_quit_and_wait(nsd);

	/* Unlink it if possible... */
	unlinkpid(nsd->pidfile);
	unlink(nsd->task[0]->fname);
	unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
	unlink(nsd->zonestatfname[0]);
	unlink(nsd->zonestatfname[1]);
#endif

	if(reload_listener.fd != -1) {
		sig_atomic_t cmd = NSD_QUIT;
		DEBUG(DEBUG_IPC,1, (LOG_INFO,
			"main: ipc send quit to reload-process"));
		if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "server_main: could not send quit to reload: %s",
				strerror(errno));
		}
		fsync(reload_listener.fd);
		close(reload_listener.fd);
		/* wait for reload to finish processing */
		while(1) {
			if(waitpid(reload_pid, NULL, 0) == -1) {
				if(errno == EINTR) continue;
				if(errno == ECHILD) break;
				log_msg(LOG_ERR, "waitpid(reload %d): %s",
					(int)reload_pid, strerror(errno));
			}
			break;
		}
	}
	if(nsd->xfrd_listener->fd != -1) {
		/* complete quit, stop xfrd */
		sig_atomic_t cmd = NSD_QUIT;
		DEBUG(DEBUG_IPC,1, (LOG_INFO,
			"main: ipc send quit to xfrd"));
		if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
			log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s",
				strerror(errno));
		}
		fsync(nsd->xfrd_listener->fd);
		close(nsd->xfrd_listener->fd);
		(void)kill(nsd->pid, SIGTERM);
	}

#if 0 /* OS collects memory pages */
	region_destroy(server_region);
#endif
	/* write the nsd.db to disk, wait for it to complete */
	udb_base_sync(nsd->db->udb, 1);
	udb_base_close(nsd->db->udb);
	server_shutdown(nsd);
}

static query_state_type
server_process_query(struct nsd *nsd, struct query *query)
{
	return query_process(query, nsd);
}

static query_state_type
server_process_query_udp(struct nsd *nsd, struct query *query)
{
#ifdef RATELIMIT
	if(query_process(query, nsd) != QUERY_DISCARDED) {
		if(rrl_process_query(query))
			return rrl_slip(query);
		else	return QUERY_PROCESSED;
	}
	return QUERY_DISCARDED;
#else
	return query_process(query, nsd);
#endif
}

struct event_base*
nsd_child_event_base(void)
{
	struct event_base* base;
#ifdef USE_MINI_EVENT
	static time_t secs;
	static struct timeval now;
	base = event_init(&secs, &now);
#else
# if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
	/* libev */
	base = (struct event_base *)ev_default_loop(EVFLAG_AUTO);
# else
	/* libevent */
#  ifdef HAVE_EVENT_BASE_NEW
	base = event_base_new();
#  else
	base = event_init();
#  endif
# endif
#endif
	return base;
}
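/*
 * Illustrative sketch (not nsd code) of driving the base returned by
 * nsd_child_event_base(), using the libevent-compatible subset this
 * file relies on (mini_event, libev wrapper, or libevent proper):
 *
 *	struct event_base* base = nsd_child_event_base();
 *	struct event ev;
 *	event_set(&ev, fd, EV_PERSIST|EV_READ, callback, arg);
 *	event_base_set(base, &ev);	// attach the event to this base
 *	event_add(&ev, NULL);		// NULL timeout: wait indefinitely
 *	event_base_dispatch(base);	// run the loop
 */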
/*
 * Serve DNS requests.
 */
void
server_child(struct nsd *nsd)
{
	size_t i;
	region_type *server_region = region_create(xalloc, free);
	struct event_base* event_base = nsd_child_event_base();
	query_type *udp_query;
	sig_atomic_t mode;

	if(!event_base) {
		log_msg(LOG_ERR, "nsd server could not create event base");
		exit(1);
	}

#ifdef RATELIMIT
	/* the pointer difference already counts array elements */
	rrl_init(nsd->this_child - nsd->children);
#endif

	assert(nsd->server_kind != NSD_SERVER_MAIN);
	DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started"));

	if (!(nsd->server_kind & NSD_SERVER_TCP)) {
		server_close_all_sockets(nsd->tcp, nsd->ifs);
	}
	if (!(nsd->server_kind & NSD_SERVER_UDP)) {
		server_close_all_sockets(nsd->udp, nsd->ifs);
	}

	if (nsd->this_child && nsd->this_child->parent_fd != -1) {
		struct event *handler;
		struct ipc_handler_conn_data* user_data =
			(struct ipc_handler_conn_data*)region_alloc(
			server_region, sizeof(struct ipc_handler_conn_data));
		user_data->nsd = nsd;
		user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ);

		handler = (struct event*) region_alloc(
			server_region, sizeof(*handler));
		event_set(handler, nsd->this_child->parent_fd, EV_PERSIST|
			EV_READ, child_handle_parent_command, user_data);
		if(event_base_set(event_base, handler) != 0)
			log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed");
		if(event_add(handler, NULL) != 0)
			log_msg(LOG_ERR, "nsd ipcchild: event_add failed");
	}

	if (nsd->server_kind & NSD_SERVER_UDP) {
#if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG))
		udp_query = query_create(server_region,
			compressed_dname_offsets, compression_table_size);
#else
		udp_query = NULL;
		memset(msgs, 0, sizeof(msgs));
		for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
			queries[i] = query_create(server_region,
				compressed_dname_offsets, compression_table_size);
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			iovecs[i].iov_base = buffer_begin(queries[i]->packet);
			iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
			msgs[i].msg_hdr.msg_iov = &iovecs[i];
			msgs[i].msg_hdr.msg_iovlen = 1;
			msgs[i].msg_hdr.msg_name = &queries[i]->addr;
			msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
		}
#endif
		for (i = 0; i < nsd->ifs; ++i) {
			struct udp_handler_data *data;
			struct event *handler;

			data = (struct udp_handler_data *) region_alloc(
				server_region,
				sizeof(struct udp_handler_data));
			data->query = udp_query;
			data->nsd = nsd;
			data->socket = &nsd->udp[i];

			handler = (struct event*) region_alloc(
				server_region, sizeof(*handler));
			event_set(handler, nsd->udp[i].s, EV_PERSIST|EV_READ,
				handle_udp, data);
			if(event_base_set(event_base, handler) != 0)
				log_msg(LOG_ERR, "nsd udp: event_base_set failed");
			if(event_add(handler, NULL) != 0)
				log_msg(LOG_ERR, "nsd udp: event_add failed");
		}
	}
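	/*
	 * The batch above pairs each preallocated query buffer with one
	 * iovec and one mmsghdr so a single recvmmsg(2) call can fill
	 * many queries at once.  A minimal standalone sketch of the
	 * same wiring (hypothetical names; Linux recvmmsg assumed):
	 */
#if 0 /* illustrative sketch only */
	{
		struct mmsghdr m[4];
		struct iovec io[4];
		char bufs[4][512];
		struct sockaddr_storage from[4];
		int k;
		memset(m, 0, sizeof(m));
		for (k = 0; k < 4; k++) {
			io[k].iov_base = bufs[k];
			io[k].iov_len = sizeof(bufs[k]);
			m[k].msg_hdr.msg_iov = &io[k];
			m[k].msg_hdr.msg_iovlen = 1;
			m[k].msg_hdr.msg_name = &from[k];
			m[k].msg_hdr.msg_namelen = sizeof(from[k]);
		}
		/* n = recvmmsg(fd, m, 4, 0, NULL); m[j].msg_len holds each size */
	}
#endif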
	/*
	 * Keep track of all the TCP accept handlers so we can enable
	 * and disable them based on the current number of active TCP
	 * connections.
	 */
	tcp_accept_handler_count = nsd->ifs;
	tcp_accept_handlers = (struct tcp_accept_handler_data*)
		region_alloc_array(server_region,
		nsd->ifs, sizeof(*tcp_accept_handlers));
	if (nsd->server_kind & NSD_SERVER_TCP) {
		for (i = 0; i < nsd->ifs; ++i) {
			struct event *handler = &tcp_accept_handlers[i].event;
			struct tcp_accept_handler_data* data =
				&tcp_accept_handlers[i];
			data->nsd = nsd;
			data->socket = &nsd->tcp[i];
			event_set(handler, nsd->tcp[i].s, EV_PERSIST|EV_READ,
				handle_tcp_accept, data);
			if(event_base_set(event_base, handler) != 0)
				log_msg(LOG_ERR, "nsd tcp: event_base_set failed");
			if(event_add(handler, NULL) != 0)
				log_msg(LOG_ERR, "nsd tcp: event_add failed");
			data->event_added = 1;
		}
	} else tcp_accept_handler_count = 0;
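	/*
	 * The loop below drives libevent one iteration at a time with
	 * EVLOOP_ONCE, so signal-driven changes to nsd->mode are
	 * observed between iterations.  A minimal standalone sketch of
	 * that pattern (hypothetical names):
	 */
#if 0 /* illustrative sketch only */
	{
		volatile sig_atomic_t example_mode = NSD_RUN;
		while (example_mode == NSD_RUN) {
			/* dispatch one batch of events, then re-check mode */
			if (event_base_loop(event_base, EVLOOP_ONCE) == -1 &&
				errno != EINTR)
				break;
		}
	}
#endif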
	/* The main loop... */
	while ((mode = nsd->mode) != NSD_QUIT) {
		if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd);

		/* Do we need to do the statistics... */
		if (mode == NSD_STATS) {
#ifdef BIND8_STATS
			int p = nsd->st.period;
			nsd->st.period = 1; /* force stats printout */
			/* Dump the statistics */
			bind8_stats(nsd);
			nsd->st.period = p;
#else /* !BIND8_STATS */
			log_msg(LOG_NOTICE, "Statistics support not enabled at compile time.");
#endif /* BIND8_STATS */

			nsd->mode = NSD_RUN;
		}
		else if (mode == NSD_REAP_CHILDREN) {
			/* got signal, notify parent. parent reaps terminated children. */
			if (nsd->this_child->parent_fd != -1) {
				sig_atomic_t parent_notify = NSD_REAP_CHILDREN;
				if (write(nsd->this_child->parent_fd,
					&parent_notify,
					sizeof(parent_notify)) == -1)
				{
					log_msg(LOG_ERR, "problems sending command from %d to parent: %s",
						(int) nsd->this_child->pid, strerror(errno));
				}
			} else /* no parent, so reap 'em */
				while (waitpid(-1, NULL, WNOHANG) > 0) ;
			nsd->mode = NSD_RUN;
		}
		else if(mode == NSD_RUN) {
			/* Wait for a query... */
			if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
				if (errno != EINTR) {
					log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
					break;
				}
			}
		} else if(mode == NSD_QUIT) {
			/* ignore here, quit */
		} else {
			log_msg(LOG_ERR, "mode bad value %d, back to service.",
				(int)mode);
			nsd->mode = NSD_RUN;
		}
	}

#ifdef BIND8_STATS
	bind8_stats(nsd);
#endif /* BIND8_STATS */

#if 0 /* OS collects memory pages */
	event_base_free(event_base);
	region_destroy(server_region);
#endif
	server_shutdown(nsd);
}
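/*
 * The handler below receives a batch with recvmmsg(2), answers the
 * queries in place, and transmits the batch with sendmmsg(2),
 * retrying from the first unsent message on partial sends.  A
 * minimal standalone sketch of that send loop (hypothetical names;
 * Linux sendmmsg assumed):
 */
#if 0 /* illustrative sketch only */
static void
example_send_batch(int fd, struct mmsghdr* m, int count)
{
	int done = 0;
	while (done < count) {
		int n = sendmmsg(fd, &m[done], count - done, 0);
		if (n == -1) {
			log_msg(LOG_ERR, "sendmmsg failed: %s", strerror(errno));
			break; /* drop the remainder of the batch */
		}
		done += n; /* n messages were fully queued */
	}
}
#endif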
#if defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG)
static void
handle_udp(int fd, short event, void* arg)
{
	struct udp_handler_data *data = (struct udp_handler_data *) arg;
	int received, sent, recvcount, i;
	struct query *q;

	if (!(event & EV_READ)) {
		return;
	}
	recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
	/* this printf strangely gave a performance increase on Linux */
	/* printf("recvcount %d \n", recvcount); */
	if (recvcount == -1) {
		if (errno != EAGAIN && errno != EINTR) {
			log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
			STATUP(data->nsd, rxerr);
			/* No zone statup */
		}
		/* Simply no data available */
		return;
	}
	for (i = 0; i < recvcount; i++) {
	loopstart:
		received = msgs[i].msg_len;
		q = queries[i];
		if (received == -1) {
			/* msg_flags is not an errno; report it as flags */
			log_msg(LOG_ERR, "recvmmsg %d failed: msg_flags 0x%x", i,
				(unsigned)msgs[i].msg_hdr.msg_flags);
			STATUP(data->nsd, rxerr);
			/* No zone statup */
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			iovecs[i].iov_len = buffer_remaining(q->packet);
			goto swap_drop;
		}

		/* Account... */
#ifdef BIND8_STATS
		if (data->socket->addr->ai_family == AF_INET) {
			STATUP(data->nsd, qudp);
		} else if (data->socket->addr->ai_family == AF_INET6) {
			STATUP(data->nsd, qudp6);
		}
#endif

		buffer_skip(q->packet, received);
		buffer_flip(q->packet);

		/* Process and answer the query... */
		if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) {
			if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
				STATUP(data->nsd, nona);
				ZTATUP(data->nsd, q->zone, nona);
			}

#ifdef USE_ZONE_STATS
			if (data->socket->addr->ai_family == AF_INET) {
				ZTATUP(data->nsd, q->zone, qudp);
			} else if (data->socket->addr->ai_family == AF_INET6) {
				ZTATUP(data->nsd, q->zone, qudp6);
			}
#endif

			/* Add EDNS0 and TSIG info if necessary. */
			query_add_optional(q, data->nsd);

			buffer_flip(q->packet);
			iovecs[i].iov_len = buffer_remaining(q->packet);
#ifdef BIND8_STATS
			/* Account the rcode & TC... */
			STATUP2(data->nsd, rcode, RCODE(q->packet));
			ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
			if (TC(q->packet)) {
				STATUP(data->nsd, truncated);
				ZTATUP(data->nsd, q->zone, truncated);
			}
#endif /* BIND8_STATS */
		} else {
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			iovecs[i].iov_len = buffer_remaining(q->packet);
	swap_drop:
			STATUP(data->nsd, dropped);
			ZTATUP(data->nsd, q->zone, dropped);
			if(i != recvcount-1) {
				/* swap with last and decrease recvcount */
				struct mmsghdr mtmp = msgs[i];
				struct iovec iotmp = iovecs[i];
				recvcount--;
				msgs[i] = msgs[recvcount];
				iovecs[i] = iovecs[recvcount];
				queries[i] = queries[recvcount];
				msgs[recvcount] = mtmp;
				iovecs[recvcount] = iotmp;
				queries[recvcount] = q;
				msgs[i].msg_hdr.msg_iov = &iovecs[i];
				msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount];
				goto loopstart;
			} else {
				recvcount--;
			}
		}
	}

	/* send until all are sent */
	i = 0;
	while(i < recvcount) {
		sent = sendmmsg(fd, &msgs[i], recvcount-i, 0);
		if(sent == -1) {
			const char* es = strerror(errno);
			char a[48];
			addr2str(&queries[i]->addr, a, sizeof(a));
			log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s",
				a, (int)(recvcount-i), es);
#ifdef BIND8_STATS
			data->nsd->st.txerr += recvcount-i;
#endif /* BIND8_STATS */
			break;
		}
		i += sent;
	}
	for(i=0; i<recvcount; i++) {
		query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
		iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
	}
}
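/*
 * Dropped queries are compacted out of the batch above by swapping
 * the dropped slot with the last live slot, so the final sendmmsg
 * array stays contiguous.  The same idea on a plain array, as a
 * standalone sketch (hypothetical names):
 */
#if 0 /* illustrative sketch only */
static int
example_compact(int* vals, int count, int drop_value)
{
	int k = 0;
	while (k < count) {
		if (vals[k] == drop_value) {
			/* swap with the last element, shrink, recheck slot k */
			int tmp = vals[k];
			vals[k] = vals[count-1];
			vals[count-1] = tmp;
			count--;
		} else {
			k++;
		}
	}
	return count; /* number of live entries */
}
#endif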
#else /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */

static void
handle_udp(int fd, short event, void* arg)
{
	struct udp_handler_data *data = (struct udp_handler_data *) arg;
	int received, sent;
#ifndef NONBLOCKING_IS_BROKEN
#ifdef HAVE_RECVMMSG
	int recvcount;
#endif /* HAVE_RECVMMSG */
	int i;
#endif /* NONBLOCKING_IS_BROKEN */
	struct query *q;
#if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG))
	q = data->query;
#endif

	if (!(event & EV_READ)) {
		return;
	}
#ifndef NONBLOCKING_IS_BROKEN
#ifdef HAVE_RECVMMSG
	recvcount = recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
	/* this printf strangely gave a performance increase on Linux */
	/* printf("recvcount %d \n", recvcount); */
	if (recvcount == -1) {
		if (errno != EAGAIN && errno != EINTR) {
			log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
			STATUP(data->nsd, rxerr);
			/* No zone statup */
		}
		/* Simply no data available */
		return;
	}
	for (i = 0; i < recvcount; i++) {
		received = msgs[i].msg_len;
		msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
		if (received == -1) {
			log_msg(LOG_ERR, "recvmmsg failed");
			STATUP(data->nsd, rxerr);
			/* No zone statup */
			/* the error can be found in msgs[i].msg_hdr.msg_flags */
			query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
			continue;
		}
		q = queries[i];
#else
	for(i=0; i<NUM_RECV_PER_SELECT; i++) {
#endif /* HAVE_RECVMMSG */
#endif /* NONBLOCKING_IS_BROKEN */

#if (defined(NONBLOCKING_IS_BROKEN) || !defined(HAVE_RECVMMSG))
	/* Initialize the query... */
	query_reset(q, UDP_MAX_MESSAGE_LEN, 0);

	received = recvfrom(fd,
		buffer_begin(q->packet),
		buffer_remaining(q->packet),
		0,
		(struct sockaddr *)&q->addr,
		&q->addrlen);
	if (received == -1) {
		if (errno != EAGAIN && errno != EINTR) {
			log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno));
			STATUP(data->nsd, rxerr);
			/* No zone statup */
		}
		return;
	}
#endif /* NONBLOCKING_IS_BROKEN || !HAVE_RECVMMSG */

	/* Account... */
	if (data->socket->addr->ai_family == AF_INET) {
		STATUP(data->nsd, qudp);
	} else if (data->socket->addr->ai_family == AF_INET6) {
		STATUP(data->nsd, qudp6);
	}

	buffer_skip(q->packet, received);
	buffer_flip(q->packet);

	/* Process and answer the query... */
	if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) {
		if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
			STATUP(data->nsd, nona);
			ZTATUP(data->nsd, q->zone, nona);
		}

#ifdef USE_ZONE_STATS
		if (data->socket->addr->ai_family == AF_INET) {
			ZTATUP(data->nsd, q->zone, qudp);
		} else if (data->socket->addr->ai_family == AF_INET6) {
			ZTATUP(data->nsd, q->zone, qudp6);
		}
#endif

		/* Add EDNS0 and TSIG info if necessary. */
		query_add_optional(q, data->nsd);

		buffer_flip(q->packet);

		sent = sendto(fd,
			buffer_begin(q->packet),
			buffer_remaining(q->packet),
			0,
			(struct sockaddr *) &q->addr,
			q->addrlen);
		if (sent == -1) {
			const char* es = strerror(errno);
			char a[48];
			addr2str(&q->addr, a, sizeof(a));
			log_msg(LOG_ERR, "sendto %s failed: %s", a, es);
			STATUP(data->nsd, txerr);
			ZTATUP(data->nsd, q->zone, txerr);
		} else if ((size_t) sent != buffer_remaining(q->packet)) {
			log_msg(LOG_ERR, "sent %d in place of %d bytes",
				sent, (int) buffer_remaining(q->packet));
		} else {
#ifdef BIND8_STATS
			/* Account the rcode & TC... */
			STATUP2(data->nsd, rcode, RCODE(q->packet));
			ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
			if (TC(q->packet)) {
				STATUP(data->nsd, truncated);
				ZTATUP(data->nsd, q->zone, truncated);
			}
#endif /* BIND8_STATS */
		}
	} else {
		STATUP(data->nsd, dropped);
		ZTATUP(data->nsd, q->zone, dropped);
	}
#ifndef NONBLOCKING_IS_BROKEN
#ifdef HAVE_RECVMMSG
	query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
#endif
	}
#endif
}
#endif /* defined(HAVE_SENDMMSG) && !defined(NONBLOCKING_IS_BROKEN) && defined(HAVE_RECVMMSG) */
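/*
 * The receive paths above rely on nsd's buffer discipline (see
 * buffer.h): after the kernel deposits 'received' bytes at the
 * current position, buffer_skip() advances past them and
 * buffer_flip() turns the buffer around for reading.  A small
 * sketch of the pattern, with the semantics as assumed here:
 */
#if 0 /* illustrative sketch only */
	buffer_clear(q->packet);          /* position = 0, limit = capacity */
	/* ... 'received' bytes land at buffer_begin(q->packet) ... */
	buffer_skip(q->packet, received); /* position += received */
	buffer_flip(q->packet);           /* limit = position, position = 0 */
	/* now buffer_remaining() == received, ready for parsing */
#endif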

static void
cleanup_tcp_handler(struct tcp_handler_data* data)
{
	event_del(&data->event);
	close(data->event.ev_fd);

	/*
	 * Enable the TCP accept handlers when the current number of
	 * TCP connections is about to drop below the maximum number
	 * of TCP connections.
	 */
	if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
		configure_handler_event_types(EV_READ|EV_PERSIST);
		slowaccept = 0;
	}
	--data->nsd->current_tcp_count;
	assert(data->nsd->current_tcp_count >= 0);

	region_destroy(data->region);
}
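/*
 * DNS over TCP prefixes every message with a two-octet length field
 * (RFC 1035, section 4.2.2); the reader below collects that prefix
 * first and then the payload, resuming wherever a previous call
 * stopped.  A minimal blocking-socket sketch of the framing
 * (hypothetical names; short reads not handled):
 */
#if 0 /* illustrative sketch only */
static int
example_read_tcp_message(int fd, uint8_t* buf, size_t bufsz, uint16_t* len)
{
	uint16_t n_len;
	/* read the 2-byte, network-order length prefix */
	if (read(fd, &n_len, sizeof(n_len)) != sizeof(n_len))
		return 0;
	*len = ntohs(n_len);
	if (*len > bufsz)
		return 0; /* message larger than our buffer */
	/* then read exactly *len payload bytes */
	return read(fd, buf, *len) == (ssize_t)*len;
}
#endif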
static void
handle_tcp_reading(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t received;
	struct event_base* ev_base;
	struct timeval timeout;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out. */
		cleanup_tcp_handler(data);
		return;
	}

	if (data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) {
		/* No more queries allowed on this tcp connection. */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_READ));

	if (data->bytes_transmitted == 0) {
		query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
	}

	/*
	 * Check if we received the leading packet length bytes yet.
	 */
	if (data->bytes_transmitted < sizeof(uint16_t)) {
		received = read(fd,
			(char *) &data->query->tcplen
			+ data->bytes_transmitted,
			sizeof(uint16_t) - data->bytes_transmitted);
		if (received == -1) {
			if (errno == EAGAIN || errno == EINTR) {
				/*
				 * Read would block, wait until more
				 * data is available.
				 */
				return;
			} else {
				char buf[48];
				addr2str(&data->query->addr, buf, sizeof(buf));
#ifdef ECONNRESET
				if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
				log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
				cleanup_tcp_handler(data);
				return;
			}
		} else if (received == 0) {
			/* EOF */
			cleanup_tcp_handler(data);
			return;
		}

		data->bytes_transmitted += received;
		if (data->bytes_transmitted < sizeof(uint16_t)) {
			/*
			 * Not done with the tcplen yet, wait for more
			 * data to become available.
			 */
			return;
		}

		assert(data->bytes_transmitted == sizeof(uint16_t));

		data->query->tcplen = ntohs(data->query->tcplen);

		/*
		 * Minimum query size is:
		 *
		 *     Size of the header (12)
		 *   + Root domain name   (1)
		 *   + Query class        (2)
		 *   + Query type         (2)
		 */
		if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
			VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
			cleanup_tcp_handler(data);
			return;
		}

		if (data->query->tcplen > data->query->maxlen) {
			VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
			cleanup_tcp_handler(data);
			return;
		}

		buffer_set_limit(data->query->packet, data->query->tcplen);
	}

	assert(buffer_remaining(data->query->packet) > 0);

	/* Read the (remaining) query data. */
	received = read(fd,
		buffer_current(data->query->packet),
		buffer_remaining(data->query->packet));
	if (received == -1) {
		if (errno == EAGAIN || errno == EINTR) {
			/*
			 * Read would block, wait until more data is
			 * available.
			 */
			return;
		} else {
			char buf[48];
			addr2str(&data->query->addr, buf, sizeof(buf));
#ifdef ECONNRESET
			if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
			log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
			cleanup_tcp_handler(data);
			return;
		}
	} else if (received == 0) {
		/* EOF */
		cleanup_tcp_handler(data);
		return;
	}

	data->bytes_transmitted += received;
	buffer_skip(data->query->packet, received);
	if (buffer_remaining(data->query->packet) > 0) {
		/*
		 * Message not yet complete, wait for more data to
		 * become available.
		 */
		return;
	}

	assert(buffer_position(data->query->packet) == data->query->tcplen);

	/* Account... */
#ifdef BIND8_STATS
#ifndef INET6
	STATUP(data->nsd, ctcp);
#else
	if (data->query->addr.ss_family == AF_INET) {
		STATUP(data->nsd, ctcp);
	} else if (data->query->addr.ss_family == AF_INET6) {
		STATUP(data->nsd, ctcp6);
	}
#endif
#endif /* BIND8_STATS */

	/* We have a complete query, process it. */

	/* tcp-query-count: handle query counter ++ */
	data->query_count++;

	buffer_flip(data->query->packet);
	data->query_state = server_process_query(data->nsd, data->query);
	if (data->query_state == QUERY_DISCARDED) {
		/* Drop the packet and the entire connection... */
		STATUP(data->nsd, dropped);
		ZTATUP(data->nsd, data->query->zone, dropped);
		cleanup_tcp_handler(data);
		return;
	}

#ifdef BIND8_STATS
	if (RCODE(data->query->packet) == RCODE_OK
		&& !AA(data->query->packet))
	{
		STATUP(data->nsd, nona);
		ZTATUP(data->nsd, data->query->zone, nona);
	}
#endif /* BIND8_STATS */

#ifdef USE_ZONE_STATS
#ifndef INET6
	ZTATUP(data->nsd, data->query->zone, ctcp);
#else
	if (data->query->addr.ss_family == AF_INET) {
		ZTATUP(data->nsd, data->query->zone, ctcp);
	} else if (data->query->addr.ss_family == AF_INET6) {
		ZTATUP(data->nsd, data->query->zone, ctcp6);
	}
#endif
#endif /* USE_ZONE_STATS */

	query_add_optional(data->query, data->nsd);

	/* Switch to the tcp write handler. */
	buffer_flip(data->query->packet);
	data->query->tcplen = buffer_remaining(data->query->packet);
	data->bytes_transmitted = 0;

	timeout.tv_sec = data->nsd->tcp_timeout;
	timeout.tv_usec = 0L;

	ev_base = data->event.ev_base;
	event_del(&data->event);
	event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
		handle_tcp_writing, data);
	if(event_base_set(ev_base, &data->event) != 0)
		log_msg(LOG_ERR, "event base set tcpr failed");
	if(event_add(&data->event, &timeout) != 0)
		log_msg(LOG_ERR, "event add tcpr failed");
	/* see if we can write the answer right away (usually so, EAGAIN if not) */
	handle_tcp_writing(fd, EV_WRITE, data);
}
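/*
 * Switching a persistent event between read and write interest, as
 * done at the end of the reader above and again in the writer below,
 * follows a fixed event_del/event_set/event_base_set/event_add
 * dance.  A condensed sketch of just that dance (hypothetical
 * handler):
 */
#if 0 /* illustrative sketch only */
static void
example_switch_to_write(struct event* ev, int fd, struct timeval* timeout,
	void (*cb)(int, short, void*), void* arg)
{
	struct event_base* base = ev->ev_base; /* remember the base */
	event_del(ev);                         /* detach old interest */
	event_set(ev, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT, cb, arg);
	if(event_base_set(base, ev) != 0)      /* reattach to the base */
		log_msg(LOG_ERR, "example: event_base_set failed");
	if(event_add(ev, timeout) != 0)        /* arm with a fresh timeout */
		log_msg(LOG_ERR, "example: event_add failed");
}
#endif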

static void
handle_tcp_writing(int fd, short event, void* arg)
{
	struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
	ssize_t sent;
	struct query *q = data->query;
	struct timeval timeout;
	struct event_base* ev_base;

	if ((event & EV_TIMEOUT)) {
		/* Connection timed out. */
		cleanup_tcp_handler(data);
		return;
	}

	assert((event & EV_WRITE));

	if (data->bytes_transmitted < sizeof(q->tcplen)) {
		/* Writing the response packet length. */
		uint16_t n_tcplen = htons(q->tcplen);
#ifdef HAVE_WRITEV
		struct iovec iov[2];
		iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted;
		iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted;
		iov[1].iov_base = buffer_begin(q->packet);
		iov[1].iov_len = buffer_limit(q->packet);
		sent = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
		sent = write(fd,
			(const char *) &n_tcplen + data->bytes_transmitted,
			sizeof(n_tcplen) - data->bytes_transmitted);
#endif /* HAVE_WRITEV */
		if (sent == -1) {
			if (errno == EAGAIN || errno == EINTR) {
				/*
				 * Write would block, wait until
				 * socket becomes writable again.
				 */
				return;
			} else {
#ifdef ECONNRESET
				if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
				if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
				log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
				cleanup_tcp_handler(data);
				return;
			}
		}

		data->bytes_transmitted += sent;
		if (data->bytes_transmitted < sizeof(q->tcplen)) {
			/*
			 * Writing not complete, wait until socket
			 * becomes writable again.
			 */
			return;
		}

#ifdef HAVE_WRITEV
		sent -= sizeof(n_tcplen);
		/* the writev above covered the payload too; skip the
		 * extra write below and go straight to the completion
		 * check */
		goto packet_could_be_done;
#endif
	}

	sent = write(fd,
		buffer_current(q->packet),
		buffer_remaining(q->packet));
	if (sent == -1) {
		if (errno == EAGAIN || errno == EINTR) {
			/*
			 * Write would block, wait until
			 * socket becomes writable again.
			 */
			return;
		} else {
#ifdef ECONNRESET
			if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
			if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
			log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
			cleanup_tcp_handler(data);
			return;
		}
	}

	data->bytes_transmitted += sent;
#ifdef HAVE_WRITEV
  packet_could_be_done:
#endif
	buffer_skip(q->packet, sent);
	if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
		/*
		 * Still more data to write when socket becomes
		 * writable again.
		 */
		return;
	}

	assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));
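	/*
	 * The writev() path above sends the length prefix and the
	 * payload in one gather write, avoiding a separate tiny
	 * write() for the two length octets.  A standalone sketch of
	 * the same gather write (payload and payload_len are
	 * hypothetical names):
	 */
#if 0 /* illustrative sketch only */
	{
		uint16_t n_len = htons((uint16_t)payload_len);
		struct iovec parts[2];
		parts[0].iov_base = &n_len;        /* 2-byte length prefix */
		parts[0].iov_len = sizeof(n_len);
		parts[1].iov_base = payload;       /* message body */
		parts[1].iov_len = payload_len;
		/* a single syscall, though it may still send only part */
		sent = writev(fd, parts, 2);
	}
#endif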

	if (data->query_state == QUERY_IN_AXFR) {
		/* Continue processing AXFR and writing back results. */
		buffer_clear(q->packet);
		data->query_state = query_axfr(data->nsd, q);
		if (data->query_state != QUERY_PROCESSED) {
			query_add_optional(data->query, data->nsd);

			/* Reset data. */
			buffer_flip(q->packet);
			q->tcplen = buffer_remaining(q->packet);
			data->bytes_transmitted = 0;
			/* Reset timeout. */
			timeout.tv_sec = data->nsd->tcp_timeout;
			timeout.tv_usec = 0L;
			ev_base = data->event.ev_base;
			event_del(&data->event);
			event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
				handle_tcp_writing, data);
			if(event_base_set(ev_base, &data->event) != 0)
				log_msg(LOG_ERR, "event base set tcpw failed");
			if(event_add(&data->event, &timeout) != 0)
				log_msg(LOG_ERR, "event add tcpw failed");

			/*
			 * Write data if/when the socket is writable
			 * again.
			 */
			return;
		}
	}

	/*
	 * Done sending, wait for the next request to arrive on the
	 * TCP socket by installing the TCP read handler.
	 */
	if (data->nsd->tcp_query_count > 0 &&
		data->query_count >= data->nsd->tcp_query_count) {

		(void) shutdown(fd, SHUT_WR);
	}

	data->bytes_transmitted = 0;

	timeout.tv_sec = data->nsd->tcp_timeout;
	timeout.tv_usec = 0L;
	ev_base = data->event.ev_base;
	event_del(&data->event);
	event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
		handle_tcp_reading, data);
	if(event_base_set(ev_base, &data->event) != 0)
		log_msg(LOG_ERR, "event base set tcpw failed");
	if(event_add(&data->event, &timeout) != 0)
		log_msg(LOG_ERR, "event add tcpw failed");
}


static void
handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event),
	void* ATTR_UNUSED(arg))
{
	if(slowaccept) {
		configure_handler_event_types(EV_PERSIST | EV_READ);
		slowaccept = 0;
	}
}
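/*
 * handle_slowaccept_timeout() is the second half of a backoff scheme:
 * when accept() fails with EMFILE/ENFILE, the accept handler below
 * disables itself and arms a one-shot timer that re-enables accepting
 * later.  The control flow, reduced to a sketch ('base' stands for
 * the relevant event base, assumed in scope):
 */
#if 0 /* illustrative sketch only */
	/* in the accept path, on EMFILE/ENFILE: */
	if (!slowaccept) {
		struct timeval tv;
		configure_handler_event_types(0);	/* stop accepting */
		tv.tv_sec = SLOW_ACCEPT_TIMEOUT;
		tv.tv_usec = 0L;
		event_set(&slowaccept_event, -1, EV_TIMEOUT,
			handle_slowaccept_timeout, NULL);
		(void)event_base_set(base, &slowaccept_event);
		(void)event_add(&slowaccept_event, &tv);
		slowaccept = 1;
	}
	/* when the timer fires, handle_slowaccept_timeout() restores
	 * EV_PERSIST|EV_READ interest on the accept handlers */
#endif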

/*
 * Handle an incoming TCP connection.  The connection is accepted and
 * a new TCP reader event handler is added.  The TCP handler
 * is responsible for cleanup when the connection is closed.
 */
static void
handle_tcp_accept(int fd, short event, void* arg)
{
	struct tcp_accept_handler_data *data
		= (struct tcp_accept_handler_data *) arg;
	int s;
	struct tcp_handler_data *tcp_data;
	region_type *tcp_region;
#ifdef INET6
	struct sockaddr_storage addr;
#else
	struct sockaddr_in addr;
#endif
	socklen_t addrlen;
	struct timeval timeout;

	if (!(event & EV_READ)) {
		return;
	}

	if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) {
		return;
	}

	/* Accept it... */
	addrlen = sizeof(addr);
	s = accept(fd, (struct sockaddr *) &addr, &addrlen);
	if (s == -1) {
		/*
		 * EMFILE and ENFILE signal that the limit of open
		 * file descriptors has been reached; pause accept().
		 * EINTR is a signal interrupt.  The others are various
		 * OS ways of saying that the client has closed the
		 * connection.
		 */
		if (errno == EMFILE || errno == ENFILE) {
			if (!slowaccept) {
				/* disable accept events */
				struct timeval tv;
				configure_handler_event_types(0);
				tv.tv_sec = SLOW_ACCEPT_TIMEOUT;
				tv.tv_usec = 0L;
				event_set(&slowaccept_event, -1, EV_TIMEOUT,
					handle_slowaccept_timeout, NULL);
				(void)event_base_set(data->event.ev_base,
					&slowaccept_event);
				(void)event_add(&slowaccept_event, &tv);
				slowaccept = 1;
				/* We don't want to spam the logs here */
			}
		} else if (errno != EINTR
			&& errno != EWOULDBLOCK
#ifdef ECONNABORTED
			&& errno != ECONNABORTED
#endif /* ECONNABORTED */
#ifdef EPROTO
			&& errno != EPROTO
#endif /* EPROTO */
			) {
			log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
		}
		return;
	}

	if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
		log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
		close(s);
		return;
	}

	/*
	 * This region is deallocated when the TCP connection is
	 * closed by the TCP handler.
	 */
	tcp_region = region_create(xalloc, free);
	tcp_data = (struct tcp_handler_data *) region_alloc(
		tcp_region, sizeof(struct tcp_handler_data));
	tcp_data->region = tcp_region;
	tcp_data->query = query_create(tcp_region, compressed_dname_offsets,
		compression_table_size);
	tcp_data->nsd = data->nsd;
	tcp_data->query_count = 0;

	tcp_data->query_state = QUERY_PROCESSED;
	tcp_data->bytes_transmitted = 0;
	memcpy(&tcp_data->query->addr, &addr, addrlen);
	tcp_data->query->addrlen = addrlen;

	timeout.tv_sec = data->nsd->tcp_timeout;
	timeout.tv_usec = 0;

	event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
		handle_tcp_reading, tcp_data);
	if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) {
		log_msg(LOG_ERR, "cannot set tcp event base");
		close(s);
		region_destroy(tcp_region);
		return;
	}
	if(event_add(&tcp_data->event, &timeout) != 0) {
		log_msg(LOG_ERR, "cannot add tcp to event base");
		close(s);
		region_destroy(tcp_region);
		return;
	}

	/*
	 * Keep track of the total number of TCP handlers installed so
	 * we can stop accepting connections when the maximum number
	 * of simultaneous TCP connections is reached.
	 */
	++data->nsd->current_tcp_count;
	if (data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
		configure_handler_event_types(0);
	}
}

static void
send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout)
{
	size_t i;
	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) {
			if (write(nsd->children[i].child_fd,
				&command,
				sizeof(command)) == -1)
			{
				if(errno != EAGAIN && errno != EINTR)
					log_msg(LOG_ERR, "problems sending command %d to server %d: %s",
						(int) command,
						(int) nsd->children[i].pid,
						strerror(errno));
			} else if (timeout > 0) {
				(void)block_read(NULL,
					nsd->children[i].child_fd,
					&command, sizeof(command), timeout);
			}
			fsync(nsd->children[i].child_fd);
			close(nsd->children[i].child_fd);
			nsd->children[i].child_fd = -1;
		}
	}
}

static void
send_children_quit(struct nsd* nsd)
{
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit"));
	send_children_command(nsd, NSD_QUIT, 0);
}

static void
send_children_quit_and_wait(struct nsd* nsd)
{
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait"));
	send_children_command(nsd, NSD_QUIT_CHILD, 3);
}

#ifdef BIND8_STATS
static void
set_children_stats(struct nsd* nsd)
{
	size_t i;
	assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
	DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children"));
	for (i = 0; i < nsd->child_count; ++i) {
		nsd->children[i].need_to_send_STATS = 1;
		nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE;
	}
}
#endif /* BIND8_STATS */

static void
configure_handler_event_types(short event_types)
{
	size_t i;

	for (i = 0; i < tcp_accept_handler_count; ++i) {
		struct event* handler = &tcp_accept_handlers[i].event;
		if(event_types) {
			/* reassign */
			int fd = handler->ev_fd;
			struct event_base* base = handler->ev_base;
			if(tcp_accept_handlers[i].event_added)
				event_del(handler);
			event_set(handler, fd, event_types,
				handle_tcp_accept, &tcp_accept_handlers[i]);
			if(event_base_set(base, handler) != 0)
				log_msg(LOG_ERR, "conhand: cannot event_base");
			if(event_add(handler, NULL) != 0)
				log_msg(LOG_ERR, "conhand: cannot event_add");
			tcp_accept_handlers[i].event_added = 1;
		} else {
			/* remove */
			if(tcp_accept_handlers[i].event_added) {
				event_del(handler);
				tcp_accept_handlers[i].event_added = 0;
			}
		}
	}
}
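
/*
 * Usage of configure_handler_event_types(), as seen at the call
 * sites above: pass 0 to stop accepting new TCP connections (at the
 * connection limit, or on fd exhaustion) and EV_READ|EV_PERSIST to
 * resume.  Illustrative recap only:
 */
#if 0 /* illustrative sketch only */
	configure_handler_event_types(0);                  /* pause accepts */
	/* ... connections close, or the slow-accept timer fires ... */
	configure_handler_event_types(EV_READ|EV_PERSIST); /* resume accepts */
#endif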