/* $NetBSD: dispatch.c,v 1.6 2020/08/03 17:23:41 christos Exp $ */

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

/*! \file */

#include <inttypes.h>
#include <stdbool.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/portset.h>
#include <isc/print.h>
#include <isc/random.h>
#include <isc/socket.h>
#include <isc/stats.h>
#include <isc/string.h>
#include <isc/task.h>
#include <isc/time.h>
#include <isc/util.h>

#include <dns/acl.h>
#include <dns/dispatch.h>
#include <dns/events.h>
#include <dns/log.h>
#include <dns/message.h>
#include <dns/portlist.h>
#include <dns/stats.h>
#include <dns/tcpmsg.h>
#include <dns/types.h>

/* List types used for the hash buckets and per-dispatch bookkeeping. */
typedef ISC_LIST(dns_dispentry_t) dns_displist_t;

typedef struct dispsocket dispsocket_t;
typedef ISC_LIST(dispsocket_t) dispsocketlist_t;

typedef struct dispportentry dispportentry_t;
typedef ISC_LIST(dispportentry_t) dispportlist_t;

/*%
 * Query-ID table.  'qid_table' and 'sock_table' are arrays of list heads
 * that are indexed by a bucket number smaller than 'qid_nbuckets'; the
 * lookups in this file are done while holding 'lock'.
 */
typedef struct dns_qid {
	unsigned int magic;
	unsigned int qid_nbuckets;  /*%< hash table size */
	unsigned int qid_increment; /*%< id increment on collision */
	isc_mutex_t lock;
	dns_displist_t *qid_table;    /*%< the table itself */
	dispsocketlist_t *sock_table; /*%< socket table */
} dns_qid_t;

struct dns_dispatchmgr {
	/* Unlocked. */
	unsigned int magic;
	isc_mem_t *mctx;
	dns_acl_t *blackhole;
	dns_portlist_t *portlist;
	isc_stats_t *stats;

	/* Locked by "lock". */
	isc_mutex_t lock;
	unsigned int state;
	ISC_LIST(dns_dispatch_t) list;

	/* locked by buffer_lock */
	dns_qid_t *qid;
	isc_mutex_t buffer_lock;
	unsigned int buffers;	 /*%< allocated buffers */
	unsigned int buffersize; /*%< size of each buffer */
	unsigned int maxbuffers; /*%< max buffers */

	/* Locked internally. */
	isc_mutex_t depool_lock;
	isc_mempool_t *depool; /*%< pool for dispatch events */
	isc_mutex_t rpool_lock;
	isc_mempool_t *rpool; /*%< pool for replies */
	isc_mutex_t dpool_lock;
	isc_mempool_t *dpool; /*%< dispatch allocations */
	isc_mutex_t bpool_lock;
	isc_mempool_t *bpool; /*%< pool for buffers */
	isc_mutex_t spool_lock;
	isc_mempool_t *spool; /*%< pool for dispsocks */

	/*%
	 * Locked by qid->lock if qid exists; otherwise, can be used without
	 * being locked.
	 * Memory footprint considerations: this is a simple implementation of
	 * available ports, i.e., an ordered array of the actual port numbers.
	 * This will require about 256KB of memory in the worst case (128KB for
	 * each of IPv4 and IPv6).  We could reduce it by representing it in a
	 * more sophisticated way such as a list (or array) of ranges that are
	 * searched to identify a specific port.  Our decision here is the saved
	 * memory isn't worth the implementation complexity, considering the
	 * fact that the whole BIND9 process (which is mainly named) already
	 * requires a pretty large memory footprint.  We may, however, have to
	 * revisit the decision when we want to use it as a separate module for
	 * an environment where memory requirements are more severe.
	 */
	in_port_t *v4ports;    /*%< available ports for IPv4 */
	unsigned int nv4ports; /*%< # of available ports for IPv4 */
	in_port_t *v6ports;    /*%< available ports for IPv6 */
	unsigned int nv6ports; /*%< # of available ports for IPv6 */
};

/* Bit in dns_dispatchmgr.state and the predicate that tests it. */
#define MGR_SHUTTINGDOWN       0x00000001U
#define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)

/* True when the dispatch carries the DNS_DISPATCHATTR_PRIVATE attribute. */
#define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)

/*%
 * One outstanding request ("response entry").  Entries are chained through
 * 'link' into the qid_table bucket recorded in 'bucket' and are matched on
 * (id, host, port) by entry_search().
 */
struct dns_dispentry {
	unsigned int magic;
	dns_dispatch_t *disp;
	dns_messageid_t id;
	in_port_t port;
	unsigned int bucket;
	isc_sockaddr_t host;
	isc_task_t *task;
	isc_taskaction_t action;
	void *arg;
	bool item_out;
	dispsocket_t *dispsocket;
	ISC_LIST(dns_dispatchevent_t) items;
	ISC_LINK(dns_dispentry_t) link;
};

/*%
 * Maximum number of dispatch sockets that can be pooled for reuse.  The
 * appropriate value may vary, but experiments have shown a busy caching server
 * may need more than 1000 sockets concurrently opened.  The maximum allowable
 * number of dispatch sockets (per manager) will be set to the double of this
 * value.
 */
#ifndef DNS_DISPATCH_POOLSOCKS
#define DNS_DISPATCH_POOLSOCKS 2048
#endif /* ifndef DNS_DISPATCH_POOLSOCKS */

/*%
 * Quota to control the number of dispatch sockets.  If a dispatch has more
 * than the quota of sockets, new queries will purge oldest ones, so that
 * a massive number of outstanding queries won't prevent subsequent queries
 * (especially if the older ones take longer time and result in timeout).
151 */ 152 #ifndef DNS_DISPATCH_SOCKSQUOTA 153 #define DNS_DISPATCH_SOCKSQUOTA 3072 154 #endif /* ifndef DNS_DISPATCH_SOCKSQUOTA */ 155 156 struct dispsocket { 157 unsigned int magic; 158 isc_socket_t *socket; 159 dns_dispatch_t *disp; 160 isc_sockaddr_t host; 161 in_port_t localport; /* XXX: should be removed later */ 162 dispportentry_t *portentry; 163 dns_dispentry_t *resp; 164 isc_task_t *task; 165 ISC_LINK(dispsocket_t) link; 166 unsigned int bucket; 167 ISC_LINK(dispsocket_t) blink; 168 }; 169 170 /*% 171 * A port table entry. We remember every port we first open in a table with a 172 * reference counter so that we can 'reuse' the same port (with different 173 * destination addresses) using the SO_REUSEADDR socket option. 174 */ 175 struct dispportentry { 176 in_port_t port; 177 isc_refcount_t refs; 178 ISC_LINK(struct dispportentry) link; 179 }; 180 181 #ifndef DNS_DISPATCH_PORTTABLESIZE 182 #define DNS_DISPATCH_PORTTABLESIZE 1024 183 #endif /* ifndef DNS_DISPATCH_PORTTABLESIZE */ 184 185 #define INVALID_BUCKET (0xffffdead) 186 187 /*% 188 * Number of tasks for each dispatch that use separate sockets for different 189 * transactions. This must be a power of 2 as it will divide 32 bit numbers 190 * to get an uniformly random tasks selection. See get_dispsocket(). 191 */ 192 #define MAX_INTERNAL_TASKS 64 193 194 struct dns_dispatch { 195 /* Unlocked. */ 196 unsigned int magic; /*%< magic */ 197 dns_dispatchmgr_t *mgr; /*%< dispatch manager */ 198 int ntasks; 199 /*% 200 * internal task buckets. We use multiple tasks to distribute various 201 * socket events well when using separate dispatch sockets. We use the 202 * 1st task (task[0]) for internal control events. 
203 */ 204 isc_task_t *task[MAX_INTERNAL_TASKS]; 205 isc_socket_t *socket; /*%< isc socket attached to */ 206 isc_sockaddr_t local; /*%< local address */ 207 in_port_t localport; /*%< local UDP port */ 208 isc_sockaddr_t peer; /*%< peer address (TCP) */ 209 isc_dscp_t dscp; /*%< "listen-on" DSCP value */ 210 unsigned int maxrequests; /*%< max requests */ 211 isc_event_t *ctlevent; 212 213 isc_mutex_t sepool_lock; 214 isc_mempool_t *sepool; /*%< pool for socket events */ 215 216 /*% Locked by mgr->lock. */ 217 ISC_LINK(dns_dispatch_t) link; 218 219 /* Locked by "lock". */ 220 isc_mutex_t lock; /*%< locks all below */ 221 isc_sockettype_t socktype; 222 unsigned int attributes; 223 unsigned int refcount; /*%< number of users */ 224 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */ 225 unsigned int shutting_down : 1, shutdown_out : 1, connected : 1, 226 tcpmsg_valid : 1, recv_pending : 1; /*%< is a 227 * recv() 228 * pending? 229 * */ 230 isc_result_t shutdown_why; 231 ISC_LIST(dispsocket_t) activesockets; 232 ISC_LIST(dispsocket_t) inactivesockets; 233 unsigned int nsockets; 234 unsigned int requests; /*%< how many requests we have */ 235 unsigned int tcpbuffers; /*%< allocated buffers */ 236 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */ 237 dns_qid_t *qid; 238 dispportlist_t *port_table; /*%< hold ports 'owned' by us */ 239 isc_mempool_t *portpool; /*%< port table entries */ 240 }; 241 242 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ') 243 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC) 244 245 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') 246 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC) 247 248 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c') 249 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC) 250 251 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p') 252 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC) 253 254 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r') 255 #define 
VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC) 256 257 #define DNS_QID(disp) \ 258 ((disp)->socktype == isc_sockettype_tcp) ? (disp)->qid \ 259 : (disp)->mgr->qid 260 261 /*% 262 * Locking a query port buffer is a bit tricky. We access the buffer without 263 * locking until qid is created. Technically, there is a possibility of race 264 * between the creation of qid and access to the port buffer; in practice, 265 * however, this should be safe because qid isn't created until the first 266 * dispatch is created and there should be no contending situation until then. 267 */ 268 #define PORTBUFLOCK(mgr) \ 269 if ((mgr)->qid != NULL) \ 270 LOCK(&((mgr)->qid->lock)) 271 #define PORTBUFUNLOCK(mgr) \ 272 if ((mgr)->qid != NULL) \ 273 UNLOCK((&(mgr)->qid->lock)) 274 275 /* 276 * Statics. 277 */ 278 static dns_dispentry_t * 279 entry_search(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t, 280 unsigned int); 281 static bool 282 destroy_disp_ok(dns_dispatch_t *); 283 static void 284 destroy_disp(isc_task_t *task, isc_event_t *event); 285 static void 286 destroy_dispsocket(dns_dispatch_t *, dispsocket_t **); 287 static void 288 deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *); 289 static void 290 udp_exrecv(isc_task_t *, isc_event_t *); 291 static void 292 udp_shrecv(isc_task_t *, isc_event_t *); 293 static void 294 udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *); 295 static void 296 tcp_recv(isc_task_t *, isc_event_t *); 297 static isc_result_t 298 startrecv(dns_dispatch_t *, dispsocket_t *); 299 static uint32_t 300 dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t); 301 static void 302 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len); 303 static void * 304 allocate_udp_buffer(dns_dispatch_t *disp); 305 static inline void 306 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev); 307 static inline dns_dispatchevent_t * 308 allocate_devent(dns_dispatch_t *disp); 309 static void 310 
do_cancel(dns_dispatch_t *disp); 311 static dns_dispentry_t * 312 linear_first(dns_qid_t *disp); 313 static dns_dispentry_t * 314 linear_next(dns_qid_t *disp, dns_dispentry_t *resp); 315 static void 316 dispatch_free(dns_dispatch_t **dispp); 317 static isc_result_t 318 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, 319 isc_socketmgr_t *sockmgr, const isc_sockaddr_t *localaddr, 320 isc_socket_t **sockp, isc_socket_t *dup_socket, bool duponly); 321 static isc_result_t 322 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 323 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 324 unsigned int maxrequests, unsigned int attributes, 325 dns_dispatch_t **dispp, isc_socket_t *dup_socket); 326 static bool 327 destroy_mgr_ok(dns_dispatchmgr_t *mgr); 328 static void 329 destroy_mgr(dns_dispatchmgr_t **mgrp); 330 static isc_result_t 331 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, 332 unsigned int increment, dns_qid_t **qidp, bool needaddrtable); 333 static void 334 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); 335 static isc_result_t 336 open_socket(isc_socketmgr_t *mgr, const isc_sockaddr_t *local, 337 unsigned int options, isc_socket_t **sockp, 338 isc_socket_t *dup_socket, bool duponly); 339 static bool 340 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 341 isc_sockaddr_t *sockaddrp); 342 343 #define LVL(x) ISC_LOG_DEBUG(x) 344 345 static void 346 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) 347 ISC_FORMAT_PRINTF(3, 4); 348 349 static void 350 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) 
{ 351 char msgbuf[2048]; 352 va_list ap; 353 354 if (!isc_log_wouldlog(dns_lctx, level)) { 355 return; 356 } 357 358 va_start(ap, fmt); 359 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 360 va_end(ap); 361 362 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 363 DNS_LOGMODULE_DISPATCH, level, "dispatchmgr %p: %s", mgr, 364 msgbuf); 365 } 366 367 static inline void 368 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { 369 if (mgr->stats != NULL) { 370 isc_stats_increment(mgr->stats, counter); 371 } 372 } 373 374 static inline void 375 dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { 376 if (mgr->stats != NULL) { 377 isc_stats_decrement(mgr->stats, counter); 378 } 379 } 380 381 static void 382 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) 383 ISC_FORMAT_PRINTF(3, 4); 384 385 static void 386 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { 387 char msgbuf[2048]; 388 va_list ap; 389 390 if (!isc_log_wouldlog(dns_lctx, level)) { 391 return; 392 } 393 394 va_start(ap, fmt); 395 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 396 va_end(ap); 397 398 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 399 DNS_LOGMODULE_DISPATCH, level, "dispatch %p: %s", disp, 400 msgbuf); 401 } 402 403 static void 404 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level, 405 const char *fmt, ...) ISC_FORMAT_PRINTF(4, 5); 406 407 static void 408 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level, 409 const char *fmt, ...) 
{ 410 char msgbuf[2048]; 411 char peerbuf[256]; 412 va_list ap; 413 414 if (!isc_log_wouldlog(dns_lctx, level)) { 415 return; 416 } 417 418 va_start(ap, fmt); 419 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 420 va_end(ap); 421 422 if (VALID_RESPONSE(resp)) { 423 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf)); 424 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 425 DNS_LOGMODULE_DISPATCH, level, 426 "dispatch %p response %p %s: %s", disp, resp, 427 peerbuf, msgbuf); 428 } else { 429 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 430 DNS_LOGMODULE_DISPATCH, level, 431 "dispatch %p req/resp %p: %s", disp, resp, 432 msgbuf); 433 } 434 } 435 436 /* 437 * Return a hash of the destination and message id. 438 */ 439 static uint32_t 440 dns_hash(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, 441 in_port_t port) { 442 uint32_t ret; 443 444 ret = isc_sockaddr_hash(dest, true); 445 ret ^= ((uint32_t)id << 16) | port; 446 ret %= qid->qid_nbuckets; 447 448 INSIST(ret < qid->qid_nbuckets); 449 450 return (ret); 451 } 452 453 /* 454 * Find the first entry in 'qid'. Returns NULL if there are no entries. 455 */ 456 static dns_dispentry_t * 457 linear_first(dns_qid_t *qid) { 458 dns_dispentry_t *ret; 459 unsigned int bucket; 460 461 bucket = 0; 462 463 while (bucket < qid->qid_nbuckets) { 464 ret = ISC_LIST_HEAD(qid->qid_table[bucket]); 465 if (ret != NULL) { 466 return (ret); 467 } 468 bucket++; 469 } 470 471 return (NULL); 472 } 473 474 /* 475 * Find the next entry after 'resp' in 'qid'. Return NULL if there are 476 * no more entries. 
477 */ 478 static dns_dispentry_t * 479 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) { 480 dns_dispentry_t *ret; 481 unsigned int bucket; 482 483 ret = ISC_LIST_NEXT(resp, link); 484 if (ret != NULL) { 485 return (ret); 486 } 487 488 bucket = resp->bucket; 489 bucket++; 490 while (bucket < qid->qid_nbuckets) { 491 ret = ISC_LIST_HEAD(qid->qid_table[bucket]); 492 if (ret != NULL) { 493 return (ret); 494 } 495 bucket++; 496 } 497 498 return (NULL); 499 } 500 501 /* 502 * The dispatch must be locked. 503 */ 504 static bool 505 destroy_disp_ok(dns_dispatch_t *disp) { 506 if (disp->refcount != 0) { 507 return (false); 508 } 509 510 if (disp->recv_pending != 0) { 511 return (false); 512 } 513 514 if (!ISC_LIST_EMPTY(disp->activesockets)) { 515 return (false); 516 } 517 518 if (disp->shutting_down == 0) { 519 return (false); 520 } 521 522 return (true); 523 } 524 525 /* 526 * Called when refcount reaches 0 (and safe to destroy). 527 * 528 * The dispatcher must be locked. 529 * The manager must not be locked. 
*/
/*
 * Task action for the DNS_EVENT_DISPATCHCONTROL event: tear down the
 * dispatch passed in event->ev_arg.
 *
 * While holding mgr->lock this unlinks the dispatch from the manager's
 * list, destroys its socket-event pool, detaches its socket, destroys every
 * pooled (inactive) dispsocket, detaches all of its tasks, and frees the
 * event and the dispatch itself.  If destroy_mgr_ok() then reports that the
 * manager can go away, the manager is destroyed after the lock is released.
 *
 * 'task' is unused.
 */
static void
destroy_disp(isc_task_t *task, isc_event_t *event) {
	dns_dispatch_t *disp;
	dns_dispatchmgr_t *mgr;
	bool killmgr;
	dispsocket_t *dispsocket;
	int i;

	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);

	UNUSED(task);

	disp = event->ev_arg;
	mgr = disp->mgr;

	LOCK(&mgr->lock);
	ISC_LIST_UNLINK(mgr->list, disp, link);

	dispatch_log(disp, LVL(90),
		     "shutting down; detaching from sock %p, task %p",
		     disp->socket, disp->task[0]); /* XXXX */

	/* The pool's mutex is only torn down together with the pool. */
	if (disp->sepool != NULL) {
		isc_mempool_destroy(&disp->sepool);
		isc_mutex_destroy(&disp->sepool_lock);
	}

	if (disp->socket != NULL) {
		isc_socket_detach(&disp->socket);
	}
	/*
	 * destroy_dispsocket() requires the entry to be off the 'link' list,
	 * so unlink each pooled socket before destroying it.
	 */
	while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
		ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
		destroy_dispsocket(disp, &dispsocket);
	}
	for (i = 0; i < disp->ntasks; i++) {
		isc_task_detach(&disp->task[i]);
	}
	isc_event_free(&event);

	dispatch_free(&disp);

	/* Decide under the lock, but destroy the manager only after unlock. */
	killmgr = destroy_mgr_ok(mgr);
	UNLOCK(&mgr->lock);
	if (killmgr) {
		destroy_mgr(&mgr);
	}
}

/*%
 * Manipulate port table per dispatch: find an entry for a given port number,
 * create a new entry, and decrement a given entry with possible clean-up.
582 */ 583 static dispportentry_t * 584 port_search(dns_dispatch_t *disp, in_port_t port) { 585 dispportentry_t *portentry; 586 587 REQUIRE(disp->port_table != NULL); 588 589 portentry = ISC_LIST_HEAD( 590 disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE]); 591 while (portentry != NULL) { 592 if (portentry->port == port) { 593 return (portentry); 594 } 595 portentry = ISC_LIST_NEXT(portentry, link); 596 } 597 598 return (NULL); 599 } 600 601 static dispportentry_t * 602 new_portentry(dns_dispatch_t *disp, in_port_t port) { 603 dispportentry_t *portentry; 604 dns_qid_t *qid; 605 606 REQUIRE(disp->port_table != NULL); 607 608 portentry = isc_mempool_get(disp->portpool); 609 if (portentry == NULL) { 610 return (portentry); 611 } 612 613 portentry->port = port; 614 isc_refcount_init(&portentry->refs, 1); 615 ISC_LINK_INIT(portentry, link); 616 qid = DNS_QID(disp); 617 LOCK(&qid->lock); 618 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE], 619 portentry, link); 620 UNLOCK(&qid->lock); 621 622 return (portentry); 623 } 624 625 /*% 626 * The caller must not hold the qid->lock. 627 */ 628 static void 629 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) { 630 dns_qid_t *qid; 631 dispportentry_t *portentry = *portentryp; 632 *portentryp = NULL; 633 634 REQUIRE(disp->port_table != NULL); 635 REQUIRE(portentry != NULL); 636 637 if (isc_refcount_decrement(&portentry->refs) == 1) { 638 qid = DNS_QID(disp); 639 LOCK(&qid->lock); 640 ISC_LIST_UNLINK(disp->port_table[portentry->port % 641 DNS_DISPATCH_PORTTABLESIZE], 642 portentry, link); 643 isc_mempool_put(disp->portpool, portentry); 644 UNLOCK(&qid->lock); 645 } 646 } 647 648 /*% 649 * Find a dispsocket for socket address 'dest', and port number 'port'. 650 * Return NULL if no such entry exists. Requires qid->lock to be held. 
*/
static dispsocket_t *
socket_search(dns_qid_t *qid, const isc_sockaddr_t *dest, in_port_t port,
	      unsigned int bucket) {
	dispsocket_t *dispsock;

	REQUIRE(VALID_QID(qid));
	REQUIRE(bucket < qid->qid_nbuckets);

	dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);

	/* Entries without a port entry can never match. */
	while (dispsock != NULL) {
		if (dispsock->portentry != NULL &&
		    dispsock->portentry->port == port &&
		    isc_sockaddr_equal(dest, &dispsock->host))
		{
			return (dispsock);
		}
		dispsock = ISC_LIST_NEXT(dispsock, blink);
	}

	return (NULL);
}

/*%
 * Make a new socket for a single dispatch with a random port number.
 * The caller must hold the disp->lock
 *
 * A dispsocket is taken from disp->inactivesockets when one is pooled there,
 * otherwise a fresh one is allocated from the manager's pool.  Up to 64
 * randomly chosen ports from the manager's IPv4 or IPv6 port array are then
 * tried.
 *
 * On success '*dispsockp' receives the dispsocket (already inserted into the
 * qid socket table) and '*portp' the chosen local port.  On failure the
 * dispsocket is destroyed and the outputs are left untouched.
 *
 * Returns:
 *	ISC_R_SUCCESS
 *	ISC_R_ADDRNOTAVAIL	no ports are configured for this family
 *	ISC_R_NOMEMORY		a dispsocket or port entry could not be
 *				allocated
 *	otherwise the result of the last open_socket() attempt, or
 *	ISC_R_FAILURE if every candidate port was skipped.
 */
static isc_result_t
get_dispsocket(dns_dispatch_t *disp, const isc_sockaddr_t *dest,
	       isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
	       in_port_t *portp) {
	int i;
	dns_dispatchmgr_t *mgr = disp->mgr;
	isc_socket_t *sock = NULL;
	isc_result_t result = ISC_R_FAILURE;
	in_port_t port;
	isc_sockaddr_t localaddr;
	unsigned int bucket = 0;
	dispsocket_t *dispsock;
	unsigned int nports;
	in_port_t *ports;
	isc_socket_options_t bindoptions;
	dispportentry_t *portentry = NULL;
	dns_qid_t *qid;

	/* Pick the candidate port array matching the local address family. */
	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
		nports = disp->mgr->nv4ports;
		ports = disp->mgr->v4ports;
	} else {
		nports = disp->mgr->nv6ports;
		ports = disp->mgr->v6ports;
	}
	if (nports == 0) {
		return (ISC_R_ADDRNOTAVAIL);
	}

	dispsock = ISC_LIST_HEAD(disp->inactivesockets);
	if (dispsock != NULL) {
		/*
		 * Reuse a pooled dispsocket; its socket object is handed to
		 * open_socket() below through 'sock'.
		 */
		ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
		sock = dispsock->socket;
		dispsock->socket = NULL;
	} else {
		dispsock = isc_mempool_get(mgr->spool);
		if (dispsock == NULL) {
			return (ISC_R_NOMEMORY);
		}

		disp->nsockets++;
		dispsock->socket = NULL;
		dispsock->disp = disp;
		dispsock->resp = NULL;
		dispsock->portentry = NULL;
		dispsock->task = NULL;
		/* Spread socket events over the dispatch's internal tasks. */
		isc_task_attach(disp->task[isc_random_uniform(disp->ntasks)],
				&dispsock->task);
		ISC_LINK_INIT(dispsock, link);
		ISC_LINK_INIT(dispsock, blink);
		dispsock->magic = DISPSOCK_MAGIC;
	}

	/*
	 * Pick up a random UDP port and open a new socket with it.  Avoid
	 * choosing ports that share the same destination because it will be
	 * very likely to fail in bind(2) or connect(2).
	 */
	localaddr = disp->local;
	qid = DNS_QID(disp);

	for (i = 0; i < 64; i++) {
		port = ports[isc_random_uniform(nports)];
		isc_sockaddr_setport(&localaddr, port);

		LOCK(&qid->lock);
		bucket = dns_hash(qid, dest, 0, port);
		if (socket_search(qid, dest, port, bucket) != NULL) {
			UNLOCK(&qid->lock);
			continue;
		}
		UNLOCK(&qid->lock);
		bindoptions = 0;
		/*
		 * NOTE(review): port_search() reads the port table without
		 * qid->lock while new_portentry()/deref_portentry() modify it
		 * under that lock; presumably disp->lock (held by the caller)
		 * serializes these - confirm.
		 */
		portentry = port_search(disp, port);

		/* A port we already own is shared via address reuse. */
		if (portentry != NULL) {
			bindoptions |= ISC_SOCKET_REUSEADDRESS;
		}
		result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
				     NULL, false);
		if (result == ISC_R_SUCCESS) {
			if (portentry == NULL) {
				portentry = new_portentry(disp, port);
				if (portentry == NULL) {
					result = ISC_R_NOMEMORY;
					break;
				}
			} else {
				isc_refcount_increment(&portentry->refs);
			}
			break;
		} else if (result == ISC_R_NOPERM) {
			/* Log and try another port. */
			char buf[ISC_SOCKADDR_FORMATSIZE];
			isc_sockaddr_format(&localaddr, buf, sizeof(buf));
			dispatch_log(disp, ISC_LOG_WARNING,
				     "open_socket(%s) -> %s: continuing", buf,
				     isc_result_totext(result));
		} else if (result != ISC_R_ADDRINUSE) {
			/* Only "address in use" is worth retrying silently. */
			break;
		}
	}

	if (result == ISC_R_SUCCESS) {
		dispsock->socket = sock;
		dispsock->host = *dest;
		dispsock->portentry = portentry;
		dispsock->bucket = bucket;
		LOCK(&qid->lock);
		ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
		UNLOCK(&qid->lock);
		*dispsockp = dispsock;
		*portp = port;
	} else {
		/*
		 * We could keep it in the inactive list, but since this should
		 * be an exceptional case and might be resource shortage, we'd
		 * rather destroy it.
		 */
		if (sock != NULL) {
			isc_socket_detach(&sock);
		}
		destroy_dispsocket(disp, &dispsock);
	}

	return (result);
}

/*%
 * Destroy a dedicated dispatch socket.
 *
 * '*dispsockp' must not be linked on an active/inactive list; it is set to
 * NULL.  The port entry reference and the socket are released if present,
 * the entry is removed from the qid socket table if it is still linked
 * there, its task is detached, and the structure is returned to the
 * manager's pool.
 */
static void
destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
	dispsocket_t *dispsock;
	dns_qid_t *qid;

	/*
	 * The dispatch must be locked.
	 */

	REQUIRE(dispsockp != NULL && *dispsockp != NULL);
	dispsock = *dispsockp;
	*dispsockp = NULL;
	REQUIRE(!ISC_LINK_LINKED(dispsock, link));

	disp->nsockets--;
	dispsock->magic = 0;
	if (dispsock->portentry != NULL) {
		deref_portentry(disp, &dispsock->portentry);
	}
	if (dispsock->socket != NULL) {
		isc_socket_detach(&dispsock->socket);
	}
	if (ISC_LINK_LINKED(dispsock, blink)) {
		qid = DNS_QID(disp);
		LOCK(&qid->lock);
		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
				blink);
		UNLOCK(&qid->lock);
	}
	if (dispsock->task != NULL) {
		isc_task_detach(&dispsock->task);
	}
	isc_mempool_put(disp->mgr->spool, dispsock);
}

/*%
 * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
 * future reuse unless the total number of sockets exceeds the maximum.
 *
 * The link between the socket and its response entry is severed and the
 * port entry reference is dropped in every case.
 */
static void
deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
	isc_result_t result;
	dns_qid_t *qid;

	/*
	 * The dispatch must be locked.
	 */
	ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
	if (dispsock->resp != NULL) {
		INSIST(dispsock->resp->dispsocket == dispsock);
		dispsock->resp->dispsocket = NULL;
	}

	INSIST(dispsock->portentry != NULL);
	deref_portentry(disp, &dispsock->portentry);

	if (disp->nsockets > DNS_DISPATCH_POOLSOCKS) {
		/* Pool is full enough: do not keep this one around. */
		destroy_dispsocket(disp, &dispsock);
	} else {
		result = isc_socket_close(dispsock->socket);

		qid = DNS_QID(disp);
		LOCK(&qid->lock);
		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
				blink);
		UNLOCK(&qid->lock);

		if (result == ISC_R_SUCCESS) {
			ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
		} else {
			/*
			 * If the underlying system does not allow this
			 * optimization, destroy this temporary structure (and
			 * create a new one for a new transaction).
			 */
			INSIST(result == ISC_R_NOTIMPLEMENTED);
			destroy_dispsocket(disp, &dispsock);
		}
	}
}

/*
 * Find an entry for query ID 'id', socket address 'dest', and port number
 * 'port'.
 * Return NULL if no such entry exists.
894 */ 895 static dns_dispentry_t * 896 entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, 897 in_port_t port, unsigned int bucket) { 898 dns_dispentry_t *res; 899 900 REQUIRE(VALID_QID(qid)); 901 REQUIRE(bucket < qid->qid_nbuckets); 902 903 res = ISC_LIST_HEAD(qid->qid_table[bucket]); 904 905 while (res != NULL) { 906 if (res->id == id && isc_sockaddr_equal(dest, &res->host) && 907 res->port == port) { 908 return (res); 909 } 910 res = ISC_LIST_NEXT(res, link); 911 } 912 913 return (NULL); 914 } 915 916 static void 917 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) { 918 isc_mempool_t *bpool; 919 INSIST(buf != NULL && len != 0); 920 921 switch (disp->socktype) { 922 case isc_sockettype_tcp: 923 INSIST(disp->tcpbuffers > 0); 924 disp->tcpbuffers--; 925 isc_mem_put(disp->mgr->mctx, buf, len); 926 break; 927 case isc_sockettype_udp: 928 LOCK(&disp->mgr->buffer_lock); 929 INSIST(disp->mgr->buffers > 0); 930 INSIST(len == disp->mgr->buffersize); 931 disp->mgr->buffers--; 932 bpool = disp->mgr->bpool; 933 UNLOCK(&disp->mgr->buffer_lock); 934 isc_mempool_put(bpool, buf); 935 break; 936 default: 937 INSIST(0); 938 ISC_UNREACHABLE(); 939 } 940 } 941 942 static void * 943 allocate_udp_buffer(dns_dispatch_t *disp) { 944 isc_mempool_t *bpool; 945 void *temp; 946 947 LOCK(&disp->mgr->buffer_lock); 948 if (disp->mgr->buffers >= disp->mgr->maxbuffers) { 949 UNLOCK(&disp->mgr->buffer_lock); 950 return (NULL); 951 } 952 bpool = disp->mgr->bpool; 953 disp->mgr->buffers++; 954 UNLOCK(&disp->mgr->buffer_lock); 955 956 temp = isc_mempool_get(bpool); 957 958 if (temp == NULL) { 959 LOCK(&disp->mgr->buffer_lock); 960 disp->mgr->buffers--; 961 UNLOCK(&disp->mgr->buffer_lock); 962 } 963 964 return (temp); 965 } 966 967 static inline void 968 free_sevent(isc_event_t *ev) { 969 isc_mempool_t *pool = ev->ev_destroy_arg; 970 isc_socketevent_t *sev = (isc_socketevent_t *)ev; 971 isc_mempool_put(pool, sev); 972 } 973 974 static inline isc_socketevent_t 
* 975 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *sock, isc_eventtype_t type, 976 isc_taskaction_t action, const void *arg) { 977 isc_socketevent_t *ev; 978 void *deconst_arg; 979 980 ev = isc_mempool_get(disp->sepool); 981 if (ev == NULL) { 982 return (NULL); 983 } 984 DE_CONST(arg, deconst_arg); 985 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type, action, deconst_arg, 986 sock, free_sevent, disp->sepool); 987 ev->result = ISC_R_UNSET; 988 ISC_LINK_INIT(ev, ev_link); 989 ev->region.base = NULL; 990 ev->n = 0; 991 ev->offset = 0; 992 ev->attributes = 0; 993 994 return (ev); 995 } 996 997 static inline void 998 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) { 999 if (disp->failsafe_ev == ev) { 1000 INSIST(disp->shutdown_out == 1); 1001 disp->shutdown_out = 0; 1002 1003 return; 1004 } 1005 1006 isc_mempool_put(disp->mgr->depool, ev); 1007 } 1008 1009 static inline dns_dispatchevent_t * 1010 allocate_devent(dns_dispatch_t *disp) { 1011 dns_dispatchevent_t *ev; 1012 1013 ev = isc_mempool_get(disp->mgr->depool); 1014 if (ev == NULL) { 1015 return (NULL); 1016 } 1017 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0, NULL, NULL, NULL, NULL, 1018 NULL); 1019 1020 return (ev); 1021 } 1022 1023 static void 1024 udp_exrecv(isc_task_t *task, isc_event_t *ev) { 1025 dispsocket_t *dispsock = ev->ev_arg; 1026 1027 UNUSED(task); 1028 1029 REQUIRE(VALID_DISPSOCK(dispsock)); 1030 udp_recv(ev, dispsock->disp, dispsock); 1031 } 1032 1033 static void 1034 udp_shrecv(isc_task_t *task, isc_event_t *ev) { 1035 dns_dispatch_t *disp = ev->ev_arg; 1036 1037 UNUSED(task); 1038 1039 REQUIRE(VALID_DISPATCH(disp)); 1040 udp_recv(ev, disp, NULL); 1041 } 1042 1043 /* 1044 * General flow: 1045 * 1046 * If I/O result == CANCELED or error, free the buffer. 1047 * 1048 * If query, free the buffer, restart. 1049 * 1050 * If response: 1051 * Allocate event, fill in details. 1052 * If cannot allocate, free buffer, restart. 1053 * find target. If not found, free buffer, restart. 
1054 * if event queue is not empty, queue. else, send. 1055 * restart. 1056 */ 1057 static void 1058 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) { 1059 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in; 1060 dns_messageid_t id; 1061 isc_result_t dres; 1062 isc_buffer_t source; 1063 unsigned int flags; 1064 dns_dispentry_t *resp = NULL; 1065 dns_dispatchevent_t *rev; 1066 unsigned int bucket; 1067 bool killit; 1068 bool queue_response; 1069 dns_dispatchmgr_t *mgr; 1070 dns_qid_t *qid; 1071 isc_netaddr_t netaddr; 1072 int match; 1073 int result; 1074 bool qidlocked = false; 1075 1076 LOCK(&disp->lock); 1077 1078 mgr = disp->mgr; 1079 qid = mgr->qid; 1080 1081 LOCK(&disp->mgr->buffer_lock); 1082 dispatch_log(disp, LVL(90), 1083 "got packet: requests %d, buffers %d, recvs %d", 1084 disp->requests, disp->mgr->buffers, disp->recv_pending); 1085 UNLOCK(&disp->mgr->buffer_lock); 1086 1087 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) { 1088 /* 1089 * Unless the receive event was imported from a listening 1090 * interface, in which case the event type is 1091 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending. 1092 */ 1093 INSIST(disp->recv_pending != 0); 1094 disp->recv_pending = 0; 1095 } 1096 1097 if (dispsock != NULL && 1098 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) 1099 { 1100 /* 1101 * dispsock->resp can be NULL if this transaction was canceled 1102 * just after receiving a response. Since this socket is 1103 * exclusively used and there should be at most one receive 1104 * event the canceled event should have been no effect. So 1105 * we can (and should) deactivate the socket right now. 1106 */ 1107 deactivate_dispsocket(disp, dispsock); 1108 dispsock = NULL; 1109 } 1110 1111 if (disp->shutting_down) { 1112 /* 1113 * This dispatcher is shutting down. 
1114 */ 1115 free_buffer(disp, ev->region.base, ev->region.length); 1116 1117 isc_event_free(&ev_in); 1118 ev = NULL; 1119 1120 killit = destroy_disp_ok(disp); 1121 UNLOCK(&disp->lock); 1122 if (killit) { 1123 isc_task_send(disp->task[0], &disp->ctlevent); 1124 } 1125 1126 return; 1127 } 1128 1129 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 1130 if (dispsock != NULL) { 1131 resp = dispsock->resp; 1132 id = resp->id; 1133 if (ev->result != ISC_R_SUCCESS) { 1134 /* 1135 * This is most likely a network error on a 1136 * connected socket. It makes no sense to 1137 * check the address or parse the packet, but it 1138 * will help to return the error to the caller. 1139 */ 1140 goto sendresponse; 1141 } 1142 } else { 1143 free_buffer(disp, ev->region.base, ev->region.length); 1144 1145 isc_event_free(&ev_in); 1146 UNLOCK(&disp->lock); 1147 return; 1148 } 1149 } else if (ev->result != ISC_R_SUCCESS) { 1150 free_buffer(disp, ev->region.base, ev->region.length); 1151 1152 if (ev->result != ISC_R_CANCELED) { 1153 dispatch_log(disp, ISC_LOG_ERROR, 1154 "odd socket result in udp_recv(): %s", 1155 isc_result_totext(ev->result)); 1156 } 1157 1158 isc_event_free(&ev_in); 1159 UNLOCK(&disp->lock); 1160 return; 1161 } 1162 1163 /* 1164 * If this is from a blackholed address, drop it. 1165 */ 1166 isc_netaddr_fromsockaddr(&netaddr, &ev->address); 1167 if (disp->mgr->blackhole != NULL && 1168 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole, NULL, &match, 1169 NULL) == ISC_R_SUCCESS && 1170 match > 0) 1171 { 1172 if (isc_log_wouldlog(dns_lctx, LVL(10))) { 1173 char netaddrstr[ISC_NETADDR_FORMATSIZE]; 1174 isc_netaddr_format(&netaddr, netaddrstr, 1175 sizeof(netaddrstr)); 1176 dispatch_log(disp, LVL(10), "blackholed packet from %s", 1177 netaddrstr); 1178 } 1179 free_buffer(disp, ev->region.base, ev->region.length); 1180 goto restart; 1181 } 1182 1183 /* 1184 * Peek into the buffer to see what we can see. 
1185 */ 1186 isc_buffer_init(&source, ev->region.base, ev->region.length); 1187 isc_buffer_add(&source, ev->n); 1188 dres = dns_message_peekheader(&source, &id, &flags); 1189 if (dres != ISC_R_SUCCESS) { 1190 free_buffer(disp, ev->region.base, ev->region.length); 1191 dispatch_log(disp, LVL(10), "got garbage packet"); 1192 goto restart; 1193 } 1194 1195 dispatch_log(disp, LVL(92), 1196 "got valid DNS message header, /QR %c, id %u", 1197 (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); 1198 1199 /* 1200 * Look at flags. If query, drop it. If response, 1201 * look to see where it goes. 1202 */ 1203 if ((flags & DNS_MESSAGEFLAG_QR) == 0) { 1204 /* query */ 1205 free_buffer(disp, ev->region.base, ev->region.length); 1206 goto restart; 1207 } 1208 1209 /* 1210 * Search for the corresponding response. If we are using an exclusive 1211 * socket, we've already identified it and we can skip the search; but 1212 * the ID and the address must match the expected ones. 1213 */ 1214 if (resp == NULL) { 1215 bucket = dns_hash(qid, &ev->address, id, disp->localport); 1216 LOCK(&qid->lock); 1217 qidlocked = true; 1218 resp = entry_search(qid, &ev->address, id, disp->localport, 1219 bucket); 1220 dispatch_log(disp, LVL(90), 1221 "search for response in bucket %d: %s", bucket, 1222 (resp == NULL ? "not found" : "found")); 1223 1224 if (resp == NULL) { 1225 inc_stats(mgr, dns_resstatscounter_mismatch); 1226 free_buffer(disp, ev->region.base, ev->region.length); 1227 goto unlock; 1228 } 1229 } else if (resp->id != id || 1230 !isc_sockaddr_equal(&ev->address, &resp->host)) { 1231 dispatch_log(disp, LVL(90), 1232 "response to an exclusive socket doesn't match"); 1233 inc_stats(mgr, dns_resstatscounter_mismatch); 1234 free_buffer(disp, ev->region.base, ev->region.length); 1235 goto unlock; 1236 } 1237 1238 /* 1239 * Now that we have the original dispatch the query was sent 1240 * from check that the address and port the response was 1241 * sent to make sense. 
1242 */ 1243 if (disp != resp->disp) { 1244 isc_sockaddr_t a1; 1245 isc_sockaddr_t a2; 1246 1247 /* 1248 * Check that the socket types and ports match. 1249 */ 1250 if (disp->socktype != resp->disp->socktype || 1251 isc_sockaddr_getport(&disp->local) != 1252 isc_sockaddr_getport(&resp->disp->local)) 1253 { 1254 free_buffer(disp, ev->region.base, ev->region.length); 1255 goto unlock; 1256 } 1257 1258 /* 1259 * If each dispatch is bound to a different address 1260 * then fail. 1261 * 1262 * Note under Linux a packet can be sent out via IPv4 socket 1263 * and the response be received via a IPv6 socket. 1264 * 1265 * Requests sent out via IPv6 should always come back in 1266 * via IPv6. 1267 */ 1268 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 && 1269 isc_sockaddr_pf(&disp->local) != PF_INET6) 1270 { 1271 free_buffer(disp, ev->region.base, ev->region.length); 1272 goto unlock; 1273 } 1274 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local)); 1275 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local)); 1276 if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) && 1277 !isc_sockaddr_eqaddr(&a1, &resp->disp->local) && 1278 !isc_sockaddr_eqaddr(&a2, &disp->local)) 1279 { 1280 free_buffer(disp, ev->region.base, ev->region.length); 1281 goto unlock; 1282 } 1283 } 1284 1285 sendresponse: 1286 queue_response = resp->item_out; 1287 rev = allocate_devent(resp->disp); 1288 if (rev == NULL) { 1289 free_buffer(disp, ev->region.base, ev->region.length); 1290 goto unlock; 1291 } 1292 1293 /* 1294 * At this point, rev contains the event we want to fill in, and 1295 * resp contains the information on the place to send it to. 1296 * Send the event off. 
1297 */ 1298 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length); 1299 isc_buffer_add(&rev->buffer, ev->n); 1300 rev->result = ev->result; 1301 rev->id = id; 1302 rev->addr = ev->address; 1303 rev->pktinfo = ev->pktinfo; 1304 rev->attributes = ev->attributes; 1305 if (queue_response) { 1306 ISC_LIST_APPEND(resp->items, rev, ev_link); 1307 } else { 1308 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, 1309 resp->action, resp->arg, resp, NULL, NULL); 1310 request_log(disp, resp, LVL(90), 1311 "[a] Sent event %p buffer %p len %d to task %p", 1312 rev, rev->buffer.base, rev->buffer.length, 1313 resp->task); 1314 resp->item_out = true; 1315 isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); 1316 } 1317 unlock: 1318 if (qidlocked) { 1319 UNLOCK(&qid->lock); 1320 } 1321 1322 /* 1323 * Restart recv() to get the next packet. 1324 */ 1325 restart: 1326 result = startrecv(disp, dispsock); 1327 if (result != ISC_R_SUCCESS && dispsock != NULL) { 1328 /* 1329 * XXX: wired. There seems to be no recovery process other than 1330 * deactivate this socket anyway (since we cannot start 1331 * receiving, we won't be able to receive a cancel event 1332 * from the user). 1333 */ 1334 deactivate_dispsocket(disp, dispsock); 1335 } 1336 isc_event_free(&ev_in); 1337 UNLOCK(&disp->lock); 1338 } 1339 1340 /* 1341 * General flow: 1342 * 1343 * If I/O result == CANCELED, EOF, or error, notify everyone as the 1344 * various queues drain. 1345 * 1346 * If query, restart. 1347 * 1348 * If response: 1349 * Allocate event, fill in details. 1350 * If cannot allocate, restart. 1351 * find target. If not found, restart. 1352 * if event queue is not empty, queue. else, send. 1353 * restart. 
1354 */ 1355 static void 1356 tcp_recv(isc_task_t *task, isc_event_t *ev_in) { 1357 dns_dispatch_t *disp = ev_in->ev_arg; 1358 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg; 1359 dns_messageid_t id; 1360 isc_result_t dres; 1361 unsigned int flags; 1362 dns_dispentry_t *resp; 1363 dns_dispatchevent_t *rev; 1364 unsigned int bucket; 1365 bool killit; 1366 bool queue_response; 1367 dns_qid_t *qid; 1368 int level; 1369 char buf[ISC_SOCKADDR_FORMATSIZE]; 1370 1371 UNUSED(task); 1372 1373 REQUIRE(VALID_DISPATCH(disp)); 1374 1375 qid = disp->qid; 1376 1377 LOCK(&disp->lock); 1378 1379 dispatch_log(disp, LVL(90), 1380 "got TCP packet: requests %d, buffers %d, recvs %d", 1381 disp->requests, disp->tcpbuffers, disp->recv_pending); 1382 1383 INSIST(disp->recv_pending != 0); 1384 disp->recv_pending = 0; 1385 1386 if (disp->refcount == 0) { 1387 /* 1388 * This dispatcher is shutting down. Force cancellation. 1389 */ 1390 tcpmsg->result = ISC_R_CANCELED; 1391 } 1392 1393 if (tcpmsg->result != ISC_R_SUCCESS) { 1394 switch (tcpmsg->result) { 1395 case ISC_R_CANCELED: 1396 break; 1397 1398 case ISC_R_EOF: 1399 dispatch_log(disp, LVL(90), "shutting down on EOF"); 1400 do_cancel(disp); 1401 break; 1402 1403 case ISC_R_CONNECTIONRESET: 1404 level = ISC_LOG_INFO; 1405 goto logit; 1406 1407 default: 1408 level = ISC_LOG_ERROR; 1409 logit: 1410 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf)); 1411 dispatch_log(disp, level, 1412 "shutting down due to TCP " 1413 "receive error: %s: %s", 1414 buf, isc_result_totext(tcpmsg->result)); 1415 do_cancel(disp); 1416 break; 1417 } 1418 1419 /* 1420 * The event is statically allocated in the tcpmsg 1421 * structure, and destroy_disp() frees the tcpmsg, so we must 1422 * free the event *before* calling destroy_disp(). 1423 */ 1424 isc_event_free(&ev_in); 1425 1426 disp->shutting_down = 1; 1427 disp->shutdown_why = tcpmsg->result; 1428 1429 /* 1430 * If the recv() was canceled pass the word on. 
1431 */ 1432 killit = destroy_disp_ok(disp); 1433 UNLOCK(&disp->lock); 1434 if (killit) { 1435 isc_task_send(disp->task[0], &disp->ctlevent); 1436 } 1437 return; 1438 } 1439 1440 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p", 1441 tcpmsg->result, tcpmsg->buffer.length, 1442 tcpmsg->buffer.base); 1443 1444 /* 1445 * Peek into the buffer to see what we can see. 1446 */ 1447 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags); 1448 if (dres != ISC_R_SUCCESS) { 1449 dispatch_log(disp, LVL(10), "got garbage packet"); 1450 goto restart; 1451 } 1452 1453 dispatch_log(disp, LVL(92), 1454 "got valid DNS message header, /QR %c, id %u", 1455 (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); 1456 1457 /* 1458 * Allocate an event to send to the query or response client, and 1459 * allocate a new buffer for our use. 1460 */ 1461 1462 /* 1463 * Look at flags. If query, drop it. If response, 1464 * look to see where it goes. 1465 */ 1466 if ((flags & DNS_MESSAGEFLAG_QR) == 0) { 1467 /* 1468 * Query. 1469 */ 1470 goto restart; 1471 } 1472 1473 /* 1474 * Response. 1475 */ 1476 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport); 1477 LOCK(&qid->lock); 1478 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket); 1479 dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", 1480 bucket, (resp == NULL ? "not found" : "found")); 1481 1482 if (resp == NULL) { 1483 goto unlock; 1484 } 1485 queue_response = resp->item_out; 1486 rev = allocate_devent(disp); 1487 if (rev == NULL) { 1488 goto unlock; 1489 } 1490 1491 /* 1492 * At this point, rev contains the event we want to fill in, and 1493 * resp contains the information on the place to send it to. 1494 * Send the event off. 
1495 */ 1496 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer); 1497 disp->tcpbuffers++; 1498 rev->result = ISC_R_SUCCESS; 1499 rev->id = id; 1500 rev->addr = tcpmsg->address; 1501 if (queue_response) { 1502 ISC_LIST_APPEND(resp->items, rev, ev_link); 1503 } else { 1504 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, 1505 resp->action, resp->arg, resp, NULL, NULL); 1506 request_log(disp, resp, LVL(90), 1507 "[b] Sent event %p buffer %p len %d to task %p", 1508 rev, rev->buffer.base, rev->buffer.length, 1509 resp->task); 1510 resp->item_out = true; 1511 isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); 1512 } 1513 unlock: 1514 UNLOCK(&qid->lock); 1515 1516 /* 1517 * Restart recv() to get the next packet. 1518 */ 1519 restart: 1520 (void)startrecv(disp, NULL); 1521 1522 isc_event_free(&ev_in); 1523 UNLOCK(&disp->lock); 1524 } 1525 1526 /* 1527 * disp must be locked. 1528 */ 1529 static isc_result_t 1530 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) { 1531 isc_result_t res; 1532 isc_region_t region; 1533 isc_socket_t *sock; 1534 1535 if (disp->shutting_down == 1) { 1536 return (ISC_R_SUCCESS); 1537 } 1538 1539 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) { 1540 return (ISC_R_SUCCESS); 1541 } 1542 1543 if (disp->recv_pending != 0 && dispsock == NULL) { 1544 return (ISC_R_SUCCESS); 1545 } 1546 1547 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && 1548 dispsock == NULL) { 1549 return (ISC_R_SUCCESS); 1550 } 1551 1552 if (dispsock != NULL) { 1553 sock = dispsock->socket; 1554 } else { 1555 sock = disp->socket; 1556 } 1557 INSIST(sock != NULL); 1558 1559 switch (disp->socktype) { 1560 /* 1561 * UDP reads are always maximal. 
1562 */ 1563 case isc_sockettype_udp: 1564 region.length = disp->mgr->buffersize; 1565 region.base = allocate_udp_buffer(disp); 1566 if (region.base == NULL) { 1567 return (ISC_R_NOMEMORY); 1568 } 1569 if (dispsock != NULL) { 1570 isc_task_t *dt = dispsock->task; 1571 isc_socketevent_t *sev = allocate_sevent( 1572 disp, sock, ISC_SOCKEVENT_RECVDONE, udp_exrecv, 1573 dispsock); 1574 if (sev == NULL) { 1575 free_buffer(disp, region.base, region.length); 1576 return (ISC_R_NOMEMORY); 1577 } 1578 1579 res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0); 1580 if (res != ISC_R_SUCCESS) { 1581 free_buffer(disp, region.base, region.length); 1582 return (res); 1583 } 1584 } else { 1585 isc_task_t *dt = disp->task[0]; 1586 isc_socketevent_t *sev = allocate_sevent( 1587 disp, sock, ISC_SOCKEVENT_RECVDONE, udp_shrecv, 1588 disp); 1589 if (sev == NULL) { 1590 free_buffer(disp, region.base, region.length); 1591 return (ISC_R_NOMEMORY); 1592 } 1593 1594 res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0); 1595 if (res != ISC_R_SUCCESS) { 1596 free_buffer(disp, region.base, region.length); 1597 disp->shutdown_why = res; 1598 disp->shutting_down = 1; 1599 do_cancel(disp); 1600 return (ISC_R_SUCCESS); /* recover by cancel */ 1601 } 1602 INSIST(disp->recv_pending == 0); 1603 disp->recv_pending = 1; 1604 } 1605 break; 1606 1607 case isc_sockettype_tcp: 1608 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0], 1609 tcp_recv, disp); 1610 if (res != ISC_R_SUCCESS) { 1611 disp->shutdown_why = res; 1612 disp->shutting_down = 1; 1613 do_cancel(disp); 1614 return (ISC_R_SUCCESS); /* recover by cancel */ 1615 } 1616 INSIST(disp->recv_pending == 0); 1617 disp->recv_pending = 1; 1618 break; 1619 default: 1620 INSIST(0); 1621 ISC_UNREACHABLE(); 1622 } 1623 1624 return (ISC_R_SUCCESS); 1625 } 1626 1627 /* 1628 * Mgr must be locked when calling this function. 
1629 */ 1630 static bool 1631 destroy_mgr_ok(dns_dispatchmgr_t *mgr) { 1632 mgr_log(mgr, LVL(90), 1633 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, " 1634 "depool=%d, rpool=%d, dpool=%d", 1635 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list), 1636 isc_mempool_getallocated(mgr->depool), 1637 isc_mempool_getallocated(mgr->rpool), 1638 isc_mempool_getallocated(mgr->dpool)); 1639 if (!MGR_IS_SHUTTINGDOWN(mgr)) { 1640 return (false); 1641 } 1642 if (!ISC_LIST_EMPTY(mgr->list)) { 1643 return (false); 1644 } 1645 if (isc_mempool_getallocated(mgr->depool) != 0) { 1646 return (false); 1647 } 1648 if (isc_mempool_getallocated(mgr->rpool) != 0) { 1649 return (false); 1650 } 1651 if (isc_mempool_getallocated(mgr->dpool) != 0) { 1652 return (false); 1653 } 1654 1655 return (true); 1656 } 1657 1658 /* 1659 * Mgr must be unlocked when calling this function. 1660 */ 1661 static void 1662 destroy_mgr(dns_dispatchmgr_t **mgrp) { 1663 dns_dispatchmgr_t *mgr; 1664 1665 mgr = *mgrp; 1666 *mgrp = NULL; 1667 1668 mgr->magic = 0; 1669 isc_mutex_destroy(&mgr->lock); 1670 mgr->state = 0; 1671 1672 isc_mempool_destroy(&mgr->depool); 1673 isc_mempool_destroy(&mgr->rpool); 1674 isc_mempool_destroy(&mgr->dpool); 1675 if (mgr->bpool != NULL) { 1676 isc_mempool_destroy(&mgr->bpool); 1677 } 1678 if (mgr->spool != NULL) { 1679 isc_mempool_destroy(&mgr->spool); 1680 } 1681 1682 isc_mutex_destroy(&mgr->spool_lock); 1683 isc_mutex_destroy(&mgr->bpool_lock); 1684 isc_mutex_destroy(&mgr->dpool_lock); 1685 isc_mutex_destroy(&mgr->rpool_lock); 1686 isc_mutex_destroy(&mgr->depool_lock); 1687 1688 if (mgr->qid != NULL) { 1689 qid_destroy(mgr->mctx, &mgr->qid); 1690 } 1691 1692 isc_mutex_destroy(&mgr->buffer_lock); 1693 1694 if (mgr->blackhole != NULL) { 1695 dns_acl_detach(&mgr->blackhole); 1696 } 1697 1698 if (mgr->stats != NULL) { 1699 isc_stats_detach(&mgr->stats); 1700 } 1701 1702 if (mgr->v4ports != NULL) { 1703 isc_mem_put(mgr->mctx, mgr->v4ports, 1704 mgr->nv4ports * sizeof(in_port_t)); 
1705 } 1706 if (mgr->v6ports != NULL) { 1707 isc_mem_put(mgr->mctx, mgr->v6ports, 1708 mgr->nv6ports * sizeof(in_port_t)); 1709 } 1710 isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(dns_dispatchmgr_t)); 1711 } 1712 1713 static isc_result_t 1714 open_socket(isc_socketmgr_t *mgr, const isc_sockaddr_t *local, 1715 unsigned int options, isc_socket_t **sockp, 1716 isc_socket_t *dup_socket, bool duponly) { 1717 isc_socket_t *sock; 1718 isc_result_t result; 1719 1720 sock = *sockp; 1721 if (sock != NULL) { 1722 result = isc_socket_open(sock); 1723 if (result != ISC_R_SUCCESS) { 1724 return (result); 1725 } 1726 } else if (dup_socket != NULL && 1727 (!isc_socket_hasreuseport() || duponly)) { 1728 result = isc_socket_dup(dup_socket, &sock); 1729 if (result != ISC_R_SUCCESS) { 1730 return (result); 1731 } 1732 1733 isc_socket_setname(sock, "dispatcher", NULL); 1734 *sockp = sock; 1735 return (ISC_R_SUCCESS); 1736 } else { 1737 result = isc_socket_create(mgr, isc_sockaddr_pf(local), 1738 isc_sockettype_udp, &sock); 1739 if (result != ISC_R_SUCCESS) { 1740 return (result); 1741 } 1742 } 1743 1744 isc_socket_setname(sock, "dispatcher", NULL); 1745 1746 #ifndef ISC_ALLOW_MAPPED 1747 isc_socket_ipv6only(sock, true); 1748 #endif /* ifndef ISC_ALLOW_MAPPED */ 1749 result = isc_socket_bind(sock, local, options); 1750 if (result != ISC_R_SUCCESS) { 1751 if (*sockp == NULL) { 1752 isc_socket_detach(&sock); 1753 } else { 1754 isc_socket_close(sock); 1755 } 1756 return (result); 1757 } 1758 1759 *sockp = sock; 1760 return (ISC_R_SUCCESS); 1761 } 1762 1763 /*% 1764 * Create a temporary port list to set the initial default set of dispatch 1765 * ports: [1024, 65535]. This is almost meaningless as the application will 1766 * normally set the ports explicitly, but is provided to fill some minor corner 1767 * cases. 
1768 */ 1769 static isc_result_t 1770 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) { 1771 isc_result_t result; 1772 1773 result = isc_portset_create(mctx, portsetp); 1774 if (result != ISC_R_SUCCESS) { 1775 return (result); 1776 } 1777 isc_portset_addrange(*portsetp, 1024, 65535); 1778 1779 return (ISC_R_SUCCESS); 1780 } 1781 1782 /* 1783 * Publics. 1784 */ 1785 1786 isc_result_t 1787 dns_dispatchmgr_create(isc_mem_t *mctx, dns_dispatchmgr_t **mgrp) { 1788 dns_dispatchmgr_t *mgr; 1789 isc_result_t result; 1790 isc_portset_t *v4portset = NULL; 1791 isc_portset_t *v6portset = NULL; 1792 1793 REQUIRE(mctx != NULL); 1794 REQUIRE(mgrp != NULL && *mgrp == NULL); 1795 1796 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); 1797 1798 mgr->mctx = NULL; 1799 isc_mem_attach(mctx, &mgr->mctx); 1800 1801 mgr->blackhole = NULL; 1802 mgr->stats = NULL; 1803 1804 isc_mutex_init(&mgr->lock); 1805 isc_mutex_init(&mgr->buffer_lock); 1806 isc_mutex_init(&mgr->depool_lock); 1807 isc_mutex_init(&mgr->rpool_lock); 1808 isc_mutex_init(&mgr->dpool_lock); 1809 isc_mutex_init(&mgr->bpool_lock); 1810 isc_mutex_init(&mgr->spool_lock); 1811 1812 mgr->depool = NULL; 1813 isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t), 1814 &mgr->depool); 1815 1816 mgr->rpool = NULL; 1817 isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t), &mgr->rpool); 1818 1819 mgr->dpool = NULL; 1820 isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t), &mgr->dpool); 1821 1822 isc_mempool_setname(mgr->depool, "dispmgr_depool"); 1823 isc_mempool_setmaxalloc(mgr->depool, 32768); 1824 isc_mempool_setfreemax(mgr->depool, 32768); 1825 isc_mempool_associatelock(mgr->depool, &mgr->depool_lock); 1826 isc_mempool_setfillcount(mgr->depool, 32); 1827 1828 isc_mempool_setname(mgr->rpool, "dispmgr_rpool"); 1829 isc_mempool_setmaxalloc(mgr->rpool, 32768); 1830 isc_mempool_setfreemax(mgr->rpool, 32768); 1831 isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock); 1832 isc_mempool_setfillcount(mgr->rpool, 
32); 1833 1834 isc_mempool_setname(mgr->dpool, "dispmgr_dpool"); 1835 isc_mempool_setmaxalloc(mgr->dpool, 32768); 1836 isc_mempool_setfreemax(mgr->dpool, 32768); 1837 isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock); 1838 isc_mempool_setfillcount(mgr->dpool, 32); 1839 1840 mgr->buffers = 0; 1841 mgr->buffersize = 0; 1842 mgr->maxbuffers = 0; 1843 mgr->bpool = NULL; 1844 mgr->spool = NULL; 1845 mgr->qid = NULL; 1846 mgr->state = 0; 1847 ISC_LIST_INIT(mgr->list); 1848 mgr->v4ports = NULL; 1849 mgr->v6ports = NULL; 1850 mgr->nv4ports = 0; 1851 mgr->nv6ports = 0; 1852 mgr->magic = DNS_DISPATCHMGR_MAGIC; 1853 1854 result = create_default_portset(mctx, &v4portset); 1855 if (result == ISC_R_SUCCESS) { 1856 result = create_default_portset(mctx, &v6portset); 1857 if (result == ISC_R_SUCCESS) { 1858 result = dns_dispatchmgr_setavailports(mgr, v4portset, 1859 v6portset); 1860 } 1861 } 1862 if (v4portset != NULL) { 1863 isc_portset_destroy(mctx, &v4portset); 1864 } 1865 if (v6portset != NULL) { 1866 isc_portset_destroy(mctx, &v6portset); 1867 } 1868 if (result != ISC_R_SUCCESS) { 1869 goto kill_dpool; 1870 } 1871 1872 *mgrp = mgr; 1873 return (ISC_R_SUCCESS); 1874 1875 kill_dpool: 1876 isc_mempool_destroy(&mgr->dpool); 1877 isc_mempool_destroy(&mgr->rpool); 1878 isc_mempool_destroy(&mgr->depool); 1879 isc_mutex_destroy(&mgr->spool_lock); 1880 isc_mutex_destroy(&mgr->bpool_lock); 1881 isc_mutex_destroy(&mgr->dpool_lock); 1882 isc_mutex_destroy(&mgr->rpool_lock); 1883 isc_mutex_destroy(&mgr->depool_lock); 1884 isc_mutex_destroy(&mgr->buffer_lock); 1885 isc_mutex_destroy(&mgr->lock); 1886 isc_mem_putanddetach(&mctx, mgr, sizeof(dns_dispatchmgr_t)); 1887 1888 return (result); 1889 } 1890 1891 void 1892 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) { 1893 REQUIRE(VALID_DISPATCHMGR(mgr)); 1894 if (mgr->blackhole != NULL) { 1895 dns_acl_detach(&mgr->blackhole); 1896 } 1897 dns_acl_attach(blackhole, &mgr->blackhole); 1898 } 1899 1900 dns_acl_t * 
1901 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) { 1902 REQUIRE(VALID_DISPATCHMGR(mgr)); 1903 return (mgr->blackhole); 1904 } 1905 1906 void 1907 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr, 1908 dns_portlist_t *portlist) { 1909 REQUIRE(VALID_DISPATCHMGR(mgr)); 1910 UNUSED(portlist); 1911 1912 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */ 1913 return; 1914 } 1915 1916 dns_portlist_t * 1917 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) { 1918 REQUIRE(VALID_DISPATCHMGR(mgr)); 1919 return (NULL); /* this function is deprecated */ 1920 } 1921 1922 isc_result_t 1923 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, 1924 isc_portset_t *v6portset) { 1925 in_port_t *v4ports, *v6ports, p; 1926 unsigned int nv4ports, nv6ports, i4, i6; 1927 1928 REQUIRE(VALID_DISPATCHMGR(mgr)); 1929 1930 nv4ports = isc_portset_nports(v4portset); 1931 nv6ports = isc_portset_nports(v6portset); 1932 1933 v4ports = NULL; 1934 if (nv4ports != 0) { 1935 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports); 1936 } 1937 v6ports = NULL; 1938 if (nv6ports != 0) { 1939 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports); 1940 } 1941 1942 p = 0; 1943 i4 = 0; 1944 i6 = 0; 1945 do { 1946 if (isc_portset_isset(v4portset, p)) { 1947 INSIST(i4 < nv4ports); 1948 v4ports[i4++] = p; 1949 } 1950 if (isc_portset_isset(v6portset, p)) { 1951 INSIST(i6 < nv6ports); 1952 v6ports[i6++] = p; 1953 } 1954 } while (p++ < 65535); 1955 INSIST(i4 == nv4ports && i6 == nv6ports); 1956 1957 PORTBUFLOCK(mgr); 1958 if (mgr->v4ports != NULL) { 1959 isc_mem_put(mgr->mctx, mgr->v4ports, 1960 mgr->nv4ports * sizeof(in_port_t)); 1961 } 1962 mgr->v4ports = v4ports; 1963 mgr->nv4ports = nv4ports; 1964 1965 if (mgr->v6ports != NULL) { 1966 isc_mem_put(mgr->mctx, mgr->v6ports, 1967 mgr->nv6ports * sizeof(in_port_t)); 1968 } 1969 mgr->v6ports = v6ports; 1970 mgr->nv6ports = nv6ports; 1971 PORTBUFUNLOCK(mgr); 1972 1973 return 
(ISC_R_SUCCESS); 1974 } 1975 1976 static isc_result_t 1977 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, unsigned int buffersize, 1978 unsigned int maxbuffers, unsigned int maxrequests, 1979 unsigned int buckets, unsigned int increment) { 1980 isc_result_t result; 1981 1982 REQUIRE(VALID_DISPATCHMGR(mgr)); 1983 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); 1984 REQUIRE(maxbuffers > 0); 1985 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 1986 REQUIRE(increment > buckets); 1987 1988 /* 1989 * Keep some number of items around. This should be a config 1990 * option. For now, keep 8, but later keep at least two even 1991 * if the caller wants less. This allows us to ensure certain 1992 * things, like an event can be "freed" and the next allocation 1993 * will always succeed. 1994 * 1995 * Note that if limits are placed on anything here, we use one 1996 * event internally, so the actual limit should be "wanted + 1." 1997 * 1998 * XXXMLG 1999 */ 2000 2001 if (maxbuffers < 8) { 2002 maxbuffers = 8; 2003 } 2004 2005 LOCK(&mgr->buffer_lock); 2006 2007 /* Create or adjust buffer pool */ 2008 if (mgr->bpool != NULL) { 2009 /* 2010 * We only increase the maxbuffers to avoid accidental buffer 2011 * shortage. Ideally we'd separate the manager-wide maximum 2012 * from per-dispatch limits and respect the latter within the 2013 * global limit. But at this moment that's deemed to be 2014 * overkilling and isn't worth additional implementation 2015 * complexity. 
2016 */ 2017 if (maxbuffers > mgr->maxbuffers) { 2018 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); 2019 isc_mempool_setfreemax(mgr->bpool, maxbuffers); 2020 mgr->maxbuffers = maxbuffers; 2021 } 2022 } else { 2023 isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool); 2024 isc_mempool_setname(mgr->bpool, "dispmgr_bpool"); 2025 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); 2026 isc_mempool_setfreemax(mgr->bpool, maxbuffers); 2027 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock); 2028 isc_mempool_setfillcount(mgr->bpool, 32); 2029 } 2030 2031 /* Create or adjust socket pool */ 2032 if (mgr->spool != NULL) { 2033 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) { 2034 isc_mempool_setmaxalloc(mgr->spool, 2035 DNS_DISPATCH_POOLSOCKS * 2); 2036 isc_mempool_setfreemax(mgr->spool, 2037 DNS_DISPATCH_POOLSOCKS * 2); 2038 } 2039 UNLOCK(&mgr->buffer_lock); 2040 return (ISC_R_SUCCESS); 2041 } 2042 isc_mempool_create(mgr->mctx, sizeof(dispsocket_t), &mgr->spool); 2043 2044 isc_mempool_setname(mgr->spool, "dispmgr_spool"); 2045 isc_mempool_setmaxalloc(mgr->spool, maxrequests); 2046 isc_mempool_setfreemax(mgr->spool, maxrequests); 2047 isc_mempool_associatelock(mgr->spool, &mgr->spool_lock); 2048 isc_mempool_setfillcount(mgr->spool, 32); 2049 2050 result = qid_allocate(mgr, buckets, increment, &mgr->qid, true); 2051 if (result != ISC_R_SUCCESS) { 2052 goto cleanup; 2053 } 2054 2055 mgr->buffersize = buffersize; 2056 mgr->maxbuffers = maxbuffers; 2057 UNLOCK(&mgr->buffer_lock); 2058 return (ISC_R_SUCCESS); 2059 2060 cleanup: 2061 isc_mempool_destroy(&mgr->bpool); 2062 if (mgr->spool != NULL) { 2063 isc_mempool_destroy(&mgr->spool); 2064 } 2065 UNLOCK(&mgr->buffer_lock); 2066 return (result); 2067 } 2068 2069 void 2070 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) { 2071 dns_dispatchmgr_t *mgr; 2072 bool killit; 2073 2074 REQUIRE(mgrp != NULL); 2075 REQUIRE(VALID_DISPATCHMGR(*mgrp)); 2076 2077 mgr = *mgrp; 2078 *mgrp = NULL; 2079 2080 LOCK(&mgr->lock); 2081 mgr->state 
|= MGR_SHUTTINGDOWN; 2082 killit = destroy_mgr_ok(mgr); 2083 UNLOCK(&mgr->lock); 2084 2085 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit); 2086 2087 if (killit) { 2088 destroy_mgr(&mgr); 2089 } 2090 } 2091 2092 void 2093 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { 2094 REQUIRE(VALID_DISPATCHMGR(mgr)); 2095 REQUIRE(ISC_LIST_EMPTY(mgr->list)); 2096 REQUIRE(mgr->stats == NULL); 2097 2098 isc_stats_attach(stats, &mgr->stats); 2099 } 2100 2101 static int 2102 port_cmp(const void *key, const void *ent) { 2103 in_port_t p1 = *(const in_port_t *)key; 2104 in_port_t p2 = *(const in_port_t *)ent; 2105 2106 if (p1 < p2) { 2107 return (-1); 2108 } else if (p1 == p2) { 2109 return (0); 2110 } else { 2111 return (1); 2112 } 2113 } 2114 2115 static bool 2116 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 2117 isc_sockaddr_t *sockaddrp) { 2118 isc_sockaddr_t sockaddr; 2119 isc_result_t result; 2120 in_port_t *ports, port; 2121 unsigned int nports; 2122 bool available = false; 2123 2124 REQUIRE(sock != NULL || sockaddrp != NULL); 2125 2126 PORTBUFLOCK(mgr); 2127 if (sock != NULL) { 2128 sockaddrp = &sockaddr; 2129 result = isc_socket_getsockname(sock, sockaddrp); 2130 if (result != ISC_R_SUCCESS) { 2131 goto unlock; 2132 } 2133 } 2134 2135 if (isc_sockaddr_pf(sockaddrp) == AF_INET) { 2136 ports = mgr->v4ports; 2137 nports = mgr->nv4ports; 2138 } else { 2139 ports = mgr->v6ports; 2140 nports = mgr->nv6ports; 2141 } 2142 if (ports == NULL) { 2143 goto unlock; 2144 } 2145 2146 port = isc_sockaddr_getport(sockaddrp); 2147 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL) 2148 { 2149 available = true; 2150 } 2151 2152 unlock: 2153 PORTBUFUNLOCK(mgr); 2154 return (available); 2155 } 2156 2157 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask))) 2158 2159 static bool 2160 local_addr_match(dns_dispatch_t *disp, const isc_sockaddr_t *addr) { 2161 isc_sockaddr_t sockaddr; 2162 isc_result_t result; 2163 
2164 REQUIRE(disp->socket != NULL); 2165 2166 if (addr == NULL) { 2167 return (true); 2168 } 2169 2170 /* 2171 * Don't match wildcard ports unless the port is available in the 2172 * current configuration. 2173 */ 2174 if (isc_sockaddr_getport(addr) == 0 && 2175 isc_sockaddr_getport(&disp->local) == 0 && 2176 !portavailable(disp->mgr, disp->socket, NULL)) 2177 { 2178 return (false); 2179 } 2180 2181 /* 2182 * Check if we match the binding <address,port>. 2183 * Wildcard ports match/fail here. 2184 */ 2185 if (isc_sockaddr_equal(&disp->local, addr)) { 2186 return (true); 2187 } 2188 if (isc_sockaddr_getport(addr) == 0) { 2189 return (false); 2190 } 2191 2192 /* 2193 * Check if we match a bound wildcard port <address,port>. 2194 */ 2195 if (!isc_sockaddr_eqaddr(&disp->local, addr)) { 2196 return (false); 2197 } 2198 result = isc_socket_getsockname(disp->socket, &sockaddr); 2199 if (result != ISC_R_SUCCESS) { 2200 return (false); 2201 } 2202 2203 return (isc_sockaddr_equal(&sockaddr, addr)); 2204 } 2205 2206 /* 2207 * Requires mgr be locked. 2208 * 2209 * No dispatcher can be locked by this thread when calling this function. 2210 * 2211 * 2212 * NOTE: 2213 * If a matching dispatcher is found, it is locked after this function 2214 * returns, and must be unlocked by the caller. 2215 */ 2216 static isc_result_t 2217 dispatch_find(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *local, 2218 unsigned int attributes, unsigned int mask, 2219 dns_dispatch_t **dispp) { 2220 dns_dispatch_t *disp; 2221 isc_result_t result; 2222 2223 /* 2224 * Make certain that we will not match a private or exclusive dispatch. 
2225 */ 2226 attributes &= ~(DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE); 2227 mask |= (DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE); 2228 2229 disp = ISC_LIST_HEAD(mgr->list); 2230 while (disp != NULL) { 2231 LOCK(&disp->lock); 2232 if ((disp->shutting_down == 0) && 2233 ATTRMATCH(disp->attributes, attributes, mask) && 2234 local_addr_match(disp, local)) 2235 { 2236 break; 2237 } 2238 UNLOCK(&disp->lock); 2239 disp = ISC_LIST_NEXT(disp, link); 2240 } 2241 2242 if (disp == NULL) { 2243 result = ISC_R_NOTFOUND; 2244 goto out; 2245 } 2246 2247 *dispp = disp; 2248 result = ISC_R_SUCCESS; 2249 out: 2250 2251 return (result); 2252 } 2253 2254 static isc_result_t 2255 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, 2256 unsigned int increment, dns_qid_t **qidp, bool needsocktable) { 2257 dns_qid_t *qid; 2258 unsigned int i; 2259 2260 REQUIRE(VALID_DISPATCHMGR(mgr)); 2261 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 2262 REQUIRE(increment > buckets); 2263 REQUIRE(qidp != NULL && *qidp == NULL); 2264 2265 qid = isc_mem_get(mgr->mctx, sizeof(*qid)); 2266 2267 qid->qid_table = isc_mem_get(mgr->mctx, 2268 buckets * sizeof(dns_displist_t)); 2269 2270 qid->sock_table = NULL; 2271 if (needsocktable) { 2272 qid->sock_table = isc_mem_get( 2273 mgr->mctx, buckets * sizeof(dispsocketlist_t)); 2274 } 2275 2276 isc_mutex_init(&qid->lock); 2277 2278 for (i = 0; i < buckets; i++) { 2279 ISC_LIST_INIT(qid->qid_table[i]); 2280 if (qid->sock_table != NULL) { 2281 ISC_LIST_INIT(qid->sock_table[i]); 2282 } 2283 } 2284 2285 qid->qid_nbuckets = buckets; 2286 qid->qid_increment = increment; 2287 qid->magic = QID_MAGIC; 2288 *qidp = qid; 2289 return (ISC_R_SUCCESS); 2290 } 2291 2292 static void 2293 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) { 2294 dns_qid_t *qid; 2295 2296 REQUIRE(qidp != NULL); 2297 qid = *qidp; 2298 *qidp = NULL; 2299 2300 REQUIRE(VALID_QID(qid)); 2301 2302 qid->magic = 0; 2303 isc_mem_put(mctx, qid->qid_table, 2304 
qid->qid_nbuckets * sizeof(dns_displist_t)); 2305 if (qid->sock_table != NULL) { 2306 isc_mem_put(mctx, qid->sock_table, 2307 qid->qid_nbuckets * sizeof(dispsocketlist_t)); 2308 } 2309 isc_mutex_destroy(&qid->lock); 2310 isc_mem_put(mctx, qid, sizeof(*qid)); 2311 } 2312 2313 /* 2314 * Allocate and set important limits. 2315 */ 2316 static isc_result_t 2317 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests, 2318 dns_dispatch_t **dispp) { 2319 dns_dispatch_t *disp; 2320 isc_result_t result; 2321 2322 REQUIRE(VALID_DISPATCHMGR(mgr)); 2323 REQUIRE(dispp != NULL && *dispp == NULL); 2324 2325 /* 2326 * Set up the dispatcher, mostly. Don't bother setting some of 2327 * the options that are controlled by tcp vs. udp, etc. 2328 */ 2329 2330 disp = isc_mempool_get(mgr->dpool); 2331 if (disp == NULL) { 2332 return (ISC_R_NOMEMORY); 2333 } 2334 2335 disp->magic = 0; 2336 disp->mgr = mgr; 2337 disp->maxrequests = maxrequests; 2338 disp->attributes = 0; 2339 ISC_LINK_INIT(disp, link); 2340 disp->refcount = 1; 2341 disp->recv_pending = 0; 2342 memset(&disp->local, 0, sizeof(disp->local)); 2343 memset(&disp->peer, 0, sizeof(disp->peer)); 2344 disp->localport = 0; 2345 disp->shutting_down = 0; 2346 disp->shutdown_out = 0; 2347 disp->connected = 0; 2348 disp->tcpmsg_valid = 0; 2349 disp->shutdown_why = ISC_R_UNEXPECTED; 2350 disp->requests = 0; 2351 disp->tcpbuffers = 0; 2352 disp->qid = NULL; 2353 ISC_LIST_INIT(disp->activesockets); 2354 ISC_LIST_INIT(disp->inactivesockets); 2355 disp->nsockets = 0; 2356 disp->port_table = NULL; 2357 disp->portpool = NULL; 2358 disp->dscp = -1; 2359 2360 isc_mutex_init(&disp->lock); 2361 2362 disp->failsafe_ev = allocate_devent(disp); 2363 if (disp->failsafe_ev == NULL) { 2364 result = ISC_R_NOMEMORY; 2365 goto kill_lock; 2366 } 2367 2368 disp->magic = DISPATCH_MAGIC; 2369 2370 *dispp = disp; 2371 return (ISC_R_SUCCESS); 2372 2373 /* 2374 * error returns 2375 */ 2376 kill_lock: 2377 isc_mutex_destroy(&disp->lock); 2378 
isc_mempool_put(mgr->dpool, disp); 2379 2380 return (result); 2381 } 2382 2383 /* 2384 * MUST be unlocked, and not used by anything. 2385 */ 2386 static void 2387 dispatch_free(dns_dispatch_t **dispp) { 2388 dns_dispatch_t *disp; 2389 dns_dispatchmgr_t *mgr; 2390 2391 REQUIRE(VALID_DISPATCH(*dispp)); 2392 disp = *dispp; 2393 *dispp = NULL; 2394 2395 mgr = disp->mgr; 2396 REQUIRE(VALID_DISPATCHMGR(mgr)); 2397 2398 if (disp->tcpmsg_valid) { 2399 dns_tcpmsg_invalidate(&disp->tcpmsg); 2400 disp->tcpmsg_valid = 0; 2401 } 2402 2403 INSIST(disp->tcpbuffers == 0); 2404 INSIST(disp->requests == 0); 2405 INSIST(disp->recv_pending == 0); 2406 INSIST(ISC_LIST_EMPTY(disp->activesockets)); 2407 INSIST(ISC_LIST_EMPTY(disp->inactivesockets)); 2408 2409 isc_mempool_put(mgr->depool, disp->failsafe_ev); 2410 disp->failsafe_ev = NULL; 2411 2412 if (disp->qid != NULL) { 2413 qid_destroy(mgr->mctx, &disp->qid); 2414 } 2415 2416 if (disp->port_table != NULL) { 2417 for (int i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) { 2418 INSIST(ISC_LIST_EMPTY(disp->port_table[i])); 2419 } 2420 isc_mem_put(mgr->mctx, disp->port_table, 2421 sizeof(disp->port_table[0]) * 2422 DNS_DISPATCH_PORTTABLESIZE); 2423 } 2424 2425 if (disp->portpool != NULL) { 2426 isc_mempool_destroy(&disp->portpool); 2427 } 2428 2429 disp->mgr = NULL; 2430 isc_mutex_destroy(&disp->lock); 2431 disp->magic = 0; 2432 isc_mempool_put(mgr->dpool, disp); 2433 } 2434 2435 isc_result_t 2436 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 2437 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 2438 const isc_sockaddr_t *destaddr, unsigned int buffersize, 2439 unsigned int maxbuffers, unsigned int maxrequests, 2440 unsigned int buckets, unsigned int increment, 2441 unsigned int attributes, dns_dispatch_t **dispp) { 2442 isc_result_t result; 2443 dns_dispatch_t *disp; 2444 2445 UNUSED(maxbuffers); 2446 UNUSED(buffersize); 2447 2448 REQUIRE(VALID_DISPATCHMGR(mgr)); 2449 REQUIRE(isc_socket_gettype(sock) == 
isc_sockettype_tcp); 2450 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0); 2451 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0); 2452 2453 if (destaddr == NULL) { 2454 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */ 2455 } 2456 2457 LOCK(&mgr->lock); 2458 2459 /* 2460 * dispatch_allocate() checks mgr for us. 2461 * qid_allocate() checks buckets and increment for us. 2462 */ 2463 disp = NULL; 2464 result = dispatch_allocate(mgr, maxrequests, &disp); 2465 if (result != ISC_R_SUCCESS) { 2466 UNLOCK(&mgr->lock); 2467 return (result); 2468 } 2469 2470 result = qid_allocate(mgr, buckets, increment, &disp->qid, false); 2471 if (result != ISC_R_SUCCESS) { 2472 goto deallocate_dispatch; 2473 } 2474 2475 disp->socktype = isc_sockettype_tcp; 2476 disp->socket = NULL; 2477 isc_socket_attach(sock, &disp->socket); 2478 2479 disp->sepool = NULL; 2480 2481 disp->ntasks = 1; 2482 disp->task[0] = NULL; 2483 result = isc_task_create(taskmgr, 50, &disp->task[0]); 2484 if (result != ISC_R_SUCCESS) { 2485 goto kill_socket; 2486 } 2487 2488 disp->ctlevent = 2489 isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, 2490 destroy_disp, disp, sizeof(isc_event_t)); 2491 2492 isc_task_setname(disp->task[0], "tcpdispatch", disp); 2493 2494 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg); 2495 disp->tcpmsg_valid = 1; 2496 2497 disp->attributes = attributes; 2498 2499 if (localaddr == NULL) { 2500 if (destaddr != NULL) { 2501 switch (isc_sockaddr_pf(destaddr)) { 2502 case AF_INET: 2503 isc_sockaddr_any(&disp->local); 2504 break; 2505 case AF_INET6: 2506 isc_sockaddr_any6(&disp->local); 2507 break; 2508 } 2509 } 2510 } else { 2511 disp->local = *localaddr; 2512 } 2513 2514 if (destaddr != NULL) { 2515 disp->peer = *destaddr; 2516 } 2517 2518 /* 2519 * Append it to the dispatcher list. 
2520 */ 2521 ISC_LIST_APPEND(mgr->list, disp, link); 2522 UNLOCK(&mgr->lock); 2523 2524 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp); 2525 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); 2526 *dispp = disp; 2527 2528 return (ISC_R_SUCCESS); 2529 2530 kill_socket: 2531 isc_socket_detach(&disp->socket); 2532 deallocate_dispatch: 2533 dispatch_free(&disp); 2534 2535 UNLOCK(&mgr->lock); 2536 2537 return (result); 2538 } 2539 2540 isc_result_t 2541 dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, 2542 const isc_sockaddr_t *localaddr, bool *connected, 2543 dns_dispatch_t **dispp) { 2544 dns_dispatch_t *disp; 2545 isc_result_t result; 2546 isc_sockaddr_t peeraddr; 2547 isc_sockaddr_t sockname; 2548 unsigned int attributes, mask; 2549 bool match = false; 2550 2551 REQUIRE(VALID_DISPATCHMGR(mgr)); 2552 REQUIRE(destaddr != NULL); 2553 REQUIRE(dispp != NULL && *dispp == NULL); 2554 2555 /* First pass */ 2556 attributes = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_CONNECTED; 2557 mask = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_PRIVATE | 2558 DNS_DISPATCHATTR_EXCLUSIVE | DNS_DISPATCHATTR_CONNECTED; 2559 2560 LOCK(&mgr->lock); 2561 disp = ISC_LIST_HEAD(mgr->list); 2562 while (disp != NULL && !match) { 2563 LOCK(&disp->lock); 2564 if ((disp->shutting_down == 0) && 2565 ATTRMATCH(disp->attributes, attributes, mask) && 2566 (localaddr == NULL || 2567 isc_sockaddr_eqaddr(localaddr, &disp->local))) 2568 { 2569 result = isc_socket_getsockname(disp->socket, 2570 &sockname); 2571 if (result == ISC_R_SUCCESS) { 2572 result = isc_socket_getpeername(disp->socket, 2573 &peeraddr); 2574 } 2575 if (result == ISC_R_SUCCESS && 2576 isc_sockaddr_equal(destaddr, &peeraddr) && 2577 (localaddr == NULL || 2578 isc_sockaddr_eqaddr(localaddr, &sockname))) 2579 { 2580 /* attach */ 2581 disp->refcount++; 2582 *dispp = disp; 2583 match = true; 2584 if (connected != NULL) { 2585 *connected = true; 2586 } 2587 } 2588 } 2589 UNLOCK(&disp->lock); 2590 disp = 
ISC_LIST_NEXT(disp, link); 2591 } 2592 if (match || connected == NULL) { 2593 UNLOCK(&mgr->lock); 2594 return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND); 2595 } 2596 2597 /* Second pass, only if connected != NULL */ 2598 attributes = DNS_DISPATCHATTR_TCP; 2599 2600 disp = ISC_LIST_HEAD(mgr->list); 2601 while (disp != NULL && !match) { 2602 LOCK(&disp->lock); 2603 if ((disp->shutting_down == 0) && 2604 ATTRMATCH(disp->attributes, attributes, mask) && 2605 (localaddr == NULL || 2606 isc_sockaddr_eqaddr(localaddr, &disp->local)) && 2607 isc_sockaddr_equal(destaddr, &disp->peer)) 2608 { 2609 /* attach */ 2610 disp->refcount++; 2611 *dispp = disp; 2612 match = true; 2613 } 2614 UNLOCK(&disp->lock); 2615 disp = ISC_LIST_NEXT(disp, link); 2616 } 2617 UNLOCK(&mgr->lock); 2618 return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND); 2619 } 2620 2621 isc_result_t 2622 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 2623 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 2624 unsigned int buffersize, unsigned int maxbuffers, 2625 unsigned int maxrequests, unsigned int buckets, 2626 unsigned int increment, unsigned int attributes, 2627 unsigned int mask, dns_dispatch_t **dispp, 2628 dns_dispatch_t *dup_dispatch) { 2629 isc_result_t result; 2630 dns_dispatch_t *disp = NULL; 2631 2632 REQUIRE(VALID_DISPATCHMGR(mgr)); 2633 REQUIRE(sockmgr != NULL); 2634 REQUIRE(localaddr != NULL); 2635 REQUIRE(taskmgr != NULL); 2636 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); 2637 REQUIRE(maxbuffers > 0); 2638 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 2639 REQUIRE(increment > buckets); 2640 REQUIRE(dispp != NULL && *dispp == NULL); 2641 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0); 2642 2643 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers, 2644 maxrequests, buckets, increment); 2645 if (result != ISC_R_SUCCESS) { 2646 return (result); 2647 } 2648 2649 LOCK(&mgr->lock); 2650 2651 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) 
{ 2652 REQUIRE(isc_sockaddr_getport(localaddr) == 0); 2653 goto createudp; 2654 } 2655 2656 /* 2657 * See if we have a dispatcher that matches. 2658 */ 2659 if (dup_dispatch == NULL) { 2660 result = dispatch_find(mgr, localaddr, attributes, mask, &disp); 2661 if (result == ISC_R_SUCCESS) { 2662 disp->refcount++; 2663 2664 if (disp->maxrequests < maxrequests) { 2665 disp->maxrequests = maxrequests; 2666 } 2667 2668 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 2669 0 && 2670 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) 2671 { 2672 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; 2673 if (disp->recv_pending != 0) { 2674 isc_socket_cancel(disp->socket, 2675 disp->task[0], 2676 ISC_SOCKCANCEL_RECV); 2677 } 2678 } 2679 2680 UNLOCK(&disp->lock); 2681 UNLOCK(&mgr->lock); 2682 2683 *dispp = disp; 2684 2685 return (ISC_R_SUCCESS); 2686 } 2687 } 2688 2689 createudp: 2690 /* 2691 * Nope, create one. 2692 */ 2693 result = dispatch_createudp( 2694 mgr, sockmgr, taskmgr, localaddr, maxrequests, attributes, 2695 &disp, dup_dispatch == NULL ? NULL : dup_dispatch->socket); 2696 2697 if (result != ISC_R_SUCCESS) { 2698 UNLOCK(&mgr->lock); 2699 return (result); 2700 } 2701 2702 UNLOCK(&mgr->lock); 2703 *dispp = disp; 2704 2705 return (ISC_R_SUCCESS); 2706 } 2707 2708 isc_result_t 2709 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 2710 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 2711 unsigned int buffersize, unsigned int maxbuffers, 2712 unsigned int maxrequests, unsigned int buckets, 2713 unsigned int increment, unsigned int attributes, 2714 unsigned int mask, dns_dispatch_t **dispp) { 2715 return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr, 2716 buffersize, maxbuffers, maxrequests, 2717 buckets, increment, attributes, mask, 2718 dispp, NULL)); 2719 } 2720 2721 /* 2722 * mgr should be locked. 
2723 */ 2724 2725 #ifndef DNS_DISPATCH_HELD 2726 #define DNS_DISPATCH_HELD 20U 2727 #endif /* ifndef DNS_DISPATCH_HELD */ 2728 2729 static isc_result_t 2730 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, 2731 isc_socketmgr_t *sockmgr, const isc_sockaddr_t *localaddr, 2732 isc_socket_t **sockp, isc_socket_t *dup_socket, bool duponly) { 2733 unsigned int i, j; 2734 isc_socket_t *held[DNS_DISPATCH_HELD]; 2735 isc_sockaddr_t localaddr_bound; 2736 isc_socket_t *sock = NULL; 2737 isc_result_t result = ISC_R_SUCCESS; 2738 bool anyport; 2739 2740 INSIST(sockp != NULL && *sockp == NULL); 2741 2742 localaddr_bound = *localaddr; 2743 anyport = (isc_sockaddr_getport(localaddr) == 0); 2744 2745 if (anyport) { 2746 unsigned int nports; 2747 in_port_t *ports; 2748 2749 /* 2750 * If no port is specified, we first try to pick up a random 2751 * port by ourselves. 2752 */ 2753 if (isc_sockaddr_pf(localaddr) == AF_INET) { 2754 nports = disp->mgr->nv4ports; 2755 ports = disp->mgr->v4ports; 2756 } else { 2757 nports = disp->mgr->nv6ports; 2758 ports = disp->mgr->v6ports; 2759 } 2760 if (nports == 0) { 2761 return (ISC_R_ADDRNOTAVAIL); 2762 } 2763 2764 for (i = 0; i < 1024; i++) { 2765 in_port_t prt; 2766 2767 prt = ports[isc_random_uniform(nports)]; 2768 isc_sockaddr_setport(&localaddr_bound, prt); 2769 result = open_socket(sockmgr, &localaddr_bound, 0, 2770 &sock, NULL, false); 2771 /* 2772 * Continue if the port chosen is already in use 2773 * or the OS has reserved it. 2774 */ 2775 if (result == ISC_R_NOPERM || result == ISC_R_ADDRINUSE) 2776 { 2777 continue; 2778 } 2779 disp->localport = prt; 2780 *sockp = sock; 2781 return (result); 2782 } 2783 2784 /* 2785 * If this fails 1024 times, we then ask the kernel for 2786 * choosing one. 2787 */ 2788 } else { 2789 /* Allow to reuse address for non-random ports. 
*/ 2790 result = open_socket(sockmgr, localaddr, 2791 ISC_SOCKET_REUSEADDRESS, &sock, dup_socket, 2792 duponly); 2793 2794 if (result == ISC_R_SUCCESS) { 2795 *sockp = sock; 2796 } 2797 2798 return (result); 2799 } 2800 2801 memset(held, 0, sizeof(held)); 2802 i = 0; 2803 2804 for (j = 0; j < 0xffffU; j++) { 2805 result = open_socket(sockmgr, localaddr, 0, &sock, NULL, false); 2806 if (result != ISC_R_SUCCESS) { 2807 goto end; 2808 } else if (portavailable(mgr, sock, NULL)) { 2809 break; 2810 } 2811 if (held[i] != NULL) { 2812 isc_socket_detach(&held[i]); 2813 } 2814 held[i++] = sock; 2815 sock = NULL; 2816 if (i == DNS_DISPATCH_HELD) { 2817 i = 0; 2818 } 2819 } 2820 if (j == 0xffffU) { 2821 mgr_log(mgr, ISC_LOG_ERROR, 2822 "avoid-v%s-udp-ports: unable to allocate " 2823 "an available port", 2824 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6"); 2825 result = ISC_R_FAILURE; 2826 goto end; 2827 } 2828 *sockp = sock; 2829 2830 end: 2831 for (i = 0; i < DNS_DISPATCH_HELD; i++) { 2832 if (held[i] != NULL) { 2833 isc_socket_detach(&held[i]); 2834 } 2835 } 2836 2837 return (result); 2838 } 2839 2840 static isc_result_t 2841 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 2842 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 2843 unsigned int maxrequests, unsigned int attributes, 2844 dns_dispatch_t **dispp, isc_socket_t *dup_socket) { 2845 isc_result_t result; 2846 dns_dispatch_t *disp; 2847 isc_socket_t *sock = NULL; 2848 int i = 0; 2849 bool duponly = ((attributes & DNS_DISPATCHATTR_CANREUSE) == 0); 2850 2851 /* This is an attribute needed only at creation time */ 2852 attributes &= ~DNS_DISPATCHATTR_CANREUSE; 2853 /* 2854 * dispatch_allocate() checks mgr for us. 
2855 */ 2856 disp = NULL; 2857 result = dispatch_allocate(mgr, maxrequests, &disp); 2858 if (result != ISC_R_SUCCESS) { 2859 return (result); 2860 } 2861 2862 disp->socktype = isc_sockettype_udp; 2863 2864 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) { 2865 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock, 2866 dup_socket, duponly); 2867 if (result != ISC_R_SUCCESS) { 2868 goto deallocate_dispatch; 2869 } 2870 2871 if (isc_log_wouldlog(dns_lctx, 90)) { 2872 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 2873 2874 isc_sockaddr_format(localaddr, addrbuf, 2875 ISC_SOCKADDR_FORMATSIZE); 2876 mgr_log(mgr, LVL(90), 2877 "dns_dispatch_createudp: Created" 2878 " UDP dispatch for %s with socket fd %d", 2879 addrbuf, isc_socket_getfd(sock)); 2880 } 2881 } else { 2882 isc_sockaddr_t sa_any; 2883 2884 /* 2885 * For dispatches using exclusive sockets with a specific 2886 * source address, we only check if the specified address is 2887 * available on the system. Query sockets will be created later 2888 * on demand. 
2889 */ 2890 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr)); 2891 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { 2892 result = open_socket(sockmgr, localaddr, 0, &sock, NULL, 2893 false); 2894 if (sock != NULL) { 2895 isc_socket_detach(&sock); 2896 } 2897 if (result != ISC_R_SUCCESS) { 2898 goto deallocate_dispatch; 2899 } 2900 } 2901 2902 disp->port_table = isc_mem_get( 2903 mgr->mctx, sizeof(disp->port_table[0]) * 2904 DNS_DISPATCH_PORTTABLESIZE); 2905 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) { 2906 ISC_LIST_INIT(disp->port_table[i]); 2907 } 2908 2909 isc_mempool_create(mgr->mctx, sizeof(dispportentry_t), 2910 &disp->portpool); 2911 isc_mempool_setname(disp->portpool, "disp_portpool"); 2912 isc_mempool_setfreemax(disp->portpool, 128); 2913 } 2914 disp->socket = sock; 2915 disp->local = *localaddr; 2916 2917 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 2918 disp->ntasks = MAX_INTERNAL_TASKS; 2919 } else { 2920 disp->ntasks = 1; 2921 } 2922 for (i = 0; i < disp->ntasks; i++) { 2923 disp->task[i] = NULL; 2924 result = isc_task_create(taskmgr, 0, &disp->task[i]); 2925 if (result != ISC_R_SUCCESS) { 2926 while (--i >= 0) { 2927 isc_task_shutdown(disp->task[i]); 2928 isc_task_detach(&disp->task[i]); 2929 } 2930 goto kill_socket; 2931 } 2932 isc_task_setname(disp->task[i], "udpdispatch", disp); 2933 } 2934 2935 disp->ctlevent = 2936 isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, 2937 destroy_disp, disp, sizeof(isc_event_t)); 2938 2939 disp->sepool = NULL; 2940 isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t), &disp->sepool); 2941 2942 isc_mutex_init(&disp->sepool_lock); 2943 2944 isc_mempool_setname(disp->sepool, "disp_sepool"); 2945 isc_mempool_setmaxalloc(disp->sepool, 32768); 2946 isc_mempool_setfreemax(disp->sepool, 32768); 2947 isc_mempool_associatelock(disp->sepool, &disp->sepool_lock); 2948 isc_mempool_setfillcount(disp->sepool, 16); 2949 2950 attributes &= ~DNS_DISPATCHATTR_TCP; 2951 attributes |= 
DNS_DISPATCHATTR_UDP; 2952 disp->attributes = attributes; 2953 2954 /* 2955 * Append it to the dispatcher list. 2956 */ 2957 ISC_LIST_APPEND(mgr->list, disp, link); 2958 2959 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp); 2960 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */ 2961 if (disp->socket != NULL) { 2962 dispatch_log(disp, LVL(90), "created socket %p", disp->socket); 2963 } 2964 2965 *dispp = disp; 2966 2967 return (result); 2968 2969 /* 2970 * Error returns. 2971 */ 2972 kill_socket: 2973 if (disp->socket != NULL) { 2974 isc_socket_detach(&disp->socket); 2975 } 2976 deallocate_dispatch: 2977 dispatch_free(&disp); 2978 2979 return (result); 2980 } 2981 2982 void 2983 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) { 2984 REQUIRE(VALID_DISPATCH(disp)); 2985 REQUIRE(dispp != NULL && *dispp == NULL); 2986 2987 LOCK(&disp->lock); 2988 disp->refcount++; 2989 UNLOCK(&disp->lock); 2990 2991 *dispp = disp; 2992 } 2993 2994 /* 2995 * It is important to lock the manager while we are deleting the dispatch, 2996 * since dns_dispatch_getudp will call dispatch_find, which returns to 2997 * the caller a dispatch but does not attach to it until later. _getudp 2998 * locks the manager, however, so locking it here will keep us from attaching 2999 * to a dispatcher that is in the process of going away. 
3000 */ 3001 void 3002 dns_dispatch_detach(dns_dispatch_t **dispp) { 3003 dns_dispatch_t *disp; 3004 dispsocket_t *dispsock; 3005 bool killit; 3006 3007 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp)); 3008 3009 disp = *dispp; 3010 *dispp = NULL; 3011 3012 LOCK(&disp->lock); 3013 3014 INSIST(disp->refcount > 0); 3015 disp->refcount--; 3016 if (disp->refcount == 0) { 3017 if (disp->recv_pending > 0) { 3018 isc_socket_cancel(disp->socket, disp->task[0], 3019 ISC_SOCKCANCEL_RECV); 3020 } 3021 for (dispsock = ISC_LIST_HEAD(disp->activesockets); 3022 dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link)) 3023 { 3024 isc_socket_cancel(dispsock->socket, dispsock->task, 3025 ISC_SOCKCANCEL_RECV); 3026 } 3027 disp->shutting_down = 1; 3028 } 3029 3030 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount); 3031 3032 killit = destroy_disp_ok(disp); 3033 UNLOCK(&disp->lock); 3034 if (killit) { 3035 isc_task_send(disp->task[0], &disp->ctlevent); 3036 } 3037 } 3038 3039 isc_result_t 3040 dns_dispatch_addresponse(dns_dispatch_t *disp, unsigned int options, 3041 const isc_sockaddr_t *dest, isc_task_t *task, 3042 isc_taskaction_t action, void *arg, 3043 dns_messageid_t *idp, dns_dispentry_t **resp, 3044 isc_socketmgr_t *sockmgr) { 3045 dns_dispentry_t *res; 3046 unsigned int bucket; 3047 in_port_t localport = 0; 3048 dns_messageid_t id; 3049 int i; 3050 bool ok; 3051 dns_qid_t *qid; 3052 dispsocket_t *dispsocket = NULL; 3053 isc_result_t result; 3054 3055 REQUIRE(VALID_DISPATCH(disp)); 3056 REQUIRE(task != NULL); 3057 REQUIRE(dest != NULL); 3058 REQUIRE(resp != NULL && *resp == NULL); 3059 REQUIRE(idp != NULL); 3060 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 3061 REQUIRE(sockmgr != NULL); 3062 } 3063 3064 LOCK(&disp->lock); 3065 3066 if (disp->shutting_down == 1) { 3067 UNLOCK(&disp->lock); 3068 return (ISC_R_SHUTTINGDOWN); 3069 } 3070 3071 if (disp->requests >= disp->maxrequests) { 3072 UNLOCK(&disp->lock); 3073 return (ISC_R_QUOTA); 3074 } 3075 
3076 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && 3077 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) 3078 { 3079 dispsocket_t *oldestsocket; 3080 dns_dispentry_t *oldestresp; 3081 dns_dispatchevent_t *rev; 3082 3083 /* 3084 * Kill oldest outstanding query if the number of sockets 3085 * exceeds the quota to keep the room for new queries. 3086 */ 3087 oldestsocket = ISC_LIST_HEAD(disp->activesockets); 3088 oldestresp = oldestsocket->resp; 3089 if (oldestresp != NULL && !oldestresp->item_out) { 3090 rev = allocate_devent(oldestresp->disp); 3091 if (rev != NULL) { 3092 rev->buffer.base = NULL; 3093 rev->result = ISC_R_CANCELED; 3094 rev->id = oldestresp->id; 3095 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, 3096 DNS_EVENT_DISPATCH, 3097 oldestresp->action, 3098 oldestresp->arg, oldestresp, 3099 NULL, NULL); 3100 oldestresp->item_out = true; 3101 isc_task_send(oldestresp->task, 3102 ISC_EVENT_PTR(&rev)); 3103 inc_stats(disp->mgr, 3104 dns_resstatscounter_dispabort); 3105 } 3106 } 3107 3108 /* 3109 * Move this entry to the tail so that it won't (easily) be 3110 * examined before actually being canceled. 3111 */ 3112 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link); 3113 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link); 3114 } 3115 3116 qid = DNS_QID(disp); 3117 3118 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 3119 /* 3120 * Get a separate UDP socket with a random port number. 3121 */ 3122 result = get_dispsocket(disp, dest, sockmgr, &dispsocket, 3123 &localport); 3124 if (result != ISC_R_SUCCESS) { 3125 UNLOCK(&disp->lock); 3126 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); 3127 return (result); 3128 } 3129 } else { 3130 localport = disp->localport; 3131 } 3132 3133 /* 3134 * Try somewhat hard to find an unique ID unless FIXEDID is set 3135 * in which case we use the id passed in via *idp. 
3136 */ 3137 LOCK(&qid->lock); 3138 if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { 3139 id = *idp; 3140 } else { 3141 id = (dns_messageid_t)isc_random16(); 3142 } 3143 ok = false; 3144 i = 0; 3145 do { 3146 bucket = dns_hash(qid, dest, id, localport); 3147 if (entry_search(qid, dest, id, localport, bucket) == NULL) { 3148 ok = true; 3149 break; 3150 } 3151 if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0) { 3152 break; 3153 } 3154 id += qid->qid_increment; 3155 id &= 0x0000ffff; 3156 } while (i++ < 64); 3157 UNLOCK(&qid->lock); 3158 3159 if (!ok) { 3160 UNLOCK(&disp->lock); 3161 return (ISC_R_NOMORE); 3162 } 3163 3164 res = isc_mempool_get(disp->mgr->rpool); 3165 if (res == NULL) { 3166 if (dispsocket != NULL) { 3167 destroy_dispsocket(disp, &dispsocket); 3168 } 3169 UNLOCK(&disp->lock); 3170 return (ISC_R_NOMEMORY); 3171 } 3172 3173 disp->refcount++; 3174 disp->requests++; 3175 res->task = NULL; 3176 isc_task_attach(task, &res->task); 3177 res->disp = disp; 3178 res->id = id; 3179 res->port = localport; 3180 res->bucket = bucket; 3181 res->host = *dest; 3182 res->action = action; 3183 res->arg = arg; 3184 res->dispsocket = dispsocket; 3185 if (dispsocket != NULL) { 3186 dispsocket->resp = res; 3187 } 3188 res->item_out = false; 3189 ISC_LIST_INIT(res->items); 3190 ISC_LINK_INIT(res, link); 3191 res->magic = RESPONSE_MAGIC; 3192 3193 LOCK(&qid->lock); 3194 ISC_LIST_APPEND(qid->qid_table[bucket], res, link); 3195 UNLOCK(&qid->lock); 3196 3197 inc_stats(disp->mgr, (qid == disp->mgr->qid) 3198 ? 
dns_resstatscounter_disprequdp 3199 : dns_resstatscounter_dispreqtcp); 3200 3201 request_log(disp, res, LVL(90), "attached to task %p", res->task); 3202 3203 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) || 3204 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) 3205 { 3206 result = startrecv(disp, dispsocket); 3207 if (result != ISC_R_SUCCESS) { 3208 LOCK(&qid->lock); 3209 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); 3210 UNLOCK(&qid->lock); 3211 3212 if (dispsocket != NULL) { 3213 destroy_dispsocket(disp, &dispsocket); 3214 } 3215 3216 disp->refcount--; 3217 disp->requests--; 3218 3219 dec_stats(disp->mgr, 3220 (qid == disp->mgr->qid) 3221 ? dns_resstatscounter_disprequdp 3222 : dns_resstatscounter_dispreqtcp); 3223 3224 UNLOCK(&disp->lock); 3225 isc_task_detach(&res->task); 3226 isc_mempool_put(disp->mgr->rpool, res); 3227 return (result); 3228 } 3229 } 3230 3231 if (dispsocket != NULL) { 3232 ISC_LIST_APPEND(disp->activesockets, dispsocket, link); 3233 } 3234 3235 UNLOCK(&disp->lock); 3236 3237 *idp = id; 3238 *resp = res; 3239 3240 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 3241 INSIST(res->dispsocket != NULL); 3242 } 3243 3244 return (ISC_R_SUCCESS); 3245 } 3246 3247 void 3248 dns_dispatch_starttcp(dns_dispatch_t *disp) { 3249 REQUIRE(VALID_DISPATCH(disp)); 3250 3251 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]); 3252 3253 LOCK(&disp->lock); 3254 if ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) == 0) { 3255 disp->attributes |= DNS_DISPATCHATTR_CONNECTED; 3256 (void)startrecv(disp, NULL); 3257 } 3258 UNLOCK(&disp->lock); 3259 } 3260 3261 isc_result_t 3262 dns_dispatch_getnext(dns_dispentry_t *resp, dns_dispatchevent_t **sockevent) { 3263 dns_dispatch_t *disp; 3264 dns_dispatchevent_t *ev; 3265 3266 REQUIRE(VALID_RESPONSE(resp)); 3267 REQUIRE(sockevent != NULL && *sockevent != NULL); 3268 3269 disp = resp->disp; 3270 REQUIRE(VALID_DISPATCH(disp)); 3271 3272 ev = *sockevent; 3273 *sockevent = NULL; 3274 3275 
LOCK(&disp->lock); 3276 3277 REQUIRE(resp->item_out); 3278 resp->item_out = false; 3279 3280 if (ev->buffer.base != NULL) { 3281 free_buffer(disp, ev->buffer.base, ev->buffer.length); 3282 } 3283 free_devent(disp, ev); 3284 3285 if (disp->shutting_down == 1) { 3286 UNLOCK(&disp->lock); 3287 return (ISC_R_SHUTTINGDOWN); 3288 } 3289 ev = ISC_LIST_HEAD(resp->items); 3290 if (ev != NULL) { 3291 ISC_LIST_UNLINK(resp->items, ev, ev_link); 3292 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, 3293 resp->action, resp->arg, resp, NULL, NULL); 3294 request_log(disp, resp, LVL(90), 3295 "[c] Sent event %p buffer %p len %d to task %p", ev, 3296 ev->buffer.base, ev->buffer.length, resp->task); 3297 resp->item_out = true; 3298 isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); 3299 } 3300 UNLOCK(&disp->lock); 3301 return (ISC_R_SUCCESS); 3302 } 3303 3304 void 3305 dns_dispatch_removeresponse(dns_dispentry_t **resp, 3306 dns_dispatchevent_t **sockevent) { 3307 dns_dispatchmgr_t *mgr; 3308 dns_dispatch_t *disp; 3309 dns_dispentry_t *res; 3310 dispsocket_t *dispsock; 3311 dns_dispatchevent_t *ev; 3312 unsigned int bucket; 3313 bool killit; 3314 unsigned int n; 3315 isc_eventlist_t events; 3316 dns_qid_t *qid; 3317 3318 REQUIRE(resp != NULL); 3319 REQUIRE(VALID_RESPONSE(*resp)); 3320 3321 res = *resp; 3322 *resp = NULL; 3323 3324 disp = res->disp; 3325 REQUIRE(VALID_DISPATCH(disp)); 3326 mgr = disp->mgr; 3327 REQUIRE(VALID_DISPATCHMGR(mgr)); 3328 3329 qid = DNS_QID(disp); 3330 3331 if (sockevent != NULL) { 3332 REQUIRE(*sockevent != NULL); 3333 ev = *sockevent; 3334 *sockevent = NULL; 3335 } else { 3336 ev = NULL; 3337 } 3338 3339 LOCK(&disp->lock); 3340 3341 INSIST(disp->requests > 0); 3342 disp->requests--; 3343 dec_stats(disp->mgr, (qid == disp->mgr->qid) 3344 ? 
dns_resstatscounter_disprequdp 3345 : dns_resstatscounter_dispreqtcp); 3346 INSIST(disp->refcount > 0); 3347 disp->refcount--; 3348 if (disp->refcount == 0) { 3349 if (disp->recv_pending > 0) { 3350 isc_socket_cancel(disp->socket, disp->task[0], 3351 ISC_SOCKCANCEL_RECV); 3352 } 3353 for (dispsock = ISC_LIST_HEAD(disp->activesockets); 3354 dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link)) 3355 { 3356 isc_socket_cancel(dispsock->socket, dispsock->task, 3357 ISC_SOCKCANCEL_RECV); 3358 } 3359 disp->shutting_down = 1; 3360 } 3361 3362 bucket = res->bucket; 3363 3364 LOCK(&qid->lock); 3365 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); 3366 UNLOCK(&qid->lock); 3367 3368 if (ev == NULL && res->item_out) { 3369 /* 3370 * We've posted our event, but the caller hasn't gotten it 3371 * yet. Take it back. 3372 */ 3373 ISC_LIST_INIT(events); 3374 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH, NULL, 3375 &events); 3376 /* 3377 * We had better have gotten it back. 3378 */ 3379 INSIST(n == 1); 3380 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events); 3381 } 3382 3383 if (ev != NULL) { 3384 REQUIRE(res->item_out); 3385 res->item_out = false; 3386 if (ev->buffer.base != NULL) { 3387 free_buffer(disp, ev->buffer.base, ev->buffer.length); 3388 } 3389 free_devent(disp, ev); 3390 } 3391 3392 request_log(disp, res, LVL(90), "detaching from task %p", res->task); 3393 isc_task_detach(&res->task); 3394 3395 if (res->dispsocket != NULL) { 3396 isc_socket_cancel(res->dispsocket->socket, 3397 res->dispsocket->task, ISC_SOCKCANCEL_RECV); 3398 res->dispsocket->resp = NULL; 3399 } 3400 3401 /* 3402 * Free any buffered responses as well 3403 */ 3404 ev = ISC_LIST_HEAD(res->items); 3405 while (ev != NULL) { 3406 ISC_LIST_UNLINK(res->items, ev, ev_link); 3407 if (ev->buffer.base != NULL) { 3408 free_buffer(disp, ev->buffer.base, ev->buffer.length); 3409 } 3410 free_devent(disp, ev); 3411 ev = ISC_LIST_HEAD(res->items); 3412 } 3413 res->magic = 0; 3414 
isc_mempool_put(disp->mgr->rpool, res); 3415 if (disp->shutting_down == 1) { 3416 do_cancel(disp); 3417 } else { 3418 (void)startrecv(disp, NULL); 3419 } 3420 3421 killit = destroy_disp_ok(disp); 3422 UNLOCK(&disp->lock); 3423 if (killit) { 3424 isc_task_send(disp->task[0], &disp->ctlevent); 3425 } 3426 } 3427 3428 /* 3429 * disp must be locked. 3430 */ 3431 static void 3432 do_cancel(dns_dispatch_t *disp) { 3433 dns_dispatchevent_t *ev; 3434 dns_dispentry_t *resp; 3435 dns_qid_t *qid; 3436 3437 if (disp->shutdown_out == 1) { 3438 return; 3439 } 3440 3441 qid = DNS_QID(disp); 3442 3443 /* 3444 * Search for the first response handler without packets outstanding 3445 * unless a specific handler is given. 3446 */ 3447 LOCK(&qid->lock); 3448 for (resp = linear_first(qid); resp != NULL && resp->item_out; 3449 /* Empty. */) 3450 { 3451 resp = linear_next(qid, resp); 3452 } 3453 3454 /* 3455 * No one to send the cancel event to, so nothing to do. 3456 */ 3457 if (resp == NULL) { 3458 goto unlock; 3459 } 3460 3461 /* 3462 * Send the shutdown failsafe event to this resp. 
3463 */ 3464 ev = disp->failsafe_ev; 3465 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, 3466 resp->action, resp->arg, resp, NULL, NULL); 3467 ev->result = disp->shutdown_why; 3468 ev->buffer.base = NULL; 3469 ev->buffer.length = 0; 3470 disp->shutdown_out = 1; 3471 request_log(disp, resp, LVL(10), "cancel: failsafe event %p -> task %p", 3472 ev, resp->task); 3473 resp->item_out = true; 3474 isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); 3475 unlock: 3476 UNLOCK(&qid->lock); 3477 } 3478 3479 isc_socket_t * 3480 dns_dispatch_getsocket(dns_dispatch_t *disp) { 3481 REQUIRE(VALID_DISPATCH(disp)); 3482 3483 return (disp->socket); 3484 } 3485 3486 isc_socket_t * 3487 dns_dispatch_getentrysocket(dns_dispentry_t *resp) { 3488 REQUIRE(VALID_RESPONSE(resp)); 3489 3490 if (resp->dispsocket != NULL) { 3491 return (resp->dispsocket->socket); 3492 } else { 3493 return (NULL); 3494 } 3495 } 3496 3497 isc_result_t 3498 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) { 3499 REQUIRE(VALID_DISPATCH(disp)); 3500 REQUIRE(addrp != NULL); 3501 3502 if (disp->socktype == isc_sockettype_udp) { 3503 *addrp = disp->local; 3504 return (ISC_R_SUCCESS); 3505 } 3506 return (ISC_R_NOTIMPLEMENTED); 3507 } 3508 3509 void 3510 dns_dispatch_cancel(dns_dispatch_t *disp) { 3511 REQUIRE(VALID_DISPATCH(disp)); 3512 3513 LOCK(&disp->lock); 3514 3515 if (disp->shutting_down == 1) { 3516 UNLOCK(&disp->lock); 3517 return; 3518 } 3519 3520 disp->shutdown_why = ISC_R_CANCELED; 3521 disp->shutting_down = 1; 3522 do_cancel(disp); 3523 3524 UNLOCK(&disp->lock); 3525 3526 return; 3527 } 3528 3529 unsigned int 3530 dns_dispatch_getattributes(dns_dispatch_t *disp) { 3531 REQUIRE(VALID_DISPATCH(disp)); 3532 3533 /* 3534 * We don't bother locking disp here; it's the caller's responsibility 3535 * to use only non volatile flags. 
3536 */ 3537 return (disp->attributes); 3538 } 3539 3540 void 3541 dns_dispatch_changeattributes(dns_dispatch_t *disp, unsigned int attributes, 3542 unsigned int mask) { 3543 REQUIRE(VALID_DISPATCH(disp)); 3544 /* Exclusive attribute can only be set on creation */ 3545 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); 3546 /* Also, a dispatch with randomport specified cannot start listening */ 3547 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 || 3548 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0); 3549 3550 /* XXXMLG 3551 * Should check for valid attributes here! 3552 */ 3553 3554 LOCK(&disp->lock); 3555 3556 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) { 3557 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 && 3558 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) 3559 { 3560 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN; 3561 (void)startrecv(disp, NULL); 3562 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 3563 0 && 3564 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) 3565 { 3566 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; 3567 if (disp->recv_pending != 0) { 3568 isc_socket_cancel(disp->socket, disp->task[0], 3569 ISC_SOCKCANCEL_RECV); 3570 } 3571 } 3572 } 3573 3574 disp->attributes &= ~mask; 3575 disp->attributes |= (attributes & mask); 3576 UNLOCK(&disp->lock); 3577 } 3578 3579 void 3580 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) { 3581 void *buf; 3582 isc_socketevent_t *sevent, *newsevent; 3583 3584 REQUIRE(VALID_DISPATCH(disp)); 3585 REQUIRE(event != NULL); 3586 3587 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) { 3588 return; 3589 } 3590 3591 sevent = (isc_socketevent_t *)event; 3592 INSIST(sevent->n <= disp->mgr->buffersize); 3593 3594 newsevent = (isc_socketevent_t *)isc_event_allocate( 3595 disp->mgr->mctx, NULL, DNS_EVENT_IMPORTRECVDONE, udp_shrecv, 3596 disp, sizeof(isc_socketevent_t)); 3597 3598 buf = allocate_udp_buffer(disp); 3599 if (buf == NULL) { 3600 
isc_event_free(ISC_EVENT_PTR(&newsevent)); 3601 return; 3602 } 3603 memmove(buf, sevent->region.base, sevent->n); 3604 newsevent->region.base = buf; 3605 newsevent->region.length = disp->mgr->buffersize; 3606 newsevent->n = sevent->n; 3607 newsevent->result = sevent->result; 3608 newsevent->address = sevent->address; 3609 newsevent->timestamp = sevent->timestamp; 3610 newsevent->pktinfo = sevent->pktinfo; 3611 newsevent->attributes = sevent->attributes; 3612 3613 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent)); 3614 } 3615 3616 dns_dispatch_t * 3617 dns_dispatchset_get(dns_dispatchset_t *dset) { 3618 dns_dispatch_t *disp; 3619 3620 /* check that dispatch set is configured */ 3621 if (dset == NULL || dset->ndisp == 0) { 3622 return (NULL); 3623 } 3624 3625 LOCK(&dset->lock); 3626 disp = dset->dispatches[dset->cur]; 3627 dset->cur++; 3628 if (dset->cur == dset->ndisp) { 3629 dset->cur = 0; 3630 } 3631 UNLOCK(&dset->lock); 3632 3633 return (disp); 3634 } 3635 3636 isc_result_t 3637 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr, 3638 isc_taskmgr_t *taskmgr, dns_dispatch_t *source, 3639 dns_dispatchset_t **dsetp, int n) { 3640 isc_result_t result; 3641 dns_dispatchset_t *dset; 3642 dns_dispatchmgr_t *mgr; 3643 int i, j; 3644 3645 REQUIRE(VALID_DISPATCH(source)); 3646 REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0); 3647 REQUIRE(dsetp != NULL && *dsetp == NULL); 3648 3649 mgr = source->mgr; 3650 3651 dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t)); 3652 memset(dset, 0, sizeof(*dset)); 3653 3654 isc_mutex_init(&dset->lock); 3655 3656 dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n); 3657 3658 isc_mem_attach(mctx, &dset->mctx); 3659 dset->ndisp = n; 3660 dset->cur = 0; 3661 3662 dset->dispatches[0] = NULL; 3663 dns_dispatch_attach(source, &dset->dispatches[0]); 3664 3665 LOCK(&mgr->lock); 3666 for (i = 1; i < n; i++) { 3667 dset->dispatches[i] = NULL; 3668 result = dispatch_createudp( 3669 mgr, sockmgr, taskmgr, 
&source->local, 3670 source->maxrequests, source->attributes, 3671 &dset->dispatches[i], source->socket); 3672 if (result != ISC_R_SUCCESS) { 3673 goto fail; 3674 } 3675 } 3676 3677 UNLOCK(&mgr->lock); 3678 *dsetp = dset; 3679 3680 return (ISC_R_SUCCESS); 3681 3682 fail: 3683 UNLOCK(&mgr->lock); 3684 3685 for (j = 0; j < i; j++) { 3686 dns_dispatch_detach(&(dset->dispatches[j])); 3687 } 3688 isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n); 3689 if (dset->mctx == mctx) { 3690 isc_mem_detach(&dset->mctx); 3691 } 3692 3693 isc_mutex_destroy(&dset->lock); 3694 isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t)); 3695 return (result); 3696 } 3697 3698 void 3699 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) { 3700 int i; 3701 3702 REQUIRE(dset != NULL); 3703 3704 for (i = 0; i < dset->ndisp; i++) { 3705 isc_socket_t *sock; 3706 sock = dns_dispatch_getsocket(dset->dispatches[i]); 3707 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL); 3708 } 3709 } 3710 3711 void 3712 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { 3713 dns_dispatchset_t *dset; 3714 int i; 3715 3716 REQUIRE(dsetp != NULL && *dsetp != NULL); 3717 3718 dset = *dsetp; 3719 *dsetp = NULL; 3720 for (i = 0; i < dset->ndisp; i++) { 3721 dns_dispatch_detach(&(dset->dispatches[i])); 3722 } 3723 isc_mem_put(dset->mctx, dset->dispatches, 3724 sizeof(dns_dispatch_t *) * dset->ndisp); 3725 isc_mutex_destroy(&dset->lock); 3726 isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); 3727 } 3728 3729 void 3730 dns_dispatch_setdscp(dns_dispatch_t *disp, isc_dscp_t dscp) { 3731 REQUIRE(VALID_DISPATCH(disp)); 3732 disp->dscp = dscp; 3733 } 3734 3735 isc_dscp_t 3736 dns_dispatch_getdscp(dns_dispatch_t *disp) { 3737 REQUIRE(VALID_DISPATCH(disp)); 3738 return (disp->dscp); 3739 } 3740 3741 #if 0 3742 void 3743 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) { 3744 dns_dispatch_t *disp; 3745 char foo[1024]; 3746 3747 disp = ISC_LIST_HEAD(mgr->list); 3748 while (disp != NULL) 
{ 3749 isc_sockaddr_format(&disp->local, foo, sizeof(foo)); 3750 printf("\tdispatch %p, addr %s\n", disp, foo); 3751 disp = ISC_LIST_NEXT(disp, link); 3752 } 3753 } 3754 #endif /* if 0 */ 3755