1 /* $NetBSD: dispatch.c,v 1.7 2021/02/19 16:42:15 christos Exp $ */ 2 3 /* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * This Source Code Form is subject to the terms of the Mozilla Public 7 * License, v. 2.0. If a copy of the MPL was not distributed with this 8 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 9 * 10 * See the COPYRIGHT file distributed with this work for additional 11 * information regarding copyright ownership. 12 */ 13 14 /*! \file */ 15 16 #include <inttypes.h> 17 #include <stdbool.h> 18 #include <stdlib.h> 19 #include <sys/types.h> 20 #include <unistd.h> 21 22 #include <isc/mem.h> 23 #include <isc/mutex.h> 24 #include <isc/portset.h> 25 #include <isc/print.h> 26 #include <isc/random.h> 27 #include <isc/socket.h> 28 #include <isc/stats.h> 29 #include <isc/string.h> 30 #include <isc/task.h> 31 #include <isc/time.h> 32 #include <isc/util.h> 33 34 #include <dns/acl.h> 35 #include <dns/dispatch.h> 36 #include <dns/events.h> 37 #include <dns/log.h> 38 #include <dns/message.h> 39 #include <dns/portlist.h> 40 #include <dns/stats.h> 41 #include <dns/tcpmsg.h> 42 #include <dns/types.h> 43 44 typedef ISC_LIST(dns_dispentry_t) dns_displist_t; 45 46 typedef struct dispsocket dispsocket_t; 47 typedef ISC_LIST(dispsocket_t) dispsocketlist_t; 48 49 typedef struct dispportentry dispportentry_t; 50 typedef ISC_LIST(dispportentry_t) dispportlist_t; 51 52 typedef struct dns_qid { 53 unsigned int magic; 54 unsigned int qid_nbuckets; /*%< hash table size */ 55 unsigned int qid_increment; /*%< id increment on collision */ 56 isc_mutex_t lock; 57 dns_displist_t *qid_table; /*%< the table itself */ 58 dispsocketlist_t *sock_table; /*%< socket table */ 59 } dns_qid_t; 60 61 struct dns_dispatchmgr { 62 /* Unlocked. */ 63 unsigned int magic; 64 isc_mem_t *mctx; 65 dns_acl_t *blackhole; 66 dns_portlist_t *portlist; 67 isc_stats_t *stats; 68 69 /* Locked by "lock". */ 70 isc_mutex_t lock; 71 unsigned int state; 72 ISC_LIST(dns_dispatch_t) list; 73 74 /* locked by buffer_lock */ 75 dns_qid_t *qid; 76 isc_mutex_t buffer_lock; 77 unsigned int buffers; /*%< allocated buffers */ 78 unsigned int buffersize; /*%< size of each buffer */ 79 unsigned int maxbuffers; /*%< max buffers */ 80 81 /* Locked internally. */ 82 isc_mutex_t depool_lock; 83 isc_mempool_t *depool; /*%< pool for dispatch events */ 84 isc_mutex_t rpool_lock; 85 isc_mempool_t *rpool; /*%< pool for replies */ 86 isc_mutex_t dpool_lock; 87 isc_mempool_t *dpool; /*%< dispatch allocations */ 88 isc_mutex_t bpool_lock; 89 isc_mempool_t *bpool; /*%< pool for buffers */ 90 isc_mutex_t spool_lock; 91 isc_mempool_t *spool; /*%< pool for dispsocks */ 92 93 /*% 94 * Locked by qid->lock if qid exists; otherwise, can be used without 95 * being locked. 96 * Memory footprint considerations: this is a simple implementation of 97 * available ports, i.e., an ordered array of the actual port numbers. 98 * This will require about 256KB of memory in the worst case (128KB for 99 * each of IPv4 and IPv6). We could reduce it by representing it as a 100 * more sophisticated way such as a list (or array) of ranges that are 101 * searched to identify a specific port. Our decision here is the saved 102 * memory isn't worth the implementation complexity, considering the 103 * fact that the whole BIND9 process (which is mainly named) already 104 * requires a pretty large memory footprint. We may, however, have to 105 * revisit the decision when we want to use it as a separate module for 106 * an environment where memory requirement is severer. 107 */ 108 in_port_t *v4ports; /*%< available ports for IPv4 */ 109 unsigned int nv4ports; /*%< # of available ports for IPv4 */ 110 in_port_t *v6ports; /*%< available ports for IPv4 */ 111 unsigned int nv6ports; /*%< # of available ports for IPv4 */ 112 }; 113 114 #define MGR_SHUTTINGDOWN 0x00000001U 115 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0) 116 117 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0) 118 119 struct dns_dispentry { 120 unsigned int magic; 121 dns_dispatch_t *disp; 122 dns_messageid_t id; 123 in_port_t port; 124 unsigned int bucket; 125 isc_sockaddr_t host; 126 isc_task_t *task; 127 isc_taskaction_t action; 128 void *arg; 129 bool item_out; 130 dispsocket_t *dispsocket; 131 ISC_LIST(dns_dispatchevent_t) items; 132 ISC_LINK(dns_dispentry_t) link; 133 }; 134 135 /*% 136 * Maximum number of dispatch sockets that can be pooled for reuse. The 137 * appropriate value may vary, but experiments have shown a busy caching server 138 * may need more than 1000 sockets concurrently opened. The maximum allowable 139 * number of dispatch sockets (per manager) will be set to the double of this 140 * value. 141 */ 142 #ifndef DNS_DISPATCH_POOLSOCKS 143 #define DNS_DISPATCH_POOLSOCKS 2048 144 #endif /* ifndef DNS_DISPATCH_POOLSOCKS */ 145 146 /*% 147 * Quota to control the number of dispatch sockets. If a dispatch has more 148 * than the quota of sockets, new queries will purge oldest ones, so that 149 * a massive number of outstanding queries won't prevent subsequent queries 150 * (especially if the older ones take longer time and result in timeout). 151 */ 152 #ifndef DNS_DISPATCH_SOCKSQUOTA 153 #define DNS_DISPATCH_SOCKSQUOTA 3072 154 #endif /* ifndef DNS_DISPATCH_SOCKSQUOTA */ 155 156 struct dispsocket { 157 unsigned int magic; 158 isc_socket_t *socket; 159 dns_dispatch_t *disp; 160 isc_sockaddr_t host; 161 in_port_t localport; /* XXX: should be removed later */ 162 dispportentry_t *portentry; 163 dns_dispentry_t *resp; 164 isc_task_t *task; 165 ISC_LINK(dispsocket_t) link; 166 unsigned int bucket; 167 ISC_LINK(dispsocket_t) blink; 168 }; 169 170 /*% 171 * A port table entry. We remember every port we first open in a table with a 172 * reference counter so that we can 'reuse' the same port (with different 173 * destination addresses) using the SO_REUSEADDR socket option. 174 */ 175 struct dispportentry { 176 in_port_t port; 177 isc_refcount_t refs; 178 ISC_LINK(struct dispportentry) link; 179 }; 180 181 #ifndef DNS_DISPATCH_PORTTABLESIZE 182 #define DNS_DISPATCH_PORTTABLESIZE 1024 183 #endif /* ifndef DNS_DISPATCH_PORTTABLESIZE */ 184 185 #define INVALID_BUCKET (0xffffdead) 186 187 /*% 188 * Number of tasks for each dispatch that use separate sockets for different 189 * transactions. This must be a power of 2 as it will divide 32 bit numbers 190 * to get an uniformly random tasks selection. See get_dispsocket(). 191 */ 192 #define MAX_INTERNAL_TASKS 64 193 194 struct dns_dispatch { 195 /* Unlocked. */ 196 unsigned int magic; /*%< magic */ 197 dns_dispatchmgr_t *mgr; /*%< dispatch manager */ 198 int ntasks; 199 /*% 200 * internal task buckets. We use multiple tasks to distribute various 201 * socket events well when using separate dispatch sockets. We use the 202 * 1st task (task[0]) for internal control events. 203 */ 204 isc_task_t *task[MAX_INTERNAL_TASKS]; 205 isc_socket_t *socket; /*%< isc socket attached to */ 206 isc_sockaddr_t local; /*%< local address */ 207 in_port_t localport; /*%< local UDP port */ 208 isc_sockaddr_t peer; /*%< peer address (TCP) */ 209 isc_dscp_t dscp; /*%< "listen-on" DSCP value */ 210 unsigned int maxrequests; /*%< max requests */ 211 isc_event_t *ctlevent; 212 213 isc_mutex_t sepool_lock; 214 isc_mempool_t *sepool; /*%< pool for socket events */ 215 216 /*% Locked by mgr->lock. */ 217 ISC_LINK(dns_dispatch_t) link; 218 219 /* Locked by "lock". */ 220 isc_mutex_t lock; /*%< locks all below */ 221 isc_sockettype_t socktype; 222 unsigned int attributes; 223 unsigned int refcount; /*%< number of users */ 224 dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */ 225 unsigned int shutting_down : 1, shutdown_out : 1, connected : 1, 226 tcpmsg_valid : 1, recv_pending : 1; /*%< is a 227 * recv() 228 * pending? 229 * */ 230 isc_result_t shutdown_why; 231 ISC_LIST(dispsocket_t) activesockets; 232 ISC_LIST(dispsocket_t) inactivesockets; 233 unsigned int nsockets; 234 unsigned int requests; /*%< how many requests we have */ 235 unsigned int tcpbuffers; /*%< allocated buffers */ 236 dns_tcpmsg_t tcpmsg; /*%< for tcp streams */ 237 dns_qid_t *qid; 238 dispportlist_t *port_table; /*%< hold ports 'owned' by us */ 239 isc_mempool_t *portpool; /*%< port table entries */ 240 }; 241 242 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ') 243 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC) 244 245 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') 246 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC) 247 248 #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c') 249 #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC) 250 251 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p') 252 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC) 253 254 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r') 255 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC) 256 257 #define DNS_QID(disp) \ 258 ((disp)->socktype == isc_sockettype_tcp) ? (disp)->qid \ 259 : (disp)->mgr->qid 260 261 /*% 262 * Locking a query port buffer is a bit tricky. We access the buffer without 263 * locking until qid is created. Technically, there is a possibility of race 264 * between the creation of qid and access to the port buffer; in practice, 265 * however, this should be safe because qid isn't created until the first 266 * dispatch is created and there should be no contending situation until then. 267 */ 268 #define PORTBUFLOCK(mgr) \ 269 if ((mgr)->qid != NULL) \ 270 LOCK(&((mgr)->qid->lock)) 271 #define PORTBUFUNLOCK(mgr) \ 272 if ((mgr)->qid != NULL) \ 273 UNLOCK((&(mgr)->qid->lock)) 274 275 /* 276 * Statics. 277 */ 278 static dns_dispentry_t * 279 entry_search(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t, 280 unsigned int); 281 static bool 282 destroy_disp_ok(dns_dispatch_t *); 283 static void 284 destroy_disp(isc_task_t *task, isc_event_t *event); 285 static void 286 destroy_dispsocket(dns_dispatch_t *, dispsocket_t **); 287 static void 288 deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *); 289 static void 290 udp_exrecv(isc_task_t *, isc_event_t *); 291 static void 292 udp_shrecv(isc_task_t *, isc_event_t *); 293 static void 294 udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *); 295 static void 296 tcp_recv(isc_task_t *, isc_event_t *); 297 static isc_result_t 298 startrecv(dns_dispatch_t *, dispsocket_t *); 299 static uint32_t 300 dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t); 301 static void 302 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len); 303 static void * 304 allocate_udp_buffer(dns_dispatch_t *disp); 305 static inline void 306 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev); 307 static inline dns_dispatchevent_t * 308 allocate_devent(dns_dispatch_t *disp); 309 static void 310 do_cancel(dns_dispatch_t *disp); 311 static dns_dispentry_t * 312 linear_first(dns_qid_t *disp); 313 static dns_dispentry_t * 314 linear_next(dns_qid_t *disp, dns_dispentry_t *resp); 315 static void 316 dispatch_free(dns_dispatch_t **dispp); 317 static isc_result_t 318 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, 319 isc_socketmgr_t *sockmgr, const isc_sockaddr_t *localaddr, 320 isc_socket_t **sockp, isc_socket_t *dup_socket, bool duponly); 321 static isc_result_t 322 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 323 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 324 unsigned int maxrequests, unsigned int attributes, 325 dns_dispatch_t **dispp, isc_socket_t *dup_socket); 326 static bool 327 destroy_mgr_ok(dns_dispatchmgr_t *mgr); 328 static void 329 destroy_mgr(dns_dispatchmgr_t **mgrp); 330 static isc_result_t 331 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, 332 unsigned int increment, dns_qid_t **qidp, bool needaddrtable); 333 static void 334 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); 335 static isc_result_t 336 open_socket(isc_socketmgr_t *mgr, const isc_sockaddr_t *local, 337 unsigned int options, isc_socket_t **sockp, 338 isc_socket_t *dup_socket, bool duponly); 339 static bool 340 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 341 isc_sockaddr_t *sockaddrp); 342 343 #define LVL(x) ISC_LOG_DEBUG(x) 344 345 static void 346 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) 347 ISC_FORMAT_PRINTF(3, 4); 348 349 static void 350 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) { 351 char msgbuf[2048]; 352 va_list ap; 353 354 if (!isc_log_wouldlog(dns_lctx, level)) { 355 return; 356 } 357 358 va_start(ap, fmt); 359 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 360 va_end(ap); 361 362 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 363 DNS_LOGMODULE_DISPATCH, level, "dispatchmgr %p: %s", mgr, 364 msgbuf); 365 } 366 367 static inline void 368 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { 369 if (mgr->stats != NULL) { 370 isc_stats_increment(mgr->stats, counter); 371 } 372 } 373 374 static inline void 375 dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { 376 if (mgr->stats != NULL) { 377 isc_stats_decrement(mgr->stats, counter); 378 } 379 } 380 381 static void 382 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) 383 ISC_FORMAT_PRINTF(3, 4); 384 385 static void 386 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { 387 char msgbuf[2048]; 388 va_list ap; 389 390 if (!isc_log_wouldlog(dns_lctx, level)) { 391 return; 392 } 393 394 va_start(ap, fmt); 395 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 396 va_end(ap); 397 398 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 399 DNS_LOGMODULE_DISPATCH, level, "dispatch %p: %s", disp, 400 msgbuf); 401 } 402 403 static void 404 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level, 405 const char *fmt, ...) ISC_FORMAT_PRINTF(4, 5); 406 407 static void 408 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level, 409 const char *fmt, ...) { 410 char msgbuf[2048]; 411 char peerbuf[256]; 412 va_list ap; 413 414 if (!isc_log_wouldlog(dns_lctx, level)) { 415 return; 416 } 417 418 va_start(ap, fmt); 419 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 420 va_end(ap); 421 422 if (VALID_RESPONSE(resp)) { 423 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf)); 424 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 425 DNS_LOGMODULE_DISPATCH, level, 426 "dispatch %p response %p %s: %s", disp, resp, 427 peerbuf, msgbuf); 428 } else { 429 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, 430 DNS_LOGMODULE_DISPATCH, level, 431 "dispatch %p req/resp %p: %s", disp, resp, 432 msgbuf); 433 } 434 } 435 436 /* 437 * Return a hash of the destination and message id. 438 */ 439 static uint32_t 440 dns_hash(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, 441 in_port_t port) { 442 uint32_t ret; 443 444 ret = isc_sockaddr_hash(dest, true); 445 ret ^= ((uint32_t)id << 16) | port; 446 ret %= qid->qid_nbuckets; 447 448 INSIST(ret < qid->qid_nbuckets); 449 450 return (ret); 451 } 452 453 /* 454 * Find the first entry in 'qid'. Returns NULL if there are no entries. 455 */ 456 static dns_dispentry_t * 457 linear_first(dns_qid_t *qid) { 458 dns_dispentry_t *ret; 459 unsigned int bucket; 460 461 bucket = 0; 462 463 while (bucket < qid->qid_nbuckets) { 464 ret = ISC_LIST_HEAD(qid->qid_table[bucket]); 465 if (ret != NULL) { 466 return (ret); 467 } 468 bucket++; 469 } 470 471 return (NULL); 472 } 473 474 /* 475 * Find the next entry after 'resp' in 'qid'. Return NULL if there are 476 * no more entries. 477 */ 478 static dns_dispentry_t * 479 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) { 480 dns_dispentry_t *ret; 481 unsigned int bucket; 482 483 ret = ISC_LIST_NEXT(resp, link); 484 if (ret != NULL) { 485 return (ret); 486 } 487 488 bucket = resp->bucket; 489 bucket++; 490 while (bucket < qid->qid_nbuckets) { 491 ret = ISC_LIST_HEAD(qid->qid_table[bucket]); 492 if (ret != NULL) { 493 return (ret); 494 } 495 bucket++; 496 } 497 498 return (NULL); 499 } 500 501 /* 502 * The dispatch must be locked. 503 */ 504 static bool 505 destroy_disp_ok(dns_dispatch_t *disp) { 506 if (disp->refcount != 0) { 507 return (false); 508 } 509 510 if (disp->recv_pending != 0) { 511 return (false); 512 } 513 514 if (!ISC_LIST_EMPTY(disp->activesockets)) { 515 return (false); 516 } 517 518 if (disp->shutting_down == 0) { 519 return (false); 520 } 521 522 return (true); 523 } 524 525 /* 526 * Called when refcount reaches 0 (and safe to destroy). 527 * 528 * The dispatcher must be locked. 529 * The manager must not be locked. 530 */ 531 static void 532 destroy_disp(isc_task_t *task, isc_event_t *event) { 533 dns_dispatch_t *disp; 534 dns_dispatchmgr_t *mgr; 535 bool killmgr; 536 dispsocket_t *dispsocket; 537 int i; 538 539 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL); 540 541 UNUSED(task); 542 543 disp = event->ev_arg; 544 mgr = disp->mgr; 545 546 LOCK(&mgr->lock); 547 ISC_LIST_UNLINK(mgr->list, disp, link); 548 549 dispatch_log(disp, LVL(90), 550 "shutting down; detaching from sock %p, task %p", 551 disp->socket, disp->task[0]); /* XXXX */ 552 553 if (disp->sepool != NULL) { 554 isc_mempool_destroy(&disp->sepool); 555 isc_mutex_destroy(&disp->sepool_lock); 556 } 557 558 if (disp->socket != NULL) { 559 isc_socket_detach(&disp->socket); 560 } 561 while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) { 562 ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link); 563 destroy_dispsocket(disp, &dispsocket); 564 } 565 for (i = 0; i < disp->ntasks; i++) { 566 isc_task_detach(&disp->task[i]); 567 } 568 isc_event_free(&event); 569 570 dispatch_free(&disp); 571 572 killmgr = destroy_mgr_ok(mgr); 573 UNLOCK(&mgr->lock); 574 if (killmgr) { 575 destroy_mgr(&mgr); 576 } 577 } 578 579 /*% 580 * Manipulate port table per dispatch: find an entry for a given port number, 581 * create a new entry, and decrement a given entry with possible clean-up. 582 */ 583 static dispportentry_t * 584 port_search(dns_dispatch_t *disp, in_port_t port) { 585 dispportentry_t *portentry; 586 587 REQUIRE(disp->port_table != NULL); 588 589 portentry = ISC_LIST_HEAD( 590 disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE]); 591 while (portentry != NULL) { 592 if (portentry->port == port) { 593 return (portentry); 594 } 595 portentry = ISC_LIST_NEXT(portentry, link); 596 } 597 598 return (NULL); 599 } 600 601 static dispportentry_t * 602 new_portentry(dns_dispatch_t *disp, in_port_t port) { 603 dispportentry_t *portentry; 604 dns_qid_t *qid; 605 606 REQUIRE(disp->port_table != NULL); 607 608 portentry = isc_mempool_get(disp->portpool); 609 if (portentry == NULL) { 610 return (portentry); 611 } 612 613 portentry->port = port; 614 isc_refcount_init(&portentry->refs, 1); 615 ISC_LINK_INIT(portentry, link); 616 qid = DNS_QID(disp); 617 LOCK(&qid->lock); 618 ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE], 619 portentry, link); 620 UNLOCK(&qid->lock); 621 622 return (portentry); 623 } 624 625 /*% 626 * The caller must hold the qid->lock. 627 */ 628 static void 629 deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) { 630 dispportentry_t *portentry = *portentryp; 631 *portentryp = NULL; 632 633 REQUIRE(disp->port_table != NULL); 634 REQUIRE(portentry != NULL); 635 636 if (isc_refcount_decrement(&portentry->refs) == 1) { 637 ISC_LIST_UNLINK(disp->port_table[portentry->port % 638 DNS_DISPATCH_PORTTABLESIZE], 639 portentry, link); 640 isc_mempool_put(disp->portpool, portentry); 641 } 642 } 643 644 /*% 645 * Find a dispsocket for socket address 'dest', and port number 'port'. 646 * Return NULL if no such entry exists. Requires qid->lock to be held. 647 */ 648 static dispsocket_t * 649 socket_search(dns_qid_t *qid, const isc_sockaddr_t *dest, in_port_t port, 650 unsigned int bucket) { 651 dispsocket_t *dispsock; 652 653 REQUIRE(VALID_QID(qid)); 654 REQUIRE(bucket < qid->qid_nbuckets); 655 656 dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]); 657 658 while (dispsock != NULL) { 659 if (dispsock->portentry != NULL && 660 dispsock->portentry->port == port && 661 isc_sockaddr_equal(dest, &dispsock->host)) 662 { 663 return (dispsock); 664 } 665 dispsock = ISC_LIST_NEXT(dispsock, blink); 666 } 667 668 return (NULL); 669 } 670 671 /*% 672 * Make a new socket for a single dispatch with a random port number. 673 * The caller must hold the disp->lock 674 */ 675 static isc_result_t 676 get_dispsocket(dns_dispatch_t *disp, const isc_sockaddr_t *dest, 677 isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp, 678 in_port_t *portp) { 679 int i; 680 dns_dispatchmgr_t *mgr = disp->mgr; 681 isc_socket_t *sock = NULL; 682 isc_result_t result = ISC_R_FAILURE; 683 in_port_t port; 684 isc_sockaddr_t localaddr; 685 unsigned int bucket = 0; 686 dispsocket_t *dispsock; 687 unsigned int nports; 688 in_port_t *ports; 689 isc_socket_options_t bindoptions; 690 dispportentry_t *portentry = NULL; 691 dns_qid_t *qid; 692 693 if (isc_sockaddr_pf(&disp->local) == AF_INET) { 694 nports = disp->mgr->nv4ports; 695 ports = disp->mgr->v4ports; 696 } else { 697 nports = disp->mgr->nv6ports; 698 ports = disp->mgr->v6ports; 699 } 700 if (nports == 0) { 701 return (ISC_R_ADDRNOTAVAIL); 702 } 703 704 dispsock = ISC_LIST_HEAD(disp->inactivesockets); 705 if (dispsock != NULL) { 706 ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link); 707 sock = dispsock->socket; 708 dispsock->socket = NULL; 709 } else { 710 dispsock = isc_mempool_get(mgr->spool); 711 if (dispsock == NULL) { 712 return (ISC_R_NOMEMORY); 713 } 714 715 disp->nsockets++; 716 dispsock->socket = NULL; 717 dispsock->disp = disp; 718 dispsock->resp = NULL; 719 dispsock->portentry = NULL; 720 dispsock->task = NULL; 721 isc_task_attach(disp->task[isc_random_uniform(disp->ntasks)], 722 &dispsock->task); 723 ISC_LINK_INIT(dispsock, link); 724 ISC_LINK_INIT(dispsock, blink); 725 dispsock->magic = DISPSOCK_MAGIC; 726 } 727 728 /* 729 * Pick up a random UDP port and open a new socket with it. Avoid 730 * choosing ports that share the same destination because it will be 731 * very likely to fail in bind(2) or connect(2). 732 */ 733 localaddr = disp->local; 734 qid = DNS_QID(disp); 735 736 for (i = 0; i < 64; i++) { 737 port = ports[isc_random_uniform(nports)]; 738 isc_sockaddr_setport(&localaddr, port); 739 740 LOCK(&qid->lock); 741 bucket = dns_hash(qid, dest, 0, port); 742 if (socket_search(qid, dest, port, bucket) != NULL) { 743 UNLOCK(&qid->lock); 744 continue; 745 } 746 UNLOCK(&qid->lock); 747 bindoptions = 0; 748 portentry = port_search(disp, port); 749 750 if (portentry != NULL) { 751 bindoptions |= ISC_SOCKET_REUSEADDRESS; 752 } 753 result = open_socket(sockmgr, &localaddr, bindoptions, &sock, 754 NULL, false); 755 if (result == ISC_R_SUCCESS) { 756 if (portentry == NULL) { 757 portentry = new_portentry(disp, port); 758 if (portentry == NULL) { 759 result = ISC_R_NOMEMORY; 760 break; 761 } 762 } else { 763 isc_refcount_increment(&portentry->refs); 764 } 765 break; 766 } else if (result == ISC_R_NOPERM) { 767 char buf[ISC_SOCKADDR_FORMATSIZE]; 768 isc_sockaddr_format(&localaddr, buf, sizeof(buf)); 769 dispatch_log(disp, ISC_LOG_WARNING, 770 "open_socket(%s) -> %s: continuing", buf, 771 isc_result_totext(result)); 772 } else if (result != ISC_R_ADDRINUSE) { 773 break; 774 } 775 } 776 777 if (result == ISC_R_SUCCESS) { 778 dispsock->socket = sock; 779 dispsock->host = *dest; 780 dispsock->bucket = bucket; 781 LOCK(&qid->lock); 782 dispsock->portentry = portentry; 783 ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink); 784 UNLOCK(&qid->lock); 785 *dispsockp = dispsock; 786 *portp = port; 787 } else { 788 /* 789 * We could keep it in the inactive list, but since this should 790 * be an exceptional case and might be resource shortage, we'd 791 * rather destroy it. 792 */ 793 if (sock != NULL) { 794 isc_socket_detach(&sock); 795 } 796 destroy_dispsocket(disp, &dispsock); 797 } 798 799 return (result); 800 } 801 802 /*% 803 * Destroy a dedicated dispatch socket. 804 */ 805 static void 806 destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) { 807 dispsocket_t *dispsock; 808 dns_qid_t *qid = DNS_QID(disp); 809 810 /* 811 * The dispatch must be locked. 812 */ 813 814 REQUIRE(dispsockp != NULL && *dispsockp != NULL); 815 dispsock = *dispsockp; 816 *dispsockp = NULL; 817 REQUIRE(!ISC_LINK_LINKED(dispsock, link)); 818 819 disp->nsockets--; 820 dispsock->magic = 0; 821 if (dispsock->portentry != NULL) { 822 /* socket_search() tests and dereferences portentry. */ 823 LOCK(&qid->lock); 824 deref_portentry(disp, &dispsock->portentry); 825 UNLOCK(&qid->lock); 826 } 827 if (dispsock->socket != NULL) { 828 isc_socket_detach(&dispsock->socket); 829 } 830 if (ISC_LINK_LINKED(dispsock, blink)) { 831 LOCK(&qid->lock); 832 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, 833 blink); 834 UNLOCK(&qid->lock); 835 } 836 if (dispsock->task != NULL) { 837 isc_task_detach(&dispsock->task); 838 } 839 isc_mempool_put(disp->mgr->spool, dispsock); 840 } 841 842 /*% 843 * Deactivate a dedicated dispatch socket. Move it to the inactive list for 844 * future reuse unless the total number of sockets are exceeding the maximum. 845 */ 846 static void 847 deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) { 848 isc_result_t result; 849 dns_qid_t *qid = DNS_QID(disp); 850 851 /* 852 * The dispatch must be locked. 853 */ 854 ISC_LIST_UNLINK(disp->activesockets, dispsock, link); 855 if (dispsock->resp != NULL) { 856 INSIST(dispsock->resp->dispsocket == dispsock); 857 dispsock->resp->dispsocket = NULL; 858 } 859 860 INSIST(dispsock->portentry != NULL); 861 /* socket_search() tests and dereferences portentry. */ 862 LOCK(&qid->lock); 863 deref_portentry(disp, &dispsock->portentry); 864 UNLOCK(&qid->lock); 865 866 if (disp->nsockets > DNS_DISPATCH_POOLSOCKS) { 867 destroy_dispsocket(disp, &dispsock); 868 } else { 869 result = isc_socket_close(dispsock->socket); 870 871 LOCK(&qid->lock); 872 ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, 873 blink); 874 UNLOCK(&qid->lock); 875 876 if (result == ISC_R_SUCCESS) { 877 ISC_LIST_APPEND(disp->inactivesockets, dispsock, link); 878 } else { 879 /* 880 * If the underlying system does not allow this 881 * optimization, destroy this temporary structure (and 882 * create a new one for a new transaction). 883 */ 884 INSIST(result == ISC_R_NOTIMPLEMENTED); 885 destroy_dispsocket(disp, &dispsock); 886 } 887 } 888 } 889 890 /* 891 * Find an entry for query ID 'id', socket address 'dest', and port number 892 * 'port'. 893 * Return NULL if no such entry exists. 894 */ 895 static dns_dispentry_t * 896 entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, 897 in_port_t port, unsigned int bucket) { 898 dns_dispentry_t *res; 899 900 REQUIRE(VALID_QID(qid)); 901 REQUIRE(bucket < qid->qid_nbuckets); 902 903 res = ISC_LIST_HEAD(qid->qid_table[bucket]); 904 905 while (res != NULL) { 906 if (res->id == id && isc_sockaddr_equal(dest, &res->host) && 907 res->port == port) { 908 return (res); 909 } 910 res = ISC_LIST_NEXT(res, link); 911 } 912 913 return (NULL); 914 } 915 916 static void 917 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) { 918 isc_mempool_t *bpool; 919 INSIST(buf != NULL && len != 0); 920 921 switch (disp->socktype) { 922 case isc_sockettype_tcp: 923 INSIST(disp->tcpbuffers > 0); 924 disp->tcpbuffers--; 925 isc_mem_put(disp->mgr->mctx, buf, len); 926 break; 927 case isc_sockettype_udp: 928 LOCK(&disp->mgr->buffer_lock); 929 INSIST(disp->mgr->buffers > 0); 930 INSIST(len == disp->mgr->buffersize); 931 disp->mgr->buffers--; 932 bpool = disp->mgr->bpool; 933 UNLOCK(&disp->mgr->buffer_lock); 934 isc_mempool_put(bpool, buf); 935 break; 936 default: 937 INSIST(0); 938 ISC_UNREACHABLE(); 939 } 940 } 941 942 static void * 943 allocate_udp_buffer(dns_dispatch_t *disp) { 944 isc_mempool_t *bpool; 945 void *temp; 946 947 LOCK(&disp->mgr->buffer_lock); 948 if (disp->mgr->buffers >= disp->mgr->maxbuffers) { 949 UNLOCK(&disp->mgr->buffer_lock); 950 return (NULL); 951 } 952 bpool = disp->mgr->bpool; 953 disp->mgr->buffers++; 954 UNLOCK(&disp->mgr->buffer_lock); 955 956 temp = isc_mempool_get(bpool); 957 958 if (temp == NULL) { 959 LOCK(&disp->mgr->buffer_lock); 960 disp->mgr->buffers--; 961 UNLOCK(&disp->mgr->buffer_lock); 962 } 963 964 return (temp); 965 } 966 967 static inline void 968 free_sevent(isc_event_t *ev) { 969 isc_mempool_t *pool = ev->ev_destroy_arg; 970 isc_socketevent_t *sev = (isc_socketevent_t *)ev; 971 isc_mempool_put(pool, sev); 972 } 973 974 static inline isc_socketevent_t * 975 allocate_sevent(dns_dispatch_t *disp, isc_socket_t *sock, isc_eventtype_t type, 976 isc_taskaction_t action, const void *arg) { 977 isc_socketevent_t *ev; 978 void *deconst_arg; 979 980 ev = isc_mempool_get(disp->sepool); 981 if (ev == NULL) { 982 return (NULL); 983 } 984 DE_CONST(arg, deconst_arg); 985 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type, action, deconst_arg, 986 sock, free_sevent, disp->sepool); 987 ev->result = ISC_R_UNSET; 988 ISC_LINK_INIT(ev, ev_link); 989 ev->region.base = NULL; 990 ev->n = 0; 991 ev->offset = 0; 992 ev->attributes = 0; 993 994 return (ev); 995 } 996 997 static inline void 998 free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) { 999 if (disp->failsafe_ev == ev) { 1000 INSIST(disp->shutdown_out == 1); 1001 disp->shutdown_out = 0; 1002 1003 return; 1004 } 1005 1006 isc_mempool_put(disp->mgr->depool, ev); 1007 } 1008 1009 static inline dns_dispatchevent_t * 1010 allocate_devent(dns_dispatch_t *disp) { 1011 dns_dispatchevent_t *ev; 1012 1013 ev = isc_mempool_get(disp->mgr->depool); 1014 if (ev == NULL) { 1015 return (NULL); 1016 } 1017 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0, NULL, NULL, NULL, NULL, 1018 NULL); 1019 1020 return (ev); 1021 } 1022 1023 static void 1024 udp_exrecv(isc_task_t *task, isc_event_t *ev) { 1025 dispsocket_t *dispsock = ev->ev_arg; 1026 1027 UNUSED(task); 1028 1029 REQUIRE(VALID_DISPSOCK(dispsock)); 1030 udp_recv(ev, dispsock->disp, dispsock); 1031 } 1032 1033 static void 1034 udp_shrecv(isc_task_t *task, isc_event_t *ev) { 1035 dns_dispatch_t *disp = ev->ev_arg; 1036 1037 UNUSED(task); 1038 1039 REQUIRE(VALID_DISPATCH(disp)); 1040 udp_recv(ev, disp, NULL); 1041 } 1042 1043 /* 1044 * General flow: 1045 * 1046 * If I/O result == CANCELED or error, free the buffer. 1047 * 1048 * If query, free the buffer, restart. 1049 * 1050 * If response: 1051 * Allocate event, fill in details. 1052 * If cannot allocate, free buffer, restart. 1053 * find target. If not found, free buffer, restart. 1054 * if event queue is not empty, queue. else, send. 1055 * restart. 1056 */ 1057 static void 1058 udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) { 1059 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in; 1060 dns_messageid_t id; 1061 isc_result_t dres; 1062 isc_buffer_t source; 1063 unsigned int flags; 1064 dns_dispentry_t *resp = NULL; 1065 dns_dispatchevent_t *rev; 1066 unsigned int bucket; 1067 bool killit; 1068 bool queue_response; 1069 dns_dispatchmgr_t *mgr; 1070 dns_qid_t *qid; 1071 isc_netaddr_t netaddr; 1072 int match; 1073 int result; 1074 bool qidlocked = false; 1075 1076 LOCK(&disp->lock); 1077 1078 mgr = disp->mgr; 1079 qid = mgr->qid; 1080 1081 LOCK(&disp->mgr->buffer_lock); 1082 dispatch_log(disp, LVL(90), 1083 "got packet: requests %d, buffers %d, recvs %d", 1084 disp->requests, disp->mgr->buffers, disp->recv_pending); 1085 UNLOCK(&disp->mgr->buffer_lock); 1086 1087 if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) { 1088 /* 1089 * Unless the receive event was imported from a listening 1090 * interface, in which case the event type is 1091 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending. 1092 */ 1093 INSIST(disp->recv_pending != 0); 1094 disp->recv_pending = 0; 1095 } 1096 1097 if (dispsock != NULL && 1098 (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) 1099 { 1100 /* 1101 * dispsock->resp can be NULL if this transaction was canceled 1102 * just after receiving a response. Since this socket is 1103 * exclusively used and there should be at most one receive 1104 * event the canceled event should have been no effect. So 1105 * we can (and should) deactivate the socket right now. 1106 */ 1107 deactivate_dispsocket(disp, dispsock); 1108 dispsock = NULL; 1109 } 1110 1111 if (disp->shutting_down) { 1112 /* 1113 * This dispatcher is shutting down. 1114 */ 1115 free_buffer(disp, ev->region.base, ev->region.length); 1116 1117 isc_event_free(&ev_in); 1118 ev = NULL; 1119 1120 killit = destroy_disp_ok(disp); 1121 UNLOCK(&disp->lock); 1122 if (killit) { 1123 isc_task_send(disp->task[0], &disp->ctlevent); 1124 } 1125 1126 return; 1127 } 1128 1129 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 1130 if (dispsock != NULL) { 1131 resp = dispsock->resp; 1132 id = resp->id; 1133 if (ev->result != ISC_R_SUCCESS) { 1134 /* 1135 * This is most likely a network error on a 1136 * connected socket. It makes no sense to 1137 * check the address or parse the packet, but it 1138 * will help to return the error to the caller. 1139 */ 1140 goto sendresponse; 1141 } 1142 } else { 1143 free_buffer(disp, ev->region.base, ev->region.length); 1144 1145 isc_event_free(&ev_in); 1146 UNLOCK(&disp->lock); 1147 return; 1148 } 1149 } else if (ev->result != ISC_R_SUCCESS) { 1150 free_buffer(disp, ev->region.base, ev->region.length); 1151 1152 if (ev->result != ISC_R_CANCELED) { 1153 dispatch_log(disp, ISC_LOG_ERROR, 1154 "odd socket result in udp_recv(): %s", 1155 isc_result_totext(ev->result)); 1156 } 1157 1158 isc_event_free(&ev_in); 1159 UNLOCK(&disp->lock); 1160 return; 1161 } 1162 1163 /* 1164 * If this is from a blackholed address, drop it. 1165 */ 1166 isc_netaddr_fromsockaddr(&netaddr, &ev->address); 1167 if (disp->mgr->blackhole != NULL && 1168 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole, NULL, &match, 1169 NULL) == ISC_R_SUCCESS && 1170 match > 0) 1171 { 1172 if (isc_log_wouldlog(dns_lctx, LVL(10))) { 1173 char netaddrstr[ISC_NETADDR_FORMATSIZE]; 1174 isc_netaddr_format(&netaddr, netaddrstr, 1175 sizeof(netaddrstr)); 1176 dispatch_log(disp, LVL(10), "blackholed packet from %s", 1177 netaddrstr); 1178 } 1179 free_buffer(disp, ev->region.base, ev->region.length); 1180 goto restart; 1181 } 1182 1183 /* 1184 * Peek into the buffer to see what we can see. 1185 */ 1186 isc_buffer_init(&source, ev->region.base, ev->region.length); 1187 isc_buffer_add(&source, ev->n); 1188 dres = dns_message_peekheader(&source, &id, &flags); 1189 if (dres != ISC_R_SUCCESS) { 1190 free_buffer(disp, ev->region.base, ev->region.length); 1191 dispatch_log(disp, LVL(10), "got garbage packet"); 1192 goto restart; 1193 } 1194 1195 dispatch_log(disp, LVL(92), 1196 "got valid DNS message header, /QR %c, id %u", 1197 (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); 1198 1199 /* 1200 * Look at flags. If query, drop it. If response, 1201 * look to see where it goes. 1202 */ 1203 if ((flags & DNS_MESSAGEFLAG_QR) == 0) { 1204 /* query */ 1205 free_buffer(disp, ev->region.base, ev->region.length); 1206 goto restart; 1207 } 1208 1209 /* 1210 * Search for the corresponding response. If we are using an exclusive 1211 * socket, we've already identified it and we can skip the search; but 1212 * the ID and the address must match the expected ones. 1213 */ 1214 if (resp == NULL) { 1215 bucket = dns_hash(qid, &ev->address, id, disp->localport); 1216 LOCK(&qid->lock); 1217 qidlocked = true; 1218 resp = entry_search(qid, &ev->address, id, disp->localport, 1219 bucket); 1220 dispatch_log(disp, LVL(90), 1221 "search for response in bucket %d: %s", bucket, 1222 (resp == NULL ? "not found" : "found")); 1223 1224 } else if (resp->id != id || 1225 !isc_sockaddr_equal(&ev->address, &resp->host)) { 1226 dispatch_log(disp, LVL(90), 1227 "response to an exclusive socket doesn't match"); 1228 inc_stats(mgr, dns_resstatscounter_mismatch); 1229 free_buffer(disp, ev->region.base, ev->region.length); 1230 goto unlock; 1231 } 1232 1233 if (resp == NULL) { 1234 inc_stats(mgr, dns_resstatscounter_mismatch); 1235 free_buffer(disp, ev->region.base, ev->region.length); 1236 goto unlock; 1237 } 1238 1239 /* 1240 * Now that we have the original dispatch the query was sent 1241 * from check that the address and port the response was 1242 * sent to make sense. 1243 */ 1244 if (disp != resp->disp) { 1245 isc_sockaddr_t a1; 1246 isc_sockaddr_t a2; 1247 1248 /* 1249 * Check that the socket types and ports match. 1250 */ 1251 if (disp->socktype != resp->disp->socktype || 1252 isc_sockaddr_getport(&disp->local) != 1253 isc_sockaddr_getport(&resp->disp->local)) 1254 { 1255 free_buffer(disp, ev->region.base, ev->region.length); 1256 goto unlock; 1257 } 1258 1259 /* 1260 * If each dispatch is bound to a different address 1261 * then fail. 1262 * 1263 * Note under Linux a packet can be sent out via IPv4 socket 1264 * and the response be received via a IPv6 socket. 1265 * 1266 * Requests sent out via IPv6 should always come back in 1267 * via IPv6. 1268 */ 1269 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 && 1270 isc_sockaddr_pf(&disp->local) != PF_INET6) 1271 { 1272 free_buffer(disp, ev->region.base, ev->region.length); 1273 goto unlock; 1274 } 1275 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local)); 1276 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local)); 1277 if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) && 1278 !isc_sockaddr_eqaddr(&a1, &resp->disp->local) && 1279 !isc_sockaddr_eqaddr(&a2, &disp->local)) 1280 { 1281 free_buffer(disp, ev->region.base, ev->region.length); 1282 goto unlock; 1283 } 1284 } 1285 1286 sendresponse: 1287 queue_response = resp->item_out; 1288 rev = allocate_devent(resp->disp); 1289 if (rev == NULL) { 1290 free_buffer(disp, ev->region.base, ev->region.length); 1291 goto unlock; 1292 } 1293 1294 /* 1295 * At this point, rev contains the event we want to fill in, and 1296 * resp contains the information on the place to send it to. 1297 * Send the event off. 1298 */ 1299 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length); 1300 isc_buffer_add(&rev->buffer, ev->n); 1301 rev->result = ev->result; 1302 rev->id = id; 1303 rev->addr = ev->address; 1304 rev->pktinfo = ev->pktinfo; 1305 rev->attributes = ev->attributes; 1306 if (queue_response) { 1307 ISC_LIST_APPEND(resp->items, rev, ev_link); 1308 } else { 1309 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, 1310 resp->action, resp->arg, resp, NULL, NULL); 1311 request_log(disp, resp, LVL(90), 1312 "[a] Sent event %p buffer %p len %d to task %p", 1313 rev, rev->buffer.base, rev->buffer.length, 1314 resp->task); 1315 resp->item_out = true; 1316 isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); 1317 } 1318 unlock: 1319 if (qidlocked) { 1320 UNLOCK(&qid->lock); 1321 } 1322 1323 /* 1324 * Restart recv() to get the next packet. 1325 */ 1326 restart: 1327 result = startrecv(disp, dispsock); 1328 if (result != ISC_R_SUCCESS && dispsock != NULL) { 1329 /* 1330 * XXX: wired. There seems to be no recovery process other than 1331 * deactivate this socket anyway (since we cannot start 1332 * receiving, we won't be able to receive a cancel event 1333 * from the user). 1334 */ 1335 deactivate_dispsocket(disp, dispsock); 1336 } 1337 isc_event_free(&ev_in); 1338 UNLOCK(&disp->lock); 1339 } 1340 1341 /* 1342 * General flow: 1343 * 1344 * If I/O result == CANCELED, EOF, or error, notify everyone as the 1345 * various queues drain. 1346 * 1347 * If query, restart. 1348 * 1349 * If response: 1350 * Allocate event, fill in details. 1351 * If cannot allocate, restart. 1352 * find target. If not found, restart. 1353 * if event queue is not empty, queue. else, send. 1354 * restart. 1355 */ 1356 static void 1357 tcp_recv(isc_task_t *task, isc_event_t *ev_in) { 1358 dns_dispatch_t *disp = ev_in->ev_arg; 1359 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg; 1360 dns_messageid_t id; 1361 isc_result_t dres; 1362 unsigned int flags; 1363 dns_dispentry_t *resp; 1364 dns_dispatchevent_t *rev; 1365 unsigned int bucket; 1366 bool killit; 1367 bool queue_response; 1368 dns_qid_t *qid; 1369 int level; 1370 char buf[ISC_SOCKADDR_FORMATSIZE]; 1371 1372 UNUSED(task); 1373 1374 REQUIRE(VALID_DISPATCH(disp)); 1375 1376 qid = disp->qid; 1377 1378 LOCK(&disp->lock); 1379 1380 dispatch_log(disp, LVL(90), 1381 "got TCP packet: requests %d, buffers %d, recvs %d", 1382 disp->requests, disp->tcpbuffers, disp->recv_pending); 1383 1384 INSIST(disp->recv_pending != 0); 1385 disp->recv_pending = 0; 1386 1387 if (disp->refcount == 0) { 1388 /* 1389 * This dispatcher is shutting down. Force cancellation. 1390 */ 1391 tcpmsg->result = ISC_R_CANCELED; 1392 } 1393 1394 if (tcpmsg->result != ISC_R_SUCCESS) { 1395 switch (tcpmsg->result) { 1396 case ISC_R_CANCELED: 1397 break; 1398 1399 case ISC_R_EOF: 1400 dispatch_log(disp, LVL(90), "shutting down on EOF"); 1401 do_cancel(disp); 1402 break; 1403 1404 case ISC_R_CONNECTIONRESET: 1405 level = ISC_LOG_INFO; 1406 goto logit; 1407 1408 default: 1409 level = ISC_LOG_ERROR; 1410 logit: 1411 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf)); 1412 dispatch_log(disp, level, 1413 "shutting down due to TCP " 1414 "receive error: %s: %s", 1415 buf, isc_result_totext(tcpmsg->result)); 1416 do_cancel(disp); 1417 break; 1418 } 1419 1420 /* 1421 * The event is statically allocated in the tcpmsg 1422 * structure, and destroy_disp() frees the tcpmsg, so we must 1423 * free the event *before* calling destroy_disp(). 1424 */ 1425 isc_event_free(&ev_in); 1426 1427 disp->shutting_down = 1; 1428 disp->shutdown_why = tcpmsg->result; 1429 1430 /* 1431 * If the recv() was canceled pass the word on. 1432 */ 1433 killit = destroy_disp_ok(disp); 1434 UNLOCK(&disp->lock); 1435 if (killit) { 1436 isc_task_send(disp->task[0], &disp->ctlevent); 1437 } 1438 return; 1439 } 1440 1441 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p", 1442 tcpmsg->result, tcpmsg->buffer.length, 1443 tcpmsg->buffer.base); 1444 1445 /* 1446 * Peek into the buffer to see what we can see. 1447 */ 1448 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags); 1449 if (dres != ISC_R_SUCCESS) { 1450 dispatch_log(disp, LVL(10), "got garbage packet"); 1451 goto restart; 1452 } 1453 1454 dispatch_log(disp, LVL(92), 1455 "got valid DNS message header, /QR %c, id %u", 1456 (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); 1457 1458 /* 1459 * Allocate an event to send to the query or response client, and 1460 * allocate a new buffer for our use. 1461 */ 1462 1463 /* 1464 * Look at flags. If query, drop it. If response, 1465 * look to see where it goes. 1466 */ 1467 if ((flags & DNS_MESSAGEFLAG_QR) == 0) { 1468 /* 1469 * Query. 1470 */ 1471 goto restart; 1472 } 1473 1474 /* 1475 * Response. 1476 */ 1477 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport); 1478 LOCK(&qid->lock); 1479 resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket); 1480 dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", 1481 bucket, (resp == NULL ? "not found" : "found")); 1482 1483 if (resp == NULL) { 1484 goto unlock; 1485 } 1486 queue_response = resp->item_out; 1487 rev = allocate_devent(disp); 1488 if (rev == NULL) { 1489 goto unlock; 1490 } 1491 1492 /* 1493 * At this point, rev contains the event we want to fill in, and 1494 * resp contains the information on the place to send it to. 1495 * Send the event off. 1496 */ 1497 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer); 1498 disp->tcpbuffers++; 1499 rev->result = ISC_R_SUCCESS; 1500 rev->id = id; 1501 rev->addr = tcpmsg->address; 1502 if (queue_response) { 1503 ISC_LIST_APPEND(resp->items, rev, ev_link); 1504 } else { 1505 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, 1506 resp->action, resp->arg, resp, NULL, NULL); 1507 request_log(disp, resp, LVL(90), 1508 "[b] Sent event %p buffer %p len %d to task %p", 1509 rev, rev->buffer.base, rev->buffer.length, 1510 resp->task); 1511 resp->item_out = true; 1512 isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); 1513 } 1514 unlock: 1515 UNLOCK(&qid->lock); 1516 1517 /* 1518 * Restart recv() to get the next packet. 1519 */ 1520 restart: 1521 (void)startrecv(disp, NULL); 1522 1523 isc_event_free(&ev_in); 1524 UNLOCK(&disp->lock); 1525 } 1526 1527 /* 1528 * disp must be locked. 1529 */ 1530 static isc_result_t 1531 startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) { 1532 isc_result_t res; 1533 isc_region_t region; 1534 isc_socket_t *sock; 1535 1536 if (disp->shutting_down == 1) { 1537 return (ISC_R_SUCCESS); 1538 } 1539 1540 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) { 1541 return (ISC_R_SUCCESS); 1542 } 1543 1544 if (disp->recv_pending != 0 && dispsock == NULL) { 1545 return (ISC_R_SUCCESS); 1546 } 1547 1548 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && 1549 dispsock == NULL) { 1550 return (ISC_R_SUCCESS); 1551 } 1552 1553 if (dispsock != NULL) { 1554 sock = dispsock->socket; 1555 } else { 1556 sock = disp->socket; 1557 } 1558 INSIST(sock != NULL); 1559 1560 switch (disp->socktype) { 1561 /* 1562 * UDP reads are always maximal. 1563 */ 1564 case isc_sockettype_udp: 1565 region.length = disp->mgr->buffersize; 1566 region.base = allocate_udp_buffer(disp); 1567 if (region.base == NULL) { 1568 return (ISC_R_NOMEMORY); 1569 } 1570 if (dispsock != NULL) { 1571 isc_task_t *dt = dispsock->task; 1572 isc_socketevent_t *sev = allocate_sevent( 1573 disp, sock, ISC_SOCKEVENT_RECVDONE, udp_exrecv, 1574 dispsock); 1575 if (sev == NULL) { 1576 free_buffer(disp, region.base, region.length); 1577 return (ISC_R_NOMEMORY); 1578 } 1579 1580 res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0); 1581 if (res != ISC_R_SUCCESS) { 1582 free_buffer(disp, region.base, region.length); 1583 return (res); 1584 } 1585 } else { 1586 isc_task_t *dt = disp->task[0]; 1587 isc_socketevent_t *sev = allocate_sevent( 1588 disp, sock, ISC_SOCKEVENT_RECVDONE, udp_shrecv, 1589 disp); 1590 if (sev == NULL) { 1591 free_buffer(disp, region.base, region.length); 1592 return (ISC_R_NOMEMORY); 1593 } 1594 1595 res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0); 1596 if (res != ISC_R_SUCCESS) { 1597 free_buffer(disp, region.base, region.length); 1598 disp->shutdown_why = res; 1599 disp->shutting_down = 1; 1600 do_cancel(disp); 1601 return (ISC_R_SUCCESS); /* recover by cancel */ 1602 } 1603 INSIST(disp->recv_pending == 0); 1604 disp->recv_pending = 1; 1605 } 1606 break; 1607 1608 case isc_sockettype_tcp: 1609 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0], 1610 tcp_recv, disp); 1611 if (res != ISC_R_SUCCESS) { 1612 disp->shutdown_why = res; 1613 disp->shutting_down = 1; 1614 do_cancel(disp); 1615 return (ISC_R_SUCCESS); /* recover by cancel */ 1616 } 1617 INSIST(disp->recv_pending == 0); 1618 disp->recv_pending = 1; 1619 break; 1620 default: 1621 INSIST(0); 1622 ISC_UNREACHABLE(); 1623 } 1624 1625 return (ISC_R_SUCCESS); 1626 } 1627 1628 /* 1629 * Mgr must be locked when calling this function. 1630 */ 1631 static bool 1632 destroy_mgr_ok(dns_dispatchmgr_t *mgr) { 1633 mgr_log(mgr, LVL(90), 1634 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, " 1635 "depool=%d, rpool=%d, dpool=%d", 1636 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list), 1637 isc_mempool_getallocated(mgr->depool), 1638 isc_mempool_getallocated(mgr->rpool), 1639 isc_mempool_getallocated(mgr->dpool)); 1640 if (!MGR_IS_SHUTTINGDOWN(mgr)) { 1641 return (false); 1642 } 1643 if (!ISC_LIST_EMPTY(mgr->list)) { 1644 return (false); 1645 } 1646 if (isc_mempool_getallocated(mgr->depool) != 0) { 1647 return (false); 1648 } 1649 if (isc_mempool_getallocated(mgr->rpool) != 0) { 1650 return (false); 1651 } 1652 if (isc_mempool_getallocated(mgr->dpool) != 0) { 1653 return (false); 1654 } 1655 1656 return (true); 1657 } 1658 1659 /* 1660 * Mgr must be unlocked when calling this function. 1661 */ 1662 static void 1663 destroy_mgr(dns_dispatchmgr_t **mgrp) { 1664 dns_dispatchmgr_t *mgr; 1665 1666 mgr = *mgrp; 1667 *mgrp = NULL; 1668 1669 mgr->magic = 0; 1670 isc_mutex_destroy(&mgr->lock); 1671 mgr->state = 0; 1672 1673 isc_mempool_destroy(&mgr->depool); 1674 isc_mempool_destroy(&mgr->rpool); 1675 isc_mempool_destroy(&mgr->dpool); 1676 if (mgr->bpool != NULL) { 1677 isc_mempool_destroy(&mgr->bpool); 1678 } 1679 if (mgr->spool != NULL) { 1680 isc_mempool_destroy(&mgr->spool); 1681 } 1682 1683 isc_mutex_destroy(&mgr->spool_lock); 1684 isc_mutex_destroy(&mgr->bpool_lock); 1685 isc_mutex_destroy(&mgr->dpool_lock); 1686 isc_mutex_destroy(&mgr->rpool_lock); 1687 isc_mutex_destroy(&mgr->depool_lock); 1688 1689 if (mgr->qid != NULL) { 1690 qid_destroy(mgr->mctx, &mgr->qid); 1691 } 1692 1693 isc_mutex_destroy(&mgr->buffer_lock); 1694 1695 if (mgr->blackhole != NULL) { 1696 dns_acl_detach(&mgr->blackhole); 1697 } 1698 1699 if (mgr->stats != NULL) { 1700 isc_stats_detach(&mgr->stats); 1701 } 1702 1703 if (mgr->v4ports != NULL) { 1704 isc_mem_put(mgr->mctx, mgr->v4ports, 1705 mgr->nv4ports * sizeof(in_port_t)); 1706 } 1707 if (mgr->v6ports != NULL) { 1708 isc_mem_put(mgr->mctx, mgr->v6ports, 1709 mgr->nv6ports * sizeof(in_port_t)); 1710 } 1711 isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(dns_dispatchmgr_t)); 1712 } 1713 1714 static isc_result_t 1715 open_socket(isc_socketmgr_t *mgr, const isc_sockaddr_t *local, 1716 unsigned int options, isc_socket_t **sockp, 1717 isc_socket_t *dup_socket, bool duponly) { 1718 isc_socket_t *sock; 1719 isc_result_t result; 1720 1721 sock = *sockp; 1722 if (sock != NULL) { 1723 result = isc_socket_open(sock); 1724 if (result != ISC_R_SUCCESS) { 1725 return (result); 1726 } 1727 } else if (dup_socket != NULL && 1728 (!isc_socket_hasreuseport() || duponly)) { 1729 result = isc_socket_dup(dup_socket, &sock); 1730 if (result != ISC_R_SUCCESS) { 1731 return (result); 1732 } 1733 1734 isc_socket_setname(sock, "dispatcher", NULL); 1735 *sockp = sock; 1736 return (ISC_R_SUCCESS); 1737 } else { 1738 result = isc_socket_create(mgr, isc_sockaddr_pf(local), 1739 isc_sockettype_udp, &sock); 1740 if (result != ISC_R_SUCCESS) { 1741 return (result); 1742 } 1743 } 1744 1745 isc_socket_setname(sock, "dispatcher", NULL); 1746 1747 #ifndef ISC_ALLOW_MAPPED 1748 isc_socket_ipv6only(sock, true); 1749 #endif /* ifndef ISC_ALLOW_MAPPED */ 1750 result = isc_socket_bind(sock, local, options); 1751 if (result != ISC_R_SUCCESS) { 1752 if (*sockp == NULL) { 1753 isc_socket_detach(&sock); 1754 } else { 1755 isc_socket_close(sock); 1756 } 1757 return (result); 1758 } 1759 1760 *sockp = sock; 1761 return (ISC_R_SUCCESS); 1762 } 1763 1764 /*% 1765 * Create a temporary port list to set the initial default set of dispatch 1766 * ports: [1024, 65535]. This is almost meaningless as the application will 1767 * normally set the ports explicitly, but is provided to fill some minor corner 1768 * cases. 1769 */ 1770 static isc_result_t 1771 create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) { 1772 isc_result_t result; 1773 1774 result = isc_portset_create(mctx, portsetp); 1775 if (result != ISC_R_SUCCESS) { 1776 return (result); 1777 } 1778 isc_portset_addrange(*portsetp, 1024, 65535); 1779 1780 return (ISC_R_SUCCESS); 1781 } 1782 1783 /* 1784 * Publics. 1785 */ 1786 1787 isc_result_t 1788 dns_dispatchmgr_create(isc_mem_t *mctx, dns_dispatchmgr_t **mgrp) { 1789 dns_dispatchmgr_t *mgr; 1790 isc_result_t result; 1791 isc_portset_t *v4portset = NULL; 1792 isc_portset_t *v6portset = NULL; 1793 1794 REQUIRE(mctx != NULL); 1795 REQUIRE(mgrp != NULL && *mgrp == NULL); 1796 1797 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); 1798 1799 mgr->mctx = NULL; 1800 isc_mem_attach(mctx, &mgr->mctx); 1801 1802 mgr->blackhole = NULL; 1803 mgr->stats = NULL; 1804 1805 isc_mutex_init(&mgr->lock); 1806 isc_mutex_init(&mgr->buffer_lock); 1807 isc_mutex_init(&mgr->depool_lock); 1808 isc_mutex_init(&mgr->rpool_lock); 1809 isc_mutex_init(&mgr->dpool_lock); 1810 isc_mutex_init(&mgr->bpool_lock); 1811 isc_mutex_init(&mgr->spool_lock); 1812 1813 mgr->depool = NULL; 1814 isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t), 1815 &mgr->depool); 1816 1817 mgr->rpool = NULL; 1818 isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t), &mgr->rpool); 1819 1820 mgr->dpool = NULL; 1821 isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t), &mgr->dpool); 1822 1823 isc_mempool_setname(mgr->depool, "dispmgr_depool"); 1824 isc_mempool_setmaxalloc(mgr->depool, 32768); 1825 isc_mempool_setfreemax(mgr->depool, 32768); 1826 isc_mempool_associatelock(mgr->depool, &mgr->depool_lock); 1827 isc_mempool_setfillcount(mgr->depool, 32); 1828 1829 isc_mempool_setname(mgr->rpool, "dispmgr_rpool"); 1830 isc_mempool_setmaxalloc(mgr->rpool, 32768); 1831 isc_mempool_setfreemax(mgr->rpool, 32768); 1832 isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock); 1833 isc_mempool_setfillcount(mgr->rpool, 32); 1834 1835 isc_mempool_setname(mgr->dpool, "dispmgr_dpool"); 1836 isc_mempool_setmaxalloc(mgr->dpool, 32768); 1837 isc_mempool_setfreemax(mgr->dpool, 32768); 1838 isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock); 1839 isc_mempool_setfillcount(mgr->dpool, 32); 1840 1841 mgr->buffers = 0; 1842 mgr->buffersize = 0; 1843 mgr->maxbuffers = 0; 1844 mgr->bpool = NULL; 1845 mgr->spool = NULL; 1846 mgr->qid = NULL; 1847 mgr->state = 0; 1848 ISC_LIST_INIT(mgr->list); 1849 mgr->v4ports = NULL; 1850 mgr->v6ports = NULL; 1851 mgr->nv4ports = 0; 1852 mgr->nv6ports = 0; 1853 mgr->magic = DNS_DISPATCHMGR_MAGIC; 1854 1855 result = create_default_portset(mctx, &v4portset); 1856 if (result == ISC_R_SUCCESS) { 1857 result = create_default_portset(mctx, &v6portset); 1858 if (result == ISC_R_SUCCESS) { 1859 result = dns_dispatchmgr_setavailports(mgr, v4portset, 1860 v6portset); 1861 } 1862 } 1863 if (v4portset != NULL) { 1864 isc_portset_destroy(mctx, &v4portset); 1865 } 1866 if (v6portset != NULL) { 1867 isc_portset_destroy(mctx, &v6portset); 1868 } 1869 if (result != ISC_R_SUCCESS) { 1870 goto kill_dpool; 1871 } 1872 1873 *mgrp = mgr; 1874 return (ISC_R_SUCCESS); 1875 1876 kill_dpool: 1877 isc_mempool_destroy(&mgr->dpool); 1878 isc_mempool_destroy(&mgr->rpool); 1879 isc_mempool_destroy(&mgr->depool); 1880 isc_mutex_destroy(&mgr->spool_lock); 1881 isc_mutex_destroy(&mgr->bpool_lock); 1882 isc_mutex_destroy(&mgr->dpool_lock); 1883 isc_mutex_destroy(&mgr->rpool_lock); 1884 isc_mutex_destroy(&mgr->depool_lock); 1885 isc_mutex_destroy(&mgr->buffer_lock); 1886 isc_mutex_destroy(&mgr->lock); 1887 isc_mem_putanddetach(&mctx, mgr, sizeof(dns_dispatchmgr_t)); 1888 1889 return (result); 1890 } 1891 1892 void 1893 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) { 1894 REQUIRE(VALID_DISPATCHMGR(mgr)); 1895 if (mgr->blackhole != NULL) { 1896 dns_acl_detach(&mgr->blackhole); 1897 } 1898 dns_acl_attach(blackhole, &mgr->blackhole); 1899 } 1900 1901 dns_acl_t * 1902 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) { 1903 REQUIRE(VALID_DISPATCHMGR(mgr)); 1904 return (mgr->blackhole); 1905 } 1906 1907 void 1908 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr, 1909 dns_portlist_t *portlist) { 1910 REQUIRE(VALID_DISPATCHMGR(mgr)); 1911 UNUSED(portlist); 1912 1913 /* This function is deprecated: use dns_dispatchmgr_setavailports(). */ 1914 return; 1915 } 1916 1917 dns_portlist_t * 1918 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) { 1919 REQUIRE(VALID_DISPATCHMGR(mgr)); 1920 return (NULL); /* this function is deprecated */ 1921 } 1922 1923 isc_result_t 1924 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, 1925 isc_portset_t *v6portset) { 1926 in_port_t *v4ports, *v6ports, p; 1927 unsigned int nv4ports, nv6ports, i4, i6; 1928 1929 REQUIRE(VALID_DISPATCHMGR(mgr)); 1930 1931 nv4ports = isc_portset_nports(v4portset); 1932 nv6ports = isc_portset_nports(v6portset); 1933 1934 v4ports = NULL; 1935 if (nv4ports != 0) { 1936 v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports); 1937 } 1938 v6ports = NULL; 1939 if (nv6ports != 0) { 1940 v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports); 1941 } 1942 1943 p = 0; 1944 i4 = 0; 1945 i6 = 0; 1946 do { 1947 if (isc_portset_isset(v4portset, p)) { 1948 INSIST(i4 < nv4ports); 1949 v4ports[i4++] = p; 1950 } 1951 if (isc_portset_isset(v6portset, p)) { 1952 INSIST(i6 < nv6ports); 1953 v6ports[i6++] = p; 1954 } 1955 } while (p++ < 65535); 1956 INSIST(i4 == nv4ports && i6 == nv6ports); 1957 1958 PORTBUFLOCK(mgr); 1959 if (mgr->v4ports != NULL) { 1960 isc_mem_put(mgr->mctx, mgr->v4ports, 1961 mgr->nv4ports * sizeof(in_port_t)); 1962 } 1963 mgr->v4ports = v4ports; 1964 mgr->nv4ports = nv4ports; 1965 1966 if (mgr->v6ports != NULL) { 1967 isc_mem_put(mgr->mctx, mgr->v6ports, 1968 mgr->nv6ports * sizeof(in_port_t)); 1969 } 1970 mgr->v6ports = v6ports; 1971 mgr->nv6ports = nv6ports; 1972 PORTBUFUNLOCK(mgr); 1973 1974 return (ISC_R_SUCCESS); 1975 } 1976 1977 static isc_result_t 1978 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, unsigned int buffersize, 1979 unsigned int maxbuffers, unsigned int maxrequests, 1980 unsigned int buckets, unsigned int increment) { 1981 isc_result_t result; 1982 1983 REQUIRE(VALID_DISPATCHMGR(mgr)); 1984 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); 1985 REQUIRE(maxbuffers > 0); 1986 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 1987 REQUIRE(increment > buckets); 1988 1989 /* 1990 * Keep some number of items around. This should be a config 1991 * option. For now, keep 8, but later keep at least two even 1992 * if the caller wants less. This allows us to ensure certain 1993 * things, like an event can be "freed" and the next allocation 1994 * will always succeed. 1995 * 1996 * Note that if limits are placed on anything here, we use one 1997 * event internally, so the actual limit should be "wanted + 1." 1998 * 1999 * XXXMLG 2000 */ 2001 2002 if (maxbuffers < 8) { 2003 maxbuffers = 8; 2004 } 2005 2006 LOCK(&mgr->buffer_lock); 2007 2008 /* Create or adjust buffer pool */ 2009 if (mgr->bpool != NULL) { 2010 /* 2011 * We only increase the maxbuffers to avoid accidental buffer 2012 * shortage. Ideally we'd separate the manager-wide maximum 2013 * from per-dispatch limits and respect the latter within the 2014 * global limit. But at this moment that's deemed to be 2015 * overkilling and isn't worth additional implementation 2016 * complexity. 2017 */ 2018 if (maxbuffers > mgr->maxbuffers) { 2019 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); 2020 isc_mempool_setfreemax(mgr->bpool, maxbuffers); 2021 mgr->maxbuffers = maxbuffers; 2022 } 2023 } else { 2024 isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool); 2025 isc_mempool_setname(mgr->bpool, "dispmgr_bpool"); 2026 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); 2027 isc_mempool_setfreemax(mgr->bpool, maxbuffers); 2028 isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock); 2029 isc_mempool_setfillcount(mgr->bpool, 32); 2030 } 2031 2032 /* Create or adjust socket pool */ 2033 if (mgr->spool != NULL) { 2034 if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) { 2035 isc_mempool_setmaxalloc(mgr->spool, 2036 DNS_DISPATCH_POOLSOCKS * 2); 2037 isc_mempool_setfreemax(mgr->spool, 2038 DNS_DISPATCH_POOLSOCKS * 2); 2039 } 2040 UNLOCK(&mgr->buffer_lock); 2041 return (ISC_R_SUCCESS); 2042 } 2043 isc_mempool_create(mgr->mctx, sizeof(dispsocket_t), &mgr->spool); 2044 2045 isc_mempool_setname(mgr->spool, "dispmgr_spool"); 2046 isc_mempool_setmaxalloc(mgr->spool, maxrequests); 2047 isc_mempool_setfreemax(mgr->spool, maxrequests); 2048 isc_mempool_associatelock(mgr->spool, &mgr->spool_lock); 2049 isc_mempool_setfillcount(mgr->spool, 32); 2050 2051 result = qid_allocate(mgr, buckets, increment, &mgr->qid, true); 2052 if (result != ISC_R_SUCCESS) { 2053 goto cleanup; 2054 } 2055 2056 mgr->buffersize = buffersize; 2057 mgr->maxbuffers = maxbuffers; 2058 UNLOCK(&mgr->buffer_lock); 2059 return (ISC_R_SUCCESS); 2060 2061 cleanup: 2062 isc_mempool_destroy(&mgr->bpool); 2063 if (mgr->spool != NULL) { 2064 isc_mempool_destroy(&mgr->spool); 2065 } 2066 UNLOCK(&mgr->buffer_lock); 2067 return (result); 2068 } 2069 2070 void 2071 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) { 2072 dns_dispatchmgr_t *mgr; 2073 bool killit; 2074 2075 REQUIRE(mgrp != NULL); 2076 REQUIRE(VALID_DISPATCHMGR(*mgrp)); 2077 2078 mgr = *mgrp; 2079 *mgrp = NULL; 2080 2081 LOCK(&mgr->lock); 2082 mgr->state |= MGR_SHUTTINGDOWN; 2083 killit = destroy_mgr_ok(mgr); 2084 UNLOCK(&mgr->lock); 2085 2086 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit); 2087 2088 if (killit) { 2089 destroy_mgr(&mgr); 2090 } 2091 } 2092 2093 void 2094 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { 2095 REQUIRE(VALID_DISPATCHMGR(mgr)); 2096 REQUIRE(ISC_LIST_EMPTY(mgr->list)); 2097 REQUIRE(mgr->stats == NULL); 2098 2099 isc_stats_attach(stats, &mgr->stats); 2100 } 2101 2102 static int 2103 port_cmp(const void *key, const void *ent) { 2104 in_port_t p1 = *(const in_port_t *)key; 2105 in_port_t p2 = *(const in_port_t *)ent; 2106 2107 if (p1 < p2) { 2108 return (-1); 2109 } else if (p1 == p2) { 2110 return (0); 2111 } else { 2112 return (1); 2113 } 2114 } 2115 2116 static bool 2117 portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 2118 isc_sockaddr_t *sockaddrp) { 2119 isc_sockaddr_t sockaddr; 2120 isc_result_t result; 2121 in_port_t *ports, port; 2122 unsigned int nports; 2123 bool available = false; 2124 2125 REQUIRE(sock != NULL || sockaddrp != NULL); 2126 2127 PORTBUFLOCK(mgr); 2128 if (sock != NULL) { 2129 sockaddrp = &sockaddr; 2130 result = isc_socket_getsockname(sock, sockaddrp); 2131 if (result != ISC_R_SUCCESS) { 2132 goto unlock; 2133 } 2134 } 2135 2136 if (isc_sockaddr_pf(sockaddrp) == AF_INET) { 2137 ports = mgr->v4ports; 2138 nports = mgr->nv4ports; 2139 } else { 2140 ports = mgr->v6ports; 2141 nports = mgr->nv6ports; 2142 } 2143 if (ports == NULL) { 2144 goto unlock; 2145 } 2146 2147 port = isc_sockaddr_getport(sockaddrp); 2148 if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL) 2149 { 2150 available = true; 2151 } 2152 2153 unlock: 2154 PORTBUFUNLOCK(mgr); 2155 return (available); 2156 } 2157 2158 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask))) 2159 2160 static bool 2161 local_addr_match(dns_dispatch_t *disp, const isc_sockaddr_t *addr) { 2162 isc_sockaddr_t sockaddr; 2163 isc_result_t result; 2164 2165 REQUIRE(disp->socket != NULL); 2166 2167 if (addr == NULL) { 2168 return (true); 2169 } 2170 2171 /* 2172 * Don't match wildcard ports unless the port is available in the 2173 * current configuration. 2174 */ 2175 if (isc_sockaddr_getport(addr) == 0 && 2176 isc_sockaddr_getport(&disp->local) == 0 && 2177 !portavailable(disp->mgr, disp->socket, NULL)) 2178 { 2179 return (false); 2180 } 2181 2182 /* 2183 * Check if we match the binding <address,port>. 2184 * Wildcard ports match/fail here. 2185 */ 2186 if (isc_sockaddr_equal(&disp->local, addr)) { 2187 return (true); 2188 } 2189 if (isc_sockaddr_getport(addr) == 0) { 2190 return (false); 2191 } 2192 2193 /* 2194 * Check if we match a bound wildcard port <address,port>. 2195 */ 2196 if (!isc_sockaddr_eqaddr(&disp->local, addr)) { 2197 return (false); 2198 } 2199 result = isc_socket_getsockname(disp->socket, &sockaddr); 2200 if (result != ISC_R_SUCCESS) { 2201 return (false); 2202 } 2203 2204 return (isc_sockaddr_equal(&sockaddr, addr)); 2205 } 2206 2207 /* 2208 * Requires mgr be locked. 2209 * 2210 * No dispatcher can be locked by this thread when calling this function. 2211 * 2212 * 2213 * NOTE: 2214 * If a matching dispatcher is found, it is locked after this function 2215 * returns, and must be unlocked by the caller. 2216 */ 2217 static isc_result_t 2218 dispatch_find(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *local, 2219 unsigned int attributes, unsigned int mask, 2220 dns_dispatch_t **dispp) { 2221 dns_dispatch_t *disp; 2222 isc_result_t result; 2223 2224 /* 2225 * Make certain that we will not match a private or exclusive dispatch. 2226 */ 2227 attributes &= ~(DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE); 2228 mask |= (DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE); 2229 2230 disp = ISC_LIST_HEAD(mgr->list); 2231 while (disp != NULL) { 2232 LOCK(&disp->lock); 2233 if ((disp->shutting_down == 0) && 2234 ATTRMATCH(disp->attributes, attributes, mask) && 2235 local_addr_match(disp, local)) 2236 { 2237 break; 2238 } 2239 UNLOCK(&disp->lock); 2240 disp = ISC_LIST_NEXT(disp, link); 2241 } 2242 2243 if (disp == NULL) { 2244 result = ISC_R_NOTFOUND; 2245 goto out; 2246 } 2247 2248 *dispp = disp; 2249 result = ISC_R_SUCCESS; 2250 out: 2251 2252 return (result); 2253 } 2254 2255 static isc_result_t 2256 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, 2257 unsigned int increment, dns_qid_t **qidp, bool needsocktable) { 2258 dns_qid_t *qid; 2259 unsigned int i; 2260 2261 REQUIRE(VALID_DISPATCHMGR(mgr)); 2262 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 2263 REQUIRE(increment > buckets); 2264 REQUIRE(qidp != NULL && *qidp == NULL); 2265 2266 qid = isc_mem_get(mgr->mctx, sizeof(*qid)); 2267 2268 qid->qid_table = isc_mem_get(mgr->mctx, 2269 buckets * sizeof(dns_displist_t)); 2270 2271 qid->sock_table = NULL; 2272 if (needsocktable) { 2273 qid->sock_table = isc_mem_get( 2274 mgr->mctx, buckets * sizeof(dispsocketlist_t)); 2275 } 2276 2277 isc_mutex_init(&qid->lock); 2278 2279 for (i = 0; i < buckets; i++) { 2280 ISC_LIST_INIT(qid->qid_table[i]); 2281 if (qid->sock_table != NULL) { 2282 ISC_LIST_INIT(qid->sock_table[i]); 2283 } 2284 } 2285 2286 qid->qid_nbuckets = buckets; 2287 qid->qid_increment = increment; 2288 qid->magic = QID_MAGIC; 2289 *qidp = qid; 2290 return (ISC_R_SUCCESS); 2291 } 2292 2293 static void 2294 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) { 2295 dns_qid_t *qid; 2296 2297 REQUIRE(qidp != NULL); 2298 qid = *qidp; 2299 *qidp = NULL; 2300 2301 REQUIRE(VALID_QID(qid)); 2302 2303 qid->magic = 0; 2304 isc_mem_put(mctx, qid->qid_table, 2305 qid->qid_nbuckets * sizeof(dns_displist_t)); 2306 if (qid->sock_table != NULL) { 2307 isc_mem_put(mctx, qid->sock_table, 2308 qid->qid_nbuckets * sizeof(dispsocketlist_t)); 2309 } 2310 isc_mutex_destroy(&qid->lock); 2311 isc_mem_put(mctx, qid, sizeof(*qid)); 2312 } 2313 2314 /* 2315 * Allocate and set important limits. 2316 */ 2317 static isc_result_t 2318 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests, 2319 dns_dispatch_t **dispp) { 2320 dns_dispatch_t *disp; 2321 isc_result_t result; 2322 2323 REQUIRE(VALID_DISPATCHMGR(mgr)); 2324 REQUIRE(dispp != NULL && *dispp == NULL); 2325 2326 /* 2327 * Set up the dispatcher, mostly. Don't bother setting some of 2328 * the options that are controlled by tcp vs. udp, etc. 2329 */ 2330 2331 disp = isc_mempool_get(mgr->dpool); 2332 if (disp == NULL) { 2333 return (ISC_R_NOMEMORY); 2334 } 2335 2336 disp->magic = 0; 2337 disp->mgr = mgr; 2338 disp->maxrequests = maxrequests; 2339 disp->attributes = 0; 2340 ISC_LINK_INIT(disp, link); 2341 disp->refcount = 1; 2342 disp->recv_pending = 0; 2343 memset(&disp->local, 0, sizeof(disp->local)); 2344 memset(&disp->peer, 0, sizeof(disp->peer)); 2345 disp->localport = 0; 2346 disp->shutting_down = 0; 2347 disp->shutdown_out = 0; 2348 disp->connected = 0; 2349 disp->tcpmsg_valid = 0; 2350 disp->shutdown_why = ISC_R_UNEXPECTED; 2351 disp->requests = 0; 2352 disp->tcpbuffers = 0; 2353 disp->qid = NULL; 2354 ISC_LIST_INIT(disp->activesockets); 2355 ISC_LIST_INIT(disp->inactivesockets); 2356 disp->nsockets = 0; 2357 disp->port_table = NULL; 2358 disp->portpool = NULL; 2359 disp->dscp = -1; 2360 2361 isc_mutex_init(&disp->lock); 2362 2363 disp->failsafe_ev = allocate_devent(disp); 2364 if (disp->failsafe_ev == NULL) { 2365 result = ISC_R_NOMEMORY; 2366 goto kill_lock; 2367 } 2368 2369 disp->magic = DISPATCH_MAGIC; 2370 2371 *dispp = disp; 2372 return (ISC_R_SUCCESS); 2373 2374 /* 2375 * error returns 2376 */ 2377 kill_lock: 2378 isc_mutex_destroy(&disp->lock); 2379 isc_mempool_put(mgr->dpool, disp); 2380 2381 return (result); 2382 } 2383 2384 /* 2385 * MUST be unlocked, and not used by anything. 2386 */ 2387 static void 2388 dispatch_free(dns_dispatch_t **dispp) { 2389 dns_dispatch_t *disp; 2390 dns_dispatchmgr_t *mgr; 2391 2392 REQUIRE(VALID_DISPATCH(*dispp)); 2393 disp = *dispp; 2394 *dispp = NULL; 2395 2396 mgr = disp->mgr; 2397 REQUIRE(VALID_DISPATCHMGR(mgr)); 2398 2399 if (disp->tcpmsg_valid) { 2400 dns_tcpmsg_invalidate(&disp->tcpmsg); 2401 disp->tcpmsg_valid = 0; 2402 } 2403 2404 INSIST(disp->tcpbuffers == 0); 2405 INSIST(disp->requests == 0); 2406 INSIST(disp->recv_pending == 0); 2407 INSIST(ISC_LIST_EMPTY(disp->activesockets)); 2408 INSIST(ISC_LIST_EMPTY(disp->inactivesockets)); 2409 2410 isc_mempool_put(mgr->depool, disp->failsafe_ev); 2411 disp->failsafe_ev = NULL; 2412 2413 if (disp->qid != NULL) { 2414 qid_destroy(mgr->mctx, &disp->qid); 2415 } 2416 2417 if (disp->port_table != NULL) { 2418 for (int i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) { 2419 INSIST(ISC_LIST_EMPTY(disp->port_table[i])); 2420 } 2421 isc_mem_put(mgr->mctx, disp->port_table, 2422 sizeof(disp->port_table[0]) * 2423 DNS_DISPATCH_PORTTABLESIZE); 2424 } 2425 2426 if (disp->portpool != NULL) { 2427 isc_mempool_destroy(&disp->portpool); 2428 } 2429 2430 disp->mgr = NULL; 2431 isc_mutex_destroy(&disp->lock); 2432 disp->magic = 0; 2433 isc_mempool_put(mgr->dpool, disp); 2434 } 2435 2436 isc_result_t 2437 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, 2438 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 2439 const isc_sockaddr_t *destaddr, unsigned int buffersize, 2440 unsigned int maxbuffers, unsigned int maxrequests, 2441 unsigned int buckets, unsigned int increment, 2442 unsigned int attributes, dns_dispatch_t **dispp) { 2443 isc_result_t result; 2444 dns_dispatch_t *disp; 2445 2446 UNUSED(maxbuffers); 2447 UNUSED(buffersize); 2448 2449 REQUIRE(VALID_DISPATCHMGR(mgr)); 2450 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp); 2451 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0); 2452 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0); 2453 2454 if (destaddr == NULL) { 2455 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */ 2456 } 2457 2458 LOCK(&mgr->lock); 2459 2460 /* 2461 * dispatch_allocate() checks mgr for us. 2462 * qid_allocate() checks buckets and increment for us. 2463 */ 2464 disp = NULL; 2465 result = dispatch_allocate(mgr, maxrequests, &disp); 2466 if (result != ISC_R_SUCCESS) { 2467 UNLOCK(&mgr->lock); 2468 return (result); 2469 } 2470 2471 result = qid_allocate(mgr, buckets, increment, &disp->qid, false); 2472 if (result != ISC_R_SUCCESS) { 2473 goto deallocate_dispatch; 2474 } 2475 2476 disp->socktype = isc_sockettype_tcp; 2477 disp->socket = NULL; 2478 isc_socket_attach(sock, &disp->socket); 2479 2480 disp->sepool = NULL; 2481 2482 disp->ntasks = 1; 2483 disp->task[0] = NULL; 2484 result = isc_task_create(taskmgr, 50, &disp->task[0]); 2485 if (result != ISC_R_SUCCESS) { 2486 goto kill_socket; 2487 } 2488 2489 disp->ctlevent = 2490 isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, 2491 destroy_disp, disp, sizeof(isc_event_t)); 2492 2493 isc_task_setname(disp->task[0], "tcpdispatch", disp); 2494 2495 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg); 2496 disp->tcpmsg_valid = 1; 2497 2498 disp->attributes = attributes; 2499 2500 if (localaddr == NULL) { 2501 if (destaddr != NULL) { 2502 switch (isc_sockaddr_pf(destaddr)) { 2503 case AF_INET: 2504 isc_sockaddr_any(&disp->local); 2505 break; 2506 case AF_INET6: 2507 isc_sockaddr_any6(&disp->local); 2508 break; 2509 } 2510 } 2511 } else { 2512 disp->local = *localaddr; 2513 } 2514 2515 if (destaddr != NULL) { 2516 disp->peer = *destaddr; 2517 } 2518 2519 /* 2520 * Append it to the dispatcher list. 2521 */ 2522 ISC_LIST_APPEND(mgr->list, disp, link); 2523 UNLOCK(&mgr->lock); 2524 2525 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp); 2526 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); 2527 *dispp = disp; 2528 2529 return (ISC_R_SUCCESS); 2530 2531 kill_socket: 2532 isc_socket_detach(&disp->socket); 2533 deallocate_dispatch: 2534 dispatch_free(&disp); 2535 2536 UNLOCK(&mgr->lock); 2537 2538 return (result); 2539 } 2540 2541 isc_result_t 2542 dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, 2543 const isc_sockaddr_t *localaddr, bool *connected, 2544 dns_dispatch_t **dispp) { 2545 dns_dispatch_t *disp; 2546 isc_result_t result; 2547 isc_sockaddr_t peeraddr; 2548 isc_sockaddr_t sockname; 2549 unsigned int attributes, mask; 2550 bool match = false; 2551 2552 REQUIRE(VALID_DISPATCHMGR(mgr)); 2553 REQUIRE(destaddr != NULL); 2554 REQUIRE(dispp != NULL && *dispp == NULL); 2555 2556 /* First pass */ 2557 attributes = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_CONNECTED; 2558 mask = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_PRIVATE | 2559 DNS_DISPATCHATTR_EXCLUSIVE | DNS_DISPATCHATTR_CONNECTED; 2560 2561 LOCK(&mgr->lock); 2562 disp = ISC_LIST_HEAD(mgr->list); 2563 while (disp != NULL && !match) { 2564 LOCK(&disp->lock); 2565 if ((disp->shutting_down == 0) && 2566 ATTRMATCH(disp->attributes, attributes, mask) && 2567 (localaddr == NULL || 2568 isc_sockaddr_eqaddr(localaddr, &disp->local))) 2569 { 2570 result = isc_socket_getsockname(disp->socket, 2571 &sockname); 2572 if (result == ISC_R_SUCCESS) { 2573 result = isc_socket_getpeername(disp->socket, 2574 &peeraddr); 2575 } 2576 if (result == ISC_R_SUCCESS && 2577 isc_sockaddr_equal(destaddr, &peeraddr) && 2578 (localaddr == NULL || 2579 isc_sockaddr_eqaddr(localaddr, &sockname))) 2580 { 2581 /* attach */ 2582 disp->refcount++; 2583 *dispp = disp; 2584 match = true; 2585 if (connected != NULL) { 2586 *connected = true; 2587 } 2588 } 2589 } 2590 UNLOCK(&disp->lock); 2591 disp = ISC_LIST_NEXT(disp, link); 2592 } 2593 if (match || connected == NULL) { 2594 UNLOCK(&mgr->lock); 2595 return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND); 2596 } 2597 2598 /* Second pass, only if connected != NULL */ 2599 attributes = DNS_DISPATCHATTR_TCP; 2600 2601 disp = ISC_LIST_HEAD(mgr->list); 2602 while (disp != NULL && !match) { 2603 LOCK(&disp->lock); 2604 if ((disp->shutting_down == 0) && 2605 ATTRMATCH(disp->attributes, attributes, mask) && 2606 (localaddr == NULL || 2607 isc_sockaddr_eqaddr(localaddr, &disp->local)) && 2608 isc_sockaddr_equal(destaddr, &disp->peer)) 2609 { 2610 /* attach */ 2611 disp->refcount++; 2612 *dispp = disp; 2613 match = true; 2614 } 2615 UNLOCK(&disp->lock); 2616 disp = ISC_LIST_NEXT(disp, link); 2617 } 2618 UNLOCK(&mgr->lock); 2619 return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND); 2620 } 2621 2622 isc_result_t 2623 dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 2624 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 2625 unsigned int buffersize, unsigned int maxbuffers, 2626 unsigned int maxrequests, unsigned int buckets, 2627 unsigned int increment, unsigned int attributes, 2628 unsigned int mask, dns_dispatch_t **dispp, 2629 dns_dispatch_t *dup_dispatch) { 2630 isc_result_t result; 2631 dns_dispatch_t *disp = NULL; 2632 2633 REQUIRE(VALID_DISPATCHMGR(mgr)); 2634 REQUIRE(sockmgr != NULL); 2635 REQUIRE(localaddr != NULL); 2636 REQUIRE(taskmgr != NULL); 2637 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); 2638 REQUIRE(maxbuffers > 0); 2639 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ 2640 REQUIRE(increment > buckets); 2641 REQUIRE(dispp != NULL && *dispp == NULL); 2642 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0); 2643 2644 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers, 2645 maxrequests, buckets, increment); 2646 if (result != ISC_R_SUCCESS) { 2647 return (result); 2648 } 2649 2650 LOCK(&mgr->lock); 2651 2652 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 2653 REQUIRE(isc_sockaddr_getport(localaddr) == 0); 2654 goto createudp; 2655 } 2656 2657 /* 2658 * See if we have a dispatcher that matches. 2659 */ 2660 if (dup_dispatch == NULL) { 2661 result = dispatch_find(mgr, localaddr, attributes, mask, &disp); 2662 if (result == ISC_R_SUCCESS) { 2663 disp->refcount++; 2664 2665 if (disp->maxrequests < maxrequests) { 2666 disp->maxrequests = maxrequests; 2667 } 2668 2669 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 2670 0 && 2671 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) 2672 { 2673 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; 2674 if (disp->recv_pending != 0) { 2675 isc_socket_cancel(disp->socket, 2676 disp->task[0], 2677 ISC_SOCKCANCEL_RECV); 2678 } 2679 } 2680 2681 UNLOCK(&disp->lock); 2682 UNLOCK(&mgr->lock); 2683 2684 *dispp = disp; 2685 2686 return (ISC_R_SUCCESS); 2687 } 2688 } 2689 2690 createudp: 2691 /* 2692 * Nope, create one. 2693 */ 2694 result = dispatch_createudp( 2695 mgr, sockmgr, taskmgr, localaddr, maxrequests, attributes, 2696 &disp, dup_dispatch == NULL ? NULL : dup_dispatch->socket); 2697 2698 if (result != ISC_R_SUCCESS) { 2699 UNLOCK(&mgr->lock); 2700 return (result); 2701 } 2702 2703 UNLOCK(&mgr->lock); 2704 *dispp = disp; 2705 2706 return (ISC_R_SUCCESS); 2707 } 2708 2709 isc_result_t 2710 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 2711 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 2712 unsigned int buffersize, unsigned int maxbuffers, 2713 unsigned int maxrequests, unsigned int buckets, 2714 unsigned int increment, unsigned int attributes, 2715 unsigned int mask, dns_dispatch_t **dispp) { 2716 return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr, 2717 buffersize, maxbuffers, maxrequests, 2718 buckets, increment, attributes, mask, 2719 dispp, NULL)); 2720 } 2721 2722 /* 2723 * mgr should be locked. 2724 */ 2725 2726 #ifndef DNS_DISPATCH_HELD 2727 #define DNS_DISPATCH_HELD 20U 2728 #endif /* ifndef DNS_DISPATCH_HELD */ 2729 2730 static isc_result_t 2731 get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, 2732 isc_socketmgr_t *sockmgr, const isc_sockaddr_t *localaddr, 2733 isc_socket_t **sockp, isc_socket_t *dup_socket, bool duponly) { 2734 unsigned int i, j; 2735 isc_socket_t *held[DNS_DISPATCH_HELD]; 2736 isc_sockaddr_t localaddr_bound; 2737 isc_socket_t *sock = NULL; 2738 isc_result_t result = ISC_R_SUCCESS; 2739 bool anyport; 2740 2741 INSIST(sockp != NULL && *sockp == NULL); 2742 2743 localaddr_bound = *localaddr; 2744 anyport = (isc_sockaddr_getport(localaddr) == 0); 2745 2746 if (anyport) { 2747 unsigned int nports; 2748 in_port_t *ports; 2749 2750 /* 2751 * If no port is specified, we first try to pick up a random 2752 * port by ourselves. 2753 */ 2754 if (isc_sockaddr_pf(localaddr) == AF_INET) { 2755 nports = disp->mgr->nv4ports; 2756 ports = disp->mgr->v4ports; 2757 } else { 2758 nports = disp->mgr->nv6ports; 2759 ports = disp->mgr->v6ports; 2760 } 2761 if (nports == 0) { 2762 return (ISC_R_ADDRNOTAVAIL); 2763 } 2764 2765 for (i = 0; i < 1024; i++) { 2766 in_port_t prt; 2767 2768 prt = ports[isc_random_uniform(nports)]; 2769 isc_sockaddr_setport(&localaddr_bound, prt); 2770 result = open_socket(sockmgr, &localaddr_bound, 0, 2771 &sock, NULL, false); 2772 /* 2773 * Continue if the port chosen is already in use 2774 * or the OS has reserved it. 2775 */ 2776 if (result == ISC_R_NOPERM || result == ISC_R_ADDRINUSE) 2777 { 2778 continue; 2779 } 2780 disp->localport = prt; 2781 *sockp = sock; 2782 return (result); 2783 } 2784 2785 /* 2786 * If this fails 1024 times, we then ask the kernel for 2787 * choosing one. 2788 */ 2789 } else { 2790 /* Allow to reuse address for non-random ports. */ 2791 result = open_socket(sockmgr, localaddr, 2792 ISC_SOCKET_REUSEADDRESS, &sock, dup_socket, 2793 duponly); 2794 2795 if (result == ISC_R_SUCCESS) { 2796 *sockp = sock; 2797 } 2798 2799 return (result); 2800 } 2801 2802 memset(held, 0, sizeof(held)); 2803 i = 0; 2804 2805 for (j = 0; j < 0xffffU; j++) { 2806 result = open_socket(sockmgr, localaddr, 0, &sock, NULL, false); 2807 if (result != ISC_R_SUCCESS) { 2808 goto end; 2809 } else if (portavailable(mgr, sock, NULL)) { 2810 break; 2811 } 2812 if (held[i] != NULL) { 2813 isc_socket_detach(&held[i]); 2814 } 2815 held[i++] = sock; 2816 sock = NULL; 2817 if (i == DNS_DISPATCH_HELD) { 2818 i = 0; 2819 } 2820 } 2821 if (j == 0xffffU) { 2822 mgr_log(mgr, ISC_LOG_ERROR, 2823 "avoid-v%s-udp-ports: unable to allocate " 2824 "an available port", 2825 isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6"); 2826 result = ISC_R_FAILURE; 2827 goto end; 2828 } 2829 *sockp = sock; 2830 2831 end: 2832 for (i = 0; i < DNS_DISPATCH_HELD; i++) { 2833 if (held[i] != NULL) { 2834 isc_socket_detach(&held[i]); 2835 } 2836 } 2837 2838 return (result); 2839 } 2840 2841 static isc_result_t 2842 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, 2843 isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, 2844 unsigned int maxrequests, unsigned int attributes, 2845 dns_dispatch_t **dispp, isc_socket_t *dup_socket) { 2846 isc_result_t result; 2847 dns_dispatch_t *disp; 2848 isc_socket_t *sock = NULL; 2849 int i = 0; 2850 bool duponly = ((attributes & DNS_DISPATCHATTR_CANREUSE) == 0); 2851 2852 /* This is an attribute needed only at creation time */ 2853 attributes &= ~DNS_DISPATCHATTR_CANREUSE; 2854 /* 2855 * dispatch_allocate() checks mgr for us. 2856 */ 2857 disp = NULL; 2858 result = dispatch_allocate(mgr, maxrequests, &disp); 2859 if (result != ISC_R_SUCCESS) { 2860 return (result); 2861 } 2862 2863 disp->socktype = isc_sockettype_udp; 2864 2865 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) { 2866 result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock, 2867 dup_socket, duponly); 2868 if (result != ISC_R_SUCCESS) { 2869 goto deallocate_dispatch; 2870 } 2871 2872 if (isc_log_wouldlog(dns_lctx, 90)) { 2873 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 2874 2875 isc_sockaddr_format(localaddr, addrbuf, 2876 ISC_SOCKADDR_FORMATSIZE); 2877 mgr_log(mgr, LVL(90), 2878 "dns_dispatch_createudp: Created" 2879 " UDP dispatch for %s with socket fd %d", 2880 addrbuf, isc_socket_getfd(sock)); 2881 } 2882 } else { 2883 isc_sockaddr_t sa_any; 2884 2885 /* 2886 * For dispatches using exclusive sockets with a specific 2887 * source address, we only check if the specified address is 2888 * available on the system. Query sockets will be created later 2889 * on demand. 2890 */ 2891 isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr)); 2892 if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { 2893 result = open_socket(sockmgr, localaddr, 0, &sock, NULL, 2894 false); 2895 if (sock != NULL) { 2896 isc_socket_detach(&sock); 2897 } 2898 if (result != ISC_R_SUCCESS) { 2899 goto deallocate_dispatch; 2900 } 2901 } 2902 2903 disp->port_table = isc_mem_get( 2904 mgr->mctx, sizeof(disp->port_table[0]) * 2905 DNS_DISPATCH_PORTTABLESIZE); 2906 for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) { 2907 ISC_LIST_INIT(disp->port_table[i]); 2908 } 2909 2910 isc_mempool_create(mgr->mctx, sizeof(dispportentry_t), 2911 &disp->portpool); 2912 isc_mempool_setname(disp->portpool, "disp_portpool"); 2913 isc_mempool_setfreemax(disp->portpool, 128); 2914 } 2915 disp->socket = sock; 2916 disp->local = *localaddr; 2917 2918 if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 2919 disp->ntasks = MAX_INTERNAL_TASKS; 2920 } else { 2921 disp->ntasks = 1; 2922 } 2923 for (i = 0; i < disp->ntasks; i++) { 2924 disp->task[i] = NULL; 2925 result = isc_task_create(taskmgr, 0, &disp->task[i]); 2926 if (result != ISC_R_SUCCESS) { 2927 while (--i >= 0) { 2928 isc_task_shutdown(disp->task[i]); 2929 isc_task_detach(&disp->task[i]); 2930 } 2931 goto kill_socket; 2932 } 2933 isc_task_setname(disp->task[i], "udpdispatch", disp); 2934 } 2935 2936 disp->ctlevent = 2937 isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, 2938 destroy_disp, disp, sizeof(isc_event_t)); 2939 2940 disp->sepool = NULL; 2941 isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t), &disp->sepool); 2942 2943 isc_mutex_init(&disp->sepool_lock); 2944 2945 isc_mempool_setname(disp->sepool, "disp_sepool"); 2946 isc_mempool_setmaxalloc(disp->sepool, 32768); 2947 isc_mempool_setfreemax(disp->sepool, 32768); 2948 isc_mempool_associatelock(disp->sepool, &disp->sepool_lock); 2949 isc_mempool_setfillcount(disp->sepool, 16); 2950 2951 attributes &= ~DNS_DISPATCHATTR_TCP; 2952 attributes |= DNS_DISPATCHATTR_UDP; 2953 disp->attributes = attributes; 2954 2955 /* 2956 * Append it to the dispatcher list. 2957 */ 2958 ISC_LIST_APPEND(mgr->list, disp, link); 2959 2960 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp); 2961 dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */ 2962 if (disp->socket != NULL) { 2963 dispatch_log(disp, LVL(90), "created socket %p", disp->socket); 2964 } 2965 2966 *dispp = disp; 2967 2968 return (result); 2969 2970 /* 2971 * Error returns. 2972 */ 2973 kill_socket: 2974 if (disp->socket != NULL) { 2975 isc_socket_detach(&disp->socket); 2976 } 2977 deallocate_dispatch: 2978 dispatch_free(&disp); 2979 2980 return (result); 2981 } 2982 2983 void 2984 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) { 2985 REQUIRE(VALID_DISPATCH(disp)); 2986 REQUIRE(dispp != NULL && *dispp == NULL); 2987 2988 LOCK(&disp->lock); 2989 disp->refcount++; 2990 UNLOCK(&disp->lock); 2991 2992 *dispp = disp; 2993 } 2994 2995 /* 2996 * It is important to lock the manager while we are deleting the dispatch, 2997 * since dns_dispatch_getudp will call dispatch_find, which returns to 2998 * the caller a dispatch but does not attach to it until later. _getudp 2999 * locks the manager, however, so locking it here will keep us from attaching 3000 * to a dispatcher that is in the process of going away. 3001 */ 3002 void 3003 dns_dispatch_detach(dns_dispatch_t **dispp) { 3004 dns_dispatch_t *disp; 3005 dispsocket_t *dispsock; 3006 bool killit; 3007 3008 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp)); 3009 3010 disp = *dispp; 3011 *dispp = NULL; 3012 3013 LOCK(&disp->lock); 3014 3015 INSIST(disp->refcount > 0); 3016 disp->refcount--; 3017 if (disp->refcount == 0) { 3018 if (disp->recv_pending > 0) { 3019 isc_socket_cancel(disp->socket, disp->task[0], 3020 ISC_SOCKCANCEL_RECV); 3021 } 3022 for (dispsock = ISC_LIST_HEAD(disp->activesockets); 3023 dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link)) 3024 { 3025 isc_socket_cancel(dispsock->socket, dispsock->task, 3026 ISC_SOCKCANCEL_RECV); 3027 } 3028 disp->shutting_down = 1; 3029 } 3030 3031 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount); 3032 3033 killit = destroy_disp_ok(disp); 3034 UNLOCK(&disp->lock); 3035 if (killit) { 3036 isc_task_send(disp->task[0], &disp->ctlevent); 3037 } 3038 } 3039 3040 isc_result_t 3041 dns_dispatch_addresponse(dns_dispatch_t *disp, unsigned int options, 3042 const isc_sockaddr_t *dest, isc_task_t *task, 3043 isc_taskaction_t action, void *arg, 3044 dns_messageid_t *idp, dns_dispentry_t **resp, 3045 isc_socketmgr_t *sockmgr) { 3046 dns_dispentry_t *res; 3047 unsigned int bucket; 3048 in_port_t localport = 0; 3049 dns_messageid_t id; 3050 int i; 3051 bool ok; 3052 dns_qid_t *qid; 3053 dispsocket_t *dispsocket = NULL; 3054 isc_result_t result; 3055 3056 REQUIRE(VALID_DISPATCH(disp)); 3057 REQUIRE(task != NULL); 3058 REQUIRE(dest != NULL); 3059 REQUIRE(resp != NULL && *resp == NULL); 3060 REQUIRE(idp != NULL); 3061 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 3062 REQUIRE(sockmgr != NULL); 3063 } 3064 3065 LOCK(&disp->lock); 3066 3067 if (disp->shutting_down == 1) { 3068 UNLOCK(&disp->lock); 3069 return (ISC_R_SHUTTINGDOWN); 3070 } 3071 3072 if (disp->requests >= disp->maxrequests) { 3073 UNLOCK(&disp->lock); 3074 return (ISC_R_QUOTA); 3075 } 3076 3077 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && 3078 disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) 3079 { 3080 dispsocket_t *oldestsocket; 3081 dns_dispentry_t *oldestresp; 3082 dns_dispatchevent_t *rev; 3083 3084 /* 3085 * Kill oldest outstanding query if the number of sockets 3086 * exceeds the quota to keep the room for new queries. 3087 */ 3088 oldestsocket = ISC_LIST_HEAD(disp->activesockets); 3089 oldestresp = oldestsocket->resp; 3090 if (oldestresp != NULL && !oldestresp->item_out) { 3091 rev = allocate_devent(oldestresp->disp); 3092 if (rev != NULL) { 3093 rev->buffer.base = NULL; 3094 rev->result = ISC_R_CANCELED; 3095 rev->id = oldestresp->id; 3096 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, 3097 DNS_EVENT_DISPATCH, 3098 oldestresp->action, 3099 oldestresp->arg, oldestresp, 3100 NULL, NULL); 3101 oldestresp->item_out = true; 3102 isc_task_send(oldestresp->task, 3103 ISC_EVENT_PTR(&rev)); 3104 inc_stats(disp->mgr, 3105 dns_resstatscounter_dispabort); 3106 } 3107 } 3108 3109 /* 3110 * Move this entry to the tail so that it won't (easily) be 3111 * examined before actually being canceled. 3112 */ 3113 ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link); 3114 ISC_LIST_APPEND(disp->activesockets, oldestsocket, link); 3115 } 3116 3117 qid = DNS_QID(disp); 3118 3119 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 3120 /* 3121 * Get a separate UDP socket with a random port number. 3122 */ 3123 result = get_dispsocket(disp, dest, sockmgr, &dispsocket, 3124 &localport); 3125 if (result != ISC_R_SUCCESS) { 3126 UNLOCK(&disp->lock); 3127 inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); 3128 return (result); 3129 } 3130 } else { 3131 localport = disp->localport; 3132 } 3133 3134 /* 3135 * Try somewhat hard to find an unique ID unless FIXEDID is set 3136 * in which case we use the id passed in via *idp. 3137 */ 3138 LOCK(&qid->lock); 3139 if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { 3140 id = *idp; 3141 } else { 3142 id = (dns_messageid_t)isc_random16(); 3143 } 3144 ok = false; 3145 i = 0; 3146 do { 3147 bucket = dns_hash(qid, dest, id, localport); 3148 if (entry_search(qid, dest, id, localport, bucket) == NULL) { 3149 ok = true; 3150 break; 3151 } 3152 if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0) { 3153 break; 3154 } 3155 id += qid->qid_increment; 3156 id &= 0x0000ffff; 3157 } while (i++ < 64); 3158 UNLOCK(&qid->lock); 3159 3160 if (!ok) { 3161 UNLOCK(&disp->lock); 3162 return (ISC_R_NOMORE); 3163 } 3164 3165 res = isc_mempool_get(disp->mgr->rpool); 3166 if (res == NULL) { 3167 if (dispsocket != NULL) { 3168 destroy_dispsocket(disp, &dispsocket); 3169 } 3170 UNLOCK(&disp->lock); 3171 return (ISC_R_NOMEMORY); 3172 } 3173 3174 disp->refcount++; 3175 disp->requests++; 3176 res->task = NULL; 3177 isc_task_attach(task, &res->task); 3178 res->disp = disp; 3179 res->id = id; 3180 res->port = localport; 3181 res->bucket = bucket; 3182 res->host = *dest; 3183 res->action = action; 3184 res->arg = arg; 3185 res->dispsocket = dispsocket; 3186 if (dispsocket != NULL) { 3187 dispsocket->resp = res; 3188 } 3189 res->item_out = false; 3190 ISC_LIST_INIT(res->items); 3191 ISC_LINK_INIT(res, link); 3192 res->magic = RESPONSE_MAGIC; 3193 3194 LOCK(&qid->lock); 3195 ISC_LIST_APPEND(qid->qid_table[bucket], res, link); 3196 UNLOCK(&qid->lock); 3197 3198 inc_stats(disp->mgr, (qid == disp->mgr->qid) 3199 ? dns_resstatscounter_disprequdp 3200 : dns_resstatscounter_dispreqtcp); 3201 3202 request_log(disp, res, LVL(90), "attached to task %p", res->task); 3203 3204 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) || 3205 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) 3206 { 3207 result = startrecv(disp, dispsocket); 3208 if (result != ISC_R_SUCCESS) { 3209 LOCK(&qid->lock); 3210 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); 3211 UNLOCK(&qid->lock); 3212 3213 if (dispsocket != NULL) { 3214 destroy_dispsocket(disp, &dispsocket); 3215 } 3216 3217 disp->refcount--; 3218 disp->requests--; 3219 3220 dec_stats(disp->mgr, 3221 (qid == disp->mgr->qid) 3222 ? dns_resstatscounter_disprequdp 3223 : dns_resstatscounter_dispreqtcp); 3224 3225 UNLOCK(&disp->lock); 3226 isc_task_detach(&res->task); 3227 isc_mempool_put(disp->mgr->rpool, res); 3228 return (result); 3229 } 3230 } 3231 3232 if (dispsocket != NULL) { 3233 ISC_LIST_APPEND(disp->activesockets, dispsocket, link); 3234 } 3235 3236 UNLOCK(&disp->lock); 3237 3238 *idp = id; 3239 *resp = res; 3240 3241 if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { 3242 INSIST(res->dispsocket != NULL); 3243 } 3244 3245 return (ISC_R_SUCCESS); 3246 } 3247 3248 void 3249 dns_dispatch_starttcp(dns_dispatch_t *disp) { 3250 REQUIRE(VALID_DISPATCH(disp)); 3251 3252 dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]); 3253 3254 LOCK(&disp->lock); 3255 if ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) == 0) { 3256 disp->attributes |= DNS_DISPATCHATTR_CONNECTED; 3257 (void)startrecv(disp, NULL); 3258 } 3259 UNLOCK(&disp->lock); 3260 } 3261 3262 isc_result_t 3263 dns_dispatch_getnext(dns_dispentry_t *resp, dns_dispatchevent_t **sockevent) { 3264 dns_dispatch_t *disp; 3265 dns_dispatchevent_t *ev; 3266 3267 REQUIRE(VALID_RESPONSE(resp)); 3268 REQUIRE(sockevent != NULL && *sockevent != NULL); 3269 3270 disp = resp->disp; 3271 REQUIRE(VALID_DISPATCH(disp)); 3272 3273 ev = *sockevent; 3274 *sockevent = NULL; 3275 3276 LOCK(&disp->lock); 3277 3278 REQUIRE(resp->item_out); 3279 resp->item_out = false; 3280 3281 if (ev->buffer.base != NULL) { 3282 free_buffer(disp, ev->buffer.base, ev->buffer.length); 3283 } 3284 free_devent(disp, ev); 3285 3286 if (disp->shutting_down == 1) { 3287 UNLOCK(&disp->lock); 3288 return (ISC_R_SHUTTINGDOWN); 3289 } 3290 ev = ISC_LIST_HEAD(resp->items); 3291 if (ev != NULL) { 3292 ISC_LIST_UNLINK(resp->items, ev, ev_link); 3293 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, 3294 resp->action, resp->arg, resp, NULL, NULL); 3295 request_log(disp, resp, LVL(90), 3296 "[c] Sent event %p buffer %p len %d to task %p", ev, 3297 ev->buffer.base, ev->buffer.length, resp->task); 3298 resp->item_out = true; 3299 isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); 3300 } 3301 UNLOCK(&disp->lock); 3302 return (ISC_R_SUCCESS); 3303 } 3304 3305 void 3306 dns_dispatch_removeresponse(dns_dispentry_t **resp, 3307 dns_dispatchevent_t **sockevent) { 3308 dns_dispatchmgr_t *mgr; 3309 dns_dispatch_t *disp; 3310 dns_dispentry_t *res; 3311 dispsocket_t *dispsock; 3312 dns_dispatchevent_t *ev; 3313 unsigned int bucket; 3314 bool killit; 3315 unsigned int n; 3316 isc_eventlist_t events; 3317 dns_qid_t *qid; 3318 3319 REQUIRE(resp != NULL); 3320 REQUIRE(VALID_RESPONSE(*resp)); 3321 3322 res = *resp; 3323 *resp = NULL; 3324 3325 disp = res->disp; 3326 REQUIRE(VALID_DISPATCH(disp)); 3327 mgr = disp->mgr; 3328 REQUIRE(VALID_DISPATCHMGR(mgr)); 3329 3330 qid = DNS_QID(disp); 3331 3332 if (sockevent != NULL) { 3333 REQUIRE(*sockevent != NULL); 3334 ev = *sockevent; 3335 *sockevent = NULL; 3336 } else { 3337 ev = NULL; 3338 } 3339 3340 LOCK(&disp->lock); 3341 3342 INSIST(disp->requests > 0); 3343 disp->requests--; 3344 dec_stats(disp->mgr, (qid == disp->mgr->qid) 3345 ? dns_resstatscounter_disprequdp 3346 : dns_resstatscounter_dispreqtcp); 3347 INSIST(disp->refcount > 0); 3348 disp->refcount--; 3349 if (disp->refcount == 0) { 3350 if (disp->recv_pending > 0) { 3351 isc_socket_cancel(disp->socket, disp->task[0], 3352 ISC_SOCKCANCEL_RECV); 3353 } 3354 for (dispsock = ISC_LIST_HEAD(disp->activesockets); 3355 dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link)) 3356 { 3357 isc_socket_cancel(dispsock->socket, dispsock->task, 3358 ISC_SOCKCANCEL_RECV); 3359 } 3360 disp->shutting_down = 1; 3361 } 3362 3363 bucket = res->bucket; 3364 3365 LOCK(&qid->lock); 3366 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); 3367 UNLOCK(&qid->lock); 3368 3369 if (ev == NULL && res->item_out) { 3370 /* 3371 * We've posted our event, but the caller hasn't gotten it 3372 * yet. Take it back. 3373 */ 3374 ISC_LIST_INIT(events); 3375 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH, NULL, 3376 &events); 3377 /* 3378 * We had better have gotten it back. 3379 */ 3380 INSIST(n == 1); 3381 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events); 3382 } 3383 3384 if (ev != NULL) { 3385 REQUIRE(res->item_out); 3386 res->item_out = false; 3387 if (ev->buffer.base != NULL) { 3388 free_buffer(disp, ev->buffer.base, ev->buffer.length); 3389 } 3390 free_devent(disp, ev); 3391 } 3392 3393 request_log(disp, res, LVL(90), "detaching from task %p", res->task); 3394 isc_task_detach(&res->task); 3395 3396 if (res->dispsocket != NULL) { 3397 isc_socket_cancel(res->dispsocket->socket, 3398 res->dispsocket->task, ISC_SOCKCANCEL_RECV); 3399 res->dispsocket->resp = NULL; 3400 } 3401 3402 /* 3403 * Free any buffered responses as well 3404 */ 3405 ev = ISC_LIST_HEAD(res->items); 3406 while (ev != NULL) { 3407 ISC_LIST_UNLINK(res->items, ev, ev_link); 3408 if (ev->buffer.base != NULL) { 3409 free_buffer(disp, ev->buffer.base, ev->buffer.length); 3410 } 3411 free_devent(disp, ev); 3412 ev = ISC_LIST_HEAD(res->items); 3413 } 3414 res->magic = 0; 3415 isc_mempool_put(disp->mgr->rpool, res); 3416 if (disp->shutting_down == 1) { 3417 do_cancel(disp); 3418 } else { 3419 (void)startrecv(disp, NULL); 3420 } 3421 3422 killit = destroy_disp_ok(disp); 3423 UNLOCK(&disp->lock); 3424 if (killit) { 3425 isc_task_send(disp->task[0], &disp->ctlevent); 3426 } 3427 } 3428 3429 /* 3430 * disp must be locked. 3431 */ 3432 static void 3433 do_cancel(dns_dispatch_t *disp) { 3434 dns_dispatchevent_t *ev; 3435 dns_dispentry_t *resp; 3436 dns_qid_t *qid; 3437 3438 if (disp->shutdown_out == 1) { 3439 return; 3440 } 3441 3442 qid = DNS_QID(disp); 3443 3444 /* 3445 * Search for the first response handler without packets outstanding 3446 * unless a specific handler is given. 3447 */ 3448 LOCK(&qid->lock); 3449 for (resp = linear_first(qid); resp != NULL && resp->item_out; 3450 /* Empty. */) 3451 { 3452 resp = linear_next(qid, resp); 3453 } 3454 3455 /* 3456 * No one to send the cancel event to, so nothing to do. 3457 */ 3458 if (resp == NULL) { 3459 goto unlock; 3460 } 3461 3462 /* 3463 * Send the shutdown failsafe event to this resp. 3464 */ 3465 ev = disp->failsafe_ev; 3466 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, 3467 resp->action, resp->arg, resp, NULL, NULL); 3468 ev->result = disp->shutdown_why; 3469 ev->buffer.base = NULL; 3470 ev->buffer.length = 0; 3471 disp->shutdown_out = 1; 3472 request_log(disp, resp, LVL(10), "cancel: failsafe event %p -> task %p", 3473 ev, resp->task); 3474 resp->item_out = true; 3475 isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); 3476 unlock: 3477 UNLOCK(&qid->lock); 3478 } 3479 3480 isc_socket_t * 3481 dns_dispatch_getsocket(dns_dispatch_t *disp) { 3482 REQUIRE(VALID_DISPATCH(disp)); 3483 3484 return (disp->socket); 3485 } 3486 3487 isc_socket_t * 3488 dns_dispatch_getentrysocket(dns_dispentry_t *resp) { 3489 REQUIRE(VALID_RESPONSE(resp)); 3490 3491 if (resp->dispsocket != NULL) { 3492 return (resp->dispsocket->socket); 3493 } else { 3494 return (NULL); 3495 } 3496 } 3497 3498 isc_result_t 3499 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) { 3500 REQUIRE(VALID_DISPATCH(disp)); 3501 REQUIRE(addrp != NULL); 3502 3503 if (disp->socktype == isc_sockettype_udp) { 3504 *addrp = disp->local; 3505 return (ISC_R_SUCCESS); 3506 } 3507 return (ISC_R_NOTIMPLEMENTED); 3508 } 3509 3510 void 3511 dns_dispatch_cancel(dns_dispatch_t *disp) { 3512 REQUIRE(VALID_DISPATCH(disp)); 3513 3514 LOCK(&disp->lock); 3515 3516 if (disp->shutting_down == 1) { 3517 UNLOCK(&disp->lock); 3518 return; 3519 } 3520 3521 disp->shutdown_why = ISC_R_CANCELED; 3522 disp->shutting_down = 1; 3523 do_cancel(disp); 3524 3525 UNLOCK(&disp->lock); 3526 3527 return; 3528 } 3529 3530 unsigned int 3531 dns_dispatch_getattributes(dns_dispatch_t *disp) { 3532 REQUIRE(VALID_DISPATCH(disp)); 3533 3534 /* 3535 * We don't bother locking disp here; it's the caller's responsibility 3536 * to use only non volatile flags. 3537 */ 3538 return (disp->attributes); 3539 } 3540 3541 void 3542 dns_dispatch_changeattributes(dns_dispatch_t *disp, unsigned int attributes, 3543 unsigned int mask) { 3544 REQUIRE(VALID_DISPATCH(disp)); 3545 /* Exclusive attribute can only be set on creation */ 3546 REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); 3547 /* Also, a dispatch with randomport specified cannot start listening */ 3548 REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 || 3549 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0); 3550 3551 /* XXXMLG 3552 * Should check for valid attributes here! 3553 */ 3554 3555 LOCK(&disp->lock); 3556 3557 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) { 3558 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 && 3559 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) 3560 { 3561 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN; 3562 (void)startrecv(disp, NULL); 3563 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 3564 0 && 3565 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) 3566 { 3567 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; 3568 if (disp->recv_pending != 0) { 3569 isc_socket_cancel(disp->socket, disp->task[0], 3570 ISC_SOCKCANCEL_RECV); 3571 } 3572 } 3573 } 3574 3575 disp->attributes &= ~mask; 3576 disp->attributes |= (attributes & mask); 3577 UNLOCK(&disp->lock); 3578 } 3579 3580 void 3581 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) { 3582 void *buf; 3583 isc_socketevent_t *sevent, *newsevent; 3584 3585 REQUIRE(VALID_DISPATCH(disp)); 3586 REQUIRE(event != NULL); 3587 3588 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) { 3589 return; 3590 } 3591 3592 sevent = (isc_socketevent_t *)event; 3593 INSIST(sevent->n <= disp->mgr->buffersize); 3594 3595 newsevent = (isc_socketevent_t *)isc_event_allocate( 3596 disp->mgr->mctx, NULL, DNS_EVENT_IMPORTRECVDONE, udp_shrecv, 3597 disp, sizeof(isc_socketevent_t)); 3598 3599 buf = allocate_udp_buffer(disp); 3600 if (buf == NULL) { 3601 isc_event_free(ISC_EVENT_PTR(&newsevent)); 3602 return; 3603 } 3604 memmove(buf, sevent->region.base, sevent->n); 3605 newsevent->region.base = buf; 3606 newsevent->region.length = disp->mgr->buffersize; 3607 newsevent->n = sevent->n; 3608 newsevent->result = sevent->result; 3609 newsevent->address = sevent->address; 3610 newsevent->timestamp = sevent->timestamp; 3611 newsevent->pktinfo = sevent->pktinfo; 3612 newsevent->attributes = sevent->attributes; 3613 3614 isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent)); 3615 } 3616 3617 dns_dispatch_t * 3618 dns_dispatchset_get(dns_dispatchset_t *dset) { 3619 dns_dispatch_t *disp; 3620 3621 /* check that dispatch set is configured */ 3622 if (dset == NULL || dset->ndisp == 0) { 3623 return (NULL); 3624 } 3625 3626 LOCK(&dset->lock); 3627 disp = dset->dispatches[dset->cur]; 3628 dset->cur++; 3629 if (dset->cur == dset->ndisp) { 3630 dset->cur = 0; 3631 } 3632 UNLOCK(&dset->lock); 3633 3634 return (disp); 3635 } 3636 3637 isc_result_t 3638 dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr, 3639 isc_taskmgr_t *taskmgr, dns_dispatch_t *source, 3640 dns_dispatchset_t **dsetp, int n) { 3641 isc_result_t result; 3642 dns_dispatchset_t *dset; 3643 dns_dispatchmgr_t *mgr; 3644 int i, j; 3645 3646 REQUIRE(VALID_DISPATCH(source)); 3647 REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0); 3648 REQUIRE(dsetp != NULL && *dsetp == NULL); 3649 3650 mgr = source->mgr; 3651 3652 dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t)); 3653 memset(dset, 0, sizeof(*dset)); 3654 3655 isc_mutex_init(&dset->lock); 3656 3657 dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n); 3658 3659 isc_mem_attach(mctx, &dset->mctx); 3660 dset->ndisp = n; 3661 dset->cur = 0; 3662 3663 dset->dispatches[0] = NULL; 3664 dns_dispatch_attach(source, &dset->dispatches[0]); 3665 3666 LOCK(&mgr->lock); 3667 for (i = 1; i < n; i++) { 3668 dset->dispatches[i] = NULL; 3669 result = dispatch_createudp( 3670 mgr, sockmgr, taskmgr, &source->local, 3671 source->maxrequests, source->attributes, 3672 &dset->dispatches[i], source->socket); 3673 if (result != ISC_R_SUCCESS) { 3674 goto fail; 3675 } 3676 } 3677 3678 UNLOCK(&mgr->lock); 3679 *dsetp = dset; 3680 3681 return (ISC_R_SUCCESS); 3682 3683 fail: 3684 UNLOCK(&mgr->lock); 3685 3686 for (j = 0; j < i; j++) { 3687 dns_dispatch_detach(&(dset->dispatches[j])); 3688 } 3689 isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n); 3690 if (dset->mctx == mctx) { 3691 isc_mem_detach(&dset->mctx); 3692 } 3693 3694 isc_mutex_destroy(&dset->lock); 3695 isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t)); 3696 return (result); 3697 } 3698 3699 void 3700 dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) { 3701 int i; 3702 3703 REQUIRE(dset != NULL); 3704 3705 for (i = 0; i < dset->ndisp; i++) { 3706 isc_socket_t *sock; 3707 sock = dns_dispatch_getsocket(dset->dispatches[i]); 3708 isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL); 3709 } 3710 } 3711 3712 void 3713 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { 3714 dns_dispatchset_t *dset; 3715 int i; 3716 3717 REQUIRE(dsetp != NULL && *dsetp != NULL); 3718 3719 dset = *dsetp; 3720 *dsetp = NULL; 3721 for (i = 0; i < dset->ndisp; i++) { 3722 dns_dispatch_detach(&(dset->dispatches[i])); 3723 } 3724 isc_mem_put(dset->mctx, dset->dispatches, 3725 sizeof(dns_dispatch_t *) * dset->ndisp); 3726 isc_mutex_destroy(&dset->lock); 3727 isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); 3728 } 3729 3730 void 3731 dns_dispatch_setdscp(dns_dispatch_t *disp, isc_dscp_t dscp) { 3732 REQUIRE(VALID_DISPATCH(disp)); 3733 disp->dscp = dscp; 3734 } 3735 3736 isc_dscp_t 3737 dns_dispatch_getdscp(dns_dispatch_t *disp) { 3738 REQUIRE(VALID_DISPATCH(disp)); 3739 return (disp->dscp); 3740 } 3741 3742 #if 0 3743 void 3744 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) { 3745 dns_dispatch_t *disp; 3746 char foo[1024]; 3747 3748 disp = ISC_LIST_HEAD(mgr->list); 3749 while (disp != NULL) { 3750 isc_sockaddr_format(&disp->local, foo, sizeof(foo)); 3751 printf("\tdispatch %p, addr %s\n", disp, foo); 3752 disp = ISC_LIST_NEXT(disp, link); 3753 } 3754 } 3755 #endif /* if 0 */ 3756