xref: /netbsd-src/external/mpl/bind/dist/lib/dns/dispatch.c (revision bcda20f65a8566e103791ec395f7f499ef322704)
1 /*	$NetBSD: dispatch.c,v 1.11 2025/01/26 16:25:22 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * SPDX-License-Identifier: MPL-2.0
7  *
8  * This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11  *
12  * See the COPYRIGHT file distributed with this work for additional
13  * information regarding copyright ownership.
14  */
15 
16 /*! \file */
17 
18 #include <inttypes.h>
19 #include <stdbool.h>
20 #include <stdlib.h>
21 #include <sys/types.h>
22 #include <unistd.h>
23 
24 #include <isc/async.h>
25 #include <isc/hash.h>
26 #include <isc/hashmap.h>
27 #include <isc/loop.h>
28 #include <isc/mem.h>
29 #include <isc/mutex.h>
30 #include <isc/net.h>
31 #include <isc/netmgr.h>
32 #include <isc/portset.h>
33 #include <isc/random.h>
34 #include <isc/stats.h>
35 #include <isc/string.h>
36 #include <isc/tid.h>
37 #include <isc/time.h>
38 #include <isc/tls.h>
39 #include <isc/urcu.h>
40 #include <isc/util.h>
41 
42 #include <dns/acl.h>
43 #include <dns/dispatch.h>
44 #include <dns/log.h>
45 #include <dns/message.h>
46 #include <dns/stats.h>
47 #include <dns/transport.h>
48 #include <dns/types.h>
49 
/*
 * List head type for chains of dispatch entries; used below for the
 * per-dispatch 'pending' and 'active' lists and for the temporary list
 * of entries whose callbacks are about to be run.
 */
typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
51 
/*
 * Dispatch manager: owns the QID hash table, one table of TCP dispatches
 * per loop, and the lists of ports available for outgoing queries.
 */
struct dns_dispatchmgr {
	/* Unlocked. */
	/* DNS_DISPATCHMGR_MAGIC while the manager is valid, 0 once destroyed */
	unsigned int magic;
	isc_refcount_t references;
	isc_mem_t *mctx;
	/* Optional ACL; UDP packets from matching addresses are dropped */
	dns_acl_t *blackhole;
	/* Optional statistics counters (see inc_stats()/dec_stats()) */
	isc_stats_t *stats;
	isc_nm_t *nm;

	/* Number of loops; also the number of entries in 'tcps' */
	uint32_t nloops;

	/* Array of 'nloops' hash tables; entries are matched by dispatch_match() */
	struct cds_lfht **tcps;

	/* Hash table of dispatch entries keyed by <peer, id, port> */
	struct cds_lfht *qids;

	in_port_t *v4ports;    /*%< available ports for IPv4 */
	unsigned int nv4ports; /*%< # of available ports for IPv4 */
	in_port_t *v6ports;    /*%< available ports for IPv6 */
	unsigned int nv6ports; /*%< # of available ports for IPv6 */
};
72 
/*
 * Connection state; both dispatches and dispatch entries carry a 'state'
 * field of this type.  state2str() maps each value to a printable name.
 */
typedef enum {
	DNS_DISPATCHSTATE_NONE = 0UL,
	DNS_DISPATCHSTATE_CONNECTING,
	DNS_DISPATCHSTATE_CONNECTED,
	DNS_DISPATCHSTATE_CANCELED,
} dns_dispatchstate_t;
79 
/*
 * A dispatch entry ("response"): one outstanding query registered on a
 * dispatch, together with the callbacks to run when something happens
 * to it.
 */
struct dns_dispentry {
	/* Checked against RESPONSE_MAGIC by VALID_RESPONSE(); zeroed on destroy */
	unsigned int magic;
	isc_refcount_t references;
	isc_mem_t *mctx;
	/* Owning dispatch; a reference is released in dispentry_destroy() */
	dns_dispatch_t *disp;
	/* Loop whose clock (isc_loop_now()) is used for timeout arithmetic */
	isc_loop_t *loop;
	isc_nmhandle_t *handle; /*%< netmgr handle for UDP connection */
	dns_dispatchstate_t state;
	dns_transport_t *transport;
	isc_tlsctx_cache_t *tlsctx_cache;
	/* Incremented on every setup_socket() call, which gives up past a limit */
	unsigned int retries;
	/* Compared against dispentry_runtime(), i.e. in milliseconds; 0 = none */
	unsigned int timeout;
	/* When reading started; epoch means "not started" (dispentry_runtime()) */
	isc_time_t start;
	isc_sockaddr_t local;
	isc_sockaddr_t peer;
	/* Local port; together with 'peer' and 'id' forms the QID table key */
	in_port_t port;
	dns_messageid_t id;
	/*
	 * Callbacks.  'response' is invoked from the read callbacks with
	 * (result, region, arg); 'connected' and 'sent' are presumably the
	 * connect/send completion callbacks (their use is not visible in
	 * this part of the file).
	 */
	dispatch_cb_t connected;
	dispatch_cb_t sent;
	dispatch_cb_t response;
	void *arg;
	/* True while a read is outstanding; cleared when a read is delivered */
	bool reading;
	/* Result recorded by tcp_recv_add() for later delivery */
	isc_result_t result;
	/* Link in the dispatch's 'active' list */
	ISC_LINK(dns_dispentry_t) alink;
	/* Presumably the link in the dispatch's 'pending' list - TODO confirm */
	ISC_LINK(dns_dispentry_t) plink;
	/* Link in the temporary list of entries awaiting their read callback */
	ISC_LINK(dns_dispentry_t) rlink;

	/* Node used by qid_match(), i.e. membership in the QID hash table */
	struct cds_lfht_node ht_node;
	/* Used by call_rcu() to defer freeing (dispentry_destroy_rcu()) */
	struct rcu_head rcu_head;
};
110 
/*
 * A dispatch: a UDP or TCP endpoint through which queries are sent and
 * responses are received on behalf of its dispatch entries.
 */
struct dns_dispatch {
	/* Unlocked. */
	unsigned int magic; /*%< magic */
	/* Thread id the read callbacks REQUIRE to be running on */
	uint32_t tid;
	isc_socktype_t socktype;
	isc_refcount_t references;
	isc_mem_t *mctx;
	dns_dispatchmgr_t *mgr;	    /*%< dispatch manager */
	isc_nmhandle_t *handle;	    /*%< netmgr handle for TCP connection */
	isc_sockaddr_t local;	    /*%< local address */
	isc_sockaddr_t peer;	    /*%< peer address (TCP) */
	dns_transport_t *transport; /*%< TCP transport parameters */

	dns_dispatchopt_t options;
	dns_dispatchstate_t state;

	/* True while a TCP read is outstanding; cleared on entry to tcp_recv() */
	bool reading;

	dns_displist_t pending;
	/* Entries currently waiting for a response on this dispatch */
	dns_displist_t active;

	uint_fast32_t requests; /*%< how many requests we have */

	/*
	 * Count of entries timed out by tcp_recv_oldest(); decremented when
	 * a later response matches no entry (see tcp_recv()).
	 */
	unsigned int timedout;

	/* Node used by dispatch_match(); presumably links into mgr->tcps[] */
	struct cds_lfht_node ht_node;
	struct rcu_head rcu_head;
};
139 
/* Magic number and validity check for dispatch entries (dns_dispentry_t). */
#define RESPONSE_MAGIC	  ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

/* Magic number and validity check for dispatches (dns_dispatch_t). */
#define DISPATCH_MAGIC	  ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

/* Magic number and validity check for dispatch managers. */
#define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)  ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)

/*
 * Reference counting for dispatch entries.  When DNS_DISPATCH_TRACE is
 * enabled, the ref/unref/attach/detach names are redirected to variants
 * that also receive the calling function, file and line.
 */
#if DNS_DISPATCH_TRACE
#define dns_dispentry_ref(ptr) \
	dns_dispentry__ref(ptr, __func__, __FILE__, __LINE__)
#define dns_dispentry_unref(ptr) \
	dns_dispentry__unref(ptr, __func__, __FILE__, __LINE__)
#define dns_dispentry_attach(ptr, ptrp) \
	dns_dispentry__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
#define dns_dispentry_detach(ptrp) \
	dns_dispentry__detach(ptrp, __func__, __FILE__, __LINE__)
ISC_REFCOUNT_TRACE_DECL(dns_dispentry);
#else
ISC_REFCOUNT_DECL(dns_dispentry);
#endif

/*
 * The maximum number of attempts to find a unique
 * <addr, port, query_id> combination.
 */
#define QID_MAX_TRIES 64

/*
 * Initial and minimum QID table sizes.
 */
#define QIDS_INIT_SIZE (1 << 4) /* Must be power of 2 */
#define QIDS_MIN_SIZE  (1 << 4) /* Must be power of 2 */
173 
/*
 * Statics.
 *
 * Forward declarations for file-local functions that are referenced
 * before their definitions.
 */
static void
dispatchmgr_destroy(dns_dispatchmgr_t *mgr);

/* netmgr read callbacks; 'arg' is a dispatch entry (UDP) or a dispatch (TCP) */
static void
udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
	 void *arg);
static void
tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
	 void *arg);
static void
dispentry_cancel(dns_dispentry_t *resp, isc_result_t result);
static isc_result_t
dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr,
		   uint32_t tid, dns_dispatch_t **dispp);
static void
udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp);
static void
udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp);
static void
tcp_startrecv(dns_dispatch_t *disp, dns_dispentry_t *resp);
static void
tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp,
		     int32_t timeout);
static void
udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout);
202 
203 static const char *
204 socktype2str(dns_dispentry_t *resp) {
205 	dns_transport_type_t transport_type = DNS_TRANSPORT_UDP;
206 	dns_dispatch_t *disp = resp->disp;
207 
208 	if (disp->socktype == isc_socktype_tcp) {
209 		if (resp->transport != NULL) {
210 			transport_type =
211 				dns_transport_get_type(resp->transport);
212 		} else {
213 			transport_type = DNS_TRANSPORT_TCP;
214 		}
215 	}
216 
217 	switch (transport_type) {
218 	case DNS_TRANSPORT_UDP:
219 		return "UDP";
220 	case DNS_TRANSPORT_TCP:
221 		return "TCP";
222 	case DNS_TRANSPORT_TLS:
223 		return "TLS";
224 	case DNS_TRANSPORT_HTTP:
225 		return "HTTP";
226 	default:
227 		return "<unexpected>";
228 	}
229 }
230 
231 static const char *
232 state2str(dns_dispatchstate_t state) {
233 	switch (state) {
234 	case DNS_DISPATCHSTATE_NONE:
235 		return "none";
236 	case DNS_DISPATCHSTATE_CONNECTING:
237 		return "connecting";
238 	case DNS_DISPATCHSTATE_CONNECTED:
239 		return "connected";
240 	case DNS_DISPATCHSTATE_CANCELED:
241 		return "canceled";
242 	default:
243 		return "<unexpected>";
244 	}
245 }
246 
247 static void
248 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
249 	ISC_FORMAT_PRINTF(3, 4);
250 
251 static void
252 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
253 	char msgbuf[2048];
254 	va_list ap;
255 
256 	if (!isc_log_wouldlog(dns_lctx, level)) {
257 		return;
258 	}
259 
260 	va_start(ap, fmt);
261 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
262 	va_end(ap);
263 
264 	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
265 		      DNS_LOGMODULE_DISPATCH, level, "dispatchmgr %p: %s", mgr,
266 		      msgbuf);
267 }
268 
269 static void
270 inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
271 	if (mgr->stats != NULL) {
272 		isc_stats_increment(mgr->stats, counter);
273 	}
274 }
275 
276 static void
277 dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
278 	if (mgr->stats != NULL) {
279 		isc_stats_decrement(mgr->stats, counter);
280 	}
281 }
282 
283 static void
284 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
285 	ISC_FORMAT_PRINTF(3, 4);
286 
287 static void
288 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
289 	char msgbuf[2048];
290 	va_list ap;
291 	int r;
292 
293 	if (!isc_log_wouldlog(dns_lctx, level)) {
294 		return;
295 	}
296 
297 	va_start(ap, fmt);
298 	r = vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
299 	if (r < 0) {
300 		msgbuf[0] = '\0';
301 	} else if ((unsigned int)r >= sizeof(msgbuf)) {
302 		/* Truncated */
303 		msgbuf[sizeof(msgbuf) - 1] = '\0';
304 	}
305 	va_end(ap);
306 
307 	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
308 		      DNS_LOGMODULE_DISPATCH, level, "dispatch %p: %s", disp,
309 		      msgbuf);
310 }
311 
312 static void
313 dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...)
314 	ISC_FORMAT_PRINTF(3, 4);
315 
316 static void
317 dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) {
318 	char msgbuf[2048];
319 	va_list ap;
320 	int r;
321 
322 	if (!isc_log_wouldlog(dns_lctx, level)) {
323 		return;
324 	}
325 
326 	va_start(ap, fmt);
327 	r = vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
328 	if (r < 0) {
329 		msgbuf[0] = '\0';
330 	} else if ((unsigned int)r >= sizeof(msgbuf)) {
331 		/* Truncated */
332 		msgbuf[sizeof(msgbuf) - 1] = '\0';
333 	}
334 	va_end(ap);
335 
336 	dispatch_log(resp->disp, level, "%s response %p: %s",
337 		     socktype2str(resp), resp, msgbuf);
338 }
339 
340 /*%
341  * Choose a random port number for a dispatch entry.
342  */
343 static isc_result_t
344 setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp,
345 	     const isc_sockaddr_t *dest, in_port_t *portp) {
346 	dns_dispatchmgr_t *mgr = disp->mgr;
347 	unsigned int nports;
348 	in_port_t *ports = NULL;
349 	in_port_t port = *portp;
350 
351 	if (resp->retries++ > 5) {
352 		return ISC_R_FAILURE;
353 	}
354 
355 	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
356 		nports = mgr->nv4ports;
357 		ports = mgr->v4ports;
358 	} else {
359 		nports = mgr->nv6ports;
360 		ports = mgr->v6ports;
361 	}
362 	if (nports == 0) {
363 		return ISC_R_ADDRNOTAVAIL;
364 	}
365 
366 	resp->local = disp->local;
367 	resp->peer = *dest;
368 
369 	if (port == 0) {
370 		port = ports[isc_random_uniform(nports)];
371 		isc_sockaddr_setport(&resp->local, port);
372 		*portp = port;
373 	}
374 	resp->port = port;
375 
376 	return ISC_R_SUCCESS;
377 }
378 
379 static uint32_t
380 qid_hash(const dns_dispentry_t *dispentry) {
381 	isc_hash32_t hash;
382 
383 	isc_hash32_init(&hash);
384 
385 	isc_sockaddr_hash_ex(&hash, &dispentry->peer, true);
386 	isc_hash32_hash(&hash, &dispentry->id, sizeof(dispentry->id), true);
387 	isc_hash32_hash(&hash, &dispentry->port, sizeof(dispentry->port), true);
388 
389 	return isc_hash32_finalize(&hash);
390 }
391 
392 static int
393 qid_match(struct cds_lfht_node *node, const void *key0) {
394 	const dns_dispentry_t *dispentry =
395 		caa_container_of(node, dns_dispentry_t, ht_node);
396 	const dns_dispentry_t *key = key0;
397 
398 	return dispentry->id == key->id && dispentry->port == key->port &&
399 	       isc_sockaddr_equal(&dispentry->peer, &key->peer);
400 }
401 
402 static void
403 dispentry_destroy_rcu(struct rcu_head *rcu_head) {
404 	dns_dispentry_t *resp = caa_container_of(rcu_head, dns_dispentry_t,
405 						 rcu_head);
406 	isc_mem_putanddetach(&resp->mctx, resp, sizeof(*resp));
407 }
408 
409 static void
410 dispentry_destroy(dns_dispentry_t *resp) {
411 	dns_dispatch_t *disp = resp->disp;
412 
413 	/*
414 	 * We need to call this from here in case there's an external event that
415 	 * shuts down our dispatch (like ISC_R_SHUTTINGDOWN).
416 	 */
417 	dispentry_cancel(resp, ISC_R_CANCELED);
418 
419 	INSIST(disp->requests > 0);
420 	disp->requests--;
421 
422 	resp->magic = 0;
423 
424 	INSIST(!ISC_LINK_LINKED(resp, plink));
425 	INSIST(!ISC_LINK_LINKED(resp, alink));
426 	INSIST(!ISC_LINK_LINKED(resp, rlink));
427 
428 	dispentry_log(resp, ISC_LOG_DEBUG(90), "destroying");
429 
430 	if (resp->handle != NULL) {
431 		dispentry_log(resp, ISC_LOG_DEBUG(90),
432 			      "detaching handle %p from %p", resp->handle,
433 			      &resp->handle);
434 		isc_nmhandle_detach(&resp->handle);
435 	}
436 
437 	if (resp->tlsctx_cache != NULL) {
438 		isc_tlsctx_cache_detach(&resp->tlsctx_cache);
439 	}
440 
441 	if (resp->transport != NULL) {
442 		dns_transport_detach(&resp->transport);
443 	}
444 
445 	dns_dispatch_detach(&disp); /* DISPATCH001 */
446 
447 	call_rcu(&resp->rcu_head, dispentry_destroy_rcu);
448 }
449 
/*
 * Instantiate the reference-counting functions for dns_dispentry_t with
 * dispentry_destroy() as the destructor (presumably run when the last
 * reference is dropped; the macros are defined outside this file).
 */
#if DNS_DISPATCH_TRACE
ISC_REFCOUNT_TRACE_IMPL(dns_dispentry, dispentry_destroy);
#else
ISC_REFCOUNT_IMPL(dns_dispentry, dispentry_destroy);
#endif
455 
456 /*
457  * How long in milliseconds has it been since this dispentry
458  * started reading?
459  */
460 static unsigned int
461 dispentry_runtime(dns_dispentry_t *resp, const isc_time_t *now) {
462 	if (isc_time_isepoch(&resp->start)) {
463 		return 0;
464 	}
465 
466 	return isc_time_microdiff(now, &resp->start) / 1000;
467 }
468 
469 /*
470  * General flow:
471  *
472  * If I/O result == CANCELED or error, free the buffer.
473  *
474  * If query, free the buffer, restart.
475  *
476  * If response:
477  *	Allocate event, fill in details.
478  *		If cannot allocate, free buffer, restart.
479  *	find target.  If not found, free buffer, restart.
480  *	if event queue is not empty, queue.  else, send.
481  *	restart.
482  */
483 static void
484 udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
485 	 void *arg) {
486 	dns_dispentry_t *resp = (dns_dispentry_t *)arg;
487 	dns_dispatch_t *disp = NULL;
488 	dns_messageid_t id;
489 	isc_result_t dres;
490 	isc_buffer_t source;
491 	unsigned int flags;
492 	isc_sockaddr_t peer;
493 	isc_netaddr_t netaddr;
494 	int match, timeout = 0;
495 	bool respond = true;
496 	isc_time_t now;
497 
498 	REQUIRE(VALID_RESPONSE(resp));
499 	REQUIRE(VALID_DISPATCH(resp->disp));
500 
501 	disp = resp->disp;
502 
503 	REQUIRE(disp->tid == isc_tid());
504 	INSIST(resp->reading);
505 	resp->reading = false;
506 
507 	if (resp->state == DNS_DISPATCHSTATE_CANCELED) {
508 		/*
509 		 * Nobody is interested in the callback if the response
510 		 * has been canceled already.  Detach from the response
511 		 * and the handle.
512 		 */
513 		respond = false;
514 		eresult = ISC_R_CANCELED;
515 	}
516 
517 	dispentry_log(resp, ISC_LOG_DEBUG(90),
518 		      "read callback:%s, requests %" PRIuFAST32,
519 		      isc_result_totext(eresult), disp->requests);
520 
521 	if (eresult != ISC_R_SUCCESS) {
522 		/*
523 		 * This is most likely a network error on a connected
524 		 * socket, a timeout, or the query has been canceled.
525 		 * It makes no sense to check the address or parse the
526 		 * packet, but we can return the error to the caller.
527 		 */
528 		goto done;
529 	}
530 
531 	peer = isc_nmhandle_peeraddr(handle);
532 	isc_netaddr_fromsockaddr(&netaddr, &peer);
533 
534 	/*
535 	 * If this is from a blackholed address, drop it.
536 	 */
537 	if (disp->mgr->blackhole != NULL &&
538 	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole, NULL, &match,
539 			  NULL) == ISC_R_SUCCESS &&
540 	    match > 0)
541 	{
542 		if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(10))) {
543 			char netaddrstr[ISC_NETADDR_FORMATSIZE];
544 			isc_netaddr_format(&netaddr, netaddrstr,
545 					   sizeof(netaddrstr));
546 			dispentry_log(resp, ISC_LOG_DEBUG(10),
547 				      "blackholed packet from %s", netaddrstr);
548 		}
549 		goto next;
550 	}
551 
552 	/*
553 	 * Peek into the buffer to see what we can see.
554 	 */
555 	id = resp->id;
556 	isc_buffer_init(&source, region->base, region->length);
557 	isc_buffer_add(&source, region->length);
558 	dres = dns_message_peekheader(&source, &id, &flags);
559 	if (dres != ISC_R_SUCCESS) {
560 		char netaddrstr[ISC_NETADDR_FORMATSIZE];
561 		isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr));
562 		dispentry_log(resp, ISC_LOG_DEBUG(10),
563 			      "got garbage packet from %s", netaddrstr);
564 		goto next;
565 	}
566 
567 	dispentry_log(resp, ISC_LOG_DEBUG(92),
568 		      "got valid DNS message header, /QR %c, id %u",
569 		      (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);
570 
571 	/*
572 	 * Look at the message flags.  If it's a query, ignore it.
573 	 */
574 	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
575 		goto next;
576 	}
577 
578 	/*
579 	 * The QID and the address must match the expected ones.
580 	 */
581 	if (resp->id != id || !isc_sockaddr_equal(&peer, &resp->peer)) {
582 		dispentry_log(resp, ISC_LOG_DEBUG(90),
583 			      "response doesn't match");
584 		inc_stats(disp->mgr, dns_resstatscounter_mismatch);
585 		goto next;
586 	}
587 
588 	/*
589 	 * We have the right resp, so call the caller back.
590 	 */
591 	goto done;
592 
593 next:
594 	/*
595 	 * This is the wrong response.  Check whether there is still enough
596 	 * time to wait for the correct one to arrive before the timeout fires.
597 	 */
598 	now = isc_loop_now(resp->loop);
599 	if (resp->timeout > 0) {
600 		timeout = resp->timeout - dispentry_runtime(resp, &now);
601 		if (timeout <= 0) {
602 			/*
603 			 * The time window for receiving the correct response is
604 			 * already closed, libuv has just not processed the
605 			 * socket timer yet.  Invoke the read callback,
606 			 * indicating a timeout.
607 			 */
608 			eresult = ISC_R_TIMEDOUT;
609 			goto done;
610 		}
611 	}
612 
613 	/*
614 	 * Do not invoke the read callback just yet and instead wait for the
615 	 * proper response to arrive until the original timeout fires.
616 	 */
617 	respond = false;
618 	udp_dispatch_getnext(resp, timeout);
619 
620 done:
621 	if (respond) {
622 		dispentry_log(resp, ISC_LOG_DEBUG(90),
623 			      "UDP read callback on %p: %s", handle,
624 			      isc_result_totext(eresult));
625 		resp->response(eresult, region, resp->arg);
626 	}
627 
628 	dns_dispentry_detach(&resp); /* DISPENTRY003 */
629 }
630 
631 static isc_result_t
632 tcp_recv_oldest(dns_dispatch_t *disp, dns_dispentry_t **respp) {
633 	dns_dispentry_t *resp = NULL;
634 	resp = ISC_LIST_HEAD(disp->active);
635 	if (resp != NULL) {
636 		disp->timedout++;
637 
638 		*respp = resp;
639 		return ISC_R_TIMEDOUT;
640 	}
641 
642 	return ISC_R_NOTFOUND;
643 }
644 
645 /*
646  * NOTE: Must be RCU read locked!
647  */
648 static isc_result_t
649 tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region,
650 		 isc_sockaddr_t *peer, dns_dispentry_t **respp) {
651 	isc_buffer_t source;
652 	dns_messageid_t id;
653 	unsigned int flags;
654 	isc_result_t result = ISC_R_SUCCESS;
655 
656 	dispatch_log(disp, ISC_LOG_DEBUG(90),
657 		     "TCP read success, length == %d, addr = %p",
658 		     region->length, region->base);
659 
660 	/*
661 	 * Peek into the buffer to see what we can see.
662 	 */
663 	isc_buffer_init(&source, region->base, region->length);
664 	isc_buffer_add(&source, region->length);
665 	result = dns_message_peekheader(&source, &id, &flags);
666 	if (result != ISC_R_SUCCESS) {
667 		dispatch_log(disp, ISC_LOG_DEBUG(10), "got garbage packet");
668 		return ISC_R_UNEXPECTED;
669 	}
670 
671 	dispatch_log(disp, ISC_LOG_DEBUG(92),
672 		     "got valid DNS message header, /QR %c, id %u",
673 		     (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);
674 
675 	/*
676 	 * Look at the message flags.  If it's a query, ignore it and keep
677 	 * reading.
678 	 */
679 	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
680 		dispatch_log(disp, ISC_LOG_DEBUG(10),
681 			     "got DNS query instead of answer");
682 		return ISC_R_UNEXPECTED;
683 	}
684 
685 	/*
686 	 * We have a valid response; find the associated dispentry object
687 	 * and call the caller back.
688 	 */
689 	dns_dispentry_t key = {
690 		.id = id,
691 		.peer = *peer,
692 		.port = isc_sockaddr_getport(&disp->local),
693 	};
694 	struct cds_lfht_iter iter;
695 	cds_lfht_lookup(disp->mgr->qids, qid_hash(&key), qid_match, &key,
696 			&iter);
697 
698 	dns_dispentry_t *resp = cds_lfht_entry(cds_lfht_iter_get_node(&iter),
699 					       dns_dispentry_t, ht_node);
700 
701 	/* Skip responses that are not ours */
702 	if (resp != NULL && resp->disp == disp) {
703 		if (!resp->reading) {
704 			/*
705 			 * We already got a message for this QID and weren't
706 			 * expecting any more.
707 			 */
708 			result = ISC_R_UNEXPECTED;
709 		} else {
710 			*respp = resp;
711 		}
712 	} else {
713 		result = ISC_R_NOTFOUND;
714 	}
715 	dispatch_log(disp, ISC_LOG_DEBUG(90),
716 		     "search for response in hashtable: %s",
717 		     isc_result_totext(result));
718 
719 	return result;
720 }
721 
722 static void
723 tcp_recv_add(dns_displist_t *resps, dns_dispentry_t *resp,
724 	     isc_result_t result) {
725 	dns_dispentry_ref(resp); /* DISPENTRY009 */
726 	ISC_LIST_UNLINK(resp->disp->active, resp, alink);
727 	ISC_LIST_APPEND(*resps, resp, rlink);
728 	INSIST(resp->reading);
729 	resp->reading = false;
730 	resp->result = result;
731 }
732 
733 static void
734 tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps,
735 		  isc_result_t result) {
736 	dns_dispentry_t *resp = NULL, *next = NULL;
737 
738 	/*
739 	 * If there are any active responses, shut them all down.
740 	 */
741 	for (resp = ISC_LIST_HEAD(disp->active); resp != NULL; resp = next) {
742 		next = ISC_LIST_NEXT(resp, alink);
743 		tcp_recv_add(resps, resp, result);
744 	}
745 	disp->state = DNS_DISPATCHSTATE_CANCELED;
746 }
747 
748 static void
749 tcp_recv_processall(dns_displist_t *resps, isc_region_t *region) {
750 	dns_dispentry_t *resp = NULL, *next = NULL;
751 
752 	for (resp = ISC_LIST_HEAD(*resps); resp != NULL; resp = next) {
753 		next = ISC_LIST_NEXT(resp, rlink);
754 		ISC_LIST_UNLINK(*resps, resp, rlink);
755 
756 		dispentry_log(resp, ISC_LOG_DEBUG(90), "read callback: %s",
757 			      isc_result_totext(resp->result));
758 		resp->response(resp->result, region, resp->arg);
759 		dns_dispentry_detach(&resp); /* DISPENTRY009 */
760 	}
761 }
762 
763 /*
764  * General flow:
765  *
766  * If I/O result == CANCELED, EOF, or error, notify everyone as the
767  * various queues drain.
768  *
769  * If response:
770  *	Allocate event, fill in details.
771  *		If cannot allocate, restart.
772  *	find target.  If not found, restart.
773  *	if event queue is not empty, queue.  else, send.
774  *	restart.
775  */
776 static void
777 tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
778 	 void *arg) {
779 	dns_dispatch_t *disp = (dns_dispatch_t *)arg;
780 	dns_dispentry_t *resp = NULL;
781 	char buf[ISC_SOCKADDR_FORMATSIZE];
782 	isc_sockaddr_t peer;
783 	dns_displist_t resps = ISC_LIST_INITIALIZER;
784 	isc_time_t now;
785 	int timeout = 0;
786 
787 	REQUIRE(VALID_DISPATCH(disp));
788 
789 	REQUIRE(disp->tid == isc_tid());
790 	INSIST(disp->reading);
791 	disp->reading = false;
792 
793 	dispatch_log(disp, ISC_LOG_DEBUG(90),
794 		     "TCP read:%s:requests %" PRIuFAST32,
795 		     isc_result_totext(result), disp->requests);
796 
797 	peer = isc_nmhandle_peeraddr(handle);
798 
799 	rcu_read_lock();
800 	/*
801 	 * Phase 1: Process timeout and success.
802 	 */
803 	switch (result) {
804 	case ISC_R_TIMEDOUT:
805 		/*
806 		 * Time out the oldest response in the active queue.
807 		 */
808 		result = tcp_recv_oldest(disp, &resp);
809 		break;
810 	case ISC_R_SUCCESS:
811 		/* We got an answer */
812 		result = tcp_recv_success(disp, region, &peer, &resp);
813 		break;
814 
815 	default:
816 		break;
817 	}
818 
819 	if (resp != NULL) {
820 		tcp_recv_add(&resps, resp, result);
821 	}
822 
823 	/*
824 	 * Phase 2: Look if we timed out before.
825 	 */
826 
827 	if (result == ISC_R_NOTFOUND) {
828 		if (disp->timedout > 0) {
829 			/* There was active query that timed-out before */
830 			disp->timedout--;
831 		} else {
832 			result = ISC_R_UNEXPECTED;
833 		}
834 	}
835 
836 	/*
837 	 * Phase 3: Trigger timeouts.  It's possible that the responses would
838 	 * have been timed out out already, but non-matching TCP reads have
839 	 * prevented this.
840 	 */
841 	resp = ISC_LIST_HEAD(disp->active);
842 	if (resp != NULL) {
843 		now = isc_loop_now(resp->loop);
844 	}
845 	while (resp != NULL) {
846 		dns_dispentry_t *next = ISC_LIST_NEXT(resp, alink);
847 
848 		if (resp->timeout > 0) {
849 			timeout = resp->timeout - dispentry_runtime(resp, &now);
850 			if (timeout <= 0) {
851 				tcp_recv_add(&resps, resp, ISC_R_TIMEDOUT);
852 			}
853 		}
854 
855 		resp = next;
856 	}
857 
858 	/*
859 	 * Phase 4: log if we errored out.
860 	 */
861 	switch (result) {
862 	case ISC_R_SUCCESS:
863 	case ISC_R_TIMEDOUT:
864 	case ISC_R_NOTFOUND:
865 		break;
866 
867 	case ISC_R_SHUTTINGDOWN:
868 	case ISC_R_CANCELED:
869 	case ISC_R_EOF:
870 	case ISC_R_CONNECTIONRESET:
871 		isc_sockaddr_format(&peer, buf, sizeof(buf));
872 		dispatch_log(disp, ISC_LOG_DEBUG(90),
873 			     "shutting down TCP: %s: %s", buf,
874 			     isc_result_totext(result));
875 		tcp_recv_shutdown(disp, &resps, result);
876 		break;
877 	default:
878 		isc_sockaddr_format(&peer, buf, sizeof(buf));
879 		dispatch_log(disp, ISC_LOG_ERROR,
880 			     "shutting down due to TCP "
881 			     "receive error: %s: %s",
882 			     buf, isc_result_totext(result));
883 		tcp_recv_shutdown(disp, &resps, result);
884 		break;
885 	}
886 
887 	/*
888 	 * Phase 5: Resume reading if there are still active responses
889 	 */
890 	resp = ISC_LIST_HEAD(disp->active);
891 	if (resp != NULL) {
892 		if (resp->timeout > 0) {
893 			timeout = resp->timeout - dispentry_runtime(resp, &now);
894 			INSIST(timeout > 0);
895 		}
896 		tcp_startrecv(disp, resp);
897 		if (timeout > 0) {
898 			isc_nmhandle_settimeout(handle, timeout);
899 		}
900 	}
901 
902 	rcu_read_unlock();
903 
904 	/*
905 	 * Phase 6: Process all scheduled callbacks.
906 	 */
907 	tcp_recv_processall(&resps, region);
908 
909 	dns_dispatch_detach(&disp); /* DISPATCH002 */
910 }
911 
912 /*%
913  * Create a temporary port list to set the initial default set of dispatch
914  * ephemeral ports.  This is almost meaningless as the application will
915  * normally set the ports explicitly, but is provided to fill some minor corner
916  * cases.
917  */
918 static void
919 create_default_portset(isc_mem_t *mctx, int family, isc_portset_t **portsetp) {
920 	in_port_t low, high;
921 
922 	isc_net_getudpportrange(family, &low, &high);
923 
924 	isc_portset_create(mctx, portsetp);
925 	isc_portset_addrange(*portsetp, low, high);
926 }
927 
928 static isc_result_t
929 setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
930 	      isc_portset_t *v6portset) {
931 	in_port_t *v4ports, *v6ports, p = 0;
932 	unsigned int nv4ports, nv6ports, i4 = 0, i6 = 0;
933 
934 	nv4ports = isc_portset_nports(v4portset);
935 	nv6ports = isc_portset_nports(v6portset);
936 
937 	v4ports = NULL;
938 	if (nv4ports != 0) {
939 		v4ports = isc_mem_cget(mgr->mctx, nv4ports, sizeof(in_port_t));
940 	}
941 	v6ports = NULL;
942 	if (nv6ports != 0) {
943 		v6ports = isc_mem_cget(mgr->mctx, nv6ports, sizeof(in_port_t));
944 	}
945 
946 	do {
947 		if (isc_portset_isset(v4portset, p)) {
948 			INSIST(i4 < nv4ports);
949 			v4ports[i4++] = p;
950 		}
951 		if (isc_portset_isset(v6portset, p)) {
952 			INSIST(i6 < nv6ports);
953 			v6ports[i6++] = p;
954 		}
955 	} while (p++ < 65535);
956 	INSIST(i4 == nv4ports && i6 == nv6ports);
957 
958 	if (mgr->v4ports != NULL) {
959 		isc_mem_cput(mgr->mctx, mgr->v4ports, mgr->nv4ports,
960 			     sizeof(in_port_t));
961 	}
962 	mgr->v4ports = v4ports;
963 	mgr->nv4ports = nv4ports;
964 
965 	if (mgr->v6ports != NULL) {
966 		isc_mem_cput(mgr->mctx, mgr->v6ports, mgr->nv6ports,
967 			     sizeof(in_port_t));
968 	}
969 	mgr->v6ports = v6ports;
970 	mgr->nv6ports = nv6ports;
971 
972 	return ISC_R_SUCCESS;
973 }
974 
975 /*
976  * Publics.
977  */
978 
979 isc_result_t
980 dns_dispatchmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t *nm,
981 		       dns_dispatchmgr_t **mgrp) {
982 	dns_dispatchmgr_t *mgr = NULL;
983 	isc_portset_t *v4portset = NULL;
984 	isc_portset_t *v6portset = NULL;
985 
986 	REQUIRE(mctx != NULL);
987 	REQUIRE(mgrp != NULL && *mgrp == NULL);
988 
989 	mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
990 	*mgr = (dns_dispatchmgr_t){
991 		.magic = 0,
992 		.nloops = isc_loopmgr_nloops(loopmgr),
993 	};
994 
995 #if DNS_DISPATCH_TRACE
996 	fprintf(stderr, "dns_dispatchmgr__init:%s:%s:%d:%p->references = 1\n",
997 		__func__, __FILE__, __LINE__, mgr);
998 #endif
999 	isc_refcount_init(&mgr->references, 1);
1000 
1001 	isc_mem_attach(mctx, &mgr->mctx);
1002 	isc_nm_attach(nm, &mgr->nm);
1003 
1004 	mgr->tcps = isc_mem_cget(mgr->mctx, mgr->nloops, sizeof(mgr->tcps[0]));
1005 	for (size_t i = 0; i < mgr->nloops; i++) {
1006 		mgr->tcps[i] = cds_lfht_new(
1007 			2, 2, 0, CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING,
1008 			NULL);
1009 	}
1010 
1011 	create_default_portset(mgr->mctx, AF_INET, &v4portset);
1012 	create_default_portset(mgr->mctx, AF_INET6, &v6portset);
1013 
1014 	setavailports(mgr, v4portset, v6portset);
1015 
1016 	isc_portset_destroy(mgr->mctx, &v4portset);
1017 	isc_portset_destroy(mgr->mctx, &v6portset);
1018 
1019 	mgr->qids = cds_lfht_new(QIDS_INIT_SIZE, QIDS_MIN_SIZE, 0,
1020 				 CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING,
1021 				 NULL);
1022 
1023 	mgr->magic = DNS_DISPATCHMGR_MAGIC;
1024 
1025 	*mgrp = mgr;
1026 	return ISC_R_SUCCESS;
1027 }
1028 
/*
 * Instantiate the reference-counting functions for dns_dispatchmgr_t
 * with dispatchmgr_destroy() as the destructor (presumably run when the
 * last reference is dropped; the macros are defined outside this file).
 */
#if DNS_DISPATCH_TRACE
ISC_REFCOUNT_TRACE_IMPL(dns_dispatchmgr, dispatchmgr_destroy);
#else
ISC_REFCOUNT_IMPL(dns_dispatchmgr, dispatchmgr_destroy);
#endif
1034 
1035 void
1036 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1037 	REQUIRE(VALID_DISPATCHMGR(mgr));
1038 	if (mgr->blackhole != NULL) {
1039 		dns_acl_detach(&mgr->blackhole);
1040 	}
1041 	dns_acl_attach(blackhole, &mgr->blackhole);
1042 }
1043 
1044 dns_acl_t *
1045 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1046 	REQUIRE(VALID_DISPATCHMGR(mgr));
1047 	return mgr->blackhole;
1048 }
1049 
1050 isc_result_t
1051 dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
1052 			      isc_portset_t *v6portset) {
1053 	REQUIRE(VALID_DISPATCHMGR(mgr));
1054 	return setavailports(mgr, v4portset, v6portset);
1055 }
1056 
1057 static void
1058 dispatchmgr_destroy(dns_dispatchmgr_t *mgr) {
1059 	REQUIRE(VALID_DISPATCHMGR(mgr));
1060 
1061 	isc_refcount_destroy(&mgr->references);
1062 
1063 	mgr->magic = 0;
1064 
1065 	RUNTIME_CHECK(!cds_lfht_destroy(mgr->qids, NULL));
1066 
1067 	for (size_t i = 0; i < mgr->nloops; i++) {
1068 		RUNTIME_CHECK(!cds_lfht_destroy(mgr->tcps[i], NULL));
1069 	}
1070 	isc_mem_cput(mgr->mctx, mgr->tcps, mgr->nloops, sizeof(mgr->tcps[0]));
1071 
1072 	if (mgr->blackhole != NULL) {
1073 		dns_acl_detach(&mgr->blackhole);
1074 	}
1075 
1076 	if (mgr->stats != NULL) {
1077 		isc_stats_detach(&mgr->stats);
1078 	}
1079 
1080 	if (mgr->v4ports != NULL) {
1081 		isc_mem_cput(mgr->mctx, mgr->v4ports, mgr->nv4ports,
1082 			     sizeof(in_port_t));
1083 	}
1084 	if (mgr->v6ports != NULL) {
1085 		isc_mem_cput(mgr->mctx, mgr->v6ports, mgr->nv6ports,
1086 			     sizeof(in_port_t));
1087 	}
1088 
1089 	isc_nm_detach(&mgr->nm);
1090 
1091 	isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(dns_dispatchmgr_t));
1092 }
1093 
1094 void
1095 dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
1096 	REQUIRE(VALID_DISPATCHMGR(mgr));
1097 	REQUIRE(mgr->stats == NULL);
1098 
1099 	isc_stats_attach(stats, &mgr->stats);
1100 }
1101 
1102 /*
1103  * Allocate and set important limits.
1104  */
1105 static void
1106 dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, uint32_t tid,
1107 		  dns_dispatch_t **dispp) {
1108 	dns_dispatch_t *disp = NULL;
1109 
1110 	REQUIRE(VALID_DISPATCHMGR(mgr));
1111 	REQUIRE(dispp != NULL && *dispp == NULL);
1112 
1113 	/*
1114 	 * Set up the dispatcher, mostly.  Don't bother setting some of
1115 	 * the options that are controlled by tcp vs. udp, etc.
1116 	 */
1117 
1118 	disp = isc_mem_get(mgr->mctx, sizeof(*disp));
1119 	*disp = (dns_dispatch_t){
1120 		.socktype = type,
1121 		.active = ISC_LIST_INITIALIZER,
1122 		.pending = ISC_LIST_INITIALIZER,
1123 		.tid = tid,
1124 		.magic = DISPATCH_MAGIC,
1125 	};
1126 
1127 	isc_mem_attach(mgr->mctx, &disp->mctx);
1128 
1129 	dns_dispatchmgr_attach(mgr, &disp->mgr);
1130 #if DNS_DISPATCH_TRACE
1131 	fprintf(stderr, "dns_dispatch__init:%s:%s:%d:%p->references = 1\n",
1132 		__func__, __FILE__, __LINE__, disp);
1133 #endif
1134 	isc_refcount_init(&disp->references, 1); /* DISPATCH000 */
1135 
1136 	*dispp = disp;
1137 }
1138 
1139 struct dispatch_key {
1140 	const isc_sockaddr_t *local;
1141 	const isc_sockaddr_t *peer;
1142 	const dns_transport_t *transport;
1143 };
1144 
1145 static uint32_t
1146 dispatch_hash(struct dispatch_key *key) {
1147 	uint32_t hashval = isc_sockaddr_hash(key->peer, false);
1148 	if (key->local) {
1149 		hashval ^= isc_sockaddr_hash(key->local, true);
1150 	}
1151 
1152 	return hashval;
1153 }
1154 
1155 static int
1156 dispatch_match(struct cds_lfht_node *node, const void *key0) {
1157 	dns_dispatch_t *disp = caa_container_of(node, dns_dispatch_t, ht_node);
1158 	const struct dispatch_key *key = key0;
1159 	isc_sockaddr_t local;
1160 	isc_sockaddr_t peer;
1161 
1162 	if (disp->handle != NULL) {
1163 		local = isc_nmhandle_localaddr(disp->handle);
1164 		peer = isc_nmhandle_peeraddr(disp->handle);
1165 	} else {
1166 		local = disp->local;
1167 		peer = disp->peer;
1168 	}
1169 
1170 	return isc_sockaddr_equal(&peer, key->peer) &&
1171 	       disp->transport == key->transport &&
1172 	       (key->local == NULL || isc_sockaddr_equal(&local, key->local));
1173 }
1174 
1175 isc_result_t
1176 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr,
1177 		       const isc_sockaddr_t *destaddr,
1178 		       dns_transport_t *transport, dns_dispatchopt_t options,
1179 		       dns_dispatch_t **dispp) {
1180 	dns_dispatch_t *disp = NULL;
1181 	uint32_t tid = isc_tid();
1182 
1183 	REQUIRE(VALID_DISPATCHMGR(mgr));
1184 	REQUIRE(destaddr != NULL);
1185 
1186 	dispatch_allocate(mgr, isc_socktype_tcp, tid, &disp);
1187 
1188 	disp->options = options;
1189 	disp->peer = *destaddr;
1190 	if (transport != NULL) {
1191 		dns_transport_attach(transport, &disp->transport);
1192 	}
1193 
1194 	if (localaddr != NULL) {
1195 		disp->local = *localaddr;
1196 	} else {
1197 		int pf;
1198 		pf = isc_sockaddr_pf(destaddr);
1199 		isc_sockaddr_anyofpf(&disp->local, pf);
1200 		isc_sockaddr_setport(&disp->local, 0);
1201 	}
1202 
1203 	/*
1204 	 * Append it to the dispatcher list.
1205 	 */
1206 	struct dispatch_key key = {
1207 		.local = &disp->local,
1208 		.peer = &disp->peer,
1209 		.transport = transport,
1210 	};
1211 
1212 	if ((disp->options & DNS_DISPATCHOPT_UNSHARED) == 0) {
1213 		rcu_read_lock();
1214 		cds_lfht_add(mgr->tcps[tid], dispatch_hash(&key),
1215 			     &disp->ht_node);
1216 		rcu_read_unlock();
1217 	}
1218 
1219 	if (isc_log_wouldlog(dns_lctx, 90)) {
1220 		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1221 
1222 		isc_sockaddr_format(&disp->local, addrbuf,
1223 				    ISC_SOCKADDR_FORMATSIZE);
1224 
1225 		mgr_log(mgr, ISC_LOG_DEBUG(90),
1226 			"dns_dispatch_createtcp: created TCP dispatch %p for "
1227 			"%s",
1228 			disp, addrbuf);
1229 	}
1230 	*dispp = disp;
1231 
1232 	return ISC_R_SUCCESS;
1233 }
1234 
1235 isc_result_t
1236 dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr,
1237 		    const isc_sockaddr_t *localaddr, dns_transport_t *transport,
1238 		    dns_dispatch_t **dispp) {
1239 	dns_dispatch_t *disp_connected = NULL;
1240 	dns_dispatch_t *disp_fallback = NULL;
1241 	isc_result_t result = ISC_R_NOTFOUND;
1242 	uint32_t tid = isc_tid();
1243 
1244 	REQUIRE(VALID_DISPATCHMGR(mgr));
1245 	REQUIRE(destaddr != NULL);
1246 	REQUIRE(dispp != NULL && *dispp == NULL);
1247 
1248 	struct dispatch_key key = {
1249 		.local = localaddr,
1250 		.peer = destaddr,
1251 		.transport = transport,
1252 	};
1253 
1254 	rcu_read_lock();
1255 	struct cds_lfht_iter iter;
1256 	dns_dispatch_t *disp = NULL;
1257 	cds_lfht_for_each_entry_duplicate(mgr->tcps[tid], dispatch_hash(&key),
1258 					  dispatch_match, &key, &iter, disp,
1259 					  ht_node) {
1260 		INSIST(disp->tid == isc_tid());
1261 		INSIST(disp->socktype == isc_socktype_tcp);
1262 
1263 		switch (disp->state) {
1264 		case DNS_DISPATCHSTATE_NONE:
1265 			/* A dispatch in indeterminate state, skip it */
1266 			break;
1267 		case DNS_DISPATCHSTATE_CONNECTED:
1268 			if (ISC_LIST_EMPTY(disp->active)) {
1269 				/* Ignore dispatch with no responses */
1270 				break;
1271 			}
1272 			/* We found a connected dispatch */
1273 			dns_dispatch_attach(disp, &disp_connected);
1274 			break;
1275 		case DNS_DISPATCHSTATE_CONNECTING:
1276 			if (ISC_LIST_EMPTY(disp->pending)) {
1277 				/* Ignore dispatch with no responses */
1278 				break;
1279 			}
1280 			/* We found "a" dispatch, store it for later */
1281 			if (disp_fallback == NULL) {
1282 				dns_dispatch_attach(disp, &disp_fallback);
1283 			}
1284 			break;
1285 		case DNS_DISPATCHSTATE_CANCELED:
1286 			/* A canceled dispatch, skip it. */
1287 			break;
1288 		default:
1289 			UNREACHABLE();
1290 		}
1291 
1292 		if (disp_connected != NULL) {
1293 			break;
1294 		}
1295 	}
1296 	rcu_read_unlock();
1297 
1298 	if (disp_connected != NULL) {
1299 		/* We found connected dispatch */
1300 		INSIST(disp_connected->handle != NULL);
1301 
1302 		*dispp = disp_connected;
1303 		disp_connected = NULL;
1304 
1305 		result = ISC_R_SUCCESS;
1306 
1307 		if (disp_fallback != NULL) {
1308 			dns_dispatch_detach(&disp_fallback);
1309 		}
1310 	} else if (disp_fallback != NULL) {
1311 		*dispp = disp_fallback;
1312 
1313 		result = ISC_R_SUCCESS;
1314 	}
1315 
1316 	return result;
1317 }
1318 
1319 isc_result_t
1320 dns_dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr,
1321 		       dns_dispatch_t **dispp) {
1322 	isc_result_t result;
1323 	dns_dispatch_t *disp = NULL;
1324 
1325 	REQUIRE(VALID_DISPATCHMGR(mgr));
1326 	REQUIRE(localaddr != NULL);
1327 	REQUIRE(dispp != NULL && *dispp == NULL);
1328 
1329 	result = dispatch_createudp(mgr, localaddr, isc_tid(), &disp);
1330 	if (result == ISC_R_SUCCESS) {
1331 		*dispp = disp;
1332 	}
1333 
1334 	return result;
1335 }
1336 
1337 static isc_result_t
1338 dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr,
1339 		   uint32_t tid, dns_dispatch_t **dispp) {
1340 	isc_result_t result = ISC_R_SUCCESS;
1341 	dns_dispatch_t *disp = NULL;
1342 	isc_sockaddr_t sa_any;
1343 
1344 	/*
1345 	 * Check whether this address/port is available locally.
1346 	 */
1347 	isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
1348 	if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
1349 		result = isc_nm_checkaddr(localaddr, isc_socktype_udp);
1350 		if (result != ISC_R_SUCCESS) {
1351 			return result;
1352 		}
1353 	}
1354 
1355 	dispatch_allocate(mgr, isc_socktype_udp, tid, &disp);
1356 
1357 	if (isc_log_wouldlog(dns_lctx, 90)) {
1358 		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1359 
1360 		isc_sockaddr_format(localaddr, addrbuf,
1361 				    ISC_SOCKADDR_FORMATSIZE);
1362 		mgr_log(mgr, ISC_LOG_DEBUG(90),
1363 			"dispatch_createudp: created UDP dispatch %p for %s",
1364 			disp, addrbuf);
1365 	}
1366 
1367 	disp->local = *localaddr;
1368 
1369 	/*
1370 	 * Don't append it to the dispatcher list, we don't care about UDP, only
1371 	 * TCP should be searched
1372 	 *
1373 	 * ISC_LIST_APPEND(mgr->list, disp, link);
1374 	 */
1375 
1376 	*dispp = disp;
1377 
1378 	return result;
1379 }
1380 
1381 static void
1382 dispatch_destroy_rcu(struct rcu_head *rcu_head) {
1383 	dns_dispatch_t *disp = caa_container_of(rcu_head, dns_dispatch_t,
1384 						rcu_head);
1385 
1386 	isc_mem_putanddetach(&disp->mctx, disp, sizeof(*disp));
1387 }
1388 
1389 static void
1390 dispatch_destroy(dns_dispatch_t *disp) {
1391 	dns_dispatchmgr_t *mgr = disp->mgr;
1392 	uint32_t tid = isc_tid();
1393 
1394 	disp->magic = 0;
1395 
1396 	if (disp->socktype == isc_socktype_tcp &&
1397 	    (disp->options & DNS_DISPATCHOPT_UNSHARED) == 0)
1398 	{
1399 		(void)cds_lfht_del(mgr->tcps[tid], &disp->ht_node);
1400 	}
1401 
1402 	INSIST(disp->requests == 0);
1403 	INSIST(ISC_LIST_EMPTY(disp->pending));
1404 	INSIST(ISC_LIST_EMPTY(disp->active));
1405 
1406 	dispatch_log(disp, ISC_LOG_DEBUG(90), "destroying dispatch %p", disp);
1407 
1408 	if (disp->handle) {
1409 		dispatch_log(disp, ISC_LOG_DEBUG(90),
1410 			     "detaching TCP handle %p from %p", disp->handle,
1411 			     &disp->handle);
1412 		isc_nmhandle_detach(&disp->handle);
1413 	}
1414 	if (disp->transport != NULL) {
1415 		dns_transport_detach(&disp->transport);
1416 	}
1417 	dns_dispatchmgr_detach(&disp->mgr);
1418 
1419 	call_rcu(&disp->rcu_head, dispatch_destroy_rcu);
1420 }
1421 
/*
 * Instantiate the dns_dispatch reference-counting functions with
 * dispatch_destroy() as the destructor; the tracing variant is selected
 * when DNS_DISPATCH_TRACE is enabled.
 */
#if !DNS_DISPATCH_TRACE
ISC_REFCOUNT_IMPL(dns_dispatch, dispatch_destroy);
#else
ISC_REFCOUNT_TRACE_IMPL(dns_dispatch, dispatch_destroy);
#endif
1427 
1428 isc_result_t
1429 dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop,
1430 		 dns_dispatchopt_t options, unsigned int timeout,
1431 		 const isc_sockaddr_t *dest, dns_transport_t *transport,
1432 		 isc_tlsctx_cache_t *tlsctx_cache, dispatch_cb_t connected,
1433 		 dispatch_cb_t sent, dispatch_cb_t response, void *arg,
1434 		 dns_messageid_t *idp, dns_dispentry_t **respp) {
1435 	REQUIRE(VALID_DISPATCH(disp));
1436 	REQUIRE(dest != NULL);
1437 	REQUIRE(respp != NULL && *respp == NULL);
1438 	REQUIRE(idp != NULL);
1439 	REQUIRE(disp->socktype == isc_socktype_tcp ||
1440 		disp->socktype == isc_socktype_udp);
1441 	REQUIRE(connected != NULL);
1442 	REQUIRE(response != NULL);
1443 	REQUIRE(sent != NULL);
1444 	REQUIRE(loop != NULL);
1445 	REQUIRE(disp->tid == isc_tid());
1446 	REQUIRE(disp->transport == transport);
1447 
1448 	if (disp->state == DNS_DISPATCHSTATE_CANCELED) {
1449 		return ISC_R_CANCELED;
1450 	}
1451 
1452 	in_port_t localport = isc_sockaddr_getport(&disp->local);
1453 	dns_dispentry_t *resp = isc_mem_get(disp->mctx, sizeof(*resp));
1454 	*resp = (dns_dispentry_t){
1455 		.timeout = timeout,
1456 		.port = localport,
1457 		.peer = *dest,
1458 		.loop = loop,
1459 		.connected = connected,
1460 		.sent = sent,
1461 		.response = response,
1462 		.arg = arg,
1463 		.alink = ISC_LINK_INITIALIZER,
1464 		.plink = ISC_LINK_INITIALIZER,
1465 		.rlink = ISC_LINK_INITIALIZER,
1466 		.magic = RESPONSE_MAGIC,
1467 	};
1468 
1469 #if DNS_DISPATCH_TRACE
1470 	fprintf(stderr, "dns_dispentry__init:%s:%s:%d:%p->references = 1\n",
1471 		__func__, __FILE__, __LINE__, resp);
1472 #endif
1473 	isc_refcount_init(&resp->references, 1); /* DISPENTRY000 */
1474 
1475 	if (disp->socktype == isc_socktype_udp) {
1476 		isc_result_t result = setup_socket(disp, resp, dest,
1477 						   &localport);
1478 		if (result != ISC_R_SUCCESS) {
1479 			isc_mem_put(disp->mctx, resp, sizeof(*resp));
1480 			inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
1481 			return result;
1482 		}
1483 	}
1484 
1485 	isc_result_t result = ISC_R_NOMORE;
1486 	size_t i = 0;
1487 	rcu_read_lock();
1488 	do {
1489 		/*
1490 		 * Try somewhat hard to find a unique ID. Start with
1491 		 * a random number unless DNS_DISPATCHOPT_FIXEDID is set,
1492 		 * in which case we start with the ID passed in via *idp.
1493 		 */
1494 		resp->id = ((options & DNS_DISPATCHOPT_FIXEDID) != 0)
1495 				   ? *idp
1496 				   : (dns_messageid_t)isc_random16();
1497 
1498 		struct cds_lfht_node *node =
1499 			cds_lfht_add_unique(disp->mgr->qids, qid_hash(resp),
1500 					    qid_match, resp, &resp->ht_node);
1501 
1502 		if (node != &resp->ht_node) {
1503 			if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) {
1504 				/*
1505 				 * When using fixed ID, we either must
1506 				 * use it or fail
1507 				 */
1508 				goto fail;
1509 			}
1510 		} else {
1511 			result = ISC_R_SUCCESS;
1512 			break;
1513 		}
1514 	} while (i++ < QID_MAX_TRIES);
1515 fail:
1516 	if (result != ISC_R_SUCCESS) {
1517 		isc_mem_put(disp->mctx, resp, sizeof(*resp));
1518 		rcu_read_unlock();
1519 		return result;
1520 	}
1521 
1522 	isc_mem_attach(disp->mctx, &resp->mctx);
1523 
1524 	if (transport != NULL) {
1525 		dns_transport_attach(transport, &resp->transport);
1526 	}
1527 
1528 	if (tlsctx_cache != NULL) {
1529 		isc_tlsctx_cache_attach(tlsctx_cache, &resp->tlsctx_cache);
1530 	}
1531 
1532 	dns_dispatch_attach(disp, &resp->disp); /* DISPATCH001 */
1533 
1534 	disp->requests++;
1535 
1536 	inc_stats(disp->mgr, (disp->socktype == isc_socktype_udp)
1537 				     ? dns_resstatscounter_disprequdp
1538 				     : dns_resstatscounter_dispreqtcp);
1539 
1540 	rcu_read_unlock();
1541 
1542 	*idp = resp->id;
1543 	*respp = resp;
1544 
1545 	return ISC_R_SUCCESS;
1546 }
1547 
1548 isc_result_t
1549 dns_dispatch_getnext(dns_dispentry_t *resp) {
1550 	REQUIRE(VALID_RESPONSE(resp));
1551 	REQUIRE(VALID_DISPATCH(resp->disp));
1552 
1553 	dns_dispatch_t *disp = resp->disp;
1554 	isc_result_t result = ISC_R_SUCCESS;
1555 	int32_t timeout = 0;
1556 
1557 	dispentry_log(resp, ISC_LOG_DEBUG(90), "getnext for QID %d", resp->id);
1558 
1559 	if (resp->timeout > 0) {
1560 		isc_time_t now = isc_loop_now(resp->loop);
1561 		timeout = resp->timeout - dispentry_runtime(resp, &now);
1562 		if (timeout <= 0) {
1563 			return ISC_R_TIMEDOUT;
1564 		}
1565 	}
1566 
1567 	REQUIRE(disp->tid == isc_tid());
1568 	switch (disp->socktype) {
1569 	case isc_socktype_udp:
1570 		udp_dispatch_getnext(resp, timeout);
1571 		break;
1572 	case isc_socktype_tcp:
1573 		tcp_dispatch_getnext(disp, resp, timeout);
1574 		break;
1575 	default:
1576 		UNREACHABLE();
1577 	}
1578 
1579 	return result;
1580 }
1581 
1582 /*
1583  * NOTE: Must be RCU read locked!
1584  */
1585 static void
1586 udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) {
1587 	REQUIRE(VALID_RESPONSE(resp));
1588 	REQUIRE(VALID_DISPATCH(resp->disp));
1589 	REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr));
1590 
1591 	dns_dispatch_t *disp = resp->disp;
1592 	bool respond = false;
1593 
1594 	REQUIRE(disp->tid == isc_tid());
1595 	dispentry_log(resp, ISC_LOG_DEBUG(90),
1596 		      "canceling response: %s, %s/%s (%s/%s), "
1597 		      "requests %" PRIuFAST32,
1598 		      isc_result_totext(result), state2str(resp->state),
1599 		      resp->reading ? "reading" : "not reading",
1600 		      state2str(disp->state),
1601 		      disp->reading ? "reading" : "not reading",
1602 		      disp->requests);
1603 
1604 	if (ISC_LINK_LINKED(resp, alink)) {
1605 		ISC_LIST_UNLINK(disp->active, resp, alink);
1606 	}
1607 
1608 	switch (resp->state) {
1609 	case DNS_DISPATCHSTATE_NONE:
1610 		break;
1611 
1612 	case DNS_DISPATCHSTATE_CONNECTING:
1613 		break;
1614 
1615 	case DNS_DISPATCHSTATE_CONNECTED:
1616 		if (resp->reading) {
1617 			respond = true;
1618 			dispentry_log(resp, ISC_LOG_DEBUG(90),
1619 				      "canceling read on %p", resp->handle);
1620 			isc_nm_cancelread(resp->handle);
1621 		}
1622 		break;
1623 
1624 	case DNS_DISPATCHSTATE_CANCELED:
1625 		goto unlock;
1626 
1627 	default:
1628 		UNREACHABLE();
1629 	}
1630 
1631 	dec_stats(disp->mgr, dns_resstatscounter_disprequdp);
1632 
1633 	(void)cds_lfht_del(disp->mgr->qids, &resp->ht_node);
1634 
1635 	resp->state = DNS_DISPATCHSTATE_CANCELED;
1636 
1637 unlock:
1638 	if (respond) {
1639 		dispentry_log(resp, ISC_LOG_DEBUG(90), "read callback: %s",
1640 			      isc_result_totext(result));
1641 		resp->response(result, NULL, resp->arg);
1642 	}
1643 }
1644 
1645 /*
1646  * NOTE: Must be RCU read locked!
1647  */
1648 static void
1649 tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) {
1650 	REQUIRE(VALID_RESPONSE(resp));
1651 	REQUIRE(VALID_DISPATCH(resp->disp));
1652 	REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr));
1653 
1654 	dns_dispatch_t *disp = resp->disp;
1655 	dns_displist_t resps = ISC_LIST_INITIALIZER;
1656 
1657 	REQUIRE(disp->tid == isc_tid());
1658 	dispentry_log(resp, ISC_LOG_DEBUG(90),
1659 		      "canceling response: %s, %s/%s (%s/%s), "
1660 		      "requests %" PRIuFAST32,
1661 		      isc_result_totext(result), state2str(resp->state),
1662 		      resp->reading ? "reading" : "not reading",
1663 		      state2str(disp->state),
1664 		      disp->reading ? "reading" : "not reading",
1665 		      disp->requests);
1666 
1667 	switch (resp->state) {
1668 	case DNS_DISPATCHSTATE_NONE:
1669 		break;
1670 
1671 	case DNS_DISPATCHSTATE_CONNECTING:
1672 		break;
1673 
1674 	case DNS_DISPATCHSTATE_CONNECTED:
1675 		if (resp->reading) {
1676 			tcp_recv_add(&resps, resp, ISC_R_CANCELED);
1677 		}
1678 
1679 		INSIST(!ISC_LINK_LINKED(resp, alink));
1680 
1681 		if (ISC_LIST_EMPTY(disp->active)) {
1682 			INSIST(disp->handle != NULL);
1683 
1684 #if DISPATCH_TCP_KEEPALIVE
1685 			/*
1686 			 * This is an experimental code that keeps the TCP
1687 			 * connection open for 1 second before it is finally
1688 			 * closed.  By keeping the TCP connection open, it can
1689 			 * be reused by dns_request that uses
1690 			 * dns_dispatch_gettcp() to join existing TCP
1691 			 * connections.
1692 			 *
1693 			 * It is disabled for now, because it changes the
1694 			 * behaviour, but I am keeping the code here for future
1695 			 * reference when we improve the dns_dispatch to reuse
1696 			 * the TCP connections also in the resolver.
1697 			 *
1698 			 * The TCP connection reuse should be seamless and not
1699 			 * require any extra handling on the client side though.
1700 			 */
1701 			isc_nmhandle_cleartimeout(disp->handle);
1702 			isc_nmhandle_settimeout(disp->handle, 1000);
1703 
1704 			if (!disp->reading) {
1705 				dispentry_log(resp, ISC_LOG_DEBUG(90),
1706 					      "final 1 second timeout on %p",
1707 					      disp->handle);
1708 				tcp_startrecv(disp, NULL);
1709 			}
1710 #else
1711 			if (disp->reading) {
1712 				dispentry_log(resp, ISC_LOG_DEBUG(90),
1713 					      "canceling read on %p",
1714 					      disp->handle);
1715 				isc_nm_cancelread(disp->handle);
1716 			}
1717 #endif
1718 		}
1719 		break;
1720 
1721 	case DNS_DISPATCHSTATE_CANCELED:
1722 		goto unlock;
1723 
1724 	default:
1725 		UNREACHABLE();
1726 	}
1727 
1728 	dec_stats(disp->mgr, dns_resstatscounter_dispreqtcp);
1729 
1730 	(void)cds_lfht_del(disp->mgr->qids, &resp->ht_node);
1731 
1732 	resp->state = DNS_DISPATCHSTATE_CANCELED;
1733 
1734 unlock:
1735 
1736 	/*
1737 	 * NOTE: Calling the response callback directly from here should be done
1738 	 * asynchronously, as the dns_dispatch_done() is usually called directly
1739 	 * from the response callback, so there's a slight chance that the call
1740 	 * stack will get higher here, but it's mitigated by the ".reading"
1741 	 * flag, so we don't ever go into a loop.
1742 	 */
1743 
1744 	tcp_recv_processall(&resps, NULL);
1745 }
1746 
1747 static void
1748 dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) {
1749 	REQUIRE(VALID_RESPONSE(resp));
1750 	REQUIRE(VALID_DISPATCH(resp->disp));
1751 
1752 	dns_dispatch_t *disp = resp->disp;
1753 
1754 	rcu_read_lock();
1755 	switch (disp->socktype) {
1756 	case isc_socktype_udp:
1757 		udp_dispentry_cancel(resp, result);
1758 		break;
1759 	case isc_socktype_tcp:
1760 		tcp_dispentry_cancel(resp, result);
1761 		break;
1762 	default:
1763 		UNREACHABLE();
1764 	}
1765 	rcu_read_unlock();
1766 }
1767 
1768 void
1769 dns_dispatch_done(dns_dispentry_t **respp) {
1770 	REQUIRE(VALID_RESPONSE(*respp));
1771 
1772 	dns_dispentry_t *resp = *respp;
1773 	*respp = NULL;
1774 
1775 	dispentry_cancel(resp, ISC_R_CANCELED);
1776 	dns_dispentry_detach(&resp); /* DISPENTRY000 */
1777 }
1778 
1779 static void
1780 udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp) {
1781 	REQUIRE(VALID_RESPONSE(resp));
1782 
1783 	dispentry_log(resp, ISC_LOG_DEBUG(90), "attaching handle %p to %p",
1784 		      handle, &resp->handle);
1785 	isc_nmhandle_attach(handle, &resp->handle);
1786 	dns_dispentry_ref(resp); /* DISPENTRY003 */
1787 	dispentry_log(resp, ISC_LOG_DEBUG(90), "reading");
1788 	isc_nm_read(resp->handle, udp_recv, resp);
1789 	resp->reading = true;
1790 }
1791 
1792 static void
1793 tcp_startrecv(dns_dispatch_t *disp, dns_dispentry_t *resp) {
1794 	REQUIRE(VALID_DISPATCH(disp));
1795 	REQUIRE(disp->socktype == isc_socktype_tcp);
1796 
1797 	dns_dispatch_ref(disp); /* DISPATCH002 */
1798 	if (resp != NULL) {
1799 		dispentry_log(resp, ISC_LOG_DEBUG(90), "reading from %p",
1800 			      disp->handle);
1801 		INSIST(!isc_time_isepoch(&resp->start));
1802 	} else {
1803 		dispatch_log(disp, ISC_LOG_DEBUG(90),
1804 			     "TCP reading without response from %p",
1805 			     disp->handle);
1806 	}
1807 	isc_nm_read(disp->handle, tcp_recv, disp);
1808 	disp->reading = true;
1809 }
1810 
1811 static void
1812 resp_connected(void *arg) {
1813 	dns_dispentry_t *resp = arg;
1814 	dispentry_log(resp, ISC_LOG_DEBUG(90), "connect callback: %s",
1815 		      isc_result_totext(resp->result));
1816 
1817 	resp->connected(resp->result, NULL, resp->arg);
1818 	dns_dispentry_detach(&resp); /* DISPENTRY005 */
1819 }
1820 
1821 static void
1822 tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) {
1823 	dns_dispatch_t *disp = (dns_dispatch_t *)arg;
1824 	dns_dispentry_t *resp = NULL;
1825 	dns_dispentry_t *next = NULL;
1826 	dns_displist_t resps = ISC_LIST_INITIALIZER;
1827 
1828 	if (isc_log_wouldlog(dns_lctx, 90)) {
1829 		char localbuf[ISC_SOCKADDR_FORMATSIZE];
1830 		char peerbuf[ISC_SOCKADDR_FORMATSIZE];
1831 		if (handle != NULL) {
1832 			isc_sockaddr_t local = isc_nmhandle_localaddr(handle);
1833 			isc_sockaddr_t peer = isc_nmhandle_peeraddr(handle);
1834 
1835 			isc_sockaddr_format(&local, localbuf,
1836 					    ISC_SOCKADDR_FORMATSIZE);
1837 			isc_sockaddr_format(&peer, peerbuf,
1838 					    ISC_SOCKADDR_FORMATSIZE);
1839 		} else {
1840 			isc_sockaddr_format(&disp->local, localbuf,
1841 					    ISC_SOCKADDR_FORMATSIZE);
1842 			isc_sockaddr_format(&disp->peer, peerbuf,
1843 					    ISC_SOCKADDR_FORMATSIZE);
1844 		}
1845 
1846 		dispatch_log(disp, ISC_LOG_DEBUG(90),
1847 			     "connected from %s to %s: %s", localbuf, peerbuf,
1848 			     isc_result_totext(eresult));
1849 	}
1850 
1851 	REQUIRE(disp->tid == isc_tid());
1852 	INSIST(disp->state == DNS_DISPATCHSTATE_CONNECTING);
1853 
1854 	/*
1855 	 * If there are pending responses, call the connect
1856 	 * callbacks for all of them.
1857 	 */
1858 	for (resp = ISC_LIST_HEAD(disp->pending); resp != NULL; resp = next) {
1859 		next = ISC_LIST_NEXT(resp, plink);
1860 		ISC_LIST_UNLINK(disp->pending, resp, plink);
1861 		ISC_LIST_APPEND(resps, resp, rlink);
1862 		resp->result = eresult;
1863 
1864 		if (resp->state == DNS_DISPATCHSTATE_CANCELED) {
1865 			resp->result = ISC_R_CANCELED;
1866 		} else if (eresult == ISC_R_SUCCESS) {
1867 			resp->state = DNS_DISPATCHSTATE_CONNECTED;
1868 			ISC_LIST_APPEND(disp->active, resp, alink);
1869 			resp->reading = true;
1870 			dispentry_log(resp, ISC_LOG_DEBUG(90), "start reading");
1871 		} else {
1872 			resp->state = DNS_DISPATCHSTATE_NONE;
1873 		}
1874 	}
1875 
1876 	if (ISC_LIST_EMPTY(disp->active)) {
1877 		/* All responses have been canceled */
1878 		disp->state = DNS_DISPATCHSTATE_CANCELED;
1879 	} else if (eresult == ISC_R_SUCCESS) {
1880 		disp->state = DNS_DISPATCHSTATE_CONNECTED;
1881 		isc_nmhandle_attach(handle, &disp->handle);
1882 		tcp_startrecv(disp, resp);
1883 	} else {
1884 		disp->state = DNS_DISPATCHSTATE_NONE;
1885 	}
1886 
1887 	for (resp = ISC_LIST_HEAD(resps); resp != NULL; resp = next) {
1888 		next = ISC_LIST_NEXT(resp, rlink);
1889 		ISC_LIST_UNLINK(resps, resp, rlink);
1890 
1891 		resp_connected(resp);
1892 	}
1893 
1894 	dns_dispatch_detach(&disp); /* DISPATCH003 */
1895 }
1896 
1897 static void
1898 udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) {
1899 	dns_dispentry_t *resp = (dns_dispentry_t *)arg;
1900 	dns_dispatch_t *disp = resp->disp;
1901 
1902 	dispentry_log(resp, ISC_LOG_DEBUG(90), "connected: %s",
1903 		      isc_result_totext(eresult));
1904 
1905 	REQUIRE(disp->tid == isc_tid());
1906 	switch (resp->state) {
1907 	case DNS_DISPATCHSTATE_CANCELED:
1908 		eresult = ISC_R_CANCELED;
1909 		ISC_LIST_UNLINK(disp->pending, resp, plink);
1910 		goto unlock;
1911 	case DNS_DISPATCHSTATE_CONNECTING:
1912 		ISC_LIST_UNLINK(disp->pending, resp, plink);
1913 		break;
1914 	default:
1915 		UNREACHABLE();
1916 	}
1917 
1918 	switch (eresult) {
1919 	case ISC_R_CANCELED:
1920 		break;
1921 	case ISC_R_SUCCESS:
1922 		resp->state = DNS_DISPATCHSTATE_CONNECTED;
1923 		udp_startrecv(handle, resp);
1924 		break;
1925 	case ISC_R_NOPERM:
1926 	case ISC_R_ADDRINUSE: {
1927 		in_port_t localport = isc_sockaddr_getport(&disp->local);
1928 		isc_result_t result;
1929 
1930 		/* probably a port collision; try a different one */
1931 		result = setup_socket(disp, resp, &resp->peer, &localport);
1932 		if (result == ISC_R_SUCCESS) {
1933 			udp_dispatch_connect(disp, resp);
1934 			goto detach;
1935 		}
1936 		resp->state = DNS_DISPATCHSTATE_NONE;
1937 		break;
1938 	}
1939 	default:
1940 		resp->state = DNS_DISPATCHSTATE_NONE;
1941 		break;
1942 	}
1943 unlock:
1944 
1945 	dispentry_log(resp, ISC_LOG_DEBUG(90), "connect callback: %s",
1946 		      isc_result_totext(eresult));
1947 	resp->connected(eresult, NULL, resp->arg);
1948 
1949 detach:
1950 	dns_dispentry_detach(&resp); /* DISPENTRY004 */
1951 }
1952 
1953 static void
1954 udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) {
1955 	REQUIRE(disp->tid == isc_tid());
1956 	resp->state = DNS_DISPATCHSTATE_CONNECTING;
1957 	resp->start = isc_loop_now(resp->loop);
1958 	dns_dispentry_ref(resp); /* DISPENTRY004 */
1959 	ISC_LIST_APPEND(disp->pending, resp, plink);
1960 
1961 	isc_nm_udpconnect(disp->mgr->nm, &resp->local, &resp->peer,
1962 			  udp_connected, resp, resp->timeout);
1963 }
1964 
1965 static isc_result_t
1966 tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) {
1967 	dns_transport_type_t transport_type = DNS_TRANSPORT_TCP;
1968 	isc_tlsctx_t *tlsctx = NULL;
1969 	isc_tlsctx_client_session_cache_t *sess_cache = NULL;
1970 
1971 	if (resp->transport != NULL) {
1972 		transport_type = dns_transport_get_type(resp->transport);
1973 	}
1974 
1975 	if (transport_type == DNS_TRANSPORT_TLS) {
1976 		isc_result_t result;
1977 
1978 		result = dns_transport_get_tlsctx(
1979 			resp->transport, &resp->peer, resp->tlsctx_cache,
1980 			resp->mctx, &tlsctx, &sess_cache);
1981 
1982 		if (result != ISC_R_SUCCESS) {
1983 			return result;
1984 		}
1985 		INSIST(tlsctx != NULL);
1986 	}
1987 
1988 	/* Check whether the dispatch is already connecting or connected. */
1989 	REQUIRE(disp->tid == isc_tid());
1990 	switch (disp->state) {
1991 	case DNS_DISPATCHSTATE_NONE:
1992 		/* First connection, continue with connecting */
1993 		disp->state = DNS_DISPATCHSTATE_CONNECTING;
1994 		resp->state = DNS_DISPATCHSTATE_CONNECTING;
1995 		resp->start = isc_loop_now(resp->loop);
1996 		dns_dispentry_ref(resp); /* DISPENTRY005 */
1997 		ISC_LIST_APPEND(disp->pending, resp, plink);
1998 
1999 		char localbuf[ISC_SOCKADDR_FORMATSIZE];
2000 		char peerbuf[ISC_SOCKADDR_FORMATSIZE];
2001 
2002 		isc_sockaddr_format(&disp->local, localbuf,
2003 				    ISC_SOCKADDR_FORMATSIZE);
2004 		isc_sockaddr_format(&disp->peer, peerbuf,
2005 				    ISC_SOCKADDR_FORMATSIZE);
2006 
2007 		dns_dispatch_ref(disp); /* DISPATCH003 */
2008 		dispentry_log(resp, ISC_LOG_DEBUG(90),
2009 			      "connecting from %s to %s, timeout %u", localbuf,
2010 			      peerbuf, resp->timeout);
2011 
2012 		isc_nm_streamdnsconnect(disp->mgr->nm, &disp->local,
2013 					&disp->peer, tcp_connected, disp,
2014 					resp->timeout, tlsctx, sess_cache,
2015 					ISC_NM_PROXY_NONE, NULL);
2016 		break;
2017 
2018 	case DNS_DISPATCHSTATE_CONNECTING:
2019 		/* Connection pending; add resp to the list */
2020 		resp->state = DNS_DISPATCHSTATE_CONNECTING;
2021 		resp->start = isc_loop_now(resp->loop);
2022 		dns_dispentry_ref(resp); /* DISPENTRY005 */
2023 		ISC_LIST_APPEND(disp->pending, resp, plink);
2024 		break;
2025 
2026 	case DNS_DISPATCHSTATE_CONNECTED:
2027 		resp->state = DNS_DISPATCHSTATE_CONNECTED;
2028 		resp->start = isc_loop_now(resp->loop);
2029 
2030 		/* Add the resp to the reading list */
2031 		ISC_LIST_APPEND(disp->active, resp, alink);
2032 		dispentry_log(resp, ISC_LOG_DEBUG(90),
2033 			      "already connected; attaching");
2034 		resp->reading = true;
2035 
2036 		if (!disp->reading) {
2037 			/* Restart the reading */
2038 			tcp_startrecv(disp, resp);
2039 		}
2040 
2041 		/* Already connected; call the connected cb asynchronously */
2042 		dns_dispentry_ref(resp); /* DISPENTRY005 */
2043 		isc_async_run(resp->loop, resp_connected, resp);
2044 		break;
2045 
2046 	default:
2047 		UNREACHABLE();
2048 	}
2049 
2050 	return ISC_R_SUCCESS;
2051 }
2052 
2053 isc_result_t
2054 dns_dispatch_connect(dns_dispentry_t *resp) {
2055 	REQUIRE(VALID_RESPONSE(resp));
2056 	REQUIRE(VALID_DISPATCH(resp->disp));
2057 
2058 	dns_dispatch_t *disp = resp->disp;
2059 
2060 	switch (disp->socktype) {
2061 	case isc_socktype_tcp:
2062 		return tcp_dispatch_connect(disp, resp);
2063 
2064 	case isc_socktype_udp:
2065 		udp_dispatch_connect(disp, resp);
2066 		return ISC_R_SUCCESS;
2067 
2068 	default:
2069 		UNREACHABLE();
2070 	}
2071 }
2072 
2073 static void
2074 send_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
2075 	dns_dispentry_t *resp = (dns_dispentry_t *)cbarg;
2076 
2077 	REQUIRE(VALID_RESPONSE(resp));
2078 
2079 	dns_dispatch_t *disp = resp->disp;
2080 
2081 	REQUIRE(VALID_DISPATCH(disp));
2082 
2083 	dispentry_log(resp, ISC_LOG_DEBUG(90), "sent: %s",
2084 		      isc_result_totext(result));
2085 
2086 	resp->sent(result, NULL, resp->arg);
2087 
2088 	if (result != ISC_R_SUCCESS) {
2089 		dispentry_cancel(resp, result);
2090 	}
2091 
2092 	dns_dispentry_detach(&resp); /* DISPENTRY007 */
2093 	isc_nmhandle_detach(&handle);
2094 }
2095 
2096 static void
2097 tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp,
2098 		     int32_t timeout) {
2099 	REQUIRE(timeout <= INT16_MAX);
2100 
2101 	dispentry_log(resp, ISC_LOG_DEBUG(90), "continue reading");
2102 
2103 	if (!resp->reading) {
2104 		ISC_LIST_APPEND(disp->active, resp, alink);
2105 		resp->reading = true;
2106 	}
2107 
2108 	if (disp->reading) {
2109 		return;
2110 	}
2111 
2112 	if (timeout > 0) {
2113 		isc_nmhandle_settimeout(disp->handle, timeout);
2114 	}
2115 
2116 	dns_dispatch_ref(disp); /* DISPATCH002 */
2117 	isc_nm_read(disp->handle, tcp_recv, disp);
2118 	disp->reading = true;
2119 }
2120 
2121 static void
2122 udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout) {
2123 	REQUIRE(timeout <= INT16_MAX);
2124 
2125 	if (resp->reading) {
2126 		return;
2127 	}
2128 
2129 	if (timeout > 0) {
2130 		isc_nmhandle_settimeout(resp->handle, timeout);
2131 	}
2132 
2133 	dispentry_log(resp, ISC_LOG_DEBUG(90), "continue reading");
2134 
2135 	dns_dispentry_ref(resp); /* DISPENTRY003 */
2136 	isc_nm_read(resp->handle, udp_recv, resp);
2137 	resp->reading = true;
2138 }
2139 
2140 void
2141 dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) {
2142 	REQUIRE(VALID_RESPONSE(resp));
2143 	REQUIRE(VALID_DISPATCH(resp->disp));
2144 
2145 	dns_dispatch_t *disp = resp->disp;
2146 
2147 	dispentry_log(resp, ISC_LOG_DEBUG(90), "resume");
2148 
2149 	REQUIRE(disp->tid == isc_tid());
2150 	switch (disp->socktype) {
2151 	case isc_socktype_udp: {
2152 		udp_dispatch_getnext(resp, timeout);
2153 		break;
2154 	}
2155 	case isc_socktype_tcp:
2156 		INSIST(disp->timedout > 0);
2157 		disp->timedout--;
2158 		tcp_dispatch_getnext(disp, resp, timeout);
2159 		break;
2160 	default:
2161 		UNREACHABLE();
2162 	}
2163 }
2164 
2165 void
2166 dns_dispatch_send(dns_dispentry_t *resp, isc_region_t *r) {
2167 	REQUIRE(VALID_RESPONSE(resp));
2168 	REQUIRE(VALID_DISPATCH(resp->disp));
2169 
2170 	dns_dispatch_t *disp = resp->disp;
2171 	isc_nmhandle_t *sendhandle = NULL;
2172 
2173 	dispentry_log(resp, ISC_LOG_DEBUG(90), "sending");
2174 	switch (disp->socktype) {
2175 	case isc_socktype_udp:
2176 		isc_nmhandle_attach(resp->handle, &sendhandle);
2177 		break;
2178 	case isc_socktype_tcp:
2179 		isc_nmhandle_attach(disp->handle, &sendhandle);
2180 		break;
2181 	default:
2182 		UNREACHABLE();
2183 	}
2184 	dns_dispentry_ref(resp); /* DISPENTRY007 */
2185 	isc_nm_send(sendhandle, r, send_done, resp);
2186 }
2187 
2188 isc_result_t
2189 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
2190 	REQUIRE(VALID_DISPATCH(disp));
2191 	REQUIRE(addrp != NULL);
2192 
2193 	if (disp->socktype == isc_socktype_udp) {
2194 		*addrp = disp->local;
2195 		return ISC_R_SUCCESS;
2196 	}
2197 	return ISC_R_NOTIMPLEMENTED;
2198 }
2199 
2200 isc_result_t
2201 dns_dispentry_getlocaladdress(dns_dispentry_t *resp, isc_sockaddr_t *addrp) {
2202 	REQUIRE(VALID_RESPONSE(resp));
2203 	REQUIRE(VALID_DISPATCH(resp->disp));
2204 	REQUIRE(addrp != NULL);
2205 
2206 	dns_dispatch_t *disp = resp->disp;
2207 
2208 	switch (disp->socktype) {
2209 	case isc_socktype_tcp:
2210 		*addrp = disp->local;
2211 		return ISC_R_SUCCESS;
2212 	case isc_socktype_udp:
2213 		*addrp = isc_nmhandle_localaddr(resp->handle);
2214 		return ISC_R_SUCCESS;
2215 	default:
2216 		UNREACHABLE();
2217 	}
2218 }
2219 
2220 dns_dispatch_t *
2221 dns_dispatchset_get(dns_dispatchset_t *dset) {
2222 	uint32_t tid = isc_tid();
2223 
2224 	/* check that dispatch set is configured */
2225 	if (dset == NULL || dset->ndisp == 0) {
2226 		return NULL;
2227 	}
2228 
2229 	INSIST(tid < dset->ndisp);
2230 
2231 	return dset->dispatches[tid];
2232 }
2233 
2234 isc_result_t
2235 dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source,
2236 		       dns_dispatchset_t **dsetp, uint32_t ndisp) {
2237 	isc_result_t result;
2238 	dns_dispatchset_t *dset = NULL;
2239 	dns_dispatchmgr_t *mgr = NULL;
2240 	size_t i;
2241 
2242 	REQUIRE(VALID_DISPATCH(source));
2243 	REQUIRE(source->socktype == isc_socktype_udp);
2244 	REQUIRE(dsetp != NULL && *dsetp == NULL);
2245 
2246 	mgr = source->mgr;
2247 
2248 	dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t));
2249 	*dset = (dns_dispatchset_t){ .ndisp = ndisp };
2250 
2251 	isc_mem_attach(mctx, &dset->mctx);
2252 
2253 	dset->dispatches = isc_mem_cget(dset->mctx, ndisp,
2254 					sizeof(dns_dispatch_t *));
2255 
2256 	dset->dispatches[0] = NULL;
2257 	dns_dispatch_attach(source, &dset->dispatches[0]); /* DISPATCH004 */
2258 
2259 	for (i = 1; i < dset->ndisp; i++) {
2260 		result = dispatch_createudp(mgr, &source->local, i,
2261 					    &dset->dispatches[i]);
2262 		if (result != ISC_R_SUCCESS) {
2263 			goto fail;
2264 		}
2265 	}
2266 
2267 	*dsetp = dset;
2268 
2269 	return ISC_R_SUCCESS;
2270 
2271 fail:
2272 	for (size_t j = 0; j < i; j++) {
2273 		dns_dispatch_detach(&(dset->dispatches[j])); /* DISPATCH004 */
2274 	}
2275 	isc_mem_cput(dset->mctx, dset->dispatches, ndisp,
2276 		     sizeof(dns_dispatch_t *));
2277 
2278 	isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
2279 	return result;
2280 }
2281 
2282 void
2283 dns_dispatchset_destroy(dns_dispatchset_t **dsetp) {
2284 	REQUIRE(dsetp != NULL && *dsetp != NULL);
2285 
2286 	dns_dispatchset_t *dset = *dsetp;
2287 	*dsetp = NULL;
2288 
2289 	for (size_t i = 0; i < dset->ndisp; i++) {
2290 		dns_dispatch_detach(&(dset->dispatches[i])); /* DISPATCH004 */
2291 	}
2292 	isc_mem_cput(dset->mctx, dset->dispatches, dset->ndisp,
2293 		     sizeof(dns_dispatch_t *));
2294 	isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t));
2295 }
2296 
2297 isc_result_t
2298 dns_dispatch_checkperm(dns_dispatch_t *disp) {
2299 	REQUIRE(VALID_DISPATCH(disp));
2300 
2301 	if (disp->handle == NULL || disp->socktype == isc_socktype_udp) {
2302 		return ISC_R_NOPERM;
2303 	}
2304 
2305 	return isc_nm_xfr_checkperm(disp->handle);
2306 }
2307