xref: /netbsd-src/external/mpl/bind/dist/lib/isc/netmgr/netmgr.c (revision 9fb66d812c00ebfb445c0b47dea128f32aa6fe96)
1 /*	$NetBSD: netmgr.c,v 1.5 2021/04/03 22:20:26 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * This Source Code Form is subject to the terms of the Mozilla Public
7  * License, v. 2.0. If a copy of the MPL was not distributed with this
8  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9  *
10  * See the COPYRIGHT file distributed with this work for additional
11  * information regarding copyright ownership.
12  */
13 
14 #include <inttypes.h>
15 #include <unistd.h>
16 #include <uv.h>
17 
18 #include <isc/atomic.h>
19 #include <isc/buffer.h>
20 #include <isc/condition.h>
21 #include <isc/errno.h>
22 #include <isc/magic.h>
23 #include <isc/mem.h>
24 #include <isc/netmgr.h>
25 #include <isc/print.h>
26 #include <isc/quota.h>
27 #include <isc/random.h>
28 #include <isc/refcount.h>
29 #include <isc/region.h>
30 #include <isc/result.h>
31 #include <isc/sockaddr.h>
32 #include <isc/stats.h>
33 #include <isc/strerr.h>
34 #include <isc/thread.h>
35 #include <isc/util.h>
36 
37 #include "netmgr-int.h"
38 #include "uv-compat.h"
39 
40 #ifdef NETMGR_TRACE
41 #include <execinfo.h>
42 
43 #endif
44 
45 /*%
46  * How many isc_nmhandles and isc_nm_uvreqs are cached for reuse
47  * in each socket.
48  */
49 #define ISC_NM_HANDLES_STACK_SIZE 600
50 #define ISC_NM_REQS_STACK_SIZE	  600
51 
52 /*%
53  * Shortcut index arrays to get access to statistics counters.
54  */
55 
56 static const isc_statscounter_t udp4statsindex[] = {
57 	isc_sockstatscounter_udp4open,
58 	isc_sockstatscounter_udp4openfail,
59 	isc_sockstatscounter_udp4close,
60 	isc_sockstatscounter_udp4bindfail,
61 	isc_sockstatscounter_udp4connectfail,
62 	isc_sockstatscounter_udp4connect,
63 	-1,
64 	-1,
65 	isc_sockstatscounter_udp4sendfail,
66 	isc_sockstatscounter_udp4recvfail,
67 	isc_sockstatscounter_udp4active
68 };
69 
70 static const isc_statscounter_t udp6statsindex[] = {
71 	isc_sockstatscounter_udp6open,
72 	isc_sockstatscounter_udp6openfail,
73 	isc_sockstatscounter_udp6close,
74 	isc_sockstatscounter_udp6bindfail,
75 	isc_sockstatscounter_udp6connectfail,
76 	isc_sockstatscounter_udp6connect,
77 	-1,
78 	-1,
79 	isc_sockstatscounter_udp6sendfail,
80 	isc_sockstatscounter_udp6recvfail,
81 	isc_sockstatscounter_udp6active
82 };
83 
84 static const isc_statscounter_t tcp4statsindex[] = {
85 	isc_sockstatscounter_tcp4open,	      isc_sockstatscounter_tcp4openfail,
86 	isc_sockstatscounter_tcp4close,	      isc_sockstatscounter_tcp4bindfail,
87 	isc_sockstatscounter_tcp4connectfail, isc_sockstatscounter_tcp4connect,
88 	isc_sockstatscounter_tcp4acceptfail,  isc_sockstatscounter_tcp4accept,
89 	isc_sockstatscounter_tcp4sendfail,    isc_sockstatscounter_tcp4recvfail,
90 	isc_sockstatscounter_tcp4active
91 };
92 
93 static const isc_statscounter_t tcp6statsindex[] = {
94 	isc_sockstatscounter_tcp6open,	      isc_sockstatscounter_tcp6openfail,
95 	isc_sockstatscounter_tcp6close,	      isc_sockstatscounter_tcp6bindfail,
96 	isc_sockstatscounter_tcp6connectfail, isc_sockstatscounter_tcp6connect,
97 	isc_sockstatscounter_tcp6acceptfail,  isc_sockstatscounter_tcp6accept,
98 	isc_sockstatscounter_tcp6sendfail,    isc_sockstatscounter_tcp6recvfail,
99 	isc_sockstatscounter_tcp6active
100 };
101 
102 #if 0
103 /* XXX: not currently used */
104 static const isc_statscounter_t unixstatsindex[] = {
105 	isc_sockstatscounter_unixopen,
106 	isc_sockstatscounter_unixopenfail,
107 	isc_sockstatscounter_unixclose,
108 	isc_sockstatscounter_unixbindfail,
109 	isc_sockstatscounter_unixconnectfail,
110 	isc_sockstatscounter_unixconnect,
111 	isc_sockstatscounter_unixacceptfail,
112 	isc_sockstatscounter_unixaccept,
113 	isc_sockstatscounter_unixsendfail,
114 	isc_sockstatscounter_unixrecvfail,
115 	isc_sockstatscounter_unixactive
116 };
117 #endif /* if 0 */
118 
119 /*
120  * libuv is not thread safe, but has mechanisms to pass messages
121  * between threads. Each socket is owned by a thread. For UDP
122  * sockets we have a set of sockets for each interface and we can
123  * choose a sibling and send the message directly. For TCP, or if
124  * we're calling from a non-networking thread, we need to pass the
125  * request using async_cb.
126  */
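/*
 * Editorial sketch (not part of the original source): a hedged example of
 * how a request typically crosses threads here.  A caller allocates a
 * netievent from the manager's event pool and enqueues it on the worker
 * that owns the socket; that worker wakes up in async_cb() and drains its
 * queues.  The "stop" event and the "tid" index are used purely for
 * illustration.
 *
 *	isc__netievent_t *ev = isc__nm_get_netievent_stop(mgr);
 *	isc__nm_enqueue_ievent(&mgr->workers[tid], ev);
 */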
127 
128 ISC_THREAD_LOCAL int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN;
129 
130 static void
131 nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG);
132 static void
133 nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle);
134 static isc_threadresult_t
135 nm_thread(isc_threadarg_t worker0);
136 static void
137 async_cb(uv_async_t *handle);
138 static bool
139 process_queue(isc__networker_t *worker, isc_queue_t *queue);
140 static bool
141 process_priority_queue(isc__networker_t *worker);
142 static bool
143 process_normal_queue(isc__networker_t *worker);
144 static void
145 process_queues(isc__networker_t *worker);
146 
147 static void
148 isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0);
149 static void
150 isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0);
151 static void
152 isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0);
153 static void
154 isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0);
155 static void
156 isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0);
157 /*%<
158  * Issue a 'handle closed' callback on the socket.
159  */
160 
161 static void
162 nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG);
163 
164 int
165 isc_nm_tid(void) {
166 	return (isc__nm_tid_v);
167 }
168 
169 bool
170 isc__nm_in_netthread(void) {
171 	return (isc__nm_tid_v >= 0);
172 }
173 
174 #ifdef WIN32
175 static void
176 isc__nm_winsock_initialize(void) {
177 	WORD wVersionRequested = MAKEWORD(2, 2);
178 	WSADATA wsaData;
179 	int result;
180 
181 	result = WSAStartup(wVersionRequested, &wsaData);
182 	if (result != 0) {
183 		char strbuf[ISC_STRERRORSIZE];
184 		strerror_r(result, strbuf, sizeof(strbuf));
185 		UNEXPECTED_ERROR(__FILE__, __LINE__,
186 				 "WSAStartup() failed with error code %d: %s",
187 				 result, strbuf);
188 	}
189 
190 	/*
191 	 * Confirm that the WinSock DLL supports version 2.2.  Note that if the
192 	 * DLL supports versions greater than 2.2 in addition to 2.2, it will
193 	 * still return 2.2 in wVersion since that is the version we requested.
194 	 */
195 	if (LOBYTE(wsaData.wVersion) != 2 || HIBYTE(wsaData.wVersion) != 2) {
196 		UNEXPECTED_ERROR(__FILE__, __LINE__,
197 				 "Unusable WinSock DLL version: %u.%u",
198 				 LOBYTE(wsaData.wVersion),
199 				 HIBYTE(wsaData.wVersion));
200 	}
201 }
202 
203 static void
204 isc__nm_winsock_destroy(void) {
205 	WSACleanup();
206 }
207 #endif /* WIN32 */
208 
209 isc_nm_t *
210 isc_nm_start(isc_mem_t *mctx, uint32_t workers) {
211 	isc_nm_t *mgr = NULL;
212 	char name[32];
213 
214 #ifdef WIN32
215 	isc__nm_winsock_initialize();
216 #endif /* WIN32 */
217 
218 	isc__nm_tls_initialize();
219 
220 	mgr = isc_mem_get(mctx, sizeof(*mgr));
221 	*mgr = (isc_nm_t){ .nworkers = workers };
222 
223 	isc_mem_attach(mctx, &mgr->mctx);
224 	isc_mutex_init(&mgr->lock);
225 	isc_condition_init(&mgr->wkstatecond);
226 	isc_refcount_init(&mgr->references, 1);
227 	atomic_init(&mgr->maxudp, 0);
228 	atomic_init(&mgr->interlocked, false);
229 
230 #ifdef NETMGR_TRACE
231 	ISC_LIST_INIT(mgr->active_sockets);
232 #endif
233 
234 	/*
235 	 * Default TCP timeout values.
236 	 * May be updated by isc_nm_settimeouts().
237 	 */
238 	atomic_init(&mgr->init, 30000);
239 	atomic_init(&mgr->idle, 30000);
240 	atomic_init(&mgr->keepalive, 30000);
241 	atomic_init(&mgr->advertised, 30000);
242 
243 	isc_mutex_init(&mgr->reqlock);
244 	isc_mempool_create(mgr->mctx, sizeof(isc__nm_uvreq_t), &mgr->reqpool);
245 	isc_mempool_setname(mgr->reqpool, "nm_reqpool");
246 	isc_mempool_setfreemax(mgr->reqpool, 4096);
247 	isc_mempool_associatelock(mgr->reqpool, &mgr->reqlock);
248 	isc_mempool_setfillcount(mgr->reqpool, 32);
249 
250 	isc_mutex_init(&mgr->evlock);
251 	isc_mempool_create(mgr->mctx, sizeof(isc__netievent_storage_t),
252 			   &mgr->evpool);
253 	isc_mempool_setname(mgr->evpool, "nm_evpool");
254 	isc_mempool_setfreemax(mgr->evpool, 4096);
255 	isc_mempool_associatelock(mgr->evpool, &mgr->evlock);
256 	isc_mempool_setfillcount(mgr->evpool, 32);
257 
258 	mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t));
259 	for (size_t i = 0; i < workers; i++) {
260 		int r;
261 		isc__networker_t *worker = &mgr->workers[i];
262 		*worker = (isc__networker_t){
263 			.mgr = mgr,
264 			.id = i,
265 		};
266 
267 		r = uv_loop_init(&worker->loop);
268 		RUNTIME_CHECK(r == 0);
269 
270 		worker->loop.data = &mgr->workers[i];
271 
272 		r = uv_async_init(&worker->loop, &worker->async, async_cb);
273 		RUNTIME_CHECK(r == 0);
274 
275 		isc_mutex_init(&worker->lock);
276 		isc_condition_init(&worker->cond);
277 
278 		worker->ievents = isc_queue_new(mgr->mctx, 128);
279 		worker->ievents_prio = isc_queue_new(mgr->mctx, 128);
280 		worker->recvbuf = isc_mem_get(mctx, ISC_NETMGR_RECVBUF_SIZE);
281 
282 		/*
283 		 * We need to do this here and not in nm_thread to avoid a
284 		 * race - we could exit isc_nm_start, launch nm_destroy,
285 		 * and nm_thread would still not be up.
286 		 */
287 		mgr->workers_running++;
288 		isc_thread_create(nm_thread, &mgr->workers[i], &worker->thread);
289 
290 		snprintf(name, sizeof(name), "net-%zu", i);
291 		isc_thread_setname(worker->thread, name);
292 	}
293 
294 	mgr->magic = NM_MAGIC;
295 	return (mgr);
296 }
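/*
 * Editorial sketch (not part of the original source): a minimal, hedged
 * example of the manager lifecycle as exposed by this file.  Error
 * handling and the rest of the application are omitted.
 *
 *	isc_nm_t *netmgr = isc_nm_start(mctx, 4);
 *	...
 *	isc_nm_destroy(&netmgr);
 */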
297 
298 /*
299  * Free the resources of the network manager.
300  */
301 static void
302 nm_destroy(isc_nm_t **mgr0) {
303 	REQUIRE(VALID_NM(*mgr0));
304 	REQUIRE(!isc__nm_in_netthread());
305 
306 	isc_nm_t *mgr = *mgr0;
307 	*mgr0 = NULL;
308 
309 	isc_refcount_destroy(&mgr->references);
310 
311 	mgr->magic = 0;
312 
313 	for (size_t i = 0; i < mgr->nworkers; i++) {
314 		isc__networker_t *worker = &mgr->workers[i];
315 		isc__netievent_t *event = isc__nm_get_netievent_stop(mgr);
316 		isc__nm_enqueue_ievent(worker, event);
317 	}
318 
319 	LOCK(&mgr->lock);
320 	while (mgr->workers_running > 0) {
321 		WAIT(&mgr->wkstatecond, &mgr->lock);
322 	}
323 	UNLOCK(&mgr->lock);
324 
325 	for (size_t i = 0; i < mgr->nworkers; i++) {
326 		isc__networker_t *worker = &mgr->workers[i];
327 		isc__netievent_t *ievent = NULL;
328 		int r;
329 
330 		/* Empty the async event queues */
331 		while ((ievent = (isc__netievent_t *)isc_queue_dequeue(
332 				worker->ievents)) != NULL)
333 		{
334 			isc_mempool_put(mgr->evpool, ievent);
335 		}
336 
337 		while ((ievent = (isc__netievent_t *)isc_queue_dequeue(
338 				worker->ievents_prio)) != NULL)
339 		{
340 			isc_mempool_put(mgr->evpool, ievent);
341 		}
342 
343 		r = uv_loop_close(&worker->loop);
344 		INSIST(r == 0);
345 
346 		isc_queue_destroy(worker->ievents);
347 		isc_queue_destroy(worker->ievents_prio);
348 		isc_mutex_destroy(&worker->lock);
349 		isc_condition_destroy(&worker->cond);
350 
351 		isc_mem_put(mgr->mctx, worker->recvbuf,
352 			    ISC_NETMGR_RECVBUF_SIZE);
353 		isc_thread_join(worker->thread, NULL);
354 	}
355 
356 	if (mgr->stats != NULL) {
357 		isc_stats_detach(&mgr->stats);
358 	}
359 
360 	isc_condition_destroy(&mgr->wkstatecond);
361 	isc_mutex_destroy(&mgr->lock);
362 
363 	isc_mempool_destroy(&mgr->evpool);
364 	isc_mutex_destroy(&mgr->evlock);
365 
366 	isc_mempool_destroy(&mgr->reqpool);
367 	isc_mutex_destroy(&mgr->reqlock);
368 
369 	isc_mem_put(mgr->mctx, mgr->workers,
370 		    mgr->nworkers * sizeof(isc__networker_t));
371 	isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(*mgr));
372 
373 #ifdef WIN32
374 	isc__nm_winsock_destroy();
375 #endif /* WIN32 */
376 }
377 
378 void
379 isc_nm_pause(isc_nm_t *mgr) {
380 	REQUIRE(VALID_NM(mgr));
381 	REQUIRE(!isc__nm_in_netthread());
382 
383 	isc__nm_acquire_interlocked_force(mgr);
384 
385 	for (size_t i = 0; i < mgr->nworkers; i++) {
386 		isc__networker_t *worker = &mgr->workers[i];
387 		isc__netievent_resume_t *event =
388 			isc__nm_get_netievent_pause(mgr);
389 		isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
390 	}
391 
392 	LOCK(&mgr->lock);
393 	while (mgr->workers_paused != mgr->workers_running) {
394 		WAIT(&mgr->wkstatecond, &mgr->lock);
395 	}
396 	UNLOCK(&mgr->lock);
397 }
398 
399 void
400 isc_nm_resume(isc_nm_t *mgr) {
401 	REQUIRE(VALID_NM(mgr));
402 	REQUIRE(!isc__nm_in_netthread());
403 
404 	for (size_t i = 0; i < mgr->nworkers; i++) {
405 		isc__networker_t *worker = &mgr->workers[i];
406 		isc__netievent_resume_t *event =
407 			isc__nm_get_netievent_resume(mgr);
408 		isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
409 	}
410 
411 	LOCK(&mgr->lock);
412 	while (mgr->workers_paused != 0) {
413 		WAIT(&mgr->wkstatecond, &mgr->lock);
414 	}
415 	UNLOCK(&mgr->lock);
416 
417 	isc__nm_drop_interlocked(mgr);
418 }
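/*
 * Editorial sketch (not part of the original source): isc_nm_pause() and
 * isc_nm_resume() are intended to be used as a pair from a non-network
 * thread (both REQUIRE that), e.g. around work that must not race with
 * the event loops:
 *
 *	isc_nm_pause(netmgr);
 *	... work that requires the loops to be quiescent ...
 *	isc_nm_resume(netmgr);
 */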
419 
420 void
421 isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst) {
422 	REQUIRE(VALID_NM(mgr));
423 	REQUIRE(dst != NULL && *dst == NULL);
424 
425 	isc_refcount_increment(&mgr->references);
426 
427 	*dst = mgr;
428 }
429 
430 void
431 isc_nm_detach(isc_nm_t **mgr0) {
432 	isc_nm_t *mgr = NULL;
433 
434 	REQUIRE(mgr0 != NULL);
435 	REQUIRE(VALID_NM(*mgr0));
436 
437 	mgr = *mgr0;
438 	*mgr0 = NULL;
439 
440 	if (isc_refcount_decrement(&mgr->references) == 1) {
441 		nm_destroy(&mgr);
442 	}
443 }
444 
445 void
446 isc_nm_closedown(isc_nm_t *mgr) {
447 	REQUIRE(VALID_NM(mgr));
448 
449 	atomic_store(&mgr->closing, true);
450 	for (size_t i = 0; i < mgr->nworkers; i++) {
451 		isc__netievent_t *event = NULL;
452 		event = isc__nm_get_netievent_shutdown(mgr);
453 		isc__nm_enqueue_ievent(&mgr->workers[i], event);
454 	}
455 }
456 
457 void
458 isc_nm_destroy(isc_nm_t **mgr0) {
459 	isc_nm_t *mgr = NULL;
460 	int counter = 0;
461 	uint_fast32_t references;
462 
463 	REQUIRE(mgr0 != NULL);
464 	REQUIRE(VALID_NM(*mgr0));
465 
466 	mgr = *mgr0;
467 
468 	/*
469 	 * Close active connections.
470 	 */
471 	isc_nm_closedown(mgr);
472 
473 	/*
474 	 * Wait for the manager to be dereferenced elsewhere.
475 	 */
476 	while ((references = isc_refcount_current(&mgr->references)) > 1 &&
477 	       counter++ < 1000)
478 	{
479 #ifdef WIN32
480 		_sleep(10);
481 #else  /* ifdef WIN32 */
482 		usleep(10000);
483 #endif /* ifdef WIN32 */
484 	}
485 
486 #ifdef NETMGR_TRACE
487 	isc__nm_dump_active(mgr);
488 #endif
489 
490 	INSIST(references == 1);
491 
492 	/*
493 	 * Detach final reference.
494 	 */
495 	isc_nm_detach(mgr0);
496 }
497 
498 void
499 isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) {
500 	REQUIRE(VALID_NM(mgr));
501 
502 	atomic_store(&mgr->maxudp, maxudp);
503 }
504 
505 void
506 isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle,
507 		   uint32_t keepalive, uint32_t advertised) {
508 	REQUIRE(VALID_NM(mgr));
509 
510 	atomic_store(&mgr->init, init * 100);
511 	atomic_store(&mgr->idle, idle * 100);
512 	atomic_store(&mgr->keepalive, keepalive * 100);
513 	atomic_store(&mgr->advertised, advertised * 100);
514 }
515 
516 void
517 isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle,
518 		   uint32_t *keepalive, uint32_t *advertised) {
519 	REQUIRE(VALID_NM(mgr));
520 
521 	if (initial != NULL) {
522 		*initial = atomic_load(&mgr->init) / 100;
523 	}
524 
525 	if (idle != NULL) {
526 		*idle = atomic_load(&mgr->idle) / 100;
527 	}
528 
529 	if (keepalive != NULL) {
530 		*keepalive = atomic_load(&mgr->keepalive) / 100;
531 	}
532 
533 	if (advertised != NULL) {
534 		*advertised = atomic_load(&mgr->advertised) / 100;
535 	}
536 }
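/*
 * Editorial note (not part of the original source): given the "* 100" and
 * "/ 100" conversions above and the 30000 defaults set in isc_nm_start(),
 * the set/get interface appears to work in units of 100 milliseconds while
 * the values are stored internally in milliseconds.  A hedged example:
 *
 *	isc_nm_settimeouts(netmgr, 300, 300, 300, 300);
 *	(stores 30000 internally, i.e. the 30-second defaults)
 */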
537 
538 /*
539  * nm_thread is a single worker thread that runs the uv_run() event loop
540  * until asked to stop.
541  */
542 static isc_threadresult_t
543 nm_thread(isc_threadarg_t worker0) {
544 	isc__networker_t *worker = (isc__networker_t *)worker0;
545 	isc_nm_t *mgr = worker->mgr;
546 
547 	isc__nm_tid_v = worker->id;
548 	isc_thread_setaffinity(isc__nm_tid_v);
549 
550 	while (true) {
551 		int r = uv_run(&worker->loop, UV_RUN_DEFAULT);
552 		/* There's always the async handle until we are done */
553 		INSIST(r > 0 || worker->finished);
554 
555 		if (worker->paused) {
556 			LOCK(&worker->lock);
557 			/* We need to lock the worker first; otherwise
558 			 * isc_nm_resume() might slip in before WAIT() in the
559 			 * while loop starts, the signal never gets delivered,
560 			 * and we are stuck in the paused loop forever.
561 			 */
562 
563 			LOCK(&mgr->lock);
564 			mgr->workers_paused++;
565 			SIGNAL(&mgr->wkstatecond);
566 			UNLOCK(&mgr->lock);
567 
568 			while (worker->paused) {
569 				WAIT(&worker->cond, &worker->lock);
570 				(void)process_priority_queue(worker);
571 			}
572 
573 			LOCK(&mgr->lock);
574 			mgr->workers_paused--;
575 			SIGNAL(&mgr->wkstatecond);
576 			UNLOCK(&mgr->lock);
577 
578 			UNLOCK(&worker->lock);
579 		}
580 
581 		if (r == 0) {
582 			INSIST(worker->finished);
583 			break;
584 		}
585 
586 		INSIST(!worker->finished);
587 
588 		/*
589 		 * Empty the async queue.
590 		 */
591 		process_queues(worker);
592 	}
593 
594 	LOCK(&mgr->lock);
595 	mgr->workers_running--;
596 	SIGNAL(&mgr->wkstatecond);
597 	UNLOCK(&mgr->lock);
598 
599 	return ((isc_threadresult_t)0);
600 }
601 
602 /*
603  * async_cb is a universal callback for 'async' events sent to the event loop.
604  * It's the only way to safely pass data to the libuv event loop. We use a
605  * single async event and a lockless queue of 'isc__netievent_t' structures
606  * passed from other threads.
607  */
608 static void
609 async_cb(uv_async_t *handle) {
610 	isc__networker_t *worker = (isc__networker_t *)handle->loop->data;
611 	process_queues(worker);
612 }
613 
614 static void
615 isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0) {
616 	UNUSED(ev0);
617 	worker->finished = true;
618 	/* Close the async handle */
619 	uv_close((uv_handle_t *)&worker->async, NULL);
620 	/* uv_stop(&worker->loop); */
621 }
622 
623 static void
624 isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0) {
625 	UNUSED(ev0);
626 	REQUIRE(worker->paused == false);
627 	worker->paused = true;
628 	uv_stop(&worker->loop);
629 }
630 
631 static void
632 isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0) {
633 	UNUSED(ev0);
634 	REQUIRE(worker->paused == true);
635 	worker->paused = false;
636 }
637 
638 static bool
639 process_priority_queue(isc__networker_t *worker) {
640 	return (process_queue(worker, worker->ievents_prio));
641 }
642 
643 static bool
644 process_normal_queue(isc__networker_t *worker) {
645 	return (process_queue(worker, worker->ievents));
646 }
647 
648 static void
649 process_queues(isc__networker_t *worker) {
650 	if (!process_priority_queue(worker)) {
651 		return;
652 	}
653 	(void)process_normal_queue(worker);
654 }
655 
656 /*
657  * The two macros here generate the individual cases for the
658  * process_netievent() function.  NETIEVENT_CASE(type) is the common
659  * case; NETIEVENT_CASE_NOMORE(type) causes the loop in process_queue()
660  * to stop, i.e. it is used only for the netievents that stop or pause
661  * processing of the enqueued netievents.
662  */
663 #define NETIEVENT_CASE(type)                                               \
664 	case netievent_##type: {                                           \
665 		isc__nm_async_##type(worker, ievent);                      \
666 		isc__nm_put_netievent_##type(                              \
667 			worker->mgr, (isc__netievent_##type##_t *)ievent); \
668 		return (true);                                             \
669 	}
670 
671 #define NETIEVENT_CASE_NOMORE(type)                                \
672 	case netievent_##type: {                                   \
673 		isc__nm_async_##type(worker, ievent);              \
674 		isc__nm_put_netievent_##type(worker->mgr, ievent); \
675 		return (false);                                    \
676 	}
677 
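/*
 * Editorial sketch (not part of the original source): for illustration,
 * NETIEVENT_CASE(udpsend) expands to roughly the following case inside
 * process_netievent():
 *
 *	case netievent_udpsend: {
 *		isc__nm_async_udpsend(worker, ievent);
 *		isc__nm_put_netievent_udpsend(
 *			worker->mgr, (isc__netievent_udpsend_t *)ievent);
 *		return (true);
 *	}
 */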
678 static bool
679 process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) {
680 	REQUIRE(worker->id == isc_nm_tid());
681 
682 	switch (ievent->type) {
683 		/* Don't process more ievents when we are stopping */
684 		NETIEVENT_CASE_NOMORE(stop);
685 
686 		NETIEVENT_CASE(udpconnect);
687 		NETIEVENT_CASE(udplisten);
688 		NETIEVENT_CASE(udpstop);
689 		NETIEVENT_CASE(udpsend);
690 		NETIEVENT_CASE(udpread);
691 		NETIEVENT_CASE(udpcancel);
692 		NETIEVENT_CASE(udpclose);
693 
694 		NETIEVENT_CASE(tcpaccept);
695 		NETIEVENT_CASE(tcpconnect);
696 		NETIEVENT_CASE(tcplisten);
697 		NETIEVENT_CASE(tcpstartread);
698 		NETIEVENT_CASE(tcppauseread);
699 		NETIEVENT_CASE(tcpsend);
700 		NETIEVENT_CASE(tcpstop);
701 		NETIEVENT_CASE(tcpcancel);
702 		NETIEVENT_CASE(tcpclose);
703 
704 		NETIEVENT_CASE(tcpdnsaccept);
705 		NETIEVENT_CASE(tcpdnslisten);
706 		NETIEVENT_CASE(tcpdnsconnect);
707 		NETIEVENT_CASE(tcpdnssend);
708 		NETIEVENT_CASE(tcpdnscancel);
709 		NETIEVENT_CASE(tcpdnsclose);
710 		NETIEVENT_CASE(tcpdnsread);
711 		NETIEVENT_CASE(tcpdnsstop);
712 
713 		NETIEVENT_CASE(tlsstartread);
714 		NETIEVENT_CASE(tlssend);
715 		NETIEVENT_CASE(tlsclose);
716 		NETIEVENT_CASE(tlsconnect);
717 		NETIEVENT_CASE(tlsdobio);
718 
719 		NETIEVENT_CASE(tlsdnssend);
720 		NETIEVENT_CASE(tlsdnscancel);
721 		NETIEVENT_CASE(tlsdnsclose);
722 		NETIEVENT_CASE(tlsdnsread);
723 		NETIEVENT_CASE(tlsdnsstop);
724 
725 		NETIEVENT_CASE(connectcb);
726 		NETIEVENT_CASE(readcb);
727 		NETIEVENT_CASE(sendcb);
728 
729 		NETIEVENT_CASE(close);
730 		NETIEVENT_CASE(detach);
731 
732 		NETIEVENT_CASE(shutdown);
733 		NETIEVENT_CASE(resume);
734 		NETIEVENT_CASE_NOMORE(pause);
735 
736 	default:
737 		INSIST(0);
738 		ISC_UNREACHABLE();
739 	}
740 	return (true);
741 }
742 
743 static bool
744 process_queue(isc__networker_t *worker, isc_queue_t *queue) {
745 	isc__netievent_t *ievent = NULL;
746 
747 	while ((ievent = (isc__netievent_t *)isc_queue_dequeue(queue)) != NULL)
748 	{
749 		if (!process_netievent(worker, ievent)) {
750 			return (false);
751 		}
752 	}
753 	return (true);
754 }
755 
756 void *
757 isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type) {
758 	isc__netievent_storage_t *event = isc_mempool_get(mgr->evpool);
759 
760 	*event = (isc__netievent_storage_t){ .ni.type = type };
761 	return (event);
762 }
763 
764 void
765 isc__nm_put_netievent(isc_nm_t *mgr, void *ievent) {
766 	isc_mempool_put(mgr->evpool, ievent);
767 }
768 
769 NETIEVENT_SOCKET_DEF(tcpclose);
770 NETIEVENT_SOCKET_DEF(tcplisten);
771 NETIEVENT_SOCKET_DEF(tcppauseread);
772 NETIEVENT_SOCKET_DEF(tcpstartread);
773 NETIEVENT_SOCKET_DEF(tcpstop);
774 NETIEVENT_SOCKET_DEF(tlsclose);
775 NETIEVENT_SOCKET_DEF(tlsconnect);
776 NETIEVENT_SOCKET_DEF(tlsdobio);
777 NETIEVENT_SOCKET_DEF(tlsstartread);
778 NETIEVENT_SOCKET_DEF(udpclose);
779 NETIEVENT_SOCKET_DEF(udplisten);
780 NETIEVENT_SOCKET_DEF(udpread);
781 NETIEVENT_SOCKET_DEF(udpsend);
782 NETIEVENT_SOCKET_DEF(udpstop);
783 
784 NETIEVENT_SOCKET_DEF(tcpdnsclose);
785 NETIEVENT_SOCKET_DEF(tcpdnsread);
786 NETIEVENT_SOCKET_DEF(tcpdnsstop);
787 NETIEVENT_SOCKET_DEF(tcpdnslisten);
788 NETIEVENT_SOCKET_REQ_DEF(tcpdnsconnect);
789 NETIEVENT_SOCKET_REQ_DEF(tcpdnssend);
790 NETIEVENT_SOCKET_HANDLE_DEF(tcpdnscancel);
791 NETIEVENT_SOCKET_QUOTA_DEF(tcpdnsaccept);
792 
793 NETIEVENT_SOCKET_DEF(tlsdnsclose);
794 NETIEVENT_SOCKET_DEF(tlsdnsread);
795 NETIEVENT_SOCKET_DEF(tlsdnsstop);
796 NETIEVENT_SOCKET_REQ_DEF(tlsdnssend);
797 NETIEVENT_SOCKET_HANDLE_DEF(tlsdnscancel);
798 
799 NETIEVENT_SOCKET_REQ_DEF(tcpconnect);
800 NETIEVENT_SOCKET_REQ_DEF(tcpsend);
801 NETIEVENT_SOCKET_REQ_DEF(tlssend);
802 NETIEVENT_SOCKET_REQ_DEF(udpconnect);
803 
804 NETIEVENT_SOCKET_REQ_RESULT_DEF(connectcb);
805 NETIEVENT_SOCKET_REQ_RESULT_DEF(readcb);
806 NETIEVENT_SOCKET_REQ_RESULT_DEF(sendcb);
807 
808 NETIEVENT_SOCKET_DEF(detach);
809 NETIEVENT_SOCKET_HANDLE_DEF(tcpcancel);
810 NETIEVENT_SOCKET_HANDLE_DEF(udpcancel);
811 
812 NETIEVENT_SOCKET_QUOTA_DEF(tcpaccept);
813 
814 NETIEVENT_SOCKET_DEF(close);
815 NETIEVENT_DEF(pause);
816 NETIEVENT_DEF(resume);
817 NETIEVENT_DEF(shutdown);
818 NETIEVENT_DEF(stop);
819 
820 void
821 isc__nm_maybe_enqueue_ievent(isc__networker_t *worker,
822 			     isc__netievent_t *event) {
823 	/*
824 	 * If we are already in the matching nmthread, process the ievent
825 	 * directly.
826 	 */
827 	if (worker->id == isc_nm_tid()) {
828 		process_netievent(worker, event);
829 		return;
830 	}
831 
832 	isc__nm_enqueue_ievent(worker, event);
833 }
834 
835 void
836 isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) {
837 	if (event->type > netievent_prio) {
838 		/*
839 		 * We need to make sure this signal will be delivered and
840 		 * the queue will be processed.
841 		 */
842 		LOCK(&worker->lock);
843 		isc_queue_enqueue(worker->ievents_prio, (uintptr_t)event);
844 		SIGNAL(&worker->cond);
845 		UNLOCK(&worker->lock);
846 	} else {
847 		isc_queue_enqueue(worker->ievents, (uintptr_t)event);
848 	}
849 	uv_async_send(&worker->async);
850 }
851 
852 bool
853 isc__nmsocket_active(isc_nmsocket_t *sock) {
854 	REQUIRE(VALID_NMSOCK(sock));
855 	if (sock->parent != NULL) {
856 		return (atomic_load(&sock->parent->active));
857 	}
858 
859 	return (atomic_load(&sock->active));
860 }
861 
862 bool
863 isc__nmsocket_deactivate(isc_nmsocket_t *sock) {
864 	REQUIRE(VALID_NMSOCK(sock));
865 
866 	if (sock->parent != NULL) {
867 		return (atomic_compare_exchange_strong(&sock->parent->active,
868 						       &(bool){ true }, false));
869 	}
870 
871 	return (atomic_compare_exchange_strong(&sock->active, &(bool){ true },
872 					       false));
873 }
874 
875 void
876 isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG) {
877 	REQUIRE(VALID_NMSOCK(sock));
878 	REQUIRE(target != NULL && *target == NULL);
879 
880 	isc_nmsocket_t *rsock = NULL;
881 
882 	if (sock->parent != NULL) {
883 		rsock = sock->parent;
884 		INSIST(rsock->parent == NULL); /* sanity check */
885 	} else {
886 		rsock = sock;
887 	}
888 
889 	NETMGR_TRACE_LOG("isc__nmsocket_attach():%p->references = %lu\n", rsock,
890 			 isc_refcount_current(&rsock->references) + 1);
891 
892 	isc_refcount_increment0(&rsock->references);
893 
894 	*target = sock;
895 }
896 
897 /*
898  * Free all resources inside a socket (including its children if any).
899  */
900 static void
901 nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree FLARG) {
902 	isc_nmhandle_t *handle = NULL;
903 	isc__nm_uvreq_t *uvreq = NULL;
904 
905 	REQUIRE(VALID_NMSOCK(sock));
906 	REQUIRE(!isc__nmsocket_active(sock));
907 
908 	NETMGR_TRACE_LOG("nmsocket_cleanup():%p->references = %lu\n", sock,
909 			 isc_refcount_current(&sock->references));
910 
911 	atomic_store(&sock->destroying, true);
912 
913 	if (sock->parent == NULL && sock->children != NULL) {
914 		/*
915 		 * We shouldn't be here unless there are no active handles,
916 		 * so we can clean up and free the children.
917 		 */
918 		for (size_t i = 0; i < sock->nchildren; i++) {
919 			if (!atomic_load(&sock->children[i].destroying)) {
920 				nmsocket_cleanup(&sock->children[i],
921 						 false FLARG_PASS);
922 			}
923 		}
924 
925 		/*
926 		 * This was a parent socket; free the children.
927 		 */
928 		isc_mem_put(sock->mgr->mctx, sock->children,
929 			    sock->nchildren * sizeof(*sock));
930 		sock->children = NULL;
931 		sock->nchildren = 0;
932 	}
933 	if (sock->statsindex != NULL) {
934 		isc__nm_decstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
935 	}
936 
937 	sock->statichandle = NULL;
938 
939 	if (sock->outerhandle != NULL) {
940 		isc__nmhandle_detach(&sock->outerhandle FLARG_PASS);
941 	}
942 
943 	if (sock->outer != NULL) {
944 		isc___nmsocket_detach(&sock->outer FLARG_PASS);
945 	}
946 
947 	while ((handle = isc_astack_pop(sock->inactivehandles)) != NULL) {
948 		nmhandle_free(sock, handle);
949 	}
950 
951 	if (sock->buf != NULL) {
952 		isc_mem_free(sock->mgr->mctx, sock->buf);
953 	}
954 
955 	if (sock->quota != NULL) {
956 		isc_quota_detach(&sock->quota);
957 	}
958 
959 	sock->pquota = NULL;
960 
961 	if (sock->timer_initialized) {
962 		sock->timer_initialized = false;
963 		/* We might be in timer callback */
964 		if (!uv_is_closing((uv_handle_t *)&sock->timer)) {
965 			uv_timer_stop(&sock->timer);
966 			uv_close((uv_handle_t *)&sock->timer, NULL);
967 		}
968 	}
969 
970 	isc_astack_destroy(sock->inactivehandles);
971 
972 	while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) {
973 		isc_mempool_put(sock->mgr->reqpool, uvreq);
974 	}
975 
976 	isc_astack_destroy(sock->inactivereqs);
977 	sock->magic = 0;
978 
979 	isc_mem_free(sock->mgr->mctx, sock->ah_frees);
980 	isc_mem_free(sock->mgr->mctx, sock->ah_handles);
981 	isc_mutex_destroy(&sock->lock);
982 	isc_condition_destroy(&sock->cond);
983 	isc_condition_destroy(&sock->scond);
984 #ifdef NETMGR_TRACE
985 	LOCK(&sock->mgr->lock);
986 	ISC_LIST_UNLINK(sock->mgr->active_sockets, sock, active_link);
987 	UNLOCK(&sock->mgr->lock);
988 #endif
989 	if (dofree) {
990 		isc_nm_t *mgr = sock->mgr;
991 		isc_mem_put(mgr->mctx, sock, sizeof(*sock));
992 		isc_nm_detach(&mgr);
993 	} else {
994 		isc_nm_detach(&sock->mgr);
995 	}
996 }
997 
998 static void
999 nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG) {
1000 	int active_handles;
1001 	bool destroy = false;
1002 
1003 	if (sock->parent != NULL) {
1004 		/*
1005 		 * This is a child socket and cannot be destroyed except
1006 		 * as a side effect of destroying the parent, so let's go
1007 		 * see if the parent is ready to be destroyed.
1008 		 */
1009 		nmsocket_maybe_destroy(sock->parent FLARG_PASS);
1010 		return;
1011 	}
1012 
1013 	/*
1014 	 * This is a parent socket (or a standalone). See whether the
1015 	 * children have active handles before deciding whether to
1016 	 * accept destruction.
1017 	 */
1018 	LOCK(&sock->lock);
1019 	if (atomic_load(&sock->active) || atomic_load(&sock->destroying) ||
1020 	    !atomic_load(&sock->closed) || atomic_load(&sock->references) != 0)
1021 	{
1022 		UNLOCK(&sock->lock);
1023 		return;
1024 	}
1025 
1026 	active_handles = atomic_load(&sock->ah);
1027 	if (sock->children != NULL) {
1028 		for (size_t i = 0; i < sock->nchildren; i++) {
1029 			LOCK(&sock->children[i].lock);
1030 			active_handles += atomic_load(&sock->children[i].ah);
1031 			UNLOCK(&sock->children[i].lock);
1032 		}
1033 	}
1034 
1035 	if (active_handles == 0 || sock->statichandle != NULL) {
1036 		destroy = true;
1037 	}
1038 
1039 	NETMGR_TRACE_LOG("%s:%p->active_handles = %d, .statichandle = %p\n",
1040 			 __func__, sock, active_handles, sock->statichandle);
1041 
1042 	if (destroy) {
1043 		atomic_store(&sock->destroying, true);
1044 		UNLOCK(&sock->lock);
1045 		nmsocket_cleanup(sock, true FLARG_PASS);
1046 	} else {
1047 		UNLOCK(&sock->lock);
1048 	}
1049 }
1050 
1051 void
1052 isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG) {
1053 	REQUIRE(sock->parent == NULL);
1054 
1055 	NETMGR_TRACE_LOG("isc___nmsocket_prep_destroy():%p->references = %lu\n",
1056 			 sock, isc_refcount_current(&sock->references));
1057 
1058 	/*
1059 	 * The final external reference to the socket is gone. We can try
1060 	 * destroying the socket, but we have to wait for all the inflight
1061 	 * handles to finish first.
1062 	 */
1063 	atomic_store(&sock->active, false);
1064 
1065 	/*
1066 	 * If the socket has children, they'll need to be marked inactive
1067 	 * so they can be cleaned up too.
1068 	 */
1069 	if (sock->children != NULL) {
1070 		for (size_t i = 0; i < sock->nchildren; i++) {
1071 			atomic_store(&sock->children[i].active, false);
1072 		}
1073 	}
1074 
1075 	/*
1076 	 * If we're here then we already stopped listening; otherwise
1077 	 * we'd have a hanging reference from the listening process.
1078 	 *
1079 	 * If it's a regular socket we may need to close it.
1080 	 */
1081 	if (!atomic_load(&sock->closed)) {
1082 		switch (sock->type) {
1083 		case isc_nm_udpsocket:
1084 			isc__nm_udp_close(sock);
1085 			return;
1086 		case isc_nm_tcpsocket:
1087 			isc__nm_tcp_close(sock);
1088 			return;
1089 		case isc_nm_tcpdnssocket:
1090 			isc__nm_tcpdns_close(sock);
1091 			return;
1092 		case isc_nm_tlssocket:
1093 			isc__nm_tls_close(sock);
1094 			break;
1095 		case isc_nm_tlsdnssocket:
1096 			isc__nm_tlsdns_close(sock);
1097 			return;
1098 		default:
1099 			break;
1100 		}
1101 	}
1102 
1103 	nmsocket_maybe_destroy(sock FLARG_PASS);
1104 }
1105 
1106 void
1107 isc___nmsocket_detach(isc_nmsocket_t **sockp FLARG) {
1108 	REQUIRE(sockp != NULL && *sockp != NULL);
1109 	REQUIRE(VALID_NMSOCK(*sockp));
1110 
1111 	isc_nmsocket_t *sock = *sockp, *rsock = NULL;
1112 	*sockp = NULL;
1113 
1114 	/*
1115 	 * If the socket is a part of a set (a child socket) we are
1116 	 * counting references for the whole set at the parent.
1117 	 */
1118 	if (sock->parent != NULL) {
1119 		rsock = sock->parent;
1120 		INSIST(rsock->parent == NULL); /* Sanity check */
1121 	} else {
1122 		rsock = sock;
1123 	}
1124 
1125 	NETMGR_TRACE_LOG("isc__nmsocket_detach():%p->references = %lu\n", rsock,
1126 			 isc_refcount_current(&rsock->references) - 1);
1127 
1128 	if (isc_refcount_decrement(&rsock->references) == 1) {
1129 		isc___nmsocket_prep_destroy(rsock FLARG_PASS);
1130 	}
1131 }
1132 
1133 void
1134 isc_nmsocket_close(isc_nmsocket_t **sockp) {
1135 	REQUIRE(sockp != NULL);
1136 	REQUIRE(VALID_NMSOCK(*sockp));
1137 	REQUIRE((*sockp)->type == isc_nm_udplistener ||
1138 		(*sockp)->type == isc_nm_tcplistener ||
1139 		(*sockp)->type == isc_nm_tcpdnslistener ||
1140 		(*sockp)->type == isc_nm_tlsdnslistener);
1141 
1142 	isc__nmsocket_detach(sockp);
1143 }
1144 
1145 void
1146 isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
1147 		    isc_nmiface_t *iface FLARG) {
1148 	uint16_t family;
1149 
1150 	REQUIRE(sock != NULL);
1151 	REQUIRE(mgr != NULL);
1152 	REQUIRE(iface != NULL);
1153 
1154 	family = iface->addr.type.sa.sa_family;
1155 
1156 	*sock = (isc_nmsocket_t){ .type = type,
1157 				  .iface = iface,
1158 				  .fd = -1,
1159 				  .ah_size = 32,
1160 				  .inactivehandles = isc_astack_new(
1161 					  mgr->mctx, ISC_NM_HANDLES_STACK_SIZE),
1162 				  .inactivereqs = isc_astack_new(
1163 					  mgr->mctx, ISC_NM_REQS_STACK_SIZE) };
1164 
1165 #ifdef NETMGR_TRACE
1166 	sock->backtrace_size = backtrace(sock->backtrace, TRACE_SIZE);
1167 	ISC_LINK_INIT(sock, active_link);
1168 	ISC_LIST_INIT(sock->active_handles);
1169 	LOCK(&mgr->lock);
1170 	ISC_LIST_APPEND(mgr->active_sockets, sock, active_link);
1171 	UNLOCK(&mgr->lock);
1172 #endif
1173 
1174 	isc_nm_attach(mgr, &sock->mgr);
1175 	sock->uv_handle.handle.data = sock;
1176 
1177 	sock->ah_frees = isc_mem_allocate(mgr->mctx,
1178 					  sock->ah_size * sizeof(size_t));
1179 	sock->ah_handles = isc_mem_allocate(
1180 		mgr->mctx, sock->ah_size * sizeof(isc_nmhandle_t *));
1181 	ISC_LINK_INIT(&sock->quotacb, link);
1182 	for (size_t i = 0; i < 32; i++) {
1183 		sock->ah_frees[i] = i;
1184 		sock->ah_handles[i] = NULL;
1185 	}
1186 
1187 	switch (type) {
1188 	case isc_nm_udpsocket:
1189 	case isc_nm_udplistener:
1190 		if (family == AF_INET) {
1191 			sock->statsindex = udp4statsindex;
1192 		} else {
1193 			sock->statsindex = udp6statsindex;
1194 		}
1195 		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
1196 		break;
1197 	case isc_nm_tcpsocket:
1198 	case isc_nm_tcplistener:
1199 	case isc_nm_tcpdnssocket:
1200 	case isc_nm_tcpdnslistener:
1201 	case isc_nm_tlsdnssocket:
1202 	case isc_nm_tlsdnslistener:
1203 		if (family == AF_INET) {
1204 			sock->statsindex = tcp4statsindex;
1205 		} else {
1206 			sock->statsindex = tcp6statsindex;
1207 		}
1208 		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
1209 		break;
1210 	default:
1211 		break;
1212 	}
1213 
1214 	isc_mutex_init(&sock->lock);
1215 	isc_condition_init(&sock->cond);
1216 	isc_condition_init(&sock->scond);
1217 	isc_refcount_init(&sock->references, 1);
1218 
1219 	NETMGR_TRACE_LOG("isc__nmsocket_init():%p->references = %lu\n", sock,
1220 			 isc_refcount_current(&sock->references));
1221 
1222 	atomic_init(&sock->active, true);
1223 	atomic_init(&sock->sequential, false);
1224 	atomic_init(&sock->readpaused, false);
1225 	atomic_init(&sock->closing, false);
1226 
1227 	atomic_store(&sock->active_child_connections, 0);
1228 
1229 	sock->magic = NMSOCK_MAGIC;
1230 }
1231 
1232 void
1233 isc__nmsocket_clearcb(isc_nmsocket_t *sock) {
1234 	REQUIRE(VALID_NMSOCK(sock));
1235 	REQUIRE(!isc__nm_in_netthread() || sock->tid == isc_nm_tid());
1236 
1237 	sock->recv_cb = NULL;
1238 	sock->recv_cbarg = NULL;
1239 	sock->accept_cb = NULL;
1240 	sock->accept_cbarg = NULL;
1241 	sock->connect_cb = NULL;
1242 	sock->connect_cbarg = NULL;
1243 }
1244 
1245 void
1246 isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) {
1247 	isc__networker_t *worker = NULL;
1248 
1249 	REQUIRE(VALID_NMSOCK(sock));
1250 	if (buf->base == NULL) {
1251 		/* Empty buffer: might happen in case of error. */
1252 		return;
1253 	}
1254 	worker = &sock->mgr->workers[sock->tid];
1255 
1256 	REQUIRE(worker->recvbuf_inuse);
1257 	if (sock->type == isc_nm_udpsocket && buf->base > worker->recvbuf &&
1258 	    buf->base <= worker->recvbuf + ISC_NETMGR_RECVBUF_SIZE)
1259 	{
1260 		/* Can happen in case of out-of-order recvmmsg in libuv 1.36 */
1261 		return;
1262 	}
1263 	REQUIRE(buf->base == worker->recvbuf);
1264 	worker->recvbuf_inuse = false;
1265 }
1266 
1267 static isc_nmhandle_t *
1268 alloc_handle(isc_nmsocket_t *sock) {
1269 	isc_nmhandle_t *handle =
1270 		isc_mem_get(sock->mgr->mctx,
1271 			    sizeof(isc_nmhandle_t) + sock->extrahandlesize);
1272 
1273 	*handle = (isc_nmhandle_t){ .magic = NMHANDLE_MAGIC };
1274 #ifdef NETMGR_TRACE
1275 	ISC_LINK_INIT(handle, active_link);
1276 #endif
1277 	isc_refcount_init(&handle->references, 1);
1278 
1279 	return (handle);
1280 }
1281 
1282 isc_nmhandle_t *
1283 isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
1284 		   isc_sockaddr_t *local FLARG) {
1285 	isc_nmhandle_t *handle = NULL;
1286 	size_t handlenum;
1287 	int pos;
1288 
1289 	REQUIRE(VALID_NMSOCK(sock));
1290 
1291 	handle = isc_astack_pop(sock->inactivehandles);
1292 
1293 	if (handle == NULL) {
1294 		handle = alloc_handle(sock);
1295 	} else {
1296 		isc_refcount_init(&handle->references, 1);
1297 		INSIST(VALID_NMHANDLE(handle));
1298 	}
1299 
1300 	NETMGR_TRACE_LOG("isc__nmhandle_get():handle %p->references = %lu\n",
1301 			 handle, isc_refcount_current(&handle->references));
1302 
1303 	isc___nmsocket_attach(sock, &handle->sock FLARG_PASS);
1304 
1305 #ifdef NETMGR_TRACE
1306 	handle->backtrace_size = backtrace(handle->backtrace, TRACE_SIZE);
1307 #endif
1308 
1309 	if (peer != NULL) {
1310 		memmove(&handle->peer, peer, sizeof(isc_sockaddr_t));
1311 	} else {
1312 		memmove(&handle->peer, &sock->peer, sizeof(isc_sockaddr_t));
1313 	}
1314 
1315 	if (local != NULL) {
1316 		memmove(&handle->local, local, sizeof(isc_sockaddr_t));
1317 	} else if (sock->iface != NULL) {
1318 		memmove(&handle->local, &sock->iface->addr,
1319 			sizeof(isc_sockaddr_t));
1320 	} else {
1321 		INSIST(0);
1322 		ISC_UNREACHABLE();
1323 	}
1324 
1325 	LOCK(&sock->lock);
1326 	/* We need to add this handle to the list of active handles */
1327 	if ((size_t)atomic_load(&sock->ah) == sock->ah_size) {
1328 		sock->ah_frees =
1329 			isc_mem_reallocate(sock->mgr->mctx, sock->ah_frees,
1330 					   sock->ah_size * 2 * sizeof(size_t));
1331 		sock->ah_handles = isc_mem_reallocate(
1332 			sock->mgr->mctx, sock->ah_handles,
1333 			sock->ah_size * 2 * sizeof(isc_nmhandle_t *));
1334 
1335 		for (size_t i = sock->ah_size; i < sock->ah_size * 2; i++) {
1336 			sock->ah_frees[i] = i;
1337 			sock->ah_handles[i] = NULL;
1338 		}
1339 
1340 		sock->ah_size *= 2;
1341 	}
1342 
1343 	handlenum = atomic_fetch_add(&sock->ah, 1);
1344 	pos = sock->ah_frees[handlenum];
1345 
1346 	INSIST(sock->ah_handles[pos] == NULL);
1347 	sock->ah_handles[pos] = handle;
1348 	handle->ah_pos = pos;
1349 #ifdef NETMGR_TRACE
1350 	ISC_LIST_APPEND(sock->active_handles, handle, active_link);
1351 #endif
1352 	UNLOCK(&sock->lock);
1353 
1354 	if (sock->type == isc_nm_tcpsocket || sock->type == isc_nm_tlssocket ||
1355 	    (sock->type == isc_nm_udpsocket && atomic_load(&sock->client)) ||
1356 	    (sock->type == isc_nm_tcpdnssocket && atomic_load(&sock->client)) ||
1357 	    (sock->type == isc_nm_tlsdnssocket && atomic_load(&sock->client)))
1358 	{
1359 		INSIST(sock->statichandle == NULL);
1360 
1361 		/*
1362 		 * statichandle must be assigned, not attached;
1363 		 * otherwise, if a handle was detached elsewhere
1364 		 * it could never reach 0 references, and the
1365 		 * handle and socket would never be freed.
1366 		 */
1367 		sock->statichandle = handle;
1368 	}
1369 
1370 	return (handle);
1371 }
1372 
1373 void
1374 isc__nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **handlep FLARG) {
1375 	REQUIRE(VALID_NMHANDLE(handle));
1376 	REQUIRE(handlep != NULL && *handlep == NULL);
1377 
1378 	NETMGR_TRACE_LOG("isc__nmhandle_attach():handle %p->references = %lu\n",
1379 			 handle, isc_refcount_current(&handle->references) + 1);
1380 
1381 	isc_refcount_increment(&handle->references);
1382 	*handlep = handle;
1383 }
1384 
1385 bool
1386 isc_nmhandle_is_stream(isc_nmhandle_t *handle) {
1387 	REQUIRE(VALID_NMHANDLE(handle));
1388 
1389 	return (handle->sock->type == isc_nm_tcpsocket ||
1390 		handle->sock->type == isc_nm_tcpdnssocket ||
1391 		handle->sock->type == isc_nm_tlssocket ||
1392 		handle->sock->type == isc_nm_tlsdnssocket);
1393 }
1394 
1395 static void
1396 nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
1397 	size_t extra = sock->extrahandlesize;
1398 
1399 	isc_refcount_destroy(&handle->references);
1400 
1401 	if (handle->dofree != NULL) {
1402 		handle->dofree(handle->opaque);
1403 	}
1404 
1405 	*handle = (isc_nmhandle_t){ .magic = 0 };
1406 
1407 	isc_mem_put(sock->mgr->mctx, handle, sizeof(isc_nmhandle_t) + extra);
1408 }
1409 
1410 static void
1411 nmhandle_deactivate(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
1412 	size_t handlenum;
1413 	bool reuse = false;
1414 
1415 	/*
1416 	 * We do all of this under lock to avoid races with socket
1417 	 * destruction.  We have to do this now, because at this point the
1418 	 * socket is either unused or still attached to event->sock.
1419 	 */
1420 	LOCK(&sock->lock);
1421 
1422 	INSIST(sock->ah_handles[handle->ah_pos] == handle);
1423 	INSIST(sock->ah_size > handle->ah_pos);
1424 	INSIST(atomic_load(&sock->ah) > 0);
1425 
1426 #ifdef NETMGR_TRACE
1427 	ISC_LIST_UNLINK(sock->active_handles, handle, active_link);
1428 #endif
1429 
1430 	sock->ah_handles[handle->ah_pos] = NULL;
1431 	handlenum = atomic_fetch_sub(&sock->ah, 1) - 1;
1432 	sock->ah_frees[handlenum] = handle->ah_pos;
1433 	handle->ah_pos = 0;
1434 	if (atomic_load(&sock->active)) {
1435 		reuse = isc_astack_trypush(sock->inactivehandles, handle);
1436 	}
1437 	if (!reuse) {
1438 		nmhandle_free(sock, handle);
1439 	}
1440 	UNLOCK(&sock->lock);
1441 }
1442 
1443 void
1444 isc__nmhandle_detach(isc_nmhandle_t **handlep FLARG) {
1445 	isc_nmsocket_t *sock = NULL;
1446 	isc_nmhandle_t *handle = NULL;
1447 
1448 	REQUIRE(handlep != NULL);
1449 	REQUIRE(VALID_NMHANDLE(*handlep));
1450 
1451 	handle = *handlep;
1452 	*handlep = NULL;
1453 
1454 	sock = handle->sock;
1455 	if (sock->tid == isc_nm_tid()) {
1456 		nmhandle_detach_cb(&handle FLARG_PASS);
1457 	} else {
1458 		isc__netievent_detach_t *event =
1459 			isc__nm_get_netievent_detach(sock->mgr, sock);
1460 		/*
1461 		 * We are using an implicit "attach" here; the last reference
1462 		 * needs to be destroyed explicitly in the async callback.
1463 		 */
1464 		event->handle = handle;
1465 		FLARG_IEVENT_PASS(event);
1466 		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1467 				       (isc__netievent_t *)event);
1468 	}
1469 }
1470 
1471 static void
1472 nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG) {
1473 	isc_nmsocket_t *sock = NULL;
1474 	isc_nmhandle_t *handle = NULL;
1475 
1476 	REQUIRE(handlep != NULL);
1477 	REQUIRE(VALID_NMHANDLE(*handlep));
1478 
1479 	handle = *handlep;
1480 	*handlep = NULL;
1481 
1482 	NETMGR_TRACE_LOG("isc__nmhandle_detach():%p->references = %lu\n",
1483 			 handle, isc_refcount_current(&handle->references) - 1);
1484 
1485 	if (isc_refcount_decrement(&handle->references) > 1) {
1486 		return;
1487 	}
1488 
1489 	/* We need an acquire memory barrier here */
1490 	(void)isc_refcount_current(&handle->references);
1491 
1492 	sock = handle->sock;
1493 	handle->sock = NULL;
1494 
1495 	if (handle->doreset != NULL) {
1496 		handle->doreset(handle->opaque);
1497 	}
1498 
1499 	nmhandle_deactivate(sock, handle);
1500 
1501 	/*
1502 	 * The handle is gone now. If the socket has a callback configured
1503 	 * for that (e.g., to perform cleanup after request processing),
1504 	 * call it now, or schedule it to run asynchronously.
1505 	 */
1506 	if (sock->closehandle_cb != NULL) {
1507 		if (sock->tid == isc_nm_tid()) {
1508 			sock->closehandle_cb(sock);
1509 		} else {
1510 			isc__netievent_close_t *event =
1511 				isc__nm_get_netievent_close(sock->mgr, sock);
1512 			isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1513 					       (isc__netievent_t *)event);
1514 		}
1515 	}
1516 
1517 	if (handle == sock->statichandle) {
1518 		/* statichandle is assigned, not attached. */
1519 		sock->statichandle = NULL;
1520 	}
1521 
1522 	isc___nmsocket_detach(&sock FLARG_PASS);
1523 }
1524 
1525 void *
1526 isc_nmhandle_getdata(isc_nmhandle_t *handle) {
1527 	REQUIRE(VALID_NMHANDLE(handle));
1528 
1529 	return (handle->opaque);
1530 }
1531 
1532 int
1533 isc_nmhandle_getfd(isc_nmhandle_t *handle) {
1534 	REQUIRE(VALID_NMHANDLE(handle));
1535 
1536 	return (handle->sock->fd);
1537 }
1538 
1539 void
1540 isc_nmhandle_setdata(isc_nmhandle_t *handle, void *arg,
1541 		     isc_nm_opaquecb_t doreset, isc_nm_opaquecb_t dofree) {
1542 	REQUIRE(VALID_NMHANDLE(handle));
1543 
1544 	handle->opaque = arg;
1545 	handle->doreset = doreset;
1546 	handle->dofree = dofree;
1547 }
1548 
1549 void
1550 isc_nmhandle_settimeout(isc_nmhandle_t *handle, uint32_t timeout) {
1551 	REQUIRE(VALID_NMHANDLE(handle));
1552 
1553 	switch (handle->sock->type) {
1554 	case isc_nm_udpsocket:
1555 		isc__nm_udp_settimeout(handle, timeout);
1556 		break;
1557 	case isc_nm_tcpsocket:
1558 		isc__nm_tcp_settimeout(handle, timeout);
1559 		break;
1560 	case isc_nm_tcpdnssocket:
1561 		isc__nm_tcpdns_settimeout(handle, timeout);
1562 		break;
1563 	case isc_nm_tlsdnssocket:
1564 		isc__nm_tlsdns_settimeout(handle, timeout);
1565 		break;
1566 	default:
1567 		INSIST(0);
1568 		ISC_UNREACHABLE();
1569 	}
1570 }
1571 
1572 void *
1573 isc_nmhandle_getextra(isc_nmhandle_t *handle) {
1574 	REQUIRE(VALID_NMHANDLE(handle));
1575 
1576 	return (handle->extra);
1577 }
1578 
1579 isc_sockaddr_t
1580 isc_nmhandle_peeraddr(isc_nmhandle_t *handle) {
1581 	REQUIRE(VALID_NMHANDLE(handle));
1582 
1583 	return (handle->peer);
1584 }
1585 
1586 isc_sockaddr_t
1587 isc_nmhandle_localaddr(isc_nmhandle_t *handle) {
1588 	REQUIRE(VALID_NMHANDLE(handle));
1589 
1590 	return (handle->local);
1591 }
1592 
1593 isc_nm_t *
1594 isc_nmhandle_netmgr(isc_nmhandle_t *handle) {
1595 	REQUIRE(VALID_NMHANDLE(handle));
1596 	REQUIRE(VALID_NMSOCK(handle->sock));
1597 
1598 	return (handle->sock->mgr);
1599 }
1600 
1601 isc__nm_uvreq_t *
1602 isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG) {
1603 	isc__nm_uvreq_t *req = NULL;
1604 
1605 	REQUIRE(VALID_NM(mgr));
1606 	REQUIRE(VALID_NMSOCK(sock));
1607 
1608 	if (sock != NULL && isc__nmsocket_active(sock)) {
1609 		/* Try to reuse one */
1610 		req = isc_astack_pop(sock->inactivereqs);
1611 	}
1612 
1613 	if (req == NULL) {
1614 		req = isc_mempool_get(mgr->reqpool);
1615 	}
1616 
1617 	*req = (isc__nm_uvreq_t){ .magic = 0 };
1618 	ISC_LINK_INIT(req, link);
1619 	req->uv_req.req.data = req;
1620 	isc___nmsocket_attach(sock, &req->sock FLARG_PASS);
1621 	req->magic = UVREQ_MAGIC;
1622 
1623 	return (req);
1624 }
1625 
1626 void
1627 isc___nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock FLARG) {
1628 	isc__nm_uvreq_t *req = NULL;
1629 	isc_nmhandle_t *handle = NULL;
1630 
1631 	REQUIRE(req0 != NULL);
1632 	REQUIRE(VALID_UVREQ(*req0));
1633 
1634 	req = *req0;
1635 	*req0 = NULL;
1636 
1637 	INSIST(sock == req->sock);
1638 
1639 	req->magic = 0;
1640 
1641 	/*
1642 	 * We need to save this first to make sure that handle,
1643 	 * sock, and the netmgr won't all disappear.
1644 	 */
1645 	handle = req->handle;
1646 	req->handle = NULL;
1647 
1648 	if (!isc__nmsocket_active(sock) ||
1649 	    !isc_astack_trypush(sock->inactivereqs, req)) {
1650 		isc_mempool_put(sock->mgr->reqpool, req);
1651 	}
1652 
1653 	if (handle != NULL) {
1654 		isc__nmhandle_detach(&handle FLARG_PASS);
1655 	}
1656 
1657 	isc___nmsocket_detach(&sock FLARG_PASS);
1658 }
1659 
1660 void
1661 isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb,
1662 	    void *cbarg) {
1663 	REQUIRE(VALID_NMHANDLE(handle));
1664 
1665 	switch (handle->sock->type) {
1666 	case isc_nm_udpsocket:
1667 	case isc_nm_udplistener:
1668 		isc__nm_udp_send(handle, region, cb, cbarg);
1669 		break;
1670 	case isc_nm_tcpsocket:
1671 		isc__nm_tcp_send(handle, region, cb, cbarg);
1672 		break;
1673 	case isc_nm_tcpdnssocket:
1674 		isc__nm_tcpdns_send(handle, region, cb, cbarg);
1675 		break;
1676 	case isc_nm_tlssocket:
1677 		isc__nm_tls_send(handle, region, cb, cbarg);
1678 		break;
1679 	case isc_nm_tlsdnssocket:
1680 		isc__nm_tlsdns_send(handle, region, cb, cbarg);
1681 		break;
1682 	default:
1683 		INSIST(0);
1684 		ISC_UNREACHABLE();
1685 	}
1686 }
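/*
 * Editorial sketch (not part of the original source): a hedged example of
 * sending a region on a handle.  The callback signature is inferred from
 * the cb.send() invocation in isc__nm_async_sendcb() below; "my_send_done",
 * "region", and "cbarg" are hypothetical names.
 *
 *	static void
 *	my_send_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
 *		...
 *	}
 *
 *	isc_nm_send(handle, &region, my_send_done, cbarg);
 */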
1687 
1688 void
1689 isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
1690 	REQUIRE(VALID_NMHANDLE(handle));
1691 
1692 	/*
1693 	 * This is always called via a callback (from accept or connect), and
1694 	 * the caller must have attached to the handle, so the reference count
1695 	 * must be at least 2.
1696 	 */
1697 	REQUIRE(isc_refcount_current(&handle->references) >= 2);
1698 
1699 	switch (handle->sock->type) {
1700 	case isc_nm_udpsocket:
1701 		isc__nm_udp_read(handle, cb, cbarg);
1702 		break;
1703 	case isc_nm_tcpsocket:
1704 		isc__nm_tcp_read(handle, cb, cbarg);
1705 		break;
1706 	case isc_nm_tcpdnssocket:
1707 		isc__nm_tcpdns_read(handle, cb, cbarg);
1708 		break;
1709 	case isc_nm_tlssocket:
1710 		isc__nm_tls_read(handle, cb, cbarg);
1711 		break;
1712 	case isc_nm_tlsdnssocket:
1713 		isc__nm_tlsdns_read(handle, cb, cbarg);
1714 		break;
1715 	default:
1716 		INSIST(0);
1717 		ISC_UNREACHABLE();
1718 	}
1719 }
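/*
 * Editorial sketch (not part of the original source): the shape of a read
 * callback, inferred from the cb.recv() invocation in isc__nm_async_readcb()
 * below; "my_read_cb" and "cbarg" are hypothetical names.
 *
 *	static void
 *	my_read_cb(isc_nmhandle_t *handle, isc_result_t result,
 *		   isc_region_t *region, void *cbarg) {
 *		...
 *	}
 *
 *	isc_nm_read(handle, my_read_cb, cbarg);
 */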
1720 
1721 void
1722 isc_nm_cancelread(isc_nmhandle_t *handle) {
1723 	REQUIRE(VALID_NMHANDLE(handle));
1724 
1725 	switch (handle->sock->type) {
1726 	case isc_nm_udpsocket:
1727 		isc__nm_udp_cancelread(handle);
1728 		break;
1729 	case isc_nm_tcpsocket:
1730 		isc__nm_tcp_cancelread(handle);
1731 		break;
1732 	case isc_nm_tcpdnssocket:
1733 		isc__nm_tcpdns_cancelread(handle);
1734 		break;
1735 	case isc_nm_tlsdnssocket:
1736 		isc__nm_tlsdns_cancelread(handle);
1737 		break;
1738 	default:
1739 		INSIST(0);
1740 		ISC_UNREACHABLE();
1741 	}
1742 }
1743 
1744 void
1745 isc_nm_pauseread(isc_nmhandle_t *handle) {
1746 	REQUIRE(VALID_NMHANDLE(handle));
1747 
1748 	isc_nmsocket_t *sock = handle->sock;
1749 
1750 	switch (sock->type) {
1751 	case isc_nm_tcpsocket:
1752 		isc__nm_tcp_pauseread(handle);
1753 		break;
1754 	case isc_nm_tlssocket:
1755 		isc__nm_tls_pauseread(handle);
1756 		break;
1757 	default:
1758 		INSIST(0);
1759 		ISC_UNREACHABLE();
1760 	}
1761 }
1762 
1763 void
1764 isc_nm_resumeread(isc_nmhandle_t *handle) {
1765 	REQUIRE(VALID_NMHANDLE(handle));
1766 
1767 	isc_nmsocket_t *sock = handle->sock;
1768 
1769 	switch (sock->type) {
1770 	case isc_nm_tcpsocket:
1771 		isc__nm_tcp_resumeread(handle);
1772 		break;
1773 	case isc_nm_tlssocket:
1774 		isc__nm_tls_resumeread(handle);
1775 		break;
1776 	default:
1777 		INSIST(0);
1778 		ISC_UNREACHABLE();
1779 	}
1780 }
1781 
1782 void
1783 isc_nm_stoplistening(isc_nmsocket_t *sock) {
1784 	REQUIRE(VALID_NMSOCK(sock));
1785 
1786 	switch (sock->type) {
1787 	case isc_nm_udplistener:
1788 		isc__nm_udp_stoplistening(sock);
1789 		break;
1790 	case isc_nm_tcpdnslistener:
1791 		isc__nm_tcpdns_stoplistening(sock);
1792 		break;
1793 	case isc_nm_tcplistener:
1794 		isc__nm_tcp_stoplistening(sock);
1795 		break;
1796 	case isc_nm_tlslistener:
1797 		isc__nm_tls_stoplistening(sock);
1798 		break;
1799 	case isc_nm_tlsdnslistener:
1800 		isc__nm_tlsdns_stoplistening(sock);
1801 		break;
1802 	default:
1803 		INSIST(0);
1804 		ISC_UNREACHABLE();
1805 	}
1806 }
1807 
1808 void
1809 isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1810 		  isc_result_t eresult) {
1811 	REQUIRE(VALID_NMSOCK(sock));
1812 	REQUIRE(VALID_UVREQ(uvreq));
1813 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
1814 
1815 	if (eresult == ISC_R_SUCCESS) {
1816 		isc__netievent_connectcb_t ievent = { .sock = sock,
1817 						      .req = uvreq,
1818 						      .result = eresult };
1819 		isc__nm_async_connectcb(NULL, (isc__netievent_t *)&ievent);
1820 	} else {
1821 		isc__netievent_connectcb_t *ievent =
1822 			isc__nm_get_netievent_connectcb(sock->mgr, sock, uvreq,
1823 							eresult);
1824 		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1825 				       (isc__netievent_t *)ievent);
1826 	}
1827 }
1828 
1829 void
1830 isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0) {
1831 	isc__netievent_connectcb_t *ievent = (isc__netievent_connectcb_t *)ev0;
1832 	isc_nmsocket_t *sock = ievent->sock;
1833 	isc__nm_uvreq_t *uvreq = ievent->req;
1834 	isc_result_t eresult = ievent->result;
1835 
1836 	UNUSED(worker);
1837 
1838 	REQUIRE(VALID_NMSOCK(sock));
1839 	REQUIRE(VALID_UVREQ(uvreq));
1840 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
1841 	REQUIRE(ievent->sock->tid == isc_nm_tid());
1842 	REQUIRE(uvreq->cb.connect != NULL);
1843 
1844 	uvreq->cb.connect(uvreq->handle, eresult, uvreq->cbarg);
1845 
1846 	isc__nm_uvreq_put(&uvreq, sock);
1847 }
1848 
1849 void
1850 isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1851 	       isc_result_t eresult) {
1852 	REQUIRE(VALID_NMSOCK(sock));
1853 	REQUIRE(VALID_UVREQ(uvreq));
1854 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
1855 
1856 	if (eresult == ISC_R_SUCCESS) {
1857 		isc__netievent_readcb_t ievent = { .sock = sock,
1858 						   .req = uvreq,
1859 						   .result = eresult };
1860 
1861 		isc__nm_async_readcb(NULL, (isc__netievent_t *)&ievent);
1862 	} else {
1863 		isc__netievent_readcb_t *ievent = isc__nm_get_netievent_readcb(
1864 			sock->mgr, sock, uvreq, eresult);
1865 		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1866 				       (isc__netievent_t *)ievent);
1867 	}
1868 }
1869 
1870 void
1871 isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0) {
1872 	isc__netievent_readcb_t *ievent = (isc__netievent_readcb_t *)ev0;
1873 	isc_nmsocket_t *sock = ievent->sock;
1874 	isc__nm_uvreq_t *uvreq = ievent->req;
1875 	isc_result_t eresult = ievent->result;
1876 	isc_region_t region = { .base = (unsigned char *)uvreq->uvbuf.base,
1877 				.length = uvreq->uvbuf.len };
1878 
1879 	UNUSED(worker);
1880 
1881 	REQUIRE(VALID_NMSOCK(sock));
1882 	REQUIRE(VALID_UVREQ(uvreq));
1883 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
1884 	REQUIRE(sock->tid == isc_nm_tid());
1885 
1886 	uvreq->cb.recv(uvreq->handle, eresult, &region, uvreq->cbarg);
1887 
1888 	isc__nm_uvreq_put(&uvreq, sock);
1889 }
1890 
1891 void
1892 isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1893 	       isc_result_t eresult) {
1894 	REQUIRE(VALID_NMSOCK(sock));
1895 	REQUIRE(VALID_UVREQ(uvreq));
1896 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
1897 
1898 	if (eresult == ISC_R_SUCCESS) {
1899 		isc__netievent_sendcb_t ievent = { .sock = sock,
1900 						   .req = uvreq,
1901 						   .result = eresult };
1902 		isc__nm_async_sendcb(NULL, (isc__netievent_t *)&ievent);
1903 	} else {
1904 		isc__netievent_sendcb_t *ievent = isc__nm_get_netievent_sendcb(
1905 			sock->mgr, sock, uvreq, eresult);
1906 		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1907 				       (isc__netievent_t *)ievent);
1908 	}
1909 }
1910 
1911 void
1912 isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0) {
1913 	isc__netievent_sendcb_t *ievent = (isc__netievent_sendcb_t *)ev0;
1914 	isc_nmsocket_t *sock = ievent->sock;
1915 	isc__nm_uvreq_t *uvreq = ievent->req;
1916 	isc_result_t eresult = ievent->result;
1917 
1918 	UNUSED(worker);
1919 
1920 	REQUIRE(VALID_NMSOCK(sock));
1921 	REQUIRE(VALID_UVREQ(uvreq));
1922 	REQUIRE(VALID_NMHANDLE(uvreq->handle));
1923 	REQUIRE(sock->tid == isc_nm_tid());
1924 
1925 	uvreq->cb.send(uvreq->handle, eresult, uvreq->cbarg);
1926 
1927 	isc__nm_uvreq_put(&uvreq, sock);
1928 }
1929 
1930 static void
1931 isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0) {
1932 	isc__netievent_close_t *ievent = (isc__netievent_close_t *)ev0;
1933 	isc_nmsocket_t *sock = ievent->sock;
1934 
1935 	REQUIRE(VALID_NMSOCK(ievent->sock));
1936 	REQUIRE(sock->tid == isc_nm_tid());
1937 	REQUIRE(sock->closehandle_cb != NULL);
1938 
1939 	UNUSED(worker);
1940 
1941 	ievent->sock->closehandle_cb(sock);
1942 }
1943 
1944 void
1945 isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0) {
1946 	isc__netievent_detach_t *ievent = (isc__netievent_detach_t *)ev0;
1947 	FLARG_IEVENT(ievent);
1948 
1949 	REQUIRE(VALID_NMSOCK(ievent->sock));
1950 	REQUIRE(VALID_NMHANDLE(ievent->handle));
1951 	REQUIRE(ievent->sock->tid == isc_nm_tid());
1952 
1953 	UNUSED(worker);
1954 
1955 	nmhandle_detach_cb(&ievent->handle FLARG_PASS);
1956 }
1957 
1958 static void
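/*
 * Called by uv_walk() for every libuv handle on the worker's loop
 * during shutdown: handles that are already closing, handles that are
 * neither UDP nor TCP, and listener sockets are skipped; everything
 * else is dispatched to the matching per-transport shutdown routine
 * (currently a no-op for TLS DNS sockets).
 */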
1959 shutdown_walk_cb(uv_handle_t *handle, void *arg) {
1960 	isc_nmsocket_t *sock = uv_handle_get_data(handle);
1961 	UNUSED(arg);
1962 
1963 	if (uv_is_closing(handle)) {
1964 		return;
1965 	}
1966 
1967 	switch (handle->type) {
1968 	case UV_UDP:
1969 	case UV_TCP:
1970 		break;
1971 	default:
1972 		return;
1973 	}
1974 
1975 	REQUIRE(VALID_NMSOCK(sock));
1976 	switch (sock->type) {
1977 	case isc_nm_udpsocket:
1978 		isc__nm_udp_shutdown(sock);
1979 		break;
1980 	case isc_nm_tcpsocket:
1981 		isc__nm_tcp_shutdown(sock);
1982 		break;
1983 	case isc_nm_tcpdnssocket:
1984 		isc__nm_tcpdns_shutdown(sock);
1985 		break;
1986 	case isc_nm_tlsdnssocket:
1987 		/* placeholder: no TLS DNS shutdown handling yet */
1988 		break;
1989 	case isc_nm_udplistener:
1990 	case isc_nm_tcplistener:
1991 	case isc_nm_tcpdnslistener:
1992 	case isc_nm_tlsdnslistener:
1993 		return;
1994 	default:
1995 		INSIST(0);
1996 		ISC_UNREACHABLE();
1997 	}
1998 }
1999 
2000 void
2001 isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0) {
2002 	UNUSED(ev0);
2003 	uv_walk(&worker->loop, shutdown_walk_cb, NULL);
2004 }
2005 
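/*
 * The manager-wide "interlocked" flag is a simple mutual-exclusion
 * token protected by mgr->lock: isc__nm_acquire_interlocked() makes a
 * single attempt to take the flag and reports whether it succeeded,
 * isc__nm_acquire_interlocked_force() waits on wkstatecond until it
 * can take it, and isc__nm_drop_interlocked() releases the flag and
 * wakes any waiters.
 */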
2006 bool
2007 isc__nm_acquire_interlocked(isc_nm_t *mgr) {
2008 	LOCK(&mgr->lock);
2009 	bool success = atomic_compare_exchange_strong(&mgr->interlocked,
2010 						      &(bool){ false }, true);
2011 	UNLOCK(&mgr->lock);
2012 	return (success);
2013 }
2014 
2015 void
2016 isc__nm_drop_interlocked(isc_nm_t *mgr) {
2017 	LOCK(&mgr->lock);
2018 	bool success = atomic_compare_exchange_strong(&mgr->interlocked,
2019 						      &(bool){ true }, false);
2020 	INSIST(success);
2021 	BROADCAST(&mgr->wkstatecond);
2022 	UNLOCK(&mgr->lock);
2023 }
2024 
2025 void
2026 isc__nm_acquire_interlocked_force(isc_nm_t *mgr) {
2027 	LOCK(&mgr->lock);
2028 	while (!atomic_compare_exchange_strong(&mgr->interlocked,
2029 					       &(bool){ false }, true))
2030 	{
2031 		WAIT(&mgr->wkstatecond, &mgr->lock);
2032 	}
2033 	UNLOCK(&mgr->lock);
2034 }
2035 
2036 void
2037 isc_nm_setstats(isc_nm_t *mgr, isc_stats_t *stats) {
2038 	REQUIRE(VALID_NM(mgr));
2039 	REQUIRE(mgr->stats == NULL);
2040 	REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2041 
2042 	isc_stats_attach(stats, &mgr->stats);
2043 }
2044 
2045 void
2046 isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
2047 	REQUIRE(VALID_NM(mgr));
2048 	REQUIRE(counterid != -1);
2049 
2050 	if (mgr->stats != NULL) {
2051 		isc_stats_increment(mgr->stats, counterid);
2052 	}
2053 }
2054 
2055 void
2056 isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
2057 	REQUIRE(VALID_NM(mgr));
2058 	REQUIRE(counterid != -1);
2059 
2060 	if (mgr->stats != NULL) {
2061 		isc_stats_decrement(mgr->stats, counterid);
2062 	}
2063 }
2064 
2065 isc_result_t
2066 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) {
2067 #ifdef WIN32
2068 	SOCKET sock;
2069 	sock = socket(domain, type, protocol);
2070 	if (sock == INVALID_SOCKET) {
2071 		char strbuf[ISC_STRERRORSIZE];
2072 		DWORD socket_errno = WSAGetLastError();
2073 		switch (socket_errno) {
2074 		case WSAEMFILE:
2075 		case WSAENOBUFS:
2076 			return (ISC_R_NORESOURCES);
2077 
2078 		case WSAEPROTONOSUPPORT:
2079 		case WSAEPFNOSUPPORT:
2080 		case WSAEAFNOSUPPORT:
2081 			return (ISC_R_FAMILYNOSUPPORT);
2082 		default:
2083 			strerror_r(socket_errno, strbuf, sizeof(strbuf));
2084 			UNEXPECTED_ERROR(
2085 				__FILE__, __LINE__,
2086 				"socket() failed with error code %lu: %s",
2087 				socket_errno, strbuf);
2088 			return (ISC_R_UNEXPECTED);
2089 		}
2090 	}
2091 #else
2092 	int sock = socket(domain, type, protocol);
2093 	if (sock < 0) {
2094 		return (isc_errno_toresult(errno));
2095 	}
2096 #endif
2097 	*sockp = (uv_os_sock_t)sock;
2098 	return (ISC_R_SUCCESS);
2099 }
2100 
2101 void
2102 isc__nm_closesocket(uv_os_sock_t sock) {
2103 #ifdef WIN32
2104 	closesocket(sock);
2105 #else
2106 	close(sock);
2107 #endif
2108 }
2109 
2110 #define setsockopt_on(socket, level, name) \
2111 	setsockopt(socket, level, name, &(int){ 1 }, sizeof(int))
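
/*
 * For example, setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) expands to
 * setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &(int){ 1 }, sizeof(int)),
 * i.e. it switches the named boolean option on.
 */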
2112 
2113 isc_result_t
2114 isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
2115 	/*
2116 	 * Set the IP_FREEBIND (or equivalent) socket option on the given fd.
2117 	 */
2118 #ifdef IP_FREEBIND
2119 	UNUSED(sa_family);
2120 	if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
2121 		return (ISC_R_FAILURE);
2122 	}
2123 	return (ISC_R_SUCCESS);
2124 #elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
2125 	if (sa_family == AF_INET) {
2126 #if defined(IP_BINDANY)
2127 		if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
2128 			return (ISC_R_FAILURE);
2129 		}
2130 		return (ISC_R_SUCCESS);
2131 #endif
2132 	} else if (sa_family == AF_INET6) {
2133 #if defined(IPV6_BINDANY)
2134 		if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
2135 			return (ISC_R_FAILURE);
2136 		}
2137 		return (ISC_R_SUCCESS);
2138 #endif
2139 	}
2140 	return (ISC_R_NOTIMPLEMENTED);
2141 #elif defined(SO_BINDANY)
2142 	UNUSED(sa_family);
2143 	if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
2144 		return (ISC_R_FAILURE);
2145 	}
2146 	return (ISC_R_SUCCESS);
2147 #else
2148 	UNUSED(fd);
2149 	UNUSED(sa_family);
2150 	return (ISC_R_NOTIMPLEMENTED);
2151 #endif
2152 }
2153 
2154 isc_result_t
2155 isc__nm_socket_reuse(uv_os_sock_t fd) {
2156 	/*
2157 	 * Generally, the SO_REUSEADDR socket option allows reuse of
2158 	 * local addresses.
2159 	 *
2160 	 * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
2161 	 * additional refinements for programs that use multicast.
2162 	 *
2163 	 * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
2164 	 * rather than stealing it from the current listener, so we use it in
2165 	 * isc__nm_socket_reuse_lb() instead of here.
2166 	 *
2167 	 * On Windows, SO_REUSEADDR additionally allows a socket to forcibly
2168 	 * bind to a port that is already in use by another socket.
2169 	 */
2170 
2171 #if defined(SO_REUSEPORT) && !defined(__linux__)
2172 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
2173 		return (ISC_R_FAILURE);
2174 	}
2175 	return (ISC_R_SUCCESS);
2176 #elif defined(SO_REUSEADDR)
2177 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) {
2178 		return (ISC_R_FAILURE);
2179 	}
2180 	return (ISC_R_SUCCESS);
2181 #else
2182 	UNUSED(fd);
2183 	return (ISC_R_NOTIMPLEMENTED);
2184 #endif
2185 }
2186 
2187 isc_result_t
2188 isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
2189 	/*
2190 	 * On FreeBSD 12+, the SO_REUSEPORT_LB socket option allows sockets to be
2191 	 * bound to an identical socket address. For UDP sockets, the use of
2192 	 * this option can provide better distribution of incoming datagrams to
2193 	 * multiple processes (or threads) as compared to the traditional
2194 	 * technique of having multiple processes compete to receive datagrams
2195 	 * on the same socket.
2196 	 *
2197 	 * On Linux, the same thing is achieved simply with SO_REUSEPORT.
2198 	 */
2199 #if defined(SO_REUSEPORT_LB)
2200 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
2201 		return (ISC_R_FAILURE);
2202 	} else {
2203 		return (ISC_R_SUCCESS);
2204 	}
2205 #elif defined(SO_REUSEPORT) && defined(__linux__)
2206 	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
2207 		return (ISC_R_FAILURE);
2208 	} else {
2209 		return (ISC_R_SUCCESS);
2210 	}
2211 #else
2212 	UNUSED(fd);
2213 	return (ISC_R_NOTIMPLEMENTED);
2214 #endif
2215 }
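
/*
 * Illustrative only (never compiled): a minimal sketch of one plausible
 * way a caller could combine the helpers above when preparing a
 * load-balanced datagram listener.  The helper name, the bind() step
 * and the SOCK_DGRAM parameter are assumptions for the example, not the
 * exact sequence used by the netmgr listen code.
 */
#if 0
static isc_result_t
example_prepare_lb_listener(const struct sockaddr *sa, socklen_t salen,
			    uv_os_sock_t *sockp) {
	isc_result_t result;

	result = isc__nm_socket(sa->sa_family, SOCK_DGRAM, 0, sockp);
	if (result != ISC_R_SUCCESS) {
		return (result);
	}

	/* Best-effort socket options; not all are supported everywhere. */
	(void)isc__nm_socket_freebind(*sockp, sa->sa_family);
	(void)isc__nm_socket_reuse(*sockp);
	(void)isc__nm_socket_reuse_lb(*sockp);

	if (bind(*sockp, sa, salen) != 0) {
		isc__nm_closesocket(*sockp);
		return (isc_errno_toresult(errno));
	}
	return (ISC_R_SUCCESS);
}
#endif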
2216 
2217 isc_result_t
2218 isc__nm_socket_incoming_cpu(uv_os_sock_t fd) {
2219 #ifdef SO_INCOMING_CPU
2220 	if (setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU) == -1) {
2221 		return (ISC_R_FAILURE);
2222 	} else {
2223 		return (ISC_R_SUCCESS);
2224 	}
2225 #else
2226 	UNUSED(fd);
2227 #endif
2228 	return (ISC_R_NOTIMPLEMENTED);
2229 }
2230 
2231 isc_result_t
2232 isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family) {
2233 	/*
2234 	 * Set the Don't Fragment flag on outgoing IP packets.
2235 	 */
2236 	if (sa_family == AF_INET6) {
2237 #if defined(IPV6_DONTFRAG)
2238 		if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
2239 			return (ISC_R_FAILURE);
2240 		} else {
2241 			return (ISC_R_SUCCESS);
2242 		}
2243 #elif defined(IPV6_MTU_DISCOVER)
2244 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
2245 			       &(int){ IP_PMTUDISC_DO }, sizeof(int)) == -1)
2246 		{
2247 			return (ISC_R_FAILURE);
2248 		} else {
2249 			return (ISC_R_SUCCESS);
2250 		}
2251 #else
2252 		UNUSED(fd);
2253 #endif
2254 	} else if (sa_family == AF_INET) {
2255 #if defined(IP_DONTFRAG)
2256 		if (setsockopt_on(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
2257 			return (ISC_R_FAILURE);
2258 		} else {
2259 			return (ISC_R_SUCCESS);
2260 		}
2261 #elif defined(IP_MTU_DISCOVER)
2262 		if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
2263 			       &(int){ IP_PMTUDISC_DO }, sizeof(int)) == -1)
2264 		{
2265 			return (ISC_R_FAILURE);
2266 		} else {
2267 			return (ISC_R_SUCCESS);
2268 		}
2269 #else
2270 		UNUSED(fd);
2271 #endif
2272 	} else {
2273 		return (ISC_R_FAMILYNOSUPPORT);
2274 	}
2275 
2276 	return (ISC_R_NOTIMPLEMENTED);
2277 }
2278 
2279 #if defined(_WIN32)
2280 #define TIMEOUT_TYPE	DWORD
2281 #define TIMEOUT_DIV	1000
2282 #define TIMEOUT_OPTNAME TCP_MAXRT
2283 #elif defined(TCP_CONNECTIONTIMEOUT)
2284 #define TIMEOUT_TYPE	int
2285 #define TIMEOUT_DIV	1000
2286 #define TIMEOUT_OPTNAME TCP_CONNECTIONTIMEOUT
2287 #elif defined(TCP_RXT_CONNDROPTIME)
2288 #define TIMEOUT_TYPE	int
2289 #define TIMEOUT_DIV	1000
2290 #define TIMEOUT_OPTNAME TCP_RXT_CONNDROPTIME
2291 #elif defined(TCP_USER_TIMEOUT)
2292 #define TIMEOUT_TYPE	unsigned int
2293 #define TIMEOUT_DIV	1
2294 #define TIMEOUT_OPTNAME TCP_USER_TIMEOUT
2295 #elif defined(TCP_KEEPINIT)
2296 #define TIMEOUT_TYPE	int
2297 #define TIMEOUT_DIV	1000
2298 #define TIMEOUT_OPTNAME TCP_KEEPINIT
2299 #endif
2300 
2301 isc_result_t
2302 isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
2303 #if defined(TIMEOUT_OPTNAME)
2304 	TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV;
2305 
2306 	if (timeout == 0) {
2307 		timeout = 1;
2308 	}
2309 
2310 	if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout,
2311 		       sizeof(timeout)) == -1)
2312 	{
2313 		return (ISC_R_FAILURE);
2314 	}
2315 
2316 	return (ISC_R_SUCCESS);
2317 #else
2318 	UNUSED(fd);
2319 	UNUSED(timeout_ms);
2320 
2321 	return (ISC_R_SUCCESS);
2322 #endif
2323 }
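
/*
 * For example, isc__nm_socket_connectiontimeout(fd, 30000) sets
 * TCP_USER_TIMEOUT to 30000 on Linux (the option takes milliseconds,
 * so TIMEOUT_DIV is 1), while on platforms whose option takes seconds
 * (TIMEOUT_DIV is 1000, e.g. TCP_CONNECTIONTIMEOUT or TCP_KEEPINIT)
 * the same call sets the option to 30.  A requested timeout shorter
 * than one unit is clamped to 1 rather than 0.
 */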
2324 
2325 #ifdef NETMGR_TRACE
2326 /*
2327  * Dump all active sockets in the netmgr. We write to stderr
2328  * because the logger might already be shut down.
2329  */
2330 
2331 static const char *
2332 nmsocket_type_totext(isc_nmsocket_type type) {
2333 	switch (type) {
2334 	case isc_nm_udpsocket:
2335 		return ("isc_nm_udpsocket");
2336 	case isc_nm_udplistener:
2337 		return ("isc_nm_udplistener");
2338 	case isc_nm_tcpsocket:
2339 		return ("isc_nm_tcpsocket");
2340 	case isc_nm_tcplistener:
2341 		return ("isc_nm_tcplistener");
2342 	case isc_nm_tcpdnslistener:
2343 		return ("isc_nm_tcpdnslistener");
2344 	case isc_nm_tcpdnssocket:
2345 		return ("isc_nm_tcpdnssocket");
2346 	case isc_nm_tlssocket:
2347 		return ("isc_nm_tlssocket");
2348 	case isc_nm_tlslistener:
2349 		return ("isc_nm_tlslistener");
2350 	case isc_nm_tlsdnslistener:
2351 		return ("isc_nm_tlsdnslistener");
2352 	case isc_nm_tlsdnssocket:
2353 		return ("isc_nm_tlsdnssocket");
2354 	default:
2355 		INSIST(0);
2356 		ISC_UNREACHABLE();
2357 	}
2358 }
2359 
2360 static void
2361 nmhandle_dump(isc_nmhandle_t *handle) {
2362 	fprintf(stderr, "Active handle %p, refs %lu\n", handle,
2363 		isc_refcount_current(&handle->references));
2364 	fprintf(stderr, "Created by:\n");
2365 	backtrace_symbols_fd(handle->backtrace, handle->backtrace_size,
2366 			     STDERR_FILENO);
2367 	fprintf(stderr, "\n\n");
2368 }
2369 
2370 static void
2371 nmsocket_dump(isc_nmsocket_t *sock) {
2372 	isc_nmhandle_t *handle = NULL;
2373 
2374 	LOCK(&sock->lock);
2375 	fprintf(stderr, "\n=================\n");
2376 	fprintf(stderr, "Active %s socket %p, type %s, refs %lu\n",
2377 		sock->client ? "client" : "server", sock,
2378 		nmsocket_type_totext(sock->type),
2379 		isc_refcount_current(&sock->references));
2380 	fprintf(stderr,
2381 		"Parent %p, listener %p, server %p, statichandle = %p\n",
2382 		sock->parent, sock->listener, sock->server, sock->statichandle);
2383 	fprintf(stderr, "Flags:%s%s%s%s%s\n", sock->active ? " active" : "",
2384 		sock->closing ? " closing" : "",
2385 		sock->destroying ? " destroying" : "",
2386 		sock->connecting ? " connecting" : "",
2387 		sock->accepting ? " accepting" : "");
2388 	fprintf(stderr, "Created by:\n");
2389 	backtrace_symbols_fd(sock->backtrace, sock->backtrace_size,
2390 			     STDERR_FILENO);
2391 	fprintf(stderr, "\n");
2392 
2393 	for (handle = ISC_LIST_HEAD(sock->active_handles); handle != NULL;
2394 	     handle = ISC_LIST_NEXT(handle, active_link))
2395 	{
2396 		static bool first = true;
2397 		if (first) {
2398 			fprintf(stderr, "Active handles:\n");
2399 			first = false;
2400 		}
2401 		nmhandle_dump(handle);
2402 	}
2403 
2404 	fprintf(stderr, "\n");
2405 	UNLOCK(&sock->lock);
2406 }
2407 
2408 void
2409 isc__nm_dump_active(isc_nm_t *nm) {
2410 	isc_nmsocket_t *sock = NULL;
2411 
2412 	REQUIRE(VALID_NM(nm));
2413 
2414 	LOCK(&nm->lock);
2415 	for (sock = ISC_LIST_HEAD(nm->active_sockets); sock != NULL;
2416 	     sock = ISC_LIST_NEXT(sock, active_link))
2417 	{
2418 		static bool first = true;
2419 		if (first) {
2420 			fprintf(stderr, "Outstanding sockets\n");
2421 			first = false;
2422 		}
2423 		nmsocket_dump(sock);
2424 	}
2425 	UNLOCK(&nm->lock);
2426 }
2427 #endif
2428