/*	$NetBSD: tcp.c,v 1.10 2024/02/21 22:52:32 christos Exp $	*/

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#include <libgen.h>
#include <unistd.h>
#include <uv.h>

#include <isc/atomic.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/errno.h>
#include <isc/log.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/quota.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/stdtime.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "netmgr-int.h"
#include "uv-compat.h"

static atomic_uint_fast32_t last_tcpquota_log = 0;

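/*
 * Rate-limit TCP quota log messages: atomically swap in the current
 * timestamp and allow logging only when the stored second has changed,
 * i.e. at most one message per second across all threads.
 */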
static bool
can_log_tcp_quota(void) {
	isc_stdtime_t now, last;

	isc_stdtime_get(&now);
	last = atomic_exchange_relaxed(&last_tcpquota_log, now);
	if (now != last) {
		return (true);
	}

	return (false);
}

static isc_result_t
tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);

static void
tcp_close_direct(isc_nmsocket_t *sock);

static isc_result_t
tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
static void
tcp_connect_cb(uv_connect_t *uvreq, int status);

static void
tcp_connection_cb(uv_stream_t *server, int status);

static void
tcp_close_cb(uv_handle_t *uvhandle);

static isc_result_t
accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota);

static void
quota_accept_cb(isc_quota_t *quota, void *sock0);

static void
failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult);

static void
stop_tcp_parent(isc_nmsocket_t *sock);
static void
stop_tcp_child(isc_nmsocket_t *sock);

static void
failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) {
	REQUIRE(atomic_load(&sock->accepting));
	REQUIRE(sock->server);

	/*
	 * Detach the quota early to make room for other connections;
	 * otherwise it would be detached later, asynchronously, and
	 * would clog the quota unnecessarily.
	 */
	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}

	isc__nmsocket_detach(&sock->server);

	atomic_store(&sock->accepting, false);

	switch (eresult) {
	case ISC_R_NOTCONNECTED:
		/* IGNORE: The client disconnected before we could accept */
		break;
	default:
		isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
			      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
			      "Accepting TCP connection failed: %s",
			      isc_result_totext(eresult));
	}
}

static isc_result_t
tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
	isc__networker_t *worker = NULL;
	isc_result_t result = ISC_R_UNSET;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_UVREQ(req));

	REQUIRE(isc__nm_in_netthread());
	REQUIRE(sock->tid == isc_nm_tid());

	worker = &sock->mgr->workers[sock->tid];

	atomic_store(&sock->connecting, true);

	/* 2 minute timeout */
	result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);

	r = uv_timer_init(&worker->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r != 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock, STATID_OPENFAIL);
		goto done;
	}
	isc__nm_incstats(sock, STATID_OPEN);

	if (req->local.length != 0) {
		r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
		if (r != 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			goto done;
		}
	}

	isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);

	uv_handle_set_data(&req->uv_req.handle, req);
	r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
			   &req->peer.type.sa, tcp_connect_cb);
	if (r != 0) {
		isc__nm_incstats(sock, STATID_CONNECTFAIL);
		goto done;
	}

	uv_handle_set_data((uv_handle_t *)&sock->read_timer,
			   &req->uv_req.connect);
	isc__nmsocket_timer_start(sock);

	atomic_store(&sock->connected, true);

done:
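	/*
	 * Report the result to the thread blocked in isc_nm_tcpconnect()
	 * and, if the socket has not yet been marked active, wait for
	 * that thread to do so before continuing on this loop.
	 */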
	result = isc__nm_uverr2result(r);
	LOCK(&sock->lock);
	sock->result = result;
	SIGNAL(&sock->cond);
	if (!atomic_load(&sock->active)) {
		WAIT(&sock->scond, &sock->lock);
	}
	INSIST(atomic_load(&sock->active));
	UNLOCK(&sock->lock);

	return (result);
}

void
isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpconnect_t *ievent =
		(isc__netievent_tcpconnect_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc__nm_uvreq_t *req = ievent->req;
	isc_result_t result = ISC_R_SUCCESS;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(sock->parent == NULL);
	REQUIRE(sock->tid == isc_nm_tid());

	result = tcp_connect_direct(sock, req);
	if (result != ISC_R_SUCCESS) {
		atomic_store(&sock->active, false);
		if (sock->fd != (uv_os_sock_t)(-1)) {
			isc__nm_tcp_close(sock);
		}
		isc__nm_connectcb(sock, req, result, true);
	}

	/*
	 * The sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&sock);
}

static void
tcp_connect_cb(uv_connect_t *uvreq, int status) {
	isc_result_t result = ISC_R_UNSET;
	isc__nm_uvreq_t *req = NULL;
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
	struct sockaddr_storage ss;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	req = uv_handle_get_data((uv_handle_t *)uvreq);

	REQUIRE(VALID_UVREQ(req));
	REQUIRE(VALID_NMHANDLE(req->handle));

	if (atomic_load(&sock->timedout)) {
		result = ISC_R_TIMEDOUT;
		goto error;
	} else if (!atomic_load(&sock->connecting)) {
		/*
		 * The connect was cancelled by the timeout; just clean
		 * up the req.
		 */
		isc__nm_uvreq_put(&req, sock);
		return;
	} else if (isc__nm_closing(sock)) {
		/* Network manager shutting down */
		result = ISC_R_SHUTTINGDOWN;
		goto error;
	} else if (isc__nmsocket_closing(sock)) {
		/* Connection canceled */
		result = ISC_R_CANCELED;
		goto error;
	} else if (status == UV_ETIMEDOUT) {
		/* A timeout status code here indicates a hard error */
		result = ISC_R_TIMEDOUT;
		goto error;
	} else if (status == UV_EADDRINUSE) {
		/*
		 * On FreeBSD the TCP connect() call sometimes results in a
		 * spurious transient EADDRINUSE. Try a few more times before
		 * giving up.
		 */
		if (--req->connect_tries > 0) {
			r = uv_tcp_connect(&req->uv_req.connect,
					   &sock->uv_handle.tcp,
					   &req->peer.type.sa, tcp_connect_cb);
			if (r != 0) {
				result = isc__nm_uverr2result(r);
				goto error;
			}
			return;
		}
		result = isc__nm_uverr2result(status);
		goto error;
	} else if (status != 0) {
		result = isc__nm_uverr2result(status);
		goto error;
	}

	isc__nmsocket_timer_stop(sock);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	isc__nm_incstats(sock, STATID_CONNECT);
	r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto error;
	}

	atomic_store(&sock->connecting, false);

	result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);

	return;
error:
	isc__nm_failed_connect_cb(sock, req, result, false);
}

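/*
 * Usage sketch (illustrative only; 'connect_cb' and its cbarg are
 * hypothetical, not part of this file):
 *
 *	static void
 *	connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
 *		if (result == ISC_R_SUCCESS) {
 *			// start reading/writing on 'handle'
 *		}
 *	}
 *
 *	isc_nm_tcpconnect(mgr, &local, &peer, connect_cb, NULL,
 *			  30000, 0);
 *
 * The timeout (here 30000) is in milliseconds. The call blocks only
 * until the connect has been initiated on a worker loop; 'connect_cb'
 * itself runs later on the worker thread.
 */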
void
isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
		  isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
		  size_t extrahandlesize) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	isc__netievent_tcpconnect_t *ievent = NULL;
	isc__nm_uvreq_t *req = NULL;
	sa_family_t sa_family;

	REQUIRE(VALID_NM(mgr));
	REQUIRE(local != NULL);
	REQUIRE(peer != NULL);

	sa_family = peer->type.sa.sa_family;

	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
	isc__nmsocket_init(sock, mgr, isc_nm_tcpsocket, local);

	sock->extrahandlesize = extrahandlesize;
	sock->connect_timeout = timeout;
	sock->result = ISC_R_UNSET;
	sock->fd = (uv_os_sock_t)-1;
	atomic_init(&sock->client, true);

	req = isc__nm_uvreq_get(mgr, sock);
	req->cb.connect = cb;
	req->cbarg = cbarg;
	req->peer = *peer;
	req->local = *local;
	req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock->fd);
	if (result != ISC_R_SUCCESS) {
		if (isc__nm_in_netthread()) {
			sock->tid = isc_nm_tid();
			isc__nmsocket_clearcb(sock);
			isc__nm_connectcb(sock, req, result, false);
		} else {
			isc__nmsocket_clearcb(sock);
			sock->tid = isc_random_uniform(mgr->nlisteners);
			isc__nm_connectcb(sock, req, result, true);
		}
		atomic_store(&sock->closed, true);
		isc__nmsocket_detach(&sock);
		return;
	}

	(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
	(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);

	ievent = isc__nm_get_netievent_tcpconnect(mgr, sock, req);

	if (isc__nm_in_netthread()) {
		atomic_store(&sock->active, true);
		sock->tid = isc_nm_tid();
		isc__nm_async_tcpconnect(&mgr->workers[sock->tid],
					 (isc__netievent_t *)ievent);
		isc__nm_put_netievent_tcpconnect(mgr, ievent);
	} else {
		atomic_init(&sock->active, false);
		sock->tid = isc_random_uniform(mgr->nlisteners);
		isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
				       (isc__netievent_t *)ievent);
	}
	LOCK(&sock->lock);
	while (sock->result == ISC_R_UNSET) {
		WAIT(&sock->cond, &sock->lock);
	}
	atomic_store(&sock->active, true);
	BROADCAST(&sock->scond);
	UNLOCK(&sock->lock);
}

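/*
 * Create a listening socket for one worker. With load_balance_sockets,
 * each worker gets its own socket bound to the same address and the
 * kernel distributes incoming connections between them (via the
 * SO_REUSEPORT-style option set in isc__nm_socket_reuse_lb());
 * otherwise a single socket is created here and later dup()ed for
 * each child in start_tcp_child().
 */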
static uv_os_sock_t
isc__nm_tcp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
	isc_result_t result;
	uv_os_sock_t sock;

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	(void)isc__nm_socket_incoming_cpu(sock);
	(void)isc__nm_socket_v6only(sock, sa_family);

	/* FIXME: set mss */

	result = isc__nm_socket_reuse(sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	if (mgr->load_balance_sockets) {
		result = isc__nm_socket_reuse_lb(sock);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
	}

	return (sock);
}

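/*
 * Initialize listener child socket 'tid' and queue a tcplisten event
 * for its worker; the child either dup()s the shared listening fd or,
 * with load-balanced sockets, opens a socket of its own.
 */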
static void
start_tcp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
		uv_os_sock_t fd, int tid) {
	isc__netievent_tcplisten_t *ievent = NULL;
	isc_nmsocket_t *csock = &sock->children[tid];

	isc__nmsocket_init(csock, mgr, isc_nm_tcpsocket, iface);
	csock->parent = sock;
	csock->accept_cb = sock->accept_cb;
	csock->accept_cbarg = sock->accept_cbarg;
	csock->extrahandlesize = sock->extrahandlesize;
	csock->backlog = sock->backlog;
	csock->tid = tid;
	/*
	 * We don't attach to the quota, we just assign it, to avoid
	 * increasing the quota unnecessarily.
	 */
	csock->pquota = sock->pquota;
	isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock);

	if (mgr->load_balance_sockets) {
		UNUSED(fd);
		csock->fd = isc__nm_tcp_lb_socket(mgr,
						  iface->type.sa.sa_family);
	} else {
		csock->fd = dup(fd);
	}
	REQUIRE(csock->fd >= 0);

	ievent = isc__nm_get_netievent_tcplisten(mgr, csock);
	isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
				     (isc__netievent_t *)ievent);
}

static void
enqueue_stoplistening(isc_nmsocket_t *sock) {
	isc__netievent_tcpstop_t *ievent =
		isc__nm_get_netievent_tcpstop(sock->mgr, sock);
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);
}

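/*
 * Usage sketch (illustrative only; 'accept_cb' is hypothetical):
 *
 *	static isc_result_t
 *	accept_cb(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
 *		if (result != ISC_R_SUCCESS) {
 *			return (result);
 *		}
 *		// attach to 'handle' here to keep the connection alive
 *		return (ISC_R_SUCCESS);
 *	}
 *
 *	isc_nmsocket_t *listener = NULL;
 *	result = isc_nm_listentcp(mgr, &addr, accept_cb, NULL, 0, 10,
 *				  quota, &listener);
 *
 * One child socket is started per worker; the call blocks until every
 * child has reported a result.
 */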
isc_result_t
isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface,
		 isc_nm_accept_cb_t accept_cb, void *accept_cbarg,
		 size_t extrahandlesize, int backlog, isc_quota_t *quota,
		 isc_nmsocket_t **sockp) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	size_t children_size = 0;
	uv_os_sock_t fd = -1;

	REQUIRE(VALID_NM(mgr));

	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
	isc__nmsocket_init(sock, mgr, isc_nm_tcplistener, iface);

	atomic_init(&sock->rchildren, 0);
	sock->nchildren = mgr->nlisteners;
	children_size = sock->nchildren * sizeof(sock->children[0]);
	sock->children = isc_mem_get(mgr->mctx, children_size);
	memset(sock->children, 0, children_size);

	sock->result = ISC_R_UNSET;

	sock->accept_cb = accept_cb;
	sock->accept_cbarg = accept_cbarg;
	sock->extrahandlesize = extrahandlesize;
	sock->backlog = backlog;
	sock->pquota = quota;

	sock->tid = 0;
	sock->fd = -1;

	if (!mgr->load_balance_sockets) {
		fd = isc__nm_tcp_lb_socket(mgr, iface->type.sa.sa_family);
	}

	isc_barrier_init(&sock->startlistening, sock->nchildren);

	for (size_t i = 0; i < sock->nchildren; i++) {
		if ((int)i == isc_nm_tid()) {
			continue;
		}
		start_tcp_child(mgr, iface, sock, fd, i);
	}

	if (isc__nm_in_netthread()) {
		start_tcp_child(mgr, iface, sock, fd, isc_nm_tid());
	}

	if (!mgr->load_balance_sockets) {
		isc__nm_closesocket(fd);
	}

	LOCK(&sock->lock);
	while (atomic_load(&sock->rchildren) != sock->nchildren) {
		WAIT(&sock->cond, &sock->lock);
	}
	result = sock->result;
	atomic_store(&sock->active, true);
	UNLOCK(&sock->lock);

	INSIST(result != ISC_R_UNSET);

	if (result == ISC_R_SUCCESS) {
		REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
		*sockp = sock;
	} else {
		atomic_store(&sock->active, false);
		enqueue_stoplistening(sock);
		isc_nmsocket_close(&sock);
	}

	return (result);
}

void
isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcplisten_t *ievent = (isc__netievent_tcplisten_t *)ev0;
	sa_family_t sa_family;
	int r;
	int flags = 0;
	isc_nmsocket_t *sock = NULL;
	isc_result_t result;
	isc_nm_t *mgr;

	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->tid == isc_nm_tid());
	REQUIRE(VALID_NMSOCK(ievent->sock->parent));

	sock = ievent->sock;
	sa_family = sock->iface.type.sa.sa_family;
	mgr = sock->mgr;

	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(sock->parent != NULL);
	REQUIRE(sock->tid == isc_nm_tid());

	(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
	(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);

	r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);

	uv_handle_set_data(&sock->uv_handle.handle, sock);
	/* This keeps the socket alive after everything else is gone */
	isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });

	r = uv_timer_init(&worker->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	LOCK(&sock->parent->lock);

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r < 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock, STATID_OPENFAIL);
		goto done;
	}
	isc__nm_incstats(sock, STATID_OPEN);

	if (sa_family == AF_INET6) {
		flags = UV_TCP_IPV6ONLY;
	}

	if (mgr->load_balance_sockets) {
		r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
					&sock->iface.type.sa, flags);
		if (r < 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			goto done;
		}
	} else {
		if (sock->parent->fd == -1) {
			r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
						&sock->iface.type.sa, flags);
			if (r < 0) {
				isc__nm_incstats(sock, STATID_BINDFAIL);
				goto done;
			}
			sock->parent->uv_handle.tcp.flags =
				sock->uv_handle.tcp.flags;
			sock->parent->fd = sock->fd;
		} else {
			/* The socket is already bound, just copy the flags */
			sock->uv_handle.tcp.flags =
				sock->parent->uv_handle.tcp.flags;
		}
	}

	isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);

	/*
	 * The callback will run in the same thread that uv_listen() was
	 * called from, so a race with tcp_connection_cb() isn't possible.
	 */
	r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
		      tcp_connection_cb);
	if (r != 0) {
		isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
			      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
			      "uv_listen failed: %s",
			      isc_result_totext(isc__nm_uverr2result(r)));
		isc__nm_incstats(sock, STATID_BINDFAIL);
		goto done;
	}

	atomic_store(&sock->listening, true);

done:
	result = isc__nm_uverr2result(r);
	if (result != ISC_R_SUCCESS) {
		sock->pquota = NULL;
	}

	atomic_fetch_add(&sock->parent->rchildren, 1);
	if (sock->parent->result == ISC_R_UNSET) {
		sock->parent->result = result;
	}
	SIGNAL(&sock->parent->cond);
	UNLOCK(&sock->parent->lock);

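	/*
	 * Wait here until every child has finished its setup, so the
	 * parent listener can't be torn down while siblings are still
	 * starting.
	 */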
	isc_barrier_wait(&sock->parent->startlistening);
}

static void
tcp_connection_cb(uv_stream_t *server, int status) {
	isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
	isc_result_t result;
	isc_quota_t *quota = NULL;

	if (status != 0) {
		result = isc__nm_uverr2result(status);
		goto done;
	}

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(ssock)) {
		result = ISC_R_CANCELED;
		goto done;
	}

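	/*
	 * If the connection quota is full, isc_quota_attach_cb() keeps
	 * our callback registered and the accept is retried later from
	 * quota_accept_cb() when a slot frees up.
	 */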
	if (ssock->pquota != NULL) {
		result = isc_quota_attach_cb(ssock->pquota, &quota,
					     &ssock->quotacb);
		if (result == ISC_R_QUOTA) {
			isc__nm_incstats(ssock, STATID_ACCEPTFAIL);
			goto done;
		}
	}

	result = accept_connection(ssock, quota);
done:
	isc__nm_accept_connection_log(result, can_log_tcp_quota());
}

void
isc__nm_tcp_stoplistening(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcplistener);

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	if (!isc__nm_in_netthread()) {
		enqueue_stoplistening(sock);
	} else {
		stop_tcp_parent(sock);
	}
}

void
isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpstop_t *ievent = (isc__netievent_tcpstop_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (sock->parent != NULL) {
		stop_tcp_child(sock);
		return;
	}

	stop_tcp_parent(sock);
}

void
isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(result != ISC_R_SUCCESS);

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);

	if (!sock->recv_read) {
		goto destroy;
	}
	sock->recv_read = false;

	if (sock->recv_cb != NULL) {
		isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
		isc__nmsocket_clearcb(sock);
		isc__nm_readcb(sock, req, result);
	}

destroy:
	isc__nmsocket_prep_destroy(sock);

	/*
	 * We need to detach from the quota after the read callback
	 * function has had a chance to run.
	 */
	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}
}

void
isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));

	isc_nmsocket_t *sock = handle->sock;
	isc__netievent_tcpstartread_t *ievent = NULL;

	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(sock->statichandle == handle);

	sock->recv_cb = cb;
	sock->recv_cbarg = cbarg;
	sock->recv_read = true;
	if (sock->read_timeout == 0) {
		sock->read_timeout =
			(atomic_load(&sock->keepalive)
				 ? atomic_load(&sock->mgr->keepalive)
				 : atomic_load(&sock->mgr->idle));
	}

	ievent = isc__nm_get_netievent_tcpstartread(sock->mgr, sock);

	/*
	 * This MUST be done asynchronously, no matter which thread we're
	 * in. The callback function for isc_nm_read() often calls
	 * isc_nm_read() again; if we tried to do that synchronously
	 * we'd clash in processbuffer() and grow the stack indefinitely.
	 */
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);

	return;
}

void
isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpstartread_t *ievent =
		(isc__netievent_tcpstartread_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc_result_t result;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	UNUSED(worker);

	if (isc__nmsocket_closing(sock)) {
		result = ISC_R_CANCELED;
	} else {
		result = isc__nm_start_reading(sock);
	}

	if (result != ISC_R_SUCCESS) {
		atomic_store(&sock->reading, true);
		isc__nm_tcp_failed_read_cb(sock, result);
		return;
	}

	isc__nmsocket_timer_start(sock);
}

void
isc__nm_tcp_pauseread(isc_nmhandle_t *handle) {
	isc__netievent_tcppauseread_t *ievent = NULL;
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_NMHANDLE(handle));

	sock = handle->sock;

	REQUIRE(VALID_NMSOCK(sock));

	if (!atomic_compare_exchange_strong(&sock->readpaused, &(bool){ false },
					    true))
	{
		return;
	}

	ievent = isc__nm_get_netievent_tcppauseread(sock->mgr, sock);

	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);

	return;
}

void
isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcppauseread_t *ievent =
		(isc__netievent_tcppauseread_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	UNUSED(worker);

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);
}

void
isc__nm_tcp_resumeread(isc_nmhandle_t *handle) {
	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));

	isc__netievent_tcpstartread_t *ievent = NULL;
	isc_nmsocket_t *sock = handle->sock;

	REQUIRE(sock->tid == isc_nm_tid());

	if (sock->recv_cb == NULL) {
		/* We are no longer reading */
		return;
	}

	if (!isc__nmsocket_active(sock)) {
		atomic_store(&sock->reading, true);
		isc__nm_tcp_failed_read_cb(sock, ISC_R_CANCELED);
		return;
	}

	if (!atomic_compare_exchange_strong(&sock->readpaused, &(bool){ true },
					    false))
	{
		return;
	}

	ievent = isc__nm_get_netievent_tcpstartread(sock->mgr, sock);

	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);
}

void
isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
	isc__nm_uvreq_t *req = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->reading));
	REQUIRE(buf != NULL);

	if (isc__nmsocket_closing(sock)) {
		isc__nm_tcp_failed_read_cb(sock, ISC_R_CANCELED);
		goto free;
	}

	if (nread < 0) {
		if (nread != UV_EOF) {
			isc__nm_incstats(sock, STATID_RECVFAIL);
		}

		isc__nm_tcp_failed_read_cb(sock, isc__nm_uverr2result(nread));

		goto free;
	}

	req = isc__nm_get_read_req(sock, NULL);

	/*
	 * The callback will be called synchronously because the
	 * result is ISC_R_SUCCESS, so we don't need to retain
	 * the buffer.
	 */
	req->uvbuf.base = buf->base;
	req->uvbuf.len = nread;

	if (!atomic_load(&sock->client)) {
		sock->read_timeout =
			(atomic_load(&sock->keepalive)
				 ? atomic_load(&sock->mgr->keepalive)
				 : atomic_load(&sock->mgr->idle));
	}

	isc__nm_readcb(sock, req, ISC_R_SUCCESS);

	/* The readcb could have paused the reading */
	if (atomic_load(&sock->reading)) {
		/* The timer will be updated */
		isc__nmsocket_timer_restart(sock);
	}

free:
	if (nread < 0) {
		/*
		 * The buffer may be a null buffer on error.
		 */
		if (buf->base == NULL && buf->len == 0) {
			return;
		}
	}

	isc__nm_free_uvbuf(sock, buf);
}

static void
quota_accept_cb(isc_quota_t *quota, void *sock0) {
	isc_nmsocket_t *sock = (isc_nmsocket_t *)sock0;
	isc__netievent_tcpaccept_t *ievent = NULL;

	REQUIRE(VALID_NMSOCK(sock));

	/*
	 * Create a tcpaccept event and pass it using the async channel.
	 */
	ievent = isc__nm_get_netievent_tcpaccept(sock->mgr, sock, quota);
	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);
}

/*
 * This is called after we get a quota_accept_cb() callback.
 */
void
isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpaccept_t *ievent = (isc__netievent_tcpaccept_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc_result_t result;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	result = accept_connection(sock, ievent->quota);
	isc__nm_accept_connection_log(result, can_log_tcp_quota());
}

static isc_result_t
accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
	isc_nmsocket_t *csock = NULL;
	isc__networker_t *worker = NULL;
	int r;
	isc_result_t result;
	struct sockaddr_storage ss;
	isc_sockaddr_t local;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(ssock)) {
		if (quota != NULL) {
			isc_quota_detach(&quota);
		}
		return (ISC_R_CANCELED);
	}

	csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t));
	isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpsocket, &ssock->iface);
	csock->tid = ssock->tid;
	csock->extrahandlesize = ssock->extrahandlesize;
	isc__nmsocket_attach(ssock, &csock->server);
	csock->recv_cb = ssock->recv_cb;
	csock->recv_cbarg = ssock->recv_cbarg;
	csock->quota = quota;
	atomic_init(&csock->accepting, true);

	worker = &csock->mgr->workers[isc_nm_tid()];

	r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&csock->uv_handle.handle, csock);

	r = uv_timer_init(&worker->loop, &csock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock);

	r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream);
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	r = uv_tcp_getpeername(&csock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&csock->peer,
					   (struct sockaddr *)&ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	r = uv_tcp_getsockname(&csock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&local, (struct sockaddr *)&ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	handle = isc__nmhandle_get(csock, NULL, &local);

	result = ssock->accept_cb(handle, ISC_R_SUCCESS, ssock->accept_cbarg);
	if (result != ISC_R_SUCCESS) {
		isc_nmhandle_detach(&handle);
		goto failure;
	}

	atomic_store(&csock->accepting, false);

	isc__nm_incstats(csock, STATID_ACCEPT);

	csock->read_timeout = atomic_load(&csock->mgr->init);

	atomic_fetch_add(&ssock->parent->active_child_connections, 1);

	/*
	 * The accept callback needs to attach to the handle if it wants
	 * to keep the connection alive.
	 */
	isc_nmhandle_detach(&handle);

	/*
	 * sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&csock);

	return (ISC_R_SUCCESS);

failure:
	atomic_store(&csock->active, false);

	failed_accept_cb(csock, result);

	isc__nmsocket_prep_destroy(csock);

	isc__nmsocket_detach(&csock);

	return (result);
}

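/*
 * Send 'region' on the TCP connection behind 'handle' (normally
 * reached via isc_nm_send()). Usage sketch (illustrative only;
 * 'send_cb' is hypothetical):
 *
 *	static void
 *	send_cb(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
 *		// region->base may be reused/freed once this runs
 *	}
 *
 *	isc__nm_tcp_send(handle, &region, send_cb, NULL);
 *
 * The send is always handed off to the socket's worker thread, so the
 * region must remain valid until the callback fires.
 */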
void
isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
		 isc_nm_cb_t cb, void *cbarg) {
	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));

	isc_nmsocket_t *sock = handle->sock;
	isc__netievent_tcpsend_t *ievent = NULL;
	isc__nm_uvreq_t *uvreq = NULL;

	REQUIRE(sock->type == isc_nm_tcpsocket);

	uvreq = isc__nm_uvreq_get(sock->mgr, sock);
	uvreq->uvbuf.base = (char *)region->base;
	uvreq->uvbuf.len = region->length;

	isc_nmhandle_attach(handle, &uvreq->handle);

	uvreq->cb.send = cb;
	uvreq->cbarg = cbarg;

	ievent = isc__nm_get_netievent_tcpsend(sock->mgr, sock, uvreq);
	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);

	return;
}

static void
tcp_send_cb(uv_write_t *req, int status) {
	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_UVREQ(uvreq));
	REQUIRE(VALID_NMSOCK(uvreq->sock));

	sock = uvreq->sock;

	isc_nm_timer_stop(uvreq->timer);
	isc_nm_timer_detach(&uvreq->timer);

	if (status < 0) {
		isc__nm_incstats(sock, STATID_SENDFAIL);
		isc__nm_failed_send_cb(sock, uvreq,
				       isc__nm_uverr2result(status));
		return;
	}

	isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
}

/*
 * Handle 'tcpsend' async event - send a packet on the socket
 */
void
isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc_result_t result;
	isc__netievent_tcpsend_t *ievent = (isc__netievent_tcpsend_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc__nm_uvreq_t *uvreq = ievent->req;

	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(sock->tid == isc_nm_tid());
	UNUSED(worker);

	if (sock->write_timeout == 0) {
		sock->write_timeout =
			(atomic_load(&sock->keepalive)
				 ? atomic_load(&sock->mgr->keepalive)
				 : atomic_load(&sock->mgr->idle));
	}

	result = tcp_send_direct(sock, uvreq);
	if (result != ISC_R_SUCCESS) {
		isc__nm_incstats(sock, STATID_SENDFAIL);
		isc__nm_failed_send_cb(sock, uvreq, result);
	}
}

static isc_result_t
tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_UVREQ(req));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcpsocket);

	int r;

	if (isc__nmsocket_closing(sock)) {
		return (ISC_R_CANCELED);
	}

	r = uv_write(&req->uv_req.write, &sock->uv_handle.stream, &req->uvbuf,
		     1, tcp_send_cb);
	if (r < 0) {
		return (isc__nm_uverr2result(r));
	}

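	/*
	 * Arm a per-request write timer; tcp_send_cb() stops and
	 * detaches it when the write completes.
	 */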
	isc_nm_timer_create(req->handle, isc__nmsocket_writetimeout_cb, req,
			    &req->timer);
	if (sock->write_timeout > 0) {
		isc_nm_timer_start(req->timer, sock->write_timeout);
	}

	return (ISC_R_SUCCESS);
}

static void
tcp_stop_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);
	uv_handle_set_data(handle, NULL);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	isc__nm_incstats(sock, STATID_CLOSE);

	atomic_store(&sock->listening, false);

	isc__nmsocket_detach(&sock);
}

static void
tcp_close_sock(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	isc__nm_incstats(sock, STATID_CLOSE);

	if (sock->server != NULL) {
		isc__nmsocket_detach(&sock->server);
	}

	atomic_store(&sock->connected, false);

	isc__nmsocket_prep_destroy(sock);
}

static void
tcp_close_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);
	uv_handle_set_data(handle, NULL);

	tcp_close_sock(sock);
}

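/*
 * The read timer is always closed first; once it is gone, finish
 * closing the TCP handle itself: listener children complete via
 * tcp_stop_cb(), handles that are already being closed are finished
 * directly, and everything else goes through tcp_close_cb().
 */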
static void
read_timer_close_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);
	uv_handle_set_data(handle, NULL);

	if (sock->parent) {
		uv_close(&sock->uv_handle.handle, tcp_stop_cb);
	} else if (uv_is_closing(&sock->uv_handle.handle)) {
		tcp_close_sock(sock);
	} else {
		uv_close(&sock->uv_handle.handle, tcp_close_cb);
	}
}

static void
stop_tcp_child(isc_nmsocket_t *sock) {
	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(sock->tid == isc_nm_tid());

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		return;
	}

	tcp_close_direct(sock);

	atomic_fetch_sub(&sock->parent->rchildren, 1);

	isc_barrier_wait(&sock->parent->stoplistening);
}

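/*
 * Stop the whole listener: close the other children asynchronously
 * via tcpstop events, close the child belonging to this thread
 * directly, and let the stoplistening barrier synchronize the
 * shutdown across workers.
 */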
static void
stop_tcp_parent(isc_nmsocket_t *sock) {
	isc_nmsocket_t *csock = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcplistener);

	isc_barrier_init(&sock->stoplistening, sock->nchildren);

	for (size_t i = 0; i < sock->nchildren; i++) {
		csock = &sock->children[i];
		REQUIRE(VALID_NMSOCK(csock));

		if ((int)i == isc_nm_tid()) {
			/*
			 * We need to schedule closing the other sockets first.
			 */
			continue;
		}

		atomic_store(&csock->active, false);
		enqueue_stoplistening(csock);
	}

	csock = &sock->children[isc_nm_tid()];
	atomic_store(&csock->active, false);
	stop_tcp_child(csock);

	atomic_store(&sock->closed, true);
	isc__nmsocket_prep_destroy(sock);
}

static void
tcp_close_direct(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (sock->server != NULL) {
		REQUIRE(VALID_NMSOCK(sock->server));
		REQUIRE(VALID_NMSOCK(sock->server->parent));
		if (sock->server->parent != NULL) {
			atomic_fetch_sub(
				&sock->server->parent->active_child_connections,
				1);
		}
	}

	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);

	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
	uv_close((uv_handle_t *)&sock->read_timer, read_timer_close_cb);
}

void
isc__nm_tcp_close(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpsocket);
	REQUIRE(!isc__nmsocket_active(sock));

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		return;
	}

	if (sock->tid == isc_nm_tid()) {
		tcp_close_direct(sock);
	} else {
		/*
		 * We need to create an event and pass it using the async
		 * channel.
		 */
		isc__netievent_tcpclose_t *ievent =
			isc__nm_get_netievent_tcpclose(sock->mgr, sock);

		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
				       (isc__netievent_t *)ievent);
	}
}

void
isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpclose_t *ievent = (isc__netievent_tcpclose_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	UNUSED(worker);

	tcp_close_direct(sock);
}

static void
tcp_close_connect_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	REQUIRE(VALID_NMSOCK(sock));

	REQUIRE(isc__nm_in_netthread());
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nmsocket_prep_destroy(sock);
	isc__nmsocket_detach(&sock);
}

void
isc__nm_tcp_shutdown(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcpsocket);

	/*
	 * If the socket is active, mark it inactive and
	 * continue. If it isn't active, stop now.
	 */
	if (!isc__nmsocket_deactivate(sock)) {
		return;
	}

	if (atomic_load(&sock->accepting)) {
		return;
	}

	if (atomic_load(&sock->connecting)) {
		isc_nmsocket_t *tsock = NULL;
		isc__nmsocket_attach(sock, &tsock);
		uv_close(&sock->uv_handle.handle, tcp_close_connect_cb);
		return;
	}

	if (sock->statichandle != NULL) {
		if (isc__nm_closing(sock)) {
			isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false);
		} else {
			isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
		}
		return;
	}

	/*
	 * Otherwise, we just send the socket to the abyss...
	 */
	if (sock->parent == NULL) {
		isc__nmsocket_prep_destroy(sock);
	}
}

void
isc__nm_tcp_cancelread(isc_nmhandle_t *handle) {
	isc_nmsocket_t *sock = NULL;
	isc__netievent_tcpcancel_t *ievent = NULL;

	REQUIRE(VALID_NMHANDLE(handle));

	sock = handle->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpsocket);

	ievent = isc__nm_get_netievent_tcpcancel(sock->mgr, sock, handle);
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);
}

void
isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpcancel_t *ievent = (isc__netievent_tcpcancel_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	UNUSED(worker);

	uv_timer_stop(&sock->read_timer);

	isc__nm_tcp_failed_read_cb(sock, ISC_R_EOF);
}

int_fast32_t
isc__nm_tcp_listener_nactive(isc_nmsocket_t *listener) {
	int_fast32_t nactive;

	REQUIRE(VALID_NMSOCK(listener));

	nactive = atomic_load(&listener->active_child_connections);
	INSIST(nactive >= 0);
	return (nactive);
}
1459