/*	$NetBSD: tcpdns.c,v 1.1 2024/02/18 20:57:55 christos Exp $	*/

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#include <libgen.h>
#include <unistd.h>
#include <uv.h>

#include <isc/atomic.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/errno.h>
#include <isc/log.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/quota.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/stdtime.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "netmgr-int.h"
#include "uv-compat.h"

static atomic_uint_fast32_t last_tcpdnsquota_log = 0;

static bool
can_log_tcpdns_quota(void) {
	isc_stdtime_t now, last;

	isc_stdtime_get(&now);
	last = atomic_exchange_relaxed(&last_tcpdnsquota_log, now);
	if (now != last) {
		return (true);
	}

	return (false);
}
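
/*
 * can_log_tcpdns_quota() permits at most one quota log message per
 * second: every caller swaps the current timestamp into
 * last_tcpdnsquota_log, and only a caller that observes a different
 * previous value may log.  A minimal sketch of the intended use
 * (illustrative only; isc__nm_accept_connection_log() below is the
 * real consumer in this file):
 *
 *	if (result == ISC_R_QUOTA && can_log_tcpdns_quota()) {
 *		isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
 *			      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
 *			      "TCP connection failed: %s",
 *			      isc_result_totext(result));
 *	}
 */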

static isc_result_t
tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);

static void
tcpdns_close_direct(isc_nmsocket_t *sock);

static void
tcpdns_connect_cb(uv_connect_t *uvreq, int status);

static void
tcpdns_connection_cb(uv_stream_t *server, int status);

static void
tcpdns_close_cb(uv_handle_t *uvhandle);

static isc_result_t
accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota);

static void
quota_accept_cb(isc_quota_t *quota, void *sock0);

static void
stop_tcpdns_parent(isc_nmsocket_t *sock);
static void
stop_tcpdns_child(isc_nmsocket_t *sock);

static isc_result_t
tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
	isc__networker_t *worker = NULL;
	isc_result_t result = ISC_R_UNSET;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_UVREQ(req));

	REQUIRE(isc__nm_in_netthread());
	REQUIRE(sock->tid == isc_nm_tid());

	worker = &sock->mgr->workers[sock->tid];

	atomic_store(&sock->connecting, true);

	r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);

	r = uv_timer_init(&worker->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	if (isc__nm_closing(sock)) {
		result = ISC_R_CANCELED;
		goto error;
	}

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r != 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
		goto done;
	}
	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);

	if (req->local.length != 0) {
		r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
		/*
		 * In the case of a shared socket, UV_EINVAL will be
		 * returned; it needs to be ignored.
		 */
		if (r != 0 && r != UV_EINVAL) {
			isc__nm_incstats(sock->mgr,
					 sock->statsindex[STATID_BINDFAIL]);
			goto done;
		}
	}

	uv_handle_set_data(&req->uv_req.handle, req);
	r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
			   &req->peer.type.sa, tcpdns_connect_cb);
	if (r != 0) {
		isc__nm_incstats(sock->mgr,
				 sock->statsindex[STATID_CONNECTFAIL]);
		goto done;
	}
	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]);

	uv_handle_set_data((uv_handle_t *)&sock->read_timer,
			   &req->uv_req.connect);
	isc__nmsocket_timer_start(sock);

	atomic_store(&sock->connected, true);

done:
	result = isc__nm_uverr2result(r);
error:
	LOCK(&sock->lock);
	sock->result = result;
	SIGNAL(&sock->cond);
	if (!atomic_load(&sock->active)) {
		WAIT(&sock->scond, &sock->lock);
	}
	INSIST(atomic_load(&sock->active));
	UNLOCK(&sock->lock);

	return (result);
}

void
isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsconnect_t *ievent =
		(isc__netievent_tcpdnsconnect_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc__nm_uvreq_t *req = ievent->req;
	isc_result_t result = ISC_R_SUCCESS;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->parent == NULL);
	REQUIRE(sock->tid == isc_nm_tid());

	result = tcpdns_connect_direct(sock, req);
	if (result != ISC_R_SUCCESS) {
		isc__nmsocket_clearcb(sock);
		isc__nm_connectcb(sock, req, result, true);
		atomic_store(&sock->active, false);
		isc__nm_tcpdns_close(sock);
	}

	/*
	 * The sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&sock);
}

static void
tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
	isc_result_t result;
	isc__nm_uvreq_t *req = NULL;
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
	struct sockaddr_storage ss;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nmsocket_timer_stop(sock);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	req = uv_handle_get_data((uv_handle_t *)uvreq);

	REQUIRE(VALID_UVREQ(req));
	REQUIRE(VALID_NMHANDLE(req->handle));

	if (atomic_load(&sock->timedout)) {
		result = ISC_R_TIMEDOUT;
		goto error;
	}

	if (isc__nmsocket_closing(sock)) {
		/* Socket was closed midflight by isc__nm_tcpdns_shutdown() */
		result = ISC_R_CANCELED;
		goto error;
	} else if (status == UV_ETIMEDOUT) {
		/* Timeout status code here indicates hard error */
		result = ISC_R_TIMEDOUT;
		goto error;
	} else if (status != 0) {
		result = isc__nm_uverr2result(status);
		goto error;
	}

	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]);
	r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto error;
	}

	atomic_store(&sock->connecting, false);

	result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);

	return;

error:
	isc__nm_failed_connect_cb(sock, req, result, false);
}

void
isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
		     isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
		     size_t extrahandlesize) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	isc__netievent_tcpdnsconnect_t *ievent = NULL;
	isc__nm_uvreq_t *req = NULL;
	sa_family_t sa_family;

	REQUIRE(VALID_NM(mgr));
	REQUIRE(local != NULL);
	REQUIRE(peer != NULL);

	sa_family = peer->type.sa.sa_family;

	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
	isc__nmsocket_init(sock, mgr, isc_nm_tcpdnssocket, local);

	sock->extrahandlesize = extrahandlesize;
	sock->connect_timeout = timeout;
	sock->result = ISC_R_UNSET;
	atomic_init(&sock->client, true);

	req = isc__nm_uvreq_get(mgr, sock);
	req->cb.connect = cb;
	req->cbarg = cbarg;
	req->peer = *peer;
	req->local = *local;
	req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock->fd);
	if (result != ISC_R_SUCCESS) {
		if (isc__nm_in_netthread()) {
			sock->tid = isc_nm_tid();
		}
		isc__nmsocket_clearcb(sock);
		isc__nm_connectcb(sock, req, result, true);
		atomic_store(&sock->closed, true);
		isc__nmsocket_detach(&sock);
		return;
	}

	/* 2 minute timeout */
	result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	ievent = isc__nm_get_netievent_tcpdnsconnect(mgr, sock, req);

	if (isc__nm_in_netthread()) {
		atomic_store(&sock->active, true);
		sock->tid = isc_nm_tid();
		isc__nm_async_tcpdnsconnect(&mgr->workers[sock->tid],
					    (isc__netievent_t *)ievent);
		isc__nm_put_netievent_tcpdnsconnect(mgr, ievent);
	} else {
		atomic_init(&sock->active, false);
		sock->tid = isc_random_uniform(mgr->nlisteners);
		isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
				       (isc__netievent_t *)ievent);
	}

	LOCK(&sock->lock);
	while (sock->result == ISC_R_UNSET) {
		WAIT(&sock->cond, &sock->lock);
	}
	atomic_store(&sock->active, true);
	BROADCAST(&sock->scond);
	UNLOCK(&sock->lock);
}
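
/*
 * Illustrative sketch (not part of this file): a client would drive
 * isc_nm_tcpdnsconnect() above roughly as follows.  The callback name
 * 'connected' is hypothetical; the signature of isc_nm_cb_t and the
 * parameter order come from this file, and the timeout is assumed to be
 * in milliseconds, matching the 120 * 1000 two-minute socket timeout
 * above:
 *
 *	static void
 *	connected(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) {
 *		if (eresult == ISC_R_SUCCESS) {
 *			// 'handle' is valid here; start a read or a send
 *		}
 *	}
 *
 *	isc_nm_tcpdnsconnect(mgr, &local, &peer, connected, NULL,
 *			     30000, 0);
 *
 * Note that the call waits on sock->cond until the worker thread has
 * recorded a result, so the connect attempt has been initiated (or has
 * already failed) by the time it returns.
 */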

static uv_os_sock_t
isc__nm_tcpdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
	isc_result_t result;
	uv_os_sock_t sock;

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	(void)isc__nm_socket_incoming_cpu(sock);

	/* FIXME: set mss */

	result = isc__nm_socket_reuse(sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

#ifndef _WIN32
	if (mgr->load_balance_sockets) {
		result = isc__nm_socket_reuse_lb(sock);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
	}
#endif

	return (sock);
}

static void
enqueue_stoplistening(isc_nmsocket_t *sock) {
	isc__netievent_tcpdnsstop_t *ievent =
		isc__nm_get_netievent_tcpdnsstop(sock->mgr, sock);
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);
}

static void
start_tcpdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
		   uv_os_sock_t fd, int tid) {
	isc__netievent_tcpdnslisten_t *ievent = NULL;
	isc_nmsocket_t *csock = &sock->children[tid];

	isc__nmsocket_init(csock, mgr, isc_nm_tcpdnssocket, iface);
	csock->parent = sock;
	csock->accept_cb = sock->accept_cb;
	csock->accept_cbarg = sock->accept_cbarg;
	csock->recv_cb = sock->recv_cb;
	csock->recv_cbarg = sock->recv_cbarg;
	csock->extrahandlesize = sock->extrahandlesize;
	csock->backlog = sock->backlog;
	csock->tid = tid;
	/*
	 * We don't attach to the quota here, we just assign it, to
	 * avoid increasing the quota unnecessarily.
	 */
	csock->pquota = sock->pquota;
	isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock);

#ifdef _WIN32
	UNUSED(fd);
	csock->fd = isc__nm_tcpdns_lb_socket(mgr, iface->type.sa.sa_family);
#else
	if (mgr->load_balance_sockets) {
		UNUSED(fd);
		csock->fd = isc__nm_tcpdns_lb_socket(mgr,
						     iface->type.sa.sa_family);
	} else {
		csock->fd = dup(fd);
	}
#endif
	REQUIRE(csock->fd >= 0);

	ievent = isc__nm_get_netievent_tcpdnslisten(mgr, csock);
	isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
				     (isc__netievent_t *)ievent);
}

isc_result_t
isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface,
		    isc_nm_recv_cb_t recv_cb, void *recv_cbarg,
		    isc_nm_accept_cb_t accept_cb, void *accept_cbarg,
		    size_t extrahandlesize, int backlog, isc_quota_t *quota,
		    isc_nmsocket_t **sockp) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	size_t children_size = 0;
	uv_os_sock_t fd = -1;

	REQUIRE(VALID_NM(mgr));

	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
	isc__nmsocket_init(sock, mgr, isc_nm_tcpdnslistener, iface);

	atomic_init(&sock->rchildren, 0);
#if defined(WIN32)
	sock->nchildren = 1;
#else
	sock->nchildren = mgr->nlisteners;
#endif
	children_size = sock->nchildren * sizeof(sock->children[0]);
	sock->children = isc_mem_get(mgr->mctx, children_size);
	memset(sock->children, 0, children_size);

	sock->result = ISC_R_UNSET;
	sock->accept_cb = accept_cb;
	sock->accept_cbarg = accept_cbarg;
	sock->recv_cb = recv_cb;
	sock->recv_cbarg = recv_cbarg;
	sock->extrahandlesize = extrahandlesize;
	sock->backlog = backlog;
	sock->pquota = quota;

	sock->tid = 0;
	sock->fd = -1;

#ifndef _WIN32
	if (!mgr->load_balance_sockets) {
		fd = isc__nm_tcpdns_lb_socket(mgr, iface->type.sa.sa_family);
	}
#endif

	isc_barrier_init(&sock->startlistening, sock->nchildren);

	for (size_t i = 0; i < sock->nchildren; i++) {
		if ((int)i == isc_nm_tid()) {
			continue;
		}
		start_tcpdns_child(mgr, iface, sock, fd, i);
	}

	if (isc__nm_in_netthread()) {
		start_tcpdns_child(mgr, iface, sock, fd, isc_nm_tid());
	}

#ifndef _WIN32
	if (!mgr->load_balance_sockets) {
		isc__nm_closesocket(fd);
	}
#endif

	LOCK(&sock->lock);
	while (atomic_load(&sock->rchildren) != sock->nchildren) {
		WAIT(&sock->cond, &sock->lock);
	}
	result = sock->result;
	atomic_store(&sock->active, true);
	UNLOCK(&sock->lock);

	INSIST(result != ISC_R_UNSET);

	if (result == ISC_R_SUCCESS) {
		REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
		*sockp = sock;
	} else {
		atomic_store(&sock->active, false);
		enqueue_stoplistening(sock);
		isc_nmsocket_close(&sock);
	}

	return (result);
}
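
/*
 * Illustrative sketch (not part of this file): a server would set up a
 * TCP DNS listener with isc_nm_listentcpdns() above roughly as follows.
 * The callback names are hypothetical; the signatures and the parameter
 * order come from this file, and 'quota' may be NULL (pquota is checked
 * for NULL before use):
 *
 *	static isc_result_t
 *	accepted(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) {
 *		return (ISC_R_SUCCESS);	// accept every connection
 *	}
 *
 *	static void
 *	received(isc_nmhandle_t *handle, isc_result_t eresult,
 *		 isc_region_t *region, void *arg) {
 *		// 'region' holds one DNS message, without the two-byte
 *		// length prefix (see isc__nm_tcpdns_processbuffer())
 *	}
 *
 *	isc_nmsocket_t *listener = NULL;
 *	result = isc_nm_listentcpdns(mgr, &iface, received, NULL,
 *				     accepted, NULL, 0, 10, quota,
 *				     &listener);
 *
 * The call blocks until every per-worker child socket has reported a
 * result, so a successful return means 'listener' is ready.
 */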

void
isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnslisten_t *ievent =
		(isc__netievent_tcpdnslisten_t *)ev0;
	sa_family_t sa_family;
	int r;
	int flags = 0;
	isc_nmsocket_t *sock = NULL;
	isc_result_t result = ISC_R_UNSET;
	isc_nm_t *mgr = NULL;

	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->tid == isc_nm_tid());
	REQUIRE(VALID_NMSOCK(ievent->sock->parent));

	sock = ievent->sock;
	sa_family = sock->iface.type.sa.sa_family;
	mgr = sock->mgr;

	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->parent != NULL);
	REQUIRE(sock->tid == isc_nm_tid());

	/* TODO: set min mss */

	r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);
	/* This keeps the socket alive after everything else is gone */
	isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });

	r = uv_timer_init(&worker->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	LOCK(&sock->parent->lock);

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r < 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
		goto done;
	}
	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);

	if (sa_family == AF_INET6) {
		flags = UV_TCP_IPV6ONLY;
	}

#ifdef _WIN32
	r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa,
				flags);
	if (r < 0) {
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]);
		goto done;
	}
#else
	if (mgr->load_balance_sockets) {
		r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
					&sock->iface.type.sa, flags);
		if (r < 0) {
			isc__nm_incstats(sock->mgr,
					 sock->statsindex[STATID_BINDFAIL]);
			goto done;
		}
	} else {
		if (sock->parent->fd == -1) {
			r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
						&sock->iface.type.sa, flags);
			if (r < 0) {
				isc__nm_incstats(
					sock->mgr,
					sock->statsindex[STATID_BINDFAIL]);
				goto done;
			}
			sock->parent->uv_handle.tcp.flags =
				sock->uv_handle.tcp.flags;
			sock->parent->fd = sock->fd;
		} else {
			/* The socket is already bound, just copy the flags */
			sock->uv_handle.tcp.flags =
				sock->parent->uv_handle.tcp.flags;
		}
	}
#endif

	/*
	 * The callback will run in the same thread uv_listen() was called
	 * from, so a race with tcpdns_connection_cb() isn't possible.
	 */
	r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
		      tcpdns_connection_cb);
	if (r != 0) {
		isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
			      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
			      "uv_listen failed: %s",
			      isc_result_totext(isc__nm_uverr2result(r)));
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]);
		goto done;
	}

	atomic_store(&sock->listening, true);

done:
	result = isc__nm_uverr2result(r);
	if (result != ISC_R_SUCCESS) {
		sock->pquota = NULL;
	}

	atomic_fetch_add(&sock->parent->rchildren, 1);
	if (sock->parent->result == ISC_R_UNSET) {
		sock->parent->result = result;
	}
	SIGNAL(&sock->parent->cond);
	UNLOCK(&sock->parent->lock);

	isc_barrier_wait(&sock->parent->startlistening);
}

static void
tcpdns_connection_cb(uv_stream_t *server, int status) {
	isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
	isc_result_t result;
	isc_quota_t *quota = NULL;

	if (status != 0) {
		result = isc__nm_uverr2result(status);
		goto done;
	}

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(ssock)) {
		result = ISC_R_CANCELED;
		goto done;
	}

	if (ssock->pquota != NULL) {
		result = isc_quota_attach_cb(ssock->pquota, &quota,
					     &ssock->quotacb);
		if (result == ISC_R_QUOTA) {
			isc__nm_incstats(ssock->mgr,
					 ssock->statsindex[STATID_ACCEPTFAIL]);
			goto done;
		}
	}

	result = accept_connection(ssock, quota);
done:
	isc__nm_accept_connection_log(result, can_log_tcpdns_quota());
}
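
/*
 * A note on the quota flow above (a hedged reading of how this file
 * uses isc_quota_attach_cb()): when the accept quota is exhausted,
 * isc_quota_attach_cb() returns ISC_R_QUOTA and leaves ssock->quotacb
 * registered; once another connection releases its quota slot,
 * quota_accept_cb() below fires and re-drives the accept through a
 * tcpdnsaccept netievent.  So an accept refused here on quota grounds
 * is deferred, not dropped.
 */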

void
isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnslistener);

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	if (!isc__nm_in_netthread()) {
		enqueue_stoplistening(sock);
	} else {
		stop_tcpdns_parent(sock);
	}
}

void
isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsstop_t *ievent =
		(isc__netievent_tcpdnsstop_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (sock->parent != NULL) {
		stop_tcpdns_child(sock);
		return;
	}

	stop_tcpdns_parent(sock);
}

void
isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(result != ISC_R_SUCCESS);

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);

	if (!sock->recv_read) {
		goto destroy;
	}
	sock->recv_read = false;

	if (sock->recv_cb != NULL) {
		isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
		isc__nmsocket_clearcb(sock);
		isc__nm_readcb(sock, req, result);
	}

destroy:
	isc__nmsocket_prep_destroy(sock);

	/*
	 * We need to detach from the quota after the read callback
	 * function has had a chance to be executed.
	 */
	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}
}

void
isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));

	isc_nmsocket_t *sock = handle->sock;
	isc__netievent_tcpdnsread_t *ievent = NULL;

	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->statichandle == handle);
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(!sock->recv_read);

	sock->recv_cb = cb;
	sock->recv_cbarg = cbarg;
	sock->recv_read = true;
	if (sock->read_timeout == 0) {
		sock->read_timeout =
			(atomic_load(&sock->keepalive)
				 ? atomic_load(&sock->mgr->keepalive)
				 : atomic_load(&sock->mgr->idle));
	}

	ievent = isc__nm_get_netievent_tcpdnsread(sock->mgr, sock);

	/*
	 * This MUST be done asynchronously, no matter which thread we're
	 * in. The callback function for isc_nm_read() often calls
	 * isc_nm_read() again; if we tried to do that synchronously
	 * we'd clash in processbuffer() and grow the stack indefinitely.
	 */
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);

	return;
}

void
isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsread_t *ievent =
		(isc__netievent_tcpdnsread_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc_result_t result;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(sock)) {
		result = ISC_R_CANCELED;
	} else {
		result = isc__nm_process_sock_buffer(sock);
	}

	if (result != ISC_R_SUCCESS) {
		sock->reading = true;
		isc__nm_failed_read_cb(sock, result, false);
	}
}

/*
 * Process a single packet from the incoming buffer.
 *
 * Return ISC_R_SUCCESS and attach 'handlep' to a handle if something
 * was processed; return ISC_R_NOMORE if there isn't a full message
 * to be processed.
 *
 * The caller will need to unreference the handle.
 */
isc_result_t
isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock) {
	size_t len;
	isc__nm_uvreq_t *req = NULL;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(sock)) {
		return (ISC_R_CANCELED);
	}

	/*
	 * If we don't even have the length yet, we can't do
	 * anything.
	 */
	if (sock->buf_len < 2) {
		return (ISC_R_NOMORE);
	}

	/*
	 * Process the first packet from the buffer, leaving
	 * the rest (if any) for later.
	 */
	len = ntohs(*(uint16_t *)sock->buf);
	if (len > sock->buf_len - 2) {
		return (ISC_R_NOMORE);
	}
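
	/*
	 * Worked example of the RFC 1035 TCP framing handled here: with
	 * sock->buf = { 0x00, 0x1d, ... }, the two-byte prefix decodes
	 * to len = 29, so a complete message needs buf_len >= 31 (two
	 * length bytes plus 29 message bytes).  With buf_len == 20 we
	 * would have returned ISC_R_NOMORE above and waited for more
	 * data.
	 */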

	req = isc__nm_get_read_req(sock, NULL);
	REQUIRE(VALID_UVREQ(req));

	/*
	 * We need to launch resume_processing after the buffer has
	 * been consumed, thus we need to delay detaching the handle.
	 */
	isc_nmhandle_attach(req->handle, &handle);

	/*
	 * The callback will be called synchronously because the
	 * result is ISC_R_SUCCESS, so we don't need to have
	 * the buffer on the heap.
	 */
	req->uvbuf.base = (char *)sock->buf + 2;
	req->uvbuf.len = len;

	/*
	 * If isc__nm_tcpdns_read() was called, it will be satisfied by
	 * a single DNS message in the next call.
	 */
	sock->recv_read = false;

	/*
	 * An assertion failure here means that there's an erroneous
	 * extra nmhandle detach happening in the callback and
	 * resume_processing gets called while we are still processing
	 * the buffer.
	 */
	REQUIRE(sock->processing == false);
	sock->processing = true;
	isc__nm_readcb(sock, req, ISC_R_SUCCESS);
	sock->processing = false;

	len += 2;
	sock->buf_len -= len;
	if (sock->buf_len > 0) {
		memmove(sock->buf, sock->buf + len, sock->buf_len);
	}

	isc_nmhandle_detach(&handle);

	return (ISC_R_SUCCESS);
}

void
isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
		       const uv_buf_t *buf) {
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
	uint8_t *base = NULL;
	size_t len;
	isc_result_t result;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->reading);
	REQUIRE(buf != NULL);

	if (isc__nmsocket_closing(sock)) {
		isc__nm_failed_read_cb(sock, ISC_R_CANCELED, true);
		goto free;
	}

	if (nread < 0) {
		if (nread != UV_EOF) {
			isc__nm_incstats(sock->mgr,
					 sock->statsindex[STATID_RECVFAIL]);
		}

		isc__nm_failed_read_cb(sock, isc__nm_uverr2result(nread), true);
		goto free;
	}

	base = (uint8_t *)buf->base;
	len = nread;

	/*
	 * FIXME: We could avoid the memmove here if we knew we had
	 * received a full packet; we should be smarter, as there are
	 * only a few situations where that is not the case.
	 *
	 * tcp_alloc_buf should be smarter and point uv_read_start to
	 * the position where the previous read ended in sock->buf, so
	 * that the data could be read directly into sock->buf.
	 */

	if (sock->buf_len + len > sock->buf_size) {
		isc__nm_alloc_dnsbuf(sock, sock->buf_len + len);
	}
	memmove(sock->buf + sock->buf_len, base, len);
	sock->buf_len += len;

	if (!atomic_load(&sock->client)) {
		sock->read_timeout = atomic_load(&sock->mgr->idle);
	}

	result = isc__nm_process_sock_buffer(sock);
	if (result != ISC_R_SUCCESS) {
		isc__nm_failed_read_cb(sock, result, true);
	}
free:
	if (nread < 0) {
		/*
		 * The buffer may be a null buffer on error.
		 */
		if (buf->base == NULL && buf->len == 0) {
			return;
		}
	}

	isc__nm_free_uvbuf(sock, buf);
}

static void
quota_accept_cb(isc_quota_t *quota, void *sock0) {
	isc_nmsocket_t *sock = (isc_nmsocket_t *)sock0;

	REQUIRE(VALID_NMSOCK(sock));

	/*
	 * Create a tcpdnsaccept event and pass it using the async channel.
	 */

	isc__netievent_tcpdnsaccept_t *ievent =
		isc__nm_get_netievent_tcpdnsaccept(sock->mgr, sock, quota);
	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);
}

/*
 * This is called after we get a quota_accept_cb() callback.
 */
void
isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsaccept_t *ievent =
		(isc__netievent_tcpdnsaccept_t *)ev0;
	isc_result_t result;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->tid == isc_nm_tid());

	result = accept_connection(ievent->sock, ievent->quota);
	isc__nm_accept_connection_log(result, can_log_tcpdns_quota());
}

static isc_result_t
accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
	isc_nmsocket_t *csock = NULL;
	isc__networker_t *worker = NULL;
	int r;
	isc_result_t result;
	struct sockaddr_storage peer_ss;
	struct sockaddr_storage local_ss;
	isc_sockaddr_t local;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(ssock)) {
		if (quota != NULL) {
			isc_quota_detach(&quota);
		}
		return (ISC_R_CANCELED);
	}

	REQUIRE(ssock->accept_cb != NULL);

	csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t));
	isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpdnssocket,
			   &ssock->iface);
	csock->tid = ssock->tid;
	csock->extrahandlesize = ssock->extrahandlesize;
	isc__nmsocket_attach(ssock, &csock->server);
	csock->recv_cb = ssock->recv_cb;
	csock->recv_cbarg = ssock->recv_cbarg;
	csock->quota = quota;
	csock->accepting = true;

	worker = &csock->mgr->workers[csock->tid];

	r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&csock->uv_handle.handle, csock);

	r = uv_timer_init(&worker->loop, &csock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock);

	r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream);
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	r = uv_tcp_getpeername(&csock->uv_handle.tcp,
			       (struct sockaddr *)&peer_ss,
			       &(int){ sizeof(peer_ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&csock->peer,
					   (struct sockaddr *)&peer_ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	r = uv_tcp_getsockname(&csock->uv_handle.tcp,
			       (struct sockaddr *)&local_ss,
			       &(int){ sizeof(local_ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&local,
					   (struct sockaddr *)&local_ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	/*
	 * The handle will be either detached on acceptcb failure or in the
	 * readcb.
	 */
	handle = isc__nmhandle_get(csock, NULL, &local);

	result = ssock->accept_cb(handle, ISC_R_SUCCESS, ssock->accept_cbarg);
	if (result != ISC_R_SUCCESS) {
		isc_nmhandle_detach(&handle);
		goto failure;
	}

	csock->accepting = false;

	isc__nm_incstats(csock->mgr, csock->statsindex[STATID_ACCEPT]);

	csock->read_timeout = atomic_load(&csock->mgr->init);

	csock->closehandle_cb = isc__nm_resume_processing;

	/*
	 * We need to keep the handle alive until we fail to read or the
	 * connection is closed by the other side; it will be detached
	 * via prep_destroy()->tcpdns_close_direct().
	 */
	isc_nmhandle_attach(handle, &csock->recv_handle);
	result = isc__nm_process_sock_buffer(csock);
	if (result != ISC_R_SUCCESS) {
		isc_nmhandle_detach(&csock->recv_handle);
		isc_nmhandle_detach(&handle);
		goto failure;
	}

	/*
	 * The initial timer has been set, update the read timeout for the next
	 * reads.
	 */
	csock->read_timeout = (atomic_load(&csock->keepalive)
				       ? atomic_load(&csock->mgr->keepalive)
				       : atomic_load(&csock->mgr->idle));

	isc_nmhandle_detach(&handle);

	/*
	 * sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&csock);

	return (ISC_R_SUCCESS);

failure:

	atomic_store(&csock->active, false);

	isc__nm_failed_accept_cb(csock, result);

	isc__nmsocket_prep_destroy(csock);

	isc__nmsocket_detach(&csock);

	return (result);
}

void
isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
		    isc_nm_cb_t cb, void *cbarg) {
	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));

	isc_nmsocket_t *sock = handle->sock;
	isc__netievent_tcpdnssend_t *ievent = NULL;
	isc__nm_uvreq_t *uvreq = NULL;

	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	uvreq = isc__nm_uvreq_get(sock->mgr, sock);
	*(uint16_t *)uvreq->tcplen = htons(region->length);
	uvreq->uvbuf.base = (char *)region->base;
	uvreq->uvbuf.len = region->length;

	isc_nmhandle_attach(handle, &uvreq->handle);

	uvreq->cb.send = cb;
	uvreq->cbarg = cbarg;

	ievent = isc__nm_get_netievent_tcpdnssend(sock->mgr, sock, uvreq);
	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);

	return;
}
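
/*
 * Worked example of the framing set up above: for a 300-byte DNS
 * message, htons(300) stores the prefix bytes { 0x01, 0x2c } in
 * uvreq->tcplen, and isc__nm_async_tcpdnssend() below then writes the
 * two-byte prefix and the 300-byte payload as a two-element uv_buf_t
 * array, 302 bytes in total on the wire.
 */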

static void
tcpdns_send_cb(uv_write_t *req, int status) {
	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_UVREQ(uvreq));
	REQUIRE(VALID_NMSOCK(uvreq->sock));

	sock = uvreq->sock;

	isc_nm_timer_stop(uvreq->timer);
	isc_nm_timer_detach(&uvreq->timer);

	if (status < 0) {
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]);
		isc__nm_failed_send_cb(sock, uvreq,
				       isc__nm_uverr2result(status));
		return;
	}

	isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
}

/*
 * Handle the 'tcpdnssend' async event - send a packet on the socket.
 */
void
isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnssend_t *ievent =
		(isc__netievent_tcpdnssend_t *)ev0;

	REQUIRE(VALID_UVREQ(ievent->req));
	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->type == isc_nm_tcpdnssocket);
	REQUIRE(ievent->sock->tid == isc_nm_tid());

	isc_result_t result;
	isc_nmsocket_t *sock = ievent->sock;
	isc__nm_uvreq_t *uvreq = ievent->req;

	if (sock->write_timeout == 0) {
		sock->write_timeout =
			(atomic_load(&sock->keepalive)
				 ? atomic_load(&sock->mgr->keepalive)
				 : atomic_load(&sock->mgr->idle));
	}

	uv_buf_t bufs[2] = { { .base = uvreq->tcplen, .len = 2 },
			     { .base = uvreq->uvbuf.base,
			       .len = uvreq->uvbuf.len } };
	int nbufs = 2;
	int r;

	UNUSED(worker);

	if (isc__nmsocket_closing(sock)) {
		result = ISC_R_CANCELED;
		goto fail;
	}

	r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs);

	if (r == (int)(bufs[0].len + bufs[1].len)) {
		/* Wrote everything */
		isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, true);
		return;
	}

	if (r == 1) {
		/* Partial write of the DNSMSG length */
		bufs[0].base = uvreq->tcplen + 1;
		bufs[0].len = 1;
	} else if (r > 0) {
		/* Partial write of the DNSMSG */
		nbufs = 1;
		bufs[0].base = uvreq->uvbuf.base + (r - 2);
		bufs[0].len = uvreq->uvbuf.len - (r - 2);
	} else if (r == UV_ENOSYS || r == UV_EAGAIN) {
		/* uv_try_write() is not supported, send asynchronously */
	} else {
		/* Error sending data */
		result = isc__nm_uverr2result(r);
		goto fail;
	}
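
	/*
	 * Worked example of the partial-write arithmetic above: for a
	 * 100-byte message, uv_try_write() may report e.g. r == 5,
	 * meaning both length bytes plus three payload bytes went out;
	 * the retry buffer then starts at uvbuf.base + 3 (r - 2) with
	 * length 97, and uv_write() below finishes the job.  r == 1 is
	 * special: only half of the length prefix was written, so both
	 * buffers are kept, with the prefix buffer shortened to its
	 * remaining byte.
	 */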

	r = uv_write(&uvreq->uv_req.write, &sock->uv_handle.stream, bufs, nbufs,
		     tcpdns_send_cb);
	if (r < 0) {
		result = isc__nm_uverr2result(r);
		goto fail;
	}

	isc_nm_timer_create(uvreq->handle, isc__nmsocket_writetimeout_cb, uvreq,
			    &uvreq->timer);
	if (sock->write_timeout > 0) {
		isc_nm_timer_start(uvreq->timer, sock->write_timeout);
	}

	return;
fail:
	if (result != ISC_R_SUCCESS) {
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]);
		isc__nm_failed_send_cb(sock, uvreq, result);
	}
}

static void
tcpdns_stop_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	uv_handle_set_data(handle, NULL);

	if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]);

	atomic_store(&sock->listening, false);

	isc__nmsocket_detach(&sock);
}

static void
tcpdns_close_sock(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]);

	if (sock->server != NULL) {
		isc__nmsocket_detach(&sock->server);
	}

	atomic_store(&sock->connected, false);

	isc__nmsocket_prep_destroy(sock);
}

static void
tcpdns_close_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	uv_handle_set_data(handle, NULL);

	tcpdns_close_sock(sock);
}

static void
read_timer_close_cb(uv_handle_t *timer) {
	isc_nmsocket_t *sock = uv_handle_get_data(timer);
	uv_handle_set_data(timer, NULL);

	REQUIRE(VALID_NMSOCK(sock));

	if (sock->parent) {
		uv_close(&sock->uv_handle.handle, tcpdns_stop_cb);
	} else if (uv_is_closing(&sock->uv_handle.handle)) {
		tcpdns_close_sock(sock);
	} else {
		uv_close(&sock->uv_handle.handle, tcpdns_close_cb);
	}
}

static void
stop_tcpdns_child(isc_nmsocket_t *sock) {
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->tid == isc_nm_tid());

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		return;
	}

	tcpdns_close_direct(sock);

	atomic_fetch_sub(&sock->parent->rchildren, 1);

	isc_barrier_wait(&sock->parent->stoplistening);
}

static void
stop_tcpdns_parent(isc_nmsocket_t *sock) {
	isc_nmsocket_t *csock = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcpdnslistener);

	isc_barrier_init(&sock->stoplistening, sock->nchildren);

	for (size_t i = 0; i < sock->nchildren; i++) {
		csock = &sock->children[i];
		REQUIRE(VALID_NMSOCK(csock));

		if ((int)i == isc_nm_tid()) {
			/*
			 * We need to schedule closing the other sockets first.
			 */
			continue;
		}

		atomic_store(&csock->active, false);
		enqueue_stoplistening(csock);
	}

	csock = &sock->children[isc_nm_tid()];
	atomic_store(&csock->active, false);
	stop_tcpdns_child(csock);

	atomic_store(&sock->closed, true);
	isc__nmsocket_prep_destroy(sock);
}

static void
tcpdns_close_direct(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}

	if (sock->recv_handle != NULL) {
		isc_nmhandle_detach(&sock->recv_handle);
	}

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);

	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
	uv_close((uv_handle_t *)&sock->read_timer, read_timer_close_cb);
}

void
isc__nm_tcpdns_close(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(!isc__nmsocket_active(sock));

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		return;
	}

	if (sock->tid == isc_nm_tid()) {
		tcpdns_close_direct(sock);
	} else {
		/*
		 * We need to create an event and pass it using the
		 * async channel.
		 */
		isc__netievent_tcpdnsclose_t *ievent =
			isc__nm_get_netievent_tcpdnsclose(sock->mgr, sock);

		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
				       (isc__netievent_t *)ievent);
	}
}

void
isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsclose_t *ievent =
		(isc__netievent_tcpdnsclose_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	tcpdns_close_direct(sock);
}

static void
tcpdns_close_connect_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	REQUIRE(VALID_NMSOCK(sock));

	REQUIRE(isc__nm_in_netthread());
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nmsocket_prep_destroy(sock);
	isc__nmsocket_detach(&sock);
}

void
isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	/*
	 * If the socket is active, mark it inactive and
	 * continue. If it isn't active, stop now.
	 */
	if (!isc__nmsocket_deactivate(sock)) {
		return;
	}

	if (sock->accepting) {
		return;
	}

	if (atomic_load(&sock->connecting)) {
		isc_nmsocket_t *tsock = NULL;
		isc__nmsocket_attach(sock, &tsock);
		uv_close(&sock->uv_handle.handle, tcpdns_close_connect_cb);
		return;
	}

	if (sock->statichandle != NULL) {
		isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
		return;
	}

	/*
	 * Otherwise, we just send the socket to the abyss...
	 */
	if (sock->parent == NULL) {
		isc__nmsocket_prep_destroy(sock);
	}
}

void
isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle) {
	isc_nmsocket_t *sock = NULL;
	isc__netievent_tcpdnscancel_t *ievent = NULL;

	REQUIRE(VALID_NMHANDLE(handle));

	sock = handle->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	ievent = isc__nm_get_netievent_tcpdnscancel(sock->mgr, sock, handle);
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);
}

void
isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnscancel_t *ievent =
		(isc__netievent_tcpdnscancel_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
}

void
isc_nm_tcpdns_sequential(isc_nmhandle_t *handle) {
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));
	REQUIRE(handle->sock->type == isc_nm_tcpdnssocket);

	sock = handle->sock;

	/*
	 * We don't want pipelining on this connection. That means
	 * that we need to pause after reading each request, and
	 * resume only after the request has been processed. This
	 * is done in resume_processing(), which is the socket's
	 * closehandle_cb callback, called whenever a handle
	 * is released.
	 */

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);
	atomic_store(&sock->sequential, true);
}
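
/*
 * Illustrative note (not from this file): a server-side consumer would
 * typically call isc_nm_tcpdns_sequential() from its accept or read
 * callback before issuing the next read, e.g.
 *
 *	isc_nm_tcpdns_sequential(handle);
 *	isc_nm_read(handle, read_cb, read_cbarg);
 *
 * after which each subsequent message on the connection is only read
 * once the previous handle has been released and resume_processing()
 * (the closehandle_cb set in accept_connection() above) has run.
 */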