1 /* $NetBSD: netmgr.c,v 1.1 2024/02/18 20:57:55 christos Exp $ */
2
3 /*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16 #include <inttypes.h>
17 #include <unistd.h>
18 #include <uv.h>
19 #ifdef HAVE_LIBCTRACE
20 #include <execinfo.h>
21 #endif /* ifdef HAVE_LIBCTRACE */
22
23 #include <isc/atomic.h>
24 #include <isc/barrier.h>
25 #include <isc/buffer.h>
26 #include <isc/condition.h>
27 #include <isc/errno.h>
28 #include <isc/list.h>
29 #include <isc/log.h>
30 #include <isc/magic.h>
31 #include <isc/mem.h>
32 #include <isc/netmgr.h>
33 #include <isc/print.h>
34 #include <isc/quota.h>
35 #include <isc/random.h>
36 #include <isc/refcount.h>
37 #include <isc/region.h>
38 #include <isc/result.h>
39 #include <isc/sockaddr.h>
40 #include <isc/stats.h>
41 #include <isc/strerr.h>
42 #include <isc/task.h>
43 #include <isc/thread.h>
44 #include <isc/util.h>
45
46 #include "netmgr-int.h"
47 #include "netmgr_p.h"
48 #include "openssl_shim.h"
49 #include "trampoline_p.h"
50 #include "uv-compat.h"
51
/*%
 * The number of isc_nmhandle and isc_nm_uvreq objects that are cached
 * for reuse in each socket.
 */
56 #define ISC_NM_HANDLES_STACK_SIZE 600
57 #define ISC_NM_REQS_STACK_SIZE 600
58
59 /*%
60 * Shortcut index arrays to get access to statistics counters.
61 */
62
63 static const isc_statscounter_t udp4statsindex[] = {
64 isc_sockstatscounter_udp4open,
65 isc_sockstatscounter_udp4openfail,
66 isc_sockstatscounter_udp4close,
67 isc_sockstatscounter_udp4bindfail,
68 isc_sockstatscounter_udp4connectfail,
69 isc_sockstatscounter_udp4connect,
70 -1,
71 -1,
72 isc_sockstatscounter_udp4sendfail,
73 isc_sockstatscounter_udp4recvfail,
74 isc_sockstatscounter_udp4active
75 };
76
77 static const isc_statscounter_t udp6statsindex[] = {
78 isc_sockstatscounter_udp6open,
79 isc_sockstatscounter_udp6openfail,
80 isc_sockstatscounter_udp6close,
81 isc_sockstatscounter_udp6bindfail,
82 isc_sockstatscounter_udp6connectfail,
83 isc_sockstatscounter_udp6connect,
84 -1,
85 -1,
86 isc_sockstatscounter_udp6sendfail,
87 isc_sockstatscounter_udp6recvfail,
88 isc_sockstatscounter_udp6active
89 };
90
91 static const isc_statscounter_t tcp4statsindex[] = {
92 isc_sockstatscounter_tcp4open, isc_sockstatscounter_tcp4openfail,
93 isc_sockstatscounter_tcp4close, isc_sockstatscounter_tcp4bindfail,
94 isc_sockstatscounter_tcp4connectfail, isc_sockstatscounter_tcp4connect,
95 isc_sockstatscounter_tcp4acceptfail, isc_sockstatscounter_tcp4accept,
96 isc_sockstatscounter_tcp4sendfail, isc_sockstatscounter_tcp4recvfail,
97 isc_sockstatscounter_tcp4active
98 };
99
100 static const isc_statscounter_t tcp6statsindex[] = {
101 isc_sockstatscounter_tcp6open, isc_sockstatscounter_tcp6openfail,
102 isc_sockstatscounter_tcp6close, isc_sockstatscounter_tcp6bindfail,
103 isc_sockstatscounter_tcp6connectfail, isc_sockstatscounter_tcp6connect,
104 isc_sockstatscounter_tcp6acceptfail, isc_sockstatscounter_tcp6accept,
105 isc_sockstatscounter_tcp6sendfail, isc_sockstatscounter_tcp6recvfail,
106 isc_sockstatscounter_tcp6active
107 };
108
109 #if 0
110 /* XXX: not currently used */
111 static const isc_statscounter_t unixstatsindex[] = {
112 isc_sockstatscounter_unixopen,
113 isc_sockstatscounter_unixopenfail,
114 isc_sockstatscounter_unixclose,
115 isc_sockstatscounter_unixbindfail,
116 isc_sockstatscounter_unixconnectfail,
117 isc_sockstatscounter_unixconnect,
118 isc_sockstatscounter_unixacceptfail,
119 isc_sockstatscounter_unixaccept,
120 isc_sockstatscounter_unixsendfail,
121 isc_sockstatscounter_unixrecvfail,
122 isc_sockstatscounter_unixactive
123 };
124 #endif /* if 0 */
125
126 /*
127 * libuv is not thread safe, but has mechanisms to pass messages
128 * between threads. Each socket is owned by a thread. For UDP
129 * sockets we have a set of sockets for each interface and we can
130 * choose a sibling and send the message directly. For TCP, or if
131 * we're calling from a non-networking thread, we need to pass the
132 * request using async_cb.
133 */
134
135 #if defined(HAVE_THREAD_LOCAL)
136 #include <threads.h>
137 static thread_local int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN;
138 #elif defined(HAVE___THREAD)
139 static __thread int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN;
140 #elif HAVE___DECLSPEC_THREAD
141 __declspec(thread) int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN;
142 #endif /* if defined(HAVE_THREAD_LOCAL) */
143
144 static void
145 nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG);
146 static void
147 nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle);
148 static isc_threadresult_t
149 nm_thread(isc_threadarg_t worker0);
150 static void
151 async_cb(uv_async_t *handle);
152
153 static bool
154 process_netievent(isc__networker_t *worker, isc__netievent_t *ievent);
155 static isc_result_t
156 process_queue(isc__networker_t *worker, netievent_type_t type);
157 static void
158 wait_for_priority_queue(isc__networker_t *worker);
159 static void
160 drain_queue(isc__networker_t *worker, netievent_type_t type);
161
162 static void
163 isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0);
164 static void
165 isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0);
166 static void
167 isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0);
168 static void
169 isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0);
170 static void
171 isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0);
172
173 static void
174 isc__nm_threadpool_initialize(uint32_t workers);
175 static void
176 isc__nm_work_cb(uv_work_t *req);
177 static void
178 isc__nm_after_work_cb(uv_work_t *req, int status);
179
180 void
181 isc__nmsocket_reset(isc_nmsocket_t *sock);
182
183 /*%<
184 * Issue a 'handle closed' callback on the socket.
185 */
186
187 static void
188 nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG);
189
190 int
isc_nm_tid(void) {
192 return (isc__nm_tid_v);
193 }
194
195 bool
isc__nm_in_netthread(void) {
197 return (isc__nm_tid_v >= 0);
198 }
199
200 #ifdef WIN32
201 static void
isc__nm_winsock_initialize(void) {
203 WORD wVersionRequested = MAKEWORD(2, 2);
204 WSADATA wsaData;
205 int result;
206
207 result = WSAStartup(wVersionRequested, &wsaData);
208 if (result != 0) {
209 char strbuf[ISC_STRERRORSIZE];
210 strerror_r(result, strbuf, sizeof(strbuf));
211 UNEXPECTED_ERROR(__FILE__, __LINE__,
212 "WSAStartup() failed with error code %lu: %s",
213 result, strbuf);
214 }
215
216 /*
217 * Confirm that the WinSock DLL supports version 2.2. Note that if the
218 * DLL supports versions greater than 2.2 in addition to 2.2, it will
219 * still return 2.2 in wVersion since that is the version we requested.
220 */
221 if (LOBYTE(wsaData.wVersion) != 2 || HIBYTE(wsaData.wVersion) != 2) {
222 UNEXPECTED_ERROR(__FILE__, __LINE__,
223 "Unusable WinSock DLL version: %u.%u",
224 LOBYTE(wsaData.wVersion),
225 HIBYTE(wsaData.wVersion));
226 }
227 }
228
229 static void
isc__nm_winsock_destroy(void) {
231 WSACleanup();
232 }
233 #endif /* WIN32 */
234
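/*
 * Size the libuv thread pool to match the number of workers unless the
 * operator has already set UV_THREADPOOL_SIZE; libuv only consults this
 * environment variable when its thread pool is first used, so it has to
 * be set before any work is queued to it.
 */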
235 static void
isc__nm_threadpool_initialize(uint32_t nworkers) {
237 char buf[11];
238 int r = uv_os_getenv("UV_THREADPOOL_SIZE", buf,
239 &(size_t){ sizeof(buf) });
240 if (r == UV_ENOENT) {
241 snprintf(buf, sizeof(buf), "%" PRIu32, nworkers);
242 uv_os_setenv("UV_THREADPOOL_SIZE", buf);
243 }
244 }
245
246 #if HAVE_DECL_UV_UDP_MMSG_FREE
247 #define MINIMAL_UV_VERSION UV_VERSION(1, 40, 0)
248 #elif HAVE_DECL_UV_UDP_RECVMMSG
249 #define MAXIMAL_UV_VERSION UV_VERSION(1, 39, 99)
250 #define MINIMAL_UV_VERSION UV_VERSION(1, 37, 0)
251 #elif _WIN32
252 #define MINIMAL_UV_VERSION UV_VERSION(1, 0, 0)
253 #else
254 #define MAXIMAL_UV_VERSION UV_VERSION(1, 34, 99)
255 #define MINIMAL_UV_VERSION UV_VERSION(1, 0, 0)
256 #endif
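/*
 * The MINIMAL_UV_VERSION/MAXIMAL_UV_VERSION limits above tie the runtime
 * libuv version to the one the binary was compiled against: UDP
 * recvmmsg/mmsg-free support differs between libuv releases, so a
 * mismatch is rejected in isc__netmgr_create() below.
 */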
257
258 void
isc__netmgr_create(isc_mem_t *mctx, uint32_t nworkers, isc_nm_t **netmgrp) {
260 isc_nm_t *mgr = NULL;
261 char name[32];
262
263 REQUIRE(nworkers > 0);
264
265 #ifdef MAXIMAL_UV_VERSION
266 if (uv_version() > MAXIMAL_UV_VERSION) {
267 isc_error_fatal(__FILE__, __LINE__,
268 "libuv version too new: running with libuv %s "
269 "when compiled with libuv %s will lead to "
270 "libuv failures",
271 uv_version_string(), UV_VERSION_STRING);
272 }
273 #endif /* MAXIMAL_UV_VERSION */
274
275 if (uv_version() < MINIMAL_UV_VERSION) {
276 isc_error_fatal(__FILE__, __LINE__,
277 "libuv version too old: running with libuv %s "
278 "when compiled with libuv %s will lead to "
279 "libuv failures",
280 uv_version_string(), UV_VERSION_STRING);
281 }
282
283 #ifdef WIN32
284 isc__nm_winsock_initialize();
285 #endif /* WIN32 */
286
287 isc__nm_threadpool_initialize(nworkers);
288
289 mgr = isc_mem_get(mctx, sizeof(*mgr));
290 *mgr = (isc_nm_t){
291 .nworkers = nworkers * 2,
292 .nlisteners = nworkers,
293 };
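/*
 * Note that the worker array is twice the requested size: the first
 * 'nlisteners' workers serve listening sockets and ordinary netmgr
 * events, while the upper half is used for "slow" task events
 * dispatched via isc_nm_task_enqueue() (see below).
 */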
294
295 isc_mem_attach(mctx, &mgr->mctx);
296 isc_mutex_init(&mgr->lock);
297 isc_condition_init(&mgr->wkstatecond);
298 isc_condition_init(&mgr->wkpausecond);
299 isc_refcount_init(&mgr->references, 1);
300 atomic_init(&mgr->maxudp, 0);
301 atomic_init(&mgr->interlocked, ISC_NETMGR_NON_INTERLOCKED);
302 atomic_init(&mgr->workers_paused, 0);
303 atomic_init(&mgr->paused, false);
304 atomic_init(&mgr->closing, false);
305 #if HAVE_SO_REUSEPORT_LB
306 mgr->load_balance_sockets = true;
307 #else
308 mgr->load_balance_sockets = false;
309 #endif
310
311 #ifdef NETMGR_TRACE
312 ISC_LIST_INIT(mgr->active_sockets);
313 #endif
314
315 /*
316 * Default TCP timeout values.
317 * May be updated by isc_nm_tcptimeouts().
318 */
319 atomic_init(&mgr->init, 30000);
320 atomic_init(&mgr->idle, 30000);
321 atomic_init(&mgr->keepalive, 30000);
322 atomic_init(&mgr->advertised, 30000);
323
324 isc_barrier_init(&mgr->pausing, mgr->nworkers);
325 isc_barrier_init(&mgr->resuming, mgr->nworkers);
326
327 mgr->workers = isc_mem_get(mctx,
328 mgr->nworkers * sizeof(isc__networker_t));
329 for (int i = 0; i < mgr->nworkers; i++) {
330 isc__networker_t *worker = &mgr->workers[i];
331 int r;
332
333 *worker = (isc__networker_t){
334 .mgr = mgr,
335 .id = i,
336 };
337
338 r = uv_loop_init(&worker->loop);
339 UV_RUNTIME_CHECK(uv_loop_init, r);
340
341 worker->loop.data = &mgr->workers[i];
342
343 r = uv_async_init(&worker->loop, &worker->async, async_cb);
344 UV_RUNTIME_CHECK(uv_async_init, r);
345
346 for (size_t type = 0; type < NETIEVENT_MAX; type++) {
347 isc_mutex_init(&worker->ievents[type].lock);
348 isc_condition_init(&worker->ievents[type].cond);
349 ISC_LIST_INIT(worker->ievents[type].list);
350 }
351
352 worker->recvbuf = isc_mem_get(mctx, ISC_NETMGR_RECVBUF_SIZE);
353 worker->sendbuf = isc_mem_get(mctx, ISC_NETMGR_SENDBUF_SIZE);
354
355 /*
356 * We need to do this here and not in nm_thread to avoid a
357 * race - we could exit isc_nm_start, launch nm_destroy,
358 * and nm_thread would still not be up.
359 */
360 mgr->workers_running++;
361 isc_thread_create(nm_thread, &mgr->workers[i], &worker->thread);
362
363 snprintf(name, sizeof(name), "net-%d", i);
364 isc_thread_setname(worker->thread, name);
365 }
366
367 mgr->magic = NM_MAGIC;
368 *netmgrp = mgr;
369 }
370
371 /*
372 * Free the resources of the network manager.
373 */
374 static void
nm_destroy(isc_nm_t **mgr0) {
376 REQUIRE(VALID_NM(*mgr0));
377 REQUIRE(!isc__nm_in_netthread());
378
379 isc_nm_t *mgr = *mgr0;
380 *mgr0 = NULL;
381
382 isc_refcount_destroy(&mgr->references);
383
384 mgr->magic = 0;
385
386 for (int i = 0; i < mgr->nworkers; i++) {
387 isc__networker_t *worker = &mgr->workers[i];
388 isc__netievent_t *event = isc__nm_get_netievent_stop(mgr);
389 isc__nm_enqueue_ievent(worker, event);
390 }
391
392 LOCK(&mgr->lock);
393 while (mgr->workers_running > 0) {
394 WAIT(&mgr->wkstatecond, &mgr->lock);
395 }
396 UNLOCK(&mgr->lock);
397
398 for (int i = 0; i < mgr->nworkers; i++) {
399 isc__networker_t *worker = &mgr->workers[i];
400 int r;
401
402 r = uv_loop_close(&worker->loop);
403 UV_RUNTIME_CHECK(uv_loop_close, r);
404
405 for (size_t type = 0; type < NETIEVENT_MAX; type++) {
406 INSIST(ISC_LIST_EMPTY(worker->ievents[type].list));
407 isc_condition_destroy(&worker->ievents[type].cond);
408 isc_mutex_destroy(&worker->ievents[type].lock);
409 }
410
411 isc_mem_put(mgr->mctx, worker->sendbuf,
412 ISC_NETMGR_SENDBUF_SIZE);
413 isc_mem_put(mgr->mctx, worker->recvbuf,
414 ISC_NETMGR_RECVBUF_SIZE);
415 isc_thread_join(worker->thread, NULL);
416 }
417
418 if (mgr->stats != NULL) {
419 isc_stats_detach(&mgr->stats);
420 }
421
422 isc_barrier_destroy(&mgr->resuming);
423 isc_barrier_destroy(&mgr->pausing);
424
425 isc_condition_destroy(&mgr->wkstatecond);
426 isc_condition_destroy(&mgr->wkpausecond);
427 isc_mutex_destroy(&mgr->lock);
428
429 isc_mem_put(mgr->mctx, mgr->workers,
430 mgr->nworkers * sizeof(isc__networker_t));
431 isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(*mgr));
432
433 #ifdef WIN32
434 isc__nm_winsock_destroy();
435 #endif /* WIN32 */
436 }
437
438 static void
enqueue_pause(isc__networker_t *worker) {
440 isc__netievent_pause_t *event =
441 isc__nm_get_netievent_pause(worker->mgr);
442 isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
443 }
444
445 static void
isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0) {
447 UNUSED(ev0);
448 REQUIRE(worker->paused == false);
449
450 worker->paused = true;
451 uv_stop(&worker->loop);
452 }
453
454 void
isc_nm_pause(isc_nm_t *mgr) {
456 REQUIRE(VALID_NM(mgr));
457 REQUIRE(!atomic_load(&mgr->paused));
458
459 isc__nm_acquire_interlocked_force(mgr);
460
461 if (isc__nm_in_netthread()) {
462 REQUIRE(isc_nm_tid() == 0);
463 }
464
465 for (int i = 0; i < mgr->nworkers; i++) {
466 isc__networker_t *worker = &mgr->workers[i];
467 if (i == isc_nm_tid()) {
468 isc__nm_async_pause(worker, NULL);
469 } else {
470 enqueue_pause(worker);
471 }
472 }
473
474 if (isc__nm_in_netthread()) {
475 atomic_fetch_add(&mgr->workers_paused, 1);
476 isc_barrier_wait(&mgr->pausing);
477 }
478
479 LOCK(&mgr->lock);
480 while (atomic_load(&mgr->workers_paused) != mgr->workers_running) {
481 WAIT(&mgr->wkstatecond, &mgr->lock);
482 }
483 UNLOCK(&mgr->lock);
484
485 REQUIRE(atomic_compare_exchange_strong(&mgr->paused, &(bool){ false },
486 true));
487 }
488
489 static void
enqueue_resume(isc__networker_t *worker) {
491 isc__netievent_resume_t *event =
492 isc__nm_get_netievent_resume(worker->mgr);
493 isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
494 }
495
496 static void
isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0) {
498 UNUSED(ev0);
499 REQUIRE(worker->paused == true);
500
501 worker->paused = false;
502 }
503
504 void
isc_nm_resume(isc_nm_t *mgr) {
506 REQUIRE(VALID_NM(mgr));
507 REQUIRE(atomic_load(&mgr->paused));
508
509 if (isc__nm_in_netthread()) {
510 REQUIRE(isc_nm_tid() == 0);
511 drain_queue(&mgr->workers[isc_nm_tid()], NETIEVENT_PRIORITY);
512 }
513
514 for (int i = 0; i < mgr->nworkers; i++) {
515 isc__networker_t *worker = &mgr->workers[i];
516 if (i == isc_nm_tid()) {
517 isc__nm_async_resume(worker, NULL);
518 } else {
519 enqueue_resume(worker);
520 }
521 }
522
523 if (isc__nm_in_netthread()) {
524 drain_queue(&mgr->workers[isc_nm_tid()], NETIEVENT_PRIVILEGED);
525
526 atomic_fetch_sub(&mgr->workers_paused, 1);
527 isc_barrier_wait(&mgr->resuming);
528 }
529
530 LOCK(&mgr->lock);
531 while (atomic_load(&mgr->workers_paused) != 0) {
532 WAIT(&mgr->wkstatecond, &mgr->lock);
533 }
534 UNLOCK(&mgr->lock);
535
536 REQUIRE(atomic_compare_exchange_strong(&mgr->paused, &(bool){ true },
537 false));
538
539 isc__nm_drop_interlocked(mgr);
540 }
541
542 void
isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst) {
544 REQUIRE(VALID_NM(mgr));
545 REQUIRE(dst != NULL && *dst == NULL);
546
547 isc_refcount_increment(&mgr->references);
548
549 *dst = mgr;
550 }
551
552 void
isc_nm_detach(isc_nm_t **mgr0) {
554 isc_nm_t *mgr = NULL;
555
556 REQUIRE(mgr0 != NULL);
557 REQUIRE(VALID_NM(*mgr0));
558
559 mgr = *mgr0;
560 *mgr0 = NULL;
561
562 if (isc_refcount_decrement(&mgr->references) == 1) {
563 nm_destroy(&mgr);
564 }
565 }
566
567 void
isc__netmgr_shutdown(isc_nm_t *mgr) {
569 REQUIRE(VALID_NM(mgr));
570
571 atomic_store(&mgr->closing, true);
572 for (int i = 0; i < mgr->nworkers; i++) {
573 isc__netievent_t *event = NULL;
574 event = isc__nm_get_netievent_shutdown(mgr);
575 isc__nm_enqueue_ievent(&mgr->workers[i], event);
576 }
577 }
578
579 void
isc__netmgr_destroy(isc_nm_t **netmgrp) {
581 isc_nm_t *mgr = NULL;
582 int counter = 0;
583
584 REQUIRE(VALID_NM(*netmgrp));
585
586 mgr = *netmgrp;
587
588 /*
589 * Close active connections.
590 */
591 isc__netmgr_shutdown(mgr);
592
593 /*
594 * Wait for the manager to be dereferenced elsewhere.
595 */
596 while (isc_refcount_current(&mgr->references) > 1 && counter++ < 1000) {
597 uv_sleep(10);
598 }
599
600 #ifdef NETMGR_TRACE
601 if (isc_refcount_current(&mgr->references) > 1) {
602 isc__nm_dump_active(mgr);
603 UNREACHABLE();
604 }
605 #endif
606
607 /*
608 * Now just patiently wait
609 */
610 while (isc_refcount_current(&mgr->references) > 1) {
611 uv_sleep(10);
612 }
613
614 /*
615 * Detach final reference.
616 */
617 isc_nm_detach(netmgrp);
618 }
619
620 void
isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) {
622 REQUIRE(VALID_NM(mgr));
623
624 atomic_store(&mgr->maxudp, maxudp);
625 }
626
627 void
isc_nmhandle_setwritetimeout(isc_nmhandle_t *handle, uint64_t write_timeout) {
629 REQUIRE(VALID_NMHANDLE(handle));
630 REQUIRE(VALID_NMSOCK(handle->sock));
631
632 handle->sock->write_timeout = write_timeout;
633 }
634
635 void
isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle,
uint32_t keepalive, uint32_t advertised) {
638 REQUIRE(VALID_NM(mgr));
639
640 atomic_store(&mgr->init, init);
641 atomic_store(&mgr->idle, idle);
642 atomic_store(&mgr->keepalive, keepalive);
643 atomic_store(&mgr->advertised, advertised);
644 }
645
646 bool
isc_nm_getloadbalancesockets(isc_nm_t *mgr) {
648 REQUIRE(VALID_NM(mgr));
649
650 return (mgr->load_balance_sockets);
651 }
652
653 void
isc_nm_setloadbalancesockets(isc_nm_t *mgr, bool enabled) {
655 REQUIRE(VALID_NM(mgr));
656
657 #if HAVE_SO_REUSEPORT_LB
658 mgr->load_balance_sockets = enabled;
659 #else
660 UNUSED(enabled);
661 #endif
662 }
663
664 void
isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle,
uint32_t *keepalive, uint32_t *advertised) {
667 REQUIRE(VALID_NM(mgr));
668
669 if (initial != NULL) {
670 *initial = atomic_load(&mgr->init);
671 }
672
673 if (idle != NULL) {
674 *idle = atomic_load(&mgr->idle);
675 }
676
677 if (keepalive != NULL) {
678 *keepalive = atomic_load(&mgr->keepalive);
679 }
680
681 if (advertised != NULL) {
682 *advertised = atomic_load(&mgr->advertised);
683 }
684 }
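/*
 * A minimal usage sketch of the timeout setters/getters above
 * (hypothetical timeout values):
 *
 *	isc_nm_settimeouts(mgr, 30000, 30000, 30000, 30000);
 *
 *	uint32_t idle = 0;
 *	isc_nm_gettimeouts(mgr, NULL, &idle, NULL, NULL);
 *
 * NULL may be passed for any value the caller is not interested in.
 */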
685
686 /*
687 * nm_thread is a single worker thread, that runs uv_run event loop
688 * until asked to stop.
689 *
690 * There are four queues for asynchronous events:
691 *
692 * 1. priority queue - netievents on the priority queue are run even when
693 * the taskmgr enters exclusive mode and the netmgr is paused. This
694 * is needed to properly start listening on the interfaces, free
695 * resources on shutdown, or resume from a pause.
696 *
697 * 2. privileged task queue - only privileged tasks are queued here and
698 * this is the first queue that gets processed when network manager
699 * is unpaused using isc_nm_resume(). All netmgr workers need to
700 * clean the privileged task queue before they all proceed to normal
701 * operation. Both task queues are processed when the workers are
702 * shutting down.
703 *
704 * 3. task queue - only (traditional) tasks are scheduled here, and this
705 * queue and the privileged task queue are both processed when the
706 * netmgr workers are finishing. This is needed to process the task
707 * shutdown events.
708 *
709 * 4. normal queue - this is the queue with netmgr events, e.g. reading,
710 * sending, callbacks, etc.
711 */
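/*
 * In the code below these queues correspond to the NETIEVENT_PRIORITY,
 * NETIEVENT_PRIVILEGED, NETIEVENT_TASK and NETIEVENT_NORMAL slots of the
 * per-worker 'ievents' array.
 */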
712
713 static isc_threadresult_t
nm_thread(isc_threadarg_t worker0) {
715 isc__networker_t *worker = (isc__networker_t *)worker0;
716 isc_nm_t *mgr = worker->mgr;
717
718 isc__nm_tid_v = worker->id;
719
720 while (true) {
721 /*
722 * uv_run() runs async_cb() in a loop, which processes
723 * all four event queues until a "pause" or "stop" event
724 * is encountered. On pause, we process only priority and
725 * privileged events until resuming.
726 */
727 int r = uv_run(&worker->loop, UV_RUN_DEFAULT);
728 INSIST(r > 0 || worker->finished);
729
730 if (worker->paused) {
731 INSIST(atomic_load(&mgr->interlocked) != isc_nm_tid());
732
733 atomic_fetch_add(&mgr->workers_paused, 1);
734 if (isc_barrier_wait(&mgr->pausing) != 0) {
735 LOCK(&mgr->lock);
736 SIGNAL(&mgr->wkstatecond);
737 UNLOCK(&mgr->lock);
738 }
739
740 while (worker->paused) {
741 wait_for_priority_queue(worker);
742 }
743
744 /*
745 * All workers must drain the privileged event
746 * queue before we resume from pause.
747 */
748 drain_queue(worker, NETIEVENT_PRIVILEGED);
749
750 atomic_fetch_sub(&mgr->workers_paused, 1);
751 if (isc_barrier_wait(&mgr->resuming) != 0) {
752 LOCK(&mgr->lock);
753 SIGNAL(&mgr->wkstatecond);
754 UNLOCK(&mgr->lock);
755 }
756 }
757
758 if (r == 0) {
759 INSIST(worker->finished);
760 break;
761 }
762
763 INSIST(!worker->finished);
764 }
765
766 /*
767 * We are shutting down. Drain the queues.
768 */
769 drain_queue(worker, NETIEVENT_PRIVILEGED);
770 drain_queue(worker, NETIEVENT_TASK);
771
772 for (size_t type = 0; type < NETIEVENT_MAX; type++) {
773 LOCK(&worker->ievents[type].lock);
774 INSIST(ISC_LIST_EMPTY(worker->ievents[type].list));
775 UNLOCK(&worker->ievents[type].lock);
776 }
777
778 LOCK(&mgr->lock);
779 mgr->workers_running--;
780 SIGNAL(&mgr->wkstatecond);
781 UNLOCK(&mgr->lock);
782
783 return ((isc_threadresult_t)0);
784 }
785
786 static bool
process_all_queues(isc__networker_t *worker) {
788 bool reschedule = false;
/*
 * The queue processing function returns ISC_R_SUSPEND when the
 * system is pausing or stopping; we don't process any further events
 * from such a queue, but we need the async event to be rescheduled in
 * the next uv_run().
 */
795 for (size_t type = 0; type < NETIEVENT_MAX; type++) {
796 isc_result_t result = process_queue(worker, type);
797 switch (result) {
798 case ISC_R_SUSPEND:
799 reschedule = true;
800 break;
801 case ISC_R_EMPTY:
802 /* empty queue */
803 break;
804 case ISC_R_SUCCESS:
805 reschedule = true;
806 break;
807 default:
808 UNREACHABLE();
809 }
810 }
811
812 return (reschedule);
813 }
814
/*
 * async_cb() is a universal callback for 'async' events sent to the event
 * loop.  It's the only way to safely pass data to the libuv event loop.
 * We use a single async event and a set of mutex-protected queues of
 * 'isc__netievent_t' structures passed from other threads.
 */
821 static void
async_cb(uv_async_t *handle) {
823 isc__networker_t *worker = (isc__networker_t *)handle->loop->data;
824
825 if (process_all_queues(worker)) {
826 /*
827 * If we didn't process all the events, we need to enqueue
828 * async_cb to be run in the next iteration of the uv_loop
829 */
830 uv_async_send(handle);
831 }
832 }
833
834 static void
isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0) {
836 UNUSED(ev0);
837 worker->finished = true;
838 /* Close the async handler */
839 uv_close((uv_handle_t *)&worker->async, NULL);
840 }
841
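/*
 * Pick the worker that will run the task: a 'threadid' of -1 selects a
 * random listener worker, ISC_NM_TASK_SLOW-encoded ids map into the
 * upper ("slow") half of the worker array, and any other id is used
 * modulo the number of listener workers.
 */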
842 void
isc_nm_task_enqueue(isc_nm_t *nm, isc_task_t *task, int threadid) {
844 isc__netievent_t *event = NULL;
845 int tid;
846 isc__networker_t *worker = NULL;
847
848 if (threadid == -1) {
849 tid = (int)isc_random_uniform(nm->nlisteners);
850 } else if (threadid == ISC_NM_TASK_SLOW_OFFSET) {
851 tid = nm->nlisteners +
852 (int)isc_random_uniform(nm->nworkers - nm->nlisteners);
853 } else if (threadid < ISC_NM_TASK_SLOW_OFFSET) {
854 tid = nm->nlisteners + (ISC_NM_TASK_SLOW(threadid) %
855 (nm->nworkers - nm->nlisteners));
856 } else {
857 tid = threadid % nm->nlisteners;
858 }
859
860 worker = &nm->workers[tid];
861
862 if (isc_task_privileged(task)) {
863 event = (isc__netievent_t *)
864 isc__nm_get_netievent_privilegedtask(nm, task);
865 } else {
866 event = (isc__netievent_t *)isc__nm_get_netievent_task(nm,
867 task);
868 }
869
870 isc__nm_enqueue_ievent(worker, event);
871 }
872
873 #define isc__nm_async_privilegedtask(worker, ev0) \
874 isc__nm_async_task(worker, ev0)
875
876 static void
isc__nm_async_task(isc__networker_t *worker, isc__netievent_t *ev0) {
878 isc__netievent_task_t *ievent = (isc__netievent_task_t *)ev0;
879 isc_result_t result;
880
881 UNUSED(worker);
882
883 result = isc_task_run(ievent->task);
884
885 switch (result) {
886 case ISC_R_QUOTA:
887 isc_task_ready(ievent->task);
888 return;
889 case ISC_R_SUCCESS:
890 return;
891 default:
892 UNREACHABLE();
893 }
894 }
895
896 static void
wait_for_priority_queue(isc__networker_t *worker) {
898 isc_condition_t *cond = &worker->ievents[NETIEVENT_PRIORITY].cond;
899 isc_mutex_t *lock = &worker->ievents[NETIEVENT_PRIORITY].lock;
900 isc__netievent_list_t *list =
901 &(worker->ievents[NETIEVENT_PRIORITY].list);
902
903 LOCK(lock);
904 while (ISC_LIST_EMPTY(*list)) {
905 WAIT(cond, lock);
906 }
907 UNLOCK(lock);
908
909 drain_queue(worker, NETIEVENT_PRIORITY);
910 }
911
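/*
 * Keep processing the given queue until it is seen empty under its lock;
 * this is used for the priority, privileged and task queues when pausing,
 * resuming and shutting down, so that no events of those types are left
 * behind.
 */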
912 static void
drain_queue(isc__networker_t *worker, netievent_type_t type) {
914 bool empty = false;
915 while (!empty) {
916 if (process_queue(worker, type) == ISC_R_EMPTY) {
917 LOCK(&worker->ievents[type].lock);
918 empty = ISC_LIST_EMPTY(worker->ievents[type].list);
919 UNLOCK(&worker->ievents[type].lock);
920 }
921 }
922 }
923
/*
 * The two macros here generate the individual cases for the
 * process_netievent() function.  The NETIEVENT_CASE(type) macro is the
 * common case, and NETIEVENT_CASE_NOMORE(type) is a macro that causes the
 * loop in process_queue() to stop, i.e. it's only used for the netievents
 * that stop/pause processing of the enqueued netievents.
 */
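/*
 * For example, NETIEVENT_CASE(udpread) expands to a 'case
 * netievent_udpread:' branch that runs isc__nm_async_udpread() and then
 * returns the event structure to the manager's memory context via
 * isc__nm_put_netievent_udpread().
 */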
931 #define NETIEVENT_CASE(type) \
932 case netievent_##type: { \
933 isc__nm_async_##type(worker, ievent); \
934 isc__nm_put_netievent_##type( \
935 worker->mgr, (isc__netievent_##type##_t *)ievent); \
936 return (true); \
937 }
938
939 #define NETIEVENT_CASE_NOMORE(type) \
940 case netievent_##type: { \
941 isc__nm_async_##type(worker, ievent); \
942 isc__nm_put_netievent_##type(worker->mgr, ievent); \
943 return (false); \
944 }
945
946 static bool
process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) {
948 REQUIRE(worker->id == isc_nm_tid());
949
950 switch (ievent->type) {
951 /* Don't process more ievents when we are stopping */
952 NETIEVENT_CASE_NOMORE(stop);
953
954 NETIEVENT_CASE(privilegedtask);
955 NETIEVENT_CASE(task);
956
957 NETIEVENT_CASE(udpconnect);
958 NETIEVENT_CASE(udplisten);
959 NETIEVENT_CASE(udpstop);
960 NETIEVENT_CASE(udpsend);
961 NETIEVENT_CASE(udpread);
962 NETIEVENT_CASE(udpcancel);
963 NETIEVENT_CASE(udpclose);
964
965 NETIEVENT_CASE(tcpaccept);
966 NETIEVENT_CASE(tcpconnect);
967 NETIEVENT_CASE(tcplisten);
968 NETIEVENT_CASE(tcpstartread);
969 NETIEVENT_CASE(tcppauseread);
970 NETIEVENT_CASE(tcpsend);
971 NETIEVENT_CASE(tcpstop);
972 NETIEVENT_CASE(tcpcancel);
973 NETIEVENT_CASE(tcpclose);
974
975 NETIEVENT_CASE(tcpdnsaccept);
976 NETIEVENT_CASE(tcpdnslisten);
977 NETIEVENT_CASE(tcpdnsconnect);
978 NETIEVENT_CASE(tcpdnssend);
979 NETIEVENT_CASE(tcpdnscancel);
980 NETIEVENT_CASE(tcpdnsclose);
981 NETIEVENT_CASE(tcpdnsread);
982 NETIEVENT_CASE(tcpdnsstop);
983
984 NETIEVENT_CASE(connectcb);
985 NETIEVENT_CASE(readcb);
986 NETIEVENT_CASE(sendcb);
987
988 NETIEVENT_CASE(close);
989 NETIEVENT_CASE(detach);
990
991 NETIEVENT_CASE(shutdown);
992 NETIEVENT_CASE(resume);
993 NETIEVENT_CASE_NOMORE(pause);
994 default:
995 UNREACHABLE();
996 }
997 return (true);
998 }
999
1000 static isc_result_t
process_queue(isc__networker_t *worker, netievent_type_t type) {
1002 isc__netievent_t *ievent = NULL;
1003 isc__netievent_list_t list;
1004
1005 ISC_LIST_INIT(list);
1006
1007 LOCK(&worker->ievents[type].lock);
1008 ISC_LIST_MOVE(list, worker->ievents[type].list);
1009 UNLOCK(&worker->ievents[type].lock);
1010
1011 ievent = ISC_LIST_HEAD(list);
1012 if (ievent == NULL) {
1013 /* There's nothing scheduled */
1014 return (ISC_R_EMPTY);
1015 }
1016
1017 while (ievent != NULL) {
1018 isc__netievent_t *next = ISC_LIST_NEXT(ievent, link);
1019 ISC_LIST_DEQUEUE(list, ievent, link);
1020
1021 if (!process_netievent(worker, ievent)) {
1022 /* The netievent told us to stop */
1023 if (!ISC_LIST_EMPTY(list)) {
1024 /*
1025 * Reschedule the rest of the unprocessed
1026 * events.
1027 */
1028 LOCK(&worker->ievents[type].lock);
1029 ISC_LIST_PREPENDLIST(worker->ievents[type].list,
1030 list, link);
1031 UNLOCK(&worker->ievents[type].lock);
1032 }
1033 return (ISC_R_SUSPEND);
1034 }
1035
1036 ievent = next;
1037 }
1038
1039 /* We processed at least one */
1040 return (ISC_R_SUCCESS);
1041 }
1042
1043 void *
isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type) {
1045 isc__netievent_storage_t *event = isc_mem_get(mgr->mctx,
1046 sizeof(*event));
1047
1048 *event = (isc__netievent_storage_t){ .ni.type = type };
1049 ISC_LINK_INIT(&(event->ni), link);
1050 return (event);
1051 }
1052
1053 void
isc__nm_put_netievent(isc_nm_t *mgr, void *ievent) {
1055 isc_mem_put(mgr->mctx, ievent, sizeof(isc__netievent_storage_t));
1056 }
1057
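/*
 * The NETIEVENT_*_DEF macros below instantiate the per-type
 * isc__nm_get_netievent_<type>() and isc__nm_put_netievent_<type>()
 * helpers that are used throughout this file.
 */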
1058 NETIEVENT_SOCKET_DEF(tcpclose);
1059 NETIEVENT_SOCKET_DEF(tcplisten);
1060 NETIEVENT_SOCKET_DEF(tcppauseread);
1061 NETIEVENT_SOCKET_DEF(tcpstartread);
1062 NETIEVENT_SOCKET_DEF(tcpstop);
1063 NETIEVENT_SOCKET_DEF(udpclose);
1064 NETIEVENT_SOCKET_DEF(udplisten);
1065 NETIEVENT_SOCKET_DEF(udpread);
1066 NETIEVENT_SOCKET_DEF(udpsend);
1067 NETIEVENT_SOCKET_DEF(udpstop);
1068
1069 NETIEVENT_SOCKET_DEF(tcpdnsclose);
1070 NETIEVENT_SOCKET_DEF(tcpdnsread);
1071 NETIEVENT_SOCKET_DEF(tcpdnsstop);
1072 NETIEVENT_SOCKET_DEF(tcpdnslisten);
1073 NETIEVENT_SOCKET_REQ_DEF(tcpdnsconnect);
1074 NETIEVENT_SOCKET_REQ_DEF(tcpdnssend);
1075 NETIEVENT_SOCKET_HANDLE_DEF(tcpdnscancel);
1076 NETIEVENT_SOCKET_QUOTA_DEF(tcpdnsaccept);
1077
1078 NETIEVENT_SOCKET_REQ_DEF(tcpconnect);
1079 NETIEVENT_SOCKET_REQ_DEF(tcpsend);
1080 NETIEVENT_SOCKET_REQ_DEF(udpconnect);
1081 NETIEVENT_SOCKET_REQ_RESULT_DEF(connectcb);
1082 NETIEVENT_SOCKET_REQ_RESULT_DEF(readcb);
1083 NETIEVENT_SOCKET_REQ_RESULT_DEF(sendcb);
1084
1085 NETIEVENT_SOCKET_DEF(detach);
1086 NETIEVENT_SOCKET_HANDLE_DEF(tcpcancel);
1087 NETIEVENT_SOCKET_HANDLE_DEF(udpcancel);
1088
1089 NETIEVENT_SOCKET_QUOTA_DEF(tcpaccept);
1090
1091 NETIEVENT_SOCKET_DEF(close);
1092 NETIEVENT_DEF(pause);
1093 NETIEVENT_DEF(resume);
1094 NETIEVENT_DEF(shutdown);
1095 NETIEVENT_DEF(stop);
1096
1097 NETIEVENT_TASK_DEF(task);
1098 NETIEVENT_TASK_DEF(privilegedtask);
1099
1100 void
isc__nm_maybe_enqueue_ievent(isc__networker_t *worker,
isc__netievent_t *event) {
1103 /*
1104 * If we are already in the matching nmthread, process the ievent
1105 * directly.
1106 */
1107 if (worker->id == isc_nm_tid()) {
1108 process_netievent(worker, event);
1109 return;
1110 }
1111
1112 isc__nm_enqueue_ievent(worker, event);
1113 }
1114
1115 void
isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) {
1117 netievent_type_t type;
1118
1119 if (event->type > netievent_prio) {
1120 type = NETIEVENT_PRIORITY;
1121 } else {
1122 switch (event->type) {
1123 case netievent_prio:
1124 UNREACHABLE();
1125 break;
1126 case netievent_privilegedtask:
1127 type = NETIEVENT_PRIVILEGED;
1128 break;
1129 case netievent_task:
1130 type = NETIEVENT_TASK;
1131 break;
1132 default:
1133 type = NETIEVENT_NORMAL;
1134 break;
1135 }
1136 }
1137
1138 /*
1139 * We need to make sure this signal will be delivered and
1140 * the queue will be processed.
1141 */
1142 LOCK(&worker->ievents[type].lock);
1143 ISC_LIST_ENQUEUE(worker->ievents[type].list, event, link);
1144 if (type == NETIEVENT_PRIORITY) {
1145 SIGNAL(&worker->ievents[type].cond);
1146 }
1147 UNLOCK(&worker->ievents[type].lock);
1148
1149 uv_async_send(&worker->async);
1150 }
1151
1152 bool
isc__nmsocket_active(isc_nmsocket_t *sock) {
1154 REQUIRE(VALID_NMSOCK(sock));
1155 if (sock->parent != NULL) {
1156 return (atomic_load(&sock->parent->active));
1157 }
1158
1159 return (atomic_load(&sock->active));
1160 }
1161
1162 bool
isc__nmsocket_deactivate(isc_nmsocket_t *sock) {
1164 REQUIRE(VALID_NMSOCK(sock));
1165
1166 if (sock->parent != NULL) {
1167 return (atomic_compare_exchange_strong(&sock->parent->active,
1168 &(bool){ true }, false));
1169 }
1170
1171 return (atomic_compare_exchange_strong(&sock->active, &(bool){ true },
1172 false));
1173 }
1174
1175 void
isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG) {
1177 REQUIRE(VALID_NMSOCK(sock));
1178 REQUIRE(target != NULL && *target == NULL);
1179
1180 isc_nmsocket_t *rsock = NULL;
1181
1182 if (sock->parent != NULL) {
1183 rsock = sock->parent;
1184 INSIST(rsock->parent == NULL); /* sanity check */
1185 } else {
1186 rsock = sock;
1187 }
1188
1189 NETMGR_TRACE_LOG("isc__nmsocket_attach():%p->references = %" PRIuFAST32
1190 "\n",
1191 rsock, isc_refcount_current(&rsock->references) + 1);
1192
1193 isc_refcount_increment0(&rsock->references);
1194
1195 *target = sock;
1196 }
1197
1198 /*
1199 * Free all resources inside a socket (including its children if any).
1200 */
1201 static void
nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree FLARG) {
1203 isc_nmhandle_t *handle = NULL;
1204 isc__nm_uvreq_t *uvreq = NULL;
1205
1206 REQUIRE(VALID_NMSOCK(sock));
1207 REQUIRE(!isc__nmsocket_active(sock));
1208
1209 NETMGR_TRACE_LOG("nmsocket_cleanup():%p->references = %" PRIuFAST32
1210 "\n",
1211 sock, isc_refcount_current(&sock->references));
1212
1213 atomic_store(&sock->destroying, true);
1214
1215 if (sock->parent == NULL && sock->children != NULL) {
1216 /*
1217 * We shouldn't be here unless there are no active handles,
1218 * so we can clean up and free the children.
1219 */
1220 for (size_t i = 0; i < sock->nchildren; i++) {
1221 if (!atomic_load(&sock->children[i].destroying)) {
1222 nmsocket_cleanup(&sock->children[i],
1223 false FLARG_PASS);
1224 }
1225 }
1226
1227 /*
1228 * This was a parent socket: destroy the listening
1229 * barriers that synchronized the children.
1230 */
1231 isc_barrier_destroy(&sock->startlistening);
1232 isc_barrier_destroy(&sock->stoplistening);
1233
1234 /*
1235 * Now free them.
1236 */
1237 isc_mem_put(sock->mgr->mctx, sock->children,
1238 sock->nchildren * sizeof(*sock));
1239 sock->children = NULL;
1240 sock->nchildren = 0;
1241 }
1242 if (sock->statsindex != NULL) {
1243 isc__nm_decstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
1244 }
1245
1246 sock->statichandle = NULL;
1247
1248 if (sock->outerhandle != NULL) {
1249 isc__nmhandle_detach(&sock->outerhandle FLARG_PASS);
1250 }
1251
1252 if (sock->outer != NULL) {
1253 isc___nmsocket_detach(&sock->outer FLARG_PASS);
1254 }
1255
1256 while ((handle = isc_astack_pop(sock->inactivehandles)) != NULL) {
1257 nmhandle_free(sock, handle);
1258 }
1259
1260 if (sock->buf != NULL) {
1261 isc_mem_free(sock->mgr->mctx, sock->buf);
1262 }
1263
1264 if (sock->quota != NULL) {
1265 isc_quota_detach(&sock->quota);
1266 }
1267
1268 sock->pquota = NULL;
1269
1270 isc_astack_destroy(sock->inactivehandles);
1271
1272 while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) {
1273 isc_mem_put(sock->mgr->mctx, uvreq, sizeof(*uvreq));
1274 }
1275
1276 isc_astack_destroy(sock->inactivereqs);
1277 sock->magic = 0;
1278
1279 isc_condition_destroy(&sock->scond);
1280 isc_condition_destroy(&sock->cond);
1281 isc_mutex_destroy(&sock->lock);
1282 #ifdef NETMGR_TRACE
1283 LOCK(&sock->mgr->lock);
1284 ISC_LIST_UNLINK(sock->mgr->active_sockets, sock, active_link);
1285 UNLOCK(&sock->mgr->lock);
1286 #endif
1287 if (dofree) {
1288 isc_nm_t *mgr = sock->mgr;
1289 isc_mem_put(mgr->mctx, sock, sizeof(*sock));
1290 isc_nm_detach(&mgr);
1291 } else {
1292 isc_nm_detach(&sock->mgr);
1293 }
1294 }
1295
1296 static void
nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG) {
1298 int active_handles;
1299 bool destroy = false;
1300
1301 NETMGR_TRACE_LOG("%s():%p->references = %" PRIuFAST32 "\n", __func__,
1302 sock, isc_refcount_current(&sock->references));
1303
1304 if (sock->parent != NULL) {
1305 /*
1306 * This is a child socket and cannot be destroyed except
1307 * as a side effect of destroying the parent, so let's go
1308 * see if the parent is ready to be destroyed.
1309 */
1310 nmsocket_maybe_destroy(sock->parent FLARG_PASS);
1311 return;
1312 }
1313
1314 /*
1315 * This is a parent socket (or a standalone). See whether the
1316 * children have active handles before deciding whether to
1317 * accept destruction.
1318 */
1319 LOCK(&sock->lock);
1320 if (atomic_load(&sock->active) || atomic_load(&sock->destroying) ||
1321 !atomic_load(&sock->closed) || atomic_load(&sock->references) != 0)
1322 {
1323 UNLOCK(&sock->lock);
1324 return;
1325 }
1326
1327 active_handles = atomic_load(&sock->ah);
1328 if (sock->children != NULL) {
1329 for (size_t i = 0; i < sock->nchildren; i++) {
1330 LOCK(&sock->children[i].lock);
1331 active_handles += atomic_load(&sock->children[i].ah);
1332 UNLOCK(&sock->children[i].lock);
1333 }
1334 }
1335
1336 if (active_handles == 0 || sock->statichandle != NULL) {
1337 destroy = true;
1338 }
1339
1340 NETMGR_TRACE_LOG("%s:%p->active_handles = %d, .statichandle = %p\n",
1341 __func__, sock, active_handles, sock->statichandle);
1342
1343 if (destroy) {
1344 atomic_store(&sock->destroying, true);
1345 UNLOCK(&sock->lock);
1346 nmsocket_cleanup(sock, true FLARG_PASS);
1347 } else {
1348 UNLOCK(&sock->lock);
1349 }
1350 }
1351
1352 void
isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG) {
1354 REQUIRE(sock->parent == NULL);
1355
1356 NETMGR_TRACE_LOG("isc___nmsocket_prep_destroy():%p->references = "
1357 "%" PRIuFAST32 "\n",
1358 sock, isc_refcount_current(&sock->references));
1359
1360 /*
1361 * The final external reference to the socket is gone. We can try
1362 * destroying the socket, but we have to wait for all the inflight
1363 * handles to finish first.
1364 */
1365 atomic_store(&sock->active, false);
1366
1367 /*
1368 * If the socket has children, they'll need to be marked inactive
1369 * so they can be cleaned up too.
1370 */
1371 if (sock->children != NULL) {
1372 for (size_t i = 0; i < sock->nchildren; i++) {
1373 atomic_store(&sock->children[i].active, false);
1374 }
1375 }
1376
1377 /*
1378 * If we're here then we already stopped listening; otherwise
1379 * we'd have a hanging reference from the listening process.
1380 *
1381 * If it's a regular socket we may need to close it.
1382 */
1383 if (!atomic_load(&sock->closed)) {
1384 switch (sock->type) {
1385 case isc_nm_udpsocket:
1386 isc__nm_udp_close(sock);
1387 return;
1388 case isc_nm_tcpsocket:
1389 isc__nm_tcp_close(sock);
1390 return;
1391 case isc_nm_tcpdnssocket:
1392 isc__nm_tcpdns_close(sock);
1393 return;
1394 default:
1395 break;
1396 }
1397 }
1398
1399 nmsocket_maybe_destroy(sock FLARG_PASS);
1400 }
1401
1402 void
isc___nmsocket_detach(isc_nmsocket_t **sockp FLARG) {
1404 REQUIRE(sockp != NULL && *sockp != NULL);
1405 REQUIRE(VALID_NMSOCK(*sockp));
1406
1407 isc_nmsocket_t *sock = *sockp, *rsock = NULL;
1408 *sockp = NULL;
1409
1410 /*
1411 * If the socket is a part of a set (a child socket) we are
1412 * counting references for the whole set at the parent.
1413 */
1414 if (sock->parent != NULL) {
1415 rsock = sock->parent;
1416 INSIST(rsock->parent == NULL); /* Sanity check */
1417 } else {
1418 rsock = sock;
1419 }
1420
1421 NETMGR_TRACE_LOG("isc__nmsocket_detach():%p->references = %" PRIuFAST32
1422 "\n",
1423 rsock, isc_refcount_current(&rsock->references) - 1);
1424
1425 if (isc_refcount_decrement(&rsock->references) == 1) {
1426 isc___nmsocket_prep_destroy(rsock FLARG_PASS);
1427 }
1428 }
1429
1430 void
isc_nmsocket_close(isc_nmsocket_t **sockp) {
1432 REQUIRE(sockp != NULL);
1433 REQUIRE(VALID_NMSOCK(*sockp));
1434 REQUIRE((*sockp)->type == isc_nm_udplistener ||
1435 (*sockp)->type == isc_nm_tcplistener ||
1436 (*sockp)->type == isc_nm_tcpdnslistener);
1437
1438 isc__nmsocket_detach(sockp);
1439 }
1440
1441 void
isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
isc_sockaddr_t *iface FLARG) {
1444 uint16_t family;
1445
1446 REQUIRE(sock != NULL);
1447 REQUIRE(mgr != NULL);
1448 REQUIRE(iface != NULL);
1449
1450 family = iface->type.sa.sa_family;
1451
1452 *sock = (isc_nmsocket_t){ .type = type,
1453 .iface = *iface,
1454 .fd = -1,
1455 .inactivehandles = isc_astack_new(
1456 mgr->mctx, ISC_NM_HANDLES_STACK_SIZE),
1457 .inactivereqs = isc_astack_new(
1458 mgr->mctx, ISC_NM_REQS_STACK_SIZE) };
1459
1460 #if NETMGR_TRACE
1461 sock->backtrace_size = backtrace(sock->backtrace, TRACE_SIZE);
1462 ISC_LINK_INIT(sock, active_link);
1463 ISC_LIST_INIT(sock->active_handles);
1464 LOCK(&mgr->lock);
1465 ISC_LIST_APPEND(mgr->active_sockets, sock, active_link);
1466 UNLOCK(&mgr->lock);
1467 #endif
1468
1469 isc_nm_attach(mgr, &sock->mgr);
1470 sock->uv_handle.handle.data = sock;
1471
1472 ISC_LINK_INIT(&sock->quotacb, link);
1473
1474 switch (type) {
1475 case isc_nm_udpsocket:
1476 case isc_nm_udplistener:
1477 if (family == AF_INET) {
1478 sock->statsindex = udp4statsindex;
1479 } else {
1480 sock->statsindex = udp6statsindex;
1481 }
1482 isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
1483 break;
1484 case isc_nm_tcpsocket:
1485 case isc_nm_tcplistener:
1486 case isc_nm_tcpdnssocket:
1487 case isc_nm_tcpdnslistener:
1488 if (family == AF_INET) {
1489 sock->statsindex = tcp4statsindex;
1490 } else {
1491 sock->statsindex = tcp6statsindex;
1492 }
1493 isc__nm_incstats(sock->mgr, sock->statsindex[STATID_ACTIVE]);
1494 break;
1495 default:
1496 break;
1497 }
1498
1499 isc_mutex_init(&sock->lock);
1500 isc_condition_init(&sock->cond);
1501 isc_condition_init(&sock->scond);
1502 isc_refcount_init(&sock->references, 1);
1503
1504 NETMGR_TRACE_LOG("isc__nmsocket_init():%p->references = %" PRIuFAST32
1505 "\n",
1506 sock, isc_refcount_current(&sock->references));
1507
1508 atomic_init(&sock->active, true);
1509 atomic_init(&sock->sequential, false);
1510 atomic_init(&sock->readpaused, false);
1511 atomic_init(&sock->closing, false);
1512 atomic_init(&sock->listening, 0);
1513 atomic_init(&sock->closed, 0);
1514 atomic_init(&sock->destroying, 0);
1515 atomic_init(&sock->ah, 0);
1516 atomic_init(&sock->client, 0);
1517 atomic_init(&sock->connecting, false);
1518 atomic_init(&sock->keepalive, false);
1519 atomic_init(&sock->connected, false);
1520 atomic_init(&sock->timedout, false);
1521
1522 atomic_init(&sock->active_child_connections, 0);
1523
1524 sock->magic = NMSOCK_MAGIC;
1525 }
1526
1527 void
isc__nmsocket_clearcb(isc_nmsocket_t *sock) {
1529 REQUIRE(VALID_NMSOCK(sock));
1530 REQUIRE(!isc__nm_in_netthread() || sock->tid == isc_nm_tid());
1531
1532 sock->recv_cb = NULL;
1533 sock->recv_cbarg = NULL;
1534 sock->accept_cb = NULL;
1535 sock->accept_cbarg = NULL;
1536 sock->connect_cb = NULL;
1537 sock->connect_cbarg = NULL;
1538 }
1539
1540 void
isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) {
1542 isc__networker_t *worker = NULL;
1543
1544 REQUIRE(VALID_NMSOCK(sock));
1545
1546 worker = &sock->mgr->workers[sock->tid];
1547 REQUIRE(buf->base == worker->recvbuf);
1548
1549 worker->recvbuf_inuse = false;
1550 }
1551
1552 static isc_nmhandle_t *
alloc_handle(isc_nmsocket_t *sock) {
1554 isc_nmhandle_t *handle =
1555 isc_mem_get(sock->mgr->mctx,
1556 sizeof(isc_nmhandle_t) + sock->extrahandlesize);
1557
1558 *handle = (isc_nmhandle_t){ .magic = NMHANDLE_MAGIC };
1559 #ifdef NETMGR_TRACE
1560 ISC_LINK_INIT(handle, active_link);
1561 #endif
1562 isc_refcount_init(&handle->references, 1);
1563
1564 return (handle);
1565 }
1566
1567 isc_nmhandle_t *
isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
isc_sockaddr_t *local FLARG) {
1570 isc_nmhandle_t *handle = NULL;
1571
1572 REQUIRE(VALID_NMSOCK(sock));
1573
1574 handle = isc_astack_pop(sock->inactivehandles);
1575
1576 if (handle == NULL) {
1577 handle = alloc_handle(sock);
1578 } else {
1579 isc_refcount_init(&handle->references, 1);
1580 INSIST(VALID_NMHANDLE(handle));
1581 }
1582
1583 NETMGR_TRACE_LOG(
1584 "isc__nmhandle_get():handle %p->references = %" PRIuFAST32 "\n",
1585 handle, isc_refcount_current(&handle->references));
1586
1587 isc___nmsocket_attach(sock, &handle->sock FLARG_PASS);
1588
1589 #if NETMGR_TRACE
1590 handle->backtrace_size = backtrace(handle->backtrace, TRACE_SIZE);
1591 #endif
1592
1593 if (peer != NULL) {
1594 handle->peer = *peer;
1595 } else {
1596 handle->peer = sock->peer;
1597 }
1598
1599 if (local != NULL) {
1600 handle->local = *local;
1601 } else {
1602 handle->local = sock->iface;
1603 }
1604
1605 (void)atomic_fetch_add(&sock->ah, 1);
1606
1607 #ifdef NETMGR_TRACE
1608 LOCK(&sock->lock);
1609 ISC_LIST_APPEND(sock->active_handles, handle, active_link);
1610 UNLOCK(&sock->lock);
1611 #endif
1612
1613 switch (sock->type) {
1614 case isc_nm_udpsocket:
1615 case isc_nm_tcpdnssocket:
1616 if (!atomic_load(&sock->client)) {
1617 break;
1618 }
1619 FALLTHROUGH;
1620 case isc_nm_tcpsocket:
1621 INSIST(sock->statichandle == NULL);
1622
1623 /*
1624 * statichandle must be assigned, not attached;
1625 * otherwise, if a handle was detached elsewhere
1626 * it could never reach 0 references, and the
1627 * handle and socket would never be freed.
1628 */
1629 sock->statichandle = handle;
1630 break;
1631 default:
1632 break;
1633 }
1634
1635 return (handle);
1636 }
1637
1638 void
isc__nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **handlep FLARG) {
1640 REQUIRE(VALID_NMHANDLE(handle));
1641 REQUIRE(handlep != NULL && *handlep == NULL);
1642
1643 NETMGR_TRACE_LOG("isc__nmhandle_attach():handle %p->references = "
1644 "%" PRIuFAST32 "\n",
1645 handle, isc_refcount_current(&handle->references) + 1);
1646
1647 isc_refcount_increment(&handle->references);
1648 *handlep = handle;
1649 }
1650
1651 bool
isc_nmhandle_is_stream(isc_nmhandle_t *handle) {
1653 REQUIRE(VALID_NMHANDLE(handle));
1654
1655 return (handle->sock->type == isc_nm_tcpsocket ||
1656 handle->sock->type == isc_nm_tcpdnssocket);
1657 }
1658
1659 static void
nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
1661 size_t extra = sock->extrahandlesize;
1662
1663 isc_refcount_destroy(&handle->references);
1664
1665 if (handle->dofree != NULL) {
1666 handle->dofree(handle->opaque);
1667 }
1668
1669 *handle = (isc_nmhandle_t){ .magic = 0 };
1670
1671 isc_mem_put(sock->mgr->mctx, handle, sizeof(isc_nmhandle_t) + extra);
1672 }
1673
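/*
 * The handle is taken off the socket's active count and, if possible,
 * pushed onto the stack of inactive handles for reuse by a later
 * isc__nmhandle_get(); under AddressSanitizer or ThreadSanitizer the
 * handle is always freed instead, presumably so that use-after-free
 * bugs are not hidden by the recycling.
 */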
1674 static void
nmhandle_deactivate(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
1676 bool reuse = false;
1677
1678 /*
1679 * We do all of this under lock to avoid races with socket
1680 * destruction. We have to do this now, because at this point the
1681 * socket is either unused or still attached to event->sock.
1682 */
1683 LOCK(&sock->lock);
1684
1685 #ifdef NETMGR_TRACE
1686 ISC_LIST_UNLINK(sock->active_handles, handle, active_link);
1687 #endif
1688
1689 INSIST(atomic_fetch_sub(&sock->ah, 1) > 0);
1690
1691 #if !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__
1692 if (atomic_load(&sock->active)) {
1693 reuse = isc_astack_trypush(sock->inactivehandles, handle);
1694 }
1695 #endif /* !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__ */
1696 if (!reuse) {
1697 nmhandle_free(sock, handle);
1698 }
1699 UNLOCK(&sock->lock);
1700 }
1701
1702 void
isc__nmhandle_detach(isc_nmhandle_t **handlep FLARG) {
1704 isc_nmsocket_t *sock = NULL;
1705 isc_nmhandle_t *handle = NULL;
1706
1707 REQUIRE(handlep != NULL);
1708 REQUIRE(VALID_NMHANDLE(*handlep));
1709
1710 handle = *handlep;
1711 *handlep = NULL;
1712
1713 /*
1714 * If the closehandle_cb is set, it needs to run asynchronously to
1715 * ensure correct ordering of the isc__nm_process_sock_buffer().
1716 */
1717 sock = handle->sock;
1718 if (sock->tid == isc_nm_tid() && sock->closehandle_cb == NULL) {
1719 nmhandle_detach_cb(&handle FLARG_PASS);
1720 } else {
1721 isc__netievent_detach_t *event =
1722 isc__nm_get_netievent_detach(sock->mgr, sock);
/*
 * We are using an implicit "attach" here; the last reference
 * needs to be destroyed explicitly in the async callback.
 */
1727 event->handle = handle;
1728 FLARG_IEVENT_PASS(event);
1729 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1730 (isc__netievent_t *)event);
1731 }
1732 }
1733
1734 void
1735 isc__nmsocket_shutdown(isc_nmsocket_t *sock);
1736
1737 static void
nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG) {
1739 isc_nmsocket_t *sock = NULL;
1740 isc_nmhandle_t *handle = NULL;
1741
1742 REQUIRE(handlep != NULL);
1743 REQUIRE(VALID_NMHANDLE(*handlep));
1744
1745 handle = *handlep;
1746 *handlep = NULL;
1747
1748 NETMGR_TRACE_LOG("isc__nmhandle_detach():%p->references = %" PRIuFAST32
1749 "\n",
1750 handle, isc_refcount_current(&handle->references) - 1);
1751
1752 if (isc_refcount_decrement(&handle->references) > 1) {
1753 return;
1754 }
1755
1756 /* We need an acquire memory barrier here */
1757 (void)isc_refcount_current(&handle->references);
1758
1759 sock = handle->sock;
1760 handle->sock = NULL;
1761
1762 if (handle->doreset != NULL) {
1763 handle->doreset(handle->opaque);
1764 }
1765
1766 nmhandle_deactivate(sock, handle);
1767
1768 /*
1769 * The handle is gone now. If the socket has a callback configured
1770 * for that (e.g., to perform cleanup after request processing),
1771 * call it now, or schedule it to run asynchronously.
1772 */
1773 if (sock->closehandle_cb != NULL) {
1774 if (sock->tid == isc_nm_tid()) {
1775 sock->closehandle_cb(sock);
1776 } else {
1777 isc__netievent_close_t *event =
1778 isc__nm_get_netievent_close(sock->mgr, sock);
1779 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
1780 (isc__netievent_t *)event);
1781 }
1782 }
1783
1784 if (handle == sock->statichandle) {
1785 /* statichandle is assigned, not attached. */
1786 sock->statichandle = NULL;
1787 }
1788
1789 isc___nmsocket_detach(&sock FLARG_PASS);
1790 }
1791
1792 void *
isc_nmhandle_getdata(isc_nmhandle_t *handle) {
1794 REQUIRE(VALID_NMHANDLE(handle));
1795
1796 return (handle->opaque);
1797 }
1798
1799 int
isc_nmhandle_getfd(isc_nmhandle_t *handle) {
1801 REQUIRE(VALID_NMHANDLE(handle));
1802
1803 return (handle->sock->fd);
1804 }
1805
1806 void
isc_nmhandle_setdata(isc_nmhandle_t *handle, void *arg,
isc_nm_opaquecb_t doreset, isc_nm_opaquecb_t dofree) {
1809 REQUIRE(VALID_NMHANDLE(handle));
1810
1811 handle->opaque = arg;
1812 handle->doreset = doreset;
1813 handle->dofree = dofree;
1814 }
1815
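/*
 * Ensure the socket's DNS message buffer can hold 'len' bytes: a missing
 * buffer is allocated at NM_REG_BUF or NM_BIG_BUF size as needed, and an
 * existing (too small) buffer is grown straight to NM_BIG_BUF.
 */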
1816 void
isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) {
1818 REQUIRE(len <= NM_BIG_BUF);
1819
1820 if (sock->buf == NULL) {
1821 /* We don't have the buffer at all */
1822 size_t alloc_len = len < NM_REG_BUF ? NM_REG_BUF : NM_BIG_BUF;
1823 sock->buf = isc_mem_allocate(sock->mgr->mctx, alloc_len);
1824 sock->buf_size = alloc_len;
1825 } else {
1826 /* We have the buffer but it's too small */
1827 sock->buf = isc_mem_reallocate(sock->mgr->mctx, sock->buf,
1828 NM_BIG_BUF);
1829 sock->buf_size = NM_BIG_BUF;
1830 }
1831 }
1832
1833 void
isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
isc_result_t eresult) {
1836 REQUIRE(VALID_NMSOCK(sock));
1837 REQUIRE(VALID_UVREQ(req));
1838
1839 if (req->cb.send != NULL) {
1840 isc__nm_sendcb(sock, req, eresult, true);
1841 } else {
1842 isc__nm_uvreq_put(&req, sock);
1843 }
1844 }
1845
1846 void
isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) {
1848 REQUIRE(sock->accepting);
1849 REQUIRE(sock->server);
1850
1851 /*
1852 * Detach the quota early to make room for other connections;
1853 * otherwise it'd be detached later asynchronously, and clog
1854 * the quota unnecessarily.
1855 */
1856 if (sock->quota != NULL) {
1857 isc_quota_detach(&sock->quota);
1858 }
1859
1860 isc__nmsocket_detach(&sock->server);
1861
1862 sock->accepting = false;
1863
1864 switch (eresult) {
1865 case ISC_R_NOTCONNECTED:
1866 /* IGNORE: The client disconnected before we could accept */
1867 break;
1868 default:
1869 isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
1870 ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
1871 "Accepting TCP connection failed: %s",
1872 isc_result_totext(eresult));
1873 }
1874 }
1875
1876 void
isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
isc_result_t eresult, bool async) {
1879 REQUIRE(VALID_NMSOCK(sock));
1880 REQUIRE(VALID_UVREQ(req));
1881 REQUIRE(sock->tid == isc_nm_tid());
1882 REQUIRE(req->cb.connect != NULL);
1883
1884 isc__nmsocket_timer_stop(sock);
1885 uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
1886
1887 INSIST(atomic_compare_exchange_strong(&sock->connecting,
1888 &(bool){ true }, false));
1889
1890 isc__nmsocket_clearcb(sock);
1891 isc__nm_connectcb(sock, req, eresult, async);
1892
1893 isc__nmsocket_prep_destroy(sock);
1894 }
1895
1896 void
1897 isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async) {
1898 REQUIRE(VALID_NMSOCK(sock));
1899 UNUSED(async);
1900
1901 switch (sock->type) {
1902 case isc_nm_udpsocket:
1903 isc__nm_udp_failed_read_cb(sock, result);
1904 return;
1905 case isc_nm_tcpsocket:
1906 isc__nm_tcp_failed_read_cb(sock, result);
1907 return;
1908 case isc_nm_tcpdnssocket:
1909 isc__nm_tcpdns_failed_read_cb(sock, result);
1910 return;
1911 default:
1912 UNREACHABLE();
1913 }
1914 }
1915
1916 void
1917 isc__nmsocket_connecttimeout_cb(uv_timer_t *timer) {
1918 uv_connect_t *uvreq = uv_handle_get_data((uv_handle_t *)timer);
1919 isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
1920 isc__nm_uvreq_t *req = uv_handle_get_data((uv_handle_t *)uvreq);
1921
1922 REQUIRE(VALID_NMSOCK(sock));
1923 REQUIRE(sock->tid == isc_nm_tid());
1924 REQUIRE(atomic_load(&sock->connecting));
1925 REQUIRE(VALID_UVREQ(req));
1926 REQUIRE(VALID_NMHANDLE(req->handle));
1927
1928 isc__nmsocket_timer_stop(sock);
1929
1930 /*
1931 * Mark the connection as timed out and shut down the socket.
1932 */
1933
1934 INSIST(atomic_compare_exchange_strong(&sock->timedout, &(bool){ false },
1935 true));
1936 isc__nmsocket_clearcb(sock);
1937 isc__nmsocket_shutdown(sock);
1938 }
1939
1940 void
1941 isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota) {
1942 int level;
1943
1944 switch (result) {
1945 case ISC_R_SUCCESS:
1946 case ISC_R_NOCONN:
1947 return;
1948 case ISC_R_QUOTA:
1949 case ISC_R_SOFTQUOTA:
1950 if (!can_log_quota) {
1951 return;
1952 }
1953 level = ISC_LOG_INFO;
1954 break;
1955 case ISC_R_NOTCONNECTED:
1956 level = ISC_LOG_INFO;
1957 break;
1958 default:
1959 level = ISC_LOG_ERROR;
1960 }
1961
1962 isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR,
1963 level, "Accepting TCP connection failed: %s",
1964 isc_result_totext(result));
1965 }
1966
1967 void
1968 isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult) {
1969 isc__nm_uvreq_t *req = data;
1970 isc_nmsocket_t *sock = NULL;
1971
1972 REQUIRE(eresult == ISC_R_TIMEDOUT);
1973 REQUIRE(VALID_UVREQ(req));
1974 REQUIRE(VALID_NMSOCK(req->sock));
1975
1976 sock = req->sock;
1977
1978 isc__nmsocket_reset(sock);
1979 }
1980
1981 void
1982 isc__nmsocket_readtimeout_cb(uv_timer_t *timer) {
1983 isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)timer);
1984
1985 REQUIRE(VALID_NMSOCK(sock));
1986 REQUIRE(sock->tid == isc_nm_tid());
1987 REQUIRE(sock->reading);
1988
1989 if (atomic_load(&sock->client)) {
1990 uv_timer_stop(timer);
1991
1992 if (sock->recv_cb != NULL) {
1993 isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
1994 isc__nm_readcb(sock, req, ISC_R_TIMEDOUT);
1995 }
1996
1997 if (!isc__nmsocket_timer_running(sock)) {
1998 isc__nmsocket_clearcb(sock);
1999 isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
2000 }
2001 } else {
2002 isc__nm_failed_read_cb(sock, ISC_R_TIMEDOUT, false);
2003 }
2004 }
2005
2006 void
2007 isc__nmsocket_timer_restart(isc_nmsocket_t *sock) {
2008 REQUIRE(VALID_NMSOCK(sock));
2009
2010 if (atomic_load(&sock->connecting)) {
2011 int r;
2012
2013 if (sock->connect_timeout == 0) {
2014 return;
2015 }
2016
2017 r = uv_timer_start(&sock->read_timer,
2018 isc__nmsocket_connecttimeout_cb,
2019 sock->connect_timeout + 10, 0);
2020 UV_RUNTIME_CHECK(uv_timer_start, r);
2021
2022 } else {
2023 int r;
2024
2025 if (sock->read_timeout == 0) {
2026 return;
2027 }
2028
2029 r = uv_timer_start(&sock->read_timer,
2030 isc__nmsocket_readtimeout_cb,
2031 sock->read_timeout, 0);
2032 UV_RUNTIME_CHECK(uv_timer_start, r);
2033 }
2034 }
2035
2036 bool
2037 isc__nmsocket_timer_running(isc_nmsocket_t *sock) {
2038 REQUIRE(VALID_NMSOCK(sock));
2039
2040 return (uv_is_active((uv_handle_t *)&sock->read_timer));
2041 }
2042
2043 void
2044 isc__nmsocket_timer_start(isc_nmsocket_t *sock) {
2045 REQUIRE(VALID_NMSOCK(sock));
2046
2047 if (isc__nmsocket_timer_running(sock)) {
2048 return;
2049 }
2050
2051 isc__nmsocket_timer_restart(sock);
2052 }
2053
2054 void
2055 isc__nmsocket_timer_stop(isc_nmsocket_t *sock) {
2056 int r;
2057
2058 REQUIRE(VALID_NMSOCK(sock));
2059
2060 /* uv_timer_stop() is idempotent, no need to check if running */
2061
2062 r = uv_timer_stop(&sock->read_timer);
2063 UV_RUNTIME_CHECK(uv_timer_stop, r);
2064 }
2065
2066 isc__nm_uvreq_t *
2067 isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) {
2068 isc__nm_uvreq_t *req = NULL;
2069
2070 req = isc__nm_uvreq_get(sock->mgr, sock);
2071 req->cb.recv = sock->recv_cb;
2072 req->cbarg = sock->recv_cbarg;
2073
2074 switch (sock->type) {
2075 case isc_nm_tcpsocket:
2076 isc_nmhandle_attach(sock->statichandle, &req->handle);
2077 break;
2078 default:
2079 if (atomic_load(&sock->client)) {
2080 isc_nmhandle_attach(sock->statichandle, &req->handle);
2081 } else {
2082 req->handle = isc__nmhandle_get(sock, sockaddr, NULL);
2083 }
2084 break;
2085 }
2086
2087 return (req);
2088 }
2089
2090 /*%<
2091 * Allocator callback for read operations.
2092 *
2093 * Note that this doesn't actually allocate anything; it just assigns the
2094 * worker's receive buffer to a socket and marks it as "in use".
2095 */
2096 void
2097 isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) {
2098 isc_nmsocket_t *sock = uv_handle_get_data(handle);
2099 isc__networker_t *worker = NULL;
2100
2101 REQUIRE(VALID_NMSOCK(sock));
2102 REQUIRE(isc__nm_in_netthread());
2103 /*
2104 * The size provided by libuv is only a suggested size, and it always
2105 * defaults to 64 * 1024 in the current versions of libuv (see
2106 * src/unix/udp.c and src/unix/stream.c).
2107 */
2108 UNUSED(size);
2109
2110 worker = &sock->mgr->workers[sock->tid];
2111 INSIST(!worker->recvbuf_inuse);
2112 INSIST(worker->recvbuf != NULL);
2113
2114 switch (sock->type) {
2115 case isc_nm_udpsocket:
2116 buf->len = ISC_NETMGR_UDP_RECVBUF_SIZE;
2117 break;
2118 case isc_nm_tcpsocket:
2119 case isc_nm_tcpdnssocket:
2120 buf->len = ISC_NETMGR_TCP_RECVBUF_SIZE;
2121 break;
2122 default:
2123 UNREACHABLE();
2124 }
2125
2126 REQUIRE(buf->len <= ISC_NETMGR_RECVBUF_SIZE);
2127 buf->base = worker->recvbuf;
2128
2129 worker->recvbuf_inuse = true;
2130 }
2131
2132 isc_result_t
2133 isc__nm_start_reading(isc_nmsocket_t *sock) {
2134 isc_result_t result = ISC_R_SUCCESS;
2135 int r;
2136
2137 if (sock->reading) {
2138 return (ISC_R_SUCCESS);
2139 }
2140
2141 switch (sock->type) {
2142 case isc_nm_udpsocket:
2143 r = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb,
2144 isc__nm_udp_read_cb);
2145 break;
2146 case isc_nm_tcpsocket:
2147 r = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
2148 isc__nm_tcp_read_cb);
2149 break;
2150 case isc_nm_tcpdnssocket:
2151 r = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
2152 isc__nm_tcpdns_read_cb);
2153 break;
2154 default:
2155 UNREACHABLE();
2156 }
2157
2158 if (r != 0) {
2159 result = isc__nm_uverr2result(r);
2160 } else {
2161 sock->reading = true;
2162 }
2163
2164 return (result);
2165 }
2166
2167 void
2168 isc__nm_stop_reading(isc_nmsocket_t *sock) {
2169 int r;
2170
2171 if (!sock->reading) {
2172 return;
2173 }
2174
2175 switch (sock->type) {
2176 case isc_nm_udpsocket:
2177 r = uv_udp_recv_stop(&sock->uv_handle.udp);
2178 UV_RUNTIME_CHECK(uv_udp_recv_stop, r);
2179 break;
2180 case isc_nm_tcpsocket:
2181 case isc_nm_tcpdnssocket:
2182 r = uv_read_stop(&sock->uv_handle.stream);
2183 UV_RUNTIME_CHECK(uv_read_stop, r);
2184 break;
2185 default:
2186 UNREACHABLE();
2187 }
2188 sock->reading = false;
2189 }
2190
2191 bool
2192 isc__nm_closing(isc_nmsocket_t *sock) {
2193 return (atomic_load(&sock->mgr->closing));
2194 }
2195
2196 bool
2197 isc__nmsocket_closing(isc_nmsocket_t *sock) {
2198 return (!isc__nmsocket_active(sock) || atomic_load(&sock->closing) ||
2199 atomic_load(&sock->mgr->closing) ||
2200 (sock->server != NULL && !isc__nmsocket_active(sock->server)));
2201 }
2202
2203 static isc_result_t
2204 processbuffer(isc_nmsocket_t *sock) {
2205 switch (sock->type) {
2206 case isc_nm_tcpdnssocket:
2207 return (isc__nm_tcpdns_processbuffer(sock));
2208 default:
2209 UNREACHABLE();
2210 }
2211 }
2212
2213 /*
2214 * Process a DNS message.
2215 *
2216 * If we only have an incomplete DNS message, we don't touch any
2217 * timers. If we do have a full message, reset the timer.
2218 *
2219 * Stop reading if this is a client socket, or if the server socket
2220 * has been set to sequential mode, or the number of queries we are
2221 * processing simultaneously has reached the clients-per-connection
2222 * limit. In this case we'll be called again by resume_processing()
2223 * later.
2224 */
2225 isc_result_t
2226 isc__nm_process_sock_buffer(isc_nmsocket_t *sock) {
2227 for (;;) {
2228 int_fast32_t ah = atomic_load(&sock->ah);
2229 isc_result_t result = processbuffer(sock);
2230 switch (result) {
2231 case ISC_R_NOMORE:
2232 /*
2233 * Don't reset the timer until we have a
2234 * full DNS message.
2235 */
2236 result = isc__nm_start_reading(sock);
2237 if (result != ISC_R_SUCCESS) {
2238 return (result);
2239 }
2240 /*
2241 * Start the timer only if there are no externally used
2242 * active handles; there's always one active handle
2243 * attached internally to sock->recv_handle in
2244 * accept_connection().
2245 */
2246 if (ah == 1) {
2247 isc__nmsocket_timer_start(sock);
2248 }
2249 goto done;
2250 case ISC_R_CANCELED:
2251 isc__nmsocket_timer_stop(sock);
2252 isc__nm_stop_reading(sock);
2253 goto done;
2254 case ISC_R_SUCCESS:
2255 /*
2256 * Stop the timer on a successful message read; this
2257 * also allows the timer to be restarted when we have
2258 * no more data.
2259 */
2260 isc__nmsocket_timer_stop(sock);
2261
2262 if (atomic_load(&sock->client) ||
2263 atomic_load(&sock->sequential) ||
2264 ah >= STREAM_CLIENTS_PER_CONN)
2265 {
2266 isc__nm_stop_reading(sock);
2267 goto done;
2268 }
2269 break;
2270 default:
2271 UNREACHABLE();
2272 }
2273 }
2274 done:
2275 return (ISC_R_SUCCESS);
2276 }
2277
2278 void
2279 isc__nm_resume_processing(void *arg) {
2280 isc_nmsocket_t *sock = (isc_nmsocket_t *)arg;
2281
2282 REQUIRE(VALID_NMSOCK(sock));
2283 REQUIRE(sock->tid == isc_nm_tid());
2284 REQUIRE(!atomic_load(&sock->client));
2285
2286 if (isc__nmsocket_closing(sock)) {
2287 return;
2288 }
2289
2290 isc__nm_process_sock_buffer(sock);
2291 }
2292
2293 void
2294 isc_nmhandle_cleartimeout(isc_nmhandle_t *handle) {
2295 REQUIRE(VALID_NMHANDLE(handle));
2296 REQUIRE(VALID_NMSOCK(handle->sock));
2297
2298 switch (handle->sock->type) {
2299 default:
2300 handle->sock->read_timeout = 0;
2301
2302 if (uv_is_active((uv_handle_t *)&handle->sock->read_timer)) {
2303 isc__nmsocket_timer_stop(handle->sock);
2304 }
2305 }
2306 }
2307
2308 void
2309 isc_nmhandle_settimeout(isc_nmhandle_t *handle, uint32_t timeout) {
2310 REQUIRE(VALID_NMHANDLE(handle));
2311 REQUIRE(VALID_NMSOCK(handle->sock));
2312
2313 switch (handle->sock->type) {
2314 default:
2315 handle->sock->read_timeout = timeout;
2316 isc__nmsocket_timer_restart(handle->sock);
2317 }
2318 }
2319
2320 void
2321 isc_nmhandle_keepalive(isc_nmhandle_t *handle, bool value) {
2322 isc_nmsocket_t *sock = NULL;
2323
2324 REQUIRE(VALID_NMHANDLE(handle));
2325 REQUIRE(VALID_NMSOCK(handle->sock));
2326
2327 sock = handle->sock;
2328
2329 switch (sock->type) {
2330 case isc_nm_tcpsocket:
2331 case isc_nm_tcpdnssocket:
2332 atomic_store(&sock->keepalive, value);
2333 sock->read_timeout = value ? atomic_load(&sock->mgr->keepalive)
2334 : atomic_load(&sock->mgr->idle);
2335 sock->write_timeout = value ? atomic_load(&sock->mgr->keepalive)
2336 : atomic_load(&sock->mgr->idle);
2337 break;
2338 default:
2339 /*
2340 * For any other protocol, this is a no-op.
2341 */
2342 return;
2343 }
2344 }
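
/*
 * A minimal sketch (hypothetical caller) of the per-handle timeout and
 * keepalive knobs above: the read timeout is given in milliseconds, as
 * used by the read timer, and isc_nmhandle_keepalive() switches between
 * the manager's keepalive and idle timeouts.  Illustrative only, kept
 * out of the build.
 */
#if 0
static void
tune_handle(isc_nmhandle_t *handle, bool wants_keepalive) {
	/* Give the peer 30 seconds to produce the next message. */
	isc_nmhandle_settimeout(handle, 30000);

	/* Use the longer keepalive timeout when the client asked for it. */
	isc_nmhandle_keepalive(handle, wants_keepalive);
}
#endif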
2345
2346 void *
2347 isc_nmhandle_getextra(isc_nmhandle_t *handle) {
2348 REQUIRE(VALID_NMHANDLE(handle));
2349
2350 return (handle->extra);
2351 }
2352
2353 isc_sockaddr_t
2354 isc_nmhandle_peeraddr(isc_nmhandle_t *handle) {
2355 REQUIRE(VALID_NMHANDLE(handle));
2356
2357 return (handle->peer);
2358 }
2359
2360 isc_sockaddr_t
2361 isc_nmhandle_localaddr(isc_nmhandle_t *handle) {
2362 REQUIRE(VALID_NMHANDLE(handle));
2363
2364 return (handle->local);
2365 }
2366
2367 isc_nm_t *
2368 isc_nmhandle_netmgr(isc_nmhandle_t *handle) {
2369 REQUIRE(VALID_NMHANDLE(handle));
2370 REQUIRE(VALID_NMSOCK(handle->sock));
2371
2372 return (handle->sock->mgr);
2373 }
2374
2375 isc__nm_uvreq_t *
2376 isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG) {
2377 isc__nm_uvreq_t *req = NULL;
2378
2379 REQUIRE(VALID_NM(mgr));
2380 REQUIRE(VALID_NMSOCK(sock));
2381
2382 if (sock != NULL && isc__nmsocket_active(sock)) {
2383 /* Try to reuse one */
2384 req = isc_astack_pop(sock->inactivereqs);
2385 }
2386
2387 if (req == NULL) {
2388 req = isc_mem_get(mgr->mctx, sizeof(*req));
2389 }
2390
2391 *req = (isc__nm_uvreq_t){ .magic = 0 };
2392 ISC_LINK_INIT(req, link);
2393 req->uv_req.req.data = req;
2394 isc___nmsocket_attach(sock, &req->sock FLARG_PASS);
2395 req->magic = UVREQ_MAGIC;
2396
2397 return (req);
2398 }
2399
2400 void
2401 isc___nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock FLARG) {
2402 isc__nm_uvreq_t *req = NULL;
2403 isc_nmhandle_t *handle = NULL;
2404
2405 REQUIRE(req0 != NULL);
2406 REQUIRE(VALID_UVREQ(*req0));
2407
2408 req = *req0;
2409 *req0 = NULL;
2410
2411 INSIST(sock == req->sock);
2412
2413 req->magic = 0;
2414
2415 /*
2416 * We need to save this first to make sure that handle,
2417 * sock, and the netmgr won't all disappear.
2418 */
2419 handle = req->handle;
2420 req->handle = NULL;
2421
2422 #if !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__
2423 if (!isc__nmsocket_active(sock) ||
2424 !isc_astack_trypush(sock->inactivereqs, req))
2425 {
2426 isc_mem_put(sock->mgr->mctx, req, sizeof(*req));
2427 }
2428 #else /* !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__ */
2429 isc_mem_put(sock->mgr->mctx, req, sizeof(*req));
2430 #endif /* !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__ */
2431
2432 if (handle != NULL) {
2433 isc__nmhandle_detach(&handle FLARG_PASS);
2434 }
2435
2436 isc___nmsocket_detach(&sock FLARG_PASS);
2437 }
2438
2439 void
2440 isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb,
2441 void *cbarg) {
2442 REQUIRE(VALID_NMHANDLE(handle));
2443
2444 switch (handle->sock->type) {
2445 case isc_nm_udpsocket:
2446 case isc_nm_udplistener:
2447 isc__nm_udp_send(handle, region, cb, cbarg);
2448 break;
2449 case isc_nm_tcpsocket:
2450 isc__nm_tcp_send(handle, region, cb, cbarg);
2451 break;
2452 case isc_nm_tcpdnssocket:
2453 isc__nm_tcpdns_send(handle, region, cb, cbarg);
2454 break;
2455 default:
2456 UNREACHABLE();
2457 }
2458 }
2459
2460 void
2461 isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
2462 REQUIRE(VALID_NMHANDLE(handle));
2463
2464 /*
2465 * This is always called via a callback (from accept or connect), and
2466 * the caller must attach to the handle, so the reference count always
2467 * needs to be at least 2.
2468 */
2469 REQUIRE(isc_refcount_current(&handle->references) >= 2);
2470
2471 switch (handle->sock->type) {
2472 case isc_nm_udpsocket:
2473 isc__nm_udp_read(handle, cb, cbarg);
2474 break;
2475 case isc_nm_tcpsocket:
2476 isc__nm_tcp_read(handle, cb, cbarg);
2477 break;
2478 case isc_nm_tcpdnssocket:
2479 isc__nm_tcpdns_read(handle, cb, cbarg);
2480 break;
2481 default:
2482 UNREACHABLE();
2483 }
2484 }
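
/*
 * A minimal echo-style sketch (hypothetical callbacks) showing how
 * isc_nm_read() and isc_nm_send() fit together: the handle is assumed to
 * come from an accept or connect callback, with the caller holding its
 * own reference as required above.  Illustrative only, kept out of the
 * build.
 */
#if 0
static void
echo_sent(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
	UNUSED(handle);
	UNUSED(cbarg);

	if (result != ISC_R_SUCCESS) {
		/* e.g. log the failure and drop the connection */
	}
}

static void
echo_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
	  void *cbarg) {
	if (result != ISC_R_SUCCESS) {
		/* read failed, was canceled, or timed out */
		return;
	}

	/* Send the received region straight back on the same handle. */
	isc_nm_send(handle, region, echo_sent, cbarg);
}

static void
start_echo(isc_nmhandle_t *handle) {
	isc_nm_read(handle, echo_recv, NULL);
}
#endif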
2485
2486 void
2487 isc_nm_cancelread(isc_nmhandle_t *handle) {
2488 REQUIRE(VALID_NMHANDLE(handle));
2489
2490 switch (handle->sock->type) {
2491 case isc_nm_udpsocket:
2492 isc__nm_udp_cancelread(handle);
2493 break;
2494 case isc_nm_tcpsocket:
2495 isc__nm_tcp_cancelread(handle);
2496 break;
2497 case isc_nm_tcpdnssocket:
2498 isc__nm_tcpdns_cancelread(handle);
2499 break;
2500 default:
2501 UNREACHABLE();
2502 }
2503 }
2504
2505 void
2506 isc_nm_pauseread(isc_nmhandle_t *handle) {
2507 REQUIRE(VALID_NMHANDLE(handle));
2508
2509 isc_nmsocket_t *sock = handle->sock;
2510
2511 switch (sock->type) {
2512 case isc_nm_tcpsocket:
2513 isc__nm_tcp_pauseread(handle);
2514 break;
2515 default:
2516 UNREACHABLE();
2517 }
2518 }
2519
2520 void
2521 isc_nm_resumeread(isc_nmhandle_t *handle) {
2522 REQUIRE(VALID_NMHANDLE(handle));
2523
2524 isc_nmsocket_t *sock = handle->sock;
2525
2526 switch (sock->type) {
2527 case isc_nm_tcpsocket:
2528 isc__nm_tcp_resumeread(handle);
2529 break;
2530 default:
2531 UNREACHABLE();
2532 }
2533 }
2534
2535 void
2536 isc_nm_stoplistening(isc_nmsocket_t *sock) {
2537 REQUIRE(VALID_NMSOCK(sock));
2538
2539 switch (sock->type) {
2540 case isc_nm_udplistener:
2541 isc__nm_udp_stoplistening(sock);
2542 break;
2543 case isc_nm_tcpdnslistener:
2544 isc__nm_tcpdns_stoplistening(sock);
2545 break;
2546 case isc_nm_tcplistener:
2547 isc__nm_tcp_stoplistening(sock);
2548 break;
2549 default:
2550 UNREACHABLE();
2551 }
2552 }
2553
2554 void
2555 isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2556 isc_result_t eresult, bool async) {
2557 REQUIRE(VALID_NMSOCK(sock));
2558 REQUIRE(VALID_UVREQ(uvreq));
2559 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2560
2561 if (!async) {
2562 isc__netievent_connectcb_t ievent = { .sock = sock,
2563 .req = uvreq,
2564 .result = eresult };
2565 isc__nm_async_connectcb(NULL, (isc__netievent_t *)&ievent);
2566 } else {
2567 isc__netievent_connectcb_t *ievent =
2568 isc__nm_get_netievent_connectcb(sock->mgr, sock, uvreq,
2569 eresult);
2570 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2571 (isc__netievent_t *)ievent);
2572 }
2573 }
2574
2575 void
2576 isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2577 isc__netievent_connectcb_t *ievent = (isc__netievent_connectcb_t *)ev0;
2578 isc_nmsocket_t *sock = ievent->sock;
2579 isc__nm_uvreq_t *uvreq = ievent->req;
2580 isc_result_t eresult = ievent->result;
2581
2582 UNUSED(worker);
2583
2584 REQUIRE(VALID_NMSOCK(sock));
2585 REQUIRE(VALID_UVREQ(uvreq));
2586 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2587 REQUIRE(ievent->sock->tid == isc_nm_tid());
2588 REQUIRE(uvreq->cb.connect != NULL);
2589
2590 uvreq->cb.connect(uvreq->handle, eresult, uvreq->cbarg);
2591
2592 isc__nm_uvreq_put(&uvreq, sock);
2593 }
2594
2595 void
2596 isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2597 isc_result_t eresult) {
2598 REQUIRE(VALID_NMSOCK(sock));
2599 REQUIRE(VALID_UVREQ(uvreq));
2600 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2601
2602 if (eresult == ISC_R_SUCCESS || eresult == ISC_R_TIMEDOUT) {
2603 isc__netievent_readcb_t ievent = { .sock = sock,
2604 .req = uvreq,
2605 .result = eresult };
2606
2607 isc__nm_async_readcb(NULL, (isc__netievent_t *)&ievent);
2608 } else {
2609 isc__netievent_readcb_t *ievent = isc__nm_get_netievent_readcb(
2610 sock->mgr, sock, uvreq, eresult);
2611 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2612 (isc__netievent_t *)ievent);
2613 }
2614 }
2615
2616 void
2617 isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2618 isc__netievent_readcb_t *ievent = (isc__netievent_readcb_t *)ev0;
2619 isc_nmsocket_t *sock = ievent->sock;
2620 isc__nm_uvreq_t *uvreq = ievent->req;
2621 isc_result_t eresult = ievent->result;
2622 isc_region_t region;
2623
2624 UNUSED(worker);
2625
2626 REQUIRE(VALID_NMSOCK(sock));
2627 REQUIRE(VALID_UVREQ(uvreq));
2628 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2629 REQUIRE(sock->tid == isc_nm_tid());
2630
2631 region.base = (unsigned char *)uvreq->uvbuf.base;
2632 region.length = uvreq->uvbuf.len;
2633
2634 uvreq->cb.recv(uvreq->handle, eresult, &region, uvreq->cbarg);
2635
2636 isc__nm_uvreq_put(&uvreq, sock);
2637 }
2638
2639 void
2640 isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
2641 isc_result_t eresult, bool async) {
2642 REQUIRE(VALID_NMSOCK(sock));
2643 REQUIRE(VALID_UVREQ(uvreq));
2644 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2645
2646 if (!async) {
2647 isc__netievent_sendcb_t ievent = { .sock = sock,
2648 .req = uvreq,
2649 .result = eresult };
2650 isc__nm_async_sendcb(NULL, (isc__netievent_t *)&ievent);
2651 return;
2652 }
2653
2654 isc__netievent_sendcb_t *ievent =
2655 isc__nm_get_netievent_sendcb(sock->mgr, sock, uvreq, eresult);
2656 isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
2657 (isc__netievent_t *)ievent);
2658 }
2659
2660 void
2661 isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0) {
2662 isc__netievent_sendcb_t *ievent = (isc__netievent_sendcb_t *)ev0;
2663 isc_nmsocket_t *sock = ievent->sock;
2664 isc__nm_uvreq_t *uvreq = ievent->req;
2665 isc_result_t eresult = ievent->result;
2666
2667 UNUSED(worker);
2668
2669 REQUIRE(VALID_NMSOCK(sock));
2670 REQUIRE(VALID_UVREQ(uvreq));
2671 REQUIRE(VALID_NMHANDLE(uvreq->handle));
2672 REQUIRE(sock->tid == isc_nm_tid());
2673
2674 uvreq->cb.send(uvreq->handle, eresult, uvreq->cbarg);
2675
2676 isc__nm_uvreq_put(&uvreq, sock);
2677 }
2678
2679 static void
2680 isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0) {
2681 isc__netievent_close_t *ievent = (isc__netievent_close_t *)ev0;
2682 isc_nmsocket_t *sock = ievent->sock;
2683
2684 REQUIRE(VALID_NMSOCK(ievent->sock));
2685 REQUIRE(sock->tid == isc_nm_tid());
2686 REQUIRE(sock->closehandle_cb != NULL);
2687
2688 UNUSED(worker);
2689
2690 ievent->sock->closehandle_cb(sock);
2691 }
2692
2693 void
2694 isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0) {
2695 isc__netievent_detach_t *ievent = (isc__netievent_detach_t *)ev0;
2696 FLARG_IEVENT(ievent);
2697
2698 REQUIRE(VALID_NMSOCK(ievent->sock));
2699 REQUIRE(VALID_NMHANDLE(ievent->handle));
2700 REQUIRE(ievent->sock->tid == isc_nm_tid());
2701
2702 UNUSED(worker);
2703
2704 nmhandle_detach_cb(&ievent->handle FLARG_PASS);
2705 }
2706
2707 static void
2708 reset_shutdown(uv_handle_t *handle) {
2709 isc_nmsocket_t *sock = uv_handle_get_data(handle);
2710
2711 isc__nmsocket_shutdown(sock);
2712 isc__nmsocket_detach(&sock);
2713 }
2714
2715 void
2716 isc__nmsocket_reset(isc_nmsocket_t *sock) {
2717 REQUIRE(VALID_NMSOCK(sock));
2718
2719 switch (sock->type) {
2720 case isc_nm_tcpsocket:
2721 case isc_nm_tcpdnssocket:
2722 /*
2723 * This can be called from the TCP write timeout.
2724 */
2725 REQUIRE(sock->parent == NULL);
2726 break;
2727 default:
2728 UNREACHABLE();
2729 break;
2730 }
2731
2732 if (!uv_is_closing(&sock->uv_handle.handle) &&
2733 uv_is_active(&sock->uv_handle.handle))
2734 {
2735 /*
2736 * The real shutdown will be handled in the respective
2737 * close functions.
2738 */
2739 isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });
2740 int r = uv_tcp_close_reset(&sock->uv_handle.tcp,
2741 reset_shutdown);
2742 UV_RUNTIME_CHECK(uv_tcp_close_reset, r);
2743 } else {
2744 isc__nmsocket_shutdown(sock);
2745 }
2746 }
2747
2748 void
2749 isc__nmsocket_shutdown(isc_nmsocket_t *sock) {
2750 REQUIRE(VALID_NMSOCK(sock));
2751 switch (sock->type) {
2752 case isc_nm_udpsocket:
2753 isc__nm_udp_shutdown(sock);
2754 break;
2755 case isc_nm_tcpsocket:
2756 isc__nm_tcp_shutdown(sock);
2757 break;
2758 case isc_nm_tcpdnssocket:
2759 isc__nm_tcpdns_shutdown(sock);
2760 break;
2761 case isc_nm_udplistener:
2762 case isc_nm_tcplistener:
2763 case isc_nm_tcpdnslistener:
2764 return;
2765 default:
2766 UNREACHABLE();
2767 }
2768 }
2769
2770 static void
2771 shutdown_walk_cb(uv_handle_t *handle, void *arg) {
2772 isc_nmsocket_t *sock = uv_handle_get_data(handle);
2773 UNUSED(arg);
2774
2775 if (uv_is_closing(handle)) {
2776 return;
2777 }
2778
2779 switch (handle->type) {
2780 case UV_UDP:
2781 isc__nmsocket_shutdown(sock);
2782 return;
2783 case UV_TCP:
2784 switch (sock->type) {
2785 case isc_nm_tcpsocket:
2786 case isc_nm_tcpdnssocket:
2787 if (sock->parent == NULL) {
2788 /* Reset the TCP connections on shutdown */
2789 isc__nmsocket_reset(sock);
2790 return;
2791 }
2792 FALLTHROUGH;
2793 default:
2794 isc__nmsocket_shutdown(sock);
2795 }
2796
2797 return;
2798 default:
2799 return;
2800 }
2801 }
2802
2803 void
2804 isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0) {
2805 UNUSED(ev0);
2806
2807 uv_walk(&worker->loop, shutdown_walk_cb, NULL);
2808 }
2809
2810 bool
2811 isc__nm_acquire_interlocked(isc_nm_t *mgr) {
2812 if (!isc__nm_in_netthread()) {
2813 return (false);
2814 }
2815
2816 LOCK(&mgr->lock);
2817 bool success = atomic_compare_exchange_strong(
2818 &mgr->interlocked, &(int){ ISC_NETMGR_NON_INTERLOCKED },
2819 isc_nm_tid());
2820
2821 UNLOCK(&mgr->lock);
2822 return (success);
2823 }
2824
2825 void
2826 isc__nm_drop_interlocked(isc_nm_t *mgr) {
2827 if (!isc__nm_in_netthread()) {
2828 return;
2829 }
2830
2831 LOCK(&mgr->lock);
2832 int tid = atomic_exchange(&mgr->interlocked,
2833 ISC_NETMGR_NON_INTERLOCKED);
2834 INSIST(tid != ISC_NETMGR_NON_INTERLOCKED);
2835 BROADCAST(&mgr->wkstatecond);
2836 UNLOCK(&mgr->lock);
2837 }
2838
2839 void
2840 isc__nm_acquire_interlocked_force(isc_nm_t *mgr) {
2841 if (!isc__nm_in_netthread()) {
2842 return;
2843 }
2844
2845 LOCK(&mgr->lock);
2846 while (!atomic_compare_exchange_strong(
2847 &mgr->interlocked, &(int){ ISC_NETMGR_NON_INTERLOCKED },
2848 isc_nm_tid()))
2849 {
2850 WAIT(&mgr->wkstatecond, &mgr->lock);
2851 }
2852 UNLOCK(&mgr->lock);
2853 }
2854
2855 void
2856 isc_nm_setstats(isc_nm_t *mgr, isc_stats_t *stats) {
2857 REQUIRE(VALID_NM(mgr));
2858 REQUIRE(mgr->stats == NULL);
2859 REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2860
2861 isc_stats_attach(stats, &mgr->stats);
2862 }
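
/*
 * A minimal sketch of wiring up socket statistics (hypothetical caller):
 * a counter set sized for the socket statistics is created and handed to
 * the manager, which attaches its own reference.  The exact
 * isc_stats_create() signature is an assumption here; illustrative only,
 * kept out of the build.
 */
#if 0
static isc_result_t
setup_sockstats(isc_mem_t *mctx, isc_nm_t *netmgr) {
	isc_stats_t *sockstats = NULL;
	isc_result_t result;

	result = isc_stats_create(mctx, &sockstats,
				  isc_sockstatscounter_max);
	if (result != ISC_R_SUCCESS) {
		return (result);
	}

	isc_nm_setstats(netmgr, sockstats);
	isc_stats_detach(&sockstats);

	return (ISC_R_SUCCESS);
}
#endif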
2863
2864 void
2865 isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
2866 REQUIRE(VALID_NM(mgr));
2867 REQUIRE(counterid != -1);
2868
2869 if (mgr->stats != NULL) {
2870 isc_stats_increment(mgr->stats, counterid);
2871 }
2872 }
2873
2874 void
2875 isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
2876 REQUIRE(VALID_NM(mgr));
2877 REQUIRE(counterid != -1);
2878
2879 if (mgr->stats != NULL) {
2880 isc_stats_decrement(mgr->stats, counterid);
2881 }
2882 }
2883
2884 isc_result_t
2885 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) {
2886 #ifdef WIN32
2887 SOCKET sock;
2888 sock = socket(domain, type, protocol);
2889 if (sock == INVALID_SOCKET) {
2890 char strbuf[ISC_STRERRORSIZE];
2891 DWORD socket_errno = WSAGetLastError();
2892 switch (socket_errno) {
2893 case WSAEMFILE:
2894 case WSAENOBUFS:
2895 return (ISC_R_NORESOURCES);
2896
2897 case WSAEPROTONOSUPPORT:
2898 case WSAEPFNOSUPPORT:
2899 case WSAEAFNOSUPPORT:
2900 return (ISC_R_FAMILYNOSUPPORT);
2901 default:
2902 strerror_r(socket_errno, strbuf, sizeof(strbuf));
2903 UNEXPECTED_ERROR(
2904 __FILE__, __LINE__,
2905 "socket() failed with error code %lu: %s",
2906 socket_errno, strbuf);
2907 return (ISC_R_UNEXPECTED);
2908 }
2909 }
2910 #else
2911 int sock = socket(domain, type, protocol);
2912 if (sock < 0) {
2913 return (isc_errno_toresult(errno));
2914 }
2915 #endif
2916 *sockp = (uv_os_sock_t)sock;
2917 return (ISC_R_SUCCESS);
2918 }
2919
2920 void
2921 isc__nm_closesocket(uv_os_sock_t sock) {
2922 #ifdef WIN32
2923 closesocket(sock);
2924 #else
2925 close(sock);
2926 #endif
2927 }
2928
2929 #define setsockopt_on(socket, level, name) \
2930 setsockopt(socket, level, name, &(int){ 1 }, sizeof(int))
2931
2932 #define setsockopt_off(socket, level, name) \
2933 setsockopt(socket, level, name, &(int){ 0 }, sizeof(int))
2934
2935 isc_result_t
2936 isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
2937 /*
2938 * Set IP_FREEBIND (or an equivalent option) on the socket.
2939 */
2940 #ifdef IP_FREEBIND
2941 UNUSED(sa_family);
2942 if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
2943 return (ISC_R_FAILURE);
2944 }
2945 return (ISC_R_SUCCESS);
2946 #elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
2947 if (sa_family == AF_INET) {
2948 #if defined(IP_BINDANY)
2949 if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
2950 return (ISC_R_FAILURE);
2951 }
2952 return (ISC_R_SUCCESS);
2953 #endif
2954 } else if (sa_family == AF_INET6) {
2955 #if defined(IPV6_BINDANY)
2956 if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
2957 return (ISC_R_FAILURE);
2958 }
2959 return (ISC_R_SUCCESS);
2960 #endif
2961 }
2962 return (ISC_R_NOTIMPLEMENTED);
2963 #elif defined(SO_BINDANY)
2964 UNUSED(sa_family);
2965 if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
2966 return (ISC_R_FAILURE);
2967 }
2968 return (ISC_R_SUCCESS);
2969 #else
2970 UNUSED(fd);
2971 UNUSED(sa_family);
2972 return (ISC_R_NOTIMPLEMENTED);
2973 #endif
2974 }
2975
2976 isc_result_t
2977 isc__nm_socket_reuse(uv_os_sock_t fd) {
2978 /*
2979 * Generally, the SO_REUSEADDR socket option allows reuse of
2980 * local addresses.
2981 *
2982 * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
2983 * additional refinements for programs that use multicast.
2984 *
2985 * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
2986 * rather than stealing it from the current listener, so we don't use it
2987 * here, but rather in isc__nm_socket_reuse_lb().
2988 *
2989 * On Windows, SO_REUSEADDR also allows a socket to forcibly bind to a
2990 * port in use by another socket.
2991 */
2992
2993 #if defined(SO_REUSEPORT) && !defined(__linux__)
2994 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
2995 return (ISC_R_FAILURE);
2996 }
2997 return (ISC_R_SUCCESS);
2998 #elif defined(SO_REUSEADDR)
2999 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) {
3000 return (ISC_R_FAILURE);
3001 }
3002 return (ISC_R_SUCCESS);
3003 #else
3004 UNUSED(fd);
3005 return (ISC_R_NOTIMPLEMENTED);
3006 #endif
3007 }
3008
3009 isc_result_t
3010 isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
3011 /*
3012 * On FreeBSD 12+, the SO_REUSEPORT_LB socket option allows sockets to be
3013 * bound to an identical socket address. For UDP sockets, the use of
3014 * this option can provide better distribution of incoming datagrams to
3015 * multiple processes (or threads) as compared to the traditional
3016 * technique of having multiple processes compete to receive datagrams
3017 * on the same socket.
3018 *
3019 * On Linux, the same thing is achieved simply with SO_REUSEPORT.
3020 */
3021 #if defined(SO_REUSEPORT_LB)
3022 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
3023 return (ISC_R_FAILURE);
3024 } else {
3025 return (ISC_R_SUCCESS);
3026 }
3027 #elif defined(SO_REUSEPORT) && defined(__linux__)
3028 if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
3029 return (ISC_R_FAILURE);
3030 } else {
3031 return (ISC_R_SUCCESS);
3032 }
3033 #else
3034 UNUSED(fd);
3035 return (ISC_R_NOTIMPLEMENTED);
3036 #endif
3037 }
3038
3039 isc_result_t
3040 isc__nm_socket_incoming_cpu(uv_os_sock_t fd) {
3041 #ifdef SO_INCOMING_CPU
3042 if (setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU) == -1) {
3043 return (ISC_R_FAILURE);
3044 } else {
3045 return (ISC_R_SUCCESS);
3046 }
3047 #else
3048 UNUSED(fd);
3049 #endif
3050 return (ISC_R_NOTIMPLEMENTED);
3051 }
3052
3053 isc_result_t
3054 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family) {
3055 /*
3056 * Disable Path MTU Discovery on IP packets.
3057 */
3058 if (sa_family == AF_INET6) {
3059 #if defined(IPV6_DONTFRAG)
3060 if (setsockopt_off(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
3061 return (ISC_R_FAILURE);
3062 } else {
3063 return (ISC_R_SUCCESS);
3064 }
3065 #elif defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
3066 if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
3067 &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
3068 {
3069 return (ISC_R_FAILURE);
3070 } else {
3071 return (ISC_R_SUCCESS);
3072 }
3073 #else
3074 UNUSED(fd);
3075 #endif
3076 } else if (sa_family == AF_INET) {
3077 #if defined(IP_DONTFRAG)
3078 if (setsockopt_off(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
3079 return (ISC_R_FAILURE);
3080 } else {
3081 return (ISC_R_SUCCESS);
3082 }
3083 #elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
3084 if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
3085 &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
3086 {
3087 return (ISC_R_FAILURE);
3088 } else {
3089 return (ISC_R_SUCCESS);
3090 }
3091 #else
3092 UNUSED(fd);
3093 #endif
3094 } else {
3095 return (ISC_R_FAMILYNOSUPPORT);
3096 }
3097
3098 return (ISC_R_NOTIMPLEMENTED);
3099 }
3100
3101 #if defined(_WIN32)
3102 #define TIMEOUT_TYPE DWORD
3103 #define TIMEOUT_DIV 1000
3104 #define TIMEOUT_OPTNAME TCP_MAXRT
3105 #elif defined(TCP_CONNECTIONTIMEOUT)
3106 #define TIMEOUT_TYPE int
3107 #define TIMEOUT_DIV 1000
3108 #define TIMEOUT_OPTNAME TCP_CONNECTIONTIMEOUT
3109 #elif defined(TCP_RXT_CONNDROPTIME)
3110 #define TIMEOUT_TYPE int
3111 #define TIMEOUT_DIV 1000
3112 #define TIMEOUT_OPTNAME TCP_RXT_CONNDROPTIME
3113 #elif defined(TCP_USER_TIMEOUT)
3114 #define TIMEOUT_TYPE unsigned int
3115 #define TIMEOUT_DIV 1
3116 #define TIMEOUT_OPTNAME TCP_USER_TIMEOUT
3117 #elif defined(TCP_KEEPINIT)
3118 #define TIMEOUT_TYPE int
3119 #define TIMEOUT_DIV 1000
3120 #define TIMEOUT_OPTNAME TCP_KEEPINIT
3121 #endif
3122
3123 isc_result_t
3124 isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
3125 #if defined(TIMEOUT_OPTNAME)
3126 TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV;
3127
3128 if (timeout == 0) {
3129 timeout = 1;
3130 }
3131
3132 if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout,
3133 sizeof(timeout)) == -1)
3134 {
3135 return (ISC_R_FAILURE);
3136 }
3137
3138 return (ISC_R_SUCCESS);
3139 #else
3140 UNUSED(fd);
3141 UNUSED(timeout_ms);
3142
3143 return (ISC_R_SUCCESS);
3144 #endif
3145 }
3146
3147 isc_result_t
3148 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd) {
3149 #ifdef TCP_NODELAY
3150 if (setsockopt_on(fd, IPPROTO_TCP, TCP_NODELAY) == -1) {
3151 return (ISC_R_FAILURE);
3152 } else {
3153 return (ISC_R_SUCCESS);
3154 }
3155 #else
3156 UNUSED(fd);
3157 return (ISC_R_SUCCESS);
3158 #endif
3159 }
3160
3161 static isc_threadresult_t
3162 isc__nm_work_run(isc_threadarg_t arg) {
3163 isc__nm_work_t *work = (isc__nm_work_t *)arg;
3164
3165 work->cb(work->data);
3166
3167 return ((isc_threadresult_t)0);
3168 }
3169
3170 static void
3171 isc__nm_work_cb(uv_work_t *req) {
3172 isc__nm_work_t *work = uv_req_get_data((uv_req_t *)req);
3173
3174 if (isc_tid_v == SIZE_MAX) {
3175 isc__trampoline_t *trampoline_arg =
3176 isc__trampoline_get(isc__nm_work_run, work);
3177 (void)isc__trampoline_run(trampoline_arg);
3178 } else {
3179 (void)isc__nm_work_run((isc_threadarg_t)work);
3180 }
3181 }
3182
3183 static void
3184 isc__nm_after_work_cb(uv_work_t *req, int status) {
3185 isc_result_t result = ISC_R_SUCCESS;
3186 isc__nm_work_t *work = uv_req_get_data((uv_req_t *)req);
3187 isc_nm_t *netmgr = work->netmgr;
3188
3189 if (status != 0) {
3190 result = isc__nm_uverr2result(status);
3191 }
3192
3193 work->after_cb(work->data, result);
3194
3195 isc_mem_put(netmgr->mctx, work, sizeof(*work));
3196
3197 isc_nm_detach(&netmgr);
3198 }
3199
3200 void
3201 isc_nm_work_offload(isc_nm_t *netmgr, isc_nm_workcb_t work_cb,
3202 isc_nm_after_workcb_t after_work_cb, void *data) {
3203 isc__networker_t *worker = NULL;
3204 isc__nm_work_t *work = NULL;
3205 int r;
3206
3207 REQUIRE(isc__nm_in_netthread());
3208 REQUIRE(VALID_NM(netmgr));
3209
3210 worker = &netmgr->workers[isc_nm_tid()];
3211
3212 work = isc_mem_get(netmgr->mctx, sizeof(*work));
3213 *work = (isc__nm_work_t){
3214 .cb = work_cb,
3215 .after_cb = after_work_cb,
3216 .data = data,
3217 };
3218
3219 isc_nm_attach(netmgr, &work->netmgr);
3220
3221 uv_req_set_data((uv_req_t *)&work->req, work);
3222
3223 r = uv_queue_work(&worker->loop, &work->req, isc__nm_work_cb,
3224 isc__nm_after_work_cb);
3225 UV_RUNTIME_CHECK(uv_queue_work, r);
3226 }
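
/*
 * A minimal offload sketch (hypothetical callbacks): a potentially
 * blocking job is run on the libuv thread pool via isc_nm_work_offload()
 * and its completion is reported back on the net thread.  The job type
 * and callback names are illustrative assumptions; kept out of the build.
 */
#if 0
typedef struct blocking_job {
	int input;
	int output;
} blocking_job_t;

static void
job_work(void *data) {
	/* Runs on a thread-pool thread; it is allowed to block here. */
	blocking_job_t *job = data;

	job->output = job->input * 2;
}

static void
job_done(void *data, isc_result_t result) {
	/* Runs back on the net thread once the job has finished. */
	blocking_job_t *job = data;

	UNUSED(job);
	UNUSED(result);
}

/*
 * From a net thread:
 *	isc_nm_work_offload(netmgr, job_work, job_done, job);
 */
#endif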
3227
3228 void
3229 isc_nm_timer_create(isc_nmhandle_t *handle, isc_nm_timer_cb cb, void *cbarg,
3230 isc_nm_timer_t **timerp) {
3231 isc__networker_t *worker = NULL;
3232 isc_nmsocket_t *sock = NULL;
3233 isc_nm_timer_t *timer = NULL;
3234 int r;
3235
3236 REQUIRE(isc__nm_in_netthread());
3237 REQUIRE(VALID_NMHANDLE(handle));
3238 REQUIRE(VALID_NMSOCK(handle->sock));
3239
3240 sock = handle->sock;
3241 worker = &sock->mgr->workers[isc_nm_tid()];
3242
3243 timer = isc_mem_get(sock->mgr->mctx, sizeof(*timer));
3244 *timer = (isc_nm_timer_t){ .cb = cb, .cbarg = cbarg };
3245 isc_refcount_init(&timer->references, 1);
3246 isc_nmhandle_attach(handle, &timer->handle);
3247
3248 r = uv_timer_init(&worker->loop, &timer->timer);
3249 UV_RUNTIME_CHECK(uv_timer_init, r);
3250
3251 uv_handle_set_data((uv_handle_t *)&timer->timer, timer);
3252
3253 *timerp = timer;
3254 }
3255
3256 void
3257 isc_nm_timer_attach(isc_nm_timer_t *timer, isc_nm_timer_t **timerp) {
3258 REQUIRE(timer != NULL);
3259 REQUIRE(timerp != NULL && *timerp == NULL);
3260
3261 isc_refcount_increment(&timer->references);
3262 *timerp = timer;
3263 }
3264
3265 static void
3266 timer_destroy(uv_handle_t *uvhandle) {
3267 isc_nm_timer_t *timer = uv_handle_get_data(uvhandle);
3268 isc_nmhandle_t *handle = timer->handle;
3269 isc_mem_t *mctx = timer->handle->sock->mgr->mctx;
3270
3271 isc_mem_put(mctx, timer, sizeof(*timer));
3272
3273 isc_nmhandle_detach(&handle);
3274 }
3275
3276 void
3277 isc_nm_timer_detach(isc_nm_timer_t **timerp) {
3278 isc_nm_timer_t *timer = NULL;
3279 isc_nmhandle_t *handle = NULL;
3280
3281 REQUIRE(timerp != NULL && *timerp != NULL);
3282
3283 timer = *timerp;
3284 *timerp = NULL;
3285
3286 handle = timer->handle;
3287
3288 REQUIRE(isc__nm_in_netthread());
3289 REQUIRE(VALID_NMHANDLE(handle));
3290 REQUIRE(VALID_NMSOCK(handle->sock));
3291
3292 if (isc_refcount_decrement(&timer->references) == 1) {
3293 int r = uv_timer_stop(&timer->timer);
3294 UV_RUNTIME_CHECK(uv_timer_stop, r);
3295 uv_close((uv_handle_t *)&timer->timer, timer_destroy);
3296 }
3297 }
3298
3299 static void
3300 timer_cb(uv_timer_t *uvtimer) {
3301 isc_nm_timer_t *timer = uv_handle_get_data((uv_handle_t *)uvtimer);
3302
3303 REQUIRE(timer->cb != NULL);
3304
3305 timer->cb(timer->cbarg, ISC_R_TIMEDOUT);
3306 }
3307
3308 void
3309 isc_nm_timer_start(isc_nm_timer_t *timer, uint64_t timeout) {
3310 int r = uv_timer_start(&timer->timer, timer_cb, timeout, 0);
3311 UV_RUNTIME_CHECK(uv_timer_start, r);
3312 }
3313
3314 void
3315 isc_nm_timer_stop(isc_nm_timer_t *timer) {
3316 int r = uv_timer_stop(&timer->timer);
3317 UV_RUNTIME_CHECK(uv_timer_stop, r);
3318 }
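
/*
 * A minimal sketch of the isc_nm_timer API above (hypothetical callback):
 * the timer must be created on the handle's net thread, the callback is
 * invoked with ISC_R_TIMEDOUT when it fires, and the last detach closes
 * the underlying uv timer.  Illustrative only, kept out of the build.
 */
#if 0
static void
write_timed_out(void *cbarg, isc_result_t result) {
	/* result is always ISC_R_TIMEDOUT here; see timer_cb() above. */
	UNUSED(cbarg);
	UNUSED(result);
}

static void
arm_write_timer(isc_nmhandle_t *handle, uint64_t timeout_ms,
		isc_nm_timer_t **timerp) {
	isc_nm_timer_create(handle, write_timed_out, NULL, timerp);
	isc_nm_timer_start(*timerp, timeout_ms);

	/* Later: isc_nm_timer_stop(timer); isc_nm_timer_detach(&timer); */
}
#endif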
3319
3320 #ifdef NETMGR_TRACE
3321 /*
3322 * Dump all active sockets in netmgr. We output to stderr
3323 * as the logger might already be shut down.
3324 */
3325
3326 static const char *
3327 nmsocket_type_totext(isc_nmsocket_type type) {
3328 switch (type) {
3329 case isc_nm_udpsocket:
3330 return ("isc_nm_udpsocket");
3331 case isc_nm_udplistener:
3332 return ("isc_nm_udplistener");
3333 case isc_nm_tcpsocket:
3334 return ("isc_nm_tcpsocket");
3335 case isc_nm_tcplistener:
3336 return ("isc_nm_tcplistener");
3337 case isc_nm_tcpdnslistener:
3338 return ("isc_nm_tcpdnslistener");
3339 case isc_nm_tcpdnssocket:
3340 return ("isc_nm_tcpdnssocket");
3341 default:
3342 UNREACHABLE();
3343 }
3344 }
3345
3346 static void
3347 nmhandle_dump(isc_nmhandle_t *handle) {
3348 fprintf(stderr, "Active handle %p, refs %" PRIuFAST32 "\n", handle,
3349 isc_refcount_current(&handle->references));
3350 fprintf(stderr, "Created by:\n");
3351 backtrace_symbols_fd(handle->backtrace, handle->backtrace_size,
3352 STDERR_FILENO);
3353 fprintf(stderr, "\n\n");
3354 }
3355
3356 static void
3357 nmsocket_dump(isc_nmsocket_t *sock) {
3358 isc_nmhandle_t *handle = NULL;
3359
3360 LOCK(&sock->lock);
3361 fprintf(stderr, "\n=================\n");
3362 fprintf(stderr, "Active %s socket %p, type %s, refs %" PRIuFAST32 "\n",
3363 atomic_load(&sock->client) ? "client" : "server", sock,
3364 nmsocket_type_totext(sock->type),
3365 isc_refcount_current(&sock->references));
3366 fprintf(stderr,
3367 "Parent %p, listener %p, server %p, statichandle = "
3368 "%p\n",
3369 sock->parent, sock->listener, sock->server, sock->statichandle);
3370 fprintf(stderr, "Flags:%s%s%s%s%s\n",
3371 atomic_load(&sock->active) ? " active" : "",
3372 atomic_load(&sock->closing) ? " closing" : "",
3373 atomic_load(&sock->destroying) ? " destroying" : "",
3374 atomic_load(&sock->connecting) ? " connecting" : "",
3375 sock->accepting ? " accepting" : "");
3376 fprintf(stderr, "Created by:\n");
3377 backtrace_symbols_fd(sock->backtrace, sock->backtrace_size,
3378 STDERR_FILENO);
3379 fprintf(stderr, "\n");
3380
3381 for (handle = ISC_LIST_HEAD(sock->active_handles); handle != NULL;
3382 handle = ISC_LIST_NEXT(handle, active_link))
3383 {
3384 static bool first = true;
3385 if (first) {
3386 fprintf(stderr, "Active handles:\n");
3387 first = false;
3388 }
3389 nmhandle_dump(handle);
3390 }
3391
3392 fprintf(stderr, "\n");
3393 UNLOCK(&sock->lock);
3394 }
3395
3396 void
3397 isc__nm_dump_active(isc_nm_t *nm) {
3398 isc_nmsocket_t *sock = NULL;
3399
3400 REQUIRE(VALID_NM(nm));
3401
3402 LOCK(&nm->lock);
3403 for (sock = ISC_LIST_HEAD(nm->active_sockets); sock != NULL;
3404 sock = ISC_LIST_NEXT(sock, active_link))
3405 {
3406 static bool first = true;
3407 if (first) {
3408 fprintf(stderr, "Outstanding sockets\n");
3409 first = false;
3410 }
3411 nmsocket_dump(sock);
3412 }
3413 UNLOCK(&nm->lock);
3414 }
3415 #endif
3416