xref: /netbsd-src/external/mpl/dhcp/bind/dist/lib/isc/netmgr/netmgr-int.h (revision 4afad4b7fa6d4a0d3dedf41d1587a7250710ae54)
1 /*	$NetBSD: netmgr-int.h,v 1.1 2024/02/18 20:57:55 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * SPDX-License-Identifier: MPL-2.0
7  *
8  * This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11  *
12  * See the COPYRIGHT file distributed with this work for additional
13  * information regarding copyright ownership.
14  */
15 
16 #pragma once
17 
18 #include <unistd.h>
19 #include <uv.h>
20 
21 #include <openssl/err.h>
22 #include <openssl/ssl.h>
23 
24 #include <isc/astack.h>
25 #include <isc/atomic.h>
26 #include <isc/barrier.h>
27 #include <isc/buffer.h>
28 #include <isc/condition.h>
29 #include <isc/magic.h>
30 #include <isc/mem.h>
31 #include <isc/netmgr.h>
32 #include <isc/quota.h>
33 #include <isc/random.h>
34 #include <isc/refcount.h>
35 #include <isc/region.h>
36 #include <isc/result.h>
37 #include <isc/rwlock.h>
38 #include <isc/sockaddr.h>
39 #include <isc/stats.h>
40 #include <isc/thread.h>
41 #include <isc/util.h>
42 
43 #include "uv-compat.h"
44 
/* Sentinel "thread id" for code running outside any netmgr worker thread. */
#define ISC_NETMGR_TID_UNKNOWN -1

/* Must be different from ISC_NETMGR_TID_UNKNOWN */
#define ISC_NETMGR_NON_INTERLOCKED -2
49 
/*
 * Receive buffers
 */
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
/*
 * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
 * libuv will not receive more than 20 datagrams in a single recvmmsg call.
 */
#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
#else
/*
 * A single DNS message size
 */
#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
#endif

/*
 * The TCP receive buffer can fit one maximum sized DNS message plus its size,
 * the receive buffer here affects TCP, DoT and DoH.
 */
#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)

/* Pick the larger buffer */
#define ISC_NETMGR_RECVBUF_SIZE                                     \
	(ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
		 ? ISC_NETMGR_UDP_RECVBUF_SIZE                      \
		 : ISC_NETMGR_TCP_RECVBUF_SIZE)

/*
 * Send buffer
 */
#define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)

/*%
 * Regular TCP buffer size.
 */
#define NM_REG_BUF 4096

/*%
 * Larger buffer for when the regular one isn't enough; this will
 * hold two full DNS packets with lengths.  netmgr receives 64k at
 * most in TCPDNS connections, so there's no risk of overrun
 * when using a buffer this size.
 *
 * Parenthesized so the macro expands safely inside larger expressions
 * (the previous unparenthesized form mis-evaluated under operators of
 * higher precedence than '*', e.g. 'x % NM_BIG_BUF').
 */
#define NM_BIG_BUF (ISC_NETMGR_TCP_RECVBUF_SIZE * 2)
95 
/*
 * Define NETMGR_TRACE to activate tracing of handles and sockets.
 * This will impair performance but enables us to quickly determine,
 * if netmgr resources haven't been cleaned up on shutdown, which ones
 * are still in use.
 */
#ifdef NETMGR_TRACE
/* Number of stack frames recorded per handle/socket backtrace. */
#define TRACE_SIZE 8

/*
 * Dump the list of still-active netmgr sockets and handles; only
 * built when NETMGR_TRACE is defined (debugging aid).
 */
void
isc__nm_dump_active(isc_nm_t *nm);

/* Per-platform thread id, used in trace log lines below. */
#if defined(__linux__)
#include <syscall.h>
#define gettid() (uint32_t) syscall(SYS_gettid)
#elif defined(_WIN32)
#define gettid() (uint32_t) GetCurrentThreadId()
#else
#define gettid() (uint32_t) pthread_self()
#endif
116 
#ifdef NETMGR_TRACE_VERBOSE
#define NETMGR_TRACE_LOG(format, ...)                                \
	fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \
		isc_nm_tid(), file, line, func, __VA_ARGS__)
#else
/*
 * Consume the FLARG file/line/func parameters so they don't trigger
 * unused-variable warnings.  Wrapped in do { } while (0) so the macro
 * behaves as a single statement; the previous expansion was three bare
 * statements, which broke when used in an unbraced if/else.
 */
#define NETMGR_TRACE_LOG(format, ...) \
	do {                          \
		(void)file;           \
		(void)line;           \
		(void)func;           \
	} while (0)
#endif
127 
/*
 * FLARG ("file/line arguments") machinery: when NETMGR_TRACE is defined,
 * the internal attach/detach/get/put functions grow trailing
 * (file, line, func) parameters, threaded through by these macros and
 * recorded in netievents via FLARG_IEVENT/FLARG_IEVENT_PASS.  The public
 * isc__nm_* names below are mapped onto the isc___nm_* (triple
 * underscore) implementations, with __FILE__/__LINE__/__func__ appended.
 */
#define FLARG_PASS , file, line, func
#define FLARG                                              \
	, const char *file __attribute__((unused)),        \
		unsigned int line __attribute__((unused)), \
		const char *func __attribute__((unused))
#define FLARG_IEVENT(ievent)              \
	const char *file = ievent->file;  \
	unsigned int line = ievent->line; \
	const char *func = ievent->func;
#define FLARG_IEVENT_PASS(ievent) \
	ievent->file = file;      \
	ievent->line = line;      \
	ievent->func = func;
#define isc__nm_uvreq_get(req, sock) \
	isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__)
#define isc__nm_uvreq_put(req, sock) \
	isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__)
#define isc__nmsocket_init(sock, mgr, type, iface)                      \
	isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \
			    __func__)
#define isc__nmsocket_put(sockp) \
	isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__)
#define isc__nmsocket_attach(sock, target) \
	isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__)
#define isc__nmsocket_detach(socketp) \
	isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__)
#define isc__nmsocket_close(socketp) \
	isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__)
#define isc__nmhandle_get(sock, peer, local) \
	isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__)
#define isc__nmsocket_prep_destroy(sock) \
	isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__)
#else
/* Non-tracing build: FLARG machinery compiles away to nothing. */
#define NETMGR_TRACE_LOG(format, ...)

#define FLARG_PASS
#define FLARG
#define FLARG_IEVENT(ievent)
#define FLARG_IEVENT_PASS(ievent)
#define isc__nm_uvreq_get(req, sock) isc___nm_uvreq_get(req, sock)
#define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock)
#define isc__nmsocket_init(sock, mgr, type, iface) \
	isc___nmsocket_init(sock, mgr, type, iface)
#define isc__nmsocket_put(sockp)	   isc___nmsocket_put(sockp)
#define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target)
#define isc__nmsocket_detach(socketp)	   isc___nmsocket_detach(socketp)
#define isc__nmsocket_close(socketp)	   isc___nmsocket_close(socketp)
#define isc__nmhandle_get(sock, peer, local) \
	isc___nmhandle_get(sock, peer, local)
#define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock)
#endif
179 
/*
 * Queue types in the order of processing priority.
 */
typedef enum {
	NETIEVENT_PRIORITY = 0,
	NETIEVENT_PRIVILEGED = 1,
	NETIEVENT_TASK = 2,
	NETIEVENT_NORMAL = 3,
	NETIEVENT_MAX = 4,
} netievent_type_t;

typedef struct isc__nm_uvreq isc__nm_uvreq_t;
typedef struct isc__netievent isc__netievent_t;

typedef ISC_LIST(isc__netievent_t) isc__netievent_list_t;

/*
 * One netievent queue: a linked list of events, protected by a lock,
 * with a condition variable for waiting on new entries.
 */
typedef struct ievent {
	isc_mutex_t lock;
	isc_condition_t cond;
	isc__netievent_list_t list;
} ievent_t;

/*
 * Single network event loop worker.
 */
typedef struct isc__networker {
	isc_nm_t *mgr;
	int id;		  /* thread id */
	uv_loop_t loop;	  /* libuv loop structure */
	uv_async_t async; /* async channel to send
			   * data to this networker */
	bool paused;
	bool finished;
	isc_thread_t thread;
	ievent_t ievents[NETIEVENT_MAX]; /* one queue per priority class */

	isc_refcount_t references;
	atomic_int_fast64_t pktcount;
	char *recvbuf;
	char *sendbuf;
	bool recvbuf_inuse;
} isc__networker_t;
222 
/*
 * A general handle for a connection bound to a networker.  For UDP
 * connections we have peer address here, so both TCP and UDP can be
 * handled with a simple send-like function
 */
#define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
#define VALID_NMHANDLE(t)                      \
	(ISC_MAGIC_VALID(t, NMHANDLE_MAGIC) && \
	 atomic_load(&(t)->references) > 0)

typedef void (*isc__nm_closecb)(isc_nmhandle_t *);

struct isc_nmhandle {
	int magic;
	isc_refcount_t references;

	/*
	 * The socket is not 'attached' in the traditional
	 * reference-counting sense. Instead, we keep all handles in an
	 * array in the socket object.  This way, we don't have circular
	 * dependencies and we can close all handles when we're destroying
	 * the socket.
	 */
	isc_nmsocket_t *sock;

	isc_sockaddr_t peer;
	isc_sockaddr_t local;
	isc_nm_opaquecb_t doreset; /* reset extra callback, external */
	isc_nm_opaquecb_t dofree;  /* free extra callback, external */
#ifdef NETMGR_TRACE
	void *backtrace[TRACE_SIZE]; /* allocation backtrace for leak hunting */
	int backtrace_size;
	LINK(isc_nmhandle_t) active_link;
#endif
	void *opaque;
	char extra[]; /* flexible member; sized by sock->extrahandlesize */
};
260 
/*
 * All netievent types.  NOTE: ordering is significant — values greater
 * than netievent_prio (see below) are treated as high-priority events.
 */
typedef enum isc__netievent_type {
	netievent_udpconnect,
	netievent_udpclose,
	netievent_udpsend,
	netievent_udpread,
	netievent_udpcancel,

	netievent_tcpconnect,
	netievent_tcpclose,
	netievent_tcpsend,
	netievent_tcpstartread,
	netievent_tcppauseread,
	netievent_tcpaccept,
	netievent_tcpcancel,

	netievent_tcpdnsaccept,
	netievent_tcpdnsconnect,
	netievent_tcpdnsclose,
	netievent_tcpdnssend,
	netievent_tcpdnsread,
	netievent_tcpdnscancel,

	netievent_shutdown,
	netievent_stop,
	netievent_pause,

	netievent_connectcb,
	netievent_readcb,
	netievent_sendcb,

	netievent_task,
	netievent_privilegedtask,

	/*
	 * event type values higher than this will be treated
	 * as high-priority events, which can be processed
	 * while the netmgr is pausing or paused.
	 */
	netievent_prio = 0xff,

	netievent_udplisten,
	netievent_udpstop,
	netievent_tcplisten,
	netievent_tcpstop,
	netievent_tcpdnslisten,
	netievent_tcpdnsstop,

	netievent_resume,
	netievent_detach,
	netievent_close,
} isc__netievent_type;
312 
/* One completion callback; which member is valid depends on the request. */
typedef union {
	isc_nm_recv_cb_t recv;
	isc_nm_cb_t send;
	isc_nm_cb_t connect;
	isc_nm_accept_cb_t accept;
} isc__nm_cb_t;

/*
 * Wrapper around uv_req_t with 'our' fields in it.  req->data should
 * always point to its parent.  Note that we always allocate more than
 * sizeof(struct) because we make room for different req types;
 */
#define UVREQ_MAGIC    ISC_MAGIC('N', 'M', 'U', 'R')
#define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)

struct isc__nm_uvreq {
	int magic;
	isc_nmsocket_t *sock;
	isc_nmhandle_t *handle;
	char tcplen[2];	       /* The TCP DNS message length */
	uv_buf_t uvbuf;	       /* translated isc_region_t, to be
				* sent or received */
	isc_sockaddr_t local;  /* local address */
	isc_sockaddr_t peer;   /* peer address */
	isc__nm_cb_t cb;       /* callback */
	void *cbarg;	       /* callback argument */
	isc_nm_timer_t *timer; /* TCP write timer */

	/* The underlying libuv request, one member per request kind. */
	union {
		uv_handle_t handle;
		uv_req_t req;
		uv_getaddrinfo_t getaddrinfo;
		uv_getnameinfo_t getnameinfo;
		uv_shutdown_t shutdown;
		uv_write_t write;
		uv_connect_t connect;
		uv_udp_send_t udp_send;
		uv_fs_t fs;
		uv_work_t work;
	} uv_req;
	ISC_LINK(isc__nm_uvreq_t) link;
};
355 
/* Reference-counted wrapper tying a libuv timer to a handle + callback. */
struct isc_nm_timer {
	isc_refcount_t references;
	uv_timer_t timer;
	isc_nmhandle_t *handle;
	isc_nm_timer_cb cb;
	void *cbarg;
};

void *
isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type);
/*%<
 * Allocate an ievent and set the type.
 */
void
isc__nm_put_netievent(isc_nm_t *mgr, void *ievent);
/*%<
 * Release an ievent previously obtained from isc__nm_get_netievent().
 */
371 
/*
 * The macros here are used to simulate the "inheritance" in C, there's the base
 * netievent structure that contains just its own type and socket, and there are
 * extended netievent types that also have handles or requests or other data.
 *
 * The macros here ensure that:
 *
 *   1. every netievent type has matching definition, declaration and
 *      implementation
 *
 *   2. we handle all the netievent types of same subclass the same, e.g. if the
 *      extended netievent contains handle, we always attach to the handle in
 *      the ctor and detach from the handle in dtor.
 *
 * There are three macros here for each netievent subclass:
 *
 *   1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in
 *   this header
 *
 *   2. NETIEVENT_*_DECL(type) generates the declaration of the get and put
 *      functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used
 *      below in this header
 *
 *   3. NETIEVENT_*_DEF(type) generates the definition of the functions; used
 *   either in netmgr.c or matching protocol file (e.g. udp.c, tcp.c, etc.)
 */

/*
 * Common leading members of every socket-carrying netievent; must match
 * the layout of isc__netievent_t so events can be treated polymorphically.
 * file/line/func record the enqueueing call site for NETMGR_TRACE.
 */
#define NETIEVENT__SOCKET                \
	isc__netievent_type type;        \
	ISC_LINK(isc__netievent_t) link; \
	isc_nmsocket_t *sock;            \
	const char *file;                \
	unsigned int line;               \
	const char *func

typedef struct isc__netievent__socket {
	NETIEVENT__SOCKET;
} isc__netievent__socket_t;

#define NETIEVENT_SOCKET_TYPE(type) \
	typedef isc__netievent__socket_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_DECL(type)                              \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
		isc_nm_t *nm, isc_nmsocket_t *sock);             \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
					  isc__netievent_##type##_t *ievent)

/* ctor attaches to the socket; dtor detaches — see header comment above. */
#define NETIEVENT_SOCKET_DEF(type)                                             \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock) {                          \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}
435 
/* Extended netievent: socket + uv request. */
typedef struct isc__netievent__socket_req {
	NETIEVENT__SOCKET;
	isc__nm_uvreq_t *req;
} isc__netievent__socket_req_t;

#define NETIEVENT_SOCKET_REQ_TYPE(type) \
	typedef isc__netievent__socket_req_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_REQ_DECL(type)                                    \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(           \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                    \
					  isc__netievent_##type##_t *ievent)

/* The req pointer is borrowed (plain assignment), unlike the socket. */
#define NETIEVENT_SOCKET_REQ_DEF(type)                                         \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {    \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->req = req;                                             \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}

/* Extended netievent: socket + uv request + result code. */
typedef struct isc__netievent__socket_req_result {
	NETIEVENT__SOCKET;
	isc__nm_uvreq_t *req;
	isc_result_t result;
} isc__netievent__socket_req_result_t;

#define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \
	typedef isc__netievent__socket_req_result_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_REQ_RESULT_DECL(type)                            \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(          \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
		isc_result_t result);                                     \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                   \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_REQ_RESULT_DEF(type)                                  \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req,      \
		isc_result_t result) {                                         \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->req = req;                                             \
		ievent->result = result;                                       \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}
501 
/* Extended netievent: socket + handle (both reference-counted). */
typedef struct isc__netievent__socket_handle {
	NETIEVENT__SOCKET;
	isc_nmhandle_t *handle;
} isc__netievent__socket_handle_t;

#define NETIEVENT_SOCKET_HANDLE_TYPE(type) \
	typedef isc__netievent__socket_handle_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_HANDLE_DECL(type)                                   \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(             \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                      \
					  isc__netievent_##type##_t *ievent)

/* ctor attaches to both socket and handle; dtor detaches from both. */
#define NETIEVENT_SOCKET_HANDLE_DEF(type)                                      \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) {  \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		isc_nmhandle_attach(handle, &ievent->handle);                  \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc_nmhandle_detach(&ievent->handle);                          \
		isc__nm_put_netievent(nm, ievent);                             \
	}

/* Extended netievent: socket + quota (quota pointer is borrowed). */
typedef struct isc__netievent__socket_quota {
	NETIEVENT__SOCKET;
	isc_quota_t *quota;
} isc__netievent__socket_quota_t;

#define NETIEVENT_SOCKET_QUOTA_TYPE(type) \
	typedef isc__netievent__socket_quota_t isc__netievent_##type##_t

#define NETIEVENT_SOCKET_QUOTA_DECL(type)                                \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(         \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                  \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_SOCKET_QUOTA_DEF(type)                                       \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) {      \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		isc__nmsocket_attach(sock, &ievent->sock);                     \
		ievent->quota = quota;                                         \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nmsocket_detach(&ievent->sock);                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}
564 
/*
 * Task netievent: carries an isc_task_t instead of a socket.
 * NOTE: unlike the other *_TYPE macros, NETIEVENT_TASK_TYPE's expansion
 * already ends in a semicolon — call sites must not add another.
 */
typedef struct isc__netievent__task {
	isc__netievent_type type;
	ISC_LINK(isc__netievent_t) link;
	isc_task_t *task;
} isc__netievent__task_t;

#define NETIEVENT_TASK_TYPE(type) \
	typedef isc__netievent__task_t isc__netievent_##type##_t;

#define NETIEVENT_TASK_DECL(type)                                \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
		isc_nm_t *nm, isc_task_t *task);                 \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
					  isc__netievent_##type##_t *ievent);

/* The task pointer is borrowed; cleared (not detached) in the dtor. */
#define NETIEVENT_TASK_DEF(type)                                               \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm, isc_task_t *task) {                              \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
		ievent->task = task;                                           \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		ievent->task = NULL;                                           \
		isc__nm_put_netievent(nm, ievent);                             \
	}
595 
/* UDP send netievent: socket + destination peer + request. */
typedef struct isc__netievent_udpsend {
	NETIEVENT__SOCKET;
	isc_sockaddr_t peer;
	isc__nm_uvreq_t *req;
} isc__netievent_udpsend_t;

/*
 * Base netievent "class": type tag + queue link only.  All extended
 * netievents share this prefix (see NETIEVENT__SOCKET above).
 */
struct isc__netievent {
	isc__netievent_type type;
	ISC_LINK(isc__netievent_t) link;
};

#define NETIEVENT_TYPE(type) typedef isc__netievent_t isc__netievent_##type##_t

#define NETIEVENT_DECL(type)                                                   \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent)

#define NETIEVENT_DEF(type)                                                    \
	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
		isc_nm_t *nm) {                                                \
		isc__netievent_##type##_t *ievent =                            \
			isc__nm_get_netievent(nm, netievent_##type);           \
                                                                               \
		return (ievent);                                               \
	}                                                                      \
                                                                               \
	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
					  isc__netievent_##type##_t *ievent) { \
		isc__nm_put_netievent(nm, ievent);                             \
	}

/* Large enough to hold any of the netievent variants above. */
typedef union {
	isc__netievent_t ni;
	isc__netievent__socket_t nis;
	isc__netievent__socket_req_t nisr;
	isc__netievent_udpsend_t nius;
	isc__netievent__socket_quota_t nisq;
} isc__netievent_storage_t;

/*
 * Work item for a uv_work threadpool.
 */
typedef struct isc__nm_work {
	isc_nm_t *netmgr;
	uv_work_t req;
	isc_nm_workcb_t cb;
	isc_nm_after_workcb_t after_cb;
	void *data;
} isc__nm_work_t;
646 
/*
 * Network manager
 */
#define NM_MAGIC    ISC_MAGIC('N', 'E', 'T', 'M')
#define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC)

struct isc_nm {
	int magic;
	isc_refcount_t references;
	isc_mem_t *mctx;
	int nworkers;
	int nlisteners;
	isc_mutex_t lock;
	isc_condition_t wkstatecond;
	isc_condition_t wkpausecond;
	isc__networker_t *workers; /* array of nworkers workers */

	isc_stats_t *stats;

	uint_fast32_t workers_running;
	atomic_uint_fast32_t workers_paused;
	atomic_uint_fast32_t maxudp;

	bool load_balance_sockets;

	atomic_bool paused;

	/*
	 * Active connections are being closed and new connections are
	 * no longer allowed.
	 */
	atomic_bool closing;

	/*
	 * A worker is actively waiting for other workers, for example to
	 * stop listening; that means no other thread can do the same thing
	 * or pause, or we'll deadlock. We have to either re-enqueue our
	 * event or wait for the other one to finish if we want to pause.
	 */
	atomic_int interlocked;

	/*
	 * Timeout values for TCP connections, corresponding to
	 * tcp-initial-timeout, tcp-idle-timeout, tcp-keepalive-timeout,
	 * and tcp-advertised-timeout. Note that these are stored in
	 * milliseconds so they can be used directly with the libuv timer,
	 * but they are configured in tenths of seconds.
	 */
	atomic_uint_fast32_t init;
	atomic_uint_fast32_t idle;
	atomic_uint_fast32_t keepalive;
	atomic_uint_fast32_t advertised;

	/* NOTE(review): presumably worker rendezvous points for the
	 * pause/resume sequence — confirm usage in netmgr.c. */
	isc_barrier_t pausing;
	isc_barrier_t resuming;

#ifdef NETMGR_TRACE
	ISC_LIST(isc_nmsocket_t) active_sockets;
#endif
};
707 
typedef enum isc_nmsocket_type {
	isc_nm_udpsocket,
	isc_nm_udplistener, /* Aggregate of nm_udpsocks */
	isc_nm_tcpsocket,
	isc_nm_tcplistener,
	isc_nm_tcpdnslistener,
	isc_nm_tcpdnssocket,
} isc_nmsocket_type;

/*%
 * A universal structure for either a single socket or a group of
 * dup'd/SO_REUSE_PORT-using sockets listening on the same interface.
 */
#define NMSOCK_MAGIC	ISC_MAGIC('N', 'M', 'S', 'K')
#define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC)

/*%
 * Index into socket stat counter arrays.
 */
enum {
	STATID_OPEN = 0,
	STATID_OPENFAIL = 1,
	STATID_CLOSE = 2,
	STATID_BINDFAIL = 3,
	STATID_CONNECTFAIL = 4,
	STATID_CONNECT = 5,
	STATID_ACCEPTFAIL = 6,
	STATID_ACCEPT = 7,
	STATID_SENDFAIL = 8,
	STATID_RECVFAIL = 9,
	STATID_ACTIVE = 10
};

typedef void (*isc_nm_closehandlecb_t)(void *arg);
/*%<
 * Opaque callback function, used for isc_nmhandle 'reset' and 'free'
 * callbacks.
 */
746 
747 struct isc_nmsocket {
748 	/*% Unlocked, RO */
749 	int magic;
750 	int tid;
751 	isc_nmsocket_type type;
752 	isc_nm_t *mgr;
753 
754 	/*% Parent socket for multithreaded listeners */
755 	isc_nmsocket_t *parent;
756 	/*% Listener socket this connection was accepted on */
757 	isc_nmsocket_t *listener;
758 	/*% Self socket */
759 	isc_nmsocket_t *self;
760 
761 	isc_barrier_t startlistening;
762 	isc_barrier_t stoplistening;
763 
764 	/*%
765 	 * quota is the TCP client, attached when a TCP connection
766 	 * is established. pquota is a non-attached pointer to the
767 	 * TCP client quota, stored in listening sockets but only
768 	 * attached in connected sockets.
769 	 */
770 	isc_quota_t *quota;
771 	isc_quota_t *pquota;
772 	isc_quota_cb_t quotacb;
773 
774 	/*%
775 	 * Socket statistics
776 	 */
777 	const isc_statscounter_t *statsindex;
778 
779 	/*%
780 	 * TCP read/connect timeout timers.
781 	 */
782 	uv_timer_t read_timer;
783 	uint64_t read_timeout;
784 	uint64_t connect_timeout;
785 
786 	/*%
787 	 * TCP write timeout timer.
788 	 */
789 	uint64_t write_timeout;
790 
791 	/*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
792 	isc_nmsocket_t *outer;
793 
794 	/*% server socket for connections */
795 	isc_nmsocket_t *server;
796 
797 	/*% Child sockets for multi-socket setups */
798 	isc_nmsocket_t *children;
799 	uint_fast32_t nchildren;
800 	isc_sockaddr_t iface;
801 	isc_nmhandle_t *statichandle;
802 	isc_nmhandle_t *outerhandle;
803 
804 	/*% Extra data allocated at the end of each isc_nmhandle_t */
805 	size_t extrahandlesize;
806 
807 	/*% TCP backlog */
808 	int backlog;
809 
810 	/*% libuv data */
811 	uv_os_sock_t fd;
812 	union uv_any_handle uv_handle;
813 
814 	/*% Peer address */
815 	isc_sockaddr_t peer;
816 
817 	/* Atomic */
818 	/*% Number of running (e.g. listening) child sockets */
819 	atomic_uint_fast32_t rchildren;
820 
821 	/*%
822 	 * Socket is active if it's listening, working, etc. If it's
823 	 * closing, then it doesn't make a sense, for example, to
824 	 * push handles or reqs for reuse.
825 	 */
826 	atomic_bool active;
827 	atomic_bool destroying;
828 
829 	/*%
830 	 * Socket is closed if it's not active and all the possible
831 	 * callbacks were fired, there are no active handles, etc.
832 	 * If active==false but closed==false, that means the socket
833 	 * is closing.
834 	 */
835 	atomic_bool closing;
836 	atomic_bool closed;
837 	atomic_bool listening;
838 	atomic_bool connecting;
839 	atomic_bool connected;
840 	bool accepting;
841 	bool reading;
842 	atomic_bool timedout;
843 	isc_refcount_t references;
844 
845 	/*%
846 	 * Established an outgoing connection, as client not server.
847 	 */
848 	atomic_bool client;
849 
850 	/*%
851 	 * TCPDNS socket has been set not to pipeline.
852 	 */
853 	atomic_bool sequential;
854 
855 	/*%
856 	 * The socket is processing read callback, this is guard to not read
857 	 * data before the readcb is back.
858 	 */
859 	bool processing;
860 
861 	/*%
862 	 * A TCP socket has had isc_nm_pauseread() called.
863 	 */
864 	atomic_bool readpaused;
865 
866 	/*%
867 	 * A TCP or TCPDNS socket has been set to use the keepalive
868 	 * timeout instead of the default idle timeout.
869 	 */
870 	atomic_bool keepalive;
871 
872 	/*%
873 	 * 'spare' handles for that can be reused to avoid allocations,
874 	 * for UDP.
875 	 */
876 	isc_astack_t *inactivehandles;
877 	isc_astack_t *inactivereqs;
878 
879 	/*%
880 	 * Used to wait for TCP listening events to complete, and
881 	 * for the number of running children to reach zero during
882 	 * shutdown.
883 	 *
884 	 * We use two condition variables to prevent the race where the netmgr
885 	 * threads would be able to finish and destroy the socket before it's
886 	 * unlocked by the isc_nm_listen<proto>() function.  So, the flow is as
887 	 * follows:
888 	 *
	 *   1. parent thread creates all children sockets and passes them to
	 *      netthreads, looks at the signaling variable and WAIT(cond) until
	 *      the children are done initializing
892 	 *
893 	 *   2. the events get picked by netthreads, calls the libuv API (and
894 	 *      either succeeds or fails) and WAIT(scond) until all other
895 	 *      children sockets in netthreads are initialized and the listening
896 	 *      socket lock is unlocked
897 	 *
898 	 *   3. the control is given back to the parent thread which now either
	 *      returns success or shuts down the listener if an error has
	 *      occurred in the children netthread
901 	 *
902 	 * NOTE: The other approach would be doing an extra attach to the parent
903 	 * listening socket, and then detach it in the parent thread, but that
904 	 * breaks the promise that once the libuv socket is initialized on the
905 	 * nmsocket, the nmsocket needs to be handled only by matching
906 	 * netthread, so in fact that would add a complexity in a way that
907 	 * isc__nmsocket_detach would have to be converted to use an
	 * asynchronous netievent.
909 	 */
910 	isc_mutex_t lock;
911 	isc_condition_t cond;
912 	isc_condition_t scond;
913 
914 	/*%
915 	 * Used to pass a result back from listen or connect events.
916 	 */
917 	isc_result_t result;
918 
919 	/*%
920 	 * Current number of active handles.
921 	 */
922 	atomic_int_fast32_t ah;
923 
924 	/*% Buffer for TCPDNS processing */
925 	size_t buf_size;
926 	size_t buf_len;
927 	unsigned char *buf;
928 
929 	/*%
930 	 * This function will be called with handle->sock
931 	 * as the argument whenever a handle's references drop
932 	 * to zero, after its reset callback has been called.
933 	 */
934 	isc_nm_closehandlecb_t closehandle_cb;
935 
936 	isc_nmhandle_t *recv_handle;
937 	isc_nm_recv_cb_t recv_cb;
938 	void *recv_cbarg;
939 	bool recv_read;
940 
941 	isc_nm_cb_t connect_cb;
942 	void *connect_cbarg;
943 
944 	isc_nm_accept_cb_t accept_cb;
945 	void *accept_cbarg;
946 
947 	atomic_int_fast32_t active_child_connections;
948 
949 #ifdef NETMGR_TRACE
950 	void *backtrace[TRACE_SIZE];
951 	int backtrace_size;
952 	LINK(isc_nmsocket_t) active_link;
953 	ISC_LIST(isc_nmhandle_t) active_handles;
954 #endif
955 };
956 
957 bool
958 isc__nm_in_netthread(void);
959 /*%
960  * Returns 'true' if we're in the network thread.
961  */
962 
963 void
964 isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
965 /*%<
966  * If the caller is already in the matching nmthread, process the netievent
967  * directly, if not enqueue using isc__nm_enqueue_ievent().
968  */
969 
970 void
971 isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
972 /*%<
 * Enqueue an ievent onto a specific worker queue. (This is the only safe
974  * way to use an isc__networker_t from another thread.)
975  */
976 
977 void
978 isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf);
979 /*%<
980  * Free a buffer allocated for a receive operation.
981  *
 * Note that as currently implemented, this doesn't actually
 * free anything; it only marks the isc__networker's UDP receive
 * buffer as "not in use".
985  */
986 
987 isc_nmhandle_t *
988 isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
989 		   isc_sockaddr_t *local FLARG);
990 /*%<
991  * Get a handle for the socket 'sock', allocating a new one
992  * if there isn't one available in 'sock->inactivehandles'.
993  *
994  * If 'peer' is not NULL, set the handle's peer address to 'peer',
995  * otherwise set it to 'sock->peer'.
996  *
997  * If 'local' is not NULL, set the handle's local address to 'local',
998  * otherwise set it to 'sock->iface->addr'.
999  *
1000  * 'sock' will be attached to 'handle->sock'. The caller may need
1001  * to detach the socket afterward.
1002  */
1003 
1004 isc__nm_uvreq_t *
1005 isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG);
1006 /*%<
1007  * Get a UV request structure for the socket 'sock', allocating a
1008  * new one if there isn't one available in 'sock->inactivereqs'.
1009  */
1010 
1011 void
1012 isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG);
1013 /*%<
1014  * Completes the use of a UV request structure, setting '*req' to NULL.
1015  *
1016  * The UV request is pushed onto the 'sock->inactivereqs' stack or,
1017  * if that doesn't work, freed.
1018  */
1019 
1020 void
1021 isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
1022 		    isc_sockaddr_t *iface FLARG);
1023 /*%<
1024  * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'
1025  * and its interface to 'iface'.
1026  */
1027 
1028 void
1029 isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG);
1030 /*%<
1031  * Attach to a socket, increasing refcount
1032  */
1033 
1034 void
1035 isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG);
1036 /*%<
1037  * Detach from socket, decreasing refcount and possibly destroying the
1038  * socket if it's no longer referenced.
1039  */
1040 
1041 void
1042 isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG);
1043 /*%<
 * Mark 'sock' as inactive, close it if necessary, and destroy it
1045  * if there are no remaining references or active handles.
1046  */
1047 
1048 void
1049 isc__nmsocket_shutdown(isc_nmsocket_t *sock);
1050 /*%<
1051  * Initiate the socket shutdown which actively calls the active
1052  * callbacks.
1053  */
1054 
1055 bool
1056 isc__nmsocket_active(isc_nmsocket_t *sock);
1057 /*%<
1058  * Determine whether 'sock' is active by checking 'sock->active'
1059  * or, for child sockets, 'sock->parent->active'.
1060  */
1061 
1062 bool
1063 isc__nmsocket_deactivate(isc_nmsocket_t *sock);
1064 /*%<
1065  * @brief Deactivate active socket
1066  *
 * Atomically deactivate the socket by setting @p sock->active or, for child
1068  * sockets, @p sock->parent->active to @c false
1069  *
1070  * @param[in] sock - valid nmsocket
1071  * @return @c false if the socket was already inactive, @c true otherwise
1072  */
1073 
1074 void
1075 isc__nmsocket_clearcb(isc_nmsocket_t *sock);
1076 /*%<
1077  * Clear the recv and accept callbacks in 'sock'.
1078  */
1079 
1080 void
1081 isc__nmsocket_timer_stop(isc_nmsocket_t *sock);
1082 void
1083 isc__nmsocket_timer_start(isc_nmsocket_t *sock);
1084 void
1085 isc__nmsocket_timer_restart(isc_nmsocket_t *sock);
1086 bool
1087 isc__nmsocket_timer_running(isc_nmsocket_t *sock);
1088 /*%<
1089  * Start/stop/restart/check the timeout on the socket
1090  */
1091 
1092 void
1093 isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1094 		  isc_result_t eresult, bool async);
1095 
1096 void
1097 isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0);
1098 /*%<
1099  * Issue a connect callback on the socket, used to call the callback
1100  */
1101 
1102 void
1103 isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1104 	       isc_result_t eresult);
1105 void
1106 isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0);
1107 
1108 /*%<
1109  * Issue a read callback on the socket, used to call the callback
1110  * on failed conditions when the event can't be scheduled on the uv loop.
1111  *
1112  */
1113 
1114 void
1115 isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1116 	       isc_result_t eresult, bool async);
1117 void
1118 isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0);
1119 /*%<
1120  * Issue a write callback on the socket, used to call the callback
1121  * on failed conditions when the event can't be scheduled on the uv loop.
1122  */
1123 
1124 void
1125 isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0);
1126 /*%<
1127  * Walk through all uv handles, get the underlying sockets and issue
1128  * close on them.
1129  */
1130 
1131 void
1132 isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
1133 		 isc_nm_cb_t cb, void *cbarg);
1134 /*%<
1135  * Back-end implementation of isc_nm_send() for UDP handles.
1136  */
1137 
1138 void
1139 isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
1140 /*
1141  * Back-end implementation of isc_nm_read() for UDP handles.
1142  */
1143 
1144 void
1145 isc__nm_udp_close(isc_nmsocket_t *sock);
1146 /*%<
1147  * Close a UDP socket.
1148  */
1149 
1150 void
1151 isc__nm_udp_cancelread(isc_nmhandle_t *handle);
1152 /*%<
1153  * Stop reading on a connected UDP handle.
1154  */
1155 
1156 void
1157 isc__nm_udp_shutdown(isc_nmsocket_t *sock);
1158 /*%<
1159  * Called during the shutdown process to close and clean up connected
1160  * sockets.
1161  */
1162 
1163 void
1164 isc__nm_udp_stoplistening(isc_nmsocket_t *sock);
1165 /*%<
1166  * Stop listening on 'sock'.
1167  */
1168 
1169 void
1170 isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1171 /*%<
1172  * Set or clear the recv timeout for the UDP socket associated with 'handle'.
1173  */
1174 
1175 void
1176 isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0);
1177 void
1178 isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1179 void
1180 isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0);
1181 void
1182 isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0);
1183 void
1184 isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0);
1185 void
1186 isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
1187 void
1188 isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0);
1189 /*%<
1190  * Callback handlers for asynchronous UDP events (listen, stoplisten, send).
1191  */
1192 
1193 void
1194 isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
1195 		 isc_nm_cb_t cb, void *cbarg);
1196 /*%<
1197  * Back-end implementation of isc_nm_send() for TCP handles.
1198  */
1199 
1200 void
1201 isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
1202 /*
1203  * Back-end implementation of isc_nm_read() for TCP handles.
1204  */
1205 
1206 void
1207 isc__nm_tcp_close(isc_nmsocket_t *sock);
1208 /*%<
1209  * Close a TCP socket.
1210  */
1211 void
1212 isc__nm_tcp_pauseread(isc_nmhandle_t *handle);
1213 /*%<
1214  * Pause reading on this handle, while still remembering the callback.
1215  */
1216 
1217 void
1218 isc__nm_tcp_resumeread(isc_nmhandle_t *handle);
1219 /*%<
1220  * Resume reading from socket.
1221  *
1222  */
1223 
1224 void
1225 isc__nm_tcp_shutdown(isc_nmsocket_t *sock);
1226 /*%<
1227  * Called during the shutdown process to close and clean up connected
1228  * sockets.
1229  */
1230 
1231 void
1232 isc__nm_tcp_cancelread(isc_nmhandle_t *handle);
1233 /*%<
1234  * Stop reading on a connected TCP handle.
1235  */
1236 
1237 void
1238 isc__nm_tcp_stoplistening(isc_nmsocket_t *sock);
1239 /*%<
1240  * Stop listening on 'sock'.
1241  */
1242 
1243 int_fast32_t
1244 isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock);
1245 /*%<
1246  * Returns the number of active connections for the TCP listener socket.
1247  */
1248 
1249 void
1250 isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1251 /*%<
1252  * Set the read timeout for the TCP socket associated with 'handle'.
1253  */
1254 
1255 void
1256 isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1257 void
1258 isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0);
1259 void
1260 isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1261 void
1262 isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0);
1263 void
1264 isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0);
1265 void
1266 isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0);
1267 void
1268 isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0);
1269 void
1270 isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0);
1271 void
1272 isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0);
1273 void
1274 isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
1275 void
1276 isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0);
1277 /*%<
1278  * Callback handlers for asynchronous TCP events (connect, listen,
1279  * stoplisten, send, read, pause, close).
1280  */
1281 
1282 void
1283 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1284 void
1285 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1286 void
1287 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
1288 
1289 void
1290 isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
1291 		    isc_nm_cb_t cb, void *cbarg);
1292 /*%<
1293  * Back-end implementation of isc_nm_send() for TCPDNS handles.
1294  */
1295 
1296 void
1297 isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock);
1298 
1299 void
1300 isc__nm_tcpdns_close(isc_nmsocket_t *sock);
1301 /*%<
1302  * Close a TCPDNS socket.
1303  */
1304 
1305 void
1306 isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock);
1307 /*%<
1308  * Stop listening on 'sock'.
1309  */
1310 
1311 void
1312 isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1313 /*%<
1314  * Set the read timeout and reset the timer for the TCPDNS socket
1315  * associated with 'handle', and the TCP socket it wraps around.
1316  */
1317 
1318 void
1319 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1320 void
1321 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1322 void
1323 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
1324 void
1325 isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0);
1326 void
1327 isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0);
1328 void
1329 isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0);
1330 void
1331 isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0);
1332 void
1333 isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0);
1334 /*%<
1335  * Callback handlers for asynchronous TCPDNS events.
1336  */
1337 
1338 void
1339 isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
1340 /*
1341  * Back-end implementation of isc_nm_read() for TCPDNS handles.
1342  */
1343 
1344 void
1345 isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle);
1346 /*%<
1347  * Stop reading on a connected TCPDNS handle.
1348  */
1349 
1350 #define isc__nm_uverr2result(x) \
1351 	isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__)
1352 isc_result_t
1353 isc___nm_uverr2result(int uverr, bool dolog, const char *file,
1354 		      unsigned int line, const char *func);
1355 /*%<
1356  * Convert a libuv error value into an isc_result_t.  The
1357  * list of supported error values is not complete; new users
1358  * of this function should add any expected errors that are
1359  * not already there.
1360  */
1361 
1362 bool
1363 isc__nm_acquire_interlocked(isc_nm_t *mgr);
1364 /*%<
1365  * Try to acquire interlocked state; return true if successful.
1366  */
1367 
1368 void
1369 isc__nm_drop_interlocked(isc_nm_t *mgr);
1370 /*%<
1371  * Drop interlocked state; signal waiters.
1372  */
1373 
1374 void
1375 isc__nm_acquire_interlocked_force(isc_nm_t *mgr);
1376 /*%<
1377  * Actively wait for interlocked state.
1378  */
1379 
1380 void
1381 isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid);
1382 /*%<
1383  * Increment socket-related statistics counters.
1384  */
1385 
1386 void
1387 isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid);
1388 /*%<
1389  * Decrement socket-related statistics counters.
1390  */
1391 
1392 isc_result_t
1393 isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp);
1394 /*%<
1395  * Platform independent socket() version
1396  */
1397 
1398 void
1399 isc__nm_closesocket(uv_os_sock_t sock);
1400 /*%<
1401  * Platform independent closesocket() version
1402  */
1403 
1404 isc_result_t
1405 isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family);
1406 /*%<
1407  * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle
1408  */
1409 
1410 isc_result_t
1411 isc__nm_socket_reuse(uv_os_sock_t fd);
1412 /*%<
1413  * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd
1414  */
1415 
1416 isc_result_t
1417 isc__nm_socket_reuse_lb(uv_os_sock_t fd);
1418 /*%<
1419  * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd
1420  */
1421 
1422 isc_result_t
1423 isc__nm_socket_incoming_cpu(uv_os_sock_t fd);
1424 /*%<
1425  * Set the SO_INCOMING_CPU socket option on the fd if available
1426  */
1427 
1428 isc_result_t
1429 isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family);
1430 /*%<
1431  * Disable the Path MTU Discovery, either by disabling IP(V6)_DONTFRAG socket
1432  * option, or setting the IP(V6)_MTU_DISCOVER socket option to IP_PMTUDISC_OMIT
1433  */
1434 
1435 isc_result_t
1436 isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms);
1437 /*%<
1438  * Set the connection timeout in milliseconds, on non-Linux platforms,
1439  * the minimum value must be at least 1000 (1 second).
1440  */
1441 
1442 isc_result_t
1443 isc__nm_socket_tcp_nodelay(uv_os_sock_t fd);
1444 /*%<
1445  * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY).
1446  */
1447 
1448 /*
1449  * typedef all the netievent types
1450  */
1451 
1452 NETIEVENT_SOCKET_TYPE(close);
1453 NETIEVENT_SOCKET_TYPE(tcpclose);
1454 NETIEVENT_SOCKET_TYPE(tcplisten);
1455 NETIEVENT_SOCKET_TYPE(tcppauseread);
1456 NETIEVENT_SOCKET_TYPE(tcpstop);
1457 NETIEVENT_SOCKET_TYPE(udpclose);
1458 NETIEVENT_SOCKET_TYPE(udplisten);
1459 NETIEVENT_SOCKET_TYPE(udpread);
1460 /* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */
1461 NETIEVENT_SOCKET_TYPE(udpstop);
1462 
1463 NETIEVENT_SOCKET_TYPE(tcpdnsclose);
1464 NETIEVENT_SOCKET_TYPE(tcpdnsread);
1465 NETIEVENT_SOCKET_TYPE(tcpdnsstop);
1466 NETIEVENT_SOCKET_TYPE(tcpdnslisten);
1467 NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect);
1468 NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend);
1469 NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel);
1470 NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept);
1471 
1472 NETIEVENT_SOCKET_REQ_TYPE(tcpconnect);
1473 NETIEVENT_SOCKET_REQ_TYPE(tcpsend);
1474 NETIEVENT_SOCKET_TYPE(tcpstartread);
1475 NETIEVENT_SOCKET_REQ_TYPE(udpconnect);
1476 
1477 NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb);
1478 NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb);
1479 NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb);
1480 
1481 NETIEVENT_SOCKET_HANDLE_TYPE(detach);
1482 NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel);
1483 NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel);
1484 
1485 NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept);
1486 
1487 NETIEVENT_TYPE(pause);
1488 NETIEVENT_TYPE(resume);
1489 NETIEVENT_TYPE(shutdown);
1490 NETIEVENT_TYPE(stop);
1491 
1492 NETIEVENT_TASK_TYPE(task);
1493 NETIEVENT_TASK_TYPE(privilegedtask);
1494 
1495 /* Now declared the helper functions */
1496 
1497 NETIEVENT_SOCKET_DECL(close);
1498 NETIEVENT_SOCKET_DECL(tcpclose);
1499 NETIEVENT_SOCKET_DECL(tcplisten);
1500 NETIEVENT_SOCKET_DECL(tcppauseread);
1501 NETIEVENT_SOCKET_DECL(tcpstartread);
1502 NETIEVENT_SOCKET_DECL(tcpstop);
1503 NETIEVENT_SOCKET_DECL(udpclose);
1504 NETIEVENT_SOCKET_DECL(udplisten);
1505 NETIEVENT_SOCKET_DECL(udpread);
1506 NETIEVENT_SOCKET_DECL(udpsend);
1507 NETIEVENT_SOCKET_DECL(udpstop);
1508 
1509 NETIEVENT_SOCKET_DECL(tcpdnsclose);
1510 NETIEVENT_SOCKET_DECL(tcpdnsread);
1511 NETIEVENT_SOCKET_DECL(tcpdnsstop);
1512 NETIEVENT_SOCKET_DECL(tcpdnslisten);
1513 NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect);
1514 NETIEVENT_SOCKET_REQ_DECL(tcpdnssend);
1515 NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel);
1516 NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept);
1517 
1518 NETIEVENT_SOCKET_REQ_DECL(tcpconnect);
1519 NETIEVENT_SOCKET_REQ_DECL(tcpsend);
1520 NETIEVENT_SOCKET_REQ_DECL(udpconnect);
1521 
1522 NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb);
1523 NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb);
1524 NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb);
1525 
1526 NETIEVENT_SOCKET_HANDLE_DECL(udpcancel);
1527 NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel);
1528 NETIEVENT_SOCKET_DECL(detach);
1529 
1530 NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept);
1531 
1532 NETIEVENT_DECL(pause);
1533 NETIEVENT_DECL(resume);
1534 NETIEVENT_DECL(shutdown);
1535 NETIEVENT_DECL(stop);
1536 
1537 NETIEVENT_TASK_DECL(task);
1538 NETIEVENT_TASK_DECL(privilegedtask);
1539 
void
isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
void
isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
void
isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
/*%<
 * Per-transport handlers invoked when a read on 'sock' fails;
 * 'result' is the error being reported.
 */

isc_result_t
isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock);
/*%<
 * Process buffered TCPDNS data on 'sock' (presumably the
 * "Buffer for TCPDNS processing" members of isc_nmsocket_t —
 * confirm in the TCPDNS implementation).
 */

isc__nm_uvreq_t *
isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr);
/*%<
 * Get a UV request to be used for a read on 'sock'.
 */

void
isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf);
/*%<
 * libuv allocation callback (uv_alloc_cb signature): supplies a
 * receive buffer of up to 'size' bytes in '*buf'.
 */

void
isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
		    const struct sockaddr *addr, unsigned flags);
void
isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
void
isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
/*%<
 * libuv receive/read callbacks for UDP, TCP and TCPDNS sockets
 * (uv_udp_recv_cb and uv_read_cb signatures, respectively).
 */

isc_result_t
isc__nm_start_reading(isc_nmsocket_t *sock);
void
isc__nm_stop_reading(isc_nmsocket_t *sock);
isc_result_t
isc__nm_process_sock_buffer(isc_nmsocket_t *sock);
void
isc__nm_resume_processing(void *arg);
bool
isc__nmsocket_closing(isc_nmsocket_t *sock);
bool
isc__nm_closing(isc_nmsocket_t *sock);
/*%<
 * Helpers to start/stop reading on 'sock', process buffered socket
 * data, and resume paused processing.  The two closing predicates
 * report closing state; the precise distinction between them is
 * defined in the implementation (netmgr.c), not visible here.
 */

void
isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len);
/*%<
 * (Re)allocate the DNS message buffer for 'sock' to hold at least
 * 'len' bytes — NOTE(review): inferred from the name; confirm.
 */

void
isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
		       isc_result_t eresult);
void
isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult);
void
isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
			  isc_result_t eresult, bool async);
void
isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async);
/*%<
 * Failure handlers for send/accept/connect/read operations;
 * 'eresult'/'result' is the error being reported.
 */

void
isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota);
/*%<
 * Log the result of an incoming connection attempt;
 * 'can_log_quota' presumably gates quota-related log messages.
 */

/*
 * Timeout callbacks (uv_timer_cb signatures, except the write
 * timeout which is invoked with a result code).
 */
void
isc__nmsocket_connecttimeout_cb(uv_timer_t *timer);
void
isc__nmsocket_readtimeout_cb(uv_timer_t *timer);
void
isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult);
1603 
1604 /*%<
1605  *
1606  * Maximum number of simultaneous handles in flight supported for a single
1607  * connected TCPDNS socket. This value was chosen arbitrarily, and may be
1608  * changed in the future.
1609  */
1610 #define STREAM_CLIENTS_PER_CONN 23
1611 
/*%<
 * Abort with a fatal error if a libuv call returned nonzero,
 * reporting the stringified call name and the uv_strerror() text.
 *
 * Wrapped in do { } while (0) so that it expands to a single
 * statement and is safe inside an unbraced if/else; '(ret)' is
 * captured once so that an expression argument is not evaluated
 * twice (the previous version evaluated 'ret' both in the
 * condition and in uv_strerror()).
 */
#define UV_RUNTIME_CHECK(func, ret)                                   \
	do {                                                          \
		int _uvrc = (ret);                                    \
		if (_uvrc != 0) {                                     \
			isc_error_fatal(__FILE__, __LINE__,           \
					"%s failed: %s\n", #func,     \
					uv_strerror(_uvrc));          \
		}                                                     \
	} while (0)
1617