xref: /netbsd-src/external/mpl/bind/dist/lib/isc/netmgr/netmgr-int.h (revision a04395531661c5e8d314125d5ae77d4cbedd5d73)
1 /*	$NetBSD: netmgr-int.h,v 1.7 2021/08/19 11:50:18 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * This Source Code Form is subject to the terms of the Mozilla Public
7  * License, v. 2.0. If a copy of the MPL was not distributed with this
8  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9  *
10  * See the COPYRIGHT file distributed with this work for additional
11  * information regarding copyright ownership.
12  */
13 
14 #pragma once
15 
16 #include <unistd.h>
17 #include <uv.h>
18 
19 #include <openssl/err.h>
20 #include <openssl/ssl.h>
21 
22 #include <isc/astack.h>
23 #include <isc/atomic.h>
24 #include <isc/barrier.h>
25 #include <isc/buffer.h>
26 #include <isc/condition.h>
27 #include <isc/magic.h>
28 #include <isc/mem.h>
29 #include <isc/netmgr.h>
30 #include <isc/queue.h>
31 #include <isc/quota.h>
32 #include <isc/random.h>
33 #include <isc/refcount.h>
34 #include <isc/region.h>
35 #include <isc/result.h>
36 #include <isc/rwlock.h>
37 #include <isc/sockaddr.h>
38 #include <isc/stats.h>
39 #include <isc/thread.h>
40 #include <isc/util.h>
41 
42 #include "uv-compat.h"
43 
/*
 * Sentinel value for a socket/handle not (yet) bound to a specific
 * networker thread; valid thread ids are >= 0.
 */
44 #define ISC_NETMGR_TID_UNKNOWN -1
45 
46 /* Must be different from ISC_NETMGR_TID_UNKNOWN */
47 #define ISC_NETMGR_NON_INTERLOCKED -2
48 
49 #if !defined(WIN32)
50 /*
51  * New versions of libuv support recvmmsg on unices.
52  * Since recvbuf is only allocated per worker allocating a bigger one is not
53  * that wasteful.
54  * 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source, nothing
55  * will break if the original value changes.
56  */
57 #define ISC_NETMGR_RECVBUF_SIZE (20 * 65536)
58 #else
59 #define ISC_NETMGR_RECVBUF_SIZE (65536)
60 #endif
61 
/*
 * Room for a 2-octet TCP DNS length prefix plus a maximal 65535-octet
 * message (see the 'tcplen' field of isc__nm_uvreq).
 */
62 #define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
63 
/*%
 * Regular TCP buffer size.
 */
#define NM_REG_BUF 4096

/*%
 * Larger buffer for when the regular one isn't enough; this will
 * hold two full DNS packets with lengths.  netmgr receives 64k at
 * most in TCPDNS connections, so there's no risk of overrun
 * when using a buffer this size.
 *
 * The replacement list is fully parenthesized so the macro expands
 * safely inside larger expressions; the unparenthesized form
 * "(65535 + 2) * 2" would be re-bound by surrounding operators of
 * higher precedence (e.g. "x % NM_BIG_BUF").
 */
#define NM_BIG_BUF ((65535 + 2) * 2)
76 
/*
 * Load-balanced port sharing is available either via SO_REUSEPORT_LB
 * or, on Linux, via plain SO_REUSEPORT (which load-balances incoming
 * connections/datagrams across listeners by default).
 */
77 #if defined(SO_REUSEPORT_LB) || (defined(SO_REUSEPORT) && defined(__linux__))
78 #define HAVE_SO_REUSEPORT_LB 1
79 #endif
80 
81 /*
82  * Define NETMGR_TRACE to activate tracing of handles and sockets.
83  * This will impair performance but enables us to quickly determine,
84  * if netmgr resources haven't been cleaned up on shutdown, which ones
85  * are still in use.
86  */
87 #ifdef NETMGR_TRACE
/* Number of stack frames captured in handle/socket backtraces. */
88 #define TRACE_SIZE 8
89 
/* Debugging aid: dump the still-active sockets/handles of 'nm'. */
90 void
91 isc__nm_dump_active(isc_nm_t *nm);
92 
/*
 * Portable "current thread id as uint32_t", used only in trace output.
 */
93 #if defined(__linux__)
94 #include <syscall.h>
95 #define gettid() (uint32_t) syscall(SYS_gettid)
96 #elif defined(_WIN32)
97 #define gettid() (uint32_t) GetCurrentThreadId()
98 #else
99 #define gettid() (uint32_t) pthread_self()
100 #endif
101 
#ifdef NETMGR_TRACE_VERBOSE
/*
 * Print "<tid>:<netthread>:<file>:<line>:<func>:" followed by the
 * caller-supplied message; relies on FLARG-provided file/line/func
 * variables being in scope at the expansion site.
 */
#define NETMGR_TRACE_LOG(format, ...)                                \
	fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \
		isc_nm_tid(), file, line, func, __VA_ARGS__)
#else
/*
 * No-op variant: still "consumes" the FLARG variables to silence
 * unused-variable warnings.  Wrapped in do { } while (0) so that the
 * macro behaves as a single statement and is safe in an unbraced
 * if/else (the previous three bare statements were not).
 */
#define NETMGR_TRACE_LOG(format, ...) \
	do {                          \
		(void)file;           \
		(void)line;           \
		(void)func;           \
	} while (0)
#endif
112 
/*
 * FLARG machinery: when NETMGR_TRACE is defined, every traced function
 * grows trailing (file, line, func) parameters; these macros declare
 * them (FLARG), forward them (FLARG_PASS), and copy them into/out of
 * netievent structures (FLARG_IEVENT / FLARG_IEVENT_PASS).  The
 * isc__nm_*/isc__nmsocket_* wrappers below inject the caller's
 * __FILE__/__LINE__/__func__ into the isc___* implementations.
 */
113 #define FLARG_PASS , file, line, func
114 #define FLARG                                              \
115 	, const char *file __attribute__((unused)),        \
116 		unsigned int line __attribute__((unused)), \
117 		const char *func __attribute__((unused))
118 #define FLARG_IEVENT(ievent)              \
119 	const char *file = ievent->file;  \
120 	unsigned int line = ievent->line; \
121 	const char *func = ievent->func;
122 #define FLARG_IEVENT_PASS(ievent) \
123 	ievent->file = file;      \
124 	ievent->line = line;      \
125 	ievent->func = func;
126 #define isc__nm_uvreq_get(req, sock) \
127 	isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__)
128 #define isc__nm_uvreq_put(req, sock) \
129 	isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__)
130 #define isc__nmsocket_init(sock, mgr, type, iface)                      \
131 	isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \
132 			    __func__)
133 #define isc__nmsocket_put(sockp) \
134 	isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__)
135 #define isc__nmsocket_attach(sock, target) \
136 	isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__)
137 #define isc__nmsocket_detach(socketp) \
138 	isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__)
139 #define isc__nmsocket_close(socketp) \
140 	isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__)
141 #define isc__nmhandle_get(sock, peer, local) \
142 	isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__)
143 #define isc__nmsocket_prep_destroy(sock) \
144 	isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__)
145 #else
/*
 * Tracing disabled: FLARG* compile away to nothing and the wrappers
 * call the isc___* implementations without location arguments.
 */
146 #define NETMGR_TRACE_LOG(format, ...)
147 
148 #define FLARG_PASS
149 #define FLARG
150 #define FLARG_IEVENT(ievent)
151 #define FLARG_IEVENT_PASS(ievent)
152 #define isc__nm_uvreq_get(req, sock) isc___nm_uvreq_get(req, sock)
153 #define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock)
154 #define isc__nmsocket_init(sock, mgr, type, iface) \
155 	isc___nmsocket_init(sock, mgr, type, iface)
156 #define isc__nmsocket_put(sockp)	   isc___nmsocket_put(sockp)
157 #define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target)
158 #define isc__nmsocket_detach(socketp)	   isc___nmsocket_detach(socketp)
159 #define isc__nmsocket_close(socketp)	   isc___nmsocket_close(socketp)
160 #define isc__nmhandle_get(sock, peer, local) \
161 	isc___nmhandle_get(sock, peer, local)
162 #define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock)
163 #endif
164 
165 /*
166  * Queue types in the order of processing priority.
167  */
/*
 * Each networker keeps one ievents[] queue per value below
 * NETIEVENT_MAX (see isc__networker_t); lower values drain first.
 */
168 typedef enum {
169 	NETIEVENT_PRIORITY = 0,
170 	NETIEVENT_PRIVILEGED = 1,
171 	NETIEVENT_TASK = 2,
172 	NETIEVENT_NORMAL = 3,
173 	NETIEVENT_MAX = 4,
174 } netievent_type_t;
175 
176 /*
177  * Single network event loop worker.
178  */
179 typedef struct isc__networker {
180 	isc_nm_t *mgr;
181 	int id;		  /* thread id */
182 	uv_loop_t loop;	  /* libuv loop structure */
183 	uv_async_t async; /* async channel to send
184 			   * data to this networker */
185 	isc_mutex_t lock;
186 	bool paused;
187 	bool finished;
188 	isc_thread_t thread;
189 	isc_queue_t *ievents[NETIEVENT_MAX];
	/* per-priority counts of queued events */
190 	atomic_uint_fast32_t nievents[NETIEVENT_MAX];
	/* signalled for priority-event handling; see ievents above */
191 	isc_condition_t cond_prio;
192 
193 	isc_refcount_t references;
194 	atomic_int_fast64_t pktcount;
	/* per-worker receive buffer (ISC_NETMGR_RECVBUF_SIZE) */
195 	char *recvbuf;
196 	char *sendbuf;
	/* guards reuse of recvbuf while a read is in flight */
197 	bool recvbuf_inuse;
198 } isc__networker_t;
199 
200 /*
201  * A general handle for a connection bound to a networker.  For UDP
202  * connections we have peer address here, so both TCP and UDP can be
203  * handled with a simple send-like function
204  */
205 #define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
/* A handle is valid only while its magic matches AND it is referenced. */
206 #define VALID_NMHANDLE(t)                      \
207 	(ISC_MAGIC_VALID(t, NMHANDLE_MAGIC) && \
208 	 atomic_load(&(t)->references) > 0)
209 
210 typedef void (*isc__nm_closecb)(isc_nmhandle_t *);
211 
212 struct isc_nmhandle {
213 	int magic;
214 	isc_refcount_t references;
215 
216 	/*
217 	 * The socket is not 'attached' in the traditional
218 	 * reference-counting sense. Instead, we keep all handles in an
219 	 * array in the socket object.  This way, we don't have circular
220 	 * dependencies and we can close all handles when we're destroying
221 	 * the socket.
222 	 */
223 	isc_nmsocket_t *sock;
224 	size_t ah_pos; /* Position in the socket's 'active handles' array */
225 
226 	isc_sockaddr_t peer;
227 	isc_sockaddr_t local;
228 	isc_nm_opaquecb_t doreset; /* reset extra callback, external */
229 	isc_nm_opaquecb_t dofree;  /* free extra callback, external */
230 #ifdef NETMGR_TRACE
231 	void *backtrace[TRACE_SIZE];
232 	int backtrace_size;
233 	LINK(isc_nmhandle_t) active_link;
234 #endif
	/* caller-owned pointer; doreset/dofree operate on it */
235 	void *opaque;
	/* flexible array; sized by the socket's extrahandlesize */
236 	char extra[];
237 };
238 
/*
 * One enumerator per asynchronous operation that can be posted to a
 * networker's event queues.
 */
239 typedef enum isc__netievent_type {
240 	netievent_udpconnect,
241 	netievent_udpclose,
242 	netievent_udpsend,
243 	netievent_udpread,
244 	netievent_udpcancel,
245 
246 	netievent_tcpconnect,
247 	netievent_tcpclose,
248 	netievent_tcpsend,
249 	netievent_tcpstartread,
250 	netievent_tcppauseread,
251 	netievent_tcpaccept,
252 	netievent_tcpcancel,
253 
254 	netievent_tcpdnsaccept,
255 	netievent_tcpdnsconnect,
256 	netievent_tcpdnsclose,
257 	netievent_tcpdnssend,
258 	netievent_tcpdnsread,
259 	netievent_tcpdnscancel,
260 
261 	netievent_shutdown,
262 	netievent_stop,
263 	netievent_pause,
264 
265 	netievent_connectcb,
266 	netievent_readcb,
267 	netievent_sendcb,
268 
269 	netievent_task,
270 	netievent_privilegedtask,
271 
272 	/*
273 	 * event type values higher than this will be treated
274 	 * as high-priority events, which can be processed
275 	 * while the netmgr is pausing or paused.
276 	 */
277 	netievent_prio = 0xff,
278 
279 	netievent_udplisten,
280 	netievent_udpstop,
281 	netievent_tcplisten,
282 	netievent_tcpstop,
283 	netievent_tcpdnslisten,
284 	netievent_tcpdnsstop,
285 
286 	netievent_resume,
287 	netievent_detach,
288 	netievent_close,
289 } isc__netievent_type;
290 
/*
 * Callback storage shared by the different request kinds; which member
 * is meaningful depends on the operation the request represents.
 */
291 typedef union {
292 	isc_nm_recv_cb_t recv;
293 	isc_nm_cb_t send;
294 	isc_nm_cb_t connect;
295 	isc_nm_accept_cb_t accept;
296 } isc__nm_cb_t;
297 
298 /*
299  * Wrapper around uv_req_t with 'our' fields in it.  req->data should
300  * always point to its parent.  Note that we always allocate more than
301  * sizeof(struct) because we make room for different req types;
302  */
303 #define UVREQ_MAGIC    ISC_MAGIC('N', 'M', 'U', 'R')
304 #define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)
305 
306 typedef struct isc__nm_uvreq isc__nm_uvreq_t;
307 struct isc__nm_uvreq {
308 	int magic;
309 	isc_nmsocket_t *sock;
310 	isc_nmhandle_t *handle;
311 	char tcplen[2];	      /* The TCP DNS message length */
312 	uv_buf_t uvbuf;	      /* translated isc_region_t, to be
313 			       * sent or received */
314 	isc_sockaddr_t local; /* local address */
315 	isc_sockaddr_t peer;  /* peer address */
316 	isc__nm_cb_t cb;      /* callback */
317 	void *cbarg;	      /* callback argument */
318 	uv_pipe_t ipc;	      /* used for sending socket
319 			       * uv_handles to other threads */
	/*
	 * One member per libuv request type; 'req.data' points back to
	 * this structure (see the comment above UVREQ_MAGIC).
	 */
320 	union {
321 		uv_handle_t handle;
322 		uv_req_t req;
323 		uv_getaddrinfo_t getaddrinfo;
324 		uv_getnameinfo_t getnameinfo;
325 		uv_shutdown_t shutdown;
326 		uv_write_t write;
327 		uv_connect_t connect;
328 		uv_udp_send_t udp_send;
329 		uv_fs_t fs;
330 		uv_work_t work;
331 	} uv_req;
	/* list linkage; presumably used for request reuse lists -- TODO confirm */
332 	ISC_LINK(isc__nm_uvreq_t) link;
333 };
334 
335 void *
336 isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type);
337 /*%<
338  * Allocate an ievent and set the type.
339  */
340 void
341 isc__nm_put_netievent(isc_nm_t *mgr, void *ievent);
/*%<
 * Release an ievent previously obtained with isc__nm_get_netievent().
 */
342 
343 /*
344  * The macros here are used to simulate the "inheritance" in C, there's the base
345  * netievent structure that contains just its own type and socket, and there are
346  * extended netievent types that also have handles or requests or other data.
347  *
348  * The macros here ensure that:
349  *
350  *   1. every netievent type has matching definition, declaration and
351  *      implementation
352  *
353  *   2. we handle all the netievent types of same subclass the same, e.g. if the
354  *      extended netievent contains handle, we always attach to the handle in
355  *      the ctor and detach from the handle in dtor.
356  *
357  * There are three macros here for each netievent subclass:
358  *
359  *   1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in
360  *   this header
361  *
362  *   2. NETIEVENT_*_DECL(type) generates the declaration of the get and put
363  *      functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used
364  *      below in this header
365  *
366  *   3. NETIEVENT_*_DEF(type) generates the definition of the functions; used
367  *   either in netmgr.c or matching protocol file (e.g. udp.c, tcp.c, etc.)
368  */
369 
/*
 * Base fields shared by all socket-carrying netievents; file/line/func
 * hold the originating call site when NETMGR_TRACE is enabled.
 */
370 #define NETIEVENT__SOCKET         \
371 	isc__netievent_type type; \
372 	isc_nmsocket_t *sock;     \
373 	const char *file;         \
374 	unsigned int line;        \
375 	const char *func
376 
377 typedef struct isc__netievent__socket {
378 	NETIEVENT__SOCKET;
379 } isc__netievent__socket_t;
380 
381 #define NETIEVENT_SOCKET_TYPE(type) \
382 	typedef isc__netievent__socket_t isc__netievent_##type##_t
383 
384 #define NETIEVENT_SOCKET_DECL(type)                              \
385 	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
386 		isc_nm_t *nm, isc_nmsocket_t *sock);             \
387 	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
388 					  isc__netievent_##type##_t *ievent)
389 
/*
 * The ctor attaches to the socket; the dtor detaches.  All of the
 * NETIEVENT_*_DEF families below follow this attach-in-get /
 * detach-in-put pattern.
 */
390 #define NETIEVENT_SOCKET_DEF(type)                                             \
391 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
392 		isc_nm_t *nm, isc_nmsocket_t *sock) {                          \
393 		isc__netievent_##type##_t *ievent =                            \
394 			isc__nm_get_netievent(nm, netievent_##type);           \
395 		isc__nmsocket_attach(sock, &ievent->sock);                     \
396                                                                                \
397 		return (ievent);                                               \
398 	}                                                                      \
399                                                                                \
400 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
401 					  isc__netievent_##type##_t *ievent) { \
402 		isc__nmsocket_detach(&ievent->sock);                           \
403 		isc__nm_put_netievent(nm, ievent);                             \
404 	}
405 
/* Socket event that additionally carries a uvreq (ownership passed raw). */
406 typedef struct isc__netievent__socket_req {
407 	NETIEVENT__SOCKET;
408 	isc__nm_uvreq_t *req;
409 } isc__netievent__socket_req_t;
410 
411 #define NETIEVENT_SOCKET_REQ_TYPE(type) \
412 	typedef isc__netievent__socket_req_t isc__netievent_##type##_t
413 
414 #define NETIEVENT_SOCKET_REQ_DECL(type)                                    \
415 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(           \
416 		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \
417 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                    \
418 					  isc__netievent_##type##_t *ievent)
419 
/* The req pointer is stored as-is; only the socket is attached/detached. */
420 #define NETIEVENT_SOCKET_REQ_DEF(type)                                         \
421 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
422 		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {    \
423 		isc__netievent_##type##_t *ievent =                            \
424 			isc__nm_get_netievent(nm, netievent_##type);           \
425 		isc__nmsocket_attach(sock, &ievent->sock);                     \
426 		ievent->req = req;                                             \
427                                                                                \
428 		return (ievent);                                               \
429 	}                                                                      \
430                                                                                \
431 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
432 					  isc__netievent_##type##_t *ievent) { \
433 		isc__nmsocket_detach(&ievent->sock);                           \
434 		isc__nm_put_netievent(nm, ievent);                             \
435 	}
436 
/*
 * Socket event carrying a uvreq plus a result code.
 * NOTE(review): unlike the other event structs this one does not embed
 * NETIEVENT__SOCKET, so it has no file/line/func trace fields -- confirm
 * this is intentional (layout is compatible for the type/sock prefix).
 */
437 typedef struct isc__netievent__socket_req_result {
438 	isc__netievent_type type;
439 	isc_nmsocket_t *sock;
440 	isc__nm_uvreq_t *req;
441 	isc_result_t result;
442 } isc__netievent__socket_req_result_t;
443 
444 #define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \
445 	typedef isc__netievent__socket_req_result_t isc__netievent_##type##_t
446 
447 #define NETIEVENT_SOCKET_REQ_RESULT_DECL(type)                            \
448 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(          \
449 		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
450 		isc_result_t result);                                     \
451 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                   \
452 					  isc__netievent_##type##_t *ievent)
453 
454 #define NETIEVENT_SOCKET_REQ_RESULT_DEF(type)                                  \
455 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
456 		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req,      \
457 		isc_result_t result) {                                         \
458 		isc__netievent_##type##_t *ievent =                            \
459 			isc__nm_get_netievent(nm, netievent_##type);           \
460 		isc__nmsocket_attach(sock, &ievent->sock);                     \
461 		ievent->req = req;                                             \
462 		ievent->result = result;                                       \
463                                                                                \
464 		return (ievent);                                               \
465 	}                                                                      \
466                                                                                \
467 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
468 					  isc__netievent_##type##_t *ievent) { \
469 		isc__nmsocket_detach(&ievent->sock);                           \
470 		isc__nm_put_netievent(nm, ievent);                             \
471 	}
472 
/* Socket event that also holds a reference-counted handle. */
473 typedef struct isc__netievent__socket_handle {
474 	NETIEVENT__SOCKET;
475 	isc_nmhandle_t *handle;
476 } isc__netievent__socket_handle_t;
477 
478 #define NETIEVENT_SOCKET_HANDLE_TYPE(type) \
479 	typedef isc__netievent__socket_handle_t isc__netievent_##type##_t
480 
481 #define NETIEVENT_SOCKET_HANDLE_DECL(type)                                   \
482 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(             \
483 		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle); \
484 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                      \
485 					  isc__netievent_##type##_t *ievent)
486 
/* Both the socket and the handle are attached in get and detached in put. */
487 #define NETIEVENT_SOCKET_HANDLE_DEF(type)                                      \
488 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
489 		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) {  \
490 		isc__netievent_##type##_t *ievent =                            \
491 			isc__nm_get_netievent(nm, netievent_##type);           \
492 		isc__nmsocket_attach(sock, &ievent->sock);                     \
493 		isc_nmhandle_attach(handle, &ievent->handle);                  \
494                                                                                \
495 		return (ievent);                                               \
496 	}                                                                      \
497                                                                                \
498 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
499 					  isc__netievent_##type##_t *ievent) { \
500 		isc__nmsocket_detach(&ievent->sock);                           \
501 		isc_nmhandle_detach(&ievent->handle);                          \
502 		isc__nm_put_netievent(nm, ievent);                             \
503 	}
504 
/* Socket event carrying a quota pointer (stored raw, not attached). */
505 typedef struct isc__netievent__socket_quota {
506 	NETIEVENT__SOCKET;
507 	isc_quota_t *quota;
508 } isc__netievent__socket_quota_t;
509 
510 #define NETIEVENT_SOCKET_QUOTA_TYPE(type) \
511 	typedef isc__netievent__socket_quota_t isc__netievent_##type##_t
512 
513 #define NETIEVENT_SOCKET_QUOTA_DECL(type)                                \
514 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(         \
515 		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \
516 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                  \
517 					  isc__netievent_##type##_t *ievent)
518 
519 #define NETIEVENT_SOCKET_QUOTA_DEF(type)                                       \
520 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
521 		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) {      \
522 		isc__netievent_##type##_t *ievent =                            \
523 			isc__nm_get_netievent(nm, netievent_##type);           \
524 		isc__nmsocket_attach(sock, &ievent->sock);                     \
525 		ievent->quota = quota;                                         \
526                                                                                \
527 		return (ievent);                                               \
528 	}                                                                      \
529                                                                                \
530 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
531 					  isc__netievent_##type##_t *ievent) { \
532 		isc__nmsocket_detach(&ievent->sock);                           \
533 		isc__nm_put_netievent(nm, ievent);                             \
534 	}
535 
/* Event carrying a task pointer instead of a socket (stored raw). */
536 typedef struct isc__netievent__task {
537 	isc__netievent_type type;
538 	isc_task_t *task;
539 } isc__netievent__task_t;
540 
/*
 * NOTE(review): unlike the NETIEVENT_SOCKET_* families, TYPE and DECL
 * here end with a semicolon inside the macro -- confirm call sites do
 * not add another one.
 */
541 #define NETIEVENT_TASK_TYPE(type) \
542 	typedef isc__netievent__task_t isc__netievent_##type##_t;
543 
544 #define NETIEVENT_TASK_DECL(type)                                \
545 	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
546 		isc_nm_t *nm, isc_task_t *task);                 \
547 	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
548 					  isc__netievent_##type##_t *ievent);
549 
550 #define NETIEVENT_TASK_DEF(type)                                               \
551 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
552 		isc_nm_t *nm, isc_task_t *task) {                              \
553 		isc__netievent_##type##_t *ievent =                            \
554 			isc__nm_get_netievent(nm, netievent_##type);           \
555 		ievent->task = task;                                           \
556                                                                                \
557 		return (ievent);                                               \
558 	}                                                                      \
559                                                                                \
560 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
561 					  isc__netievent_##type##_t *ievent) { \
562 		ievent->task = NULL;                                           \
563 		isc__nm_put_netievent(nm, ievent);                             \
564 	}
565 
/* UDP send event: carries the destination peer plus the uvreq. */
566 typedef struct isc__netievent_udpsend {
567 	NETIEVENT__SOCKET;
568 	isc_sockaddr_t peer;
569 	isc__nm_uvreq_t *req;
570 } isc__netievent_udpsend_t;
571 
/* Base event: just the type tag; all other event structs extend this. */
572 typedef struct isc__netievent {
573 	isc__netievent_type type;
574 } isc__netievent_t;
575 
576 #define NETIEVENT_TYPE(type) typedef isc__netievent_t isc__netievent_##type##_t
577 
578 #define NETIEVENT_DECL(type)                                                   \
579 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \
580 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
581 					  isc__netievent_##type##_t *ievent)
582 
583 #define NETIEVENT_DEF(type)                                                    \
584 	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
585 		isc_nm_t *nm) {                                                \
586 		isc__netievent_##type##_t *ievent =                            \
587 			isc__nm_get_netievent(nm, netievent_##type);           \
588                                                                                \
589 		return (ievent);                                               \
590 	}                                                                      \
591                                                                                \
592 	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
593 					  isc__netievent_##type##_t *ievent) { \
594 		isc__nm_put_netievent(nm, ievent);                             \
595 	}
596 
/*
 * Union of the event variants; presumably used to size pooled event
 * allocations to the largest variant -- TODO confirm against the evpool
 * setup in netmgr.c.
 */
597 typedef union {
598 	isc__netievent_t ni;
599 	isc__netievent__socket_t nis;
600 	isc__netievent__socket_req_t nisr;
601 	isc__netievent_udpsend_t nius;
602 	isc__netievent__socket_quota_t nisq;
603 } isc__netievent_storage_t;
604 
605 /*
606  * Work item for a uv_work threadpool.
607  */
608 typedef struct isc__nm_work {
609 	isc_nm_t *netmgr;
610 	uv_work_t req;
	/* cb runs on the threadpool; after_cb runs back on the loop thread */
611 	isc_nm_workcb_t cb;
612 	isc_nm_after_workcb_t after_cb;
613 	void *data;
614 } isc__nm_work_t;
615 
616 /*
617  * Network manager
618  */
619 #define NM_MAGIC    ISC_MAGIC('N', 'E', 'T', 'M')
620 #define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC)
621 
622 struct isc_nm {
623 	int magic;
624 	isc_refcount_t references;
625 	isc_mem_t *mctx;
	/* number of entries in the workers array below */
626 	int nworkers;
627 	isc_mutex_t lock;
628 	isc_condition_t wkstatecond;
629 	isc_condition_t wkpausecond;
630 	isc__networker_t *workers;
631 
632 	isc_stats_t *stats;
633 
	/* pool for request allocations, guarded by reqlock */
634 	isc_mempool_t *reqpool;
635 	isc_mutex_t reqlock;
636 
	/* pool for netievent allocations, guarded by evlock */
637 	isc_mempool_t *evpool;
638 	isc_mutex_t evlock;
639 
640 	uint_fast32_t workers_running;
641 	atomic_uint_fast32_t workers_paused;
642 	atomic_uint_fast32_t maxudp;
643 
644 	atomic_bool paused;
645 
646 	/*
647 	 * Active connections are being closed and new connections are
648 	 * no longer allowed.
649 	 */
650 	atomic_bool closing;
651 
652 	/*
653 	 * A worker is actively waiting for other workers, for example to
654 	 * stop listening; that means no other thread can do the same thing
655 	 * or pause, or we'll deadlock. We have to either re-enqueue our
656 	 * event or wait for the other one to finish if we want to pause.
657 	 */
658 	atomic_int interlocked;
659 
660 	/*
661 	 * Timeout values for TCP connections, corresponding to
662 	 * tcp-initial-timeout, tcp-idle-timeout, tcp-keepalive-timeout,
663 	 * and tcp-advertised-timeout. Note that these are stored in
664 	 * milliseconds so they can be used directly with the libuv timer,
665 	 * but they are configured in tenths of seconds.
666 	 */
667 	atomic_uint_fast32_t init;
668 	atomic_uint_fast32_t idle;
669 	atomic_uint_fast32_t keepalive;
670 	atomic_uint_fast32_t advertised;
671 
	/* barriers synchronizing all workers during pause/resume */
672 	isc_barrier_t pausing;
673 	isc_barrier_t resuming;
674 
675 #ifdef NETMGR_TRACE
676 	ISC_LIST(isc_nmsocket_t) active_sockets;
677 #endif
678 };
679 
680 typedef enum isc_nmsocket_type {
681 	isc_nm_udpsocket,
682 	isc_nm_udplistener, /* Aggregate of nm_udpsocks */
683 	isc_nm_tcpsocket,
684 	isc_nm_tcplistener,
685 	isc_nm_tcpdnslistener,
686 	isc_nm_tcpdnssocket,
687 } isc_nmsocket_type;
688 
689 /*%
690  * A universal structure for either a single socket or a group of
691  * dup'd/SO_REUSE_PORT-using sockets listening on the same interface.
692  */
693 #define NMSOCK_MAGIC	ISC_MAGIC('N', 'M', 'S', 'K')
694 #define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC)
695 
696 /*%
697  * Index into socket stat counter arrays.
698  */
699 enum {
700 	STATID_OPEN = 0,
701 	STATID_OPENFAIL = 1,
702 	STATID_CLOSE = 2,
703 	STATID_BINDFAIL = 3,
704 	STATID_CONNECTFAIL = 4,
705 	STATID_CONNECT = 5,
706 	STATID_ACCEPTFAIL = 6,
707 	STATID_ACCEPT = 7,
708 	STATID_SENDFAIL = 8,
709 	STATID_RECVFAIL = 9,
710 	STATID_ACTIVE = 10
711 };
712 
713 typedef void (*isc_nm_closehandlecb_t)(void *arg);
714 /*%<
715  * Opaque callback function, used for isc_nmhandle 'reset' and 'free'
716  * callbacks.
717  */
718 
718 
719 struct isc_nmsocket {
720 	/*% Unlocked, RO */
721 	int magic;
722 	int tid;
723 	isc_nmsocket_type type;
724 	isc_nm_t *mgr;
725 
726 	/*% Parent socket for multithreaded listeners */
727 	isc_nmsocket_t *parent;
728 	/*% Listener socket this connection was accepted on */
729 	isc_nmsocket_t *listener;
730 	/*% Self socket */
731 	isc_nmsocket_t *self;
732 
733 	isc_barrier_t startlistening;
734 	isc_barrier_t stoplistening;
735 
736 	/*%
737 	 * quota is the TCP client, attached when a TCP connection
738 	 * is established. pquota is a non-attached pointer to the
739 	 * TCP client quota, stored in listening sockets but only
740 	 * attached in connected sockets.
741 	 */
742 	isc_quota_t *quota;
743 	isc_quota_t *pquota;
744 	isc_quota_cb_t quotacb;
745 
746 	/*%
747 	 * Socket statistics
748 	 */
749 	const isc_statscounter_t *statsindex;
750 
751 	/*%
752 	 * TCP read/connect timeout timers.
753 	 */
754 	uv_timer_t timer;
755 	uint64_t read_timeout;
756 	uint64_t connect_timeout;
757 
758 	/*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
759 	isc_nmsocket_t *outer;
760 
761 	/*% server socket for connections */
762 	isc_nmsocket_t *server;
763 
764 	/*% Child sockets for multi-socket setups */
765 	isc_nmsocket_t *children;
766 	uint_fast32_t nchildren;
767 	isc_sockaddr_t iface;
768 	isc_nmhandle_t *statichandle;
769 	isc_nmhandle_t *outerhandle;
770 
771 	/*% Extra data allocated at the end of each isc_nmhandle_t */
772 	size_t extrahandlesize;
773 
774 	/*% TCP backlog */
775 	int backlog;
776 
777 	/*% libuv data */
778 	uv_os_sock_t fd;
779 	union uv_any_handle uv_handle;
780 
781 	/*% Peer address */
782 	isc_sockaddr_t peer;
783 
784 	/* Atomic */
785 	/*% Number of running (e.g. listening) child sockets */
786 	atomic_uint_fast32_t rchildren;
787 
788 	/*%
789 	 * Socket is active if it's listening, working, etc. If it's
790 	 * closing, then it doesn't make a sense, for example, to
791 	 * push handles or reqs for reuse.
792 	 */
793 	atomic_bool active;
794 	atomic_bool destroying;
795 
796 	/*%
797 	 * Socket is closed if it's not active and all the possible
798 	 * callbacks were fired, there are no active handles, etc.
799 	 * If active==false but closed==false, that means the socket
800 	 * is closing.
801 	 */
802 	atomic_bool closing;
803 	atomic_bool closed;
804 	atomic_bool listening;
805 	atomic_bool connecting;
806 	atomic_bool connected;
807 	bool accepting;
808 	bool reading;
809 	isc_refcount_t references;
810 
811 	/*%
812 	 * Established an outgoing connection, as client not server.
813 	 */
814 	atomic_bool client;
815 
816 	/*%
817 	 * TCPDNS socket has been set not to pipeline.
818 	 */
819 	atomic_bool sequential;
820 
821 	/*%
822 	 * The socket is processing read callback, this is guard to not read
823 	 * data before the readcb is back.
824 	 */
825 	bool processing;
826 
827 	/*%
828 	 * A TCP socket has had isc_nm_pauseread() called.
829 	 */
830 	atomic_bool readpaused;
831 
832 	/*%
833 	 * A TCP or TCPDNS socket has been set to use the keepalive
834 	 * timeout instead of the default idle timeout.
835 	 */
836 	atomic_bool keepalive;
837 
838 	/*%
839 	 * 'spare' handles for that can be reused to avoid allocations,
840 	 * for UDP.
841 	 */
842 	isc_astack_t *inactivehandles;
843 	isc_astack_t *inactivereqs;
844 
845 	/*%
846 	 * Used to wait for TCP listening events to complete, and
847 	 * for the number of running children to reach zero during
848 	 * shutdown.
849 	 *
850 	 * We use two condition variables to prevent the race where the netmgr
851 	 * threads would be able to finish and destroy the socket before it's
852 	 * unlocked by the isc_nm_listen<proto>() function.  So, the flow is as
853 	 * follows:
854 	 *
855 	 *   1. parent thread creates all children sockets and passes then to
856 	 *      netthreads, looks at the signaling variable and WAIT(cond) until
857 	 *      the childrens are done initializing
858 	 *
859 	 *   2. the events get picked by netthreads, calls the libuv API (and
860 	 *      either succeeds or fails) and WAIT(scond) until all other
861 	 *      children sockets in netthreads are initialized and the listening
862 	 *      socket lock is unlocked
863 	 *
	 *   3. the control is given back to the parent thread which now either
	 *      returns success or shuts down the listener if an error has
	 *      occurred in the children netthread
867 	 *
868 	 * NOTE: The other approach would be doing an extra attach to the parent
869 	 * listening socket, and then detach it in the parent thread, but that
870 	 * breaks the promise that once the libuv socket is initialized on the
871 	 * nmsocket, the nmsocket needs to be handled only by matching
872 	 * netthread, so in fact that would add a complexity in a way that
873 	 * isc__nmsocket_detach would have to be converted to use an
	 * asynchronous netievent.
875 	 */
876 	isc_mutex_t lock;
877 	isc_condition_t cond;
878 	isc_condition_t scond;
879 
880 	/*%
881 	 * Used to pass a result back from listen or connect events.
882 	 */
883 	isc_result_t result;
884 
885 	/*%
886 	 * List of active handles.
887 	 * ah - current position in 'ah_frees'; this represents the
888 	 *	current number of active handles;
889 	 * ah_size - size of the 'ah_frees' and 'ah_handles' arrays
890 	 * ah_handles - array pointers to active handles
891 	 *
892 	 * Adding a handle
893 	 *  - if ah == ah_size, reallocate
894 	 *  - x = ah_frees[ah]
895 	 *  - ah_frees[ah++] = 0;
896 	 *  - ah_handles[x] = handle
897 	 *  - x must be stored with the handle!
898 	 * Removing a handle:
899 	 *  - ah_frees[--ah] = x
900 	 *  - ah_handles[x] = NULL;
901 	 *
902 	 * XXX: for now this is locked with socket->lock, but we
903 	 * might want to change it to something lockless in the
904 	 * future.
905 	 */
906 	atomic_int_fast32_t ah;
907 	size_t ah_size;
908 	size_t *ah_frees;
909 	isc_nmhandle_t **ah_handles;
910 
911 	/*% Buffer for TCPDNS processing */
912 	size_t buf_size;
913 	size_t buf_len;
914 	unsigned char *buf;
915 
916 	/*%
917 	 * This function will be called with handle->sock
918 	 * as the argument whenever a handle's references drop
919 	 * to zero, after its reset callback has been called.
920 	 */
921 	isc_nm_closehandlecb_t closehandle_cb;
922 
923 	isc_nmhandle_t *recv_handle;
924 	isc_nm_recv_cb_t recv_cb;
925 	void *recv_cbarg;
926 	bool recv_read;
927 
928 	isc_nm_cb_t connect_cb;
929 	void *connect_cbarg;
930 
931 	isc_nm_accept_cb_t accept_cb;
932 	void *accept_cbarg;
933 
934 	atomic_int_fast32_t active_child_connections;
935 
936 #ifdef NETMGR_TRACE
937 	void *backtrace[TRACE_SIZE];
938 	int backtrace_size;
939 	LINK(isc_nmsocket_t) active_link;
940 	ISC_LIST(isc_nmhandle_t) active_handles;
941 #endif
942 };
943 
bool
isc__nm_in_netthread(void);
/*%
 * Returns 'true' if we're in the network thread.
 */

void
isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
/*%<
 * If the caller is already in the matching nmthread, process the netievent
 * directly; if not, enqueue it using isc__nm_enqueue_ievent().
 */

void
isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
/*%<
 * Enqueue an ievent onto a specific worker queue. (This is the only safe
 * way to use an isc__networker_t from another thread.)
 */

void
isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf);
/*%<
 * Free a buffer allocated for a receive operation.
 *
 * Note that as currently implemented, this doesn't actually
 * free anything; it just marks the isc__networker's UDP receive
 * buffer as "not in use".
 */

isc_nmhandle_t *
isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
		   isc_sockaddr_t *local FLARG);
/*%<
 * Get a handle for the socket 'sock', allocating a new one
 * if there isn't one available in 'sock->inactivehandles'.
 *
 * If 'peer' is not NULL, set the handle's peer address to 'peer',
 * otherwise set it to 'sock->peer'.
 *
 * If 'local' is not NULL, set the handle's local address to 'local',
 * otherwise set it to 'sock->iface->addr'.
 *
 * 'sock' will be attached to 'handle->sock'. The caller may need
 * to detach the socket afterward.
 */

isc__nm_uvreq_t *
isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG);
/*%<
 * Get a UV request structure for the socket 'sock', allocating a
 * new one if there isn't one available in 'sock->inactivereqs'.
 */

void
isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG);
/*%<
 * Completes the use of a UV request structure, setting '*req' to NULL.
 *
 * The UV request is pushed onto the 'sock->inactivereqs' stack or,
 * if that doesn't work, freed.
 */
1006 
void
isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
		    isc_sockaddr_t *iface FLARG);
/*%<
 * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'
 * and its interface to 'iface'.
 */

void
isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG);
/*%<
 * Attach to a socket, increasing refcount
 */

void
isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG);
/*%<
 * Detach from socket, decreasing refcount and possibly destroying the
 * socket if it's no longer referenced.
 */

void
isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG);
/*%<
 * Mark 'sock' as inactive, close it if necessary, and destroy it
 * if there are no remaining references or active handles.
 */

void
isc__nmsocket_shutdown(isc_nmsocket_t *sock);
/*%<
 * Initiate the socket shutdown which actively calls the active
 * callbacks.
 */

bool
isc__nmsocket_active(isc_nmsocket_t *sock);
/*%<
 * Determine whether 'sock' is active by checking 'sock->active'
 * or, for child sockets, 'sock->parent->active'.
 */

bool
isc__nmsocket_deactivate(isc_nmsocket_t *sock);
/*%<
 * @brief Deactivate active socket
 *
 * Atomically deactivate the socket by setting @p sock->active or, for child
 * sockets, @p sock->parent->active to @c false
 *
 * @param[in] sock - valid nmsocket
 * @return @c false if the socket was already inactive, @c true otherwise
 */

void
isc__nmsocket_clearcb(isc_nmsocket_t *sock);
/*%<
 * Clear the recv and accept callbacks in 'sock'.
 */

void
isc__nmsocket_timer_stop(isc_nmsocket_t *sock);
void
isc__nmsocket_timer_start(isc_nmsocket_t *sock);
void
isc__nmsocket_timer_restart(isc_nmsocket_t *sock);
bool
isc__nmsocket_timer_running(isc_nmsocket_t *sock);
/*%<
 * Start/stop/restart/check the timeout on the socket
 */
1078 
void
isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
		  isc_result_t eresult, bool async);

void
isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Issue a connect callback on the socket, used to call the callback
 */

void
isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
	       isc_result_t eresult);
void
isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Issue a read callback on the socket, used to call the callback
 * on failed conditions when the event can't be scheduled on the uv loop.
 */

void
isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
	       isc_result_t eresult, bool async);
void
isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Issue a write callback on the socket, used to call the callback
 * on failed conditions when the event can't be scheduled on the uv loop.
 */

void
isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Walk through all uv handles, get the underlying sockets and issue
 * close on them.
 */
1117 
void
isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
		 isc_nm_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_send() for UDP handles.
 */

void
isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_read() for UDP handles.
 */

void
isc__nm_udp_close(isc_nmsocket_t *sock);
/*%<
 * Close a UDP socket.
 */

void
isc__nm_udp_cancelread(isc_nmhandle_t *handle);
/*%<
 * Stop reading on a connected UDP handle.
 */

void
isc__nm_udp_shutdown(isc_nmsocket_t *sock);
/*%<
 * Called during the shutdown process to close and clean up connected
 * sockets.
 */

void
isc__nm_udp_stoplistening(isc_nmsocket_t *sock);
/*%<
 * Stop listening on 'sock'.
 */

void
isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
/*%<
 * Set or clear the recv timeout for the UDP socket associated with 'handle'.
 */

void
isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Callback handlers for asynchronous UDP events (listen, connect,
 * stoplisten, send, read, cancel, close).
 */
1179 
void
isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
		 isc_nm_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_send() for TCP handles.
 */

void
isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*%<
 * Back-end implementation of isc_nm_read() for TCP handles.
 */

void
isc__nm_tcp_close(isc_nmsocket_t *sock);
/*%<
 * Close a TCP socket.
 */

void
isc__nm_tcp_pauseread(isc_nmhandle_t *handle);
/*%<
 * Pause reading on this handle, while still remembering the callback.
 */

void
isc__nm_tcp_resumeread(isc_nmhandle_t *handle);
/*%<
 * Resume reading from socket.
 */

void
isc__nm_tcp_shutdown(isc_nmsocket_t *sock);
/*%<
 * Called during the shutdown process to close and clean up connected
 * sockets.
 */

void
isc__nm_tcp_cancelread(isc_nmhandle_t *handle);
/*%<
 * Stop reading on a connected TCP handle.
 */

void
isc__nm_tcp_stoplistening(isc_nmsocket_t *sock);
/*%<
 * Stop listening on 'sock'.
 */

int_fast32_t
isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock);
/*%<
 * Returns the number of active connections for the TCP listener socket.
 */

void
isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
/*%<
 * Set the read timeout for the TCP socket associated with 'handle'.
 */

void
isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
void
isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0);
/*%<
 * Callback handlers for asynchronous TCP events (connect, listen, accept,
 * stoplisten, send, read, pause, cancel, close).
 */
1268 
1269 void
1270 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1271 void
1272 isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1273 void
1274 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
1275 void
1276 isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
1277 		    isc_nm_cb_t cb, void *cbarg);
1278 /*%<
1279  * Back-end implementation of isc_nm_send() for TCPDNS handles.
1280  */
1281 
1282 void
1283 isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock);
1284 
1285 void
1286 isc__nm_tcpdns_close(isc_nmsocket_t *sock);
1287 /*%<
1288  * Close a TCPDNS socket.
1289  */
1290 
1291 void
1292 isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock);
1293 /*%<
1294  * Stop listening on 'sock'.
1295  */
1296 
1297 void
1298 isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1299 /*%<
1300  * Set the read timeout and reset the timer for the TCPDNS socket
1301  * associated with 'handle', and the TCP socket it wraps around.
1302  */
1303 
1304 void
1305 isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
1306 void
1307 isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1308 void
1309 isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0);
1310 void
1311 isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0);
1312 void
1313 isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0);
1314 void
1315 isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0);
1316 
1317 void
1318 isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0);
1319 
1320 void
1321 isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
1322 
1323 void
1324 isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle);
1325 /*%<
1326  * Stop reading on a connected TCPDNS handle.
1327  */
1328 
#define isc__nm_uverr2result(x) \
	isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__)
isc_result_t
isc___nm_uverr2result(int uverr, bool dolog, const char *file,
		      unsigned int line, const char *func);
/*%<
 * Convert a libuv error value into an isc_result_t.  The
 * list of supported error values is not complete; new users
 * of this function should add any expected errors that are
 * not already there.
 *
 * The isc__nm_uverr2result() macro always passes dolog=true along
 * with the caller's file/line/function for logging purposes.
 */
1340 
bool
isc__nm_acquire_interlocked(isc_nm_t *mgr);
/*%<
 * Try to acquire interlocked state; return true if successful.
 */

void
isc__nm_drop_interlocked(isc_nm_t *mgr);
/*%<
 * Drop interlocked state; signal waiters.
 */

void
isc__nm_acquire_interlocked_force(isc_nm_t *mgr);
/*%<
 * Actively wait for interlocked state.
 */

void
isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid);
/*%<
 * Increment socket-related statistics counters.
 */

void
isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid);
/*%<
 * Decrement socket-related statistics counters.
 */

isc_result_t
isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp);
/*%<
 * Platform-independent socket() version
 */

void
isc__nm_closesocket(uv_os_sock_t sock);
/*%<
 * Platform-independent closesocket() version
 */

isc_result_t
isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family);
/*%<
 * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle
 */

isc_result_t
isc__nm_socket_reuse(uv_os_sock_t fd);
/*%<
 * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd
 */

isc_result_t
isc__nm_socket_reuse_lb(uv_os_sock_t fd);
/*%<
 * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd
 */

isc_result_t
isc__nm_socket_incoming_cpu(uv_os_sock_t fd);
/*%<
 * Set the SO_INCOMING_CPU socket option on the fd if available
 */

isc_result_t
isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family);
/*%<
 * Disable Path MTU Discovery, either by disabling the IP(V6)_DONTFRAG socket
 * option, or setting the IP(V6)_MTU_DISCOVER socket option to IP_PMTUDISC_OMIT
 */

isc_result_t
isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms);
/*%<
 * Set the connection timeout in milliseconds; on non-Linux platforms,
 * the minimum value must be at least 1000 (1 second).
 */

isc_result_t
isc__nm_socket_tcp_nodelay(uv_os_sock_t fd);
/*%<
 * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY).
 */
1426 
1427 /*
1428  * typedef all the netievent types
1429  */
1430 
1431 NETIEVENT_SOCKET_TYPE(close);
1432 NETIEVENT_SOCKET_TYPE(tcpclose);
1433 NETIEVENT_SOCKET_TYPE(tcplisten);
1434 NETIEVENT_SOCKET_TYPE(tcppauseread);
1435 NETIEVENT_SOCKET_TYPE(tcpstop);
1436 NETIEVENT_SOCKET_TYPE(udpclose);
1437 NETIEVENT_SOCKET_TYPE(udplisten);
1438 NETIEVENT_SOCKET_TYPE(udpread);
1439 /* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */
1440 NETIEVENT_SOCKET_TYPE(udpstop);
1441 
1442 NETIEVENT_SOCKET_TYPE(tcpdnsclose);
1443 NETIEVENT_SOCKET_TYPE(tcpdnsread);
1444 NETIEVENT_SOCKET_TYPE(tcpdnsstop);
1445 NETIEVENT_SOCKET_TYPE(tcpdnslisten);
1446 NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect);
1447 NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend);
1448 NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel);
1449 NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept);
1450 
1451 NETIEVENT_SOCKET_REQ_TYPE(tcpconnect);
1452 NETIEVENT_SOCKET_REQ_TYPE(tcpsend);
1453 NETIEVENT_SOCKET_TYPE(tcpstartread);
1454 NETIEVENT_SOCKET_REQ_TYPE(udpconnect);
1455 
1456 NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb);
1457 NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb);
1458 NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb);
1459 
1460 NETIEVENT_SOCKET_HANDLE_TYPE(detach);
1461 NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel);
1462 NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel);
1463 
1464 NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept);
1465 
1466 NETIEVENT_TYPE(pause);
1467 NETIEVENT_TYPE(resume);
1468 NETIEVENT_TYPE(shutdown);
1469 NETIEVENT_TYPE(stop);
1470 
1471 NETIEVENT_TASK_TYPE(task);
1472 NETIEVENT_TASK_TYPE(privilegedtask);
1473 
1474 /* Now declared the helper functions */
1475 
1476 NETIEVENT_SOCKET_DECL(close);
1477 NETIEVENT_SOCKET_DECL(tcpclose);
1478 NETIEVENT_SOCKET_DECL(tcplisten);
1479 NETIEVENT_SOCKET_DECL(tcppauseread);
1480 NETIEVENT_SOCKET_DECL(tcpstartread);
1481 NETIEVENT_SOCKET_DECL(tcpstop);
1482 NETIEVENT_SOCKET_DECL(udpclose);
1483 NETIEVENT_SOCKET_DECL(udplisten);
1484 NETIEVENT_SOCKET_DECL(udpread);
1485 NETIEVENT_SOCKET_DECL(udpsend);
1486 NETIEVENT_SOCKET_DECL(udpstop);
1487 
1488 NETIEVENT_SOCKET_DECL(tcpdnsclose);
1489 NETIEVENT_SOCKET_DECL(tcpdnsread);
1490 NETIEVENT_SOCKET_DECL(tcpdnsstop);
1491 NETIEVENT_SOCKET_DECL(tcpdnslisten);
1492 NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect);
1493 NETIEVENT_SOCKET_REQ_DECL(tcpdnssend);
1494 NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel);
1495 NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept);
1496 
1497 NETIEVENT_SOCKET_REQ_DECL(tcpconnect);
1498 NETIEVENT_SOCKET_REQ_DECL(tcpsend);
1499 NETIEVENT_SOCKET_REQ_DECL(udpconnect);
1500 
1501 NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb);
1502 NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb);
1503 NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb);
1504 
1505 NETIEVENT_SOCKET_HANDLE_DECL(udpcancel);
1506 NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel);
1507 NETIEVENT_SOCKET_DECL(detach);
1508 
1509 NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept);
1510 
1511 NETIEVENT_DECL(pause);
1512 NETIEVENT_DECL(resume);
1513 NETIEVENT_DECL(shutdown);
1514 NETIEVENT_DECL(stop);
1515 
1516 NETIEVENT_TASK_DECL(task);
1517 NETIEVENT_TASK_DECL(privilegedtask);
1518 
void
isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
void
isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
void
isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
/*%<
 * Transport-specific failed-read handlers, invoked with the failure
 * 'result'.
 */

isc_result_t
isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock);
/*%<
 * Process the accumulated TCPDNS buffer on 'sock'.
 * NOTE(review): presumably extracts a length-prefixed DNS message from
 * sock->buf — confirm against the tcpdns.c implementation.
 */

isc__nm_uvreq_t *
isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr);
/*%<
 * Get a UV request to be used for a read on 'sock'.
 */

void
isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf);
/*%<
 * libuv allocation callback (uv_alloc_cb signature) shared by the
 * transports.
 */

void
isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
		    const struct sockaddr *addr, unsigned flags);
void
isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
void
isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
/*%<
 * Per-transport libuv receive/read callbacks (uv_udp_recv_cb and
 * uv_read_cb signatures).
 */

void
isc__nm_start_reading(isc_nmsocket_t *sock);
void
isc__nm_stop_reading(isc_nmsocket_t *sock);
void
isc__nm_process_sock_buffer(isc_nmsocket_t *sock);
void
isc__nm_resume_processing(void *arg);
bool
isc__nmsocket_closing(isc_nmsocket_t *sock);
bool
isc__nm_closing(isc_nmsocket_t *sock);
/*%<
 * Helpers shared by the stream-based transports for starting/stopping
 * reads and checking shutdown state.
 */

void
isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len);
/*%<
 * Ensure the TCPDNS buffer on 'sock' can hold at least 'len' bytes.
 * NOTE(review): assumed from the name and buf/buf_size fields — confirm.
 */

void
isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
		       isc_result_t eresult);
void
isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult);
void
isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
			  isc_result_t eresult, bool async);
void
isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async);
/*%<
 * Generic failure handlers for send/accept/connect/read, invoked with
 * the failure result.
 */

void
isc__nmsocket_connecttimeout_cb(uv_timer_t *timer);
/*%<
 * libuv timer callback fired when an outgoing connection times out.
 */

/*
 * NOTE(review): appears to cap the number of concurrent clients per
 * stream connection — confirm against usage before relying on this.
 */
#define STREAM_CLIENTS_PER_CONN 23
1574