xref: /minix3/external/bsd/bind/dist/lib/isc/win32/socket.c (revision 00b67f09dd46474d133c95011a48590a8e8f94c7)
1 /*	$NetBSD: socket.c,v 1.10 2015/07/08 17:29:00 christos Exp $	*/
2 
3 /*
4  * Copyright (C) 2004-2015  Internet Systems Consortium, Inc. ("ISC")
5  * Copyright (C) 2000-2003  Internet Software Consortium.
6  *
7  * Permission to use, copy, modify, and/or distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17  * PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /* Id */
21 
22 /* This code uses functions which are only available on Server 2003 and
23  * higher, and Windows XP and higher.
24  *
25  * This code is by nature multithreaded and takes advantage of various
26  * features to pass on information through the completion port for
27  * when I/O is completed.  All sends, receives, accepts, and connects are
28  * completed through the completion port.
29  *
30  * The number of Completion Port Worker threads used is the total number
 * of CPUs + 1. This increases the likelihood that a Worker Thread is
32  * available for processing a completed request.
33  *
34  * XXXPDM 5 August, 2002
35  */
36 
37 #define MAKE_EXTERNAL 1
38 #include <config.h>
39 
40 #include <sys/types.h>
41 
42 #ifndef _WINSOCKAPI_
43 #define _WINSOCKAPI_   /* Prevent inclusion of winsock.h in windows.h */
44 #endif
45 
46 #include <errno.h>
47 #include <stddef.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <io.h>
52 #include <fcntl.h>
53 #include <process.h>
54 
55 #include <isc/app.h>
56 #include <isc/buffer.h>
57 #include <isc/bufferlist.h>
58 #include <isc/condition.h>
59 #include <isc/list.h>
60 #include <isc/log.h>
61 #include <isc/mem.h>
62 #include <isc/msgs.h>
63 #include <isc/mutex.h>
64 #include <isc/net.h>
65 #include <isc/once.h>
66 #include <isc/os.h>
67 #include <isc/platform.h>
68 #include <isc/print.h>
69 #include <isc/region.h>
70 #include <isc/socket.h>
71 #include <isc/stats.h>
72 #include <isc/strerror.h>
73 #include <isc/syslog.h>
74 #include <isc/task.h>
75 #include <isc/thread.h>
76 #include <isc/util.h>
77 #include <isc/win32os.h>
78 
79 #include <mswsock.h>
80 
81 #include "errno2result.h"
82 
83 /*
84  * Set by the -T dscp option on the command line. If set to a value
85  * other than -1, we check to make sure DSCP values match it, and
86  * assert if not.
87  */
88 int isc_dscp_check_value = -1;
89 
90 /*
91  * How in the world can Microsoft exist with APIs like this?
92  * We can't actually call this directly, because it turns out
93  * no library exports this function.  Instead, we need to
94  * issue a runtime call to get the address.
95  */
96 LPFN_CONNECTEX ISCConnectEx;
97 LPFN_ACCEPTEX ISCAcceptEx;
98 LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;
99 
100 /*
101  * Run expensive internal consistency checks.
102  */
103 #ifdef ISC_SOCKET_CONSISTENCY_CHECKS
104 #define CONSISTENT(sock) consistent(sock)
105 #else
106 #define CONSISTENT(sock) do {} while (/*CONSTCOND*/0)
107 #endif
108 static void consistent(isc_socket_t *sock);
109 
110 /*
111  * Define this macro to control the behavior of connection
112  * resets on UDP sockets.  See Microsoft KnowledgeBase Article Q263823
113  * for details.
114  * NOTE: This requires that Windows 2000 systems install Service Pack 2
115  * or later.
116  */
117 #ifndef SIO_UDP_CONNRESET
118 #define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
119 #endif
120 
121 /*
122  * Some systems define the socket length argument as an int, some as size_t,
123  * some as socklen_t.  This is here so it can be easily changed if needed.
124  */
125 #ifndef ISC_SOCKADDR_LEN_T
126 #define ISC_SOCKADDR_LEN_T unsigned int
127 #endif
128 
129 /*
130  * Define what the possible "soft" errors can be.  These are non-fatal returns
131  * of various network related functions, like recv() and so on.
132  */
133 #define SOFT_ERROR(e)	((e) == WSAEINTR || \
134 			 (e) == WSAEWOULDBLOCK || \
135 			 (e) == EWOULDBLOCK || \
136 			 (e) == EINTR || \
137 			 (e) == EAGAIN || \
138 			 (e) == 0)
139 
140 /*
141  * Pending errors are not really errors and should be
142  * kept separate
143  */
144 #define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)
145 
146 #define DOIO_SUCCESS	  0       /* i/o ok, event sent */
147 #define DOIO_SOFT	  1       /* i/o ok, soft error, no event sent */
148 #define DOIO_HARD	  2       /* i/o error, event sent */
149 #define DOIO_EOF	  3       /* EOF, no event sent */
150 #define DOIO_PENDING	  4       /* status when i/o is in process */
151 #define DOIO_NEEDMORE	  5       /* IO was processed, but we need more due to minimum */
152 
153 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
154 
155 /*
156  * DLVL(90)  --  Function entry/exit and other tracing.
157  * DLVL(70)  --  Socket "correctness" -- including returning of events, etc.
158  * DLVL(60)  --  Socket data send/receive
159  * DLVL(50)  --  Event tracing, including receiving/sending completion events.
160  * DLVL(20)  --  Socket creation/destruction.
161  */
162 #define TRACE_LEVEL		90
163 #define CORRECTNESS_LEVEL	70
164 #define IOEVENT_LEVEL		60
165 #define EVENT_LEVEL		50
166 #define CREATION_LEVEL		20
167 
168 #define TRACE		DLVL(TRACE_LEVEL)
169 #define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
170 #define IOEVENT		DLVL(IOEVENT_LEVEL)
171 #define EVENT		DLVL(EVENT_LEVEL)
172 #define CREATION	DLVL(CREATION_LEVEL)
173 
174 typedef isc_event_t intev_t;
175 
/*
 * Socket states.  A value from this list is stored in the "state" member
 * of struct isc_socket through the _set_state() macro.
 */
enum {
	SOCK_INITIALIZED = 0,	/* Socket initialized */
	SOCK_OPEN,		/* Socket opened but nothing yet to do */
	SOCK_DATA,		/* Socket sending or receiving data */
	SOCK_LISTEN,		/* TCP socket listening for connects */
	SOCK_ACCEPT,		/* TCP socket is waiting to accept */
	SOCK_CONNECT,		/* TCP socket connecting */
	SOCK_CLOSED		/* Socket has been closed */
};
188 
189 #define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
190 #define VALID_SOCKET(t)		ISC_MAGIC_VALID(t, SOCKET_MAGIC)
191 
192 /*
193  * IPv6 control information.  If the socket is an IPv6 socket we want
194  * to collect the destination address and interface so the client can
195  * set them on outgoing packets.
196  */
197 #ifdef ISC_PLATFORM_HAVEIPV6
198 #ifndef USE_CMSG
199 #define USE_CMSG	1
200 #endif
201 #endif
202 
203 /*
 * We really don't want to try to use these control messages. Win32
205  * doesn't have this mechanism before XP.
206  */
207 #undef USE_CMSG
208 
209 /*
210  * Message header for recvmsg and sendmsg calls.
211  * Used value-result for recvmsg, value only for sendmsg.
212  */
213 struct msghdr {
214 	SOCKADDR_STORAGE to_addr;	/* UDP send/recv address */
215 	int      to_addr_len;		/* length of the address */
216 	WSABUF  *msg_iov;		/* scatter/gather array */
217 	u_int   msg_iovlen;             /* # elements in msg_iov */
218 	void	*msg_control;           /* ancillary data, see below */
219 	u_int   msg_controllen;         /* ancillary data buffer len */
220 	u_int	msg_totallen;		/* total length of this message */
221 } msghdr;
222 
223 /*
224  * The size to raise the receive buffer to.
225  */
226 #define RCVBUFSIZE (32*1024)
227 
228 /*
229  * The number of times a send operation is repeated if the result
230  * is WSAEINTR.
231  */
232 #define NRETRIES 10
233 
234 struct isc_socket {
235 	/* Not locked. */
236 	unsigned int		magic;
237 	isc_socketmgr_t	       *manager;
238 	isc_mutex_t		lock;
239 	isc_sockettype_t	type;
240 
241 	/* Pointers to scatter/gather buffers */
242 	WSABUF			iov[ISC_SOCKET_MAXSCATTERGATHER];
243 
244 	/* Locked by socket lock. */
245 	ISC_LINK(isc_socket_t)	link;
246 	unsigned int		references; /* EXTERNAL references */
247 	SOCKET			fd;	/* file handle */
248 	int			pf;	/* protocol family */
249 	char			name[16];
250 	void *			tag;
251 
252 	/*
253 	 * Each recv() call uses this buffer.  It is a per-socket receive
254 	 * buffer that allows us to decouple the system recv() from the
255 	 * recv_list done events.  This means the items on the recv_list
256 	 * can be removed without having to cancel pending system recv()
257 	 * calls.  It also allows us to read-ahead in some cases.
258 	 */
259 	struct {
260 		SOCKADDR_STORAGE	from_addr;	   // UDP send/recv address
261 		int		from_addr_len;	   // length of the address
262 		char		*base;		   // the base of the buffer
263 		char		*consume_position; // where to start copying data from next
264 		unsigned int	len;		   // the actual size of this buffer
265 		unsigned int	remaining;	   // the number of bytes remaining
266 	} recvbuf;
267 
268 	ISC_LIST(isc_socketevent_t)		send_list;
269 	ISC_LIST(isc_socketevent_t)		recv_list;
270 	ISC_LIST(isc_socket_newconnev_t)	accept_list;
271 	isc_socket_connev_t		       *connect_ev;
272 
273 	isc_sockaddr_t		address;  /* remote address */
274 
275 	unsigned int		listener : 1,	/* listener socket */
276 				connected : 1,
277 				pending_connect : 1, /* connect pending */
278 				bound : 1,	/* bound to local addr */
279 				dupped : 1;     /* created by isc_socket_dup() */
280 	unsigned int		pending_iocp;	/* Should equal the counters below. Debug. */
281 	unsigned int		pending_recv;  /* Number of outstanding recv() calls. */
282 	unsigned int		pending_send;  /* Number of outstanding send() calls. */
283 	unsigned int		pending_accept; /* Number of outstanding accept() calls. */
284 	unsigned int		state; /* Socket state. Debugging and consistency checking. */
285 	int			state_lineno;  /* line which last touched state */
286 };
287 
/*
 * Record a new socket state together with the source line that set it.
 */
#define _set_state(sock, _state) \
	do { \
		(sock)->state = (_state); \
		(sock)->state_lineno = __LINE__; \
	} while (/*CONSTCOND*/0)
289 
290 /*
291  * Buffer structure
292  */
293 typedef struct buflist buflist_t;
294 
295 struct buflist {
296 	void			*buf;
297 	unsigned int		buflen;
298 	ISC_LINK(buflist_t)	link;
299 };
300 
301 /*
302  * I/O Completion ports Info structures
303  */
304 
305 static HANDLE hHeapHandle = NULL;
306 typedef struct IoCompletionInfo {
307 	OVERLAPPED		overlapped;
308 	isc_socketevent_t	*dev;  /* send()/recv() done event */
309 	isc_socket_connev_t	*cdev; /* connect() done event */
310 	isc_socket_newconnev_t	*adev; /* accept() done event */
311 	void			*acceptbuffer;
312 	DWORD			received_bytes;
313 	int			request_type;
314 	struct msghdr		messagehdr;
315 	ISC_LIST(buflist_t)	bufferlist;	/*%< list of buffers */
316 } IoCompletionInfo;
317 
318 /*
319  * Define a maximum number of I/O Completion Port worker threads
320  * to handle the load on the Completion Port. The actual number
 * used is the number of CPUs + 1.
322  */
323 #define MAX_IOCPTHREADS 20
324 
325 #define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
326 #define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
327 
328 struct isc_socketmgr {
329 	/* Not locked. */
330 	unsigned int			magic;
331 	isc_mem_t		       *mctx;
332 	isc_mutex_t			lock;
333 	isc_stats_t		       *stats;
334 
335 	/* Locked by manager lock. */
336 	ISC_LIST(isc_socket_t)		socklist;
337 	isc_boolean_t			bShutdown;
338 	isc_condition_t			shutdown_ok;
339 	HANDLE				hIoCompletionPort;
340 	int				maxIOCPThreads;
341 	HANDLE				hIOCPThreads[MAX_IOCPTHREADS];
342 	DWORD				dwIOCPThreadIds[MAX_IOCPTHREADS];
343 
344 	/*
345 	 * Debugging.
346 	 * Modified by InterlockedIncrement() and InterlockedDecrement()
347 	 */
348 	LONG				totalSockets;
349 	LONG				iocp_total;
350 };
351 
/*
 * Request codes stored in IoCompletionInfo.request_type.
 */
enum {
	SOCKET_RECV = 0,
	SOCKET_SEND = 1,
	SOCKET_ACCEPT = 2,
	SOCKET_CONNECT = 3
};
358 
359 /*
360  * send() and recv() iovec counts
361  */
362 #define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
363 #define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)
364 
365 static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
366 				  isc_sockettype_t type,
367 				  isc_socket_t **socketp,
368 				  isc_socket_t *dup_socket);
369 static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
370 static void maybe_free_socket(isc_socket_t **, int);
371 static void free_socket(isc_socket_t **, int);
372 static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
373 static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
374 static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
375 static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
376 static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
377 static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
378 static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
379 static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
380 static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
381 static void queue_receive_request(isc_socket_t *sock);
382 
383 /*
384  * This is used to dump the contents of the sock structure
385  * You should make sure that the sock is locked before
386  * dumping it. Since the code uses simple printf() statements
387  * it should only be used interactively.
388  */
389 void
sock_dump(isc_socket_t * sock)390 sock_dump(isc_socket_t *sock) {
391 	isc_socketevent_t *ldev;
392 	isc_socket_newconnev_t *ndev;
393 
394 #if 0
395 	isc_sockaddr_t addr;
396 	char socktext[ISC_SOCKADDR_FORMATSIZE];
397 	isc_result_t result;
398 
399 	result = isc_socket_getpeername(sock, &addr);
400 	if (result == ISC_R_SUCCESS) {
401 		isc_sockaddr_format(&addr, socktext, sizeof(socktext));
402 		printf("Remote Socket: %s\n", socktext);
403 	}
404 	result = isc_socket_getsockname(sock, &addr);
405 	if (result == ISC_R_SUCCESS) {
406 		isc_sockaddr_format(&addr, socktext, sizeof(socktext));
407 		printf("This Socket: %s\n", socktext);
408 	}
409 #endif
410 
411 	printf("\n\t\tSock Dump\n");
412 	printf("\t\tfd: %u\n", sock->fd);
413 	printf("\t\treferences: %d\n", sock->references);
414 	printf("\t\tpending_accept: %d\n", sock->pending_accept);
415 	printf("\t\tconnecting: %d\n", sock->pending_connect);
416 	printf("\t\tconnected: %d\n", sock->connected);
417 	printf("\t\tbound: %d\n", sock->bound);
418 	printf("\t\tpending_iocp: %d\n", sock->pending_iocp);
419 	printf("\t\tsocket type: %d\n", sock->type);
420 
421 	printf("\n\t\tSock Recv List\n");
422 	ldev = ISC_LIST_HEAD(sock->recv_list);
423 	while (ldev != NULL) {
424 		printf("\t\tdev: %p\n", ldev);
425 		ldev = ISC_LIST_NEXT(ldev, ev_link);
426 	}
427 
428 	printf("\n\t\tSock Send List\n");
429 	ldev = ISC_LIST_HEAD(sock->send_list);
430 	while (ldev != NULL) {
431 		printf("\t\tdev: %p\n", ldev);
432 		ldev = ISC_LIST_NEXT(ldev, ev_link);
433 	}
434 
435 	printf("\n\t\tSock Accept List\n");
436 	ndev = ISC_LIST_HEAD(sock->accept_list);
437 	while (ndev != NULL) {
438 		printf("\t\tdev: %p\n", ldev);
439 		ndev = ISC_LIST_NEXT(ndev, ev_link);
440 	}
441 }
442 
443 static void
444 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
445 	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
446 	   isc_msgcat_t *msgcat, int msgset, int message,
447 	   const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
448 
449 /*  This function will add an entry to the I/O completion port
450  *  that will signal the I/O thread to exit (gracefully)
451  */
452 static void
signal_iocompletionport_exit(isc_socketmgr_t * manager)453 signal_iocompletionport_exit(isc_socketmgr_t *manager) {
454 	int i;
455 	int errval;
456 	char strbuf[ISC_STRERRORSIZE];
457 
458 	REQUIRE(VALID_MANAGER(manager));
459 	for (i = 0; i < manager->maxIOCPThreads; i++) {
460 		if (!PostQueuedCompletionStatus(manager->hIoCompletionPort,
461 						0, 0, 0)) {
462 			errval = GetLastError();
463 			isc__strerror(errval, strbuf, sizeof(strbuf));
464 			FATAL_ERROR(__FILE__, __LINE__,
465 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
466 				ISC_MSG_FAILED,
467 				"Can't request service thread to exit: %s"),
468 				strbuf);
469 		}
470 	}
471 }
472 
473 /*
474  * Create the worker threads for the I/O Completion Port
475  */
476 void
iocompletionport_createthreads(int total_threads,isc_socketmgr_t * manager)477 iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
478 	int errval;
479 	char strbuf[ISC_STRERRORSIZE];
480 	int i;
481 
482 	INSIST(total_threads > 0);
483 	REQUIRE(VALID_MANAGER(manager));
484 	/*
485 	 * We need at least one
486 	 */
487 	for (i = 0; i < total_threads; i++) {
488 		manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
489 						manager, 0,
490 						&manager->dwIOCPThreadIds[i]);
491 		if (manager->hIOCPThreads[i] == NULL) {
492 			errval = GetLastError();
493 			isc__strerror(errval, strbuf, sizeof(strbuf));
494 			FATAL_ERROR(__FILE__, __LINE__,
495 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
496 				ISC_MSG_FAILED,
497 				"Can't create IOCP thread: %s"),
498 				strbuf);
499 			exit(1);
500 		}
501 	}
502 }
503 
504 /*
505  *  Create/initialise the I/O completion port
506  */
507 void
iocompletionport_init(isc_socketmgr_t * manager)508 iocompletionport_init(isc_socketmgr_t *manager) {
509 	int errval;
510 	char strbuf[ISC_STRERRORSIZE];
511 
512 	REQUIRE(VALID_MANAGER(manager));
513 	/*
514 	 * Create a private heap to handle the socket overlapped structure
515 	 * The minimum number of structures is 10, there is no maximum
516 	 */
517 	hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
518 	if (hHeapHandle == NULL) {
519 		errval = GetLastError();
520 		isc__strerror(errval, strbuf, sizeof(strbuf));
521 		FATAL_ERROR(__FILE__, __LINE__,
522 			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
523 					   ISC_MSG_FAILED,
524 					   "HeapCreate() failed during "
525 					   "initialization: %s"),
526 			    strbuf);
527 		exit(1);
528 	}
529 
530 	manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
531 
532 	/* Now Create the Completion Port */
533 	manager->hIoCompletionPort = CreateIoCompletionPort(
534 			INVALID_HANDLE_VALUE, NULL,
535 			0, manager->maxIOCPThreads);
536 	if (manager->hIoCompletionPort == NULL) {
537 		errval = GetLastError();
538 		isc__strerror(errval, strbuf, sizeof(strbuf));
539 		FATAL_ERROR(__FILE__, __LINE__,
540 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
541 				ISC_MSG_FAILED,
542 				"CreateIoCompletionPort() failed "
543 				"during initialization: %s"),
544 				strbuf);
545 		exit(1);
546 	}
547 
548 	/*
549 	 * Worker threads for servicing the I/O
550 	 */
551 	iocompletionport_createthreads(manager->maxIOCPThreads, manager);
552 }
553 
554 /*
555  * Associate a socket with an IO Completion Port.  This allows us to queue events for it
556  * and have our worker pool of threads process them.
557  */
558 void
iocompletionport_update(isc_socket_t * sock)559 iocompletionport_update(isc_socket_t *sock) {
560 	HANDLE hiocp;
561 	char strbuf[ISC_STRERRORSIZE];
562 
563 	REQUIRE(VALID_SOCKET(sock));
564 
565 	hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
566 		sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
567 
568 	if (hiocp == NULL) {
569 		DWORD errval = GetLastError();
570 		isc__strerror(errval, strbuf, sizeof(strbuf));
571 		isc_log_iwrite(isc_lctx,
572 				ISC_LOGCATEGORY_GENERAL,
573 				ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
574 				isc_msgcat, ISC_MSGSET_SOCKET,
575 				ISC_MSG_TOOMANYHANDLES,
576 				"iocompletionport_update: failed to open"
577 				" io completion port: %s",
578 				strbuf);
579 
580 		/* XXXMLG temporary hack to make failures detected.
581 		 * This function should return errors to the caller, not
582 		 * exit here.
583 		 */
584 		FATAL_ERROR(__FILE__, __LINE__,
585 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
586 				ISC_MSG_FAILED,
587 				"CreateIoCompletionPort() failed "
588 				"during initialization: %s"),
589 				strbuf);
590 		exit(1);
591 	}
592 
593 	InterlockedIncrement(&sock->manager->iocp_total);
594 }
595 
596 /*
597  * Routine to cleanup and then close the socket.
598  * Only close the socket here if it is NOT associated
599  * with an event, otherwise the WSAWaitForMultipleEvents
600  * may fail due to the fact that the Wait should not
601  * be running while closing an event or a socket.
602  * The socket is locked before calling this function
603  */
604 void
socket_close(isc_socket_t * sock)605 socket_close(isc_socket_t *sock) {
606 
607 	REQUIRE(sock != NULL);
608 
609 	if (sock->fd != INVALID_SOCKET) {
610 		closesocket(sock->fd);
611 		sock->fd = INVALID_SOCKET;
612 		_set_state(sock, SOCK_CLOSED);
613 		InterlockedDecrement(&sock->manager->totalSockets);
614 	}
615 }
616 
617 static isc_once_t initialise_once = ISC_ONCE_INIT;
618 static isc_boolean_t initialised = ISC_FALSE;
619 
620 static void
initialise(void)621 initialise(void) {
622 	WORD wVersionRequested;
623 	WSADATA wsaData;
624 	int err;
625 	SOCKET sock;
626 	GUID GUIDConnectEx = WSAID_CONNECTEX;
627 	GUID GUIDAcceptEx = WSAID_ACCEPTEX;
628 	GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
629 	DWORD dwBytes;
630 
631 	/* Need Winsock 2.2 or better */
632 	wVersionRequested = MAKEWORD(2, 2);
633 
634 	err = WSAStartup(wVersionRequested, &wsaData);
635 	if (err != 0) {
636 		char strbuf[ISC_STRERRORSIZE];
637 		isc__strerror(err, strbuf, sizeof(strbuf));
638 		FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
639 			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
640 					   ISC_MSG_FAILED, "failed"),
641 			    strbuf);
642 		exit(1);
643 	}
644 	/*
645 	 * The following APIs do not exist as functions in a library, but we must
646 	 * ask winsock for them.  They are "extensions" -- but why they cannot be
647 	 * actual functions is beyond me.  So, ask winsock for the pointers to the
648 	 * functions we need.
649 	 */
650 	sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
651 	INSIST(sock != INVALID_SOCKET);
652 	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
653 		 &GUIDConnectEx, sizeof(GUIDConnectEx),
654 		 &ISCConnectEx, sizeof(ISCConnectEx),
655 		 &dwBytes, NULL, NULL);
656 	INSIST(err == 0);
657 
658 	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
659 		 &GUIDAcceptEx, sizeof(GUIDAcceptEx),
660 		 &ISCAcceptEx, sizeof(ISCAcceptEx),
661 		 &dwBytes, NULL, NULL);
662 	INSIST(err == 0);
663 
664 	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
665 		 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
666 		 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
667 		 &dwBytes, NULL, NULL);
668 	INSIST(err == 0);
669 
670 	closesocket(sock);
671 
672 	initialised = ISC_TRUE;
673 }
674 
675 /*
676  * Initialize socket services
677  */
678 void
InitSockets(void)679 InitSockets(void) {
680 	RUNTIME_CHECK(isc_once_do(&initialise_once,
681 				  initialise) == ISC_R_SUCCESS);
682 	if (!initialised)
683 		exit(1);
684 }
685 
686 int
internal_sendmsg(isc_socket_t * sock,IoCompletionInfo * lpo,struct msghdr * messagehdr,int flags,int * Error)687 internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
688 		 struct msghdr *messagehdr, int flags, int *Error)
689 {
690 	int Result;
691 	DWORD BytesSent;
692 	DWORD Flags = flags;
693 	int total_sent;
694 
695 	*Error = 0;
696 	Result = WSASendTo(sock->fd, messagehdr->msg_iov,
697 			   messagehdr->msg_iovlen, &BytesSent,
698 			   Flags, (SOCKADDR *)&messagehdr->to_addr,
699 			   messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
700 			   NULL);
701 
702 	total_sent = (int)BytesSent;
703 
704 	/* Check for errors.*/
705 	if (Result == SOCKET_ERROR) {
706 		*Error = WSAGetLastError();
707 
708 		switch (*Error) {
709 		case WSA_IO_INCOMPLETE:
710 		case WSA_WAIT_IO_COMPLETION:
711 		case WSA_IO_PENDING:
712 		case NO_ERROR:		/* Strange, but okay */
713 			sock->pending_iocp++;
714 			sock->pending_send++;
715 			break;
716 
717 		default:
718 			return (-1);
719 			break;
720 		}
721 	} else {
722 		sock->pending_iocp++;
723 		sock->pending_send++;
724 	}
725 
726 	if (lpo != NULL)
727 		return (0);
728 	else
729 		return (total_sent);
730 }
731 
732 static void
queue_receive_request(isc_socket_t * sock)733 queue_receive_request(isc_socket_t *sock) {
734 	DWORD Flags = 0;
735 	DWORD NumBytes = 0;
736 	int Result;
737 	int Error;
738 	int need_retry;
739 	WSABUF iov[1];
740 	IoCompletionInfo *lpo = NULL;
741 	isc_result_t isc_result;
742 
743  retry:
744 	need_retry = ISC_FALSE;
745 
746 	/*
747 	 * If we already have a receive pending, do nothing.
748 	 */
749 	if (sock->pending_recv > 0) {
750 		if (lpo != NULL)
751 			HeapFree(hHeapHandle, 0, lpo);
752 		return;
753 	}
754 
755 	/*
756 	 * If no one is waiting, do nothing.
757 	 */
758 	if (ISC_LIST_EMPTY(sock->recv_list)) {
759 		if (lpo != NULL)
760 			HeapFree(hHeapHandle, 0, lpo);
761 		return;
762 	}
763 
764 	INSIST(sock->recvbuf.remaining == 0);
765 	INSIST(sock->fd != INVALID_SOCKET);
766 
767 	iov[0].len = sock->recvbuf.len;
768 	iov[0].buf = sock->recvbuf.base;
769 
770 	if (lpo == NULL) {
771 		lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
772 						    HEAP_ZERO_MEMORY,
773 						    sizeof(IoCompletionInfo));
774 		RUNTIME_CHECK(lpo != NULL);
775 	} else
776 		ZeroMemory(lpo, sizeof(IoCompletionInfo));
777 	lpo->request_type = SOCKET_RECV;
778 
779 	sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
780 
781 	Error = 0;
782 	Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
783 			     &NumBytes, &Flags,
784 			     (SOCKADDR *)&sock->recvbuf.from_addr,
785 			     &sock->recvbuf.from_addr_len,
786 			     (LPWSAOVERLAPPED)lpo, NULL);
787 
788 	/* Check for errors. */
789 	if (Result == SOCKET_ERROR) {
790 		Error = WSAGetLastError();
791 
792 		switch (Error) {
793 		case WSA_IO_PENDING:
794 			sock->pending_iocp++;
795 			sock->pending_recv++;
796 			break;
797 
798 		/* direct error: no completion event */
799 		case ERROR_HOST_UNREACHABLE:
800 		case WSAENETRESET:
801 		case WSAECONNRESET:
802 			if (!sock->connected) {
803 				/* soft error */
804 				need_retry = ISC_TRUE;
805 				break;
806 			}
807 			/* FALLTHROUGH */
808 
809 		default:
810 			isc_result = isc__errno2result(Error);
811 			if (isc_result == ISC_R_UNEXPECTED)
812 				UNEXPECTED_ERROR(__FILE__, __LINE__,
813 					"WSARecvFrom: Windows error code: %d, isc result %d",
814 					Error, isc_result);
815 			send_recvdone_abort(sock, isc_result);
816 			HeapFree(hHeapHandle, 0, lpo);
817 			lpo = NULL;
818 			break;
819 		}
820 	} else {
821 		/*
822 		 * The recv() finished immediately, but we will still get
823 		 * a completion event.  Rather than duplicate code, let
824 		 * that thread handle sending the data along its way.
825 		 */
826 		sock->pending_iocp++;
827 		sock->pending_recv++;
828 	}
829 
830 	socket_log(__LINE__, sock, NULL, IOEVENT,
831 		   isc_msgcat, ISC_MSGSET_SOCKET,
832 		   ISC_MSG_DOIORECV,
833 		   "queue_io_request: fd %d result %d error %d",
834 		   sock->fd, Result, Error);
835 
836 	CONSISTENT(sock);
837 
838 	if (need_retry)
839 		goto retry;
840 }
841 
842 static void
manager_log(isc_socketmgr_t * sockmgr,isc_logcategory_t * category,isc_logmodule_t * module,int level,const char * fmt,...)843 manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
844 	    isc_logmodule_t *module, int level, const char *fmt, ...)
845 {
846 	char msgbuf[2048];
847 	va_list ap;
848 
849 	if (!isc_log_wouldlog(isc_lctx, level))
850 		return;
851 
852 	va_start(ap, fmt);
853 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
854 	va_end(ap);
855 
856 	isc_log_write(isc_lctx, category, module, level,
857 		      "sockmgr %p: %s", sockmgr, msgbuf);
858 }
859 
860 static void
socket_log(int lineno,isc_socket_t * sock,isc_sockaddr_t * address,isc_logcategory_t * category,isc_logmodule_t * module,int level,isc_msgcat_t * msgcat,int msgset,int message,const char * fmt,...)861 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
862 	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
863 	   isc_msgcat_t *msgcat, int msgset, int message,
864 	   const char *fmt, ...)
865 {
866 	char msgbuf[2048];
867 	char peerbuf[256];
868 	va_list ap;
869 
870 
871 	if (!isc_log_wouldlog(isc_lctx, level))
872 		return;
873 
874 	va_start(ap, fmt);
875 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
876 	va_end(ap);
877 
878 	if (address == NULL) {
879 		isc_log_iwrite(isc_lctx, category, module, level,
880 			       msgcat, msgset, message,
881 			       "socket %p line %d: %s", sock, lineno, msgbuf);
882 	} else {
883 		isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
884 		isc_log_iwrite(isc_lctx, category, module, level,
885 			       msgcat, msgset, message,
886 				   "socket %p line %d peer %s: %s", sock, lineno,
887 				   peerbuf, msgbuf);
888 	}
889 
890 }
891 
892 /*
893  * Make an fd SOCKET non-blocking.
894  */
895 static isc_result_t
make_nonblock(SOCKET fd)896 make_nonblock(SOCKET fd) {
897 	int ret;
898 	unsigned long flags = 1;
899 	char strbuf[ISC_STRERRORSIZE];
900 
901 	/* Set the socket to non-blocking */
902 	ret = ioctlsocket(fd, FIONBIO, &flags);
903 
904 	if (ret == -1) {
905 		isc__strerror(errno, strbuf, sizeof(strbuf));
906 		UNEXPECTED_ERROR(__FILE__, __LINE__,
907 				 "ioctlsocket(%d, FIOBIO, %d): %s",
908 				 fd, flags, strbuf);
909 
910 		return (ISC_R_UNEXPECTED);
911 	}
912 
913 	return (ISC_R_SUCCESS);
914 }
915 
916 /*
917  * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom
918  * to not work correctly, returning a WSACONNRESET error when a WSASendTo
919  * fails with an "ICMP port unreachable" response and preventing the
920  * socket from using the WSARecvFrom in subsequent operations.
921  * The function below fixes this, but requires that Windows 2000
922  * Service Pack 2 or later be installed on the system.  NT 4.0
923  * systems are not affected by this and work correctly.
924  * See Microsoft Knowledge Base Article Q263823 for details of this.
925  */
926 isc_result_t
connection_reset_fix(SOCKET fd)927 connection_reset_fix(SOCKET fd) {
928 	DWORD dwBytesReturned = 0;
929 	BOOL  bNewBehavior = FALSE;
930 	DWORD status;
931 
932 	if (isc_win32os_versioncheck(5, 0, 0, 0) < 0)
933 		return (ISC_R_SUCCESS); /*  NT 4.0 has no problem */
934 
935 	/* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
936 	status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
937 			  sizeof(bNewBehavior), NULL, 0,
938 			  &dwBytesReturned, NULL, NULL);
939 	if (status != SOCKET_ERROR)
940 		return (ISC_R_SUCCESS);
941 	else {
942 		UNEXPECTED_ERROR(__FILE__, __LINE__,
943 				 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
944 				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
945 						ISC_MSG_FAILED, "failed"));
946 		return (ISC_R_UNEXPECTED);
947 	}
948 }
949 
950 /*
951  * Construct an iov array and attach it to the msghdr passed in.  This is
952  * the SEND constructor, which will use the used region of the buffer
953  * (if using a buffer list) or will use the internal region (if a single
954  * buffer I/O is requested).
955  *
956  * Nothing can be NULL, and the done event must list at least one buffer
957  * on the buffer linked list for this function to be meaningful.
958  */
959 static void
build_msghdr_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * msg,char * cmsg,WSABUF * iov,IoCompletionInfo * lpo)960 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
961 		  struct msghdr *msg, char *cmsg, WSABUF *iov,
962 		  IoCompletionInfo  *lpo)
963 {
964 	unsigned int iovcount;
965 	isc_buffer_t *buffer;
966 	buflist_t  *cpbuffer;
967 	isc_region_t used;
968 	size_t write_count;
969 	size_t skip_count;
970 
971 	memset(msg, 0, sizeof(*msg));
972 
973 	memmove(&msg->to_addr, &dev->address.type, dev->address.length);
974 	msg->to_addr_len = dev->address.length;
975 
976 	buffer = ISC_LIST_HEAD(dev->bufferlist);
977 	write_count = 0;
978 	iovcount = 0;
979 
980 	/*
981 	 * Single buffer I/O?  Skip what we've done so far in this region.
982 	 */
983 	if (buffer == NULL) {
984 		write_count = dev->region.length - dev->n;
985 		cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
986 		RUNTIME_CHECK(cpbuffer != NULL);
987 		cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
988 		RUNTIME_CHECK(cpbuffer->buf != NULL);
989 
990 		socket_log(__LINE__, sock, NULL, TRACE,
991 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
992 		   "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
993 		   cpbuffer->buf, write_count);
994 
995 		memmove(cpbuffer->buf,(dev->region.base + dev->n), write_count);
996 		cpbuffer->buflen = (unsigned int)write_count;
997 		ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
998 		iov[0].buf = cpbuffer->buf;
999 		iov[0].len = (u_long)write_count;
1000 		iovcount = 1;
1001 
1002 		goto config;
1003 	}
1004 
1005 	/*
1006 	 * Multibuffer I/O.
1007 	 * Skip the data in the buffer list that we have already written.
1008 	 */
1009 	skip_count = dev->n;
1010 	while (buffer != NULL) {
1011 		REQUIRE(ISC_BUFFER_VALID(buffer));
1012 		if (skip_count < isc_buffer_usedlength(buffer))
1013 			break;
1014 		skip_count -= isc_buffer_usedlength(buffer);
1015 		buffer = ISC_LIST_NEXT(buffer, link);
1016 	}
1017 
1018 	while (buffer != NULL) {
1019 		INSIST(iovcount < MAXSCATTERGATHER_SEND);
1020 
1021 		isc_buffer_usedregion(buffer, &used);
1022 
1023 		if (used.length > 0) {
1024 			int uselen = (int)(used.length - skip_count);
1025 			cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
1026 			RUNTIME_CHECK(cpbuffer != NULL);
1027 			cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
1028 			RUNTIME_CHECK(cpbuffer->buf != NULL);
1029 
1030 			socket_log(__LINE__, sock, NULL, TRACE,
1031 			   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1032 			   "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
1033 			   cpbuffer->buf, write_count);
1034 
1035 			memmove(cpbuffer->buf,(used.base + skip_count), uselen);
1036 			cpbuffer->buflen = uselen;
1037 			iov[iovcount].buf = cpbuffer->buf;
1038 			iov[iovcount].len = (u_long)(used.length - skip_count);
1039 			write_count += uselen;
1040 			skip_count = 0;
1041 			iovcount++;
1042 		}
1043 		buffer = ISC_LIST_NEXT(buffer, link);
1044 	}
1045 
1046 	INSIST(skip_count == 0);
1047 
1048  config:
1049 	msg->msg_iov = iov;
1050 	msg->msg_iovlen = iovcount;
1051 	msg->msg_totallen = (u_int)write_count;
1052 }
1053 
1054 static void
set_dev_address(isc_sockaddr_t * address,isc_socket_t * sock,isc_socketevent_t * dev)1055 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1056 		isc_socketevent_t *dev)
1057 {
1058 	if (sock->type == isc_sockettype_udp) {
1059 		if (address != NULL)
1060 			dev->address = *address;
1061 		else
1062 			dev->address = sock->address;
1063 	} else if (sock->type == isc_sockettype_tcp) {
1064 		INSIST(address == NULL);
1065 		dev->address = sock->address;
1066 	}
1067 }
1068 
1069 static void
destroy_socketevent(isc_event_t * event)1070 destroy_socketevent(isc_event_t *event) {
1071 	isc_socketevent_t *ev = (isc_socketevent_t *)event;
1072 
1073 	INSIST(ISC_LIST_EMPTY(ev->bufferlist));
1074 
1075 	(ev->destroy)(event);
1076 }
1077 
1078 static isc_socketevent_t *
allocate_socketevent(isc_mem_t * mctx,isc_socket_t * sock,isc_eventtype_t eventtype,isc_taskaction_t action,void * arg)1079 allocate_socketevent(isc_mem_t *mctx, isc_socket_t *sock,
1080 		     isc_eventtype_t eventtype, isc_taskaction_t action,
1081 		     void *arg)
1082 {
1083 	isc_socketevent_t *ev;
1084 
1085 	ev = (isc_socketevent_t *)isc_event_allocate(mctx, sock, eventtype,
1086 						     action, arg,
1087 						     sizeof(*ev));
1088 	if (ev == NULL)
1089 		return (NULL);
1090 
1091 	ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
1092 	ISC_LINK_INIT(ev, ev_link);
1093 	ISC_LIST_INIT(ev->bufferlist);
1094 	ev->region.base = NULL;
1095 	ev->n = 0;
1096 	ev->offset = 0;
1097 	ev->attributes = 0;
1098 	ev->destroy = ev->ev_destroy;
1099 	ev->ev_destroy = destroy_socketevent;
1100 	ev->dscp = 0;
1101 
1102 	return (ev);
1103 }
1104 
1105 #if defined(ISC_SOCKET_DEBUG)
1106 static void
dump_msg(struct msghdr * msg,isc_socket_t * sock)1107 dump_msg(struct msghdr *msg, isc_socket_t *sock) {
1108 	unsigned int i;
1109 
1110 	printf("MSGHDR %p, Socket #: %u\n", msg, sock->fd);
1111 	printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen);
1112 	printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen);
1113 	for (i = 0; i < (unsigned int)msg->msg_iovlen; i++)
1114 		printf("\t\t%u\tbase %p, len %u\n", i,
1115 		       msg->msg_iov[i].buf, msg->msg_iov[i].len);
1116 }
1117 #endif
1118 
1119 /*
1120  * map the error code
1121  */
1122 int
map_socket_error(isc_socket_t * sock,int windows_errno,int * isc_errno,char * errorstring,size_t bufsize)1123 map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
1124 		 char *errorstring, size_t bufsize) {
1125 
1126 	int doreturn;
1127 	switch (windows_errno) {
1128 	case WSAECONNREFUSED:
1129 		*isc_errno = ISC_R_CONNREFUSED;
1130 		if (sock->connected)
1131 			doreturn = DOIO_HARD;
1132 		else
1133 			doreturn = DOIO_SOFT;
1134 		break;
1135 	case WSAENETUNREACH:
1136 	case ERROR_NETWORK_UNREACHABLE:
1137 		*isc_errno = ISC_R_NETUNREACH;
1138 		if (sock->connected)
1139 			doreturn = DOIO_HARD;
1140 		else
1141 			doreturn = DOIO_SOFT;
1142 		break;
1143 	case ERROR_PORT_UNREACHABLE:
1144 	case ERROR_HOST_UNREACHABLE:
1145 	case WSAEHOSTUNREACH:
1146 		*isc_errno = ISC_R_HOSTUNREACH;
1147 		if (sock->connected)
1148 			doreturn = DOIO_HARD;
1149 		else
1150 			doreturn = DOIO_SOFT;
1151 		break;
1152 	case WSAENETDOWN:
1153 		*isc_errno = ISC_R_NETDOWN;
1154 		if (sock->connected)
1155 			doreturn = DOIO_HARD;
1156 		else
1157 			doreturn = DOIO_SOFT;
1158 		break;
1159 	case WSAEHOSTDOWN:
1160 		*isc_errno = ISC_R_HOSTDOWN;
1161 		if (sock->connected)
1162 			doreturn = DOIO_HARD;
1163 		else
1164 			doreturn = DOIO_SOFT;
1165 		break;
1166 	case WSAEACCES:
1167 		*isc_errno = ISC_R_NOPERM;
1168 		if (sock->connected)
1169 			doreturn = DOIO_HARD;
1170 		else
1171 			doreturn = DOIO_SOFT;
1172 		break;
1173 	case WSAECONNRESET:
1174 	case WSAENETRESET:
1175 	case WSAECONNABORTED:
1176 	case WSAEDISCON:
1177 		*isc_errno = ISC_R_CONNECTIONRESET;
1178 		if (sock->connected)
1179 			doreturn = DOIO_HARD;
1180 		else
1181 			doreturn = DOIO_SOFT;
1182 		break;
1183 	case WSAENOTCONN:
1184 		*isc_errno = ISC_R_NOTCONNECTED;
1185 		if (sock->connected)
1186 			doreturn = DOIO_HARD;
1187 		else
1188 			doreturn = DOIO_SOFT;
1189 		break;
1190 	case ERROR_OPERATION_ABORTED:
1191 	case ERROR_CONNECTION_ABORTED:
1192 	case ERROR_REQUEST_ABORTED:
1193 		*isc_errno = ISC_R_CONNECTIONRESET;
1194 		doreturn = DOIO_HARD;
1195 		break;
1196 	case WSAENOBUFS:
1197 		*isc_errno = ISC_R_NORESOURCES;
1198 		doreturn = DOIO_HARD;
1199 		break;
1200 	case WSAEAFNOSUPPORT:
1201 		*isc_errno = ISC_R_FAMILYNOSUPPORT;
1202 		doreturn = DOIO_HARD;
1203 		break;
1204 	case WSAEADDRNOTAVAIL:
1205 		*isc_errno = ISC_R_ADDRNOTAVAIL;
1206 		doreturn = DOIO_HARD;
1207 		break;
1208 	case WSAEDESTADDRREQ:
1209 		*isc_errno = ISC_R_BADADDRESSFORM;
1210 		doreturn = DOIO_HARD;
1211 		break;
1212 	case ERROR_NETNAME_DELETED:
1213 		*isc_errno = ISC_R_NETDOWN;
1214 		doreturn = DOIO_HARD;
1215 		break;
1216 	default:
1217 		*isc_errno = ISC_R_IOERROR;
1218 		doreturn = DOIO_HARD;
1219 		break;
1220 	}
1221 	if (doreturn == DOIO_HARD) {
1222 		isc__strerror(windows_errno, errorstring, bufsize);
1223 	}
1224 	return (doreturn);
1225 }
1226 
1227 static void
fill_recv(isc_socket_t * sock,isc_socketevent_t * dev)1228 fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
1229 	isc_region_t r;
1230 	int copylen;
1231 	isc_buffer_t *buffer;
1232 
1233 	INSIST(dev->n < dev->minimum);
1234 	INSIST(sock->recvbuf.remaining > 0);
1235 	INSIST(sock->pending_recv == 0);
1236 
1237 	if (sock->type == isc_sockettype_udp) {
1238 		dev->address.length = sock->recvbuf.from_addr_len;
1239 		memmove(&dev->address.type, &sock->recvbuf.from_addr,
1240 			sock->recvbuf.from_addr_len);
1241 		if (isc_sockaddr_getport(&dev->address) == 0) {
1242 			if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1243 				socket_log(__LINE__, sock, &dev->address, IOEVENT,
1244 					   isc_msgcat, ISC_MSGSET_SOCKET,
1245 					   ISC_MSG_ZEROPORT,
1246 					   "dropping source port zero packet");
1247 			}
1248 			sock->recvbuf.remaining = 0;
1249 			return;
1250 		}
1251 	} else if (sock->type == isc_sockettype_tcp) {
1252 		dev->address = sock->address;
1253 	}
1254 
1255 	/*
1256 	 * Run through the list of buffers we were given, and find the
1257 	 * first one with space.  Once it is found, loop through, filling
1258 	 * the buffers as much as possible.
1259 	 */
1260 	buffer = ISC_LIST_HEAD(dev->bufferlist);
1261 	if (buffer != NULL) { // Multi-buffer receive
1262 		while (buffer != NULL && sock->recvbuf.remaining > 0) {
1263 			REQUIRE(ISC_BUFFER_VALID(buffer));
1264 			if (isc_buffer_availablelength(buffer) > 0) {
1265 				isc_buffer_availableregion(buffer, &r);
1266 				copylen = min(r.length,
1267 					      sock->recvbuf.remaining);
1268 				memmove(r.base, sock->recvbuf.consume_position,
1269 					copylen);
1270 				sock->recvbuf.consume_position += copylen;
1271 				sock->recvbuf.remaining -= copylen;
1272 				isc_buffer_add(buffer, copylen);
1273 				dev->n += copylen;
1274 			}
1275 			buffer = ISC_LIST_NEXT(buffer, link);
1276 		}
1277 	} else { // Single-buffer receive
1278 		copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
1279 		memmove(dev->region.base + dev->n,
1280 			sock->recvbuf.consume_position, copylen);
1281 		sock->recvbuf.consume_position += copylen;
1282 		sock->recvbuf.remaining -= copylen;
1283 		dev->n += copylen;
1284 	}
1285 
1286 	/*
1287 	 * UDP receives are all-consuming.  That is, if we have 4k worth of
1288 	 * data in our receive buffer, and the caller only gave us
1289 	 * 1k of space, we will toss the remaining 3k of data.  TCP
1290 	 * will keep the extra data around and use it for later requests.
1291 	 */
1292 	if (sock->type == isc_sockettype_udp)
1293 		sock->recvbuf.remaining = 0;
1294 }
1295 
1296 /*
1297  * Copy out as much data from the internal buffer to done events.
1298  * As each done event is filled, send it along its way.
1299  */
1300 static void
completeio_recv(isc_socket_t * sock)1301 completeio_recv(isc_socket_t *sock)
1302 {
1303 	isc_socketevent_t *dev;
1304 
1305 	/*
1306 	 * If we are in the process of filling our buffer, we cannot
1307 	 * touch it yet, so don't.
1308 	 */
1309 	if (sock->pending_recv > 0)
1310 		return;
1311 
1312 	while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
1313 		dev = ISC_LIST_HEAD(sock->recv_list);
1314 
1315 		/*
1316 		 * See if we have sufficient data in our receive buffer
1317 		 * to handle this.  If we do, copy out the data.
1318 		 */
1319 		fill_recv(sock, dev);
1320 
1321 		/*
1322 		 * Did we satisfy it?
1323 		 */
1324 		if (dev->n >= dev->minimum) {
1325 			dev->result = ISC_R_SUCCESS;
1326 			send_recvdone_event(sock, &dev);
1327 		}
1328 	}
1329 }
1330 
1331 /*
1332  * Returns:
1333  *	DOIO_SUCCESS	The operation succeeded.  dev->result contains
1334  *			ISC_R_SUCCESS.
1335  *
1336  *	DOIO_HARD	A hard or unexpected I/O error was encountered.
1337  *			dev->result contains the appropriate error.
1338  *
1339  *	DOIO_SOFT	A soft I/O error was encountered.  No senddone
1340  *			event was sent.  The operation should be retried.
1341  *
1342  *	No other return values are possible.
1343  */
1344 static int
completeio_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * messagehdr,int cc,int send_errno)1345 completeio_send(isc_socket_t *sock, isc_socketevent_t *dev,
1346 		struct msghdr *messagehdr, int cc, int send_errno)
1347 {
1348 	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1349 	char strbuf[ISC_STRERRORSIZE];
1350 
1351 	if (send_errno != 0) {
1352 		if (SOFT_ERROR(send_errno))
1353 			return (DOIO_SOFT);
1354 
1355 		return (map_socket_error(sock, send_errno, &dev->result,
1356 			strbuf, sizeof(strbuf)));
1357 
1358 		/*
1359 		 * The other error types depend on whether or not the
1360 		 * socket is UDP or TCP.  If it is UDP, some errors
1361 		 * that we expect to be fatal under TCP are merely
1362 		 * annoying, and are really soft errors.
1363 		 *
1364 		 * However, these soft errors are still returned as
1365 		 * a status.
1366 		 */
1367 		isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
1368 		isc__strerror(send_errno, strbuf, sizeof(strbuf));
1369 		UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s",
1370 				 addrbuf, strbuf);
1371 		dev->result = isc__errno2result(send_errno);
1372 		return (DOIO_HARD);
1373 	}
1374 
1375 	/*
1376 	 * If we write less than we expected, update counters, poke.
1377 	 */
1378 	dev->n += cc;
1379 	if (cc != messagehdr->msg_totallen)
1380 		return (DOIO_SOFT);
1381 
1382 	/*
1383 	 * Exactly what we wanted to write.  We're done with this
1384 	 * entry.  Post its completion event.
1385 	 */
1386 	dev->result = ISC_R_SUCCESS;
1387 	return (DOIO_SUCCESS);
1388 }
1389 
1390 static int
startio_send(isc_socket_t * sock,isc_socketevent_t * dev,int * nbytes,int * send_errno)1391 startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
1392 	     int *send_errno)
1393 {
1394 	char *cmsg = NULL;
1395 	char strbuf[ISC_STRERRORSIZE];
1396 	IoCompletionInfo *lpo;
1397 	int status;
1398 	struct msghdr *msghdr;
1399 
1400 	lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
1401 					    HEAP_ZERO_MEMORY,
1402 					    sizeof(IoCompletionInfo));
1403 	RUNTIME_CHECK(lpo != NULL);
1404 	lpo->request_type = SOCKET_SEND;
1405 	lpo->dev = dev;
1406 	msghdr = &lpo->messagehdr;
1407 	memset(msghdr, 0, sizeof(struct msghdr));
1408 	ISC_LIST_INIT(lpo->bufferlist);
1409 
1410 	build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo);
1411 
1412 	*nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno);
1413 
1414 	if (*nbytes <= 0) {
1415 		/*
1416 		 * I/O has been initiated
1417 		 * completion will be through the completion port
1418 		 */
1419 		if (PENDING_ERROR(*send_errno)) {
1420 			status = DOIO_PENDING;
1421 			goto done;
1422 		}
1423 
1424 		if (SOFT_ERROR(*send_errno)) {
1425 			status = DOIO_SOFT;
1426 			goto done;
1427 		}
1428 
1429 		/*
1430 		 * If we got this far then something is wrong
1431 		 */
1432 		if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1433 			isc__strerror(*send_errno, strbuf, sizeof(strbuf));
1434 			socket_log(__LINE__, sock, NULL, IOEVENT,
1435 				   isc_msgcat, ISC_MSGSET_SOCKET,
1436 				   ISC_MSG_INTERNALSEND,
1437 				   "startio_send: internal_sendmsg(%d) %d "
1438 				   "bytes, err %d/%s",
1439 				   sock->fd, *nbytes, *send_errno, strbuf);
1440 		}
1441 		status = DOIO_HARD;
1442 		goto done;
1443 	}
1444 	dev->result = ISC_R_SUCCESS;
1445 	status = DOIO_SOFT;
1446  done:
1447 	_set_state(sock, SOCK_DATA);
1448 	return (status);
1449 }
1450 
1451 static void
use_min_mtu(isc_socket_t * sock)1452 use_min_mtu(isc_socket_t *sock) {
1453 #ifdef IPV6_USE_MIN_MTU
1454 	/* use minimum MTU */
1455 	if (sock->pf == AF_INET6) {
1456 		int on = 1;
1457 		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
1458 				(void *)&on, sizeof(on));
1459 	}
1460 #else
1461 	UNUSED(sock);
1462 #endif
1463 }
1464 
1465 static isc_result_t
allocate_socket(isc_socketmgr_t * manager,isc_sockettype_t type,isc_socket_t ** socketp)1466 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1467 		isc_socket_t **socketp) {
1468 	isc_socket_t *sock;
1469 	isc_result_t result;
1470 
1471 	sock = isc_mem_get(manager->mctx, sizeof(*sock));
1472 
1473 	if (sock == NULL)
1474 		return (ISC_R_NOMEMORY);
1475 
1476 	sock->magic = 0;
1477 	sock->references = 0;
1478 
1479 	sock->manager = manager;
1480 	sock->type = type;
1481 	sock->fd = INVALID_SOCKET;
1482 
1483 	ISC_LINK_INIT(sock, link);
1484 
1485 	/*
1486 	 * Set up list of readers and writers to be initially empty.
1487 	 */
1488 	ISC_LIST_INIT(sock->recv_list);
1489 	ISC_LIST_INIT(sock->send_list);
1490 	ISC_LIST_INIT(sock->accept_list);
1491 	sock->connect_ev = NULL;
1492 	sock->pending_accept = 0;
1493 	sock->pending_recv = 0;
1494 	sock->pending_send = 0;
1495 	sock->pending_iocp = 0;
1496 	sock->listener = 0;
1497 	sock->connected = 0;
1498 	sock->pending_connect = 0;
1499 	sock->bound = 0;
1500 	sock->dupped = 0;
1501 	memset(sock->name, 0, sizeof(sock->name));	// zero the name field
1502 	_set_state(sock, SOCK_INITIALIZED);
1503 
1504 	sock->recvbuf.len = 65536;
1505 	sock->recvbuf.consume_position = sock->recvbuf.base;
1506 	sock->recvbuf.remaining = 0;
1507 	sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
1508 	if (sock->recvbuf.base == NULL) {
1509 		result = ISC_R_NOMEMORY;
1510 		goto error;
1511 	}
1512 
1513 	/*
1514 	 * Initialize the lock.
1515 	 */
1516 	result = isc_mutex_init(&sock->lock);
1517 	if (result != ISC_R_SUCCESS)
1518 		goto error;
1519 
1520 	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1521 		   "allocated");
1522 
1523 	sock->magic = SOCKET_MAGIC;
1524 	*socketp = sock;
1525 
1526 	return (ISC_R_SUCCESS);
1527 
1528  error:
1529 	if (sock->recvbuf.base != NULL)
1530 		isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1531 	isc_mem_put(manager->mctx, sock, sizeof(*sock));
1532 
1533 	return (result);
1534 }
1535 
1536 /*
1537  * Verify that the socket state is consistent.
1538  */
1539 static void
consistent(isc_socket_t * sock)1540 consistent(isc_socket_t *sock) {
1541 
1542 	isc_socketevent_t *dev;
1543 	isc_socket_newconnev_t *nev;
1544 	unsigned int count;
1545 	char *crash_reason;
1546 	isc_boolean_t crash = ISC_FALSE;
1547 
1548 	REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
1549 		+ sock->pending_accept + sock->pending_connect);
1550 
1551 	dev = ISC_LIST_HEAD(sock->send_list);
1552 	count = 0;
1553 	while (dev != NULL) {
1554 		count++;
1555 		dev = ISC_LIST_NEXT(dev, ev_link);
1556 	}
1557 	if (count > sock->pending_send) {
1558 		crash = ISC_TRUE;
1559 		crash_reason = "send_list > sock->pending_send";
1560 	}
1561 
1562 	nev = ISC_LIST_HEAD(sock->accept_list);
1563 	count = 0;
1564 	while (nev != NULL) {
1565 		count++;
1566 		nev = ISC_LIST_NEXT(nev, ev_link);
1567 	}
1568 	if (count > sock->pending_accept) {
1569 		crash = ISC_TRUE;
1570 		crash_reason = "send_list > sock->pending_send";
1571 	}
1572 
1573 	if (crash) {
1574 		socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1575 			   ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
1576 			   crash_reason);
1577 		sock_dump(sock);
1578 		INSIST(crash == ISC_FALSE);
1579 	}
1580 }
1581 
1582 /*
1583  * Maybe free the socket.
1584  *
1585  * This function will verify tht the socket is no longer in use in any way,
1586  * either internally or externally.  This is the only place where this
1587  * check is to be made; if some bit of code believes that IT is done with
1588  * the socket (e.g., some reference counter reaches zero), it should call
1589  * this function.
1590  *
1591  * When calling this function, the socket must be locked, and the manager
1592  * must be unlocked.
1593  *
1594  * When this function returns, *socketp will be NULL.  No tricks to try
1595  * to hold on to this pointer are allowed.
1596  */
1597 static void
maybe_free_socket(isc_socket_t ** socketp,int lineno)1598 maybe_free_socket(isc_socket_t **socketp, int lineno) {
1599 	isc_socket_t *sock = *socketp;
1600 	*socketp = NULL;
1601 
1602 	INSIST(VALID_SOCKET(sock));
1603 	CONSISTENT(sock);
1604 
1605 	if (sock->pending_iocp > 0
1606 	    || sock->pending_recv > 0
1607 	    || sock->pending_send > 0
1608 	    || sock->pending_accept > 0
1609 	    || sock->references > 0
1610 	    || sock->pending_connect == 1
1611 	    || !ISC_LIST_EMPTY(sock->recv_list)
1612 	    || !ISC_LIST_EMPTY(sock->send_list)
1613 	    || !ISC_LIST_EMPTY(sock->accept_list)
1614 	    || sock->fd != INVALID_SOCKET) {
1615 		UNLOCK(&sock->lock);
1616 		return;
1617 	}
1618 	UNLOCK(&sock->lock);
1619 
1620 	free_socket(&sock, lineno);
1621 }
1622 
1623 void
free_socket(isc_socket_t ** sockp,int lineno)1624 free_socket(isc_socket_t **sockp, int lineno) {
1625 	isc_socketmgr_t *manager;
1626 	isc_socket_t *sock = *sockp;
1627 	*sockp = NULL;
1628 
1629 	/*
1630 	 * Seems we can free the socket after all.
1631 	 */
1632 	manager = sock->manager;
1633 	socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat,
1634 		   ISC_MSGSET_SOCKET, ISC_MSG_DESTROYING,
1635 		   "freeing socket line %d fd %d lock %p semaphore %p",
1636 		   lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
1637 
1638 	sock->magic = 0;
1639 	DESTROYLOCK(&sock->lock);
1640 
1641 	if (sock->recvbuf.base != NULL)
1642 		isc_mem_put(manager->mctx, sock->recvbuf.base,
1643 			    sock->recvbuf.len);
1644 
1645 	LOCK(&manager->lock);
1646 	if (ISC_LINK_LINKED(sock, link))
1647 		ISC_LIST_UNLINK(manager->socklist, sock, link);
1648 	isc_mem_put(manager->mctx, sock, sizeof(*sock));
1649 
1650 	if (ISC_LIST_EMPTY(manager->socklist))
1651 		SIGNAL(&manager->shutdown_ok);
1652 	UNLOCK(&manager->lock);
1653 }
1654 
1655 /*
1656  * Create a new 'type' socket managed by 'manager'.  Events
1657  * will be posted to 'task' and when dispatched 'action' will be
1658  * called with 'arg' as the arg value.  The new socket is returned
1659  * in 'socketp'.
1660  */
1661 static isc_result_t
socket_create(isc_socketmgr_t * manager,int pf,isc_sockettype_t type,isc_socket_t ** socketp,isc_socket_t * dup_socket)1662 socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1663 	      isc_socket_t **socketp, isc_socket_t *dup_socket)
1664 {
1665 	isc_socket_t *sock = NULL;
1666 	isc_result_t result;
1667 #if defined(USE_CMSG)
1668 	int on = 1;
1669 #endif
1670 #if defined(SO_RCVBUF)
1671 	ISC_SOCKADDR_LEN_T optlen;
1672 	int size;
1673 #endif
1674 	int socket_errno;
1675 	char strbuf[ISC_STRERRORSIZE];
1676 
1677 	REQUIRE(VALID_MANAGER(manager));
1678 	REQUIRE(socketp != NULL && *socketp == NULL);
1679 	REQUIRE(type != isc_sockettype_fdwatch);
1680 
1681 #ifndef SOCK_RAW
1682 	if (type == isc_sockettype_raw)
1683 		return (ISC_R_NOTIMPLEMENTED);
1684 #endif
1685 
1686 	result = allocate_socket(manager, type, &sock);
1687 	if (result != ISC_R_SUCCESS)
1688 		return (result);
1689 
1690 	sock->pf = pf;
1691 	switch (type) {
1692 	case isc_sockettype_udp:
1693 		sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1694 		if (sock->fd != INVALID_SOCKET) {
1695 			result = connection_reset_fix(sock->fd);
1696 			if (result != ISC_R_SUCCESS) {
1697 				socket_log(__LINE__, sock,
1698 					NULL, EVENT, NULL, 0, 0,
1699 					"closed %d %d %d "
1700 					"con_reset_fix_failed",
1701 					sock->pending_recv,
1702 					sock->pending_send,
1703 					sock->references);
1704 				closesocket(sock->fd);
1705 				_set_state(sock, SOCK_CLOSED);
1706 				sock->fd = INVALID_SOCKET;
1707 				free_socket(&sock, __LINE__);
1708 				return (result);
1709 			}
1710 		}
1711 		break;
1712 	case isc_sockettype_tcp:
1713 		sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1714 		break;
1715 #ifdef SOCK_RAW
1716 	case isc_sockettype_raw:
1717 		sock->fd = socket(pf, SOCK_RAW, 0);
1718 #ifdef PF_ROUTE
1719 		if (pf == PF_ROUTE)
1720 			sock->bound = 1;
1721 #endif
1722 		break;
1723 #endif
1724 	}
1725 
1726 	if (sock->fd == INVALID_SOCKET) {
1727 		socket_errno = WSAGetLastError();
1728 		free_socket(&sock, __LINE__);
1729 
1730 		switch (socket_errno) {
1731 		case WSAEMFILE:
1732 		case WSAENOBUFS:
1733 			return (ISC_R_NORESOURCES);
1734 
1735 		case WSAEPROTONOSUPPORT:
1736 		case WSAEPFNOSUPPORT:
1737 		case WSAEAFNOSUPPORT:
1738 			return (ISC_R_FAMILYNOSUPPORT);
1739 
1740 		default:
1741 			isc__strerror(socket_errno, strbuf, sizeof(strbuf));
1742 			UNEXPECTED_ERROR(__FILE__, __LINE__,
1743 					 "socket() %s: %s",
1744 					 isc_msgcat_get(isc_msgcat,
1745 							ISC_MSGSET_GENERAL,
1746 							ISC_MSG_FAILED,
1747 							"failed"),
1748 					 strbuf);
1749 			return (ISC_R_UNEXPECTED);
1750 		}
1751 	}
1752 
1753 	result = make_nonblock(sock->fd);
1754 	if (result != ISC_R_SUCCESS) {
1755 		socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1756 			"closed %d %d %d make_nonblock_failed",
1757 			sock->pending_recv, sock->pending_send,
1758 			sock->references);
1759 		closesocket(sock->fd);
1760 		sock->fd = INVALID_SOCKET;
1761 		free_socket(&sock, __LINE__);
1762 		return (result);
1763 	}
1764 
1765 	/*
1766 	 * Use minimum mtu if possible.
1767 	 */
1768 	use_min_mtu(sock);
1769 
1770 #if defined(USE_CMSG) || defined(SO_RCVBUF)
1771 	if (type == isc_sockettype_udp) {
1772 
1773 #if defined(USE_CMSG)
1774 #if defined(ISC_PLATFORM_HAVEIPV6)
1775 #ifdef IPV6_RECVPKTINFO
1776 		/* 2292bis */
1777 		if ((pf == AF_INET6)
1778 		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1779 				   (char *)&on, sizeof(on)) < 0)) {
1780 			isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1781 			UNEXPECTED_ERROR(__FILE__, __LINE__,
1782 					 "setsockopt(%d, IPV6_RECVPKTINFO) "
1783 					 "%s: %s", sock->fd,
1784 					 isc_msgcat_get(isc_msgcat,
1785 							ISC_MSGSET_GENERAL,
1786 							ISC_MSG_FAILED,
1787 							"failed"),
1788 					 strbuf);
1789 		}
1790 #else
1791 		/* 2292 */
1792 		if ((pf == AF_INET6)
1793 		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1794 				   (char *)&on, sizeof(on)) < 0)) {
1795 			isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1796 			UNEXPECTED_ERROR(__FILE__, __LINE__,
1797 					 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1798 					 sock->fd,
1799 					 isc_msgcat_get(isc_msgcat,
1800 							ISC_MSGSET_GENERAL,
1801 							ISC_MSG_FAILED,
1802 							"failed"),
1803 					 strbuf);
1804 		}
1805 #endif /* IPV6_RECVPKTINFO */
1806 #endif /* ISC_PLATFORM_HAVEIPV6 */
1807 #endif /* defined(USE_CMSG) */
1808 
1809 #if defined(SO_RCVBUF)
1810 	       optlen = sizeof(size);
1811 	       if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1812 			      (char *)&size, &optlen) >= 0 &&
1813 		    size < RCVBUFSIZE) {
1814 		       size = RCVBUFSIZE;
1815 		       (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1816 					(char *)&size, sizeof(size));
1817 	       }
1818 #endif
1819 
1820 	}
1821 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1822 
1823 	_set_state(sock, SOCK_OPEN);
1824 	sock->references = 1;
1825 	*socketp = sock;
1826 
1827 	iocompletionport_update(sock);
1828 
1829 	if (dup_socket) {
1830 #ifndef ISC_ALLOW_MAPPED
1831 		isc__socket_ipv6only(sock, ISC_TRUE);
1832 #endif
1833 
1834 		if (dup_socket->bound) {
1835 			isc_sockaddr_t local;
1836 
1837 			result = isc__socket_getsockname(dup_socket, &local);
1838 			if (result != ISC_R_SUCCESS) {
1839 				isc_socket_close(sock);
1840 				return (result);
1841 			}
1842 			result = isc__socket_bind(sock, &local,
1843 						  ISC_SOCKET_REUSEADDRESS);
1844 			if (result != ISC_R_SUCCESS) {
1845 				isc_socket_close(sock);
1846 				return (result);
1847 			}
1848 		}
1849 		sock->dupped = 1;
1850 	}
1851 
1852 	/*
1853 	 * Note we don't have to lock the socket like we normally would because
1854 	 * there are no external references to it yet.
1855 	 */
1856 	LOCK(&manager->lock);
1857 	ISC_LIST_APPEND(manager->socklist, sock, link);
1858 	InterlockedIncrement(&manager->totalSockets);
1859 	UNLOCK(&manager->lock);
1860 
1861 	socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat,
1862 		   ISC_MSGSET_SOCKET, ISC_MSG_CREATED,
1863 		   "created %u type %u", sock->fd, type);
1864 
1865 	return (ISC_R_SUCCESS);
1866 }
1867 
1868 isc_result_t
isc__socket_create(isc_socketmgr_t * manager,int pf,isc_sockettype_t type,isc_socket_t ** socketp)1869 isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1870 		   isc_socket_t **socketp)
1871 {
1872 	return (socket_create(manager, pf, type, socketp, NULL));
1873 }
1874 
1875 isc_result_t
isc__socket_dup(isc_socket_t * sock,isc_socket_t ** socketp)1876 isc__socket_dup(isc_socket_t *sock, isc_socket_t **socketp) {
1877 	REQUIRE(VALID_SOCKET(sock));
1878 	REQUIRE(socketp != NULL && *socketp == NULL);
1879 
1880 	return (socket_create(sock->manager, sock->pf, sock->type,
1881 			      socketp, sock));
1882 }
1883 
1884 isc_result_t
isc_socket_open(isc_socket_t * sock)1885 isc_socket_open(isc_socket_t *sock) {
1886 	REQUIRE(VALID_SOCKET(sock));
1887 	REQUIRE(sock->type != isc_sockettype_fdwatch);
1888 
1889 	return (ISC_R_NOTIMPLEMENTED);
1890 }
1891 
1892 /*
1893  * Attach to a socket.  Caller must explicitly detach when it is done.
1894  */
1895 void
isc__socket_attach(isc_socket_t * sock,isc_socket_t ** socketp)1896 isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1897 	REQUIRE(VALID_SOCKET(sock));
1898 	REQUIRE(socketp != NULL && *socketp == NULL);
1899 
1900 	LOCK(&sock->lock);
1901 	CONSISTENT(sock);
1902 	sock->references++;
1903 	UNLOCK(&sock->lock);
1904 
1905 	*socketp = sock;
1906 }
1907 
1908 /*
1909  * Dereference a socket.  If this is the last reference to it, clean things
1910  * up by destroying the socket.
1911  */
1912 void
isc__socket_detach(isc_socket_t ** socketp)1913 isc__socket_detach(isc_socket_t **socketp) {
1914 	isc_socket_t *sock;
1915 
1916 	REQUIRE(socketp != NULL);
1917 	sock = *socketp;
1918 	REQUIRE(VALID_SOCKET(sock));
1919 	REQUIRE(sock->type != isc_sockettype_fdwatch);
1920 
1921 	LOCK(&sock->lock);
1922 	CONSISTENT(sock);
1923 	REQUIRE(sock->references > 0);
1924 	sock->references--;
1925 
1926 	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1927 		"detach_socket %d %d %d",
1928 		sock->pending_recv, sock->pending_send,
1929 		sock->references);
1930 
1931 	if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
1932 		closesocket(sock->fd);
1933 		sock->fd = INVALID_SOCKET;
1934 		_set_state(sock, SOCK_CLOSED);
1935 	}
1936 
1937 	maybe_free_socket(&sock, __LINE__);
1938 
1939 	*socketp = NULL;
1940 }
1941 
1942 isc_result_t
isc_socket_close(isc_socket_t * sock)1943 isc_socket_close(isc_socket_t *sock) {
1944 	REQUIRE(VALID_SOCKET(sock));
1945 	REQUIRE(sock->type != isc_sockettype_fdwatch);
1946 
1947 	return (ISC_R_NOTIMPLEMENTED);
1948 }
1949 
1950 /*
1951  * Dequeue an item off the given socket's read queue, set the result code
1952  * in the done event to the one provided, and send it to the task it was
1953  * destined for.
1954  *
1955  * If the event to be sent is on a list, remove it before sending.  If
1956  * asked to, send and detach from the task as well.
1957  *
1958  * Caller must have the socket locked if the event is attached to the socket.
1959  */
1960 static void
send_recvdone_event(isc_socket_t * sock,isc_socketevent_t ** dev)1961 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1962 	isc_task_t *task;
1963 
1964 	task = (*dev)->ev_sender;
1965 	(*dev)->ev_sender = sock;
1966 
1967 	if (ISC_LINK_LINKED(*dev, ev_link))
1968 		ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1969 
1970 	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1971 	    == ISC_SOCKEVENTATTR_ATTACHED)
1972 		isc_task_sendanddetach(&task, (isc_event_t **)dev);
1973 	else
1974 		isc_task_send(task, (isc_event_t **)dev);
1975 
1976 	CONSISTENT(sock);
1977 }
1978 
1979 /*
1980  * See comments for send_recvdone_event() above.
1981  */
1982 static void
send_senddone_event(isc_socket_t * sock,isc_socketevent_t ** dev)1983 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1984 	isc_task_t *task;
1985 
1986 	INSIST(dev != NULL && *dev != NULL);
1987 
1988 	task = (*dev)->ev_sender;
1989 	(*dev)->ev_sender = sock;
1990 
1991 	if (ISC_LINK_LINKED(*dev, ev_link))
1992 		ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
1993 
1994 	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1995 	    == ISC_SOCKEVENTATTR_ATTACHED)
1996 		isc_task_sendanddetach(&task, (isc_event_t **)dev);
1997 	else
1998 		isc_task_send(task, (isc_event_t **)dev);
1999 
2000 	CONSISTENT(sock);
2001 }
2002 
2003 /*
2004  * See comments for send_recvdone_event() above.
2005  */
2006 static void
send_acceptdone_event(isc_socket_t * sock,isc_socket_newconnev_t ** adev)2007 send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
2008 	isc_task_t *task;
2009 
2010 	INSIST(adev != NULL && *adev != NULL);
2011 
2012 	task = (*adev)->ev_sender;
2013 	(*adev)->ev_sender = sock;
2014 
2015 	if (ISC_LINK_LINKED(*adev, ev_link))
2016 		ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
2017 
2018 	isc_task_sendanddetach(&task, (isc_event_t **)adev);
2019 
2020 	CONSISTENT(sock);
2021 }
2022 
2023 /*
2024  * See comments for send_recvdone_event() above.
2025  */
2026 static void
send_connectdone_event(isc_socket_t * sock,isc_socket_connev_t ** cdev)2027 send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
2028 	isc_task_t *task;
2029 
2030 	INSIST(cdev != NULL && *cdev != NULL);
2031 
2032 	task = (*cdev)->ev_sender;
2033 	(*cdev)->ev_sender = sock;
2034 
2035 	sock->connect_ev = NULL;
2036 
2037 	isc_task_sendanddetach(&task, (isc_event_t **)cdev);
2038 
2039 	CONSISTENT(sock);
2040 }
2041 
2042 /*
2043  * On entry to this function, the event delivered is the internal
2044  * readable event, and the first item on the accept_list should be
2045  * the done event we want to send.  If the list is empty, this is a no-op,
2046  * so just close the new connection, unlock, and return.
2047  *
2048  * Note the socket is locked before entering here
2049  */
2050 static void
internal_accept(isc_socket_t * sock,IoCompletionInfo * lpo,int accept_errno)2051 internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
2052 	isc_socket_newconnev_t *adev;
2053 	isc_result_t result = ISC_R_SUCCESS;
2054 	isc_socket_t *nsock;
2055 	struct sockaddr *localaddr;
2056 	int localaddr_len = sizeof(*localaddr);
2057 	struct sockaddr *remoteaddr;
2058 	int remoteaddr_len = sizeof(*remoteaddr);
2059 
2060 	INSIST(VALID_SOCKET(sock));
2061 	LOCK(&sock->lock);
2062 	CONSISTENT(sock);
2063 
2064 	socket_log(__LINE__, sock, NULL, TRACE,
2065 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2066 		   "internal_accept called");
2067 
2068 	INSIST(sock->listener);
2069 
2070 	INSIST(sock->pending_iocp > 0);
2071 	sock->pending_iocp--;
2072 	INSIST(sock->pending_accept > 0);
2073 	sock->pending_accept--;
2074 
2075 	adev = lpo->adev;
2076 
2077 	/*
2078 	 * If the event is no longer in the list we can just return.
2079 	 */
2080 	if (!acceptdone_is_active(sock, adev))
2081 		goto done;
2082 
2083 	nsock = adev->newsocket;
2084 
2085 	/*
2086 	 * Pull off the done event.
2087 	 */
2088 	ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
2089 
2090 	/*
2091 	 * Extract the addresses from the socket, copy them into the structure,
2092 	 * and return the new socket.
2093 	 */
2094 	ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
2095 		sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16,
2096 		(LPSOCKADDR *)&localaddr, &localaddr_len,
2097 		(LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
2098 	memmove(&adev->address.type, remoteaddr, remoteaddr_len);
2099 	adev->address.length = remoteaddr_len;
2100 	nsock->address = adev->address;
2101 	nsock->pf = adev->address.type.sa.sa_family;
2102 
2103 	socket_log(__LINE__, nsock, &nsock->address, TRACE,
2104 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2105 		   "internal_accept parent %p", sock);
2106 
2107 	result = make_nonblock(adev->newsocket->fd);
2108 	INSIST(result == ISC_R_SUCCESS);
2109 
2110 	/*
2111 	 * Use minimum mtu if possible.
2112 	 */
2113 	use_min_mtu(adev->newsocket);
2114 
2115 	INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
2116 			  (char *)&sock->fd, sizeof(sock->fd)) == 0);
2117 
2118 	/*
2119 	 * Hook it up into the manager.
2120 	 */
2121 	nsock->bound = 1;
2122 	nsock->connected = 1;
2123 	_set_state(nsock, SOCK_OPEN);
2124 
2125 	LOCK(&nsock->manager->lock);
2126 	ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
2127 	InterlockedIncrement(&nsock->manager->totalSockets);
2128 	UNLOCK(&nsock->manager->lock);
2129 
2130 	socket_log(__LINE__, sock, &nsock->address, CREATION,
2131 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2132 		   "accepted_connection new_socket %p fd %d",
2133 		   nsock, nsock->fd);
2134 
2135 	adev->result = result;
2136 	send_acceptdone_event(sock, &adev);
2137 
2138 done:
2139 	CONSISTENT(sock);
2140 	UNLOCK(&sock->lock);
2141 
2142 	HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
2143 	lpo->acceptbuffer = NULL;
2144 }
2145 
2146 /*
2147  * Called when a socket with a pending connect() finishes.
2148  * Note that the socket is locked before entering.
2149  */
2150 static void
internal_connect(isc_socket_t * sock,IoCompletionInfo * lpo,int connect_errno)2151 internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
2152 	isc_socket_connev_t *cdev;
2153 	char strbuf[ISC_STRERRORSIZE];
2154 
2155 	INSIST(VALID_SOCKET(sock));
2156 
2157 	LOCK(&sock->lock);
2158 
2159 	INSIST(sock->pending_iocp > 0);
2160 	sock->pending_iocp--;
2161 	INSIST(sock->pending_connect == 1);
2162 	sock->pending_connect = 0;
2163 
2164 	/*
2165 	 * Has this event been canceled?
2166 	 */
2167 	cdev = lpo->cdev;
2168 	if (!connectdone_is_active(sock, cdev)) {
2169 		sock->pending_connect = 0;
2170 		if (sock->fd != INVALID_SOCKET) {
2171 			closesocket(sock->fd);
2172 			sock->fd = INVALID_SOCKET;
2173 			_set_state(sock, SOCK_CLOSED);
2174 		}
2175 		CONSISTENT(sock);
2176 		UNLOCK(&sock->lock);
2177 		return;
2178 	}
2179 
2180 	/*
2181 	 * Check possible Windows network event error status here.
2182 	 */
2183 	if (connect_errno != 0) {
2184 		/*
2185 		 * If the error is SOFT, just try again on this
2186 		 * fd and pretend nothing strange happened.
2187 		 */
2188 		if (SOFT_ERROR(connect_errno) ||
2189 		    connect_errno == WSAEINPROGRESS) {
2190 			sock->pending_connect = 1;
2191 			CONSISTENT(sock);
2192 			UNLOCK(&sock->lock);
2193 			return;
2194 		}
2195 
2196 		/*
2197 		 * Translate other errors into ISC_R_* flavors.
2198 		 */
2199 		switch (connect_errno) {
2200 #define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2201 			ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
2202 			ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
2203 			ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
2204 			ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
2205 			ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
2206 			ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN);
2207 			ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
2208 			ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN);
2209 			ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
2210 			ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET);
2211 			ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
2212 			ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
2213 #undef ERROR_MATCH
2214 		default:
2215 			cdev->result = ISC_R_UNEXPECTED;
2216 			isc__strerror(connect_errno, strbuf, sizeof(strbuf));
2217 			UNEXPECTED_ERROR(__FILE__, __LINE__,
2218 					 "internal_connect: connect() %s",
2219 					 strbuf);
2220 		}
2221 	} else {
2222 		INSIST(setsockopt(sock->fd, SOL_SOCKET,
2223 				  SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
2224 		cdev->result = ISC_R_SUCCESS;
2225 		sock->connected = 1;
2226 		socket_log(__LINE__, sock, &sock->address, IOEVENT,
2227 			   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2228 			   "internal_connect: success");
2229 	}
2230 
2231 	send_connectdone_event(sock, &cdev);
2232 
2233 	UNLOCK(&sock->lock);
2234 }
2235 
2236 /*
2237  * Loop through the socket, returning ISC_R_EOF for each done event pending.
2238  */
2239 static void
send_recvdone_abort(isc_socket_t * sock,isc_result_t result)2240 send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
2241 	isc_socketevent_t *dev;
2242 
2243 	while (!ISC_LIST_EMPTY(sock->recv_list)) {
2244 		dev = ISC_LIST_HEAD(sock->recv_list);
2245 		dev->result = result;
2246 		send_recvdone_event(sock, &dev);
2247 	}
2248 }
2249 
2250 /*
2251  * Take the data we received in our private buffer, and if any recv() calls on
2252  * our list are satisfied, send the corresponding done event.
2253  *
2254  * If we need more data (there are still items on the recv_list after we consume all
2255  * our data) then arrange for another system recv() call to fill our buffers.
2256  */
2257 static void
internal_recv(isc_socket_t * sock,int nbytes)2258 internal_recv(isc_socket_t *sock, int nbytes)
2259 {
2260 	INSIST(VALID_SOCKET(sock));
2261 
2262 	LOCK(&sock->lock);
2263 	CONSISTENT(sock);
2264 
2265 	socket_log(__LINE__, sock, NULL, IOEVENT,
2266 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2267 		   "internal_recv: %d bytes received", nbytes);
2268 
2269 	/*
2270 	 * If we got here, the I/O operation succeeded.  However, we might still have removed this
2271 	 * event from our notification list (or never placed it on it due to immediate completion.)
2272 	 * Handle the reference counting here, and handle the cancellation event just after.
2273 	 */
2274 	INSIST(sock->pending_iocp > 0);
2275 	sock->pending_iocp--;
2276 	INSIST(sock->pending_recv > 0);
2277 	sock->pending_recv--;
2278 
2279 	/*
2280 	 * The only way we could have gotten here is that our I/O has successfully completed.
2281 	 * Update our pointers, and move on.  The only odd case here is that we might not
2282 	 * have received enough data on a TCP stream to satisfy the minimum requirements.  If
2283 	 * this is the case, we will re-issue the recv() call for what we need.
2284 	 *
2285 	 * We do check for a recv() of 0 bytes on a TCP stream.  This means the remote end
2286 	 * has closed.
2287 	 */
2288 	if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
2289 		send_recvdone_abort(sock, ISC_R_EOF);
2290 		maybe_free_socket(&sock, __LINE__);
2291 		return;
2292 	}
2293 	sock->recvbuf.remaining = nbytes;
2294 	sock->recvbuf.consume_position = sock->recvbuf.base;
2295 	completeio_recv(sock);
2296 
2297 	/*
2298 	 * If there are more receivers waiting for data, queue another receive
2299 	 * here.
2300 	 */
2301 	queue_receive_request(sock);
2302 
2303 	/*
2304 	 * Unlock and/or destroy if we are the last thing this socket has left to do.
2305 	 */
2306 	maybe_free_socket(&sock, __LINE__);
2307 }
2308 
2309 static void
internal_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * messagehdr,int nbytes,int send_errno,IoCompletionInfo * lpo)2310 internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
2311 	      struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
2312 {
2313 	buflist_t *buffer;
2314 
2315 	/*
2316 	 * Find out what socket this is and lock it.
2317 	 */
2318 	INSIST(VALID_SOCKET(sock));
2319 
2320 	LOCK(&sock->lock);
2321 	CONSISTENT(sock);
2322 
2323 	socket_log(__LINE__, sock, NULL, IOEVENT,
2324 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2325 		   "internal_send: task got socket event %p", dev);
2326 
2327 	buffer = ISC_LIST_HEAD(lpo->bufferlist);
2328 	while (buffer != NULL) {
2329 		ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
2330 
2331 		socket_log(__LINE__, sock, NULL, TRACE,
2332 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2333 		   "free_buffer %p %p", buffer, buffer->buf);
2334 
2335 		HeapFree(hHeapHandle, 0, buffer->buf);
2336 		HeapFree(hHeapHandle, 0, buffer);
2337 		buffer = ISC_LIST_HEAD(lpo->bufferlist);
2338 	}
2339 
2340 	INSIST(sock->pending_iocp > 0);
2341 	sock->pending_iocp--;
2342 	INSIST(sock->pending_send > 0);
2343 	sock->pending_send--;
2344 
2345 	/* If the event is no longer in the list we can just return */
2346 	if (!senddone_is_active(sock, dev))
2347 		goto done;
2348 
2349 	/*
2350 	 * Set the error code and send things on its way.
2351 	 */
2352 	switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
2353 	case DOIO_SOFT:
2354 		break;
2355 	case DOIO_HARD:
2356 	case DOIO_SUCCESS:
2357 		send_senddone_event(sock, &dev);
2358 		break;
2359 	}
2360 
2361  done:
2362 	maybe_free_socket(&sock, __LINE__);
2363 }
2364 
2365 /*
2366  * These return if the done event passed in is on the list (or for connect, is
2367  * the one we're waiting for.  Using these ensures we will not double-send an
2368  * event.
2369  */
2370 static isc_boolean_t
senddone_is_active(isc_socket_t * sock,isc_socketevent_t * dev)2371 senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
2372 {
2373 	isc_socketevent_t *ldev;
2374 
2375 	ldev = ISC_LIST_HEAD(sock->send_list);
2376 	while (ldev != NULL && ldev != dev)
2377 		ldev = ISC_LIST_NEXT(ldev, ev_link);
2378 
2379 	return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2380 }
2381 
2382 static isc_boolean_t
acceptdone_is_active(isc_socket_t * sock,isc_socket_newconnev_t * dev)2383 acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
2384 {
2385 	isc_socket_newconnev_t *ldev;
2386 
2387 	ldev = ISC_LIST_HEAD(sock->accept_list);
2388 	while (ldev != NULL && ldev != dev)
2389 		ldev = ISC_LIST_NEXT(ldev, ev_link);
2390 
2391 	return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2392 }
2393 
2394 static isc_boolean_t
connectdone_is_active(isc_socket_t * sock,isc_socket_connev_t * dev)2395 connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
2396 {
2397 	return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE);
2398 }
2399 
2400 //
2401 // The Windows network stack seems to have two very distinct paths depending
2402 // on what is installed.  Specifically, if something is looking at network
2403 // connections (like an anti-virus or anti-malware application, such as
2404 // McAfee products) Windows may return additional error conditions which
2405 // were not previously returned.
2406 //
2407 // One specific one is when a TCP SYN scan is used.  In this situation,
2408 // Windows responds with the SYN-ACK, but the scanner never responds with
2409 // the 3rd packet, the ACK.  Windows consiers this a partially open connection.
2410 // Most Unix networking stacks, and Windows without McAfee installed, will
2411 // not return this to the caller.  However, with this product installed,
2412 // Windows returns this as a failed status on the Accept() call.  Here, we
2413 // will just re-issue the ISCAcceptEx() call as if nothing had happened.
2414 //
2415 // This code should only be called when the listening socket has received
2416 // such an error.  Additionally, the "parent" socket must be locked.
2417 // Additionally, the lpo argument is re-used here, and must not be freed
2418 // by the caller.
2419 //
2420 static isc_result_t
restart_accept(isc_socket_t * parent,IoCompletionInfo * lpo)2421 restart_accept(isc_socket_t *parent, IoCompletionInfo *lpo)
2422 {
2423 	isc_socket_t *nsock = lpo->adev->newsocket;
2424 	SOCKET new_fd;
2425 
2426 	/*
2427 	 * AcceptEx() requires we pass in a socket.  Note that we carefully
2428 	 * do not close the previous socket in case of an error message returned by
2429 	 * our new socket() call.  If we return an error here, our caller will
2430 	 * clean up.
2431 	 */
2432 	new_fd = socket(parent->pf, SOCK_STREAM, IPPROTO_TCP);
2433 	if (nsock->fd == INVALID_SOCKET) {
2434 		return (ISC_R_FAILURE); // parent will ask windows for error message
2435 	}
2436 	closesocket(nsock->fd);
2437 	nsock->fd = new_fd;
2438 
2439 	memset(&lpo->overlapped, 0, sizeof(lpo->overlapped));
2440 
2441 	ISCAcceptEx(parent->fd,
2442 		    nsock->fd,				/* Accepted Socket */
2443 		    lpo->acceptbuffer,			/* Buffer for initial Recv */
2444 		    0,					/* Length of Buffer */
2445 		    sizeof(SOCKADDR_STORAGE) + 16,	/* Local address length + 16 */
2446 		    sizeof(SOCKADDR_STORAGE) + 16,	/* Remote address lengh + 16 */
2447 		    (LPDWORD)&lpo->received_bytes,	/* Bytes Recved */
2448 		    (LPOVERLAPPED)lpo			/* Overlapped structure */
2449 		    );
2450 
2451 	InterlockedDecrement(&nsock->manager->iocp_total);
2452 	iocompletionport_update(nsock);
2453 
2454 	return (ISC_R_SUCCESS);
2455 }
2456 
2457 /*
2458  * This is the I/O Completion Port Worker Function. It loops forever
2459  * waiting for I/O to complete and then forwards them for further
2460  * processing. There are a number of these in separate threads.
2461  */
/*
 * Worker details:
 *
 *  - 'ThreadContext' is the socket manager whose completion port is served.
 *  - A dequeued completion with a NULL overlapped pointer is the request
 *    to exit the loop.
 *  - Failed completions are handled inline, under the socket lock, by
 *    failing/canceling the matching done event; successful completions are
 *    dispatched to the internal_*() handlers.
 *  - The IoCompletionInfo block is released with HeapFree() at the end of
 *    each iteration, except when a failed accept is re-issued (the block
 *    is re-used by restart_accept()).
 */
2462 static isc_threadresult_t WINAPI
SocketIoThread(LPVOID ThreadContext)2463 SocketIoThread(LPVOID ThreadContext) {
2464 	isc_socketmgr_t *manager = ThreadContext;
2465 	BOOL bSuccess = FALSE;
2466 	DWORD nbytes;
2467 	IoCompletionInfo *lpo = NULL;
2468 	isc_socket_t *sock = NULL;
2469 	int request;
2470 	struct msghdr *messagehdr = NULL;
2471 	int errval;
2472 	char strbuf[ISC_STRERRORSIZE];
2473 	int errstatus;
2474 
2475 	REQUIRE(VALID_MANAGER(manager));
2476 
2477 	/*
2478 	 * Set the thread priority high enough so I/O will
2479 	 * preempt normal recv packet processing, but not
2480 	 * higher than the timer sync thread.
2481 	 */
2482 	if (!SetThreadPriority(GetCurrentThread(),
2483 			       THREAD_PRIORITY_ABOVE_NORMAL)) {
2484 		errval = GetLastError();
2485 		isc__strerror(errval, strbuf, sizeof(strbuf));
2486 		FATAL_ERROR(__FILE__, __LINE__,
2487 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2488 				ISC_MSG_FAILED,
2489 				"Can't set thread priority: %s"),
2490 				strbuf);
2491 	}
2492 
2493 	/*
2494 	 * Loop forever waiting on I/O Completions and then processing them
2495 	 */
2496 	while (TRUE) {
		/*
		 * Jump target used after a failed accept has been re-issued:
		 * it skips the HeapFree() of lpo at the bottom of the loop.
		 */
2497 		wait_again:
		/*
		 * The completion key is received into 'sock' and the
		 * overlapped pointer into 'lpo'.
		 */
2498 		bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
2499 						     &nbytes,
2500 						     (PULONG_PTR)&sock,
2501 						     (LPWSAOVERLAPPED *)&lpo,
2502 						     INFINITE);
2503 		if (lpo == NULL) /* Received request to exit */
2504 			break;
2505 
2506 		REQUIRE(VALID_SOCKET(sock));
2507 
2508 		request = lpo->request_type;
2509 
		/* errstatus stays 0 unless the completion reported failure. */
2510 		errstatus = 0;
2511 		if (!bSuccess) {
2512 			isc_result_t isc_result;
2513 
2514 			/*
2515 			 * Did the I/O operation complete?
2516 			 */
2517 			errstatus = GetLastError();
2518 			isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2519 
			/*
			 * The lock taken here is not released in this branch
			 * (other than on the restart path below); presumably
			 * maybe_free_socket() releases it -- TODO confirm.
			 */
2520 			LOCK(&sock->lock);
2521 			CONSISTENT(sock);
2522 			switch (request) {
2523 			case SOCKET_RECV:
2524 				INSIST(sock->pending_iocp > 0);
2525 				sock->pending_iocp--;
2526 				INSIST(sock->pending_recv > 0);
2527 				sock->pending_recv--;
				/*
				 * On a socket that is not connected these
				 * three errors are not reported to receivers;
				 * another receive is queued instead.
				 */
2528 				if (!sock->connected &&
2529 				    ((errstatus == ERROR_HOST_UNREACHABLE) ||
2530 				     (errstatus == WSAENETRESET) ||
2531 				     (errstatus == WSAECONNRESET))) {
2532 					/* ignore soft errors */
2533 					queue_receive_request(sock);
2534 					break;
2535 				}
				/* Fail every queued receive with the mapped result. */
2536 				send_recvdone_abort(sock, isc_result);
2537 				if (isc_result == ISC_R_UNEXPECTED) {
2538 					UNEXPECTED_ERROR(__FILE__, __LINE__,
2539 						"SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2540 						errstatus, isc_result);
2541 				}
2542 				break;
2543 
2544 			case SOCKET_SEND:
2545 				INSIST(sock->pending_iocp > 0);
2546 				sock->pending_iocp--;
2547 				INSIST(sock->pending_send > 0);
2548 				sock->pending_send--;
				/* Only report the failure if the event is still queued. */
2549 				if (senddone_is_active(sock, lpo->dev)) {
2550 					lpo->dev->result = isc_result;
2551 					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2552 						"canceled_send");
2553 					send_senddone_event(sock, &lpo->dev);
2554 				}
2555 				break;
2556 
2557 			case SOCKET_ACCEPT:
				/*
				 * The counters are only checked here; they are
				 * decremented further down, and only when the
				 * accept is not re-issued.
				 */
2558 				INSIST(sock->pending_iocp > 0);
2559 				INSIST(sock->pending_accept > 0);
2560 
2561 				socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2562 					"Accept: errstatus=%d isc_result=%d", errstatus, isc_result);
2563 
				/*
				 * A failed accept whose event is still queued
				 * is re-issued.  On success lpo has been
				 * re-used for the new request, so unlock and
				 * go straight back to waiting without freeing
				 * it.
				 */
2564 				if (acceptdone_is_active(sock, lpo->adev)) {
2565 					if (restart_accept(sock, lpo) == ISC_R_SUCCESS) {
2566 						UNLOCK(&sock->lock);
2567 						goto wait_again;
2568 					} else {
2569 						errstatus = GetLastError();
2570 						isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2571 						socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2572 							"restart_accept() failed: errstatus=%d isc_result=%d",
2573 							errstatus, isc_result);
2574 					}
2575 				}
2576 
2577 				sock->pending_iocp--;
2578 				sock->pending_accept--;
				/*
				 * The accept could not be restarted: close and
				 * free the socket that was prepared for the
				 * new connection and deliver the failure.
				 */
2579 				if (acceptdone_is_active(sock, lpo->adev)) {
2580 					closesocket(lpo->adev->newsocket->fd);
2581 					lpo->adev->newsocket->fd = INVALID_SOCKET;
2582 					lpo->adev->newsocket->references--;
2583 					free_socket(&lpo->adev->newsocket, __LINE__);
2584 					lpo->adev->result = isc_result;
2585 					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2586 						"canceled_accept");
2587 					send_acceptdone_event(sock, &lpo->adev);
2588 				}
2589 				break;
2590 
2591 			case SOCKET_CONNECT:
2592 				INSIST(sock->pending_iocp > 0);
2593 				sock->pending_iocp--;
2594 				INSIST(sock->pending_connect == 1);
2595 				sock->pending_connect = 0;
				/* Only report the failure if this connect is still awaited. */
2596 				if (connectdone_is_active(sock, lpo->cdev)) {
2597 					lpo->cdev->result = isc_result;
2598 					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2599 						"canceled_connect");
2600 					send_connectdone_event(sock, &lpo->cdev);
2601 				}
2602 				break;
2603 			}
2604 			maybe_free_socket(&sock, __LINE__);
2605 
			/* lpo was checked non-NULL above; release the request block. */
2606 			if (lpo != NULL)
2607 				HeapFree(hHeapHandle, 0, lpo);
2608 			continue;
2609 		}
2610 
2611 		messagehdr = &lpo->messagehdr;
2612 
		/*
		 * Successful completion: the handlers below take the socket
		 * lock themselves.  errstatus is 0 on this path.
		 */
2613 		switch (request) {
2614 		case SOCKET_RECV:
2615 			internal_recv(sock, nbytes);
2616 			break;
2617 		case SOCKET_SEND:
2618 			internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
2619 			break;
2620 		case SOCKET_ACCEPT:
2621 			internal_accept(sock, lpo, errstatus);
2622 			break;
2623 		case SOCKET_CONNECT:
2624 			internal_connect(sock, lpo, errstatus);
2625 			break;
2626 		}
2627 
2628 		if (lpo != NULL)
2629 			HeapFree(hHeapHandle, 0, lpo);
2630 	}
2631 
2632 	/*
2633 	 * Exit Completion Port Thread
2634 	 */
2635 	manager_log(manager, TRACE,
2636 		    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2637 				   ISC_MSG_EXITING, "SocketIoThread exiting"));
2638 	return ((isc_threadresult_t)0);
2639 }
2640 
2641 /*
2642  * Create a new socket manager.
2643  */
2644 isc_result_t
isc__socketmgr_create(isc_mem_t * mctx,isc_socketmgr_t ** managerp)2645 isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2646 	return (isc_socketmgr_create2(mctx, managerp, 0));
2647 }
2648 
2649 isc_result_t
isc__socketmgr_create2(isc_mem_t * mctx,isc_socketmgr_t ** managerp,unsigned int maxsocks)2650 isc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
2651 		       unsigned int maxsocks)
2652 {
2653 	isc_socketmgr_t *manager;
2654 	isc_result_t result;
2655 
2656 	REQUIRE(managerp != NULL && *managerp == NULL);
2657 
2658 	if (maxsocks != 0)
2659 		return (ISC_R_NOTIMPLEMENTED);
2660 
2661 	manager = isc_mem_get(mctx, sizeof(*manager));
2662 	if (manager == NULL)
2663 		return (ISC_R_NOMEMORY);
2664 
2665 	InitSockets();
2666 
2667 	manager->magic = SOCKET_MANAGER_MAGIC;
2668 	manager->mctx = NULL;
2669 	manager->stats = NULL;
2670 	ISC_LIST_INIT(manager->socklist);
2671 	result = isc_mutex_init(&manager->lock);
2672 	if (result != ISC_R_SUCCESS) {
2673 		isc_mem_put(mctx, manager, sizeof(*manager));
2674 		return (result);
2675 	}
2676 	if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2677 		DESTROYLOCK(&manager->lock);
2678 		isc_mem_put(mctx, manager, sizeof(*manager));
2679 		UNEXPECTED_ERROR(__FILE__, __LINE__,
2680 				 "isc_condition_init() %s",
2681 				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2682 						ISC_MSG_FAILED, "failed"));
2683 		return (ISC_R_UNEXPECTED);
2684 	}
2685 
2686 	isc_mem_attach(mctx, &manager->mctx);
2687 
2688 	iocompletionport_init(manager);	/* Create the Completion Ports */
2689 
2690 	manager->bShutdown = ISC_FALSE;
2691 	manager->totalSockets = 0;
2692 	manager->iocp_total = 0;
2693 
2694 	*managerp = manager;
2695 
2696 	return (ISC_R_SUCCESS);
2697 }
2698 
2699 isc_result_t
isc_socketmgr_getmaxsockets(isc_socketmgr_t * manager,unsigned int * nsockp)2700 isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
2701 	REQUIRE(VALID_MANAGER(manager));
2702 	REQUIRE(nsockp != NULL);
2703 
2704 	return (ISC_R_NOTIMPLEMENTED);
2705 }
2706 
2707 void
isc_socketmgr_setstats(isc_socketmgr_t * manager,isc_stats_t * stats)2708 isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
2709 	REQUIRE(VALID_MANAGER(manager));
2710 	REQUIRE(ISC_LIST_EMPTY(manager->socklist));
2711 	REQUIRE(manager->stats == NULL);
2712 	REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2713 
2714 	isc_stats_attach(stats, &manager->stats);
2715 }
2716 
2717 void
isc__socketmgr_destroy(isc_socketmgr_t ** managerp)2718 isc__socketmgr_destroy(isc_socketmgr_t **managerp) {
2719 	isc_socketmgr_t *manager;
2720 	int i;
2721 	isc_mem_t *mctx;
2722 
2723 	/*
2724 	 * Destroy a socket manager.
2725 	 */
2726 
2727 	REQUIRE(managerp != NULL);
2728 	manager = *managerp;
2729 	REQUIRE(VALID_MANAGER(manager));
2730 
2731 	LOCK(&manager->lock);
2732 
2733 	/*
2734 	 * Wait for all sockets to be destroyed.
2735 	 */
2736 	while (!ISC_LIST_EMPTY(manager->socklist)) {
2737 		manager_log(manager, CREATION,
2738 			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2739 					   ISC_MSG_SOCKETSREMAIN,
2740 					   "sockets exist"));
2741 		WAIT(&manager->shutdown_ok, &manager->lock);
2742 	}
2743 
2744 	UNLOCK(&manager->lock);
2745 
2746 	/*
2747 	 * Here, we need to had some wait code for the completion port
2748 	 * thread.
2749 	 */
2750 	signal_iocompletionport_exit(manager);
2751 	manager->bShutdown = ISC_TRUE;
2752 
2753 	/*
2754 	 * Wait for threads to exit.
2755 	 */
2756 	for (i = 0; i < manager->maxIOCPThreads; i++) {
2757 		if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
2758 			NULL) != ISC_R_SUCCESS)
2759 			UNEXPECTED_ERROR(__FILE__, __LINE__,
2760 				 "isc_thread_join() for Completion Port %s",
2761 				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2762 						ISC_MSG_FAILED, "failed"));
2763 	}
2764 	/*
2765 	 * Clean up.
2766 	 */
2767 
2768 	CloseHandle(manager->hIoCompletionPort);
2769 
2770 	(void)isc_condition_destroy(&manager->shutdown_ok);
2771 
2772 	DESTROYLOCK(&manager->lock);
2773 	if (manager->stats != NULL)
2774 		isc_stats_detach(&manager->stats);
2775 	manager->magic = 0;
2776 	mctx= manager->mctx;
2777 	isc_mem_put(mctx, manager, sizeof(*manager));
2778 
2779 	isc_mem_detach(&mctx);
2780 
2781 	*managerp = NULL;
2782 }
2783 
2784 static void
queue_receive_event(isc_socket_t * sock,isc_task_t * task,isc_socketevent_t * dev)2785 queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
2786 {
2787 	isc_task_t *ntask = NULL;
2788 
2789 	isc_task_attach(task, &ntask);
2790 	dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2791 
2792 	/*
2793 	 * Enqueue the request.
2794 	 */
2795 	INSIST(!ISC_LINK_LINKED(dev, ev_link));
2796 	ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2797 
2798 	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2799 		   "queue_receive_event: event %p -> task %p",
2800 		   dev, ntask);
2801 }
2802 
2803 /*
2804  * Check the pending receive queue, and if we have data pending, give it to this
2805  * caller.  If we have none, queue an I/O request.  If this caller is not the first
2806  * on the list, then we will just queue this event and return.
2807  *
2808  * Caller must have the socket locked.
2809  */
2810 static isc_result_t
socket_recv(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,unsigned int flags)2811 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2812 	    unsigned int flags)
2813 {
2814 	isc_result_t result = ISC_R_SUCCESS;
2815 
2816 	dev->ev_sender = task;
2817 
2818 	if (sock->fd == INVALID_SOCKET)
2819 		return (ISC_R_EOF);
2820 
2821 	/*
2822 	 * Queue our event on the list of things to do.  Call our function to
2823 	 * attempt to fill buffers as much as possible, and return done events.
2824 	 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2825 	 * here and tell our caller that we could not satisfy it immediately.
2826 	 */
2827 	queue_receive_event(sock, task, dev);
2828 	if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2829 		result = ISC_R_INPROGRESS;
2830 
2831 	completeio_recv(sock);
2832 
2833 	/*
2834 	 * If there are more receivers waiting for data, queue another receive
2835 	 * here.  If the
2836 	 */
2837 	queue_receive_request(sock);
2838 
2839 	return (result);
2840 }
2841 
2842 isc_result_t
isc__socket_recvv(isc_socket_t * sock,isc_bufferlist_t * buflist,unsigned int minimum,isc_task_t * task,isc_taskaction_t action,void * arg)2843 isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2844 		 unsigned int minimum, isc_task_t *task,
2845 		 isc_taskaction_t action, void *arg)
2846 {
2847 	isc_socketevent_t *dev;
2848 	isc_socketmgr_t *manager;
2849 	unsigned int iocount;
2850 	isc_buffer_t *buffer;
2851 	isc_result_t ret;
2852 
2853 	REQUIRE(VALID_SOCKET(sock));
2854 	LOCK(&sock->lock);
2855 	CONSISTENT(sock);
2856 
2857 	/*
2858 	 * Make sure that the socket is not closed.  XXXMLG change error here?
2859 	 */
2860 	if (sock->fd == INVALID_SOCKET) {
2861 		UNLOCK(&sock->lock);
2862 		return (ISC_R_CONNREFUSED);
2863 	}
2864 
2865 	REQUIRE(buflist != NULL);
2866 	REQUIRE(!ISC_LIST_EMPTY(*buflist));
2867 	REQUIRE(task != NULL);
2868 	REQUIRE(action != NULL);
2869 
2870 	manager = sock->manager;
2871 	REQUIRE(VALID_MANAGER(manager));
2872 
2873 	iocount = isc_bufferlist_availablecount(buflist);
2874 	REQUIRE(iocount > 0);
2875 
2876 	INSIST(sock->bound);
2877 
2878 	dev = allocate_socketevent(manager->mctx, sock,
2879 				   ISC_SOCKEVENT_RECVDONE, action, arg);
2880 	if (dev == NULL) {
2881 		UNLOCK(&sock->lock);
2882 		return (ISC_R_NOMEMORY);
2883 	}
2884 
2885 	/*
2886 	 * UDP sockets are always partial read
2887 	 */
2888 	if (sock->type == isc_sockettype_udp)
2889 		dev->minimum = 1;
2890 	else {
2891 		if (minimum == 0)
2892 			dev->minimum = iocount;
2893 		else
2894 			dev->minimum = minimum;
2895 	}
2896 
2897 	/*
2898 	 * Move each buffer from the passed in list to our internal one.
2899 	 */
2900 	buffer = ISC_LIST_HEAD(*buflist);
2901 	while (buffer != NULL) {
2902 		ISC_LIST_DEQUEUE(*buflist, buffer, link);
2903 		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2904 		buffer = ISC_LIST_HEAD(*buflist);
2905 	}
2906 
2907 	ret = socket_recv(sock, dev, task, 0);
2908 
2909 	UNLOCK(&sock->lock);
2910 	return (ret);
2911 }
2912 
2913 isc_result_t
isc__socket_recv(isc_socket_t * sock,isc_region_t * region,unsigned int minimum,isc_task_t * task,isc_taskaction_t action,void * arg)2914 isc__socket_recv(isc_socket_t *sock, isc_region_t *region,
2915 		 unsigned int minimum, isc_task_t *task,
2916 		 isc_taskaction_t action, void *arg)
2917 {
2918 	isc_socketevent_t *dev;
2919 	isc_socketmgr_t *manager;
2920 	isc_result_t ret;
2921 
2922 	REQUIRE(VALID_SOCKET(sock));
2923 	LOCK(&sock->lock);
2924 	CONSISTENT(sock);
2925 
2926 	/*
2927 	 * make sure that the socket's not closed
2928 	 */
2929 	if (sock->fd == INVALID_SOCKET) {
2930 		UNLOCK(&sock->lock);
2931 		return (ISC_R_CONNREFUSED);
2932 	}
2933 	REQUIRE(action != NULL);
2934 
2935 	manager = sock->manager;
2936 	REQUIRE(VALID_MANAGER(manager));
2937 
2938 	INSIST(sock->bound);
2939 
2940 	dev = allocate_socketevent(manager->mctx, sock,
2941 				   ISC_SOCKEVENT_RECVDONE, action, arg);
2942 	if (dev == NULL) {
2943 		UNLOCK(&sock->lock);
2944 		return (ISC_R_NOMEMORY);
2945 	}
2946 
2947 	ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
2948 	UNLOCK(&sock->lock);
2949 	return (ret);
2950 }
2951 
2952 isc_result_t
isc__socket_recv2(isc_socket_t * sock,isc_region_t * region,unsigned int minimum,isc_task_t * task,isc_socketevent_t * event,unsigned int flags)2953 isc__socket_recv2(isc_socket_t *sock, isc_region_t *region,
2954 		  unsigned int minimum, isc_task_t *task,
2955 		  isc_socketevent_t *event, unsigned int flags)
2956 {
2957 	isc_result_t ret;
2958 
2959 	REQUIRE(VALID_SOCKET(sock));
2960 	LOCK(&sock->lock);
2961 	CONSISTENT(sock);
2962 
2963 	event->result = ISC_R_UNEXPECTED;
2964 	event->ev_sender = sock;
2965 	/*
2966 	 * make sure that the socket's not closed
2967 	 */
2968 	if (sock->fd == INVALID_SOCKET) {
2969 		UNLOCK(&sock->lock);
2970 		return (ISC_R_CONNREFUSED);
2971 	}
2972 
2973 	ISC_LIST_INIT(event->bufferlist);
2974 	event->region = *region;
2975 	event->n = 0;
2976 	event->offset = 0;
2977 	event->attributes = 0;
2978 
2979 	/*
2980 	 * UDP sockets are always partial read.
2981 	 */
2982 	if (sock->type == isc_sockettype_udp)
2983 		event->minimum = 1;
2984 	else {
2985 		if (minimum == 0)
2986 			event->minimum = region->length;
2987 		else
2988 			event->minimum = minimum;
2989 	}
2990 
2991 	ret = socket_recv(sock, event, task, flags);
2992 	UNLOCK(&sock->lock);
2993 	return (ret);
2994 }
2995 
2996 /*
2997  * Caller must have the socket locked.
2998  */
2999 static isc_result_t
socket_send(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,unsigned int flags)3000 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
3001 	    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3002 	    unsigned int flags)
3003 {
3004 	int io_state;
3005 	int send_errno = 0;
3006 	int cc = 0;
3007 	isc_task_t *ntask = NULL;
3008 	isc_result_t result = ISC_R_SUCCESS;
3009 
3010 	dev->ev_sender = task;
3011 
3012 	set_dev_address(address, sock, dev);
3013 	if (pktinfo != NULL) {
3014 		socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
3015 			   ISC_MSG_PKTINFOPROVIDED,
3016 			   "pktinfo structure provided, ifindex %u (set to 0)",
3017 			   pktinfo->ipi6_ifindex);
3018 
3019 		dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
3020 		dev->pktinfo = *pktinfo;
3021 		/*
3022 		 * Set the pktinfo index to 0 here, to let the kernel decide
3023 		 * what interface it should send on.
3024 		 */
3025 		dev->pktinfo.ipi6_ifindex = 0;
3026 	}
3027 
3028 	io_state = startio_send(sock, dev, &cc, &send_errno);
3029 	switch (io_state) {
3030 	case DOIO_PENDING:	/* I/O started. Enqueue completion event. */
3031 	case DOIO_SOFT:
3032 		/*
3033 		 * We couldn't send all or part of the request right now, so
3034 		 * queue it unless ISC_SOCKFLAG_NORETRY is set.
3035 		 */
3036 		if ((flags & ISC_SOCKFLAG_NORETRY) == 0 ||
3037 		    io_state == DOIO_PENDING) {
3038 			isc_task_attach(task, &ntask);
3039 			dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
3040 
3041 			/*
3042 			 * Enqueue the request.
3043 			 */
3044 			INSIST(!ISC_LINK_LINKED(dev, ev_link));
3045 			ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
3046 
3047 			socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
3048 				   "socket_send: event %p -> task %p",
3049 				   dev, ntask);
3050 
3051 			if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
3052 				result = ISC_R_INPROGRESS;
3053 			break;
3054 		}
3055 
3056 	case DOIO_SUCCESS:
3057 		break;
3058 	}
3059 
3060 	return (result);
3061 }
3062 
3063 isc_result_t
isc__socket_send(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_taskaction_t action,void * arg)3064 isc__socket_send(isc_socket_t *sock, isc_region_t *region,
3065 		 isc_task_t *task, isc_taskaction_t action, void *arg)
3066 {
3067 	/*
3068 	 * REQUIRE() checking is performed in isc_socket_sendto().
3069 	 */
3070 	return (isc_socket_sendto(sock, region, task, action, arg, NULL,
3071 				  NULL));
3072 }
3073 
3074 isc_result_t
isc__socket_sendto(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_taskaction_t action,void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo)3075 isc__socket_sendto(isc_socket_t *sock, isc_region_t *region,
3076 		   isc_task_t *task, isc_taskaction_t action, void *arg,
3077 		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3078 {
3079 	isc_socketevent_t *dev;
3080 	isc_socketmgr_t *manager;
3081 	isc_result_t ret;
3082 
3083 	REQUIRE(VALID_SOCKET(sock));
3084 	REQUIRE(sock->type != isc_sockettype_fdwatch);
3085 
3086 	LOCK(&sock->lock);
3087 	CONSISTENT(sock);
3088 
3089 	/*
3090 	 * make sure that the socket's not closed
3091 	 */
3092 	if (sock->fd == INVALID_SOCKET) {
3093 		UNLOCK(&sock->lock);
3094 		return (ISC_R_CONNREFUSED);
3095 	}
3096 	REQUIRE(region != NULL);
3097 	REQUIRE(task != NULL);
3098 	REQUIRE(action != NULL);
3099 
3100 	manager = sock->manager;
3101 	REQUIRE(VALID_MANAGER(manager));
3102 
3103 	INSIST(sock->bound);
3104 
3105 	dev = allocate_socketevent(manager->mctx, sock,
3106 				   ISC_SOCKEVENT_SENDDONE, action, arg);
3107 	if (dev == NULL) {
3108 		UNLOCK(&sock->lock);
3109 		return (ISC_R_NOMEMORY);
3110 	}
3111 	dev->region = *region;
3112 
3113 	ret = socket_send(sock, dev, task, address, pktinfo, 0);
3114 	UNLOCK(&sock->lock);
3115 	return (ret);
3116 }
3117 
3118 isc_result_t
isc__socket_sendv(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg)3119 isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
3120 		  isc_task_t *task, isc_taskaction_t action, void *arg)
3121 {
3122 	return (isc_socket_sendtov2(sock, buflist, task, action, arg, NULL,
3123 				    NULL, 0));
3124 }
3125 
3126 isc_result_t
isc__socket_sendtov(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo)3127 isc__socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
3128 		    isc_task_t *task, isc_taskaction_t action, void *arg,
3129 		    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3130 {
3131 	return (isc_socket_sendtov2(sock, buflist, task, action, arg, address,
3132 				    pktinfo, 0));
3133 }
3134 
3135 isc_result_t
isc__socket_sendtov2(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,unsigned int flags)3136 isc__socket_sendtov2(isc_socket_t *sock, isc_bufferlist_t *buflist,
3137 		     isc_task_t *task, isc_taskaction_t action, void *arg,
3138 		     isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3139 		     unsigned int flags)
3140 {
3141 	isc_socketevent_t *dev;
3142 	isc_socketmgr_t *manager;
3143 	unsigned int iocount;
3144 	isc_buffer_t *buffer;
3145 	isc_result_t ret;
3146 
3147 	REQUIRE(VALID_SOCKET(sock));
3148 
3149 	LOCK(&sock->lock);
3150 	CONSISTENT(sock);
3151 
3152 	/*
3153 	 * make sure that the socket's not closed
3154 	 */
3155 	if (sock->fd == INVALID_SOCKET) {
3156 		UNLOCK(&sock->lock);
3157 		return (ISC_R_CONNREFUSED);
3158 	}
3159 	REQUIRE(buflist != NULL);
3160 	REQUIRE(!ISC_LIST_EMPTY(*buflist));
3161 	REQUIRE(task != NULL);
3162 	REQUIRE(action != NULL);
3163 
3164 	manager = sock->manager;
3165 	REQUIRE(VALID_MANAGER(manager));
3166 
3167 	iocount = isc_bufferlist_usedcount(buflist);
3168 	REQUIRE(iocount > 0);
3169 
3170 	dev = allocate_socketevent(manager->mctx, sock,
3171 				   ISC_SOCKEVENT_SENDDONE, action, arg);
3172 	if (dev == NULL) {
3173 		UNLOCK(&sock->lock);
3174 		return (ISC_R_NOMEMORY);
3175 	}
3176 
3177 	/*
3178 	 * Move each buffer from the passed in list to our internal one.
3179 	 */
3180 	buffer = ISC_LIST_HEAD(*buflist);
3181 	while (buffer != NULL) {
3182 		ISC_LIST_DEQUEUE(*buflist, buffer, link);
3183 		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
3184 		buffer = ISC_LIST_HEAD(*buflist);
3185 	}
3186 
3187 	ret = socket_send(sock, dev, task, address, pktinfo, flags);
3188 	UNLOCK(&sock->lock);
3189 	return (ret);
3190 }
3191 
3192 isc_result_t
isc__socket_sendto2(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,isc_socketevent_t * event,unsigned int flags)3193 isc__socket_sendto2(isc_socket_t *sock, isc_region_t *region,
3194 		    isc_task_t *task,
3195 		    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3196 		    isc_socketevent_t *event, unsigned int flags)
3197 {
3198 	isc_result_t ret;
3199 
3200 	REQUIRE(VALID_SOCKET(sock));
3201 	LOCK(&sock->lock);
3202 	CONSISTENT(sock);
3203 
3204 	REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
3205 	if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
3206 		REQUIRE(sock->type == isc_sockettype_udp);
3207 	event->ev_sender = sock;
3208 	event->result = ISC_R_UNEXPECTED;
3209 	/*
3210 	 * make sure that the socket's not closed
3211 	 */
3212 	if (sock->fd == INVALID_SOCKET) {
3213 		UNLOCK(&sock->lock);
3214 		return (ISC_R_CONNREFUSED);
3215 	}
3216 	ISC_LIST_INIT(event->bufferlist);
3217 	event->region = *region;
3218 	event->n = 0;
3219 	event->offset = 0;
3220 	event->attributes = 0;
3221 
3222 	ret = socket_send(sock, event, task, address, pktinfo, flags);
3223 	UNLOCK(&sock->lock);
3224 	return (ret);
3225 }
3226 
3227 isc_result_t
isc__socket_bind(isc_socket_t * sock,isc_sockaddr_t * sockaddr,unsigned int options)3228 isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
3229 		 unsigned int options) {
3230 	int bind_errno;
3231 	char strbuf[ISC_STRERRORSIZE];
3232 	int on = 1;
3233 
3234 	REQUIRE(VALID_SOCKET(sock));
3235 	LOCK(&sock->lock);
3236 	CONSISTENT(sock);
3237 
3238 	/*
3239 	 * make sure that the socket's not closed
3240 	 */
3241 	if (sock->fd == INVALID_SOCKET) {
3242 		UNLOCK(&sock->lock);
3243 		return (ISC_R_CONNREFUSED);
3244 	}
3245 
3246 	INSIST(!sock->bound);
3247 	INSIST(!sock->dupped);
3248 
3249 	if (sock->pf != sockaddr->type.sa.sa_family) {
3250 		UNLOCK(&sock->lock);
3251 		return (ISC_R_FAMILYMISMATCH);
3252 	}
3253 	/*
3254 	 * Only set SO_REUSEADDR when we want a specific port.
3255 	 */
3256 	if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
3257 	    isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
3258 	    setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
3259 		       sizeof(on)) < 0) {
3260 		UNEXPECTED_ERROR(__FILE__, __LINE__,
3261 				 "setsockopt(%d) %s", sock->fd,
3262 				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
3263 						ISC_MSG_FAILED, "failed"));
3264 		/* Press on... */
3265 	}
3266 	if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
3267 		bind_errno = WSAGetLastError();
3268 		UNLOCK(&sock->lock);
3269 		switch (bind_errno) {
3270 		case WSAEACCES:
3271 			return (ISC_R_NOPERM);
3272 		case WSAEADDRNOTAVAIL:
3273 			return (ISC_R_ADDRNOTAVAIL);
3274 		case WSAEADDRINUSE:
3275 			return (ISC_R_ADDRINUSE);
3276 		case WSAEINVAL:
3277 			return (ISC_R_BOUND);
3278 		default:
3279 			isc__strerror(bind_errno, strbuf, sizeof(strbuf));
3280 			UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
3281 					 strbuf);
3282 			return (ISC_R_UNEXPECTED);
3283 		}
3284 	}
3285 
3286 	socket_log(__LINE__, sock, sockaddr, TRACE,
3287 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
3288 	sock->bound = 1;
3289 
3290 	UNLOCK(&sock->lock);
3291 	return (ISC_R_SUCCESS);
3292 }
3293 
3294 isc_result_t
isc__socket_filter(isc_socket_t * sock,const char * filter)3295 isc__socket_filter(isc_socket_t *sock, const char *filter) {
3296 	UNUSED(sock);
3297 	UNUSED(filter);
3298 
3299 	REQUIRE(VALID_SOCKET(sock));
3300 	return (ISC_R_NOTIMPLEMENTED);
3301 }
3302 
3303 /*
3304  * Set up to listen on a given socket.  We do this by creating an internal
3305  * event that will be dispatched when the socket has read activity.  The
3306  * watcher will send the internal event to the task when there is a new
3307  * connection.
3308  *
3309  * Unlike in read, we don't preallocate a done event here.  Every time there
3310  * is a new connection we'll have to allocate a new one anyway, so we might
3311  * as well keep things simple rather than having to track them.
3312  */
3313 isc_result_t
isc__socket_listen(isc_socket_t * sock,unsigned int backlog)3314 isc__socket_listen(isc_socket_t *sock, unsigned int backlog) {
3315 	char strbuf[ISC_STRERRORSIZE];
3316 
3317 	REQUIRE(VALID_SOCKET(sock));
3318 
3319 	LOCK(&sock->lock);
3320 	CONSISTENT(sock);
3321 
3322 	/*
3323 	 * make sure that the socket's not closed
3324 	 */
3325 	if (sock->fd == INVALID_SOCKET) {
3326 		UNLOCK(&sock->lock);
3327 		return (ISC_R_CONNREFUSED);
3328 	}
3329 
3330 	REQUIRE(!sock->listener);
3331 	REQUIRE(sock->bound);
3332 	REQUIRE(sock->type == isc_sockettype_tcp);
3333 
3334 	if (backlog == 0)
3335 		backlog = SOMAXCONN;
3336 
3337 	if (listen(sock->fd, (int)backlog) < 0) {
3338 		UNLOCK(&sock->lock);
3339 		isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3340 
3341 		UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
3342 
3343 		return (ISC_R_UNEXPECTED);
3344 	}
3345 
3346 	socket_log(__LINE__, sock, NULL, TRACE,
3347 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
3348 	sock->listener = 1;
3349 	_set_state(sock, SOCK_LISTEN);
3350 
3351 	UNLOCK(&sock->lock);
3352 	return (ISC_R_SUCCESS);
3353 }
3354 
3355 /*
3356  * This should try to do aggressive accept() XXXMLG
3357  */
3358 isc_result_t
isc__socket_accept(isc_socket_t * sock,isc_task_t * task,isc_taskaction_t action,void * arg)3359 isc__socket_accept(isc_socket_t *sock,
3360 		   isc_task_t *task, isc_taskaction_t action, void *arg)
3361 {
3362 	isc_socket_newconnev_t *adev;
3363 	isc_socketmgr_t *manager;
3364 	isc_task_t *ntask = NULL;
3365 	isc_socket_t *nsock;
3366 	isc_result_t result;
3367 	IoCompletionInfo *lpo;
3368 
3369 	REQUIRE(VALID_SOCKET(sock));
3370 
3371 	manager = sock->manager;
3372 	REQUIRE(VALID_MANAGER(manager));
3373 
3374 	LOCK(&sock->lock);
3375 	CONSISTENT(sock);
3376 
3377 	/*
3378 	 * make sure that the socket's not closed
3379 	 */
3380 	if (sock->fd == INVALID_SOCKET) {
3381 		UNLOCK(&sock->lock);
3382 		return (ISC_R_CONNREFUSED);
3383 	}
3384 
3385 	REQUIRE(sock->listener);
3386 
3387 	/*
3388 	 * Sender field is overloaded here with the task we will be sending
3389 	 * this event to.  Just before the actual event is delivered the
3390 	 * actual ev_sender will be touched up to be the socket.
3391 	 */
3392 	adev = (isc_socket_newconnev_t *)
3393 		isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3394 				   action, arg, sizeof(*adev));
3395 	if (adev == NULL) {
3396 		UNLOCK(&sock->lock);
3397 		return (ISC_R_NOMEMORY);
3398 	}
3399 	ISC_LINK_INIT(adev, ev_link);
3400 
3401 	result = allocate_socket(manager, sock->type, &nsock);
3402 	if (result != ISC_R_SUCCESS) {
3403 		isc_event_free((isc_event_t **)&adev);
3404 		UNLOCK(&sock->lock);
3405 		return (result);
3406 	}
3407 
3408 	/*
3409 	 * AcceptEx() requires we pass in a socket.
3410 	 */
3411 	nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
3412 	if (nsock->fd == INVALID_SOCKET) {
3413 		free_socket(&nsock, __LINE__);
3414 		isc_event_free((isc_event_t **)&adev);
3415 		UNLOCK(&sock->lock);
3416 		return (ISC_R_FAILURE); // XXXMLG need real error message
3417 	}
3418 
3419 	/*
3420 	 * Attach to socket and to task.
3421 	 */
3422 	isc_task_attach(task, &ntask);
3423 	if (isc_task_exiting(ntask)) {
3424 		free_socket(&nsock, __LINE__);
3425 		isc_task_detach(&ntask);
3426 		isc_event_free(ISC_EVENT_PTR(&adev));
3427 		UNLOCK(&sock->lock);
3428 		return (ISC_R_SHUTTINGDOWN);
3429 	}
3430 	nsock->references++;
3431 
3432 	adev->ev_sender = ntask;
3433 	adev->newsocket = nsock;
3434 	_set_state(nsock, SOCK_ACCEPT);
3435 
3436 	/*
3437 	 * Queue io completion for an accept().
3438 	 */
3439 	lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3440 					    HEAP_ZERO_MEMORY,
3441 					    sizeof(IoCompletionInfo));
3442 	RUNTIME_CHECK(lpo != NULL);
3443 	lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
3444 		(sizeof(SOCKADDR_STORAGE) + 16) * 2);
3445 	RUNTIME_CHECK(lpo->acceptbuffer != NULL);
3446 
3447 	lpo->adev = adev;
3448 	lpo->request_type = SOCKET_ACCEPT;
3449 
3450 	ISCAcceptEx(sock->fd,
3451 		    nsock->fd,				/* Accepted Socket */
3452 		    lpo->acceptbuffer,			/* Buffer for initial Recv */
3453 		    0,					/* Length of Buffer */
3454 		    sizeof(SOCKADDR_STORAGE) + 16,		/* Local address length + 16 */
3455 		    sizeof(SOCKADDR_STORAGE) + 16,		/* Remote address lengh + 16 */
3456 		    (LPDWORD)&lpo->received_bytes,	/* Bytes Recved */
3457 		    (LPOVERLAPPED)lpo			/* Overlapped structure */
3458 		    );
3459 	iocompletionport_update(nsock);
3460 
3461 	socket_log(__LINE__, sock, NULL, TRACE,
3462 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
3463 		   "accepting for nsock %p fd %d", nsock, nsock->fd);
3464 
3465 	/*
3466 	 * Enqueue the event
3467 	 */
3468 	ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
3469 	sock->pending_accept++;
3470 	sock->pending_iocp++;
3471 
3472 	UNLOCK(&sock->lock);
3473 	return (ISC_R_SUCCESS);
3474 }
3475 
3476 isc_result_t
isc__socket_connect(isc_socket_t * sock,isc_sockaddr_t * addr,isc_task_t * task,isc_taskaction_t action,void * arg)3477 isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3478 		    isc_task_t *task, isc_taskaction_t action, void *arg)
3479 {
3480 	char strbuf[ISC_STRERRORSIZE];
3481 	isc_socket_connev_t *cdev;
3482 	isc_task_t *ntask = NULL;
3483 	isc_socketmgr_t *manager;
3484 	IoCompletionInfo *lpo;
3485 	int bind_errno;
3486 
3487 	REQUIRE(VALID_SOCKET(sock));
3488 	REQUIRE(addr != NULL);
3489 	REQUIRE(task != NULL);
3490 	REQUIRE(action != NULL);
3491 
3492 	manager = sock->manager;
3493 	REQUIRE(VALID_MANAGER(manager));
3494 	REQUIRE(addr != NULL);
3495 
3496 	if (isc_sockaddr_ismulticast(addr))
3497 		return (ISC_R_MULTICAST);
3498 
3499 	LOCK(&sock->lock);
3500 	CONSISTENT(sock);
3501 
3502 	/*
3503 	 * make sure that the socket's not closed
3504 	 */
3505 	if (sock->fd == INVALID_SOCKET) {
3506 		UNLOCK(&sock->lock);
3507 		return (ISC_R_CONNREFUSED);
3508 	}
3509 
3510 	/*
3511 	 * Windows sockets won't connect unless the socket is bound.
3512 	 */
3513 	if (!sock->bound) {
3514 		isc_sockaddr_t any;
3515 
3516 		isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr));
3517 		if (bind(sock->fd, &any.type.sa, any.length) < 0) {
3518 			bind_errno = WSAGetLastError();
3519 			UNLOCK(&sock->lock);
3520 			switch (bind_errno) {
3521 			case WSAEACCES:
3522 				return (ISC_R_NOPERM);
3523 			case WSAEADDRNOTAVAIL:
3524 				return (ISC_R_ADDRNOTAVAIL);
3525 			case WSAEADDRINUSE:
3526 				return (ISC_R_ADDRINUSE);
3527 			case WSAEINVAL:
3528 				return (ISC_R_BOUND);
3529 			default:
3530 				isc__strerror(bind_errno, strbuf,
3531 					      sizeof(strbuf));
3532 				UNEXPECTED_ERROR(__FILE__, __LINE__,
3533 						 "bind: %s", strbuf);
3534 				return (ISC_R_UNEXPECTED);
3535 			}
3536 		}
3537 		sock->bound = 1;
3538 	}
3539 
3540 	REQUIRE(!sock->pending_connect);
3541 
3542 	cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3543 							ISC_SOCKEVENT_CONNECT,
3544 							action,	arg,
3545 							sizeof(*cdev));
3546 	if (cdev == NULL) {
3547 		UNLOCK(&sock->lock);
3548 		return (ISC_R_NOMEMORY);
3549 	}
3550 	ISC_LINK_INIT(cdev, ev_link);
3551 
3552 	if (sock->type == isc_sockettype_tcp) {
3553 		/*
3554 		 * Queue io completion for an accept().
3555 		 */
3556 		lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3557 						    HEAP_ZERO_MEMORY,
3558 						    sizeof(IoCompletionInfo));
3559 		lpo->cdev = cdev;
3560 		lpo->request_type = SOCKET_CONNECT;
3561 
3562 		sock->address = *addr;
3563 		ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
3564 			NULL, 0, NULL, (LPOVERLAPPED)lpo);
3565 
3566 		/*
3567 		 * Attach to task.
3568 		 */
3569 		isc_task_attach(task, &ntask);
3570 		cdev->ev_sender = ntask;
3571 
3572 		sock->pending_connect = 1;
3573 		_set_state(sock, SOCK_CONNECT);
3574 
3575 		/*
3576 		 * Enqueue the request.
3577 		 */
3578 		sock->connect_ev = cdev;
3579 		sock->pending_iocp++;
3580 	} else {
3581 		WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL);
3582 		cdev->result = ISC_R_SUCCESS;
3583 		isc_task_send(task, (isc_event_t **)&cdev);
3584 	}
3585 	CONSISTENT(sock);
3586 	UNLOCK(&sock->lock);
3587 
3588 	return (ISC_R_SUCCESS);
3589 }
3590 
3591 isc_result_t
isc__socket_getpeername(isc_socket_t * sock,isc_sockaddr_t * addressp)3592 isc__socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3593 	isc_result_t result;
3594 
3595 	REQUIRE(VALID_SOCKET(sock));
3596 	REQUIRE(addressp != NULL);
3597 
3598 	LOCK(&sock->lock);
3599 	CONSISTENT(sock);
3600 
3601 	/*
3602 	 * make sure that the socket's not closed
3603 	 */
3604 	if (sock->fd == INVALID_SOCKET) {
3605 		UNLOCK(&sock->lock);
3606 		return (ISC_R_CONNREFUSED);
3607 	}
3608 
3609 	if (sock->connected) {
3610 		*addressp = sock->address;
3611 		result = ISC_R_SUCCESS;
3612 	} else {
3613 		result = ISC_R_NOTCONNECTED;
3614 	}
3615 
3616 	UNLOCK(&sock->lock);
3617 
3618 	return (result);
3619 }
3620 
3621 isc_result_t
isc__socket_getsockname(isc_socket_t * sock,isc_sockaddr_t * addressp)3622 isc__socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3623 	ISC_SOCKADDR_LEN_T len;
3624 	isc_result_t result;
3625 	char strbuf[ISC_STRERRORSIZE];
3626 
3627 	REQUIRE(VALID_SOCKET(sock));
3628 	REQUIRE(addressp != NULL);
3629 
3630 	LOCK(&sock->lock);
3631 	CONSISTENT(sock);
3632 
3633 	/*
3634 	 * make sure that the socket's not closed
3635 	 */
3636 	if (sock->fd == INVALID_SOCKET) {
3637 		UNLOCK(&sock->lock);
3638 		return (ISC_R_CONNREFUSED);
3639 	}
3640 
3641 	if (!sock->bound) {
3642 		result = ISC_R_NOTBOUND;
3643 		goto out;
3644 	}
3645 
3646 	result = ISC_R_SUCCESS;
3647 
3648 	len = sizeof(addressp->type);
3649 	if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3650 		isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3651 		UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3652 				 strbuf);
3653 		result = ISC_R_UNEXPECTED;
3654 		goto out;
3655 	}
3656 	addressp->length = (unsigned int)len;
3657 
3658  out:
3659 	UNLOCK(&sock->lock);
3660 
3661 	return (result);
3662 }
3663 
3664 /*
3665  * Run through the list of events on this socket, and cancel the ones
3666  * queued for task "task" of type "how".  "how" is a bitmask.
3667  */
3668 void
isc__socket_cancel(isc_socket_t * sock,isc_task_t * task,unsigned int how)3669 isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3670 
3671 	REQUIRE(VALID_SOCKET(sock));
3672 
3673 	/*
3674 	 * Quick exit if there is nothing to do.  Don't even bother locking
3675 	 * in this case.
3676 	 */
3677 	if (how == 0)
3678 		return;
3679 
3680 	LOCK(&sock->lock);
3681 	CONSISTENT(sock);
3682 
3683 	/*
3684 	 * make sure that the socket's not closed
3685 	 */
3686 	if (sock->fd == INVALID_SOCKET) {
3687 		UNLOCK(&sock->lock);
3688 		return;
3689 	}
3690 
3691 	/*
3692 	 * All of these do the same thing, more or less.
3693 	 * Each will:
3694 	 *	o If the internal event is marked as "posted" try to
3695 	 *	  remove it from the task's queue.  If this fails, mark it
3696 	 *	  as canceled instead, and let the task clean it up later.
3697 	 *	o For each I/O request for that task of that type, post
3698 	 *	  its done event with status of "ISC_R_CANCELED".
3699 	 *	o Reset any state needed.
3700 	 */
3701 
3702 	if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) {
3703 		isc_socketevent_t      *dev;
3704 		isc_socketevent_t      *next;
3705 		isc_task_t	       *current_task;
3706 
3707 		dev = ISC_LIST_HEAD(sock->recv_list);
3708 		while (dev != NULL) {
3709 			current_task = dev->ev_sender;
3710 			next = ISC_LIST_NEXT(dev, ev_link);
3711 			if ((task == NULL) || (task == current_task)) {
3712 				dev->result = ISC_R_CANCELED;
3713 				send_recvdone_event(sock, &dev);
3714 			}
3715 			dev = next;
3716 		}
3717 	}
3718 	how &= ~ISC_SOCKCANCEL_RECV;
3719 
3720 	if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) {
3721 		isc_socketevent_t      *dev;
3722 		isc_socketevent_t      *next;
3723 		isc_task_t	       *current_task;
3724 
3725 		dev = ISC_LIST_HEAD(sock->send_list);
3726 
3727 		while (dev != NULL) {
3728 			current_task = dev->ev_sender;
3729 			next = ISC_LIST_NEXT(dev, ev_link);
3730 			if ((task == NULL) || (task == current_task)) {
3731 				dev->result = ISC_R_CANCELED;
3732 				send_senddone_event(sock, &dev);
3733 			}
3734 			dev = next;
3735 		}
3736 	}
3737 	how &= ~ISC_SOCKCANCEL_SEND;
3738 
3739 	if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
3740 	    && !ISC_LIST_EMPTY(sock->accept_list)) {
3741 		isc_socket_newconnev_t *dev;
3742 		isc_socket_newconnev_t *next;
3743 		isc_task_t	       *current_task;
3744 
3745 		dev = ISC_LIST_HEAD(sock->accept_list);
3746 		while (dev != NULL) {
3747 			current_task = dev->ev_sender;
3748 			next = ISC_LIST_NEXT(dev, ev_link);
3749 
3750 			if ((task == NULL) || (task == current_task)) {
3751 
3752 				dev->newsocket->references--;
3753 				closesocket(dev->newsocket->fd);
3754 				dev->newsocket->fd = INVALID_SOCKET;
3755 				free_socket(&dev->newsocket, __LINE__);
3756 
3757 				dev->result = ISC_R_CANCELED;
3758 				send_acceptdone_event(sock, &dev);
3759 			}
3760 
3761 			dev = next;
3762 		}
3763 	}
3764 	how &= ~ISC_SOCKCANCEL_ACCEPT;
3765 
3766 	/*
3767 	 * Connecting is not a list.
3768 	 */
3769 	if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
3770 	    && sock->connect_ev != NULL) {
3771 		isc_socket_connev_t    *dev;
3772 		isc_task_t	       *current_task;
3773 
3774 		INSIST(sock->pending_connect);
3775 
3776 		dev = sock->connect_ev;
3777 		current_task = dev->ev_sender;
3778 
3779 		if ((task == NULL) || (task == current_task)) {
3780 			closesocket(sock->fd);
3781 			sock->fd = INVALID_SOCKET;
3782 			_set_state(sock, SOCK_CLOSED);
3783 
3784 			sock->connect_ev = NULL;
3785 			dev->result = ISC_R_CANCELED;
3786 			send_connectdone_event(sock, &dev);
3787 		}
3788 	}
3789 	how &= ~ISC_SOCKCANCEL_CONNECT;
3790 
3791 	maybe_free_socket(&sock, __LINE__);
3792 }
3793 
3794 isc_sockettype_t
isc__socket_gettype(isc_socket_t * sock)3795 isc__socket_gettype(isc_socket_t *sock) {
3796 	isc_sockettype_t type;
3797 
3798 	REQUIRE(VALID_SOCKET(sock));
3799 
3800 	LOCK(&sock->lock);
3801 
3802 	/*
3803 	 * make sure that the socket's not closed
3804 	 */
3805 	if (sock->fd == INVALID_SOCKET) {
3806 		UNLOCK(&sock->lock);
3807 		return (ISC_R_CONNREFUSED);
3808 	}
3809 
3810 	type = sock->type;
3811 	UNLOCK(&sock->lock);
3812 	return (type);
3813 }
3814 
3815 isc_boolean_t
isc__socket_isbound(isc_socket_t * sock)3816 isc__socket_isbound(isc_socket_t *sock) {
3817 	isc_boolean_t val;
3818 
3819 	REQUIRE(VALID_SOCKET(sock));
3820 
3821 	LOCK(&sock->lock);
3822 	CONSISTENT(sock);
3823 
3824 	/*
3825 	 * make sure that the socket's not closed
3826 	 */
3827 	if (sock->fd == INVALID_SOCKET) {
3828 		UNLOCK(&sock->lock);
3829 		return (ISC_FALSE);
3830 	}
3831 
3832 	val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
3833 	UNLOCK(&sock->lock);
3834 
3835 	return (val);
3836 }
3837 
3838 void
isc__socket_ipv6only(isc_socket_t * sock,isc_boolean_t yes)3839 isc__socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
3840 #if defined(IPV6_V6ONLY)
3841 	int onoff = yes ? 1 : 0;
3842 #else
3843 	UNUSED(yes);
3844 #endif
3845 
3846 	REQUIRE(VALID_SOCKET(sock));
3847 
3848 #ifdef IPV6_V6ONLY
3849 	if (sock->pf == AF_INET6) {
3850 		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3851 				 (char *)&onoff, sizeof(onoff));
3852 	}
3853 #endif
3854 }
3855 
3856 void
isc__socket_dscp(isc_socket_t * sock,isc_dscp_t dscp)3857 isc__socket_dscp(isc_socket_t *sock, isc_dscp_t dscp) {
3858 #if !defined(IP_TOS) && !defined(IPV6_TCLASS)
3859 	UNUSED(dscp);
3860 #else
3861 	if (dscp < 0)
3862 		return;
3863 
3864 	dscp <<= 2;
3865 	dscp &= 0xff;
3866 #endif
3867 
3868 	REQUIRE(VALID_SOCKET(sock));
3869 
3870 #ifdef IP_TOS
3871 	if (sock->pf == AF_INET) {
3872 		(void)setsockopt(sock->fd, IPPROTO_IP, IP_TOS,
3873 				 (char *)&dscp, sizeof(dscp));
3874 	}
3875 #endif
3876 #ifdef IPV6_TCLASS
3877 	if (sock->pf == AF_INET6) {
3878 		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS,
3879 				 (char *)&dscp, sizeof(dscp));
3880 	}
3881 #endif
3882 }
3883 
3884 void
isc__socket_cleanunix(isc_sockaddr_t * addr,isc_boolean_t active)3885 isc__socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) {
3886 	UNUSED(addr);
3887 	UNUSED(active);
3888 }
3889 
3890 isc_result_t
isc__socket_permunix(isc_sockaddr_t * addr,isc_uint32_t perm,isc_uint32_t owner,isc_uint32_t group)3891 isc__socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm,
3892 		     isc_uint32_t owner,	isc_uint32_t group)
3893 {
3894 	UNUSED(addr);
3895 	UNUSED(perm);
3896 	UNUSED(owner);
3897 	UNUSED(group);
3898 	return (ISC_R_NOTIMPLEMENTED);
3899 }
3900 
3901 void
isc__socket_setname(isc_socket_t * socket,const char * name,void * tag)3902 isc__socket_setname(isc_socket_t *socket, const char *name, void *tag) {
3903 
3904 	/*
3905 	 * Name 'socket'.
3906 	 */
3907 
3908 	REQUIRE(VALID_SOCKET(socket));
3909 
3910 	LOCK(&socket->lock);
3911 	memset(socket->name, 0, sizeof(socket->name));
3912 	strncpy(socket->name, name, sizeof(socket->name) - 1);
3913 	socket->tag = tag;
3914 	UNLOCK(&socket->lock);
3915 }
3916 
3917 const char *
isc__socket_getname(isc_socket_t * socket)3918 isc__socket_getname(isc_socket_t *socket) {
3919 	return (socket->name);
3920 }
3921 
3922 void *
isc__socket_gettag(isc_socket_t * socket)3923 isc__socket_gettag(isc_socket_t *socket) {
3924 	return (socket->tag);
3925 }
3926 
3927 int
isc__socket_getfd(isc_socket_t * socket)3928 isc__socket_getfd(isc_socket_t *socket) {
3929 	return ((short) socket->fd);
3930 }
3931 
3932 void
isc__socketmgr_setreserved(isc_socketmgr_t * manager,isc_uint32_t reserved)3933 isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
3934 	UNUSED(manager);
3935 	UNUSED(reserved);
3936 }
3937 
3938 void
isc___socketmgr_maxudp(isc_socketmgr_t * manager,int maxudp)3939 isc___socketmgr_maxudp(isc_socketmgr_t *manager, int maxudp) {
3940 
3941 	UNUSED(manager);
3942 	UNUSED(maxudp);
3943 }
3944 
3945 isc_socketevent_t *
isc_socket_socketevent(isc_mem_t * mctx,void * sender,isc_eventtype_t eventtype,isc_taskaction_t action,void * arg)3946 isc_socket_socketevent(isc_mem_t *mctx, void *sender,
3947 		       isc_eventtype_t eventtype, isc_taskaction_t action,
3948 		       void *arg)
3949 {
3950 	return (allocate_socketevent(mctx, sender, eventtype, action, arg));
3951 }
3952 
3953 #ifdef HAVE_LIBXML2
3954 
3955 static const char *
_socktype(isc_sockettype_t type)3956 _socktype(isc_sockettype_t type) {
3957 	if (type == isc_sockettype_udp)
3958 		return ("udp");
3959 	else if (type == isc_sockettype_tcp)
3960 		return ("tcp");
3961 	else if (type == isc_sockettype_unix)
3962 		return ("unix");
3963 	else if (type == isc_sockettype_fdwatch)
3964 		return ("fdwatch");
3965 	else
3966 		return ("not-initialized");
3967 }
3968 
3969 #define TRY0(a) do { xmlrc = (a); if (xmlrc < 0) goto error; } while(/*CONSTCOND*/0)
3970 int
isc_socketmgr_renderxml(isc_socketmgr_t * mgr,xmlTextWriterPtr writer)3971 isc_socketmgr_renderxml(isc_socketmgr_t *mgr, xmlTextWriterPtr writer)
3972 {
3973 	isc_socket_t *sock = NULL;
3974 	char peerbuf[ISC_SOCKADDR_FORMATSIZE];
3975 	isc_sockaddr_t addr;
3976 	ISC_SOCKADDR_LEN_T len;
3977 	int xmlrc;
3978 
3979 	LOCK(&mgr->lock);
3980 
3981 #ifndef ISC_PLATFORM_USETHREADS
3982 	TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "references"));
3983 	TRY0(xmlTextWriterWriteFormatString(writer, "%d", mgr->refs));
3984 	TRY0(xmlTextWriterEndElement(writer));
3985 #endif
3986 
3987 	TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets"));
3988 	sock = ISC_LIST_HEAD(mgr->socklist);
3989 	while (sock != NULL) {
3990 		LOCK(&sock->lock);
3991 		TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket"));
3992 
3993 		TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "id"));
3994 		TRY0(xmlTextWriterWriteFormatString(writer, "%p", sock));
3995 		TRY0(xmlTextWriterEndElement(writer));
3996 
3997 		if (sock->name[0] != 0) {
3998 			TRY0(xmlTextWriterStartElement(writer,
3999 						       ISC_XMLCHAR "name"));
4000 			TRY0(xmlTextWriterWriteFormatString(writer, "%s",
4001 							    sock->name));
4002 			TRY0(xmlTextWriterEndElement(writer)); /* name */
4003 		}
4004 
4005 		TRY0(xmlTextWriterStartElement(writer,
4006 					       ISC_XMLCHAR "references"));
4007 		TRY0(xmlTextWriterWriteFormatString(writer, "%d",
4008 						    sock->references));
4009 		TRY0(xmlTextWriterEndElement(writer));
4010 
4011 		TRY0(xmlTextWriterWriteElement(writer, ISC_XMLCHAR "type",
4012 					  ISC_XMLCHAR _socktype(sock->type)));
4013 
4014 		if (sock->connected) {
4015 			isc_sockaddr_format(&sock->address, peerbuf,
4016 					    sizeof(peerbuf));
4017 			TRY0(xmlTextWriterWriteElement(writer,
4018 						  ISC_XMLCHAR "peer-address",
4019 						  ISC_XMLCHAR peerbuf));
4020 		}
4021 
4022 		len = sizeof(addr);
4023 		if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) {
4024 			isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf));
4025 			TRY0(xmlTextWriterWriteElement(writer,
4026 						  ISC_XMLCHAR "local-address",
4027 						  ISC_XMLCHAR peerbuf));
4028 		}
4029 
4030 		TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "states"));
4031 		if (sock->pending_recv)
4032 			TRY0(xmlTextWriterWriteElement(writer,
4033 						ISC_XMLCHAR "state",
4034 						ISC_XMLCHAR "pending-receive"));
4035 		if (sock->pending_send)
4036 			TRY0(xmlTextWriterWriteElement(writer,
4037 						  ISC_XMLCHAR "state",
4038 						  ISC_XMLCHAR "pending-send"));
4039 		if (sock->pending_accept)
4040 			TRY0(xmlTextWriterWriteElement(writer,
4041 						 ISC_XMLCHAR "state",
4042 						 ISC_XMLCHAR "pending_accept"));
4043 		if (sock->listener)
4044 			TRY0(xmlTextWriterWriteElement(writer,
4045 						       ISC_XMLCHAR "state",
4046 						       ISC_XMLCHAR "listener"));
4047 		if (sock->connected)
4048 			TRY0(xmlTextWriterWriteElement(writer,
4049 						     ISC_XMLCHAR "state",
4050 						     ISC_XMLCHAR "connected"));
4051 		if (sock->pending_connect)
4052 			TRY0(xmlTextWriterWriteElement(writer,
4053 						  ISC_XMLCHAR "state",
4054 						  ISC_XMLCHAR "connecting"));
4055 		if (sock->bound)
4056 			TRY0(xmlTextWriterWriteElement(writer,
4057 						  ISC_XMLCHAR "state",
4058 						  ISC_XMLCHAR "bound"));
4059 
4060 		TRY0(xmlTextWriterEndElement(writer)); /* states */
4061 
4062 		TRY0(xmlTextWriterEndElement(writer)); /* socket */
4063 
4064 		UNLOCK(&sock->lock);
4065 		sock = ISC_LIST_NEXT(sock, link);
4066 	}
4067 	TRY0(xmlTextWriterEndElement(writer)); /* sockets */
4068 
4069 error:
4070 	if (sock != NULL)
4071 		UNLOCK(&sock->lock);
4072 
4073 	UNLOCK(&mgr->lock);
4074 
4075 	return (xmlrc);
4076 }
4077 #endif /* HAVE_LIBXML2 */
4078 
4079 /*
4080  * Replace ../socket_api.c
4081  */
4082 
4083 isc_result_t
isc__socket_register(void)4084 isc__socket_register(void) {
4085 	return (ISC_R_SUCCESS);
4086 }
4087 
4088 isc_result_t
isc_socketmgr_createinctx(isc_mem_t * mctx,isc_appctx_t * actx,isc_socketmgr_t ** managerp)4089 isc_socketmgr_createinctx(isc_mem_t *mctx, isc_appctx_t *actx,
4090 			  isc_socketmgr_t **managerp)
4091 {
4092 	isc_result_t result;
4093 
4094 	result = isc_socketmgr_create(mctx, managerp);
4095 
4096 	if (result == ISC_R_SUCCESS)
4097 		isc_appctx_setsocketmgr(actx, *managerp);
4098 
4099 	return (result);
4100 }
4101