1 /* $NetBSD: socket.c,v 1.10 2015/07/08 17:29:00 christos Exp $ */ 2 3 /* 4 * Copyright (C) 2004-2015 Internet Systems Consortium, Inc. ("ISC") 5 * Copyright (C) 2000-2003 Internet Software Consortium. 6 * 7 * Permission to use, copy, modify, and/or distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 17 * PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 /* Id */ 21 22 /* This code uses functions which are only available on Server 2003 and 23 * higher, and Windows XP and higher. 24 * 25 * This code is by nature multithreaded and takes advantage of various 26 * features to pass on information through the completion port for 27 * when I/O is completed. All sends, receives, accepts, and connects are 28 * completed through the completion port. 29 * 30 * The number of Completion Port Worker threads used is the total number 31 * of CPU's + 1. This increases the likelihood that a Worker Thread is 32 * available for processing a completed request. 
33 * 34 * XXXPDM 5 August, 2002 35 */ 36 37 #define MAKE_EXTERNAL 1 38 #include <config.h> 39 40 #include <sys/types.h> 41 42 #ifndef _WINSOCKAPI_ 43 #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */ 44 #endif 45 46 #include <errno.h> 47 #include <stddef.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <io.h> 52 #include <fcntl.h> 53 #include <process.h> 54 55 #include <isc/app.h> 56 #include <isc/buffer.h> 57 #include <isc/bufferlist.h> 58 #include <isc/condition.h> 59 #include <isc/list.h> 60 #include <isc/log.h> 61 #include <isc/mem.h> 62 #include <isc/msgs.h> 63 #include <isc/mutex.h> 64 #include <isc/net.h> 65 #include <isc/once.h> 66 #include <isc/os.h> 67 #include <isc/platform.h> 68 #include <isc/print.h> 69 #include <isc/region.h> 70 #include <isc/socket.h> 71 #include <isc/stats.h> 72 #include <isc/strerror.h> 73 #include <isc/syslog.h> 74 #include <isc/task.h> 75 #include <isc/thread.h> 76 #include <isc/util.h> 77 #include <isc/win32os.h> 78 79 #include <mswsock.h> 80 81 #include "errno2result.h" 82 83 /* 84 * Set by the -T dscp option on the command line. If set to a value 85 * other than -1, we check to make sure DSCP values match it, and 86 * assert if not. 87 */ 88 int isc_dscp_check_value = -1; 89 90 /* 91 * How in the world can Microsoft exist with APIs like this? 92 * We can't actually call this directly, because it turns out 93 * no library exports this function. Instead, we need to 94 * issue a runtime call to get the address. 95 */ 96 LPFN_CONNECTEX ISCConnectEx; 97 LPFN_ACCEPTEX ISCAcceptEx; 98 LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs; 99 100 /* 101 * Run expensive internal consistency checks. 
102 */ 103 #ifdef ISC_SOCKET_CONSISTENCY_CHECKS 104 #define CONSISTENT(sock) consistent(sock) 105 #else 106 #define CONSISTENT(sock) do {} while (/*CONSTCOND*/0) 107 #endif 108 static void consistent(isc_socket_t *sock); 109 110 /* 111 * Define this macro to control the behavior of connection 112 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823 113 * for details. 114 * NOTE: This requires that Windows 2000 systems install Service Pack 2 115 * or later. 116 */ 117 #ifndef SIO_UDP_CONNRESET 118 #define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12) 119 #endif 120 121 /* 122 * Some systems define the socket length argument as an int, some as size_t, 123 * some as socklen_t. This is here so it can be easily changed if needed. 124 */ 125 #ifndef ISC_SOCKADDR_LEN_T 126 #define ISC_SOCKADDR_LEN_T unsigned int 127 #endif 128 129 /* 130 * Define what the possible "soft" errors can be. These are non-fatal returns 131 * of various network related functions, like recv() and so on. 132 */ 133 #define SOFT_ERROR(e) ((e) == WSAEINTR || \ 134 (e) == WSAEWOULDBLOCK || \ 135 (e) == EWOULDBLOCK || \ 136 (e) == EINTR || \ 137 (e) == EAGAIN || \ 138 (e) == 0) 139 140 /* 141 * Pending errors are not really errors and should be 142 * kept separate 143 */ 144 #define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0) 145 146 #define DOIO_SUCCESS 0 /* i/o ok, event sent */ 147 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */ 148 #define DOIO_HARD 2 /* i/o error, event sent */ 149 #define DOIO_EOF 3 /* EOF, no event sent */ 150 #define DOIO_PENDING 4 /* status when i/o is in process */ 151 #define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */ 152 153 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x) 154 155 /* 156 * DLVL(90) -- Function entry/exit and other tracing. 157 * DLVL(70) -- Socket "correctness" -- including returning of events, etc. 
158 * DLVL(60) -- Socket data send/receive 159 * DLVL(50) -- Event tracing, including receiving/sending completion events. 160 * DLVL(20) -- Socket creation/destruction. 161 */ 162 #define TRACE_LEVEL 90 163 #define CORRECTNESS_LEVEL 70 164 #define IOEVENT_LEVEL 60 165 #define EVENT_LEVEL 50 166 #define CREATION_LEVEL 20 167 168 #define TRACE DLVL(TRACE_LEVEL) 169 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL) 170 #define IOEVENT DLVL(IOEVENT_LEVEL) 171 #define EVENT DLVL(EVENT_LEVEL) 172 #define CREATION DLVL(CREATION_LEVEL) 173 174 typedef isc_event_t intev_t; 175 176 /* 177 * Socket State 178 */ 179 enum { 180 SOCK_INITIALIZED, /* Socket Initialized */ 181 SOCK_OPEN, /* Socket opened but nothing yet to do */ 182 SOCK_DATA, /* Socket sending or receiving data */ 183 SOCK_LISTEN, /* TCP Socket listening for connects */ 184 SOCK_ACCEPT, /* TCP socket is waiting to accept */ 185 SOCK_CONNECT, /* TCP Socket connecting */ 186 SOCK_CLOSED, /* Socket has been closed */ 187 }; 188 189 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o') 190 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC) 191 192 /* 193 * IPv6 control information. If the socket is an IPv6 socket we want 194 * to collect the destination address and interface so the client can 195 * set them on outgoing packets. 196 */ 197 #ifdef ISC_PLATFORM_HAVEIPV6 198 #ifndef USE_CMSG 199 #define USE_CMSG 1 200 #endif 201 #endif 202 203 /* 204 * We really don't want to try and use these control messages. Win32 205 * doesn't have this mechanism before XP. 206 */ 207 #undef USE_CMSG 208 209 /* 210 * Message header for recvmsg and sendmsg calls. 211 * Used value-result for recvmsg, value only for sendmsg. 
212 */ 213 struct msghdr { 214 SOCKADDR_STORAGE to_addr; /* UDP send/recv address */ 215 int to_addr_len; /* length of the address */ 216 WSABUF *msg_iov; /* scatter/gather array */ 217 u_int msg_iovlen; /* # elements in msg_iov */ 218 void *msg_control; /* ancillary data, see below */ 219 u_int msg_controllen; /* ancillary data buffer len */ 220 u_int msg_totallen; /* total length of this message */ 221 } msghdr; 222 223 /* 224 * The size to raise the receive buffer to. 225 */ 226 #define RCVBUFSIZE (32*1024) 227 228 /* 229 * The number of times a send operation is repeated if the result 230 * is WSAEINTR. 231 */ 232 #define NRETRIES 10 233 234 struct isc_socket { 235 /* Not locked. */ 236 unsigned int magic; 237 isc_socketmgr_t *manager; 238 isc_mutex_t lock; 239 isc_sockettype_t type; 240 241 /* Pointers to scatter/gather buffers */ 242 WSABUF iov[ISC_SOCKET_MAXSCATTERGATHER]; 243 244 /* Locked by socket lock. */ 245 ISC_LINK(isc_socket_t) link; 246 unsigned int references; /* EXTERNAL references */ 247 SOCKET fd; /* file handle */ 248 int pf; /* protocol family */ 249 char name[16]; 250 void * tag; 251 252 /* 253 * Each recv() call uses this buffer. It is a per-socket receive 254 * buffer that allows us to decouple the system recv() from the 255 * recv_list done events. This means the items on the recv_list 256 * can be removed without having to cancel pending system recv() 257 * calls. It also allows us to read-ahead in some cases. 
258 */ 259 struct { 260 SOCKADDR_STORAGE from_addr; // UDP send/recv address 261 int from_addr_len; // length of the address 262 char *base; // the base of the buffer 263 char *consume_position; // where to start copying data from next 264 unsigned int len; // the actual size of this buffer 265 unsigned int remaining; // the number of bytes remaining 266 } recvbuf; 267 268 ISC_LIST(isc_socketevent_t) send_list; 269 ISC_LIST(isc_socketevent_t) recv_list; 270 ISC_LIST(isc_socket_newconnev_t) accept_list; 271 isc_socket_connev_t *connect_ev; 272 273 isc_sockaddr_t address; /* remote address */ 274 275 unsigned int listener : 1, /* listener socket */ 276 connected : 1, 277 pending_connect : 1, /* connect pending */ 278 bound : 1, /* bound to local addr */ 279 dupped : 1; /* created by isc_socket_dup() */ 280 unsigned int pending_iocp; /* Should equal the counters below. Debug. */ 281 unsigned int pending_recv; /* Number of outstanding recv() calls. */ 282 unsigned int pending_send; /* Number of outstanding send() calls. */ 283 unsigned int pending_accept; /* Number of outstanding accept() calls. */ 284 unsigned int state; /* Socket state. Debugging and consistency checking. 
*/ 285 int state_lineno; /* line which last touched state */ 286 }; 287 288 #define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (/*CONSTCOND*/0) 289 290 /* 291 * Buffer structure 292 */ 293 typedef struct buflist buflist_t; 294 295 struct buflist { 296 void *buf; 297 unsigned int buflen; 298 ISC_LINK(buflist_t) link; 299 }; 300 301 /* 302 * I/O Completion ports Info structures 303 */ 304 305 static HANDLE hHeapHandle = NULL; 306 typedef struct IoCompletionInfo { 307 OVERLAPPED overlapped; 308 isc_socketevent_t *dev; /* send()/recv() done event */ 309 isc_socket_connev_t *cdev; /* connect() done event */ 310 isc_socket_newconnev_t *adev; /* accept() done event */ 311 void *acceptbuffer; 312 DWORD received_bytes; 313 int request_type; 314 struct msghdr messagehdr; 315 ISC_LIST(buflist_t) bufferlist; /*%< list of buffers */ 316 } IoCompletionInfo; 317 318 /* 319 * Define a maximum number of I/O Completion Port worker threads 320 * to handle the load on the Completion Port. The actual number 321 * used is the number of CPU's + 1. 322 */ 323 #define MAX_IOCPTHREADS 20 324 325 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g') 326 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC) 327 328 struct isc_socketmgr { 329 /* Not locked. */ 330 unsigned int magic; 331 isc_mem_t *mctx; 332 isc_mutex_t lock; 333 isc_stats_t *stats; 334 335 /* Locked by manager lock. */ 336 ISC_LIST(isc_socket_t) socklist; 337 isc_boolean_t bShutdown; 338 isc_condition_t shutdown_ok; 339 HANDLE hIoCompletionPort; 340 int maxIOCPThreads; 341 HANDLE hIOCPThreads[MAX_IOCPTHREADS]; 342 DWORD dwIOCPThreadIds[MAX_IOCPTHREADS]; 343 344 /* 345 * Debugging. 
346 * Modified by InterlockedIncrement() and InterlockedDecrement() 347 */ 348 LONG totalSockets; 349 LONG iocp_total; 350 }; 351 352 enum { 353 SOCKET_RECV, 354 SOCKET_SEND, 355 SOCKET_ACCEPT, 356 SOCKET_CONNECT 357 }; 358 359 /* 360 * send() and recv() iovec counts 361 */ 362 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER) 363 #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) 364 365 static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf, 366 isc_sockettype_t type, 367 isc_socket_t **socketp, 368 isc_socket_t *dup_socket); 369 static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext); 370 static void maybe_free_socket(isc_socket_t **, int); 371 static void free_socket(isc_socket_t **, int); 372 static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev); 373 static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev); 374 static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev); 375 static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev); 376 static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev); 377 static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev); 378 static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev); 379 static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result); 380 static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev); 381 static void queue_receive_request(isc_socket_t *sock); 382 383 /* 384 * This is used to dump the contents of the sock structure 385 * You should make sure that the sock is locked before 386 * dumping it. Since the code uses simple printf() statements 387 * it should only be used interactively. 
388 */ 389 void 390 sock_dump(isc_socket_t *sock) { 391 isc_socketevent_t *ldev; 392 isc_socket_newconnev_t *ndev; 393 394 #if 0 395 isc_sockaddr_t addr; 396 char socktext[ISC_SOCKADDR_FORMATSIZE]; 397 isc_result_t result; 398 399 result = isc_socket_getpeername(sock, &addr); 400 if (result == ISC_R_SUCCESS) { 401 isc_sockaddr_format(&addr, socktext, sizeof(socktext)); 402 printf("Remote Socket: %s\n", socktext); 403 } 404 result = isc_socket_getsockname(sock, &addr); 405 if (result == ISC_R_SUCCESS) { 406 isc_sockaddr_format(&addr, socktext, sizeof(socktext)); 407 printf("This Socket: %s\n", socktext); 408 } 409 #endif 410 411 printf("\n\t\tSock Dump\n"); 412 printf("\t\tfd: %u\n", sock->fd); 413 printf("\t\treferences: %d\n", sock->references); 414 printf("\t\tpending_accept: %d\n", sock->pending_accept); 415 printf("\t\tconnecting: %d\n", sock->pending_connect); 416 printf("\t\tconnected: %d\n", sock->connected); 417 printf("\t\tbound: %d\n", sock->bound); 418 printf("\t\tpending_iocp: %d\n", sock->pending_iocp); 419 printf("\t\tsocket type: %d\n", sock->type); 420 421 printf("\n\t\tSock Recv List\n"); 422 ldev = ISC_LIST_HEAD(sock->recv_list); 423 while (ldev != NULL) { 424 printf("\t\tdev: %p\n", ldev); 425 ldev = ISC_LIST_NEXT(ldev, ev_link); 426 } 427 428 printf("\n\t\tSock Send List\n"); 429 ldev = ISC_LIST_HEAD(sock->send_list); 430 while (ldev != NULL) { 431 printf("\t\tdev: %p\n", ldev); 432 ldev = ISC_LIST_NEXT(ldev, ev_link); 433 } 434 435 printf("\n\t\tSock Accept List\n"); 436 ndev = ISC_LIST_HEAD(sock->accept_list); 437 while (ndev != NULL) { 438 printf("\t\tdev: %p\n", ldev); 439 ndev = ISC_LIST_NEXT(ndev, ev_link); 440 } 441 } 442 443 static void 444 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address, 445 isc_logcategory_t *category, isc_logmodule_t *module, int level, 446 isc_msgcat_t *msgcat, int msgset, int message, 447 const char *fmt, ...) 
ISC_FORMAT_PRINTF(9, 10); 448 449 /* This function will add an entry to the I/O completion port 450 * that will signal the I/O thread to exit (gracefully) 451 */ 452 static void 453 signal_iocompletionport_exit(isc_socketmgr_t *manager) { 454 int i; 455 int errval; 456 char strbuf[ISC_STRERRORSIZE]; 457 458 REQUIRE(VALID_MANAGER(manager)); 459 for (i = 0; i < manager->maxIOCPThreads; i++) { 460 if (!PostQueuedCompletionStatus(manager->hIoCompletionPort, 461 0, 0, 0)) { 462 errval = GetLastError(); 463 isc__strerror(errval, strbuf, sizeof(strbuf)); 464 FATAL_ERROR(__FILE__, __LINE__, 465 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 466 ISC_MSG_FAILED, 467 "Can't request service thread to exit: %s"), 468 strbuf); 469 } 470 } 471 } 472 473 /* 474 * Create the worker threads for the I/O Completion Port 475 */ 476 void 477 iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) { 478 int errval; 479 char strbuf[ISC_STRERRORSIZE]; 480 int i; 481 482 INSIST(total_threads > 0); 483 REQUIRE(VALID_MANAGER(manager)); 484 /* 485 * We need at least one 486 */ 487 for (i = 0; i < total_threads; i++) { 488 manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread, 489 manager, 0, 490 &manager->dwIOCPThreadIds[i]); 491 if (manager->hIOCPThreads[i] == NULL) { 492 errval = GetLastError(); 493 isc__strerror(errval, strbuf, sizeof(strbuf)); 494 FATAL_ERROR(__FILE__, __LINE__, 495 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 496 ISC_MSG_FAILED, 497 "Can't create IOCP thread: %s"), 498 strbuf); 499 exit(1); 500 } 501 } 502 } 503 504 /* 505 * Create/initialise the I/O completion port 506 */ 507 void 508 iocompletionport_init(isc_socketmgr_t *manager) { 509 int errval; 510 char strbuf[ISC_STRERRORSIZE]; 511 512 REQUIRE(VALID_MANAGER(manager)); 513 /* 514 * Create a private heap to handle the socket overlapped structure 515 * The minimum number of structures is 10, there is no maximum 516 */ 517 hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0); 518 
if (hHeapHandle == NULL) { 519 errval = GetLastError(); 520 isc__strerror(errval, strbuf, sizeof(strbuf)); 521 FATAL_ERROR(__FILE__, __LINE__, 522 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 523 ISC_MSG_FAILED, 524 "HeapCreate() failed during " 525 "initialization: %s"), 526 strbuf); 527 exit(1); 528 } 529 530 manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS); 531 532 /* Now Create the Completion Port */ 533 manager->hIoCompletionPort = CreateIoCompletionPort( 534 INVALID_HANDLE_VALUE, NULL, 535 0, manager->maxIOCPThreads); 536 if (manager->hIoCompletionPort == NULL) { 537 errval = GetLastError(); 538 isc__strerror(errval, strbuf, sizeof(strbuf)); 539 FATAL_ERROR(__FILE__, __LINE__, 540 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 541 ISC_MSG_FAILED, 542 "CreateIoCompletionPort() failed " 543 "during initialization: %s"), 544 strbuf); 545 exit(1); 546 } 547 548 /* 549 * Worker threads for servicing the I/O 550 */ 551 iocompletionport_createthreads(manager->maxIOCPThreads, manager); 552 } 553 554 /* 555 * Associate a socket with an IO Completion Port. This allows us to queue events for it 556 * and have our worker pool of threads process them. 557 */ 558 void 559 iocompletionport_update(isc_socket_t *sock) { 560 HANDLE hiocp; 561 char strbuf[ISC_STRERRORSIZE]; 562 563 REQUIRE(VALID_SOCKET(sock)); 564 565 hiocp = CreateIoCompletionPort((HANDLE)sock->fd, 566 sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0); 567 568 if (hiocp == NULL) { 569 DWORD errval = GetLastError(); 570 isc__strerror(errval, strbuf, sizeof(strbuf)); 571 isc_log_iwrite(isc_lctx, 572 ISC_LOGCATEGORY_GENERAL, 573 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 574 isc_msgcat, ISC_MSGSET_SOCKET, 575 ISC_MSG_TOOMANYHANDLES, 576 "iocompletionport_update: failed to open" 577 " io completion port: %s", 578 strbuf); 579 580 /* XXXMLG temporary hack to make failures detected. 581 * This function should return errors to the caller, not 582 * exit here. 
583 */ 584 FATAL_ERROR(__FILE__, __LINE__, 585 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 586 ISC_MSG_FAILED, 587 "CreateIoCompletionPort() failed " 588 "during initialization: %s"), 589 strbuf); 590 exit(1); 591 } 592 593 InterlockedIncrement(&sock->manager->iocp_total); 594 } 595 596 /* 597 * Routine to cleanup and then close the socket. 598 * Only close the socket here if it is NOT associated 599 * with an event, otherwise the WSAWaitForMultipleEvents 600 * may fail due to the fact that the Wait should not 601 * be running while closing an event or a socket. 602 * The socket is locked before calling this function 603 */ 604 void 605 socket_close(isc_socket_t *sock) { 606 607 REQUIRE(sock != NULL); 608 609 if (sock->fd != INVALID_SOCKET) { 610 closesocket(sock->fd); 611 sock->fd = INVALID_SOCKET; 612 _set_state(sock, SOCK_CLOSED); 613 InterlockedDecrement(&sock->manager->totalSockets); 614 } 615 } 616 617 static isc_once_t initialise_once = ISC_ONCE_INIT; 618 static isc_boolean_t initialised = ISC_FALSE; 619 620 static void 621 initialise(void) { 622 WORD wVersionRequested; 623 WSADATA wsaData; 624 int err; 625 SOCKET sock; 626 GUID GUIDConnectEx = WSAID_CONNECTEX; 627 GUID GUIDAcceptEx = WSAID_ACCEPTEX; 628 GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS; 629 DWORD dwBytes; 630 631 /* Need Winsock 2.2 or better */ 632 wVersionRequested = MAKEWORD(2, 2); 633 634 err = WSAStartup(wVersionRequested, &wsaData); 635 if (err != 0) { 636 char strbuf[ISC_STRERRORSIZE]; 637 isc__strerror(err, strbuf, sizeof(strbuf)); 638 FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s", 639 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 640 ISC_MSG_FAILED, "failed"), 641 strbuf); 642 exit(1); 643 } 644 /* 645 * The following APIs do not exist as functions in a library, but we must 646 * ask winsock for them. They are "extensions" -- but why they cannot be 647 * actual functions is beyond me. So, ask winsock for the pointers to the 648 * functions we need. 
649 */ 650 sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 651 INSIST(sock != INVALID_SOCKET); 652 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 653 &GUIDConnectEx, sizeof(GUIDConnectEx), 654 &ISCConnectEx, sizeof(ISCConnectEx), 655 &dwBytes, NULL, NULL); 656 INSIST(err == 0); 657 658 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 659 &GUIDAcceptEx, sizeof(GUIDAcceptEx), 660 &ISCAcceptEx, sizeof(ISCAcceptEx), 661 &dwBytes, NULL, NULL); 662 INSIST(err == 0); 663 664 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 665 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs), 666 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs), 667 &dwBytes, NULL, NULL); 668 INSIST(err == 0); 669 670 closesocket(sock); 671 672 initialised = ISC_TRUE; 673 } 674 675 /* 676 * Initialize socket services 677 */ 678 void 679 InitSockets(void) { 680 RUNTIME_CHECK(isc_once_do(&initialise_once, 681 initialise) == ISC_R_SUCCESS); 682 if (!initialised) 683 exit(1); 684 } 685 686 int 687 internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo, 688 struct msghdr *messagehdr, int flags, int *Error) 689 { 690 int Result; 691 DWORD BytesSent; 692 DWORD Flags = flags; 693 int total_sent; 694 695 *Error = 0; 696 Result = WSASendTo(sock->fd, messagehdr->msg_iov, 697 messagehdr->msg_iovlen, &BytesSent, 698 Flags, (SOCKADDR *)&messagehdr->to_addr, 699 messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo, 700 NULL); 701 702 total_sent = (int)BytesSent; 703 704 /* Check for errors.*/ 705 if (Result == SOCKET_ERROR) { 706 *Error = WSAGetLastError(); 707 708 switch (*Error) { 709 case WSA_IO_INCOMPLETE: 710 case WSA_WAIT_IO_COMPLETION: 711 case WSA_IO_PENDING: 712 case NO_ERROR: /* Strange, but okay */ 713 sock->pending_iocp++; 714 sock->pending_send++; 715 break; 716 717 default: 718 return (-1); 719 break; 720 } 721 } else { 722 sock->pending_iocp++; 723 sock->pending_send++; 724 } 725 726 if (lpo != NULL) 727 return (0); 728 else 729 return (total_sent); 730 } 731 
732 static void 733 queue_receive_request(isc_socket_t *sock) { 734 DWORD Flags = 0; 735 DWORD NumBytes = 0; 736 int Result; 737 int Error; 738 int need_retry; 739 WSABUF iov[1]; 740 IoCompletionInfo *lpo = NULL; 741 isc_result_t isc_result; 742 743 retry: 744 need_retry = ISC_FALSE; 745 746 /* 747 * If we already have a receive pending, do nothing. 748 */ 749 if (sock->pending_recv > 0) { 750 if (lpo != NULL) 751 HeapFree(hHeapHandle, 0, lpo); 752 return; 753 } 754 755 /* 756 * If no one is waiting, do nothing. 757 */ 758 if (ISC_LIST_EMPTY(sock->recv_list)) { 759 if (lpo != NULL) 760 HeapFree(hHeapHandle, 0, lpo); 761 return; 762 } 763 764 INSIST(sock->recvbuf.remaining == 0); 765 INSIST(sock->fd != INVALID_SOCKET); 766 767 iov[0].len = sock->recvbuf.len; 768 iov[0].buf = sock->recvbuf.base; 769 770 if (lpo == NULL) { 771 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 772 HEAP_ZERO_MEMORY, 773 sizeof(IoCompletionInfo)); 774 RUNTIME_CHECK(lpo != NULL); 775 } else 776 ZeroMemory(lpo, sizeof(IoCompletionInfo)); 777 lpo->request_type = SOCKET_RECV; 778 779 sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr); 780 781 Error = 0; 782 Result = WSARecvFrom((SOCKET)sock->fd, iov, 1, 783 &NumBytes, &Flags, 784 (SOCKADDR *)&sock->recvbuf.from_addr, 785 &sock->recvbuf.from_addr_len, 786 (LPWSAOVERLAPPED)lpo, NULL); 787 788 /* Check for errors. 
*/ 789 if (Result == SOCKET_ERROR) { 790 Error = WSAGetLastError(); 791 792 switch (Error) { 793 case WSA_IO_PENDING: 794 sock->pending_iocp++; 795 sock->pending_recv++; 796 break; 797 798 /* direct error: no completion event */ 799 case ERROR_HOST_UNREACHABLE: 800 case WSAENETRESET: 801 case WSAECONNRESET: 802 if (!sock->connected) { 803 /* soft error */ 804 need_retry = ISC_TRUE; 805 break; 806 } 807 /* FALLTHROUGH */ 808 809 default: 810 isc_result = isc__errno2result(Error); 811 if (isc_result == ISC_R_UNEXPECTED) 812 UNEXPECTED_ERROR(__FILE__, __LINE__, 813 "WSARecvFrom: Windows error code: %d, isc result %d", 814 Error, isc_result); 815 send_recvdone_abort(sock, isc_result); 816 HeapFree(hHeapHandle, 0, lpo); 817 lpo = NULL; 818 break; 819 } 820 } else { 821 /* 822 * The recv() finished immediately, but we will still get 823 * a completion event. Rather than duplicate code, let 824 * that thread handle sending the data along its way. 825 */ 826 sock->pending_iocp++; 827 sock->pending_recv++; 828 } 829 830 socket_log(__LINE__, sock, NULL, IOEVENT, 831 isc_msgcat, ISC_MSGSET_SOCKET, 832 ISC_MSG_DOIORECV, 833 "queue_io_request: fd %d result %d error %d", 834 sock->fd, Result, Error); 835 836 CONSISTENT(sock); 837 838 if (need_retry) 839 goto retry; 840 } 841 842 static void 843 manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, 844 isc_logmodule_t *module, int level, const char *fmt, ...) 845 { 846 char msgbuf[2048]; 847 va_list ap; 848 849 if (!isc_log_wouldlog(isc_lctx, level)) 850 return; 851 852 va_start(ap, fmt); 853 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 854 va_end(ap); 855 856 isc_log_write(isc_lctx, category, module, level, 857 "sockmgr %p: %s", sockmgr, msgbuf); 858 } 859 860 static void 861 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address, 862 isc_logcategory_t *category, isc_logmodule_t *module, int level, 863 isc_msgcat_t *msgcat, int msgset, int message, 864 const char *fmt, ...) 
865 { 866 char msgbuf[2048]; 867 char peerbuf[256]; 868 va_list ap; 869 870 871 if (!isc_log_wouldlog(isc_lctx, level)) 872 return; 873 874 va_start(ap, fmt); 875 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 876 va_end(ap); 877 878 if (address == NULL) { 879 isc_log_iwrite(isc_lctx, category, module, level, 880 msgcat, msgset, message, 881 "socket %p line %d: %s", sock, lineno, msgbuf); 882 } else { 883 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); 884 isc_log_iwrite(isc_lctx, category, module, level, 885 msgcat, msgset, message, 886 "socket %p line %d peer %s: %s", sock, lineno, 887 peerbuf, msgbuf); 888 } 889 890 } 891 892 /* 893 * Make an fd SOCKET non-blocking. 894 */ 895 static isc_result_t 896 make_nonblock(SOCKET fd) { 897 int ret; 898 unsigned long flags = 1; 899 char strbuf[ISC_STRERRORSIZE]; 900 901 /* Set the socket to non-blocking */ 902 ret = ioctlsocket(fd, FIONBIO, &flags); 903 904 if (ret == -1) { 905 isc__strerror(errno, strbuf, sizeof(strbuf)); 906 UNEXPECTED_ERROR(__FILE__, __LINE__, 907 "ioctlsocket(%d, FIOBIO, %d): %s", 908 fd, flags, strbuf); 909 910 return (ISC_R_UNEXPECTED); 911 } 912 913 return (ISC_R_SUCCESS); 914 } 915 916 /* 917 * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom 918 * to not work correctly, returning a WSACONNRESET error when a WSASendTo 919 * fails with an "ICMP port unreachable" response and preventing the 920 * socket from using the WSARecvFrom in subsequent operations. 921 * The function below fixes this, but requires that Windows 2000 922 * Service Pack 2 or later be installed on the system. NT 4.0 923 * systems are not affected by this and work correctly. 924 * See Microsoft Knowledge Base Article Q263823 for details of this. 
925 */ 926 isc_result_t 927 connection_reset_fix(SOCKET fd) { 928 DWORD dwBytesReturned = 0; 929 BOOL bNewBehavior = FALSE; 930 DWORD status; 931 932 if (isc_win32os_versioncheck(5, 0, 0, 0) < 0) 933 return (ISC_R_SUCCESS); /* NT 4.0 has no problem */ 934 935 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */ 936 status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior, 937 sizeof(bNewBehavior), NULL, 0, 938 &dwBytesReturned, NULL, NULL); 939 if (status != SOCKET_ERROR) 940 return (ISC_R_SUCCESS); 941 else { 942 UNEXPECTED_ERROR(__FILE__, __LINE__, 943 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s", 944 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 945 ISC_MSG_FAILED, "failed")); 946 return (ISC_R_UNEXPECTED); 947 } 948 } 949 950 /* 951 * Construct an iov array and attach it to the msghdr passed in. This is 952 * the SEND constructor, which will use the used region of the buffer 953 * (if using a buffer list) or will use the internal region (if a single 954 * buffer I/O is requested). 955 * 956 * Nothing can be NULL, and the done event must list at least one buffer 957 * on the buffer linked list for this function to be meaningful. 958 */ 959 static void 960 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, 961 struct msghdr *msg, char *cmsg, WSABUF *iov, 962 IoCompletionInfo *lpo) 963 { 964 unsigned int iovcount; 965 isc_buffer_t *buffer; 966 buflist_t *cpbuffer; 967 isc_region_t used; 968 size_t write_count; 969 size_t skip_count; 970 971 memset(msg, 0, sizeof(*msg)); 972 973 memmove(&msg->to_addr, &dev->address.type, dev->address.length); 974 msg->to_addr_len = dev->address.length; 975 976 buffer = ISC_LIST_HEAD(dev->bufferlist); 977 write_count = 0; 978 iovcount = 0; 979 980 /* 981 * Single buffer I/O? Skip what we've done so far in this region. 
982 */ 983 if (buffer == NULL) { 984 write_count = dev->region.length - dev->n; 985 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t)); 986 RUNTIME_CHECK(cpbuffer != NULL); 987 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count); 988 RUNTIME_CHECK(cpbuffer->buf != NULL); 989 990 socket_log(__LINE__, sock, NULL, TRACE, 991 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 992 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t), 993 cpbuffer->buf, write_count); 994 995 memmove(cpbuffer->buf,(dev->region.base + dev->n), write_count); 996 cpbuffer->buflen = (unsigned int)write_count; 997 ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link); 998 iov[0].buf = cpbuffer->buf; 999 iov[0].len = (u_long)write_count; 1000 iovcount = 1; 1001 1002 goto config; 1003 } 1004 1005 /* 1006 * Multibuffer I/O. 1007 * Skip the data in the buffer list that we have already written. 1008 */ 1009 skip_count = dev->n; 1010 while (buffer != NULL) { 1011 REQUIRE(ISC_BUFFER_VALID(buffer)); 1012 if (skip_count < isc_buffer_usedlength(buffer)) 1013 break; 1014 skip_count -= isc_buffer_usedlength(buffer); 1015 buffer = ISC_LIST_NEXT(buffer, link); 1016 } 1017 1018 while (buffer != NULL) { 1019 INSIST(iovcount < MAXSCATTERGATHER_SEND); 1020 1021 isc_buffer_usedregion(buffer, &used); 1022 1023 if (used.length > 0) { 1024 int uselen = (int)(used.length - skip_count); 1025 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t)); 1026 RUNTIME_CHECK(cpbuffer != NULL); 1027 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen); 1028 RUNTIME_CHECK(cpbuffer->buf != NULL); 1029 1030 socket_log(__LINE__, sock, NULL, TRACE, 1031 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 1032 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t), 1033 cpbuffer->buf, write_count); 1034 1035 memmove(cpbuffer->buf,(used.base + skip_count), uselen); 1036 cpbuffer->buflen = uselen; 1037 iov[iovcount].buf = cpbuffer->buf; 1038 iov[iovcount].len = 
(u_long)(used.length - skip_count); 1039 write_count += uselen; 1040 skip_count = 0; 1041 iovcount++; 1042 } 1043 buffer = ISC_LIST_NEXT(buffer, link); 1044 } 1045 1046 INSIST(skip_count == 0); 1047 1048 config: 1049 msg->msg_iov = iov; 1050 msg->msg_iovlen = iovcount; 1051 msg->msg_totallen = (u_int)write_count; 1052 } 1053 1054 static void 1055 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock, 1056 isc_socketevent_t *dev) 1057 { 1058 if (sock->type == isc_sockettype_udp) { 1059 if (address != NULL) 1060 dev->address = *address; 1061 else 1062 dev->address = sock->address; 1063 } else if (sock->type == isc_sockettype_tcp) { 1064 INSIST(address == NULL); 1065 dev->address = sock->address; 1066 } 1067 } 1068 1069 static void 1070 destroy_socketevent(isc_event_t *event) { 1071 isc_socketevent_t *ev = (isc_socketevent_t *)event; 1072 1073 INSIST(ISC_LIST_EMPTY(ev->bufferlist)); 1074 1075 (ev->destroy)(event); 1076 } 1077 1078 static isc_socketevent_t * 1079 allocate_socketevent(isc_mem_t *mctx, isc_socket_t *sock, 1080 isc_eventtype_t eventtype, isc_taskaction_t action, 1081 void *arg) 1082 { 1083 isc_socketevent_t *ev; 1084 1085 ev = (isc_socketevent_t *)isc_event_allocate(mctx, sock, eventtype, 1086 action, arg, 1087 sizeof(*ev)); 1088 if (ev == NULL) 1089 return (NULL); 1090 1091 ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set 1092 ISC_LINK_INIT(ev, ev_link); 1093 ISC_LIST_INIT(ev->bufferlist); 1094 ev->region.base = NULL; 1095 ev->n = 0; 1096 ev->offset = 0; 1097 ev->attributes = 0; 1098 ev->destroy = ev->ev_destroy; 1099 ev->ev_destroy = destroy_socketevent; 1100 ev->dscp = 0; 1101 1102 return (ev); 1103 } 1104 1105 #if defined(ISC_SOCKET_DEBUG) 1106 static void 1107 dump_msg(struct msghdr *msg, isc_socket_t *sock) { 1108 unsigned int i; 1109 1110 printf("MSGHDR %p, Socket #: %u\n", msg, sock->fd); 1111 printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen); 1112 printf("\tiov %p, iovlen %d\n", msg->msg_iov, 
msg->msg_iovlen); 1113 for (i = 0; i < (unsigned int)msg->msg_iovlen; i++) 1114 printf("\t\t%u\tbase %p, len %u\n", i, 1115 msg->msg_iov[i].buf, msg->msg_iov[i].len); 1116 } 1117 #endif 1118 1119 /* 1120 * map the error code 1121 */ 1122 int 1123 map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno, 1124 char *errorstring, size_t bufsize) { 1125 1126 int doreturn; 1127 switch (windows_errno) { 1128 case WSAECONNREFUSED: 1129 *isc_errno = ISC_R_CONNREFUSED; 1130 if (sock->connected) 1131 doreturn = DOIO_HARD; 1132 else 1133 doreturn = DOIO_SOFT; 1134 break; 1135 case WSAENETUNREACH: 1136 case ERROR_NETWORK_UNREACHABLE: 1137 *isc_errno = ISC_R_NETUNREACH; 1138 if (sock->connected) 1139 doreturn = DOIO_HARD; 1140 else 1141 doreturn = DOIO_SOFT; 1142 break; 1143 case ERROR_PORT_UNREACHABLE: 1144 case ERROR_HOST_UNREACHABLE: 1145 case WSAEHOSTUNREACH: 1146 *isc_errno = ISC_R_HOSTUNREACH; 1147 if (sock->connected) 1148 doreturn = DOIO_HARD; 1149 else 1150 doreturn = DOIO_SOFT; 1151 break; 1152 case WSAENETDOWN: 1153 *isc_errno = ISC_R_NETDOWN; 1154 if (sock->connected) 1155 doreturn = DOIO_HARD; 1156 else 1157 doreturn = DOIO_SOFT; 1158 break; 1159 case WSAEHOSTDOWN: 1160 *isc_errno = ISC_R_HOSTDOWN; 1161 if (sock->connected) 1162 doreturn = DOIO_HARD; 1163 else 1164 doreturn = DOIO_SOFT; 1165 break; 1166 case WSAEACCES: 1167 *isc_errno = ISC_R_NOPERM; 1168 if (sock->connected) 1169 doreturn = DOIO_HARD; 1170 else 1171 doreturn = DOIO_SOFT; 1172 break; 1173 case WSAECONNRESET: 1174 case WSAENETRESET: 1175 case WSAECONNABORTED: 1176 case WSAEDISCON: 1177 *isc_errno = ISC_R_CONNECTIONRESET; 1178 if (sock->connected) 1179 doreturn = DOIO_HARD; 1180 else 1181 doreturn = DOIO_SOFT; 1182 break; 1183 case WSAENOTCONN: 1184 *isc_errno = ISC_R_NOTCONNECTED; 1185 if (sock->connected) 1186 doreturn = DOIO_HARD; 1187 else 1188 doreturn = DOIO_SOFT; 1189 break; 1190 case ERROR_OPERATION_ABORTED: 1191 case ERROR_CONNECTION_ABORTED: 1192 case ERROR_REQUEST_ABORTED: 
1193 *isc_errno = ISC_R_CONNECTIONRESET; 1194 doreturn = DOIO_HARD; 1195 break; 1196 case WSAENOBUFS: 1197 *isc_errno = ISC_R_NORESOURCES; 1198 doreturn = DOIO_HARD; 1199 break; 1200 case WSAEAFNOSUPPORT: 1201 *isc_errno = ISC_R_FAMILYNOSUPPORT; 1202 doreturn = DOIO_HARD; 1203 break; 1204 case WSAEADDRNOTAVAIL: 1205 *isc_errno = ISC_R_ADDRNOTAVAIL; 1206 doreturn = DOIO_HARD; 1207 break; 1208 case WSAEDESTADDRREQ: 1209 *isc_errno = ISC_R_BADADDRESSFORM; 1210 doreturn = DOIO_HARD; 1211 break; 1212 case ERROR_NETNAME_DELETED: 1213 *isc_errno = ISC_R_NETDOWN; 1214 doreturn = DOIO_HARD; 1215 break; 1216 default: 1217 *isc_errno = ISC_R_IOERROR; 1218 doreturn = DOIO_HARD; 1219 break; 1220 } 1221 if (doreturn == DOIO_HARD) { 1222 isc__strerror(windows_errno, errorstring, bufsize); 1223 } 1224 return (doreturn); 1225 } 1226 1227 static void 1228 fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) { 1229 isc_region_t r; 1230 int copylen; 1231 isc_buffer_t *buffer; 1232 1233 INSIST(dev->n < dev->minimum); 1234 INSIST(sock->recvbuf.remaining > 0); 1235 INSIST(sock->pending_recv == 0); 1236 1237 if (sock->type == isc_sockettype_udp) { 1238 dev->address.length = sock->recvbuf.from_addr_len; 1239 memmove(&dev->address.type, &sock->recvbuf.from_addr, 1240 sock->recvbuf.from_addr_len); 1241 if (isc_sockaddr_getport(&dev->address) == 0) { 1242 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1243 socket_log(__LINE__, sock, &dev->address, IOEVENT, 1244 isc_msgcat, ISC_MSGSET_SOCKET, 1245 ISC_MSG_ZEROPORT, 1246 "dropping source port zero packet"); 1247 } 1248 sock->recvbuf.remaining = 0; 1249 return; 1250 } 1251 } else if (sock->type == isc_sockettype_tcp) { 1252 dev->address = sock->address; 1253 } 1254 1255 /* 1256 * Run through the list of buffers we were given, and find the 1257 * first one with space. Once it is found, loop through, filling 1258 * the buffers as much as possible. 
1259 */ 1260 buffer = ISC_LIST_HEAD(dev->bufferlist); 1261 if (buffer != NULL) { // Multi-buffer receive 1262 while (buffer != NULL && sock->recvbuf.remaining > 0) { 1263 REQUIRE(ISC_BUFFER_VALID(buffer)); 1264 if (isc_buffer_availablelength(buffer) > 0) { 1265 isc_buffer_availableregion(buffer, &r); 1266 copylen = min(r.length, 1267 sock->recvbuf.remaining); 1268 memmove(r.base, sock->recvbuf.consume_position, 1269 copylen); 1270 sock->recvbuf.consume_position += copylen; 1271 sock->recvbuf.remaining -= copylen; 1272 isc_buffer_add(buffer, copylen); 1273 dev->n += copylen; 1274 } 1275 buffer = ISC_LIST_NEXT(buffer, link); 1276 } 1277 } else { // Single-buffer receive 1278 copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining); 1279 memmove(dev->region.base + dev->n, 1280 sock->recvbuf.consume_position, copylen); 1281 sock->recvbuf.consume_position += copylen; 1282 sock->recvbuf.remaining -= copylen; 1283 dev->n += copylen; 1284 } 1285 1286 /* 1287 * UDP receives are all-consuming. That is, if we have 4k worth of 1288 * data in our receive buffer, and the caller only gave us 1289 * 1k of space, we will toss the remaining 3k of data. TCP 1290 * will keep the extra data around and use it for later requests. 1291 */ 1292 if (sock->type == isc_sockettype_udp) 1293 sock->recvbuf.remaining = 0; 1294 } 1295 1296 /* 1297 * Copy out as much data from the internal buffer to done events. 1298 * As each done event is filled, send it along its way. 1299 */ 1300 static void 1301 completeio_recv(isc_socket_t *sock) 1302 { 1303 isc_socketevent_t *dev; 1304 1305 /* 1306 * If we are in the process of filling our buffer, we cannot 1307 * touch it yet, so don't. 1308 */ 1309 if (sock->pending_recv > 0) 1310 return; 1311 1312 while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) { 1313 dev = ISC_LIST_HEAD(sock->recv_list); 1314 1315 /* 1316 * See if we have sufficient data in our receive buffer 1317 * to handle this. If we do, copy out the data. 
1318 */ 1319 fill_recv(sock, dev); 1320 1321 /* 1322 * Did we satisfy it? 1323 */ 1324 if (dev->n >= dev->minimum) { 1325 dev->result = ISC_R_SUCCESS; 1326 send_recvdone_event(sock, &dev); 1327 } 1328 } 1329 } 1330 1331 /* 1332 * Returns: 1333 * DOIO_SUCCESS The operation succeeded. dev->result contains 1334 * ISC_R_SUCCESS. 1335 * 1336 * DOIO_HARD A hard or unexpected I/O error was encountered. 1337 * dev->result contains the appropriate error. 1338 * 1339 * DOIO_SOFT A soft I/O error was encountered. No senddone 1340 * event was sent. The operation should be retried. 1341 * 1342 * No other return values are possible. 1343 */ 1344 static int 1345 completeio_send(isc_socket_t *sock, isc_socketevent_t *dev, 1346 struct msghdr *messagehdr, int cc, int send_errno) 1347 { 1348 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 1349 char strbuf[ISC_STRERRORSIZE]; 1350 1351 if (send_errno != 0) { 1352 if (SOFT_ERROR(send_errno)) 1353 return (DOIO_SOFT); 1354 1355 return (map_socket_error(sock, send_errno, &dev->result, 1356 strbuf, sizeof(strbuf))); 1357 1358 /* 1359 * The other error types depend on whether or not the 1360 * socket is UDP or TCP. If it is UDP, some errors 1361 * that we expect to be fatal under TCP are merely 1362 * annoying, and are really soft errors. 1363 * 1364 * However, these soft errors are still returned as 1365 * a status. 1366 */ 1367 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); 1368 isc__strerror(send_errno, strbuf, sizeof(strbuf)); 1369 UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s", 1370 addrbuf, strbuf); 1371 dev->result = isc__errno2result(send_errno); 1372 return (DOIO_HARD); 1373 } 1374 1375 /* 1376 * If we write less than we expected, update counters, poke. 1377 */ 1378 dev->n += cc; 1379 if (cc != messagehdr->msg_totallen) 1380 return (DOIO_SOFT); 1381 1382 /* 1383 * Exactly what we wanted to write. We're done with this 1384 * entry. Post its completion event. 
1385 */ 1386 dev->result = ISC_R_SUCCESS; 1387 return (DOIO_SUCCESS); 1388 } 1389 1390 static int 1391 startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes, 1392 int *send_errno) 1393 { 1394 char *cmsg = NULL; 1395 char strbuf[ISC_STRERRORSIZE]; 1396 IoCompletionInfo *lpo; 1397 int status; 1398 struct msghdr *msghdr; 1399 1400 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 1401 HEAP_ZERO_MEMORY, 1402 sizeof(IoCompletionInfo)); 1403 RUNTIME_CHECK(lpo != NULL); 1404 lpo->request_type = SOCKET_SEND; 1405 lpo->dev = dev; 1406 msghdr = &lpo->messagehdr; 1407 memset(msghdr, 0, sizeof(struct msghdr)); 1408 ISC_LIST_INIT(lpo->bufferlist); 1409 1410 build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo); 1411 1412 *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno); 1413 1414 if (*nbytes <= 0) { 1415 /* 1416 * I/O has been initiated 1417 * completion will be through the completion port 1418 */ 1419 if (PENDING_ERROR(*send_errno)) { 1420 status = DOIO_PENDING; 1421 goto done; 1422 } 1423 1424 if (SOFT_ERROR(*send_errno)) { 1425 status = DOIO_SOFT; 1426 goto done; 1427 } 1428 1429 /* 1430 * If we got this far then something is wrong 1431 */ 1432 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1433 isc__strerror(*send_errno, strbuf, sizeof(strbuf)); 1434 socket_log(__LINE__, sock, NULL, IOEVENT, 1435 isc_msgcat, ISC_MSGSET_SOCKET, 1436 ISC_MSG_INTERNALSEND, 1437 "startio_send: internal_sendmsg(%d) %d " 1438 "bytes, err %d/%s", 1439 sock->fd, *nbytes, *send_errno, strbuf); 1440 } 1441 status = DOIO_HARD; 1442 goto done; 1443 } 1444 dev->result = ISC_R_SUCCESS; 1445 status = DOIO_SOFT; 1446 done: 1447 _set_state(sock, SOCK_DATA); 1448 return (status); 1449 } 1450 1451 static void 1452 use_min_mtu(isc_socket_t *sock) { 1453 #ifdef IPV6_USE_MIN_MTU 1454 /* use minimum MTU */ 1455 if (sock->pf == AF_INET6) { 1456 int on = 1; 1457 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU, 1458 (void *)&on, sizeof(on)); 1459 } 1460 #else 1461 
UNUSED(sock); 1462 #endif 1463 } 1464 1465 static isc_result_t 1466 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, 1467 isc_socket_t **socketp) { 1468 isc_socket_t *sock; 1469 isc_result_t result; 1470 1471 sock = isc_mem_get(manager->mctx, sizeof(*sock)); 1472 1473 if (sock == NULL) 1474 return (ISC_R_NOMEMORY); 1475 1476 sock->magic = 0; 1477 sock->references = 0; 1478 1479 sock->manager = manager; 1480 sock->type = type; 1481 sock->fd = INVALID_SOCKET; 1482 1483 ISC_LINK_INIT(sock, link); 1484 1485 /* 1486 * Set up list of readers and writers to be initially empty. 1487 */ 1488 ISC_LIST_INIT(sock->recv_list); 1489 ISC_LIST_INIT(sock->send_list); 1490 ISC_LIST_INIT(sock->accept_list); 1491 sock->connect_ev = NULL; 1492 sock->pending_accept = 0; 1493 sock->pending_recv = 0; 1494 sock->pending_send = 0; 1495 sock->pending_iocp = 0; 1496 sock->listener = 0; 1497 sock->connected = 0; 1498 sock->pending_connect = 0; 1499 sock->bound = 0; 1500 sock->dupped = 0; 1501 memset(sock->name, 0, sizeof(sock->name)); // zero the name field 1502 _set_state(sock, SOCK_INITIALIZED); 1503 1504 sock->recvbuf.len = 65536; 1505 sock->recvbuf.consume_position = sock->recvbuf.base; 1506 sock->recvbuf.remaining = 0; 1507 sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size 1508 if (sock->recvbuf.base == NULL) { 1509 result = ISC_R_NOMEMORY; 1510 goto error; 1511 } 1512 1513 /* 1514 * Initialize the lock. 
1515 */ 1516 result = isc_mutex_init(&sock->lock); 1517 if (result != ISC_R_SUCCESS) 1518 goto error; 1519 1520 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1521 "allocated"); 1522 1523 sock->magic = SOCKET_MAGIC; 1524 *socketp = sock; 1525 1526 return (ISC_R_SUCCESS); 1527 1528 error: 1529 if (sock->recvbuf.base != NULL) 1530 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len); 1531 isc_mem_put(manager->mctx, sock, sizeof(*sock)); 1532 1533 return (result); 1534 } 1535 1536 /* 1537 * Verify that the socket state is consistent. 1538 */ 1539 static void 1540 consistent(isc_socket_t *sock) { 1541 1542 isc_socketevent_t *dev; 1543 isc_socket_newconnev_t *nev; 1544 unsigned int count; 1545 char *crash_reason; 1546 isc_boolean_t crash = ISC_FALSE; 1547 1548 REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send 1549 + sock->pending_accept + sock->pending_connect); 1550 1551 dev = ISC_LIST_HEAD(sock->send_list); 1552 count = 0; 1553 while (dev != NULL) { 1554 count++; 1555 dev = ISC_LIST_NEXT(dev, ev_link); 1556 } 1557 if (count > sock->pending_send) { 1558 crash = ISC_TRUE; 1559 crash_reason = "send_list > sock->pending_send"; 1560 } 1561 1562 nev = ISC_LIST_HEAD(sock->accept_list); 1563 count = 0; 1564 while (nev != NULL) { 1565 count++; 1566 nev = ISC_LIST_NEXT(nev, ev_link); 1567 } 1568 if (count > sock->pending_accept) { 1569 crash = ISC_TRUE; 1570 crash_reason = "send_list > sock->pending_send"; 1571 } 1572 1573 if (crash) { 1574 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 1575 ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s", 1576 crash_reason); 1577 sock_dump(sock); 1578 INSIST(crash == ISC_FALSE); 1579 } 1580 } 1581 1582 /* 1583 * Maybe free the socket. 1584 * 1585 * This function will verify tht the socket is no longer in use in any way, 1586 * either internally or externally. 
This is the only place where this 1587 * check is to be made; if some bit of code believes that IT is done with 1588 * the socket (e.g., some reference counter reaches zero), it should call 1589 * this function. 1590 * 1591 * When calling this function, the socket must be locked, and the manager 1592 * must be unlocked. 1593 * 1594 * When this function returns, *socketp will be NULL. No tricks to try 1595 * to hold on to this pointer are allowed. 1596 */ 1597 static void 1598 maybe_free_socket(isc_socket_t **socketp, int lineno) { 1599 isc_socket_t *sock = *socketp; 1600 *socketp = NULL; 1601 1602 INSIST(VALID_SOCKET(sock)); 1603 CONSISTENT(sock); 1604 1605 if (sock->pending_iocp > 0 1606 || sock->pending_recv > 0 1607 || sock->pending_send > 0 1608 || sock->pending_accept > 0 1609 || sock->references > 0 1610 || sock->pending_connect == 1 1611 || !ISC_LIST_EMPTY(sock->recv_list) 1612 || !ISC_LIST_EMPTY(sock->send_list) 1613 || !ISC_LIST_EMPTY(sock->accept_list) 1614 || sock->fd != INVALID_SOCKET) { 1615 UNLOCK(&sock->lock); 1616 return; 1617 } 1618 UNLOCK(&sock->lock); 1619 1620 free_socket(&sock, lineno); 1621 } 1622 1623 void 1624 free_socket(isc_socket_t **sockp, int lineno) { 1625 isc_socketmgr_t *manager; 1626 isc_socket_t *sock = *sockp; 1627 *sockp = NULL; 1628 1629 /* 1630 * Seems we can free the socket after all. 
1631 */ 1632 manager = sock->manager; 1633 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, 1634 ISC_MSGSET_SOCKET, ISC_MSG_DESTROYING, 1635 "freeing socket line %d fd %d lock %p semaphore %p", 1636 lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore); 1637 1638 sock->magic = 0; 1639 DESTROYLOCK(&sock->lock); 1640 1641 if (sock->recvbuf.base != NULL) 1642 isc_mem_put(manager->mctx, sock->recvbuf.base, 1643 sock->recvbuf.len); 1644 1645 LOCK(&manager->lock); 1646 if (ISC_LINK_LINKED(sock, link)) 1647 ISC_LIST_UNLINK(manager->socklist, sock, link); 1648 isc_mem_put(manager->mctx, sock, sizeof(*sock)); 1649 1650 if (ISC_LIST_EMPTY(manager->socklist)) 1651 SIGNAL(&manager->shutdown_ok); 1652 UNLOCK(&manager->lock); 1653 } 1654 1655 /* 1656 * Create a new 'type' socket managed by 'manager'. Events 1657 * will be posted to 'task' and when dispatched 'action' will be 1658 * called with 'arg' as the arg value. The new socket is returned 1659 * in 'socketp'. 1660 */ 1661 static isc_result_t 1662 socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, 1663 isc_socket_t **socketp, isc_socket_t *dup_socket) 1664 { 1665 isc_socket_t *sock = NULL; 1666 isc_result_t result; 1667 #if defined(USE_CMSG) 1668 int on = 1; 1669 #endif 1670 #if defined(SO_RCVBUF) 1671 ISC_SOCKADDR_LEN_T optlen; 1672 int size; 1673 #endif 1674 int socket_errno; 1675 char strbuf[ISC_STRERRORSIZE]; 1676 1677 REQUIRE(VALID_MANAGER(manager)); 1678 REQUIRE(socketp != NULL && *socketp == NULL); 1679 REQUIRE(type != isc_sockettype_fdwatch); 1680 1681 #ifndef SOCK_RAW 1682 if (type == isc_sockettype_raw) 1683 return (ISC_R_NOTIMPLEMENTED); 1684 #endif 1685 1686 result = allocate_socket(manager, type, &sock); 1687 if (result != ISC_R_SUCCESS) 1688 return (result); 1689 1690 sock->pf = pf; 1691 switch (type) { 1692 case isc_sockettype_udp: 1693 sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP); 1694 if (sock->fd != INVALID_SOCKET) { 1695 result = connection_reset_fix(sock->fd); 1696 if 
(result != ISC_R_SUCCESS) { 1697 socket_log(__LINE__, sock, 1698 NULL, EVENT, NULL, 0, 0, 1699 "closed %d %d %d " 1700 "con_reset_fix_failed", 1701 sock->pending_recv, 1702 sock->pending_send, 1703 sock->references); 1704 closesocket(sock->fd); 1705 _set_state(sock, SOCK_CLOSED); 1706 sock->fd = INVALID_SOCKET; 1707 free_socket(&sock, __LINE__); 1708 return (result); 1709 } 1710 } 1711 break; 1712 case isc_sockettype_tcp: 1713 sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP); 1714 break; 1715 #ifdef SOCK_RAW 1716 case isc_sockettype_raw: 1717 sock->fd = socket(pf, SOCK_RAW, 0); 1718 #ifdef PF_ROUTE 1719 if (pf == PF_ROUTE) 1720 sock->bound = 1; 1721 #endif 1722 break; 1723 #endif 1724 } 1725 1726 if (sock->fd == INVALID_SOCKET) { 1727 socket_errno = WSAGetLastError(); 1728 free_socket(&sock, __LINE__); 1729 1730 switch (socket_errno) { 1731 case WSAEMFILE: 1732 case WSAENOBUFS: 1733 return (ISC_R_NORESOURCES); 1734 1735 case WSAEPROTONOSUPPORT: 1736 case WSAEPFNOSUPPORT: 1737 case WSAEAFNOSUPPORT: 1738 return (ISC_R_FAMILYNOSUPPORT); 1739 1740 default: 1741 isc__strerror(socket_errno, strbuf, sizeof(strbuf)); 1742 UNEXPECTED_ERROR(__FILE__, __LINE__, 1743 "socket() %s: %s", 1744 isc_msgcat_get(isc_msgcat, 1745 ISC_MSGSET_GENERAL, 1746 ISC_MSG_FAILED, 1747 "failed"), 1748 strbuf); 1749 return (ISC_R_UNEXPECTED); 1750 } 1751 } 1752 1753 result = make_nonblock(sock->fd); 1754 if (result != ISC_R_SUCCESS) { 1755 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1756 "closed %d %d %d make_nonblock_failed", 1757 sock->pending_recv, sock->pending_send, 1758 sock->references); 1759 closesocket(sock->fd); 1760 sock->fd = INVALID_SOCKET; 1761 free_socket(&sock, __LINE__); 1762 return (result); 1763 } 1764 1765 /* 1766 * Use minimum mtu if possible. 
1767 */ 1768 use_min_mtu(sock); 1769 1770 #if defined(USE_CMSG) || defined(SO_RCVBUF) 1771 if (type == isc_sockettype_udp) { 1772 1773 #if defined(USE_CMSG) 1774 #if defined(ISC_PLATFORM_HAVEIPV6) 1775 #ifdef IPV6_RECVPKTINFO 1776 /* 2292bis */ 1777 if ((pf == AF_INET6) 1778 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1779 (char *)&on, sizeof(on)) < 0)) { 1780 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 1781 UNEXPECTED_ERROR(__FILE__, __LINE__, 1782 "setsockopt(%d, IPV6_RECVPKTINFO) " 1783 "%s: %s", sock->fd, 1784 isc_msgcat_get(isc_msgcat, 1785 ISC_MSGSET_GENERAL, 1786 ISC_MSG_FAILED, 1787 "failed"), 1788 strbuf); 1789 } 1790 #else 1791 /* 2292 */ 1792 if ((pf == AF_INET6) 1793 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO, 1794 (char *)&on, sizeof(on)) < 0)) { 1795 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 1796 UNEXPECTED_ERROR(__FILE__, __LINE__, 1797 "setsockopt(%d, IPV6_PKTINFO) %s: %s", 1798 sock->fd, 1799 isc_msgcat_get(isc_msgcat, 1800 ISC_MSGSET_GENERAL, 1801 ISC_MSG_FAILED, 1802 "failed"), 1803 strbuf); 1804 } 1805 #endif /* IPV6_RECVPKTINFO */ 1806 #endif /* ISC_PLATFORM_HAVEIPV6 */ 1807 #endif /* defined(USE_CMSG) */ 1808 1809 #if defined(SO_RCVBUF) 1810 optlen = sizeof(size); 1811 if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, 1812 (char *)&size, &optlen) >= 0 && 1813 size < RCVBUFSIZE) { 1814 size = RCVBUFSIZE; 1815 (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, 1816 (char *)&size, sizeof(size)); 1817 } 1818 #endif 1819 1820 } 1821 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */ 1822 1823 _set_state(sock, SOCK_OPEN); 1824 sock->references = 1; 1825 *socketp = sock; 1826 1827 iocompletionport_update(sock); 1828 1829 if (dup_socket) { 1830 #ifndef ISC_ALLOW_MAPPED 1831 isc__socket_ipv6only(sock, ISC_TRUE); 1832 #endif 1833 1834 if (dup_socket->bound) { 1835 isc_sockaddr_t local; 1836 1837 result = isc__socket_getsockname(dup_socket, &local); 1838 if (result != ISC_R_SUCCESS) { 1839 
isc_socket_close(sock); 1840 return (result); 1841 } 1842 result = isc__socket_bind(sock, &local, 1843 ISC_SOCKET_REUSEADDRESS); 1844 if (result != ISC_R_SUCCESS) { 1845 isc_socket_close(sock); 1846 return (result); 1847 } 1848 } 1849 sock->dupped = 1; 1850 } 1851 1852 /* 1853 * Note we don't have to lock the socket like we normally would because 1854 * there are no external references to it yet. 1855 */ 1856 LOCK(&manager->lock); 1857 ISC_LIST_APPEND(manager->socklist, sock, link); 1858 InterlockedIncrement(&manager->totalSockets); 1859 UNLOCK(&manager->lock); 1860 1861 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, 1862 ISC_MSGSET_SOCKET, ISC_MSG_CREATED, 1863 "created %u type %u", sock->fd, type); 1864 1865 return (ISC_R_SUCCESS); 1866 } 1867 1868 isc_result_t 1869 isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, 1870 isc_socket_t **socketp) 1871 { 1872 return (socket_create(manager, pf, type, socketp, NULL)); 1873 } 1874 1875 isc_result_t 1876 isc__socket_dup(isc_socket_t *sock, isc_socket_t **socketp) { 1877 REQUIRE(VALID_SOCKET(sock)); 1878 REQUIRE(socketp != NULL && *socketp == NULL); 1879 1880 return (socket_create(sock->manager, sock->pf, sock->type, 1881 socketp, sock)); 1882 } 1883 1884 isc_result_t 1885 isc_socket_open(isc_socket_t *sock) { 1886 REQUIRE(VALID_SOCKET(sock)); 1887 REQUIRE(sock->type != isc_sockettype_fdwatch); 1888 1889 return (ISC_R_NOTIMPLEMENTED); 1890 } 1891 1892 /* 1893 * Attach to a socket. Caller must explicitly detach when it is done. 1894 */ 1895 void 1896 isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp) { 1897 REQUIRE(VALID_SOCKET(sock)); 1898 REQUIRE(socketp != NULL && *socketp == NULL); 1899 1900 LOCK(&sock->lock); 1901 CONSISTENT(sock); 1902 sock->references++; 1903 UNLOCK(&sock->lock); 1904 1905 *socketp = sock; 1906 } 1907 1908 /* 1909 * Dereference a socket. If this is the last reference to it, clean things 1910 * up by destroying the socket. 
1911 */ 1912 void 1913 isc__socket_detach(isc_socket_t **socketp) { 1914 isc_socket_t *sock; 1915 1916 REQUIRE(socketp != NULL); 1917 sock = *socketp; 1918 REQUIRE(VALID_SOCKET(sock)); 1919 REQUIRE(sock->type != isc_sockettype_fdwatch); 1920 1921 LOCK(&sock->lock); 1922 CONSISTENT(sock); 1923 REQUIRE(sock->references > 0); 1924 sock->references--; 1925 1926 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1927 "detach_socket %d %d %d", 1928 sock->pending_recv, sock->pending_send, 1929 sock->references); 1930 1931 if (sock->references == 0 && sock->fd != INVALID_SOCKET) { 1932 closesocket(sock->fd); 1933 sock->fd = INVALID_SOCKET; 1934 _set_state(sock, SOCK_CLOSED); 1935 } 1936 1937 maybe_free_socket(&sock, __LINE__); 1938 1939 *socketp = NULL; 1940 } 1941 1942 isc_result_t 1943 isc_socket_close(isc_socket_t *sock) { 1944 REQUIRE(VALID_SOCKET(sock)); 1945 REQUIRE(sock->type != isc_sockettype_fdwatch); 1946 1947 return (ISC_R_NOTIMPLEMENTED); 1948 } 1949 1950 /* 1951 * Dequeue an item off the given socket's read queue, set the result code 1952 * in the done event to the one provided, and send it to the task it was 1953 * destined for. 1954 * 1955 * If the event to be sent is on a list, remove it before sending. If 1956 * asked to, send and detach from the task as well. 1957 * 1958 * Caller must have the socket locked if the event is attached to the socket. 1959 */ 1960 static void 1961 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) { 1962 isc_task_t *task; 1963 1964 task = (*dev)->ev_sender; 1965 (*dev)->ev_sender = sock; 1966 1967 if (ISC_LINK_LINKED(*dev, ev_link)) 1968 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link); 1969 1970 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 1971 == ISC_SOCKEVENTATTR_ATTACHED) 1972 isc_task_sendanddetach(&task, (isc_event_t **)dev); 1973 else 1974 isc_task_send(task, (isc_event_t **)dev); 1975 1976 CONSISTENT(sock); 1977 } 1978 1979 /* 1980 * See comments for send_recvdone_event() above. 
1981 */ 1982 static void 1983 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) { 1984 isc_task_t *task; 1985 1986 INSIST(dev != NULL && *dev != NULL); 1987 1988 task = (*dev)->ev_sender; 1989 (*dev)->ev_sender = sock; 1990 1991 if (ISC_LINK_LINKED(*dev, ev_link)) 1992 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); 1993 1994 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 1995 == ISC_SOCKEVENTATTR_ATTACHED) 1996 isc_task_sendanddetach(&task, (isc_event_t **)dev); 1997 else 1998 isc_task_send(task, (isc_event_t **)dev); 1999 2000 CONSISTENT(sock); 2001 } 2002 2003 /* 2004 * See comments for send_recvdone_event() above. 2005 */ 2006 static void 2007 send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) { 2008 isc_task_t *task; 2009 2010 INSIST(adev != NULL && *adev != NULL); 2011 2012 task = (*adev)->ev_sender; 2013 (*adev)->ev_sender = sock; 2014 2015 if (ISC_LINK_LINKED(*adev, ev_link)) 2016 ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link); 2017 2018 isc_task_sendanddetach(&task, (isc_event_t **)adev); 2019 2020 CONSISTENT(sock); 2021 } 2022 2023 /* 2024 * See comments for send_recvdone_event() above. 2025 */ 2026 static void 2027 send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) { 2028 isc_task_t *task; 2029 2030 INSIST(cdev != NULL && *cdev != NULL); 2031 2032 task = (*cdev)->ev_sender; 2033 (*cdev)->ev_sender = sock; 2034 2035 sock->connect_ev = NULL; 2036 2037 isc_task_sendanddetach(&task, (isc_event_t **)cdev); 2038 2039 CONSISTENT(sock); 2040 } 2041 2042 /* 2043 * On entry to this function, the event delivered is the internal 2044 * readable event, and the first item on the accept_list should be 2045 * the done event we want to send. If the list is empty, this is a no-op, 2046 * so just close the new connection, unlock, and return. 
2047 * 2048 * Note the socket is locked before entering here 2049 */ 2050 static void 2051 internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) { 2052 isc_socket_newconnev_t *adev; 2053 isc_result_t result = ISC_R_SUCCESS; 2054 isc_socket_t *nsock; 2055 struct sockaddr *localaddr; 2056 int localaddr_len = sizeof(*localaddr); 2057 struct sockaddr *remoteaddr; 2058 int remoteaddr_len = sizeof(*remoteaddr); 2059 2060 INSIST(VALID_SOCKET(sock)); 2061 LOCK(&sock->lock); 2062 CONSISTENT(sock); 2063 2064 socket_log(__LINE__, sock, NULL, TRACE, 2065 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 2066 "internal_accept called"); 2067 2068 INSIST(sock->listener); 2069 2070 INSIST(sock->pending_iocp > 0); 2071 sock->pending_iocp--; 2072 INSIST(sock->pending_accept > 0); 2073 sock->pending_accept--; 2074 2075 adev = lpo->adev; 2076 2077 /* 2078 * If the event is no longer in the list we can just return. 2079 */ 2080 if (!acceptdone_is_active(sock, adev)) 2081 goto done; 2082 2083 nsock = adev->newsocket; 2084 2085 /* 2086 * Pull off the done event. 2087 */ 2088 ISC_LIST_UNLINK(sock->accept_list, adev, ev_link); 2089 2090 /* 2091 * Extract the addresses from the socket, copy them into the structure, 2092 * and return the new socket. 2093 */ 2094 ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0, 2095 sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16, 2096 (LPSOCKADDR *)&localaddr, &localaddr_len, 2097 (LPSOCKADDR *)&remoteaddr, &remoteaddr_len); 2098 memmove(&adev->address.type, remoteaddr, remoteaddr_len); 2099 adev->address.length = remoteaddr_len; 2100 nsock->address = adev->address; 2101 nsock->pf = adev->address.type.sa.sa_family; 2102 2103 socket_log(__LINE__, nsock, &nsock->address, TRACE, 2104 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 2105 "internal_accept parent %p", sock); 2106 2107 result = make_nonblock(adev->newsocket->fd); 2108 INSIST(result == ISC_R_SUCCESS); 2109 2110 /* 2111 * Use minimum mtu if possible. 
2112 */ 2113 use_min_mtu(adev->newsocket); 2114 2115 INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, 2116 (char *)&sock->fd, sizeof(sock->fd)) == 0); 2117 2118 /* 2119 * Hook it up into the manager. 2120 */ 2121 nsock->bound = 1; 2122 nsock->connected = 1; 2123 _set_state(nsock, SOCK_OPEN); 2124 2125 LOCK(&nsock->manager->lock); 2126 ISC_LIST_APPEND(nsock->manager->socklist, nsock, link); 2127 InterlockedIncrement(&nsock->manager->totalSockets); 2128 UNLOCK(&nsock->manager->lock); 2129 2130 socket_log(__LINE__, sock, &nsock->address, CREATION, 2131 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, 2132 "accepted_connection new_socket %p fd %d", 2133 nsock, nsock->fd); 2134 2135 adev->result = result; 2136 send_acceptdone_event(sock, &adev); 2137 2138 done: 2139 CONSISTENT(sock); 2140 UNLOCK(&sock->lock); 2141 2142 HeapFree(hHeapHandle, 0, lpo->acceptbuffer); 2143 lpo->acceptbuffer = NULL; 2144 } 2145 2146 /* 2147 * Called when a socket with a pending connect() finishes. 2148 * Note that the socket is locked before entering. 2149 */ 2150 static void 2151 internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) { 2152 isc_socket_connev_t *cdev; 2153 char strbuf[ISC_STRERRORSIZE]; 2154 2155 INSIST(VALID_SOCKET(sock)); 2156 2157 LOCK(&sock->lock); 2158 2159 INSIST(sock->pending_iocp > 0); 2160 sock->pending_iocp--; 2161 INSIST(sock->pending_connect == 1); 2162 sock->pending_connect = 0; 2163 2164 /* 2165 * Has this event been canceled? 2166 */ 2167 cdev = lpo->cdev; 2168 if (!connectdone_is_active(sock, cdev)) { 2169 sock->pending_connect = 0; 2170 if (sock->fd != INVALID_SOCKET) { 2171 closesocket(sock->fd); 2172 sock->fd = INVALID_SOCKET; 2173 _set_state(sock, SOCK_CLOSED); 2174 } 2175 CONSISTENT(sock); 2176 UNLOCK(&sock->lock); 2177 return; 2178 } 2179 2180 /* 2181 * Check possible Windows network event error status here. 
2182 */ 2183 if (connect_errno != 0) { 2184 /* 2185 * If the error is SOFT, just try again on this 2186 * fd and pretend nothing strange happened. 2187 */ 2188 if (SOFT_ERROR(connect_errno) || 2189 connect_errno == WSAEINPROGRESS) { 2190 sock->pending_connect = 1; 2191 CONSISTENT(sock); 2192 UNLOCK(&sock->lock); 2193 return; 2194 } 2195 2196 /* 2197 * Translate other errors into ISC_R_* flavors. 2198 */ 2199 switch (connect_errno) { 2200 #define ERROR_MATCH(a, b) case a: cdev->result = b; break; 2201 ERROR_MATCH(WSAEACCES, ISC_R_NOPERM); 2202 ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 2203 ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 2204 ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED); 2205 ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH); 2206 ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN); 2207 ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH); 2208 ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN); 2209 ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES); 2210 ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET); 2211 ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET); 2212 ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT); 2213 #undef ERROR_MATCH 2214 default: 2215 cdev->result = ISC_R_UNEXPECTED; 2216 isc__strerror(connect_errno, strbuf, sizeof(strbuf)); 2217 UNEXPECTED_ERROR(__FILE__, __LINE__, 2218 "internal_connect: connect() %s", 2219 strbuf); 2220 } 2221 } else { 2222 INSIST(setsockopt(sock->fd, SOL_SOCKET, 2223 SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0); 2224 cdev->result = ISC_R_SUCCESS; 2225 sock->connected = 1; 2226 socket_log(__LINE__, sock, &sock->address, IOEVENT, 2227 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, 2228 "internal_connect: success"); 2229 } 2230 2231 send_connectdone_event(sock, &cdev); 2232 2233 UNLOCK(&sock->lock); 2234 } 2235 2236 /* 2237 * Loop through the socket, returning ISC_R_EOF for each done event pending. 
 */
static void
send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
	isc_socketevent_t *dev;

	/*
	 * Every event still queued on recv_list is completed with the
	 * caller-supplied 'result' (which need not be ISC_R_EOF).  The loop
	 * only terminates if send_recvdone_event() removes 'dev' from
	 * recv_list -- presumably it does; confirm against its definition.
	 */
	while (!ISC_LIST_EMPTY(sock->recv_list)) {
		dev = ISC_LIST_HEAD(sock->recv_list);
		dev->result = result;
		send_recvdone_event(sock, &dev);
	}
}

/*
 * Take the data we received in our private buffer, and if any recv() calls on
 * our list are satisfied, send the corresponding done event.
 *
 * If we need more data (there are still items on the recv_list after we consume all
 * our data) then arrange for another system recv() call to fill our buffers.
 *
 * 'nbytes' is the byte count reported for the completed receive.  The socket
 * lock is taken here; it is not released with UNLOCK() in this function, so
 * maybe_free_socket() is presumably responsible for unlocking -- confirm.
 */
static void
internal_recv(isc_socket_t *sock, int nbytes)
{
	INSIST(VALID_SOCKET(sock));

	LOCK(&sock->lock);
	CONSISTENT(sock);

	socket_log(__LINE__, sock, NULL, IOEVENT,
		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
		   "internal_recv: %d bytes received", nbytes);

	/*
	 * If we got here, the I/O operation succeeded.  However, we might still have removed this
	 * event from our notification list (or never placed it on it due to immediate completion.)
	 * Handle the reference counting here, and handle the cancellation event just after.
	 */
	INSIST(sock->pending_iocp > 0);
	sock->pending_iocp--;
	INSIST(sock->pending_recv > 0);
	sock->pending_recv--;

	/*
	 * The only way we could have gotten here is that our I/O has successfully completed.
	 * Update our pointers, and move on.  The only odd case here is that we might not
	 * have received enough data on a TCP stream to satisfy the minimum requirements.  If
	 * this is the case, we will re-issue the recv() call for what we need.
	 *
	 * We do check for a recv() of 0 bytes on a TCP stream.  This means the remote end
	 * has closed.
	 */
	if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
		send_recvdone_abort(sock, ISC_R_EOF);
		maybe_free_socket(&sock, __LINE__);
		return;
	}

	/* Expose the freshly received bytes, starting at the buffer base. */
	sock->recvbuf.remaining = nbytes;
	sock->recvbuf.consume_position = sock->recvbuf.base;
	completeio_recv(sock);

	/*
	 * If there are more receivers waiting for data, queue another receive
	 * here.
	 */
	queue_receive_request(sock);

	/*
	 * Unlock and/or destroy if we are the last thing this socket has left to do.
	 */
	maybe_free_socket(&sock, __LINE__);
}

/*
 * Completion handler for a send.
 *
 * Frees every buffer still linked on lpo->bufferlist, drops the pending
 * IOCP/send counters, and -- if 'dev' is still on the socket's send_list --
 * lets completeio_send() decide the outcome.  The done event is posted for
 * DOIO_HARD and DOIO_SUCCESS; DOIO_SOFT posts nothing.
 *
 * 'lpo' itself is not freed here.
 */
static void
internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
	      struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
{
	buflist_t *buffer;

	/*
	 * Find out what socket this is and lock it.
	 */
	INSIST(VALID_SOCKET(sock));

	LOCK(&sock->lock);
	CONSISTENT(sock);

	socket_log(__LINE__, sock, NULL, IOEVENT,
		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
		   "internal_send: task got socket event %p", dev);

	/* Release the heap copies of the data that were handed to the send. */
	buffer = ISC_LIST_HEAD(lpo->bufferlist);
	while (buffer != NULL) {
		ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);

		socket_log(__LINE__, sock, NULL, TRACE,
		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
		   "free_buffer %p %p", buffer, buffer->buf);

		HeapFree(hHeapHandle, 0, buffer->buf);
		HeapFree(hHeapHandle, 0, buffer);
		buffer = ISC_LIST_HEAD(lpo->bufferlist);
	}

	INSIST(sock->pending_iocp > 0);
	sock->pending_iocp--;
	INSIST(sock->pending_send > 0);
	sock->pending_send--;

	/* If the event is no longer in the list we can just return */
	if (!senddone_is_active(sock, dev))
		goto done;

	/*
	 * Set the error code and send things on its way.
	 */
	switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
	case DOIO_SOFT:
		break;
	case DOIO_HARD:
	case DOIO_SUCCESS:
		send_senddone_event(sock, &dev);
		break;
	}

 done:
	maybe_free_socket(&sock, __LINE__);
}

/*
 * These return whether the done event passed in is on the list (or, for
 * connect, is the one we're waiting for).  Using these ensures we will not
 * double-send an event.
 */
static isc_boolean_t
senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
{
	isc_socketevent_t *ldev;

	/* Linear search of send_list for 'dev' by pointer identity. */
	ldev = ISC_LIST_HEAD(sock->send_list);
	while (ldev != NULL && ldev != dev)
		ldev = ISC_LIST_NEXT(ldev, ev_link);

	return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
}

static isc_boolean_t
acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
{
	isc_socket_newconnev_t *ldev;

	/* Linear search of accept_list for 'dev' by pointer identity. */
	ldev = ISC_LIST_HEAD(sock->accept_list);
	while (ldev != NULL && ldev != dev)
		ldev = ISC_LIST_NEXT(ldev, ev_link);

	return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
}

static isc_boolean_t
connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
{
	/* Only one connect event is tracked per socket (sock->connect_ev). */
	return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE);
}

//
// The Windows network stack seems to have two very distinct paths depending
// on what is installed.  Specifically, if something is looking at network
// connections (like an anti-virus or anti-malware application, such as
// McAfee products) Windows may return additional error conditions which
// were not previously returned.
//
// One specific one is when a TCP SYN scan is used.  In this situation,
// Windows responds with the SYN-ACK, but the scanner never responds with
// the 3rd packet, the ACK.  Windows considers this a partially open connection.
2410 // Most Unix networking stacks, and Windows without McAfee installed, will 2411 // not return this to the caller. However, with this product installed, 2412 // Windows returns this as a failed status on the Accept() call. Here, we 2413 // will just re-issue the ISCAcceptEx() call as if nothing had happened. 2414 // 2415 // This code should only be called when the listening socket has received 2416 // such an error. Additionally, the "parent" socket must be locked. 2417 // Additionally, the lpo argument is re-used here, and must not be freed 2418 // by the caller. 2419 // 2420 static isc_result_t 2421 restart_accept(isc_socket_t *parent, IoCompletionInfo *lpo) 2422 { 2423 isc_socket_t *nsock = lpo->adev->newsocket; 2424 SOCKET new_fd; 2425 2426 /* 2427 * AcceptEx() requires we pass in a socket. Note that we carefully 2428 * do not close the previous socket in case of an error message returned by 2429 * our new socket() call. If we return an error here, our caller will 2430 * clean up. 2431 */ 2432 new_fd = socket(parent->pf, SOCK_STREAM, IPPROTO_TCP); 2433 if (nsock->fd == INVALID_SOCKET) { 2434 return (ISC_R_FAILURE); // parent will ask windows for error message 2435 } 2436 closesocket(nsock->fd); 2437 nsock->fd = new_fd; 2438 2439 memset(&lpo->overlapped, 0, sizeof(lpo->overlapped)); 2440 2441 ISCAcceptEx(parent->fd, 2442 nsock->fd, /* Accepted Socket */ 2443 lpo->acceptbuffer, /* Buffer for initial Recv */ 2444 0, /* Length of Buffer */ 2445 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */ 2446 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */ 2447 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */ 2448 (LPOVERLAPPED)lpo /* Overlapped structure */ 2449 ); 2450 2451 InterlockedDecrement(&nsock->manager->iocp_total); 2452 iocompletionport_update(nsock); 2453 2454 return (ISC_R_SUCCESS); 2455 } 2456 2457 /* 2458 * This is the I/O Completion Port Worker Function. 
 * It loops forever
 * waiting for I/O to complete and then forwards them for further
 * processing.  There are a number of these in separate threads.
 *
 * 'ThreadContext' is the isc_socketmgr_t whose completion port is serviced.
 * The loop ends when a completion is dequeued with a NULL overlapped
 * pointer, which is treated as the request to exit.
 */
static isc_threadresult_t WINAPI
SocketIoThread(LPVOID ThreadContext) {
	isc_socketmgr_t *manager = ThreadContext;
	BOOL bSuccess = FALSE;
	DWORD nbytes;
	IoCompletionInfo *lpo = NULL;
	isc_socket_t *sock = NULL;
	int request;
	struct msghdr *messagehdr = NULL;
	int errval;
	char strbuf[ISC_STRERRORSIZE];
	int errstatus;

	REQUIRE(VALID_MANAGER(manager));

	/*
	 * Set the thread priority high enough so I/O will
	 * preempt normal recv packet processing, but not
	 * higher than the timer sync thread.
	 */
	if (!SetThreadPriority(GetCurrentThread(),
			       THREAD_PRIORITY_ABOVE_NORMAL)) {
		errval = GetLastError();
		isc__strerror(errval, strbuf, sizeof(strbuf));
		FATAL_ERROR(__FILE__, __LINE__,
				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
				ISC_MSG_FAILED,
				"Can't set thread priority: %s"),
				strbuf);
	}

	/*
	 * Loop forever waiting on I/O Completions and then processing them
	 */
	while (TRUE) {
		/*
		 * Re-entry point used when a failed accept has been re-issued
		 * with the same lpo (which therefore must not be freed).
		 */
		wait_again:
		bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
						     &nbytes,
						     (PULONG_PTR)&sock,
						     (LPWSAOVERLAPPED *)&lpo,
						     INFINITE);
		if (lpo == NULL) /* Received request to exit */
			break;

		REQUIRE(VALID_SOCKET(sock));

		request = lpo->request_type;

		errstatus = 0;
		if (!bSuccess) {
			isc_result_t isc_result;

			/*
			 * Did the I/O operation complete?
			 */
			errstatus = GetLastError();
			isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);

			/*
			 * Failed completion: undo the pending counters for the
			 * request type and, where the event is still active,
			 * deliver it with the translated error.
			 */
			LOCK(&sock->lock);
			CONSISTENT(sock);
			switch (request) {
			case SOCKET_RECV:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_recv > 0);
				sock->pending_recv--;
				if (!sock->connected &&
				    ((errstatus == ERROR_HOST_UNREACHABLE) ||
				     (errstatus == WSAENETRESET) ||
				     (errstatus == WSAECONNRESET))) {
					/* ignore soft errors */
					queue_receive_request(sock);
					break;
				}
				send_recvdone_abort(sock, isc_result);
				if (isc_result == ISC_R_UNEXPECTED) {
					UNEXPECTED_ERROR(__FILE__, __LINE__,
						"SOCKET_RECV: Windows error code: %d, returning ISC error %d",
						errstatus, isc_result);
				}
				break;

			case SOCKET_SEND:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_send > 0);
				sock->pending_send--;
				if (senddone_is_active(sock, lpo->dev)) {
					lpo->dev->result = isc_result;
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_send");
					send_senddone_event(sock, &lpo->dev);
				}
				break;

			case SOCKET_ACCEPT:
				INSIST(sock->pending_iocp > 0);
				INSIST(sock->pending_accept > 0);

				socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
					"Accept: errstatus=%d isc_result=%d", errstatus, isc_result);

				/*
				 * Try to re-issue the accept.  On success the
				 * pending counters are deliberately left alone,
				 * since the same request is outstanding again.
				 */
				if (acceptdone_is_active(sock, lpo->adev)) {
					if (restart_accept(sock, lpo) == ISC_R_SUCCESS) {
						UNLOCK(&sock->lock);
						goto wait_again;
					} else {
						errstatus = GetLastError();
						isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
						socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
							"restart_accept() failed: errstatus=%d isc_result=%d",
							errstatus, isc_result);
					}
				}

				sock->pending_iocp--;
				sock->pending_accept--;
				if (acceptdone_is_active(sock, lpo->adev)) {
					/*
					 * Tear down the never-connected new
					 * socket before reporting the failure.
					 */
					closesocket(lpo->adev->newsocket->fd);
					lpo->adev->newsocket->fd = INVALID_SOCKET;
					lpo->adev->newsocket->references--;
					free_socket(&lpo->adev->newsocket, __LINE__);
					lpo->adev->result = isc_result;
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_accept");
					send_acceptdone_event(sock, &lpo->adev);
				}
				break;

			case SOCKET_CONNECT:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_connect == 1);
				sock->pending_connect = 0;
				if (connectdone_is_active(sock, lpo->cdev)) {
					lpo->cdev->result = isc_result;
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_connect");
					send_connectdone_event(sock, &lpo->cdev);
				}
				break;
			}
			maybe_free_socket(&sock, __LINE__);

			/*
			 * NOTE(review): for SOCKET_ACCEPT, lpo->acceptbuffer is
			 * not released on this path -- confirm it is freed
			 * elsewhere.
			 */
			if (lpo != NULL)
				HeapFree(hHeapHandle, 0, lpo);
			continue;
		}

		messagehdr = &lpo->messagehdr;

		/*
		 * Successful completion: hand off to the per-request handler.
		 * errstatus is always 0 on this path.
		 */
		switch (request) {
		case SOCKET_RECV:
			internal_recv(sock, nbytes);
			break;
		case SOCKET_SEND:
			internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
			break;
		case SOCKET_ACCEPT:
			internal_accept(sock, lpo, errstatus);
			break;
		case SOCKET_CONNECT:
			internal_connect(sock, lpo, errstatus);
			break;
		}

		if (lpo != NULL)
			HeapFree(hHeapHandle, 0, lpo);
	}

	/*
	 * Exit Completion Port Thread
	 */
	manager_log(manager, TRACE,
		    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
				   ISC_MSG_EXITING, "SocketIoThread exiting"));
	return ((isc_threadresult_t)0);
}

/*
 * Create a new socket manager.
 */
isc_result_t
isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
	/* Same as the create2 form with maxsocks == 0. */
	return (isc_socketmgr_create2(mctx, managerp, 0));
}

/*
 * Create a socket manager in '*managerp' using memory context 'mctx'.
 *
 * 'maxsocks' must be 0; any other value yields ISC_R_NOTIMPLEMENTED.
 * Returns ISC_R_NOMEMORY if the manager cannot be allocated, the
 * isc_mutex_init() result if that fails, ISC_R_UNEXPECTED if the condition
 * variable cannot be initialized, and ISC_R_SUCCESS otherwise.
 */
isc_result_t
isc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
		       unsigned int maxsocks)
{
	isc_socketmgr_t *manager;
	isc_result_t result;

	REQUIRE(managerp != NULL && *managerp == NULL);

	if (maxsocks != 0)
		return (ISC_R_NOTIMPLEMENTED);

	manager = isc_mem_get(mctx, sizeof(*manager));
	if (manager == NULL)
		return (ISC_R_NOMEMORY);

	InitSockets();

	manager->magic = SOCKET_MANAGER_MAGIC;
	manager->mctx = NULL;
	manager->stats = NULL;
	ISC_LIST_INIT(manager->socklist);
	result = isc_mutex_init(&manager->lock);
	if (result != ISC_R_SUCCESS) {
		isc_mem_put(mctx, manager, sizeof(*manager));
		return (result);
	}
	if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
		DESTROYLOCK(&manager->lock);
		isc_mem_put(mctx, manager, sizeof(*manager));
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_condition_init() %s",
				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
						ISC_MSG_FAILED, "failed"));
		return (ISC_R_UNEXPECTED);
	}

	/* From here on the manager holds its own reference to mctx. */
	isc_mem_attach(mctx, &manager->mctx);

	iocompletionport_init(manager);	/* Create the Completion Ports */

	manager->bShutdown = ISC_FALSE;
	manager->totalSockets = 0;
	manager->iocp_total = 0;

	*managerp = manager;

	return (ISC_R_SUCCESS);
}

/*
 * Not implemented in this file: always returns ISC_R_NOTIMPLEMENTED and
 * never writes '*nsockp'.
 */
isc_result_t
isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(nsockp != NULL);

	return (ISC_R_NOTIMPLEMENTED);
}

/*
 * Attach 'stats' to the manager.  This may only be done once, while the
 * manager has no sockets, and 'stats' must have exactly
 * isc_sockstatscounter_max counters.
 */
void
isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(ISC_LIST_EMPTY(manager->socklist));
	REQUIRE(manager->stats == NULL);
	REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);

	isc_stats_attach(stats, &manager->stats);
}

/*
 * Destroy the manager in '*managerp' and set '*managerp' to NULL.  Blocks
 * until the manager's socket list is empty and the completion port threads
 * have been joined.
 */
void
isc__socketmgr_destroy(isc_socketmgr_t **managerp) {
	isc_socketmgr_t *manager;
	int i;
	isc_mem_t *mctx;

	/*
	 * Destroy a socket manager.
	 */

	REQUIRE(managerp != NULL);
	manager = *managerp;
	REQUIRE(VALID_MANAGER(manager));

	LOCK(&manager->lock);

	/*
	 * Wait for all sockets to be destroyed.
	 */
	while (!ISC_LIST_EMPTY(manager->socklist)) {
		manager_log(manager, CREATION,
			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
					   ISC_MSG_SOCKETSREMAIN,
					   "sockets exist"));
		WAIT(&manager->shutdown_ok, &manager->lock);
	}

	UNLOCK(&manager->lock);

	/*
	 * Here, we need to add some wait code for the completion port
	 * thread.
	 */
	signal_iocompletionport_exit(manager);
	manager->bShutdown = ISC_TRUE;

	/*
	 * Wait for threads to exit.
	 */
	for (i = 0; i < manager->maxIOCPThreads; i++) {
		if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
			NULL) != ISC_R_SUCCESS)
			UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_thread_join() for Completion Port %s",
				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
						ISC_MSG_FAILED, "failed"));
	}
	/*
	 * Clean up.
	 */

	CloseHandle(manager->hIoCompletionPort);

	(void)isc_condition_destroy(&manager->shutdown_ok);

	DESTROYLOCK(&manager->lock);
	if (manager->stats != NULL)
		isc_stats_detach(&manager->stats);
	manager->magic = 0;
	/* Keep the context in a local: 'manager' is freed before the detach. */
	mctx= manager->mctx;
	isc_mem_put(mctx, manager, sizeof(*manager));

	isc_mem_detach(&mctx);

	*managerp = NULL;
}

/*
 * Attach to 'task', mark 'dev' as holding that attachment, and append 'dev'
 * to the socket's recv_list.  'dev' must not already be linked.
 */
static void
queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
{
	isc_task_t *ntask = NULL;

	isc_task_attach(task, &ntask);
	dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;

	/*
	 * Enqueue the request.
	 */
	INSIST(!ISC_LINK_LINKED(dev, ev_link));
	ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);

	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
		   "queue_receive_event: event %p -> task %p",
		   dev, ntask);
}

/*
 * Check the pending receive queue, and if we have data pending, give it to this
 * caller.  If we have none, queue an I/O request.  If this caller is not the first
 * on the list, then we will just queue this event and return.
 *
 * Caller must have the socket locked.
 *
 * Returns ISC_R_EOF if the socket has no descriptor, ISC_R_INPROGRESS if
 * ISC_SOCKFLAG_IMMEDIATE was requested, and ISC_R_SUCCESS otherwise.
 */
static isc_result_t
socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
	    unsigned int flags)
{
	isc_result_t result = ISC_R_SUCCESS;

	dev->ev_sender = task;

	if (sock->fd == INVALID_SOCKET)
		return (ISC_R_EOF);

	/*
	 * Queue our event on the list of things to do.  Call our function to
	 * attempt to fill buffers as much as possible, and return done events.
	 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
	 * here and tell our caller that we could not satisfy it immediately.
	 */
	queue_receive_event(sock, task, dev);
	if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
		result = ISC_R_INPROGRESS;

	completeio_recv(sock);

	/*
	 * If there are more receivers waiting for data, queue another receive
	 * here.
	 */
	queue_receive_request(sock);

	return (result);
}

/*
 * Receive into the buffers of '*buflist'.  The buffers are moved onto the
 * newly allocated RECVDONE event, leaving '*buflist' empty.
 *
 * 'minimum' == 0 means "fill all available space" for non-UDP sockets; UDP
 * sockets always use a minimum of 1.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor,
 * ISC_R_NOMEMORY if the event cannot be allocated, otherwise the result of
 * socket_recv().
 */
isc_result_t
isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		 unsigned int minimum, isc_task_t *task,
		 isc_taskaction_t action, void *arg)
{
	isc_socketevent_t *dev;
	isc_socketmgr_t *manager;
	unsigned int iocount;
	isc_buffer_t *buffer;
	isc_result_t ret;

	REQUIRE(VALID_SOCKET(sock));
	LOCK(&sock->lock);
	CONSISTENT(sock);

	/*
	 * Make sure that the socket is not closed.  XXXMLG change error here?
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}

	REQUIRE(buflist != NULL);
	REQUIRE(!ISC_LIST_EMPTY(*buflist));
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	iocount = isc_bufferlist_availablecount(buflist);
	REQUIRE(iocount > 0);

	INSIST(sock->bound);

	dev = allocate_socketevent(manager->mctx, sock,
				   ISC_SOCKEVENT_RECVDONE, action, arg);
	if (dev == NULL) {
		UNLOCK(&sock->lock);
		return (ISC_R_NOMEMORY);
	}

	/*
	 * UDP sockets are always partial read
	 */
	if (sock->type == isc_sockettype_udp)
		dev->minimum = 1;
	else {
		if (minimum == 0)
			dev->minimum = iocount;
		else
			dev->minimum = minimum;
	}

	/*
	 * Move each buffer from the passed in list to our internal one.
	 */
	buffer = ISC_LIST_HEAD(*buflist);
	while (buffer != NULL) {
		ISC_LIST_DEQUEUE(*buflist, buffer, link);
		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
		buffer = ISC_LIST_HEAD(*buflist);
	}

	ret = socket_recv(sock, dev, task, 0);

	UNLOCK(&sock->lock);
	return (ret);
}

/*
 * Receive into 'region', allocating the RECVDONE event internally and then
 * delegating to the recv2 form with flags == 0.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor,
 * ISC_R_NOMEMORY if the event cannot be allocated, otherwise the recv2
 * result.
 */
isc_result_t
isc__socket_recv(isc_socket_t *sock, isc_region_t *region,
		unsigned int minimum, isc_task_t *task,
		isc_taskaction_t action, void *arg)
{
	isc_socketevent_t *dev;
	isc_socketmgr_t *manager;
	isc_result_t ret;

	REQUIRE(VALID_SOCKET(sock));
	LOCK(&sock->lock);
	CONSISTENT(sock);

	/*
	 * make sure that the socket's not closed
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	INSIST(sock->bound);

	dev = allocate_socketevent(manager->mctx, sock,
				   ISC_SOCKEVENT_RECVDONE, action, arg);
	if (dev == NULL) {
		UNLOCK(&sock->lock);
		return (ISC_R_NOMEMORY);
	}

	/*
	 * NOTE(review): the recv2 form takes sock->lock again while it is
	 * already held here; this presumably relies on LOCK() being
	 * re-entrant for the owning thread -- confirm.
	 */
	ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
	UNLOCK(&sock->lock);
	return (ret);
}

/*
 * Receive into 'region' using the caller-supplied 'event', which is
 * (re)initialized here.
 *
 * 'minimum' == 0 means the whole region length for non-UDP sockets; UDP
 * sockets always use a minimum of 1.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor, otherwise the
 * result of socket_recv().
 */
isc_result_t
isc__socket_recv2(isc_socket_t *sock, isc_region_t *region,
		 unsigned int minimum, isc_task_t *task,
		 isc_socketevent_t *event, unsigned int flags)
{
	isc_result_t ret;

	REQUIRE(VALID_SOCKET(sock));
	LOCK(&sock->lock);
	CONSISTENT(sock);

	event->result = ISC_R_UNEXPECTED;
	event->ev_sender = sock;
	/*
	 * make sure that the socket's not closed
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}

	ISC_LIST_INIT(event->bufferlist);
	event->region = *region;
	event->n = 0;
	event->offset = 0;
	event->attributes = 0;

	/*
	 * UDP sockets are always partial read.
	 */
	if (sock->type == isc_sockettype_udp)
		event->minimum = 1;
	else {
		if (minimum == 0)
			event->minimum = region->length;
		else
			event->minimum = minimum;
	}

	ret = socket_recv(sock, event, task, flags);
	UNLOCK(&sock->lock);
	return (ret);
}

/*
 * Start a send for 'dev' and, when the I/O is pending (or failed softly and
 * retry is allowed), queue 'dev' on the socket's send_list.
 *
 * Returns ISC_R_INPROGRESS only when the event was queued and
 * ISC_SOCKFLAG_IMMEDIATE was set; ISC_R_SUCCESS in every other case.
 *
 * Caller must have the socket locked.
 */
static isc_result_t
socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
	    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
	    unsigned int flags)
{
	int io_state;
	int send_errno = 0;
	int cc = 0;
	isc_task_t *ntask = NULL;
	isc_result_t result = ISC_R_SUCCESS;

	dev->ev_sender = task;

	set_dev_address(address, sock, dev);
	if (pktinfo != NULL) {
		socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
			   ISC_MSG_PKTINFOPROVIDED,
			   "pktinfo structure provided, ifindex %u (set to 0)",
			   pktinfo->ipi6_ifindex);

		dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
		dev->pktinfo = *pktinfo;
		/*
		 * Set the pktinfo index to 0 here, to let the kernel decide
		 * what interface it should send on.
		 */
		dev->pktinfo.ipi6_ifindex = 0;
	}

	io_state = startio_send(sock, dev, &cc, &send_errno);
	switch (io_state) {
	case DOIO_PENDING:	/* I/O started. Enqueue completion event. */
	case DOIO_SOFT:
		/*
		 * We couldn't send all or part of the request right now, so
		 * queue it unless ISC_SOCKFLAG_NORETRY is set.
		 */
		if ((flags & ISC_SOCKFLAG_NORETRY) == 0 ||
		    io_state == DOIO_PENDING) {
			isc_task_attach(task, &ntask);
			dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;

			/*
			 * Enqueue the request.
			 */
			INSIST(!ISC_LINK_LINKED(dev, ev_link));
			ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);

			socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
				   "socket_send: event %p -> task %p",
				   dev, ntask);

			if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
				result = ISC_R_INPROGRESS;
			break;
		}
		/* Soft failure with NORETRY: not queued, falls through. */

	case DOIO_SUCCESS:
		break;
	}

	return (result);
}

isc_result_t
isc__socket_send(isc_socket_t *sock, isc_region_t *region,
		isc_task_t *task, isc_taskaction_t action, void *arg)
{
	/*
	 * REQUIRE() checking is performed in isc_socket_sendto().
	 */
	return (isc_socket_sendto(sock, region, task, action, arg, NULL,
				  NULL));
}

/*
 * Send 'region', optionally to 'address' with 'pktinfo', allocating the
 * SENDDONE event internally.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor,
 * ISC_R_NOMEMORY if the event cannot be allocated, otherwise the result of
 * socket_send() with flags == 0.
 */
isc_result_t
isc__socket_sendto(isc_socket_t *sock, isc_region_t *region,
		  isc_task_t *task, isc_taskaction_t action, void *arg,
		  isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
{
	isc_socketevent_t *dev;
	isc_socketmgr_t *manager;
	isc_result_t ret;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(sock->type != isc_sockettype_fdwatch);

	LOCK(&sock->lock);
	CONSISTENT(sock);

	/*
	 * make sure that the socket's not closed
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}
	REQUIRE(region != NULL);
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	INSIST(sock->bound);

	dev = allocate_socketevent(manager->mctx, sock,
				   ISC_SOCKEVENT_SENDDONE, action, arg);
	if (dev == NULL) {
		UNLOCK(&sock->lock);
		return (ISC_R_NOMEMORY);
	}
	dev->region = *region;

	ret = socket_send(sock, dev, task, address, pktinfo, 0);
	UNLOCK(&sock->lock);
	return (ret);
}

/* Buffer-list send with no address, no pktinfo and flags == 0. */
isc_result_t
isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
		 isc_task_t *task, isc_taskaction_t action, void *arg)
{
	return (isc_socket_sendtov2(sock, buflist, task, action, arg, NULL,
				    NULL, 0));
}

/* Buffer-list send to 'address' / 'pktinfo' with flags == 0. */
isc_result_t
isc__socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
		   isc_task_t *task, isc_taskaction_t action, void *arg,
		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
{
	return (isc_socket_sendtov2(sock, buflist, task, action, arg, address,
				    pktinfo, 0));
}

/*
 * Send the buffers of '*buflist'.  The buffers are moved onto the newly
 * allocated SENDDONE event, leaving '*buflist' empty.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor,
 * ISC_R_NOMEMORY if the event cannot be allocated, otherwise the result of
 * socket_send().
 */
isc_result_t
isc__socket_sendtov2(isc_socket_t *sock, isc_bufferlist_t *buflist,
		    isc_task_t *task, isc_taskaction_t action, void *arg,
		    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
		    unsigned int flags)
{
	isc_socketevent_t *dev;
	isc_socketmgr_t *manager;
	unsigned int iocount;
	isc_buffer_t *buffer;
	isc_result_t ret;

	REQUIRE(VALID_SOCKET(sock));

	LOCK(&sock->lock);
	CONSISTENT(sock);

	/*
	 * make sure that the socket's not closed
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}
	REQUIRE(buflist != NULL);
	REQUIRE(!ISC_LIST_EMPTY(*buflist));
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	iocount = isc_bufferlist_usedcount(buflist);
	REQUIRE(iocount > 0);

	dev = allocate_socketevent(manager->mctx, sock,
				   ISC_SOCKEVENT_SENDDONE, action, arg);
	if (dev == NULL) {
		UNLOCK(&sock->lock);
		return (ISC_R_NOMEMORY);
	}

	/*
	 * Move each buffer from the passed in list to our internal one.
	 */
	buffer = ISC_LIST_HEAD(*buflist);
	while (buffer != NULL) {
		ISC_LIST_DEQUEUE(*buflist, buffer, link);
		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
		buffer = ISC_LIST_HEAD(*buflist);
	}

	ret = socket_send(sock, dev, task, address, pktinfo, flags);
	UNLOCK(&sock->lock);
	return (ret);
}

/*
 * Send 'region' using the caller-supplied 'event', which is (re)initialized
 * here.  Only ISC_SOCKFLAG_IMMEDIATE and ISC_SOCKFLAG_NORETRY are accepted
 * in 'flags', and NORETRY is only allowed on UDP sockets.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor, otherwise the
 * result of socket_send().
 */
isc_result_t
isc__socket_sendto2(isc_socket_t *sock, isc_region_t *region,
		   isc_task_t *task,
		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
		   isc_socketevent_t *event, unsigned int flags)
{
	isc_result_t ret;

	REQUIRE(VALID_SOCKET(sock));
	LOCK(&sock->lock);
	CONSISTENT(sock);

	REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
	if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
		REQUIRE(sock->type == isc_sockettype_udp);
	event->ev_sender = sock;
	event->result = ISC_R_UNEXPECTED;
	/*
	 * make sure that the socket's not closed
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}
	ISC_LIST_INIT(event->bufferlist);
	event->region = *region;
	event->n = 0;
	event->offset = 0;
	event->attributes = 0;

	ret = socket_send(sock, event, task, address, pktinfo, flags);
	UNLOCK(&sock->lock);
	return (ret);
}

/*
 * Bind the socket to 'sockaddr'.  The socket must not already be bound or
 * dupped.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor,
 * ISC_R_FAMILYMISMATCH if the address family differs from the socket's,
 * a translated error (ISC_R_NOPERM, ISC_R_ADDRNOTAVAIL, ISC_R_ADDRINUSE,
 * ISC_R_BOUND or ISC_R_UNEXPECTED) if bind() fails, and ISC_R_SUCCESS
 * otherwise.
 */
isc_result_t
isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
		unsigned int options) {
	int bind_errno;
	char strbuf[ISC_STRERRORSIZE];
	int on = 1;

	REQUIRE(VALID_SOCKET(sock));
	LOCK(&sock->lock);
	CONSISTENT(sock);

	/*
	 * make sure that the socket's not closed
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}

	INSIST(!sock->bound);
	INSIST(!sock->dupped);

	if (sock->pf != sockaddr->type.sa.sa_family) {
		UNLOCK(&sock->lock);
		return (ISC_R_FAMILYMISMATCH);
	}
	/*
	 * Only set SO_REUSEADDR when we want a specific port.
	 */
	if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
	    isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
	    setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
		       sizeof(on)) < 0) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "setsockopt(%d) %s", sock->fd,
				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
						ISC_MSG_FAILED, "failed"));
		/* Press on... */
	}
	if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
		/* Save the error before any other call can overwrite it. */
		bind_errno = WSAGetLastError();
		UNLOCK(&sock->lock);
		switch (bind_errno) {
		case WSAEACCES:
			return (ISC_R_NOPERM);
		case WSAEADDRNOTAVAIL:
			return (ISC_R_ADDRNOTAVAIL);
		case WSAEADDRINUSE:
			return (ISC_R_ADDRINUSE);
		case WSAEINVAL:
			return (ISC_R_BOUND);
		default:
			isc__strerror(bind_errno, strbuf, sizeof(strbuf));
			UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
					 strbuf);
			return (ISC_R_UNEXPECTED);
		}
	}

	socket_log(__LINE__, sock, sockaddr, TRACE,
		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
	sock->bound = 1;

	UNLOCK(&sock->lock);
	return (ISC_R_SUCCESS);
}

/*
 * Not implemented in this file: validates 'sock' and returns
 * ISC_R_NOTIMPLEMENTED.
 */
isc_result_t
isc__socket_filter(isc_socket_t *sock, const char *filter) {
	UNUSED(sock);
	UNUSED(filter);

	REQUIRE(VALID_SOCKET(sock));
	return (ISC_R_NOTIMPLEMENTED);
}

/*
 * Set up to listen on a given socket.  We do this by creating an internal
 * event that will be dispatched when the socket has read activity.  The
 * watcher will send the internal event to the task when there is a new
 * connection.
 *
 * Unlike in read, we don't preallocate a done event here.  Every time there
 * is a new connection we'll have to allocate a new one anyway, so we might
 * as well keep things simple rather than having to track them.
3312 */ 3313 isc_result_t 3314 isc__socket_listen(isc_socket_t *sock, unsigned int backlog) { 3315 char strbuf[ISC_STRERRORSIZE]; 3316 3317 REQUIRE(VALID_SOCKET(sock)); 3318 3319 LOCK(&sock->lock); 3320 CONSISTENT(sock); 3321 3322 /* 3323 * make sure that the socket's not closed 3324 */ 3325 if (sock->fd == INVALID_SOCKET) { 3326 UNLOCK(&sock->lock); 3327 return (ISC_R_CONNREFUSED); 3328 } 3329 3330 REQUIRE(!sock->listener); 3331 REQUIRE(sock->bound); 3332 REQUIRE(sock->type == isc_sockettype_tcp); 3333 3334 if (backlog == 0) 3335 backlog = SOMAXCONN; 3336 3337 if (listen(sock->fd, (int)backlog) < 0) { 3338 UNLOCK(&sock->lock); 3339 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 3340 3341 UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf); 3342 3343 return (ISC_R_UNEXPECTED); 3344 } 3345 3346 socket_log(__LINE__, sock, NULL, TRACE, 3347 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening"); 3348 sock->listener = 1; 3349 _set_state(sock, SOCK_LISTEN); 3350 3351 UNLOCK(&sock->lock); 3352 return (ISC_R_SUCCESS); 3353 } 3354 3355 /* 3356 * This should try to do aggressive accept() XXXMLG 3357 */ 3358 isc_result_t 3359 isc__socket_accept(isc_socket_t *sock, 3360 isc_task_t *task, isc_taskaction_t action, void *arg) 3361 { 3362 isc_socket_newconnev_t *adev; 3363 isc_socketmgr_t *manager; 3364 isc_task_t *ntask = NULL; 3365 isc_socket_t *nsock; 3366 isc_result_t result; 3367 IoCompletionInfo *lpo; 3368 3369 REQUIRE(VALID_SOCKET(sock)); 3370 3371 manager = sock->manager; 3372 REQUIRE(VALID_MANAGER(manager)); 3373 3374 LOCK(&sock->lock); 3375 CONSISTENT(sock); 3376 3377 /* 3378 * make sure that the socket's not closed 3379 */ 3380 if (sock->fd == INVALID_SOCKET) { 3381 UNLOCK(&sock->lock); 3382 return (ISC_R_CONNREFUSED); 3383 } 3384 3385 REQUIRE(sock->listener); 3386 3387 /* 3388 * Sender field is overloaded here with the task we will be sending 3389 * this event to. 
Just before the actual event is delivered the 3390 * actual ev_sender will be touched up to be the socket. 3391 */ 3392 adev = (isc_socket_newconnev_t *) 3393 isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN, 3394 action, arg, sizeof(*adev)); 3395 if (adev == NULL) { 3396 UNLOCK(&sock->lock); 3397 return (ISC_R_NOMEMORY); 3398 } 3399 ISC_LINK_INIT(adev, ev_link); 3400 3401 result = allocate_socket(manager, sock->type, &nsock); 3402 if (result != ISC_R_SUCCESS) { 3403 isc_event_free((isc_event_t **)&adev); 3404 UNLOCK(&sock->lock); 3405 return (result); 3406 } 3407 3408 /* 3409 * AcceptEx() requires we pass in a socket. 3410 */ 3411 nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); 3412 if (nsock->fd == INVALID_SOCKET) { 3413 free_socket(&nsock, __LINE__); 3414 isc_event_free((isc_event_t **)&adev); 3415 UNLOCK(&sock->lock); 3416 return (ISC_R_FAILURE); // XXXMLG need real error message 3417 } 3418 3419 /* 3420 * Attach to socket and to task. 3421 */ 3422 isc_task_attach(task, &ntask); 3423 if (isc_task_exiting(ntask)) { 3424 free_socket(&nsock, __LINE__); 3425 isc_task_detach(&ntask); 3426 isc_event_free(ISC_EVENT_PTR(&adev)); 3427 UNLOCK(&sock->lock); 3428 return (ISC_R_SHUTTINGDOWN); 3429 } 3430 nsock->references++; 3431 3432 adev->ev_sender = ntask; 3433 adev->newsocket = nsock; 3434 _set_state(nsock, SOCK_ACCEPT); 3435 3436 /* 3437 * Queue io completion for an accept(). 
3438 */ 3439 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 3440 HEAP_ZERO_MEMORY, 3441 sizeof(IoCompletionInfo)); 3442 RUNTIME_CHECK(lpo != NULL); 3443 lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, 3444 (sizeof(SOCKADDR_STORAGE) + 16) * 2); 3445 RUNTIME_CHECK(lpo->acceptbuffer != NULL); 3446 3447 lpo->adev = adev; 3448 lpo->request_type = SOCKET_ACCEPT; 3449 3450 ISCAcceptEx(sock->fd, 3451 nsock->fd, /* Accepted Socket */ 3452 lpo->acceptbuffer, /* Buffer for initial Recv */ 3453 0, /* Length of Buffer */ 3454 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */ 3455 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */ 3456 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */ 3457 (LPOVERLAPPED)lpo /* Overlapped structure */ 3458 ); 3459 iocompletionport_update(nsock); 3460 3461 socket_log(__LINE__, sock, NULL, TRACE, 3462 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, 3463 "accepting for nsock %p fd %d", nsock, nsock->fd); 3464 3465 /* 3466 * Enqueue the event 3467 */ 3468 ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link); 3469 sock->pending_accept++; 3470 sock->pending_iocp++; 3471 3472 UNLOCK(&sock->lock); 3473 return (ISC_R_SUCCESS); 3474 } 3475 3476 isc_result_t 3477 isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, 3478 isc_task_t *task, isc_taskaction_t action, void *arg) 3479 { 3480 char strbuf[ISC_STRERRORSIZE]; 3481 isc_socket_connev_t *cdev; 3482 isc_task_t *ntask = NULL; 3483 isc_socketmgr_t *manager; 3484 IoCompletionInfo *lpo; 3485 int bind_errno; 3486 3487 REQUIRE(VALID_SOCKET(sock)); 3488 REQUIRE(addr != NULL); 3489 REQUIRE(task != NULL); 3490 REQUIRE(action != NULL); 3491 3492 manager = sock->manager; 3493 REQUIRE(VALID_MANAGER(manager)); 3494 REQUIRE(addr != NULL); 3495 3496 if (isc_sockaddr_ismulticast(addr)) 3497 return (ISC_R_MULTICAST); 3498 3499 LOCK(&sock->lock); 3500 CONSISTENT(sock); 3501 3502 /* 3503 * make sure that the socket's not closed 3504 */ 3505 if (sock->fd == INVALID_SOCKET) 
{ 3506 UNLOCK(&sock->lock); 3507 return (ISC_R_CONNREFUSED); 3508 } 3509 3510 /* 3511 * Windows sockets won't connect unless the socket is bound. 3512 */ 3513 if (!sock->bound) { 3514 isc_sockaddr_t any; 3515 3516 isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr)); 3517 if (bind(sock->fd, &any.type.sa, any.length) < 0) { 3518 bind_errno = WSAGetLastError(); 3519 UNLOCK(&sock->lock); 3520 switch (bind_errno) { 3521 case WSAEACCES: 3522 return (ISC_R_NOPERM); 3523 case WSAEADDRNOTAVAIL: 3524 return (ISC_R_ADDRNOTAVAIL); 3525 case WSAEADDRINUSE: 3526 return (ISC_R_ADDRINUSE); 3527 case WSAEINVAL: 3528 return (ISC_R_BOUND); 3529 default: 3530 isc__strerror(bind_errno, strbuf, 3531 sizeof(strbuf)); 3532 UNEXPECTED_ERROR(__FILE__, __LINE__, 3533 "bind: %s", strbuf); 3534 return (ISC_R_UNEXPECTED); 3535 } 3536 } 3537 sock->bound = 1; 3538 } 3539 3540 REQUIRE(!sock->pending_connect); 3541 3542 cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock, 3543 ISC_SOCKEVENT_CONNECT, 3544 action, arg, 3545 sizeof(*cdev)); 3546 if (cdev == NULL) { 3547 UNLOCK(&sock->lock); 3548 return (ISC_R_NOMEMORY); 3549 } 3550 ISC_LINK_INIT(cdev, ev_link); 3551 3552 if (sock->type == isc_sockettype_tcp) { 3553 /* 3554 * Queue io completion for an accept(). 3555 */ 3556 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 3557 HEAP_ZERO_MEMORY, 3558 sizeof(IoCompletionInfo)); 3559 lpo->cdev = cdev; 3560 lpo->request_type = SOCKET_CONNECT; 3561 3562 sock->address = *addr; 3563 ISCConnectEx(sock->fd, &addr->type.sa, addr->length, 3564 NULL, 0, NULL, (LPOVERLAPPED)lpo); 3565 3566 /* 3567 * Attach to task. 3568 */ 3569 isc_task_attach(task, &ntask); 3570 cdev->ev_sender = ntask; 3571 3572 sock->pending_connect = 1; 3573 _set_state(sock, SOCK_CONNECT); 3574 3575 /* 3576 * Enqueue the request. 
3577 */ 3578 sock->connect_ev = cdev; 3579 sock->pending_iocp++; 3580 } else { 3581 WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL); 3582 cdev->result = ISC_R_SUCCESS; 3583 isc_task_send(task, (isc_event_t **)&cdev); 3584 } 3585 CONSISTENT(sock); 3586 UNLOCK(&sock->lock); 3587 3588 return (ISC_R_SUCCESS); 3589 } 3590 3591 isc_result_t 3592 isc__socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) { 3593 isc_result_t result; 3594 3595 REQUIRE(VALID_SOCKET(sock)); 3596 REQUIRE(addressp != NULL); 3597 3598 LOCK(&sock->lock); 3599 CONSISTENT(sock); 3600 3601 /* 3602 * make sure that the socket's not closed 3603 */ 3604 if (sock->fd == INVALID_SOCKET) { 3605 UNLOCK(&sock->lock); 3606 return (ISC_R_CONNREFUSED); 3607 } 3608 3609 if (sock->connected) { 3610 *addressp = sock->address; 3611 result = ISC_R_SUCCESS; 3612 } else { 3613 result = ISC_R_NOTCONNECTED; 3614 } 3615 3616 UNLOCK(&sock->lock); 3617 3618 return (result); 3619 } 3620 3621 isc_result_t 3622 isc__socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) { 3623 ISC_SOCKADDR_LEN_T len; 3624 isc_result_t result; 3625 char strbuf[ISC_STRERRORSIZE]; 3626 3627 REQUIRE(VALID_SOCKET(sock)); 3628 REQUIRE(addressp != NULL); 3629 3630 LOCK(&sock->lock); 3631 CONSISTENT(sock); 3632 3633 /* 3634 * make sure that the socket's not closed 3635 */ 3636 if (sock->fd == INVALID_SOCKET) { 3637 UNLOCK(&sock->lock); 3638 return (ISC_R_CONNREFUSED); 3639 } 3640 3641 if (!sock->bound) { 3642 result = ISC_R_NOTBOUND; 3643 goto out; 3644 } 3645 3646 result = ISC_R_SUCCESS; 3647 3648 len = sizeof(addressp->type); 3649 if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) { 3650 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 3651 UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s", 3652 strbuf); 3653 result = ISC_R_UNEXPECTED; 3654 goto out; 3655 } 3656 addressp->length = (unsigned int)len; 3657 3658 out: 3659 UNLOCK(&sock->lock); 3660 3661 return (result); 3662 } 
3663 3664 /* 3665 * Run through the list of events on this socket, and cancel the ones 3666 * queued for task "task" of type "how". "how" is a bitmask. 3667 */ 3668 void 3669 isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { 3670 3671 REQUIRE(VALID_SOCKET(sock)); 3672 3673 /* 3674 * Quick exit if there is nothing to do. Don't even bother locking 3675 * in this case. 3676 */ 3677 if (how == 0) 3678 return; 3679 3680 LOCK(&sock->lock); 3681 CONSISTENT(sock); 3682 3683 /* 3684 * make sure that the socket's not closed 3685 */ 3686 if (sock->fd == INVALID_SOCKET) { 3687 UNLOCK(&sock->lock); 3688 return; 3689 } 3690 3691 /* 3692 * All of these do the same thing, more or less. 3693 * Each will: 3694 * o If the internal event is marked as "posted" try to 3695 * remove it from the task's queue. If this fails, mark it 3696 * as canceled instead, and let the task clean it up later. 3697 * o For each I/O request for that task of that type, post 3698 * its done event with status of "ISC_R_CANCELED". 3699 * o Reset any state needed. 
3700 */ 3701 3702 if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) { 3703 isc_socketevent_t *dev; 3704 isc_socketevent_t *next; 3705 isc_task_t *current_task; 3706 3707 dev = ISC_LIST_HEAD(sock->recv_list); 3708 while (dev != NULL) { 3709 current_task = dev->ev_sender; 3710 next = ISC_LIST_NEXT(dev, ev_link); 3711 if ((task == NULL) || (task == current_task)) { 3712 dev->result = ISC_R_CANCELED; 3713 send_recvdone_event(sock, &dev); 3714 } 3715 dev = next; 3716 } 3717 } 3718 how &= ~ISC_SOCKCANCEL_RECV; 3719 3720 if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) { 3721 isc_socketevent_t *dev; 3722 isc_socketevent_t *next; 3723 isc_task_t *current_task; 3724 3725 dev = ISC_LIST_HEAD(sock->send_list); 3726 3727 while (dev != NULL) { 3728 current_task = dev->ev_sender; 3729 next = ISC_LIST_NEXT(dev, ev_link); 3730 if ((task == NULL) || (task == current_task)) { 3731 dev->result = ISC_R_CANCELED; 3732 send_senddone_event(sock, &dev); 3733 } 3734 dev = next; 3735 } 3736 } 3737 how &= ~ISC_SOCKCANCEL_SEND; 3738 3739 if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT) 3740 && !ISC_LIST_EMPTY(sock->accept_list)) { 3741 isc_socket_newconnev_t *dev; 3742 isc_socket_newconnev_t *next; 3743 isc_task_t *current_task; 3744 3745 dev = ISC_LIST_HEAD(sock->accept_list); 3746 while (dev != NULL) { 3747 current_task = dev->ev_sender; 3748 next = ISC_LIST_NEXT(dev, ev_link); 3749 3750 if ((task == NULL) || (task == current_task)) { 3751 3752 dev->newsocket->references--; 3753 closesocket(dev->newsocket->fd); 3754 dev->newsocket->fd = INVALID_SOCKET; 3755 free_socket(&dev->newsocket, __LINE__); 3756 3757 dev->result = ISC_R_CANCELED; 3758 send_acceptdone_event(sock, &dev); 3759 } 3760 3761 dev = next; 3762 } 3763 } 3764 how &= ~ISC_SOCKCANCEL_ACCEPT; 3765 3766 /* 3767 * Connecting is not a list. 
3768 */ 3769 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT) 3770 && sock->connect_ev != NULL) { 3771 isc_socket_connev_t *dev; 3772 isc_task_t *current_task; 3773 3774 INSIST(sock->pending_connect); 3775 3776 dev = sock->connect_ev; 3777 current_task = dev->ev_sender; 3778 3779 if ((task == NULL) || (task == current_task)) { 3780 closesocket(sock->fd); 3781 sock->fd = INVALID_SOCKET; 3782 _set_state(sock, SOCK_CLOSED); 3783 3784 sock->connect_ev = NULL; 3785 dev->result = ISC_R_CANCELED; 3786 send_connectdone_event(sock, &dev); 3787 } 3788 } 3789 how &= ~ISC_SOCKCANCEL_CONNECT; 3790 3791 maybe_free_socket(&sock, __LINE__); 3792 } 3793 3794 isc_sockettype_t 3795 isc__socket_gettype(isc_socket_t *sock) { 3796 isc_sockettype_t type; 3797 3798 REQUIRE(VALID_SOCKET(sock)); 3799 3800 LOCK(&sock->lock); 3801 3802 /* 3803 * make sure that the socket's not closed 3804 */ 3805 if (sock->fd == INVALID_SOCKET) { 3806 UNLOCK(&sock->lock); 3807 return (ISC_R_CONNREFUSED); 3808 } 3809 3810 type = sock->type; 3811 UNLOCK(&sock->lock); 3812 return (type); 3813 } 3814 3815 isc_boolean_t 3816 isc__socket_isbound(isc_socket_t *sock) { 3817 isc_boolean_t val; 3818 3819 REQUIRE(VALID_SOCKET(sock)); 3820 3821 LOCK(&sock->lock); 3822 CONSISTENT(sock); 3823 3824 /* 3825 * make sure that the socket's not closed 3826 */ 3827 if (sock->fd == INVALID_SOCKET) { 3828 UNLOCK(&sock->lock); 3829 return (ISC_FALSE); 3830 } 3831 3832 val = ((sock->bound) ? ISC_TRUE : ISC_FALSE); 3833 UNLOCK(&sock->lock); 3834 3835 return (val); 3836 } 3837 3838 void 3839 isc__socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) { 3840 #if defined(IPV6_V6ONLY) 3841 int onoff = yes ? 
1 : 0; 3842 #else 3843 UNUSED(yes); 3844 #endif 3845 3846 REQUIRE(VALID_SOCKET(sock)); 3847 3848 #ifdef IPV6_V6ONLY 3849 if (sock->pf == AF_INET6) { 3850 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, 3851 (char *)&onoff, sizeof(onoff)); 3852 } 3853 #endif 3854 } 3855 3856 void 3857 isc__socket_dscp(isc_socket_t *sock, isc_dscp_t dscp) { 3858 #if !defined(IP_TOS) && !defined(IPV6_TCLASS) 3859 UNUSED(dscp); 3860 #else 3861 if (dscp < 0) 3862 return; 3863 3864 dscp <<= 2; 3865 dscp &= 0xff; 3866 #endif 3867 3868 REQUIRE(VALID_SOCKET(sock)); 3869 3870 #ifdef IP_TOS 3871 if (sock->pf == AF_INET) { 3872 (void)setsockopt(sock->fd, IPPROTO_IP, IP_TOS, 3873 (char *)&dscp, sizeof(dscp)); 3874 } 3875 #endif 3876 #ifdef IPV6_TCLASS 3877 if (sock->pf == AF_INET6) { 3878 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS, 3879 (char *)&dscp, sizeof(dscp)); 3880 } 3881 #endif 3882 } 3883 3884 void 3885 isc__socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) { 3886 UNUSED(addr); 3887 UNUSED(active); 3888 } 3889 3890 isc_result_t 3891 isc__socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm, 3892 isc_uint32_t owner, isc_uint32_t group) 3893 { 3894 UNUSED(addr); 3895 UNUSED(perm); 3896 UNUSED(owner); 3897 UNUSED(group); 3898 return (ISC_R_NOTIMPLEMENTED); 3899 } 3900 3901 void 3902 isc__socket_setname(isc_socket_t *socket, const char *name, void *tag) { 3903 3904 /* 3905 * Name 'socket'. 
3906 */ 3907 3908 REQUIRE(VALID_SOCKET(socket)); 3909 3910 LOCK(&socket->lock); 3911 memset(socket->name, 0, sizeof(socket->name)); 3912 strncpy(socket->name, name, sizeof(socket->name) - 1); 3913 socket->tag = tag; 3914 UNLOCK(&socket->lock); 3915 } 3916 3917 const char * 3918 isc__socket_getname(isc_socket_t *socket) { 3919 return (socket->name); 3920 } 3921 3922 void * 3923 isc__socket_gettag(isc_socket_t *socket) { 3924 return (socket->tag); 3925 } 3926 3927 int 3928 isc__socket_getfd(isc_socket_t *socket) { 3929 return ((short) socket->fd); 3930 } 3931 3932 void 3933 isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) { 3934 UNUSED(manager); 3935 UNUSED(reserved); 3936 } 3937 3938 void 3939 isc___socketmgr_maxudp(isc_socketmgr_t *manager, int maxudp) { 3940 3941 UNUSED(manager); 3942 UNUSED(maxudp); 3943 } 3944 3945 isc_socketevent_t * 3946 isc_socket_socketevent(isc_mem_t *mctx, void *sender, 3947 isc_eventtype_t eventtype, isc_taskaction_t action, 3948 void *arg) 3949 { 3950 return (allocate_socketevent(mctx, sender, eventtype, action, arg)); 3951 } 3952 3953 #ifdef HAVE_LIBXML2 3954 3955 static const char * 3956 _socktype(isc_sockettype_t type) { 3957 if (type == isc_sockettype_udp) 3958 return ("udp"); 3959 else if (type == isc_sockettype_tcp) 3960 return ("tcp"); 3961 else if (type == isc_sockettype_unix) 3962 return ("unix"); 3963 else if (type == isc_sockettype_fdwatch) 3964 return ("fdwatch"); 3965 else 3966 return ("not-initialized"); 3967 } 3968 3969 #define TRY0(a) do { xmlrc = (a); if (xmlrc < 0) goto error; } while(/*CONSTCOND*/0) 3970 int 3971 isc_socketmgr_renderxml(isc_socketmgr_t *mgr, xmlTextWriterPtr writer) 3972 { 3973 isc_socket_t *sock = NULL; 3974 char peerbuf[ISC_SOCKADDR_FORMATSIZE]; 3975 isc_sockaddr_t addr; 3976 ISC_SOCKADDR_LEN_T len; 3977 int xmlrc; 3978 3979 LOCK(&mgr->lock); 3980 3981 #ifndef ISC_PLATFORM_USETHREADS 3982 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "references")); 3983 
TRY0(xmlTextWriterWriteFormatString(writer, "%d", mgr->refs)); 3984 TRY0(xmlTextWriterEndElement(writer)); 3985 #endif 3986 3987 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets")); 3988 sock = ISC_LIST_HEAD(mgr->socklist); 3989 while (sock != NULL) { 3990 LOCK(&sock->lock); 3991 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket")); 3992 3993 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "id")); 3994 TRY0(xmlTextWriterWriteFormatString(writer, "%p", sock)); 3995 TRY0(xmlTextWriterEndElement(writer)); 3996 3997 if (sock->name[0] != 0) { 3998 TRY0(xmlTextWriterStartElement(writer, 3999 ISC_XMLCHAR "name")); 4000 TRY0(xmlTextWriterWriteFormatString(writer, "%s", 4001 sock->name)); 4002 TRY0(xmlTextWriterEndElement(writer)); /* name */ 4003 } 4004 4005 TRY0(xmlTextWriterStartElement(writer, 4006 ISC_XMLCHAR "references")); 4007 TRY0(xmlTextWriterWriteFormatString(writer, "%d", 4008 sock->references)); 4009 TRY0(xmlTextWriterEndElement(writer)); 4010 4011 TRY0(xmlTextWriterWriteElement(writer, ISC_XMLCHAR "type", 4012 ISC_XMLCHAR _socktype(sock->type))); 4013 4014 if (sock->connected) { 4015 isc_sockaddr_format(&sock->address, peerbuf, 4016 sizeof(peerbuf)); 4017 TRY0(xmlTextWriterWriteElement(writer, 4018 ISC_XMLCHAR "peer-address", 4019 ISC_XMLCHAR peerbuf)); 4020 } 4021 4022 len = sizeof(addr); 4023 if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) { 4024 isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf)); 4025 TRY0(xmlTextWriterWriteElement(writer, 4026 ISC_XMLCHAR "local-address", 4027 ISC_XMLCHAR peerbuf)); 4028 } 4029 4030 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "states")); 4031 if (sock->pending_recv) 4032 TRY0(xmlTextWriterWriteElement(writer, 4033 ISC_XMLCHAR "state", 4034 ISC_XMLCHAR "pending-receive")); 4035 if (sock->pending_send) 4036 TRY0(xmlTextWriterWriteElement(writer, 4037 ISC_XMLCHAR "state", 4038 ISC_XMLCHAR "pending-send")); 4039 if (sock->pending_accept) 4040 TRY0(xmlTextWriterWriteElement(writer, 
4041 ISC_XMLCHAR "state", 4042 ISC_XMLCHAR "pending_accept")); 4043 if (sock->listener) 4044 TRY0(xmlTextWriterWriteElement(writer, 4045 ISC_XMLCHAR "state", 4046 ISC_XMLCHAR "listener")); 4047 if (sock->connected) 4048 TRY0(xmlTextWriterWriteElement(writer, 4049 ISC_XMLCHAR "state", 4050 ISC_XMLCHAR "connected")); 4051 if (sock->pending_connect) 4052 TRY0(xmlTextWriterWriteElement(writer, 4053 ISC_XMLCHAR "state", 4054 ISC_XMLCHAR "connecting")); 4055 if (sock->bound) 4056 TRY0(xmlTextWriterWriteElement(writer, 4057 ISC_XMLCHAR "state", 4058 ISC_XMLCHAR "bound")); 4059 4060 TRY0(xmlTextWriterEndElement(writer)); /* states */ 4061 4062 TRY0(xmlTextWriterEndElement(writer)); /* socket */ 4063 4064 UNLOCK(&sock->lock); 4065 sock = ISC_LIST_NEXT(sock, link); 4066 } 4067 TRY0(xmlTextWriterEndElement(writer)); /* sockets */ 4068 4069 error: 4070 if (sock != NULL) 4071 UNLOCK(&sock->lock); 4072 4073 UNLOCK(&mgr->lock); 4074 4075 return (xmlrc); 4076 } 4077 #endif /* HAVE_LIBXML2 */ 4078 4079 /* 4080 * Replace ../socket_api.c 4081 */ 4082 4083 isc_result_t 4084 isc__socket_register(void) { 4085 return (ISC_R_SUCCESS); 4086 } 4087 4088 isc_result_t 4089 isc_socketmgr_createinctx(isc_mem_t *mctx, isc_appctx_t *actx, 4090 isc_socketmgr_t **managerp) 4091 { 4092 isc_result_t result; 4093 4094 result = isc_socketmgr_create(mctx, managerp); 4095 4096 if (result == ISC_R_SUCCESS) 4097 isc_appctx_setsocketmgr(actx, *managerp); 4098 4099 return (result); 4100 } 4101