1 /* Copyright Joyent, Inc. and other Node contributors. All rights reserved. 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to 5 * deal in the Software without restriction, including without limitation the 6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 * sell copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 * IN THE SOFTWARE. 20 */ 21 22 #include <assert.h> 23 #include <stdlib.h> 24 25 #include "uv.h" 26 #include "internal.h" 27 28 29 /* Whether there are any non-IFS LSPs stacked on TCP */ 30 int uv_tcp_non_ifs_lsp_ipv4; 31 int uv_tcp_non_ifs_lsp_ipv6; 32 33 /* Ip address used to bind to any port at any interface */ 34 struct sockaddr_in uv_addr_ip4_any_; 35 struct sockaddr_in6 uv_addr_ip6_any_; 36 37 38 /* 39 * Retrieves the pointer to a winsock extension function. 40 */ 41 static BOOL uv__get_extension_function(SOCKET socket, GUID guid, 42 void **target) { 43 int result; 44 DWORD bytes; 45 46 result = WSAIoctl(socket, 47 SIO_GET_EXTENSION_FUNCTION_POINTER, 48 &guid, 49 sizeof(guid), 50 (void*)target, 51 sizeof(*target), 52 &bytes, 53 NULL, 54 NULL); 55 56 if (result == SOCKET_ERROR) { 57 *target = NULL; 58 return FALSE; 59 } else { 60 return TRUE; 61 } 62 } 63 64 65 BOOL uv__get_acceptex_function(SOCKET socket, LPFN_ACCEPTEX* target) { 66 const GUID wsaid_acceptex = WSAID_ACCEPTEX; 67 return uv__get_extension_function(socket, wsaid_acceptex, (void**)target); 68 } 69 70 71 BOOL uv__get_connectex_function(SOCKET socket, LPFN_CONNECTEX* target) { 72 const GUID wsaid_connectex = WSAID_CONNECTEX; 73 return uv__get_extension_function(socket, wsaid_connectex, (void**)target); 74 } 75 76 77 78 void uv__winsock_init(void) { 79 WSADATA wsa_data; 80 int errorno; 81 SOCKET dummy; 82 WSAPROTOCOL_INFOW protocol_info; 83 int opt_len; 84 85 /* Set implicit binding address used by connectEx */ 86 if (uv_ip4_addr("0.0.0.0", 0, &uv_addr_ip4_any_)) { 87 abort(); 88 } 89 90 if (uv_ip6_addr("::", 0, &uv_addr_ip6_any_)) { 91 abort(); 92 } 93 94 /* Skip initialization in safe mode without network support */ 95 if (1 == GetSystemMetrics(SM_CLEANBOOT)) return; 96 97 /* Initialize winsock */ 98 errorno = WSAStartup(MAKEWORD(2, 2), &wsa_data); 99 if (errorno != 0) { 100 uv_fatal_error(errorno, "WSAStartup"); 101 } 102 103 /* Try to detect non-IFS LSPs */ 104 uv_tcp_non_ifs_lsp_ipv4 = 1; 105 dummy = socket(AF_INET, SOCK_STREAM, IPPROTO_IP); 106 if (dummy != INVALID_SOCKET) { 107 opt_len = (int) sizeof protocol_info; 108 if (getsockopt(dummy, 109 SOL_SOCKET, 110 SO_PROTOCOL_INFOW, 111 (char*) &protocol_info, 112 &opt_len) == 0) { 113 if (protocol_info.dwServiceFlags1 & XP1_IFS_HANDLES) 114 uv_tcp_non_ifs_lsp_ipv4 = 0; 115 } 116 closesocket(dummy); 117 } 118 119 /* Try to detect IPV6 support and non-IFS LSPs */ 120 uv_tcp_non_ifs_lsp_ipv6 = 1; 121 dummy = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP); 122 if (dummy != INVALID_SOCKET) { 123 opt_len = (int) sizeof protocol_info; 124 if (getsockopt(dummy, 125 SOL_SOCKET, 126 SO_PROTOCOL_INFOW, 127 (char*) &protocol_info, 128 &opt_len) == 0) { 129 if (protocol_info.dwServiceFlags1 & XP1_IFS_HANDLES) 130 uv_tcp_non_ifs_lsp_ipv6 = 0; 131 } 132 closesocket(dummy); 133 } 134 } 135 136 137 int uv__ntstatus_to_winsock_error(NTSTATUS status) { 138 switch (status) { 139 case STATUS_SUCCESS: 140 return ERROR_SUCCESS; 141 142 case STATUS_PENDING: 143 return ERROR_IO_PENDING; 144 145 case STATUS_INVALID_HANDLE: 146 case STATUS_OBJECT_TYPE_MISMATCH: 147 return WSAENOTSOCK; 148 149 case STATUS_INSUFFICIENT_RESOURCES: 150 case STATUS_PAGEFILE_QUOTA: 151 case STATUS_COMMITMENT_LIMIT: 152 case STATUS_WORKING_SET_QUOTA: 153 case STATUS_NO_MEMORY: 154 case STATUS_QUOTA_EXCEEDED: 155 case STATUS_TOO_MANY_PAGING_FILES: 156 case STATUS_REMOTE_RESOURCES: 157 return WSAENOBUFS; 158 159 case STATUS_TOO_MANY_ADDRESSES: 160 case STATUS_SHARING_VIOLATION: 161 case STATUS_ADDRESS_ALREADY_EXISTS: 162 return WSAEADDRINUSE; 163 164 case STATUS_LINK_TIMEOUT: 165 case STATUS_IO_TIMEOUT: 166 case STATUS_TIMEOUT: 167 return WSAETIMEDOUT; 168 169 case STATUS_GRACEFUL_DISCONNECT: 170 return WSAEDISCON; 171 172 case STATUS_REMOTE_DISCONNECT: 173 case STATUS_CONNECTION_RESET: 174 case STATUS_LINK_FAILED: 175 case STATUS_CONNECTION_DISCONNECTED: 176 case STATUS_PORT_UNREACHABLE: 177 case STATUS_HOPLIMIT_EXCEEDED: 178 return WSAECONNRESET; 179 180 case STATUS_LOCAL_DISCONNECT: 181 case STATUS_TRANSACTION_ABORTED: 182 case STATUS_CONNECTION_ABORTED: 183 return WSAECONNABORTED; 184 185 case STATUS_BAD_NETWORK_PATH: 186 case STATUS_NETWORK_UNREACHABLE: 187 case STATUS_PROTOCOL_UNREACHABLE: 188 return WSAENETUNREACH; 189 190 case STATUS_HOST_UNREACHABLE: 191 return WSAEHOSTUNREACH; 192 193 case STATUS_CANCELLED: 194 case STATUS_REQUEST_ABORTED: 195 return WSAEINTR; 196 197 case STATUS_BUFFER_OVERFLOW: 198 case STATUS_INVALID_BUFFER_SIZE: 199 return WSAEMSGSIZE; 200 201 case STATUS_BUFFER_TOO_SMALL: 202 case STATUS_ACCESS_VIOLATION: 203 return WSAEFAULT; 204 205 case STATUS_DEVICE_NOT_READY: 206 case STATUS_REQUEST_NOT_ACCEPTED: 207 return WSAEWOULDBLOCK; 208 209 case STATUS_INVALID_NETWORK_RESPONSE: 210 case STATUS_NETWORK_BUSY: 211 case STATUS_NO_SUCH_DEVICE: 212 case STATUS_NO_SUCH_FILE: 213 case STATUS_OBJECT_PATH_NOT_FOUND: 214 case STATUS_OBJECT_NAME_NOT_FOUND: 215 case STATUS_UNEXPECTED_NETWORK_ERROR: 216 return WSAENETDOWN; 217 218 case STATUS_INVALID_CONNECTION: 219 return WSAENOTCONN; 220 221 case STATUS_REMOTE_NOT_LISTENING: 222 case STATUS_CONNECTION_REFUSED: 223 return WSAECONNREFUSED; 224 225 case STATUS_PIPE_DISCONNECTED: 226 return WSAESHUTDOWN; 227 228 case STATUS_CONFLICTING_ADDRESSES: 229 case STATUS_INVALID_ADDRESS: 230 case STATUS_INVALID_ADDRESS_COMPONENT: 231 return WSAEADDRNOTAVAIL; 232 233 case STATUS_NOT_SUPPORTED: 234 case STATUS_NOT_IMPLEMENTED: 235 return WSAEOPNOTSUPP; 236 237 case STATUS_ACCESS_DENIED: 238 return WSAEACCES; 239 240 default: 241 if ((status & (FACILITY_NTWIN32 << 16)) == (FACILITY_NTWIN32 << 16) && 242 (status & (ERROR_SEVERITY_ERROR | ERROR_SEVERITY_WARNING))) { 243 /* It's a windows error that has been previously mapped to an ntstatus 244 * code. */ 245 return (DWORD) (status & 0xffff); 246 } else { 247 /* The default fallback for unmappable ntstatus codes. */ 248 return WSAEINVAL; 249 } 250 } 251 } 252 253 254 /* 255 * This function provides a workaround for a bug in the winsock implementation 256 * of WSARecv. The problem is that when SetFileCompletionNotificationModes is 257 * used to avoid IOCP notifications of completed reads, WSARecv does not 258 * reliably indicate whether we can expect a completion package to be posted 259 * when the receive buffer is smaller than the received datagram. 260 * 261 * However it is desirable to use SetFileCompletionNotificationModes because 262 * it yields a massive performance increase. 263 * 264 * This function provides a workaround for that bug, but it only works for the 265 * specific case that we need it for. E.g. it assumes that the "avoid iocp" 266 * bit has been set, and supports only overlapped operation. It also requires 267 * the user to use the default msafd driver, doesn't work when other LSPs are 268 * stacked on top of it. 269 */ 270 int WSAAPI uv__wsarecv_workaround(SOCKET socket, WSABUF* buffers, 271 DWORD buffer_count, DWORD* bytes, DWORD* flags, WSAOVERLAPPED *overlapped, 272 LPWSAOVERLAPPED_COMPLETION_ROUTINE completion_routine) { 273 NTSTATUS status; 274 void* apc_context; 275 IO_STATUS_BLOCK* iosb = (IO_STATUS_BLOCK*) &overlapped->Internal; 276 AFD_RECV_INFO info; 277 DWORD error; 278 279 if (overlapped == NULL || completion_routine != NULL) { 280 WSASetLastError(WSAEINVAL); 281 return SOCKET_ERROR; 282 } 283 284 info.BufferArray = buffers; 285 info.BufferCount = buffer_count; 286 info.AfdFlags = AFD_OVERLAPPED; 287 info.TdiFlags = TDI_RECEIVE_NORMAL; 288 289 if (*flags & MSG_PEEK) { 290 info.TdiFlags |= TDI_RECEIVE_PEEK; 291 } 292 293 if (*flags & MSG_PARTIAL) { 294 info.TdiFlags |= TDI_RECEIVE_PARTIAL; 295 } 296 297 if (!((intptr_t) overlapped->hEvent & 1)) { 298 apc_context = (void*) overlapped; 299 } else { 300 apc_context = NULL; 301 } 302 303 iosb->Status = STATUS_PENDING; 304 iosb->Pointer = 0; 305 306 status = pNtDeviceIoControlFile((HANDLE) socket, 307 overlapped->hEvent, 308 NULL, 309 apc_context, 310 iosb, 311 IOCTL_AFD_RECEIVE, 312 &info, 313 sizeof(info), 314 NULL, 315 0); 316 317 *flags = 0; 318 *bytes = (DWORD) iosb->Information; 319 320 switch (status) { 321 case STATUS_SUCCESS: 322 error = ERROR_SUCCESS; 323 break; 324 325 case STATUS_PENDING: 326 error = WSA_IO_PENDING; 327 break; 328 329 case STATUS_BUFFER_OVERFLOW: 330 error = WSAEMSGSIZE; 331 break; 332 333 case STATUS_RECEIVE_EXPEDITED: 334 error = ERROR_SUCCESS; 335 *flags = MSG_OOB; 336 break; 337 338 case STATUS_RECEIVE_PARTIAL_EXPEDITED: 339 error = ERROR_SUCCESS; 340 *flags = MSG_PARTIAL | MSG_OOB; 341 break; 342 343 case STATUS_RECEIVE_PARTIAL: 344 error = ERROR_SUCCESS; 345 *flags = MSG_PARTIAL; 346 break; 347 348 default: 349 error = uv__ntstatus_to_winsock_error(status); 350 break; 351 } 352 353 WSASetLastError(error); 354 355 if (error == ERROR_SUCCESS) { 356 return 0; 357 } else { 358 return SOCKET_ERROR; 359 } 360 } 361 362 363 /* See description of uv__wsarecv_workaround. */ 364 int WSAAPI uv__wsarecvfrom_workaround(SOCKET socket, WSABUF* buffers, 365 DWORD buffer_count, DWORD* bytes, DWORD* flags, struct sockaddr* addr, 366 int* addr_len, WSAOVERLAPPED *overlapped, 367 LPWSAOVERLAPPED_COMPLETION_ROUTINE completion_routine) { 368 NTSTATUS status; 369 void* apc_context; 370 IO_STATUS_BLOCK* iosb = (IO_STATUS_BLOCK*) &overlapped->Internal; 371 AFD_RECV_DATAGRAM_INFO info; 372 DWORD error; 373 374 if (overlapped == NULL || addr == NULL || addr_len == NULL || 375 completion_routine != NULL) { 376 WSASetLastError(WSAEINVAL); 377 return SOCKET_ERROR; 378 } 379 380 info.BufferArray = buffers; 381 info.BufferCount = buffer_count; 382 info.AfdFlags = AFD_OVERLAPPED; 383 info.TdiFlags = TDI_RECEIVE_NORMAL; 384 info.Address = addr; 385 info.AddressLength = addr_len; 386 387 if (*flags & MSG_PEEK) { 388 info.TdiFlags |= TDI_RECEIVE_PEEK; 389 } 390 391 if (*flags & MSG_PARTIAL) { 392 info.TdiFlags |= TDI_RECEIVE_PARTIAL; 393 } 394 395 if (!((intptr_t) overlapped->hEvent & 1)) { 396 apc_context = (void*) overlapped; 397 } else { 398 apc_context = NULL; 399 } 400 401 iosb->Status = STATUS_PENDING; 402 iosb->Pointer = 0; 403 404 status = pNtDeviceIoControlFile((HANDLE) socket, 405 overlapped->hEvent, 406 NULL, 407 apc_context, 408 iosb, 409 IOCTL_AFD_RECEIVE_DATAGRAM, 410 &info, 411 sizeof(info), 412 NULL, 413 0); 414 415 *flags = 0; 416 *bytes = (DWORD) iosb->Information; 417 418 switch (status) { 419 case STATUS_SUCCESS: 420 error = ERROR_SUCCESS; 421 break; 422 423 case STATUS_PENDING: 424 error = WSA_IO_PENDING; 425 break; 426 427 case STATUS_BUFFER_OVERFLOW: 428 error = WSAEMSGSIZE; 429 break; 430 431 case STATUS_RECEIVE_EXPEDITED: 432 error = ERROR_SUCCESS; 433 *flags = MSG_OOB; 434 break; 435 436 case STATUS_RECEIVE_PARTIAL_EXPEDITED: 437 error = ERROR_SUCCESS; 438 *flags = MSG_PARTIAL | MSG_OOB; 439 break; 440 441 case STATUS_RECEIVE_PARTIAL: 442 error = ERROR_SUCCESS; 443 *flags = MSG_PARTIAL; 444 break; 445 446 default: 447 error = uv__ntstatus_to_winsock_error(status); 448 break; 449 } 450 451 WSASetLastError(error); 452 453 if (error == ERROR_SUCCESS) { 454 return 0; 455 } else { 456 return SOCKET_ERROR; 457 } 458 } 459 460 461 int WSAAPI uv__msafd_poll(SOCKET socket, AFD_POLL_INFO* info_in, 462 AFD_POLL_INFO* info_out, OVERLAPPED* overlapped) { 463 IO_STATUS_BLOCK iosb; 464 IO_STATUS_BLOCK* iosb_ptr; 465 HANDLE event = NULL; 466 void* apc_context; 467 NTSTATUS status; 468 DWORD error; 469 470 if (overlapped != NULL) { 471 /* Overlapped operation. */ 472 iosb_ptr = (IO_STATUS_BLOCK*) &overlapped->Internal; 473 event = overlapped->hEvent; 474 475 /* Do not report iocp completion if hEvent is tagged. */ 476 if ((uintptr_t) event & 1) { 477 event = (HANDLE)((uintptr_t) event & ~(uintptr_t) 1); 478 apc_context = NULL; 479 } else { 480 apc_context = overlapped; 481 } 482 483 } else { 484 /* Blocking operation. */ 485 iosb_ptr = &iosb; 486 event = CreateEvent(NULL, FALSE, FALSE, NULL); 487 if (event == NULL) { 488 return SOCKET_ERROR; 489 } 490 apc_context = NULL; 491 } 492 493 iosb_ptr->Status = STATUS_PENDING; 494 status = pNtDeviceIoControlFile((HANDLE) socket, 495 event, 496 NULL, 497 apc_context, 498 iosb_ptr, 499 IOCTL_AFD_POLL, 500 info_in, 501 sizeof *info_in, 502 info_out, 503 sizeof *info_out); 504 505 if (overlapped == NULL) { 506 /* If this is a blocking operation, wait for the event to become signaled, 507 * and then grab the real status from the io status block. */ 508 if (status == STATUS_PENDING) { 509 DWORD r = WaitForSingleObject(event, INFINITE); 510 511 if (r == WAIT_FAILED) { 512 DWORD saved_error = GetLastError(); 513 CloseHandle(event); 514 WSASetLastError(saved_error); 515 return SOCKET_ERROR; 516 } 517 518 status = iosb.Status; 519 } 520 521 CloseHandle(event); 522 } 523 524 switch (status) { 525 case STATUS_SUCCESS: 526 error = ERROR_SUCCESS; 527 break; 528 529 case STATUS_PENDING: 530 error = WSA_IO_PENDING; 531 break; 532 533 default: 534 error = uv__ntstatus_to_winsock_error(status); 535 break; 536 } 537 538 WSASetLastError(error); 539 540 if (error == ERROR_SUCCESS) { 541 return 0; 542 } else { 543 return SOCKET_ERROR; 544 } 545 } 546 547 int uv__convert_to_localhost_if_unspecified(const struct sockaddr* addr, 548 struct sockaddr_storage* storage) { 549 struct sockaddr_in* dest4; 550 struct sockaddr_in6* dest6; 551 552 if (addr == NULL) 553 return UV_EINVAL; 554 555 switch (addr->sa_family) { 556 case AF_INET: 557 dest4 = (struct sockaddr_in*) storage; 558 memcpy(dest4, addr, sizeof(*dest4)); 559 if (dest4->sin_addr.s_addr == 0) 560 dest4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 561 return 0; 562 case AF_INET6: 563 dest6 = (struct sockaddr_in6*) storage; 564 memcpy(dest6, addr, sizeof(*dest6)); 565 if (memcmp(&dest6->sin6_addr, 566 &uv_addr_ip6_any_.sin6_addr, 567 sizeof(uv_addr_ip6_any_.sin6_addr)) == 0) { 568 struct in6_addr init_sin6_addr = IN6ADDR_LOOPBACK_INIT; 569 dest6->sin6_addr = init_sin6_addr; 570 } 571 return 0; 572 default: 573 return UV_EINVAL; 574 } 575 } 576