1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. All rights reserved. 3 * Copyright (c) 2020, 2021 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #include "spdk/stdinc.h" 8 9 #if defined(__FreeBSD__) 10 #include <sys/event.h> 11 #define SPDK_KEVENT 12 #else 13 #include <sys/epoll.h> 14 #define SPDK_EPOLL 15 #endif 16 17 #if defined(__linux__) 18 #include <linux/errqueue.h> 19 #endif 20 21 #include "spdk/env.h" 22 #include "spdk/log.h" 23 #include "spdk/pipe.h" 24 #include "spdk/sock.h" 25 #include "spdk/util.h" 26 #include "spdk/string.h" 27 #include "spdk_internal/sock.h" 28 #include "../sock_kernel.h" 29 30 #include "openssl/crypto.h" 31 #include "openssl/err.h" 32 #include "openssl/ssl.h" 33 34 #define MAX_TMPBUF 1024 35 #define PORTNUMLEN 32 36 37 #if defined(SO_ZEROCOPY) && defined(MSG_ZEROCOPY) 38 #define SPDK_ZEROCOPY 39 #endif 40 41 struct spdk_posix_sock { 42 struct spdk_sock base; 43 int fd; 44 45 uint32_t sendmsg_idx; 46 47 struct spdk_pipe *recv_pipe; 48 void *recv_buf; 49 int recv_buf_sz; 50 bool pipe_has_data; 51 bool socket_has_data; 52 bool zcopy; 53 54 int placement_id; 55 56 SSL_CTX *ctx; 57 SSL *ssl; 58 59 TAILQ_ENTRY(spdk_posix_sock) link; 60 }; 61 62 TAILQ_HEAD(spdk_has_data_list, spdk_posix_sock); 63 64 struct spdk_posix_sock_group_impl { 65 struct spdk_sock_group_impl base; 66 int fd; 67 struct spdk_has_data_list socks_with_data; 68 int placement_id; 69 }; 70 71 static struct spdk_sock_impl_opts g_spdk_posix_sock_impl_opts = { 72 .recv_buf_size = DEFAULT_SO_RCVBUF_SIZE, 73 .send_buf_size = DEFAULT_SO_SNDBUF_SIZE, 74 .enable_recv_pipe = true, 75 .enable_quickack = false, 76 .enable_placement_id = PLACEMENT_NONE, 77 .enable_zerocopy_send_server = true, 78 .enable_zerocopy_send_client = false, 79 .zerocopy_threshold = 0, 80 .tls_version = 0, 81 .enable_ktls = false, 82 .psk_key = NULL, 83 .psk_identity = NULL, 84 .get_key = NULL, 85 .get_key_ctx = NULL 86 }; 87 88 static struct spdk_sock_map g_map = { 89 .entries = STAILQ_HEAD_INITIALIZER(g_map.entries), 90 .mtx = PTHREAD_MUTEX_INITIALIZER 91 }; 92 93 __attribute((destructor)) static void 94 posix_sock_map_cleanup(void) 95 { 96 spdk_sock_map_cleanup(&g_map); 97 } 98 99 #define __posix_sock(sock) (struct spdk_posix_sock *)sock 100 #define __posix_group_impl(group) (struct spdk_posix_sock_group_impl *)group 101 102 static void 103 posix_sock_copy_impl_opts(struct spdk_sock_impl_opts *dest, const struct spdk_sock_impl_opts *src, 104 size_t len) 105 { 106 #define FIELD_OK(field) \ 107 offsetof(struct spdk_sock_impl_opts, field) + sizeof(src->field) <= len 108 109 #define SET_FIELD(field) \ 110 if (FIELD_OK(field)) { \ 111 dest->field = src->field; \ 112 } 113 114 SET_FIELD(recv_buf_size); 115 SET_FIELD(send_buf_size); 116 SET_FIELD(enable_recv_pipe); 117 SET_FIELD(enable_zerocopy_send); 118 SET_FIELD(enable_quickack); 119 SET_FIELD(enable_placement_id); 120 SET_FIELD(enable_zerocopy_send_server); 121 SET_FIELD(enable_zerocopy_send_client); 122 SET_FIELD(zerocopy_threshold); 123 SET_FIELD(tls_version); 124 SET_FIELD(enable_ktls); 125 SET_FIELD(psk_key); 126 SET_FIELD(psk_identity); 127 SET_FIELD(get_key); 128 SET_FIELD(get_key_ctx); 129 130 #undef SET_FIELD 131 #undef FIELD_OK 132 } 133 134 static int 135 posix_sock_impl_get_opts(struct spdk_sock_impl_opts *opts, size_t *len) 136 { 137 if (!opts || !len) { 138 errno = EINVAL; 139 return -1; 140 } 141 142 assert(sizeof(*opts) >= *len); 143 memset(opts, 0, *len); 144 145 posix_sock_copy_impl_opts(opts, &g_spdk_posix_sock_impl_opts, *len); 146 *len = spdk_min(*len, sizeof(g_spdk_posix_sock_impl_opts)); 147 148 return 0; 149 } 150 151 static int 152 posix_sock_impl_set_opts(const struct spdk_sock_impl_opts *opts, size_t len) 153 { 154 if (!opts) { 155 errno = EINVAL; 156 return -1; 157 } 158 159 assert(sizeof(*opts) >= len); 160 posix_sock_copy_impl_opts(&g_spdk_posix_sock_impl_opts, opts, len); 161 162 return 0; 163 } 164 165 static void 166 posix_opts_get_impl_opts(const struct spdk_sock_opts *opts, struct spdk_sock_impl_opts *dest) 167 { 168 /* Copy the default impl_opts first to cover cases when user's impl_opts is smaller */ 169 memcpy(dest, &g_spdk_posix_sock_impl_opts, sizeof(*dest)); 170 171 if (opts->impl_opts != NULL) { 172 assert(sizeof(*dest) >= opts->impl_opts_size); 173 posix_sock_copy_impl_opts(dest, opts->impl_opts, opts->impl_opts_size); 174 } 175 } 176 177 static int 178 posix_sock_getaddr(struct spdk_sock *_sock, char *saddr, int slen, uint16_t *sport, 179 char *caddr, int clen, uint16_t *cport) 180 { 181 struct spdk_posix_sock *sock = __posix_sock(_sock); 182 struct sockaddr_storage sa; 183 socklen_t salen; 184 int rc; 185 186 assert(sock != NULL); 187 188 memset(&sa, 0, sizeof sa); 189 salen = sizeof sa; 190 rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen); 191 if (rc != 0) { 192 SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno); 193 return -1; 194 } 195 196 switch (sa.ss_family) { 197 case AF_UNIX: 198 /* Acceptable connection types that don't have IPs */ 199 return 0; 200 case AF_INET: 201 case AF_INET6: 202 /* Code below will get IP addresses */ 203 break; 204 default: 205 /* Unsupported socket family */ 206 return -1; 207 } 208 209 rc = get_addr_str((struct sockaddr *)&sa, saddr, slen); 210 if (rc != 0) { 211 SPDK_ERRLOG("getnameinfo() failed (errno=%d)\n", errno); 212 return -1; 213 } 214 215 if (sport) { 216 if (sa.ss_family == AF_INET) { 217 *sport = ntohs(((struct sockaddr_in *) &sa)->sin_port); 218 } else if (sa.ss_family == AF_INET6) { 219 *sport = ntohs(((struct sockaddr_in6 *) &sa)->sin6_port); 220 } 221 } 222 223 memset(&sa, 0, sizeof sa); 224 salen = sizeof sa; 225 rc = getpeername(sock->fd, (struct sockaddr *) &sa, &salen); 226 if (rc != 0) { 227 SPDK_ERRLOG("getpeername() failed (errno=%d)\n", errno); 228 return -1; 229 } 230 231 rc = get_addr_str((struct sockaddr *)&sa, caddr, clen); 232 if (rc != 0) { 233 SPDK_ERRLOG("getnameinfo() failed (errno=%d)\n", errno); 234 return -1; 235 } 236 237 if (cport) { 238 if (sa.ss_family == AF_INET) { 239 *cport = ntohs(((struct sockaddr_in *) &sa)->sin_port); 240 } else if (sa.ss_family == AF_INET6) { 241 *cport = ntohs(((struct sockaddr_in6 *) &sa)->sin6_port); 242 } 243 } 244 245 return 0; 246 } 247 248 enum posix_sock_create_type { 249 SPDK_SOCK_CREATE_LISTEN, 250 SPDK_SOCK_CREATE_CONNECT, 251 }; 252 253 static int 254 posix_sock_alloc_pipe(struct spdk_posix_sock *sock, int sz) 255 { 256 uint8_t *new_buf; 257 struct spdk_pipe *new_pipe; 258 struct iovec siov[2]; 259 struct iovec diov[2]; 260 int sbytes; 261 ssize_t bytes; 262 int rc; 263 264 if (sock->recv_buf_sz == sz) { 265 return 0; 266 } 267 268 /* If the new size is 0, just free the pipe */ 269 if (sz == 0) { 270 spdk_pipe_destroy(sock->recv_pipe); 271 free(sock->recv_buf); 272 sock->recv_pipe = NULL; 273 sock->recv_buf = NULL; 274 return 0; 275 } else if (sz < MIN_SOCK_PIPE_SIZE) { 276 SPDK_ERRLOG("The size of the pipe must be larger than %d\n", MIN_SOCK_PIPE_SIZE); 277 return -1; 278 } 279 280 /* Round up to next 64 byte multiple */ 281 rc = posix_memalign((void **)&new_buf, 64, sz); 282 if (rc != 0) { 283 SPDK_ERRLOG("socket recv buf allocation failed\n"); 284 return -ENOMEM; 285 } 286 memset(new_buf, 0, sz); 287 288 new_pipe = spdk_pipe_create(new_buf, sz); 289 if (new_pipe == NULL) { 290 SPDK_ERRLOG("socket pipe allocation failed\n"); 291 free(new_buf); 292 return -ENOMEM; 293 } 294 295 if (sock->recv_pipe != NULL) { 296 /* Pull all of the data out of the old pipe */ 297 sbytes = spdk_pipe_reader_get_buffer(sock->recv_pipe, sock->recv_buf_sz, siov); 298 if (sbytes > sz) { 299 /* Too much data to fit into the new pipe size */ 300 spdk_pipe_destroy(new_pipe); 301 free(new_buf); 302 return -EINVAL; 303 } 304 305 sbytes = spdk_pipe_writer_get_buffer(new_pipe, sz, diov); 306 assert(sbytes == sz); 307 308 bytes = spdk_iovcpy(siov, 2, diov, 2); 309 spdk_pipe_writer_advance(new_pipe, bytes); 310 311 spdk_pipe_destroy(sock->recv_pipe); 312 free(sock->recv_buf); 313 } 314 315 sock->recv_buf_sz = sz; 316 sock->recv_buf = new_buf; 317 sock->recv_pipe = new_pipe; 318 319 return 0; 320 } 321 322 static int 323 posix_sock_set_recvbuf(struct spdk_sock *_sock, int sz) 324 { 325 struct spdk_posix_sock *sock = __posix_sock(_sock); 326 int min_size; 327 int rc; 328 329 assert(sock != NULL); 330 331 if (_sock->impl_opts.enable_recv_pipe) { 332 rc = posix_sock_alloc_pipe(sock, sz); 333 if (rc) { 334 return rc; 335 } 336 } 337 338 /* Set kernel buffer size to be at least MIN_SO_RCVBUF_SIZE and 339 * g_spdk_posix_sock_impl_opts.recv_buf_size. */ 340 min_size = spdk_max(MIN_SO_RCVBUF_SIZE, g_spdk_posix_sock_impl_opts.recv_buf_size); 341 342 if (sz < min_size) { 343 sz = min_size; 344 } 345 346 rc = setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz)); 347 if (rc < 0) { 348 return rc; 349 } 350 351 _sock->impl_opts.recv_buf_size = sz; 352 353 return 0; 354 } 355 356 static int 357 posix_sock_set_sendbuf(struct spdk_sock *_sock, int sz) 358 { 359 struct spdk_posix_sock *sock = __posix_sock(_sock); 360 int min_size; 361 int rc; 362 363 assert(sock != NULL); 364 365 /* Set kernel buffer size to be at least MIN_SO_SNDBUF_SIZE and 366 * g_spdk_posix_sock_impl_opts.send_buf_size. */ 367 min_size = spdk_max(MIN_SO_SNDBUF_SIZE, g_spdk_posix_sock_impl_opts.send_buf_size); 368 369 if (sz < min_size) { 370 sz = min_size; 371 } 372 373 rc = setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, &sz, sizeof(sz)); 374 if (rc < 0) { 375 return rc; 376 } 377 378 _sock->impl_opts.send_buf_size = sz; 379 380 return 0; 381 } 382 383 static void 384 posix_sock_init(struct spdk_posix_sock *sock, bool enable_zero_copy) 385 { 386 #if defined(SPDK_ZEROCOPY) || defined(__linux__) 387 int flag; 388 int rc; 389 #endif 390 391 #if defined(SPDK_ZEROCOPY) 392 flag = 1; 393 394 if (enable_zero_copy) { 395 /* Try to turn on zero copy sends */ 396 rc = setsockopt(sock->fd, SOL_SOCKET, SO_ZEROCOPY, &flag, sizeof(flag)); 397 if (rc == 0) { 398 sock->zcopy = true; 399 } 400 } 401 #endif 402 403 #if defined(__linux__) 404 flag = 1; 405 406 if (sock->base.impl_opts.enable_quickack) { 407 rc = setsockopt(sock->fd, IPPROTO_TCP, TCP_QUICKACK, &flag, sizeof(flag)); 408 if (rc != 0) { 409 SPDK_ERRLOG("quickack was failed to set\n"); 410 } 411 } 412 413 spdk_sock_get_placement_id(sock->fd, sock->base.impl_opts.enable_placement_id, 414 &sock->placement_id); 415 416 if (sock->base.impl_opts.enable_placement_id == PLACEMENT_MARK) { 417 /* Save placement_id */ 418 spdk_sock_map_insert(&g_map, sock->placement_id, NULL); 419 } 420 #endif 421 } 422 423 static struct spdk_posix_sock * 424 posix_sock_alloc(int fd, struct spdk_sock_impl_opts *impl_opts, bool enable_zero_copy) 425 { 426 struct spdk_posix_sock *sock; 427 428 sock = calloc(1, sizeof(*sock)); 429 if (sock == NULL) { 430 SPDK_ERRLOG("sock allocation failed\n"); 431 return NULL; 432 } 433 434 sock->fd = fd; 435 memcpy(&sock->base.impl_opts, impl_opts, sizeof(*impl_opts)); 436 posix_sock_init(sock, enable_zero_copy); 437 438 return sock; 439 } 440 441 static int 442 posix_fd_create(struct addrinfo *res, struct spdk_sock_opts *opts, 443 struct spdk_sock_impl_opts *impl_opts) 444 { 445 int fd; 446 int val = 1; 447 int rc, sz; 448 #if defined(__linux__) 449 int to; 450 #endif 451 452 fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol); 453 if (fd < 0) { 454 /* error */ 455 return -1; 456 } 457 458 sz = impl_opts->recv_buf_size; 459 rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz)); 460 if (rc) { 461 /* Not fatal */ 462 } 463 464 sz = impl_opts->send_buf_size; 465 rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sz, sizeof(sz)); 466 if (rc) { 467 /* Not fatal */ 468 } 469 470 rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val); 471 if (rc != 0) { 472 close(fd); 473 /* error */ 474 return -1; 475 } 476 rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val); 477 if (rc != 0) { 478 close(fd); 479 /* error */ 480 return -1; 481 } 482 483 #if defined(SO_PRIORITY) 484 if (opts->priority) { 485 rc = setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &opts->priority, sizeof val); 486 if (rc != 0) { 487 close(fd); 488 /* error */ 489 return -1; 490 } 491 } 492 #endif 493 494 if (res->ai_family == AF_INET6) { 495 rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof val); 496 if (rc != 0) { 497 close(fd); 498 /* error */ 499 return -1; 500 } 501 } 502 503 if (opts->ack_timeout) { 504 #if defined(__linux__) 505 to = opts->ack_timeout; 506 rc = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &to, sizeof(to)); 507 if (rc != 0) { 508 close(fd); 509 /* error */ 510 return -1; 511 } 512 #else 513 SPDK_WARNLOG("TCP_USER_TIMEOUT is not supported.\n"); 514 #endif 515 } 516 517 return fd; 518 } 519 520 static unsigned int 521 posix_sock_tls_psk_server_cb(SSL *ssl, 522 const char *id, 523 unsigned char *psk, 524 unsigned int max_psk_len) 525 { 526 long key_len; 527 unsigned char *psk_k = NULL; 528 const char *cipher = NULL; 529 struct spdk_sock_impl_opts *impl_opts; 530 uint8_t sock_psk[PSK_MAX_PSK_LEN] = {}; 531 int rc; 532 533 impl_opts = SSL_get_app_data(ssl); 534 SPDK_DEBUGLOG(sock_posix, "Received PSK ID '%s'\n", id); 535 if (id == NULL) { 536 SPDK_ERRLOG("Received empty PSK ID\n"); 537 goto err; 538 } 539 540 SPDK_DEBUGLOG(sock_posix, "Length of Client's PSK KEY %u\n", max_psk_len); 541 542 if (impl_opts->get_key) { 543 rc = impl_opts->get_key(sock_psk, PSK_MAX_PSK_LEN, &cipher, id, impl_opts->get_key_ctx); 544 assert(cipher == NULL); 545 if (rc < 0) { 546 goto err; 547 } 548 psk_k = OPENSSL_hexstr2buf(sock_psk, &key_len); 549 } else { 550 if (impl_opts->psk_key == NULL) { 551 SPDK_ERRLOG("PSK is not set\n"); 552 goto err; 553 } 554 555 SPDK_DEBUGLOG(sock_posix, "Length of Client's PSK ID %lu\n", strlen(impl_opts->psk_identity)); 556 if (strcmp(impl_opts->psk_identity, id) != 0) { 557 SPDK_ERRLOG("Unknown Client's PSK ID\n"); 558 goto err; 559 } 560 psk_k = OPENSSL_hexstr2buf(impl_opts->psk_key, &key_len); 561 } 562 if (psk_k == NULL) { 563 SPDK_ERRLOG("Could not unhexlify PSK\n"); 564 goto err; 565 } 566 if (key_len > max_psk_len) { 567 SPDK_ERRLOG("Insufficient buffer size to copy PSK\n"); 568 OPENSSL_free(psk_k); 569 goto err; 570 } 571 memcpy(psk, psk_k, key_len); 572 OPENSSL_free(psk_k); 573 574 return key_len; 575 576 err: 577 return 0; 578 } 579 580 static unsigned int 581 posix_sock_tls_psk_client_cb(SSL *ssl, const char *hint, 582 char *identity, 583 unsigned int max_identity_len, 584 unsigned char *psk, 585 unsigned int max_psk_len) 586 { 587 long key_len; 588 unsigned char *default_psk; 589 struct spdk_sock_impl_opts *impl_opts; 590 591 impl_opts = SSL_get_app_data(ssl); 592 593 if (hint) { 594 SPDK_DEBUGLOG(sock_posix, "Received PSK identity hint '%s'\n", hint); 595 } 596 597 if (impl_opts->psk_key == NULL) { 598 SPDK_ERRLOG("PSK is not set\n"); 599 goto err; 600 } 601 default_psk = OPENSSL_hexstr2buf(impl_opts->psk_key, &key_len); 602 if (default_psk == NULL) { 603 SPDK_ERRLOG("Could not unhexlify PSK\n"); 604 goto err; 605 } 606 if ((strlen(impl_opts->psk_identity) + 1 > max_identity_len) 607 || (key_len > max_psk_len)) { 608 OPENSSL_free(default_psk); 609 SPDK_ERRLOG("PSK ID or Key buffer is not sufficient\n"); 610 goto err; 611 } 612 spdk_strcpy_pad(identity, impl_opts->psk_identity, strlen(impl_opts->psk_identity), 0); 613 SPDK_DEBUGLOG(sock_posix, "Sending PSK identity '%s'\n", identity); 614 615 memcpy(psk, default_psk, key_len); 616 SPDK_DEBUGLOG(sock_posix, "Provided out-of-band (OOB) PSK for TLS1.3 client\n"); 617 OPENSSL_free(default_psk); 618 619 return key_len; 620 621 err: 622 return 0; 623 } 624 625 static SSL_CTX * 626 posix_sock_create_ssl_context(const SSL_METHOD *method, struct spdk_sock_opts *opts, 627 struct spdk_sock_impl_opts *impl_opts) 628 { 629 SSL_CTX *ctx; 630 int tls_version = 0; 631 bool ktls_enabled = false; 632 #ifdef SSL_OP_ENABLE_KTLS 633 long options; 634 #endif 635 636 SSL_library_init(); 637 OpenSSL_add_all_algorithms(); 638 SSL_load_error_strings(); 639 /* Produce a SSL CTX in SSL V2 and V3 standards compliant way */ 640 ctx = SSL_CTX_new(method); 641 if (!ctx) { 642 SPDK_ERRLOG("SSL_CTX_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL)); 643 return NULL; 644 } 645 SPDK_DEBUGLOG(sock_posix, "SSL context created\n"); 646 647 switch (impl_opts->tls_version) { 648 case 0: 649 /* auto-negotioation */ 650 break; 651 case SPDK_TLS_VERSION_1_1: 652 tls_version = TLS1_1_VERSION; 653 break; 654 case SPDK_TLS_VERSION_1_2: 655 tls_version = TLS1_2_VERSION; 656 break; 657 case SPDK_TLS_VERSION_1_3: 658 tls_version = TLS1_3_VERSION; 659 break; 660 default: 661 SPDK_ERRLOG("Incorrect TLS version provided: %d\n", impl_opts->tls_version); 662 goto err; 663 } 664 665 if (tls_version) { 666 SPDK_DEBUGLOG(sock_posix, "Hardening TLS version to '%d'='0x%X'\n", impl_opts->tls_version, 667 tls_version); 668 if (!SSL_CTX_set_min_proto_version(ctx, tls_version)) { 669 SPDK_ERRLOG("Unable to set Min TLS version to '%d'='0x%X\n", impl_opts->tls_version, tls_version); 670 goto err; 671 } 672 if (!SSL_CTX_set_max_proto_version(ctx, tls_version)) { 673 SPDK_ERRLOG("Unable to set Max TLS version to '%d'='0x%X\n", impl_opts->tls_version, tls_version); 674 goto err; 675 } 676 } 677 if (impl_opts->enable_ktls) { 678 SPDK_DEBUGLOG(sock_posix, "Enabling kTLS offload\n"); 679 #ifdef SSL_OP_ENABLE_KTLS 680 options = SSL_CTX_set_options(ctx, SSL_OP_ENABLE_KTLS); 681 ktls_enabled = options & SSL_OP_ENABLE_KTLS; 682 #else 683 ktls_enabled = false; 684 #endif 685 if (!ktls_enabled) { 686 SPDK_ERRLOG("Unable to set kTLS offload via SSL_CTX_set_options(). Configure openssl with 'enable-ktls'\n"); 687 goto err; 688 } 689 } 690 691 return ctx; 692 693 err: 694 SSL_CTX_free(ctx); 695 return NULL; 696 } 697 698 static SSL * 699 ssl_sock_connect_loop(SSL_CTX *ctx, int fd, struct spdk_sock_impl_opts *impl_opts) 700 { 701 int rc; 702 SSL *ssl; 703 int ssl_get_error; 704 705 ssl = SSL_new(ctx); 706 if (!ssl) { 707 SPDK_ERRLOG("SSL_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL)); 708 return NULL; 709 } 710 SSL_set_fd(ssl, fd); 711 SSL_set_app_data(ssl, impl_opts); 712 SSL_set_psk_client_callback(ssl, posix_sock_tls_psk_client_cb); 713 SPDK_DEBUGLOG(sock_posix, "SSL object creation finished: %p\n", ssl); 714 SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl); 715 while ((rc = SSL_connect(ssl)) != 1) { 716 SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl); 717 ssl_get_error = SSL_get_error(ssl, rc); 718 SPDK_DEBUGLOG(sock_posix, "SSL_connect failed %d = SSL_connect(%p), %d = SSL_get_error(%p, %d)\n", 719 rc, ssl, ssl_get_error, ssl, rc); 720 switch (ssl_get_error) { 721 case SSL_ERROR_WANT_READ: 722 case SSL_ERROR_WANT_WRITE: 723 continue; 724 default: 725 break; 726 } 727 SPDK_ERRLOG("SSL_connect() failed, errno = %d\n", errno); 728 SSL_free(ssl); 729 return NULL; 730 } 731 SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl); 732 SPDK_DEBUGLOG(sock_posix, "Negotiated Cipher suite:%s\n", 733 SSL_CIPHER_get_name(SSL_get_current_cipher(ssl))); 734 return ssl; 735 } 736 737 static SSL * 738 ssl_sock_accept_loop(SSL_CTX *ctx, int fd, struct spdk_sock_impl_opts *impl_opts) 739 { 740 int rc; 741 SSL *ssl; 742 int ssl_get_error; 743 744 ssl = SSL_new(ctx); 745 if (!ssl) { 746 SPDK_ERRLOG("SSL_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL)); 747 return NULL; 748 } 749 SSL_set_fd(ssl, fd); 750 SSL_set_app_data(ssl, impl_opts); 751 SSL_set_psk_server_callback(ssl, posix_sock_tls_psk_server_cb); 752 SPDK_DEBUGLOG(sock_posix, "SSL object creation finished: %p\n", ssl); 753 SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl); 754 while ((rc = SSL_accept(ssl)) != 1) { 755 SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl); 756 ssl_get_error = SSL_get_error(ssl, rc); 757 SPDK_DEBUGLOG(sock_posix, "SSL_accept failed %d = SSL_accept(%p), %d = SSL_get_error(%p, %d)\n", rc, 758 ssl, ssl_get_error, ssl, rc); 759 switch (ssl_get_error) { 760 case SSL_ERROR_WANT_READ: 761 case SSL_ERROR_WANT_WRITE: 762 continue; 763 default: 764 break; 765 } 766 SPDK_ERRLOG("SSL_accept() failed, errno = %d\n", errno); 767 SSL_free(ssl); 768 return NULL; 769 } 770 SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl); 771 SPDK_DEBUGLOG(sock_posix, "Negotiated Cipher suite:%s\n", 772 SSL_CIPHER_get_name(SSL_get_current_cipher(ssl))); 773 return ssl; 774 } 775 776 static ssize_t 777 SSL_readv(SSL *ssl, const struct iovec *iov, int iovcnt) 778 { 779 int i, rc = 0; 780 ssize_t total = 0; 781 782 for (i = 0; i < iovcnt; i++) { 783 rc = SSL_read(ssl, iov[i].iov_base, iov[i].iov_len); 784 785 if (rc > 0) { 786 total += rc; 787 } 788 if (rc != (int)iov[i].iov_len) { 789 break; 790 } 791 } 792 if (total > 0) { 793 errno = 0; 794 return total; 795 } 796 switch (SSL_get_error(ssl, rc)) { 797 case SSL_ERROR_ZERO_RETURN: 798 errno = ENOTCONN; 799 return 0; 800 case SSL_ERROR_WANT_READ: 801 case SSL_ERROR_WANT_WRITE: 802 case SSL_ERROR_WANT_CONNECT: 803 case SSL_ERROR_WANT_ACCEPT: 804 case SSL_ERROR_WANT_X509_LOOKUP: 805 case SSL_ERROR_WANT_ASYNC: 806 case SSL_ERROR_WANT_ASYNC_JOB: 807 case SSL_ERROR_WANT_CLIENT_HELLO_CB: 808 errno = EAGAIN; 809 return -1; 810 case SSL_ERROR_SYSCALL: 811 case SSL_ERROR_SSL: 812 errno = ENOTCONN; 813 return -1; 814 default: 815 errno = ENOTCONN; 816 return -1; 817 } 818 } 819 820 static ssize_t 821 SSL_writev(SSL *ssl, struct iovec *iov, int iovcnt) 822 { 823 int i, rc = 0; 824 ssize_t total = 0; 825 826 for (i = 0; i < iovcnt; i++) { 827 rc = SSL_write(ssl, iov[i].iov_base, iov[i].iov_len); 828 829 if (rc > 0) { 830 total += rc; 831 } 832 if (rc != (int)iov[i].iov_len) { 833 break; 834 } 835 } 836 if (total > 0) { 837 errno = 0; 838 return total; 839 } 840 switch (SSL_get_error(ssl, rc)) { 841 case SSL_ERROR_ZERO_RETURN: 842 errno = ENOTCONN; 843 return 0; 844 case SSL_ERROR_WANT_READ: 845 case SSL_ERROR_WANT_WRITE: 846 case SSL_ERROR_WANT_CONNECT: 847 case SSL_ERROR_WANT_ACCEPT: 848 case SSL_ERROR_WANT_X509_LOOKUP: 849 case SSL_ERROR_WANT_ASYNC: 850 case SSL_ERROR_WANT_ASYNC_JOB: 851 case SSL_ERROR_WANT_CLIENT_HELLO_CB: 852 errno = EAGAIN; 853 return -1; 854 case SSL_ERROR_SYSCALL: 855 case SSL_ERROR_SSL: 856 errno = ENOTCONN; 857 return -1; 858 default: 859 errno = ENOTCONN; 860 return -1; 861 } 862 } 863 864 static struct spdk_sock * 865 posix_sock_create(const char *ip, int port, 866 enum posix_sock_create_type type, 867 struct spdk_sock_opts *opts, 868 bool enable_ssl) 869 { 870 struct spdk_posix_sock *sock; 871 struct spdk_sock_impl_opts impl_opts; 872 char buf[MAX_TMPBUF]; 873 char portnum[PORTNUMLEN]; 874 char *p; 875 struct addrinfo hints, *res, *res0; 876 int fd, flag; 877 int rc; 878 bool enable_zcopy_user_opts = true; 879 bool enable_zcopy_impl_opts = true; 880 SSL_CTX *ctx = 0; 881 SSL *ssl = 0; 882 883 assert(opts != NULL); 884 posix_opts_get_impl_opts(opts, &impl_opts); 885 886 if (ip == NULL) { 887 return NULL; 888 } 889 if (ip[0] == '[') { 890 snprintf(buf, sizeof(buf), "%s", ip + 1); 891 p = strchr(buf, ']'); 892 if (p != NULL) { 893 *p = '\0'; 894 } 895 ip = (const char *) &buf[0]; 896 } 897 898 snprintf(portnum, sizeof portnum, "%d", port); 899 memset(&hints, 0, sizeof hints); 900 hints.ai_family = PF_UNSPEC; 901 hints.ai_socktype = SOCK_STREAM; 902 hints.ai_flags = AI_NUMERICSERV; 903 hints.ai_flags |= AI_PASSIVE; 904 hints.ai_flags |= AI_NUMERICHOST; 905 rc = getaddrinfo(ip, portnum, &hints, &res0); 906 if (rc != 0) { 907 SPDK_ERRLOG("getaddrinfo() failed %s (%d)\n", gai_strerror(rc), rc); 908 return NULL; 909 } 910 911 /* try listen */ 912 fd = -1; 913 for (res = res0; res != NULL; res = res->ai_next) { 914 retry: 915 fd = posix_fd_create(res, opts, &impl_opts); 916 if (fd < 0) { 917 continue; 918 } 919 if (type == SPDK_SOCK_CREATE_LISTEN) { 920 rc = bind(fd, res->ai_addr, res->ai_addrlen); 921 if (rc != 0) { 922 SPDK_ERRLOG("bind() failed at port %d, errno = %d\n", port, errno); 923 switch (errno) { 924 case EINTR: 925 /* interrupted? */ 926 close(fd); 927 goto retry; 928 case EADDRNOTAVAIL: 929 SPDK_ERRLOG("IP address %s not available. " 930 "Verify IP address in config file " 931 "and make sure setup script is " 932 "run before starting spdk app.\n", ip); 933 /* FALLTHROUGH */ 934 default: 935 /* try next family */ 936 close(fd); 937 fd = -1; 938 continue; 939 } 940 } 941 /* bind OK */ 942 rc = listen(fd, 512); 943 if (rc != 0) { 944 SPDK_ERRLOG("listen() failed, errno = %d\n", errno); 945 close(fd); 946 fd = -1; 947 break; 948 } 949 enable_zcopy_impl_opts = impl_opts.enable_zerocopy_send_server; 950 } else if (type == SPDK_SOCK_CREATE_CONNECT) { 951 rc = connect(fd, res->ai_addr, res->ai_addrlen); 952 if (rc != 0) { 953 SPDK_ERRLOG("connect() failed, errno = %d\n", errno); 954 /* try next family */ 955 close(fd); 956 fd = -1; 957 continue; 958 } 959 enable_zcopy_impl_opts = impl_opts.enable_zerocopy_send_client; 960 if (enable_ssl) { 961 ctx = posix_sock_create_ssl_context(TLS_client_method(), opts, &impl_opts); 962 if (!ctx) { 963 SPDK_ERRLOG("posix_sock_create_ssl_context() failed, errno = %d\n", errno); 964 close(fd); 965 fd = -1; 966 break; 967 } 968 ssl = ssl_sock_connect_loop(ctx, fd, &impl_opts); 969 if (!ssl) { 970 SPDK_ERRLOG("ssl_sock_connect_loop() failed, errno = %d\n", errno); 971 close(fd); 972 fd = -1; 973 SSL_CTX_free(ctx); 974 break; 975 } 976 } 977 } 978 979 flag = fcntl(fd, F_GETFL); 980 if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) { 981 SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%d)\n", fd, errno); 982 SSL_free(ssl); 983 SSL_CTX_free(ctx); 984 close(fd); 985 fd = -1; 986 break; 987 } 988 break; 989 } 990 freeaddrinfo(res0); 991 992 if (fd < 0) { 993 return NULL; 994 } 995 996 /* Only enable zero copy for non-loopback and non-ssl sockets. */ 997 enable_zcopy_user_opts = opts->zcopy && !sock_is_loopback(fd) && !enable_ssl; 998 999 sock = posix_sock_alloc(fd, &impl_opts, enable_zcopy_user_opts && enable_zcopy_impl_opts); 1000 if (sock == NULL) { 1001 SPDK_ERRLOG("sock allocation failed\n"); 1002 SSL_free(ssl); 1003 SSL_CTX_free(ctx); 1004 close(fd); 1005 return NULL; 1006 } 1007 1008 if (ctx) { 1009 sock->ctx = ctx; 1010 } 1011 1012 if (ssl) { 1013 sock->ssl = ssl; 1014 } 1015 1016 return &sock->base; 1017 } 1018 1019 static struct spdk_sock * 1020 posix_sock_listen(const char *ip, int port, struct spdk_sock_opts *opts) 1021 { 1022 return posix_sock_create(ip, port, SPDK_SOCK_CREATE_LISTEN, opts, false); 1023 } 1024 1025 static struct spdk_sock * 1026 posix_sock_connect(const char *ip, int port, struct spdk_sock_opts *opts) 1027 { 1028 return posix_sock_create(ip, port, SPDK_SOCK_CREATE_CONNECT, opts, false); 1029 } 1030 1031 static struct spdk_sock * 1032 _posix_sock_accept(struct spdk_sock *_sock, bool enable_ssl) 1033 { 1034 struct spdk_posix_sock *sock = __posix_sock(_sock); 1035 struct sockaddr_storage sa; 1036 socklen_t salen; 1037 int rc, fd; 1038 struct spdk_posix_sock *new_sock; 1039 int flag; 1040 SSL_CTX *ctx = 0; 1041 SSL *ssl = 0; 1042 1043 memset(&sa, 0, sizeof(sa)); 1044 salen = sizeof(sa); 1045 1046 assert(sock != NULL); 1047 1048 rc = accept(sock->fd, (struct sockaddr *)&sa, &salen); 1049 1050 if (rc == -1) { 1051 return NULL; 1052 } 1053 1054 fd = rc; 1055 1056 flag = fcntl(fd, F_GETFL); 1057 if ((!(flag & O_NONBLOCK)) && (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0)) { 1058 SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%d)\n", fd, errno); 1059 close(fd); 1060 return NULL; 1061 } 1062 1063 #if defined(SO_PRIORITY) 1064 /* The priority is not inherited, so call this function again */ 1065 if (sock->base.opts.priority) { 1066 rc = setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &sock->base.opts.priority, sizeof(int)); 1067 if (rc != 0) { 1068 close(fd); 1069 return NULL; 1070 } 1071 } 1072 #endif 1073 1074 /* Establish SSL connection */ 1075 if (enable_ssl) { 1076 ctx = posix_sock_create_ssl_context(TLS_server_method(), &sock->base.opts, &sock->base.impl_opts); 1077 if (!ctx) { 1078 SPDK_ERRLOG("posix_sock_create_ssl_context() failed, errno = %d\n", errno); 1079 close(fd); 1080 return NULL; 1081 } 1082 ssl = ssl_sock_accept_loop(ctx, fd, &sock->base.impl_opts); 1083 if (!ssl) { 1084 SPDK_ERRLOG("ssl_sock_accept_loop() failed, errno = %d\n", errno); 1085 close(fd); 1086 SSL_CTX_free(ctx); 1087 return NULL; 1088 } 1089 } 1090 1091 /* Inherit the zero copy feature from the listen socket */ 1092 new_sock = posix_sock_alloc(fd, &sock->base.impl_opts, sock->zcopy); 1093 if (new_sock == NULL) { 1094 close(fd); 1095 SSL_free(ssl); 1096 SSL_CTX_free(ctx); 1097 return NULL; 1098 } 1099 1100 if (ctx) { 1101 new_sock->ctx = ctx; 1102 } 1103 1104 if (ssl) { 1105 new_sock->ssl = ssl; 1106 } 1107 1108 return &new_sock->base; 1109 } 1110 1111 static struct spdk_sock * 1112 posix_sock_accept(struct spdk_sock *_sock) 1113 { 1114 return _posix_sock_accept(_sock, false); 1115 } 1116 1117 static int 1118 posix_sock_close(struct spdk_sock *_sock) 1119 { 1120 struct spdk_posix_sock *sock = __posix_sock(_sock); 1121 1122 assert(TAILQ_EMPTY(&_sock->pending_reqs)); 1123 1124 if (sock->ssl != NULL) { 1125 SSL_shutdown(sock->ssl); 1126 } 1127 1128 /* If the socket fails to close, the best choice is to 1129 * leak the fd but continue to free the rest of the sock 1130 * memory. */ 1131 close(sock->fd); 1132 1133 SSL_free(sock->ssl); 1134 SSL_CTX_free(sock->ctx); 1135 1136 spdk_pipe_destroy(sock->recv_pipe); 1137 free(sock->recv_buf); 1138 free(sock); 1139 1140 return 0; 1141 } 1142 1143 #ifdef SPDK_ZEROCOPY 1144 static int 1145 _sock_check_zcopy(struct spdk_sock *sock) 1146 { 1147 struct spdk_posix_sock *psock = __posix_sock(sock); 1148 struct msghdr msgh = {}; 1149 uint8_t buf[sizeof(struct cmsghdr) + sizeof(struct sock_extended_err)]; 1150 ssize_t rc; 1151 struct sock_extended_err *serr; 1152 struct cmsghdr *cm; 1153 uint32_t idx; 1154 struct spdk_sock_request *req, *treq; 1155 bool found; 1156 1157 msgh.msg_control = buf; 1158 msgh.msg_controllen = sizeof(buf); 1159 1160 while (true) { 1161 rc = recvmsg(psock->fd, &msgh, MSG_ERRQUEUE); 1162 1163 if (rc < 0) { 1164 if (errno == EWOULDBLOCK || errno == EAGAIN) { 1165 return 0; 1166 } 1167 1168 if (!TAILQ_EMPTY(&sock->pending_reqs)) { 1169 SPDK_ERRLOG("Attempting to receive from ERRQUEUE yielded error, but pending list still has orphaned entries\n"); 1170 } else { 1171 SPDK_WARNLOG("Recvmsg yielded an error!\n"); 1172 } 1173 return 0; 1174 } 1175 1176 cm = CMSG_FIRSTHDR(&msgh); 1177 if (!(cm && 1178 ((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) || 1179 (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR)))) { 1180 SPDK_WARNLOG("Unexpected cmsg level or type!\n"); 1181 return 0; 1182 } 1183 1184 serr = (struct sock_extended_err *)CMSG_DATA(cm); 1185 if (serr->ee_errno != 0 || serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { 1186 SPDK_WARNLOG("Unexpected extended error origin\n"); 1187 return 0; 1188 } 1189 1190 /* Most of the time, the pending_reqs array is in the exact 1191 * order we need such that all of the requests to complete are 1192 * in order, in the front. It is guaranteed that all requests 1193 * belonging to the same sendmsg call are sequential, so once 1194 * we encounter one match we can stop looping as soon as a 1195 * non-match is found. 1196 */ 1197 idx = serr->ee_info; 1198 while (true) { 1199 found = false; 1200 TAILQ_FOREACH_SAFE(req, &sock->pending_reqs, internal.link, treq) { 1201 if (!req->internal.is_zcopy) { 1202 /* This wasn't a zcopy request. It was just waiting in line to complete */ 1203 rc = spdk_sock_request_put(sock, req, 0); 1204 if (rc < 0) { 1205 return rc; 1206 } 1207 } else if (req->internal.offset == idx) { 1208 found = true; 1209 rc = spdk_sock_request_put(sock, req, 0); 1210 if (rc < 0) { 1211 return rc; 1212 } 1213 } else if (found) { 1214 break; 1215 } 1216 } 1217 1218 if (idx == serr->ee_data) { 1219 break; 1220 } 1221 1222 if (idx == UINT32_MAX) { 1223 idx = 0; 1224 } else { 1225 idx++; 1226 } 1227 } 1228 } 1229 1230 return 0; 1231 } 1232 #endif 1233 1234 static int 1235 _sock_flush(struct spdk_sock *sock) 1236 { 1237 struct spdk_posix_sock *psock = __posix_sock(sock); 1238 struct msghdr msg = {}; 1239 int flags; 1240 struct iovec iovs[IOV_BATCH_SIZE]; 1241 int iovcnt; 1242 int retval; 1243 struct spdk_sock_request *req; 1244 int i; 1245 ssize_t rc, sent; 1246 unsigned int offset; 1247 size_t len; 1248 bool is_zcopy = false; 1249 1250 /* Can't flush from within a callback or we end up with recursive calls */ 1251 if (sock->cb_cnt > 0) { 1252 errno = EAGAIN; 1253 return -1; 1254 } 1255 1256 #ifdef SPDK_ZEROCOPY 1257 if (psock->zcopy) { 1258 flags = MSG_ZEROCOPY | MSG_NOSIGNAL; 1259 } else 1260 #endif 1261 { 1262 flags = MSG_NOSIGNAL; 1263 } 1264 1265 iovcnt = spdk_sock_prep_reqs(sock, iovs, 0, NULL, &flags); 1266 if (iovcnt == 0) { 1267 return 0; 1268 } 1269 1270 #ifdef SPDK_ZEROCOPY 1271 is_zcopy = flags & MSG_ZEROCOPY; 1272 #endif 1273 1274 /* Perform the vectored write */ 1275 msg.msg_iov = iovs; 1276 msg.msg_iovlen = iovcnt; 1277 1278 if (psock->ssl) { 1279 rc = SSL_writev(psock->ssl, iovs, iovcnt); 1280 } else { 1281 rc = sendmsg(psock->fd, &msg, flags); 1282 } 1283 if (rc <= 0) { 1284 if (rc == 0 || errno == EAGAIN || errno == EWOULDBLOCK || (errno == ENOBUFS && psock->zcopy)) { 1285 errno = EAGAIN; 1286 } 1287 return -1; 1288 } 1289 1290 sent = rc; 1291 1292 if (is_zcopy) { 1293 /* Handling overflow case, because we use psock->sendmsg_idx - 1 for the 1294 * req->internal.offset, so sendmsg_idx should not be zero */ 1295 if (spdk_unlikely(psock->sendmsg_idx == UINT32_MAX)) { 1296 psock->sendmsg_idx = 1; 1297 } else { 1298 psock->sendmsg_idx++; 1299 } 1300 } 1301 1302 /* Consume the requests that were actually written */ 1303 req = TAILQ_FIRST(&sock->queued_reqs); 1304 while (req) { 1305 offset = req->internal.offset; 1306 1307 /* req->internal.is_zcopy is true when the whole req or part of it is sent with zerocopy */ 1308 req->internal.is_zcopy = is_zcopy; 1309 1310 for (i = 0; i < req->iovcnt; i++) { 1311 /* Advance by the offset first */ 1312 if (offset >= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len) { 1313 offset -= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len; 1314 continue; 1315 } 1316 1317 /* Calculate the remaining length of this element */ 1318 len = SPDK_SOCK_REQUEST_IOV(req, i)->iov_len - offset; 1319 1320 if (len > (size_t)rc) { 1321 /* This element was partially sent. */ 1322 req->internal.offset += rc; 1323 return sent; 1324 } 1325 1326 offset = 0; 1327 req->internal.offset += len; 1328 rc -= len; 1329 } 1330 1331 /* Handled a full request. */ 1332 spdk_sock_request_pend(sock, req); 1333 1334 if (!req->internal.is_zcopy && req == TAILQ_FIRST(&sock->pending_reqs)) { 1335 /* The sendmsg syscall above isn't currently asynchronous, 1336 * so it's already done. */ 1337 retval = spdk_sock_request_put(sock, req, 0); 1338 if (retval) { 1339 break; 1340 } 1341 } else { 1342 /* Re-use the offset field to hold the sendmsg call index. The 1343 * index is 0 based, so subtract one here because we've already 1344 * incremented above. */ 1345 req->internal.offset = psock->sendmsg_idx - 1; 1346 } 1347 1348 if (rc == 0) { 1349 break; 1350 } 1351 1352 req = TAILQ_FIRST(&sock->queued_reqs); 1353 } 1354 1355 return sent; 1356 } 1357 1358 static int 1359 posix_sock_flush(struct spdk_sock *sock) 1360 { 1361 #ifdef SPDK_ZEROCOPY 1362 struct spdk_posix_sock *psock = __posix_sock(sock); 1363 1364 if (psock->zcopy && !TAILQ_EMPTY(&sock->pending_reqs)) { 1365 _sock_check_zcopy(sock); 1366 } 1367 #endif 1368 1369 return _sock_flush(sock); 1370 } 1371 1372 static ssize_t 1373 posix_sock_recv_from_pipe(struct spdk_posix_sock *sock, struct iovec *diov, int diovcnt) 1374 { 1375 struct iovec siov[2]; 1376 int sbytes; 1377 ssize_t bytes; 1378 struct spdk_posix_sock_group_impl *group; 1379 1380 sbytes = spdk_pipe_reader_get_buffer(sock->recv_pipe, sock->recv_buf_sz, siov); 1381 if (sbytes < 0) { 1382 errno = EINVAL; 1383 return -1; 1384 } else if (sbytes == 0) { 1385 errno = EAGAIN; 1386 return -1; 1387 } 1388 1389 bytes = spdk_iovcpy(siov, 2, diov, diovcnt); 1390 1391 if (bytes == 0) { 1392 /* The only way this happens is if diov is 0 length */ 1393 errno = EINVAL; 1394 return -1; 1395 } 1396 1397 spdk_pipe_reader_advance(sock->recv_pipe, bytes); 1398 1399 /* If we drained the pipe, mark it appropriately */ 1400 if (spdk_pipe_reader_bytes_available(sock->recv_pipe) == 0) { 1401 assert(sock->pipe_has_data == true); 1402 1403 group = __posix_group_impl(sock->base.group_impl); 1404 if (group && !sock->socket_has_data) { 1405 TAILQ_REMOVE(&group->socks_with_data, sock, link); 1406 } 1407 1408 sock->pipe_has_data = false; 1409 } 1410 1411 return bytes; 1412 } 1413 1414 static inline ssize_t 1415 posix_sock_read(struct spdk_posix_sock *sock) 1416 { 1417 struct iovec iov[2]; 1418 int bytes_avail, bytes_recvd; 1419 struct spdk_posix_sock_group_impl *group; 1420 1421 bytes_avail = spdk_pipe_writer_get_buffer(sock->recv_pipe, sock->recv_buf_sz, iov); 1422 1423 if (bytes_avail <= 0) { 1424 return bytes_avail; 1425 } 1426 1427 if (sock->ssl) { 1428 bytes_recvd = SSL_readv(sock->ssl, iov, 2); 1429 } else { 1430 bytes_recvd = readv(sock->fd, iov, 2); 1431 } 1432 1433 assert(sock->pipe_has_data == false); 1434 1435 if (bytes_recvd <= 0) { 1436 /* Errors count as draining the socket data */ 1437 if (sock->base.group_impl && sock->socket_has_data) { 1438 group = __posix_group_impl(sock->base.group_impl); 1439 TAILQ_REMOVE(&group->socks_with_data, sock, link); 1440 } 1441 1442 sock->socket_has_data = false; 1443 1444 return bytes_recvd; 1445 } 1446 1447 spdk_pipe_writer_advance(sock->recv_pipe, bytes_recvd); 1448 1449 #if DEBUG 1450 if (sock->base.group_impl) { 1451 assert(sock->socket_has_data == true); 1452 } 1453 #endif 1454 1455 sock->pipe_has_data = true; 1456 if (bytes_recvd < bytes_avail) { 1457 /* We drained the kernel socket entirely. */ 1458 sock->socket_has_data = false; 1459 } 1460 1461 return bytes_recvd; 1462 } 1463 1464 static ssize_t 1465 posix_sock_readv(struct spdk_sock *_sock, struct iovec *iov, int iovcnt) 1466 { 1467 struct spdk_posix_sock *sock = __posix_sock(_sock); 1468 struct spdk_posix_sock_group_impl *group = __posix_group_impl(sock->base.group_impl); 1469 int rc, i; 1470 size_t len; 1471 1472 if (sock->recv_pipe == NULL) { 1473 assert(sock->pipe_has_data == false); 1474 if (group && sock->socket_has_data) { 1475 sock->socket_has_data = false; 1476 TAILQ_REMOVE(&group->socks_with_data, sock, link); 1477 } 1478 if (sock->ssl) { 1479 return SSL_readv(sock->ssl, iov, iovcnt); 1480 } else { 1481 return readv(sock->fd, iov, iovcnt); 1482 } 1483 } 1484 1485 /* If the socket is not in a group, we must assume it always has 1486 * data waiting for us because it is not epolled */ 1487 if (!sock->pipe_has_data && (group == NULL || sock->socket_has_data)) { 1488 /* If the user is receiving a sufficiently large amount of data, 1489 * receive directly to their buffers. */ 1490 len = 0; 1491 for (i = 0; i < iovcnt; i++) { 1492 len += iov[i].iov_len; 1493 } 1494 1495 if (len >= MIN_SOCK_PIPE_SIZE) { 1496 /* TODO: Should this detect if kernel socket is drained? */ 1497 if (sock->ssl) { 1498 return SSL_readv(sock->ssl, iov, iovcnt); 1499 } else { 1500 return readv(sock->fd, iov, iovcnt); 1501 } 1502 } 1503 1504 /* Otherwise, do a big read into our pipe */ 1505 rc = posix_sock_read(sock); 1506 if (rc <= 0) { 1507 return rc; 1508 } 1509 } 1510 1511 return posix_sock_recv_from_pipe(sock, iov, iovcnt); 1512 } 1513 1514 static ssize_t 1515 posix_sock_recv(struct spdk_sock *sock, void *buf, size_t len) 1516 { 1517 struct iovec iov[1]; 1518 1519 iov[0].iov_base = buf; 1520 iov[0].iov_len = len; 1521 1522 return posix_sock_readv(sock, iov, 1); 1523 } 1524 1525 static void 1526 posix_sock_readv_async(struct spdk_sock *sock, struct spdk_sock_request *req) 1527 { 1528 req->cb_fn(req->cb_arg, -ENOTSUP); 1529 } 1530 1531 static ssize_t 1532 posix_sock_writev(struct spdk_sock *_sock, struct iovec *iov, int iovcnt) 1533 { 1534 struct spdk_posix_sock *sock = __posix_sock(_sock); 1535 int rc; 1536 1537 /* In order to process a writev, we need to flush any asynchronous writes 1538 * first. */ 1539 rc = _sock_flush(_sock); 1540 if (rc < 0) { 1541 return rc; 1542 } 1543 1544 if (!TAILQ_EMPTY(&_sock->queued_reqs)) { 1545 /* We weren't able to flush all requests */ 1546 errno = EAGAIN; 1547 return -1; 1548 } 1549 1550 if (sock->ssl) { 1551 return SSL_writev(sock->ssl, iov, iovcnt); 1552 } else { 1553 return writev(sock->fd, iov, iovcnt); 1554 } 1555 } 1556 1557 static int 1558 posix_sock_recv_next(struct spdk_sock *_sock, void **buf, void **ctx) 1559 { 1560 struct spdk_posix_sock *sock = __posix_sock(_sock); 1561 struct iovec iov; 1562 ssize_t rc; 1563 1564 if (sock->recv_pipe != NULL) { 1565 errno = ENOTSUP; 1566 return -1; 1567 } 1568 1569 iov.iov_len = spdk_sock_group_get_buf(_sock->group_impl->group, &iov.iov_base, ctx); 1570 if (iov.iov_len == 0) { 1571 errno = ENOBUFS; 1572 return -1; 1573 } 1574 1575 rc = posix_sock_readv(_sock, &iov, 1); 1576 if (rc <= 0) { 1577 spdk_sock_group_provide_buf(_sock->group_impl->group, iov.iov_base, iov.iov_len, *ctx); 1578 return rc; 1579 } 1580 1581 *buf = iov.iov_base; 1582 1583 return rc; 1584 } 1585 1586 static void 1587 posix_sock_writev_async(struct spdk_sock *sock, struct spdk_sock_request *req) 1588 { 1589 int rc; 1590 1591 spdk_sock_request_queue(sock, req); 1592 1593 /* If there are a sufficient number queued, just flush them out immediately. */ 1594 if (sock->queued_iovcnt >= IOV_BATCH_SIZE) { 1595 rc = _sock_flush(sock); 1596 if (rc < 0 && errno != EAGAIN) { 1597 spdk_sock_abort_requests(sock); 1598 } 1599 } 1600 } 1601 1602 static int 1603 posix_sock_set_recvlowat(struct spdk_sock *_sock, int nbytes) 1604 { 1605 struct spdk_posix_sock *sock = __posix_sock(_sock); 1606 int val; 1607 int rc; 1608 1609 assert(sock != NULL); 1610 1611 val = nbytes; 1612 rc = setsockopt(sock->fd, SOL_SOCKET, SO_RCVLOWAT, &val, sizeof val); 1613 if (rc != 0) { 1614 return -1; 1615 } 1616 return 0; 1617 } 1618 1619 static bool 1620 posix_sock_is_ipv6(struct spdk_sock *_sock) 1621 { 1622 struct spdk_posix_sock *sock = __posix_sock(_sock); 1623 struct sockaddr_storage sa; 1624 socklen_t salen; 1625 int rc; 1626 1627 assert(sock != NULL); 1628 1629 memset(&sa, 0, sizeof sa); 1630 salen = sizeof sa; 1631 rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen); 1632 if (rc != 0) { 1633 SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno); 1634 return false; 1635 } 1636 1637 return (sa.ss_family == AF_INET6); 1638 } 1639 1640 static bool 1641 posix_sock_is_ipv4(struct spdk_sock *_sock) 1642 { 1643 struct spdk_posix_sock *sock = __posix_sock(_sock); 1644 struct sockaddr_storage sa; 1645 socklen_t salen; 1646 int rc; 1647 1648 assert(sock != NULL); 1649 1650 memset(&sa, 0, sizeof sa); 1651 salen = sizeof sa; 1652 rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen); 1653 if (rc != 0) { 1654 SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno); 1655 return false; 1656 } 1657 1658 return (sa.ss_family == AF_INET); 1659 } 1660 1661 static bool 1662 posix_sock_is_connected(struct spdk_sock *_sock) 1663 { 1664 struct spdk_posix_sock *sock = __posix_sock(_sock); 1665 uint8_t byte; 1666 int rc; 1667 1668 rc = recv(sock->fd, &byte, 1, MSG_PEEK); 1669 if (rc == 0) { 1670 return false; 1671 } 1672 1673 if (rc < 0) { 1674 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1675 return true; 1676 } 1677 1678 return false; 1679 } 1680 1681 return true; 1682 } 1683 1684 static struct spdk_sock_group_impl * 1685 posix_sock_group_impl_get_optimal(struct spdk_sock *_sock, struct spdk_sock_group_impl *hint) 1686 { 1687 struct spdk_posix_sock *sock = __posix_sock(_sock); 1688 struct spdk_sock_group_impl *group_impl; 1689 1690 if (sock->placement_id != -1) { 1691 spdk_sock_map_lookup(&g_map, sock->placement_id, &group_impl, hint); 1692 return group_impl; 1693 } 1694 1695 return NULL; 1696 } 1697 1698 static struct spdk_sock_group_impl * 1699 posix_sock_group_impl_create(void) 1700 { 1701 struct spdk_posix_sock_group_impl *group_impl; 1702 int fd; 1703 1704 #if defined(SPDK_EPOLL) 1705 fd = epoll_create1(0); 1706 #elif defined(SPDK_KEVENT) 1707 fd = kqueue(); 1708 #endif 1709 if (fd == -1) { 1710 return NULL; 1711 } 1712 1713 group_impl = calloc(1, sizeof(*group_impl)); 1714 if (group_impl == NULL) { 1715 SPDK_ERRLOG("group_impl allocation failed\n"); 1716 close(fd); 1717 return NULL; 1718 } 1719 1720 group_impl->fd = fd; 1721 TAILQ_INIT(&group_impl->socks_with_data); 1722 group_impl->placement_id = -1; 1723 1724 if (g_spdk_posix_sock_impl_opts.enable_placement_id == PLACEMENT_CPU) { 1725 spdk_sock_map_insert(&g_map, spdk_env_get_current_core(), &group_impl->base); 1726 group_impl->placement_id = spdk_env_get_current_core(); 1727 } 1728 1729 return &group_impl->base; 1730 } 1731 1732 static void 1733 posix_sock_mark(struct spdk_posix_sock_group_impl *group, struct spdk_posix_sock *sock, 1734 int placement_id) 1735 { 1736 #if defined(SO_MARK) 1737 int rc; 1738 1739 rc = setsockopt(sock->fd, SOL_SOCKET, SO_MARK, 1740 &placement_id, sizeof(placement_id)); 1741 if (rc != 0) { 1742 /* Not fatal */ 1743 SPDK_ERRLOG("Error setting SO_MARK\n"); 1744 return; 1745 } 1746 1747 rc = spdk_sock_map_insert(&g_map, placement_id, &group->base); 1748 if (rc != 0) { 1749 /* Not fatal */ 1750 SPDK_ERRLOG("Failed to insert sock group into map: %d\n", rc); 1751 return; 1752 } 1753 1754 sock->placement_id = placement_id; 1755 #endif 1756 } 1757 1758 static void 1759 posix_sock_update_mark(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock) 1760 { 1761 struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); 1762 1763 if (group->placement_id == -1) { 1764 group->placement_id = spdk_sock_map_find_free(&g_map); 1765 1766 /* If a free placement id is found, update existing sockets in this group */ 1767 if (group->placement_id != -1) { 1768 struct spdk_sock *sock, *tmp; 1769 1770 TAILQ_FOREACH_SAFE(sock, &_group->socks, link, tmp) { 1771 posix_sock_mark(group, __posix_sock(sock), group->placement_id); 1772 } 1773 } 1774 } 1775 1776 if (group->placement_id != -1) { 1777 /* 1778 * group placement id is already determined for this poll group. 1779 * Mark socket with group's placement id. 1780 */ 1781 posix_sock_mark(group, __posix_sock(_sock), group->placement_id); 1782 } 1783 } 1784 1785 static int 1786 posix_sock_group_impl_add_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock) 1787 { 1788 struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); 1789 struct spdk_posix_sock *sock = __posix_sock(_sock); 1790 int rc; 1791 1792 #if defined(SPDK_EPOLL) 1793 struct epoll_event event; 1794 1795 memset(&event, 0, sizeof(event)); 1796 /* EPOLLERR is always on even if we don't set it, but be explicit for clarity */ 1797 event.events = EPOLLIN | EPOLLERR; 1798 event.data.ptr = sock; 1799 1800 rc = epoll_ctl(group->fd, EPOLL_CTL_ADD, sock->fd, &event); 1801 #elif defined(SPDK_KEVENT) 1802 struct kevent event; 1803 struct timespec ts = {0}; 1804 1805 EV_SET(&event, sock->fd, EVFILT_READ, EV_ADD, 0, 0, sock); 1806 1807 rc = kevent(group->fd, &event, 1, NULL, 0, &ts); 1808 #endif 1809 1810 if (rc != 0) { 1811 return rc; 1812 } 1813 1814 /* switched from another polling group due to scheduling */ 1815 if (spdk_unlikely(sock->recv_pipe != NULL && 1816 (spdk_pipe_reader_bytes_available(sock->recv_pipe) > 0))) { 1817 sock->pipe_has_data = true; 1818 sock->socket_has_data = false; 1819 TAILQ_INSERT_TAIL(&group->socks_with_data, sock, link); 1820 } 1821 1822 if (g_spdk_posix_sock_impl_opts.enable_placement_id == PLACEMENT_MARK) { 1823 posix_sock_update_mark(_group, _sock); 1824 } else if (sock->placement_id != -1) { 1825 rc = spdk_sock_map_insert(&g_map, sock->placement_id, &group->base); 1826 if (rc != 0) { 1827 SPDK_ERRLOG("Failed to insert sock group into map: %d\n", rc); 1828 /* Do not treat this as an error. The system will continue running. */ 1829 } 1830 } 1831 1832 return rc; 1833 } 1834 1835 static int 1836 posix_sock_group_impl_remove_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock) 1837 { 1838 struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); 1839 struct spdk_posix_sock *sock = __posix_sock(_sock); 1840 int rc; 1841 1842 if (sock->pipe_has_data || sock->socket_has_data) { 1843 TAILQ_REMOVE(&group->socks_with_data, sock, link); 1844 sock->pipe_has_data = false; 1845 sock->socket_has_data = false; 1846 } 1847 1848 if (sock->placement_id != -1) { 1849 spdk_sock_map_release(&g_map, sock->placement_id); 1850 } 1851 1852 #if defined(SPDK_EPOLL) 1853 struct epoll_event event; 1854 1855 /* Event parameter is ignored but some old kernel version still require it. */ 1856 rc = epoll_ctl(group->fd, EPOLL_CTL_DEL, sock->fd, &event); 1857 #elif defined(SPDK_KEVENT) 1858 struct kevent event; 1859 struct timespec ts = {0}; 1860 1861 EV_SET(&event, sock->fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); 1862 1863 rc = kevent(group->fd, &event, 1, NULL, 0, &ts); 1864 if (rc == 0 && event.flags & EV_ERROR) { 1865 rc = -1; 1866 errno = event.data; 1867 } 1868 #endif 1869 1870 spdk_sock_abort_requests(_sock); 1871 1872 return rc; 1873 } 1874 1875 static int 1876 posix_sock_group_impl_poll(struct spdk_sock_group_impl *_group, int max_events, 1877 struct spdk_sock **socks) 1878 { 1879 struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); 1880 struct spdk_sock *sock, *tmp; 1881 int num_events, i, rc; 1882 struct spdk_posix_sock *psock, *ptmp; 1883 #if defined(SPDK_EPOLL) 1884 struct epoll_event events[MAX_EVENTS_PER_POLL]; 1885 #elif defined(SPDK_KEVENT) 1886 struct kevent events[MAX_EVENTS_PER_POLL]; 1887 struct timespec ts = {0}; 1888 #endif 1889 1890 #ifdef SPDK_ZEROCOPY 1891 /* When all of the following conditions are met 1892 * - non-blocking socket 1893 * - zero copy is enabled 1894 * - interrupts suppressed (i.e. busy polling) 1895 * - the NIC tx queue is full at the time sendmsg() is called 1896 * - epoll_wait determines there is an EPOLLIN event for the socket 1897 * then we can get into a situation where data we've sent is queued 1898 * up in the kernel network stack, but interrupts have been suppressed 1899 * because other traffic is flowing so the kernel misses the signal 1900 * to flush the software tx queue. If there wasn't incoming data 1901 * pending on the socket, then epoll_wait would have been sufficient 1902 * to kick off the send operation, but since there is a pending event 1903 * epoll_wait does not trigger the necessary operation. 1904 * 1905 * We deal with this by checking for all of the above conditions and 1906 * additionally looking for EPOLLIN events that were not consumed from 1907 * the last poll loop. We take this to mean that the upper layer is 1908 * unable to consume them because it is blocked waiting for resources 1909 * to free up, and those resources are most likely freed in response 1910 * to a pending asynchronous write completing. 1911 * 1912 * Additionally, sockets that have the same placement_id actually share 1913 * an underlying hardware queue. That means polling one of them is 1914 * equivalent to polling all of them. As a quick mechanism to avoid 1915 * making extra poll() calls, stash the last placement_id during the loop 1916 * and only poll if it's not the same. The overwhelmingly common case 1917 * is that all sockets in this list have the same placement_id because 1918 * SPDK is intentionally grouping sockets by that value, so even 1919 * though this won't stop all extra calls to poll(), it's very fast 1920 * and will catch all of them in practice. 1921 */ 1922 int last_placement_id = -1; 1923 1924 TAILQ_FOREACH(psock, &group->socks_with_data, link) { 1925 if (psock->zcopy && psock->placement_id >= 0 && 1926 psock->placement_id != last_placement_id) { 1927 struct pollfd pfd = {psock->fd, POLLIN | POLLERR, 0}; 1928 1929 poll(&pfd, 1, 0); 1930 last_placement_id = psock->placement_id; 1931 } 1932 } 1933 #endif 1934 1935 /* This must be a TAILQ_FOREACH_SAFE because while flushing, 1936 * a completion callback could remove the sock from the 1937 * group. */ 1938 TAILQ_FOREACH_SAFE(sock, &_group->socks, link, tmp) { 1939 rc = _sock_flush(sock); 1940 if (rc < 0 && errno != EAGAIN) { 1941 spdk_sock_abort_requests(sock); 1942 } 1943 } 1944 1945 assert(max_events > 0); 1946 1947 #if defined(SPDK_EPOLL) 1948 num_events = epoll_wait(group->fd, events, max_events, 0); 1949 #elif defined(SPDK_KEVENT) 1950 num_events = kevent(group->fd, NULL, 0, events, max_events, &ts); 1951 #endif 1952 1953 if (num_events == -1) { 1954 return -1; 1955 } else if (num_events == 0 && !TAILQ_EMPTY(&_group->socks)) { 1956 sock = TAILQ_FIRST(&_group->socks); 1957 psock = __posix_sock(sock); 1958 /* poll() is called here to busy poll the queue associated with 1959 * first socket in list and potentially reap incoming data. 1960 */ 1961 if (sock->opts.priority) { 1962 struct pollfd pfd = {0, 0, 0}; 1963 1964 pfd.fd = psock->fd; 1965 pfd.events = POLLIN | POLLERR; 1966 poll(&pfd, 1, 0); 1967 } 1968 } 1969 1970 for (i = 0; i < num_events; i++) { 1971 #if defined(SPDK_EPOLL) 1972 sock = events[i].data.ptr; 1973 psock = __posix_sock(sock); 1974 1975 #ifdef SPDK_ZEROCOPY 1976 if (events[i].events & EPOLLERR) { 1977 rc = _sock_check_zcopy(sock); 1978 /* If the socket was closed or removed from 1979 * the group in response to a send ack, don't 1980 * add it to the array here. */ 1981 if (rc || sock->cb_fn == NULL) { 1982 continue; 1983 } 1984 } 1985 #endif 1986 if ((events[i].events & EPOLLIN) == 0) { 1987 continue; 1988 } 1989 1990 #elif defined(SPDK_KEVENT) 1991 sock = events[i].udata; 1992 psock = __posix_sock(sock); 1993 #endif 1994 1995 /* If the socket is not already in the list, add it now */ 1996 if (!psock->socket_has_data && !psock->pipe_has_data) { 1997 TAILQ_INSERT_TAIL(&group->socks_with_data, psock, link); 1998 } 1999 psock->socket_has_data = true; 2000 } 2001 2002 num_events = 0; 2003 2004 TAILQ_FOREACH_SAFE(psock, &group->socks_with_data, link, ptmp) { 2005 if (num_events == max_events) { 2006 break; 2007 } 2008 2009 /* If the socket's cb_fn is NULL, just remove it from the 2010 * list and do not add it to socks array */ 2011 if (spdk_unlikely(psock->base.cb_fn == NULL)) { 2012 psock->socket_has_data = false; 2013 psock->pipe_has_data = false; 2014 TAILQ_REMOVE(&group->socks_with_data, psock, link); 2015 continue; 2016 } 2017 2018 socks[num_events++] = &psock->base; 2019 } 2020 2021 /* Cycle the has_data list so that each time we poll things aren't 2022 * in the same order. Say we have 6 sockets in the list, named as follows: 2023 * A B C D E F 2024 * And all 6 sockets had epoll events, but max_events is only 3. That means 2025 * psock currently points at D. We want to rearrange the list to the following: 2026 * D E F A B C 2027 * 2028 * The variables below are named according to this example to make it easier to 2029 * follow the swaps. 2030 */ 2031 if (psock != NULL) { 2032 struct spdk_posix_sock *pa, *pc, *pd, *pf; 2033 2034 /* Capture pointers to the elements we need */ 2035 pd = psock; 2036 pc = TAILQ_PREV(pd, spdk_has_data_list, link); 2037 pa = TAILQ_FIRST(&group->socks_with_data); 2038 pf = TAILQ_LAST(&group->socks_with_data, spdk_has_data_list); 2039 2040 /* Break the link between C and D */ 2041 pc->link.tqe_next = NULL; 2042 2043 /* Connect F to A */ 2044 pf->link.tqe_next = pa; 2045 pa->link.tqe_prev = &pf->link.tqe_next; 2046 2047 /* Fix up the list first/last pointers */ 2048 group->socks_with_data.tqh_first = pd; 2049 group->socks_with_data.tqh_last = &pc->link.tqe_next; 2050 2051 /* D is in front of the list, make tqe prev pointer point to the head of list */ 2052 pd->link.tqe_prev = &group->socks_with_data.tqh_first; 2053 } 2054 2055 return num_events; 2056 } 2057 2058 static int 2059 posix_sock_group_impl_close(struct spdk_sock_group_impl *_group) 2060 { 2061 struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group); 2062 int rc; 2063 2064 if (g_spdk_posix_sock_impl_opts.enable_placement_id == PLACEMENT_CPU) { 2065 spdk_sock_map_release(&g_map, spdk_env_get_current_core()); 2066 } 2067 2068 rc = close(group->fd); 2069 free(group); 2070 return rc; 2071 } 2072 2073 static struct spdk_net_impl g_posix_net_impl = { 2074 .name = "posix", 2075 .getaddr = posix_sock_getaddr, 2076 .connect = posix_sock_connect, 2077 .listen = posix_sock_listen, 2078 .accept = posix_sock_accept, 2079 .close = posix_sock_close, 2080 .recv = posix_sock_recv, 2081 .readv = posix_sock_readv, 2082 .readv_async = posix_sock_readv_async, 2083 .writev = posix_sock_writev, 2084 .recv_next = posix_sock_recv_next, 2085 .writev_async = posix_sock_writev_async, 2086 .flush = posix_sock_flush, 2087 .set_recvlowat = posix_sock_set_recvlowat, 2088 .set_recvbuf = posix_sock_set_recvbuf, 2089 .set_sendbuf = posix_sock_set_sendbuf, 2090 .is_ipv6 = posix_sock_is_ipv6, 2091 .is_ipv4 = posix_sock_is_ipv4, 2092 .is_connected = posix_sock_is_connected, 2093 .group_impl_get_optimal = posix_sock_group_impl_get_optimal, 2094 .group_impl_create = posix_sock_group_impl_create, 2095 .group_impl_add_sock = posix_sock_group_impl_add_sock, 2096 .group_impl_remove_sock = posix_sock_group_impl_remove_sock, 2097 .group_impl_poll = posix_sock_group_impl_poll, 2098 .group_impl_close = posix_sock_group_impl_close, 2099 .get_opts = posix_sock_impl_get_opts, 2100 .set_opts = posix_sock_impl_set_opts, 2101 }; 2102 2103 SPDK_NET_IMPL_REGISTER(posix, &g_posix_net_impl, DEFAULT_SOCK_PRIORITY + 1); 2104 2105 static struct spdk_sock * 2106 ssl_sock_listen(const char *ip, int port, struct spdk_sock_opts *opts) 2107 { 2108 return posix_sock_create(ip, port, SPDK_SOCK_CREATE_LISTEN, opts, true); 2109 } 2110 2111 static struct spdk_sock * 2112 ssl_sock_connect(const char *ip, int port, struct spdk_sock_opts *opts) 2113 { 2114 return posix_sock_create(ip, port, SPDK_SOCK_CREATE_CONNECT, opts, true); 2115 } 2116 2117 static struct spdk_sock * 2118 ssl_sock_accept(struct spdk_sock *_sock) 2119 { 2120 return _posix_sock_accept(_sock, true); 2121 } 2122 2123 static struct spdk_net_impl g_ssl_net_impl = { 2124 .name = "ssl", 2125 .getaddr = posix_sock_getaddr, 2126 .connect = ssl_sock_connect, 2127 .listen = ssl_sock_listen, 2128 .accept = ssl_sock_accept, 2129 .close = posix_sock_close, 2130 .recv = posix_sock_recv, 2131 .readv = posix_sock_readv, 2132 .writev = posix_sock_writev, 2133 .recv_next = posix_sock_recv_next, 2134 .writev_async = posix_sock_writev_async, 2135 .flush = posix_sock_flush, 2136 .set_recvlowat = posix_sock_set_recvlowat, 2137 .set_recvbuf = posix_sock_set_recvbuf, 2138 .set_sendbuf = posix_sock_set_sendbuf, 2139 .is_ipv6 = posix_sock_is_ipv6, 2140 .is_ipv4 = posix_sock_is_ipv4, 2141 .is_connected = posix_sock_is_connected, 2142 .group_impl_get_optimal = posix_sock_group_impl_get_optimal, 2143 .group_impl_create = posix_sock_group_impl_create, 2144 .group_impl_add_sock = posix_sock_group_impl_add_sock, 2145 .group_impl_remove_sock = posix_sock_group_impl_remove_sock, 2146 .group_impl_poll = posix_sock_group_impl_poll, 2147 .group_impl_close = posix_sock_group_impl_close, 2148 .get_opts = posix_sock_impl_get_opts, 2149 .set_opts = posix_sock_impl_set_opts, 2150 }; 2151 2152 SPDK_NET_IMPL_REGISTER(ssl, &g_ssl_net_impl, DEFAULT_SOCK_PRIORITY); 2153 SPDK_LOG_REGISTER_COMPONENT(sock_posix) 2154