/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NVMe over RDMA transport
 */

#include "spdk/stdinc.h"

#include "spdk/assert.h"
#include "spdk/log.h"
#include "spdk/trace.h"
#include "spdk/queue.h"
#include "spdk/nvme.h"
#include "spdk/nvmf_spec.h"
#include "spdk/string.h"
#include "spdk/endian.h"
#include "spdk/likely.h"
#include "spdk/config.h"

#include "nvme_internal.h"
#include "spdk_internal/rdma.h"

#define NVME_RDMA_TIME_OUT_IN_MS	2000
#define NVME_RDMA_RW_BUFFER_SIZE	131072

/*
 * NVME RDMA qpair Resource Defaults
 */
#define NVME_RDMA_DEFAULT_TX_SGE	2
#define NVME_RDMA_DEFAULT_RX_SGE	1

/* Max number of NVMe-oF SGL descriptors supported by the host */
#define NVME_RDMA_MAX_SGL_DESCRIPTORS	16

/* number of STAILQ entries for holding pending RDMA CM events. */
#define NVME_RDMA_NUM_CM_EVENTS		256

/* CM event processing timeout */
#define NVME_RDMA_QPAIR_CM_EVENT_TIMEOUT_US	1000000

/* The default size for a shared rdma completion queue. */
#define DEFAULT_NVME_RDMA_CQ_SIZE	4096

/*
 * In the special case of a stale connection we don't expose a mechanism
 * for the user to retry the connection so we need to handle it internally.
 */
#define NVME_RDMA_STALE_CONN_RETRY_MAX		5
#define NVME_RDMA_STALE_CONN_RETRY_DELAY_US	10000

/*
 * Maximum value of transport_retry_count used by RDMA controller
 */
#define NVME_RDMA_CTRLR_MAX_TRANSPORT_RETRY_COUNT	7

/*
 * Maximum value of transport_ack_timeout used by RDMA controller
 */
#define NVME_RDMA_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT	31

/*
 * Number of poller cycles to keep a pointer to destroyed qpairs
 * in the poll group.
 */
#define NVME_RDMA_DESTROYED_QPAIR_EXPIRATION_CYCLES	50

/*
 * The max length of keyed SGL data block (3 bytes)
 */
#define NVME_RDMA_MAX_KEYED_SGL_LENGTH	((1u << 24u) - 1)

#define WC_PER_QPAIR(queue_depth)	(queue_depth * 2)

enum nvme_rdma_wr_type {
	RDMA_WR_TYPE_RECV,
	RDMA_WR_TYPE_SEND,
};

struct nvme_rdma_wr {
	/* Using this instead of the enum allows this struct to only occupy one byte. */
	uint8_t type;
};

struct spdk_nvmf_cmd {
	struct spdk_nvme_cmd cmd;
	struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
};

struct spdk_nvme_rdma_hooks g_nvme_hooks = {};

/* Mapping from virtual address to ibv_mr pointer for a protection domain */
struct spdk_nvme_rdma_mr_map {
	struct ibv_pd *pd;
	struct spdk_mem_map *map;
	uint64_t ref;
	LIST_ENTRY(spdk_nvme_rdma_mr_map) link;
};

/* STAILQ wrapper for cm events. */
struct nvme_rdma_cm_event_entry {
	struct rdma_cm_event *evt;
	STAILQ_ENTRY(nvme_rdma_cm_event_entry) link;
};

/* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
struct nvme_rdma_ctrlr {
	struct spdk_nvme_ctrlr ctrlr;

	struct ibv_pd *pd;

	uint16_t max_sge;

	struct rdma_event_channel *cm_channel;

	STAILQ_HEAD(, nvme_rdma_cm_event_entry) pending_cm_events;

	STAILQ_HEAD(, nvme_rdma_cm_event_entry) free_cm_events;

	struct nvme_rdma_cm_event_entry *cm_events;
};

struct nvme_rdma_destroyed_qpair {
	struct nvme_rdma_qpair *destroyed_qpair_tracker;
	uint32_t completed_cycles;
	STAILQ_ENTRY(nvme_rdma_destroyed_qpair) link;
};

struct nvme_rdma_poller {
	struct ibv_context *device;
	struct ibv_cq *cq;
	int required_num_wc;
	int current_num_wc;
	STAILQ_ENTRY(nvme_rdma_poller) link;
};

struct nvme_rdma_poll_group {
	struct spdk_nvme_transport_poll_group group;
	STAILQ_HEAD(, nvme_rdma_poller) pollers;
	int num_pollers;
	STAILQ_HEAD(, nvme_rdma_destroyed_qpair) destroyed_qpairs;
};

struct spdk_nvme_recv_wr_list {
	struct ibv_recv_wr *first;
	struct ibv_recv_wr *last;
};

/* Memory regions */
union nvme_rdma_mr {
	struct ibv_mr *mr;
	uint64_t key;
};

/* NVMe RDMA qpair extensions for spdk_nvme_qpair */
struct nvme_rdma_qpair {
	struct spdk_nvme_qpair qpair;

	struct spdk_rdma_qp *rdma_qp;
	struct rdma_cm_id *cm_id;
	struct ibv_cq *cq;

	struct spdk_nvme_rdma_req *rdma_reqs;

	uint32_t max_send_sge;

	uint32_t max_recv_sge;

	uint16_t num_entries;

	bool delay_cmd_submit;

	bool poll_group_disconnect_in_progress;

	uint32_t num_completions;

	/* Parallel arrays of response buffers + response SGLs of size num_entries */
	struct ibv_sge *rsp_sgls;
	struct spdk_nvme_rdma_rsp *rsps;

	struct ibv_recv_wr *rsp_recv_wrs;

	struct spdk_nvme_recv_wr_list recvs_to_post;

	/* Memory region describing all rsps for this qpair */
	union nvme_rdma_mr rsp_mr;

	/*
	 * Array of num_entries NVMe commands registered as RDMA message buffers.
	 * Indexed by rdma_req->id.
	 */
	struct spdk_nvmf_cmd *cmds;

	/* Memory region describing all cmds for this qpair */
	union nvme_rdma_mr cmd_mr;

	struct spdk_nvme_rdma_mr_map *mr_map;

	TAILQ_HEAD(, spdk_nvme_rdma_req) free_reqs;
	TAILQ_HEAD(, spdk_nvme_rdma_req) outstanding_reqs;

	/* Counts of outstanding send and recv objects */
	uint16_t current_num_recvs;
	uint16_t current_num_sends;

	/* Placed at the end of the struct since it is not used frequently */
	struct rdma_cm_event *evt;

	/* Used by poll group to keep the qpair around until it is ready to remove it. */
	bool defer_deletion_to_pg;
};

enum NVME_RDMA_COMPLETION_FLAGS {
	NVME_RDMA_SEND_COMPLETED = 1u << 0,
	NVME_RDMA_RECV_COMPLETED = 1u << 1,
};

struct spdk_nvme_rdma_req {
	uint16_t id;
	uint16_t completion_flags: 2;
	uint16_t reserved: 14;
	/* if completion of RDMA_RECV received before RDMA_SEND, we will complete nvme request
	 * during processing of RDMA_SEND. To complete the request we must know the index
	 * of nvme_cpl received in RDMA_RECV, so store it in this field */
	uint16_t rsp_idx;

	struct nvme_rdma_wr rdma_wr;

	struct ibv_send_wr send_wr;

	struct nvme_request *req;

	struct ibv_sge send_sgl[NVME_RDMA_DEFAULT_TX_SGE];

	TAILQ_ENTRY(spdk_nvme_rdma_req) link;
};

enum nvme_rdma_key_type {
	NVME_RDMA_MR_RKEY,
	NVME_RDMA_MR_LKEY
};

struct spdk_nvme_rdma_rsp {
	struct spdk_nvme_cpl cpl;
	struct nvme_rdma_qpair *rqpair;
	uint16_t idx;
	struct nvme_rdma_wr rdma_wr;
};

static const char *rdma_cm_event_str[] = {
	"RDMA_CM_EVENT_ADDR_RESOLVED",
	"RDMA_CM_EVENT_ADDR_ERROR",
	"RDMA_CM_EVENT_ROUTE_RESOLVED",
	"RDMA_CM_EVENT_ROUTE_ERROR",
	"RDMA_CM_EVENT_CONNECT_REQUEST",
	"RDMA_CM_EVENT_CONNECT_RESPONSE",
	"RDMA_CM_EVENT_CONNECT_ERROR",
	"RDMA_CM_EVENT_UNREACHABLE",
	"RDMA_CM_EVENT_REJECTED",
	"RDMA_CM_EVENT_ESTABLISHED",
	"RDMA_CM_EVENT_DISCONNECTED",
	"RDMA_CM_EVENT_DEVICE_REMOVAL",
	"RDMA_CM_EVENT_MULTICAST_JOIN",
	"RDMA_CM_EVENT_MULTICAST_ERROR",
	"RDMA_CM_EVENT_ADDR_CHANGE",
	"RDMA_CM_EVENT_TIMEWAIT_EXIT"
};

static LIST_HEAD(, spdk_nvme_rdma_mr_map) g_rdma_mr_maps = LIST_HEAD_INITIALIZER(&g_rdma_mr_maps);
static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER;

struct nvme_rdma_qpair *nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group,
		uint32_t qp_num);

static inline void *
nvme_rdma_calloc(size_t nmemb, size_t size)
{
	if (!g_nvme_hooks.get_rkey) {
		return calloc(nmemb, size);
	} else {
		return spdk_zmalloc(nmemb * size, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	}
}

static inline void
nvme_rdma_free(void *buf)
{
	if (!g_nvme_hooks.get_rkey) {
		free(buf);
	} else {
		spdk_free(buf);
	}
}

static int nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_qpair *qpair);

static inline struct nvme_rdma_qpair *
nvme_rdma_qpair(struct spdk_nvme_qpair *qpair)
{
	assert(qpair->trtype == SPDK_NVME_TRANSPORT_RDMA);
	return SPDK_CONTAINEROF(qpair, struct nvme_rdma_qpair, qpair);
}

static inline struct nvme_rdma_poll_group *
nvme_rdma_poll_group(struct spdk_nvme_transport_poll_group *group)
{
	return (SPDK_CONTAINEROF(group, struct nvme_rdma_poll_group, group));
}

static inline struct nvme_rdma_ctrlr *
nvme_rdma_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA);
	return SPDK_CONTAINEROF(ctrlr, struct nvme_rdma_ctrlr, ctrlr);
}

static struct spdk_nvme_rdma_req *
nvme_rdma_req_get(struct nvme_rdma_qpair *rqpair)
{
	struct spdk_nvme_rdma_req *rdma_req;

	rdma_req = TAILQ_FIRST(&rqpair->free_reqs);
	if (rdma_req) {
		TAILQ_REMOVE(&rqpair->free_reqs, rdma_req, link);
		TAILQ_INSERT_TAIL(&rqpair->outstanding_reqs, rdma_req, link);
	}

	return rdma_req;
}

static void
nvme_rdma_req_put(struct nvme_rdma_qpair *rqpair, struct spdk_nvme_rdma_req *rdma_req)
{
	rdma_req->completion_flags = 0;
	rdma_req->req = NULL;
	TAILQ_INSERT_HEAD(&rqpair->free_reqs, rdma_req, link);
}

static void
nvme_rdma_req_complete(struct spdk_nvme_rdma_req *rdma_req,
		       struct spdk_nvme_cpl *rsp)
{
	struct nvme_request *req = rdma_req->req;
	struct nvme_rdma_qpair *rqpair;

	assert(req != NULL);

	rqpair = nvme_rdma_qpair(req->qpair);
	TAILQ_REMOVE(&rqpair->outstanding_reqs, rdma_req, link);

	nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, rsp);
	nvme_free_request(req);
}

static const char *
nvme_rdma_cm_event_str_get(uint32_t event)
{
	if (event < SPDK_COUNTOF(rdma_cm_event_str)) {
		return rdma_cm_event_str[event];
	} else {
		return "Undefined";
	}
}
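
/*
 * Consume the qpair's currently pending CM event, if any: adjust the queue
 * depth on connect-response/established events, record a transport failure
 * reason on disconnect, device-removal and address-change events, and ack
 * the event back to the CM.
 */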
static int
nvme_rdma_qpair_process_cm_event(struct nvme_rdma_qpair *rqpair)
{
	struct rdma_cm_event *event = rqpair->evt;
	struct spdk_nvmf_rdma_accept_private_data *accept_data;
	int rc = 0;

	if (event) {
		switch (event->event) {
		case RDMA_CM_EVENT_ADDR_RESOLVED:
		case RDMA_CM_EVENT_ADDR_ERROR:
		case RDMA_CM_EVENT_ROUTE_RESOLVED:
		case RDMA_CM_EVENT_ROUTE_ERROR:
			break;
		case RDMA_CM_EVENT_CONNECT_REQUEST:
			break;
		case RDMA_CM_EVENT_CONNECT_ERROR:
			break;
		case RDMA_CM_EVENT_UNREACHABLE:
		case RDMA_CM_EVENT_REJECTED:
			break;
		case RDMA_CM_EVENT_CONNECT_RESPONSE:
			rc = spdk_rdma_qp_complete_connect(rqpair->rdma_qp);
		/* fall through */
		case RDMA_CM_EVENT_ESTABLISHED:
			accept_data = (struct spdk_nvmf_rdma_accept_private_data *)event->param.conn.private_data;
			if (accept_data == NULL) {
				rc = -1;
			} else {
				SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested queue depth %d. Actually got queue depth %d.\n",
					      rqpair->num_entries, accept_data->crqsize);
				rqpair->num_entries = spdk_min(rqpair->num_entries, accept_data->crqsize);
			}
			break;
		case RDMA_CM_EVENT_DISCONNECTED:
			rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_REMOTE;
			break;
		case RDMA_CM_EVENT_DEVICE_REMOVAL:
			rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
			break;
		case RDMA_CM_EVENT_MULTICAST_JOIN:
		case RDMA_CM_EVENT_MULTICAST_ERROR:
			break;
		case RDMA_CM_EVENT_ADDR_CHANGE:
			rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
			break;
		case RDMA_CM_EVENT_TIMEWAIT_EXIT:
			break;
		default:
			SPDK_ERRLOG("Unexpected Acceptor Event [%d]\n", event->event);
			break;
		}
		rqpair->evt = NULL;
		rdma_ack_cm_event(event);
	}

	return rc;
}

/*
 * This function must be called under the nvme controller's lock
 * because it touches global controller variables. The lock is taken
 * by the generic transport code before invoking a few of the functions
 * in this file: nvme_rdma_ctrlr_connect_qpair, nvme_rdma_ctrlr_delete_io_qpair,
 * and conditionally nvme_rdma_qpair_process_completions when it is calling
 * completions on the admin qpair. When adding a new call to this function, please
 * verify that it is in a situation where it falls under the lock.
 */
static int
nvme_rdma_poll_events(struct nvme_rdma_ctrlr *rctrlr)
{
	struct nvme_rdma_cm_event_entry *entry, *tmp;
	struct nvme_rdma_qpair *event_qpair;
	struct rdma_cm_event *event;
	struct rdma_event_channel *channel = rctrlr->cm_channel;

	STAILQ_FOREACH_SAFE(entry, &rctrlr->pending_cm_events, link, tmp) {
		event_qpair = nvme_rdma_qpair(entry->evt->id->context);
		if (event_qpair->evt == NULL) {
			event_qpair->evt = entry->evt;
			STAILQ_REMOVE(&rctrlr->pending_cm_events, entry, nvme_rdma_cm_event_entry, link);
			STAILQ_INSERT_HEAD(&rctrlr->free_cm_events, entry, link);
		}
	}

	while (rdma_get_cm_event(channel, &event) == 0) {
		event_qpair = nvme_rdma_qpair(event->id->context);
		if (event_qpair->evt == NULL) {
			event_qpair->evt = event;
		} else {
			assert(rctrlr == nvme_rdma_ctrlr(event_qpair->qpair.ctrlr));
			entry = STAILQ_FIRST(&rctrlr->free_cm_events);
			if (entry == NULL) {
				rdma_ack_cm_event(event);
				return -ENOMEM;
			}
			STAILQ_REMOVE(&rctrlr->free_cm_events, entry, nvme_rdma_cm_event_entry, link);
			entry->evt = event;
			STAILQ_INSERT_TAIL(&rctrlr->pending_cm_events, entry, link);
		}
	}

	if (errno == EAGAIN || errno == EWOULDBLOCK) {
		return 0;
	} else {
		return errno;
	}
}
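
/*
 * Check that the CM event reaped from the event channel matches the event
 * type the caller expected. Returns 0 on a match, -ESTALE for a stale
 * connection rejection, and -EBADMSG for any other mismatch.
 */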
static int
nvme_rdma_validate_cm_event(enum rdma_cm_event_type expected_evt_type,
			    struct rdma_cm_event *reaped_evt)
{
	int rc = -EBADMSG;

	if (expected_evt_type == reaped_evt->event) {
		return 0;
	}

	switch (expected_evt_type) {
	case RDMA_CM_EVENT_ESTABLISHED:
		/*
		 * There is an enum ib_cm_rej_reason in the kernel headers that sets 10 as
		 * IB_CM_REJ_STALE_CONN. I can't find the corresponding userspace but we get
		 * the same values here.
		 */
		if (reaped_evt->event == RDMA_CM_EVENT_REJECTED && reaped_evt->status == 10) {
			rc = -ESTALE;
		} else if (reaped_evt->event == RDMA_CM_EVENT_CONNECT_RESPONSE) {
			/*
			 * If we are using a qpair which is not created using rdma cm API
			 * then we will receive RDMA_CM_EVENT_CONNECT_RESPONSE instead of
			 * RDMA_CM_EVENT_ESTABLISHED.
			 */
			return 0;
		}
		break;
	default:
		break;
	}

	SPDK_ERRLOG("Expected %s but received %s (%d) from CM event channel (status = %d)\n",
		    nvme_rdma_cm_event_str_get(expected_evt_type),
		    nvme_rdma_cm_event_str_get(reaped_evt->event), reaped_evt->event,
		    reaped_evt->status);
	return rc;
}
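
/*
 * Wait for the expected CM event on this qpair, polling the controller's
 * event channel for up to NVME_RDMA_QPAIR_CM_EVENT_TIMEOUT_US, then validate
 * and process whatever event arrived.
 */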
static int
nvme_rdma_process_event(struct nvme_rdma_qpair *rqpair,
			struct rdma_event_channel *channel,
			enum rdma_cm_event_type evt)
{
	struct nvme_rdma_ctrlr *rctrlr;
	uint64_t timeout_ticks;
	int rc = 0, rc2;

	if (rqpair->evt != NULL) {
		rc = nvme_rdma_qpair_process_cm_event(rqpair);
		if (rc) {
			return rc;
		}
	}

	timeout_ticks = (NVME_RDMA_QPAIR_CM_EVENT_TIMEOUT_US * spdk_get_ticks_hz()) / SPDK_SEC_TO_USEC +
			spdk_get_ticks();
	rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
	assert(rctrlr != NULL);

	while (!rqpair->evt && spdk_get_ticks() < timeout_ticks && rc == 0) {
		rc = nvme_rdma_poll_events(rctrlr);
	}

	if (rc) {
		return rc;
	}

	if (rqpair->evt == NULL) {
		return -EADDRNOTAVAIL;
	}

	rc = nvme_rdma_validate_cm_event(evt, rqpair->evt);

	rc2 = nvme_rdma_qpair_process_cm_event(rqpair);
	/* bad message takes precedence over the other error codes from processing the event. */
	return rc == 0 ? rc2 : rc;
}
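
/*
 * Create the completion queue (or attach to the poll group's shared one) and
 * the RDMA queue pair for this connection, sizing the send/recv work request
 * capabilities from num_entries and the device limits.
 */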
static int
nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
{
	int rc;
	struct spdk_rdma_qp_init_attr attr = {};
	struct ibv_device_attr dev_attr;
	struct nvme_rdma_ctrlr *rctrlr;

	rc = ibv_query_device(rqpair->cm_id->verbs, &dev_attr);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to query RDMA device attributes.\n");
		return -1;
	}

	if (rqpair->qpair.poll_group) {
		assert(!rqpair->cq);
		rc = nvme_poll_group_connect_qpair(&rqpair->qpair);
		if (rc) {
			SPDK_ERRLOG("Unable to activate the rdma qpair.\n");
			return -1;
		}
		assert(rqpair->cq);
	} else {
		rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0);
		if (!rqpair->cq) {
			SPDK_ERRLOG("Unable to create completion queue: errno %d: %s\n", errno, spdk_strerror(errno));
			return -1;
		}
	}

	rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
	if (g_nvme_hooks.get_ibv_pd) {
		rctrlr->pd = g_nvme_hooks.get_ibv_pd(&rctrlr->ctrlr.trid, rqpair->cm_id->verbs);
	} else {
		rctrlr->pd = NULL;
	}

	attr.pd = rctrlr->pd;
	attr.send_cq = rqpair->cq;
	attr.recv_cq = rqpair->cq;
	attr.cap.max_send_wr = rqpair->num_entries; /* SEND operations */
	attr.cap.max_recv_wr = rqpair->num_entries; /* RECV operations */
	attr.cap.max_send_sge = spdk_min(NVME_RDMA_DEFAULT_TX_SGE, dev_attr.max_sge);
	attr.cap.max_recv_sge = spdk_min(NVME_RDMA_DEFAULT_RX_SGE, dev_attr.max_sge);

	rqpair->rdma_qp = spdk_rdma_qp_create(rqpair->cm_id, &attr);

	if (!rqpair->rdma_qp) {
		return -1;
	}

	/* ibv_create_qp will change the values in attr.cap. Make sure we store the proper value. */
	rqpair->max_send_sge = spdk_min(NVME_RDMA_DEFAULT_TX_SGE, attr.cap.max_send_sge);
	rqpair->max_recv_sge = spdk_min(NVME_RDMA_DEFAULT_RX_SGE, attr.cap.max_recv_sge);
	rqpair->current_num_recvs = 0;
	rqpair->current_num_sends = 0;

	rctrlr->pd = rqpair->rdma_qp->qp->pd;

	rqpair->cm_id->context = &rqpair->qpair;

	return 0;
}
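
/*
 * Flush any send work requests that were queued on the qpair. On failure,
 * walk the bad_wr chain so the outstanding send counter stays consistent.
 */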
static inline int
nvme_rdma_qpair_submit_sends(struct nvme_rdma_qpair *rqpair)
{
	struct ibv_send_wr *bad_send_wr;
	int rc;

	rc = spdk_rdma_qp_flush_send_wrs(rqpair->rdma_qp, &bad_send_wr);

	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("Failed to post WRs on send queue, errno %d (%s), bad_wr %p\n",
			    rc, spdk_strerror(rc), bad_send_wr);
		while (bad_send_wr != NULL) {
			assert(rqpair->current_num_sends > 0);
			rqpair->current_num_sends--;
			bad_send_wr = bad_send_wr->next;
		}
		return rc;
	}

	return 0;
}

static inline int
nvme_rdma_qpair_submit_recvs(struct nvme_rdma_qpair *rqpair)
{
	struct ibv_recv_wr *bad_recv_wr;
	int rc = 0;

	if (rqpair->recvs_to_post.first) {
		rc = ibv_post_recv(rqpair->rdma_qp->qp, rqpair->recvs_to_post.first, &bad_recv_wr);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("Failed to post WRs on receive queue, errno %d (%s), bad_wr %p\n",
				    rc, spdk_strerror(rc), bad_recv_wr);
			while (bad_recv_wr != NULL) {
				assert(rqpair->current_num_recvs > 0);
				rqpair->current_num_recvs--;
				bad_recv_wr = bad_recv_wr->next;
			}
		}

		rqpair->recvs_to_post.first = NULL;
	}
	return rc;
}

/* Append the given send wr structure to the qpair's outstanding sends list. */
/* This function accepts only a single wr. */
static inline int
nvme_rdma_qpair_queue_send_wr(struct nvme_rdma_qpair *rqpair, struct ibv_send_wr *wr)
{
	assert(wr->next == NULL);

	assert(rqpair->current_num_sends < rqpair->num_entries);

	rqpair->current_num_sends++;
	spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, wr);

	if (!rqpair->delay_cmd_submit) {
		return nvme_rdma_qpair_submit_sends(rqpair);
	}

	return 0;
}

/* Append the given recv wr structure to the qpair's outstanding recvs list. */
/* This function accepts only a single wr. */
static inline int
nvme_rdma_qpair_queue_recv_wr(struct nvme_rdma_qpair *rqpair, struct ibv_recv_wr *wr)
{
	assert(wr->next == NULL);
	assert(rqpair->current_num_recvs < rqpair->num_entries);

	rqpair->current_num_recvs++;
	if (rqpair->recvs_to_post.first == NULL) {
		rqpair->recvs_to_post.first = wr;
	} else {
		rqpair->recvs_to_post.last->next = wr;
	}

	rqpair->recvs_to_post.last = wr;

	if (!rqpair->delay_cmd_submit) {
		return nvme_rdma_qpair_submit_recvs(rqpair);
	}

	return 0;
}

#define nvme_rdma_trace_ibv_sge(sg_list) \
	if (sg_list) { \
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "local addr %p length 0x%x lkey 0x%x\n", \
			      (void *)(sg_list)->addr, (sg_list)->length, (sg_list)->lkey); \
	}

static int
nvme_rdma_post_recv(struct nvme_rdma_qpair *rqpair, uint16_t rsp_idx)
{
	struct ibv_recv_wr *wr;

	wr = &rqpair->rsp_recv_wrs[rsp_idx];
	wr->next = NULL;
	nvme_rdma_trace_ibv_sge(wr->sg_list);
	return nvme_rdma_qpair_queue_recv_wr(rqpair, wr);
}

static int
nvme_rdma_reg_mr(struct rdma_cm_id *cm_id, union nvme_rdma_mr *mr, void *mem, size_t length)
{
	if (!g_nvme_hooks.get_rkey) {
		mr->mr = rdma_reg_msgs(cm_id, mem, length);
		if (mr->mr == NULL) {
			SPDK_ERRLOG("Unable to register mr: %s (%d)\n",
				    spdk_strerror(errno), errno);
			return -1;
		}
	} else {
		mr->key = g_nvme_hooks.get_rkey(cm_id->pd, mem, length);
	}

	return 0;
}

static void
nvme_rdma_dereg_mr(union nvme_rdma_mr *mr)
{
	if (!g_nvme_hooks.get_rkey) {
		if (mr->mr && rdma_dereg_mr(mr->mr)) {
			SPDK_ERRLOG("Unable to de-register mr\n");
		}
	} else {
		if (mr->key) {
			g_nvme_hooks.put_rkey(mr->key);
		}
	}
	memset(mr, 0, sizeof(*mr));
}

static uint32_t
nvme_rdma_mr_get_lkey(union nvme_rdma_mr *mr)
{
	uint32_t lkey;

	if (!g_nvme_hooks.get_rkey) {
		lkey = mr->mr->lkey;
	} else {
		lkey = *((uint64_t *) mr->key);
	}

	return lkey;
}

static void
nvme_rdma_unregister_rsps(struct nvme_rdma_qpair *rqpair)
{
	nvme_rdma_dereg_mr(&rqpair->rsp_mr);
}

static void
nvme_rdma_free_rsps(struct nvme_rdma_qpair *rqpair)
{
	nvme_rdma_free(rqpair->rsps);
	rqpair->rsps = NULL;
	nvme_rdma_free(rqpair->rsp_sgls);
	rqpair->rsp_sgls = NULL;
	nvme_rdma_free(rqpair->rsp_recv_wrs);
	rqpair->rsp_recv_wrs = NULL;
}

static int
nvme_rdma_alloc_rsps(struct nvme_rdma_qpair *rqpair)
{
	rqpair->rsps = NULL;
	rqpair->rsp_recv_wrs = NULL;

	rqpair->rsp_sgls = nvme_rdma_calloc(rqpair->num_entries, sizeof(*rqpair->rsp_sgls));
	if (!rqpair->rsp_sgls) {
		SPDK_ERRLOG("Failed to allocate rsp_sgls\n");
		goto fail;
	}

	rqpair->rsp_recv_wrs = nvme_rdma_calloc(rqpair->num_entries, sizeof(*rqpair->rsp_recv_wrs));
	if (!rqpair->rsp_recv_wrs) {
		SPDK_ERRLOG("Failed to allocate rsp_recv_wrs\n");
		goto fail;
	}

	rqpair->rsps = nvme_rdma_calloc(rqpair->num_entries, sizeof(*rqpair->rsps));
	if (!rqpair->rsps) {
		SPDK_ERRLOG("can not allocate rdma rsps\n");
		goto fail;
	}

	return 0;
fail:
	nvme_rdma_free_rsps(rqpair);
	return -ENOMEM;
}
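
/*
 * Register the response buffers with the RDMA device and pre-post one
 * receive work request per queue entry so completions can be received as
 * soon as the connection is established.
 */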
static int
nvme_rdma_register_rsps(struct nvme_rdma_qpair *rqpair)
{
	uint16_t i;
	int rc;
	uint32_t lkey;

	rc = nvme_rdma_reg_mr(rqpair->cm_id, &rqpair->rsp_mr,
			      rqpair->rsps, rqpair->num_entries * sizeof(*rqpair->rsps));

	if (rc < 0) {
		goto fail;
	}

	lkey = nvme_rdma_mr_get_lkey(&rqpair->rsp_mr);

	for (i = 0; i < rqpair->num_entries; i++) {
		struct ibv_sge *rsp_sgl = &rqpair->rsp_sgls[i];
		struct spdk_nvme_rdma_rsp *rsp = &rqpair->rsps[i];

		rsp->rqpair = rqpair;
		rsp->rdma_wr.type = RDMA_WR_TYPE_RECV;
		rsp->idx = i;
		rsp_sgl->addr = (uint64_t)&rqpair->rsps[i];
		rsp_sgl->length = sizeof(struct spdk_nvme_cpl);
		rsp_sgl->lkey = lkey;

		rqpair->rsp_recv_wrs[i].wr_id = (uint64_t)&rsp->rdma_wr;
		rqpair->rsp_recv_wrs[i].next = NULL;
		rqpair->rsp_recv_wrs[i].sg_list = rsp_sgl;
		rqpair->rsp_recv_wrs[i].num_sge = 1;

		rc = nvme_rdma_post_recv(rqpair, i);
		if (rc) {
			goto fail;
		}
	}

	rc = nvme_rdma_qpair_submit_recvs(rqpair);
	if (rc) {
		goto fail;
	}

	return 0;

fail:
	nvme_rdma_unregister_rsps(rqpair);
	return rc;
}

static void
nvme_rdma_unregister_reqs(struct nvme_rdma_qpair *rqpair)
{
	nvme_rdma_dereg_mr(&rqpair->cmd_mr);
}

static void
nvme_rdma_free_reqs(struct nvme_rdma_qpair *rqpair)
{
	if (!rqpair->rdma_reqs) {
		return;
	}

	nvme_rdma_free(rqpair->cmds);
	rqpair->cmds = NULL;

	nvme_rdma_free(rqpair->rdma_reqs);
	rqpair->rdma_reqs = NULL;
}
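
/*
 * Allocate the per-queue-entry request tracking structures and the NVMe-oF
 * command buffers they point at, and initialize the send work request each
 * request will post.
 */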
static int
nvme_rdma_alloc_reqs(struct nvme_rdma_qpair *rqpair)
{
	uint16_t i;

	rqpair->rdma_reqs = nvme_rdma_calloc(rqpair->num_entries, sizeof(struct spdk_nvme_rdma_req));
	if (rqpair->rdma_reqs == NULL) {
		SPDK_ERRLOG("Failed to allocate rdma_reqs\n");
		goto fail;
	}

	rqpair->cmds = nvme_rdma_calloc(rqpair->num_entries, sizeof(*rqpair->cmds));
	if (!rqpair->cmds) {
		SPDK_ERRLOG("Failed to allocate RDMA cmds\n");
		goto fail;
	}

	TAILQ_INIT(&rqpair->free_reqs);
	TAILQ_INIT(&rqpair->outstanding_reqs);
	for (i = 0; i < rqpair->num_entries; i++) {
		struct spdk_nvme_rdma_req *rdma_req;
		struct spdk_nvmf_cmd *cmd;

		rdma_req = &rqpair->rdma_reqs[i];
		rdma_req->rdma_wr.type = RDMA_WR_TYPE_SEND;
		cmd = &rqpair->cmds[i];

		rdma_req->id = i;

		/* The first RDMA sgl element will always point
		 * at this data structure. Depending on whether
		 * an NVMe-oF SGL is required, the length of
		 * this element may change. */
		rdma_req->send_sgl[0].addr = (uint64_t)cmd;
		rdma_req->send_wr.wr_id = (uint64_t)&rdma_req->rdma_wr;
		rdma_req->send_wr.next = NULL;
		rdma_req->send_wr.opcode = IBV_WR_SEND;
		rdma_req->send_wr.send_flags = IBV_SEND_SIGNALED;
		rdma_req->send_wr.sg_list = rdma_req->send_sgl;
		rdma_req->send_wr.imm_data = 0;

		TAILQ_INSERT_TAIL(&rqpair->free_reqs, rdma_req, link);
	}

	return 0;
fail:
	nvme_rdma_free_reqs(rqpair);
	return -ENOMEM;
}

static int
nvme_rdma_register_reqs(struct nvme_rdma_qpair *rqpair)
{
	int i;
	int rc;
	uint32_t lkey;

	rc = nvme_rdma_reg_mr(rqpair->cm_id, &rqpair->cmd_mr,
			      rqpair->cmds, rqpair->num_entries * sizeof(*rqpair->cmds));

	if (rc < 0) {
		goto fail;
	}

	lkey = nvme_rdma_mr_get_lkey(&rqpair->cmd_mr);

	for (i = 0; i < rqpair->num_entries; i++) {
		rqpair->rdma_reqs[i].send_sgl[0].lkey = lkey;
	}

	return 0;

fail:
	nvme_rdma_unregister_reqs(rqpair);
	return -ENOMEM;
}

static int
nvme_rdma_resolve_addr(struct nvme_rdma_qpair *rqpair,
		       struct sockaddr *src_addr,
		       struct sockaddr *dst_addr,
		       struct rdma_event_channel *cm_channel)
{
	int ret;

	ret = rdma_resolve_addr(rqpair->cm_id, src_addr, dst_addr,
				NVME_RDMA_TIME_OUT_IN_MS);
	if (ret) {
		SPDK_ERRLOG("rdma_resolve_addr, %d\n", errno);
		return ret;
	}

	ret = nvme_rdma_process_event(rqpair, cm_channel, RDMA_CM_EVENT_ADDR_RESOLVED);
	if (ret) {
		SPDK_ERRLOG("RDMA address resolution error\n");
		return -1;
	}

	if (rqpair->qpair.ctrlr->opts.transport_ack_timeout != SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED) {
#ifdef SPDK_CONFIG_RDMA_SET_ACK_TIMEOUT
		uint8_t timeout = rqpair->qpair.ctrlr->opts.transport_ack_timeout;

		ret = rdma_set_option(rqpair->cm_id, RDMA_OPTION_ID,
				      RDMA_OPTION_ID_ACK_TIMEOUT,
				      &timeout, sizeof(timeout));
		if (ret) {
			SPDK_NOTICELOG("Can't apply RDMA_OPTION_ID_ACK_TIMEOUT %d, ret %d\n", timeout, ret);
		}
#else
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport_ack_timeout is not supported\n");
#endif
	}

	ret = rdma_resolve_route(rqpair->cm_id, NVME_RDMA_TIME_OUT_IN_MS);
	if (ret) {
		SPDK_ERRLOG("rdma_resolve_route\n");
		return ret;
	}

	ret = nvme_rdma_process_event(rqpair, cm_channel, RDMA_CM_EVENT_ROUTE_RESOLVED);
	if (ret) {
		SPDK_ERRLOG("RDMA route resolution error\n");
		return -1;
	}

	return 0;
}
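
/*
 * Issue rdma_connect() with the NVMe-oF RDMA private data (queue ID, queue
 * sizes and controller ID) and wait for the ESTABLISHED event. A stale
 * connection rejection is reported as -EAGAIN so the caller can retry.
 */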
static int
nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
{
	struct rdma_conn_param param = {};
	struct spdk_nvmf_rdma_request_private_data request_data = {};
	struct ibv_device_attr attr;
	int ret;
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_rdma_ctrlr *rctrlr;

	ret = ibv_query_device(rqpair->cm_id->verbs, &attr);
	if (ret != 0) {
		SPDK_ERRLOG("Failed to query RDMA device attributes.\n");
		return ret;
	}

	param.responder_resources = spdk_min(rqpair->num_entries, attr.max_qp_rd_atom);

	ctrlr = rqpair->qpair.ctrlr;
	if (!ctrlr) {
		return -1;
	}
	rctrlr = nvme_rdma_ctrlr(ctrlr);
	assert(rctrlr != NULL);

	request_data.qid = rqpair->qpair.id;
	request_data.hrqsize = rqpair->num_entries;
	request_data.hsqsize = rqpair->num_entries - 1;
	request_data.cntlid = ctrlr->cntlid;

	param.private_data = &request_data;
	param.private_data_len = sizeof(request_data);
	param.retry_count = ctrlr->opts.transport_retry_count;
	param.rnr_retry_count = 7;

	/* Fields below are ignored by rdma cm if qpair has been
	 * created using rdma cm API. */
	param.srq = 0;
	param.qp_num = rqpair->rdma_qp->qp->qp_num;

	ret = rdma_connect(rqpair->cm_id, &param);
	if (ret) {
		SPDK_ERRLOG("nvme rdma connect error\n");
		return ret;
	}

	ret = nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_ESTABLISHED);
	if (ret == -ESTALE) {
		SPDK_NOTICELOG("Received a stale connection notice during connection.\n");
		return -EAGAIN;
	} else if (ret) {
		SPDK_ERRLOG("RDMA connect error %d\n", ret);
		return ret;
	} else {
		return 0;
	}
}

static int
nvme_rdma_parse_addr(struct sockaddr_storage *sa, int family, const char *addr, const char *service)
{
	struct addrinfo *res;
	struct addrinfo hints;
	int ret;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = family;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = 0;

	ret = getaddrinfo(addr, service, &hints, &res);
	if (ret) {
		SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(ret), ret);
		return ret;
	}

	if (res->ai_addrlen > sizeof(*sa)) {
		SPDK_ERRLOG("getaddrinfo() ai_addrlen %zu too large\n", (size_t)res->ai_addrlen);
		ret = EINVAL;
	} else {
		memcpy(sa, res->ai_addr, res->ai_addrlen);
	}

	freeaddrinfo(res);
	return ret;
}
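
/*
 * Memory map notify callback: register newly added memory with the
 * protection domain (or through the registered hooks) and store the
 * resulting ibv_mr or key as the map translation; unregister and clear the
 * translation on removal.
 */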
static int
nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
			enum spdk_mem_map_notify_action action,
			void *vaddr, size_t size)
{
	struct ibv_pd *pd = cb_ctx;
	struct ibv_mr *mr;
	int rc;

	switch (action) {
	case SPDK_MEM_MAP_NOTIFY_REGISTER:
		if (!g_nvme_hooks.get_rkey) {
			mr = ibv_reg_mr(pd, vaddr, size,
					IBV_ACCESS_LOCAL_WRITE |
					IBV_ACCESS_REMOTE_READ |
					IBV_ACCESS_REMOTE_WRITE);
			if (mr == NULL) {
				SPDK_ERRLOG("ibv_reg_mr() failed\n");
				return -EFAULT;
			} else {
				rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
			}
		} else {
			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
							  g_nvme_hooks.get_rkey(pd, vaddr, size));
		}
		break;
	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
		if (!g_nvme_hooks.get_rkey) {
			mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
			if (mr) {
				ibv_dereg_mr(mr);
			}
		}
		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
		break;
	default:
		SPDK_UNREACHABLE();
	}

	return rc;
}

static int
nvme_rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
{
	/* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */
	return addr_1 == addr_2;
}
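
/*
 * Attach the qpair to the memory region map for its protection domain,
 * creating the map on first use. Maps are shared globally per PD and are
 * reference counted.
 */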
static int
nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
{
	struct ibv_pd *pd = rqpair->rdma_qp->qp->pd;
	struct spdk_nvme_rdma_mr_map *mr_map;
	const struct spdk_mem_map_ops nvme_rdma_map_ops = {
		.notify_cb = nvme_rdma_mr_map_notify,
		.are_contiguous = nvme_rdma_check_contiguous_entries
	};

	pthread_mutex_lock(&g_rdma_mr_maps_mutex);

	/* Look up existing mem map registration for this pd */
	LIST_FOREACH(mr_map, &g_rdma_mr_maps, link) {
		if (mr_map->pd == pd) {
			mr_map->ref++;
			rqpair->mr_map = mr_map;
			pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
			return 0;
		}
	}

	mr_map = nvme_rdma_calloc(1, sizeof(*mr_map));
	if (mr_map == NULL) {
		SPDK_ERRLOG("Failed to allocate mr_map\n");
		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
		return -1;
	}

	mr_map->ref = 1;
	mr_map->pd = pd;
	mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops, pd);
	if (mr_map->map == NULL) {
		SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
		nvme_rdma_free(mr_map);
		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
		return -1;
	}

	rqpair->mr_map = mr_map;
	LIST_INSERT_HEAD(&g_rdma_mr_maps, mr_map, link);

	pthread_mutex_unlock(&g_rdma_mr_maps_mutex);

	return 0;
}

static void
nvme_rdma_unregister_mem(struct nvme_rdma_qpair *rqpair)
{
	struct spdk_nvme_rdma_mr_map *mr_map;

	mr_map = rqpair->mr_map;
	rqpair->mr_map = NULL;

	if (mr_map == NULL) {
		return;
	}

	pthread_mutex_lock(&g_rdma_mr_maps_mutex);

	assert(mr_map->ref > 0);
	mr_map->ref--;
	if (mr_map->ref == 0) {
		LIST_REMOVE(mr_map, link);
		spdk_mem_map_free(&mr_map->map);
		nvme_rdma_free(mr_map);
	}

	pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
}
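
/*
 * Full qpair connection sequence: resolve the address and route, create the
 * queue pair, perform the RDMA CM connect, register command/response buffers
 * and memory, then send the NVMe-oF Fabric CONNECT command.
 */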
static int
_nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	bool src_addr_specified;
	int rc;
	struct nvme_rdma_ctrlr *rctrlr;
	struct nvme_rdma_qpair *rqpair;
	int family;

	rqpair = nvme_rdma_qpair(qpair);
	rctrlr = nvme_rdma_ctrlr(ctrlr);
	assert(rctrlr != NULL);

	switch (ctrlr->trid.adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		family = AF_INET;
		break;
	case SPDK_NVMF_ADRFAM_IPV6:
		family = AF_INET6;
		break;
	default:
		SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
		return -1;
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);

	memset(&dst_addr, 0, sizeof(dst_addr));

	SPDK_DEBUGLOG(SPDK_LOG_NVME, "trsvcid is %s\n", ctrlr->trid.trsvcid);
	rc = nvme_rdma_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid);
	if (rc != 0) {
		SPDK_ERRLOG("dst_addr nvme_rdma_parse_addr() failed\n");
		return -1;
	}

	if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
		memset(&src_addr, 0, sizeof(src_addr));
		rc = nvme_rdma_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid);
		if (rc != 0) {
			SPDK_ERRLOG("src_addr nvme_rdma_parse_addr() failed\n");
			return -1;
		}
		src_addr_specified = true;
	} else {
		src_addr_specified = false;
	}

	rc = rdma_create_id(rctrlr->cm_channel, &rqpair->cm_id, rqpair, RDMA_PS_TCP);
	if (rc < 0) {
		SPDK_ERRLOG("rdma_create_id() failed\n");
		return -1;
	}

	rc = nvme_rdma_resolve_addr(rqpair,
				    src_addr_specified ? (struct sockaddr *)&src_addr : NULL,
				    (struct sockaddr *)&dst_addr, rctrlr->cm_channel);
	if (rc < 0) {
		SPDK_ERRLOG("nvme_rdma_resolve_addr() failed\n");
		return -1;
	}

	rc = nvme_rdma_qpair_init(rqpair);
	if (rc < 0) {
		SPDK_ERRLOG("nvme_rdma_qpair_init() failed\n");
		return -1;
	}

	rc = nvme_rdma_connect(rqpair);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to connect the rqpair\n");
		return rc;
	}

	rc = nvme_rdma_register_reqs(rqpair);
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc);
	if (rc) {
		SPDK_ERRLOG("Unable to register rqpair RDMA requests\n");
		return -1;
	}
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA requests registered\n");

	rc = nvme_rdma_register_rsps(rqpair);
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc);
	if (rc < 0) {
		SPDK_ERRLOG("Unable to register rqpair RDMA responses\n");
		return -1;
	}
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA responses registered\n");

	rc = nvme_rdma_register_mem(rqpair);
	if (rc < 0) {
		SPDK_ERRLOG("Unable to register memory for RDMA\n");
		return -1;
	}

	rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries);
	if (rc < 0) {
		rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
		SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
		return rc;
	}

	return 0;
}

static int
nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	int rc;
	int retry_count = 0;

	rc = _nvme_rdma_ctrlr_connect_qpair(ctrlr, qpair);

	/*
	 * -EAGAIN represents the special case where the target side still thought it was connected.
	 * Most NICs will fail the first connection attempt, and the NICs will clean up whatever
	 * state they need to. After that, subsequent connection attempts will succeed.
	 */
	if (rc == -EAGAIN) {
		SPDK_NOTICELOG("Detected stale connection on Target side for qpid: %d\n", qpair->id);
		do {
			nvme_delay(NVME_RDMA_STALE_CONN_RETRY_DELAY_US);
			nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
			rc = _nvme_rdma_ctrlr_connect_qpair(ctrlr, qpair);
			retry_count++;
		} while (rc == -EAGAIN && retry_count < NVME_RDMA_STALE_CONN_RETRY_MAX);
	}

	return rc;
}

/*
 * Build SGL describing empty payload.
 */
static int
nvme_rdma_build_null_request(struct spdk_nvme_rdma_req *rdma_req)
{
	struct nvme_request *req = rdma_req->req;

	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;

	/* The first element of this SGL is pointing at an
	 * spdk_nvmf_cmd object. For this particular command,
	 * we only need the first 64 bytes corresponding to
	 * the NVMe command. */
	rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);

	/* The RDMA SGL needs one element describing the NVMe command. */
	rdma_req->send_wr.num_sge = 1;

	req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	req->cmd.dptr.sgl1.keyed.length = 0;
	req->cmd.dptr.sgl1.keyed.key = 0;
	req->cmd.dptr.sgl1.address = 0;

	return 0;
}
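
/*
 * Translate a payload buffer to the lkey or rkey of the memory region that
 * covers it, failing if the buffer spans multiple registrations.
 */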
static inline bool
nvme_rdma_get_key(struct spdk_mem_map *map, void *payload, uint64_t size,
		  enum nvme_rdma_key_type key_type, uint32_t *key)
{
	struct ibv_mr *mr;
	uint64_t real_size = size;
	uint32_t _key = 0;

	if (!g_nvme_hooks.get_rkey) {
		mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)payload, &real_size);

		if (spdk_unlikely(!mr)) {
			SPDK_ERRLOG("No translation for ptr %p, size %lu\n", payload, size);
			return false;
		}
		switch (key_type) {
		case NVME_RDMA_MR_RKEY:
			_key = mr->rkey;
			break;
		case NVME_RDMA_MR_LKEY:
			_key = mr->lkey;
			break;
		default:
			SPDK_ERRLOG("Invalid key type %d\n", key_type);
			assert(0);
			return false;
		}
	} else {
		_key = spdk_mem_map_translate(map, (uint64_t)payload, &real_size);
	}

	if (spdk_unlikely(real_size < size)) {
		SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions\n");
		return false;
	}

	*key = _key;
	return true;
}

/*
 * Build inline SGL describing contiguous payload buffer.
 */
static int
nvme_rdma_build_contig_inline_request(struct nvme_rdma_qpair *rqpair,
				      struct spdk_nvme_rdma_req *rdma_req)
{
	struct nvme_request *req = rdma_req->req;
	uint32_t lkey = 0;
	void *payload;

	payload = req->payload.contig_or_cb_arg + req->payload_offset;
	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);

	if (spdk_unlikely(!nvme_rdma_get_key(rqpair->mr_map->map, payload, req->payload_size,
					     NVME_RDMA_MR_LKEY, &lkey))) {
		return -1;
	}

	rdma_req->send_sgl[1].lkey = lkey;

	/* The first element of this SGL is pointing at an
	 * spdk_nvmf_cmd object. For this particular command,
	 * we only need the first 64 bytes corresponding to
	 * the NVMe command. */
	rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);

	rdma_req->send_sgl[1].addr = (uint64_t)payload;
	rdma_req->send_sgl[1].length = (uint32_t)req->payload_size;

	/* The RDMA SGL contains two elements. The first describes
	 * the NVMe command and the second describes the data
	 * payload. */
	rdma_req->send_wr.num_sge = 2;

	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
	req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
	req->cmd.dptr.sgl1.unkeyed.length = (uint32_t)req->payload_size;
	/* Inline only supported for icdoff == 0 currently. This function will
	 * not get called for controllers with other values. */
	req->cmd.dptr.sgl1.address = (uint64_t)0;

	return 0;
}

/*
 * Build SGL describing contiguous payload buffer.
 */
static int
nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
			       struct spdk_nvme_rdma_req *rdma_req)
{
	struct nvme_request *req = rdma_req->req;
	void *payload = req->payload.contig_or_cb_arg + req->payload_offset;
	uint32_t rkey = 0;

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);

	if (spdk_unlikely(req->payload_size > NVME_RDMA_MAX_KEYED_SGL_LENGTH)) {
		SPDK_ERRLOG("SGL length %u exceeds max keyed SGL block size %u\n",
			    req->payload_size, NVME_RDMA_MAX_KEYED_SGL_LENGTH);
		return -1;
	}

	if (spdk_unlikely(!nvme_rdma_get_key(rqpair->mr_map->map, payload, req->payload_size,
					     NVME_RDMA_MR_RKEY, &rkey))) {
		return -1;
	}

	req->cmd.dptr.sgl1.keyed.key = rkey;

	/* The first element of this SGL is pointing at an
	 * spdk_nvmf_cmd object. For this particular command,
	 * we only need the first 64 bytes corresponding to
	 * the NVMe command. */
	rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);

	/* The RDMA SGL needs one element describing the NVMe command. */
	rdma_req->send_wr.num_sge = 1;

	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	req->cmd.dptr.sgl1.keyed.length = req->payload_size;
	req->cmd.dptr.sgl1.address = (uint64_t)payload;

	return 0;
}

/*
 * Build SGL describing scattered payload buffer.
 */
static int
nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
			    struct spdk_nvme_rdma_req *rdma_req)
{
	struct nvme_request *req = rdma_req->req;
	struct spdk_nvmf_cmd *cmd = &rqpair->cmds[rdma_req->id];
	void *virt_addr;
	uint32_t remaining_size;
	uint32_t sge_length;
	int rc, max_num_sgl, num_sgl_desc;
	uint32_t rkey = 0;

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	max_num_sgl = req->qpair->ctrlr->max_sges;

	remaining_size = req->payload_size;
	num_sgl_desc = 0;
	do {
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &sge_length);
		if (rc) {
			return -1;
		}

		sge_length = spdk_min(remaining_size, sge_length);

		if (spdk_unlikely(sge_length > NVME_RDMA_MAX_KEYED_SGL_LENGTH)) {
			SPDK_ERRLOG("SGL length %u exceeds max keyed SGL block size %u\n",
				    sge_length, NVME_RDMA_MAX_KEYED_SGL_LENGTH);
			return -1;
		}

		if (spdk_unlikely(!nvme_rdma_get_key(rqpair->mr_map->map, virt_addr, sge_length,
						     NVME_RDMA_MR_RKEY, &rkey))) {
			return -1;
		}

		cmd->sgl[num_sgl_desc].keyed.key = rkey;
		cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
		cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
		cmd->sgl[num_sgl_desc].keyed.length = sge_length;
		cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;

		remaining_size -= sge_length;
		num_sgl_desc++;
	} while (remaining_size > 0 && num_sgl_desc < max_num_sgl);

	/* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
	if (remaining_size > 0) {
		return -1;
	}

	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;

	/* The RDMA SGL needs one element describing some portion
	 * of the spdk_nvmf_cmd structure. */
	rdma_req->send_wr.num_sge = 1;

	/*
	 * If only one SGL descriptor is required, it can be embedded directly in the command
	 * as a data block descriptor.
	 */
	if (num_sgl_desc == 1) {
		/* The first element of this SGL is pointing at an
		 * spdk_nvmf_cmd object. For this particular command,
		 * we only need the first 64 bytes corresponding to
		 * the NVMe command. */
		rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);

		req->cmd.dptr.sgl1.keyed.type = cmd->sgl[0].keyed.type;
		req->cmd.dptr.sgl1.keyed.subtype = cmd->sgl[0].keyed.subtype;
		req->cmd.dptr.sgl1.keyed.length = cmd->sgl[0].keyed.length;
		req->cmd.dptr.sgl1.keyed.key = cmd->sgl[0].keyed.key;
		req->cmd.dptr.sgl1.address = cmd->sgl[0].address;
	} else {
		/*
		 * Otherwise, the SGL descriptor embedded in the command must point to the list of
		 * SGL descriptors used to describe the operation. In that case it is a last segment descriptor.
		 */
		uint32_t descriptors_size = sizeof(struct spdk_nvme_sgl_descriptor) * num_sgl_desc;

		if (spdk_unlikely(descriptors_size > rqpair->qpair.ctrlr->ioccsz_bytes)) {
			SPDK_ERRLOG("Size of SGL descriptors (%u) exceeds ICD (%u)\n",
				    descriptors_size, rqpair->qpair.ctrlr->ioccsz_bytes);
			return -1;
		}
		rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd) + descriptors_size;

		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
		req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
		req->cmd.dptr.sgl1.unkeyed.length = descriptors_size;
		req->cmd.dptr.sgl1.address = (uint64_t)0;
	}

	return 0;
}

/*
 * Build inline SGL describing sgl payload buffer.
 */
static int
nvme_rdma_build_sgl_inline_request(struct nvme_rdma_qpair *rqpair,
				   struct spdk_nvme_rdma_req *rdma_req)
{
	struct nvme_request *req = rdma_req->req;
	uint32_t lkey = 0;
	uint32_t length;
	void *virt_addr;
	int rc;

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length);
	if (rc) {
		return -1;
	}

	if (length < req->payload_size) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Inline SGL request split so sending separately.\n");
		return nvme_rdma_build_sgl_request(rqpair, rdma_req);
	}

	if (length > req->payload_size) {
		length = req->payload_size;
	}

	if (spdk_unlikely(!nvme_rdma_get_key(rqpair->mr_map->map, virt_addr, length,
					     NVME_RDMA_MR_LKEY, &lkey))) {
		return -1;
	}

	rdma_req->send_sgl[1].addr = (uint64_t)virt_addr;
	rdma_req->send_sgl[1].length = length;
	rdma_req->send_sgl[1].lkey = lkey;

	rdma_req->send_wr.num_sge = 2;

	/* The first element of this SGL is pointing at an
	 * spdk_nvmf_cmd object. For this particular command,
	 * we only need the first 64 bytes corresponding to
	 * the NVMe command. */
	rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);

	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
	req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
	req->cmd.dptr.sgl1.unkeyed.length = (uint32_t)req->payload_size;
	/* Inline only supported for icdoff == 0 currently. This function will
	 * not get called for controllers with other values. */
	req->cmd.dptr.sgl1.address = (uint64_t)0;

	return 0;
}
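
/*
 * Attach an nvme_request to an RDMA request and build the appropriate SGL:
 * null, contiguous or scattered, using in-capsule data when the target
 * supports it (icdoff == 0 and the payload fits within ioccsz).
 */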
static int
nvme_rdma_req_init(struct nvme_rdma_qpair *rqpair, struct nvme_request *req,
		   struct spdk_nvme_rdma_req *rdma_req)
{
	struct spdk_nvme_ctrlr *ctrlr = rqpair->qpair.ctrlr;
	enum nvme_payload_type payload_type;
	bool icd_supported;
	int rc;

	assert(rdma_req->req == NULL);
	rdma_req->req = req;
	req->cmd.cid = rdma_req->id;
	payload_type = nvme_payload_type(&req->payload);
	/*
	 * Check if icdoff is non zero, to avoid interop conflicts with
	 * targets with non-zero icdoff. Both SPDK and the Linux kernel
	 * targets use icdoff = 0. For targets with non-zero icdoff, we
	 * will currently just not use inline data for now.
	 */
	icd_supported = spdk_nvme_opc_get_data_transfer(req->cmd.opc) == SPDK_NVME_DATA_HOST_TO_CONTROLLER
			&& req->payload_size <= ctrlr->ioccsz_bytes && ctrlr->icdoff == 0;

	if (req->payload_size == 0) {
		rc = nvme_rdma_build_null_request(rdma_req);
	} else if (payload_type == NVME_PAYLOAD_TYPE_CONTIG) {
		if (icd_supported) {
			rc = nvme_rdma_build_contig_inline_request(rqpair, rdma_req);
		} else {
			rc = nvme_rdma_build_contig_request(rqpair, rdma_req);
		}
	} else if (payload_type == NVME_PAYLOAD_TYPE_SGL) {
		if (icd_supported) {
			rc = nvme_rdma_build_sgl_inline_request(rqpair, rdma_req);
		} else {
			rc = nvme_rdma_build_sgl_request(rqpair, rdma_req);
		}
	} else {
		rc = -1;
	}

	if (rc) {
		rdma_req->req = NULL;
		return rc;
	}

	memcpy(&rqpair->cmds[rdma_req->id], &req->cmd, sizeof(req->cmd));
	return 0;
}

static struct spdk_nvme_qpair *
nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
			     uint16_t qid, uint32_t qsize,
			     enum spdk_nvme_qprio qprio,
			     uint32_t num_requests,
			     bool delay_cmd_submit)
{
	struct nvme_rdma_qpair *rqpair;
	struct spdk_nvme_qpair *qpair;
	int rc;

	rqpair = nvme_rdma_calloc(1, sizeof(struct nvme_rdma_qpair));
	if (!rqpair) {
		SPDK_ERRLOG("failed to allocate rqpair\n");
		return NULL;
	}

	rqpair->num_entries = qsize;
	rqpair->delay_cmd_submit = delay_cmd_submit;
	qpair = &rqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests);
	if (rc != 0) {
		return NULL;
	}

	rc = nvme_rdma_alloc_reqs(rqpair);
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc);
	if (rc) {
		SPDK_ERRLOG("Unable to allocate rqpair RDMA requests\n");
		nvme_rdma_free(rqpair);
		return NULL;
	}
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA requests allocated\n");

	rc = nvme_rdma_alloc_rsps(rqpair);
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "rc =%d\n", rc);
	if (rc < 0) {
		SPDK_ERRLOG("Unable to allocate rqpair RDMA responses\n");
		nvme_rdma_free_reqs(rqpair);
		nvme_rdma_free(rqpair);
		return NULL;
	}
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "RDMA responses allocated\n");

	return qpair;
}
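
/*
 * Tear down a qpair's RDMA resources: unregister memory and buffers, drop
 * any CM events still queued for it, disconnect and destroy the queue pair
 * and CM ID, and destroy the completion queue if this qpair owns one.
 */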
static void
nvme_rdma_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
	struct nvme_rdma_ctrlr *rctrlr = NULL;
	struct nvme_rdma_cm_event_entry *entry, *tmp;

	nvme_rdma_unregister_mem(rqpair);
	nvme_rdma_unregister_reqs(rqpair);
	nvme_rdma_unregister_rsps(rqpair);

	if (rqpair->evt) {
		rdma_ack_cm_event(rqpair->evt);
		rqpair->evt = NULL;
	}

	/*
	 * This works because we have the controller lock both in
	 * this function and in the function where we add new events.
	 */
	if (qpair->ctrlr != NULL) {
		rctrlr = nvme_rdma_ctrlr(qpair->ctrlr);
		STAILQ_FOREACH_SAFE(entry, &rctrlr->pending_cm_events, link, tmp) {
			if (nvme_rdma_qpair(entry->evt->id->context) == rqpair) {
				STAILQ_REMOVE(&rctrlr->pending_cm_events, entry, nvme_rdma_cm_event_entry, link);
				rdma_ack_cm_event(entry->evt);
				STAILQ_INSERT_HEAD(&rctrlr->free_cm_events, entry, link);
			}
		}
	}

	if (rqpair->cm_id) {
		if (rqpair->rdma_qp) {
			spdk_rdma_qp_disconnect(rqpair->rdma_qp);
			if (rctrlr != NULL) {
				if (nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_DISCONNECTED)) {
					SPDK_DEBUGLOG(SPDK_LOG_NVME, "Target did not respond to qpair disconnect.\n");
				}
			}
			spdk_rdma_qp_destroy(rqpair->rdma_qp);
			rqpair->rdma_qp = NULL;
		}

		rdma_destroy_id(rqpair->cm_id);
		rqpair->cm_id = NULL;
	}

	if (rqpair->cq) {
		ibv_destroy_cq(rqpair->cq);
		rqpair->cq = NULL;
	}
}

static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);

static int
nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_rdma_qpair *rqpair;

	rqpair = nvme_rdma_qpair(qpair);
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
	if (rqpair->defer_deletion_to_pg) {
		nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
		return 0;
	}

	nvme_rdma_qpair_abort_reqs(qpair, 1);
	nvme_qpair_deinit(qpair);

	nvme_rdma_free_reqs(rqpair);
	nvme_rdma_free_rsps(rqpair);
	nvme_rdma_free(rqpair);

	return 0;
}

static struct spdk_nvme_qpair *
nvme_rdma_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
				const struct spdk_nvme_io_qpair_opts *opts)
{
	return nvme_rdma_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
					    opts->io_queue_requests,
					    opts->delay_cmd_submit);
}

static int
nvme_rdma_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	/* do nothing here */
	return 0;
}

static int nvme_rdma_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr);
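
/*
 * Construct an RDMA controller: clamp transport options to their supported
 * maximums, discover the minimum SGE limit across RDMA devices, set up the
 * CM event channel and event pool, then create and connect the admin qpair.
 */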
1927 if (opts->transport_retry_count > NVME_RDMA_CTRLR_MAX_TRANSPORT_RETRY_COUNT) {
1928 SPDK_NOTICELOG("transport_retry_count exceeds max value %d, use max value\n",
1929 NVME_RDMA_CTRLR_MAX_TRANSPORT_RETRY_COUNT);
1930 rctrlr->ctrlr.opts.transport_retry_count = NVME_RDMA_CTRLR_MAX_TRANSPORT_RETRY_COUNT;
1931 }
1932
1933 if (opts->transport_ack_timeout > NVME_RDMA_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
1934 SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
1935 NVME_RDMA_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
1936 rctrlr->ctrlr.opts.transport_ack_timeout = NVME_RDMA_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
1937 }
1938
1939 contexts = rdma_get_devices(NULL);
1940 if (contexts == NULL) {
1941 SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno);
1942 nvme_rdma_free(rctrlr);
1943 return NULL;
1944 }
1945
1946 i = 0;
1947 rctrlr->max_sge = NVME_RDMA_MAX_SGL_DESCRIPTORS;
1948
1949 while (contexts[i] != NULL) {
1950 rc = ibv_query_device(contexts[i], &dev_attr);
1951 if (rc < 0) {
1952 SPDK_ERRLOG("Failed to query RDMA device attributes.\n");
1953 rdma_free_devices(contexts);
1954 nvme_rdma_free(rctrlr);
1955 return NULL;
1956 }
1957 rctrlr->max_sge = spdk_min(rctrlr->max_sge, (uint16_t)dev_attr.max_sge);
1958 i++;
1959 }
1960
1961 rdma_free_devices(contexts);
1962
1963 rc = nvme_ctrlr_construct(&rctrlr->ctrlr);
1964 if (rc != 0) {
1965 nvme_rdma_free(rctrlr);
1966 return NULL;
1967 }
1968
1969 STAILQ_INIT(&rctrlr->pending_cm_events);
1970 STAILQ_INIT(&rctrlr->free_cm_events);
1971 rctrlr->cm_events = nvme_rdma_calloc(NVME_RDMA_NUM_CM_EVENTS, sizeof(*rctrlr->cm_events));
1972 if (rctrlr->cm_events == NULL) {
1973 SPDK_ERRLOG("unable to allocate buffers to hold CM events.\n");
1974 goto destruct_ctrlr;
1975 }
1976
1977 for (i = 0; i < NVME_RDMA_NUM_CM_EVENTS; i++) {
1978 STAILQ_INSERT_TAIL(&rctrlr->free_cm_events, &rctrlr->cm_events[i], link);
1979 }
1980
1981 rctrlr->cm_channel = rdma_create_event_channel();
1982 if (rctrlr->cm_channel == NULL) {
1983 SPDK_ERRLOG("rdma_create_event_channel() failed\n");
1984 goto destruct_ctrlr;
1985 }
1986
1987 flag = fcntl(rctrlr->cm_channel->fd, F_GETFL);
1988 if (fcntl(rctrlr->cm_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
1989 SPDK_ERRLOG("Cannot set event channel to non-blocking\n");
1990 goto destruct_ctrlr;
1991 }
1992
1993 rctrlr->ctrlr.adminq = nvme_rdma_ctrlr_create_qpair(&rctrlr->ctrlr, 0,
1994 rctrlr->ctrlr.opts.admin_queue_size, 0,
1995 rctrlr->ctrlr.opts.admin_queue_size, false);
1996 if (!rctrlr->ctrlr.adminq) {
1997 SPDK_ERRLOG("failed to create admin qpair\n");
1998 goto destruct_ctrlr;
1999 }
2000
2001 rc = nvme_transport_ctrlr_connect_qpair(&rctrlr->ctrlr, rctrlr->ctrlr.adminq);
2002 if (rc < 0) {
2003 SPDK_ERRLOG("failed to connect admin qpair\n");
2004 goto destruct_ctrlr;
2005 }
2006
2007 if (nvme_ctrlr_get_cap(&rctrlr->ctrlr, &cap)) {
2008 SPDK_ERRLOG("get_cap() failed\n");
2009 goto destruct_ctrlr;
2010 }
2011
2012 if (nvme_ctrlr_get_vs(&rctrlr->ctrlr, &vs)) {
2013 SPDK_ERRLOG("get_vs() failed\n");
2014 goto destruct_ctrlr;
2015 }
2016
2017 if (nvme_ctrlr_add_process(&rctrlr->ctrlr, 0) != 0) {
2018 SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
2019 goto destruct_ctrlr;
2020 }
2021
2022 nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs);
2023
2024 SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n");
2025 return &rctrlr->ctrlr;
2026
2027 destruct_ctrlr:
2028 nvme_ctrlr_destruct(&rctrlr->ctrlr);
2029 return NULL;
2030 }
2031
2032 static int
2033 nvme_rdma_ctrlr_destruct(struct
spdk_nvme_ctrlr *ctrlr) 2034 { 2035 struct nvme_rdma_ctrlr *rctrlr = nvme_rdma_ctrlr(ctrlr); 2036 struct nvme_rdma_cm_event_entry *entry; 2037 2038 if (ctrlr->adminq) { 2039 nvme_rdma_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq); 2040 } 2041 2042 STAILQ_FOREACH(entry, &rctrlr->pending_cm_events, link) { 2043 rdma_ack_cm_event(entry->evt); 2044 } 2045 2046 STAILQ_INIT(&rctrlr->free_cm_events); 2047 STAILQ_INIT(&rctrlr->pending_cm_events); 2048 nvme_rdma_free(rctrlr->cm_events); 2049 2050 if (rctrlr->cm_channel) { 2051 rdma_destroy_event_channel(rctrlr->cm_channel); 2052 rctrlr->cm_channel = NULL; 2053 } 2054 2055 nvme_ctrlr_destruct_finish(ctrlr); 2056 2057 nvme_rdma_free(rctrlr); 2058 2059 return 0; 2060 } 2061 2062 static int 2063 nvme_rdma_qpair_submit_request(struct spdk_nvme_qpair *qpair, 2064 struct nvme_request *req) 2065 { 2066 struct nvme_rdma_qpair *rqpair; 2067 struct spdk_nvme_rdma_req *rdma_req; 2068 struct ibv_send_wr *wr; 2069 2070 rqpair = nvme_rdma_qpair(qpair); 2071 assert(rqpair != NULL); 2072 assert(req != NULL); 2073 2074 rdma_req = nvme_rdma_req_get(rqpair); 2075 if (!rdma_req) { 2076 /* Inform the upper layer to try again later. */ 2077 return -EAGAIN; 2078 } 2079 2080 if (nvme_rdma_req_init(rqpair, req, rdma_req)) { 2081 SPDK_ERRLOG("nvme_rdma_req_init() failed\n"); 2082 TAILQ_REMOVE(&rqpair->outstanding_reqs, rdma_req, link); 2083 nvme_rdma_req_put(rqpair, rdma_req); 2084 return -1; 2085 } 2086 2087 wr = &rdma_req->send_wr; 2088 wr->next = NULL; 2089 nvme_rdma_trace_ibv_sge(wr->sg_list); 2090 return nvme_rdma_qpair_queue_send_wr(rqpair, wr); 2091 } 2092 2093 static int 2094 nvme_rdma_qpair_reset(struct spdk_nvme_qpair *qpair) 2095 { 2096 /* Currently, doing nothing here */ 2097 return 0; 2098 } 2099 2100 static void 2101 nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 2102 { 2103 struct spdk_nvme_rdma_req *rdma_req, *tmp; 2104 struct spdk_nvme_cpl cpl; 2105 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair); 2106 2107 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 2108 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2109 cpl.status.dnr = dnr; 2110 2111 /* 2112 * We cannot abort requests at the RDMA layer without 2113 * unregistering them. If we do, we can still get error 2114 * free completions on the shared completion queue. 2115 */ 2116 if (nvme_qpair_get_state(qpair) > NVME_QPAIR_DISCONNECTING && 2117 nvme_qpair_get_state(qpair) != NVME_QPAIR_DESTROYING) { 2118 nvme_ctrlr_disconnect_qpair(qpair); 2119 } 2120 2121 TAILQ_FOREACH_SAFE(rdma_req, &rqpair->outstanding_reqs, link, tmp) { 2122 nvme_rdma_req_complete(rdma_req, &cpl); 2123 nvme_rdma_req_put(rqpair, rdma_req); 2124 } 2125 } 2126 2127 static void 2128 nvme_rdma_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 2129 { 2130 uint64_t t02; 2131 struct spdk_nvme_rdma_req *rdma_req, *tmp; 2132 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair); 2133 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 2134 struct spdk_nvme_ctrlr_process *active_proc; 2135 2136 /* Don't check timeouts during controller initialization. */ 2137 if (ctrlr->state != NVME_CTRLR_STATE_READY) { 2138 return; 2139 } 2140 2141 if (nvme_qpair_is_admin_queue(qpair)) { 2142 active_proc = nvme_ctrlr_get_current_process(ctrlr); 2143 } else { 2144 active_proc = qpair->active_proc; 2145 } 2146 2147 /* Only check timeouts if the current process has a timeout callback. 
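 * For the admin qpair the active process is looked up from the controller,
 * while I/O qpairs record their owning process in qpair->active_proc.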
*/ 2148 if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 2149 return; 2150 } 2151 2152 t02 = spdk_get_ticks(); 2153 TAILQ_FOREACH_SAFE(rdma_req, &rqpair->outstanding_reqs, link, tmp) { 2154 assert(rdma_req->req != NULL); 2155 2156 if (nvme_request_check_timeout(rdma_req->req, rdma_req->id, active_proc, t02)) { 2157 /* 2158 * The requests are in order, so as soon as one has not timed out, 2159 * stop iterating. 2160 */ 2161 break; 2162 } 2163 } 2164 } 2165 2166 static inline int 2167 nvme_rdma_request_ready(struct nvme_rdma_qpair *rqpair, struct spdk_nvme_rdma_req *rdma_req) 2168 { 2169 nvme_rdma_req_complete(rdma_req, &rqpair->rsps[rdma_req->rsp_idx].cpl); 2170 nvme_rdma_req_put(rqpair, rdma_req); 2171 return nvme_rdma_post_recv(rqpair, rdma_req->rsp_idx); 2172 } 2173 2174 #define MAX_COMPLETIONS_PER_POLL 128 2175 2176 static void 2177 nvme_rdma_fail_qpair(struct spdk_nvme_qpair *qpair, int failure_reason) 2178 { 2179 if (failure_reason == IBV_WC_RETRY_EXC_ERR) { 2180 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_REMOTE; 2181 } else if (qpair->transport_failure_reason == SPDK_NVME_QPAIR_FAILURE_NONE) { 2182 qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN; 2183 } 2184 2185 nvme_ctrlr_disconnect_qpair(qpair); 2186 } 2187 2188 static void 2189 nvme_rdma_conditional_fail_qpair(struct nvme_rdma_qpair *rqpair, struct nvme_rdma_poll_group *group) 2190 { 2191 struct nvme_rdma_destroyed_qpair *qpair_tracker; 2192 2193 assert(rqpair); 2194 if (group) { 2195 STAILQ_FOREACH(qpair_tracker, &group->destroyed_qpairs, link) { 2196 if (qpair_tracker->destroyed_qpair_tracker == rqpair) { 2197 return; 2198 } 2199 } 2200 } 2201 nvme_rdma_fail_qpair(&rqpair->qpair, 0); 2202 } 2203 2204 static int 2205 nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size, 2206 struct nvme_rdma_poll_group *group, 2207 struct nvme_rdma_qpair *rdma_qpair) 2208 { 2209 struct ibv_wc wc[MAX_COMPLETIONS_PER_POLL]; 2210 struct nvme_rdma_qpair *rqpair; 2211 struct spdk_nvme_rdma_req *rdma_req; 2212 struct spdk_nvme_rdma_rsp *rdma_rsp; 2213 struct nvme_rdma_wr *rdma_wr; 2214 uint32_t reaped = 0; 2215 int completion_rc = 0; 2216 int rc, i; 2217 2218 rc = ibv_poll_cq(cq, batch_size, wc); 2219 if (rc < 0) { 2220 SPDK_ERRLOG("Error polling CQ! 
(%d): %s\n", 2221 errno, spdk_strerror(errno)); 2222 return -ECANCELED; 2223 } else if (rc == 0) { 2224 return 0; 2225 } 2226 2227 for (i = 0; i < rc; i++) { 2228 rdma_wr = (struct nvme_rdma_wr *)wc[i].wr_id; 2229 switch (rdma_wr->type) { 2230 case RDMA_WR_TYPE_RECV: 2231 rdma_rsp = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvme_rdma_rsp, rdma_wr); 2232 rqpair = rdma_rsp->rqpair; 2233 assert(rqpair->current_num_recvs > 0); 2234 rqpair->current_num_recvs--; 2235 2236 if (wc[i].status) { 2237 SPDK_ERRLOG("CQ error on Queue Pair %p, Response Index %lu (%d): %s\n", 2238 rqpair, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status)); 2239 nvme_rdma_conditional_fail_qpair(rqpair, group); 2240 completion_rc = -ENXIO; 2241 continue; 2242 } 2243 2244 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CQ recv completion\n"); 2245 2246 if (wc[i].byte_len < sizeof(struct spdk_nvme_cpl)) { 2247 SPDK_ERRLOG("recv length %u less than expected response size\n", wc[i].byte_len); 2248 nvme_rdma_conditional_fail_qpair(rqpair, group); 2249 completion_rc = -ENXIO; 2250 continue; 2251 } 2252 rdma_req = &rqpair->rdma_reqs[rdma_rsp->cpl.cid]; 2253 rdma_req->completion_flags |= NVME_RDMA_RECV_COMPLETED; 2254 rdma_req->rsp_idx = rdma_rsp->idx; 2255 2256 if ((rdma_req->completion_flags & NVME_RDMA_SEND_COMPLETED) != 0) { 2257 if (spdk_unlikely(nvme_rdma_request_ready(rqpair, rdma_req))) { 2258 SPDK_ERRLOG("Unable to re-post rx descriptor\n"); 2259 nvme_rdma_conditional_fail_qpair(rqpair, group); 2260 completion_rc = -ENXIO; 2261 continue; 2262 } 2263 reaped++; 2264 rqpair->num_completions++; 2265 } 2266 break; 2267 2268 case RDMA_WR_TYPE_SEND: 2269 rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvme_rdma_req, rdma_wr); 2270 2271 /* If we are flushing I/O */ 2272 if (wc[i].status) { 2273 rqpair = rdma_req->req ? nvme_rdma_qpair(rdma_req->req->qpair) : NULL; 2274 if (!rqpair) { 2275 rqpair = rdma_qpair != NULL ? 
rdma_qpair : nvme_rdma_poll_group_get_qpair_by_id(group, 2276 wc[i].qp_num); 2277 } 2278 assert(rqpair); 2279 assert(rqpair->current_num_sends > 0); 2280 rqpair->current_num_sends--; 2281 nvme_rdma_conditional_fail_qpair(rqpair, group); 2282 SPDK_ERRLOG("CQ error on Queue Pair %p, Response Index %lu (%d): %s\n", 2283 rqpair, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status)); 2284 completion_rc = -ENXIO; 2285 continue; 2286 } 2287 2288 rqpair = nvme_rdma_qpair(rdma_req->req->qpair); 2289 rdma_req->completion_flags |= NVME_RDMA_SEND_COMPLETED; 2290 rqpair->current_num_sends--; 2291 2292 if ((rdma_req->completion_flags & NVME_RDMA_RECV_COMPLETED) != 0) { 2293 if (spdk_unlikely(nvme_rdma_request_ready(rqpair, rdma_req))) { 2294 SPDK_ERRLOG("Unable to re-post rx descriptor\n"); 2295 nvme_rdma_conditional_fail_qpair(rqpair, group); 2296 completion_rc = -ENXIO; 2297 continue; 2298 } 2299 reaped++; 2300 rqpair->num_completions++; 2301 } 2302 break; 2303 2304 default: 2305 SPDK_ERRLOG("Received an unexpected opcode on the CQ: %d\n", rdma_wr->type); 2306 return -ECANCELED; 2307 } 2308 } 2309 2310 if (completion_rc) { 2311 return completion_rc; 2312 } 2313 2314 return reaped; 2315 } 2316 2317 static void 2318 dummy_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx) 2319 { 2320 2321 } 2322 2323 static int 2324 nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair, 2325 uint32_t max_completions) 2326 { 2327 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair); 2328 int rc = 0, batch_size; 2329 struct ibv_cq *cq; 2330 struct nvme_rdma_ctrlr *rctrlr; 2331 2332 /* 2333 * This is used during the connection phase. It's possible that we are still reaping error completions 2334 * from other qpairs so we need to call the poll group function. Also, it's more correct since the cq 2335 * is shared. 2336 */ 2337 if (qpair->poll_group != NULL) { 2338 return spdk_nvme_poll_group_process_completions(qpair->poll_group->group, max_completions, 2339 dummy_disconnected_qpair_cb); 2340 } 2341 2342 if (max_completions == 0) { 2343 max_completions = rqpair->num_entries; 2344 } else { 2345 max_completions = spdk_min(max_completions, rqpair->num_entries); 2346 } 2347 2348 if (nvme_qpair_is_admin_queue(&rqpair->qpair)) { 2349 rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr); 2350 nvme_rdma_poll_events(rctrlr); 2351 } 2352 nvme_rdma_qpair_process_cm_event(rqpair); 2353 2354 if (spdk_unlikely(qpair->transport_failure_reason != SPDK_NVME_QPAIR_FAILURE_NONE)) { 2355 nvme_rdma_fail_qpair(qpair, 0); 2356 return -ENXIO; 2357 } 2358 2359 cq = rqpair->cq; 2360 2361 rqpair->num_completions = 0; 2362 do { 2363 batch_size = spdk_min((max_completions - rqpair->num_completions), MAX_COMPLETIONS_PER_POLL); 2364 rc = nvme_rdma_cq_process_completions(cq, batch_size, NULL, rqpair); 2365 2366 if (rc == 0) { 2367 break; 2368 /* Handle the case where we fail to poll the cq. 
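 * A return of -ECANCELED means the CQ itself could not be polled, so the qpair is
 * failed below; -ENXIO indicates a completion reported an error and is passed back
 * to the caller.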
*/ 2369 } else if (rc == -ECANCELED) { 2370 nvme_rdma_fail_qpair(qpair, 0); 2371 return -ENXIO; 2372 } else if (rc == -ENXIO) { 2373 return rc; 2374 } 2375 } while (rqpair->num_completions < max_completions); 2376 2377 if (spdk_unlikely(nvme_rdma_qpair_submit_sends(rqpair) || 2378 nvme_rdma_qpair_submit_recvs(rqpair))) { 2379 nvme_rdma_fail_qpair(qpair, 0); 2380 return -ENXIO; 2381 } 2382 2383 if (spdk_unlikely(rqpair->qpair.ctrlr->timeout_enabled)) { 2384 nvme_rdma_qpair_check_timeout(qpair); 2385 } 2386 2387 return rqpair->num_completions; 2388 } 2389 2390 static uint32_t 2391 nvme_rdma_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) 2392 { 2393 /* max_mr_size by ibv_query_device indicates the largest value that we can 2394 * set for a registered memory region. It is independent from the actual 2395 * I/O size and is very likely to be larger than 2 MiB which is the 2396 * granularity we currently register memory regions. Hence return 2397 * UINT32_MAX here and let the generic layer use the controller data to 2398 * moderate this value. 2399 */ 2400 return UINT32_MAX; 2401 } 2402 2403 static uint16_t 2404 nvme_rdma_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) 2405 { 2406 struct nvme_rdma_ctrlr *rctrlr = nvme_rdma_ctrlr(ctrlr); 2407 2408 return rctrlr->max_sge; 2409 } 2410 2411 static int 2412 nvme_rdma_qpair_iterate_requests(struct spdk_nvme_qpair *qpair, 2413 int (*iter_fn)(struct nvme_request *req, void *arg), 2414 void *arg) 2415 { 2416 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair); 2417 struct spdk_nvme_rdma_req *rdma_req, *tmp; 2418 int rc; 2419 2420 assert(iter_fn != NULL); 2421 2422 TAILQ_FOREACH_SAFE(rdma_req, &rqpair->outstanding_reqs, link, tmp) { 2423 assert(rdma_req->req != NULL); 2424 2425 rc = iter_fn(rdma_req->req, arg); 2426 if (rc != 0) { 2427 return rc; 2428 } 2429 } 2430 2431 return 0; 2432 } 2433 2434 static void 2435 nvme_rdma_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) 2436 { 2437 struct spdk_nvme_rdma_req *rdma_req, *tmp; 2438 struct spdk_nvme_cpl cpl; 2439 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair); 2440 2441 cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION; 2442 cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2443 2444 TAILQ_FOREACH_SAFE(rdma_req, &rqpair->outstanding_reqs, link, tmp) { 2445 assert(rdma_req->req != NULL); 2446 2447 if (rdma_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { 2448 continue; 2449 } 2450 2451 nvme_rdma_req_complete(rdma_req, &cpl); 2452 nvme_rdma_req_put(rqpair, rdma_req); 2453 } 2454 } 2455 2456 static int 2457 nvme_rdma_poller_create(struct nvme_rdma_poll_group *group, struct ibv_context *ctx) 2458 { 2459 struct nvme_rdma_poller *poller; 2460 2461 poller = calloc(1, sizeof(*poller)); 2462 if (poller == NULL) { 2463 SPDK_ERRLOG("Unable to allocate poller.\n"); 2464 return -ENOMEM; 2465 } 2466 2467 poller->device = ctx; 2468 poller->cq = ibv_create_cq(poller->device, DEFAULT_NVME_RDMA_CQ_SIZE, group, NULL, 0); 2469 2470 if (poller->cq == NULL) { 2471 free(poller); 2472 return -EINVAL; 2473 } 2474 2475 STAILQ_INSERT_HEAD(&group->pollers, poller, link); 2476 group->num_pollers++; 2477 poller->current_num_wc = DEFAULT_NVME_RDMA_CQ_SIZE; 2478 poller->required_num_wc = 0; 2479 return 0; 2480 } 2481 2482 static void 2483 nvme_rdma_poll_group_free_pollers(struct nvme_rdma_poll_group *group) 2484 { 2485 struct nvme_rdma_poller *poller, *tmp_poller; 2486 2487 STAILQ_FOREACH_SAFE(poller, &group->pollers, link, tmp_poller) { 2488 if (poller->cq) { 2489 ibv_destroy_cq(poller->cq); 2490 } 2491 
STAILQ_REMOVE(&group->pollers, poller, nvme_rdma_poller, link); 2492 free(poller); 2493 } 2494 } 2495 2496 static struct spdk_nvme_transport_poll_group * 2497 nvme_rdma_poll_group_create(void) 2498 { 2499 struct nvme_rdma_poll_group *group; 2500 struct ibv_context **contexts; 2501 int i = 0; 2502 2503 group = calloc(1, sizeof(*group)); 2504 if (group == NULL) { 2505 SPDK_ERRLOG("Unable to allocate poll group.\n"); 2506 return NULL; 2507 } 2508 2509 STAILQ_INIT(&group->pollers); 2510 2511 contexts = rdma_get_devices(NULL); 2512 if (contexts == NULL) { 2513 SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno); 2514 free(group); 2515 return NULL; 2516 } 2517 2518 while (contexts[i] != NULL) { 2519 if (nvme_rdma_poller_create(group, contexts[i])) { 2520 nvme_rdma_poll_group_free_pollers(group); 2521 free(group); 2522 rdma_free_devices(contexts); 2523 return NULL; 2524 } 2525 i++; 2526 } 2527 2528 rdma_free_devices(contexts); 2529 STAILQ_INIT(&group->destroyed_qpairs); 2530 return &group->group; 2531 } 2532 2533 struct nvme_rdma_qpair * 2534 nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group, uint32_t qp_num) 2535 { 2536 struct spdk_nvme_qpair *qpair; 2537 struct nvme_rdma_destroyed_qpair *rqpair_tracker; 2538 struct nvme_rdma_qpair *rqpair; 2539 2540 STAILQ_FOREACH(qpair, &group->group.disconnected_qpairs, poll_group_stailq) { 2541 rqpair = nvme_rdma_qpair(qpair); 2542 if (rqpair->rdma_qp->qp->qp_num == qp_num) { 2543 return rqpair; 2544 } 2545 } 2546 2547 STAILQ_FOREACH(qpair, &group->group.connected_qpairs, poll_group_stailq) { 2548 rqpair = nvme_rdma_qpair(qpair); 2549 if (rqpair->rdma_qp->qp->qp_num == qp_num) { 2550 return rqpair; 2551 } 2552 } 2553 2554 STAILQ_FOREACH(rqpair_tracker, &group->destroyed_qpairs, link) { 2555 rqpair = rqpair_tracker->destroyed_qpair_tracker; 2556 if (rqpair->rdma_qp->qp->qp_num == qp_num) { 2557 return rqpair; 2558 } 2559 } 2560 2561 return NULL; 2562 } 2563 2564 static int 2565 nvme_rdma_resize_cq(struct nvme_rdma_qpair *rqpair, struct nvme_rdma_poller *poller) 2566 { 2567 int current_num_wc, required_num_wc; 2568 2569 required_num_wc = poller->required_num_wc + WC_PER_QPAIR(rqpair->num_entries); 2570 current_num_wc = poller->current_num_wc; 2571 if (current_num_wc < required_num_wc) { 2572 current_num_wc = spdk_max(current_num_wc * 2, required_num_wc); 2573 } 2574 2575 if (poller->current_num_wc != current_num_wc) { 2576 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Resize RDMA CQ from %d to %d\n", poller->current_num_wc, 2577 current_num_wc); 2578 if (ibv_resize_cq(poller->cq, current_num_wc)) { 2579 SPDK_ERRLOG("RDMA CQ resize failed: errno %d: %s\n", errno, spdk_strerror(errno)); 2580 return -1; 2581 } 2582 2583 poller->current_num_wc = current_num_wc; 2584 } 2585 2586 poller->required_num_wc = required_num_wc; 2587 return 0; 2588 } 2589 2590 static int 2591 nvme_rdma_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair) 2592 { 2593 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair); 2594 struct nvme_rdma_poll_group *group = nvme_rdma_poll_group(qpair->poll_group); 2595 struct nvme_rdma_poller *poller; 2596 2597 assert(rqpair->cq == NULL); 2598 2599 STAILQ_FOREACH(poller, &group->pollers, link) { 2600 if (poller->device == rqpair->cm_id->verbs) { 2601 if (nvme_rdma_resize_cq(rqpair, poller)) { 2602 return -EPROTO; 2603 } 2604 rqpair->cq = poller->cq; 2605 break; 2606 } 2607 } 2608 2609 if (rqpair->cq == NULL) { 2610 SPDK_ERRLOG("Unable to find a cq for qpair %p on poll group %p\n", qpair, qpair->poll_group); 2611 
return -EINVAL;
2612 }
2613
2614 return 0;
2615 }
2616
2617 static int
2618 nvme_rdma_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
2619 {
2620 struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
2621 struct nvme_rdma_poll_group *group;
2622 struct nvme_rdma_destroyed_qpair *destroyed_qpair;
2623 enum nvme_qpair_state state;
2624
2625 if (rqpair->poll_group_disconnect_in_progress) {
2626 return -EINPROGRESS;
2627 }
2628
2629 rqpair->poll_group_disconnect_in_progress = true;
2630 state = nvme_qpair_get_state(qpair);
2631 group = nvme_rdma_poll_group(qpair->poll_group);
2632 rqpair->cq = NULL;
2633
2634 /*
2635 * We want to guard against an endless recursive loop while making
2636 * sure the qpair is disconnected before we remove it from the poll group.
2637 */
2638 if (state > NVME_QPAIR_DISCONNECTING && state != NVME_QPAIR_DESTROYING) {
2639 nvme_ctrlr_disconnect_qpair(qpair);
2640 }
2641
2642 /*
2643 * If this fails, the system is in serious trouble;
2644 * just let the qpair get cleaned up immediately.
2645 */
2646 destroyed_qpair = calloc(1, sizeof(*destroyed_qpair));
2647 if (destroyed_qpair == NULL) {
2648 return 0;
2649 }
2650
2651 destroyed_qpair->destroyed_qpair_tracker = rqpair;
2652 destroyed_qpair->completed_cycles = 0;
2653 STAILQ_INSERT_TAIL(&group->destroyed_qpairs, destroyed_qpair, link);
2654
2655 rqpair->defer_deletion_to_pg = true;
2656
2657 rqpair->poll_group_disconnect_in_progress = false;
2658 return 0;
2659 }
2660
2661 static int
2662 nvme_rdma_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
2663 struct spdk_nvme_qpair *qpair)
2664 {
2665 return 0;
2666 }
2667
2668 static int
2669 nvme_rdma_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
2670 struct spdk_nvme_qpair *qpair)
2671 {
2672 if (qpair->poll_group_tailq_head == &tgroup->connected_qpairs) {
2673 return nvme_poll_group_disconnect_qpair(qpair);
2674 }
2675
2676 return 0;
2677 }
2678
2679 static void
2680 nvme_rdma_poll_group_delete_qpair(struct nvme_rdma_poll_group *group,
2681 struct nvme_rdma_destroyed_qpair *qpair_tracker)
2682 {
2683 struct nvme_rdma_qpair *rqpair = qpair_tracker->destroyed_qpair_tracker;
2684
2685 rqpair->defer_deletion_to_pg = false;
2686 if (nvme_qpair_get_state(&rqpair->qpair) == NVME_QPAIR_DESTROYING) {
2687 nvme_rdma_ctrlr_delete_io_qpair(rqpair->qpair.ctrlr, &rqpair->qpair);
2688 }
2689 STAILQ_REMOVE(&group->destroyed_qpairs, qpair_tracker, nvme_rdma_destroyed_qpair, link);
2690 free(qpair_tracker);
2691 }
2692
2693 static int64_t
2694 nvme_rdma_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
2695 uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
2696 {
2697 struct spdk_nvme_qpair *qpair, *tmp_qpair;
2698 struct nvme_rdma_destroyed_qpair *qpair_tracker, *tmp_qpair_tracker;
2699 struct nvme_rdma_qpair *rqpair;
2700 struct nvme_rdma_poll_group *group;
2701 struct nvme_rdma_poller *poller;
2702 int num_qpairs = 0, batch_size, rc;
2703 int64_t total_completions = 0;
2704 uint64_t completions_allowed = 0;
2705 uint64_t completions_per_poller = 0;
2706 uint64_t poller_completions = 0;
2707
2708
2709 if (completions_per_qpair == 0) {
2710 completions_per_qpair = MAX_COMPLETIONS_PER_POLL;
2711 }
2712
2713 group = nvme_rdma_poll_group(tgroup);
2714 STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
2715 disconnected_qpair_cb(qpair, tgroup->group->ctx);
2716 }
2717
2718 STAILQ_FOREACH_SAFE(qpair, &tgroup->connected_qpairs, poll_group_stailq,
tmp_qpair) {
2719 rqpair = nvme_rdma_qpair(qpair);
2720 rqpair->num_completions = 0;
2721 nvme_rdma_qpair_process_cm_event(rqpair);
2722
2723 if (spdk_unlikely(qpair->transport_failure_reason != SPDK_NVME_QPAIR_FAILURE_NONE)) {
2724 nvme_rdma_fail_qpair(qpair, 0);
2725 disconnected_qpair_cb(qpair, tgroup->group->ctx);
2726 continue;
2727 }
2728 num_qpairs++;
2729 }
2730
2731 completions_allowed = completions_per_qpair * num_qpairs;
2732 completions_per_poller = spdk_max(completions_allowed / group->num_pollers, 1);
2733
2734 STAILQ_FOREACH(poller, &group->pollers, link) {
2735 poller_completions = 0;
2736 do {
2737 batch_size = spdk_min((completions_per_poller - poller_completions), MAX_COMPLETIONS_PER_POLL);
2738 rc = nvme_rdma_cq_process_completions(poller->cq, batch_size, group, NULL);
2739 if (rc <= 0) {
2740 if (rc == -ECANCELED) {
2741 return -EIO;
2742 }
2743 break;
2744 }
2745
2746 poller_completions += rc;
2747 } while (poller_completions < completions_per_poller);
2748 total_completions += poller_completions;
2749 }
2750
2751 STAILQ_FOREACH_SAFE(qpair, &tgroup->connected_qpairs, poll_group_stailq, tmp_qpair) {
2752 rqpair = nvme_rdma_qpair(qpair);
2753 if (spdk_unlikely(qpair->ctrlr->timeout_enabled)) {
2754 nvme_rdma_qpair_check_timeout(qpair);
2755 }
2756
2757 nvme_rdma_qpair_submit_sends(rqpair);
2758 nvme_rdma_qpair_submit_recvs(rqpair);
2759 nvme_qpair_resubmit_requests(&rqpair->qpair, rqpair->num_completions);
2760 }
2761
2762 /*
2763 * Once a qpair is disconnected, we can still get flushed completions for those disconnected qpairs.
2764 * For most pieces of hardware, those requests will complete immediately. However, there are certain
2765 * cases where flushed requests will linger. The default is to destroy the qpair once all of its
2766 * outstanding work requests have completed, but we keep a fallback for cases where some completions never arrive.
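 * The fallback drops the tracker after NVME_RDMA_DESTROYED_QPAIR_EXPIRATION_CYCLES polls
 * of the poll group.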
2767 */ 2768 STAILQ_FOREACH_SAFE(qpair_tracker, &group->destroyed_qpairs, link, tmp_qpair_tracker) { 2769 qpair_tracker->completed_cycles++; 2770 rqpair = qpair_tracker->destroyed_qpair_tracker; 2771 if ((rqpair->current_num_sends == 0 && rqpair->current_num_recvs == 0) || 2772 qpair_tracker->completed_cycles > NVME_RDMA_DESTROYED_QPAIR_EXPIRATION_CYCLES) { 2773 nvme_rdma_poll_group_delete_qpair(group, qpair_tracker); 2774 } 2775 } 2776 2777 return total_completions; 2778 } 2779 2780 static int 2781 nvme_rdma_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) 2782 { 2783 struct nvme_rdma_poll_group *group = nvme_rdma_poll_group(tgroup); 2784 struct nvme_rdma_destroyed_qpair *qpair_tracker, *tmp_qpair_tracker; 2785 struct nvme_rdma_qpair *rqpair; 2786 2787 if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { 2788 return -EBUSY; 2789 } 2790 2791 STAILQ_FOREACH_SAFE(qpair_tracker, &group->destroyed_qpairs, link, tmp_qpair_tracker) { 2792 rqpair = qpair_tracker->destroyed_qpair_tracker; 2793 if (nvme_qpair_get_state(&rqpair->qpair) == NVME_QPAIR_DESTROYING) { 2794 rqpair->defer_deletion_to_pg = false; 2795 nvme_rdma_ctrlr_delete_io_qpair(rqpair->qpair.ctrlr, &rqpair->qpair); 2796 } 2797 2798 STAILQ_REMOVE(&group->destroyed_qpairs, qpair_tracker, nvme_rdma_destroyed_qpair, link); 2799 free(qpair_tracker); 2800 } 2801 2802 nvme_rdma_poll_group_free_pollers(group); 2803 free(group); 2804 2805 return 0; 2806 } 2807 2808 void 2809 spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks) 2810 { 2811 g_nvme_hooks = *hooks; 2812 } 2813 2814 const struct spdk_nvme_transport_ops rdma_ops = { 2815 .name = "RDMA", 2816 .type = SPDK_NVME_TRANSPORT_RDMA, 2817 .ctrlr_construct = nvme_rdma_ctrlr_construct, 2818 .ctrlr_scan = nvme_fabric_ctrlr_scan, 2819 .ctrlr_destruct = nvme_rdma_ctrlr_destruct, 2820 .ctrlr_enable = nvme_rdma_ctrlr_enable, 2821 2822 .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4, 2823 .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8, 2824 .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4, 2825 .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8, 2826 2827 .ctrlr_get_max_xfer_size = nvme_rdma_ctrlr_get_max_xfer_size, 2828 .ctrlr_get_max_sges = nvme_rdma_ctrlr_get_max_sges, 2829 2830 .ctrlr_create_io_qpair = nvme_rdma_ctrlr_create_io_qpair, 2831 .ctrlr_delete_io_qpair = nvme_rdma_ctrlr_delete_io_qpair, 2832 .ctrlr_connect_qpair = nvme_rdma_ctrlr_connect_qpair, 2833 .ctrlr_disconnect_qpair = nvme_rdma_ctrlr_disconnect_qpair, 2834 2835 .qpair_abort_reqs = nvme_rdma_qpair_abort_reqs, 2836 .qpair_reset = nvme_rdma_qpair_reset, 2837 .qpair_submit_request = nvme_rdma_qpair_submit_request, 2838 .qpair_process_completions = nvme_rdma_qpair_process_completions, 2839 .qpair_iterate_requests = nvme_rdma_qpair_iterate_requests, 2840 .admin_qpair_abort_aers = nvme_rdma_admin_qpair_abort_aers, 2841 2842 .poll_group_create = nvme_rdma_poll_group_create, 2843 .poll_group_connect_qpair = nvme_rdma_poll_group_connect_qpair, 2844 .poll_group_disconnect_qpair = nvme_rdma_poll_group_disconnect_qpair, 2845 .poll_group_add = nvme_rdma_poll_group_add, 2846 .poll_group_remove = nvme_rdma_poll_group_remove, 2847 .poll_group_process_completions = nvme_rdma_poll_group_process_completions, 2848 .poll_group_destroy = nvme_rdma_poll_group_destroy, 2849 2850 }; 2851 2852 SPDK_NVME_TRANSPORT_REGISTER(rdma, &rdma_ops); 2853
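/*
 * Usage sketch (illustrative only, not part of this file): an application that
 * manages its own protection domains and memory keys can install custom hooks
 * before constructing any RDMA controller. The callback names below are
 * hypothetical, and the member names are assumptions based on the public
 * spdk_nvme_rdma_hooks definition in include/spdk/nvme.h; consult that header
 * for the authoritative layout.
 *
 *	static struct spdk_nvme_rdma_hooks my_hooks = {
 *		.get_ibv_pd	= my_get_ibv_pd,
 *		.get_rkey	= my_get_rkey,
 *		.put_rkey	= my_put_rkey,
 *	};
 *
 *	spdk_nvme_rdma_init_hooks(&my_hooks);
 */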