1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2018 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include <infiniband/verbs.h> 37 #include <rdma/rdma_cma.h> 38 #include <rdma/rdma_verbs.h> 39 40 #include "nvmf_internal.h" 41 #include "transport.h" 42 43 #include "spdk/config.h" 44 #include "spdk/assert.h" 45 #include "spdk/thread.h" 46 #include "spdk/nvmf.h" 47 #include "spdk/nvmf_spec.h" 48 #include "spdk/string.h" 49 #include "spdk/trace.h" 50 #include "spdk/util.h" 51 52 #include "spdk_internal/log.h" 53 54 struct spdk_nvme_rdma_hooks g_nvmf_hooks = {}; 55 56 /* 57 RDMA Connection Resource Defaults 58 */ 59 #define NVMF_DEFAULT_TX_SGE SPDK_NVMF_MAX_SGL_ENTRIES 60 #define NVMF_DEFAULT_RSP_SGE 1 61 #define NVMF_DEFAULT_RX_SGE 2 62 63 /* The RDMA completion queue size */ 64 #define DEFAULT_NVMF_RDMA_CQ_SIZE 4096 65 #define MAX_WR_PER_QP(queue_depth) (queue_depth * 3 + 2) 66 67 /* Timeout for destroying defunct rqpairs */ 68 #define NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US 4000000 69 70 enum spdk_nvmf_rdma_request_state { 71 /* The request is not currently in use */ 72 RDMA_REQUEST_STATE_FREE = 0, 73 74 /* Initial state when request first received */ 75 RDMA_REQUEST_STATE_NEW, 76 77 /* The request is queued until a data buffer is available. */ 78 RDMA_REQUEST_STATE_NEED_BUFFER, 79 80 /* The request is waiting on RDMA queue depth availability 81 * to transfer data from the host to the controller. 82 */ 83 RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, 84 85 /* The request is currently transferring data from the host to the controller. 
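 * In this transport that means one or more RDMA READ work requests posted by
 * request_transfer_in() are still outstanding on the send queue.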
*/ 86 RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 87 88 /* The request is ready to execute at the block device */ 89 RDMA_REQUEST_STATE_READY_TO_EXECUTE, 90 91 /* The request is currently executing at the block device */ 92 RDMA_REQUEST_STATE_EXECUTING, 93 94 /* The request finished executing at the block device */ 95 RDMA_REQUEST_STATE_EXECUTED, 96 97 /* The request is waiting on RDMA queue depth availability 98 * to transfer data from the controller to the host. 99 */ 100 RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, 101 102 /* The request is ready to send a completion */ 103 RDMA_REQUEST_STATE_READY_TO_COMPLETE, 104 105 /* The request is currently transferring data from the controller to the host. */ 106 RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 107 108 /* The request currently has an outstanding completion without an 109 * associated data transfer. 110 */ 111 RDMA_REQUEST_STATE_COMPLETING, 112 113 /* The request completed and can be marked free. */ 114 RDMA_REQUEST_STATE_COMPLETED, 115 116 /* Terminator */ 117 RDMA_REQUEST_NUM_STATES, 118 }; 119 120 #define OBJECT_NVMF_RDMA_IO 0x40 121 122 #define TRACE_GROUP_NVMF_RDMA 0x4 123 #define TRACE_RDMA_REQUEST_STATE_NEW SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x0) 124 #define TRACE_RDMA_REQUEST_STATE_NEED_BUFFER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x1) 125 #define TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x2) 126 #define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x3) 127 #define TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x4) 128 #define TRACE_RDMA_REQUEST_STATE_EXECUTING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x5) 129 #define TRACE_RDMA_REQUEST_STATE_EXECUTED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x6) 130 #define TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x7) 131 #define TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x8) 132 #define TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x9) 133 #define TRACE_RDMA_REQUEST_STATE_COMPLETING SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xA) 134 #define TRACE_RDMA_REQUEST_STATE_COMPLETED SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xB) 135 #define TRACE_RDMA_QP_CREATE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xC) 136 #define TRACE_RDMA_IBV_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xD) 137 #define TRACE_RDMA_CM_ASYNC_EVENT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xE) 138 #define TRACE_RDMA_QP_STATE_CHANGE SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0xF) 139 #define TRACE_RDMA_QP_DISCONNECT SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x10) 140 #define TRACE_RDMA_QP_DESTROY SPDK_TPOINT_ID(TRACE_GROUP_NVMF_RDMA, 0x11) 141 142 SPDK_TRACE_REGISTER_FN(nvmf_trace, "nvmf_rdma", TRACE_GROUP_NVMF_RDMA) 143 { 144 spdk_trace_register_object(OBJECT_NVMF_RDMA_IO, 'r'); 145 spdk_trace_register_description("RDMA_REQ_NEW", "", 146 TRACE_RDMA_REQUEST_STATE_NEW, 147 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 1, 1, "cmid: "); 148 spdk_trace_register_description("RDMA_REQ_NEED_BUFFER", "", 149 TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, 150 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 151 spdk_trace_register_description("RDMA_REQ_TX_PENDING_C_TO_H", "", 152 TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, 153 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 154 spdk_trace_register_description("RDMA_REQ_TX_PENDING_H_TO_C", "", 155 
TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, 156 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 157 spdk_trace_register_description("RDMA_REQ_TX_H_TO_C", "", 158 TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 159 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 160 spdk_trace_register_description("RDMA_REQ_RDY_TO_EXECUTE", "", 161 TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, 162 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 163 spdk_trace_register_description("RDMA_REQ_EXECUTING", "", 164 TRACE_RDMA_REQUEST_STATE_EXECUTING, 165 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 166 spdk_trace_register_description("RDMA_REQ_EXECUTED", "", 167 TRACE_RDMA_REQUEST_STATE_EXECUTED, 168 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 169 spdk_trace_register_description("RDMA_REQ_RDY_TO_COMPLETE", "", 170 TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, 171 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 172 spdk_trace_register_description("RDMA_REQ_COMPLETING_CONTROLLER_TO_HOST", "", 173 TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 174 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 175 spdk_trace_register_description("RDMA_REQ_COMPLETING_INCAPSULE", "", 176 TRACE_RDMA_REQUEST_STATE_COMPLETING, 177 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 178 spdk_trace_register_description("RDMA_REQ_COMPLETED", "", 179 TRACE_RDMA_REQUEST_STATE_COMPLETED, 180 OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0, 1, "cmid: "); 181 182 spdk_trace_register_description("RDMA_QP_CREATE", "", TRACE_RDMA_QP_CREATE, 183 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 184 spdk_trace_register_description("RDMA_IBV_ASYNC_EVENT", "", TRACE_RDMA_IBV_ASYNC_EVENT, 185 OWNER_NONE, OBJECT_NONE, 0, 0, "type: "); 186 spdk_trace_register_description("RDMA_CM_ASYNC_EVENT", "", TRACE_RDMA_CM_ASYNC_EVENT, 187 OWNER_NONE, OBJECT_NONE, 0, 0, "type: "); 188 spdk_trace_register_description("RDMA_QP_STATE_CHANGE", "", TRACE_RDMA_QP_STATE_CHANGE, 189 OWNER_NONE, OBJECT_NONE, 0, 1, "state: "); 190 spdk_trace_register_description("RDMA_QP_DISCONNECT", "", TRACE_RDMA_QP_DISCONNECT, 191 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 192 spdk_trace_register_description("RDMA_QP_DESTROY", "", TRACE_RDMA_QP_DESTROY, 193 OWNER_NONE, OBJECT_NONE, 0, 0, ""); 194 } 195 196 enum spdk_nvmf_rdma_wr_type { 197 RDMA_WR_TYPE_RECV, 198 RDMA_WR_TYPE_SEND, 199 RDMA_WR_TYPE_DATA, 200 }; 201 202 struct spdk_nvmf_rdma_wr { 203 enum spdk_nvmf_rdma_wr_type type; 204 }; 205 206 /* This structure holds commands as they are received off the wire. 207 * It must be dynamically paired with a full request object 208 * (spdk_nvmf_rdma_request) to service a request. It is separate 209 * from the request because RDMA does not appear to order 210 * completions, so occasionally we'll get a new incoming 211 * command when there aren't any free request objects. 
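 * Receives that arrive before a request object is free are parked on the
 * qpair's incoming_queue until one can be taken from the free_queue.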
212 */ 213 struct spdk_nvmf_rdma_recv { 214 struct ibv_recv_wr wr; 215 struct ibv_sge sgl[NVMF_DEFAULT_RX_SGE]; 216 217 struct spdk_nvmf_rdma_qpair *qpair; 218 219 /* In-capsule data buffer */ 220 uint8_t *buf; 221 222 struct spdk_nvmf_rdma_wr rdma_wr; 223 224 STAILQ_ENTRY(spdk_nvmf_rdma_recv) link; 225 }; 226 227 struct spdk_nvmf_rdma_request_data { 228 struct spdk_nvmf_rdma_wr rdma_wr; 229 struct ibv_send_wr wr; 230 struct ibv_sge sgl[SPDK_NVMF_MAX_SGL_ENTRIES]; 231 void *buffers[SPDK_NVMF_MAX_SGL_ENTRIES]; 232 }; 233 234 struct spdk_nvmf_rdma_request { 235 struct spdk_nvmf_request req; 236 bool data_from_pool; 237 238 enum spdk_nvmf_rdma_request_state state; 239 240 struct spdk_nvmf_rdma_recv *recv; 241 242 struct { 243 struct spdk_nvmf_rdma_wr rdma_wr; 244 struct ibv_send_wr wr; 245 struct ibv_sge sgl[NVMF_DEFAULT_RSP_SGE]; 246 } rsp; 247 248 struct spdk_nvmf_rdma_request_data data; 249 250 uint32_t num_outstanding_data_wr; 251 252 TAILQ_ENTRY(spdk_nvmf_rdma_request) link; 253 STAILQ_ENTRY(spdk_nvmf_rdma_request) state_link; 254 }; 255 256 enum spdk_nvmf_rdma_qpair_disconnect_flags { 257 RDMA_QP_DISCONNECTING = 1, 258 RDMA_QP_RECV_DRAINED = 1 << 1, 259 RDMA_QP_SEND_DRAINED = 1 << 2 260 }; 261 262 struct spdk_nvmf_rdma_qpair { 263 struct spdk_nvmf_qpair qpair; 264 265 struct spdk_nvmf_rdma_port *port; 266 struct spdk_nvmf_rdma_poller *poller; 267 268 struct rdma_cm_id *cm_id; 269 struct rdma_cm_id *listen_id; 270 271 /* The maximum number of I/O outstanding on this connection at one time */ 272 uint16_t max_queue_depth; 273 274 /* The maximum number of active RDMA READ and ATOMIC operations at one time */ 275 uint16_t max_read_depth; 276 277 /* The maximum number of RDMA SEND operations at one time */ 278 uint32_t max_send_depth; 279 280 /* The current number of outstanding WRs from this qpair's 281 * recv queue. Should not exceed device->attr.max_queue_depth. 282 */ 283 uint16_t current_recv_depth; 284 285 /* The current number of posted WRs from this qpair's 286 * send queue. Should not exceed max_send_depth. 287 */ 288 uint32_t current_send_depth; 289 290 /* The current number of active RDMA READ operations */ 291 uint16_t current_read_depth; 292 293 /* The maximum number of SGEs per WR on the send queue */ 294 uint32_t max_send_sge; 295 296 /* The maximum number of SGEs per WR on the recv queue */ 297 uint32_t max_recv_sge; 298 299 /* Receives that are waiting for a request object */ 300 STAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue; 301 302 /* Queues to track requests in critical states */ 303 STAILQ_HEAD(, spdk_nvmf_rdma_request) free_queue; 304 305 STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_read_queue; 306 307 STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_write_queue; 308 309 /* Number of requests not in the free state */ 310 uint32_t qd; 311 312 /* Array of size "max_queue_depth" containing RDMA requests. */ 313 struct spdk_nvmf_rdma_request *reqs; 314 315 /* Array of size "max_queue_depth" containing RDMA recvs. */ 316 struct spdk_nvmf_rdma_recv *recvs; 317 318 /* Array of size "max_queue_depth" containing 64 byte capsules 319 * used for receive. 320 */ 321 union nvmf_h2c_msg *cmds; 322 struct ibv_mr *cmds_mr; 323 324 /* Array of size "max_queue_depth" containing 16 byte completions 325 * to be sent back to the user. 326 */ 327 union nvmf_c2h_msg *cpls; 328 struct ibv_mr *cpls_mr; 329 330 /* Array of size "max_queue_depth * InCapsuleDataSize" containing 331 * buffers to be used for in capsule data. 
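 * Each receive's second SGE (sgl[1]) points at its InCapsuleDataSize-sized
 * slice of this array; see spdk_nvmf_rdma_qpair_initialize() below.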
332 */ 333 void *bufs; 334 struct ibv_mr *bufs_mr; 335 336 TAILQ_ENTRY(spdk_nvmf_rdma_qpair) link; 337 338 /* IBV queue pair attributes: they are used to manage 339 * qp state and recover from errors. 340 */ 341 struct ibv_qp_attr ibv_attr; 342 343 uint32_t disconnect_flags; 344 345 /* Poller registered in case the qpair doesn't properly 346 * complete the qpair destruct process and becomes defunct. 347 */ 348 349 struct spdk_poller *destruct_poller; 350 351 /* There are several ways a disconnect can start on a qpair 352 * and they are not all mutually exclusive. It is important 353 * that we only initialize one of these paths. 354 */ 355 bool disconnect_started; 356 }; 357 358 struct spdk_nvmf_rdma_poller { 359 struct spdk_nvmf_rdma_device *device; 360 struct spdk_nvmf_rdma_poll_group *group; 361 362 int num_cqe; 363 int required_num_wr; 364 struct ibv_cq *cq; 365 366 TAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs; 367 368 TAILQ_ENTRY(spdk_nvmf_rdma_poller) link; 369 }; 370 371 struct spdk_nvmf_rdma_poll_group { 372 struct spdk_nvmf_transport_poll_group group; 373 374 /* Requests that are waiting to obtain a data buffer */ 375 TAILQ_HEAD(, spdk_nvmf_rdma_request) pending_data_buf_queue; 376 377 TAILQ_HEAD(, spdk_nvmf_rdma_poller) pollers; 378 }; 379 380 /* Assuming rdma_cm uses just one protection domain per ibv_context. */ 381 struct spdk_nvmf_rdma_device { 382 struct ibv_device_attr attr; 383 struct ibv_context *context; 384 385 struct spdk_mem_map *map; 386 struct ibv_pd *pd; 387 388 TAILQ_ENTRY(spdk_nvmf_rdma_device) link; 389 }; 390 391 struct spdk_nvmf_rdma_port { 392 struct spdk_nvme_transport_id trid; 393 struct rdma_cm_id *id; 394 struct spdk_nvmf_rdma_device *device; 395 uint32_t ref; 396 TAILQ_ENTRY(spdk_nvmf_rdma_port) link; 397 }; 398 399 struct spdk_nvmf_rdma_transport { 400 struct spdk_nvmf_transport transport; 401 402 struct rdma_event_channel *event_channel; 403 404 struct spdk_mempool *data_wr_pool; 405 406 pthread_mutex_t lock; 407 408 /* fields used to poll RDMA/IB events */ 409 nfds_t npoll_fds; 410 struct pollfd *poll_fds; 411 412 TAILQ_HEAD(, spdk_nvmf_rdma_device) devices; 413 TAILQ_HEAD(, spdk_nvmf_rdma_port) ports; 414 }; 415 416 static inline int 417 spdk_nvmf_rdma_check_ibv_state(enum ibv_qp_state state) 418 { 419 switch (state) { 420 case IBV_QPS_RESET: 421 case IBV_QPS_INIT: 422 case IBV_QPS_RTR: 423 case IBV_QPS_RTS: 424 case IBV_QPS_SQD: 425 case IBV_QPS_SQE: 426 case IBV_QPS_ERR: 427 return 0; 428 default: 429 return -1; 430 } 431 } 432 433 static enum ibv_qp_state 434 spdk_nvmf_rdma_update_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) { 435 enum ibv_qp_state old_state, new_state; 436 struct ibv_qp_init_attr init_attr; 437 int rc; 438 439 /* All the attributes needed for recovery */ 440 static int spdk_nvmf_ibv_attr_mask = 441 IBV_QP_STATE | 442 IBV_QP_PKEY_INDEX | 443 IBV_QP_PORT | 444 IBV_QP_ACCESS_FLAGS | 445 IBV_QP_AV | 446 IBV_QP_PATH_MTU | 447 IBV_QP_DEST_QPN | 448 IBV_QP_RQ_PSN | 449 IBV_QP_MAX_DEST_RD_ATOMIC | 450 IBV_QP_MIN_RNR_TIMER | 451 IBV_QP_SQ_PSN | 452 IBV_QP_TIMEOUT | 453 IBV_QP_RETRY_CNT | 454 IBV_QP_RNR_RETRY | 455 IBV_QP_MAX_QP_RD_ATOMIC; 456 457 old_state = rqpair->ibv_attr.qp_state; 458 rc = ibv_query_qp(rqpair->cm_id->qp, &rqpair->ibv_attr, 459 spdk_nvmf_ibv_attr_mask, &init_attr); 460 461 if (rc) 462 { 463 SPDK_ERRLOG("Failed to get updated RDMA queue pair state!\n"); 464 assert(false); 465 } 466 467 new_state = rqpair->ibv_attr.qp_state; 468 469 rc = spdk_nvmf_rdma_check_ibv_state(new_state); 470 if (rc) 471 { 472 SPDK_ERRLOG("QP#%d: 
bad state updated: %u, maybe hardware issue\n", rqpair->qpair.qid, new_state); 473 /* 474 * IBV_QPS_UNKNOWN undefined if lib version smaller than libibverbs-1.1.8 475 * IBV_QPS_UNKNOWN is the enum element after IBV_QPS_ERR 476 */ 477 return IBV_QPS_ERR + 1; 478 } 479 480 if (old_state != new_state) 481 { 482 spdk_trace_record(TRACE_RDMA_QP_STATE_CHANGE, 0, 0, 483 (uintptr_t)rqpair->cm_id, new_state); 484 } 485 return new_state; 486 } 487 488 static const char *str_ibv_qp_state[] = { 489 "IBV_QPS_RESET", 490 "IBV_QPS_INIT", 491 "IBV_QPS_RTR", 492 "IBV_QPS_RTS", 493 "IBV_QPS_SQD", 494 "IBV_QPS_SQE", 495 "IBV_QPS_ERR", 496 "IBV_QPS_UNKNOWN" 497 }; 498 499 static int 500 spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair, 501 enum ibv_qp_state new_state) 502 { 503 int rc; 504 enum ibv_qp_state state; 505 static int attr_mask_rc[] = { 506 [IBV_QPS_RESET] = IBV_QP_STATE, 507 [IBV_QPS_INIT] = (IBV_QP_STATE | 508 IBV_QP_PKEY_INDEX | 509 IBV_QP_PORT | 510 IBV_QP_ACCESS_FLAGS), 511 [IBV_QPS_RTR] = (IBV_QP_STATE | 512 IBV_QP_AV | 513 IBV_QP_PATH_MTU | 514 IBV_QP_DEST_QPN | 515 IBV_QP_RQ_PSN | 516 IBV_QP_MAX_DEST_RD_ATOMIC | 517 IBV_QP_MIN_RNR_TIMER), 518 [IBV_QPS_RTS] = (IBV_QP_STATE | 519 IBV_QP_SQ_PSN | 520 IBV_QP_TIMEOUT | 521 IBV_QP_RETRY_CNT | 522 IBV_QP_RNR_RETRY | 523 IBV_QP_MAX_QP_RD_ATOMIC), 524 [IBV_QPS_SQD] = IBV_QP_STATE, 525 [IBV_QPS_SQE] = IBV_QP_STATE, 526 [IBV_QPS_ERR] = IBV_QP_STATE, 527 }; 528 529 rc = spdk_nvmf_rdma_check_ibv_state(new_state); 530 if (rc) { 531 SPDK_ERRLOG("QP#%d: bad state requested: %u\n", 532 rqpair->qpair.qid, new_state); 533 return rc; 534 } 535 536 rqpair->ibv_attr.cur_qp_state = rqpair->ibv_attr.qp_state; 537 rqpair->ibv_attr.qp_state = new_state; 538 rqpair->ibv_attr.ah_attr.port_num = rqpair->ibv_attr.port_num; 539 540 rc = ibv_modify_qp(rqpair->cm_id->qp, &rqpair->ibv_attr, 541 attr_mask_rc[new_state]); 542 543 if (rc) { 544 SPDK_ERRLOG("QP#%d: failed to set state to: %s, %d (%s)\n", 545 rqpair->qpair.qid, str_ibv_qp_state[new_state], errno, strerror(errno)); 546 return rc; 547 } 548 549 state = spdk_nvmf_rdma_update_ibv_state(rqpair); 550 551 if (state != new_state) { 552 SPDK_ERRLOG("QP#%d: expected state: %s, actual state: %s\n", 553 rqpair->qpair.qid, str_ibv_qp_state[new_state], 554 str_ibv_qp_state[state]); 555 return -1; 556 } 557 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "IBV QP#%u changed to: %s\n", rqpair->qpair.qid, 558 str_ibv_qp_state[state]); 559 return 0; 560 } 561 562 static void 563 nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req) 564 { 565 SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", req->data_from_pool); 566 if (req->req.cmd) { 567 SPDK_ERRLOG("\t\tRequest opcode: %d\n", req->req.cmd->nvmf_cmd.opcode); 568 } 569 if (req->recv) { 570 SPDK_ERRLOG("\t\tRequest recv wr_id%lu\n", req->recv->wr.wr_id); 571 } 572 } 573 574 static void 575 nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair) 576 { 577 int i; 578 579 SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", rqpair->qpair.qid); 580 for (i = 0; i < rqpair->max_queue_depth; i++) { 581 if (rqpair->reqs[i].state != RDMA_REQUEST_STATE_FREE) { 582 nvmf_rdma_dump_request(&rqpair->reqs[i]); 583 } 584 } 585 } 586 587 static void 588 spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair) 589 { 590 spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0); 591 592 spdk_poller_unregister(&rqpair->destruct_poller); 593 594 if (rqpair->qd != 0) { 595 nvmf_rdma_dump_qpair_contents(rqpair); 596 SPDK_WARNLOG("Destroying qpair when queue 
depth is %d\n", rqpair->qd);
	}

	if (rqpair->poller) {
		TAILQ_REMOVE(&rqpair->poller->qpairs, rqpair, link);
	}

	if (rqpair->cmds_mr) {
		ibv_dereg_mr(rqpair->cmds_mr);
	}

	if (rqpair->cpls_mr) {
		ibv_dereg_mr(rqpair->cpls_mr);
	}

	if (rqpair->bufs_mr) {
		ibv_dereg_mr(rqpair->bufs_mr);
	}

	if (rqpair->cm_id) {
		rdma_destroy_qp(rqpair->cm_id);
		rdma_destroy_id(rqpair->cm_id);

		if (rqpair->poller) {
			rqpair->poller->required_num_wr -= MAX_WR_PER_QP(rqpair->max_queue_depth);
		}
	}

	/* Free all memory */
	spdk_dma_free(rqpair->cmds);
	spdk_dma_free(rqpair->cpls);
	spdk_dma_free(rqpair->bufs);
	free(rqpair->reqs);
	free(rqpair->recvs);
	free(rqpair);
}

static int
spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
{
	struct spdk_nvmf_rdma_transport	*rtransport;
	struct spdk_nvmf_rdma_qpair	*rqpair;
	struct spdk_nvmf_rdma_poller	*rpoller;
	int				rc, i, num_cqe, required_num_wr;
	struct spdk_nvmf_rdma_recv	*rdma_recv;
	struct spdk_nvmf_rdma_request	*rdma_req;
	struct spdk_nvmf_transport	*transport;
	struct spdk_nvmf_rdma_device	*device;
	struct ibv_qp_init_attr		ibv_init_attr;

	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
	rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
	transport = &rtransport->transport;
	device = rqpair->port->device;

	memset(&ibv_init_attr, 0, sizeof(struct ibv_qp_init_attr));
	ibv_init_attr.qp_context	= rqpair;
	ibv_init_attr.qp_type		= IBV_QPT_RC;
	ibv_init_attr.send_cq		= rqpair->poller->cq;
	ibv_init_attr.recv_cq		= rqpair->poller->cq;
	ibv_init_attr.cap.max_send_wr	= rqpair->max_queue_depth *
					  2 + 1; /* SEND, READ, and WRITE operations + dummy drain WR */
	ibv_init_attr.cap.max_recv_wr	= rqpair->max_queue_depth +
					  1; /* RECV operations + dummy drain WR */
	ibv_init_attr.cap.max_send_sge	= spdk_min(device->attr.max_sge, NVMF_DEFAULT_TX_SGE);
	ibv_init_attr.cap.max_recv_sge	= spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);

	/* Enlarge CQ size dynamically */
	rpoller = rqpair->poller;
	required_num_wr = rpoller->required_num_wr + MAX_WR_PER_QP(rqpair->max_queue_depth);
	num_cqe = rpoller->num_cqe;
	if (num_cqe < required_num_wr) {
		num_cqe = spdk_max(num_cqe * 2, required_num_wr);
		num_cqe = spdk_min(num_cqe, device->attr.max_cqe);
	}

	if (rpoller->num_cqe != num_cqe) {
		if (required_num_wr > device->attr.max_cqe) {
			SPDK_ERRLOG("RDMA CQE requirement (%d) exceeds device max_cqe limitation (%d)\n",
				    required_num_wr, device->attr.max_cqe);
			rdma_destroy_id(rqpair->cm_id);
			rqpair->cm_id = NULL;
			spdk_nvmf_rdma_qpair_destroy(rqpair);
			return -1;
		}

		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Resize RDMA CQ from %d to %d\n", rpoller->num_cqe, num_cqe);
		rc = ibv_resize_cq(rpoller->cq, num_cqe);
		if (rc) {
			SPDK_ERRLOG("RDMA CQ resize failed: errno %d: %s\n", errno, spdk_strerror(errno));
			rdma_destroy_id(rqpair->cm_id);
			rqpair->cm_id = NULL;
			spdk_nvmf_rdma_qpair_destroy(rqpair);
			return -1;
		}

		rpoller->num_cqe = num_cqe;
	}

	rc = rdma_create_qp(rqpair->cm_id, rqpair->port->device->pd, &ibv_init_attr);
	if (rc) {
		SPDK_ERRLOG("rdma_create_qp failed: errno %d: %s\n", errno, spdk_strerror(errno));
		rdma_destroy_id(rqpair->cm_id);
		rqpair->cm_id = NULL;
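		/* The cm_id has already been released just above; clearing it keeps
		 * spdk_nvmf_rdma_qpair_destroy() below from trying to destroy it a second time. */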
spdk_nvmf_rdma_qpair_destroy(rqpair); 701 return -1; 702 } 703 704 rpoller->required_num_wr = required_num_wr; 705 706 rqpair->max_send_depth = spdk_min((uint32_t)(rqpair->max_queue_depth * 2 + 1), 707 ibv_init_attr.cap.max_send_wr); 708 rqpair->max_send_sge = spdk_min(NVMF_DEFAULT_TX_SGE, ibv_init_attr.cap.max_send_sge); 709 rqpair->max_recv_sge = spdk_min(NVMF_DEFAULT_RX_SGE, ibv_init_attr.cap.max_recv_sge); 710 spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0); 711 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair); 712 713 rqpair->reqs = calloc(rqpair->max_queue_depth, sizeof(*rqpair->reqs)); 714 rqpair->recvs = calloc(rqpair->max_queue_depth, sizeof(*rqpair->recvs)); 715 rqpair->cmds = spdk_dma_zmalloc(rqpair->max_queue_depth * sizeof(*rqpair->cmds), 716 0x1000, NULL); 717 rqpair->cpls = spdk_dma_zmalloc(rqpair->max_queue_depth * sizeof(*rqpair->cpls), 718 0x1000, NULL); 719 720 721 if (transport->opts.in_capsule_data_size > 0) { 722 rqpair->bufs = spdk_dma_zmalloc(rqpair->max_queue_depth * 723 transport->opts.in_capsule_data_size, 724 0x1000, NULL); 725 } 726 727 if (!rqpair->reqs || !rqpair->recvs || !rqpair->cmds || 728 !rqpair->cpls || (transport->opts.in_capsule_data_size && !rqpair->bufs)) { 729 SPDK_ERRLOG("Unable to allocate sufficient memory for RDMA queue.\n"); 730 spdk_nvmf_rdma_qpair_destroy(rqpair); 731 return -1; 732 } 733 734 rqpair->cmds_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->cmds, 735 rqpair->max_queue_depth * sizeof(*rqpair->cmds), 736 IBV_ACCESS_LOCAL_WRITE); 737 rqpair->cpls_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->cpls, 738 rqpair->max_queue_depth * sizeof(*rqpair->cpls), 739 0); 740 741 if (transport->opts.in_capsule_data_size) { 742 rqpair->bufs_mr = ibv_reg_mr(rqpair->cm_id->pd, rqpair->bufs, 743 rqpair->max_queue_depth * 744 transport->opts.in_capsule_data_size, 745 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 746 } 747 748 if (!rqpair->cmds_mr || !rqpair->cpls_mr || (transport->opts.in_capsule_data_size && 749 !rqpair->bufs_mr)) { 750 SPDK_ERRLOG("Unable to register required memory for RDMA queue.\n"); 751 spdk_nvmf_rdma_qpair_destroy(rqpair); 752 return -1; 753 } 754 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Command Array: %p Length: %lx LKey: %x\n", 755 rqpair->cmds, rqpair->max_queue_depth * sizeof(*rqpair->cmds), rqpair->cmds_mr->lkey); 756 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Completion Array: %p Length: %lx LKey: %x\n", 757 rqpair->cpls, rqpair->max_queue_depth * sizeof(*rqpair->cpls), rqpair->cpls_mr->lkey); 758 if (rqpair->bufs && rqpair->bufs_mr) { 759 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "In Capsule Data Array: %p Length: %x LKey: %x\n", 760 rqpair->bufs, rqpair->max_queue_depth * 761 transport->opts.in_capsule_data_size, rqpair->bufs_mr->lkey); 762 } 763 764 STAILQ_INIT(&rqpair->free_queue); 765 STAILQ_INIT(&rqpair->pending_rdma_read_queue); 766 STAILQ_INIT(&rqpair->pending_rdma_write_queue); 767 768 rqpair->current_recv_depth = rqpair->max_queue_depth; 769 for (i = 0; i < rqpair->max_queue_depth; i++) { 770 struct ibv_recv_wr *bad_wr = NULL; 771 772 rdma_recv = &rqpair->recvs[i]; 773 rdma_recv->qpair = rqpair; 774 775 /* Set up memory to receive commands */ 776 if (rqpair->bufs) { 777 rdma_recv->buf = (void *)((uintptr_t)rqpair->bufs + (i * 778 transport->opts.in_capsule_data_size)); 779 } 780 781 rdma_recv->rdma_wr.type = RDMA_WR_TYPE_RECV; 782 783 rdma_recv->sgl[0].addr = (uintptr_t)&rqpair->cmds[i]; 784 rdma_recv->sgl[0].length = sizeof(rqpair->cmds[i]); 785 rdma_recv->sgl[0].lkey = rqpair->cmds_mr->lkey; 786 
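		/* sgl[0] always describes the 64-byte command capsule; sgl[1], added below
		 * when in-capsule data is enabled, describes this recv's data buffer. */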
rdma_recv->wr.num_sge = 1; 787 788 if (rdma_recv->buf && rqpair->bufs_mr) { 789 rdma_recv->sgl[1].addr = (uintptr_t)rdma_recv->buf; 790 rdma_recv->sgl[1].length = transport->opts.in_capsule_data_size; 791 rdma_recv->sgl[1].lkey = rqpair->bufs_mr->lkey; 792 rdma_recv->wr.num_sge++; 793 } 794 795 rdma_recv->wr.wr_id = (uintptr_t)&rdma_recv->rdma_wr; 796 rdma_recv->wr.sg_list = rdma_recv->sgl; 797 798 rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_recv->wr, &bad_wr); 799 assert(rqpair->current_recv_depth > 0); 800 rqpair->current_recv_depth--; 801 if (rc) { 802 SPDK_ERRLOG("Unable to post capsule for RDMA RECV\n"); 803 spdk_nvmf_rdma_qpair_destroy(rqpair); 804 return -1; 805 } 806 } 807 assert(rqpair->current_recv_depth == 0); 808 809 for (i = 0; i < rqpair->max_queue_depth; i++) { 810 rdma_req = &rqpair->reqs[i]; 811 812 rdma_req->req.qpair = &rqpair->qpair; 813 rdma_req->req.cmd = NULL; 814 815 /* Set up memory to send responses */ 816 rdma_req->req.rsp = &rqpair->cpls[i]; 817 818 rdma_req->rsp.sgl[0].addr = (uintptr_t)&rqpair->cpls[i]; 819 rdma_req->rsp.sgl[0].length = sizeof(rqpair->cpls[i]); 820 rdma_req->rsp.sgl[0].lkey = rqpair->cpls_mr->lkey; 821 822 rdma_req->rsp.rdma_wr.type = RDMA_WR_TYPE_SEND; 823 rdma_req->rsp.wr.wr_id = (uintptr_t)&rdma_req->rsp.rdma_wr; 824 rdma_req->rsp.wr.next = NULL; 825 rdma_req->rsp.wr.opcode = IBV_WR_SEND; 826 rdma_req->rsp.wr.send_flags = IBV_SEND_SIGNALED; 827 rdma_req->rsp.wr.sg_list = rdma_req->rsp.sgl; 828 rdma_req->rsp.wr.num_sge = SPDK_COUNTOF(rdma_req->rsp.sgl); 829 830 /* Set up memory for data buffers */ 831 rdma_req->data.rdma_wr.type = RDMA_WR_TYPE_DATA; 832 rdma_req->data.wr.wr_id = (uintptr_t)&rdma_req->data.rdma_wr; 833 rdma_req->data.wr.next = NULL; 834 rdma_req->data.wr.send_flags = IBV_SEND_SIGNALED; 835 rdma_req->data.wr.sg_list = rdma_req->data.sgl; 836 rdma_req->data.wr.num_sge = SPDK_COUNTOF(rdma_req->data.sgl); 837 838 /* Initialize request state to FREE */ 839 rdma_req->state = RDMA_REQUEST_STATE_FREE; 840 STAILQ_INSERT_HEAD(&rqpair->free_queue, rdma_req, state_link); 841 } 842 843 return 0; 844 } 845 846 static int 847 request_transfer_in(struct spdk_nvmf_request *req) 848 { 849 int rc; 850 struct spdk_nvmf_rdma_request *rdma_req; 851 struct spdk_nvmf_qpair *qpair; 852 struct spdk_nvmf_rdma_qpair *rqpair; 853 struct ibv_send_wr *bad_wr = NULL; 854 855 qpair = req->qpair; 856 rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); 857 rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 858 859 assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER); 860 assert(rdma_req != NULL); 861 862 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA READ POSTED. 
Request: %p Connection: %p\n", req, qpair); 863 864 rc = ibv_post_send(rqpair->cm_id->qp, &rdma_req->data.wr, &bad_wr); 865 if (rc) { 866 SPDK_ERRLOG("Unable to transfer data from host to target\n"); 867 return -1; 868 } 869 rqpair->current_read_depth += rdma_req->num_outstanding_data_wr; 870 rqpair->current_send_depth += rdma_req->num_outstanding_data_wr; 871 return 0; 872 } 873 874 static int 875 request_transfer_out(struct spdk_nvmf_request *req, int *data_posted) 876 { 877 int rc; 878 struct spdk_nvmf_rdma_request *rdma_req; 879 struct spdk_nvmf_qpair *qpair; 880 struct spdk_nvmf_rdma_qpair *rqpair; 881 struct spdk_nvme_cpl *rsp; 882 struct ibv_recv_wr *bad_recv_wr = NULL; 883 struct ibv_send_wr *send_wr, *bad_send_wr = NULL; 884 885 *data_posted = 0; 886 qpair = req->qpair; 887 rsp = &req->rsp->nvme_cpl; 888 rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); 889 rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 890 891 /* Advance our sq_head pointer */ 892 if (qpair->sq_head == qpair->sq_head_max) { 893 qpair->sq_head = 0; 894 } else { 895 qpair->sq_head++; 896 } 897 rsp->sqhd = qpair->sq_head; 898 899 /* Post the capsule to the recv buffer */ 900 assert(rdma_req->recv != NULL); 901 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA RECV POSTED. Recv: %p Connection: %p\n", rdma_req->recv, 902 rqpair); 903 rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_req->recv->wr, &bad_recv_wr); 904 if (rc) { 905 SPDK_ERRLOG("Unable to re-post rx descriptor\n"); 906 return rc; 907 } 908 rdma_req->recv = NULL; 909 assert(rqpair->current_recv_depth > 0); 910 rqpair->current_recv_depth--; 911 912 /* Build the response which consists of an optional 913 * RDMA WRITE to transfer data, plus an RDMA SEND 914 * containing the response. 915 */ 916 send_wr = &rdma_req->rsp.wr; 917 918 if (rsp->status.sc == SPDK_NVME_SC_SUCCESS && 919 req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 920 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA WRITE POSTED. Request: %p Connection: %p\n", req, qpair); 921 send_wr = &rdma_req->data.wr; 922 *data_posted = 1; 923 } 924 925 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA SEND POSTED. 
Request: %p Connection: %p\n", req, qpair); 926 927 /* Send the completion */ 928 rc = ibv_post_send(rqpair->cm_id->qp, send_wr, &bad_send_wr); 929 if (rc) { 930 SPDK_ERRLOG("Unable to send response capsule\n"); 931 return rc; 932 } 933 /* +1 for the rsp wr */ 934 rqpair->current_send_depth += rdma_req->num_outstanding_data_wr + 1; 935 936 return 0; 937 } 938 939 static int 940 spdk_nvmf_rdma_event_accept(struct rdma_cm_id *id, struct spdk_nvmf_rdma_qpair *rqpair) 941 { 942 struct spdk_nvmf_rdma_accept_private_data accept_data; 943 struct rdma_conn_param ctrlr_event_data = {}; 944 int rc; 945 946 accept_data.recfmt = 0; 947 accept_data.crqsize = rqpair->max_queue_depth; 948 949 ctrlr_event_data.private_data = &accept_data; 950 ctrlr_event_data.private_data_len = sizeof(accept_data); 951 if (id->ps == RDMA_PS_TCP) { 952 ctrlr_event_data.responder_resources = 0; /* We accept 0 reads from the host */ 953 ctrlr_event_data.initiator_depth = rqpair->max_read_depth; 954 } 955 956 rc = rdma_accept(id, &ctrlr_event_data); 957 if (rc) { 958 SPDK_ERRLOG("Error %d on rdma_accept\n", errno); 959 } else { 960 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Sent back the accept\n"); 961 } 962 963 return rc; 964 } 965 966 static void 967 spdk_nvmf_rdma_event_reject(struct rdma_cm_id *id, enum spdk_nvmf_rdma_transport_error error) 968 { 969 struct spdk_nvmf_rdma_reject_private_data rej_data; 970 971 rej_data.recfmt = 0; 972 rej_data.sts = error; 973 974 rdma_reject(id, &rej_data, sizeof(rej_data)); 975 } 976 977 static int 978 nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *event, 979 new_qpair_fn cb_fn) 980 { 981 struct spdk_nvmf_rdma_transport *rtransport; 982 struct spdk_nvmf_rdma_qpair *rqpair = NULL; 983 struct spdk_nvmf_rdma_port *port; 984 struct rdma_conn_param *rdma_param = NULL; 985 const struct spdk_nvmf_rdma_request_private_data *private_data = NULL; 986 uint16_t max_queue_depth; 987 uint16_t max_read_depth; 988 989 rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); 990 991 assert(event->id != NULL); /* Impossible. Can't even reject the connection. */ 992 assert(event->id->verbs != NULL); /* Impossible. No way to handle this. */ 993 994 rdma_param = &event->param.conn; 995 if (rdma_param->private_data == NULL || 996 rdma_param->private_data_len < sizeof(struct spdk_nvmf_rdma_request_private_data)) { 997 SPDK_ERRLOG("connect request: no private data provided\n"); 998 spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH); 999 return -1; 1000 } 1001 1002 private_data = rdma_param->private_data; 1003 if (private_data->recfmt != 0) { 1004 SPDK_ERRLOG("Received RDMA private data with RECFMT != 0\n"); 1005 spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT); 1006 return -1; 1007 } 1008 1009 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Connect Recv on fabric intf name %s, dev_name %s\n", 1010 event->id->verbs->device->name, event->id->verbs->device->dev_name); 1011 1012 port = event->listen_id->context; 1013 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Listen Id was %p with verbs %p. ListenAddr: %p\n", 1014 event->listen_id, event->listen_id->verbs, port); 1015 1016 /* Figure out the supported queue depth. 
This is a multi-step process 1017 * that takes into account hardware maximums, host provided values, 1018 * and our target's internal memory limits */ 1019 1020 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Calculating Queue Depth\n"); 1021 1022 /* Start with the maximum queue depth allowed by the target */ 1023 max_queue_depth = rtransport->transport.opts.max_queue_depth; 1024 max_read_depth = rtransport->transport.opts.max_queue_depth; 1025 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Target Max Queue Depth: %d\n", 1026 rtransport->transport.opts.max_queue_depth); 1027 1028 /* Next check the local NIC's hardware limitations */ 1029 SPDK_DEBUGLOG(SPDK_LOG_RDMA, 1030 "Local NIC Max Send/Recv Queue Depth: %d Max Read/Write Queue Depth: %d\n", 1031 port->device->attr.max_qp_wr, port->device->attr.max_qp_rd_atom); 1032 max_queue_depth = spdk_min(max_queue_depth, port->device->attr.max_qp_wr); 1033 max_read_depth = spdk_min(max_read_depth, port->device->attr.max_qp_init_rd_atom); 1034 1035 /* Next check the remote NIC's hardware limitations */ 1036 SPDK_DEBUGLOG(SPDK_LOG_RDMA, 1037 "Host (Initiator) NIC Max Incoming RDMA R/W operations: %d Max Outgoing RDMA R/W operations: %d\n", 1038 rdma_param->initiator_depth, rdma_param->responder_resources); 1039 if (rdma_param->initiator_depth > 0) { 1040 max_read_depth = spdk_min(max_read_depth, rdma_param->initiator_depth); 1041 } 1042 1043 /* Finally check for the host software requested values, which are 1044 * optional. */ 1045 if (rdma_param->private_data != NULL && 1046 rdma_param->private_data_len >= sizeof(struct spdk_nvmf_rdma_request_private_data)) { 1047 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Receive Queue Size: %d\n", private_data->hrqsize); 1048 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Host Send Queue Size: %d\n", private_data->hsqsize); 1049 max_queue_depth = spdk_min(max_queue_depth, private_data->hrqsize); 1050 max_queue_depth = spdk_min(max_queue_depth, private_data->hsqsize + 1); 1051 } 1052 1053 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Final Negotiated Queue Depth: %d R/W Depth: %d\n", 1054 max_queue_depth, max_read_depth); 1055 1056 rqpair = calloc(1, sizeof(struct spdk_nvmf_rdma_qpair)); 1057 if (rqpair == NULL) { 1058 SPDK_ERRLOG("Could not allocate new connection.\n"); 1059 spdk_nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES); 1060 return -1; 1061 } 1062 1063 rqpair->port = port; 1064 rqpair->max_queue_depth = max_queue_depth; 1065 rqpair->max_read_depth = max_read_depth; 1066 rqpair->cm_id = event->id; 1067 rqpair->listen_id = event->listen_id; 1068 rqpair->qpair.transport = transport; 1069 STAILQ_INIT(&rqpair->incoming_queue); 1070 event->id->context = &rqpair->qpair; 1071 1072 cb_fn(&rqpair->qpair); 1073 1074 return 0; 1075 } 1076 1077 static int 1078 spdk_nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map, 1079 enum spdk_mem_map_notify_action action, 1080 void *vaddr, size_t size) 1081 { 1082 struct ibv_pd *pd = cb_ctx; 1083 struct ibv_mr *mr; 1084 1085 switch (action) { 1086 case SPDK_MEM_MAP_NOTIFY_REGISTER: 1087 if (!g_nvmf_hooks.get_rkey) { 1088 mr = ibv_reg_mr(pd, vaddr, size, 1089 IBV_ACCESS_LOCAL_WRITE | 1090 IBV_ACCESS_REMOTE_READ | 1091 IBV_ACCESS_REMOTE_WRITE); 1092 if (mr == NULL) { 1093 SPDK_ERRLOG("ibv_reg_mr() failed\n"); 1094 return -1; 1095 } else { 1096 spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr); 1097 } 1098 } else { 1099 spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, 1100 g_nvmf_hooks.get_rkey(pd, vaddr, size)); 1101 } 1102 break; 1103 case SPDK_MEM_MAP_NOTIFY_UNREGISTER: 1104 if 
(!g_nvmf_hooks.get_rkey) { 1105 mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL); 1106 spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size); 1107 if (mr) { 1108 ibv_dereg_mr(mr); 1109 } 1110 } 1111 break; 1112 } 1113 1114 return 0; 1115 } 1116 1117 static int 1118 spdk_nvmf_rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2) 1119 { 1120 /* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */ 1121 return addr_1 == addr_2; 1122 } 1123 1124 static void 1125 spdk_nvmf_rdma_request_free_buffers(struct spdk_nvmf_rdma_request *rdma_req, 1126 struct spdk_nvmf_transport_poll_group *group, struct spdk_nvmf_transport *transport) 1127 { 1128 for (uint32_t i = 0; i < rdma_req->req.iovcnt; i++) { 1129 if (group->buf_cache_count < group->buf_cache_size) { 1130 STAILQ_INSERT_HEAD(&group->buf_cache, 1131 (struct spdk_nvmf_transport_pg_cache_buf *)rdma_req->data.buffers[i], link); 1132 group->buf_cache_count++; 1133 } else { 1134 spdk_mempool_put(transport->data_buf_pool, rdma_req->data.buffers[i]); 1135 } 1136 rdma_req->req.iov[i].iov_base = NULL; 1137 rdma_req->data.buffers[i] = NULL; 1138 rdma_req->req.iov[i].iov_len = 0; 1139 1140 } 1141 rdma_req->data_from_pool = false; 1142 } 1143 1144 typedef enum spdk_nvme_data_transfer spdk_nvme_data_transfer_t; 1145 1146 static spdk_nvme_data_transfer_t 1147 spdk_nvmf_rdma_request_get_xfer(struct spdk_nvmf_rdma_request *rdma_req) 1148 { 1149 enum spdk_nvme_data_transfer xfer; 1150 struct spdk_nvme_cmd *cmd = &rdma_req->req.cmd->nvme_cmd; 1151 struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1; 1152 1153 #ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL 1154 rdma_req->rsp.wr.opcode = IBV_WR_SEND; 1155 rdma_req->rsp.wr.imm_data = 0; 1156 #endif 1157 1158 /* Figure out data transfer direction */ 1159 if (cmd->opc == SPDK_NVME_OPC_FABRIC) { 1160 xfer = spdk_nvme_opc_get_data_transfer(rdma_req->req.cmd->nvmf_cmd.fctype); 1161 } else { 1162 xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); 1163 1164 /* Some admin commands are special cases */ 1165 if ((rdma_req->req.qpair->qid == 0) && 1166 ((cmd->opc == SPDK_NVME_OPC_GET_FEATURES) || 1167 (cmd->opc == SPDK_NVME_OPC_SET_FEATURES))) { 1168 switch (cmd->cdw10 & 0xff) { 1169 case SPDK_NVME_FEAT_LBA_RANGE_TYPE: 1170 case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: 1171 case SPDK_NVME_FEAT_HOST_IDENTIFIER: 1172 break; 1173 default: 1174 xfer = SPDK_NVME_DATA_NONE; 1175 } 1176 } 1177 } 1178 1179 if (xfer == SPDK_NVME_DATA_NONE) { 1180 return xfer; 1181 } 1182 1183 /* Even for commands that may transfer data, they could have specified 0 length. 1184 * We want those to show up with xfer SPDK_NVME_DATA_NONE. 
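 * Unkeyed SGL types carry the length in sgl->unkeyed.length while keyed data
 * blocks carry it in sgl->keyed.length, hence the two groups in the switch below.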
1185 */ 1186 switch (sgl->generic.type) { 1187 case SPDK_NVME_SGL_TYPE_DATA_BLOCK: 1188 case SPDK_NVME_SGL_TYPE_BIT_BUCKET: 1189 case SPDK_NVME_SGL_TYPE_SEGMENT: 1190 case SPDK_NVME_SGL_TYPE_LAST_SEGMENT: 1191 case SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK: 1192 if (sgl->unkeyed.length == 0) { 1193 xfer = SPDK_NVME_DATA_NONE; 1194 } 1195 break; 1196 case SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK: 1197 if (sgl->keyed.length == 0) { 1198 xfer = SPDK_NVME_DATA_NONE; 1199 } 1200 break; 1201 } 1202 1203 return xfer; 1204 } 1205 1206 static int 1207 spdk_nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport, 1208 struct spdk_nvmf_rdma_device *device, 1209 struct spdk_nvmf_rdma_request *rdma_req) 1210 { 1211 struct spdk_nvmf_rdma_qpair *rqpair; 1212 struct spdk_nvmf_rdma_poll_group *rgroup; 1213 void *buf = NULL; 1214 uint32_t length = rdma_req->req.length; 1215 uint64_t translation_len; 1216 uint32_t i = 0; 1217 int rc = 0; 1218 1219 rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); 1220 rgroup = rqpair->poller->group; 1221 rdma_req->req.iovcnt = 0; 1222 while (length) { 1223 if (!(STAILQ_EMPTY(&rgroup->group.buf_cache))) { 1224 rgroup->group.buf_cache_count--; 1225 buf = STAILQ_FIRST(&rgroup->group.buf_cache); 1226 STAILQ_REMOVE_HEAD(&rgroup->group.buf_cache, link); 1227 assert(buf != NULL); 1228 } else { 1229 buf = spdk_mempool_get(rtransport->transport.data_buf_pool); 1230 if (!buf) { 1231 rc = -ENOMEM; 1232 goto err_exit; 1233 } 1234 } 1235 1236 rdma_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) & 1237 ~NVMF_DATA_BUFFER_MASK); 1238 rdma_req->req.iov[i].iov_len = spdk_min(length, rtransport->transport.opts.io_unit_size); 1239 rdma_req->req.iovcnt++; 1240 rdma_req->data.buffers[i] = buf; 1241 rdma_req->data.wr.sg_list[i].addr = (uintptr_t)(rdma_req->req.iov[i].iov_base); 1242 rdma_req->data.wr.sg_list[i].length = rdma_req->req.iov[i].iov_len; 1243 translation_len = rdma_req->req.iov[i].iov_len; 1244 1245 if (!g_nvmf_hooks.get_rkey) { 1246 rdma_req->data.wr.sg_list[i].lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map, 1247 (uint64_t)buf, &translation_len))->lkey; 1248 } else { 1249 rdma_req->data.wr.sg_list[i].lkey = spdk_mem_map_translate(device->map, 1250 (uint64_t)buf, &translation_len); 1251 } 1252 1253 length -= rdma_req->req.iov[i].iov_len; 1254 1255 if (translation_len < rdma_req->req.iov[i].iov_len) { 1256 SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions\n"); 1257 rc = -EINVAL; 1258 goto err_exit; 1259 } 1260 i++; 1261 } 1262 1263 assert(rdma_req->req.iovcnt <= rqpair->max_send_sge); 1264 1265 rdma_req->data_from_pool = true; 1266 1267 return rc; 1268 1269 err_exit: 1270 spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport); 1271 while (i) { 1272 i--; 1273 rdma_req->data.wr.sg_list[i].addr = 0; 1274 rdma_req->data.wr.sg_list[i].length = 0; 1275 rdma_req->data.wr.sg_list[i].lkey = 0; 1276 } 1277 rdma_req->req.iovcnt = 0; 1278 return rc; 1279 } 1280 1281 static int 1282 spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport, 1283 struct spdk_nvmf_rdma_device *device, 1284 struct spdk_nvmf_rdma_request *rdma_req) 1285 { 1286 struct spdk_nvme_cmd *cmd; 1287 struct spdk_nvme_cpl *rsp; 1288 struct spdk_nvme_sgl_descriptor *sgl; 1289 1290 cmd = &rdma_req->req.cmd->nvme_cmd; 1291 rsp = &rdma_req->req.rsp->nvme_cpl; 1292 sgl = &cmd->dptr.sgl1; 1293 1294 if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK && 1295 (sgl->keyed.subtype == 
SPDK_NVME_SGL_SUBTYPE_ADDRESS || 1296 sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) { 1297 if (sgl->keyed.length > rtransport->transport.opts.max_io_size) { 1298 SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n", 1299 sgl->keyed.length, rtransport->transport.opts.max_io_size); 1300 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; 1301 return -1; 1302 } 1303 #ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL 1304 if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) { 1305 if (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) { 1306 rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV; 1307 rdma_req->rsp.wr.imm_data = sgl->keyed.key; 1308 } 1309 } 1310 #endif 1311 1312 /* fill request length and populate iovs */ 1313 rdma_req->req.length = sgl->keyed.length; 1314 1315 if (spdk_nvmf_rdma_request_fill_iovs(rtransport, device, rdma_req) < 0) { 1316 /* No available buffers. Queue this request up. */ 1317 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req); 1318 return 0; 1319 } 1320 1321 /* backward compatible */ 1322 rdma_req->req.data = rdma_req->req.iov[0].iov_base; 1323 1324 /* rdma wr specifics */ 1325 rdma_req->data.wr.num_sge = rdma_req->req.iovcnt; 1326 rdma_req->data.wr.wr.rdma.rkey = sgl->keyed.key; 1327 rdma_req->data.wr.wr.rdma.remote_addr = sgl->address; 1328 if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 1329 rdma_req->data.wr.opcode = IBV_WR_RDMA_WRITE; 1330 rdma_req->data.wr.next = &rdma_req->rsp.wr; 1331 } else if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { 1332 rdma_req->data.wr.opcode = IBV_WR_RDMA_READ; 1333 rdma_req->data.wr.next = NULL; 1334 } 1335 1336 /* set the number of outstanding data WRs for this request. */ 1337 rdma_req->num_outstanding_data_wr = 1; 1338 1339 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req, 1340 rdma_req->req.iovcnt); 1341 1342 return 0; 1343 } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && 1344 sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { 1345 uint64_t offset = sgl->address; 1346 uint32_t max_len = rtransport->transport.opts.in_capsule_data_size; 1347 1348 SPDK_DEBUGLOG(SPDK_LOG_NVMF, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", 1349 offset, sgl->unkeyed.length); 1350 1351 if (offset > max_len) { 1352 SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n", 1353 offset, max_len); 1354 rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET; 1355 return -1; 1356 } 1357 max_len -= (uint32_t)offset; 1358 1359 if (sgl->unkeyed.length > max_len) { 1360 SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", 1361 sgl->unkeyed.length, max_len); 1362 rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; 1363 return -1; 1364 } 1365 1366 rdma_req->num_outstanding_data_wr = 0; 1367 rdma_req->req.data = rdma_req->recv->buf + offset; 1368 rdma_req->data_from_pool = false; 1369 rdma_req->req.length = sgl->unkeyed.length; 1370 1371 rdma_req->req.iov[0].iov_base = rdma_req->req.data; 1372 rdma_req->req.iov[0].iov_len = rdma_req->req.length; 1373 rdma_req->req.iovcnt = 1; 1374 1375 return 0; 1376 } 1377 1378 SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n", 1379 sgl->generic.type, sgl->generic.subtype); 1380 rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID; 1381 return -1; 1382 } 1383 1384 static void 1385 nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req, 1386 struct spdk_nvmf_rdma_transport 
*rtransport) 1387 { 1388 struct spdk_nvmf_rdma_qpair *rqpair; 1389 struct spdk_nvmf_rdma_poll_group *rgroup; 1390 1391 rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); 1392 if (rdma_req->data_from_pool) { 1393 rgroup = rqpair->poller->group; 1394 1395 spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport); 1396 } 1397 rdma_req->num_outstanding_data_wr = 0; 1398 rdma_req->req.length = 0; 1399 rdma_req->req.iovcnt = 0; 1400 rdma_req->req.data = NULL; 1401 rqpair->qd--; 1402 STAILQ_INSERT_HEAD(&rqpair->free_queue, rdma_req, state_link); 1403 rdma_req->state = RDMA_REQUEST_STATE_FREE; 1404 } 1405 1406 static bool 1407 spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport, 1408 struct spdk_nvmf_rdma_request *rdma_req) 1409 { 1410 struct spdk_nvmf_rdma_qpair *rqpair; 1411 struct spdk_nvmf_rdma_device *device; 1412 struct spdk_nvmf_rdma_poll_group *rgroup; 1413 struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl; 1414 int rc; 1415 struct spdk_nvmf_rdma_recv *rdma_recv; 1416 enum spdk_nvmf_rdma_request_state prev_state; 1417 bool progress = false; 1418 int data_posted; 1419 1420 rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); 1421 device = rqpair->port->device; 1422 rgroup = rqpair->poller->group; 1423 1424 assert(rdma_req->state != RDMA_REQUEST_STATE_FREE); 1425 1426 /* If the queue pair is in an error state, force the request to the completed state 1427 * to release resources. */ 1428 if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { 1429 if (rdma_req->state == RDMA_REQUEST_STATE_NEED_BUFFER) { 1430 TAILQ_REMOVE(&rgroup->pending_data_buf_queue, rdma_req, link); 1431 } 1432 rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; 1433 } 1434 1435 /* The loop here is to allow for several back-to-back state changes. */ 1436 do { 1437 prev_state = rdma_req->state; 1438 1439 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p entering state %d\n", rdma_req, prev_state); 1440 1441 switch (rdma_req->state) { 1442 case RDMA_REQUEST_STATE_FREE: 1443 /* Some external code must kick a request into RDMA_REQUEST_STATE_NEW 1444 * to escape this state. */ 1445 break; 1446 case RDMA_REQUEST_STATE_NEW: 1447 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEW, 0, 0, 1448 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1449 rdma_recv = rdma_req->recv; 1450 1451 /* The first element of the SGL is the NVMe command */ 1452 rdma_req->req.cmd = (union nvmf_h2c_msg *)rdma_recv->sgl[0].addr; 1453 memset(rdma_req->req.rsp, 0, sizeof(*rdma_req->req.rsp)); 1454 1455 if (rqpair->ibv_attr.qp_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { 1456 rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; 1457 break; 1458 } 1459 1460 /* The next state transition depends on the data transfer needs of this request. */ 1461 rdma_req->req.xfer = spdk_nvmf_rdma_request_get_xfer(rdma_req); 1462 1463 /* If no data to transfer, ready to execute. 
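	 * Otherwise the request is queued on the poll group's pending_data_buf_queue
	 * and waits in RDMA_REQUEST_STATE_NEED_BUFFER for a free data buffer.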
*/ 1464 if (rdma_req->req.xfer == SPDK_NVME_DATA_NONE) { 1465 rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE; 1466 break; 1467 } 1468 1469 rdma_req->state = RDMA_REQUEST_STATE_NEED_BUFFER; 1470 TAILQ_INSERT_TAIL(&rgroup->pending_data_buf_queue, rdma_req, link); 1471 break; 1472 case RDMA_REQUEST_STATE_NEED_BUFFER: 1473 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, 0, 0, 1474 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1475 1476 assert(rdma_req->req.xfer != SPDK_NVME_DATA_NONE); 1477 1478 if (rdma_req != TAILQ_FIRST(&rgroup->pending_data_buf_queue)) { 1479 /* This request needs to wait in line to obtain a buffer */ 1480 break; 1481 } 1482 1483 /* Try to get a data buffer */ 1484 rc = spdk_nvmf_rdma_request_parse_sgl(rtransport, device, rdma_req); 1485 if (rc < 0) { 1486 TAILQ_REMOVE(&rgroup->pending_data_buf_queue, rdma_req, link); 1487 rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 1488 rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; 1489 break; 1490 } 1491 1492 if (!rdma_req->req.data) { 1493 /* No buffers available. */ 1494 break; 1495 } 1496 1497 TAILQ_REMOVE(&rgroup->pending_data_buf_queue, rdma_req, link); 1498 1499 /* If data is transferring from host to controller and the data didn't 1500 * arrive using in capsule data, we need to do a transfer from the host. 1501 */ 1502 if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER && rdma_req->data_from_pool) { 1503 STAILQ_INSERT_TAIL(&rqpair->pending_rdma_read_queue, rdma_req, state_link); 1504 rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING; 1505 break; 1506 } 1507 1508 rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE; 1509 break; 1510 case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING: 1511 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, 0, 0, 1512 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1513 1514 if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_read_queue)) { 1515 /* This request needs to wait in line to perform RDMA */ 1516 break; 1517 } 1518 if (rqpair->current_send_depth + rdma_req->num_outstanding_data_wr > rqpair->max_send_depth 1519 || rqpair->current_read_depth + rdma_req->num_outstanding_data_wr > rqpair->max_read_depth) { 1520 /* We can only have so many WRs outstanding. we have to wait until some finish. */ 1521 break; 1522 } 1523 1524 /* We have already verified that this request is the head of the queue. */ 1525 STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_read_queue, state_link); 1526 1527 rc = request_transfer_in(&rdma_req->req); 1528 if (!rc) { 1529 rdma_req->state = RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER; 1530 } else { 1531 rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 1532 rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; 1533 } 1534 break; 1535 case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER: 1536 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0, 1537 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1538 /* Some external code must kick a request into RDMA_REQUEST_STATE_READY_TO_EXECUTE 1539 * to escape this state. 
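			 * (That kick normally happens when the completion for the RDMA READ is
			 * reaped from the completion queue by the poller.)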
*/ 1540 break; 1541 case RDMA_REQUEST_STATE_READY_TO_EXECUTE: 1542 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, 0, 0, 1543 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1544 rdma_req->state = RDMA_REQUEST_STATE_EXECUTING; 1545 spdk_nvmf_request_exec(&rdma_req->req); 1546 break; 1547 case RDMA_REQUEST_STATE_EXECUTING: 1548 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTING, 0, 0, 1549 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1550 /* Some external code must kick a request into RDMA_REQUEST_STATE_EXECUTED 1551 * to escape this state. */ 1552 break; 1553 case RDMA_REQUEST_STATE_EXECUTED: 1554 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTED, 0, 0, 1555 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1556 if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { 1557 STAILQ_INSERT_TAIL(&rqpair->pending_rdma_write_queue, rdma_req, state_link); 1558 rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING; 1559 } else { 1560 rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; 1561 } 1562 break; 1563 case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING: 1564 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, 0, 0, 1565 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1566 1567 if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_write_queue)) { 1568 /* This request needs to wait in line to perform RDMA */ 1569 break; 1570 } 1571 if ((rqpair->current_send_depth + rdma_req->num_outstanding_data_wr + 1) > 1572 rqpair->max_send_depth) { 1573 /* We can only have so many WRs outstanding. we have to wait until some finish. 1574 * +1 since each request has an additional wr in the resp. */ 1575 break; 1576 } 1577 1578 /* We have already verified that this request is the head of the queue. */ 1579 STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_write_queue, state_link); 1580 1581 /* The data transfer will be kicked off from 1582 * RDMA_REQUEST_STATE_READY_TO_COMPLETE state. 1583 */ 1584 rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE; 1585 break; 1586 case RDMA_REQUEST_STATE_READY_TO_COMPLETE: 1587 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, 0, 0, 1588 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1589 rc = request_transfer_out(&rdma_req->req, &data_posted); 1590 assert(rc == 0); /* No good way to handle this currently */ 1591 if (rc) { 1592 rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; 1593 } else { 1594 rdma_req->state = data_posted ? RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST : 1595 RDMA_REQUEST_STATE_COMPLETING; 1596 } 1597 break; 1598 case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST: 1599 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0, 1600 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1601 /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED 1602 * to escape this state. */ 1603 break; 1604 case RDMA_REQUEST_STATE_COMPLETING: 1605 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETING, 0, 0, 1606 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1607 /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED 1608 * to escape this state. 
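			 * (That kick normally happens when the completion for the response SEND
			 * is reaped from the completion queue.)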
*/ 1609 break; 1610 case RDMA_REQUEST_STATE_COMPLETED: 1611 spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETED, 0, 0, 1612 (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); 1613 1614 nvmf_rdma_request_free(rdma_req, rtransport); 1615 break; 1616 case RDMA_REQUEST_NUM_STATES: 1617 default: 1618 assert(0); 1619 break; 1620 } 1621 1622 if (rdma_req->state != prev_state) { 1623 progress = true; 1624 } 1625 } while (rdma_req->state != prev_state); 1626 1627 return progress; 1628 } 1629 1630 /* Public API callbacks begin here */ 1631 1632 #define SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH 128 1633 #define SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH 128 1634 #define SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR 64 1635 #define SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE 4096 1636 #define SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE 131072 1637 #define SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE (SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE / SPDK_NVMF_MAX_SGL_ENTRIES) 1638 #define SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS 4096 1639 #define SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE 32 1640 1641 static void 1642 spdk_nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts) 1643 { 1644 opts->max_queue_depth = SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH; 1645 opts->max_qpairs_per_ctrlr = SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR; 1646 opts->in_capsule_data_size = SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE; 1647 opts->max_io_size = SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE; 1648 opts->io_unit_size = SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE; 1649 opts->max_aq_depth = SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH; 1650 opts->num_shared_buffers = SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS; 1651 opts->buf_cache_size = SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE; 1652 } 1653 1654 static int spdk_nvmf_rdma_destroy(struct spdk_nvmf_transport *transport); 1655 1656 static struct spdk_nvmf_transport * 1657 spdk_nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts) 1658 { 1659 int rc; 1660 struct spdk_nvmf_rdma_transport *rtransport; 1661 struct spdk_nvmf_rdma_device *device, *tmp; 1662 struct ibv_context **contexts; 1663 uint32_t i; 1664 int flag; 1665 uint32_t sge_count; 1666 uint32_t min_shared_buffers; 1667 int max_device_sge = SPDK_NVMF_MAX_SGL_ENTRIES; 1668 1669 rtransport = calloc(1, sizeof(*rtransport)); 1670 if (!rtransport) { 1671 return NULL; 1672 } 1673 1674 if (pthread_mutex_init(&rtransport->lock, NULL)) { 1675 SPDK_ERRLOG("pthread_mutex_init() failed\n"); 1676 free(rtransport); 1677 return NULL; 1678 } 1679 1680 TAILQ_INIT(&rtransport->devices); 1681 TAILQ_INIT(&rtransport->ports); 1682 1683 rtransport->transport.ops = &spdk_nvmf_transport_rdma; 1684 1685 SPDK_INFOLOG(SPDK_LOG_RDMA, "*** RDMA Transport Init ***\n" 1686 " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" 1687 " max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" 1688 " in_capsule_data_size=%d, max_aq_depth=%d\n" 1689 " num_shared_buffers=%d\n", 1690 opts->max_queue_depth, 1691 opts->max_io_size, 1692 opts->max_qpairs_per_ctrlr, 1693 opts->io_unit_size, 1694 opts->in_capsule_data_size, 1695 opts->max_aq_depth, 1696 opts->num_shared_buffers); 1697 1698 /* I/O unit size cannot be larger than max I/O size */ 1699 if (opts->io_unit_size > opts->max_io_size) { 1700 opts->io_unit_size = opts->max_io_size; 1701 } 1702 1703 if (opts->num_shared_buffers < (SPDK_NVMF_MAX_SGL_ENTRIES * 2)) { 1704 SPDK_ERRLOG("The number of shared data buffers (%d) is less than " 1705 "the minimum number required to guarantee that forward progress can be made (%d)\n", 1706 opts->num_shared_buffers, (SPDK_NVMF_MAX_SGL_ENTRIES * 2)); 1707
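		/* Editorial note (hedged): the floor of 2 * SPDK_NVMF_MAX_SGL_ENTRIES
		 * presumably ensures that at least two requests can each obtain a full
		 * SGL worth of buffers.  For illustration only: if
		 * SPDK_NVMF_MAX_SGL_ENTRIES were 16, the minimum would be 32 shared
		 * buffers.  With fewer than that, tear the transport back down. */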
spdk_nvmf_rdma_destroy(&rtransport->transport); 1708 return NULL; 1709 } 1710 1711 min_shared_buffers = spdk_thread_get_count() * opts->buf_cache_size; 1712 if (min_shared_buffers > opts->num_shared_buffers) { 1713 SPDK_ERRLOG("There are not enough buffers to satisfy " 1714 "per-poll group caches for each thread. (%" PRIu32 ") " 1715 "supplied. (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers); 1716 SPDK_ERRLOG("Please specify a larger number of shared buffers\n"); 1717 spdk_nvmf_rdma_destroy(&rtransport->transport); 1718 return NULL; 1719 } 1720 1721 sge_count = opts->max_io_size / opts->io_unit_size; 1722 if (sge_count > NVMF_DEFAULT_TX_SGE) { 1723 SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size); 1724 spdk_nvmf_rdma_destroy(&rtransport->transport); 1725 return NULL; 1726 } 1727 1728 rtransport->event_channel = rdma_create_event_channel(); 1729 if (rtransport->event_channel == NULL) { 1730 SPDK_ERRLOG("rdma_create_event_channel() failed, %s\n", spdk_strerror(errno)); 1731 spdk_nvmf_rdma_destroy(&rtransport->transport); 1732 return NULL; 1733 } 1734 1735 flag = fcntl(rtransport->event_channel->fd, F_GETFL); 1736 if (fcntl(rtransport->event_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) { 1737 SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n", 1738 rtransport->event_channel->fd, spdk_strerror(errno)); 1739 spdk_nvmf_rdma_destroy(&rtransport->transport); 1740 return NULL; 1741 } 1742 1743 rtransport->data_wr_pool = spdk_mempool_create("spdk_nvmf_rdma_wr_data", 1744 opts->max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES, 1745 sizeof(struct spdk_nvmf_rdma_request_data), 1746 SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, 1747 SPDK_ENV_SOCKET_ID_ANY); 1748 if (!rtransport->data_wr_pool) { 1749 SPDK_ERRLOG("Unable to allocate work request pool for poll group\n"); 1750 spdk_nvmf_rdma_destroy(&rtransport->transport); 1751 return NULL; 1752 } 1753 1754 contexts = rdma_get_devices(NULL); 1755 if (contexts == NULL) { 1756 SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno); 1757 spdk_nvmf_rdma_destroy(&rtransport->transport); 1758 return NULL; 1759 } 1760 1761 i = 0; 1762 rc = 0; 1763 while (contexts[i] != NULL) { 1764 device = calloc(1, sizeof(*device)); 1765 if (!device) { 1766 SPDK_ERRLOG("Unable to allocate memory for RDMA devices.\n"); 1767 rc = -ENOMEM; 1768 break; 1769 } 1770 device->context = contexts[i]; 1771 rc = ibv_query_device(device->context, &device->attr); 1772 if (rc < 0) { 1773 SPDK_ERRLOG("Failed to query RDMA device attributes.\n"); 1774 free(device); 1775 break; 1776 1777 } 1778 1779 max_device_sge = spdk_min(max_device_sge, device->attr.max_sge); 1780 1781 #ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL 1782 if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) == 0) { 1783 SPDK_WARNLOG("The libibverbs on this system supports SEND_WITH_INVALIDATE, " 1784 "but the device with vendor ID %u does not.\n", device->attr.vendor_id); 1785 } 1786 1787 /** 1788 * The vendor ID is assigned by the IEEE and an ID of 0 implies Soft-RoCE. 1789 * The Soft-RoCE RXE driver does not currently support send with invalidate, 1790 * but incorrectly reports that it does. There are changes making their way 1791 * through the kernel now that will enable this feature. When they are merged, 1792 * we can conditionally enable this feature. 1793 * 1794 * TODO: enable this for versions of the kernel rxe driver that support it.
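 *
 * (Editorial note: the check below simply clears IBV_DEVICE_MEM_MGT_EXTENSIONS
 * from the cached capability flags when vendor_id == 0, so send-with-invalidate
 * is never negotiated for Soft-RoCE devices.)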
1795 */ 1796 if (device->attr.vendor_id == 0) { 1797 device->attr.device_cap_flags &= ~(IBV_DEVICE_MEM_MGT_EXTENSIONS); 1798 } 1799 #endif 1800 1801 /* set up device context async ev fd as NON_BLOCKING */ 1802 flag = fcntl(device->context->async_fd, F_GETFL); 1803 rc = fcntl(device->context->async_fd, F_SETFL, flag | O_NONBLOCK); 1804 if (rc < 0) { 1805 SPDK_ERRLOG("Failed to set context async fd to NONBLOCK.\n"); 1806 free(device); 1807 break; 1808 } 1809 1810 TAILQ_INSERT_TAIL(&rtransport->devices, device, link); 1811 i++; 1812 } 1813 rdma_free_devices(contexts); 1814 1815 if (opts->io_unit_size * max_device_sge < opts->max_io_size) { 1816 /* divide and round up. */ 1817 opts->io_unit_size = (opts->max_io_size + max_device_sge - 1) / max_device_sge; 1818 1819 /* round up to the nearest 4k. */ 1820 opts->io_unit_size = (opts->io_unit_size + NVMF_DATA_BUFFER_ALIGNMENT - 1) & ~NVMF_DATA_BUFFER_MASK; 1821 1822 opts->io_unit_size = spdk_max(opts->io_unit_size, SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE); 1823 SPDK_NOTICELOG("Adjusting the io unit size to fit the device's maximum I/O size. New I/O unit size %u\n", 1824 opts->io_unit_size); 1825 } 1826 1827 if (rc < 0) { 1828 spdk_nvmf_rdma_destroy(&rtransport->transport); 1829 return NULL; 1830 } 1831 1832 /* Set up poll descriptor array to monitor events from RDMA and IB 1833 * in a single poll syscall 1834 */ 1835 rtransport->npoll_fds = i + 1; 1836 i = 0; 1837 rtransport->poll_fds = calloc(rtransport->npoll_fds, sizeof(struct pollfd)); 1838 if (rtransport->poll_fds == NULL) { 1839 SPDK_ERRLOG("poll_fds allocation failed\n"); 1840 spdk_nvmf_rdma_destroy(&rtransport->transport); 1841 return NULL; 1842 } 1843 1844 rtransport->poll_fds[i].fd = rtransport->event_channel->fd; 1845 rtransport->poll_fds[i++].events = POLLIN; 1846 1847 TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) { 1848 rtransport->poll_fds[i].fd = device->context->async_fd; 1849 rtransport->poll_fds[i++].events = POLLIN; 1850 } 1851 1852 return &rtransport->transport; 1853 } 1854 1855 static int 1856 spdk_nvmf_rdma_destroy(struct spdk_nvmf_transport *transport) 1857 { 1858 struct spdk_nvmf_rdma_transport *rtransport; 1859 struct spdk_nvmf_rdma_port *port, *port_tmp; 1860 struct spdk_nvmf_rdma_device *device, *device_tmp; 1861 1862 rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); 1863 1864 TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) { 1865 TAILQ_REMOVE(&rtransport->ports, port, link); 1866 rdma_destroy_id(port->id); 1867 free(port); 1868 } 1869 1870 if (rtransport->poll_fds != NULL) { 1871 free(rtransport->poll_fds); 1872 } 1873 1874 if (rtransport->event_channel != NULL) { 1875 rdma_destroy_event_channel(rtransport->event_channel); 1876 } 1877 1878 TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) { 1879 TAILQ_REMOVE(&rtransport->devices, device, link); 1880 if (device->map) { 1881 spdk_mem_map_free(&device->map); 1882 } 1883 if (device->pd) { 1884 if (!g_nvmf_hooks.get_ibv_pd) { 1885 ibv_dealloc_pd(device->pd); 1886 } 1887 } 1888 free(device); 1889 } 1890 1891 if (rtransport->data_wr_pool != NULL) { 1892 if (spdk_mempool_count(rtransport->data_wr_pool) != 1893 (transport->opts.max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES)) { 1894 SPDK_ERRLOG("transport wr pool count is %zu but should be %u\n", 1895 spdk_mempool_count(rtransport->data_wr_pool), 1896 transport->opts.max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES); 1897 } 1898 } 1899 1900 spdk_mempool_free(rtransport->data_wr_pool); 1901 
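	/* Editorial note: a count mismatch reported above indicates that data WR
	 * contexts were still outstanding (or were leaked) at transport teardown;
	 * the pool is freed regardless. */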
pthread_mutex_destroy(&rtransport->lock); 1902 free(rtransport); 1903 1904 return 0; 1905 } 1906 1907 static int 1908 spdk_nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id, 1909 struct spdk_nvme_transport_id *trid, 1910 bool peer); 1911 1912 const struct spdk_mem_map_ops g_nvmf_rdma_map_ops = { 1913 .notify_cb = spdk_nvmf_rdma_mem_notify, 1914 .are_contiguous = spdk_nvmf_rdma_check_contiguous_entries 1915 }; 1916 1917 static int 1918 spdk_nvmf_rdma_listen(struct spdk_nvmf_transport *transport, 1919 const struct spdk_nvme_transport_id *trid) 1920 { 1921 struct spdk_nvmf_rdma_transport *rtransport; 1922 struct spdk_nvmf_rdma_device *device; 1923 struct spdk_nvmf_rdma_port *port_tmp, *port; 1924 struct ibv_pd *pd; 1925 struct addrinfo *res; 1926 struct addrinfo hints; 1927 int family; 1928 int rc; 1929 1930 rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); 1931 1932 port = calloc(1, sizeof(*port)); 1933 if (!port) { 1934 return -ENOMEM; 1935 } 1936 1937 /* Selectively copy the trid. Things like NQN don't matter here - that 1938 * mapping is enforced elsewhere. 1939 */ 1940 port->trid.trtype = SPDK_NVME_TRANSPORT_RDMA; 1941 port->trid.adrfam = trid->adrfam; 1942 snprintf(port->trid.traddr, sizeof(port->trid.traddr), "%s", trid->traddr); 1943 snprintf(port->trid.trsvcid, sizeof(port->trid.trsvcid), "%s", trid->trsvcid); 1944 1945 pthread_mutex_lock(&rtransport->lock); 1946 assert(rtransport->event_channel != NULL); 1947 TAILQ_FOREACH(port_tmp, &rtransport->ports, link) { 1948 if (spdk_nvme_transport_id_compare(&port_tmp->trid, &port->trid) == 0) { 1949 port_tmp->ref++; 1950 free(port); 1951 /* Already listening at this address */ 1952 pthread_mutex_unlock(&rtransport->lock); 1953 return 0; 1954 } 1955 } 1956 1957 rc = rdma_create_id(rtransport->event_channel, &port->id, port, RDMA_PS_TCP); 1958 if (rc < 0) { 1959 SPDK_ERRLOG("rdma_create_id() failed\n"); 1960 free(port); 1961 pthread_mutex_unlock(&rtransport->lock); 1962 return rc; 1963 } 1964 1965 switch (port->trid.adrfam) { 1966 case SPDK_NVMF_ADRFAM_IPV4: 1967 family = AF_INET; 1968 break; 1969 case SPDK_NVMF_ADRFAM_IPV6: 1970 family = AF_INET6; 1971 break; 1972 default: 1973 SPDK_ERRLOG("Unhandled ADRFAM %d\n", port->trid.adrfam); 1974 free(port); 1975 pthread_mutex_unlock(&rtransport->lock); 1976 return -EINVAL; 1977 } 1978 1979 memset(&hints, 0, sizeof(hints)); 1980 hints.ai_family = family; 1981 hints.ai_flags = AI_NUMERICSERV; 1982 hints.ai_socktype = SOCK_STREAM; 1983 hints.ai_protocol = 0; 1984 1985 rc = getaddrinfo(port->trid.traddr, port->trid.trsvcid, &hints, &res); 1986 if (rc) { 1987 SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(rc), rc); 1988 free(port); 1989 pthread_mutex_unlock(&rtransport->lock); 1990 return -EINVAL; 1991 } 1992 1993 rc = rdma_bind_addr(port->id, res->ai_addr); 1994 freeaddrinfo(res); 1995 1996 if (rc < 0) { 1997 SPDK_ERRLOG("rdma_bind_addr() failed\n"); 1998 rdma_destroy_id(port->id); 1999 free(port); 2000 pthread_mutex_unlock(&rtransport->lock); 2001 return rc; 2002 } 2003 2004 if (!port->id->verbs) { 2005 SPDK_ERRLOG("ibv_context is null\n"); 2006 rdma_destroy_id(port->id); 2007 free(port); 2008 pthread_mutex_unlock(&rtransport->lock); 2009 return -1; 2010 } 2011 2012 rc = rdma_listen(port->id, 10); /* 10 = backlog */ 2013 if (rc < 0) { 2014 SPDK_ERRLOG("rdma_listen() failed\n"); 2015 rdma_destroy_id(port->id); 2016 free(port); 2017 pthread_mutex_unlock(&rtransport->lock); 2018 return rc; 2019 } 2020 2021 TAILQ_FOREACH(device, &rtransport->devices, link) { 
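		/* Editorial note: match the ibv_context that rdma_bind_addr() resolved
		 * for this listen address against the devices enumerated when the
		 * transport was created. */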
2022 if (device->context == port->id->verbs) { 2023 port->device = device; 2024 break; 2025 } 2026 } 2027 if (!port->device) { 2028 SPDK_ERRLOG("Accepted a connection with verbs %p, but unable to find a corresponding device.\n", 2029 port->id->verbs); 2030 rdma_destroy_id(port->id); 2031 free(port); 2032 pthread_mutex_unlock(&rtransport->lock); 2033 return -EINVAL; 2034 } 2035 2036 pd = NULL; 2037 if (g_nvmf_hooks.get_ibv_pd) { 2038 if (spdk_nvmf_rdma_trid_from_cm_id(port->id, &port->trid, 1) < 0) { 2039 rdma_destroy_id(port->id); 2040 free(port); 2041 pthread_mutex_unlock(&rtransport->lock); 2042 return -EINVAL; 2043 } 2044 2045 pd = g_nvmf_hooks.get_ibv_pd(&port->trid, port->id->verbs); 2046 } 2047 2048 if (device->pd == NULL) { 2049 /* Haven't created a protection domain yet. */ 2050 2051 if (!g_nvmf_hooks.get_ibv_pd) { 2052 device->pd = ibv_alloc_pd(device->context); 2053 if (!device->pd) { 2054 SPDK_ERRLOG("Unable to allocate protection domain.\n"); 2055 rdma_destroy_id(port->id); 2056 free(port); 2057 pthread_mutex_unlock(&rtransport->lock); 2058 return -ENOMEM; 2059 } 2060 } else { 2061 device->pd = pd; 2062 } 2063 2064 assert(device->map == NULL); 2065 2066 device->map = spdk_mem_map_alloc(0, &g_nvmf_rdma_map_ops, device->pd); 2067 if (!device->map) { 2068 SPDK_ERRLOG("Unable to allocate memory map for listen address\n"); 2069 if (!g_nvmf_hooks.get_ibv_pd) { 2070 ibv_dealloc_pd(device->pd); 2071 } 2072 rdma_destroy_id(port->id); 2073 free(port); 2074 pthread_mutex_unlock(&rtransport->lock); 2075 return -ENOMEM; 2076 } 2077 } else if (g_nvmf_hooks.get_ibv_pd) { 2078 /* A protection domain exists for this device, but the user has 2079 * enabled hooks. Verify that they only supply one pd per device. */ 2080 if (device->pd != pd) { 2081 SPDK_ERRLOG("The NVMe-oF target only supports one protection domain per device.\n"); 2082 rdma_destroy_id(port->id); 2083 free(port); 2084 pthread_mutex_unlock(&rtransport->lock); 2085 return -EINVAL; 2086 } 2087 } 2088 2089 assert(device->map != NULL); 2090 assert(device->pd != NULL); 2091 2092 SPDK_INFOLOG(SPDK_LOG_RDMA, "*** NVMf Target Listening on %s port %d ***\n", 2093 port->trid.traddr, ntohs(rdma_get_src_port(port->id))); 2094 2095 port->ref = 1; 2096 2097 TAILQ_INSERT_TAIL(&rtransport->ports, port, link); 2098 pthread_mutex_unlock(&rtransport->lock); 2099 2100 return 0; 2101 } 2102 2103 static int 2104 spdk_nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport, 2105 const struct spdk_nvme_transport_id *_trid) 2106 { 2107 struct spdk_nvmf_rdma_transport *rtransport; 2108 struct spdk_nvmf_rdma_port *port, *tmp; 2109 struct spdk_nvme_transport_id trid = {}; 2110 2111 rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); 2112 2113 /* Selectively copy the trid. Things like NQN don't matter here - that 2114 * mapping is enforced elsewhere. 
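 * (Editorial note: this mirrors the copy done in spdk_nvmf_rdma_listen(), so
 * the spdk_nvme_transport_id_compare() call below matches listeners on the
 * address fields populated here.)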
2115 */ 2116 trid.trtype = SPDK_NVME_TRANSPORT_RDMA; 2117 trid.adrfam = _trid->adrfam; 2118 snprintf(trid.traddr, sizeof(port->trid.traddr), "%s", _trid->traddr); 2119 snprintf(trid.trsvcid, sizeof(port->trid.trsvcid), "%s", _trid->trsvcid); 2120 2121 pthread_mutex_lock(&rtransport->lock); 2122 TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, tmp) { 2123 if (spdk_nvme_transport_id_compare(&port->trid, &trid) == 0) { 2124 assert(port->ref > 0); 2125 port->ref--; 2126 if (port->ref == 0) { 2127 TAILQ_REMOVE(&rtransport->ports, port, link); 2128 rdma_destroy_id(port->id); 2129 free(port); 2130 } 2131 break; 2132 } 2133 } 2134 2135 pthread_mutex_unlock(&rtransport->lock); 2136 return 0; 2137 } 2138 2139 static bool 2140 spdk_nvmf_rdma_qpair_is_idle(struct spdk_nvmf_qpair *qpair) 2141 { 2142 struct spdk_nvmf_rdma_qpair *rqpair; 2143 2144 rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 2145 2146 if (rqpair->qd == 0) { 2147 return true; 2148 } 2149 return false; 2150 } 2151 2152 static void 2153 spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport, 2154 struct spdk_nvmf_rdma_qpair *rqpair, bool drain) 2155 { 2156 struct spdk_nvmf_rdma_request *rdma_req, *req_tmp; 2157 2158 /* We process I/O in the data transfer pending queue at the highest priority. RDMA reads first */ 2159 STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_read_queue, state_link, req_tmp) { 2160 if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) { 2161 break; 2162 } 2163 } 2164 2165 /* Then RDMA writes since reads have stronger restrictions than writes */ 2166 STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_write_queue, state_link, req_tmp) { 2167 if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) { 2168 break; 2169 } 2170 } 2171 2172 /* The second highest priority is I/O waiting on memory buffers. */ 2173 TAILQ_FOREACH_SAFE(rdma_req, &rqpair->poller->group->pending_data_buf_queue, link, 2174 req_tmp) { 2175 if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) { 2176 break; 2177 } 2178 } 2179 2180 while (!STAILQ_EMPTY(&rqpair->free_queue) && !STAILQ_EMPTY(&rqpair->incoming_queue)) { 2181 2182 rdma_req = STAILQ_FIRST(&rqpair->free_queue); 2183 STAILQ_REMOVE_HEAD(&rqpair->free_queue, state_link); 2184 rdma_req->recv = STAILQ_FIRST(&rqpair->incoming_queue); 2185 STAILQ_REMOVE_HEAD(&rqpair->incoming_queue, link); 2186 2187 rqpair->qd++; 2188 rdma_req->state = RDMA_REQUEST_STATE_NEW; 2189 if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) { 2190 break; 2191 } 2192 } 2193 } 2194 2195 static void 2196 _nvmf_rdma_qpair_disconnect(void *ctx) 2197 { 2198 struct spdk_nvmf_qpair *qpair = ctx; 2199 2200 spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); 2201 } 2202 2203 static void 2204 _nvmf_rdma_try_disconnect(void *ctx) 2205 { 2206 struct spdk_nvmf_qpair *qpair = ctx; 2207 struct spdk_nvmf_poll_group *group; 2208 2209 /* Read the group out of the qpair. This is normally set and accessed only from 2210 * the thread that created the group. Here, we're not on that thread necessarily. 2211 * The data member qpair->group begins it's life as NULL and then is assigned to 2212 * a pointer and never changes. So fortunately reading this and checking for 2213 * non-NULL is thread safe in the x86_64 memory model. */ 2214 group = qpair->group; 2215 2216 if (group == NULL) { 2217 /* The qpair hasn't been assigned to a group yet, so we can't 2218 * process a disconnect. 
Send a message to ourself and try again. */ 2219 spdk_thread_send_msg(spdk_get_thread(), _nvmf_rdma_try_disconnect, qpair); 2220 return; 2221 } 2222 2223 spdk_thread_send_msg(group->thread, _nvmf_rdma_qpair_disconnect, qpair); 2224 } 2225 2226 static inline void 2227 spdk_nvmf_rdma_start_disconnect(struct spdk_nvmf_rdma_qpair *rqpair) 2228 { 2229 if (__sync_bool_compare_and_swap(&rqpair->disconnect_started, false, true)) { 2230 _nvmf_rdma_try_disconnect(&rqpair->qpair); 2231 } 2232 } 2233 2234 static void spdk_nvmf_rdma_destroy_drained_qpair(struct spdk_nvmf_rdma_qpair *rqpair, 2235 struct spdk_nvmf_rdma_transport *rtransport) 2236 { 2237 if (rqpair->current_send_depth == 0 && rqpair->current_recv_depth == rqpair->max_queue_depth) { 2238 /* The qpair has been drained. Free the resources. */ 2239 spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair, true); 2240 spdk_nvmf_rdma_qpair_destroy(rqpair); 2241 } 2242 } 2243 2244 2245 static int 2246 nvmf_rdma_disconnect(struct rdma_cm_event *evt) 2247 { 2248 struct spdk_nvmf_qpair *qpair; 2249 struct spdk_nvmf_rdma_qpair *rqpair; 2250 2251 if (evt->id == NULL) { 2252 SPDK_ERRLOG("disconnect request: missing cm_id\n"); 2253 return -1; 2254 } 2255 2256 qpair = evt->id->context; 2257 if (qpair == NULL) { 2258 SPDK_ERRLOG("disconnect request: no active connection\n"); 2259 return -1; 2260 } 2261 2262 rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 2263 2264 spdk_trace_record(TRACE_RDMA_QP_DISCONNECT, 0, 0, (uintptr_t)rqpair->cm_id, 0); 2265 2266 spdk_nvmf_rdma_update_ibv_state(rqpair); 2267 2268 spdk_nvmf_rdma_start_disconnect(rqpair); 2269 2270 return 0; 2271 } 2272 2273 #ifdef DEBUG 2274 static const char *CM_EVENT_STR[] = { 2275 "RDMA_CM_EVENT_ADDR_RESOLVED", 2276 "RDMA_CM_EVENT_ADDR_ERROR", 2277 "RDMA_CM_EVENT_ROUTE_RESOLVED", 2278 "RDMA_CM_EVENT_ROUTE_ERROR", 2279 "RDMA_CM_EVENT_CONNECT_REQUEST", 2280 "RDMA_CM_EVENT_CONNECT_RESPONSE", 2281 "RDMA_CM_EVENT_CONNECT_ERROR", 2282 "RDMA_CM_EVENT_UNREACHABLE", 2283 "RDMA_CM_EVENT_REJECTED", 2284 "RDMA_CM_EVENT_ESTABLISHED", 2285 "RDMA_CM_EVENT_DISCONNECTED", 2286 "RDMA_CM_EVENT_DEVICE_REMOVAL", 2287 "RDMA_CM_EVENT_MULTICAST_JOIN", 2288 "RDMA_CM_EVENT_MULTICAST_ERROR", 2289 "RDMA_CM_EVENT_ADDR_CHANGE", 2290 "RDMA_CM_EVENT_TIMEWAIT_EXIT" 2291 }; 2292 #endif /* DEBUG */ 2293 2294 static void 2295 spdk_nvmf_process_cm_event(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn) 2296 { 2297 struct spdk_nvmf_rdma_transport *rtransport; 2298 struct rdma_cm_event *event; 2299 int rc; 2300 2301 rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); 2302 2303 if (rtransport->event_channel == NULL) { 2304 return; 2305 } 2306 2307 while (1) { 2308 rc = rdma_get_cm_event(rtransport->event_channel, &event); 2309 if (rc == 0) { 2310 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Acceptor Event: %s\n", CM_EVENT_STR[event->event]); 2311 2312 spdk_trace_record(TRACE_RDMA_CM_ASYNC_EVENT, 0, 0, 0, event->event); 2313 2314 switch (event->event) { 2315 case RDMA_CM_EVENT_ADDR_RESOLVED: 2316 case RDMA_CM_EVENT_ADDR_ERROR: 2317 case RDMA_CM_EVENT_ROUTE_RESOLVED: 2318 case RDMA_CM_EVENT_ROUTE_ERROR: 2319 /* No action required. The target never attempts to resolve routes. */ 2320 break; 2321 case RDMA_CM_EVENT_CONNECT_REQUEST: 2322 rc = nvmf_rdma_connect(transport, event, cb_fn); 2323 if (rc < 0) { 2324 SPDK_ERRLOG("Unable to process connect event. 
rc: %d\n", rc); 2325 break; 2326 } 2327 break; 2328 case RDMA_CM_EVENT_CONNECT_RESPONSE: 2329 /* The target never initiates a new connection. So this will not occur. */ 2330 break; 2331 case RDMA_CM_EVENT_CONNECT_ERROR: 2332 /* Can this happen? The docs say it can, but not sure what causes it. */ 2333 break; 2334 case RDMA_CM_EVENT_UNREACHABLE: 2335 case RDMA_CM_EVENT_REJECTED: 2336 /* These only occur on the client side. */ 2337 break; 2338 case RDMA_CM_EVENT_ESTABLISHED: 2339 /* TODO: Should we be waiting for this event anywhere? */ 2340 break; 2341 case RDMA_CM_EVENT_DISCONNECTED: 2342 case RDMA_CM_EVENT_DEVICE_REMOVAL: 2343 rc = nvmf_rdma_disconnect(event); 2344 if (rc < 0) { 2345 SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc); 2346 break; 2347 } 2348 break; 2349 case RDMA_CM_EVENT_MULTICAST_JOIN: 2350 case RDMA_CM_EVENT_MULTICAST_ERROR: 2351 /* Multicast is not used */ 2352 break; 2353 case RDMA_CM_EVENT_ADDR_CHANGE: 2354 /* Not utilizing this event */ 2355 break; 2356 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 2357 /* For now, do nothing. The target never re-uses queue pairs. */ 2358 break; 2359 default: 2360 SPDK_ERRLOG("Unexpected Acceptor Event [%d]\n", event->event); 2361 break; 2362 } 2363 2364 rdma_ack_cm_event(event); 2365 } else { 2366 if (errno != EAGAIN && errno != EWOULDBLOCK) { 2367 SPDK_ERRLOG("Acceptor Event Error: %s\n", spdk_strerror(errno)); 2368 } 2369 break; 2370 } 2371 } 2372 } 2373 2374 static void 2375 spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device) 2376 { 2377 int rc; 2378 struct spdk_nvmf_rdma_qpair *rqpair; 2379 struct ibv_async_event event; 2380 enum ibv_qp_state state; 2381 2382 rc = ibv_get_async_event(device->context, &event); 2383 2384 if (rc) { 2385 SPDK_ERRLOG("Failed to get async_event (%d): %s\n", 2386 errno, spdk_strerror(errno)); 2387 return; 2388 } 2389 2390 SPDK_NOTICELOG("Async event: %s\n", 2391 ibv_event_type_str(event.event_type)); 2392 2393 switch (event.event_type) { 2394 case IBV_EVENT_QP_FATAL: 2395 rqpair = event.element.qp->qp_context; 2396 spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 2397 (uintptr_t)rqpair->cm_id, event.event_type); 2398 spdk_nvmf_rdma_update_ibv_state(rqpair); 2399 spdk_nvmf_rdma_start_disconnect(rqpair); 2400 break; 2401 case IBV_EVENT_QP_LAST_WQE_REACHED: 2402 /* This event only occurs for shared receive queues, which are not currently supported. */ 2403 break; 2404 case IBV_EVENT_SQ_DRAINED: 2405 /* This event occurs frequently in both error and non-error states. 2406 * Check if the qpair is in an error state before sending a message. 2407 * Note that we're not on the correct thread to access the qpair, but 2408 * the operations that the below calls make all happen to be thread 2409 * safe. 
*/ 2410 rqpair = event.element.qp->qp_context; 2411 spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 2412 (uintptr_t)rqpair->cm_id, event.event_type); 2413 state = spdk_nvmf_rdma_update_ibv_state(rqpair); 2414 if (state == IBV_QPS_ERR) { 2415 spdk_nvmf_rdma_start_disconnect(rqpair); 2416 } 2417 break; 2418 case IBV_EVENT_QP_REQ_ERR: 2419 case IBV_EVENT_QP_ACCESS_ERR: 2420 case IBV_EVENT_COMM_EST: 2421 case IBV_EVENT_PATH_MIG: 2422 case IBV_EVENT_PATH_MIG_ERR: 2423 rqpair = event.element.qp->qp_context; 2424 spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 2425 (uintptr_t)rqpair->cm_id, event.event_type); 2426 spdk_nvmf_rdma_update_ibv_state(rqpair); 2427 break; 2428 case IBV_EVENT_CQ_ERR: 2429 case IBV_EVENT_DEVICE_FATAL: 2430 case IBV_EVENT_PORT_ACTIVE: 2431 case IBV_EVENT_PORT_ERR: 2432 case IBV_EVENT_LID_CHANGE: 2433 case IBV_EVENT_PKEY_CHANGE: 2434 case IBV_EVENT_SM_CHANGE: 2435 case IBV_EVENT_SRQ_ERR: 2436 case IBV_EVENT_SRQ_LIMIT_REACHED: 2437 case IBV_EVENT_CLIENT_REREGISTER: 2438 case IBV_EVENT_GID_CHANGE: 2439 default: 2440 spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 0, event.event_type); 2441 break; 2442 } 2443 ibv_ack_async_event(&event); 2444 } 2445 2446 static void 2447 spdk_nvmf_rdma_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn) 2448 { 2449 int nfds, i = 0; 2450 struct spdk_nvmf_rdma_transport *rtransport; 2451 struct spdk_nvmf_rdma_device *device, *tmp; 2452 2453 rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); 2454 nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0); 2455 2456 if (nfds <= 0) { 2457 return; 2458 } 2459 2460 /* The first poll descriptor is RDMA CM event */ 2461 if (rtransport->poll_fds[i++].revents & POLLIN) { 2462 spdk_nvmf_process_cm_event(transport, cb_fn); 2463 nfds--; 2464 } 2465 2466 if (nfds == 0) { 2467 return; 2468 } 2469 2470 /* Second and subsequent poll descriptors are IB async events */ 2471 TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) { 2472 if (rtransport->poll_fds[i++].revents & POLLIN) { 2473 spdk_nvmf_process_ib_event(device); 2474 nfds--; 2475 } 2476 } 2477 /* check all flagged fd's have been served */ 2478 assert(nfds == 0); 2479 } 2480 2481 static void 2482 spdk_nvmf_rdma_discover(struct spdk_nvmf_transport *transport, 2483 struct spdk_nvme_transport_id *trid, 2484 struct spdk_nvmf_discovery_log_page_entry *entry) 2485 { 2486 entry->trtype = SPDK_NVMF_TRTYPE_RDMA; 2487 entry->adrfam = trid->adrfam; 2488 entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_SPECIFIED; 2489 2490 spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' '); 2491 spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' '); 2492 2493 entry->tsas.rdma.rdma_qptype = SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED; 2494 entry->tsas.rdma.rdma_prtype = SPDK_NVMF_RDMA_PRTYPE_NONE; 2495 entry->tsas.rdma.rdma_cms = SPDK_NVMF_RDMA_CMS_RDMA_CM; 2496 } 2497 2498 static struct spdk_nvmf_transport_poll_group * 2499 spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport) 2500 { 2501 struct spdk_nvmf_rdma_transport *rtransport; 2502 struct spdk_nvmf_rdma_poll_group *rgroup; 2503 struct spdk_nvmf_rdma_poller *poller, *tpoller; 2504 struct spdk_nvmf_rdma_device *device; 2505 2506 rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport); 2507 2508 rgroup = calloc(1, sizeof(*rgroup)); 2509 if (!rgroup) { 2510 return NULL; 2511 } 2512 2513 TAILQ_INIT(&rgroup->pollers); 2514 
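	/* Requests in RDMA_REQUEST_STATE_NEED_BUFFER wait on the queue below in
	 * FIFO order; see the NEED_BUFFER handling in
	 * spdk_nvmf_rdma_request_process(). */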
TAILQ_INIT(&rgroup->pending_data_buf_queue); 2515 2516 pthread_mutex_lock(&rtransport->lock); 2517 TAILQ_FOREACH(device, &rtransport->devices, link) { 2518 poller = calloc(1, sizeof(*poller)); 2519 if (!poller) { 2520 SPDK_ERRLOG("Unable to allocate memory for new RDMA poller\n"); 2521 goto err_exit; 2522 } 2523 2524 poller->device = device; 2525 poller->group = rgroup; 2526 2527 TAILQ_INIT(&poller->qpairs); 2528 2529 poller->cq = ibv_create_cq(device->context, DEFAULT_NVMF_RDMA_CQ_SIZE, poller, NULL, 0); 2530 if (!poller->cq) { 2531 SPDK_ERRLOG("Unable to create completion queue\n"); 2532 free(poller); 2533 goto err_exit; 2534 } 2535 poller->num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE; 2536 2537 TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link); 2538 } 2539 2540 pthread_mutex_unlock(&rtransport->lock); 2541 return &rgroup->group; 2542 2543 err_exit: 2544 TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tpoller) { 2545 TAILQ_REMOVE(&rgroup->pollers, poller, link); 2546 if (poller->cq) { 2547 ibv_destroy_cq(poller->cq); 2548 } 2549 free(poller); 2550 } 2551 2552 free(rgroup); 2553 pthread_mutex_unlock(&rtransport->lock); 2554 return NULL; 2555 } 2556 2557 static void 2558 spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 2559 { 2560 struct spdk_nvmf_rdma_poll_group *rgroup; 2561 struct spdk_nvmf_rdma_poller *poller, *tmp; 2562 struct spdk_nvmf_rdma_qpair *qpair, *tmp_qpair; 2563 2564 rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); 2565 2566 if (!rgroup) { 2567 return; 2568 } 2569 2570 TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) { 2571 TAILQ_REMOVE(&rgroup->pollers, poller, link); 2572 2573 if (poller->cq) { 2574 ibv_destroy_cq(poller->cq); 2575 } 2576 TAILQ_FOREACH_SAFE(qpair, &poller->qpairs, link, tmp_qpair) { 2577 spdk_nvmf_rdma_qpair_destroy(qpair); 2578 } 2579 2580 free(poller); 2581 } 2582 2583 if (!TAILQ_EMPTY(&rgroup->pending_data_buf_queue)) { 2584 SPDK_ERRLOG("Pending I/O list wasn't empty on poll group destruction\n"); 2585 } 2586 2587 free(rgroup); 2588 } 2589 2590 static void 2591 spdk_nvmf_rdma_qpair_reject_connection(struct spdk_nvmf_rdma_qpair *rqpair) 2592 { 2593 spdk_nvmf_rdma_event_reject(rqpair->cm_id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES); 2594 spdk_nvmf_rdma_qpair_destroy(rqpair); 2595 } 2596 2597 static int 2598 spdk_nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 2599 struct spdk_nvmf_qpair *qpair) 2600 { 2601 struct spdk_nvmf_rdma_poll_group *rgroup; 2602 struct spdk_nvmf_rdma_qpair *rqpair; 2603 struct spdk_nvmf_rdma_device *device; 2604 struct spdk_nvmf_rdma_poller *poller; 2605 int rc; 2606 2607 rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); 2608 rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 2609 2610 device = rqpair->port->device; 2611 2612 TAILQ_FOREACH(poller, &rgroup->pollers, link) { 2613 if (poller->device == device) { 2614 break; 2615 } 2616 } 2617 2618 if (!poller) { 2619 SPDK_ERRLOG("No poller found for device.\n"); 2620 return -1; 2621 } 2622 2623 TAILQ_INSERT_TAIL(&poller->qpairs, rqpair, link); 2624 rqpair->poller = poller; 2625 2626 rc = spdk_nvmf_rdma_qpair_initialize(qpair); 2627 if (rc < 0) { 2628 SPDK_ERRLOG("Failed to initialize nvmf_rdma_qpair with qpair=%p\n", qpair); 2629 return -1; 2630 } 2631 2632 rc = spdk_nvmf_rdma_event_accept(rqpair->cm_id, rqpair); 2633 if (rc) { 2634 /* Try to reject, but we probably can't */ 2635 spdk_nvmf_rdma_qpair_reject_connection(rqpair); 2636 return -1; 2637 } 2638 2639 
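	/* Editorial note: refresh the cached ibv queue pair state now that the
	 * connection has been accepted; later paths such as
	 * spdk_nvmf_rdma_request_complete() and spdk_nvmf_rdma_close_qpair()
	 * consult rqpair->ibv_attr.qp_state. */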
spdk_nvmf_rdma_update_ibv_state(rqpair); 2640 2641 return 0; 2642 } 2643 2644 static int 2645 spdk_nvmf_rdma_request_free(struct spdk_nvmf_request *req) 2646 { 2647 struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req); 2648 struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport, 2649 struct spdk_nvmf_rdma_transport, transport); 2650 2651 nvmf_rdma_request_free(rdma_req, rtransport); 2652 return 0; 2653 } 2654 2655 static int 2656 spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req) 2657 { 2658 struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport, 2659 struct spdk_nvmf_rdma_transport, transport); 2660 struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, 2661 struct spdk_nvmf_rdma_request, req); 2662 struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, 2663 struct spdk_nvmf_rdma_qpair, qpair); 2664 2665 if (rqpair->ibv_attr.qp_state != IBV_QPS_ERR) { 2666 /* The connection is alive, so process the request as normal */ 2667 rdma_req->state = RDMA_REQUEST_STATE_EXECUTED; 2668 } else { 2669 /* The connection is dead. Move the request directly to the completed state. */ 2670 rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; 2671 } 2672 2673 spdk_nvmf_rdma_request_process(rtransport, rdma_req); 2674 2675 return 0; 2676 } 2677 2678 static int 2679 spdk_nvmf_rdma_destroy_defunct_qpair(void *ctx) 2680 { 2681 struct spdk_nvmf_rdma_qpair *rqpair = ctx; 2682 struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, 2683 struct spdk_nvmf_rdma_transport, transport); 2684 2685 spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair, true); 2686 spdk_nvmf_rdma_qpair_destroy(rqpair); 2687 2688 return 0; 2689 } 2690 2691 static void 2692 spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair) 2693 { 2694 struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 2695 2696 if (rqpair->disconnect_flags & RDMA_QP_DISCONNECTING) { 2697 return; 2698 } 2699 2700 rqpair->disconnect_flags |= RDMA_QP_DISCONNECTING; 2701 2702 /* This happens only when the qpair is disconnected before 2703 * it is added to the poll group. Since there is no poll group, 2704 * the RDMA qp has not been initialized yet and the RDMA CM 2705 * event has not yet been acknowledged, so we need to reject it. 
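 *
 * (Editorial note: the rejection below goes through
 * spdk_nvmf_rdma_qpair_reject_connection(), which replies with
 * SPDK_NVMF_RDMA_ERROR_NO_RESOURCES and then destroys the qpair.)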
2706 */ 2707 if (rqpair->qpair.state == SPDK_NVMF_QPAIR_UNINITIALIZED) { 2708 spdk_nvmf_rdma_qpair_reject_connection(rqpair); 2709 return; 2710 } 2711 2712 if (rqpair->ibv_attr.qp_state != IBV_QPS_ERR) { 2713 spdk_nvmf_rdma_set_ibv_state(rqpair, IBV_QPS_ERR); 2714 } 2715 2716 rqpair->destruct_poller = spdk_poller_register(spdk_nvmf_rdma_destroy_defunct_qpair, (void *)rqpair, 2717 NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US); 2718 } 2719 2720 #ifdef DEBUG 2721 static int 2722 spdk_nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req) 2723 { 2724 return rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST || 2725 rdma_req->state == RDMA_REQUEST_STATE_COMPLETING; 2726 } 2727 #endif 2728 2729 static int 2730 spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, 2731 struct spdk_nvmf_rdma_poller *rpoller) 2732 { 2733 struct ibv_wc wc[32]; 2734 struct spdk_nvmf_rdma_wr *rdma_wr; 2735 struct spdk_nvmf_rdma_request *rdma_req; 2736 struct spdk_nvmf_rdma_recv *rdma_recv; 2737 struct spdk_nvmf_rdma_qpair *rqpair; 2738 int reaped, i; 2739 int count = 0; 2740 bool error = false; 2741 2742 /* Poll for completing operations. */ 2743 reaped = ibv_poll_cq(rpoller->cq, 32, wc); 2744 if (reaped < 0) { 2745 SPDK_ERRLOG("Error polling CQ! (%d): %s\n", 2746 errno, spdk_strerror(errno)); 2747 return -1; 2748 } 2749 2750 for (i = 0; i < reaped; i++) { 2751 2752 rdma_wr = (struct spdk_nvmf_rdma_wr *)wc[i].wr_id; 2753 2754 /* Handle error conditions */ 2755 if (wc[i].status) { 2756 SPDK_DEBUGLOG(SPDK_LOG_RDMA, "CQ error on CQ %p, Request 0x%lu (%d): %s\n", 2757 rpoller->cq, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status)); 2758 2759 error = true; 2760 2761 switch (rdma_wr->type) { 2762 case RDMA_WR_TYPE_SEND: 2763 rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, rsp.rdma_wr); 2764 rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); 2765 2766 SPDK_ERRLOG("data=%p length=%u\n", rdma_req->req.data, rdma_req->req.length); 2767 /* We're going to attempt an error recovery, so force the request into 2768 * the completed state. */ 2769 rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; 2770 rqpair->current_send_depth--; 2771 2772 assert(rdma_req->num_outstanding_data_wr == 0); 2773 spdk_nvmf_rdma_request_process(rtransport, rdma_req); 2774 break; 2775 case RDMA_WR_TYPE_RECV: 2776 rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr); 2777 rqpair = rdma_recv->qpair; 2778 2779 /* Dump this into the incoming queue. This gets cleaned up when 2780 * the queue pair disconnects or recovers. */ 2781 STAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link); 2782 rqpair->current_recv_depth++; 2783 2784 /* Don't worry about responding to recv overflow, we are disconnecting anyways */ 2785 break; 2786 case RDMA_WR_TYPE_DATA: 2787 /* If the data transfer fails still force the queue into the error state, 2788 * if we were performing an RDMA_READ, we need to force the request into a 2789 * completed state since it wasn't linked to a send. However, in the RDMA_WRITE 2790 * case, we should wait for the SEND to complete. 
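 *
 * (Editorial note: concretely, a failed RDMA_READ is moved to
 * RDMA_REQUEST_STATE_COMPLETED here once num_outstanding_data_wr reaches
 * zero, while a failed RDMA_WRITE leaves that transition to the
 * RDMA_WR_TYPE_SEND error path above.)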
*/ 2791 rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, data.rdma_wr); 2792 rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); 2793 2794 SPDK_ERRLOG("data=%p length=%u\n", rdma_req->req.data, rdma_req->req.length); 2795 assert(rdma_req->num_outstanding_data_wr > 0); 2796 rdma_req->num_outstanding_data_wr--; 2797 if (rdma_req->data.wr.opcode == IBV_WR_RDMA_READ) { 2798 rqpair->current_read_depth--; 2799 if (rdma_req->num_outstanding_data_wr == 0) { 2800 rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; 2801 } 2802 } 2803 rqpair->current_send_depth--; 2804 break; 2805 default: 2806 SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode); 2807 continue; 2808 } 2809 2810 if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE) { 2811 /* Disconnect the connection. */ 2812 spdk_nvmf_rdma_start_disconnect(rqpair); 2813 } else { 2814 spdk_nvmf_rdma_destroy_drained_qpair(rqpair, rtransport); 2815 } 2816 continue; 2817 } 2818 2819 switch (wc[i].opcode) { 2820 case IBV_WC_SEND: 2821 assert(rdma_wr->type == RDMA_WR_TYPE_SEND); 2822 rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, rsp.rdma_wr); 2823 rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); 2824 2825 assert(spdk_nvmf_rdma_req_is_completing(rdma_req)); 2826 2827 rdma_req->state = RDMA_REQUEST_STATE_COMPLETED; 2828 rqpair->current_send_depth--; 2829 spdk_nvmf_rdma_request_process(rtransport, rdma_req); 2830 2831 count++; 2832 2833 assert(rdma_req->num_outstanding_data_wr == 0); 2834 /* Try to process other queued requests */ 2835 spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair, false); 2836 break; 2837 2838 case IBV_WC_RDMA_WRITE: 2839 assert(rdma_wr->type == RDMA_WR_TYPE_DATA); 2840 rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, data.rdma_wr); 2841 rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); 2842 rqpair->current_send_depth--; 2843 rdma_req->num_outstanding_data_wr--; 2844 2845 /* Try to process other queued requests */ 2846 spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair, false); 2847 break; 2848 2849 case IBV_WC_RDMA_READ: 2850 assert(rdma_wr->type == RDMA_WR_TYPE_DATA); 2851 rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, data.rdma_wr); 2852 rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair); 2853 rqpair->current_send_depth--; 2854 2855 assert(rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER); 2856 /* wait for all outstanding reads associated with the same rdma_req to complete before proceeding. */ 2857 assert(rdma_req->num_outstanding_data_wr > 0); 2858 rqpair->current_read_depth--; 2859 rdma_req->num_outstanding_data_wr--; 2860 if (rdma_req->num_outstanding_data_wr == 0) { 2861 rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE; 2862 spdk_nvmf_rdma_request_process(rtransport, rdma_req); 2863 } 2864 2865 /* Try to process other queued requests */ 2866 spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair, false); 2867 break; 2868 2869 case IBV_WC_RECV: 2870 assert(rdma_wr->type == RDMA_WR_TYPE_RECV); 2871 rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr); 2872 rqpair = rdma_recv->qpair; 2873 /* The qpair should not send more requests than are allowed per qpair. 
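 * If the host exceeds that limit (current_recv_depth has already reached
 * max_queue_depth), the queue pair is disconnected below instead of
 * accounting the extra receive.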
*/ 2874 if (rqpair->current_recv_depth >= rqpair->max_queue_depth) { 2875 spdk_nvmf_rdma_start_disconnect(rqpair); 2876 } else { 2877 rqpair->current_recv_depth++; 2878 } 2879 STAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link); 2880 /* Try to process other queued requests */ 2881 spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair, false); 2882 break; 2883 2884 default: 2885 SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode); 2886 continue; 2887 } 2888 2889 if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) { 2890 spdk_nvmf_rdma_destroy_drained_qpair(rqpair, rtransport); 2891 } 2892 } 2893 2894 if (error == true) { 2895 return -1; 2896 } 2897 2898 return count; 2899 } 2900 2901 static int 2902 spdk_nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) 2903 { 2904 struct spdk_nvmf_rdma_transport *rtransport; 2905 struct spdk_nvmf_rdma_poll_group *rgroup; 2906 struct spdk_nvmf_rdma_poller *rpoller; 2907 int count, rc; 2908 2909 rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport); 2910 rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group); 2911 2912 count = 0; 2913 TAILQ_FOREACH(rpoller, &rgroup->pollers, link) { 2914 rc = spdk_nvmf_rdma_poller_poll(rtransport, rpoller); 2915 if (rc < 0) { 2916 return rc; 2917 } 2918 count += rc; 2919 } 2920 2921 return count; 2922 } 2923 2924 static int 2925 spdk_nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id, 2926 struct spdk_nvme_transport_id *trid, 2927 bool peer) 2928 { 2929 struct sockaddr *saddr; 2930 uint16_t port; 2931 2932 trid->trtype = SPDK_NVME_TRANSPORT_RDMA; 2933 2934 if (peer) { 2935 saddr = rdma_get_peer_addr(id); 2936 } else { 2937 saddr = rdma_get_local_addr(id); 2938 } 2939 switch (saddr->sa_family) { 2940 case AF_INET: { 2941 struct sockaddr_in *saddr_in = (struct sockaddr_in *)saddr; 2942 2943 trid->adrfam = SPDK_NVMF_ADRFAM_IPV4; 2944 inet_ntop(AF_INET, &saddr_in->sin_addr, 2945 trid->traddr, sizeof(trid->traddr)); 2946 if (peer) { 2947 port = ntohs(rdma_get_dst_port(id)); 2948 } else { 2949 port = ntohs(rdma_get_src_port(id)); 2950 } 2951 snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port); 2952 break; 2953 } 2954 case AF_INET6: { 2955 struct sockaddr_in6 *saddr_in = (struct sockaddr_in6 *)saddr; 2956 trid->adrfam = SPDK_NVMF_ADRFAM_IPV6; 2957 inet_ntop(AF_INET6, &saddr_in->sin6_addr, 2958 trid->traddr, sizeof(trid->traddr)); 2959 if (peer) { 2960 port = ntohs(rdma_get_dst_port(id)); 2961 } else { 2962 port = ntohs(rdma_get_src_port(id)); 2963 } 2964 snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port); 2965 break; 2966 } 2967 default: 2968 return -1; 2969 2970 } 2971 2972 return 0; 2973 } 2974 2975 static int 2976 spdk_nvmf_rdma_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, 2977 struct spdk_nvme_transport_id *trid) 2978 { 2979 struct spdk_nvmf_rdma_qpair *rqpair; 2980 2981 rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 2982 2983 return spdk_nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, true); 2984 } 2985 2986 static int 2987 spdk_nvmf_rdma_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, 2988 struct spdk_nvme_transport_id *trid) 2989 { 2990 struct spdk_nvmf_rdma_qpair *rqpair; 2991 2992 rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 2993 2994 return spdk_nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, false); 2995 } 2996 2997 static int 2998 spdk_nvmf_rdma_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, 2999 struct spdk_nvme_transport_id *trid) 3000 { 
3001 struct spdk_nvmf_rdma_qpair *rqpair; 3002 3003 rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); 3004 3005 return spdk_nvmf_rdma_trid_from_cm_id(rqpair->listen_id, trid, false); 3006 } 3007 3008 void 3009 spdk_nvmf_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks) 3010 { 3011 g_nvmf_hooks = *hooks; 3012 } 3013 3014 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma = { 3015 .type = SPDK_NVME_TRANSPORT_RDMA, 3016 .opts_init = spdk_nvmf_rdma_opts_init, 3017 .create = spdk_nvmf_rdma_create, 3018 .destroy = spdk_nvmf_rdma_destroy, 3019 3020 .listen = spdk_nvmf_rdma_listen, 3021 .stop_listen = spdk_nvmf_rdma_stop_listen, 3022 .accept = spdk_nvmf_rdma_accept, 3023 3024 .listener_discover = spdk_nvmf_rdma_discover, 3025 3026 .poll_group_create = spdk_nvmf_rdma_poll_group_create, 3027 .poll_group_destroy = spdk_nvmf_rdma_poll_group_destroy, 3028 .poll_group_add = spdk_nvmf_rdma_poll_group_add, 3029 .poll_group_poll = spdk_nvmf_rdma_poll_group_poll, 3030 3031 .req_free = spdk_nvmf_rdma_request_free, 3032 .req_complete = spdk_nvmf_rdma_request_complete, 3033 3034 .qpair_fini = spdk_nvmf_rdma_close_qpair, 3035 .qpair_is_idle = spdk_nvmf_rdma_qpair_is_idle, 3036 .qpair_get_peer_trid = spdk_nvmf_rdma_qpair_get_peer_trid, 3037 .qpair_get_local_trid = spdk_nvmf_rdma_qpair_get_local_trid, 3038 .qpair_get_listen_trid = spdk_nvmf_rdma_qpair_get_listen_trid, 3039 3040 }; 3041 3042 SPDK_LOG_REGISTER_COMPONENT("rdma", SPDK_LOG_RDMA) 3043
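/*
 * Editorial usage sketch (not compiled as part of this file): how an
 * application might supply its own protection domains through the hooks
 * consumed above.  The get_ibv_pd signature is inferred from the call site in
 * spdk_nvmf_rdma_listen() and the helper lookup_pd_for_device() is
 * hypothetical.
 *
 *     static struct ibv_pd *
 *     my_get_ibv_pd(const struct spdk_nvme_transport_id *trid,
 *                   struct ibv_context *verbs)
 *     {
 *         // Return a PD created elsewhere for this device.  The target
 *         // enforces exactly one PD per device (see the check in
 *         // spdk_nvmf_rdma_listen()).
 *         return lookup_pd_for_device(verbs);
 *     }
 *
 *     void
 *     app_register_rdma_hooks(void)
 *     {
 *         struct spdk_nvme_rdma_hooks hooks = {
 *             .get_ibv_pd = my_get_ibv_pd,
 *         };
 *
 *         spdk_nvmf_rdma_init_hooks(&hooks);
 *     }
 */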