1 /*- 2 * BSD LICENSE 3 * Copyright (c) Intel Corporation. All rights reserved. 4 * Copyright (c) 2019, Nutanix Inc. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * * Neither the name of Intel Corporation nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * NVMe over vfio-user transport 35 */ 36 37 #include <vfio-user/libvfio-user.h> 38 #include <vfio-user/pci_defs.h> 39 40 #include "spdk/barrier.h" 41 #include "spdk/stdinc.h" 42 #include "spdk/assert.h" 43 #include "spdk/thread.h" 44 #include "spdk/nvmf_transport.h" 45 #include "spdk/sock.h" 46 #include "spdk/string.h" 47 #include "spdk/util.h" 48 #include "spdk/log.h" 49 50 #include "transport.h" 51 52 #include "nvmf_internal.h" 53 54 #define NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH 256 55 #define NVMF_VFIO_USER_DEFAULT_AQ_DEPTH 32 56 #define NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 64 57 #define NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE ((NVMF_REQ_MAX_BUFFERS - 1) << SHIFT_4KB) 58 #define NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE 59 60 #define NVMF_VFIO_USER_DOORBELLS_OFFSET 0x1000 61 #define NVMF_VFIO_USER_DOORBELLS_SIZE 0x1000 62 63 #define NVME_REG_CFG_SIZE 0x1000 64 #define NVME_REG_BAR0_SIZE 0x4000 65 #define NVME_IRQ_INTX_NUM 1 66 #define NVME_IRQ_MSIX_NUM NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 67 68 struct nvmf_vfio_user_req; 69 struct nvmf_vfio_user_qpair; 70 71 typedef int (*nvmf_vfio_user_req_cb_fn)(struct nvmf_vfio_user_req *req, void *cb_arg); 72 73 /* 1 more for PRP2 list itself */ 74 #define NVMF_VFIO_USER_MAX_IOVECS (NVMF_REQ_MAX_BUFFERS + 1) 75 76 enum nvmf_vfio_user_req_state { 77 VFIO_USER_REQUEST_STATE_FREE = 0, 78 VFIO_USER_REQUEST_STATE_EXECUTING, 79 }; 80 81 struct nvmf_vfio_user_req { 82 struct spdk_nvmf_request req; 83 struct spdk_nvme_cpl rsp; 84 struct spdk_nvme_cmd cmd; 85 86 enum nvmf_vfio_user_req_state state; 87 nvmf_vfio_user_req_cb_fn cb_fn; 88 void *cb_arg; 89 90 /* old CC before prop_set_cc fabric command */ 91 union spdk_nvme_cc_register cc; 92 93 /* placeholder for gpa_to_vva memory map table, the IO buffer doesn't use it */ 94 dma_sg_t *sg; 95 struct iovec 
iov[NVMF_VFIO_USER_MAX_IOVECS]; 96 uint8_t iovcnt; 97 98 TAILQ_ENTRY(nvmf_vfio_user_req) link; 99 }; 100 101 /* 102 * A NVMe queue. 103 */ 104 struct nvme_q { 105 bool is_cq; 106 107 void *addr; 108 109 dma_sg_t *sg; 110 struct iovec iov; 111 112 uint32_t size; 113 uint64_t prp1; 114 115 union { 116 struct { 117 uint32_t head; 118 /* multiple SQs can be mapped to the same CQ */ 119 uint16_t cqid; 120 }; 121 struct { 122 uint32_t tail; 123 uint16_t iv; 124 bool ien; 125 bool phase; 126 }; 127 }; 128 }; 129 130 enum nvmf_vfio_user_qpair_state { 131 VFIO_USER_QPAIR_UNINITIALIZED = 0, 132 VFIO_USER_QPAIR_ACTIVE, 133 VFIO_USER_QPAIR_SQ_DELETED, 134 VFIO_USER_QPAIR_INACTIVE, 135 VFIO_USER_QPAIR_ERROR, 136 }; 137 138 struct nvmf_vfio_user_qpair { 139 struct spdk_nvmf_qpair qpair; 140 struct spdk_nvmf_transport_poll_group *group; 141 struct nvmf_vfio_user_ctrlr *ctrlr; 142 struct nvmf_vfio_user_req *reqs_internal; 143 uint32_t qsize; 144 struct nvme_q cq; 145 struct nvme_q sq; 146 enum nvmf_vfio_user_qpair_state state; 147 148 /* Copy of Create IO SQ command */ 149 struct spdk_nvme_cmd create_io_sq_cmd; 150 151 TAILQ_HEAD(, nvmf_vfio_user_req) reqs; 152 /* Poll group entry */ 153 TAILQ_ENTRY(nvmf_vfio_user_qpair) link; 154 /* Connected queue pair entry */ 155 TAILQ_ENTRY(nvmf_vfio_user_qpair) tailq; 156 }; 157 158 struct nvmf_vfio_user_poll_group { 159 struct spdk_nvmf_transport_poll_group group; 160 TAILQ_HEAD(, nvmf_vfio_user_qpair) qps; 161 }; 162 163 struct nvmf_vfio_user_ctrlr { 164 struct nvmf_vfio_user_endpoint *endpoint; 165 struct nvmf_vfio_user_transport *transport; 166 167 /* Connected queue pairs list */ 168 TAILQ_HEAD(, nvmf_vfio_user_qpair) connected_qps; 169 170 struct spdk_thread *thread; 171 struct spdk_poller *vfu_ctx_poller; 172 173 uint16_t cntlid; 174 175 struct nvmf_vfio_user_qpair *qp[NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR]; 176 177 TAILQ_ENTRY(nvmf_vfio_user_ctrlr) link; 178 179 volatile uint32_t *doorbells; 180 181 /* internal CSTS.CFS register for vfio-user fatal errors */ 182 uint32_t cfs : 1; 183 }; 184 185 struct nvmf_vfio_user_endpoint { 186 vfu_ctx_t *vfu_ctx; 187 struct msixcap *msix; 188 vfu_pci_config_space_t *pci_config_space; 189 int devmem_fd; 190 volatile uint32_t *doorbells; 191 192 struct spdk_nvme_transport_id trid; 193 const struct spdk_nvmf_subsystem *subsystem; 194 195 struct nvmf_vfio_user_ctrlr *ctrlr; 196 pthread_mutex_t lock; 197 198 TAILQ_ENTRY(nvmf_vfio_user_endpoint) link; 199 }; 200 201 struct nvmf_vfio_user_transport_opts { 202 bool disable_mappable_bar0; 203 }; 204 205 struct nvmf_vfio_user_transport { 206 struct spdk_nvmf_transport transport; 207 struct nvmf_vfio_user_transport_opts transport_opts; 208 pthread_mutex_t lock; 209 TAILQ_HEAD(, nvmf_vfio_user_endpoint) endpoints; 210 }; 211 212 /* 213 * function prototypes 214 */ 215 static volatile uint32_t * 216 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 217 218 static volatile uint32_t * 219 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 220 221 static int 222 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req); 223 224 static struct nvmf_vfio_user_req * 225 get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair); 226 227 static int 228 nvme_cmd_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, 229 uint32_t max_iovcnt, uint32_t len, size_t mps, 230 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 231 { 232 uint64_t prp1, prp2; 233 void *vva; 234 uint32_t i; 235 uint32_t residue_len, nents; 236 uint64_t *prp_list; 
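	/*
	 * PRP walk-through (cf. the NVMe base spec): PRP1 maps the first,
	 * possibly unaligned, page of the transfer; PRP2 is either the second
	 * data pointer (when the remainder fits in one page) or the address of
	 * a PRP list. E.g. with mps = 4KiB, a page-aligned 8KiB transfer uses
	 * PRP1 + PRP2 (2 iovecs), while a 12KiB transfer uses PRP1 plus a
	 * 2-entry PRP list pointed to by PRP2 (3 iovecs).
	 */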
237 uint32_t iovcnt; 238 239 assert(max_iovcnt > 0); 240 241 prp1 = cmd->dptr.prp.prp1; 242 prp2 = cmd->dptr.prp.prp2; 243 244 /* PRP1 may started with unaligned page address */ 245 residue_len = mps - (prp1 % mps); 246 residue_len = spdk_min(len, residue_len); 247 248 vva = gpa_to_vva(prv, prp1, residue_len, PROT_READ | PROT_WRITE); 249 if (spdk_unlikely(vva == NULL)) { 250 SPDK_ERRLOG("GPA to VVA failed\n"); 251 return -EINVAL; 252 } 253 len -= residue_len; 254 if (len && max_iovcnt < 2) { 255 SPDK_ERRLOG("Too many page entries, at least two iovs are required\n"); 256 return -ERANGE; 257 } 258 iovs[0].iov_base = vva; 259 iovs[0].iov_len = residue_len; 260 261 if (len) { 262 if (spdk_unlikely(prp2 == 0)) { 263 SPDK_ERRLOG("no PRP2, %d remaining\n", len); 264 return -EINVAL; 265 } 266 267 if (len <= mps) { 268 /* 2 PRP used */ 269 iovcnt = 2; 270 vva = gpa_to_vva(prv, prp2, len, PROT_READ | PROT_WRITE); 271 if (spdk_unlikely(vva == NULL)) { 272 SPDK_ERRLOG("no VVA for %#" PRIx64 ", len%#x\n", 273 prp2, len); 274 return -EINVAL; 275 } 276 iovs[1].iov_base = vva; 277 iovs[1].iov_len = len; 278 } else { 279 /* PRP list used */ 280 nents = (len + mps - 1) / mps; 281 if (spdk_unlikely(nents + 1 > max_iovcnt)) { 282 SPDK_ERRLOG("Too many page entries\n"); 283 return -ERANGE; 284 } 285 286 vva = gpa_to_vva(prv, prp2, nents * sizeof(*prp_list), PROT_READ); 287 if (spdk_unlikely(vva == NULL)) { 288 SPDK_ERRLOG("no VVA for %#" PRIx64 ", nents=%#x\n", 289 prp2, nents); 290 return -EINVAL; 291 } 292 prp_list = vva; 293 i = 0; 294 while (len != 0) { 295 residue_len = spdk_min(len, mps); 296 vva = gpa_to_vva(prv, prp_list[i], residue_len, PROT_READ | PROT_WRITE); 297 if (spdk_unlikely(vva == NULL)) { 298 SPDK_ERRLOG("no VVA for %#" PRIx64 ", residue_len=%#x\n", 299 prp_list[i], residue_len); 300 return -EINVAL; 301 } 302 iovs[i + 1].iov_base = vva; 303 iovs[i + 1].iov_len = residue_len; 304 len -= residue_len; 305 i++; 306 } 307 iovcnt = i + 1; 308 } 309 } else { 310 /* 1 PRP used */ 311 iovcnt = 1; 312 } 313 314 assert(iovcnt <= max_iovcnt); 315 return iovcnt; 316 } 317 318 static int 319 nvme_cmd_map_sgls_data(void *prv, struct spdk_nvme_sgl_descriptor *sgls, uint32_t num_sgls, 320 struct iovec *iovs, uint32_t max_iovcnt, 321 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 322 { 323 uint32_t i; 324 void *vva; 325 326 if (spdk_unlikely(max_iovcnt < num_sgls)) { 327 return -ERANGE; 328 } 329 330 for (i = 0; i < num_sgls; i++) { 331 if (spdk_unlikely(sgls[i].unkeyed.type != SPDK_NVME_SGL_TYPE_DATA_BLOCK)) { 332 SPDK_ERRLOG("Invalid SGL type %u\n", sgls[i].unkeyed.type); 333 return -EINVAL; 334 } 335 vva = gpa_to_vva(prv, sgls[i].address, sgls[i].unkeyed.length, PROT_READ | PROT_WRITE); 336 if (spdk_unlikely(vva == NULL)) { 337 SPDK_ERRLOG("GPA to VVA failed\n"); 338 return -EINVAL; 339 } 340 iovs[i].iov_base = vva; 341 iovs[i].iov_len = sgls[i].unkeyed.length; 342 } 343 344 return num_sgls; 345 } 346 347 static int 348 nvme_cmd_map_sgls(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt, 349 uint32_t len, size_t mps, 350 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 351 { 352 struct spdk_nvme_sgl_descriptor *sgl, *last_sgl; 353 uint32_t num_sgls, seg_len; 354 void *vva; 355 int ret; 356 uint32_t total_iovcnt = 0; 357 358 /* SGL cases */ 359 sgl = &cmd->dptr.sgl1; 360 361 /* only one SGL segment */ 362 if (sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) { 363 assert(max_iovcnt > 0); 364 vva = gpa_to_vva(prv, sgl->address, 
sgl->unkeyed.length, PROT_READ | PROT_WRITE); 365 if (spdk_unlikely(vva == NULL)) { 366 SPDK_ERRLOG("GPA to VVA failed\n"); 367 return -EINVAL; 368 } 369 iovs[0].iov_base = vva; 370 iovs[0].iov_len = sgl->unkeyed.length; 371 assert(sgl->unkeyed.length == len); 372 373 return 1; 374 } 375 376 for (;;) { 377 if (spdk_unlikely((sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_SEGMENT) && 378 (sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_LAST_SEGMENT))) { 379 SPDK_ERRLOG("Invalid SGL type %u\n", sgl->unkeyed.type); 380 return -EINVAL; 381 } 382 383 seg_len = sgl->unkeyed.length; 384 if (spdk_unlikely(seg_len % sizeof(struct spdk_nvme_sgl_descriptor))) { 385 SPDK_ERRLOG("Invalid SGL segment len %u\n", seg_len); 386 return -EINVAL; 387 } 388 389 num_sgls = seg_len / sizeof(struct spdk_nvme_sgl_descriptor); 390 vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length, PROT_READ); 391 if (spdk_unlikely(vva == NULL)) { 392 SPDK_ERRLOG("GPA to VVA failed\n"); 393 return -EINVAL; 394 } 395 396 /* sgl point to the first segment */ 397 sgl = (struct spdk_nvme_sgl_descriptor *)vva; 398 last_sgl = &sgl[num_sgls - 1]; 399 400 /* we are done */ 401 if (last_sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) { 402 /* map whole sgl list */ 403 ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls, &iovs[total_iovcnt], 404 max_iovcnt - total_iovcnt, gpa_to_vva); 405 if (spdk_unlikely(ret < 0)) { 406 return ret; 407 } 408 total_iovcnt += ret; 409 410 return total_iovcnt; 411 } 412 413 if (num_sgls > 1) { 414 /* map whole sgl exclude last_sgl */ 415 ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls - 1, &iovs[total_iovcnt], 416 max_iovcnt - total_iovcnt, gpa_to_vva); 417 if (spdk_unlikely(ret < 0)) { 418 return ret; 419 } 420 total_iovcnt += ret; 421 } 422 423 /* move to next level's segments */ 424 sgl = last_sgl; 425 } 426 427 return 0; 428 } 429 430 static int 431 nvme_map_cmd(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt, 432 uint32_t len, size_t mps, 433 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 434 { 435 if (cmd->psdt == SPDK_NVME_PSDT_PRP) { 436 return nvme_cmd_map_prps(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva); 437 } 438 439 return nvme_cmd_map_sgls(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva); 440 } 441 442 static char * 443 endpoint_id(struct nvmf_vfio_user_endpoint *endpoint) 444 { 445 return endpoint->trid.traddr; 446 } 447 448 static char * 449 ctrlr_id(struct nvmf_vfio_user_ctrlr *ctrlr) 450 { 451 if (!ctrlr || !ctrlr->endpoint) { 452 return "Null Ctrlr"; 453 } 454 455 return endpoint_id(ctrlr->endpoint); 456 } 457 458 static inline uint16_t 459 io_q_id(struct nvme_q *q) 460 { 461 462 struct nvmf_vfio_user_qpair *vu_qpair; 463 464 assert(q); 465 466 if (q->is_cq) { 467 vu_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, cq); 468 } else { 469 vu_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, sq); 470 } 471 assert(vu_qpair); 472 return vu_qpair->qpair.qid; 473 } 474 475 static void 476 fail_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 477 { 478 assert(ctrlr != NULL); 479 480 if (ctrlr->cfs == 0) { 481 SPDK_ERRLOG(":%s failing controller\n", ctrlr_id(ctrlr)); 482 } 483 484 ctrlr->cfs = 1U; 485 } 486 487 static inline bool 488 ctrlr_interrupt_enabled(struct nvmf_vfio_user_ctrlr *vu_ctrlr) 489 { 490 assert(vu_ctrlr != NULL); 491 assert(vu_ctrlr->endpoint != NULL); 492 493 vfu_pci_config_space_t *pci = vu_ctrlr->endpoint->pci_config_space; 494 495 return (!pci->hdr.cmd.id || vu_ctrlr->endpoint->msix->mxc.mxe); 496 } 497 498 static void 
499 nvmf_vfio_user_destroy_endpoint(struct nvmf_vfio_user_endpoint *endpoint) 500 { 501 if (endpoint->doorbells) { 502 munmap((void *)endpoint->doorbells, NVMF_VFIO_USER_DOORBELLS_SIZE); 503 } 504 505 if (endpoint->devmem_fd > 0) { 506 close(endpoint->devmem_fd); 507 } 508 509 vfu_destroy_ctx(endpoint->vfu_ctx); 510 511 pthread_mutex_destroy(&endpoint->lock); 512 free(endpoint); 513 } 514 515 /* called when process exits */ 516 static int 517 nvmf_vfio_user_destroy(struct spdk_nvmf_transport *transport, 518 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg) 519 { 520 struct nvmf_vfio_user_transport *vu_transport; 521 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 522 523 SPDK_DEBUGLOG(nvmf_vfio, "destroy transport\n"); 524 525 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 526 transport); 527 528 (void)pthread_mutex_destroy(&vu_transport->lock); 529 530 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 531 TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link); 532 nvmf_vfio_user_destroy_endpoint(endpoint); 533 } 534 535 free(vu_transport); 536 537 if (cb_fn) { 538 cb_fn(cb_arg); 539 } 540 541 return 0; 542 } 543 544 static const struct spdk_json_object_decoder vfio_user_transport_opts_decoder[] = { 545 { 546 "disable_mappable_bar0", 547 offsetof(struct nvmf_vfio_user_transport, transport_opts.disable_mappable_bar0), 548 spdk_json_decode_bool, true 549 }, 550 }; 551 552 static struct spdk_nvmf_transport * 553 nvmf_vfio_user_create(struct spdk_nvmf_transport_opts *opts) 554 { 555 struct nvmf_vfio_user_transport *vu_transport; 556 int err; 557 558 vu_transport = calloc(1, sizeof(*vu_transport)); 559 if (vu_transport == NULL) { 560 SPDK_ERRLOG("Transport alloc fail: %m\n"); 561 return NULL; 562 } 563 564 err = pthread_mutex_init(&vu_transport->lock, NULL); 565 if (err != 0) { 566 SPDK_ERRLOG("Pthread initialisation failed (%d)\n", err); 567 goto err; 568 } 569 570 TAILQ_INIT(&vu_transport->endpoints); 571 572 if (opts->transport_specific != NULL && 573 spdk_json_decode_object_relaxed(opts->transport_specific, vfio_user_transport_opts_decoder, 574 SPDK_COUNTOF(vfio_user_transport_opts_decoder), 575 vu_transport)) { 576 SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n"); 577 free(vu_transport); 578 return NULL; 579 } 580 581 SPDK_DEBUGLOG(nvmf_vfio, "vfio_user transport: disable_mappable_bar0=%d\n", 582 vu_transport->transport_opts.disable_mappable_bar0); 583 584 return &vu_transport->transport; 585 586 err: 587 free(vu_transport); 588 589 return NULL; 590 } 591 592 static uint32_t 593 max_queue_size(struct nvmf_vfio_user_ctrlr const *ctrlr) 594 { 595 assert(ctrlr != NULL); 596 assert(ctrlr->qp[0] != NULL); 597 assert(ctrlr->qp[0]->qpair.ctrlr != NULL); 598 599 return ctrlr->qp[0]->qpair.ctrlr->vcprop.cap.bits.mqes + 1; 600 } 601 602 static void * 603 map_one(vfu_ctx_t *ctx, uint64_t addr, uint64_t len, dma_sg_t *sg, struct iovec *iov, int prot) 604 { 605 int ret; 606 607 assert(ctx != NULL); 608 assert(sg != NULL); 609 assert(iov != NULL); 610 611 ret = vfu_addr_to_sg(ctx, (void *)(uintptr_t)addr, len, sg, 1, prot); 612 if (ret < 0) { 613 return NULL; 614 } 615 616 ret = vfu_map_sg(ctx, sg, iov, 1, 0); 617 if (ret != 0) { 618 return NULL; 619 } 620 621 assert(iov->iov_base != NULL); 622 return iov->iov_base; 623 } 624 625 static inline uint32_t 626 sq_head(struct nvmf_vfio_user_qpair *qpair) 627 { 628 assert(qpair != NULL); 629 return qpair->sq.head; 630 } 631 632 static inline void 633 sqhd_advance(struct nvmf_vfio_user_ctrlr *ctrlr, 
struct nvmf_vfio_user_qpair *qpair) 634 { 635 assert(ctrlr != NULL); 636 assert(qpair != NULL); 637 qpair->sq.head = (qpair->sq.head + 1) % qpair->sq.size; 638 } 639 640 static int 641 map_q(struct nvmf_vfio_user_ctrlr *vu_ctrlr, struct nvme_q *q, bool is_cq, bool unmap) 642 { 643 uint64_t len; 644 645 assert(q->size); 646 assert(q->addr == NULL); 647 648 if (is_cq) { 649 len = q->size * sizeof(struct spdk_nvme_cpl); 650 } else { 651 len = q->size * sizeof(struct spdk_nvme_cmd); 652 } 653 654 q->addr = map_one(vu_ctrlr->endpoint->vfu_ctx, q->prp1, len, q->sg, 655 &q->iov, is_cq ? PROT_READ | PROT_WRITE : PROT_READ); 656 if (q->addr == NULL) { 657 return -EFAULT; 658 } 659 660 if (unmap) { 661 memset(q->addr, 0, len); 662 } 663 664 return 0; 665 } 666 667 static int 668 asq_setup(struct nvmf_vfio_user_ctrlr *ctrlr) 669 { 670 struct nvme_q *sq; 671 const struct spdk_nvmf_registers *regs; 672 int ret; 673 674 assert(ctrlr != NULL); 675 assert(ctrlr->qp[0] != NULL); 676 assert(ctrlr->qp[0]->sq.addr == NULL); 677 /* XXX ctrlr->asq == 0 is a valid memory address */ 678 679 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 680 sq = &ctrlr->qp[0]->sq; 681 sq->size = regs->aqa.bits.asqs + 1; 682 sq->prp1 = regs->asq; 683 sq->head = 0; 684 sq->cqid = 0; 685 sq->is_cq = false; 686 687 ret = map_q(ctrlr, sq, false, true); 688 if (ret) { 689 return ret; 690 } 691 692 *tdbl(ctrlr, sq) = 0; 693 694 return 0; 695 } 696 697 static inline int 698 queue_index(uint16_t qid, int is_cq) 699 { 700 return (qid * 2) + is_cq; 701 } 702 703 static volatile uint32_t * 704 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 705 { 706 assert(ctrlr != NULL); 707 assert(q != NULL); 708 assert(!q->is_cq); 709 710 return &ctrlr->doorbells[queue_index(io_q_id(q), false)]; 711 } 712 713 static volatile uint32_t * 714 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 715 { 716 assert(ctrlr != NULL); 717 assert(q != NULL); 718 assert(q->is_cq); 719 720 return &ctrlr->doorbells[queue_index(io_q_id(q), true)]; 721 } 722 723 static inline bool 724 cq_is_full(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 725 { 726 assert(ctrlr != NULL); 727 assert(q != NULL); 728 assert(q->is_cq); 729 730 return ((q->tail + 1) % q->size) == *hdbl(ctrlr, q); 731 } 732 733 static inline void 734 cq_tail_advance(struct nvme_q *q) 735 { 736 assert(q != NULL); 737 assert(q->is_cq); 738 739 assert(q->tail < q->size); 740 q->tail++; 741 742 if (spdk_unlikely(q->tail == q->size)) { 743 q->tail = 0; 744 q->phase = !q->phase; 745 } 746 } 747 748 static int 749 acq_setup(struct nvmf_vfio_user_ctrlr *ctrlr) 750 { 751 struct nvme_q *cq; 752 const struct spdk_nvmf_registers *regs; 753 int ret; 754 755 assert(ctrlr != NULL); 756 assert(ctrlr->qp[0] != NULL); 757 assert(ctrlr->qp[0]->cq.addr == NULL); 758 759 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 760 assert(regs != NULL); 761 cq = &ctrlr->qp[0]->cq; 762 cq->size = regs->aqa.bits.acqs + 1; 763 cq->prp1 = regs->acq; 764 cq->tail = 0; 765 cq->is_cq = true; 766 cq->ien = true; 767 cq->phase = true; 768 769 ret = map_q(ctrlr, cq, true, true); 770 if (ret) { 771 return ret; 772 } 773 *hdbl(ctrlr, cq) = 0; 774 775 return 0; 776 } 777 778 static inline dma_sg_t * 779 vu_req_to_sg_t(struct nvmf_vfio_user_req *vu_req, uint32_t iovcnt) 780 { 781 return (dma_sg_t *)((uintptr_t)vu_req->sg + iovcnt * dma_sg_size()); 782 } 783 784 static void * 785 _map_one(void *prv, uint64_t addr, uint64_t len, int prot) 786 { 787 struct spdk_nvmf_request *req = (struct spdk_nvmf_request 
*)prv;
	struct spdk_nvmf_qpair *qpair;
	struct nvmf_vfio_user_req *vu_req;
	struct nvmf_vfio_user_qpair *vu_qpair;
	void *ret;

	assert(req != NULL);
	qpair = req->qpair;
	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

	assert(vu_req->iovcnt < NVMF_VFIO_USER_MAX_IOVECS);
	ret = map_one(vu_qpair->ctrlr->endpoint->vfu_ctx, addr, len,
		      vu_req_to_sg_t(vu_req, vu_req->iovcnt),
		      &vu_req->iov[vu_req->iovcnt], prot);
	if (spdk_likely(ret != NULL)) {
		vu_req->iovcnt++;
	}
	return ret;
}

static int
vfio_user_map_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req,
		  struct iovec *iov, uint32_t length)
{
	/* Map the command's data buffers (PRPs or SGLs) from guest physical
	 * memory to host virtual memory addresses.
	 */
	return nvme_map_cmd(req, &req->cmd->nvme_cmd, iov, NVMF_REQ_MAX_BUFFERS,
			    length, 4096, _map_one);
}

static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct nvmf_vfio_user_qpair *vu_qpair);

/*
 * Posts a CQE in the completion queue.
 *
 * @ctrlr: the vfio-user controller
 * @cq: the completion queue
 * @cdw0: cdw0 as reported by NVMf
 * @sqid: submission queue ID
 * @cid: command identifier in NVMe command
 * @sc: the NVMe CQE status code
 * @sct: the NVMe CQE status code type
 */
static int
post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *cq,
		uint32_t cdw0, uint16_t sqid, uint16_t cid, uint16_t sc, uint16_t sct)
{
	struct spdk_nvme_cpl *cpl;
	const struct spdk_nvmf_registers *regs;
	int err;

	assert(ctrlr != NULL);

	if (spdk_unlikely(cq == NULL || cq->addr == NULL)) {
		return 0;
	}

	regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
	if (regs->csts.bits.shst != SPDK_NVME_SHST_NORMAL) {
		SPDK_DEBUGLOG(nvmf_vfio,
			      "%s: ignore completion SQ%d cid=%d status=%#x\n",
			      ctrlr_id(ctrlr), sqid, cid, sc);
		return 0;
	}

	if (cq_is_full(ctrlr, cq)) {
		SPDK_ERRLOG("%s: CQ%d full (tail=%d, head=%d)\n",
			    ctrlr_id(ctrlr), io_q_id(cq), cq->tail, *hdbl(ctrlr, cq));
		return -1;
	}

	cpl = ((struct spdk_nvme_cpl *)cq->addr) + cq->tail;

	assert(ctrlr->qp[sqid] != NULL);
	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: request complete SQ%d cid=%d status=%#x SQ head=%#x CQ tail=%#x\n",
		      ctrlr_id(ctrlr), sqid, cid, sc, sq_head(ctrlr->qp[sqid]),
		      cq->tail);

	cpl->sqhd = sq_head(ctrlr->qp[sqid]);
	cpl->sqid = sqid;
	cpl->cid = cid;
	cpl->cdw0 = cdw0;
	cpl->status.dnr = 0x0;
	cpl->status.m = 0x0;
	cpl->status.sct = sct;
	cpl->status.p = cq->phase;
	cpl->status.sc = sc;

	cq_tail_advance(cq);

	/*
	 * This function now executes in SPDK thread context; we might be
	 * triggering interrupts from vfio-user thread context, so
	 * check for race conditions.
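	 * An interrupt is only triggered below if the guest has INTx or MSI-X
	 * enabled in PCI config space (see ctrlr_interrupt_enabled()) and this
	 * CQ was created with interrupts enabled (cq->ien); cq->iv selects the
	 * vector handed to vfu_irq_trigger().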
886 */ 887 if (ctrlr_interrupt_enabled(ctrlr) && cq->ien) { 888 err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv); 889 if (err != 0) { 890 SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n", 891 ctrlr_id(ctrlr)); 892 return err; 893 } 894 } 895 896 return 0; 897 } 898 899 static bool 900 io_q_exists(struct nvmf_vfio_user_ctrlr *vu_ctrlr, const uint16_t qid, const bool is_cq) 901 { 902 assert(vu_ctrlr != NULL); 903 904 if (qid == 0 || qid >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) { 905 return false; 906 } 907 908 if (vu_ctrlr->qp[qid] == NULL) { 909 return false; 910 } 911 912 if (!is_cq) { 913 if (vu_ctrlr->qp[qid]->state == VFIO_USER_QPAIR_SQ_DELETED || 914 vu_ctrlr->qp[qid]->state == VFIO_USER_QPAIR_UNINITIALIZED) { 915 return false; 916 } 917 } 918 919 return true; 920 } 921 922 static void 923 unmap_qp(struct nvmf_vfio_user_qpair *qp) 924 { 925 struct nvmf_vfio_user_ctrlr *ctrlr; 926 927 if (qp->ctrlr == NULL) { 928 return; 929 } 930 ctrlr = qp->ctrlr; 931 932 SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap QP%d\n", 933 ctrlr_id(ctrlr), qp->qpair.qid); 934 935 if (qp->sq.addr != NULL) { 936 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, qp->sq.sg, &qp->sq.iov, 1); 937 qp->sq.addr = NULL; 938 } 939 940 if (qp->cq.addr != NULL) { 941 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, qp->cq.sg, &qp->cq.iov, 1); 942 qp->cq.addr = NULL; 943 } 944 } 945 946 static int 947 remap_qp(struct nvmf_vfio_user_qpair *vu_qpair) 948 { 949 struct nvme_q *sq, *cq; 950 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 951 int ret; 952 953 vu_ctrlr = vu_qpair->ctrlr; 954 sq = &vu_qpair->sq; 955 cq = &vu_qpair->cq; 956 957 if (sq->size) { 958 ret = map_q(vu_ctrlr, sq, false, false); 959 if (ret) { 960 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap SQID %d %#lx-%#lx\n", 961 io_q_id(sq), sq->prp1, sq->prp1 + sq->size * sizeof(struct spdk_nvme_cmd)); 962 return -EFAULT; 963 } 964 } 965 966 if (cq->size) { 967 ret = map_q(vu_ctrlr, cq, true, false); 968 if (ret) { 969 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap CQID %d %#lx-%#lx\n", 970 io_q_id(cq), cq->prp1, cq->prp1 + cq->size * sizeof(struct spdk_nvme_cpl)); 971 return -EFAULT; 972 } 973 974 } 975 976 return 0; 977 } 978 979 static void 980 free_qp(struct nvmf_vfio_user_ctrlr *ctrlr, uint16_t qid) 981 { 982 struct nvmf_vfio_user_qpair *qpair; 983 struct nvmf_vfio_user_req *vu_req; 984 uint32_t i; 985 986 if (ctrlr == NULL) { 987 return; 988 } 989 990 qpair = ctrlr->qp[qid]; 991 if (qpair == NULL) { 992 return; 993 } 994 995 SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy QP%d=%p\n", ctrlr_id(ctrlr), 996 qid, qpair); 997 998 unmap_qp(qpair); 999 1000 for (i = 0; i < qpair->qsize; i++) { 1001 vu_req = &qpair->reqs_internal[i]; 1002 free(vu_req->sg); 1003 } 1004 free(qpair->reqs_internal); 1005 1006 free(qpair->sq.sg); 1007 free(qpair->cq.sg); 1008 free(qpair); 1009 1010 ctrlr->qp[qid] = NULL; 1011 } 1012 1013 /* This function can only fail because of memory allocation errors. 
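 * It allocates the queue pair itself, one dma_sg_t for each of the SQ and CQ,
 * and qsize request contexts, each with room for NVMF_VFIO_USER_MAX_IOVECS
 * scatter-gather entries; on any allocation failure, everything allocated so
 * far is freed and -ENOMEM is returned.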
*/ 1014 static int 1015 init_qp(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_transport *transport, 1016 const uint32_t qsize, const uint16_t id) 1017 { 1018 uint32_t i; 1019 struct nvmf_vfio_user_qpair *qpair; 1020 struct nvmf_vfio_user_req *vu_req, *tmp; 1021 struct spdk_nvmf_request *req; 1022 1023 assert(ctrlr != NULL); 1024 assert(transport != NULL); 1025 1026 qpair = calloc(1, sizeof(*qpair)); 1027 if (qpair == NULL) { 1028 return -ENOMEM; 1029 } 1030 qpair->sq.sg = calloc(1, dma_sg_size()); 1031 if (qpair->sq.sg == NULL) { 1032 free(qpair); 1033 return -ENOMEM; 1034 } 1035 qpair->cq.sg = calloc(1, dma_sg_size()); 1036 if (qpair->cq.sg == NULL) { 1037 free(qpair->sq.sg); 1038 free(qpair); 1039 return -ENOMEM; 1040 } 1041 1042 qpair->qpair.qid = id; 1043 qpair->qpair.transport = transport; 1044 qpair->ctrlr = ctrlr; 1045 qpair->qsize = qsize; 1046 1047 TAILQ_INIT(&qpair->reqs); 1048 1049 qpair->reqs_internal = calloc(qsize, sizeof(struct nvmf_vfio_user_req)); 1050 if (qpair->reqs_internal == NULL) { 1051 SPDK_ERRLOG("%s: error allocating reqs: %m\n", ctrlr_id(ctrlr)); 1052 goto reqs_err; 1053 } 1054 1055 for (i = 0; i < qsize; i++) { 1056 vu_req = &qpair->reqs_internal[i]; 1057 vu_req->sg = calloc(NVMF_VFIO_USER_MAX_IOVECS, dma_sg_size()); 1058 if (vu_req->sg == NULL) { 1059 goto sg_err; 1060 } 1061 1062 req = &vu_req->req; 1063 req->qpair = &qpair->qpair; 1064 req->rsp = (union nvmf_c2h_msg *)&vu_req->rsp; 1065 req->cmd = (union nvmf_h2c_msg *)&vu_req->cmd; 1066 1067 TAILQ_INSERT_TAIL(&qpair->reqs, vu_req, link); 1068 } 1069 1070 ctrlr->qp[id] = qpair; 1071 return 0; 1072 1073 sg_err: 1074 TAILQ_FOREACH_SAFE(vu_req, &qpair->reqs, link, tmp) { 1075 free(vu_req->sg); 1076 } 1077 free(qpair->reqs_internal); 1078 1079 reqs_err: 1080 free(qpair->sq.sg); 1081 free(qpair->cq.sg); 1082 free(qpair); 1083 return -ENOMEM; 1084 } 1085 1086 /* 1087 * Creates a completion or submission I/O queue. Returns 0 on success, -errno 1088 * on error. 1089 */ 1090 static int 1091 handle_create_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, 1092 struct spdk_nvme_cmd *cmd, const bool is_cq) 1093 { 1094 uint16_t qid; 1095 uint32_t qsize; 1096 uint16_t sc = SPDK_NVME_SC_SUCCESS; 1097 uint16_t sct = SPDK_NVME_SCT_GENERIC; 1098 int err = 0; 1099 struct nvmf_vfio_user_qpair *vu_qpair; 1100 struct nvme_q *io_q; 1101 1102 assert(ctrlr != NULL); 1103 assert(cmd != NULL); 1104 1105 qid = cmd->cdw10_bits.create_io_q.qid; 1106 if (qid == 0 || qid >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) { 1107 SPDK_ERRLOG("%s: invalid QID=%d, max=%d\n", ctrlr_id(ctrlr), 1108 qid, NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR); 1109 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1110 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1111 goto out; 1112 } 1113 1114 if (io_q_exists(ctrlr, qid, is_cq)) { 1115 SPDK_ERRLOG("%s: %cQ%d already exists\n", ctrlr_id(ctrlr), 1116 is_cq ? 'C' : 'S', qid); 1117 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1118 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1119 goto out; 1120 } 1121 1122 qsize = cmd->cdw10_bits.create_io_q.qsize + 1; 1123 if (qsize == 1 || qsize > max_queue_size(ctrlr)) { 1124 SPDK_ERRLOG("%s: invalid I/O queue size %u\n", ctrlr_id(ctrlr), qsize); 1125 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1126 sc = SPDK_NVME_SC_INVALID_QUEUE_SIZE; 1127 goto out; 1128 } 1129 1130 SPDK_DEBUGLOG(nvmf_vfio, 1131 "%s: create I/O %cQ%d: QSIZE=%#x\n", ctrlr_id(ctrlr), 1132 is_cq ? 
		      'C' : 'S', qid, qsize);

	if (is_cq) {
		err = init_qp(ctrlr, ctrlr->qp[0]->qpair.transport, qsize, qid);
		if (err != 0) {
			sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto out;
		}

		io_q = &ctrlr->qp[qid]->cq;
		if (cmd->cdw11_bits.create_io_cq.pc != 0x1) {
			SPDK_ERRLOG("%s: non-PC CQ not supported\n", ctrlr_id(ctrlr));
			sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
			goto out;
		}
		io_q->ien = cmd->cdw11_bits.create_io_cq.ien;
		io_q->iv = cmd->cdw11_bits.create_io_cq.iv;
		io_q->phase = true;
	} else {
		if (cmd->cdw11_bits.create_io_sq.cqid == 0) {
			SPDK_ERRLOG("%s: invalid CQID 0\n", ctrlr_id(ctrlr));
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
			goto out;
		}

		/* CQ must be created before SQ */
		if (!io_q_exists(ctrlr, cmd->cdw11_bits.create_io_sq.cqid, true)) {
			SPDK_ERRLOG("%s: CQ%d does not exist\n", ctrlr_id(ctrlr),
				    cmd->cdw11_bits.create_io_sq.cqid);
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_COMPLETION_QUEUE_INVALID;
			goto out;
		}

		if (cmd->cdw11_bits.create_io_sq.pc != 0x1) {
			SPDK_ERRLOG("%s: non-PC SQ not supported\n", ctrlr_id(ctrlr));
			sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
			goto out;
		}

		/* TODO: support shared IO CQ */
		if (qid != cmd->cdw11_bits.create_io_sq.cqid) {
			SPDK_ERRLOG("%s: shared CQs are not supported yet\n", ctrlr_id(ctrlr));
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
			goto out;
		}

		io_q = &ctrlr->qp[qid]->sq;
		io_q->cqid = cmd->cdw11_bits.create_io_sq.cqid;
		SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%d CQID=%d\n", ctrlr_id(ctrlr),
			      qid, io_q->cqid);
	}

	io_q->is_cq = is_cq;
	io_q->size = qsize;
	io_q->prp1 = cmd->dptr.prp.prp1;

	err = map_q(ctrlr, io_q, is_cq, true);
	if (err) {
		sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		SPDK_ERRLOG("%s: failed to map I/O queue: %m\n", ctrlr_id(ctrlr));
		goto out;
	}

	SPDK_DEBUGLOG(nvmf_vfio, "%s: mapped %cQ%d IOVA=%#lx vaddr=%#llx\n",
		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
		      qid, cmd->dptr.prp.prp1, (unsigned long long)io_q->addr);

	if (is_cq) {
		*hdbl(ctrlr, io_q) = 0;
	} else {
		vu_qpair = ctrlr->qp[qid];
		*tdbl(ctrlr, io_q) = 0;
		vu_qpair->sq.head = 0;

		if (vu_qpair->state == VFIO_USER_QPAIR_SQ_DELETED) {
			vu_qpair->state = VFIO_USER_QPAIR_ACTIVE;
		} else {
			/*
			 * Create our new I/O qpair. This asynchronously invokes, on a
			 * suitable poll group, the nvmf_vfio_user_poll_group_add()
			 * callback, which will call spdk_nvmf_request_exec_fabrics()
			 * with a generated fabrics connect command. This command is
			 * then eventually completed via handle_queue_connect_rsp().
			 */
			vu_qpair->create_io_sq_cmd = *cmd;
			spdk_nvmf_tgt_new_qpair(ctrlr->transport->transport.tgt,
						&vu_qpair->qpair);
			return 0;
		}
	}

out:
	return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, sc, sct);
}

/* For an admin Delete I/O Completion Queue command, the NVMf library will
 * disconnect and free the queue pair, so save the command in a context.
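 * The saved command is completed later from vfio_user_qpair_delete_cb(),
 * once the NVMf layer has finished disconnecting the queue pair.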
 */
struct vfio_user_delete_cq_ctx {
	struct nvmf_vfio_user_ctrlr *vu_ctrlr;
	struct spdk_nvme_cmd delete_io_cq_cmd;
};

static void
vfio_user_qpair_delete_cb(void *cb_arg)
{
	struct vfio_user_delete_cq_ctx *ctx = cb_arg;
	struct nvmf_vfio_user_ctrlr *vu_ctrlr = ctx->vu_ctrlr;

	post_completion(vu_ctrlr, &vu_ctrlr->qp[0]->cq, 0, 0, ctx->delete_io_cq_cmd.cid,
			SPDK_NVME_SC_SUCCESS, SPDK_NVME_SCT_GENERIC);
	free(ctx);
}

/*
 * Deletes a completion or submission I/O queue.
 */
static int
handle_del_io_q(struct nvmf_vfio_user_ctrlr *ctrlr,
		struct spdk_nvme_cmd *cmd, const bool is_cq)
{
	uint16_t sct = SPDK_NVME_SCT_GENERIC;
	uint16_t sc = SPDK_NVME_SC_SUCCESS;
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct vfio_user_delete_cq_ctx *ctx;

	SPDK_DEBUGLOG(nvmf_vfio, "%s: delete I/O %cQ: QID=%d\n",
		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
		      cmd->cdw10_bits.delete_io_q.qid);

	if (!io_q_exists(ctrlr, cmd->cdw10_bits.delete_io_q.qid, is_cq)) {
		SPDK_ERRLOG("%s: I/O %cQ%d does not exist\n", ctrlr_id(ctrlr),
			    is_cq ? 'C' : 'S', cmd->cdw10_bits.delete_io_q.qid);
		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
		goto out;
	}

	vu_qpair = ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid];
	if (is_cq) {
		/* SQ must have been deleted first */
		if (vu_qpair->state != VFIO_USER_QPAIR_SQ_DELETED) {
			SPDK_ERRLOG("%s: the associated SQ must be deleted first\n", ctrlr_id(ctrlr));
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_INVALID_QUEUE_DELETION;
			goto out;
		}
		ctx = calloc(1, sizeof(*ctx));
		if (!ctx) {
			sct = SPDK_NVME_SCT_GENERIC;
			sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto out;
		}
		ctx->vu_ctrlr = ctrlr;
		ctx->delete_io_cq_cmd = *cmd;
		spdk_nvmf_qpair_disconnect(&vu_qpair->qpair, vfio_user_qpair_delete_cb, ctx);
		return 0;
	} else {
		if (vu_qpair->state == VFIO_USER_QPAIR_SQ_DELETED) {
			SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%u is already deleted\n", ctrlr_id(ctrlr),
				      cmd->cdw10_bits.delete_io_q.qid);
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
			goto out;
		}

		/*
		 * This doesn't actually delete the SQ; we're merely telling the
		 * poll_group_poll function to skip checking this SQ. The queue
		 * pair is disconnected when the Delete I/O CQ command is handled.
		 */
		vu_qpair->state = VFIO_USER_QPAIR_SQ_DELETED;
		vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, vu_qpair->sq.sg, &vu_qpair->sq.iov, 1);
		vu_qpair->sq.addr = NULL;
	}

out:
	return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, sc, sct);
}

/*
 * Returns 0 on success and -errno on error.
 */
static int
consume_admin_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd)
{
	assert(ctrlr != NULL);
	assert(cmd != NULL);

	if (cmd->fuse != 0) {
		/* Fused admin commands are not supported.
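		 * FUSE is a two-bit field in command dword 0; any non-zero
		 * value (first or second command of a fused pair) is failed
		 * here with Invalid Field in Command.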
*/ 1324 return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, 1325 SPDK_NVME_SC_INVALID_FIELD, 1326 SPDK_NVME_SCT_GENERIC); 1327 } 1328 1329 switch (cmd->opc) { 1330 case SPDK_NVME_OPC_CREATE_IO_CQ: 1331 case SPDK_NVME_OPC_CREATE_IO_SQ: 1332 return handle_create_io_q(ctrlr, cmd, 1333 cmd->opc == SPDK_NVME_OPC_CREATE_IO_CQ); 1334 case SPDK_NVME_OPC_DELETE_IO_SQ: 1335 case SPDK_NVME_OPC_DELETE_IO_CQ: 1336 return handle_del_io_q(ctrlr, cmd, 1337 cmd->opc == SPDK_NVME_OPC_DELETE_IO_CQ); 1338 default: 1339 return handle_cmd_req(ctrlr, cmd, ctrlr->qp[0]); 1340 } 1341 } 1342 1343 static int 1344 handle_cmd_rsp(struct nvmf_vfio_user_req *vu_req, void *cb_arg) 1345 { 1346 struct nvmf_vfio_user_qpair *vu_qpair = cb_arg; 1347 struct nvmf_vfio_user_ctrlr *vu_ctrlr = vu_qpair->ctrlr; 1348 uint16_t sqid, cqid; 1349 1350 assert(vu_qpair != NULL); 1351 assert(vu_req != NULL); 1352 assert(vu_ctrlr != NULL); 1353 1354 if (spdk_likely(vu_req->iovcnt)) { 1355 vfu_unmap_sg(vu_ctrlr->endpoint->vfu_ctx, vu_req->sg, vu_req->iov, vu_req->iovcnt); 1356 } 1357 sqid = vu_qpair->qpair.qid; 1358 cqid = vu_ctrlr->qp[sqid]->sq.cqid; 1359 1360 return post_completion(vu_ctrlr, &vu_ctrlr->qp[cqid]->cq, 1361 vu_req->req.rsp->nvme_cpl.cdw0, 1362 sqid, 1363 vu_req->req.cmd->nvme_cmd.cid, 1364 vu_req->req.rsp->nvme_cpl.status.sc, 1365 vu_req->req.rsp->nvme_cpl.status.sct); 1366 } 1367 1368 static int 1369 consume_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair, 1370 struct spdk_nvme_cmd *cmd) 1371 { 1372 assert(qpair != NULL); 1373 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 1374 return consume_admin_cmd(ctrlr, cmd); 1375 } 1376 1377 return handle_cmd_req(ctrlr, cmd, qpair); 1378 } 1379 1380 /* Returns the number of commands processed, or a negative value on error. */ 1381 static int 1382 handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail, 1383 struct nvmf_vfio_user_qpair *qpair) 1384 { 1385 struct spdk_nvme_cmd *queue; 1386 int count = 0; 1387 1388 assert(ctrlr != NULL); 1389 assert(qpair != NULL); 1390 1391 queue = qpair->sq.addr; 1392 while (sq_head(qpair) != new_tail) { 1393 int err; 1394 struct spdk_nvme_cmd *cmd = &queue[sq_head(qpair)]; 1395 1396 count++; 1397 1398 /* 1399 * SQHD must contain the new head pointer, so we must increase 1400 * it before we generate a completion. 1401 */ 1402 sqhd_advance(ctrlr, qpair); 1403 1404 err = consume_cmd(ctrlr, qpair, cmd); 1405 if (err != 0) { 1406 return err; 1407 } 1408 } 1409 1410 return count; 1411 } 1412 1413 static int 1414 enable_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1415 { 1416 int err; 1417 1418 assert(ctrlr != NULL); 1419 1420 err = acq_setup(ctrlr); 1421 if (err != 0) { 1422 return err; 1423 } 1424 1425 err = asq_setup(ctrlr); 1426 if (err != 0) { 1427 return err; 1428 } 1429 1430 return 0; 1431 } 1432 1433 static void 1434 disable_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1435 { 1436 assert(ctrlr->qp[0] != NULL); 1437 1438 unmap_qp(ctrlr->qp[0]); 1439 } 1440 1441 static void 1442 memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1443 { 1444 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1445 struct nvmf_vfio_user_ctrlr *ctrlr; 1446 struct nvmf_vfio_user_qpair *qpair; 1447 int ret; 1448 1449 /* 1450 * We're not interested in any DMA regions that aren't mappable (we don't 1451 * support clients that don't share their memory). 
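	 * Regions that are shared but not 2MiB-aligned in IOVA and length are
	 * skipped below as well, since they cannot be registered with the SPDK
	 * memory map via spdk_mem_register().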
1452 */ 1453 if (!info->vaddr) { 1454 return; 1455 } 1456 1457 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1458 (info->mapping.iov_len & MASK_2MB)) { 1459 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1460 (uintptr_t)info->mapping.iov_base, 1461 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1462 return; 1463 } 1464 1465 assert(endpoint != NULL); 1466 if (endpoint->ctrlr == NULL) { 1467 return; 1468 } 1469 ctrlr = endpoint->ctrlr; 1470 1471 SPDK_DEBUGLOG(nvmf_vfio, "%s: map IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1472 (uintptr_t)info->mapping.iov_base, 1473 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1474 1475 /* VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE are enabled when registering to VFIO, here we also 1476 * check the protection bits before registering. 1477 */ 1478 if (info->prot == (PROT_WRITE | PROT_READ)) { 1479 ret = spdk_mem_register(info->mapping.iov_base, info->mapping.iov_len); 1480 if (ret) { 1481 SPDK_ERRLOG("Memory region register %#lx-%#lx failed, ret=%d\n", 1482 (uint64_t)(uintptr_t)info->mapping.iov_base, 1483 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len, 1484 ret); 1485 } 1486 } 1487 1488 pthread_mutex_lock(&endpoint->lock); 1489 TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) { 1490 if (qpair->state != VFIO_USER_QPAIR_INACTIVE) { 1491 continue; 1492 } 1493 1494 ret = remap_qp(qpair); 1495 if (ret) { 1496 continue; 1497 } 1498 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1499 SPDK_DEBUGLOG(nvmf_vfio, "Remap QP %u successfully\n", qpair->qpair.qid); 1500 } 1501 pthread_mutex_unlock(&endpoint->lock); 1502 } 1503 1504 static int 1505 memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1506 { 1507 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1508 struct nvmf_vfio_user_ctrlr *ctrlr; 1509 struct nvmf_vfio_user_qpair *qpair; 1510 void *map_start, *map_end; 1511 int ret = 0; 1512 1513 if (!info->vaddr) { 1514 return 0; 1515 } 1516 1517 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1518 (info->mapping.iov_len & MASK_2MB)) { 1519 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1520 (uintptr_t)info->mapping.iov_base, 1521 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1522 return 0; 1523 } 1524 1525 assert(endpoint != NULL); 1526 if (endpoint->ctrlr == NULL) { 1527 return 0; 1528 } 1529 ctrlr = endpoint->ctrlr; 1530 1531 SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1532 (uintptr_t)info->mapping.iov_base, 1533 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1534 1535 map_start = info->mapping.iov_base; 1536 map_end = info->mapping.iov_base + info->mapping.iov_len; 1537 1538 pthread_mutex_lock(&endpoint->lock); 1539 TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) { 1540 if ((qpair->cq.addr >= map_start && qpair->cq.addr <= map_end) || 1541 (qpair->sq.addr >= map_start && qpair->sq.addr <= map_end)) { 1542 /* TODO: Ideally we should disconnect this queue pair 1543 * before returning to caller. 
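			 * For now the queue pair is only unmapped and marked
			 * VFIO_USER_QPAIR_INACTIVE; if the region is mapped
			 * again later, memory_region_add_cb() remaps the queues
			 * and moves the state back to VFIO_USER_QPAIR_ACTIVE.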
1544 */ 1545 unmap_qp(qpair); 1546 qpair->state = VFIO_USER_QPAIR_INACTIVE; 1547 } 1548 } 1549 pthread_mutex_unlock(&endpoint->lock); 1550 1551 if (info->prot == (PROT_WRITE | PROT_READ)) { 1552 ret = spdk_mem_unregister(info->mapping.iov_base, info->mapping.iov_len); 1553 if (ret) { 1554 SPDK_ERRLOG("Memory region unregister %#lx-%#lx failed, ret=%d\n", 1555 (uint64_t)(uintptr_t)info->mapping.iov_base, 1556 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len, 1557 ret); 1558 } 1559 } 1560 1561 return 0; 1562 } 1563 1564 static int 1565 nvmf_vfio_user_prop_req_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1566 { 1567 struct nvmf_vfio_user_qpair *vu_qpair = cb_arg; 1568 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 1569 bool disable_admin = false; 1570 int ret; 1571 1572 assert(vu_qpair != NULL); 1573 assert(req != NULL); 1574 1575 if (req->req.cmd->prop_get_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET) { 1576 assert(vu_qpair->ctrlr != NULL); 1577 assert(req != NULL); 1578 1579 memcpy(req->req.data, 1580 &req->req.rsp->prop_get_rsp.value.u64, 1581 req->req.length); 1582 } else { 1583 assert(req->req.cmd->prop_set_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET); 1584 assert(vu_qpair->ctrlr != NULL); 1585 vu_ctrlr = vu_qpair->ctrlr; 1586 1587 if (req->req.cmd->prop_set_cmd.ofst == offsetof(struct spdk_nvme_registers, cc)) { 1588 union spdk_nvme_cc_register cc, diff; 1589 1590 cc.raw = req->req.cmd->prop_set_cmd.value.u64; 1591 diff.raw = cc.raw ^ req->cc.raw; 1592 1593 if (diff.bits.en) { 1594 if (cc.bits.en) { 1595 SPDK_DEBUGLOG(nvmf_vfio, "%s: MAP Admin queue\n", ctrlr_id(vu_ctrlr)); 1596 ret = enable_admin_queue(vu_ctrlr); 1597 if (ret) { 1598 SPDK_ERRLOG("%s: failed to map Admin queue\n", ctrlr_id(vu_ctrlr)); 1599 return ret; 1600 } 1601 vu_qpair->state = VFIO_USER_QPAIR_ACTIVE; 1602 } else { 1603 disable_admin = true; 1604 } 1605 } 1606 1607 if (diff.bits.shn) { 1608 if (cc.bits.shn == SPDK_NVME_SHN_NORMAL || cc.bits.shn == SPDK_NVME_SHN_ABRUPT) { 1609 disable_admin = true; 1610 } 1611 } 1612 1613 if (disable_admin) { 1614 SPDK_DEBUGLOG(nvmf_vfio, 1615 "%s: UNMAP Admin queue\n", 1616 ctrlr_id(vu_ctrlr)); 1617 vu_qpair->state = VFIO_USER_QPAIR_INACTIVE; 1618 disable_admin_queue(vu_ctrlr); 1619 /* For PCIe controller reset or shutdown, we will drop all AER responses */ 1620 nvmf_ctrlr_abort_aer(vu_qpair->qpair.ctrlr); 1621 } 1622 } 1623 } 1624 1625 return 0; 1626 } 1627 1628 /* 1629 * Handles a write at offset 0x1000 or more; this is the non-mapped path when a 1630 * doorbell is written via access_bar0_fn(). 1631 * 1632 * DSTRD is set to fixed value 0 for NVMf. 
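 * With DSTRD = 0 each doorbell is 4 bytes wide and the register file is laid
 * out as SQ0TDBL, CQ0HDBL, SQ1TDBL, CQ1HDBL, ...; after subtracting
 * NVMF_VFIO_USER_DOORBELLS_OFFSET, the dword index is 2 * qid for an SQ tail
 * doorbell and 2 * qid + 1 for a CQ head doorbell (cf. queue_index()).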
1633 * 1634 */ 1635 static int 1636 handle_dbl_access(struct nvmf_vfio_user_ctrlr *ctrlr, uint32_t *buf, 1637 const size_t count, loff_t pos, const bool is_write) 1638 { 1639 assert(ctrlr != NULL); 1640 assert(buf != NULL); 1641 1642 if (count != sizeof(uint32_t)) { 1643 SPDK_ERRLOG("%s: bad doorbell buffer size %ld\n", 1644 ctrlr_id(ctrlr), count); 1645 errno = EINVAL; 1646 return -1; 1647 } 1648 1649 pos -= NVMF_VFIO_USER_DOORBELLS_OFFSET; 1650 1651 /* pos must be dword aligned */ 1652 if ((pos & 0x3) != 0) { 1653 SPDK_ERRLOG("%s: bad doorbell offset %#lx\n", ctrlr_id(ctrlr), pos); 1654 errno = EINVAL; 1655 return -1; 1656 } 1657 1658 /* convert byte offset to array index */ 1659 pos >>= 2; 1660 1661 if (pos >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR * 2) { 1662 SPDK_ERRLOG("%s: bad doorbell index %#lx\n", ctrlr_id(ctrlr), pos); 1663 errno = EINVAL; 1664 return -1; 1665 } 1666 1667 if (is_write) { 1668 ctrlr->doorbells[pos] = *buf; 1669 spdk_wmb(); 1670 } else { 1671 spdk_rmb(); 1672 *buf = ctrlr->doorbells[pos]; 1673 } 1674 return 0; 1675 } 1676 1677 static ssize_t 1678 access_bar0_fn(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos, 1679 bool is_write) 1680 { 1681 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1682 struct nvmf_vfio_user_ctrlr *ctrlr; 1683 struct nvmf_vfio_user_req *req; 1684 const struct spdk_nvmf_registers *regs; 1685 int ret; 1686 1687 ctrlr = endpoint->ctrlr; 1688 1689 SPDK_DEBUGLOG(nvmf_vfio, 1690 "%s: bar0 %s ctrlr: %p, count=%zu, pos=%"PRIX64"\n", 1691 endpoint_id(endpoint), is_write ? "write" : "read", 1692 ctrlr, count, pos); 1693 1694 if (pos >= NVMF_VFIO_USER_DOORBELLS_OFFSET) { 1695 /* 1696 * The fact that the doorbells can be memory mapped doesn't mean 1697 * that the client (VFIO in QEMU) is obliged to memory map them, 1698 * it might still elect to access them via regular read/write; 1699 * we might also have had disable_mappable_bar0 set. 
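		 * In that case the write ends up here and handle_dbl_access()
		 * stores the value into the same doorbell array that backs the
		 * sparse-mmap region, so the submission path observes it either
		 * way.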
1700 */ 1701 ret = handle_dbl_access(ctrlr, (uint32_t *)buf, count, 1702 pos, is_write); 1703 if (ret == 0) { 1704 return count; 1705 } 1706 return ret; 1707 } 1708 1709 /* Construct a Fabric Property Get/Set command and send it */ 1710 req = get_nvmf_vfio_user_req(ctrlr->qp[0]); 1711 if (req == NULL) { 1712 errno = ENOBUFS; 1713 return -1; 1714 } 1715 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 1716 req->cc.raw = regs->cc.raw; 1717 1718 req->cb_fn = nvmf_vfio_user_prop_req_rsp; 1719 req->cb_arg = ctrlr->qp[0]; 1720 req->req.cmd->prop_set_cmd.opcode = SPDK_NVME_OPC_FABRIC; 1721 req->req.cmd->prop_set_cmd.cid = 0; 1722 req->req.cmd->prop_set_cmd.attrib.size = (count / 4) - 1; 1723 req->req.cmd->prop_set_cmd.ofst = pos; 1724 if (is_write) { 1725 req->req.cmd->prop_set_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET; 1726 if (req->req.cmd->prop_set_cmd.attrib.size) { 1727 req->req.cmd->prop_set_cmd.value.u64 = *(uint64_t *)buf; 1728 } else { 1729 req->req.cmd->prop_set_cmd.value.u32.high = 0; 1730 req->req.cmd->prop_set_cmd.value.u32.low = *(uint32_t *)buf; 1731 } 1732 } else { 1733 req->req.cmd->prop_get_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET; 1734 } 1735 req->req.length = count; 1736 req->req.data = buf; 1737 1738 spdk_nvmf_request_exec_fabrics(&req->req); 1739 1740 return count; 1741 } 1742 1743 /* 1744 * NVMe driver reads 4096 bytes, which is the extended PCI configuration space 1745 * available on PCI-X 2.0 and PCI Express buses 1746 */ 1747 static ssize_t 1748 access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, 1749 bool is_write) 1750 { 1751 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1752 1753 if (is_write) { 1754 SPDK_ERRLOG("%s: write %#lx-%#lx not supported\n", 1755 endpoint_id(endpoint), offset, offset + count); 1756 errno = EINVAL; 1757 return -1; 1758 } 1759 1760 if (offset + count > PCI_CFG_SPACE_EXP_SIZE) { 1761 SPDK_ERRLOG("%s: access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n", 1762 endpoint_id(endpoint), offset, count, 1763 PCI_CFG_SPACE_EXP_SIZE); 1764 errno = ERANGE; 1765 return -1; 1766 } 1767 1768 memcpy(buf, ((unsigned char *)endpoint->pci_config_space) + offset, count); 1769 1770 return count; 1771 } 1772 1773 static void 1774 vfio_user_log(vfu_ctx_t *vfu_ctx, int level, char const *msg) 1775 { 1776 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1777 1778 if (level >= LOG_DEBUG) { 1779 SPDK_DEBUGLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1780 } else if (level >= LOG_INFO) { 1781 SPDK_INFOLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1782 } else if (level >= LOG_NOTICE) { 1783 SPDK_NOTICELOG("%s: %s\n", endpoint_id(endpoint), msg); 1784 } else if (level >= LOG_WARNING) { 1785 SPDK_WARNLOG("%s: %s\n", endpoint_id(endpoint), msg); 1786 } else { 1787 SPDK_ERRLOG("%s: %s\n", endpoint_id(endpoint), msg); 1788 } 1789 } 1790 1791 static int 1792 vfio_user_get_log_level(void) 1793 { 1794 int level; 1795 1796 if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf_vfio")) { 1797 return LOG_DEBUG; 1798 } 1799 1800 level = spdk_log_to_syslog_level(spdk_log_get_level()); 1801 if (level < 0) { 1802 return LOG_ERR; 1803 } 1804 1805 return level; 1806 } 1807 1808 static void 1809 init_pci_config_space(vfu_pci_config_space_t *p) 1810 { 1811 /* MLBAR */ 1812 p->hdr.bars[0].raw = 0x0; 1813 /* MUBAR */ 1814 p->hdr.bars[1].raw = 0x0; 1815 1816 /* vendor specific, let's set them to zero for now */ 1817 p->hdr.bars[3].raw = 0x0; 1818 p->hdr.bars[4].raw = 0x0; 
1819 p->hdr.bars[5].raw = 0x0; 1820 1821 /* enable INTx */ 1822 p->hdr.intr.ipin = 0x1; 1823 } 1824 1825 static int 1826 vfio_user_dev_info_fill(struct nvmf_vfio_user_transport *vu_transport, 1827 struct nvmf_vfio_user_endpoint *endpoint) 1828 { 1829 int ret; 1830 ssize_t cap_offset; 1831 vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx; 1832 1833 struct pmcap pmcap = { .hdr.id = PCI_CAP_ID_PM, .pmcs.nsfrst = 0x1 }; 1834 struct pxcap pxcap = { 1835 .hdr.id = PCI_CAP_ID_EXP, 1836 .pxcaps.ver = 0x2, 1837 .pxdcap = {.rer = 0x1, .flrc = 0x1}, 1838 .pxdcap2.ctds = 0x1 1839 }; 1840 1841 struct msixcap msixcap = { 1842 .hdr.id = PCI_CAP_ID_MSIX, 1843 .mxc.ts = NVME_IRQ_MSIX_NUM - 1, 1844 .mtab = {.tbir = 0x4, .to = 0x0}, 1845 .mpba = {.pbir = 0x5, .pbao = 0x0} 1846 }; 1847 1848 static struct iovec sparse_mmap[] = { 1849 { 1850 .iov_base = (void *)NVMF_VFIO_USER_DOORBELLS_OFFSET, 1851 .iov_len = NVMF_VFIO_USER_DOORBELLS_SIZE, 1852 }, 1853 }; 1854 1855 ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0); 1856 if (ret < 0) { 1857 SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", vfu_ctx); 1858 return ret; 1859 } 1860 vfu_pci_set_id(vfu_ctx, 0x4e58, 0x0001, 0, 0); 1861 /* 1862 * 0x02, controller uses the NVM Express programming interface 1863 * 0x08, non-volatile memory controller 1864 * 0x01, mass storage controller 1865 */ 1866 vfu_pci_set_class(vfu_ctx, 0x01, 0x08, 0x02); 1867 1868 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pmcap); 1869 if (cap_offset < 0) { 1870 SPDK_ERRLOG("vfu_ctx %p failed add pmcap\n", vfu_ctx); 1871 return ret; 1872 } 1873 1874 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pxcap); 1875 if (cap_offset < 0) { 1876 SPDK_ERRLOG("vfu_ctx %p failed add pxcap\n", vfu_ctx); 1877 return ret; 1878 } 1879 1880 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &msixcap); 1881 if (cap_offset < 0) { 1882 SPDK_ERRLOG("vfu_ctx %p failed add msixcap\n", vfu_ctx); 1883 return ret; 1884 } 1885 1886 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, NVME_REG_CFG_SIZE, 1887 access_pci_config, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1888 if (ret < 0) { 1889 SPDK_ERRLOG("vfu_ctx %p failed to setup cfg\n", vfu_ctx); 1890 return ret; 1891 } 1892 1893 if (vu_transport->transport_opts.disable_mappable_bar0) { 1894 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1895 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1896 NULL, 0, -1, 0); 1897 } else { 1898 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1899 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1900 sparse_mmap, 1, endpoint->devmem_fd, 0); 1901 } 1902 1903 if (ret < 0) { 1904 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 0\n", vfu_ctx); 1905 return ret; 1906 } 1907 1908 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR4_REGION_IDX, PAGE_SIZE, 1909 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1910 if (ret < 0) { 1911 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 4\n", vfu_ctx); 1912 return ret; 1913 } 1914 1915 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR5_REGION_IDX, PAGE_SIZE, 1916 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1917 if (ret < 0) { 1918 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 5\n", vfu_ctx); 1919 return ret; 1920 } 1921 1922 ret = vfu_setup_device_dma(vfu_ctx, memory_region_add_cb, memory_region_remove_cb); 1923 if (ret < 0) { 1924 SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", vfu_ctx); 1925 return ret; 1926 } 1927 1928 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); 1929 if (ret < 
0) { 1930 SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", vfu_ctx); 1931 return ret; 1932 } 1933 1934 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, NVME_IRQ_MSIX_NUM); 1935 if (ret < 0) { 1936 SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", vfu_ctx); 1937 return ret; 1938 } 1939 1940 ret = vfu_realize_ctx(vfu_ctx); 1941 if (ret < 0) { 1942 SPDK_ERRLOG("vfu_ctx %p failed to realize\n", vfu_ctx); 1943 return ret; 1944 } 1945 1946 endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx); 1947 assert(endpoint->pci_config_space != NULL); 1948 init_pci_config_space(endpoint->pci_config_space); 1949 1950 assert(cap_offset != 0); 1951 endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset); 1952 1953 return 0; 1954 } 1955 1956 static void 1957 _free_ctrlr(void *ctx) 1958 { 1959 struct nvmf_vfio_user_ctrlr *ctrlr = ctx; 1960 1961 spdk_poller_unregister(&ctrlr->vfu_ctx_poller); 1962 free(ctrlr); 1963 } 1964 1965 static void 1966 free_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr, bool free_qps) 1967 { 1968 int i; 1969 assert(ctrlr != NULL); 1970 1971 SPDK_DEBUGLOG(nvmf_vfio, "free %s\n", ctrlr_id(ctrlr)); 1972 1973 if (free_qps) { 1974 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1975 free_qp(ctrlr, i); 1976 } 1977 } 1978 1979 if (ctrlr->thread == spdk_get_thread()) { 1980 _free_ctrlr(ctrlr); 1981 } else { 1982 spdk_thread_send_msg(ctrlr->thread, _free_ctrlr, ctrlr); 1983 } 1984 } 1985 1986 static void 1987 nvmf_vfio_user_create_ctrlr(struct nvmf_vfio_user_transport *transport, 1988 struct nvmf_vfio_user_endpoint *endpoint) 1989 { 1990 struct nvmf_vfio_user_ctrlr *ctrlr; 1991 int err = 0; 1992 1993 /* First, construct a vfio-user CUSTOM transport controller */ 1994 ctrlr = calloc(1, sizeof(*ctrlr)); 1995 if (ctrlr == NULL) { 1996 err = -ENOMEM; 1997 goto out; 1998 } 1999 ctrlr->cntlid = 0xffff; 2000 ctrlr->transport = transport; 2001 ctrlr->endpoint = endpoint; 2002 ctrlr->doorbells = endpoint->doorbells; 2003 TAILQ_INIT(&ctrlr->connected_qps); 2004 2005 /* Then, construct an admin queue pair */ 2006 err = init_qp(ctrlr, &transport->transport, NVMF_VFIO_USER_DEFAULT_AQ_DEPTH, 0); 2007 if (err != 0) { 2008 free(ctrlr); 2009 goto out; 2010 } 2011 endpoint->ctrlr = ctrlr; 2012 2013 /* Notify the generic layer about the new admin queue pair */ 2014 spdk_nvmf_tgt_new_qpair(transport->transport.tgt, &ctrlr->qp[0]->qpair); 2015 2016 out: 2017 if (err != 0) { 2018 SPDK_ERRLOG("%s: failed to create vfio-user controller: %s\n", 2019 endpoint_id(endpoint), strerror(-err)); 2020 } 2021 } 2022 2023 static int 2024 nvmf_vfio_user_listen(struct spdk_nvmf_transport *transport, 2025 const struct spdk_nvme_transport_id *trid, 2026 struct spdk_nvmf_listen_opts *listen_opts) 2027 { 2028 struct nvmf_vfio_user_transport *vu_transport; 2029 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 2030 char *path = NULL; 2031 char uuid[PATH_MAX] = {}; 2032 int fd; 2033 int err; 2034 2035 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 2036 transport); 2037 2038 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 2039 /* Only compare traddr */ 2040 if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) { 2041 return -EEXIST; 2042 } 2043 } 2044 2045 endpoint = calloc(1, sizeof(*endpoint)); 2046 if (!endpoint) { 2047 return -ENOMEM; 2048 } 2049 2050 endpoint->devmem_fd = -1; 2051 memcpy(&endpoint->trid, trid, sizeof(endpoint->trid)); 2052 2053 err = asprintf(&path, "%s/bar0", 
		       endpoint_id(endpoint));
	if (err == -1) {
		goto out;
	}

	fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
	if (fd == -1) {
		SPDK_ERRLOG("%s: failed to open device memory at %s: %m\n",
			    endpoint_id(endpoint), path);
		err = fd;
		free(path);
		goto out;
	}
	free(path);

	endpoint->devmem_fd = fd;
	err = ftruncate(fd, NVMF_VFIO_USER_DOORBELLS_OFFSET + NVMF_VFIO_USER_DOORBELLS_SIZE);
	if (err != 0) {
		goto out;
	}

	endpoint->doorbells = mmap(NULL, NVMF_VFIO_USER_DOORBELLS_SIZE,
				   PROT_READ | PROT_WRITE, MAP_SHARED, fd, NVMF_VFIO_USER_DOORBELLS_OFFSET);
	if (endpoint->doorbells == MAP_FAILED) {
		endpoint->doorbells = NULL;
		err = -errno;
		goto out;
	}

	snprintf(uuid, PATH_MAX, "%s/cntrl", endpoint_id(endpoint));

	endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, uuid, LIBVFIO_USER_FLAG_ATTACH_NB,
					   endpoint, VFU_DEV_TYPE_PCI);
	if (endpoint->vfu_ctx == NULL) {
		SPDK_ERRLOG("%s: error creating libvfio-user context: %m\n",
			    endpoint_id(endpoint));
		err = -1;
		goto out;
	}
	vfu_setup_log(endpoint->vfu_ctx, vfio_user_log, vfio_user_get_log_level());

	err = vfio_user_dev_info_fill(vu_transport, endpoint);
	if (err < 0) {
		goto out;
	}

	pthread_mutex_init(&endpoint->lock, NULL);
	TAILQ_INSERT_TAIL(&vu_transport->endpoints, endpoint, link);
	SPDK_DEBUGLOG(nvmf_vfio, "%s: doorbells %p\n", uuid, endpoint->doorbells);

out:
	if (err != 0) {
		nvmf_vfio_user_destroy_endpoint(endpoint);
	}

	return err;
}

static void
nvmf_vfio_user_stop_listen(struct spdk_nvmf_transport *transport,
			   const struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_transport *vu_transport;
	struct nvmf_vfio_user_endpoint *endpoint, *tmp;

	assert(trid != NULL);
	assert(trid->traddr != NULL);

	SPDK_DEBUGLOG(nvmf_vfio, "%s: stop listen\n", trid->traddr);

	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
					transport);

	pthread_mutex_lock(&vu_transport->lock);
	TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
		if (strcmp(trid->traddr, endpoint->trid.traddr) == 0) {
			TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link);
			if (endpoint->ctrlr) {
				/* The user may kill the NVMe-oF target while a VM
				 * is still connected; free all resources.
				 */
				free_ctrlr(endpoint->ctrlr, true);
			}
			nvmf_vfio_user_destroy_endpoint(endpoint);
			pthread_mutex_unlock(&vu_transport->lock);

			return;
		}
	}
	pthread_mutex_unlock(&vu_transport->lock);

	SPDK_DEBUGLOG(nvmf_vfio, "%s: not found\n", trid->traddr);
}

static void
nvmf_vfio_user_cdata_init(struct spdk_nvmf_transport *transport,
			  struct spdk_nvmf_subsystem *subsystem,
			  struct spdk_nvmf_ctrlr_data *cdata)
{
	memset(&cdata->sgls, 0, sizeof(struct spdk_nvme_cdata_sgls));
	cdata->sgls.supported = SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED;
	/*
	 * libvfio-user only supports a single connection for now, so
	 * reservations are not useful; report them as unsupported.
	 */
	cdata->oncs.reservations = 0;
}

static int
nvmf_vfio_user_listen_associate(struct spdk_nvmf_transport *transport,
				const struct spdk_nvmf_subsystem *subsystem,
				const struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_transport *vu_transport;
	struct nvmf_vfio_user_endpoint *endpoint;

	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, transport);

	TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) {
		if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) {
			break;
		}
	}

	if (endpoint == NULL) {
		return -ENOENT;
	}

	endpoint->subsystem = subsystem;

	return 0;
}

/*
 * Executed periodically at a default SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US
 * frequency.
 *
 * For each transport endpoint (which at the libvfio-user level corresponds to
 * a socket), if we don't currently have a controller set up, peek to see if
 * the socket is able to accept a new connection.
 *
 * This poller also takes care of handling the creation of any pending new
 * qpairs.
 *
 * Returns the number of events handled.
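 *
 * Note that because the endpoint's vfu_ctx was created with
 * LIBVFIO_USER_FLAG_ATTACH_NB, vfu_attach_ctx() does not block: when no client
 * is waiting on the socket it fails with errno set to EAGAIN or EWOULDBLOCK,
 * which is treated as "nothing to accept yet" rather than as an error.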
2195 */ 2196 static uint32_t 2197 nvmf_vfio_user_accept(struct spdk_nvmf_transport *transport) 2198 { 2199 struct nvmf_vfio_user_transport *vu_transport; 2200 struct nvmf_vfio_user_endpoint *endpoint; 2201 uint32_t count = 0; 2202 int err; 2203 2204 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 2205 transport); 2206 2207 pthread_mutex_lock(&vu_transport->lock); 2208 2209 TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) { 2210 if (endpoint->ctrlr != NULL) { 2211 continue; 2212 } 2213 2214 err = vfu_attach_ctx(endpoint->vfu_ctx); 2215 if (err != 0) { 2216 if (errno == EAGAIN || errno == EWOULDBLOCK) { 2217 continue; 2218 } 2219 2220 pthread_mutex_unlock(&vu_transport->lock); 2221 return 1; 2222 } 2223 2224 count++; 2225 2226 /* Construct a controller */ 2227 nvmf_vfio_user_create_ctrlr(vu_transport, endpoint); 2228 } 2229 2230 pthread_mutex_unlock(&vu_transport->lock); 2231 2232 return count; 2233 } 2234 2235 static void 2236 nvmf_vfio_user_discover(struct spdk_nvmf_transport *transport, 2237 struct spdk_nvme_transport_id *trid, 2238 struct spdk_nvmf_discovery_log_page_entry *entry) 2239 { } 2240 2241 static struct spdk_nvmf_transport_poll_group * 2242 nvmf_vfio_user_poll_group_create(struct spdk_nvmf_transport *transport) 2243 { 2244 struct nvmf_vfio_user_poll_group *vu_group; 2245 2246 SPDK_DEBUGLOG(nvmf_vfio, "create poll group\n"); 2247 2248 vu_group = calloc(1, sizeof(*vu_group)); 2249 if (vu_group == NULL) { 2250 SPDK_ERRLOG("Error allocating poll group: %m"); 2251 return NULL; 2252 } 2253 2254 TAILQ_INIT(&vu_group->qps); 2255 2256 return &vu_group->group; 2257 } 2258 2259 /* called when process exits */ 2260 static void 2261 nvmf_vfio_user_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 2262 { 2263 struct nvmf_vfio_user_poll_group *vu_group; 2264 2265 SPDK_DEBUGLOG(nvmf_vfio, "destroy poll group\n"); 2266 2267 vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group); 2268 2269 free(vu_group); 2270 } 2271 2272 static void 2273 vfio_user_qpair_disconnect_cb(void *ctx) 2274 { 2275 struct nvmf_vfio_user_endpoint *endpoint = ctx; 2276 struct nvmf_vfio_user_ctrlr *ctrlr; 2277 2278 pthread_mutex_lock(&endpoint->lock); 2279 ctrlr = endpoint->ctrlr; 2280 if (!ctrlr) { 2281 pthread_mutex_unlock(&endpoint->lock); 2282 return; 2283 } 2284 2285 if (TAILQ_EMPTY(&ctrlr->connected_qps)) { 2286 endpoint->ctrlr = NULL; 2287 free_ctrlr(ctrlr, false); 2288 pthread_mutex_unlock(&endpoint->lock); 2289 return; 2290 } 2291 pthread_mutex_unlock(&endpoint->lock); 2292 } 2293 2294 static int 2295 vfio_user_destroy_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 2296 { 2297 struct nvmf_vfio_user_qpair *qpair; 2298 struct nvmf_vfio_user_endpoint *endpoint; 2299 2300 SPDK_DEBUGLOG(nvmf_vfio, "%s stop processing\n", ctrlr_id(ctrlr)); 2301 2302 endpoint = ctrlr->endpoint; 2303 assert(endpoint != NULL); 2304 2305 pthread_mutex_lock(&endpoint->lock); 2306 if (TAILQ_EMPTY(&ctrlr->connected_qps)) { 2307 endpoint->ctrlr = NULL; 2308 free_ctrlr(ctrlr, false); 2309 pthread_mutex_unlock(&endpoint->lock); 2310 return 0; 2311 } 2312 2313 TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) { 2314 spdk_nvmf_qpair_disconnect(&qpair->qpair, vfio_user_qpair_disconnect_cb, endpoint); 2315 } 2316 pthread_mutex_unlock(&endpoint->lock); 2317 2318 return 0; 2319 } 2320 2321 /* 2322 * Poll for and process any incoming vfio-user messages. 
 */
static int
vfio_user_poll_vfu_ctx(void *ctx)
{
	struct nvmf_vfio_user_ctrlr *ctrlr = ctx;
	int ret;

	assert(ctrlr != NULL);

	/* This will call access_bar0_fn() if there are any writes
	 * to the portion of the BAR that is not mmap'd.
	 */
	ret = vfu_run_ctx(ctrlr->endpoint->vfu_ctx);
	if (spdk_unlikely(ret == -1)) {
		spdk_poller_unregister(&ctrlr->vfu_ctx_poller);

		/* The initiator shut down or reset; destroy the controller and
		 * wait for it to re-connect.
		 */
		if (errno == ENOTCONN) {
			vfio_user_destroy_ctrlr(ctrlr);
			return SPDK_POLLER_BUSY;
		}

		/* Any other error is fatal for this controller. */
		fail_ctrlr(ctrlr);
	}

	return ret != 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
handle_queue_connect_rsp(struct nvmf_vfio_user_req *req, void *cb_arg)
{
	struct nvmf_vfio_user_poll_group *vu_group;
	struct nvmf_vfio_user_qpair *qpair = cb_arg;
	struct nvmf_vfio_user_ctrlr *ctrlr;
	struct nvmf_vfio_user_endpoint *endpoint;

	assert(qpair != NULL);
	assert(req != NULL);

	ctrlr = qpair->ctrlr;
	assert(ctrlr != NULL);
	endpoint = ctrlr->endpoint;
	assert(endpoint != NULL);

	if (spdk_nvme_cpl_is_error(&req->req.rsp->nvme_cpl)) {
		SPDK_ERRLOG("SC %u, SCT %u\n", req->req.rsp->nvme_cpl.status.sc,
			    req->req.rsp->nvme_cpl.status.sct);
		endpoint->ctrlr = NULL;
		free_ctrlr(ctrlr, true);
		return -1;
	}

	vu_group = SPDK_CONTAINEROF(qpair->group, struct nvmf_vfio_user_poll_group, group);
	TAILQ_INSERT_TAIL(&vu_group->qps, qpair, link);
	qpair->state = VFIO_USER_QPAIR_ACTIVE;

	pthread_mutex_lock(&endpoint->lock);
	if (nvmf_qpair_is_admin_queue(&qpair->qpair)) {
		ctrlr->cntlid = qpair->qpair.ctrlr->cntlid;
		ctrlr->thread = spdk_get_thread();
		ctrlr->vfu_ctx_poller = SPDK_POLLER_REGISTER(vfio_user_poll_vfu_ctx, ctrlr, 0);
	} else {
		/* For I/O queues this command was generated in response to an
		 * admin Create I/O Submission Queue command which has not yet
		 * been completed. Complete it now.
		 */
		post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0,
				qpair->create_io_sq_cmd.cid, SPDK_NVME_SC_SUCCESS, SPDK_NVME_SCT_GENERIC);
	}
	TAILQ_INSERT_TAIL(&ctrlr->connected_qps, qpair, tailq);
	pthread_mutex_unlock(&endpoint->lock);

	free(req->req.data);
	req->req.data = NULL;

	return 0;
}

/*
 * Add the given qpair to the given poll group. New qpairs are added via
 * spdk_nvmf_tgt_new_qpair(), which picks a poll group, then calls back
 * here via nvmf_transport_poll_group_add().
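 *
 * Because a vfio-user host drives the controller as a PCI device, it never
 * sends an NVMe-oF Fabric CONNECT command of its own; this function therefore
 * synthesizes one on its behalf (cntlid 0xFFFF for the admin queue, the
 * controller's cntlid for I/O queues) and executes it with
 * spdk_nvmf_request_exec_fabrics(). handle_queue_connect_rsp() above completes
 * the qpair setup once the CONNECT finishes.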
2403 */ 2404 static int 2405 nvmf_vfio_user_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 2406 struct spdk_nvmf_qpair *qpair) 2407 { 2408 struct nvmf_vfio_user_qpair *vu_qpair; 2409 struct nvmf_vfio_user_req *vu_req; 2410 struct nvmf_vfio_user_ctrlr *ctrlr; 2411 struct spdk_nvmf_request *req; 2412 struct spdk_nvmf_fabric_connect_data *data; 2413 bool admin; 2414 2415 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2416 vu_qpair->group = group; 2417 ctrlr = vu_qpair->ctrlr; 2418 2419 SPDK_DEBUGLOG(nvmf_vfio, "%s: add QP%d=%p(%p) to poll_group=%p\n", 2420 ctrlr_id(ctrlr), vu_qpair->qpair.qid, 2421 vu_qpair, qpair, group); 2422 2423 admin = nvmf_qpair_is_admin_queue(&vu_qpair->qpair); 2424 2425 vu_req = get_nvmf_vfio_user_req(vu_qpair); 2426 if (vu_req == NULL) { 2427 return -1; 2428 } 2429 2430 req = &vu_req->req; 2431 req->cmd->connect_cmd.opcode = SPDK_NVME_OPC_FABRIC; 2432 req->cmd->connect_cmd.cid = 0; 2433 req->cmd->connect_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT; 2434 req->cmd->connect_cmd.recfmt = 0; 2435 req->cmd->connect_cmd.sqsize = vu_qpair->qsize - 1; 2436 req->cmd->connect_cmd.qid = admin ? 0 : qpair->qid; 2437 2438 req->length = sizeof(struct spdk_nvmf_fabric_connect_data); 2439 req->data = calloc(1, req->length); 2440 if (req->data == NULL) { 2441 nvmf_vfio_user_req_free(req); 2442 return -ENOMEM; 2443 } 2444 2445 data = (struct spdk_nvmf_fabric_connect_data *)req->data; 2446 data->cntlid = admin ? 0xFFFF : ctrlr->cntlid; 2447 snprintf(data->subnqn, sizeof(data->subnqn), "%s", 2448 spdk_nvmf_subsystem_get_nqn(ctrlr->endpoint->subsystem)); 2449 2450 vu_req->cb_fn = handle_queue_connect_rsp; 2451 vu_req->cb_arg = vu_qpair; 2452 2453 SPDK_DEBUGLOG(nvmf_vfio, 2454 "%s: sending connect fabrics command for QID=%#x cntlid=%#x\n", 2455 ctrlr_id(ctrlr), qpair->qid, data->cntlid); 2456 2457 spdk_nvmf_request_exec_fabrics(req); 2458 return 0; 2459 } 2460 2461 static int 2462 nvmf_vfio_user_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, 2463 struct spdk_nvmf_qpair *qpair) 2464 { 2465 struct nvmf_vfio_user_qpair *vu_qpair; 2466 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 2467 struct nvmf_vfio_user_endpoint *endpoint; 2468 struct nvmf_vfio_user_poll_group *vu_group; 2469 2470 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2471 vu_ctrlr = vu_qpair->ctrlr; 2472 endpoint = vu_ctrlr->endpoint; 2473 2474 SPDK_DEBUGLOG(nvmf_vfio, 2475 "%s: remove NVMf QP%d=%p from NVMf poll_group=%p\n", 2476 ctrlr_id(vu_qpair->ctrlr), qpair->qid, qpair, group); 2477 2478 2479 vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group); 2480 TAILQ_REMOVE(&vu_group->qps, vu_qpair, link); 2481 2482 pthread_mutex_lock(&endpoint->lock); 2483 TAILQ_REMOVE(&vu_ctrlr->connected_qps, vu_qpair, tailq); 2484 pthread_mutex_unlock(&endpoint->lock); 2485 2486 return 0; 2487 } 2488 2489 static void 2490 _nvmf_vfio_user_req_free(struct nvmf_vfio_user_qpair *vu_qpair, struct nvmf_vfio_user_req *vu_req) 2491 { 2492 memset(&vu_req->cmd, 0, sizeof(vu_req->cmd)); 2493 memset(&vu_req->rsp, 0, sizeof(vu_req->rsp)); 2494 vu_req->iovcnt = 0; 2495 vu_req->state = VFIO_USER_REQUEST_STATE_FREE; 2496 2497 TAILQ_INSERT_TAIL(&vu_qpair->reqs, vu_req, link); 2498 } 2499 2500 static int 2501 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req) 2502 { 2503 struct nvmf_vfio_user_qpair *vu_qpair; 2504 struct nvmf_vfio_user_req *vu_req; 2505 2506 assert(req != NULL); 2507 2508 vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req); 2509 
vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair); 2510 2511 _nvmf_vfio_user_req_free(vu_qpair, vu_req); 2512 2513 return 0; 2514 } 2515 2516 static int 2517 nvmf_vfio_user_req_complete(struct spdk_nvmf_request *req) 2518 { 2519 struct nvmf_vfio_user_qpair *vu_qpair; 2520 struct nvmf_vfio_user_req *vu_req; 2521 2522 assert(req != NULL); 2523 2524 vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req); 2525 vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair); 2526 2527 if (vu_req->cb_fn != NULL) { 2528 if (vu_req->cb_fn(vu_req, vu_req->cb_arg) != 0) { 2529 fail_ctrlr(vu_qpair->ctrlr); 2530 } 2531 } 2532 2533 _nvmf_vfio_user_req_free(vu_qpair, vu_req); 2534 2535 return 0; 2536 } 2537 2538 static void 2539 nvmf_vfio_user_close_qpair(struct spdk_nvmf_qpair *qpair, 2540 spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg) 2541 { 2542 struct nvmf_vfio_user_qpair *vu_qpair; 2543 2544 assert(qpair != NULL); 2545 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2546 free_qp(vu_qpair->ctrlr, qpair->qid); 2547 2548 if (cb_fn) { 2549 cb_fn(cb_arg); 2550 } 2551 } 2552 2553 /** 2554 * Returns a preallocated spdk_nvmf_request or NULL if there isn't one available. 2555 */ 2556 static struct nvmf_vfio_user_req * 2557 get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair) 2558 { 2559 struct nvmf_vfio_user_req *req; 2560 2561 assert(qpair != NULL); 2562 2563 if (TAILQ_EMPTY(&qpair->reqs)) { 2564 return NULL; 2565 } 2566 2567 req = TAILQ_FIRST(&qpair->reqs); 2568 TAILQ_REMOVE(&qpair->reqs, req, link); 2569 2570 return req; 2571 } 2572 2573 static int 2574 get_nvmf_io_req_length(struct spdk_nvmf_request *req) 2575 { 2576 uint16_t nr; 2577 uint32_t nlb, nsid; 2578 struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; 2579 struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; 2580 struct spdk_nvmf_ns *ns; 2581 2582 nsid = cmd->nsid; 2583 ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid); 2584 if (ns == NULL || ns->bdev == NULL) { 2585 SPDK_ERRLOG("unsuccessful query for nsid %u\n", cmd->nsid); 2586 return -EINVAL; 2587 } 2588 2589 if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) { 2590 nr = cmd->cdw10_bits.dsm.nr + 1; 2591 return nr * sizeof(struct spdk_nvme_dsm_range); 2592 } 2593 2594 nlb = (cmd->cdw12 & 0x0000ffffu) + 1; 2595 return nlb * spdk_bdev_get_block_size(ns->bdev); 2596 } 2597 2598 static int 2599 map_admin_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req) 2600 { 2601 struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; 2602 uint32_t len = 0; 2603 uint8_t fid; 2604 int iovcnt; 2605 2606 req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); 2607 req->length = 0; 2608 req->data = NULL; 2609 2610 if (req->xfer == SPDK_NVME_DATA_NONE) { 2611 return 0; 2612 } 2613 2614 switch (cmd->opc) { 2615 case SPDK_NVME_OPC_IDENTIFY: 2616 len = 4096; 2617 break; 2618 case SPDK_NVME_OPC_GET_LOG_PAGE: 2619 len = (((cmd->cdw11_bits.get_log_page.numdu << 16) | cmd->cdw10_bits.get_log_page.numdl) + 1) * 4; 2620 break; 2621 case SPDK_NVME_OPC_GET_FEATURES: 2622 case SPDK_NVME_OPC_SET_FEATURES: 2623 fid = cmd->cdw10_bits.set_features.fid; 2624 switch (fid) { 2625 case SPDK_NVME_FEAT_LBA_RANGE_TYPE: 2626 len = 4096; 2627 break; 2628 case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: 2629 len = 256; 2630 break; 2631 case SPDK_NVME_FEAT_TIMESTAMP: 2632 len = 8; 2633 break; 2634 case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT: 2635 len = 512; 2636 break; 2637 case SPDK_NVME_FEAT_HOST_IDENTIFIER: 2638 if 
(cmd->cdw11_bits.feat_host_identifier.bits.exhid) { 2639 len = 16; 2640 } else { 2641 len = 8; 2642 } 2643 break; 2644 default: 2645 return 0; 2646 } 2647 break; 2648 default: 2649 return 0; 2650 } 2651 2652 /* ADMIN command will not use SGL */ 2653 if (cmd->psdt != 0) { 2654 return -EINVAL; 2655 } 2656 2657 iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, len); 2658 if (iovcnt < 0) { 2659 SPDK_ERRLOG("%s: map Admin Opc %x failed\n", 2660 ctrlr_id(ctrlr), cmd->opc); 2661 return -1; 2662 } 2663 req->length = len; 2664 req->data = req->iov[0].iov_base; 2665 req->iovcnt = iovcnt; 2666 2667 return 0; 2668 } 2669 2670 /* 2671 * Map an I/O command's buffers. 2672 * 2673 * Returns 0 on success and -errno on failure. 2674 */ 2675 static int 2676 map_io_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req) 2677 { 2678 int len, iovcnt; 2679 struct spdk_nvme_cmd *cmd; 2680 2681 assert(ctrlr != NULL); 2682 assert(req != NULL); 2683 2684 cmd = &req->cmd->nvme_cmd; 2685 req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); 2686 req->length = 0; 2687 req->data = NULL; 2688 2689 if (spdk_unlikely(req->xfer == SPDK_NVME_DATA_NONE)) { 2690 return 0; 2691 } 2692 2693 len = get_nvmf_io_req_length(req); 2694 if (len < 0) { 2695 return -EINVAL; 2696 } 2697 req->length = len; 2698 2699 iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, req->length); 2700 if (iovcnt < 0) { 2701 SPDK_ERRLOG("%s: failed to map IO OPC %u\n", ctrlr_id(ctrlr), cmd->opc); 2702 return -EFAULT; 2703 } 2704 req->data = req->iov[0].iov_base; 2705 req->iovcnt = iovcnt; 2706 2707 return 0; 2708 } 2709 2710 static int 2711 handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd, 2712 struct nvmf_vfio_user_qpair *vu_qpair) 2713 { 2714 int err; 2715 struct nvmf_vfio_user_req *vu_req; 2716 struct spdk_nvmf_request *req; 2717 2718 assert(ctrlr != NULL); 2719 assert(cmd != NULL); 2720 2721 vu_req = get_nvmf_vfio_user_req(vu_qpair); 2722 if (spdk_unlikely(vu_req == NULL)) { 2723 SPDK_ERRLOG("%s: no request for NVMe command opc 0x%x\n", ctrlr_id(ctrlr), cmd->opc); 2724 return post_completion(ctrlr, &vu_qpair->cq, 0, 0, cmd->cid, 2725 SPDK_NVME_SC_INTERNAL_DEVICE_ERROR, SPDK_NVME_SCT_GENERIC); 2726 2727 } 2728 req = &vu_req->req; 2729 2730 assert(req->qpair != NULL); 2731 SPDK_DEBUGLOG(nvmf_vfio, "%s: handle qid%u, req opc=%#x cid=%d\n", 2732 ctrlr_id(ctrlr), req->qpair->qid, cmd->opc, cmd->cid); 2733 2734 vu_req->cb_fn = handle_cmd_rsp; 2735 vu_req->cb_arg = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair); 2736 req->cmd->nvme_cmd = *cmd; 2737 2738 if (nvmf_qpair_is_admin_queue(req->qpair)) { 2739 err = map_admin_cmd_req(ctrlr, req); 2740 } else { 2741 switch (cmd->opc) { 2742 case SPDK_NVME_OPC_RESERVATION_REGISTER: 2743 case SPDK_NVME_OPC_RESERVATION_REPORT: 2744 case SPDK_NVME_OPC_RESERVATION_ACQUIRE: 2745 case SPDK_NVME_OPC_RESERVATION_RELEASE: 2746 err = -ENOTSUP; 2747 break; 2748 default: 2749 err = map_io_cmd_req(ctrlr, req); 2750 break; 2751 } 2752 } 2753 2754 if (spdk_unlikely(err < 0)) { 2755 SPDK_ERRLOG("%s: process NVMe command opc 0x%x failed\n", 2756 ctrlr_id(ctrlr), cmd->opc); 2757 req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 2758 req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2759 err = handle_cmd_rsp(vu_req, vu_req->cb_arg); 2760 _nvmf_vfio_user_req_free(vu_qpair, vu_req); 2761 return err; 2762 } 2763 2764 vu_req->state = VFIO_USER_REQUEST_STATE_EXECUTING; 2765 spdk_nvmf_request_exec(req); 2766 2767 return 0; 2768 } 2769 2770 /* Returns the number 
of commands processed, or a negative value on error. */ 2771 static int 2772 nvmf_vfio_user_qpair_poll(struct nvmf_vfio_user_qpair *qpair) 2773 { 2774 struct nvmf_vfio_user_ctrlr *ctrlr; 2775 uint32_t new_tail; 2776 int count = 0; 2777 2778 assert(qpair != NULL); 2779 2780 ctrlr = qpair->ctrlr; 2781 2782 /* Load-Acquire. */ 2783 new_tail = *tdbl(ctrlr, &qpair->sq); 2784 2785 /* 2786 * Ensure that changes to the queue are visible to us. 2787 * The host driver should write the queue first, do a wmb(), and then 2788 * update the SQ tail doorbell (their Store-Release). 2789 */ 2790 spdk_rmb(); 2791 2792 new_tail = new_tail & 0xffffu; 2793 if (spdk_unlikely(new_tail >= qpair->sq.size)) { 2794 union spdk_nvme_async_event_completion event = {}; 2795 2796 SPDK_DEBUGLOG(nvmf_vfio, "%s: invalid SQ%u doorbell value %u\n", ctrlr_id(ctrlr), qpair->qpair.qid, 2797 new_tail); 2798 event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_ERROR; 2799 event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_INVALID_DB_WRITE; 2800 nvmf_ctrlr_async_event_error_event(qpair->qpair.ctrlr, event); 2801 2802 return 0; 2803 } 2804 2805 if (sq_head(qpair) == new_tail) { 2806 return 0; 2807 } 2808 2809 count = handle_sq_tdbl_write(ctrlr, new_tail, qpair); 2810 if (count < 0) { 2811 fail_ctrlr(ctrlr); 2812 } 2813 2814 return count; 2815 } 2816 2817 /* 2818 * vfio-user transport poll handler. Note that the library context is polled in 2819 * a separate poller (->vfu_ctx_poller), so this poller only needs to poll the 2820 * active qpairs. 2821 * 2822 * Returns the number of commands processed, or a negative value on error. 2823 */ 2824 static int 2825 nvmf_vfio_user_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) 2826 { 2827 struct nvmf_vfio_user_poll_group *vu_group; 2828 struct nvmf_vfio_user_qpair *vu_qpair, *tmp; 2829 int count = 0; 2830 2831 assert(group != NULL); 2832 2833 spdk_rmb(); 2834 2835 vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group); 2836 2837 TAILQ_FOREACH_SAFE(vu_qpair, &vu_group->qps, link, tmp) { 2838 int ret; 2839 2840 if (spdk_unlikely(vu_qpair->state != VFIO_USER_QPAIR_ACTIVE || !vu_qpair->sq.size)) { 2841 continue; 2842 } 2843 2844 ret = nvmf_vfio_user_qpair_poll(vu_qpair); 2845 2846 if (ret < 0) { 2847 return ret; 2848 } 2849 2850 count += ret; 2851 } 2852 2853 return count; 2854 } 2855 2856 static int 2857 nvmf_vfio_user_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, 2858 struct spdk_nvme_transport_id *trid) 2859 { 2860 struct nvmf_vfio_user_qpair *vu_qpair; 2861 struct nvmf_vfio_user_ctrlr *ctrlr; 2862 2863 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2864 ctrlr = vu_qpair->ctrlr; 2865 2866 memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid)); 2867 return 0; 2868 } 2869 2870 static int 2871 nvmf_vfio_user_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, 2872 struct spdk_nvme_transport_id *trid) 2873 { 2874 return 0; 2875 } 2876 2877 static int 2878 nvmf_vfio_user_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, 2879 struct spdk_nvme_transport_id *trid) 2880 { 2881 struct nvmf_vfio_user_qpair *vu_qpair; 2882 struct nvmf_vfio_user_ctrlr *ctrlr; 2883 2884 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2885 ctrlr = vu_qpair->ctrlr; 2886 2887 memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid)); 2888 return 0; 2889 } 2890 2891 static void 2892 nvmf_vfio_user_qpair_abort_request(struct spdk_nvmf_qpair *qpair, 2893 struct spdk_nvmf_request *req) 2894 { 2895 struct nvmf_vfio_user_qpair *vu_qpair; 2896 
	struct nvmf_vfio_user_req *vu_req, *vu_req_to_abort = NULL;
	uint32_t i;
	uint16_t cid;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

	cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
	for (i = 0; i < vu_qpair->qsize; i++) {
		vu_req = &vu_qpair->reqs_internal[i];
		if (vu_req->state == VFIO_USER_REQUEST_STATE_EXECUTING && vu_req->cmd.cid == cid) {
			vu_req_to_abort = vu_req;
			break;
		}
	}

	if (vu_req_to_abort == NULL) {
		spdk_nvmf_request_complete(req);
		return;
	}

	req->req_to_abort = &vu_req_to_abort->req;
	nvmf_ctrlr_abort_request(req);
}

static void
nvmf_vfio_user_opts_init(struct spdk_nvmf_transport_opts *opts)
{
	opts->max_queue_depth = NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH;
	opts->max_qpairs_per_ctrlr = NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR;
	opts->in_capsule_data_size = 0;
	opts->max_io_size = NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE;
	opts->io_unit_size = NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE;
	opts->max_aq_depth = NVMF_VFIO_USER_DEFAULT_AQ_DEPTH;
	opts->num_shared_buffers = 0;
	opts->buf_cache_size = 0;
	opts->association_timeout = 0;
	opts->transport_specific = NULL;
}

const struct spdk_nvmf_transport_ops spdk_nvmf_transport_vfio_user = {
	.name = "VFIOUSER",
	.type = SPDK_NVME_TRANSPORT_VFIOUSER,
	.opts_init = nvmf_vfio_user_opts_init,
	.create = nvmf_vfio_user_create,
	.destroy = nvmf_vfio_user_destroy,

	.listen = nvmf_vfio_user_listen,
	.stop_listen = nvmf_vfio_user_stop_listen,
	.accept = nvmf_vfio_user_accept,
	.cdata_init = nvmf_vfio_user_cdata_init,
	.listen_associate = nvmf_vfio_user_listen_associate,

	.listener_discover = nvmf_vfio_user_discover,

	.poll_group_create = nvmf_vfio_user_poll_group_create,
	.poll_group_destroy = nvmf_vfio_user_poll_group_destroy,
	.poll_group_add = nvmf_vfio_user_poll_group_add,
	.poll_group_remove = nvmf_vfio_user_poll_group_remove,
	.poll_group_poll = nvmf_vfio_user_poll_group_poll,

	.req_free = nvmf_vfio_user_req_free,
	.req_complete = nvmf_vfio_user_req_complete,

	.qpair_fini = nvmf_vfio_user_close_qpair,
	.qpair_get_local_trid = nvmf_vfio_user_qpair_get_local_trid,
	.qpair_get_peer_trid = nvmf_vfio_user_qpair_get_peer_trid,
	.qpair_get_listen_trid = nvmf_vfio_user_qpair_get_listen_trid,
	.qpair_abort_request = nvmf_vfio_user_qpair_abort_request,
};

SPDK_NVMF_TRANSPORT_REGISTER(muser, &spdk_nvmf_transport_vfio_user);
SPDK_LOG_REGISTER_COMPONENT(nvmf_vfio)
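
/*
 * Illustrative sketch (not part of the transport): how a guest NVMe driver
 * locates the shared doorbells that this transport exposes through the sparse
 * mmap region of BAR0. The offsets below assume the layout set up in
 * vfio_user_dev_info_fill() (doorbells at NVMF_VFIO_USER_DOORBELLS_OFFSET
 * within BAR0) and a doorbell stride of 0 (CAP.DSTRD = 0), i.e. consecutive
 * 4-byte doorbell registers. The qid parameter and the helper names are
 * hypothetical and exist only to make the arithmetic concrete.
 */
#if 0
/* Byte offset of the submission queue tail doorbell for queue `qid` in BAR0. */
static inline uint64_t
example_sq_tail_doorbell_offset(uint16_t qid)
{
	return NVMF_VFIO_USER_DOORBELLS_OFFSET + (2 * qid) * sizeof(uint32_t);
}

/* Byte offset of the completion queue head doorbell for queue `qid` in BAR0. */
static inline uint64_t
example_cq_head_doorbell_offset(uint16_t qid)
{
	return NVMF_VFIO_USER_DOORBELLS_OFFSET + (2 * qid + 1) * sizeof(uint32_t);
}
#endif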