1 /*- 2 * BSD LICENSE 3 * Copyright (c) Intel Corporation. All rights reserved. 4 * Copyright (c) 2019, Nutanix Inc. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * * Neither the name of Intel Corporation nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * NVMe over vfio-user transport 35 */ 36 37 #include <vfio-user/libvfio-user.h> 38 #include <vfio-user/pci_defs.h> 39 40 #include "spdk/barrier.h" 41 #include "spdk/stdinc.h" 42 #include "spdk/assert.h" 43 #include "spdk/thread.h" 44 #include "spdk/nvmf_transport.h" 45 #include "spdk/sock.h" 46 #include "spdk/string.h" 47 #include "spdk/util.h" 48 #include "spdk/log.h" 49 50 #include "transport.h" 51 52 #include "nvmf_internal.h" 53 54 #define NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH 256 55 #define NVMF_VFIO_USER_DEFAULT_AQ_DEPTH 32 56 #define NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE ((NVMF_REQ_MAX_BUFFERS - 1) << SHIFT_4KB) 57 #define NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE 58 59 #define NVME_DOORBELLS_OFFSET 0x1000 60 #define NVMF_VFIO_USER_DOORBELLS_SIZE 0x1000 61 62 #define NVME_REG_CFG_SIZE PCI_CFG_SPACE_EXP_SIZE 63 #define NVME_REG_BAR0_SIZE (NVME_DOORBELLS_OFFSET + NVMF_VFIO_USER_DOORBELLS_SIZE) 64 #define NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR ((NVMF_VFIO_USER_DOORBELLS_SIZE) / 8) 65 #define NVME_IRQ_MSIX_NUM NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR 66 /* MSIX Table Size */ 67 #define NVME_BAR4_SIZE SPDK_ALIGN_CEIL((NVME_IRQ_MSIX_NUM * 16), 0x1000) 68 /* MSIX Pending Bit Array Size */ 69 #define NVME_BAR5_SIZE SPDK_ALIGN_CEIL((NVME_IRQ_MSIX_NUM / 8), 0x1000) 70 71 #define NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR (NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR / 4) 72 73 struct nvmf_vfio_user_req; 74 struct nvmf_vfio_user_qpair; 75 76 typedef int (*nvmf_vfio_user_req_cb_fn)(struct nvmf_vfio_user_req *req, void *cb_arg); 77 78 /* 1 more for PRP2 list itself */ 79 #define NVMF_VFIO_USER_MAX_IOVECS (NVMF_REQ_MAX_BUFFERS + 1) 80 81 enum nvmf_vfio_user_req_state { 82 VFIO_USER_REQUEST_STATE_FREE = 0, 83 VFIO_USER_REQUEST_STATE_EXECUTING, 84 }; 85 86 struct nvmf_vfio_user_req { 87 struct spdk_nvmf_request req; 88 
struct spdk_nvme_cpl rsp; 89 struct spdk_nvme_cmd cmd; 90 91 enum nvmf_vfio_user_req_state state; 92 nvmf_vfio_user_req_cb_fn cb_fn; 93 void *cb_arg; 94 95 /* old CC before prop_set_cc fabric command */ 96 union spdk_nvme_cc_register cc; 97 98 /* placeholder for gpa_to_vva memory map table, the IO buffer doesn't use it */ 99 dma_sg_t *sg; 100 struct iovec iov[NVMF_VFIO_USER_MAX_IOVECS]; 101 uint8_t iovcnt; 102 103 TAILQ_ENTRY(nvmf_vfio_user_req) link; 104 }; 105 106 /* 107 * A NVMe queue. 108 */ 109 struct nvme_q { 110 bool is_cq; 111 112 void *addr; 113 114 dma_sg_t *sg; 115 struct iovec iov; 116 117 uint32_t size; 118 uint64_t prp1; 119 120 union { 121 struct { 122 uint32_t head; 123 /* multiple SQs can be mapped to the same CQ */ 124 uint16_t cqid; 125 }; 126 struct { 127 uint32_t tail; 128 uint16_t iv; 129 bool ien; 130 bool phase; 131 }; 132 }; 133 }; 134 135 enum nvmf_vfio_user_qpair_state { 136 VFIO_USER_QPAIR_UNINITIALIZED = 0, 137 VFIO_USER_QPAIR_ACTIVE, 138 VFIO_USER_QPAIR_SQ_DELETED, 139 VFIO_USER_QPAIR_INACTIVE, 140 VFIO_USER_QPAIR_ERROR, 141 }; 142 143 struct nvmf_vfio_user_qpair { 144 struct spdk_nvmf_qpair qpair; 145 struct spdk_nvmf_transport_poll_group *group; 146 struct nvmf_vfio_user_ctrlr *ctrlr; 147 struct nvmf_vfio_user_req *reqs_internal; 148 uint32_t qsize; 149 struct nvme_q cq; 150 struct nvme_q sq; 151 enum nvmf_vfio_user_qpair_state state; 152 153 /* Copy of Create IO SQ command */ 154 struct spdk_nvme_cmd create_io_sq_cmd; 155 156 TAILQ_HEAD(, nvmf_vfio_user_req) reqs; 157 /* Poll group entry */ 158 TAILQ_ENTRY(nvmf_vfio_user_qpair) link; 159 /* Connected queue pair entry */ 160 TAILQ_ENTRY(nvmf_vfio_user_qpair) tailq; 161 }; 162 163 struct nvmf_vfio_user_poll_group { 164 struct spdk_nvmf_transport_poll_group group; 165 TAILQ_HEAD(, nvmf_vfio_user_qpair) qps; 166 }; 167 168 struct nvmf_vfio_user_ctrlr { 169 struct nvmf_vfio_user_endpoint *endpoint; 170 struct nvmf_vfio_user_transport *transport; 171 172 /* Connected queue pairs list */ 173 TAILQ_HEAD(, nvmf_vfio_user_qpair) connected_qps; 174 175 struct spdk_thread *thread; 176 struct spdk_poller *vfu_ctx_poller; 177 178 uint16_t cntlid; 179 struct spdk_nvmf_ctrlr *ctrlr; 180 181 struct nvmf_vfio_user_qpair *qp[NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR]; 182 183 TAILQ_ENTRY(nvmf_vfio_user_ctrlr) link; 184 185 volatile uint32_t *doorbells; 186 187 /* internal CSTS.CFS register for vfio-user fatal errors */ 188 uint32_t cfs : 1; 189 }; 190 191 struct nvmf_vfio_user_endpoint { 192 vfu_ctx_t *vfu_ctx; 193 struct msixcap *msix; 194 vfu_pci_config_space_t *pci_config_space; 195 int devmem_fd; 196 volatile uint32_t *doorbells; 197 198 struct spdk_nvme_transport_id trid; 199 const struct spdk_nvmf_subsystem *subsystem; 200 201 struct nvmf_vfio_user_ctrlr *ctrlr; 202 pthread_mutex_t lock; 203 204 TAILQ_ENTRY(nvmf_vfio_user_endpoint) link; 205 }; 206 207 struct nvmf_vfio_user_transport_opts { 208 bool disable_mappable_bar0; 209 }; 210 211 struct nvmf_vfio_user_transport { 212 struct spdk_nvmf_transport transport; 213 struct nvmf_vfio_user_transport_opts transport_opts; 214 pthread_mutex_t lock; 215 TAILQ_HEAD(, nvmf_vfio_user_endpoint) endpoints; 216 }; 217 218 /* 219 * function prototypes 220 */ 221 static volatile uint32_t * 222 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 223 224 static volatile uint32_t * 225 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 226 227 static int 228 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req); 229 230 static struct nvmf_vfio_user_req * 231 
get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair); 232 233 static int 234 nvme_cmd_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, 235 uint32_t max_iovcnt, uint32_t len, size_t mps, 236 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 237 { 238 uint64_t prp1, prp2; 239 void *vva; 240 uint32_t i; 241 uint32_t residue_len, nents; 242 uint64_t *prp_list; 243 uint32_t iovcnt; 244 245 assert(max_iovcnt > 0); 246 247 prp1 = cmd->dptr.prp.prp1; 248 prp2 = cmd->dptr.prp.prp2; 249 250 /* PRP1 may started with unaligned page address */ 251 residue_len = mps - (prp1 % mps); 252 residue_len = spdk_min(len, residue_len); 253 254 vva = gpa_to_vva(prv, prp1, residue_len, PROT_READ | PROT_WRITE); 255 if (spdk_unlikely(vva == NULL)) { 256 SPDK_ERRLOG("GPA to VVA failed\n"); 257 return -EINVAL; 258 } 259 len -= residue_len; 260 if (len && max_iovcnt < 2) { 261 SPDK_ERRLOG("Too many page entries, at least two iovs are required\n"); 262 return -ERANGE; 263 } 264 iovs[0].iov_base = vva; 265 iovs[0].iov_len = residue_len; 266 267 if (len) { 268 if (spdk_unlikely(prp2 == 0)) { 269 SPDK_ERRLOG("no PRP2, %d remaining\n", len); 270 return -EINVAL; 271 } 272 273 if (len <= mps) { 274 /* 2 PRP used */ 275 iovcnt = 2; 276 vva = gpa_to_vva(prv, prp2, len, PROT_READ | PROT_WRITE); 277 if (spdk_unlikely(vva == NULL)) { 278 SPDK_ERRLOG("no VVA for %#" PRIx64 ", len%#x\n", 279 prp2, len); 280 return -EINVAL; 281 } 282 iovs[1].iov_base = vva; 283 iovs[1].iov_len = len; 284 } else { 285 /* PRP list used */ 286 nents = (len + mps - 1) / mps; 287 if (spdk_unlikely(nents + 1 > max_iovcnt)) { 288 SPDK_ERRLOG("Too many page entries\n"); 289 return -ERANGE; 290 } 291 292 vva = gpa_to_vva(prv, prp2, nents * sizeof(*prp_list), PROT_READ); 293 if (spdk_unlikely(vva == NULL)) { 294 SPDK_ERRLOG("no VVA for %#" PRIx64 ", nents=%#x\n", 295 prp2, nents); 296 return -EINVAL; 297 } 298 prp_list = vva; 299 i = 0; 300 while (len != 0) { 301 residue_len = spdk_min(len, mps); 302 vva = gpa_to_vva(prv, prp_list[i], residue_len, PROT_READ | PROT_WRITE); 303 if (spdk_unlikely(vva == NULL)) { 304 SPDK_ERRLOG("no VVA for %#" PRIx64 ", residue_len=%#x\n", 305 prp_list[i], residue_len); 306 return -EINVAL; 307 } 308 iovs[i + 1].iov_base = vva; 309 iovs[i + 1].iov_len = residue_len; 310 len -= residue_len; 311 i++; 312 } 313 iovcnt = i + 1; 314 } 315 } else { 316 /* 1 PRP used */ 317 iovcnt = 1; 318 } 319 320 assert(iovcnt <= max_iovcnt); 321 return iovcnt; 322 } 323 324 static int 325 nvme_cmd_map_sgls_data(void *prv, struct spdk_nvme_sgl_descriptor *sgls, uint32_t num_sgls, 326 struct iovec *iovs, uint32_t max_iovcnt, 327 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 328 { 329 uint32_t i; 330 void *vva; 331 332 if (spdk_unlikely(max_iovcnt < num_sgls)) { 333 return -ERANGE; 334 } 335 336 for (i = 0; i < num_sgls; i++) { 337 if (spdk_unlikely(sgls[i].unkeyed.type != SPDK_NVME_SGL_TYPE_DATA_BLOCK)) { 338 SPDK_ERRLOG("Invalid SGL type %u\n", sgls[i].unkeyed.type); 339 return -EINVAL; 340 } 341 vva = gpa_to_vva(prv, sgls[i].address, sgls[i].unkeyed.length, PROT_READ | PROT_WRITE); 342 if (spdk_unlikely(vva == NULL)) { 343 SPDK_ERRLOG("GPA to VVA failed\n"); 344 return -EINVAL; 345 } 346 iovs[i].iov_base = vva; 347 iovs[i].iov_len = sgls[i].unkeyed.length; 348 } 349 350 return num_sgls; 351 } 352 353 static int 354 nvme_cmd_map_sgls(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt, 355 uint32_t len, size_t mps, 356 void *(*gpa_to_vva)(void *prv, 
uint64_t addr, uint64_t len, int prot)) 357 { 358 struct spdk_nvme_sgl_descriptor *sgl, *last_sgl; 359 uint32_t num_sgls, seg_len; 360 void *vva; 361 int ret; 362 uint32_t total_iovcnt = 0; 363 364 /* SGL cases */ 365 sgl = &cmd->dptr.sgl1; 366 367 /* only one SGL segment */ 368 if (sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) { 369 assert(max_iovcnt > 0); 370 vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length, PROT_READ | PROT_WRITE); 371 if (spdk_unlikely(vva == NULL)) { 372 SPDK_ERRLOG("GPA to VVA failed\n"); 373 return -EINVAL; 374 } 375 iovs[0].iov_base = vva; 376 iovs[0].iov_len = sgl->unkeyed.length; 377 assert(sgl->unkeyed.length == len); 378 379 return 1; 380 } 381 382 for (;;) { 383 if (spdk_unlikely((sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_SEGMENT) && 384 (sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_LAST_SEGMENT))) { 385 SPDK_ERRLOG("Invalid SGL type %u\n", sgl->unkeyed.type); 386 return -EINVAL; 387 } 388 389 seg_len = sgl->unkeyed.length; 390 if (spdk_unlikely(seg_len % sizeof(struct spdk_nvme_sgl_descriptor))) { 391 SPDK_ERRLOG("Invalid SGL segment len %u\n", seg_len); 392 return -EINVAL; 393 } 394 395 num_sgls = seg_len / sizeof(struct spdk_nvme_sgl_descriptor); 396 vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length, PROT_READ); 397 if (spdk_unlikely(vva == NULL)) { 398 SPDK_ERRLOG("GPA to VVA failed\n"); 399 return -EINVAL; 400 } 401 402 /* sgl point to the first segment */ 403 sgl = (struct spdk_nvme_sgl_descriptor *)vva; 404 last_sgl = &sgl[num_sgls - 1]; 405 406 /* we are done */ 407 if (last_sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) { 408 /* map whole sgl list */ 409 ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls, &iovs[total_iovcnt], 410 max_iovcnt - total_iovcnt, gpa_to_vva); 411 if (spdk_unlikely(ret < 0)) { 412 return ret; 413 } 414 total_iovcnt += ret; 415 416 return total_iovcnt; 417 } 418 419 if (num_sgls > 1) { 420 /* map whole sgl exclude last_sgl */ 421 ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls - 1, &iovs[total_iovcnt], 422 max_iovcnt - total_iovcnt, gpa_to_vva); 423 if (spdk_unlikely(ret < 0)) { 424 return ret; 425 } 426 total_iovcnt += ret; 427 } 428 429 /* move to next level's segments */ 430 sgl = last_sgl; 431 } 432 433 return 0; 434 } 435 436 static int 437 nvme_map_cmd(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt, 438 uint32_t len, size_t mps, 439 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 440 { 441 if (cmd->psdt == SPDK_NVME_PSDT_PRP) { 442 return nvme_cmd_map_prps(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva); 443 } 444 445 return nvme_cmd_map_sgls(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva); 446 } 447 448 static char * 449 endpoint_id(struct nvmf_vfio_user_endpoint *endpoint) 450 { 451 return endpoint->trid.traddr; 452 } 453 454 static char * 455 ctrlr_id(struct nvmf_vfio_user_ctrlr *ctrlr) 456 { 457 if (!ctrlr || !ctrlr->endpoint) { 458 return "Null Ctrlr"; 459 } 460 461 return endpoint_id(ctrlr->endpoint); 462 } 463 464 static inline uint16_t 465 io_q_id(struct nvme_q *q) 466 { 467 468 struct nvmf_vfio_user_qpair *vu_qpair; 469 470 assert(q); 471 472 if (q->is_cq) { 473 vu_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, cq); 474 } else { 475 vu_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, sq); 476 } 477 assert(vu_qpair); 478 return vu_qpair->qpair.qid; 479 } 480 481 static void 482 fail_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 483 { 484 assert(ctrlr != NULL); 485 486 if (ctrlr->cfs == 0) { 487 SPDK_ERRLOG(":%s failing 
controller\n", ctrlr_id(ctrlr)); 488 } 489 490 ctrlr->cfs = 1U; 491 } 492 493 static inline bool 494 ctrlr_interrupt_enabled(struct nvmf_vfio_user_ctrlr *vu_ctrlr) 495 { 496 assert(vu_ctrlr != NULL); 497 assert(vu_ctrlr->endpoint != NULL); 498 499 vfu_pci_config_space_t *pci = vu_ctrlr->endpoint->pci_config_space; 500 501 return (!pci->hdr.cmd.id || vu_ctrlr->endpoint->msix->mxc.mxe); 502 } 503 504 static void 505 nvmf_vfio_user_destroy_endpoint(struct nvmf_vfio_user_endpoint *endpoint) 506 { 507 if (endpoint->doorbells) { 508 munmap((void *)endpoint->doorbells, NVMF_VFIO_USER_DOORBELLS_SIZE); 509 } 510 511 if (endpoint->devmem_fd > 0) { 512 close(endpoint->devmem_fd); 513 } 514 515 if (endpoint->vfu_ctx) { 516 vfu_destroy_ctx(endpoint->vfu_ctx); 517 } 518 519 pthread_mutex_destroy(&endpoint->lock); 520 free(endpoint); 521 } 522 523 /* called when process exits */ 524 static int 525 nvmf_vfio_user_destroy(struct spdk_nvmf_transport *transport, 526 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg) 527 { 528 struct nvmf_vfio_user_transport *vu_transport; 529 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 530 531 SPDK_DEBUGLOG(nvmf_vfio, "destroy transport\n"); 532 533 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 534 transport); 535 536 (void)pthread_mutex_destroy(&vu_transport->lock); 537 538 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 539 TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link); 540 nvmf_vfio_user_destroy_endpoint(endpoint); 541 } 542 543 free(vu_transport); 544 545 if (cb_fn) { 546 cb_fn(cb_arg); 547 } 548 549 return 0; 550 } 551 552 static const struct spdk_json_object_decoder vfio_user_transport_opts_decoder[] = { 553 { 554 "disable_mappable_bar0", 555 offsetof(struct nvmf_vfio_user_transport, transport_opts.disable_mappable_bar0), 556 spdk_json_decode_bool, true 557 }, 558 }; 559 560 static struct spdk_nvmf_transport * 561 nvmf_vfio_user_create(struct spdk_nvmf_transport_opts *opts) 562 { 563 struct nvmf_vfio_user_transport *vu_transport; 564 int err; 565 566 if (opts->max_qpairs_per_ctrlr > NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR) { 567 SPDK_ERRLOG("Invalid max_qpairs_per_ctrlr=%d, supported max_qpairs_per_ctrlr=%d\n", 568 opts->max_qpairs_per_ctrlr, NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR); 569 return NULL; 570 } 571 572 vu_transport = calloc(1, sizeof(*vu_transport)); 573 if (vu_transport == NULL) { 574 SPDK_ERRLOG("Transport alloc fail: %m\n"); 575 return NULL; 576 } 577 578 err = pthread_mutex_init(&vu_transport->lock, NULL); 579 if (err != 0) { 580 SPDK_ERRLOG("Pthread initialisation failed (%d)\n", err); 581 goto err; 582 } 583 584 TAILQ_INIT(&vu_transport->endpoints); 585 586 if (opts->transport_specific != NULL && 587 spdk_json_decode_object_relaxed(opts->transport_specific, vfio_user_transport_opts_decoder, 588 SPDK_COUNTOF(vfio_user_transport_opts_decoder), 589 vu_transport)) { 590 SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n"); 591 free(vu_transport); 592 return NULL; 593 } 594 595 SPDK_DEBUGLOG(nvmf_vfio, "vfio_user transport: disable_mappable_bar0=%d\n", 596 vu_transport->transport_opts.disable_mappable_bar0); 597 598 return &vu_transport->transport; 599 600 err: 601 free(vu_transport); 602 603 return NULL; 604 } 605 606 static uint32_t 607 max_queue_size(struct nvmf_vfio_user_ctrlr const *ctrlr) 608 { 609 assert(ctrlr != NULL); 610 assert(ctrlr->qp[0] != NULL); 611 assert(ctrlr->qp[0]->qpair.ctrlr != NULL); 612 613 return ctrlr->qp[0]->qpair.ctrlr->vcprop.cap.bits.mqes + 1; 614 } 615 616 static 
void * 617 map_one(vfu_ctx_t *ctx, uint64_t addr, uint64_t len, dma_sg_t *sg, struct iovec *iov, int prot) 618 { 619 int ret; 620 621 assert(ctx != NULL); 622 assert(sg != NULL); 623 assert(iov != NULL); 624 625 ret = vfu_addr_to_sg(ctx, (void *)(uintptr_t)addr, len, sg, 1, prot); 626 if (ret < 0) { 627 return NULL; 628 } 629 630 ret = vfu_map_sg(ctx, sg, iov, 1, 0); 631 if (ret != 0) { 632 return NULL; 633 } 634 635 assert(iov->iov_base != NULL); 636 return iov->iov_base; 637 } 638 639 static inline uint32_t 640 sq_head(struct nvmf_vfio_user_qpair *qpair) 641 { 642 assert(qpair != NULL); 643 return qpair->sq.head; 644 } 645 646 static inline void 647 sqhd_advance(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair) 648 { 649 assert(ctrlr != NULL); 650 assert(qpair != NULL); 651 qpair->sq.head = (qpair->sq.head + 1) % qpair->sq.size; 652 } 653 654 static int 655 map_q(struct nvmf_vfio_user_ctrlr *vu_ctrlr, struct nvme_q *q, bool is_cq, bool unmap) 656 { 657 uint64_t len; 658 659 assert(q->size); 660 assert(q->addr == NULL); 661 662 if (is_cq) { 663 len = q->size * sizeof(struct spdk_nvme_cpl); 664 } else { 665 len = q->size * sizeof(struct spdk_nvme_cmd); 666 } 667 668 q->addr = map_one(vu_ctrlr->endpoint->vfu_ctx, q->prp1, len, q->sg, 669 &q->iov, is_cq ? PROT_READ | PROT_WRITE : PROT_READ); 670 if (q->addr == NULL) { 671 return -EFAULT; 672 } 673 674 if (unmap) { 675 memset(q->addr, 0, len); 676 } 677 678 return 0; 679 } 680 681 static int 682 asq_setup(struct nvmf_vfio_user_ctrlr *ctrlr) 683 { 684 struct nvme_q *sq; 685 const struct spdk_nvmf_registers *regs; 686 int ret; 687 688 assert(ctrlr != NULL); 689 assert(ctrlr->qp[0] != NULL); 690 assert(ctrlr->qp[0]->sq.addr == NULL); 691 /* XXX ctrlr->asq == 0 is a valid memory address */ 692 693 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 694 sq = &ctrlr->qp[0]->sq; 695 sq->size = regs->aqa.bits.asqs + 1; 696 sq->prp1 = regs->asq; 697 sq->head = 0; 698 sq->cqid = 0; 699 sq->is_cq = false; 700 701 ret = map_q(ctrlr, sq, false, true); 702 if (ret) { 703 return ret; 704 } 705 706 *tdbl(ctrlr, sq) = 0; 707 708 return 0; 709 } 710 711 static inline int 712 queue_index(uint16_t qid, int is_cq) 713 { 714 return (qid * 2) + is_cq; 715 } 716 717 static volatile uint32_t * 718 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 719 { 720 assert(ctrlr != NULL); 721 assert(q != NULL); 722 assert(!q->is_cq); 723 724 return &ctrlr->doorbells[queue_index(io_q_id(q), false)]; 725 } 726 727 static volatile uint32_t * 728 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 729 { 730 assert(ctrlr != NULL); 731 assert(q != NULL); 732 assert(q->is_cq); 733 734 return &ctrlr->doorbells[queue_index(io_q_id(q), true)]; 735 } 736 737 static inline bool 738 cq_is_full(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 739 { 740 assert(ctrlr != NULL); 741 assert(q != NULL); 742 assert(q->is_cq); 743 744 return ((q->tail + 1) % q->size) == *hdbl(ctrlr, q); 745 } 746 747 static inline void 748 cq_tail_advance(struct nvme_q *q) 749 { 750 assert(q != NULL); 751 assert(q->is_cq); 752 753 assert(q->tail < q->size); 754 q->tail++; 755 756 if (spdk_unlikely(q->tail == q->size)) { 757 q->tail = 0; 758 q->phase = !q->phase; 759 } 760 } 761 762 static int 763 acq_setup(struct nvmf_vfio_user_ctrlr *ctrlr) 764 { 765 struct nvme_q *cq; 766 const struct spdk_nvmf_registers *regs; 767 int ret; 768 769 assert(ctrlr != NULL); 770 assert(ctrlr->qp[0] != NULL); 771 assert(ctrlr->qp[0]->cq.addr == NULL); 772 773 regs = 
spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
    assert(regs != NULL);
    cq = &ctrlr->qp[0]->cq;
    cq->size = regs->aqa.bits.acqs + 1;
    cq->prp1 = regs->acq;
    cq->tail = 0;
    cq->is_cq = true;
    cq->ien = true;
    cq->phase = true;

    ret = map_q(ctrlr, cq, true, true);
    if (ret) {
        return ret;
    }
    *hdbl(ctrlr, cq) = 0;

    return 0;
}

static inline dma_sg_t *
vu_req_to_sg_t(struct nvmf_vfio_user_req *vu_req, uint32_t iovcnt)
{
    return (dma_sg_t *)((uintptr_t)vu_req->sg + iovcnt * dma_sg_size());
}

static void *
_map_one(void *prv, uint64_t addr, uint64_t len, int prot)
{
    struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)prv;
    struct spdk_nvmf_qpair *qpair;
    struct nvmf_vfio_user_req *vu_req;
    struct nvmf_vfio_user_qpair *vu_qpair;
    void *ret;

    assert(req != NULL);
    qpair = req->qpair;
    vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
    vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

    assert(vu_req->iovcnt < NVMF_VFIO_USER_MAX_IOVECS);
    ret = map_one(vu_qpair->ctrlr->endpoint->vfu_ctx, addr, len,
                  vu_req_to_sg_t(vu_req, vu_req->iovcnt),
                  &vu_req->iov[vu_req->iovcnt], prot);
    if (spdk_likely(ret != NULL)) {
        vu_req->iovcnt++;
    }
    return ret;
}

static int
vfio_user_map_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req,
                  struct iovec *iov, uint32_t length)
{
    /* Map the command's PRP list or SGL from guest physical memory to
     * virtual memory addresses.
     */
    return nvme_map_cmd(req, &req->cmd->nvme_cmd, iov, NVMF_REQ_MAX_BUFFERS,
                        length, 4096, _map_one);
}

static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
               struct nvmf_vfio_user_qpair *vu_qpair);

/*
 * Posts a CQE in the completion queue.
839 * 840 * @ctrlr: the vfio-user controller 841 * @cq: the completion queue 842 * @cdw0: cdw0 as reported by NVMf 843 * @sqid: submission queue ID 844 * @cid: command identifier in NVMe command 845 * @sc: the NVMe CQE status code 846 * @sct: the NVMe CQE status code type 847 */ 848 static int 849 post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *cq, 850 uint32_t cdw0, uint16_t sqid, uint16_t cid, uint16_t sc, uint16_t sct) 851 { 852 struct spdk_nvme_cpl *cpl; 853 const struct spdk_nvmf_registers *regs; 854 int err; 855 856 assert(ctrlr != NULL); 857 858 if (spdk_unlikely(cq == NULL || cq->addr == NULL)) { 859 return 0; 860 } 861 862 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 863 if (regs->csts.bits.shst != SPDK_NVME_SHST_NORMAL) { 864 SPDK_DEBUGLOG(nvmf_vfio, 865 "%s: ignore completion SQ%d cid=%d status=%#x\n", 866 ctrlr_id(ctrlr), sqid, cid, sc); 867 return 0; 868 } 869 870 if (cq_is_full(ctrlr, cq)) { 871 SPDK_ERRLOG("%s: CQ%d full (tail=%d, head=%d)\n", 872 ctrlr_id(ctrlr), io_q_id(cq), cq->tail, *hdbl(ctrlr, cq)); 873 return -1; 874 } 875 876 cpl = ((struct spdk_nvme_cpl *)cq->addr) + cq->tail; 877 878 assert(ctrlr->qp[sqid] != NULL); 879 SPDK_DEBUGLOG(nvmf_vfio, 880 "%s: request complete SQ%d cid=%d status=%#x SQ head=%#x CQ tail=%#x\n", 881 ctrlr_id(ctrlr), sqid, cid, sc, sq_head(ctrlr->qp[sqid]), 882 cq->tail); 883 884 cpl->sqhd = sq_head(ctrlr->qp[sqid]); 885 cpl->sqid = sqid; 886 cpl->cid = cid; 887 cpl->cdw0 = cdw0; 888 cpl->status.dnr = 0x0; 889 cpl->status.m = 0x0; 890 cpl->status.sct = sct; 891 cpl->status.p = cq->phase; 892 cpl->status.sc = sc; 893 894 cq_tail_advance(cq); 895 896 /* 897 * this function now executes at SPDK thread context, we 898 * might be triggering interrupts from vfio-user thread context so 899 * check for race conditions. 
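     *
     * Note (illustrative summary, not normative): an interrupt is only raised
     * further down if ctrlr_interrupt_enabled() is true (i.e. the guest has not
     * disabled INTx via the PCI command register, or has MSI-X enabled) and the
     * CQ was created with IEN set; vfu_irq_trigger() then signals vector cq->iv.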
900 */ 901 if (ctrlr_interrupt_enabled(ctrlr) && cq->ien) { 902 err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv); 903 if (err != 0) { 904 SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n", 905 ctrlr_id(ctrlr)); 906 return err; 907 } 908 } 909 910 return 0; 911 } 912 913 static bool 914 io_q_exists(struct nvmf_vfio_user_ctrlr *vu_ctrlr, const uint16_t qid, const bool is_cq) 915 { 916 assert(vu_ctrlr != NULL); 917 918 if (qid == 0 || qid >= NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR) { 919 return false; 920 } 921 922 if (vu_ctrlr->qp[qid] == NULL) { 923 return false; 924 } 925 926 if (!is_cq) { 927 if (vu_ctrlr->qp[qid]->state == VFIO_USER_QPAIR_SQ_DELETED || 928 vu_ctrlr->qp[qid]->state == VFIO_USER_QPAIR_UNINITIALIZED) { 929 return false; 930 } 931 } 932 933 return true; 934 } 935 936 static void 937 unmap_qp(struct nvmf_vfio_user_qpair *qp) 938 { 939 struct nvmf_vfio_user_ctrlr *ctrlr; 940 941 if (qp->ctrlr == NULL) { 942 return; 943 } 944 ctrlr = qp->ctrlr; 945 946 SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap QP%d\n", 947 ctrlr_id(ctrlr), qp->qpair.qid); 948 949 if (qp->sq.addr != NULL) { 950 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, qp->sq.sg, &qp->sq.iov, 1); 951 qp->sq.addr = NULL; 952 } 953 954 if (qp->cq.addr != NULL) { 955 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, qp->cq.sg, &qp->cq.iov, 1); 956 qp->cq.addr = NULL; 957 } 958 } 959 960 static int 961 remap_qp(struct nvmf_vfio_user_qpair *vu_qpair) 962 { 963 struct nvme_q *sq, *cq; 964 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 965 int ret; 966 967 vu_ctrlr = vu_qpair->ctrlr; 968 sq = &vu_qpair->sq; 969 cq = &vu_qpair->cq; 970 971 if (sq->size) { 972 ret = map_q(vu_ctrlr, sq, false, false); 973 if (ret) { 974 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap SQID %d %#lx-%#lx\n", 975 io_q_id(sq), sq->prp1, sq->prp1 + sq->size * sizeof(struct spdk_nvme_cmd)); 976 return -EFAULT; 977 } 978 } 979 980 if (cq->size) { 981 ret = map_q(vu_ctrlr, cq, true, false); 982 if (ret) { 983 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap CQID %d %#lx-%#lx\n", 984 io_q_id(cq), cq->prp1, cq->prp1 + cq->size * sizeof(struct spdk_nvme_cpl)); 985 return -EFAULT; 986 } 987 988 } 989 990 return 0; 991 } 992 993 static void 994 free_qp(struct nvmf_vfio_user_ctrlr *ctrlr, uint16_t qid) 995 { 996 struct nvmf_vfio_user_qpair *qpair; 997 struct nvmf_vfio_user_req *vu_req; 998 uint32_t i; 999 1000 if (ctrlr == NULL) { 1001 return; 1002 } 1003 1004 qpair = ctrlr->qp[qid]; 1005 if (qpair == NULL) { 1006 return; 1007 } 1008 1009 SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy QP%d=%p\n", ctrlr_id(ctrlr), 1010 qid, qpair); 1011 1012 unmap_qp(qpair); 1013 1014 for (i = 0; i < qpair->qsize; i++) { 1015 vu_req = &qpair->reqs_internal[i]; 1016 free(vu_req->sg); 1017 } 1018 free(qpair->reqs_internal); 1019 1020 free(qpair->sq.sg); 1021 free(qpair->cq.sg); 1022 free(qpair); 1023 1024 ctrlr->qp[qid] = NULL; 1025 } 1026 1027 /* This function can only fail because of memory allocation errors. 
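 *
 * Rough allocation sketch (derived from the code below; sizes are
 * illustrative): one struct nvmf_vfio_user_qpair, one dma_sg_t each for the
 * SQ and CQ, and qsize request slots, where every slot carries
 * NVMF_VFIO_USER_MAX_IOVECS (NVMF_REQ_MAX_BUFFERS + 1) dma_sg_t entries for
 * gpa_to_vva translations. For the admin queue, qsize is
 * NVMF_VFIO_USER_DEFAULT_AQ_DEPTH (32).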
*/ 1028 static int 1029 init_qp(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_transport *transport, 1030 const uint32_t qsize, const uint16_t id) 1031 { 1032 uint32_t i; 1033 struct nvmf_vfio_user_qpair *qpair; 1034 struct nvmf_vfio_user_req *vu_req, *tmp; 1035 struct spdk_nvmf_request *req; 1036 1037 assert(ctrlr != NULL); 1038 assert(transport != NULL); 1039 1040 qpair = calloc(1, sizeof(*qpair)); 1041 if (qpair == NULL) { 1042 return -ENOMEM; 1043 } 1044 qpair->sq.sg = calloc(1, dma_sg_size()); 1045 if (qpair->sq.sg == NULL) { 1046 free(qpair); 1047 return -ENOMEM; 1048 } 1049 qpair->cq.sg = calloc(1, dma_sg_size()); 1050 if (qpair->cq.sg == NULL) { 1051 free(qpair->sq.sg); 1052 free(qpair); 1053 return -ENOMEM; 1054 } 1055 1056 qpair->qpair.qid = id; 1057 qpair->qpair.transport = transport; 1058 qpair->ctrlr = ctrlr; 1059 qpair->qsize = qsize; 1060 1061 TAILQ_INIT(&qpair->reqs); 1062 1063 qpair->reqs_internal = calloc(qsize, sizeof(struct nvmf_vfio_user_req)); 1064 if (qpair->reqs_internal == NULL) { 1065 SPDK_ERRLOG("%s: error allocating reqs: %m\n", ctrlr_id(ctrlr)); 1066 goto reqs_err; 1067 } 1068 1069 for (i = 0; i < qsize; i++) { 1070 vu_req = &qpair->reqs_internal[i]; 1071 vu_req->sg = calloc(NVMF_VFIO_USER_MAX_IOVECS, dma_sg_size()); 1072 if (vu_req->sg == NULL) { 1073 goto sg_err; 1074 } 1075 1076 req = &vu_req->req; 1077 req->qpair = &qpair->qpair; 1078 req->rsp = (union nvmf_c2h_msg *)&vu_req->rsp; 1079 req->cmd = (union nvmf_h2c_msg *)&vu_req->cmd; 1080 1081 TAILQ_INSERT_TAIL(&qpair->reqs, vu_req, link); 1082 } 1083 1084 ctrlr->qp[id] = qpair; 1085 return 0; 1086 1087 sg_err: 1088 TAILQ_FOREACH_SAFE(vu_req, &qpair->reqs, link, tmp) { 1089 free(vu_req->sg); 1090 } 1091 free(qpair->reqs_internal); 1092 1093 reqs_err: 1094 free(qpair->sq.sg); 1095 free(qpair->cq.sg); 1096 free(qpair); 1097 return -ENOMEM; 1098 } 1099 1100 /* 1101 * Creates a completion or submission I/O queue. Returns 0 on success, -errno 1102 * on error. 1103 */ 1104 static int 1105 handle_create_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, 1106 struct spdk_nvme_cmd *cmd, const bool is_cq) 1107 { 1108 uint16_t qid; 1109 uint32_t qsize; 1110 uint16_t sc = SPDK_NVME_SC_SUCCESS; 1111 uint16_t sct = SPDK_NVME_SCT_GENERIC; 1112 int err = 0; 1113 struct nvmf_vfio_user_qpair *vu_qpair; 1114 struct nvme_q *io_q; 1115 struct nvmf_vfio_user_transport *vu_transport = ctrlr->transport; 1116 1117 assert(ctrlr != NULL); 1118 assert(cmd != NULL); 1119 1120 qid = cmd->cdw10_bits.create_io_q.qid; 1121 if (qid == 0 || qid >= vu_transport->transport.opts.max_qpairs_per_ctrlr) { 1122 SPDK_ERRLOG("%s: invalid QID=%d, max=%d\n", ctrlr_id(ctrlr), 1123 qid, vu_transport->transport.opts.max_qpairs_per_ctrlr); 1124 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1125 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1126 goto out; 1127 } 1128 1129 if (io_q_exists(ctrlr, qid, is_cq)) { 1130 SPDK_ERRLOG("%s: %cQ%d already exists\n", ctrlr_id(ctrlr), 1131 is_cq ? 'C' : 'S', qid); 1132 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1133 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1134 goto out; 1135 } 1136 1137 qsize = cmd->cdw10_bits.create_io_q.qsize + 1; 1138 if (qsize == 1 || qsize > max_queue_size(ctrlr)) { 1139 SPDK_ERRLOG("%s: invalid I/O queue size %u\n", ctrlr_id(ctrlr), qsize); 1140 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1141 sc = SPDK_NVME_SC_INVALID_QUEUE_SIZE; 1142 goto out; 1143 } 1144 1145 SPDK_DEBUGLOG(nvmf_vfio, 1146 "%s: create I/O %cQ%d: QSIZE=%#x\n", ctrlr_id(ctrlr), 1147 is_cq ? 
              'C' : 'S', qid, qsize);

    if (is_cq) {
        if (cmd->cdw11_bits.create_io_cq.pc != 0x1) {
            SPDK_ERRLOG("%s: non-PC CQ not supported\n", ctrlr_id(ctrlr));
            sc = SPDK_NVME_SC_INVALID_FIELD;
            goto out;
        }
        if (cmd->cdw11_bits.create_io_cq.iv > NVME_IRQ_MSIX_NUM - 1) {
            SPDK_ERRLOG("%s: IV is too big\n", ctrlr_id(ctrlr));
            sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
            sc = SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR;
            goto out;
        }

        err = init_qp(ctrlr, ctrlr->qp[0]->qpair.transport, qsize, qid);
        if (err != 0) {
            sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
            goto out;
        }

        io_q = &ctrlr->qp[qid]->cq;
        io_q->ien = cmd->cdw11_bits.create_io_cq.ien;
        io_q->iv = cmd->cdw11_bits.create_io_cq.iv;
        io_q->phase = true;
    } else {
        if (cmd->cdw11_bits.create_io_sq.cqid == 0) {
            SPDK_ERRLOG("%s: invalid CQID 0\n", ctrlr_id(ctrlr));
            sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
            sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
            goto out;
        }
        /* CQ must be created before SQ */
        if (!io_q_exists(ctrlr, cmd->cdw11_bits.create_io_sq.cqid, true)) {
            SPDK_ERRLOG("%s: CQ%d does not exist\n", ctrlr_id(ctrlr),
                        cmd->cdw11_bits.create_io_sq.cqid);
            sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
            sc = SPDK_NVME_SC_COMPLETION_QUEUE_INVALID;
            goto out;
        }

        if (cmd->cdw11_bits.create_io_sq.pc != 0x1) {
            SPDK_ERRLOG("%s: non-PC SQ not supported\n", ctrlr_id(ctrlr));
            sc = SPDK_NVME_SC_INVALID_FIELD;
            goto out;
        }
        /* TODO: support shared IO CQ */
        if (qid != cmd->cdw11_bits.create_io_sq.cqid) {
            SPDK_ERRLOG("%s: shared CQs are not supported yet\n", ctrlr_id(ctrlr));
            sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
            sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
            goto out;
        }

        io_q = &ctrlr->qp[qid]->sq;
        io_q->cqid = cmd->cdw11_bits.create_io_sq.cqid;
        SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%d CQID=%d\n", ctrlr_id(ctrlr),
                      qid, io_q->cqid);
    }

    io_q->is_cq = is_cq;
    io_q->size = qsize;
    io_q->prp1 = cmd->dptr.prp.prp1;

    err = map_q(ctrlr, io_q, is_cq, true);
    if (err) {
        sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
        SPDK_ERRLOG("%s: failed to map I/O queue: %m\n", ctrlr_id(ctrlr));
        goto out;
    }

    SPDK_DEBUGLOG(nvmf_vfio, "%s: mapped %cQ%d IOVA=%#lx vaddr=%#llx\n",
                  ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
                  qid, cmd->dptr.prp.prp1, (unsigned long long)io_q->addr);

    if (is_cq) {
        *hdbl(ctrlr, io_q) = 0;
    } else {
        vu_qpair = ctrlr->qp[qid];
        *tdbl(ctrlr, io_q) = 0;
        vu_qpair->sq.head = 0;

        if (vu_qpair->state == VFIO_USER_QPAIR_SQ_DELETED) {
            vu_qpair->state = VFIO_USER_QPAIR_ACTIVE;
        } else {
            /*
             * Create our new I/O qpair. This asynchronously invokes, on a
             * suitable poll group, the nvmf_vfio_user_poll_group_add()
             * callback, which will call spdk_nvmf_request_exec_fabrics()
             * with a generated fabrics connect command. This command is
             * then eventually completed via handle_queue_connect_rsp().
             */
            vu_qpair->create_io_sq_cmd = *cmd;
            spdk_nvmf_tgt_new_qpair(ctrlr->transport->transport.tgt,
                                    &vu_qpair->qpair);
            return 0;
        }
    }

out:
    return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, sc, sct);
}

/* For the admin Delete I/O Completion Queue command, the NVMf library will
 * disconnect and free the queue pair, so save the command in a context.
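 *
 * Sketch of the flow (as implemented below): handle_del_io_q() stashes the
 * command in a vfio_user_delete_cq_ctx and calls spdk_nvmf_qpair_disconnect();
 * once the qpair is torn down, vfio_user_qpair_delete_cb() posts the
 * completion for the saved command to the admin CQ.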
1252 */ 1253 struct vfio_user_delete_cq_ctx { 1254 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 1255 struct spdk_nvme_cmd delete_io_cq_cmd; 1256 }; 1257 1258 static void 1259 vfio_user_qpair_delete_cb(void *cb_arg) 1260 { 1261 struct vfio_user_delete_cq_ctx *ctx = cb_arg; 1262 struct nvmf_vfio_user_ctrlr *vu_ctrlr = ctx->vu_ctrlr; 1263 1264 post_completion(vu_ctrlr, &vu_ctrlr->qp[0]->cq, 0, 0, ctx->delete_io_cq_cmd.cid, 1265 SPDK_NVME_SC_SUCCESS, SPDK_NVME_SCT_GENERIC); 1266 free(ctx); 1267 } 1268 1269 /* 1270 * Deletes a completion or submission I/O queue. 1271 */ 1272 static int 1273 handle_del_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, 1274 struct spdk_nvme_cmd *cmd, const bool is_cq) 1275 { 1276 uint16_t sct = SPDK_NVME_SCT_GENERIC; 1277 uint16_t sc = SPDK_NVME_SC_SUCCESS; 1278 struct nvmf_vfio_user_qpair *vu_qpair; 1279 struct vfio_user_delete_cq_ctx *ctx; 1280 1281 SPDK_DEBUGLOG(nvmf_vfio, "%s: delete I/O %cQ: QID=%d\n", 1282 ctrlr_id(ctrlr), is_cq ? 'C' : 'S', 1283 cmd->cdw10_bits.delete_io_q.qid); 1284 1285 if (!io_q_exists(ctrlr, cmd->cdw10_bits.delete_io_q.qid, is_cq)) { 1286 SPDK_ERRLOG("%s: I/O %cQ%d does not exist\n", ctrlr_id(ctrlr), 1287 is_cq ? 'C' : 'S', cmd->cdw10_bits.delete_io_q.qid); 1288 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1289 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1290 goto out; 1291 } 1292 1293 vu_qpair = ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]; 1294 if (is_cq) { 1295 if (vu_qpair->state == VFIO_USER_QPAIR_UNINITIALIZED) { 1296 free_qp(ctrlr, cmd->cdw10_bits.delete_io_q.qid); 1297 goto out; 1298 } 1299 1300 /* SQ must have been deleted first */ 1301 if (vu_qpair->state != VFIO_USER_QPAIR_SQ_DELETED) { 1302 SPDK_ERRLOG("%s: the associated SQ must be deleted first\n", ctrlr_id(ctrlr)); 1303 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1304 sc = SPDK_NVME_SC_INVALID_QUEUE_DELETION; 1305 goto out; 1306 } 1307 ctx = calloc(1, sizeof(*ctx)); 1308 if (!ctx) { 1309 sct = SPDK_NVME_SCT_GENERIC; 1310 sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 1311 goto out; 1312 } 1313 ctx->vu_ctrlr = ctrlr; 1314 ctx->delete_io_cq_cmd = *cmd; 1315 spdk_nvmf_qpair_disconnect(&vu_qpair->qpair, vfio_user_qpair_delete_cb, ctx); 1316 return 0; 1317 } else { 1318 if (vu_qpair->state == VFIO_USER_QPAIR_SQ_DELETED) { 1319 SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%u is already deleted\n", ctrlr_id(ctrlr), 1320 cmd->cdw10_bits.delete_io_q.qid); 1321 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1322 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1323 goto out; 1324 } 1325 1326 /* 1327 * This doesn't actually delete the SQ, We're merely telling the poll_group_poll 1328 * function to skip checking this SQ. The queue pair will be disconnected in Delete 1329 * IO CQ command. 1330 */ 1331 vu_qpair->state = VFIO_USER_QPAIR_SQ_DELETED; 1332 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, vu_qpair->sq.sg, &vu_qpair->sq.iov, 1); 1333 vu_qpair->sq.addr = NULL; 1334 } 1335 1336 out: 1337 return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, sc, sct); 1338 } 1339 1340 /* 1341 * Returns 0 on success and -errno on error. 1342 */ 1343 static int 1344 consume_admin_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd) 1345 { 1346 assert(ctrlr != NULL); 1347 assert(cmd != NULL); 1348 1349 if (cmd->fuse != 0) { 1350 /* Fused admin commands are not supported. 
*/ 1351 return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, 1352 SPDK_NVME_SC_INVALID_FIELD, 1353 SPDK_NVME_SCT_GENERIC); 1354 } 1355 1356 switch (cmd->opc) { 1357 case SPDK_NVME_OPC_CREATE_IO_CQ: 1358 case SPDK_NVME_OPC_CREATE_IO_SQ: 1359 return handle_create_io_q(ctrlr, cmd, 1360 cmd->opc == SPDK_NVME_OPC_CREATE_IO_CQ); 1361 case SPDK_NVME_OPC_DELETE_IO_SQ: 1362 case SPDK_NVME_OPC_DELETE_IO_CQ: 1363 return handle_del_io_q(ctrlr, cmd, 1364 cmd->opc == SPDK_NVME_OPC_DELETE_IO_CQ); 1365 default: 1366 return handle_cmd_req(ctrlr, cmd, ctrlr->qp[0]); 1367 } 1368 } 1369 1370 static int 1371 handle_cmd_rsp(struct nvmf_vfio_user_req *vu_req, void *cb_arg) 1372 { 1373 struct nvmf_vfio_user_qpair *vu_qpair = cb_arg; 1374 struct nvmf_vfio_user_ctrlr *vu_ctrlr = vu_qpair->ctrlr; 1375 uint16_t sqid, cqid; 1376 1377 assert(vu_qpair != NULL); 1378 assert(vu_req != NULL); 1379 assert(vu_ctrlr != NULL); 1380 1381 if (spdk_likely(vu_req->iovcnt)) { 1382 vfu_unmap_sg(vu_ctrlr->endpoint->vfu_ctx, vu_req->sg, vu_req->iov, vu_req->iovcnt); 1383 } 1384 sqid = vu_qpair->qpair.qid; 1385 cqid = vu_ctrlr->qp[sqid]->sq.cqid; 1386 1387 return post_completion(vu_ctrlr, &vu_ctrlr->qp[cqid]->cq, 1388 vu_req->req.rsp->nvme_cpl.cdw0, 1389 sqid, 1390 vu_req->req.cmd->nvme_cmd.cid, 1391 vu_req->req.rsp->nvme_cpl.status.sc, 1392 vu_req->req.rsp->nvme_cpl.status.sct); 1393 } 1394 1395 static int 1396 consume_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair, 1397 struct spdk_nvme_cmd *cmd) 1398 { 1399 assert(qpair != NULL); 1400 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 1401 return consume_admin_cmd(ctrlr, cmd); 1402 } 1403 1404 return handle_cmd_req(ctrlr, cmd, qpair); 1405 } 1406 1407 /* Returns the number of commands processed, or a negative value on error. */ 1408 static int 1409 handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail, 1410 struct nvmf_vfio_user_qpair *qpair) 1411 { 1412 struct spdk_nvme_cmd *queue; 1413 int count = 0; 1414 1415 assert(ctrlr != NULL); 1416 assert(qpair != NULL); 1417 1418 queue = qpair->sq.addr; 1419 while (sq_head(qpair) != new_tail) { 1420 int err; 1421 struct spdk_nvme_cmd *cmd = &queue[sq_head(qpair)]; 1422 1423 count++; 1424 1425 /* 1426 * SQHD must contain the new head pointer, so we must increase 1427 * it before we generate a completion. 1428 */ 1429 sqhd_advance(ctrlr, qpair); 1430 1431 err = consume_cmd(ctrlr, qpair, cmd); 1432 if (err != 0) { 1433 return err; 1434 } 1435 } 1436 1437 return count; 1438 } 1439 1440 static int 1441 enable_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1442 { 1443 int err; 1444 1445 assert(ctrlr != NULL); 1446 1447 err = acq_setup(ctrlr); 1448 if (err != 0) { 1449 return err; 1450 } 1451 1452 err = asq_setup(ctrlr); 1453 if (err != 0) { 1454 return err; 1455 } 1456 1457 return 0; 1458 } 1459 1460 static void 1461 disable_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1462 { 1463 assert(ctrlr->qp[0] != NULL); 1464 1465 unmap_qp(ctrlr->qp[0]); 1466 } 1467 1468 static void 1469 memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1470 { 1471 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1472 struct nvmf_vfio_user_ctrlr *ctrlr; 1473 struct nvmf_vfio_user_qpair *qpair; 1474 int ret; 1475 1476 /* 1477 * We're not interested in any DMA regions that aren't mappable (we don't 1478 * support clients that don't share their memory). 
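     *
     * A further note summarizing the checks below: a region must be 2MB-aligned
     * in both IOVA and length (the MASK_2MB tests) to be usable, and only
     * regions mapped PROT_READ|PROT_WRITE are registered with
     * spdk_mem_register().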
1479 */ 1480 if (!info->vaddr) { 1481 return; 1482 } 1483 1484 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1485 (info->mapping.iov_len & MASK_2MB)) { 1486 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1487 (uintptr_t)info->mapping.iov_base, 1488 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1489 return; 1490 } 1491 1492 assert(endpoint != NULL); 1493 if (endpoint->ctrlr == NULL) { 1494 return; 1495 } 1496 ctrlr = endpoint->ctrlr; 1497 1498 SPDK_DEBUGLOG(nvmf_vfio, "%s: map IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1499 (uintptr_t)info->mapping.iov_base, 1500 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1501 1502 /* VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE are enabled when registering to VFIO, here we also 1503 * check the protection bits before registering. 1504 */ 1505 if (info->prot == (PROT_WRITE | PROT_READ)) { 1506 ret = spdk_mem_register(info->mapping.iov_base, info->mapping.iov_len); 1507 if (ret) { 1508 SPDK_ERRLOG("Memory region register %#lx-%#lx failed, ret=%d\n", 1509 (uint64_t)(uintptr_t)info->mapping.iov_base, 1510 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len, 1511 ret); 1512 } 1513 } 1514 1515 pthread_mutex_lock(&endpoint->lock); 1516 TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) { 1517 if (qpair->state != VFIO_USER_QPAIR_INACTIVE) { 1518 continue; 1519 } 1520 1521 ret = remap_qp(qpair); 1522 if (ret) { 1523 continue; 1524 } 1525 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1526 SPDK_DEBUGLOG(nvmf_vfio, "Remap QP %u successfully\n", qpair->qpair.qid); 1527 } 1528 pthread_mutex_unlock(&endpoint->lock); 1529 } 1530 1531 static int 1532 memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1533 { 1534 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1535 struct nvmf_vfio_user_ctrlr *ctrlr; 1536 struct nvmf_vfio_user_qpair *qpair; 1537 void *map_start, *map_end; 1538 int ret = 0; 1539 1540 if (!info->vaddr) { 1541 return 0; 1542 } 1543 1544 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1545 (info->mapping.iov_len & MASK_2MB)) { 1546 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1547 (uintptr_t)info->mapping.iov_base, 1548 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1549 return 0; 1550 } 1551 1552 assert(endpoint != NULL); 1553 if (endpoint->ctrlr == NULL) { 1554 return 0; 1555 } 1556 ctrlr = endpoint->ctrlr; 1557 1558 SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1559 (uintptr_t)info->mapping.iov_base, 1560 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1561 1562 map_start = info->mapping.iov_base; 1563 map_end = info->mapping.iov_base + info->mapping.iov_len; 1564 1565 pthread_mutex_lock(&endpoint->lock); 1566 TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) { 1567 if ((qpair->cq.addr >= map_start && qpair->cq.addr <= map_end) || 1568 (qpair->sq.addr >= map_start && qpair->sq.addr <= map_end)) { 1569 /* TODO: Ideally we should disconnect this queue pair 1570 * before returning to caller. 
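             *
             * For now (as the code below shows) the qpair is only unmapped and
             * marked VFIO_USER_QPAIR_INACTIVE; if the client maps the region
             * again, memory_region_add_cb() remaps it via remap_qp() and sets
             * it back to VFIO_USER_QPAIR_ACTIVE.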
1571 */ 1572 unmap_qp(qpair); 1573 qpair->state = VFIO_USER_QPAIR_INACTIVE; 1574 } 1575 } 1576 pthread_mutex_unlock(&endpoint->lock); 1577 1578 if (info->prot == (PROT_WRITE | PROT_READ)) { 1579 ret = spdk_mem_unregister(info->mapping.iov_base, info->mapping.iov_len); 1580 if (ret) { 1581 SPDK_ERRLOG("Memory region unregister %#lx-%#lx failed, ret=%d\n", 1582 (uint64_t)(uintptr_t)info->mapping.iov_base, 1583 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len, 1584 ret); 1585 } 1586 } 1587 1588 return 0; 1589 } 1590 1591 static int 1592 nvmf_vfio_user_prop_req_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1593 { 1594 struct nvmf_vfio_user_qpair *vu_qpair = cb_arg; 1595 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 1596 bool disable_admin = false; 1597 int ret; 1598 1599 assert(vu_qpair != NULL); 1600 assert(req != NULL); 1601 1602 if (req->req.cmd->prop_get_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET) { 1603 assert(vu_qpair->ctrlr != NULL); 1604 assert(req != NULL); 1605 1606 memcpy(req->req.data, 1607 &req->req.rsp->prop_get_rsp.value.u64, 1608 req->req.length); 1609 } else { 1610 assert(req->req.cmd->prop_set_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET); 1611 assert(vu_qpair->ctrlr != NULL); 1612 vu_ctrlr = vu_qpair->ctrlr; 1613 1614 if (req->req.cmd->prop_set_cmd.ofst == offsetof(struct spdk_nvme_registers, cc)) { 1615 union spdk_nvme_cc_register cc, diff; 1616 1617 cc.raw = req->req.cmd->prop_set_cmd.value.u64; 1618 diff.raw = cc.raw ^ req->cc.raw; 1619 1620 if (diff.bits.en) { 1621 if (cc.bits.en) { 1622 SPDK_DEBUGLOG(nvmf_vfio, "%s: MAP Admin queue\n", ctrlr_id(vu_ctrlr)); 1623 ret = enable_admin_queue(vu_ctrlr); 1624 if (ret) { 1625 SPDK_ERRLOG("%s: failed to map Admin queue\n", ctrlr_id(vu_ctrlr)); 1626 return ret; 1627 } 1628 vu_qpair->state = VFIO_USER_QPAIR_ACTIVE; 1629 } else { 1630 disable_admin = true; 1631 } 1632 } 1633 1634 if (diff.bits.shn) { 1635 if (cc.bits.shn == SPDK_NVME_SHN_NORMAL || cc.bits.shn == SPDK_NVME_SHN_ABRUPT) { 1636 disable_admin = true; 1637 } 1638 } 1639 1640 if (disable_admin) { 1641 SPDK_DEBUGLOG(nvmf_vfio, 1642 "%s: UNMAP Admin queue\n", 1643 ctrlr_id(vu_ctrlr)); 1644 vu_qpair->state = VFIO_USER_QPAIR_INACTIVE; 1645 disable_admin_queue(vu_ctrlr); 1646 /* For PCIe controller reset or shutdown, we will drop all AER responses */ 1647 nvmf_ctrlr_abort_aer(vu_qpair->qpair.ctrlr); 1648 } 1649 } 1650 } 1651 1652 return 0; 1653 } 1654 1655 /* 1656 * Handles a write at offset 0x1000 or more; this is the non-mapped path when a 1657 * doorbell is written via access_bar0_fn(). 1658 * 1659 * DSTRD is set to fixed value 0 for NVMf. 
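 *
 * Worked example (illustrative, assuming DSTRD=0 as stated above): the tail
 * doorbell of SQ y lives at BAR0 offset 0x1000 + (2*y)*4 and the head
 * doorbell of CQ y at 0x1000 + (2*y + 1)*4. A write to offset 0x1008 is
 * therefore SQ1's tail doorbell: pos becomes 0x8 after subtracting
 * NVME_DOORBELLS_OFFSET, and index 2 after the >>= 2, matching
 * queue_index(1, false).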
1660 * 1661 */ 1662 static int 1663 handle_dbl_access(struct nvmf_vfio_user_ctrlr *ctrlr, uint32_t *buf, 1664 const size_t count, loff_t pos, const bool is_write) 1665 { 1666 assert(ctrlr != NULL); 1667 assert(buf != NULL); 1668 1669 if (count != sizeof(uint32_t)) { 1670 SPDK_ERRLOG("%s: bad doorbell buffer size %ld\n", 1671 ctrlr_id(ctrlr), count); 1672 errno = EINVAL; 1673 return -1; 1674 } 1675 1676 pos -= NVME_DOORBELLS_OFFSET; 1677 1678 /* pos must be dword aligned */ 1679 if ((pos & 0x3) != 0) { 1680 SPDK_ERRLOG("%s: bad doorbell offset %#lx\n", ctrlr_id(ctrlr), pos); 1681 errno = EINVAL; 1682 return -1; 1683 } 1684 1685 /* convert byte offset to array index */ 1686 pos >>= 2; 1687 1688 if (pos >= NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR * 2) { 1689 SPDK_ERRLOG("%s: bad doorbell index %#lx\n", ctrlr_id(ctrlr), pos); 1690 errno = EINVAL; 1691 return -1; 1692 } 1693 1694 if (is_write) { 1695 ctrlr->doorbells[pos] = *buf; 1696 spdk_wmb(); 1697 } else { 1698 spdk_rmb(); 1699 *buf = ctrlr->doorbells[pos]; 1700 } 1701 return 0; 1702 } 1703 1704 static ssize_t 1705 access_bar0_fn(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos, 1706 bool is_write) 1707 { 1708 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1709 struct nvmf_vfio_user_ctrlr *ctrlr; 1710 struct nvmf_vfio_user_req *req; 1711 const struct spdk_nvmf_registers *regs; 1712 int ret; 1713 1714 ctrlr = endpoint->ctrlr; 1715 1716 SPDK_DEBUGLOG(nvmf_vfio, 1717 "%s: bar0 %s ctrlr: %p, count=%zu, pos=%"PRIX64"\n", 1718 endpoint_id(endpoint), is_write ? "write" : "read", 1719 ctrlr, count, pos); 1720 1721 if (pos >= NVME_DOORBELLS_OFFSET) { 1722 /* 1723 * The fact that the doorbells can be memory mapped doesn't mean 1724 * that the client (VFIO in QEMU) is obliged to memory map them, 1725 * it might still elect to access them via regular read/write; 1726 * we might also have had disable_mappable_bar0 set. 
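         *
         * (When disable_mappable_bar0 is set, vfio_user_dev_info_fill()
         * registers BAR0 without the sparse mmap area, so every doorbell
         * access is forced through this path.)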
1727 */ 1728 ret = handle_dbl_access(ctrlr, (uint32_t *)buf, count, 1729 pos, is_write); 1730 if (ret == 0) { 1731 return count; 1732 } 1733 return ret; 1734 } 1735 1736 /* Construct a Fabric Property Get/Set command and send it */ 1737 req = get_nvmf_vfio_user_req(ctrlr->qp[0]); 1738 if (req == NULL) { 1739 errno = ENOBUFS; 1740 return -1; 1741 } 1742 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 1743 req->cc.raw = regs->cc.raw; 1744 1745 req->cb_fn = nvmf_vfio_user_prop_req_rsp; 1746 req->cb_arg = ctrlr->qp[0]; 1747 req->req.cmd->prop_set_cmd.opcode = SPDK_NVME_OPC_FABRIC; 1748 req->req.cmd->prop_set_cmd.cid = 0; 1749 req->req.cmd->prop_set_cmd.attrib.size = (count / 4) - 1; 1750 req->req.cmd->prop_set_cmd.ofst = pos; 1751 if (is_write) { 1752 req->req.cmd->prop_set_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET; 1753 if (req->req.cmd->prop_set_cmd.attrib.size) { 1754 req->req.cmd->prop_set_cmd.value.u64 = *(uint64_t *)buf; 1755 } else { 1756 req->req.cmd->prop_set_cmd.value.u32.high = 0; 1757 req->req.cmd->prop_set_cmd.value.u32.low = *(uint32_t *)buf; 1758 } 1759 } else { 1760 req->req.cmd->prop_get_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET; 1761 } 1762 req->req.length = count; 1763 req->req.data = buf; 1764 1765 spdk_nvmf_request_exec_fabrics(&req->req); 1766 1767 return count; 1768 } 1769 1770 /* 1771 * NVMe driver reads 4096 bytes, which is the extended PCI configuration space 1772 * available on PCI-X 2.0 and PCI Express buses 1773 */ 1774 static ssize_t 1775 access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, 1776 bool is_write) 1777 { 1778 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1779 1780 if (is_write) { 1781 SPDK_ERRLOG("%s: write %#lx-%#lx not supported\n", 1782 endpoint_id(endpoint), offset, offset + count); 1783 errno = EINVAL; 1784 return -1; 1785 } 1786 1787 if (offset + count > NVME_REG_CFG_SIZE) { 1788 SPDK_ERRLOG("%s: access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n", 1789 endpoint_id(endpoint), offset, count, 1790 NVME_REG_CFG_SIZE); 1791 errno = ERANGE; 1792 return -1; 1793 } 1794 1795 memcpy(buf, ((unsigned char *)endpoint->pci_config_space) + offset, count); 1796 1797 return count; 1798 } 1799 1800 static void 1801 vfio_user_log(vfu_ctx_t *vfu_ctx, int level, char const *msg) 1802 { 1803 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1804 1805 if (level >= LOG_DEBUG) { 1806 SPDK_DEBUGLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1807 } else if (level >= LOG_INFO) { 1808 SPDK_INFOLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1809 } else if (level >= LOG_NOTICE) { 1810 SPDK_NOTICELOG("%s: %s\n", endpoint_id(endpoint), msg); 1811 } else if (level >= LOG_WARNING) { 1812 SPDK_WARNLOG("%s: %s\n", endpoint_id(endpoint), msg); 1813 } else { 1814 SPDK_ERRLOG("%s: %s\n", endpoint_id(endpoint), msg); 1815 } 1816 } 1817 1818 static int 1819 vfio_user_get_log_level(void) 1820 { 1821 int level; 1822 1823 if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf_vfio")) { 1824 return LOG_DEBUG; 1825 } 1826 1827 level = spdk_log_to_syslog_level(spdk_log_get_level()); 1828 if (level < 0) { 1829 return LOG_ERR; 1830 } 1831 1832 return level; 1833 } 1834 1835 static void 1836 init_pci_config_space(vfu_pci_config_space_t *p) 1837 { 1838 /* MLBAR */ 1839 p->hdr.bars[0].raw = 0x0; 1840 /* MUBAR */ 1841 p->hdr.bars[1].raw = 0x0; 1842 1843 /* vendor specific, let's set them to zero for now */ 1844 p->hdr.bars[3].raw = 0x0; 1845 p->hdr.bars[4].raw = 0x0; 1846 
p->hdr.bars[5].raw = 0x0; 1847 1848 /* enable INTx */ 1849 p->hdr.intr.ipin = 0x1; 1850 } 1851 1852 static int 1853 vfio_user_dev_info_fill(struct nvmf_vfio_user_transport *vu_transport, 1854 struct nvmf_vfio_user_endpoint *endpoint) 1855 { 1856 int ret; 1857 ssize_t cap_offset; 1858 vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx; 1859 1860 struct pmcap pmcap = { .hdr.id = PCI_CAP_ID_PM, .pmcs.nsfrst = 0x1 }; 1861 struct pxcap pxcap = { 1862 .hdr.id = PCI_CAP_ID_EXP, 1863 .pxcaps.ver = 0x2, 1864 .pxdcap = {.rer = 0x1, .flrc = 0x1}, 1865 .pxdcap2.ctds = 0x1 1866 }; 1867 1868 struct msixcap msixcap = { 1869 .hdr.id = PCI_CAP_ID_MSIX, 1870 .mxc.ts = NVME_IRQ_MSIX_NUM - 1, 1871 .mtab = {.tbir = 0x4, .to = 0x0}, 1872 .mpba = {.pbir = 0x5, .pbao = 0x0} 1873 }; 1874 1875 struct iovec sparse_mmap[] = { 1876 { 1877 .iov_base = (void *)NVME_DOORBELLS_OFFSET, 1878 .iov_len = NVMF_VFIO_USER_DOORBELLS_SIZE, 1879 }, 1880 }; 1881 1882 ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0); 1883 if (ret < 0) { 1884 SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", vfu_ctx); 1885 return ret; 1886 } 1887 vfu_pci_set_id(vfu_ctx, 0x4e58, 0x0001, 0, 0); 1888 /* 1889 * 0x02, controller uses the NVM Express programming interface 1890 * 0x08, non-volatile memory controller 1891 * 0x01, mass storage controller 1892 */ 1893 vfu_pci_set_class(vfu_ctx, 0x01, 0x08, 0x02); 1894 1895 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pmcap); 1896 if (cap_offset < 0) { 1897 SPDK_ERRLOG("vfu_ctx %p failed add pmcap\n", vfu_ctx); 1898 return ret; 1899 } 1900 1901 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pxcap); 1902 if (cap_offset < 0) { 1903 SPDK_ERRLOG("vfu_ctx %p failed add pxcap\n", vfu_ctx); 1904 return ret; 1905 } 1906 1907 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &msixcap); 1908 if (cap_offset < 0) { 1909 SPDK_ERRLOG("vfu_ctx %p failed add msixcap\n", vfu_ctx); 1910 return ret; 1911 } 1912 1913 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, NVME_REG_CFG_SIZE, 1914 access_pci_config, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1915 if (ret < 0) { 1916 SPDK_ERRLOG("vfu_ctx %p failed to setup cfg\n", vfu_ctx); 1917 return ret; 1918 } 1919 1920 if (vu_transport->transport_opts.disable_mappable_bar0) { 1921 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1922 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1923 NULL, 0, -1, 0); 1924 } else { 1925 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1926 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1927 sparse_mmap, 1, endpoint->devmem_fd, 0); 1928 } 1929 1930 if (ret < 0) { 1931 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 0\n", vfu_ctx); 1932 return ret; 1933 } 1934 1935 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR4_REGION_IDX, NVME_BAR4_SIZE, 1936 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1937 if (ret < 0) { 1938 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 4\n", vfu_ctx); 1939 return ret; 1940 } 1941 1942 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR5_REGION_IDX, NVME_BAR5_SIZE, 1943 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1944 if (ret < 0) { 1945 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 5\n", vfu_ctx); 1946 return ret; 1947 } 1948 1949 ret = vfu_setup_device_dma(vfu_ctx, memory_region_add_cb, memory_region_remove_cb); 1950 if (ret < 0) { 1951 SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", vfu_ctx); 1952 return ret; 1953 } 1954 1955 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); 1956 if (ret < 0) { 1957 
SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", vfu_ctx); 1958 return ret; 1959 } 1960 1961 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, NVME_IRQ_MSIX_NUM); 1962 if (ret < 0) { 1963 SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", vfu_ctx); 1964 return ret; 1965 } 1966 1967 ret = vfu_realize_ctx(vfu_ctx); 1968 if (ret < 0) { 1969 SPDK_ERRLOG("vfu_ctx %p failed to realize\n", vfu_ctx); 1970 return ret; 1971 } 1972 1973 endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx); 1974 assert(endpoint->pci_config_space != NULL); 1975 init_pci_config_space(endpoint->pci_config_space); 1976 1977 assert(cap_offset != 0); 1978 endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset); 1979 1980 return 0; 1981 } 1982 1983 static void 1984 _free_ctrlr(void *ctx) 1985 { 1986 struct nvmf_vfio_user_ctrlr *ctrlr = ctx; 1987 1988 spdk_poller_unregister(&ctrlr->vfu_ctx_poller); 1989 free(ctrlr); 1990 } 1991 1992 static void 1993 free_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr, bool free_qps) 1994 { 1995 int i; 1996 assert(ctrlr != NULL); 1997 1998 SPDK_DEBUGLOG(nvmf_vfio, "free %s\n", ctrlr_id(ctrlr)); 1999 2000 if (free_qps) { 2001 for (i = 0; i < NVMF_VFIO_USER_MAX_QPAIRS_PER_CTRLR; i++) { 2002 free_qp(ctrlr, i); 2003 } 2004 } 2005 2006 if (ctrlr->thread == spdk_get_thread()) { 2007 _free_ctrlr(ctrlr); 2008 } else { 2009 spdk_thread_send_msg(ctrlr->thread, _free_ctrlr, ctrlr); 2010 } 2011 } 2012 2013 static void 2014 nvmf_vfio_user_create_ctrlr(struct nvmf_vfio_user_transport *transport, 2015 struct nvmf_vfio_user_endpoint *endpoint) 2016 { 2017 struct nvmf_vfio_user_ctrlr *ctrlr; 2018 int err = 0; 2019 2020 /* First, construct a vfio-user CUSTOM transport controller */ 2021 ctrlr = calloc(1, sizeof(*ctrlr)); 2022 if (ctrlr == NULL) { 2023 err = -ENOMEM; 2024 goto out; 2025 } 2026 ctrlr->cntlid = 0xffff; 2027 ctrlr->transport = transport; 2028 ctrlr->endpoint = endpoint; 2029 ctrlr->doorbells = endpoint->doorbells; 2030 TAILQ_INIT(&ctrlr->connected_qps); 2031 2032 /* Then, construct an admin queue pair */ 2033 err = init_qp(ctrlr, &transport->transport, NVMF_VFIO_USER_DEFAULT_AQ_DEPTH, 0); 2034 if (err != 0) { 2035 free(ctrlr); 2036 goto out; 2037 } 2038 endpoint->ctrlr = ctrlr; 2039 2040 /* Notify the generic layer about the new admin queue pair */ 2041 spdk_nvmf_tgt_new_qpair(transport->transport.tgt, &ctrlr->qp[0]->qpair); 2042 2043 out: 2044 if (err != 0) { 2045 SPDK_ERRLOG("%s: failed to create vfio-user controller: %s\n", 2046 endpoint_id(endpoint), strerror(-err)); 2047 } 2048 } 2049 2050 static int 2051 nvmf_vfio_user_listen(struct spdk_nvmf_transport *transport, 2052 const struct spdk_nvme_transport_id *trid, 2053 struct spdk_nvmf_listen_opts *listen_opts) 2054 { 2055 struct nvmf_vfio_user_transport *vu_transport; 2056 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 2057 char path[PATH_MAX] = {}; 2058 char uuid[PATH_MAX] = {}; 2059 int ret; 2060 2061 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 2062 transport); 2063 2064 pthread_mutex_lock(&vu_transport->lock); 2065 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 2066 /* Only compare traddr */ 2067 if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) { 2068 pthread_mutex_unlock(&vu_transport->lock); 2069 return -EEXIST; 2070 } 2071 } 2072 pthread_mutex_unlock(&vu_transport->lock); 2073 2074 endpoint = calloc(1, sizeof(*endpoint)); 2075 if (!endpoint) { 2076 return -ENOMEM; 2077 } 2078 2079 
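    /*
     * Illustrative summary of what follows: the endpoint is backed by a file
     * named "<traddr>/bar0", truncated to NVME_DOORBELLS_OFFSET +
     * NVMF_VFIO_USER_DOORBELLS_SIZE (0x2000) bytes; its doorbell page at file
     * offset 0x1000 is mmap()ed here and later exposed to the client as a
     * sparse-mmap area of BAR0, while the vfio-user socket itself is created
     * at "<traddr>/cntrl".
     */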
	pthread_mutex_init(&endpoint->lock, NULL);
	endpoint->devmem_fd = -1;
	memcpy(&endpoint->trid, trid, sizeof(endpoint->trid));

	ret = snprintf(path, PATH_MAX, "%s/bar0", endpoint_id(endpoint));
	if (ret < 0 || ret >= PATH_MAX) {
		SPDK_ERRLOG("%s: failed to format bar0 path: %s.\n", endpoint_id(endpoint), spdk_strerror(errno));
		ret = -1;
		goto out;
	}

	ret = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (ret == -1) {
		SPDK_ERRLOG("%s: failed to open device memory at %s: %s.\n",
			    endpoint_id(endpoint), path, spdk_strerror(errno));
		goto out;
	}

	endpoint->devmem_fd = ret;
	ret = ftruncate(endpoint->devmem_fd,
			NVME_DOORBELLS_OFFSET + NVMF_VFIO_USER_DOORBELLS_SIZE);
	if (ret != 0) {
		SPDK_ERRLOG("%s: failed to ftruncate file %s: %s.\n", endpoint_id(endpoint), path,
			    spdk_strerror(errno));
		goto out;
	}

	endpoint->doorbells = mmap(NULL, NVMF_VFIO_USER_DOORBELLS_SIZE,
				   PROT_READ | PROT_WRITE, MAP_SHARED, endpoint->devmem_fd, NVME_DOORBELLS_OFFSET);
	if (endpoint->doorbells == MAP_FAILED) {
		SPDK_ERRLOG("%s: failed to mmap file %s: %s.\n", endpoint_id(endpoint), path, spdk_strerror(errno));
		endpoint->doorbells = NULL;
		ret = -1;
		goto out;
	}

	ret = snprintf(uuid, PATH_MAX, "%s/cntrl", endpoint_id(endpoint));
	if (ret < 0 || ret >= PATH_MAX) {
		SPDK_ERRLOG("%s: failed to format controller socket path: %s\n", endpoint_id(endpoint), spdk_strerror(errno));
		ret = -1;
		goto out;
	}
	endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, uuid, LIBVFIO_USER_FLAG_ATTACH_NB,
					   endpoint, VFU_DEV_TYPE_PCI);
	if (endpoint->vfu_ctx == NULL) {
		SPDK_ERRLOG("%s: failed to create libvfio-user context: %m\n",
			    endpoint_id(endpoint));
		ret = -1;
		goto out;
	}
	vfu_setup_log(endpoint->vfu_ctx, vfio_user_log, vfio_user_get_log_level());

	ret = vfio_user_dev_info_fill(vu_transport, endpoint);
	if (ret < 0) {
		goto out;
	}

	TAILQ_INSERT_TAIL(&vu_transport->endpoints, endpoint, link);
	SPDK_DEBUGLOG(nvmf_vfio, "%s: doorbells %p\n", uuid, endpoint->doorbells);

out:
	if (ret != 0) {
		nvmf_vfio_user_destroy_endpoint(endpoint);
	}

	return ret;
}

static void
nvmf_vfio_user_stop_listen(struct spdk_nvmf_transport *transport,
			   const struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_transport *vu_transport;
	struct nvmf_vfio_user_endpoint *endpoint, *tmp;

	assert(trid != NULL);
	assert(trid->traddr != NULL);

	SPDK_DEBUGLOG(nvmf_vfio, "%s: stop listen\n", trid->traddr);

	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
					transport);

	pthread_mutex_lock(&vu_transport->lock);
	TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
		if (strcmp(trid->traddr, endpoint->trid.traddr) == 0) {
			TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link);
			if (endpoint->ctrlr) {
				/* The user may kill the NVMe-oF target while the VM
				 * is still connected; free all resources.
2169 */ 2170 free_ctrlr(endpoint->ctrlr, true); 2171 } 2172 nvmf_vfio_user_destroy_endpoint(endpoint); 2173 pthread_mutex_unlock(&vu_transport->lock); 2174 2175 return; 2176 } 2177 } 2178 pthread_mutex_unlock(&vu_transport->lock); 2179 2180 SPDK_DEBUGLOG(nvmf_vfio, "%s: not found\n", trid->traddr); 2181 } 2182 2183 static void 2184 nvmf_vfio_user_cdata_init(struct spdk_nvmf_transport *transport, 2185 struct spdk_nvmf_subsystem *subsystem, 2186 struct spdk_nvmf_ctrlr_data *cdata) 2187 { 2188 memset(&cdata->sgls, 0, sizeof(struct spdk_nvme_cdata_sgls)); 2189 cdata->sgls.supported = SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED; 2190 /* libvfio-user can only support 1 connection for now */ 2191 cdata->oncs.reservations = 0; 2192 } 2193 2194 static int 2195 nvmf_vfio_user_listen_associate(struct spdk_nvmf_transport *transport, 2196 const struct spdk_nvmf_subsystem *subsystem, 2197 const struct spdk_nvme_transport_id *trid) 2198 { 2199 struct nvmf_vfio_user_transport *vu_transport; 2200 struct nvmf_vfio_user_endpoint *endpoint; 2201 2202 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, transport); 2203 2204 TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) { 2205 if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) { 2206 break; 2207 } 2208 } 2209 2210 if (endpoint == NULL) { 2211 return -ENOENT; 2212 } 2213 2214 endpoint->subsystem = subsystem; 2215 2216 return 0; 2217 } 2218 2219 /* 2220 * Executed periodically at a default SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US 2221 * frequency. 2222 * 2223 * For each transport endpoint (which at the libvfio-user level corresponds to 2224 * a socket), if we don't currently have a controller set up, peek to see if the 2225 * socket is able to accept a new connection. 2226 * 2227 * This poller also takes care of handling the creation of any pending new 2228 * qpairs. 2229 * 2230 * Returns the number of events handled. 
2231 */ 2232 static uint32_t 2233 nvmf_vfio_user_accept(struct spdk_nvmf_transport *transport) 2234 { 2235 struct nvmf_vfio_user_transport *vu_transport; 2236 struct nvmf_vfio_user_endpoint *endpoint; 2237 uint32_t count = 0; 2238 int err; 2239 2240 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 2241 transport); 2242 2243 pthread_mutex_lock(&vu_transport->lock); 2244 2245 TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) { 2246 if (endpoint->ctrlr != NULL) { 2247 continue; 2248 } 2249 2250 err = vfu_attach_ctx(endpoint->vfu_ctx); 2251 if (err != 0) { 2252 if (errno == EAGAIN || errno == EWOULDBLOCK) { 2253 continue; 2254 } 2255 2256 pthread_mutex_unlock(&vu_transport->lock); 2257 return 1; 2258 } 2259 2260 count++; 2261 2262 /* Construct a controller */ 2263 nvmf_vfio_user_create_ctrlr(vu_transport, endpoint); 2264 } 2265 2266 pthread_mutex_unlock(&vu_transport->lock); 2267 2268 return count; 2269 } 2270 2271 static void 2272 nvmf_vfio_user_discover(struct spdk_nvmf_transport *transport, 2273 struct spdk_nvme_transport_id *trid, 2274 struct spdk_nvmf_discovery_log_page_entry *entry) 2275 { } 2276 2277 static struct spdk_nvmf_transport_poll_group * 2278 nvmf_vfio_user_poll_group_create(struct spdk_nvmf_transport *transport) 2279 { 2280 struct nvmf_vfio_user_poll_group *vu_group; 2281 2282 SPDK_DEBUGLOG(nvmf_vfio, "create poll group\n"); 2283 2284 vu_group = calloc(1, sizeof(*vu_group)); 2285 if (vu_group == NULL) { 2286 SPDK_ERRLOG("Error allocating poll group: %m"); 2287 return NULL; 2288 } 2289 2290 TAILQ_INIT(&vu_group->qps); 2291 2292 return &vu_group->group; 2293 } 2294 2295 /* called when process exits */ 2296 static void 2297 nvmf_vfio_user_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 2298 { 2299 struct nvmf_vfio_user_poll_group *vu_group; 2300 2301 SPDK_DEBUGLOG(nvmf_vfio, "destroy poll group\n"); 2302 2303 vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group); 2304 2305 free(vu_group); 2306 } 2307 2308 static void 2309 vfio_user_qpair_disconnect_cb(void *ctx) 2310 { 2311 struct nvmf_vfio_user_endpoint *endpoint = ctx; 2312 struct nvmf_vfio_user_ctrlr *ctrlr; 2313 2314 pthread_mutex_lock(&endpoint->lock); 2315 ctrlr = endpoint->ctrlr; 2316 if (!ctrlr) { 2317 pthread_mutex_unlock(&endpoint->lock); 2318 return; 2319 } 2320 2321 if (TAILQ_EMPTY(&ctrlr->connected_qps)) { 2322 endpoint->ctrlr = NULL; 2323 free_ctrlr(ctrlr, false); 2324 } 2325 pthread_mutex_unlock(&endpoint->lock); 2326 } 2327 2328 static void 2329 _vfio_user_qpair_disconnect(void *ctx) 2330 { 2331 struct nvmf_vfio_user_qpair *vu_qpair = ctx; 2332 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 2333 struct nvmf_vfio_user_endpoint *endpoint; 2334 2335 vu_ctrlr = vu_qpair->ctrlr; 2336 endpoint = vu_ctrlr->endpoint; 2337 2338 spdk_nvmf_qpair_disconnect(&vu_qpair->qpair, vfio_user_qpair_disconnect_cb, endpoint); 2339 } 2340 2341 static int 2342 vfio_user_destroy_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 2343 { 2344 struct nvmf_vfio_user_qpair *qpair; 2345 struct nvmf_vfio_user_endpoint *endpoint; 2346 2347 SPDK_DEBUGLOG(nvmf_vfio, "%s stop processing\n", ctrlr_id(ctrlr)); 2348 2349 endpoint = ctrlr->endpoint; 2350 assert(endpoint != NULL); 2351 2352 pthread_mutex_lock(&endpoint->lock); 2353 if (TAILQ_EMPTY(&ctrlr->connected_qps)) { 2354 endpoint->ctrlr = NULL; 2355 free_ctrlr(ctrlr, false); 2356 pthread_mutex_unlock(&endpoint->lock); 2357 return 0; 2358 } 2359 2360 TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) { 2361 /* add another round thread poll to 
avoid recursive endpoint lock */ 2362 spdk_thread_send_msg(ctrlr->thread, _vfio_user_qpair_disconnect, qpair); 2363 } 2364 pthread_mutex_unlock(&endpoint->lock); 2365 2366 return 0; 2367 } 2368 2369 /* 2370 * Poll for and process any incoming vfio-user messages. 2371 */ 2372 static int 2373 vfio_user_poll_vfu_ctx(void *ctx) 2374 { 2375 struct nvmf_vfio_user_ctrlr *ctrlr = ctx; 2376 int ret; 2377 2378 assert(ctrlr != NULL); 2379 2380 /* This will call access_bar0_fn() if there are any writes 2381 * to the portion of the BAR that is not mmap'd */ 2382 ret = vfu_run_ctx(ctrlr->endpoint->vfu_ctx); 2383 if (spdk_unlikely(ret == -1)) { 2384 if (errno == EBUSY) { 2385 return SPDK_POLLER_BUSY; 2386 } 2387 2388 spdk_poller_unregister(&ctrlr->vfu_ctx_poller); 2389 2390 /* initiator shutdown or reset, waiting for another re-connect */ 2391 if (errno == ENOTCONN) { 2392 vfio_user_destroy_ctrlr(ctrlr); 2393 return SPDK_POLLER_BUSY; 2394 } 2395 2396 fail_ctrlr(ctrlr); 2397 } 2398 2399 return ret != 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 2400 } 2401 2402 static int 2403 handle_queue_connect_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 2404 { 2405 struct nvmf_vfio_user_poll_group *vu_group; 2406 struct nvmf_vfio_user_qpair *vu_qpair = cb_arg; 2407 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 2408 struct nvmf_vfio_user_endpoint *endpoint; 2409 2410 assert(vu_qpair != NULL); 2411 assert(req != NULL); 2412 2413 vu_ctrlr = vu_qpair->ctrlr; 2414 assert(vu_ctrlr != NULL); 2415 endpoint = vu_ctrlr->endpoint; 2416 assert(endpoint != NULL); 2417 2418 if (spdk_nvme_cpl_is_error(&req->req.rsp->nvme_cpl)) { 2419 SPDK_ERRLOG("SC %u, SCT %u\n", req->req.rsp->nvme_cpl.status.sc, req->req.rsp->nvme_cpl.status.sct); 2420 endpoint->ctrlr = NULL; 2421 free_ctrlr(vu_ctrlr, true); 2422 return -1; 2423 } 2424 2425 vu_group = SPDK_CONTAINEROF(vu_qpair->group, struct nvmf_vfio_user_poll_group, group); 2426 TAILQ_INSERT_TAIL(&vu_group->qps, vu_qpair, link); 2427 vu_qpair->state = VFIO_USER_QPAIR_ACTIVE; 2428 2429 pthread_mutex_lock(&endpoint->lock); 2430 if (nvmf_qpair_is_admin_queue(&vu_qpair->qpair)) { 2431 vu_ctrlr->cntlid = vu_qpair->qpair.ctrlr->cntlid; 2432 vu_ctrlr->thread = spdk_get_thread(); 2433 vu_ctrlr->ctrlr = vu_qpair->qpair.ctrlr; 2434 vu_ctrlr->vfu_ctx_poller = SPDK_POLLER_REGISTER(vfio_user_poll_vfu_ctx, vu_ctrlr, 0); 2435 } else { 2436 /* For I/O queues this command was generated in response to an 2437 * ADMIN I/O CREATE SUBMISSION QUEUE command which has not yet 2438 * been completed. Complete it now. 2439 */ 2440 post_completion(vu_ctrlr, &vu_ctrlr->qp[0]->cq, 0, 0, 2441 vu_qpair->create_io_sq_cmd.cid, SPDK_NVME_SC_SUCCESS, SPDK_NVME_SCT_GENERIC); 2442 } 2443 TAILQ_INSERT_TAIL(&vu_ctrlr->connected_qps, vu_qpair, tailq); 2444 pthread_mutex_unlock(&endpoint->lock); 2445 2446 free(req->req.data); 2447 req->req.data = NULL; 2448 2449 return 0; 2450 } 2451 2452 /* 2453 * Add the given qpair to the given poll group. New qpairs are added via 2454 * spdk_nvmf_tgt_new_qpair(), which picks a poll group, then calls back 2455 * here via nvmf_transport_poll_group_add(). 
2456 */ 2457 static int 2458 nvmf_vfio_user_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 2459 struct spdk_nvmf_qpair *qpair) 2460 { 2461 struct nvmf_vfio_user_qpair *vu_qpair; 2462 struct nvmf_vfio_user_req *vu_req; 2463 struct nvmf_vfio_user_ctrlr *ctrlr; 2464 struct spdk_nvmf_request *req; 2465 struct spdk_nvmf_fabric_connect_data *data; 2466 bool admin; 2467 2468 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2469 vu_qpair->group = group; 2470 ctrlr = vu_qpair->ctrlr; 2471 2472 SPDK_DEBUGLOG(nvmf_vfio, "%s: add QP%d=%p(%p) to poll_group=%p\n", 2473 ctrlr_id(ctrlr), vu_qpair->qpair.qid, 2474 vu_qpair, qpair, group); 2475 2476 admin = nvmf_qpair_is_admin_queue(&vu_qpair->qpair); 2477 2478 vu_req = get_nvmf_vfio_user_req(vu_qpair); 2479 if (vu_req == NULL) { 2480 return -1; 2481 } 2482 2483 req = &vu_req->req; 2484 req->cmd->connect_cmd.opcode = SPDK_NVME_OPC_FABRIC; 2485 req->cmd->connect_cmd.cid = 0; 2486 req->cmd->connect_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT; 2487 req->cmd->connect_cmd.recfmt = 0; 2488 req->cmd->connect_cmd.sqsize = vu_qpair->qsize - 1; 2489 req->cmd->connect_cmd.qid = admin ? 0 : qpair->qid; 2490 2491 req->length = sizeof(struct spdk_nvmf_fabric_connect_data); 2492 req->data = calloc(1, req->length); 2493 if (req->data == NULL) { 2494 nvmf_vfio_user_req_free(req); 2495 return -ENOMEM; 2496 } 2497 2498 data = (struct spdk_nvmf_fabric_connect_data *)req->data; 2499 data->cntlid = admin ? 0xFFFF : ctrlr->cntlid; 2500 snprintf(data->subnqn, sizeof(data->subnqn), "%s", 2501 spdk_nvmf_subsystem_get_nqn(ctrlr->endpoint->subsystem)); 2502 2503 vu_req->cb_fn = handle_queue_connect_rsp; 2504 vu_req->cb_arg = vu_qpair; 2505 2506 SPDK_DEBUGLOG(nvmf_vfio, 2507 "%s: sending connect fabrics command for QID=%#x cntlid=%#x\n", 2508 ctrlr_id(ctrlr), qpair->qid, data->cntlid); 2509 2510 spdk_nvmf_request_exec_fabrics(req); 2511 return 0; 2512 } 2513 2514 static int 2515 nvmf_vfio_user_poll_group_remove(struct spdk_nvmf_transport_poll_group *group, 2516 struct spdk_nvmf_qpair *qpair) 2517 { 2518 struct nvmf_vfio_user_qpair *vu_qpair; 2519 struct nvmf_vfio_user_poll_group *vu_group; 2520 2521 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2522 2523 SPDK_DEBUGLOG(nvmf_vfio, 2524 "%s: remove NVMf QP%d=%p from NVMf poll_group=%p\n", 2525 ctrlr_id(vu_qpair->ctrlr), qpair->qid, qpair, group); 2526 2527 2528 vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group); 2529 TAILQ_REMOVE(&vu_group->qps, vu_qpair, link); 2530 2531 return 0; 2532 } 2533 2534 static void 2535 _nvmf_vfio_user_req_free(struct nvmf_vfio_user_qpair *vu_qpair, struct nvmf_vfio_user_req *vu_req) 2536 { 2537 memset(&vu_req->cmd, 0, sizeof(vu_req->cmd)); 2538 memset(&vu_req->rsp, 0, sizeof(vu_req->rsp)); 2539 vu_req->iovcnt = 0; 2540 vu_req->state = VFIO_USER_REQUEST_STATE_FREE; 2541 2542 TAILQ_INSERT_TAIL(&vu_qpair->reqs, vu_req, link); 2543 } 2544 2545 static int 2546 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req) 2547 { 2548 struct nvmf_vfio_user_qpair *vu_qpair; 2549 struct nvmf_vfio_user_req *vu_req; 2550 2551 assert(req != NULL); 2552 2553 vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req); 2554 vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair); 2555 2556 _nvmf_vfio_user_req_free(vu_qpair, vu_req); 2557 2558 return 0; 2559 } 2560 2561 static int 2562 nvmf_vfio_user_req_complete(struct spdk_nvmf_request *req) 2563 { 2564 struct nvmf_vfio_user_qpair *vu_qpair; 2565 struct 
nvmf_vfio_user_req *vu_req; 2566 2567 assert(req != NULL); 2568 2569 vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req); 2570 vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair); 2571 2572 if (vu_req->cb_fn != NULL) { 2573 if (vu_req->cb_fn(vu_req, vu_req->cb_arg) != 0) { 2574 fail_ctrlr(vu_qpair->ctrlr); 2575 } 2576 } 2577 2578 _nvmf_vfio_user_req_free(vu_qpair, vu_req); 2579 2580 return 0; 2581 } 2582 2583 static void 2584 nvmf_vfio_user_close_qpair(struct spdk_nvmf_qpair *qpair, 2585 spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg) 2586 { 2587 struct nvmf_vfio_user_qpair *vu_qpair; 2588 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 2589 2590 assert(qpair != NULL); 2591 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2592 vu_ctrlr = vu_qpair->ctrlr; 2593 2594 pthread_mutex_lock(&vu_ctrlr->endpoint->lock); 2595 TAILQ_REMOVE(&vu_ctrlr->connected_qps, vu_qpair, tailq); 2596 pthread_mutex_unlock(&vu_ctrlr->endpoint->lock); 2597 2598 free_qp(vu_ctrlr, qpair->qid); 2599 2600 if (cb_fn) { 2601 cb_fn(cb_arg); 2602 } 2603 } 2604 2605 /** 2606 * Returns a preallocated spdk_nvmf_request or NULL if there isn't one available. 2607 */ 2608 static struct nvmf_vfio_user_req * 2609 get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair) 2610 { 2611 struct nvmf_vfio_user_req *req; 2612 2613 assert(qpair != NULL); 2614 2615 if (TAILQ_EMPTY(&qpair->reqs)) { 2616 return NULL; 2617 } 2618 2619 req = TAILQ_FIRST(&qpair->reqs); 2620 TAILQ_REMOVE(&qpair->reqs, req, link); 2621 2622 return req; 2623 } 2624 2625 static int 2626 get_nvmf_io_req_length(struct spdk_nvmf_request *req) 2627 { 2628 uint16_t nr; 2629 uint32_t nlb, nsid; 2630 struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; 2631 struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr; 2632 struct spdk_nvmf_ns *ns; 2633 2634 nsid = cmd->nsid; 2635 ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid); 2636 if (ns == NULL || ns->bdev == NULL) { 2637 SPDK_ERRLOG("unsuccessful query for nsid %u\n", cmd->nsid); 2638 return -EINVAL; 2639 } 2640 2641 if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) { 2642 nr = cmd->cdw10_bits.dsm.nr + 1; 2643 return nr * sizeof(struct spdk_nvme_dsm_range); 2644 } 2645 2646 nlb = (cmd->cdw12 & 0x0000ffffu) + 1; 2647 return nlb * spdk_bdev_get_block_size(ns->bdev); 2648 } 2649 2650 static int 2651 map_admin_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req) 2652 { 2653 struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; 2654 uint32_t len = 0; 2655 uint8_t fid; 2656 int iovcnt; 2657 2658 req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); 2659 req->length = 0; 2660 req->data = NULL; 2661 2662 if (req->xfer == SPDK_NVME_DATA_NONE) { 2663 return 0; 2664 } 2665 2666 switch (cmd->opc) { 2667 case SPDK_NVME_OPC_IDENTIFY: 2668 len = 4096; 2669 break; 2670 case SPDK_NVME_OPC_GET_LOG_PAGE: 2671 len = (((cmd->cdw11_bits.get_log_page.numdu << 16) | cmd->cdw10_bits.get_log_page.numdl) + 1) * 4; 2672 break; 2673 case SPDK_NVME_OPC_GET_FEATURES: 2674 case SPDK_NVME_OPC_SET_FEATURES: 2675 fid = cmd->cdw10_bits.set_features.fid; 2676 switch (fid) { 2677 case SPDK_NVME_FEAT_LBA_RANGE_TYPE: 2678 len = 4096; 2679 break; 2680 case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: 2681 len = 256; 2682 break; 2683 case SPDK_NVME_FEAT_TIMESTAMP: 2684 len = 8; 2685 break; 2686 case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT: 2687 len = 512; 2688 break; 2689 case SPDK_NVME_FEAT_HOST_IDENTIFIER: 2690 if (cmd->cdw11_bits.feat_host_identifier.bits.exhid) { 2691 len = 16; 2692 } else { 2693 
len = 8; 2694 } 2695 break; 2696 default: 2697 return 0; 2698 } 2699 break; 2700 default: 2701 return 0; 2702 } 2703 2704 /* ADMIN command will not use SGL */ 2705 if (cmd->psdt != 0) { 2706 return -EINVAL; 2707 } 2708 2709 iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, len); 2710 if (iovcnt < 0) { 2711 SPDK_ERRLOG("%s: map Admin Opc %x failed\n", 2712 ctrlr_id(ctrlr), cmd->opc); 2713 return -1; 2714 } 2715 req->length = len; 2716 req->data = req->iov[0].iov_base; 2717 req->iovcnt = iovcnt; 2718 2719 return 0; 2720 } 2721 2722 /* 2723 * Map an I/O command's buffers. 2724 * 2725 * Returns 0 on success and -errno on failure. 2726 */ 2727 static int 2728 map_io_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req) 2729 { 2730 int len, iovcnt; 2731 struct spdk_nvme_cmd *cmd; 2732 2733 assert(ctrlr != NULL); 2734 assert(req != NULL); 2735 2736 cmd = &req->cmd->nvme_cmd; 2737 req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); 2738 req->length = 0; 2739 req->data = NULL; 2740 2741 if (spdk_unlikely(req->xfer == SPDK_NVME_DATA_NONE)) { 2742 return 0; 2743 } 2744 2745 len = get_nvmf_io_req_length(req); 2746 if (len < 0) { 2747 return -EINVAL; 2748 } 2749 req->length = len; 2750 2751 iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, req->length); 2752 if (iovcnt < 0) { 2753 SPDK_ERRLOG("%s: failed to map IO OPC %u\n", ctrlr_id(ctrlr), cmd->opc); 2754 return -EFAULT; 2755 } 2756 req->data = req->iov[0].iov_base; 2757 req->iovcnt = iovcnt; 2758 2759 return 0; 2760 } 2761 2762 static int 2763 handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd, 2764 struct nvmf_vfio_user_qpair *vu_qpair) 2765 { 2766 int err; 2767 struct nvmf_vfio_user_req *vu_req; 2768 struct spdk_nvmf_request *req; 2769 2770 assert(ctrlr != NULL); 2771 assert(cmd != NULL); 2772 2773 vu_req = get_nvmf_vfio_user_req(vu_qpair); 2774 if (spdk_unlikely(vu_req == NULL)) { 2775 SPDK_ERRLOG("%s: no request for NVMe command opc 0x%x\n", ctrlr_id(ctrlr), cmd->opc); 2776 return post_completion(ctrlr, &vu_qpair->cq, 0, 0, cmd->cid, 2777 SPDK_NVME_SC_INTERNAL_DEVICE_ERROR, SPDK_NVME_SCT_GENERIC); 2778 2779 } 2780 req = &vu_req->req; 2781 2782 assert(req->qpair != NULL); 2783 SPDK_DEBUGLOG(nvmf_vfio, "%s: handle qid%u, req opc=%#x cid=%d\n", 2784 ctrlr_id(ctrlr), req->qpair->qid, cmd->opc, cmd->cid); 2785 2786 vu_req->cb_fn = handle_cmd_rsp; 2787 vu_req->cb_arg = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair); 2788 req->cmd->nvme_cmd = *cmd; 2789 2790 if (nvmf_qpair_is_admin_queue(req->qpair)) { 2791 err = map_admin_cmd_req(ctrlr, req); 2792 } else { 2793 switch (cmd->opc) { 2794 case SPDK_NVME_OPC_RESERVATION_REGISTER: 2795 case SPDK_NVME_OPC_RESERVATION_REPORT: 2796 case SPDK_NVME_OPC_RESERVATION_ACQUIRE: 2797 case SPDK_NVME_OPC_RESERVATION_RELEASE: 2798 err = -ENOTSUP; 2799 break; 2800 default: 2801 err = map_io_cmd_req(ctrlr, req); 2802 break; 2803 } 2804 } 2805 2806 if (spdk_unlikely(err < 0)) { 2807 SPDK_ERRLOG("%s: process NVMe command opc 0x%x failed\n", 2808 ctrlr_id(ctrlr), cmd->opc); 2809 req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 2810 req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC; 2811 err = handle_cmd_rsp(vu_req, vu_req->cb_arg); 2812 _nvmf_vfio_user_req_free(vu_qpair, vu_req); 2813 return err; 2814 } 2815 2816 vu_req->state = VFIO_USER_REQUEST_STATE_EXECUTING; 2817 spdk_nvmf_request_exec(req); 2818 2819 return 0; 2820 } 2821 2822 /* Returns the number of commands processed, or a negative value on error. 
*/ 2823 static int 2824 nvmf_vfio_user_qpair_poll(struct nvmf_vfio_user_qpair *qpair) 2825 { 2826 struct nvmf_vfio_user_ctrlr *ctrlr; 2827 uint32_t new_tail; 2828 int count = 0; 2829 2830 assert(qpair != NULL); 2831 2832 ctrlr = qpair->ctrlr; 2833 2834 /* Load-Acquire. */ 2835 new_tail = *tdbl(ctrlr, &qpair->sq); 2836 2837 /* 2838 * Ensure that changes to the queue are visible to us. 2839 * The host driver should write the queue first, do a wmb(), and then 2840 * update the SQ tail doorbell (their Store-Release). 2841 */ 2842 spdk_rmb(); 2843 2844 new_tail = new_tail & 0xffffu; 2845 if (spdk_unlikely(new_tail >= qpair->sq.size)) { 2846 union spdk_nvme_async_event_completion event = {}; 2847 2848 SPDK_DEBUGLOG(nvmf_vfio, "%s: invalid SQ%u doorbell value %u\n", ctrlr_id(ctrlr), qpair->qpair.qid, 2849 new_tail); 2850 event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_ERROR; 2851 event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_INVALID_DB_WRITE; 2852 nvmf_ctrlr_async_event_error_event(qpair->qpair.ctrlr, event); 2853 2854 return 0; 2855 } 2856 2857 if (sq_head(qpair) == new_tail) { 2858 return 0; 2859 } 2860 2861 count = handle_sq_tdbl_write(ctrlr, new_tail, qpair); 2862 if (count < 0) { 2863 fail_ctrlr(ctrlr); 2864 } 2865 2866 return count; 2867 } 2868 2869 /* 2870 * vfio-user transport poll handler. Note that the library context is polled in 2871 * a separate poller (->vfu_ctx_poller), so this poller only needs to poll the 2872 * active qpairs. 2873 * 2874 * Returns the number of commands processed, or a negative value on error. 2875 */ 2876 static int 2877 nvmf_vfio_user_poll_group_poll(struct spdk_nvmf_transport_poll_group *group) 2878 { 2879 struct nvmf_vfio_user_poll_group *vu_group; 2880 struct nvmf_vfio_user_qpair *vu_qpair, *tmp; 2881 int count = 0; 2882 2883 assert(group != NULL); 2884 2885 spdk_rmb(); 2886 2887 vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group); 2888 2889 TAILQ_FOREACH_SAFE(vu_qpair, &vu_group->qps, link, tmp) { 2890 int ret; 2891 2892 if (spdk_unlikely(vu_qpair->state != VFIO_USER_QPAIR_ACTIVE || !vu_qpair->sq.size)) { 2893 continue; 2894 } 2895 2896 ret = nvmf_vfio_user_qpair_poll(vu_qpair); 2897 2898 if (ret < 0) { 2899 return ret; 2900 } 2901 2902 count += ret; 2903 } 2904 2905 return count; 2906 } 2907 2908 static int 2909 nvmf_vfio_user_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair, 2910 struct spdk_nvme_transport_id *trid) 2911 { 2912 struct nvmf_vfio_user_qpair *vu_qpair; 2913 struct nvmf_vfio_user_ctrlr *ctrlr; 2914 2915 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2916 ctrlr = vu_qpair->ctrlr; 2917 2918 memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid)); 2919 return 0; 2920 } 2921 2922 static int 2923 nvmf_vfio_user_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair, 2924 struct spdk_nvme_transport_id *trid) 2925 { 2926 return 0; 2927 } 2928 2929 static int 2930 nvmf_vfio_user_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair, 2931 struct spdk_nvme_transport_id *trid) 2932 { 2933 struct nvmf_vfio_user_qpair *vu_qpair; 2934 struct nvmf_vfio_user_ctrlr *ctrlr; 2935 2936 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2937 ctrlr = vu_qpair->ctrlr; 2938 2939 memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid)); 2940 return 0; 2941 } 2942 2943 static void 2944 nvmf_vfio_user_qpair_abort_request(struct spdk_nvmf_qpair *qpair, 2945 struct spdk_nvmf_request *req) 2946 { 2947 struct nvmf_vfio_user_qpair *vu_qpair; 2948 struct nvmf_vfio_user_req *vu_req, *vu_req_to_abort = 
NULL; 2949 uint32_t i; 2950 uint16_t cid; 2951 2952 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2953 2954 cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid; 2955 for (i = 0; i < vu_qpair->qsize; i++) { 2956 vu_req = &vu_qpair->reqs_internal[i]; 2957 if (vu_req->state == VFIO_USER_REQUEST_STATE_EXECUTING && vu_req->cmd.cid == cid) { 2958 vu_req_to_abort = vu_req; 2959 break; 2960 } 2961 } 2962 2963 if (vu_req_to_abort == NULL) { 2964 spdk_nvmf_request_complete(req); 2965 return; 2966 } 2967 2968 req->req_to_abort = &vu_req_to_abort->req; 2969 nvmf_ctrlr_abort_request(req); 2970 } 2971 2972 static void 2973 nvmf_vfio_user_opts_init(struct spdk_nvmf_transport_opts *opts) 2974 { 2975 opts->max_queue_depth = NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH; 2976 opts->max_qpairs_per_ctrlr = NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; 2977 opts->in_capsule_data_size = 0; 2978 opts->max_io_size = NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE; 2979 opts->io_unit_size = NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE; 2980 opts->max_aq_depth = NVMF_VFIO_USER_DEFAULT_AQ_DEPTH; 2981 opts->num_shared_buffers = 0; 2982 opts->buf_cache_size = 0; 2983 opts->association_timeout = 0; 2984 opts->transport_specific = NULL; 2985 } 2986 2987 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_vfio_user = { 2988 .name = "VFIOUSER", 2989 .type = SPDK_NVME_TRANSPORT_VFIOUSER, 2990 .opts_init = nvmf_vfio_user_opts_init, 2991 .create = nvmf_vfio_user_create, 2992 .destroy = nvmf_vfio_user_destroy, 2993 2994 .listen = nvmf_vfio_user_listen, 2995 .stop_listen = nvmf_vfio_user_stop_listen, 2996 .accept = nvmf_vfio_user_accept, 2997 .cdata_init = nvmf_vfio_user_cdata_init, 2998 .listen_associate = nvmf_vfio_user_listen_associate, 2999 3000 .listener_discover = nvmf_vfio_user_discover, 3001 3002 .poll_group_create = nvmf_vfio_user_poll_group_create, 3003 .poll_group_destroy = nvmf_vfio_user_poll_group_destroy, 3004 .poll_group_add = nvmf_vfio_user_poll_group_add, 3005 .poll_group_remove = nvmf_vfio_user_poll_group_remove, 3006 .poll_group_poll = nvmf_vfio_user_poll_group_poll, 3007 3008 .req_free = nvmf_vfio_user_req_free, 3009 .req_complete = nvmf_vfio_user_req_complete, 3010 3011 .qpair_fini = nvmf_vfio_user_close_qpair, 3012 .qpair_get_local_trid = nvmf_vfio_user_qpair_get_local_trid, 3013 .qpair_get_peer_trid = nvmf_vfio_user_qpair_get_peer_trid, 3014 .qpair_get_listen_trid = nvmf_vfio_user_qpair_get_listen_trid, 3015 .qpair_abort_request = nvmf_vfio_user_qpair_abort_request, 3016 }; 3017 3018 SPDK_NVMF_TRANSPORT_REGISTER(muser, &spdk_nvmf_transport_vfio_user); 3019 SPDK_LOG_REGISTER_COMPONENT(nvmf_vfio) 3020
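/*
 * For reference, a minimal sketch of the doorbell publication contract that
 * nvmf_vfio_user_qpair_poll() depends on.  The example_* structure and
 * functions are illustrative only and are not part of this transport; the
 * barrier pairing, however, is the one described above: the producer makes
 * SQ entries visible before it writes the tail doorbell (spdk_wmb()), and
 * the consumer reads the doorbell before it reads the entries (spdk_rmb()).
 */
#if 0
struct example_sq {
	struct spdk_nvme_cmd	*slots;		/* shared submission queue memory */
	volatile uint32_t	*tail_doorbell;	/* shared doorbell, e.g. the mmap'd BAR0 page */
	uint32_t		size;
	uint32_t		tail;		/* producer-local shadow of the tail */
};

/* Host (producer) side: write the entry, then publish the new tail. */
static void
example_submit(struct example_sq *sq, const struct spdk_nvme_cmd *cmd)
{
	sq->slots[sq->tail] = *cmd;
	sq->tail = (sq->tail + 1) % sq->size;

	/* Ensure the SQ entry is visible before the doorbell write. */
	spdk_wmb();
	*sq->tail_doorbell = sq->tail;
}

/* Target (consumer) side: read the tail doorbell, then consume entries. */
static uint32_t
example_poll(struct example_sq *sq, uint32_t *head)
{
	uint32_t new_tail = *sq->tail_doorbell;
	uint32_t count = 0;

	/* Pairs with the producer's spdk_wmb() above. */
	spdk_rmb();

	while (*head != new_tail) {
		/* process sq->slots[*head] here */
		*head = (*head + 1) % sq->size;
		count++;
	}

	return count;
}
#endif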