1 /*- 2 * BSD LICENSE 3 * Copyright (c) Intel Corporation. All rights reserved. 4 * Copyright (c) 2019, Nutanix Inc. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * * Neither the name of Intel Corporation nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * NVMe over vfio-user transport 35 */ 36 37 #include <vfio-user/libvfio-user.h> 38 #include <vfio-user/pci_defs.h> 39 40 #include "spdk/barrier.h" 41 #include "spdk/stdinc.h" 42 #include "spdk/assert.h" 43 #include "spdk/thread.h" 44 #include "spdk/nvmf_transport.h" 45 #include "spdk/sock.h" 46 #include "spdk/string.h" 47 #include "spdk/util.h" 48 #include "spdk/log.h" 49 50 #include "transport.h" 51 52 #include "nvmf_internal.h" 53 54 #define NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH 256 55 #define NVMF_VFIO_USER_DEFAULT_AQ_DEPTH 32 56 #define NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 64 57 #define NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE ((NVMF_REQ_MAX_BUFFERS - 1) << SHIFT_4KB) 58 #define NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE 59 60 #define NVMF_VFIO_USER_DOORBELLS_OFFSET 0x1000 61 #define NVMF_VFIO_USER_DOORBELLS_SIZE 0x1000 62 63 #define NVME_REG_CFG_SIZE 0x1000 64 #define NVME_REG_BAR0_SIZE 0x4000 65 #define NVME_IRQ_MSIX_NUM NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 66 67 struct nvmf_vfio_user_req; 68 struct nvmf_vfio_user_qpair; 69 70 typedef int (*nvmf_vfio_user_req_cb_fn)(struct nvmf_vfio_user_req *req, void *cb_arg); 71 72 /* 1 more for PRP2 list itself */ 73 #define NVMF_VFIO_USER_MAX_IOVECS (NVMF_REQ_MAX_BUFFERS + 1) 74 75 enum nvmf_vfio_user_req_state { 76 VFIO_USER_REQUEST_STATE_FREE = 0, 77 VFIO_USER_REQUEST_STATE_EXECUTING, 78 }; 79 80 struct nvmf_vfio_user_req { 81 struct spdk_nvmf_request req; 82 struct spdk_nvme_cpl rsp; 83 struct spdk_nvme_cmd cmd; 84 85 enum nvmf_vfio_user_req_state state; 86 nvmf_vfio_user_req_cb_fn cb_fn; 87 void *cb_arg; 88 89 /* old CC before prop_set_cc fabric command */ 90 union spdk_nvme_cc_register cc; 91 92 /* placeholder for gpa_to_vva memory map table, the IO buffer doesn't use it */ 93 dma_sg_t *sg; 94 struct iovec iov[NVMF_VFIO_USER_MAX_IOVECS]; 
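	/*
	 * sg and iov are parallel arrays: sg entry i holds the dma_sg_t that
	 * backs iov[i]. init_qp() allocates NVMF_VFIO_USER_MAX_IOVECS entries
	 * of dma_sg_size() bytes each, and vu_req_to_sg_t() indexes into that
	 * buffer.
	 */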
95 uint8_t iovcnt; 96 97 TAILQ_ENTRY(nvmf_vfio_user_req) link; 98 }; 99 100 /* 101 * A NVMe queue. 102 */ 103 struct nvme_q { 104 bool is_cq; 105 106 void *addr; 107 108 dma_sg_t *sg; 109 struct iovec iov; 110 111 uint32_t size; 112 uint64_t prp1; 113 114 union { 115 struct { 116 uint32_t head; 117 /* multiple SQs can be mapped to the same CQ */ 118 uint16_t cqid; 119 }; 120 struct { 121 uint32_t tail; 122 uint16_t iv; 123 bool ien; 124 bool phase; 125 }; 126 }; 127 }; 128 129 enum nvmf_vfio_user_qpair_state { 130 VFIO_USER_QPAIR_UNINITIALIZED = 0, 131 VFIO_USER_QPAIR_ACTIVE, 132 VFIO_USER_QPAIR_SQ_DELETED, 133 VFIO_USER_QPAIR_INACTIVE, 134 VFIO_USER_QPAIR_ERROR, 135 }; 136 137 struct nvmf_vfio_user_qpair { 138 struct spdk_nvmf_qpair qpair; 139 struct spdk_nvmf_transport_poll_group *group; 140 struct nvmf_vfio_user_ctrlr *ctrlr; 141 struct nvmf_vfio_user_req *reqs_internal; 142 uint32_t qsize; 143 struct nvme_q cq; 144 struct nvme_q sq; 145 enum nvmf_vfio_user_qpair_state state; 146 147 /* Copy of Create IO SQ command */ 148 struct spdk_nvme_cmd create_io_sq_cmd; 149 150 TAILQ_HEAD(, nvmf_vfio_user_req) reqs; 151 /* Poll group entry */ 152 TAILQ_ENTRY(nvmf_vfio_user_qpair) link; 153 /* Connected queue pair entry */ 154 TAILQ_ENTRY(nvmf_vfio_user_qpair) tailq; 155 }; 156 157 struct nvmf_vfio_user_poll_group { 158 struct spdk_nvmf_transport_poll_group group; 159 TAILQ_HEAD(, nvmf_vfio_user_qpair) qps; 160 }; 161 162 struct nvmf_vfio_user_ctrlr { 163 struct nvmf_vfio_user_endpoint *endpoint; 164 struct nvmf_vfio_user_transport *transport; 165 166 /* Connected queue pairs list */ 167 TAILQ_HEAD(, nvmf_vfio_user_qpair) connected_qps; 168 169 struct spdk_thread *thread; 170 struct spdk_poller *vfu_ctx_poller; 171 172 uint16_t cntlid; 173 174 struct nvmf_vfio_user_qpair *qp[NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR]; 175 176 TAILQ_ENTRY(nvmf_vfio_user_ctrlr) link; 177 178 volatile uint32_t *doorbells; 179 180 /* internal CSTS.CFS register for vfio-user fatal errors */ 181 uint32_t cfs : 1; 182 }; 183 184 struct nvmf_vfio_user_endpoint { 185 vfu_ctx_t *vfu_ctx; 186 struct msixcap *msix; 187 vfu_pci_config_space_t *pci_config_space; 188 int devmem_fd; 189 volatile uint32_t *doorbells; 190 191 struct spdk_nvme_transport_id trid; 192 const struct spdk_nvmf_subsystem *subsystem; 193 194 struct nvmf_vfio_user_ctrlr *ctrlr; 195 pthread_mutex_t lock; 196 197 TAILQ_ENTRY(nvmf_vfio_user_endpoint) link; 198 }; 199 200 struct nvmf_vfio_user_transport_opts { 201 bool disable_mappable_bar0; 202 }; 203 204 struct nvmf_vfio_user_transport { 205 struct spdk_nvmf_transport transport; 206 struct nvmf_vfio_user_transport_opts transport_opts; 207 pthread_mutex_t lock; 208 TAILQ_HEAD(, nvmf_vfio_user_endpoint) endpoints; 209 }; 210 211 /* 212 * function prototypes 213 */ 214 static volatile uint32_t * 215 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 216 217 static volatile uint32_t * 218 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 219 220 static int 221 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req); 222 223 static struct nvmf_vfio_user_req * 224 get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair); 225 226 static int 227 nvme_cmd_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, 228 uint32_t max_iovcnt, uint32_t len, size_t mps, 229 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 230 { 231 uint64_t prp1, prp2; 232 void *vva; 233 uint32_t i; 234 uint32_t residue_len, nents; 235 uint64_t *prp_list; 236 uint32_t iovcnt; 237 238 
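	/*
	 * PRP layout, per the NVMe base specification: PRP1 points at the
	 * first data page and may carry a page offset, so only the tail of
	 * that page is consumed. If the transfer extends past PRP1, PRP2 is
	 * either the second and last data page, or (for transfers spanning
	 * more than two pages) the guest physical address of a PRP list.
	 * Each guest physical page is translated to a host virtual address
	 * through the caller-supplied gpa_to_vva() callback and recorded as
	 * one iovec entry.
	 */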
	assert(max_iovcnt > 0);

	prp1 = cmd->dptr.prp.prp1;
	prp2 = cmd->dptr.prp.prp2;

	/* PRP1 may start at an unaligned page address */
	residue_len = mps - (prp1 % mps);
	residue_len = spdk_min(len, residue_len);

	vva = gpa_to_vva(prv, prp1, residue_len, PROT_READ | PROT_WRITE);
	if (spdk_unlikely(vva == NULL)) {
		SPDK_ERRLOG("GPA to VVA failed\n");
		return -EINVAL;
	}
	len -= residue_len;
	if (len && max_iovcnt < 2) {
		SPDK_ERRLOG("Too many page entries, at least two iovs are required\n");
		return -ERANGE;
	}
	iovs[0].iov_base = vva;
	iovs[0].iov_len = residue_len;

	if (len) {
		if (spdk_unlikely(prp2 == 0)) {
			SPDK_ERRLOG("no PRP2, %u remaining\n", len);
			return -EINVAL;
		}

		if (len <= mps) {
			/* 2 PRP used */
			iovcnt = 2;
			vva = gpa_to_vva(prv, prp2, len, PROT_READ | PROT_WRITE);
			if (spdk_unlikely(vva == NULL)) {
				SPDK_ERRLOG("no VVA for %#" PRIx64 ", len=%#x\n",
					    prp2, len);
				return -EINVAL;
			}
			iovs[1].iov_base = vva;
			iovs[1].iov_len = len;
		} else {
			/* PRP list used */
			nents = (len + mps - 1) / mps;
			if (spdk_unlikely(nents + 1 > max_iovcnt)) {
				SPDK_ERRLOG("Too many page entries\n");
				return -ERANGE;
			}

			vva = gpa_to_vva(prv, prp2, nents * sizeof(*prp_list), PROT_READ);
			if (spdk_unlikely(vva == NULL)) {
				SPDK_ERRLOG("no VVA for %#" PRIx64 ", nents=%#x\n",
					    prp2, nents);
				return -EINVAL;
			}
			prp_list = vva;
			i = 0;
			while (len != 0) {
				residue_len = spdk_min(len, mps);
				vva = gpa_to_vva(prv, prp_list[i], residue_len, PROT_READ | PROT_WRITE);
				if (spdk_unlikely(vva == NULL)) {
					SPDK_ERRLOG("no VVA for %#" PRIx64 ", residue_len=%#x\n",
						    prp_list[i], residue_len);
					return -EINVAL;
				}
				iovs[i + 1].iov_base = vva;
				iovs[i + 1].iov_len = residue_len;
				len -= residue_len;
				i++;
			}
			iovcnt = i + 1;
		}
	} else {
		/* 1 PRP used */
		iovcnt = 1;
	}

	assert(iovcnt <= max_iovcnt);
	return iovcnt;
}

static int
nvme_cmd_map_sgls_data(void *prv, struct spdk_nvme_sgl_descriptor *sgls, uint32_t num_sgls,
		       struct iovec *iovs, uint32_t max_iovcnt,
		       void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot))
{
	uint32_t i;
	void *vva;

	if (spdk_unlikely(max_iovcnt < num_sgls)) {
		return -ERANGE;
	}

	for (i = 0; i < num_sgls; i++) {
		if (spdk_unlikely(sgls[i].unkeyed.type != SPDK_NVME_SGL_TYPE_DATA_BLOCK)) {
			SPDK_ERRLOG("Invalid SGL type %u\n", sgls[i].unkeyed.type);
			return -EINVAL;
		}
		vva = gpa_to_vva(prv, sgls[i].address, sgls[i].unkeyed.length, PROT_READ | PROT_WRITE);
		if (spdk_unlikely(vva == NULL)) {
			SPDK_ERRLOG("GPA to VVA failed\n");
			return -EINVAL;
		}
		iovs[i].iov_base = vva;
		iovs[i].iov_len = sgls[i].unkeyed.length;
	}

	return num_sgls;
}

static int
nvme_cmd_map_sgls(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt,
		  uint32_t len, size_t mps,
		  void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot))
{
	struct spdk_nvme_sgl_descriptor *sgl, *last_sgl;
	uint32_t num_sgls, seg_len;
	void *vva;
	int ret;
	uint32_t total_iovcnt = 0;

	/* SGL cases */
	sgl = &cmd->dptr.sgl1;

	/* only one SGL segment */
	if (sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
		assert(max_iovcnt > 0);
		vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length, PROT_READ |
PROT_WRITE); 364 if (spdk_unlikely(vva == NULL)) { 365 SPDK_ERRLOG("GPA to VVA failed\n"); 366 return -EINVAL; 367 } 368 iovs[0].iov_base = vva; 369 iovs[0].iov_len = sgl->unkeyed.length; 370 assert(sgl->unkeyed.length == len); 371 372 return 1; 373 } 374 375 for (;;) { 376 if (spdk_unlikely((sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_SEGMENT) && 377 (sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_LAST_SEGMENT))) { 378 SPDK_ERRLOG("Invalid SGL type %u\n", sgl->unkeyed.type); 379 return -EINVAL; 380 } 381 382 seg_len = sgl->unkeyed.length; 383 if (spdk_unlikely(seg_len % sizeof(struct spdk_nvme_sgl_descriptor))) { 384 SPDK_ERRLOG("Invalid SGL segment len %u\n", seg_len); 385 return -EINVAL; 386 } 387 388 num_sgls = seg_len / sizeof(struct spdk_nvme_sgl_descriptor); 389 vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length, PROT_READ); 390 if (spdk_unlikely(vva == NULL)) { 391 SPDK_ERRLOG("GPA to VVA failed\n"); 392 return -EINVAL; 393 } 394 395 /* sgl point to the first segment */ 396 sgl = (struct spdk_nvme_sgl_descriptor *)vva; 397 last_sgl = &sgl[num_sgls - 1]; 398 399 /* we are done */ 400 if (last_sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) { 401 /* map whole sgl list */ 402 ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls, &iovs[total_iovcnt], 403 max_iovcnt - total_iovcnt, gpa_to_vva); 404 if (spdk_unlikely(ret < 0)) { 405 return ret; 406 } 407 total_iovcnt += ret; 408 409 return total_iovcnt; 410 } 411 412 if (num_sgls > 1) { 413 /* map whole sgl exclude last_sgl */ 414 ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls - 1, &iovs[total_iovcnt], 415 max_iovcnt - total_iovcnt, gpa_to_vva); 416 if (spdk_unlikely(ret < 0)) { 417 return ret; 418 } 419 total_iovcnt += ret; 420 } 421 422 /* move to next level's segments */ 423 sgl = last_sgl; 424 } 425 426 return 0; 427 } 428 429 static int 430 nvme_map_cmd(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt, 431 uint32_t len, size_t mps, 432 void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot)) 433 { 434 if (cmd->psdt == SPDK_NVME_PSDT_PRP) { 435 return nvme_cmd_map_prps(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva); 436 } 437 438 return nvme_cmd_map_sgls(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva); 439 } 440 441 static char * 442 endpoint_id(struct nvmf_vfio_user_endpoint *endpoint) 443 { 444 return endpoint->trid.traddr; 445 } 446 447 static char * 448 ctrlr_id(struct nvmf_vfio_user_ctrlr *ctrlr) 449 { 450 if (!ctrlr || !ctrlr->endpoint) { 451 return "Null Ctrlr"; 452 } 453 454 return endpoint_id(ctrlr->endpoint); 455 } 456 457 static inline uint16_t 458 io_q_id(struct nvme_q *q) 459 { 460 461 struct nvmf_vfio_user_qpair *vu_qpair; 462 463 assert(q); 464 465 if (q->is_cq) { 466 vu_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, cq); 467 } else { 468 vu_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, sq); 469 } 470 assert(vu_qpair); 471 return vu_qpair->qpair.qid; 472 } 473 474 static void 475 fail_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 476 { 477 assert(ctrlr != NULL); 478 479 if (ctrlr->cfs == 0) { 480 SPDK_ERRLOG(":%s failing controller\n", ctrlr_id(ctrlr)); 481 } 482 483 ctrlr->cfs = 1U; 484 } 485 486 static inline bool 487 ctrlr_interrupt_enabled(struct nvmf_vfio_user_ctrlr *vu_ctrlr) 488 { 489 assert(vu_ctrlr != NULL); 490 assert(vu_ctrlr->endpoint != NULL); 491 492 vfu_pci_config_space_t *pci = vu_ctrlr->endpoint->pci_config_space; 493 494 return (!pci->hdr.cmd.id || vu_ctrlr->endpoint->msix->mxc.mxe); 495 } 496 497 static void 498 
nvmf_vfio_user_destroy_endpoint(struct nvmf_vfio_user_endpoint *endpoint) 499 { 500 if (endpoint->doorbells) { 501 munmap((void *)endpoint->doorbells, NVMF_VFIO_USER_DOORBELLS_SIZE); 502 } 503 504 if (endpoint->devmem_fd > 0) { 505 close(endpoint->devmem_fd); 506 } 507 508 vfu_destroy_ctx(endpoint->vfu_ctx); 509 510 pthread_mutex_destroy(&endpoint->lock); 511 free(endpoint); 512 } 513 514 /* called when process exits */ 515 static int 516 nvmf_vfio_user_destroy(struct spdk_nvmf_transport *transport, 517 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg) 518 { 519 struct nvmf_vfio_user_transport *vu_transport; 520 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 521 522 SPDK_DEBUGLOG(nvmf_vfio, "destroy transport\n"); 523 524 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 525 transport); 526 527 (void)pthread_mutex_destroy(&vu_transport->lock); 528 529 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 530 TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link); 531 nvmf_vfio_user_destroy_endpoint(endpoint); 532 } 533 534 free(vu_transport); 535 536 if (cb_fn) { 537 cb_fn(cb_arg); 538 } 539 540 return 0; 541 } 542 543 static const struct spdk_json_object_decoder vfio_user_transport_opts_decoder[] = { 544 { 545 "disable_mappable_bar0", 546 offsetof(struct nvmf_vfio_user_transport, transport_opts.disable_mappable_bar0), 547 spdk_json_decode_bool, true 548 }, 549 }; 550 551 static struct spdk_nvmf_transport * 552 nvmf_vfio_user_create(struct spdk_nvmf_transport_opts *opts) 553 { 554 struct nvmf_vfio_user_transport *vu_transport; 555 int err; 556 557 if (opts->max_qpairs_per_ctrlr > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) { 558 SPDK_ERRLOG("Invalid max_qpairs_per_ctrlr=%d, supported max_qpairs_per_ctrlr=%d\n", 559 opts->max_qpairs_per_ctrlr, NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR); 560 return NULL; 561 } 562 563 vu_transport = calloc(1, sizeof(*vu_transport)); 564 if (vu_transport == NULL) { 565 SPDK_ERRLOG("Transport alloc fail: %m\n"); 566 return NULL; 567 } 568 569 err = pthread_mutex_init(&vu_transport->lock, NULL); 570 if (err != 0) { 571 SPDK_ERRLOG("Pthread initialisation failed (%d)\n", err); 572 goto err; 573 } 574 575 TAILQ_INIT(&vu_transport->endpoints); 576 577 if (opts->transport_specific != NULL && 578 spdk_json_decode_object_relaxed(opts->transport_specific, vfio_user_transport_opts_decoder, 579 SPDK_COUNTOF(vfio_user_transport_opts_decoder), 580 vu_transport)) { 581 SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n"); 582 free(vu_transport); 583 return NULL; 584 } 585 586 SPDK_DEBUGLOG(nvmf_vfio, "vfio_user transport: disable_mappable_bar0=%d\n", 587 vu_transport->transport_opts.disable_mappable_bar0); 588 589 return &vu_transport->transport; 590 591 err: 592 free(vu_transport); 593 594 return NULL; 595 } 596 597 static uint32_t 598 max_queue_size(struct nvmf_vfio_user_ctrlr const *ctrlr) 599 { 600 assert(ctrlr != NULL); 601 assert(ctrlr->qp[0] != NULL); 602 assert(ctrlr->qp[0]->qpair.ctrlr != NULL); 603 604 return ctrlr->qp[0]->qpair.ctrlr->vcprop.cap.bits.mqes + 1; 605 } 606 607 static void * 608 map_one(vfu_ctx_t *ctx, uint64_t addr, uint64_t len, dma_sg_t *sg, struct iovec *iov, int prot) 609 { 610 int ret; 611 612 assert(ctx != NULL); 613 assert(sg != NULL); 614 assert(iov != NULL); 615 616 ret = vfu_addr_to_sg(ctx, (void *)(uintptr_t)addr, len, sg, 1, prot); 617 if (ret < 0) { 618 return NULL; 619 } 620 621 ret = vfu_map_sg(ctx, sg, iov, 1, 0); 622 if (ret != 0) { 623 return NULL; 624 } 625 626 
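	/*
	 * vfu_addr_to_sg() resolved the guest physical range into a single
	 * scatter-gather entry, and vfu_map_sg() mapped it into this process,
	 * filling *iov with the host virtual address and length.
	 */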
assert(iov->iov_base != NULL); 627 return iov->iov_base; 628 } 629 630 static inline uint32_t 631 sq_head(struct nvmf_vfio_user_qpair *qpair) 632 { 633 assert(qpair != NULL); 634 return qpair->sq.head; 635 } 636 637 static inline void 638 sqhd_advance(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair) 639 { 640 assert(ctrlr != NULL); 641 assert(qpair != NULL); 642 qpair->sq.head = (qpair->sq.head + 1) % qpair->sq.size; 643 } 644 645 static int 646 map_q(struct nvmf_vfio_user_ctrlr *vu_ctrlr, struct nvme_q *q, bool is_cq, bool unmap) 647 { 648 uint64_t len; 649 650 assert(q->size); 651 assert(q->addr == NULL); 652 653 if (is_cq) { 654 len = q->size * sizeof(struct spdk_nvme_cpl); 655 } else { 656 len = q->size * sizeof(struct spdk_nvme_cmd); 657 } 658 659 q->addr = map_one(vu_ctrlr->endpoint->vfu_ctx, q->prp1, len, q->sg, 660 &q->iov, is_cq ? PROT_READ | PROT_WRITE : PROT_READ); 661 if (q->addr == NULL) { 662 return -EFAULT; 663 } 664 665 if (unmap) { 666 memset(q->addr, 0, len); 667 } 668 669 return 0; 670 } 671 672 static int 673 asq_setup(struct nvmf_vfio_user_ctrlr *ctrlr) 674 { 675 struct nvme_q *sq; 676 const struct spdk_nvmf_registers *regs; 677 int ret; 678 679 assert(ctrlr != NULL); 680 assert(ctrlr->qp[0] != NULL); 681 assert(ctrlr->qp[0]->sq.addr == NULL); 682 /* XXX ctrlr->asq == 0 is a valid memory address */ 683 684 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 685 sq = &ctrlr->qp[0]->sq; 686 sq->size = regs->aqa.bits.asqs + 1; 687 sq->prp1 = regs->asq; 688 sq->head = 0; 689 sq->cqid = 0; 690 sq->is_cq = false; 691 692 ret = map_q(ctrlr, sq, false, true); 693 if (ret) { 694 return ret; 695 } 696 697 *tdbl(ctrlr, sq) = 0; 698 699 return 0; 700 } 701 702 static inline int 703 queue_index(uint16_t qid, int is_cq) 704 { 705 return (qid * 2) + is_cq; 706 } 707 708 static volatile uint32_t * 709 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 710 { 711 assert(ctrlr != NULL); 712 assert(q != NULL); 713 assert(!q->is_cq); 714 715 return &ctrlr->doorbells[queue_index(io_q_id(q), false)]; 716 } 717 718 static volatile uint32_t * 719 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 720 { 721 assert(ctrlr != NULL); 722 assert(q != NULL); 723 assert(q->is_cq); 724 725 return &ctrlr->doorbells[queue_index(io_q_id(q), true)]; 726 } 727 728 static inline bool 729 cq_is_full(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 730 { 731 assert(ctrlr != NULL); 732 assert(q != NULL); 733 assert(q->is_cq); 734 735 return ((q->tail + 1) % q->size) == *hdbl(ctrlr, q); 736 } 737 738 static inline void 739 cq_tail_advance(struct nvme_q *q) 740 { 741 assert(q != NULL); 742 assert(q->is_cq); 743 744 assert(q->tail < q->size); 745 q->tail++; 746 747 if (spdk_unlikely(q->tail == q->size)) { 748 q->tail = 0; 749 q->phase = !q->phase; 750 } 751 } 752 753 static int 754 acq_setup(struct nvmf_vfio_user_ctrlr *ctrlr) 755 { 756 struct nvme_q *cq; 757 const struct spdk_nvmf_registers *regs; 758 int ret; 759 760 assert(ctrlr != NULL); 761 assert(ctrlr->qp[0] != NULL); 762 assert(ctrlr->qp[0]->cq.addr == NULL); 763 764 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 765 assert(regs != NULL); 766 cq = &ctrlr->qp[0]->cq; 767 cq->size = regs->aqa.bits.acqs + 1; 768 cq->prp1 = regs->acq; 769 cq->tail = 0; 770 cq->is_cq = true; 771 cq->ien = true; 772 cq->phase = true; 773 774 ret = map_q(ctrlr, cq, true, true); 775 if (ret) { 776 return ret; 777 } 778 *hdbl(ctrlr, cq) = 0; 779 780 return 0; 781 } 782 783 static inline dma_sg_t * 784 
vu_req_to_sg_t(struct nvmf_vfio_user_req *vu_req, uint32_t iovcnt)
{
	return (dma_sg_t *)((uintptr_t)vu_req->sg + iovcnt * dma_sg_size());
}

static void *
_map_one(void *prv, uint64_t addr, uint64_t len, int prot)
{
	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)prv;
	struct spdk_nvmf_qpair *qpair;
	struct nvmf_vfio_user_req *vu_req;
	struct nvmf_vfio_user_qpair *vu_qpair;
	void *ret;

	assert(req != NULL);
	qpair = req->qpair;
	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

	assert(vu_req->iovcnt < NVMF_VFIO_USER_MAX_IOVECS);
	ret = map_one(vu_qpair->ctrlr->endpoint->vfu_ctx, addr, len,
		      vu_req_to_sg_t(vu_req, vu_req->iovcnt),
		      &vu_req->iov[vu_req->iovcnt], prot);
	if (spdk_likely(ret != NULL)) {
		vu_req->iovcnt++;
	}
	return ret;
}

static int
vfio_user_map_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req,
		  struct iovec *iov, uint32_t length)
{
	/* Map the command's PRPs or SGLs from guest physical memory to
	 * host virtual addresses.
	 */
	return nvme_map_cmd(req, &req->cmd->nvme_cmd, iov, NVMF_REQ_MAX_BUFFERS,
			    length, 4096, _map_one);
}

static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct nvmf_vfio_user_qpair *vu_qpair);

/*
 * Posts a CQE in the completion queue.
 *
 * @ctrlr: the vfio-user controller
 * @cq: the completion queue
 * @cdw0: cdw0 as reported by NVMf
 * @sqid: submission queue ID
 * @cid: command identifier in NVMe command
 * @sc: the NVMe CQE status code
 * @sct: the NVMe CQE status code type
 */
static int
post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *cq,
		uint32_t cdw0, uint16_t sqid, uint16_t cid, uint16_t sc, uint16_t sct)
{
	struct spdk_nvme_cpl *cpl;
	const struct spdk_nvmf_registers *regs;
	int err;

	assert(ctrlr != NULL);

	if (spdk_unlikely(cq == NULL || cq->addr == NULL)) {
		return 0;
	}

	regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
	if (regs->csts.bits.shst != SPDK_NVME_SHST_NORMAL) {
		SPDK_DEBUGLOG(nvmf_vfio,
			      "%s: ignore completion SQ%d cid=%d status=%#x\n",
			      ctrlr_id(ctrlr), sqid, cid, sc);
		return 0;
	}

	if (cq_is_full(ctrlr, cq)) {
		SPDK_ERRLOG("%s: CQ%d full (tail=%d, head=%d)\n",
			    ctrlr_id(ctrlr), io_q_id(cq), cq->tail, *hdbl(ctrlr, cq));
		return -1;
	}

	cpl = ((struct spdk_nvme_cpl *)cq->addr) + cq->tail;

	assert(ctrlr->qp[sqid] != NULL);
	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: request complete SQ%d cid=%d status=%#x SQ head=%#x CQ tail=%#x\n",
		      ctrlr_id(ctrlr), sqid, cid, sc, sq_head(ctrlr->qp[sqid]),
		      cq->tail);

	cpl->sqhd = sq_head(ctrlr->qp[sqid]);
	cpl->sqid = sqid;
	cpl->cid = cid;
	cpl->cdw0 = cdw0;
	cpl->status.dnr = 0x0;
	cpl->status.m = 0x0;
	cpl->status.sct = sct;
	cpl->status.p = cq->phase;
	cpl->status.sc = sc;

	cq_tail_advance(cq);

	/*
	 * This function currently executes in SPDK thread context, but we
	 * might be triggering interrupts from vfio-user thread context, so
	 * check for race conditions.
891 */ 892 if (ctrlr_interrupt_enabled(ctrlr) && cq->ien) { 893 err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv); 894 if (err != 0) { 895 SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n", 896 ctrlr_id(ctrlr)); 897 return err; 898 } 899 } 900 901 return 0; 902 } 903 904 static bool 905 io_q_exists(struct nvmf_vfio_user_ctrlr *vu_ctrlr, const uint16_t qid, const bool is_cq) 906 { 907 assert(vu_ctrlr != NULL); 908 909 if (qid == 0 || qid >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) { 910 return false; 911 } 912 913 if (vu_ctrlr->qp[qid] == NULL) { 914 return false; 915 } 916 917 if (!is_cq) { 918 if (vu_ctrlr->qp[qid]->state == VFIO_USER_QPAIR_SQ_DELETED || 919 vu_ctrlr->qp[qid]->state == VFIO_USER_QPAIR_UNINITIALIZED) { 920 return false; 921 } 922 } 923 924 return true; 925 } 926 927 static void 928 unmap_qp(struct nvmf_vfio_user_qpair *qp) 929 { 930 struct nvmf_vfio_user_ctrlr *ctrlr; 931 932 if (qp->ctrlr == NULL) { 933 return; 934 } 935 ctrlr = qp->ctrlr; 936 937 SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap QP%d\n", 938 ctrlr_id(ctrlr), qp->qpair.qid); 939 940 if (qp->sq.addr != NULL) { 941 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, qp->sq.sg, &qp->sq.iov, 1); 942 qp->sq.addr = NULL; 943 } 944 945 if (qp->cq.addr != NULL) { 946 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, qp->cq.sg, &qp->cq.iov, 1); 947 qp->cq.addr = NULL; 948 } 949 } 950 951 static int 952 remap_qp(struct nvmf_vfio_user_qpair *vu_qpair) 953 { 954 struct nvme_q *sq, *cq; 955 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 956 int ret; 957 958 vu_ctrlr = vu_qpair->ctrlr; 959 sq = &vu_qpair->sq; 960 cq = &vu_qpair->cq; 961 962 if (sq->size) { 963 ret = map_q(vu_ctrlr, sq, false, false); 964 if (ret) { 965 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap SQID %d %#lx-%#lx\n", 966 io_q_id(sq), sq->prp1, sq->prp1 + sq->size * sizeof(struct spdk_nvme_cmd)); 967 return -EFAULT; 968 } 969 } 970 971 if (cq->size) { 972 ret = map_q(vu_ctrlr, cq, true, false); 973 if (ret) { 974 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap CQID %d %#lx-%#lx\n", 975 io_q_id(cq), cq->prp1, cq->prp1 + cq->size * sizeof(struct spdk_nvme_cpl)); 976 return -EFAULT; 977 } 978 979 } 980 981 return 0; 982 } 983 984 static void 985 free_qp(struct nvmf_vfio_user_ctrlr *ctrlr, uint16_t qid) 986 { 987 struct nvmf_vfio_user_qpair *qpair; 988 struct nvmf_vfio_user_req *vu_req; 989 uint32_t i; 990 991 if (ctrlr == NULL) { 992 return; 993 } 994 995 qpair = ctrlr->qp[qid]; 996 if (qpair == NULL) { 997 return; 998 } 999 1000 SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy QP%d=%p\n", ctrlr_id(ctrlr), 1001 qid, qpair); 1002 1003 unmap_qp(qpair); 1004 1005 for (i = 0; i < qpair->qsize; i++) { 1006 vu_req = &qpair->reqs_internal[i]; 1007 free(vu_req->sg); 1008 } 1009 free(qpair->reqs_internal); 1010 1011 free(qpair->sq.sg); 1012 free(qpair->cq.sg); 1013 free(qpair); 1014 1015 ctrlr->qp[qid] = NULL; 1016 } 1017 1018 /* This function can only fail because of memory allocation errors. 
*/ 1019 static int 1020 init_qp(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_transport *transport, 1021 const uint32_t qsize, const uint16_t id) 1022 { 1023 uint32_t i; 1024 struct nvmf_vfio_user_qpair *qpair; 1025 struct nvmf_vfio_user_req *vu_req, *tmp; 1026 struct spdk_nvmf_request *req; 1027 1028 assert(ctrlr != NULL); 1029 assert(transport != NULL); 1030 1031 qpair = calloc(1, sizeof(*qpair)); 1032 if (qpair == NULL) { 1033 return -ENOMEM; 1034 } 1035 qpair->sq.sg = calloc(1, dma_sg_size()); 1036 if (qpair->sq.sg == NULL) { 1037 free(qpair); 1038 return -ENOMEM; 1039 } 1040 qpair->cq.sg = calloc(1, dma_sg_size()); 1041 if (qpair->cq.sg == NULL) { 1042 free(qpair->sq.sg); 1043 free(qpair); 1044 return -ENOMEM; 1045 } 1046 1047 qpair->qpair.qid = id; 1048 qpair->qpair.transport = transport; 1049 qpair->ctrlr = ctrlr; 1050 qpair->qsize = qsize; 1051 1052 TAILQ_INIT(&qpair->reqs); 1053 1054 qpair->reqs_internal = calloc(qsize, sizeof(struct nvmf_vfio_user_req)); 1055 if (qpair->reqs_internal == NULL) { 1056 SPDK_ERRLOG("%s: error allocating reqs: %m\n", ctrlr_id(ctrlr)); 1057 goto reqs_err; 1058 } 1059 1060 for (i = 0; i < qsize; i++) { 1061 vu_req = &qpair->reqs_internal[i]; 1062 vu_req->sg = calloc(NVMF_VFIO_USER_MAX_IOVECS, dma_sg_size()); 1063 if (vu_req->sg == NULL) { 1064 goto sg_err; 1065 } 1066 1067 req = &vu_req->req; 1068 req->qpair = &qpair->qpair; 1069 req->rsp = (union nvmf_c2h_msg *)&vu_req->rsp; 1070 req->cmd = (union nvmf_h2c_msg *)&vu_req->cmd; 1071 1072 TAILQ_INSERT_TAIL(&qpair->reqs, vu_req, link); 1073 } 1074 1075 ctrlr->qp[id] = qpair; 1076 return 0; 1077 1078 sg_err: 1079 TAILQ_FOREACH_SAFE(vu_req, &qpair->reqs, link, tmp) { 1080 free(vu_req->sg); 1081 } 1082 free(qpair->reqs_internal); 1083 1084 reqs_err: 1085 free(qpair->sq.sg); 1086 free(qpair->cq.sg); 1087 free(qpair); 1088 return -ENOMEM; 1089 } 1090 1091 /* 1092 * Creates a completion or submission I/O queue. Returns 0 on success, -errno 1093 * on error. 1094 */ 1095 static int 1096 handle_create_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, 1097 struct spdk_nvme_cmd *cmd, const bool is_cq) 1098 { 1099 uint16_t qid; 1100 uint32_t qsize; 1101 uint16_t sc = SPDK_NVME_SC_SUCCESS; 1102 uint16_t sct = SPDK_NVME_SCT_GENERIC; 1103 int err = 0; 1104 struct nvmf_vfio_user_qpair *vu_qpair; 1105 struct nvme_q *io_q; 1106 struct nvmf_vfio_user_transport *vu_transport = ctrlr->transport; 1107 1108 assert(ctrlr != NULL); 1109 assert(cmd != NULL); 1110 1111 qid = cmd->cdw10_bits.create_io_q.qid; 1112 if (qid == 0 || qid >= vu_transport->transport.opts.max_qpairs_per_ctrlr) { 1113 SPDK_ERRLOG("%s: invalid QID=%d, max=%d\n", ctrlr_id(ctrlr), 1114 qid, vu_transport->transport.opts.max_qpairs_per_ctrlr); 1115 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1116 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1117 goto out; 1118 } 1119 1120 if (io_q_exists(ctrlr, qid, is_cq)) { 1121 SPDK_ERRLOG("%s: %cQ%d already exists\n", ctrlr_id(ctrlr), 1122 is_cq ? 'C' : 'S', qid); 1123 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1124 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1125 goto out; 1126 } 1127 1128 qsize = cmd->cdw10_bits.create_io_q.qsize + 1; 1129 if (qsize == 1 || qsize > max_queue_size(ctrlr)) { 1130 SPDK_ERRLOG("%s: invalid I/O queue size %u\n", ctrlr_id(ctrlr), qsize); 1131 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1132 sc = SPDK_NVME_SC_INVALID_QUEUE_SIZE; 1133 goto out; 1134 } 1135 1136 SPDK_DEBUGLOG(nvmf_vfio, 1137 "%s: create I/O %cQ%d: QSIZE=%#x\n", ctrlr_id(ctrlr), 1138 is_cq ? 
'C' : 'S', qid, qsize);

	if (is_cq) {
		if (cmd->cdw11_bits.create_io_cq.pc != 0x1) {
			SPDK_ERRLOG("%s: non-PC CQ not supported\n", ctrlr_id(ctrlr));
			sc = SPDK_NVME_SC_INVALID_FIELD;
			goto out;
		}
		if (cmd->cdw11_bits.create_io_cq.iv > NVME_IRQ_MSIX_NUM - 1) {
			SPDK_ERRLOG("%s: IV is too big\n", ctrlr_id(ctrlr));
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_INVALID_INTERRUPT_VECTOR;
			goto out;
		}

		err = init_qp(ctrlr, ctrlr->qp[0]->qpair.transport, qsize, qid);
		if (err != 0) {
			sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto out;
		}

		io_q = &ctrlr->qp[qid]->cq;
		io_q->ien = cmd->cdw11_bits.create_io_cq.ien;
		io_q->iv = cmd->cdw11_bits.create_io_cq.iv;
		io_q->phase = true;
	} else {
		if (cmd->cdw11_bits.create_io_sq.cqid == 0) {
			SPDK_ERRLOG("%s: invalid CQID 0\n", ctrlr_id(ctrlr));
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
			goto out;
		}
		/* CQ must be created before SQ */
		if (!io_q_exists(ctrlr, cmd->cdw11_bits.create_io_sq.cqid, true)) {
			SPDK_ERRLOG("%s: CQ%d does not exist\n", ctrlr_id(ctrlr),
				    cmd->cdw11_bits.create_io_sq.cqid);
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_COMPLETION_QUEUE_INVALID;
			goto out;
		}

		if (cmd->cdw11_bits.create_io_sq.pc != 0x1) {
			SPDK_ERRLOG("%s: non-PC SQ not supported\n", ctrlr_id(ctrlr));
			sc = SPDK_NVME_SC_INVALID_FIELD;
			goto out;
		}
		/* TODO: support shared IO CQ */
		if (qid != cmd->cdw11_bits.create_io_sq.cqid) {
			SPDK_ERRLOG("%s: doesn't support shared CQ now\n", ctrlr_id(ctrlr));
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
			goto out;
		}

		io_q = &ctrlr->qp[qid]->sq;
		io_q->cqid = cmd->cdw11_bits.create_io_sq.cqid;
		SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%d CQID=%d\n", ctrlr_id(ctrlr),
			      qid, io_q->cqid);
	}

	io_q->is_cq = is_cq;
	io_q->size = qsize;
	io_q->prp1 = cmd->dptr.prp.prp1;

	err = map_q(ctrlr, io_q, is_cq, true);
	if (err) {
		sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		SPDK_ERRLOG("%s: failed to map I/O queue: %m\n", ctrlr_id(ctrlr));
		goto out;
	}

	SPDK_DEBUGLOG(nvmf_vfio, "%s: mapped %cQ%d IOVA=%#lx vaddr=%#llx\n",
		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
		      qid, cmd->dptr.prp.prp1, (unsigned long long)io_q->addr);

	if (is_cq) {
		*hdbl(ctrlr, io_q) = 0;
	} else {
		vu_qpair = ctrlr->qp[qid];
		*tdbl(ctrlr, io_q) = 0;
		vu_qpair->sq.head = 0;

		if (vu_qpair->state == VFIO_USER_QPAIR_SQ_DELETED) {
			vu_qpair->state = VFIO_USER_QPAIR_ACTIVE;
		} else {
			/*
			 * Create our new I/O qpair. This asynchronously invokes, on a
			 * suitable poll group, the nvmf_vfio_user_poll_group_add()
			 * callback, which will call spdk_nvmf_request_exec_fabrics()
			 * with a generated fabrics connect command. This command is
			 * then eventually completed via handle_queue_connect_rsp().
			 */
			vu_qpair->create_io_sq_cmd = *cmd;
			spdk_nvmf_tgt_new_qpair(ctrlr->transport->transport.tgt,
						&vu_qpair->qpair);
			return 0;
		}
	}

out:
	return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, sc, sct);
}

/* For an admin Delete I/O Completion Queue command, the NVMf library will
 * disconnect and free the queue pair, so save the command in a context.
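 * The CQE for the Delete I/O CQ command is only posted from
 * vfio_user_qpair_delete_cb(), once the qpair disconnect has completed.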
1243 */ 1244 struct vfio_user_delete_cq_ctx { 1245 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 1246 struct spdk_nvme_cmd delete_io_cq_cmd; 1247 }; 1248 1249 static void 1250 vfio_user_qpair_delete_cb(void *cb_arg) 1251 { 1252 struct vfio_user_delete_cq_ctx *ctx = cb_arg; 1253 struct nvmf_vfio_user_ctrlr *vu_ctrlr = ctx->vu_ctrlr; 1254 1255 post_completion(vu_ctrlr, &vu_ctrlr->qp[0]->cq, 0, 0, ctx->delete_io_cq_cmd.cid, 1256 SPDK_NVME_SC_SUCCESS, SPDK_NVME_SCT_GENERIC); 1257 free(ctx); 1258 } 1259 1260 /* 1261 * Deletes a completion or submission I/O queue. 1262 */ 1263 static int 1264 handle_del_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, 1265 struct spdk_nvme_cmd *cmd, const bool is_cq) 1266 { 1267 uint16_t sct = SPDK_NVME_SCT_GENERIC; 1268 uint16_t sc = SPDK_NVME_SC_SUCCESS; 1269 struct nvmf_vfio_user_qpair *vu_qpair; 1270 struct vfio_user_delete_cq_ctx *ctx; 1271 1272 SPDK_DEBUGLOG(nvmf_vfio, "%s: delete I/O %cQ: QID=%d\n", 1273 ctrlr_id(ctrlr), is_cq ? 'C' : 'S', 1274 cmd->cdw10_bits.delete_io_q.qid); 1275 1276 if (!io_q_exists(ctrlr, cmd->cdw10_bits.delete_io_q.qid, is_cq)) { 1277 SPDK_ERRLOG("%s: I/O %cQ%d does not exist\n", ctrlr_id(ctrlr), 1278 is_cq ? 'C' : 'S', cmd->cdw10_bits.delete_io_q.qid); 1279 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1280 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1281 goto out; 1282 } 1283 1284 vu_qpair = ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]; 1285 if (is_cq) { 1286 if (vu_qpair->state == VFIO_USER_QPAIR_UNINITIALIZED) { 1287 free_qp(ctrlr, cmd->cdw10_bits.delete_io_q.qid); 1288 goto out; 1289 } 1290 1291 /* SQ must have been deleted first */ 1292 if (vu_qpair->state != VFIO_USER_QPAIR_SQ_DELETED) { 1293 SPDK_ERRLOG("%s: the associated SQ must be deleted first\n", ctrlr_id(ctrlr)); 1294 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1295 sc = SPDK_NVME_SC_INVALID_QUEUE_DELETION; 1296 goto out; 1297 } 1298 ctx = calloc(1, sizeof(*ctx)); 1299 if (!ctx) { 1300 sct = SPDK_NVME_SCT_GENERIC; 1301 sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 1302 goto out; 1303 } 1304 ctx->vu_ctrlr = ctrlr; 1305 ctx->delete_io_cq_cmd = *cmd; 1306 spdk_nvmf_qpair_disconnect(&vu_qpair->qpair, vfio_user_qpair_delete_cb, ctx); 1307 return 0; 1308 } else { 1309 if (vu_qpair->state == VFIO_USER_QPAIR_SQ_DELETED) { 1310 SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%u is already deleted\n", ctrlr_id(ctrlr), 1311 cmd->cdw10_bits.delete_io_q.qid); 1312 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 1313 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 1314 goto out; 1315 } 1316 1317 /* 1318 * This doesn't actually delete the SQ, We're merely telling the poll_group_poll 1319 * function to skip checking this SQ. The queue pair will be disconnected in Delete 1320 * IO CQ command. 1321 */ 1322 vu_qpair->state = VFIO_USER_QPAIR_SQ_DELETED; 1323 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, vu_qpair->sq.sg, &vu_qpair->sq.iov, 1); 1324 vu_qpair->sq.addr = NULL; 1325 } 1326 1327 out: 1328 return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, sc, sct); 1329 } 1330 1331 /* 1332 * Returns 0 on success and -errno on error. 1333 */ 1334 static int 1335 consume_admin_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd) 1336 { 1337 assert(ctrlr != NULL); 1338 assert(cmd != NULL); 1339 1340 if (cmd->fuse != 0) { 1341 /* Fused admin commands are not supported. 
*/ 1342 return post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0, cmd->cid, 1343 SPDK_NVME_SC_INVALID_FIELD, 1344 SPDK_NVME_SCT_GENERIC); 1345 } 1346 1347 switch (cmd->opc) { 1348 case SPDK_NVME_OPC_CREATE_IO_CQ: 1349 case SPDK_NVME_OPC_CREATE_IO_SQ: 1350 return handle_create_io_q(ctrlr, cmd, 1351 cmd->opc == SPDK_NVME_OPC_CREATE_IO_CQ); 1352 case SPDK_NVME_OPC_DELETE_IO_SQ: 1353 case SPDK_NVME_OPC_DELETE_IO_CQ: 1354 return handle_del_io_q(ctrlr, cmd, 1355 cmd->opc == SPDK_NVME_OPC_DELETE_IO_CQ); 1356 default: 1357 return handle_cmd_req(ctrlr, cmd, ctrlr->qp[0]); 1358 } 1359 } 1360 1361 static int 1362 handle_cmd_rsp(struct nvmf_vfio_user_req *vu_req, void *cb_arg) 1363 { 1364 struct nvmf_vfio_user_qpair *vu_qpair = cb_arg; 1365 struct nvmf_vfio_user_ctrlr *vu_ctrlr = vu_qpair->ctrlr; 1366 uint16_t sqid, cqid; 1367 1368 assert(vu_qpair != NULL); 1369 assert(vu_req != NULL); 1370 assert(vu_ctrlr != NULL); 1371 1372 if (spdk_likely(vu_req->iovcnt)) { 1373 vfu_unmap_sg(vu_ctrlr->endpoint->vfu_ctx, vu_req->sg, vu_req->iov, vu_req->iovcnt); 1374 } 1375 sqid = vu_qpair->qpair.qid; 1376 cqid = vu_ctrlr->qp[sqid]->sq.cqid; 1377 1378 return post_completion(vu_ctrlr, &vu_ctrlr->qp[cqid]->cq, 1379 vu_req->req.rsp->nvme_cpl.cdw0, 1380 sqid, 1381 vu_req->req.cmd->nvme_cmd.cid, 1382 vu_req->req.rsp->nvme_cpl.status.sc, 1383 vu_req->req.rsp->nvme_cpl.status.sct); 1384 } 1385 1386 static int 1387 consume_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair, 1388 struct spdk_nvme_cmd *cmd) 1389 { 1390 assert(qpair != NULL); 1391 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 1392 return consume_admin_cmd(ctrlr, cmd); 1393 } 1394 1395 return handle_cmd_req(ctrlr, cmd, qpair); 1396 } 1397 1398 /* Returns the number of commands processed, or a negative value on error. */ 1399 static int 1400 handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail, 1401 struct nvmf_vfio_user_qpair *qpair) 1402 { 1403 struct spdk_nvme_cmd *queue; 1404 int count = 0; 1405 1406 assert(ctrlr != NULL); 1407 assert(qpair != NULL); 1408 1409 queue = qpair->sq.addr; 1410 while (sq_head(qpair) != new_tail) { 1411 int err; 1412 struct spdk_nvme_cmd *cmd = &queue[sq_head(qpair)]; 1413 1414 count++; 1415 1416 /* 1417 * SQHD must contain the new head pointer, so we must increase 1418 * it before we generate a completion. 1419 */ 1420 sqhd_advance(ctrlr, qpair); 1421 1422 err = consume_cmd(ctrlr, qpair, cmd); 1423 if (err != 0) { 1424 return err; 1425 } 1426 } 1427 1428 return count; 1429 } 1430 1431 static int 1432 enable_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1433 { 1434 int err; 1435 1436 assert(ctrlr != NULL); 1437 1438 err = acq_setup(ctrlr); 1439 if (err != 0) { 1440 return err; 1441 } 1442 1443 err = asq_setup(ctrlr); 1444 if (err != 0) { 1445 return err; 1446 } 1447 1448 return 0; 1449 } 1450 1451 static void 1452 disable_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1453 { 1454 assert(ctrlr->qp[0] != NULL); 1455 1456 unmap_qp(ctrlr->qp[0]); 1457 } 1458 1459 static void 1460 memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1461 { 1462 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1463 struct nvmf_vfio_user_ctrlr *ctrlr; 1464 struct nvmf_vfio_user_qpair *qpair; 1465 int ret; 1466 1467 /* 1468 * We're not interested in any DMA regions that aren't mappable (we don't 1469 * support clients that don't share their memory). 
1470 */ 1471 if (!info->vaddr) { 1472 return; 1473 } 1474 1475 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1476 (info->mapping.iov_len & MASK_2MB)) { 1477 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1478 (uintptr_t)info->mapping.iov_base, 1479 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1480 return; 1481 } 1482 1483 assert(endpoint != NULL); 1484 if (endpoint->ctrlr == NULL) { 1485 return; 1486 } 1487 ctrlr = endpoint->ctrlr; 1488 1489 SPDK_DEBUGLOG(nvmf_vfio, "%s: map IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1490 (uintptr_t)info->mapping.iov_base, 1491 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1492 1493 /* VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE are enabled when registering to VFIO, here we also 1494 * check the protection bits before registering. 1495 */ 1496 if (info->prot == (PROT_WRITE | PROT_READ)) { 1497 ret = spdk_mem_register(info->mapping.iov_base, info->mapping.iov_len); 1498 if (ret) { 1499 SPDK_ERRLOG("Memory region register %#lx-%#lx failed, ret=%d\n", 1500 (uint64_t)(uintptr_t)info->mapping.iov_base, 1501 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len, 1502 ret); 1503 } 1504 } 1505 1506 pthread_mutex_lock(&endpoint->lock); 1507 TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) { 1508 if (qpair->state != VFIO_USER_QPAIR_INACTIVE) { 1509 continue; 1510 } 1511 1512 ret = remap_qp(qpair); 1513 if (ret) { 1514 continue; 1515 } 1516 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1517 SPDK_DEBUGLOG(nvmf_vfio, "Remap QP %u successfully\n", qpair->qpair.qid); 1518 } 1519 pthread_mutex_unlock(&endpoint->lock); 1520 } 1521 1522 static int 1523 memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1524 { 1525 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1526 struct nvmf_vfio_user_ctrlr *ctrlr; 1527 struct nvmf_vfio_user_qpair *qpair; 1528 void *map_start, *map_end; 1529 int ret = 0; 1530 1531 if (!info->vaddr) { 1532 return 0; 1533 } 1534 1535 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1536 (info->mapping.iov_len & MASK_2MB)) { 1537 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1538 (uintptr_t)info->mapping.iov_base, 1539 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1540 return 0; 1541 } 1542 1543 assert(endpoint != NULL); 1544 if (endpoint->ctrlr == NULL) { 1545 return 0; 1546 } 1547 ctrlr = endpoint->ctrlr; 1548 1549 SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1550 (uintptr_t)info->mapping.iov_base, 1551 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1552 1553 map_start = info->mapping.iov_base; 1554 map_end = info->mapping.iov_base + info->mapping.iov_len; 1555 1556 pthread_mutex_lock(&endpoint->lock); 1557 TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) { 1558 if ((qpair->cq.addr >= map_start && qpair->cq.addr <= map_end) || 1559 (qpair->sq.addr >= map_start && qpair->sq.addr <= map_end)) { 1560 /* TODO: Ideally we should disconnect this queue pair 1561 * before returning to caller. 
1562 */ 1563 unmap_qp(qpair); 1564 qpair->state = VFIO_USER_QPAIR_INACTIVE; 1565 } 1566 } 1567 pthread_mutex_unlock(&endpoint->lock); 1568 1569 if (info->prot == (PROT_WRITE | PROT_READ)) { 1570 ret = spdk_mem_unregister(info->mapping.iov_base, info->mapping.iov_len); 1571 if (ret) { 1572 SPDK_ERRLOG("Memory region unregister %#lx-%#lx failed, ret=%d\n", 1573 (uint64_t)(uintptr_t)info->mapping.iov_base, 1574 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len, 1575 ret); 1576 } 1577 } 1578 1579 return 0; 1580 } 1581 1582 static int 1583 nvmf_vfio_user_prop_req_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1584 { 1585 struct nvmf_vfio_user_qpair *vu_qpair = cb_arg; 1586 struct nvmf_vfio_user_ctrlr *vu_ctrlr; 1587 bool disable_admin = false; 1588 int ret; 1589 1590 assert(vu_qpair != NULL); 1591 assert(req != NULL); 1592 1593 if (req->req.cmd->prop_get_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET) { 1594 assert(vu_qpair->ctrlr != NULL); 1595 assert(req != NULL); 1596 1597 memcpy(req->req.data, 1598 &req->req.rsp->prop_get_rsp.value.u64, 1599 req->req.length); 1600 } else { 1601 assert(req->req.cmd->prop_set_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET); 1602 assert(vu_qpair->ctrlr != NULL); 1603 vu_ctrlr = vu_qpair->ctrlr; 1604 1605 if (req->req.cmd->prop_set_cmd.ofst == offsetof(struct spdk_nvme_registers, cc)) { 1606 union spdk_nvme_cc_register cc, diff; 1607 1608 cc.raw = req->req.cmd->prop_set_cmd.value.u64; 1609 diff.raw = cc.raw ^ req->cc.raw; 1610 1611 if (diff.bits.en) { 1612 if (cc.bits.en) { 1613 SPDK_DEBUGLOG(nvmf_vfio, "%s: MAP Admin queue\n", ctrlr_id(vu_ctrlr)); 1614 ret = enable_admin_queue(vu_ctrlr); 1615 if (ret) { 1616 SPDK_ERRLOG("%s: failed to map Admin queue\n", ctrlr_id(vu_ctrlr)); 1617 return ret; 1618 } 1619 vu_qpair->state = VFIO_USER_QPAIR_ACTIVE; 1620 } else { 1621 disable_admin = true; 1622 } 1623 } 1624 1625 if (diff.bits.shn) { 1626 if (cc.bits.shn == SPDK_NVME_SHN_NORMAL || cc.bits.shn == SPDK_NVME_SHN_ABRUPT) { 1627 disable_admin = true; 1628 } 1629 } 1630 1631 if (disable_admin) { 1632 SPDK_DEBUGLOG(nvmf_vfio, 1633 "%s: UNMAP Admin queue\n", 1634 ctrlr_id(vu_ctrlr)); 1635 vu_qpair->state = VFIO_USER_QPAIR_INACTIVE; 1636 disable_admin_queue(vu_ctrlr); 1637 /* For PCIe controller reset or shutdown, we will drop all AER responses */ 1638 nvmf_ctrlr_abort_aer(vu_qpair->qpair.ctrlr); 1639 } 1640 } 1641 } 1642 1643 return 0; 1644 } 1645 1646 /* 1647 * Handles a write at offset 0x1000 or more; this is the non-mapped path when a 1648 * doorbell is written via access_bar0_fn(). 1649 * 1650 * DSTRD is set to fixed value 0 for NVMf. 
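 * With a zero stride, the BAR0 doorbells are packed as two dwords per queue
 * (SQ tail, then CQ head), which is what queue_index() and the
 * byte-offset-to-array-index conversion below rely on.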
1651 * 1652 */ 1653 static int 1654 handle_dbl_access(struct nvmf_vfio_user_ctrlr *ctrlr, uint32_t *buf, 1655 const size_t count, loff_t pos, const bool is_write) 1656 { 1657 assert(ctrlr != NULL); 1658 assert(buf != NULL); 1659 1660 if (count != sizeof(uint32_t)) { 1661 SPDK_ERRLOG("%s: bad doorbell buffer size %ld\n", 1662 ctrlr_id(ctrlr), count); 1663 errno = EINVAL; 1664 return -1; 1665 } 1666 1667 pos -= NVMF_VFIO_USER_DOORBELLS_OFFSET; 1668 1669 /* pos must be dword aligned */ 1670 if ((pos & 0x3) != 0) { 1671 SPDK_ERRLOG("%s: bad doorbell offset %#lx\n", ctrlr_id(ctrlr), pos); 1672 errno = EINVAL; 1673 return -1; 1674 } 1675 1676 /* convert byte offset to array index */ 1677 pos >>= 2; 1678 1679 if (pos >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR * 2) { 1680 SPDK_ERRLOG("%s: bad doorbell index %#lx\n", ctrlr_id(ctrlr), pos); 1681 errno = EINVAL; 1682 return -1; 1683 } 1684 1685 if (is_write) { 1686 ctrlr->doorbells[pos] = *buf; 1687 spdk_wmb(); 1688 } else { 1689 spdk_rmb(); 1690 *buf = ctrlr->doorbells[pos]; 1691 } 1692 return 0; 1693 } 1694 1695 static ssize_t 1696 access_bar0_fn(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos, 1697 bool is_write) 1698 { 1699 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1700 struct nvmf_vfio_user_ctrlr *ctrlr; 1701 struct nvmf_vfio_user_req *req; 1702 const struct spdk_nvmf_registers *regs; 1703 int ret; 1704 1705 ctrlr = endpoint->ctrlr; 1706 1707 SPDK_DEBUGLOG(nvmf_vfio, 1708 "%s: bar0 %s ctrlr: %p, count=%zu, pos=%"PRIX64"\n", 1709 endpoint_id(endpoint), is_write ? "write" : "read", 1710 ctrlr, count, pos); 1711 1712 if (pos >= NVMF_VFIO_USER_DOORBELLS_OFFSET) { 1713 /* 1714 * The fact that the doorbells can be memory mapped doesn't mean 1715 * that the client (VFIO in QEMU) is obliged to memory map them, 1716 * it might still elect to access them via regular read/write; 1717 * we might also have had disable_mappable_bar0 set. 
1718 */ 1719 ret = handle_dbl_access(ctrlr, (uint32_t *)buf, count, 1720 pos, is_write); 1721 if (ret == 0) { 1722 return count; 1723 } 1724 return ret; 1725 } 1726 1727 /* Construct a Fabric Property Get/Set command and send it */ 1728 req = get_nvmf_vfio_user_req(ctrlr->qp[0]); 1729 if (req == NULL) { 1730 errno = ENOBUFS; 1731 return -1; 1732 } 1733 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 1734 req->cc.raw = regs->cc.raw; 1735 1736 req->cb_fn = nvmf_vfio_user_prop_req_rsp; 1737 req->cb_arg = ctrlr->qp[0]; 1738 req->req.cmd->prop_set_cmd.opcode = SPDK_NVME_OPC_FABRIC; 1739 req->req.cmd->prop_set_cmd.cid = 0; 1740 req->req.cmd->prop_set_cmd.attrib.size = (count / 4) - 1; 1741 req->req.cmd->prop_set_cmd.ofst = pos; 1742 if (is_write) { 1743 req->req.cmd->prop_set_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET; 1744 if (req->req.cmd->prop_set_cmd.attrib.size) { 1745 req->req.cmd->prop_set_cmd.value.u64 = *(uint64_t *)buf; 1746 } else { 1747 req->req.cmd->prop_set_cmd.value.u32.high = 0; 1748 req->req.cmd->prop_set_cmd.value.u32.low = *(uint32_t *)buf; 1749 } 1750 } else { 1751 req->req.cmd->prop_get_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET; 1752 } 1753 req->req.length = count; 1754 req->req.data = buf; 1755 1756 spdk_nvmf_request_exec_fabrics(&req->req); 1757 1758 return count; 1759 } 1760 1761 /* 1762 * NVMe driver reads 4096 bytes, which is the extended PCI configuration space 1763 * available on PCI-X 2.0 and PCI Express buses 1764 */ 1765 static ssize_t 1766 access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, 1767 bool is_write) 1768 { 1769 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1770 1771 if (is_write) { 1772 SPDK_ERRLOG("%s: write %#lx-%#lx not supported\n", 1773 endpoint_id(endpoint), offset, offset + count); 1774 errno = EINVAL; 1775 return -1; 1776 } 1777 1778 if (offset + count > PCI_CFG_SPACE_EXP_SIZE) { 1779 SPDK_ERRLOG("%s: access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n", 1780 endpoint_id(endpoint), offset, count, 1781 PCI_CFG_SPACE_EXP_SIZE); 1782 errno = ERANGE; 1783 return -1; 1784 } 1785 1786 memcpy(buf, ((unsigned char *)endpoint->pci_config_space) + offset, count); 1787 1788 return count; 1789 } 1790 1791 static void 1792 vfio_user_log(vfu_ctx_t *vfu_ctx, int level, char const *msg) 1793 { 1794 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1795 1796 if (level >= LOG_DEBUG) { 1797 SPDK_DEBUGLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1798 } else if (level >= LOG_INFO) { 1799 SPDK_INFOLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1800 } else if (level >= LOG_NOTICE) { 1801 SPDK_NOTICELOG("%s: %s\n", endpoint_id(endpoint), msg); 1802 } else if (level >= LOG_WARNING) { 1803 SPDK_WARNLOG("%s: %s\n", endpoint_id(endpoint), msg); 1804 } else { 1805 SPDK_ERRLOG("%s: %s\n", endpoint_id(endpoint), msg); 1806 } 1807 } 1808 1809 static int 1810 vfio_user_get_log_level(void) 1811 { 1812 int level; 1813 1814 if (SPDK_DEBUGLOG_FLAG_ENABLED("nvmf_vfio")) { 1815 return LOG_DEBUG; 1816 } 1817 1818 level = spdk_log_to_syslog_level(spdk_log_get_level()); 1819 if (level < 0) { 1820 return LOG_ERR; 1821 } 1822 1823 return level; 1824 } 1825 1826 static void 1827 init_pci_config_space(vfu_pci_config_space_t *p) 1828 { 1829 /* MLBAR */ 1830 p->hdr.bars[0].raw = 0x0; 1831 /* MUBAR */ 1832 p->hdr.bars[1].raw = 0x0; 1833 1834 /* vendor specific, let's set them to zero for now */ 1835 p->hdr.bars[3].raw = 0x0; 1836 p->hdr.bars[4].raw = 0x0; 
1837 p->hdr.bars[5].raw = 0x0; 1838 1839 /* enable INTx */ 1840 p->hdr.intr.ipin = 0x1; 1841 } 1842 1843 static int 1844 vfio_user_dev_info_fill(struct nvmf_vfio_user_transport *vu_transport, 1845 struct nvmf_vfio_user_endpoint *endpoint) 1846 { 1847 int ret; 1848 ssize_t cap_offset; 1849 vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx; 1850 1851 struct pmcap pmcap = { .hdr.id = PCI_CAP_ID_PM, .pmcs.nsfrst = 0x1 }; 1852 struct pxcap pxcap = { 1853 .hdr.id = PCI_CAP_ID_EXP, 1854 .pxcaps.ver = 0x2, 1855 .pxdcap = {.rer = 0x1, .flrc = 0x1}, 1856 .pxdcap2.ctds = 0x1 1857 }; 1858 1859 struct msixcap msixcap = { 1860 .hdr.id = PCI_CAP_ID_MSIX, 1861 .mxc.ts = NVME_IRQ_MSIX_NUM - 1, 1862 .mtab = {.tbir = 0x4, .to = 0x0}, 1863 .mpba = {.pbir = 0x5, .pbao = 0x0} 1864 }; 1865 1866 static struct iovec sparse_mmap[] = { 1867 { 1868 .iov_base = (void *)NVMF_VFIO_USER_DOORBELLS_OFFSET, 1869 .iov_len = NVMF_VFIO_USER_DOORBELLS_SIZE, 1870 }, 1871 }; 1872 1873 ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0); 1874 if (ret < 0) { 1875 SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", vfu_ctx); 1876 return ret; 1877 } 1878 vfu_pci_set_id(vfu_ctx, 0x4e58, 0x0001, 0, 0); 1879 /* 1880 * 0x02, controller uses the NVM Express programming interface 1881 * 0x08, non-volatile memory controller 1882 * 0x01, mass storage controller 1883 */ 1884 vfu_pci_set_class(vfu_ctx, 0x01, 0x08, 0x02); 1885 1886 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pmcap); 1887 if (cap_offset < 0) { 1888 SPDK_ERRLOG("vfu_ctx %p failed add pmcap\n", vfu_ctx); 1889 return ret; 1890 } 1891 1892 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pxcap); 1893 if (cap_offset < 0) { 1894 SPDK_ERRLOG("vfu_ctx %p failed add pxcap\n", vfu_ctx); 1895 return ret; 1896 } 1897 1898 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &msixcap); 1899 if (cap_offset < 0) { 1900 SPDK_ERRLOG("vfu_ctx %p failed add msixcap\n", vfu_ctx); 1901 return ret; 1902 } 1903 1904 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, NVME_REG_CFG_SIZE, 1905 access_pci_config, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1906 if (ret < 0) { 1907 SPDK_ERRLOG("vfu_ctx %p failed to setup cfg\n", vfu_ctx); 1908 return ret; 1909 } 1910 1911 if (vu_transport->transport_opts.disable_mappable_bar0) { 1912 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1913 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1914 NULL, 0, -1, 0); 1915 } else { 1916 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1917 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1918 sparse_mmap, 1, endpoint->devmem_fd, 0); 1919 } 1920 1921 if (ret < 0) { 1922 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 0\n", vfu_ctx); 1923 return ret; 1924 } 1925 1926 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR4_REGION_IDX, PAGE_SIZE, 1927 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1928 if (ret < 0) { 1929 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 4\n", vfu_ctx); 1930 return ret; 1931 } 1932 1933 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR5_REGION_IDX, PAGE_SIZE, 1934 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1935 if (ret < 0) { 1936 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 5\n", vfu_ctx); 1937 return ret; 1938 } 1939 1940 ret = vfu_setup_device_dma(vfu_ctx, memory_region_add_cb, memory_region_remove_cb); 1941 if (ret < 0) { 1942 SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", vfu_ctx); 1943 return ret; 1944 } 1945 1946 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); 1947 if (ret < 
0) { 1948 SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", vfu_ctx); 1949 return ret; 1950 } 1951 1952 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, NVME_IRQ_MSIX_NUM); 1953 if (ret < 0) { 1954 SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", vfu_ctx); 1955 return ret; 1956 } 1957 1958 ret = vfu_realize_ctx(vfu_ctx); 1959 if (ret < 0) { 1960 SPDK_ERRLOG("vfu_ctx %p failed to realize\n", vfu_ctx); 1961 return ret; 1962 } 1963 1964 endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx); 1965 assert(endpoint->pci_config_space != NULL); 1966 init_pci_config_space(endpoint->pci_config_space); 1967 1968 assert(cap_offset != 0); 1969 endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset); 1970 1971 return 0; 1972 } 1973 1974 static void 1975 _free_ctrlr(void *ctx) 1976 { 1977 struct nvmf_vfio_user_ctrlr *ctrlr = ctx; 1978 1979 spdk_poller_unregister(&ctrlr->vfu_ctx_poller); 1980 free(ctrlr); 1981 } 1982 1983 static void 1984 free_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr, bool free_qps) 1985 { 1986 int i; 1987 assert(ctrlr != NULL); 1988 1989 SPDK_DEBUGLOG(nvmf_vfio, "free %s\n", ctrlr_id(ctrlr)); 1990 1991 if (free_qps) { 1992 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1993 free_qp(ctrlr, i); 1994 } 1995 } 1996 1997 if (ctrlr->thread == spdk_get_thread()) { 1998 _free_ctrlr(ctrlr); 1999 } else { 2000 spdk_thread_send_msg(ctrlr->thread, _free_ctrlr, ctrlr); 2001 } 2002 } 2003 2004 static void 2005 nvmf_vfio_user_create_ctrlr(struct nvmf_vfio_user_transport *transport, 2006 struct nvmf_vfio_user_endpoint *endpoint) 2007 { 2008 struct nvmf_vfio_user_ctrlr *ctrlr; 2009 int err = 0; 2010 2011 /* First, construct a vfio-user CUSTOM transport controller */ 2012 ctrlr = calloc(1, sizeof(*ctrlr)); 2013 if (ctrlr == NULL) { 2014 err = -ENOMEM; 2015 goto out; 2016 } 2017 ctrlr->cntlid = 0xffff; 2018 ctrlr->transport = transport; 2019 ctrlr->endpoint = endpoint; 2020 ctrlr->doorbells = endpoint->doorbells; 2021 TAILQ_INIT(&ctrlr->connected_qps); 2022 2023 /* Then, construct an admin queue pair */ 2024 err = init_qp(ctrlr, &transport->transport, NVMF_VFIO_USER_DEFAULT_AQ_DEPTH, 0); 2025 if (err != 0) { 2026 free(ctrlr); 2027 goto out; 2028 } 2029 endpoint->ctrlr = ctrlr; 2030 2031 /* Notify the generic layer about the new admin queue pair */ 2032 spdk_nvmf_tgt_new_qpair(transport->transport.tgt, &ctrlr->qp[0]->qpair); 2033 2034 out: 2035 if (err != 0) { 2036 SPDK_ERRLOG("%s: failed to create vfio-user controller: %s\n", 2037 endpoint_id(endpoint), strerror(-err)); 2038 } 2039 } 2040 2041 static int 2042 nvmf_vfio_user_listen(struct spdk_nvmf_transport *transport, 2043 const struct spdk_nvme_transport_id *trid, 2044 struct spdk_nvmf_listen_opts *listen_opts) 2045 { 2046 struct nvmf_vfio_user_transport *vu_transport; 2047 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 2048 char *path = NULL; 2049 char uuid[PATH_MAX] = {}; 2050 int fd; 2051 int err; 2052 2053 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 2054 transport); 2055 2056 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 2057 /* Only compare traddr */ 2058 if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) { 2059 return -EEXIST; 2060 } 2061 } 2062 2063 endpoint = calloc(1, sizeof(*endpoint)); 2064 if (!endpoint) { 2065 return -ENOMEM; 2066 } 2067 2068 endpoint->devmem_fd = -1; 2069 memcpy(&endpoint->trid, trid, sizeof(endpoint->trid)); 2070 2071 err = asprintf(&path, "%s/bar0", 
static int
nvmf_vfio_user_listen(struct spdk_nvmf_transport *transport,
		      const struct spdk_nvme_transport_id *trid,
		      struct spdk_nvmf_listen_opts *listen_opts)
{
	struct nvmf_vfio_user_transport *vu_transport;
	struct nvmf_vfio_user_endpoint *endpoint, *tmp;
	char *path = NULL;
	char uuid[PATH_MAX] = {};
	int fd;
	int err;

	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
					transport);

	TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
		/* Only compare traddr */
		if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) {
			return -EEXIST;
		}
	}

	endpoint = calloc(1, sizeof(*endpoint));
	if (!endpoint) {
		return -ENOMEM;
	}

	endpoint->devmem_fd = -1;
	memcpy(&endpoint->trid, trid, sizeof(endpoint->trid));

	err = asprintf(&path, "%s/bar0", endpoint_id(endpoint));
	if (err == -1) {
		goto out;
	}

	fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
	if (fd == -1) {
		SPDK_ERRLOG("%s: failed to open device memory at %s: %m\n",
			    endpoint_id(endpoint), path);
		err = fd;
		free(path);
		goto out;
	}
	free(path);

	endpoint->devmem_fd = fd;
	err = ftruncate(fd, NVMF_VFIO_USER_DOORBELLS_OFFSET + NVMF_VFIO_USER_DOORBELLS_SIZE);
	if (err != 0) {
		goto out;
	}

	endpoint->doorbells = mmap(NULL, NVMF_VFIO_USER_DOORBELLS_SIZE,
				   PROT_READ | PROT_WRITE, MAP_SHARED, fd, NVMF_VFIO_USER_DOORBELLS_OFFSET);
	if (endpoint->doorbells == MAP_FAILED) {
		endpoint->doorbells = NULL;
		err = -errno;
		goto out;
	}

	snprintf(uuid, PATH_MAX, "%s/cntrl", endpoint_id(endpoint));

	endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, uuid, LIBVFIO_USER_FLAG_ATTACH_NB,
					   endpoint, VFU_DEV_TYPE_PCI);
	if (endpoint->vfu_ctx == NULL) {
		SPDK_ERRLOG("%s: error creating libvfio-user context: %m\n",
			    endpoint_id(endpoint));
		err = -1;
		goto out;
	}
	vfu_setup_log(endpoint->vfu_ctx, vfio_user_log, vfio_user_get_log_level());

	err = vfio_user_dev_info_fill(vu_transport, endpoint);
	if (err < 0) {
		goto out;
	}

	pthread_mutex_init(&endpoint->lock, NULL);
	TAILQ_INSERT_TAIL(&vu_transport->endpoints, endpoint, link);
	SPDK_DEBUGLOG(nvmf_vfio, "%s: doorbells %p\n", uuid, endpoint->doorbells);

out:
	if (err != 0) {
		nvmf_vfio_user_destroy_endpoint(endpoint);
	}

	return err;
}
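/*
 * A successful listen call leaves two artifacts under the listener's traddr,
 * which is expected to be a directory path (e.g. /var/run/vfio-user, given
 * here only as an example):
 *
 *   <traddr>/bar0   file backing the sparse-mmap'd doorbell page
 *   <traddr>/cntrl  UNIX-domain socket a vfio-user client (e.g. QEMU) connects
 *                   to; the connection is accepted later in nvmf_vfio_user_accept()
 */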
static void
nvmf_vfio_user_stop_listen(struct spdk_nvmf_transport *transport,
			   const struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_transport *vu_transport;
	struct nvmf_vfio_user_endpoint *endpoint, *tmp;

	assert(trid != NULL);
	assert(trid->traddr != NULL);

	SPDK_DEBUGLOG(nvmf_vfio, "%s: stop listen\n", trid->traddr);

	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
					transport);

	pthread_mutex_lock(&vu_transport->lock);
	TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
		if (strcmp(trid->traddr, endpoint->trid.traddr) == 0) {
			TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link);
			if (endpoint->ctrlr) {
				/* The user may kill the NVMe-oF target while the
				 * VM is still connected; free all resources.
				 */
				free_ctrlr(endpoint->ctrlr, true);
			}
			nvmf_vfio_user_destroy_endpoint(endpoint);
			pthread_mutex_unlock(&vu_transport->lock);

			return;
		}
	}
	pthread_mutex_unlock(&vu_transport->lock);

	SPDK_DEBUGLOG(nvmf_vfio, "%s: not found\n", trid->traddr);
}

static void
nvmf_vfio_user_cdata_init(struct spdk_nvmf_transport *transport,
			  struct spdk_nvmf_subsystem *subsystem,
			  struct spdk_nvmf_ctrlr_data *cdata)
{
	memset(&cdata->sgls, 0, sizeof(struct spdk_nvme_cdata_sgls));
	cdata->sgls.supported = SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED;
	/* libvfio-user can only support one connection for now, so reservations
	 * are not reported as supported.
	 */
	cdata->oncs.reservations = 0;
}

static int
nvmf_vfio_user_listen_associate(struct spdk_nvmf_transport *transport,
				const struct spdk_nvmf_subsystem *subsystem,
				const struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_transport *vu_transport;
	struct nvmf_vfio_user_endpoint *endpoint;

	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, transport);

	TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) {
		if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) {
			break;
		}
	}

	if (endpoint == NULL) {
		return -ENOENT;
	}

	endpoint->subsystem = subsystem;

	return 0;
}
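/*
 * listen_associate() is where the endpoint learns which subsystem it serves;
 * the stored pointer is later used by nvmf_vfio_user_poll_group_add() to fill
 * the subsystem NQN into the synthesized fabrics CONNECT data.
 */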
/*
 * Executed periodically at a default SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US
 * frequency.
 *
 * For each transport endpoint (which at the libvfio-user level corresponds to
 * a socket), if we don't currently have a controller set up, peek to see if the
 * socket is able to accept a new connection.
 *
 * This poller also takes care of handling the creation of any pending new
 * qpairs.
 *
 * Returns the number of events handled.
 */
static uint32_t
nvmf_vfio_user_accept(struct spdk_nvmf_transport *transport)
{
	struct nvmf_vfio_user_transport *vu_transport;
	struct nvmf_vfio_user_endpoint *endpoint;
	uint32_t count = 0;
	int err;

	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
					transport);

	pthread_mutex_lock(&vu_transport->lock);

	TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) {
		if (endpoint->ctrlr != NULL) {
			continue;
		}

		err = vfu_attach_ctx(endpoint->vfu_ctx);
		if (err != 0) {
			if (errno == EAGAIN || errno == EWOULDBLOCK) {
				continue;
			}

			pthread_mutex_unlock(&vu_transport->lock);
			return 1;
		}

		count++;

		/* Construct a controller */
		nvmf_vfio_user_create_ctrlr(vu_transport, endpoint);
	}

	pthread_mutex_unlock(&vu_transport->lock);

	return count;
}

static void
nvmf_vfio_user_discover(struct spdk_nvmf_transport *transport,
			struct spdk_nvme_transport_id *trid,
			struct spdk_nvmf_discovery_log_page_entry *entry)
{ }

static struct spdk_nvmf_transport_poll_group *
nvmf_vfio_user_poll_group_create(struct spdk_nvmf_transport *transport)
{
	struct nvmf_vfio_user_poll_group *vu_group;

	SPDK_DEBUGLOG(nvmf_vfio, "create poll group\n");

	vu_group = calloc(1, sizeof(*vu_group));
	if (vu_group == NULL) {
		SPDK_ERRLOG("Error allocating poll group: %m\n");
		return NULL;
	}

	TAILQ_INIT(&vu_group->qps);

	return &vu_group->group;
}

/* Called when the process exits. */
static void
nvmf_vfio_user_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
{
	struct nvmf_vfio_user_poll_group *vu_group;

	SPDK_DEBUGLOG(nvmf_vfio, "destroy poll group\n");

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);

	free(vu_group);
}

static void
vfio_user_qpair_disconnect_cb(void *ctx)
{
	struct nvmf_vfio_user_endpoint *endpoint = ctx;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	pthread_mutex_lock(&endpoint->lock);
	ctrlr = endpoint->ctrlr;
	if (!ctrlr) {
		pthread_mutex_unlock(&endpoint->lock);
		return;
	}

	if (TAILQ_EMPTY(&ctrlr->connected_qps)) {
		endpoint->ctrlr = NULL;
		free_ctrlr(ctrlr, false);
		pthread_mutex_unlock(&endpoint->lock);
		return;
	}
	pthread_mutex_unlock(&endpoint->lock);
}

static int
vfio_user_destroy_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr)
{
	struct nvmf_vfio_user_qpair *qpair;
	struct nvmf_vfio_user_endpoint *endpoint;

	SPDK_DEBUGLOG(nvmf_vfio, "%s stop processing\n", ctrlr_id(ctrlr));

	endpoint = ctrlr->endpoint;
	assert(endpoint != NULL);

	pthread_mutex_lock(&endpoint->lock);
	if (TAILQ_EMPTY(&ctrlr->connected_qps)) {
		endpoint->ctrlr = NULL;
		free_ctrlr(ctrlr, false);
		pthread_mutex_unlock(&endpoint->lock);
		return 0;
	}

	TAILQ_FOREACH(qpair, &ctrlr->connected_qps, tailq) {
		spdk_nvmf_qpair_disconnect(&qpair->qpair, vfio_user_qpair_disconnect_cb, endpoint);
	}
	pthread_mutex_unlock(&endpoint->lock);

	return 0;
}
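/*
 * Teardown flow, for reference: vfio_user_destroy_ctrlr() asks the generic
 * layer to disconnect every connected qpair; each disconnect eventually calls
 * back into vfio_user_qpair_disconnect_cb(), and whichever callback finds
 * connected_qps empty frees the controller. The per-queue resources themselves
 * are released earlier via nvmf_vfio_user_close_qpair()/free_qp().
 */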
/*
 * Poll for and process any incoming vfio-user messages.
 */
static int
vfio_user_poll_vfu_ctx(void *ctx)
{
	struct nvmf_vfio_user_ctrlr *ctrlr = ctx;
	int ret;

	assert(ctrlr != NULL);

	/* This will call access_bar0_fn() if there are any writes
	 * to the portion of the BAR that is not mmap'd.
	 */
	ret = vfu_run_ctx(ctrlr->endpoint->vfu_ctx);
	if (spdk_unlikely(ret == -1)) {
		spdk_poller_unregister(&ctrlr->vfu_ctx_poller);

		/* The initiator shut down or reset; wait for it to re-connect. */
		if (errno == ENOTCONN) {
			vfio_user_destroy_ctrlr(ctrlr);
			return SPDK_POLLER_BUSY;
		}

		fail_ctrlr(ctrlr);
	}

	return ret != 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
handle_queue_connect_rsp(struct nvmf_vfio_user_req *req, void *cb_arg)
{
	struct nvmf_vfio_user_poll_group *vu_group;
	struct nvmf_vfio_user_qpair *qpair = cb_arg;
	struct nvmf_vfio_user_ctrlr *ctrlr;
	struct nvmf_vfio_user_endpoint *endpoint;

	assert(qpair != NULL);
	assert(req != NULL);

	ctrlr = qpair->ctrlr;
	assert(ctrlr != NULL);

	endpoint = ctrlr->endpoint;
	assert(endpoint != NULL);

	if (spdk_nvme_cpl_is_error(&req->req.rsp->nvme_cpl)) {
		SPDK_ERRLOG("SC %u, SCT %u\n", req->req.rsp->nvme_cpl.status.sc, req->req.rsp->nvme_cpl.status.sct);
		endpoint->ctrlr = NULL;
		free_ctrlr(ctrlr, true);
		return -1;
	}

	vu_group = SPDK_CONTAINEROF(qpair->group, struct nvmf_vfio_user_poll_group, group);
	TAILQ_INSERT_TAIL(&vu_group->qps, qpair, link);
	qpair->state = VFIO_USER_QPAIR_ACTIVE;

	pthread_mutex_lock(&endpoint->lock);
	if (nvmf_qpair_is_admin_queue(&qpair->qpair)) {
		ctrlr->cntlid = qpair->qpair.ctrlr->cntlid;
		ctrlr->thread = spdk_get_thread();
		ctrlr->vfu_ctx_poller = SPDK_POLLER_REGISTER(vfio_user_poll_vfu_ctx, ctrlr, 0);
	} else {
		/* For I/O queues this command was generated in response to an
		 * admin CREATE I/O SUBMISSION QUEUE command which has not yet
		 * been completed. Complete it now.
		 */
		post_completion(ctrlr, &ctrlr->qp[0]->cq, 0, 0,
				qpair->create_io_sq_cmd.cid, SPDK_NVME_SC_SUCCESS, SPDK_NVME_SCT_GENERIC);
	}
	TAILQ_INSERT_TAIL(&ctrlr->connected_qps, qpair, tailq);
	pthread_mutex_unlock(&endpoint->lock);

	free(req->req.data);
	req->req.data = NULL;

	return 0;
}
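/*
 * For I/O queues, the callback above is also the point where the guest-visible
 * CREATE I/O SUBMISSION QUEUE admin command finally completes: the new SQ is
 * only usable once the internal fabrics CONNECT has succeeded, so its
 * completion is posted to the admin CQ here rather than in the admin command
 * handler.
 */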
/*
 * Add the given qpair to the given poll group. New qpairs are added via
 * spdk_nvmf_tgt_new_qpair(), which picks a poll group, then calls back
 * here via nvmf_transport_poll_group_add().
 */
static int
nvmf_vfio_user_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
			      struct spdk_nvmf_qpair *qpair)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;
	struct nvmf_vfio_user_ctrlr *ctrlr;
	struct spdk_nvmf_request *req;
	struct spdk_nvmf_fabric_connect_data *data;
	bool admin;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	vu_qpair->group = group;
	ctrlr = vu_qpair->ctrlr;

	SPDK_DEBUGLOG(nvmf_vfio, "%s: add QP%d=%p(%p) to poll_group=%p\n",
		      ctrlr_id(ctrlr), vu_qpair->qpair.qid,
		      vu_qpair, qpair, group);

	admin = nvmf_qpair_is_admin_queue(&vu_qpair->qpair);

	vu_req = get_nvmf_vfio_user_req(vu_qpair);
	if (vu_req == NULL) {
		return -1;
	}

	req = &vu_req->req;
	req->cmd->connect_cmd.opcode = SPDK_NVME_OPC_FABRIC;
	req->cmd->connect_cmd.cid = 0;
	req->cmd->connect_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT;
	req->cmd->connect_cmd.recfmt = 0;
	req->cmd->connect_cmd.sqsize = vu_qpair->qsize - 1;
	req->cmd->connect_cmd.qid = admin ? 0 : qpair->qid;

	req->length = sizeof(struct spdk_nvmf_fabric_connect_data);
	req->data = calloc(1, req->length);
	if (req->data == NULL) {
		nvmf_vfio_user_req_free(req);
		return -ENOMEM;
	}

	data = (struct spdk_nvmf_fabric_connect_data *)req->data;
	data->cntlid = admin ? 0xFFFF : ctrlr->cntlid;
	snprintf(data->subnqn, sizeof(data->subnqn), "%s",
		 spdk_nvmf_subsystem_get_nqn(ctrlr->endpoint->subsystem));

	vu_req->cb_fn = handle_queue_connect_rsp;
	vu_req->cb_arg = vu_qpair;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: sending connect fabrics command for QID=%#x cntlid=%#x\n",
		      ctrlr_id(ctrlr), qpair->qid, data->cntlid);

	spdk_nvmf_request_exec_fabrics(req);
	return 0;
}

static int
nvmf_vfio_user_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
				 struct spdk_nvmf_qpair *qpair)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *vu_ctrlr;
	struct nvmf_vfio_user_endpoint *endpoint;
	struct nvmf_vfio_user_poll_group *vu_group;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	vu_ctrlr = vu_qpair->ctrlr;
	endpoint = vu_ctrlr->endpoint;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: remove NVMf QP%d=%p from NVMf poll_group=%p\n",
		      ctrlr_id(vu_qpair->ctrlr), qpair->qid, qpair, group);

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);
	TAILQ_REMOVE(&vu_group->qps, vu_qpair, link);

	pthread_mutex_lock(&endpoint->lock);
	TAILQ_REMOVE(&vu_ctrlr->connected_qps, vu_qpair, tailq);
	pthread_mutex_unlock(&endpoint->lock);

	return 0;
}
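/*
 * Request lifecycle, for reference: each qpair owns a fixed pool of
 * nvmf_vfio_user_req objects (reqs_internal, qsize entries).
 * get_nvmf_vfio_user_req() pops a free entry, handle_cmd_req() or
 * nvmf_vfio_user_poll_group_add() fills it in, and _nvmf_vfio_user_req_free()
 * below clears it and returns it to the per-qpair free list.
 */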
static void
_nvmf_vfio_user_req_free(struct nvmf_vfio_user_qpair *vu_qpair, struct nvmf_vfio_user_req *vu_req)
{
	memset(&vu_req->cmd, 0, sizeof(vu_req->cmd));
	memset(&vu_req->rsp, 0, sizeof(vu_req->rsp));
	vu_req->iovcnt = 0;
	vu_req->state = VFIO_USER_REQUEST_STATE_FREE;

	TAILQ_INSERT_TAIL(&vu_qpair->reqs, vu_req, link);
}

static int
nvmf_vfio_user_req_free(struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;

	assert(req != NULL);

	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);

	_nvmf_vfio_user_req_free(vu_qpair, vu_req);

	return 0;
}

static int
nvmf_vfio_user_req_complete(struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;

	assert(req != NULL);

	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);

	if (vu_req->cb_fn != NULL) {
		if (vu_req->cb_fn(vu_req, vu_req->cb_arg) != 0) {
			fail_ctrlr(vu_qpair->ctrlr);
		}
	}

	_nvmf_vfio_user_req_free(vu_qpair, vu_req);

	return 0;
}

static void
nvmf_vfio_user_close_qpair(struct spdk_nvmf_qpair *qpair,
			   spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
{
	struct nvmf_vfio_user_qpair *vu_qpair;

	assert(qpair != NULL);
	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	free_qp(vu_qpair->ctrlr, qpair->qid);

	if (cb_fn) {
		cb_fn(cb_arg);
	}
}

/**
 * Returns a preallocated spdk_nvmf_request or NULL if there isn't one available.
 */
static struct nvmf_vfio_user_req *
get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_req *req;

	assert(qpair != NULL);

	if (TAILQ_EMPTY(&qpair->reqs)) {
		return NULL;
	}

	req = TAILQ_FIRST(&qpair->reqs);
	TAILQ_REMOVE(&qpair->reqs, req, link);

	return req;
}

static int
get_nvmf_io_req_length(struct spdk_nvmf_request *req)
{
	uint16_t nr;
	uint32_t nlb, nsid;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
	struct spdk_nvmf_ns *ns;

	nsid = cmd->nsid;
	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
	if (ns == NULL || ns->bdev == NULL) {
		SPDK_ERRLOG("unsuccessful query for nsid %u\n", cmd->nsid);
		return -EINVAL;
	}

	if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
		nr = cmd->cdw10_bits.dsm.nr + 1;
		return nr * sizeof(struct spdk_nvme_dsm_range);
	}

	nlb = (cmd->cdw12 & 0x0000ffffu) + 1;
	return nlb * spdk_bdev_get_block_size(ns->bdev);
}
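/*
 * Worked example for get_nvmf_io_req_length(), assuming a bdev with 512-byte
 * blocks: a READ with NLB = 7 in CDW12[15:0] describes 8 logical blocks, so the
 * function returns (7 + 1) * 512 = 4096 bytes; a DSM command with NR = 3
 * returns (3 + 1) * sizeof(struct spdk_nvme_dsm_range) = 4 * 16 = 64 bytes.
 */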
static int
map_admin_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint32_t len = 0;
	uint8_t fid;
	int iovcnt;

	req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
	req->length = 0;
	req->data = NULL;

	if (req->xfer == SPDK_NVME_DATA_NONE) {
		return 0;
	}

	switch (cmd->opc) {
	case SPDK_NVME_OPC_IDENTIFY:
		len = 4096;
		break;
	case SPDK_NVME_OPC_GET_LOG_PAGE:
		len = (((cmd->cdw11_bits.get_log_page.numdu << 16) | cmd->cdw10_bits.get_log_page.numdl) + 1) * 4;
		break;
	case SPDK_NVME_OPC_GET_FEATURES:
	case SPDK_NVME_OPC_SET_FEATURES:
		fid = cmd->cdw10_bits.set_features.fid;
		switch (fid) {
		case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
			len = 4096;
			break;
		case SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION:
			len = 256;
			break;
		case SPDK_NVME_FEAT_TIMESTAMP:
			len = 8;
			break;
		case SPDK_NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
			len = 512;
			break;
		case SPDK_NVME_FEAT_HOST_IDENTIFIER:
			if (cmd->cdw11_bits.feat_host_identifier.bits.exhid) {
				len = 16;
			} else {
				len = 8;
			}
			break;
		default:
			return 0;
		}
		break;
	default:
		return 0;
	}

	/* Admin commands do not use SGLs here; only PRPs are accepted. */
	if (cmd->psdt != 0) {
		return -EINVAL;
	}

	iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, len);
	if (iovcnt < 0) {
		SPDK_ERRLOG("%s: failed to map admin command opc 0x%x\n",
			    ctrlr_id(ctrlr), cmd->opc);
		return -1;
	}
	req->length = len;
	req->data = req->iov[0].iov_base;
	req->iovcnt = iovcnt;

	return 0;
}

/*
 * Map an I/O command's buffers.
 *
 * Returns 0 on success and -errno on failure.
 */
static int
map_io_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
{
	int len, iovcnt;
	struct spdk_nvme_cmd *cmd;

	assert(ctrlr != NULL);
	assert(req != NULL);

	cmd = &req->cmd->nvme_cmd;
	req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
	req->length = 0;
	req->data = NULL;

	if (spdk_unlikely(req->xfer == SPDK_NVME_DATA_NONE)) {
		return 0;
	}

	len = get_nvmf_io_req_length(req);
	if (len < 0) {
		return -EINVAL;
	}
	req->length = len;

	iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, req->length);
	if (iovcnt < 0) {
		SPDK_ERRLOG("%s: failed to map IO OPC %u\n", ctrlr_id(ctrlr), cmd->opc);
		return -EFAULT;
	}
	req->data = req->iov[0].iov_base;
	req->iovcnt = iovcnt;

	return 0;
}

static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct nvmf_vfio_user_qpair *vu_qpair)
{
	int err;
	struct nvmf_vfio_user_req *vu_req;
	struct spdk_nvmf_request *req;

	assert(ctrlr != NULL);
	assert(cmd != NULL);

	vu_req = get_nvmf_vfio_user_req(vu_qpair);
	if (spdk_unlikely(vu_req == NULL)) {
		SPDK_ERRLOG("%s: no request for NVMe command opc 0x%x\n", ctrlr_id(ctrlr), cmd->opc);
		return post_completion(ctrlr, &vu_qpair->cq, 0, 0, cmd->cid,
				       SPDK_NVME_SC_INTERNAL_DEVICE_ERROR, SPDK_NVME_SCT_GENERIC);
	}
	req = &vu_req->req;

	assert(req->qpair != NULL);
	SPDK_DEBUGLOG(nvmf_vfio, "%s: handle qid%u, req opc=%#x cid=%d\n",
		      ctrlr_id(ctrlr), req->qpair->qid, cmd->opc, cmd->cid);

	vu_req->cb_fn = handle_cmd_rsp;
	vu_req->cb_arg = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);
	req->cmd->nvme_cmd = *cmd;

	if (nvmf_qpair_is_admin_queue(req->qpair)) {
		err = map_admin_cmd_req(ctrlr, req);
	} else {
		switch (cmd->opc) {
		case SPDK_NVME_OPC_RESERVATION_REGISTER:
		case SPDK_NVME_OPC_RESERVATION_REPORT:
		case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
		case SPDK_NVME_OPC_RESERVATION_RELEASE:
			err = -ENOTSUP;
			break;
		default:
			err = map_io_cmd_req(ctrlr, req);
			break;
		}
	}

	if (spdk_unlikely(err < 0)) {
		SPDK_ERRLOG("%s: failed to process NVMe command opc 0x%x\n",
			    ctrlr_id(ctrlr), cmd->opc);
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		err = handle_cmd_rsp(vu_req, vu_req->cb_arg);
		_nvmf_vfio_user_req_free(vu_qpair, vu_req);
		return err;
	}

	vu_req->state = VFIO_USER_REQUEST_STATE_EXECUTING;
	spdk_nvmf_request_exec(req);

	return 0;
}
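/*
 * handle_cmd_req() is the bridge between the guest-visible queues and the
 * generic nvmf layer: the SQ entry is copied into a transport request, its
 * PRP-described buffers are mapped into local iovecs, and the request is
 * executed. handle_cmd_rsp(), registered as cb_fn, later posts the completion
 * back to the matching CQ.
 */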
/* Returns the number of commands processed, or a negative value on error. */
static int
nvmf_vfio_user_qpair_poll(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_ctrlr *ctrlr;
	uint32_t new_tail;
	int count = 0;

	assert(qpair != NULL);

	ctrlr = qpair->ctrlr;

	/* Load-Acquire. */
	new_tail = *tdbl(ctrlr, &qpair->sq);

	/*
	 * Ensure that changes to the queue are visible to us.
	 * The host driver should write the queue first, do a wmb(), and then
	 * update the SQ tail doorbell (their Store-Release).
	 */
	spdk_rmb();

	new_tail = new_tail & 0xffffu;
	if (spdk_unlikely(new_tail >= qpair->sq.size)) {
		union spdk_nvme_async_event_completion event = {};

		SPDK_DEBUGLOG(nvmf_vfio, "%s: invalid SQ%u doorbell value %u\n", ctrlr_id(ctrlr),
			      qpair->qpair.qid, new_tail);
		event.bits.async_event_type = SPDK_NVME_ASYNC_EVENT_TYPE_ERROR;
		event.bits.async_event_info = SPDK_NVME_ASYNC_EVENT_INVALID_DB_WRITE;
		nvmf_ctrlr_async_event_error_event(qpair->qpair.ctrlr, event);

		return 0;
	}

	if (sq_head(qpair) == new_tail) {
		return 0;
	}

	count = handle_sq_tdbl_write(ctrlr, new_tail, qpair);
	if (count < 0) {
		fail_ctrlr(ctrlr);
	}

	return count;
}
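/*
 * A sketch of the ordering contract assumed above, written as host/guest
 * driver pseudo-code (it is not part of this target):
 *
 *	sq[tail] = sqe;                 // write the submission queue entry
 *	spdk_wmb();                     // make the SQE visible before the doorbell
 *	*sq_tail_doorbell = ++tail;     // Store-Release
 *
 * The spdk_rmb() in nvmf_vfio_user_qpair_poll() pairs with that wmb(): once the
 * new tail value is observed, the SQE contents written before the doorbell are
 * guaranteed to be visible here.
 */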
/*
 * vfio-user transport poll handler. Note that the library context is polled in
 * a separate poller (->vfu_ctx_poller), so this poller only needs to poll the
 * active qpairs.
 *
 * Returns the number of commands processed, or a negative value on error.
 */
static int
nvmf_vfio_user_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
{
	struct nvmf_vfio_user_poll_group *vu_group;
	struct nvmf_vfio_user_qpair *vu_qpair, *tmp;
	int count = 0;

	assert(group != NULL);

	spdk_rmb();

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);

	TAILQ_FOREACH_SAFE(vu_qpair, &vu_group->qps, link, tmp) {
		int ret;

		if (spdk_unlikely(vu_qpair->state != VFIO_USER_QPAIR_ACTIVE || !vu_qpair->sq.size)) {
			continue;
		}

		ret = nvmf_vfio_user_qpair_poll(vu_qpair);

		if (ret < 0) {
			return ret;
		}

		count += ret;
	}

	return count;
}

static int
nvmf_vfio_user_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
				    struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	ctrlr = vu_qpair->ctrlr;

	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
	return 0;
}

static int
nvmf_vfio_user_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
				   struct spdk_nvme_transport_id *trid)
{
	return 0;
}

static int
nvmf_vfio_user_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
				     struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	ctrlr = vu_qpair->ctrlr;

	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
	return 0;
}

static void
nvmf_vfio_user_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
				   struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req, *vu_req_to_abort = NULL;
	uint32_t i;
	uint16_t cid;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

	cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
	for (i = 0; i < vu_qpair->qsize; i++) {
		vu_req = &vu_qpair->reqs_internal[i];
		if (vu_req->state == VFIO_USER_REQUEST_STATE_EXECUTING && vu_req->cmd.cid == cid) {
			vu_req_to_abort = vu_req;
			break;
		}
	}

	if (vu_req_to_abort == NULL) {
		spdk_nvmf_request_complete(req);
		return;
	}

	req->req_to_abort = &vu_req_to_abort->req;
	nvmf_ctrlr_abort_request(req);
}

static void
nvmf_vfio_user_opts_init(struct spdk_nvmf_transport_opts *opts)
{
	opts->max_queue_depth = NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH;
	opts->max_qpairs_per_ctrlr = NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR;
	opts->in_capsule_data_size = 0;
	opts->max_io_size = NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE;
	opts->io_unit_size = NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE;
	opts->max_aq_depth = NVMF_VFIO_USER_DEFAULT_AQ_DEPTH;
	opts->num_shared_buffers = 0;
	opts->buf_cache_size = 0;
	opts->association_timeout = 0;
	opts->transport_specific = NULL;
}

const struct spdk_nvmf_transport_ops spdk_nvmf_transport_vfio_user = {
	.name = "VFIOUSER",
	.type = SPDK_NVME_TRANSPORT_VFIOUSER,
	.opts_init = nvmf_vfio_user_opts_init,
	.create = nvmf_vfio_user_create,
	.destroy = nvmf_vfio_user_destroy,

	.listen = nvmf_vfio_user_listen,
	.stop_listen = nvmf_vfio_user_stop_listen,
	.accept = nvmf_vfio_user_accept,
	.cdata_init = nvmf_vfio_user_cdata_init,
	.listen_associate = nvmf_vfio_user_listen_associate,

	.listener_discover = nvmf_vfio_user_discover,

	.poll_group_create = nvmf_vfio_user_poll_group_create,
	.poll_group_destroy = nvmf_vfio_user_poll_group_destroy,
	.poll_group_add = nvmf_vfio_user_poll_group_add,
	.poll_group_remove = nvmf_vfio_user_poll_group_remove,
	.poll_group_poll = nvmf_vfio_user_poll_group_poll,

	.req_free = nvmf_vfio_user_req_free,
	.req_complete = nvmf_vfio_user_req_complete,

	.qpair_fini = nvmf_vfio_user_close_qpair,
	.qpair_get_local_trid = nvmf_vfio_user_qpair_get_local_trid,
	.qpair_get_peer_trid = nvmf_vfio_user_qpair_get_peer_trid,
	.qpair_get_listen_trid = nvmf_vfio_user_qpair_get_listen_trid,
	.qpair_abort_request = nvmf_vfio_user_qpair_abort_request,
};

SPDK_NVMF_TRANSPORT_REGISTER(muser, &spdk_nvmf_transport_vfio_user);
SPDK_LOG_REGISTER_COMPONENT(nvmf_vfio)
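/*
 * Typical usage, as a sketch only: the transport is registered under the name
 * "VFIOUSER", so it can normally be set up with the standard SPDK RPCs, e.g.
 *
 *	scripts/rpc.py nvmf_create_transport -t VFIOUSER
 *	scripts/rpc.py nvmf_create_subsystem nqn.2019-07.io.spdk:cnode0 -a
 *	scripts/rpc.py nvmf_subsystem_add_listener nqn.2019-07.io.spdk:cnode0 \
 *	    -t VFIOUSER -a /var/run/vfio-user -s 0
 *
 * where -a names the directory that nvmf_vfio_user_listen() populates with the
 * bar0 file and the cntrl socket. The exact RPC arguments may differ between
 * SPDK versions; treat this as an illustration rather than a reference
 * invocation.
 */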