1 /*- 2 * BSD LICENSE 3 * Copyright (c) Intel Corporation. All rights reserved. 4 * Copyright (c) 2019, Nutanix Inc. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * * Neither the name of Intel Corporation nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * NVMe over vfio-user transport 35 */ 36 37 #include <vfio-user/libvfio-user.h> 38 #include <vfio-user/pci_defs.h> 39 40 #include "spdk/barrier.h" 41 #include "spdk/stdinc.h" 42 #include "spdk/assert.h" 43 #include "spdk/thread.h" 44 #include "spdk/nvmf_transport.h" 45 #include "spdk/sock.h" 46 #include "spdk/string.h" 47 #include "spdk/util.h" 48 #include "spdk/log.h" 49 50 #include "transport.h" 51 52 #include "nvmf_internal.h" 53 54 #define NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH 256 55 #define NVMF_VFIO_USER_DEFAULT_AQ_DEPTH 32 56 #define NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 64 57 #define NVMF_VFIO_USER_DEFAULT_IN_CAPSULE_DATA_SIZE 0 58 #define NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE ((NVMF_REQ_MAX_BUFFERS - 1) << SHIFT_4KB) 59 #define NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE 60 #define NVMF_VFIO_USER_DEFAULT_NUM_SHARED_BUFFERS 512 /* internal buf size */ 61 #define NVMF_VFIO_USER_DEFAULT_BUFFER_CACHE_SIZE 0 62 63 #define NVMF_VFIO_USER_DOORBELLS_OFFSET 0x1000 64 #define NVMF_VFIO_USER_DOORBELLS_SIZE 0x1000 65 66 #define NVME_REG_CFG_SIZE 0x1000 67 #define NVME_REG_BAR0_SIZE 0x4000 68 #define NVME_IRQ_INTX_NUM 1 69 #define NVME_IRQ_MSIX_NUM NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 70 71 struct nvmf_vfio_user_req; 72 struct nvmf_vfio_user_qpair; 73 74 typedef int (*nvmf_vfio_user_req_cb_fn)(struct nvmf_vfio_user_req *req, void *cb_arg); 75 76 /* 1 more for PRP2 list itself */ 77 #define NVMF_VFIO_USER_MAX_IOVECS (NVMF_REQ_MAX_BUFFERS + 1) 78 79 enum nvmf_vfio_user_req_state { 80 VFIO_USER_REQUEST_STATE_FREE = 0, 81 VFIO_USER_REQUEST_STATE_EXECUTING, 82 }; 83 84 struct nvmf_vfio_user_req { 85 struct spdk_nvmf_request req; 86 struct spdk_nvme_cpl rsp; 87 struct spdk_nvme_cmd cmd; 88 89 enum nvmf_vfio_user_req_state state; 90 nvmf_vfio_user_req_cb_fn cb_fn; 91 void *cb_arg; 92 93 /* 
placeholder for gpa_to_vva memory map table, the IO buffer doesn't use it */ 94 dma_sg_t sg[NVMF_VFIO_USER_MAX_IOVECS]; 95 struct iovec iov[NVMF_VFIO_USER_MAX_IOVECS]; 96 uint8_t iovcnt; 97 98 TAILQ_ENTRY(nvmf_vfio_user_req) link; 99 }; 100 101 /* 102 * A NVMe queue. 103 */ 104 struct nvme_q { 105 bool is_cq; 106 107 void *addr; 108 109 dma_sg_t sg; 110 struct iovec iov; 111 112 uint32_t size; 113 uint64_t prp1; 114 115 union { 116 struct { 117 uint32_t head; 118 /* multiple SQs can be mapped to the same CQ */ 119 uint16_t cqid; 120 }; 121 struct { 122 uint32_t tail; 123 uint16_t iv; 124 bool ien; 125 }; 126 }; 127 }; 128 129 enum nvmf_vfio_user_qpair_state { 130 VFIO_USER_QPAIR_UNINITIALIZED = 0, 131 VFIO_USER_QPAIR_ACTIVE, 132 VFIO_USER_QPAIR_DELETED, 133 VFIO_USER_QPAIR_INACTIVE, 134 VFIO_USER_QPAIR_ERROR, 135 }; 136 137 struct nvmf_vfio_user_qpair { 138 struct spdk_nvmf_qpair qpair; 139 struct spdk_nvmf_transport_poll_group *group; 140 struct nvmf_vfio_user_ctrlr *ctrlr; 141 struct nvmf_vfio_user_req *reqs_internal; 142 uint16_t qsize; 143 struct nvme_q cq; 144 struct nvme_q sq; 145 enum nvmf_vfio_user_qpair_state state; 146 147 TAILQ_HEAD(, nvmf_vfio_user_req) reqs; 148 TAILQ_ENTRY(nvmf_vfio_user_qpair) link; 149 }; 150 151 struct nvmf_vfio_user_poll_group { 152 struct spdk_nvmf_transport_poll_group group; 153 TAILQ_HEAD(, nvmf_vfio_user_qpair) qps; 154 }; 155 156 struct nvmf_vfio_user_ctrlr { 157 struct nvmf_vfio_user_endpoint *endpoint; 158 struct nvmf_vfio_user_transport *transport; 159 160 /* Number of connected queue pairs */ 161 uint32_t num_connected_qps; 162 163 struct spdk_thread *thread; 164 struct spdk_poller *mmio_poller; 165 166 uint16_t cntlid; 167 168 struct nvmf_vfio_user_qpair *qp[NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR]; 169 170 TAILQ_ENTRY(nvmf_vfio_user_ctrlr) link; 171 172 volatile uint32_t *doorbells; 173 174 /* internal CSTS.CFS register for vfio-user fatal errors */ 175 uint32_t cfs : 1; 176 }; 177 178 struct nvmf_vfio_user_endpoint { 179 vfu_ctx_t *vfu_ctx; 180 struct msixcap *msix; 181 vfu_pci_config_space_t *pci_config_space; 182 int fd; 183 volatile uint32_t *doorbells; 184 185 struct spdk_nvme_transport_id trid; 186 const struct spdk_nvmf_subsystem *subsystem; 187 188 struct nvmf_vfio_user_ctrlr *ctrlr; 189 pthread_mutex_t lock; 190 191 TAILQ_ENTRY(nvmf_vfio_user_endpoint) link; 192 }; 193 194 struct nvmf_vfio_user_transport_opts { 195 bool disable_mappable_bar0; 196 }; 197 198 struct nvmf_vfio_user_transport { 199 struct spdk_nvmf_transport transport; 200 struct nvmf_vfio_user_transport_opts transport_opts; 201 pthread_mutex_t lock; 202 TAILQ_HEAD(, nvmf_vfio_user_endpoint) endpoints; 203 204 TAILQ_HEAD(, nvmf_vfio_user_qpair) new_qps; 205 }; 206 207 /* 208 * function prototypes 209 */ 210 static volatile uint32_t * 211 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 212 213 static volatile uint32_t * 214 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 215 216 static int 217 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req); 218 219 static struct nvmf_vfio_user_req * 220 get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair); 221 222 static int 223 post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd, 224 struct nvme_q *cq, uint32_t cdw0, uint16_t sc, 225 uint16_t sct); 226 227 static char * 228 endpoint_id(struct nvmf_vfio_user_endpoint *endpoint) 229 { 230 return endpoint->trid.traddr; 231 } 232 233 static char * 234 ctrlr_id(struct nvmf_vfio_user_ctrlr *ctrlr) 235 { 236 if (!ctrlr || 
!ctrlr->endpoint) { 237 return "Null Ctrlr"; 238 } 239 240 return endpoint_id(ctrlr->endpoint); 241 } 242 243 static uint16_t 244 io_q_id(struct nvme_q *q) 245 { 246 247 struct nvmf_vfio_user_qpair *vfio_user_qpair; 248 249 assert(q); 250 251 if (q->is_cq) { 252 vfio_user_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, cq); 253 } else { 254 vfio_user_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, sq); 255 } 256 assert(vfio_user_qpair); 257 return vfio_user_qpair->qpair.qid; 258 } 259 260 static void 261 fail_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 262 { 263 assert(ctrlr != NULL); 264 265 if (ctrlr->cfs == 0) { 266 SPDK_ERRLOG(":%s failing controller\n", ctrlr_id(ctrlr)); 267 } 268 269 ctrlr->cfs = 1U; 270 } 271 272 static bool 273 ctrlr_interrupt_enabled(struct nvmf_vfio_user_ctrlr *ctrlr) 274 { 275 assert(ctrlr != NULL); 276 assert(ctrlr->endpoint != NULL); 277 278 vfu_pci_config_space_t *pci = ctrlr->endpoint->pci_config_space; 279 280 return (!pci->hdr.cmd.id || ctrlr->endpoint->msix->mxc.mxe); 281 } 282 283 static void 284 nvmf_vfio_user_destroy_endpoint(struct nvmf_vfio_user_endpoint *endpoint) 285 { 286 if (endpoint->doorbells) { 287 munmap((void *)endpoint->doorbells, NVMF_VFIO_USER_DOORBELLS_SIZE); 288 } 289 290 if (endpoint->fd > 0) { 291 close(endpoint->fd); 292 } 293 294 vfu_destroy_ctx(endpoint->vfu_ctx); 295 296 pthread_mutex_destroy(&endpoint->lock); 297 free(endpoint); 298 } 299 300 /* called when process exits */ 301 static int 302 nvmf_vfio_user_destroy(struct spdk_nvmf_transport *transport, 303 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg) 304 { 305 struct nvmf_vfio_user_transport *vu_transport; 306 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 307 308 SPDK_DEBUGLOG(nvmf_vfio, "destroy transport\n"); 309 310 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 311 transport); 312 313 (void)pthread_mutex_destroy(&vu_transport->lock); 314 315 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 316 TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link); 317 nvmf_vfio_user_destroy_endpoint(endpoint); 318 } 319 320 free(vu_transport); 321 322 if (cb_fn) { 323 cb_fn(cb_arg); 324 } 325 326 return 0; 327 } 328 329 static const struct spdk_json_object_decoder vfio_user_transport_opts_decoder[] = { 330 { 331 "disable-mappable-bar0", 332 offsetof(struct nvmf_vfio_user_transport, transport_opts.disable_mappable_bar0), 333 spdk_json_decode_bool, true 334 }, 335 }; 336 337 static struct spdk_nvmf_transport * 338 nvmf_vfio_user_create(struct spdk_nvmf_transport_opts *opts) 339 { 340 struct nvmf_vfio_user_transport *vu_transport; 341 int err; 342 343 vu_transport = calloc(1, sizeof(*vu_transport)); 344 if (vu_transport == NULL) { 345 SPDK_ERRLOG("Transport alloc fail: %m\n"); 346 return NULL; 347 } 348 349 err = pthread_mutex_init(&vu_transport->lock, NULL); 350 if (err != 0) { 351 SPDK_ERRLOG("Pthread initialisation failed (%d)\n", err); 352 goto err; 353 } 354 355 TAILQ_INIT(&vu_transport->endpoints); 356 TAILQ_INIT(&vu_transport->new_qps); 357 358 if (opts->transport_specific != NULL && 359 spdk_json_decode_object_relaxed(opts->transport_specific, vfio_user_transport_opts_decoder, 360 SPDK_COUNTOF(vfio_user_transport_opts_decoder), 361 vu_transport)) { 362 SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n"); 363 free(vu_transport); 364 return NULL; 365 } 366 367 SPDK_DEBUGLOG(nvmf_vfio, "vfio_user transport: disable_mappable_bar0=%d\n", 368 vu_transport->transport_opts.disable_mappable_bar0); 369 370 return 
&vu_transport->transport; 371 372 err: 373 free(vu_transport); 374 375 return NULL; 376 } 377 378 static uint16_t 379 max_queue_size(struct nvmf_vfio_user_ctrlr const *ctrlr) 380 { 381 assert(ctrlr != NULL); 382 assert(ctrlr->qp[0] != NULL); 383 assert(ctrlr->qp[0]->qpair.ctrlr != NULL); 384 385 return ctrlr->qp[0]->qpair.ctrlr->vcprop.cap.bits.mqes + 1; 386 } 387 388 static void * 389 map_one(vfu_ctx_t *ctx, uint64_t addr, uint64_t len, dma_sg_t *sg, struct iovec *iov) 390 { 391 int ret; 392 393 assert(ctx != NULL); 394 assert(sg != NULL); 395 assert(iov != NULL); 396 397 ret = vfu_addr_to_sg(ctx, (void *)(uintptr_t)addr, len, sg, 1, PROT_READ | PROT_WRITE); 398 if (ret < 0) { 399 return NULL; 400 } 401 402 ret = vfu_map_sg(ctx, sg, iov, 1); 403 if (ret != 0) { 404 return NULL; 405 } 406 407 assert(iov->iov_base != NULL); 408 return iov->iov_base; 409 } 410 411 static uint32_t 412 sq_head(struct nvmf_vfio_user_qpair *qpair) 413 { 414 assert(qpair != NULL); 415 return qpair->sq.head; 416 } 417 418 static void 419 sqhd_advance(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair) 420 { 421 assert(ctrlr != NULL); 422 assert(qpair != NULL); 423 qpair->sq.head = (qpair->sq.head + 1) % qpair->sq.size; 424 } 425 426 static void 427 insert_queue(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q, 428 const bool is_cq, const uint16_t id) 429 { 430 struct nvme_q *_q; 431 struct nvmf_vfio_user_qpair *qpair; 432 433 assert(ctrlr != NULL); 434 assert(q != NULL); 435 436 qpair = ctrlr->qp[id]; 437 438 q->is_cq = is_cq; 439 if (is_cq) { 440 _q = &qpair->cq; 441 *_q = *q; 442 *hdbl(ctrlr, _q) = 0; 443 } else { 444 _q = &qpair->sq; 445 *_q = *q; 446 *tdbl(ctrlr, _q) = 0; 447 } 448 } 449 450 static int 451 asq_map(struct nvmf_vfio_user_ctrlr *ctrlr) 452 { 453 struct nvme_q q = {}; 454 const struct spdk_nvmf_registers *regs; 455 456 assert(ctrlr != NULL); 457 assert(ctrlr->qp[0] != NULL); 458 assert(ctrlr->qp[0]->sq.addr == NULL); 459 /* XXX ctrlr->asq == 0 is a valid memory address */ 460 461 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 462 q.size = regs->aqa.bits.asqs + 1; 463 q.head = ctrlr->doorbells[0] = 0; 464 q.cqid = 0; 465 q.addr = map_one(ctrlr->endpoint->vfu_ctx, regs->asq, 466 q.size * sizeof(struct spdk_nvme_cmd), &q.sg, &q.iov); 467 if (q.addr == NULL) { 468 return -1; 469 } 470 memset(q.addr, 0, q.size * sizeof(struct spdk_nvme_cmd)); 471 insert_queue(ctrlr, &q, false, 0); 472 473 return 0; 474 } 475 476 static uint16_t 477 cq_next(struct nvme_q *q) 478 { 479 assert(q != NULL); 480 assert(q->is_cq); 481 return (q->tail + 1) % q->size; 482 } 483 484 static int 485 queue_index(uint16_t qid, int is_cq) 486 { 487 return (qid * 2) + is_cq; 488 } 489 490 static volatile uint32_t * 491 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 492 { 493 assert(ctrlr != NULL); 494 assert(q != NULL); 495 assert(!q->is_cq); 496 497 return &ctrlr->doorbells[queue_index(io_q_id(q), false)]; 498 } 499 500 static volatile uint32_t * 501 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 502 { 503 assert(ctrlr != NULL); 504 assert(q != NULL); 505 assert(q->is_cq); 506 507 return &ctrlr->doorbells[queue_index(io_q_id(q), true)]; 508 } 509 510 static bool 511 cq_is_full(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 512 { 513 assert(ctrlr != NULL); 514 assert(q != NULL); 515 return cq_next(q) == *hdbl(ctrlr, q); 516 } 517 518 static void 519 cq_tail_advance(struct nvme_q *q) 520 { 521 assert(q != NULL); 522 q->tail = cq_next(q); 523 } 524 525 static int 526 
acq_map(struct nvmf_vfio_user_ctrlr *ctrlr)
{
	struct nvme_q q = {};
	const struct spdk_nvmf_registers *regs;

	assert(ctrlr != NULL);
	assert(ctrlr->qp[0] != NULL);
	assert(ctrlr->qp[0]->cq.addr == NULL);

	regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
	assert(regs != NULL);

	q.size = regs->aqa.bits.acqs + 1;
	q.tail = 0;
	q.addr = map_one(ctrlr->endpoint->vfu_ctx, regs->acq,
			 q.size * sizeof(struct spdk_nvme_cpl), &q.sg, &q.iov);
	if (q.addr == NULL) {
		return -1;
	}
	memset(q.addr, 0, q.size * sizeof(struct spdk_nvme_cpl));
	q.is_cq = true;
	q.ien = true;
	insert_queue(ctrlr, &q, true, 0);

	return 0;
}

static void *
_map_one(void *prv, uint64_t addr, uint64_t len)
{
	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)prv;
	struct spdk_nvmf_qpair *qpair;
	struct nvmf_vfio_user_req *vu_req;
	struct nvmf_vfio_user_qpair *vu_qpair;
	void *ret;

	assert(req != NULL);
	qpair = req->qpair;
	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

	assert(vu_req->iovcnt < NVMF_VFIO_USER_MAX_IOVECS);
	ret = map_one(vu_qpair->ctrlr->endpoint->vfu_ctx, addr, len,
		      &vu_req->sg[vu_req->iovcnt],
		      &vu_req->iov[vu_req->iovcnt]);
	if (spdk_likely(ret != NULL)) {
		vu_req->iovcnt++;
	}
	return ret;
}

static int
vfio_user_map_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req,
		  struct iovec *iov, uint32_t length)
{
	/* Map the PRP list from guest physical memory to host virtual
	 * addresses.
	 */
	return spdk_nvme_map_cmd(req, &req->cmd->nvme_cmd, iov, NVMF_REQ_MAX_BUFFERS,
				 length, 4096, _map_one);
}

static struct spdk_nvmf_request *
get_nvmf_req(struct nvmf_vfio_user_qpair *qp);

static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct spdk_nvmf_request *req);

/*
 * Posts a CQE in the completion queue.
 *
 * @ctrlr: the vfio-user controller
 * @cmd: the NVMe command for which the completion is posted
 * @cq: the completion queue
 * @cdw0: cdw0 as reported by NVMf (only for SPDK_NVME_OPC_GET/SET_FEATURES)
 * @sc: the NVMe CQE status code
 * @sct: the NVMe CQE status code type
 */
static int
post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
		struct nvme_q *cq, uint32_t cdw0, uint16_t sc,
		uint16_t sct)
{
	struct spdk_nvme_cpl *cpl;
	uint16_t qid;
	int err;

	assert(ctrlr != NULL);
	assert(cmd != NULL);

	qid = io_q_id(cq);

	if (ctrlr->qp[0]->qpair.ctrlr->vcprop.csts.bits.shst != SPDK_NVME_SHST_NORMAL) {
		SPDK_DEBUGLOG(nvmf_vfio,
			      "%s: ignore completion SQ%d cid=%d status=%#x\n",
			      ctrlr_id(ctrlr), qid, cmd->cid, sc);
		return 0;
	}

	if (cq_is_full(ctrlr, cq)) {
		SPDK_ERRLOG("%s: CQ%d full (tail=%d, head=%d)\n",
			    ctrlr_id(ctrlr), qid, cq->tail, *hdbl(ctrlr, cq));
		return -1;
	}

	cpl = ((struct spdk_nvme_cpl *)cq->addr) + cq->tail;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: request complete SQ%d cid=%d status=%#x SQ head=%#x CQ tail=%#x\n",
		      ctrlr_id(ctrlr), qid, cmd->cid, sc, ctrlr->qp[qid]->sq.head,
		      cq->tail);

	if (qid == 0) {
		switch (cmd->opc) {
		case SPDK_NVME_OPC_SET_FEATURES:
		case SPDK_NVME_OPC_GET_FEATURES:
			cpl->cdw0 = cdw0;
			break;
		}
	}

	assert(ctrlr->qp[qid] != NULL);

	cpl->sqhd = ctrlr->qp[qid]->sq.head;
	cpl->cid = cmd->cid;
	cpl->status.dnr = 0x0;
	cpl->status.m = 0x0;
	cpl->status.sct = sct;
	cpl->status.p = ~cpl->status.p;
	cpl->status.sc = sc;

	cq_tail_advance(cq);

	/*
	 * This function now executes in SPDK thread context, but we might be
	 * triggering interrupts from vfio-user thread context, so
	 * check for race conditions.
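	 * ctrlr_interrupt_enabled() checks the PCI-level gates (the INTx
	 * disable bit being clear, or MSI-X being enabled in the MSI-X
	 * capability), while cq->ien is the per-queue Interrupt Enable flag
	 * the host supplied in CREATE IO CQ; the admin CQ always has it set
	 * (see acq_map()).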
665 */ 666 if (ctrlr_interrupt_enabled(ctrlr) && cq->ien) { 667 err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv); 668 if (err != 0) { 669 SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n", 670 ctrlr_id(ctrlr)); 671 return err; 672 } 673 } 674 675 return 0; 676 } 677 678 static struct nvme_q * 679 lookup_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, const uint16_t qid, const bool is_cq) 680 { 681 struct nvme_q *q; 682 683 assert(ctrlr != NULL); 684 685 if (qid > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) { 686 return NULL; 687 } 688 689 if (ctrlr->qp[qid] == NULL) { 690 return NULL; 691 } 692 693 if (is_cq) { 694 q = &ctrlr->qp[qid]->cq; 695 } else { 696 q = &ctrlr->qp[qid]->sq; 697 } 698 699 if (q->addr == NULL) { 700 return NULL; 701 } 702 703 return q; 704 } 705 706 static void 707 unmap_qp(struct nvmf_vfio_user_qpair *qp) 708 { 709 struct nvmf_vfio_user_ctrlr *ctrlr; 710 711 if (qp->ctrlr == NULL) { 712 return; 713 } 714 ctrlr = qp->ctrlr; 715 716 SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy I/O QP%d\n", 717 ctrlr_id(ctrlr), qp->qpair.qid); 718 719 if (qp->sq.addr != NULL) { 720 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, &qp->sq.sg, &qp->sq.iov, 1); 721 qp->sq.addr = NULL; 722 } 723 724 if (qp->cq.addr != NULL) { 725 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, &qp->cq.sg, &qp->cq.iov, 1); 726 qp->cq.addr = NULL; 727 } 728 } 729 730 static void 731 free_qp(struct nvmf_vfio_user_ctrlr *ctrlr, uint16_t qid) 732 { 733 struct nvmf_vfio_user_qpair *qpair; 734 735 if (ctrlr == NULL) { 736 return; 737 } 738 739 qpair = ctrlr->qp[qid]; 740 if (qpair == NULL) { 741 return; 742 } 743 744 SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy QP%d=%p\n", ctrlr_id(ctrlr), 745 qid, qpair); 746 747 unmap_qp(qpair); 748 free(qpair->reqs_internal); 749 free(qpair); 750 ctrlr->qp[qid] = NULL; 751 } 752 753 /* This function can only fail because of memory allocation errors. */ 754 static int 755 init_qp(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_transport *transport, 756 const uint16_t qsize, const uint16_t id) 757 { 758 int err = 0, i; 759 struct nvmf_vfio_user_qpair *qpair; 760 struct nvmf_vfio_user_req *vu_req; 761 struct spdk_nvmf_request *req; 762 763 assert(ctrlr != NULL); 764 assert(transport != NULL); 765 766 qpair = calloc(1, sizeof(*qpair)); 767 if (qpair == NULL) { 768 return -ENOMEM; 769 } 770 771 qpair->qpair.qid = id; 772 qpair->qpair.transport = transport; 773 qpair->ctrlr = ctrlr; 774 qpair->qsize = qsize; 775 776 TAILQ_INIT(&qpair->reqs); 777 778 qpair->reqs_internal = calloc(qsize, sizeof(struct nvmf_vfio_user_req)); 779 if (qpair->reqs_internal == NULL) { 780 SPDK_ERRLOG("%s: error allocating reqs: %m\n", ctrlr_id(ctrlr)); 781 err = -ENOMEM; 782 goto out; 783 } 784 785 for (i = 0; i < qsize; i++) { 786 vu_req = &qpair->reqs_internal[i]; 787 req = &vu_req->req; 788 789 req->qpair = &qpair->qpair; 790 req->rsp = (union nvmf_c2h_msg *)&vu_req->rsp; 791 req->cmd = (union nvmf_h2c_msg *)&vu_req->cmd; 792 793 TAILQ_INSERT_TAIL(&qpair->reqs, vu_req, link); 794 } 795 ctrlr->qp[id] = qpair; 796 out: 797 if (err != 0) { 798 free(qpair); 799 } 800 return err; 801 } 802 803 /* 804 * Creates a completion or sumbission I/O queue. Returns 0 on success, -errno 805 * on error. 806 * 807 * XXX SPDK thread context. 
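 * QSIZE in CDW10 is 0-based: a request with qsize=0x7f, for example, creates a
 * 128-entry queue. The size is rejected with Invalid Queue Size if it exceeds
 * CAP.MQES + 1 as reported by the generic NVMf controller (see
 * max_queue_size()).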
 */
static int
handle_create_io_q(struct nvmf_vfio_user_ctrlr *ctrlr,
		   struct spdk_nvme_cmd *cmd, const bool is_cq)
{
	size_t entry_size;
	uint16_t sc = SPDK_NVME_SC_SUCCESS;
	uint16_t sct = SPDK_NVME_SCT_GENERIC;
	int err = 0;
	struct nvme_q io_q = {};

	assert(ctrlr != NULL);
	assert(cmd != NULL);

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: create I/O %cQ%d: QSIZE=%#x\n", ctrlr_id(ctrlr),
		      is_cq ? 'C' : 'S', cmd->cdw10_bits.create_io_q.qid,
		      cmd->cdw10_bits.create_io_q.qsize);

	if (cmd->cdw10_bits.create_io_q.qid >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) {
		SPDK_ERRLOG("%s: invalid QID=%d, max=%d\n", ctrlr_id(ctrlr),
			    cmd->cdw10_bits.create_io_q.qid,
			    NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR);
		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
		goto out;
	}

	if (lookup_io_q(ctrlr, cmd->cdw10_bits.create_io_q.qid, is_cq)) {
		SPDK_ERRLOG("%s: %cQ%d already exists\n", ctrlr_id(ctrlr),
			    is_cq ? 'C' : 'S', cmd->cdw10_bits.create_io_q.qid);
		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
		goto out;
	}

	/* TODO break rest of this function into smaller functions */
	if (is_cq) {
		entry_size = sizeof(struct spdk_nvme_cpl);
		if (cmd->cdw11_bits.create_io_cq.pc != 0x1) {
			/*
			 * TODO CAP.CMBS is currently set to zero, however we
			 * should zero it out explicitly when CAP is read.
			 * Support for CAP.CMBS is not mentioned in the NVMf
			 * spec.
			 */
			SPDK_ERRLOG("%s: non-PC CQ not supported\n", ctrlr_id(ctrlr));
			sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
			goto out;
		}
		io_q.ien = cmd->cdw11_bits.create_io_cq.ien;
		io_q.iv = cmd->cdw11_bits.create_io_cq.iv;
	} else {
		/* CQ must be created before SQ */
		if (!lookup_io_q(ctrlr, cmd->cdw11_bits.create_io_sq.cqid, true)) {
			SPDK_ERRLOG("%s: CQ%d does not exist\n", ctrlr_id(ctrlr),
				    cmd->cdw11_bits.create_io_sq.cqid);
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_COMPLETION_QUEUE_INVALID;
			goto out;
		}

		entry_size = sizeof(struct spdk_nvme_cmd);
		if (cmd->cdw11_bits.create_io_sq.pc != 0x1) {
			SPDK_ERRLOG("%s: non-PC SQ not supported\n", ctrlr_id(ctrlr));
			sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
			goto out;
		}

		io_q.cqid = cmd->cdw11_bits.create_io_sq.cqid;
		SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%d CQID=%d\n", ctrlr_id(ctrlr),
			      cmd->cdw10_bits.create_io_q.qid, io_q.cqid);
	}

	io_q.size = cmd->cdw10_bits.create_io_q.qsize + 1;
	if (io_q.size > max_queue_size(ctrlr)) {
		SPDK_ERRLOG("%s: queue too big, want=%d, max=%d\n", ctrlr_id(ctrlr),
			    io_q.size, max_queue_size(ctrlr));
		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		sc = SPDK_NVME_SC_INVALID_QUEUE_SIZE;
		goto out;
	}

	io_q.addr = map_one(ctrlr->endpoint->vfu_ctx, cmd->dptr.prp.prp1,
			    io_q.size * entry_size, &io_q.sg, &io_q.iov);
	if (io_q.addr == NULL) {
		sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		SPDK_ERRLOG("%s: failed to map I/O queue: %m\n", ctrlr_id(ctrlr));
		goto out;
	}
	io_q.prp1 = cmd->dptr.prp.prp1;
	memset(io_q.addr, 0, io_q.size * entry_size);

	SPDK_DEBUGLOG(nvmf_vfio, "%s: mapped %cQ%d IOVA=%#lx vaddr=%#llx\n",
		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
		      cmd->cdw10_bits.create_io_q.qid, cmd->dptr.prp.prp1,
		      (unsigned long long)io_q.addr);

	if (is_cq) {
		err = init_qp(ctrlr, ctrlr->qp[0]->qpair.transport, io_q.size,
			      cmd->cdw10_bits.create_io_q.qid);
		if (err != 0) {
			sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto out;
		}
	} else {
		/*
		 * After we've returned from the nvmf_vfio_user_poll_group_poll thread, once
		 * nvmf_vfio_user_accept executes it will pick up this QP and will eventually
		 * call nvmf_vfio_user_poll_group_add. The rest of the operation needed to
		 * complete the addition of the queue will be continued at the
		 * completion callback.
		 */
		TAILQ_INSERT_TAIL(&ctrlr->transport->new_qps, ctrlr->qp[cmd->cdw10_bits.create_io_q.qid], link);
	}
	insert_queue(ctrlr, &io_q, is_cq, cmd->cdw10_bits.create_io_q.qid);

out:
	return post_completion(ctrlr, cmd, &ctrlr->qp[0]->cq, 0, sc, sct);
}

/*
 * Deletes a completion or submission I/O queue.
 */
static int
handle_del_io_q(struct nvmf_vfio_user_ctrlr *ctrlr,
		struct spdk_nvme_cmd *cmd, const bool is_cq)
{
	uint16_t sct = SPDK_NVME_SCT_GENERIC;
	uint16_t sc = SPDK_NVME_SC_SUCCESS;

	SPDK_DEBUGLOG(nvmf_vfio, "%s: delete I/O %cQ: QID=%d\n",
		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
		      cmd->cdw10_bits.delete_io_q.qid);

	if (lookup_io_q(ctrlr, cmd->cdw10_bits.delete_io_q.qid, is_cq) == NULL) {
		SPDK_ERRLOG("%s: %cQ%d does not exist\n", ctrlr_id(ctrlr),
			    is_cq ? 'C' : 'S', cmd->cdw10_bits.delete_io_q.qid);
		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
		goto out;
	}

	if (is_cq) {
		/* SQ must have been deleted first */
		if (ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state != VFIO_USER_QPAIR_DELETED) {
			SPDK_ERRLOG("%s: the associated SQ must be deleted first\n", ctrlr_id(ctrlr));
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_INVALID_QUEUE_DELETION;
			goto out;
		}
	} else {
		/*
		 * This doesn't actually delete the I/O queue; we can't
		 * do that anyway because NVMf doesn't support it. We're merely
		 * telling the poll_group_poll function to skip checking this
		 * queue. The only workflow in which this works is when CC.EN is
		 * set to 0 and we're stopping the subsystem, so we know that
		 * the relevant callbacks to destroy the queues will be called.
		 */
		assert(ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state == VFIO_USER_QPAIR_ACTIVE);
		ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state = VFIO_USER_QPAIR_DELETED;
	}

out:
	return post_completion(ctrlr, cmd, &ctrlr->qp[0]->cq, 0, sc, sct);
}

/*
 * Returns 0 on success and -errno on error.
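 * Create/delete I/O queue commands are handled entirely inside this transport,
 * since they only affect the shadow queues mapped from guest memory; every
 * other admin opcode is wrapped in an NVMf request and forwarded to the
 * generic layer through handle_cmd_req().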
979 * 980 * XXX SPDK thread context 981 */ 982 static int 983 consume_admin_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd) 984 { 985 assert(ctrlr != NULL); 986 assert(cmd != NULL); 987 988 SPDK_DEBUGLOG(nvmf_vfio, "%s: handle admin req opc=%#x cid=%d\n", 989 ctrlr_id(ctrlr), cmd->opc, cmd->cid); 990 991 switch (cmd->opc) { 992 case SPDK_NVME_OPC_CREATE_IO_CQ: 993 case SPDK_NVME_OPC_CREATE_IO_SQ: 994 return handle_create_io_q(ctrlr, cmd, 995 cmd->opc == SPDK_NVME_OPC_CREATE_IO_CQ); 996 case SPDK_NVME_OPC_DELETE_IO_SQ: 997 case SPDK_NVME_OPC_DELETE_IO_CQ: 998 return handle_del_io_q(ctrlr, cmd, 999 cmd->opc == SPDK_NVME_OPC_DELETE_IO_CQ); 1000 default: 1001 return handle_cmd_req(ctrlr, cmd, get_nvmf_req(ctrlr->qp[0])); 1002 } 1003 } 1004 1005 static int 1006 handle_cmd_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1007 { 1008 struct nvmf_vfio_user_qpair *qpair = cb_arg; 1009 1010 assert(qpair != NULL); 1011 assert(req != NULL); 1012 1013 vfu_unmap_sg(qpair->ctrlr->endpoint->vfu_ctx, req->sg, req->iov, req->iovcnt); 1014 1015 return post_completion(qpair->ctrlr, &req->req.cmd->nvme_cmd, 1016 &qpair->ctrlr->qp[req->req.qpair->qid]->cq, 1017 req->req.rsp->nvme_cpl.cdw0, 1018 req->req.rsp->nvme_cpl.status.sc, 1019 req->req.rsp->nvme_cpl.status.sct); 1020 } 1021 1022 static int 1023 consume_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair, 1024 struct spdk_nvme_cmd *cmd) 1025 { 1026 assert(qpair != NULL); 1027 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 1028 return consume_admin_cmd(ctrlr, cmd); 1029 } 1030 1031 return handle_cmd_req(ctrlr, cmd, get_nvmf_req(qpair)); 1032 } 1033 1034 static ssize_t 1035 handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail, 1036 struct nvmf_vfio_user_qpair *qpair) 1037 { 1038 struct spdk_nvme_cmd *queue; 1039 1040 assert(ctrlr != NULL); 1041 assert(qpair != NULL); 1042 1043 queue = qpair->sq.addr; 1044 while (sq_head(qpair) != new_tail) { 1045 int err; 1046 struct spdk_nvme_cmd *cmd = &queue[sq_head(qpair)]; 1047 1048 /* 1049 * SQHD must contain the new head pointer, so we must increase 1050 * it before we generate a completion. 1051 */ 1052 sqhd_advance(ctrlr, qpair); 1053 1054 err = consume_cmd(ctrlr, qpair, cmd); 1055 if (err != 0) { 1056 return err; 1057 } 1058 } 1059 1060 return 0; 1061 } 1062 1063 static int 1064 map_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1065 { 1066 int err; 1067 1068 assert(ctrlr != NULL); 1069 1070 err = acq_map(ctrlr); 1071 if (err != 0) { 1072 return err; 1073 } 1074 1075 err = asq_map(ctrlr); 1076 if (err != 0) { 1077 return err; 1078 } 1079 1080 return 0; 1081 } 1082 1083 static void 1084 unmap_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1085 { 1086 assert(ctrlr->qp[0] != NULL); 1087 1088 unmap_qp(ctrlr->qp[0]); 1089 } 1090 1091 static void 1092 memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1093 { 1094 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1095 struct nvmf_vfio_user_ctrlr *ctrlr; 1096 struct nvmf_vfio_user_qpair *qpair; 1097 int i, ret; 1098 1099 /* 1100 * We're not interested in any DMA regions that aren't mappable (we don't 1101 * support clients that don't share their memory). 
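 * Regions also have to be 2 MiB aligned, in both address and length, before
 * they can be handed to spdk_mem_register(); anything unaligned is skipped via
 * the MASK_2MB checks below.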
1102 */ 1103 if (!info->vaddr) { 1104 return; 1105 } 1106 1107 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1108 (info->mapping.iov_len & MASK_2MB)) { 1109 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1110 (uintptr_t)info->mapping.iov_base, 1111 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1112 return; 1113 } 1114 1115 assert(endpoint != NULL); 1116 if (endpoint->ctrlr == NULL) { 1117 return; 1118 } 1119 ctrlr = endpoint->ctrlr; 1120 1121 SPDK_DEBUGLOG(nvmf_vfio, "%s: map IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1122 (uintptr_t)info->mapping.iov_base, 1123 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1124 1125 /* VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE are enabled when registering to VFIO, here we also 1126 * check the protection bits before registering. 1127 */ 1128 if ((info->prot == (PROT_WRITE | PROT_READ)) && 1129 (spdk_mem_register(info->mapping.iov_base, info->mapping.iov_len))) { 1130 SPDK_ERRLOG("Memory region register %#lx-%#lx failed\n", 1131 (uint64_t)(uintptr_t)info->mapping.iov_base, 1132 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1133 } 1134 1135 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1136 qpair = ctrlr->qp[i]; 1137 if (qpair == NULL) { 1138 continue; 1139 } 1140 1141 if (qpair->state != VFIO_USER_QPAIR_INACTIVE) { 1142 continue; 1143 } 1144 1145 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 1146 ret = map_admin_queue(ctrlr); 1147 if (ret) { 1148 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap Admin queue\n"); 1149 continue; 1150 } 1151 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1152 } else { 1153 struct nvme_q *sq = &qpair->sq; 1154 struct nvme_q *cq = &qpair->cq; 1155 1156 sq->addr = map_one(ctrlr->endpoint->vfu_ctx, sq->prp1, sq->size * 64, &sq->sg, &sq->iov); 1157 if (!sq->addr) { 1158 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap SQID %d %#lx-%#lx\n", 1159 i, sq->prp1, sq->prp1 + sq->size * 64); 1160 continue; 1161 } 1162 cq->addr = map_one(ctrlr->endpoint->vfu_ctx, cq->prp1, cq->size * 16, &cq->sg, &cq->iov); 1163 if (!cq->addr) { 1164 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap CQID %d %#lx-%#lx\n", 1165 i, cq->prp1, cq->prp1 + cq->size * 16); 1166 continue; 1167 } 1168 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1169 } 1170 } 1171 } 1172 1173 static int 1174 memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1175 { 1176 1177 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1178 struct nvmf_vfio_user_ctrlr *ctrlr; 1179 struct nvmf_vfio_user_qpair *qpair; 1180 void *map_start, *map_end; 1181 int i; 1182 1183 if (!info->vaddr) { 1184 return 0; 1185 } 1186 1187 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1188 (info->mapping.iov_len & MASK_2MB)) { 1189 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1190 (uintptr_t)info->mapping.iov_base, 1191 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1192 return 0; 1193 } 1194 1195 assert(endpoint != NULL); 1196 if (endpoint->ctrlr == NULL) { 1197 return 0; 1198 } 1199 ctrlr = endpoint->ctrlr; 1200 1201 SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1202 (uintptr_t)info->mapping.iov_base, 1203 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1204 1205 if ((info->prot == (PROT_WRITE | PROT_READ)) && 1206 (spdk_mem_unregister(info->mapping.iov_base, info->mapping.iov_len))) { 1207 SPDK_ERRLOG("Memory region unregister %#lx-%#lx failed\n", 1208 
(uint64_t)(uintptr_t)info->mapping.iov_base, 1209 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1210 } 1211 1212 map_start = info->mapping.iov_base; 1213 map_end = info->mapping.iov_base + info->mapping.iov_len; 1214 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1215 qpair = ctrlr->qp[i]; 1216 if (qpair == NULL) { 1217 continue; 1218 } 1219 1220 if ((qpair->cq.addr >= map_start && qpair->cq.addr < map_end) || 1221 (qpair->sq.addr >= map_start && qpair->sq.addr < map_end)) { 1222 unmap_qp(qpair); 1223 qpair->state = VFIO_USER_QPAIR_INACTIVE; 1224 } 1225 } 1226 1227 return 0; 1228 } 1229 1230 static int 1231 nvmf_vfio_user_prop_req_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1232 { 1233 struct nvmf_vfio_user_qpair *qpair = cb_arg; 1234 int ret; 1235 1236 assert(qpair != NULL); 1237 assert(req != NULL); 1238 1239 if (req->req.cmd->prop_get_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET) { 1240 assert(qpair->ctrlr != NULL); 1241 assert(req != NULL); 1242 1243 memcpy(req->req.data, 1244 &req->req.rsp->prop_get_rsp.value.u64, 1245 req->req.length); 1246 } else { 1247 assert(req->req.cmd->prop_set_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET); 1248 assert(qpair->ctrlr != NULL); 1249 1250 if (req->req.cmd->prop_set_cmd.ofst == offsetof(struct spdk_nvme_registers, cc)) { 1251 union spdk_nvme_cc_register *cc; 1252 1253 cc = (union spdk_nvme_cc_register *)&req->req.cmd->prop_set_cmd.value.u64; 1254 1255 if (cc->bits.en == 1 && cc->bits.shn == 0) { 1256 SPDK_DEBUGLOG(nvmf_vfio, 1257 "%s: MAP Admin queue\n", 1258 ctrlr_id(qpair->ctrlr)); 1259 ret = map_admin_queue(qpair->ctrlr); 1260 if (ret) { 1261 SPDK_ERRLOG("%s: failed to map Admin queue\n", ctrlr_id(qpair->ctrlr)); 1262 return ret; 1263 } 1264 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1265 } else if ((cc->bits.en == 0 && cc->bits.shn == 0) || 1266 (cc->bits.en == 1 && cc->bits.shn != 0)) { 1267 SPDK_DEBUGLOG(nvmf_vfio, 1268 "%s: UNMAP Admin queue\n", 1269 ctrlr_id(qpair->ctrlr)); 1270 unmap_admin_queue(qpair->ctrlr); 1271 qpair->state = VFIO_USER_QPAIR_INACTIVE; 1272 } 1273 } 1274 } 1275 1276 return 0; 1277 } 1278 1279 /* 1280 * XXX Do NOT remove, see comment in access_bar0_fn. 1281 * 1282 * Handles a write at offset 0x1000 or more. 1283 * 1284 * DSTRD is set to fixed value 0 for NVMf. 
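 * With DSTRD=0 the doorbells are packed 4 bytes apart, SQ tail followed by CQ
 * head for each queue pair. For example, a 4-byte access at BAR0 offset 0x1008
 * decodes to index (0x1008 - 0x1000) / 4 = 2, i.e. the SQ1 tail doorbell
 * (queue_index(1, false)), while 0x100c hits the CQ1 head doorbell.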
1285 * 1286 */ 1287 static int 1288 handle_dbl_access(struct nvmf_vfio_user_ctrlr *ctrlr, uint32_t *buf, 1289 const size_t count, loff_t pos, const bool is_write) 1290 { 1291 assert(ctrlr != NULL); 1292 assert(buf != NULL); 1293 1294 if (count != sizeof(uint32_t)) { 1295 SPDK_ERRLOG("%s: bad doorbell buffer size %ld\n", 1296 ctrlr_id(ctrlr), count); 1297 errno = EINVAL; 1298 return -1; 1299 } 1300 1301 pos -= NVMF_VFIO_USER_DOORBELLS_OFFSET; 1302 1303 /* pos must be dword aligned */ 1304 if ((pos & 0x3) != 0) { 1305 SPDK_ERRLOG("%s: bad doorbell offset %#lx\n", ctrlr_id(ctrlr), pos); 1306 errno = EINVAL; 1307 return -1; 1308 } 1309 1310 /* convert byte offset to array index */ 1311 pos >>= 2; 1312 1313 if (pos > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR * 2) { 1314 /* 1315 * TODO: need to emit a "Write to Invalid Doorbell Register" 1316 * asynchronous event 1317 */ 1318 SPDK_ERRLOG("%s: bad doorbell index %#lx\n", ctrlr_id(ctrlr), pos); 1319 errno = EINVAL; 1320 return -1; 1321 } 1322 1323 if (is_write) { 1324 ctrlr->doorbells[pos] = *buf; 1325 spdk_wmb(); 1326 } else { 1327 spdk_rmb(); 1328 *buf = ctrlr->doorbells[pos]; 1329 } 1330 return 0; 1331 } 1332 1333 static ssize_t 1334 access_bar0_fn(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos, 1335 bool is_write) 1336 { 1337 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1338 struct nvmf_vfio_user_ctrlr *ctrlr; 1339 struct nvmf_vfio_user_req *req; 1340 int ret; 1341 1342 ctrlr = endpoint->ctrlr; 1343 1344 SPDK_DEBUGLOG(nvmf_vfio, 1345 "%s: bar0 %s ctrlr: %p, count=%zu, pos=%"PRIX64"\n", 1346 endpoint_id(endpoint), is_write ? "write" : "read", 1347 ctrlr, count, pos); 1348 1349 if (pos >= NVMF_VFIO_USER_DOORBELLS_OFFSET) { 1350 /* 1351 * The fact that the doorbells can be memory mapped doesn't mean 1352 * that the client (VFIO in QEMU) is obliged to memory map them, 1353 * it might still elect to access them via regular read/write; 1354 * we might also have had disable_mappable_bar0 set. 
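 * (disable_mappable_bar0 is a transport-specific JSON option, i.e. passing
 * "disable-mappable-bar0": true when the transport is created; with it set no
 * sparse mmap area is exposed at all, so every doorbell access is trapped and
 * routed through here.)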
1355 */ 1356 ret = handle_dbl_access(ctrlr, (uint32_t *)buf, count, 1357 pos, is_write); 1358 if (ret == 0) { 1359 return count; 1360 } 1361 assert(errno != 0); 1362 return ret; 1363 } 1364 1365 /* Construct a Fabric Property Get/Set command and send it */ 1366 req = get_nvmf_vfio_user_req(ctrlr->qp[0]); 1367 if (req == NULL) { 1368 errno = ENOBUFS; 1369 return -1; 1370 } 1371 1372 req->cb_fn = nvmf_vfio_user_prop_req_rsp; 1373 req->cb_arg = ctrlr->qp[0]; 1374 req->req.cmd->prop_set_cmd.opcode = SPDK_NVME_OPC_FABRIC; 1375 req->req.cmd->prop_set_cmd.cid = 0; 1376 req->req.cmd->prop_set_cmd.attrib.size = (count / 4) - 1; 1377 req->req.cmd->prop_set_cmd.ofst = pos; 1378 if (is_write) { 1379 req->req.cmd->prop_set_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET; 1380 if (req->req.cmd->prop_set_cmd.attrib.size) { 1381 req->req.cmd->prop_set_cmd.value.u64 = *(uint64_t *)buf; 1382 } else { 1383 req->req.cmd->prop_set_cmd.value.u32.high = 0; 1384 req->req.cmd->prop_set_cmd.value.u32.low = *(uint32_t *)buf; 1385 } 1386 } else { 1387 req->req.cmd->prop_get_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET; 1388 } 1389 req->req.length = count; 1390 req->req.data = buf; 1391 1392 spdk_nvmf_request_exec_fabrics(&req->req); 1393 1394 return count; 1395 } 1396 1397 /* 1398 * NVMe driver reads 4096 bytes, which is the extended PCI configuration space 1399 * available on PCI-X 2.0 and PCI Express buses 1400 */ 1401 static ssize_t 1402 access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, 1403 bool is_write) 1404 { 1405 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1406 1407 if (is_write) { 1408 SPDK_ERRLOG("%s: write %#lx-%#lx not supported\n", 1409 endpoint_id(endpoint), offset, offset + count); 1410 errno = EINVAL; 1411 return -1; 1412 } 1413 1414 if (offset + count > PCI_CFG_SPACE_EXP_SIZE) { 1415 SPDK_ERRLOG("%s: access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n", 1416 endpoint_id(endpoint), offset, count, 1417 PCI_CFG_SPACE_EXP_SIZE); 1418 errno = ERANGE; 1419 return -1; 1420 } 1421 1422 memcpy(buf, ((unsigned char *)endpoint->pci_config_space) + offset, count); 1423 1424 return count; 1425 } 1426 1427 static void 1428 vfio_user_log(vfu_ctx_t *vfu_ctx, int level, char const *msg) 1429 { 1430 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1431 1432 if (level >= LOG_DEBUG) { 1433 SPDK_DEBUGLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1434 } else if (level >= LOG_INFO) { 1435 SPDK_INFOLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1436 } else if (level >= LOG_NOTICE) { 1437 SPDK_NOTICELOG("%s: %s\n", endpoint_id(endpoint), msg); 1438 } else if (level >= LOG_WARNING) { 1439 SPDK_WARNLOG("%s: %s\n", endpoint_id(endpoint), msg); 1440 } else { 1441 SPDK_ERRLOG("%s: %s\n", endpoint_id(endpoint), msg); 1442 } 1443 } 1444 1445 static void 1446 init_pci_config_space(vfu_pci_config_space_t *p) 1447 { 1448 /* MLBAR */ 1449 p->hdr.bars[0].raw = 0x0; 1450 /* MUBAR */ 1451 p->hdr.bars[1].raw = 0x0; 1452 1453 /* vendor specific, let's set them to zero for now */ 1454 p->hdr.bars[3].raw = 0x0; 1455 p->hdr.bars[4].raw = 0x0; 1456 p->hdr.bars[5].raw = 0x0; 1457 1458 /* enable INTx */ 1459 p->hdr.intr.ipin = 0x1; 1460 } 1461 1462 static int 1463 vfio_user_dev_info_fill(struct nvmf_vfio_user_transport *vu_transport, 1464 struct nvmf_vfio_user_endpoint *endpoint) 1465 { 1466 int ret; 1467 ssize_t cap_offset; 1468 vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx; 1469 1470 struct pmcap pmcap = { .hdr.id = 
PCI_CAP_ID_PM, .pmcs.nsfrst = 0x1 }; 1471 struct pxcap pxcap = { 1472 .hdr.id = PCI_CAP_ID_EXP, 1473 .pxcaps.ver = 0x2, 1474 .pxdcap = {.rer = 0x1, .flrc = 0x1}, 1475 .pxdcap2.ctds = 0x1 1476 }; 1477 1478 struct msixcap msixcap = { 1479 .hdr.id = PCI_CAP_ID_MSIX, 1480 .mxc.ts = NVME_IRQ_MSIX_NUM - 1, 1481 .mtab = {.tbir = 0x4, .to = 0x0}, 1482 .mpba = {.pbir = 0x5, .pbao = 0x0} 1483 }; 1484 1485 static struct iovec sparse_mmap[] = { 1486 { 1487 .iov_base = (void *)NVMF_VFIO_USER_DOORBELLS_OFFSET, 1488 .iov_len = NVMF_VFIO_USER_DOORBELLS_SIZE, 1489 }, 1490 }; 1491 1492 ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0); 1493 if (ret < 0) { 1494 SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", vfu_ctx); 1495 return ret; 1496 } 1497 vfu_pci_set_id(vfu_ctx, 0x4e58, 0x0001, 0, 0); 1498 /* 1499 * 0x02, controller uses the NVM Express programming interface 1500 * 0x08, non-volatile memory controller 1501 * 0x01, mass storage controller 1502 */ 1503 vfu_pci_set_class(vfu_ctx, 0x01, 0x08, 0x02); 1504 1505 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pmcap); 1506 if (cap_offset < 0) { 1507 SPDK_ERRLOG("vfu_ctx %p failed add pmcap\n", vfu_ctx); 1508 return ret; 1509 } 1510 1511 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pxcap); 1512 if (cap_offset < 0) { 1513 SPDK_ERRLOG("vfu_ctx %p failed add pxcap\n", vfu_ctx); 1514 return ret; 1515 } 1516 1517 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &msixcap); 1518 if (cap_offset < 0) { 1519 SPDK_ERRLOG("vfu_ctx %p failed add msixcap\n", vfu_ctx); 1520 return ret; 1521 } 1522 1523 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, NVME_REG_CFG_SIZE, 1524 access_pci_config, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1525 if (ret < 0) { 1526 SPDK_ERRLOG("vfu_ctx %p failed to setup cfg\n", vfu_ctx); 1527 return ret; 1528 } 1529 1530 if (vu_transport->transport_opts.disable_mappable_bar0) { 1531 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1532 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1533 NULL, 0, -1, 0); 1534 } else { 1535 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1536 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1537 sparse_mmap, 1, endpoint->fd, 0); 1538 } 1539 1540 if (ret < 0) { 1541 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 0\n", vfu_ctx); 1542 return ret; 1543 } 1544 1545 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR4_REGION_IDX, PAGE_SIZE, 1546 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1547 if (ret < 0) { 1548 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 4\n", vfu_ctx); 1549 return ret; 1550 } 1551 1552 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR5_REGION_IDX, PAGE_SIZE, 1553 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); 1554 if (ret < 0) { 1555 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 5\n", vfu_ctx); 1556 return ret; 1557 } 1558 1559 ret = vfu_setup_device_dma(vfu_ctx, memory_region_add_cb, memory_region_remove_cb); 1560 if (ret < 0) { 1561 SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", vfu_ctx); 1562 return ret; 1563 } 1564 1565 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); 1566 if (ret < 0) { 1567 SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", vfu_ctx); 1568 return ret; 1569 } 1570 1571 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, NVME_IRQ_MSIX_NUM); 1572 if (ret < 0) { 1573 SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", vfu_ctx); 1574 return ret; 1575 } 1576 1577 ret = vfu_realize_ctx(vfu_ctx); 1578 if (ret < 0) { 1579 SPDK_ERRLOG("vfu_ctx %p 
failed to realize\n", vfu_ctx); 1580 return ret; 1581 } 1582 1583 endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx); 1584 assert(endpoint->pci_config_space != NULL); 1585 init_pci_config_space(endpoint->pci_config_space); 1586 1587 assert(cap_offset != 0); 1588 endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset); 1589 1590 return 0; 1591 } 1592 1593 static void 1594 _free_ctrlr(void *ctx) 1595 { 1596 struct nvmf_vfio_user_ctrlr *ctrlr = ctx; 1597 int i; 1598 1599 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1600 free_qp(ctrlr, i); 1601 } 1602 1603 if (ctrlr->endpoint) { 1604 ctrlr->endpoint->ctrlr = NULL; 1605 } 1606 1607 spdk_poller_unregister(&ctrlr->mmio_poller); 1608 free(ctrlr); 1609 } 1610 1611 static int 1612 free_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 1613 { 1614 assert(ctrlr != NULL); 1615 1616 SPDK_DEBUGLOG(nvmf_vfio, "free %s\n", ctrlr_id(ctrlr)); 1617 1618 if (ctrlr->thread == spdk_get_thread()) { 1619 _free_ctrlr(ctrlr); 1620 } else { 1621 spdk_thread_send_msg(ctrlr->thread, _free_ctrlr, ctrlr); 1622 } 1623 1624 return 0; 1625 } 1626 1627 static void 1628 nvmf_vfio_user_create_ctrlr(struct nvmf_vfio_user_transport *transport, 1629 struct nvmf_vfio_user_endpoint *endpoint) 1630 { 1631 struct nvmf_vfio_user_ctrlr *ctrlr; 1632 int err; 1633 1634 /* First, construct a vfio-user CUSTOM transport controller */ 1635 ctrlr = calloc(1, sizeof(*ctrlr)); 1636 if (ctrlr == NULL) { 1637 err = -ENOMEM; 1638 goto out; 1639 } 1640 ctrlr->cntlid = 0xffff; 1641 ctrlr->transport = transport; 1642 ctrlr->endpoint = endpoint; 1643 ctrlr->doorbells = endpoint->doorbells; 1644 1645 /* Then, construct an admin queue pair */ 1646 err = init_qp(ctrlr, &transport->transport, NVMF_VFIO_USER_DEFAULT_AQ_DEPTH, 0); 1647 if (err != 0) { 1648 goto out; 1649 } 1650 endpoint->ctrlr = ctrlr; 1651 1652 /* Notify the generic layer about the new admin queue pair */ 1653 TAILQ_INSERT_TAIL(&ctrlr->transport->new_qps, ctrlr->qp[0], link); 1654 1655 out: 1656 if (err != 0) { 1657 SPDK_ERRLOG("%s: failed to create vfio-user controller: %s\n", 1658 endpoint_id(endpoint), strerror(-err)); 1659 if (free_ctrlr(ctrlr) != 0) { 1660 SPDK_ERRLOG("%s: failed to clean up\n", 1661 endpoint_id(endpoint)); 1662 } 1663 } 1664 } 1665 1666 static int 1667 nvmf_vfio_user_listen(struct spdk_nvmf_transport *transport, 1668 const struct spdk_nvme_transport_id *trid, 1669 struct spdk_nvmf_listen_opts *listen_opts) 1670 { 1671 struct nvmf_vfio_user_transport *vu_transport; 1672 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 1673 char *path = NULL; 1674 char uuid[PATH_MAX] = {}; 1675 int fd; 1676 int err; 1677 1678 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 1679 transport); 1680 1681 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 1682 /* Only compare traddr */ 1683 if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) { 1684 return -EEXIST; 1685 } 1686 } 1687 1688 endpoint = calloc(1, sizeof(*endpoint)); 1689 if (!endpoint) { 1690 return -ENOMEM; 1691 } 1692 1693 endpoint->fd = -1; 1694 memcpy(&endpoint->trid, trid, sizeof(endpoint->trid)); 1695 1696 err = asprintf(&path, "%s/bar0", endpoint_id(endpoint)); 1697 if (err == -1) { 1698 goto out; 1699 } 1700 1701 fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); 1702 if (fd == -1) { 1703 SPDK_ERRLOG("%s: failed to open device memory at %s: %m\n", 1704 endpoint_id(endpoint), path); 
1705 err = fd; 1706 free(path); 1707 goto out; 1708 } 1709 free(path); 1710 1711 err = ftruncate(fd, NVMF_VFIO_USER_DOORBELLS_OFFSET + NVMF_VFIO_USER_DOORBELLS_SIZE); 1712 if (err != 0) { 1713 goto out; 1714 } 1715 1716 endpoint->doorbells = mmap(NULL, NVMF_VFIO_USER_DOORBELLS_SIZE, 1717 PROT_READ | PROT_WRITE, MAP_SHARED, fd, NVMF_VFIO_USER_DOORBELLS_OFFSET); 1718 if (endpoint->doorbells == MAP_FAILED) { 1719 endpoint->doorbells = NULL; 1720 err = -errno; 1721 goto out; 1722 } 1723 1724 endpoint->fd = fd; 1725 1726 snprintf(uuid, PATH_MAX, "%s/cntrl", endpoint_id(endpoint)); 1727 1728 endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, uuid, LIBVFIO_USER_FLAG_ATTACH_NB, 1729 endpoint, VFU_DEV_TYPE_PCI); 1730 if (endpoint->vfu_ctx == NULL) { 1731 SPDK_ERRLOG("%s: error creating libmuser context: %m\n", 1732 endpoint_id(endpoint)); 1733 err = -1; 1734 goto out; 1735 } 1736 vfu_setup_log(endpoint->vfu_ctx, vfio_user_log, 1737 SPDK_DEBUGLOG_FLAG_ENABLED("nvmf_vfio") ? LOG_DEBUG : LOG_ERR); 1738 1739 err = vfio_user_dev_info_fill(vu_transport, endpoint); 1740 if (err < 0) { 1741 goto out; 1742 } 1743 1744 pthread_mutex_init(&endpoint->lock, NULL); 1745 TAILQ_INSERT_TAIL(&vu_transport->endpoints, endpoint, link); 1746 SPDK_DEBUGLOG(nvmf_vfio, "%s: doorbells %p\n", uuid, endpoint->doorbells); 1747 1748 out: 1749 if (err != 0) { 1750 nvmf_vfio_user_destroy_endpoint(endpoint); 1751 } 1752 1753 return err; 1754 } 1755 1756 static void 1757 nvmf_vfio_user_stop_listen(struct spdk_nvmf_transport *transport, 1758 const struct spdk_nvme_transport_id *trid) 1759 { 1760 struct nvmf_vfio_user_transport *vu_transport; 1761 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 1762 int err; 1763 1764 assert(trid != NULL); 1765 assert(trid->traddr != NULL); 1766 1767 SPDK_DEBUGLOG(nvmf_vfio, "%s: stop listen\n", trid->traddr); 1768 1769 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 1770 transport); 1771 1772 pthread_mutex_lock(&vu_transport->lock); 1773 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 1774 if (strcmp(trid->traddr, endpoint->trid.traddr) == 0) { 1775 TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link); 1776 if (endpoint->ctrlr) { 1777 err = free_ctrlr(endpoint->ctrlr); 1778 if (err != 0) { 1779 SPDK_ERRLOG("%s: failed destroy controller: %s\n", 1780 endpoint_id(endpoint), strerror(-err)); 1781 } 1782 } 1783 nvmf_vfio_user_destroy_endpoint(endpoint); 1784 pthread_mutex_unlock(&vu_transport->lock); 1785 1786 return; 1787 } 1788 } 1789 pthread_mutex_unlock(&vu_transport->lock); 1790 1791 SPDK_DEBUGLOG(nvmf_vfio, "%s: not found\n", trid->traddr); 1792 } 1793 1794 static void 1795 nvmf_vfio_user_cdata_init(struct spdk_nvmf_transport *transport, 1796 struct spdk_nvmf_subsystem *subsystem, 1797 struct spdk_nvmf_ctrlr_data *cdata) 1798 { 1799 memset(&cdata->sgls, 0, sizeof(struct spdk_nvme_cdata_sgls)); 1800 cdata->sgls.supported = SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED; 1801 } 1802 1803 static int 1804 nvmf_vfio_user_listen_associate(struct spdk_nvmf_transport *transport, 1805 const struct spdk_nvmf_subsystem *subsystem, 1806 const struct spdk_nvme_transport_id *trid) 1807 { 1808 struct nvmf_vfio_user_transport *vu_transport; 1809 struct nvmf_vfio_user_endpoint *endpoint; 1810 1811 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, transport); 1812 1813 TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) { 1814 if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) { 1815 break; 1816 } 1817 } 1818 
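	/* TAILQ_FOREACH leaves endpoint == NULL if no entry matched the traddr. */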
1819 if (endpoint == NULL) { 1820 return -ENOENT; 1821 } 1822 1823 endpoint->subsystem = subsystem; 1824 1825 return 0; 1826 } 1827 1828 /* 1829 * Executed periodically. 1830 * 1831 * XXX SPDK thread context. 1832 */ 1833 static uint32_t 1834 nvmf_vfio_user_accept(struct spdk_nvmf_transport *transport) 1835 { 1836 int err; 1837 struct nvmf_vfio_user_transport *vu_transport; 1838 struct nvmf_vfio_user_qpair *qp, *tmp_qp; 1839 struct nvmf_vfio_user_endpoint *endpoint; 1840 1841 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 1842 transport); 1843 1844 pthread_mutex_lock(&vu_transport->lock); 1845 1846 TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) { 1847 /* try to attach a new controller */ 1848 if (endpoint->ctrlr != NULL) { 1849 continue; 1850 } 1851 1852 err = vfu_attach_ctx(endpoint->vfu_ctx); 1853 if (err != 0) { 1854 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1855 continue; 1856 } 1857 1858 pthread_mutex_unlock(&vu_transport->lock); 1859 return -EFAULT; 1860 } 1861 1862 /* Construct a controller */ 1863 nvmf_vfio_user_create_ctrlr(vu_transport, endpoint); 1864 } 1865 1866 TAILQ_FOREACH_SAFE(qp, &vu_transport->new_qps, link, tmp_qp) { 1867 TAILQ_REMOVE(&vu_transport->new_qps, qp, link); 1868 spdk_nvmf_tgt_new_qpair(transport->tgt, &qp->qpair); 1869 } 1870 1871 pthread_mutex_unlock(&vu_transport->lock); 1872 1873 return 0; 1874 } 1875 1876 static void 1877 nvmf_vfio_user_discover(struct spdk_nvmf_transport *transport, 1878 struct spdk_nvme_transport_id *trid, 1879 struct spdk_nvmf_discovery_log_page_entry *entry) 1880 { } 1881 1882 static struct spdk_nvmf_transport_poll_group * 1883 nvmf_vfio_user_poll_group_create(struct spdk_nvmf_transport *transport) 1884 { 1885 struct nvmf_vfio_user_poll_group *vu_group; 1886 1887 SPDK_DEBUGLOG(nvmf_vfio, "create poll group\n"); 1888 1889 vu_group = calloc(1, sizeof(*vu_group)); 1890 if (vu_group == NULL) { 1891 SPDK_ERRLOG("Error allocating poll group: %m"); 1892 return NULL; 1893 } 1894 1895 TAILQ_INIT(&vu_group->qps); 1896 1897 return &vu_group->group; 1898 } 1899 1900 /* called when process exits */ 1901 static void 1902 nvmf_vfio_user_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 1903 { 1904 struct nvmf_vfio_user_poll_group *vu_group; 1905 1906 SPDK_DEBUGLOG(nvmf_vfio, "destroy poll group\n"); 1907 1908 vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group); 1909 1910 free(vu_group); 1911 } 1912 1913 static void 1914 vfio_user_qpair_disconnect_cb(void *ctx) 1915 { 1916 struct nvmf_vfio_user_endpoint *endpoint = ctx; 1917 struct nvmf_vfio_user_ctrlr *ctrlr; 1918 1919 pthread_mutex_lock(&endpoint->lock); 1920 ctrlr = endpoint->ctrlr; 1921 if (!ctrlr) { 1922 pthread_mutex_unlock(&endpoint->lock); 1923 return; 1924 } 1925 1926 if (!ctrlr->num_connected_qps) { 1927 free_ctrlr(ctrlr); 1928 pthread_mutex_unlock(&endpoint->lock); 1929 return; 1930 } 1931 pthread_mutex_unlock(&endpoint->lock); 1932 } 1933 1934 static int 1935 vfio_user_destroy_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 1936 { 1937 uint32_t i; 1938 struct nvmf_vfio_user_qpair *qpair; 1939 struct nvmf_vfio_user_endpoint *endpoint; 1940 1941 SPDK_DEBUGLOG(nvmf_vfio, "%s stop processing\n", ctrlr_id(ctrlr)); 1942 1943 endpoint = ctrlr->endpoint; 1944 assert(endpoint != NULL); 1945 1946 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1947 qpair = ctrlr->qp[i]; 1948 if (qpair == NULL) { 1949 continue; 1950 } 1951 spdk_nvmf_qpair_disconnect(&qpair->qpair, vfio_user_qpair_disconnect_cb, endpoint); 
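		/*
		 * Once the last of these disconnects completes and
		 * num_connected_qps drops to zero,
		 * vfio_user_qpair_disconnect_cb() frees the controller.
		 */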
1952 } 1953 1954 return 0; 1955 } 1956 1957 static int 1958 vfio_user_poll_mmio(void *ctx) 1959 { 1960 struct nvmf_vfio_user_ctrlr *ctrlr = ctx; 1961 int ret; 1962 1963 assert(ctrlr != NULL); 1964 1965 /* This will call access_bar0_fn() if there are any writes 1966 * to the portion of the BAR that is not mmap'd */ 1967 ret = vfu_run_ctx(ctrlr->endpoint->vfu_ctx); 1968 if (spdk_unlikely(ret != 0)) { 1969 spdk_poller_unregister(&ctrlr->mmio_poller); 1970 1971 /* initiator shutdown or reset, waiting for another re-connect */ 1972 if (errno == ENOTCONN) { 1973 vfio_user_destroy_ctrlr(ctrlr); 1974 return SPDK_POLLER_BUSY; 1975 } 1976 1977 fail_ctrlr(ctrlr); 1978 } 1979 1980 return SPDK_POLLER_BUSY; 1981 } 1982 1983 static int 1984 handle_queue_connect_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1985 { 1986 struct nvmf_vfio_user_poll_group *vu_group; 1987 struct nvmf_vfio_user_qpair *qpair = cb_arg; 1988 struct nvmf_vfio_user_ctrlr *ctrlr; 1989 struct nvmf_vfio_user_endpoint *endpoint; 1990 1991 assert(qpair != NULL); 1992 assert(req != NULL); 1993 1994 ctrlr = qpair->ctrlr; 1995 endpoint = ctrlr->endpoint; 1996 assert(ctrlr != NULL); 1997 assert(endpoint != NULL); 1998 1999 if (spdk_nvme_cpl_is_error(&req->req.rsp->nvme_cpl)) { 2000 SPDK_ERRLOG("SC %u, SCT %u\n", req->req.rsp->nvme_cpl.status.sc, req->req.rsp->nvme_cpl.status.sct); 2001 free_ctrlr(ctrlr); 2002 return -1; 2003 } 2004 2005 vu_group = SPDK_CONTAINEROF(qpair->group, struct nvmf_vfio_user_poll_group, group); 2006 TAILQ_INSERT_TAIL(&vu_group->qps, qpair, link); 2007 qpair->state = VFIO_USER_QPAIR_ACTIVE; 2008 2009 pthread_mutex_lock(&endpoint->lock); 2010 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 2011 ctrlr->cntlid = qpair->qpair.ctrlr->cntlid; 2012 ctrlr->thread = spdk_get_thread(); 2013 ctrlr->mmio_poller = SPDK_POLLER_REGISTER(vfio_user_poll_mmio, ctrlr, 0); 2014 } 2015 ctrlr->num_connected_qps++; 2016 pthread_mutex_unlock(&endpoint->lock); 2017 2018 free(req->req.data); 2019 req->req.data = NULL; 2020 2021 return 0; 2022 } 2023 2024 /* 2025 * Called by spdk_nvmf_transport_poll_group_add. 2026 */ 2027 static int 2028 nvmf_vfio_user_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 2029 struct spdk_nvmf_qpair *qpair) 2030 { 2031 struct nvmf_vfio_user_qpair *vu_qpair; 2032 struct nvmf_vfio_user_req *vu_req; 2033 struct nvmf_vfio_user_ctrlr *ctrlr; 2034 struct spdk_nvmf_request *req; 2035 struct spdk_nvmf_fabric_connect_data *data; 2036 bool admin; 2037 2038 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2039 vu_qpair->group = group; 2040 ctrlr = vu_qpair->ctrlr; 2041 2042 SPDK_DEBUGLOG(nvmf_vfio, "%s: add QP%d=%p(%p) to poll_group=%p\n", 2043 ctrlr_id(ctrlr), vu_qpair->qpair.qid, 2044 vu_qpair, qpair, group); 2045 2046 admin = nvmf_qpair_is_admin_queue(&vu_qpair->qpair); 2047 2048 vu_req = get_nvmf_vfio_user_req(vu_qpair); 2049 if (vu_req == NULL) { 2050 return -1; 2051 } 2052 2053 req = &vu_req->req; 2054 req->cmd->connect_cmd.opcode = SPDK_NVME_OPC_FABRIC; 2055 req->cmd->connect_cmd.cid = 0; 2056 req->cmd->connect_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT; 2057 req->cmd->connect_cmd.recfmt = 0; 2058 req->cmd->connect_cmd.sqsize = vu_qpair->qsize - 1; 2059 req->cmd->connect_cmd.qid = admin ? 
				    0 : qpair->qid;

	req->length = sizeof(struct spdk_nvmf_fabric_connect_data);
	req->data = calloc(1, req->length);
	if (req->data == NULL) {
		nvmf_vfio_user_req_free(req);
		return -ENOMEM;
	}

	data = (struct spdk_nvmf_fabric_connect_data *)req->data;
	data->cntlid = admin ? 0xFFFF : ctrlr->cntlid;
	snprintf(data->subnqn, sizeof(data->subnqn), "%s",
		 spdk_nvmf_subsystem_get_nqn(ctrlr->endpoint->subsystem));

	vu_req->cb_fn = handle_queue_connect_rsp;
	vu_req->cb_arg = vu_qpair;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: sending connect fabrics command for QID=%#x cntlid=%#x\n",
		      ctrlr_id(ctrlr), qpair->qid, data->cntlid);

	spdk_nvmf_request_exec_fabrics(req);
	return 0;
}

static int
nvmf_vfio_user_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
				 struct spdk_nvmf_qpair *qpair)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *vu_ctrlr;
	struct nvmf_vfio_user_endpoint *endpoint;
	struct nvmf_vfio_user_poll_group *vu_group;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	vu_ctrlr = vu_qpair->ctrlr;
	endpoint = vu_ctrlr->endpoint;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: remove NVMf QP%d=%p from NVMf poll_group=%p\n",
		      ctrlr_id(vu_qpair->ctrlr), qpair->qid, qpair, group);

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);
	TAILQ_REMOVE(&vu_group->qps, vu_qpair, link);

	pthread_mutex_lock(&endpoint->lock);
	assert(vu_ctrlr->num_connected_qps);
	vu_ctrlr->num_connected_qps--;
	pthread_mutex_unlock(&endpoint->lock);

	return 0;
}

static void
_nvmf_vfio_user_req_free(struct nvmf_vfio_user_qpair *vu_qpair, struct nvmf_vfio_user_req *vu_req)
{
	memset(&vu_req->cmd, 0, sizeof(vu_req->cmd));
	memset(&vu_req->rsp, 0, sizeof(vu_req->rsp));
	vu_req->iovcnt = 0;
	vu_req->state = VFIO_USER_REQUEST_STATE_FREE;

	TAILQ_INSERT_TAIL(&vu_qpair->reqs, vu_req, link);
}

static int
nvmf_vfio_user_req_free(struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;

	assert(req != NULL);

	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);

	_nvmf_vfio_user_req_free(vu_qpair, vu_req);

	return 0;
}

static int
nvmf_vfio_user_req_complete(struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;

	assert(req != NULL);

	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);

	if (vu_req->cb_fn != NULL) {
		if (vu_req->cb_fn(vu_req, vu_req->cb_arg) != 0) {
			fail_ctrlr(vu_qpair->ctrlr);
		}
	}

	_nvmf_vfio_user_req_free(vu_qpair, vu_req);

	return 0;
}

static void
nvmf_vfio_user_close_qpair(struct spdk_nvmf_qpair *qpair,
			   spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
{
	struct nvmf_vfio_user_qpair *vu_qpair;

	assert(qpair != NULL);
	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	free_qp(vu_qpair->ctrlr, qpair->qid);

	if (cb_fn) {
		cb_fn(cb_arg);
	}
}

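/*
 * Request lifecycle: requests are taken off the qpair's free list by
 * get_nvmf_vfio_user_req(), executed through the generic NVMf layer, and
 * returned to the free list either directly via nvmf_vfio_user_req_free() or,
 * after the completion callback has run, via nvmf_vfio_user_req_complete().
 */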
/**
 * Returns a preallocated spdk_nvmf_request or NULL if there isn't one available.
 */
static struct nvmf_vfio_user_req *
get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_req *req;

	assert(qpair != NULL);

	if (TAILQ_EMPTY(&qpair->reqs)) {
		return NULL;
	}

	req = TAILQ_FIRST(&qpair->reqs);
	TAILQ_REMOVE(&qpair->reqs, req, link);

	return req;
}

static struct spdk_nvmf_request *
get_nvmf_req(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_req *req = get_nvmf_vfio_user_req(qpair);

	if (req == NULL) {
		return NULL;
	}
	return &req->req;
}

static int
get_nvmf_io_req_length(struct spdk_nvmf_request *req)
{
	uint16_t nr;
	uint32_t nlb, nsid;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
	struct spdk_nvmf_ns *ns;

	nsid = cmd->nsid;
	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
	if (ns == NULL || ns->bdev == NULL) {
		SPDK_ERRLOG("unsuccessful query for nsid %u\n", cmd->nsid);
		return -EINVAL;
	}

	if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
		nr = cmd->cdw10_bits.dsm.nr + 1;
		return nr * sizeof(struct spdk_nvme_dsm_range);
	}

	/* NLB is a 0's based value; use a 32-bit type so that adding 1 cannot
	 * overflow when the field is 0xffff. */
	nlb = (cmd->cdw12 & 0x0000ffffu) + 1;
	return nlb * spdk_bdev_get_block_size(ns->bdev);
}

static int
map_admin_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint32_t len = 0;
	int iovcnt;

	req->xfer = cmd->opc & 0x3;
	req->length = 0;
	req->data = NULL;

	switch (cmd->opc) {
	case SPDK_NVME_OPC_IDENTIFY:
		len = 4096; /* TODO: there should be a define somewhere for this */
		break;
	case SPDK_NVME_OPC_GET_LOG_PAGE:
		len = (cmd->cdw10_bits.get_log_page.numdl + 1) * 4;
		break;
	}

	if (!cmd->dptr.prp.prp1 || !len) {
		return 0;
	}
	/* ADMIN command will not use SGL */
	assert(req->cmd->nvme_cmd.psdt == 0);
	iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, len);
	if (iovcnt < 0) {
		SPDK_ERRLOG("%s: map Admin Opc %x failed\n",
			    ctrlr_id(ctrlr), cmd->opc);
		return -1;
	}

	req->length = len;
	req->data = req->iov[0].iov_base;

	return 0;
}

/*
 * Handles an I/O command by mapping its data buffers into the request's
 * iovec.
 *
 * Returns 0 on success and -errno on failure.
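 *
 * For example, with a 4096-byte block size, a read or write whose CDW12 NLB
 * field is 3 (a 0's based value, i.e. four blocks) is sized by
 * get_nvmf_io_req_length() as (3 + 1) * 4096 = 16384 bytes, which
 * vfio_user_map_cmd() then maps into req->iov.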
 */
static int
map_io_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
{
	int err = 0;
	struct spdk_nvme_cmd *cmd;

	assert(ctrlr != NULL);
	assert(req != NULL);

	cmd = &req->cmd->nvme_cmd;
	req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);

	if (spdk_unlikely(req->xfer == SPDK_NVME_DATA_NONE)) {
		return 0;
	}

	err = get_nvmf_io_req_length(req);
	if (err < 0) {
		return -EINVAL;
	}

	req->length = err;
	err = vfio_user_map_cmd(ctrlr, req, req->iov, req->length);
	if (err < 0) {
		SPDK_ERRLOG("%s: failed to map IO OPC %u\n", ctrlr_id(ctrlr), cmd->opc);
		return -EFAULT;
	}

	req->data = req->iov[0].iov_base;
	req->iovcnt = err;

	return 0;
}

static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct spdk_nvmf_request *req)
{
	int err;
	struct nvmf_vfio_user_req *vu_req;

	assert(ctrlr != NULL);
	assert(cmd != NULL);

	/*
	 * TODO: this means that there are no free requests available,
	 * returning -1 will fail the controller. Theoretically this error can
	 * be avoided completely by ensuring we have as many requests as slots
	 * in the SQ, plus one for the property request.
	 */
	if (spdk_unlikely(req == NULL)) {
		return -1;
	}

	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_req->cb_fn = handle_cmd_rsp;
	vu_req->cb_arg = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);
	req->cmd->nvme_cmd = *cmd;
	if (nvmf_qpair_is_admin_queue(req->qpair)) {
		err = map_admin_cmd_req(ctrlr, req);
	} else {
		err = map_io_cmd_req(ctrlr, req);
	}

	if (spdk_unlikely(err < 0)) {
		SPDK_ERRLOG("%s: map NVMe command opc 0x%x failed\n",
			    ctrlr_id(ctrlr), cmd->opc);
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		return handle_cmd_rsp(vu_req, vu_req->cb_arg);
	}

	vu_req->state = VFIO_USER_REQUEST_STATE_EXECUTING;
	spdk_nvmf_request_exec(req);

	return 0;
}

static void
nvmf_vfio_user_qpair_poll(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_ctrlr *ctrlr;
	uint32_t new_tail;

	assert(qpair != NULL);

	ctrlr = qpair->ctrlr;

	new_tail = *tdbl(ctrlr, &qpair->sq);
	if (sq_head(qpair) != new_tail) {
		int err = handle_sq_tdbl_write(ctrlr, new_tail, qpair);
		if (err != 0) {
			fail_ctrlr(ctrlr);
			return;
		}
	}
}

/*
 * Called unconditionally, periodically, and very frequently from SPDK to ask
 * whether there is work to be done. New submissions are picked up by comparing
 * each active queue pair's submission queue tail doorbell against the head
 * that was last processed; BAR0 accesses that fall outside the mmap'd
 * doorbell region are handled separately by vfio_user_poll_mmio().
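 *
 * The scan is preceded by a read memory barrier (spdk_rmb()) so that updates
 * the guest made to shared memory before ringing a doorbell are observed by
 * this thread before the corresponding submission queue entries are read.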
 */
static int
nvmf_vfio_user_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
{
	struct nvmf_vfio_user_poll_group *vu_group;
	struct nvmf_vfio_user_qpair *vu_qpair, *tmp;

	assert(group != NULL);

	spdk_rmb();

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);

	TAILQ_FOREACH_SAFE(vu_qpair, &vu_group->qps, link, tmp) {
		if (spdk_unlikely(vu_qpair->state != VFIO_USER_QPAIR_ACTIVE || !vu_qpair->sq.size)) {
			continue;
		}
		nvmf_vfio_user_qpair_poll(vu_qpair);
	}

	return 0;
}

static int
nvmf_vfio_user_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
				    struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	ctrlr = vu_qpair->ctrlr;

	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
	return 0;
}

static int
nvmf_vfio_user_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
				   struct spdk_nvme_transport_id *trid)
{
	return 0;
}

static int
nvmf_vfio_user_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
				     struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	ctrlr = vu_qpair->ctrlr;

	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
	return 0;
}

static void
nvmf_vfio_user_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
				   struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req, *vu_req_to_abort = NULL;
	uint16_t i, cid;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

	cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
	for (i = 0; i < vu_qpair->qsize; i++) {
		vu_req = &vu_qpair->reqs_internal[i];
		if (vu_req->state == VFIO_USER_REQUEST_STATE_EXECUTING && vu_req->cmd.cid == cid) {
			vu_req_to_abort = vu_req;
			break;
		}
	}

	if (vu_req_to_abort == NULL) {
		spdk_nvmf_request_complete(req);
		return;
	}

	req->req_to_abort = &vu_req_to_abort->req;
	nvmf_ctrlr_abort_request(req);
}

static void
nvmf_vfio_user_opts_init(struct spdk_nvmf_transport_opts *opts)
{
	opts->max_queue_depth = NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH;
	opts->max_qpairs_per_ctrlr = NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR;
	opts->in_capsule_data_size = NVMF_VFIO_USER_DEFAULT_IN_CAPSULE_DATA_SIZE;
	opts->max_io_size = NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE;
	opts->io_unit_size = NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE;
	opts->max_aq_depth = NVMF_VFIO_USER_DEFAULT_AQ_DEPTH;
	opts->num_shared_buffers = NVMF_VFIO_USER_DEFAULT_NUM_SHARED_BUFFERS;
	opts->buf_cache_size = NVMF_VFIO_USER_DEFAULT_BUFFER_CACHE_SIZE;
	opts->transport_specific = NULL;
}

const struct spdk_nvmf_transport_ops spdk_nvmf_transport_vfio_user = {
	.name = "VFIOUSER",
	.type = SPDK_NVME_TRANSPORT_VFIOUSER,
	.opts_init = nvmf_vfio_user_opts_init,
	.create = nvmf_vfio_user_create,
	.destroy = nvmf_vfio_user_destroy,

	.listen = nvmf_vfio_user_listen,
	.stop_listen = nvmf_vfio_user_stop_listen,
	.accept =
		nvmf_vfio_user_accept,
	.cdata_init = nvmf_vfio_user_cdata_init,
	.listen_associate = nvmf_vfio_user_listen_associate,

	.listener_discover = nvmf_vfio_user_discover,

	.poll_group_create = nvmf_vfio_user_poll_group_create,
	.poll_group_destroy = nvmf_vfio_user_poll_group_destroy,
	.poll_group_add = nvmf_vfio_user_poll_group_add,
	.poll_group_remove = nvmf_vfio_user_poll_group_remove,
	.poll_group_poll = nvmf_vfio_user_poll_group_poll,

	.req_free = nvmf_vfio_user_req_free,
	.req_complete = nvmf_vfio_user_req_complete,

	.qpair_fini = nvmf_vfio_user_close_qpair,
	.qpair_get_local_trid = nvmf_vfio_user_qpair_get_local_trid,
	.qpair_get_peer_trid = nvmf_vfio_user_qpair_get_peer_trid,
	.qpair_get_listen_trid = nvmf_vfio_user_qpair_get_listen_trid,
	.qpair_abort_request = nvmf_vfio_user_qpair_abort_request,
};

SPDK_NVMF_TRANSPORT_REGISTER(muser, &spdk_nvmf_transport_vfio_user);
SPDK_LOG_REGISTER_COMPONENT(nvmf_vfio)
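
/*
 * Illustrative usage sketch (not part of this transport): with the target
 * application running, the VFIOUSER transport is typically enabled and a
 * subsystem exposed over it with JSON-RPC calls roughly like the following;
 * the NQN and socket directory are example values and RPC arguments may
 * differ between SPDK versions.
 *
 *   scripts/rpc.py nvmf_create_transport -t VFIOUSER
 *   scripts/rpc.py nvmf_create_subsystem nqn.2019-07.io.spdk:cnode0 -a
 *   scripts/rpc.py nvmf_subsystem_add_listener nqn.2019-07.io.spdk:cnode0 \
 *           -t VFIOUSER -a /var/run/vfio-user -s 0
 *
 * The traddr passed to the listener is the directory in which libvfio-user
 * creates its socket; a vfio-user client (e.g. a QEMU build with vfio-user
 * device support) attaches to that socket and sees an emulated NVMe PCI
 * function backed by this transport.
 */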