1 /*- 2 * BSD LICENSE 3 * Copyright (c) Intel Corporation. All rights reserved. 4 * Copyright (c) 2019, Nutanix Inc. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * * Neither the name of Intel Corporation nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * NVMe over vfio-user transport 35 */ 36 37 #include <vfio-user/libvfio-user.h> 38 #include <vfio-user/pci_defs.h> 39 40 #include "spdk/barrier.h" 41 #include "spdk/stdinc.h" 42 #include "spdk/assert.h" 43 #include "spdk/thread.h" 44 #include "spdk/nvmf_transport.h" 45 #include "spdk/sock.h" 46 #include "spdk/string.h" 47 #include "spdk/util.h" 48 #include "spdk/log.h" 49 50 #include "transport.h" 51 52 #include "nvmf_internal.h" 53 54 #define NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH 256 55 #define NVMF_VFIO_USER_DEFAULT_AQ_DEPTH 32 56 #define NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 64 57 #define NVMF_VFIO_USER_DEFAULT_IN_CAPSULE_DATA_SIZE 0 58 #define NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE ((NVMF_REQ_MAX_BUFFERS - 1) << SHIFT_4KB) 59 #define NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE 60 #define NVMF_VFIO_USER_DEFAULT_NUM_SHARED_BUFFERS 512 /* internal buf size */ 61 #define NVMF_VFIO_USER_DEFAULT_BUFFER_CACHE_SIZE 0 62 63 #define NVMF_VFIO_USER_DOORBELLS_OFFSET 0x1000 64 #define NVMF_VFIO_USER_DOORBELLS_SIZE 0x1000 65 66 #define NVME_REG_CFG_SIZE 0x1000 67 #define NVME_REG_BAR0_SIZE 0x4000 68 #define NVME_IRQ_INTX_NUM 1 69 #define NVME_IRQ_MSIX_NUM NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 70 71 struct nvmf_vfio_user_req; 72 struct nvmf_vfio_user_qpair; 73 74 typedef int (*nvmf_vfio_user_req_cb_fn)(struct nvmf_vfio_user_req *req, void *cb_arg); 75 76 /* 1 more for PRP2 list itself */ 77 #define NVMF_VFIO_USER_MAX_IOVECS (NVMF_REQ_MAX_BUFFERS + 1) 78 79 enum nvmf_vfio_user_req_state { 80 VFIO_USER_REQUEST_STATE_FREE = 0, 81 VFIO_USER_REQUEST_STATE_EXECUTING, 82 }; 83 84 struct nvmf_vfio_user_req { 85 struct spdk_nvmf_request req; 86 struct spdk_nvme_cpl rsp; 87 struct spdk_nvme_cmd cmd; 88 89 enum nvmf_vfio_user_req_state state; 90 nvmf_vfio_user_req_cb_fn cb_fn; 91 void *cb_arg; 92 93 /* 
placeholder for gpa_to_vva memory map table, the IO buffer doesn't use it */ 94 dma_sg_t sg[NVMF_VFIO_USER_MAX_IOVECS]; 95 struct iovec iov[NVMF_VFIO_USER_MAX_IOVECS]; 96 uint8_t iovcnt; 97 98 TAILQ_ENTRY(nvmf_vfio_user_req) link; 99 }; 100 101 /* 102 * A NVMe queue. 103 */ 104 struct nvme_q { 105 bool is_cq; 106 107 void *addr; 108 109 dma_sg_t sg; 110 struct iovec iov; 111 112 uint32_t size; 113 uint64_t prp1; 114 115 union { 116 struct { 117 uint32_t head; 118 /* multiple SQs can be mapped to the same CQ */ 119 uint16_t cqid; 120 }; 121 struct { 122 uint32_t tail; 123 uint16_t iv; 124 bool ien; 125 }; 126 }; 127 }; 128 129 enum nvmf_vfio_user_qpair_state { 130 VFIO_USER_QPAIR_UNINITIALIZED = 0, 131 VFIO_USER_QPAIR_ACTIVE, 132 VFIO_USER_QPAIR_DELETED, 133 VFIO_USER_QPAIR_INACTIVE, 134 VFIO_USER_QPAIR_ERROR, 135 }; 136 137 struct nvmf_vfio_user_qpair { 138 struct spdk_nvmf_qpair qpair; 139 struct spdk_nvmf_transport_poll_group *group; 140 struct nvmf_vfio_user_ctrlr *ctrlr; 141 struct nvmf_vfio_user_req *reqs_internal; 142 uint16_t qsize; 143 struct nvme_q cq; 144 struct nvme_q sq; 145 enum nvmf_vfio_user_qpair_state state; 146 147 TAILQ_HEAD(, nvmf_vfio_user_req) reqs; 148 TAILQ_ENTRY(nvmf_vfio_user_qpair) link; 149 }; 150 151 struct nvmf_vfio_user_poll_group { 152 struct spdk_nvmf_transport_poll_group group; 153 TAILQ_HEAD(, nvmf_vfio_user_qpair) qps; 154 }; 155 156 struct nvmf_vfio_user_ctrlr { 157 struct nvmf_vfio_user_endpoint *endpoint; 158 struct nvmf_vfio_user_transport *transport; 159 160 /* True when the socket connection is active */ 161 bool ready; 162 /* Number of connected queue pairs */ 163 uint32_t num_connected_qps; 164 165 struct spdk_thread *thread; 166 struct spdk_poller *mmio_poller; 167 168 uint16_t cntlid; 169 170 struct nvmf_vfio_user_qpair *qp[NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR]; 171 172 TAILQ_ENTRY(nvmf_vfio_user_ctrlr) link; 173 174 volatile uint32_t *doorbells; 175 176 /* internal CSTS.CFS register for vfio-user fatal errors */ 177 uint32_t cfs : 1; 178 }; 179 180 struct nvmf_vfio_user_endpoint { 181 vfu_ctx_t *vfu_ctx; 182 struct msixcap *msix; 183 vfu_pci_config_space_t *pci_config_space; 184 int fd; 185 volatile uint32_t *doorbells; 186 187 struct spdk_nvme_transport_id trid; 188 const struct spdk_nvmf_subsystem *subsystem; 189 190 struct nvmf_vfio_user_ctrlr *ctrlr; 191 pthread_mutex_t lock; 192 193 TAILQ_ENTRY(nvmf_vfio_user_endpoint) link; 194 }; 195 196 struct nvmf_vfio_user_transport_opts { 197 bool disable_mappable_bar0; 198 }; 199 200 struct nvmf_vfio_user_transport { 201 struct spdk_nvmf_transport transport; 202 struct nvmf_vfio_user_transport_opts transport_opts; 203 pthread_mutex_t lock; 204 TAILQ_HEAD(, nvmf_vfio_user_endpoint) endpoints; 205 206 TAILQ_HEAD(, nvmf_vfio_user_qpair) new_qps; 207 }; 208 209 /* 210 * function prototypes 211 */ 212 static volatile uint32_t * 213 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 214 215 static volatile uint32_t * 216 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q); 217 218 static int 219 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req); 220 221 static struct nvmf_vfio_user_req * 222 get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair); 223 224 static int 225 post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd, 226 struct nvme_q *cq, uint32_t cdw0, uint16_t sc, 227 uint16_t sct); 228 229 static char * 230 endpoint_id(struct nvmf_vfio_user_endpoint *endpoint) 231 { 232 return endpoint->trid.traddr; 233 } 234 235 static char * 236 
ctrlr_id(struct nvmf_vfio_user_ctrlr *ctrlr) 237 { 238 if (!ctrlr || !ctrlr->endpoint) { 239 return "Null Ctrlr"; 240 } 241 242 return endpoint_id(ctrlr->endpoint); 243 } 244 245 static uint16_t 246 io_q_id(struct nvme_q *q) 247 { 248 249 struct nvmf_vfio_user_qpair *vfio_user_qpair; 250 251 assert(q); 252 253 if (q->is_cq) { 254 vfio_user_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, cq); 255 } else { 256 vfio_user_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, sq); 257 } 258 assert(vfio_user_qpair); 259 return vfio_user_qpair->qpair.qid; 260 } 261 262 static void 263 fail_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 264 { 265 assert(ctrlr != NULL); 266 267 if (ctrlr->cfs == 0) { 268 SPDK_ERRLOG(":%s failing controller\n", ctrlr_id(ctrlr)); 269 } 270 271 ctrlr->ready = false; 272 ctrlr->cfs = 1U; 273 } 274 275 static bool 276 ctrlr_interrupt_enabled(struct nvmf_vfio_user_ctrlr *ctrlr) 277 { 278 assert(ctrlr != NULL); 279 assert(ctrlr->endpoint != NULL); 280 281 vfu_pci_config_space_t *pci = ctrlr->endpoint->pci_config_space; 282 283 return (!pci->hdr.cmd.id || ctrlr->endpoint->msix->mxc.mxe); 284 } 285 286 static void 287 nvmf_vfio_user_destroy_endpoint(struct nvmf_vfio_user_endpoint *endpoint) 288 { 289 if (endpoint->doorbells) { 290 munmap((void *)endpoint->doorbells, NVMF_VFIO_USER_DOORBELLS_SIZE); 291 } 292 293 if (endpoint->fd > 0) { 294 close(endpoint->fd); 295 } 296 297 vfu_destroy_ctx(endpoint->vfu_ctx); 298 299 pthread_mutex_destroy(&endpoint->lock); 300 free(endpoint); 301 } 302 303 /* called when process exits */ 304 static int 305 nvmf_vfio_user_destroy(struct spdk_nvmf_transport *transport, 306 spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg) 307 { 308 struct nvmf_vfio_user_transport *vu_transport; 309 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 310 311 SPDK_DEBUGLOG(nvmf_vfio, "destroy transport\n"); 312 313 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 314 transport); 315 316 (void)pthread_mutex_destroy(&vu_transport->lock); 317 318 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 319 TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link); 320 nvmf_vfio_user_destroy_endpoint(endpoint); 321 } 322 323 free(vu_transport); 324 325 if (cb_fn) { 326 cb_fn(cb_arg); 327 } 328 329 return 0; 330 } 331 332 static const struct spdk_json_object_decoder vfio_user_transport_opts_decoder[] = { 333 { 334 "disable-mappable-bar0", 335 offsetof(struct nvmf_vfio_user_transport, transport_opts.disable_mappable_bar0), 336 spdk_json_decode_bool, true 337 }, 338 }; 339 340 static struct spdk_nvmf_transport * 341 nvmf_vfio_user_create(struct spdk_nvmf_transport_opts *opts) 342 { 343 struct nvmf_vfio_user_transport *vu_transport; 344 int err; 345 346 vu_transport = calloc(1, sizeof(*vu_transport)); 347 if (vu_transport == NULL) { 348 SPDK_ERRLOG("Transport alloc fail: %m\n"); 349 return NULL; 350 } 351 352 err = pthread_mutex_init(&vu_transport->lock, NULL); 353 if (err != 0) { 354 SPDK_ERRLOG("Pthread initialisation failed (%d)\n", err); 355 goto err; 356 } 357 358 TAILQ_INIT(&vu_transport->endpoints); 359 TAILQ_INIT(&vu_transport->new_qps); 360 361 if (opts->transport_specific != NULL && 362 spdk_json_decode_object_relaxed(opts->transport_specific, vfio_user_transport_opts_decoder, 363 SPDK_COUNTOF(vfio_user_transport_opts_decoder), 364 vu_transport)) { 365 SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n"); 366 free(vu_transport); 367 return NULL; 368 } 369 370 SPDK_DEBUGLOG(nvmf_vfio, "vfio_user transport: 
disable_mappable_bar0=%d\n", 371 vu_transport->transport_opts.disable_mappable_bar0); 372 373 return &vu_transport->transport; 374 375 err: 376 free(vu_transport); 377 378 return NULL; 379 } 380 381 static uint16_t 382 max_queue_size(struct nvmf_vfio_user_ctrlr const *ctrlr) 383 { 384 assert(ctrlr != NULL); 385 assert(ctrlr->qp[0] != NULL); 386 assert(ctrlr->qp[0]->qpair.ctrlr != NULL); 387 388 return ctrlr->qp[0]->qpair.ctrlr->vcprop.cap.bits.mqes + 1; 389 } 390 391 static void * 392 map_one(vfu_ctx_t *ctx, uint64_t addr, uint64_t len, dma_sg_t *sg, struct iovec *iov) 393 { 394 int ret; 395 396 assert(ctx != NULL); 397 assert(sg != NULL); 398 assert(iov != NULL); 399 400 ret = vfu_addr_to_sg(ctx, (void *)(uintptr_t)addr, len, sg, 1, PROT_READ | PROT_WRITE); 401 if (ret != 1) { 402 return NULL; 403 } 404 405 ret = vfu_map_sg(ctx, sg, iov, 1); 406 if (ret != 0) { 407 return NULL; 408 } 409 410 assert(iov->iov_base != NULL); 411 return iov->iov_base; 412 } 413 414 static uint32_t 415 sq_head(struct nvmf_vfio_user_qpair *qpair) 416 { 417 assert(qpair != NULL); 418 return qpair->sq.head; 419 } 420 421 static void 422 sqhd_advance(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair) 423 { 424 assert(ctrlr != NULL); 425 assert(qpair != NULL); 426 qpair->sq.head = (qpair->sq.head + 1) % qpair->sq.size; 427 } 428 429 static void 430 insert_queue(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q, 431 const bool is_cq, const uint16_t id) 432 { 433 struct nvme_q *_q; 434 struct nvmf_vfio_user_qpair *qpair; 435 436 assert(ctrlr != NULL); 437 assert(q != NULL); 438 439 qpair = ctrlr->qp[id]; 440 441 q->is_cq = is_cq; 442 if (is_cq) { 443 _q = &qpair->cq; 444 *_q = *q; 445 *hdbl(ctrlr, _q) = 0; 446 } else { 447 _q = &qpair->sq; 448 *_q = *q; 449 *tdbl(ctrlr, _q) = 0; 450 } 451 } 452 453 static int 454 asq_map(struct nvmf_vfio_user_ctrlr *ctrlr) 455 { 456 struct nvme_q q = {}; 457 const struct spdk_nvmf_registers *regs; 458 459 assert(ctrlr != NULL); 460 assert(ctrlr->qp[0] != NULL); 461 assert(ctrlr->qp[0]->sq.addr == NULL); 462 /* XXX ctrlr->asq == 0 is a valid memory address */ 463 464 regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr); 465 q.size = regs->aqa.bits.asqs + 1; 466 q.head = ctrlr->doorbells[0] = 0; 467 q.cqid = 0; 468 q.addr = map_one(ctrlr->endpoint->vfu_ctx, regs->asq, 469 q.size * sizeof(struct spdk_nvme_cmd), &q.sg, &q.iov); 470 if (q.addr == NULL) { 471 return -1; 472 } 473 memset(q.addr, 0, q.size * sizeof(struct spdk_nvme_cmd)); 474 insert_queue(ctrlr, &q, false, 0); 475 476 return 0; 477 } 478 479 static uint16_t 480 cq_next(struct nvme_q *q) 481 { 482 assert(q != NULL); 483 assert(q->is_cq); 484 return (q->tail + 1) % q->size; 485 } 486 487 static int 488 queue_index(uint16_t qid, int is_cq) 489 { 490 return (qid * 2) + is_cq; 491 } 492 493 static volatile uint32_t * 494 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 495 { 496 assert(ctrlr != NULL); 497 assert(q != NULL); 498 assert(!q->is_cq); 499 500 return &ctrlr->doorbells[queue_index(io_q_id(q), false)]; 501 } 502 503 static volatile uint32_t * 504 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 505 { 506 assert(ctrlr != NULL); 507 assert(q != NULL); 508 assert(q->is_cq); 509 510 return &ctrlr->doorbells[queue_index(io_q_id(q), true)]; 511 } 512 513 static bool 514 cq_is_full(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q) 515 { 516 assert(ctrlr != NULL); 517 assert(q != NULL); 518 return cq_next(q) == *hdbl(ctrlr, q); 519 } 520 521 static void 522 
cq_tail_advance(struct nvme_q *q)
{
	assert(q != NULL);
	q->tail = cq_next(q);
}

static int
acq_map(struct nvmf_vfio_user_ctrlr *ctrlr)
{
	struct nvme_q q = {};
	const struct spdk_nvmf_registers *regs;

	assert(ctrlr != NULL);
	assert(ctrlr->qp[0] != NULL);
	assert(ctrlr->qp[0]->cq.addr == NULL);

	regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
	assert(regs != NULL);

	/* AQA.ACQS is a 0-based value */
	q.size = regs->aqa.bits.acqs + 1;
	q.tail = 0;
	q.addr = map_one(ctrlr->endpoint->vfu_ctx, regs->acq,
			 q.size * sizeof(struct spdk_nvme_cpl), &q.sg, &q.iov);
	if (q.addr == NULL) {
		return -1;
	}
	memset(q.addr, 0, q.size * sizeof(struct spdk_nvme_cpl));
	q.is_cq = true;
	q.ien = true;
	insert_queue(ctrlr, &q, true, 0);

	return 0;
}

static void *
_map_one(void *prv, uint64_t addr, uint64_t len)
{
	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)prv;
	struct spdk_nvmf_qpair *qpair;
	struct nvmf_vfio_user_req *vu_req;
	struct nvmf_vfio_user_qpair *vu_qpair;
	void *ret;

	assert(req != NULL);
	qpair = req->qpair;
	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

	assert(vu_req->iovcnt < NVMF_VFIO_USER_MAX_IOVECS);
	ret = map_one(vu_qpair->ctrlr->endpoint->vfu_ctx, addr, len,
		      &vu_req->sg[vu_req->iovcnt],
		      &vu_req->iov[vu_req->iovcnt]);
	if (spdk_likely(ret != NULL)) {
		vu_req->iovcnt++;
	}
	return ret;
}

static int
vfio_user_map_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req,
		  struct iovec *iov, uint32_t length)
{
	/* Map the command's PRP list from guest physical memory to
	 * host virtual memory addresses.
	 */
	return spdk_nvme_map_cmd(req, &req->cmd->nvme_cmd, iov, NVMF_REQ_MAX_BUFFERS,
				 length, 4096, _map_one);
}

static struct spdk_nvmf_request *
get_nvmf_req(struct nvmf_vfio_user_qpair *qp);

static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct spdk_nvmf_request *req);

/*
 * Posts a CQE in the completion queue.
600 * 601 * @ctrlr: the vfio-user controller 602 * @cmd: the NVMe command for which the completion is posted 603 * @cq: the completion queue 604 * @cdw0: cdw0 as reported by NVMf (only for SPDK_NVME_OPC_GET/SET_FEATURES) 605 * @sc: the NVMe CQE status code 606 * @sct: the NVMe CQE status code type 607 */ 608 static int 609 post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd, 610 struct nvme_q *cq, uint32_t cdw0, uint16_t sc, 611 uint16_t sct) 612 { 613 struct spdk_nvme_cpl *cpl; 614 uint16_t qid; 615 int err; 616 617 assert(ctrlr != NULL); 618 assert(cmd != NULL); 619 620 qid = io_q_id(cq); 621 622 if (ctrlr->qp[0]->qpair.ctrlr->vcprop.csts.bits.shst != SPDK_NVME_SHST_NORMAL) { 623 SPDK_DEBUGLOG(nvmf_vfio, 624 "%s: ignore completion SQ%d cid=%d status=%#x\n", 625 ctrlr_id(ctrlr), qid, cmd->cid, sc); 626 return 0; 627 } 628 629 if (cq_is_full(ctrlr, cq)) { 630 SPDK_ERRLOG("%s: CQ%d full (tail=%d, head=%d)\n", 631 ctrlr_id(ctrlr), qid, cq->tail, *hdbl(ctrlr, cq)); 632 return -1; 633 } 634 635 cpl = ((struct spdk_nvme_cpl *)cq->addr) + cq->tail; 636 637 SPDK_DEBUGLOG(nvmf_vfio, 638 "%s: request complete SQ%d cid=%d status=%#x SQ head=%#x CQ tail=%#x\n", 639 ctrlr_id(ctrlr), qid, cmd->cid, sc, ctrlr->qp[qid]->sq.head, 640 cq->tail); 641 642 if (qid == 0) { 643 switch (cmd->opc) { 644 case SPDK_NVME_OPC_SET_FEATURES: 645 case SPDK_NVME_OPC_GET_FEATURES: 646 cpl->cdw0 = cdw0; 647 break; 648 } 649 } 650 651 652 assert(ctrlr->qp[qid] != NULL); 653 654 cpl->sqhd = ctrlr->qp[qid]->sq.head; 655 cpl->cid = cmd->cid; 656 cpl->status.dnr = 0x0; 657 cpl->status.m = 0x0; 658 cpl->status.sct = sct; 659 cpl->status.p = ~cpl->status.p; 660 cpl->status.sc = sc; 661 662 cq_tail_advance(cq); 663 664 /* 665 * this function now executes at SPDK thread context, we 666 * might be triggerring interrupts from vfio-user thread context so 667 * check for race conditions. 668 */ 669 if (ctrlr_interrupt_enabled(ctrlr) && cq->ien) { 670 err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv); 671 if (err != 0) { 672 SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n", 673 ctrlr_id(ctrlr)); 674 return err; 675 } 676 } 677 678 return 0; 679 } 680 681 static struct nvme_q * 682 lookup_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, const uint16_t qid, const bool is_cq) 683 { 684 struct nvme_q *q; 685 686 assert(ctrlr != NULL); 687 688 if (qid > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) { 689 return NULL; 690 } 691 692 if (ctrlr->qp[qid] == NULL) { 693 return NULL; 694 } 695 696 if (is_cq) { 697 q = &ctrlr->qp[qid]->cq; 698 } else { 699 q = &ctrlr->qp[qid]->sq; 700 } 701 702 if (q->addr == NULL) { 703 return NULL; 704 } 705 706 return q; 707 } 708 709 static void 710 unmap_qp(struct nvmf_vfio_user_qpair *qp) 711 { 712 struct nvmf_vfio_user_ctrlr *ctrlr; 713 714 if (qp->ctrlr == NULL) { 715 return; 716 } 717 ctrlr = qp->ctrlr; 718 719 SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy I/O QP%d\n", 720 ctrlr_id(ctrlr), qp->qpair.qid); 721 722 if (qp->sq.addr != NULL) { 723 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, &qp->sq.sg, &qp->sq.iov, 1); 724 qp->sq.addr = NULL; 725 } 726 727 if (qp->cq.addr != NULL) { 728 vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, &qp->cq.sg, &qp->cq.iov, 1); 729 qp->cq.addr = NULL; 730 } 731 } 732 733 /* 734 * TODO we can immediately remove the QP from the list because this function 735 * is now executed by the SPDK thread. 
736 */ 737 static void 738 destroy_qp(struct nvmf_vfio_user_ctrlr *ctrlr, uint16_t qid) 739 { 740 struct nvmf_vfio_user_qpair *qpair; 741 742 if (ctrlr == NULL) { 743 return; 744 } 745 746 qpair = ctrlr->qp[qid]; 747 if (qpair == NULL) { 748 return; 749 } 750 751 SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy QP%d=%p\n", ctrlr_id(ctrlr), 752 qid, qpair); 753 754 unmap_qp(qpair); 755 free(qpair->reqs_internal); 756 free(qpair); 757 ctrlr->qp[qid] = NULL; 758 } 759 760 /* This function can only fail because of memory allocation errors. */ 761 static int 762 init_qp(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_transport *transport, 763 const uint16_t qsize, const uint16_t id) 764 { 765 int err = 0, i; 766 struct nvmf_vfio_user_qpair *qpair; 767 struct nvmf_vfio_user_req *vu_req; 768 struct spdk_nvmf_request *req; 769 770 assert(ctrlr != NULL); 771 assert(transport != NULL); 772 773 qpair = calloc(1, sizeof(*qpair)); 774 if (qpair == NULL) { 775 return -ENOMEM; 776 } 777 778 qpair->qpair.qid = id; 779 qpair->qpair.transport = transport; 780 qpair->ctrlr = ctrlr; 781 qpair->qsize = qsize; 782 783 TAILQ_INIT(&qpair->reqs); 784 785 qpair->reqs_internal = calloc(qsize, sizeof(struct nvmf_vfio_user_req)); 786 if (qpair->reqs_internal == NULL) { 787 SPDK_ERRLOG("%s: error allocating reqs: %m\n", ctrlr_id(ctrlr)); 788 err = -ENOMEM; 789 goto out; 790 } 791 792 for (i = 0; i < qsize; i++) { 793 vu_req = &qpair->reqs_internal[i]; 794 req = &vu_req->req; 795 796 req->qpair = &qpair->qpair; 797 req->rsp = (union nvmf_c2h_msg *)&vu_req->rsp; 798 req->cmd = (union nvmf_h2c_msg *)&vu_req->cmd; 799 800 TAILQ_INSERT_TAIL(&qpair->reqs, vu_req, link); 801 } 802 ctrlr->qp[id] = qpair; 803 out: 804 if (err != 0) { 805 free(qpair); 806 } 807 return err; 808 } 809 810 /* 811 * Creates a completion or sumbission I/O queue. Returns 0 on success, -errno 812 * on error. 813 * 814 * XXX SPDK thread context. 815 */ 816 static int 817 handle_create_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, 818 struct spdk_nvme_cmd *cmd, const bool is_cq) 819 { 820 size_t entry_size; 821 uint16_t sc = SPDK_NVME_SC_SUCCESS; 822 uint16_t sct = SPDK_NVME_SCT_GENERIC; 823 int err = 0; 824 struct nvme_q io_q = {}; 825 826 assert(ctrlr != NULL); 827 assert(cmd != NULL); 828 829 SPDK_DEBUGLOG(nvmf_vfio, 830 "%s: create I/O %cQ%d: QSIZE=%#x\n", ctrlr_id(ctrlr), 831 is_cq ? 'C' : 'S', cmd->cdw10_bits.create_io_q.qid, 832 cmd->cdw10_bits.create_io_q.qsize); 833 834 if (cmd->cdw10_bits.create_io_q.qid >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) { 835 SPDK_ERRLOG("%s: invalid QID=%d, max=%d\n", ctrlr_id(ctrlr), 836 cmd->cdw10_bits.create_io_q.qid, 837 NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR); 838 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 839 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 840 goto out; 841 } 842 843 if (lookup_io_q(ctrlr, cmd->cdw10_bits.create_io_q.qid, is_cq)) { 844 SPDK_ERRLOG("%s: %cQ%d already exists\n", ctrlr_id(ctrlr), 845 is_cq ? 'C' : 'S', cmd->cdw10_bits.create_io_q.qid); 846 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 847 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 848 goto out; 849 } 850 851 /* TODO break rest of this function into smaller functions */ 852 if (is_cq) { 853 entry_size = sizeof(struct spdk_nvme_cpl); 854 if (cmd->cdw11_bits.create_io_cq.pc != 0x1) { 855 /* 856 * TODO CAP.CMBS is currently set to zero, however we 857 * should zero it out explicitly when CAP is read. 858 * Support for CAP.CMBS is not mentioned in the NVMf 859 * spec. 
			 */
			SPDK_ERRLOG("%s: non-PC CQ not supported\n", ctrlr_id(ctrlr));
			sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
			goto out;
		}
		io_q.ien = cmd->cdw11_bits.create_io_cq.ien;
		io_q.iv = cmd->cdw11_bits.create_io_cq.iv;
	} else {
		/* CQ must be created before SQ */
		if (!lookup_io_q(ctrlr, cmd->cdw11_bits.create_io_sq.cqid, true)) {
			SPDK_ERRLOG("%s: CQ%d does not exist\n", ctrlr_id(ctrlr),
				    cmd->cdw11_bits.create_io_sq.cqid);
			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			sc = SPDK_NVME_SC_COMPLETION_QUEUE_INVALID;
			goto out;
		}

		entry_size = sizeof(struct spdk_nvme_cmd);
		if (cmd->cdw11_bits.create_io_sq.pc != 0x1) {
			SPDK_ERRLOG("%s: non-PC SQ not supported\n", ctrlr_id(ctrlr));
			sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
			goto out;
		}

		io_q.cqid = cmd->cdw11_bits.create_io_sq.cqid;
		SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%d CQID=%d\n", ctrlr_id(ctrlr),
			      cmd->cdw10_bits.create_io_q.qid, io_q.cqid);
	}

	/* CDW10.QSIZE is a 0-based value */
	io_q.size = cmd->cdw10_bits.create_io_q.qsize + 1;
	if (io_q.size > max_queue_size(ctrlr)) {
		SPDK_ERRLOG("%s: queue too big, want=%d, max=%d\n", ctrlr_id(ctrlr),
			    io_q.size, max_queue_size(ctrlr));
		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		sc = SPDK_NVME_SC_INVALID_QUEUE_SIZE;
		goto out;
	}

	io_q.addr = map_one(ctrlr->endpoint->vfu_ctx, cmd->dptr.prp.prp1,
			    io_q.size * entry_size, &io_q.sg, &io_q.iov);
	if (io_q.addr == NULL) {
		sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		SPDK_ERRLOG("%s: failed to map I/O queue: %m\n", ctrlr_id(ctrlr));
		goto out;
	}
	io_q.prp1 = cmd->dptr.prp.prp1;
	memset(io_q.addr, 0, io_q.size * entry_size);

	SPDK_DEBUGLOG(nvmf_vfio, "%s: mapped %cQ%d IOVA=%#lx vaddr=%#llx\n",
		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
		      cmd->cdw10_bits.create_io_q.qid, cmd->dptr.prp.prp1,
		      (unsigned long long)io_q.addr);

	if (is_cq) {
		err = init_qp(ctrlr, ctrlr->qp[0]->qpair.transport, io_q.size,
			      cmd->cdw10_bits.create_io_q.qid);
		if (err != 0) {
			sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			goto out;
		}
	} else {
		/*
		 * After we've returned from the nvmf_vfio_user_poll_group_poll thread, once
		 * nvmf_vfio_user_accept executes it will pick up this QP and will eventually
		 * call nvmf_vfio_user_poll_group_add. The rest of the operation needed to
		 * complete the addition of the queue will be continued in the
		 * completion callback.
		 */
		TAILQ_INSERT_TAIL(&ctrlr->transport->new_qps, ctrlr->qp[cmd->cdw10_bits.create_io_q.qid], link);

	}
	insert_queue(ctrlr, &io_q, is_cq, cmd->cdw10_bits.create_io_q.qid);

out:
	return post_completion(ctrlr, cmd, &ctrlr->qp[0]->cq, 0, sc, sct);
}

/*
 * Deletes a completion or submission I/O queue.
 */
static int
handle_del_io_q(struct nvmf_vfio_user_ctrlr *ctrlr,
		struct spdk_nvme_cmd *cmd, const bool is_cq)
{
	uint16_t sct = SPDK_NVME_SCT_GENERIC;
	uint16_t sc = SPDK_NVME_SC_SUCCESS;

	SPDK_DEBUGLOG(nvmf_vfio, "%s: delete I/O %cQ: QID=%d\n",
		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
		      cmd->cdw10_bits.delete_io_q.qid);

	if (lookup_io_q(ctrlr, cmd->cdw10_bits.delete_io_q.qid, is_cq) == NULL) {
		SPDK_ERRLOG("%s: %cQ%d does not exist\n", ctrlr_id(ctrlr),
			    is_cq ?
'C' : 'S', cmd->cdw10_bits.delete_io_q.qid); 954 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 955 sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; 956 goto out; 957 } 958 959 if (is_cq) { 960 /* SQ must have been deleted first */ 961 if (ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state != VFIO_USER_QPAIR_DELETED) { 962 SPDK_ERRLOG("%s: the associated SQ must be deleted first\n", ctrlr_id(ctrlr)); 963 sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; 964 sc = SPDK_NVME_SC_INVALID_QUEUE_DELETION; 965 goto out; 966 } 967 } else { 968 /* 969 * This doesn't actually delete the I/O queue, we can't 970 * do that anyway because NVMf doesn't support it. We're merely 971 * telling the poll_group_poll function to skip checking this 972 * queue. The only workflow this works is when CC.EN is set to 973 * 0 and we're stopping the subsystem, so we know that the 974 * relevant callbacks to destroy the queues will be called. 975 */ 976 assert(ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state == VFIO_USER_QPAIR_ACTIVE); 977 ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state = VFIO_USER_QPAIR_DELETED; 978 } 979 980 out: 981 return post_completion(ctrlr, cmd, &ctrlr->qp[0]->cq, 0, sc, sct); 982 } 983 984 /* 985 * Returns 0 on success and -errno on error. 986 * 987 * XXX SPDK thread context 988 */ 989 static int 990 consume_admin_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd) 991 { 992 assert(ctrlr != NULL); 993 assert(cmd != NULL); 994 995 SPDK_DEBUGLOG(nvmf_vfio, "%s: handle admin req opc=%#x cid=%d\n", 996 ctrlr_id(ctrlr), cmd->opc, cmd->cid); 997 998 switch (cmd->opc) { 999 case SPDK_NVME_OPC_CREATE_IO_CQ: 1000 case SPDK_NVME_OPC_CREATE_IO_SQ: 1001 return handle_create_io_q(ctrlr, cmd, 1002 cmd->opc == SPDK_NVME_OPC_CREATE_IO_CQ); 1003 case SPDK_NVME_OPC_DELETE_IO_SQ: 1004 case SPDK_NVME_OPC_DELETE_IO_CQ: 1005 return handle_del_io_q(ctrlr, cmd, 1006 cmd->opc == SPDK_NVME_OPC_DELETE_IO_CQ); 1007 default: 1008 return handle_cmd_req(ctrlr, cmd, get_nvmf_req(ctrlr->qp[0])); 1009 } 1010 } 1011 1012 static int 1013 handle_cmd_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1014 { 1015 struct nvmf_vfio_user_qpair *qpair = cb_arg; 1016 1017 assert(qpair != NULL); 1018 assert(req != NULL); 1019 1020 vfu_unmap_sg(qpair->ctrlr->endpoint->vfu_ctx, req->sg, req->iov, req->iovcnt); 1021 1022 return post_completion(qpair->ctrlr, &req->req.cmd->nvme_cmd, 1023 &qpair->ctrlr->qp[req->req.qpair->qid]->cq, 1024 req->req.rsp->nvme_cpl.cdw0, 1025 req->req.rsp->nvme_cpl.status.sc, 1026 req->req.rsp->nvme_cpl.status.sct); 1027 } 1028 1029 static int 1030 consume_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair, 1031 struct spdk_nvme_cmd *cmd) 1032 { 1033 assert(qpair != NULL); 1034 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 1035 return consume_admin_cmd(ctrlr, cmd); 1036 } 1037 1038 return handle_cmd_req(ctrlr, cmd, get_nvmf_req(qpair)); 1039 } 1040 1041 static ssize_t 1042 handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail, 1043 struct nvmf_vfio_user_qpair *qpair) 1044 { 1045 struct spdk_nvme_cmd *queue; 1046 1047 assert(ctrlr != NULL); 1048 assert(qpair != NULL); 1049 1050 queue = qpair->sq.addr; 1051 while (sq_head(qpair) != new_tail) { 1052 int err; 1053 struct spdk_nvme_cmd *cmd = &queue[sq_head(qpair)]; 1054 1055 /* 1056 * SQHD must contain the new head pointer, so we must increase 1057 * it before we generate a completion. 
1058 */ 1059 sqhd_advance(ctrlr, qpair); 1060 1061 err = consume_cmd(ctrlr, qpair, cmd); 1062 if (err != 0) { 1063 return err; 1064 } 1065 } 1066 1067 return 0; 1068 } 1069 1070 static int 1071 map_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1072 { 1073 int err; 1074 1075 assert(ctrlr != NULL); 1076 1077 err = acq_map(ctrlr); 1078 if (err != 0) { 1079 return err; 1080 } 1081 1082 err = asq_map(ctrlr); 1083 if (err != 0) { 1084 return err; 1085 } 1086 1087 return 0; 1088 } 1089 1090 static void 1091 unmap_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr) 1092 { 1093 assert(ctrlr->qp[0] != NULL); 1094 1095 unmap_qp(ctrlr->qp[0]); 1096 } 1097 1098 static void 1099 memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1100 { 1101 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1102 struct nvmf_vfio_user_ctrlr *ctrlr; 1103 struct nvmf_vfio_user_qpair *qpair; 1104 int i, ret; 1105 1106 /* 1107 * We're not interested in any DMA regions that aren't mappable (we don't 1108 * support clients that don't share their memory). 1109 */ 1110 if (!info->vaddr) { 1111 return; 1112 } 1113 1114 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1115 (info->mapping.iov_len & MASK_2MB)) { 1116 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1117 (uintptr_t)info->mapping.iov_base, 1118 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1119 return; 1120 } 1121 1122 assert(endpoint != NULL); 1123 if (endpoint->ctrlr == NULL) { 1124 return; 1125 } 1126 ctrlr = endpoint->ctrlr; 1127 1128 SPDK_DEBUGLOG(nvmf_vfio, "%s: map IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1129 (uintptr_t)info->mapping.iov_base, 1130 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1131 1132 /* VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE are enabled when registering to VFIO, here we also 1133 * check the protection bits before registering. 
1134 */ 1135 if ((info->prot == (PROT_WRITE | PROT_READ)) && 1136 (spdk_mem_register(info->mapping.iov_base, info->mapping.iov_len))) { 1137 SPDK_ERRLOG("Memory region register %#lx-%#lx failed\n", 1138 (uint64_t)(uintptr_t)info->mapping.iov_base, 1139 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1140 } 1141 1142 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1143 qpair = ctrlr->qp[i]; 1144 if (qpair == NULL) { 1145 continue; 1146 } 1147 1148 if (qpair->state != VFIO_USER_QPAIR_INACTIVE) { 1149 continue; 1150 } 1151 1152 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 1153 ret = map_admin_queue(ctrlr); 1154 if (ret) { 1155 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap Admin queue\n"); 1156 continue; 1157 } 1158 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1159 } else { 1160 struct nvme_q *sq = &qpair->sq; 1161 struct nvme_q *cq = &qpair->cq; 1162 1163 sq->addr = map_one(ctrlr->endpoint->vfu_ctx, sq->prp1, sq->size * 64, &sq->sg, &sq->iov); 1164 if (!sq->addr) { 1165 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap SQID %d %#lx-%#lx\n", 1166 i, sq->prp1, sq->prp1 + sq->size * 64); 1167 continue; 1168 } 1169 cq->addr = map_one(ctrlr->endpoint->vfu_ctx, cq->prp1, cq->size * 16, &cq->sg, &cq->iov); 1170 if (!cq->addr) { 1171 SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap CQID %d %#lx-%#lx\n", 1172 i, cq->prp1, cq->prp1 + cq->size * 16); 1173 continue; 1174 } 1175 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1176 } 1177 } 1178 } 1179 1180 static int 1181 memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) 1182 { 1183 1184 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1185 struct nvmf_vfio_user_ctrlr *ctrlr; 1186 struct nvmf_vfio_user_qpair *qpair; 1187 void *map_start, *map_end; 1188 int i; 1189 1190 if (!info->vaddr) { 1191 return 0; 1192 } 1193 1194 if (((uintptr_t)info->mapping.iov_base & MASK_2MB) || 1195 (info->mapping.iov_len & MASK_2MB)) { 1196 SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr, 1197 (uintptr_t)info->mapping.iov_base, 1198 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1199 return 0; 1200 } 1201 1202 assert(endpoint != NULL); 1203 if (endpoint->ctrlr == NULL) { 1204 return 0; 1205 } 1206 ctrlr = endpoint->ctrlr; 1207 1208 SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap IOVA %#lx-%#lx\n", ctrlr_id(ctrlr), 1209 (uintptr_t)info->mapping.iov_base, 1210 (uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1211 1212 if ((info->prot == (PROT_WRITE | PROT_READ)) && 1213 (spdk_mem_unregister(info->mapping.iov_base, info->mapping.iov_len))) { 1214 SPDK_ERRLOG("Memory region unregister %#lx-%#lx failed\n", 1215 (uint64_t)(uintptr_t)info->mapping.iov_base, 1216 (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len); 1217 } 1218 1219 map_start = info->mapping.iov_base; 1220 map_end = info->mapping.iov_base + info->mapping.iov_len; 1221 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1222 qpair = ctrlr->qp[i]; 1223 if (qpair == NULL) { 1224 continue; 1225 } 1226 1227 if ((qpair->cq.addr >= map_start && qpair->cq.addr < map_end) || 1228 (qpair->sq.addr >= map_start && qpair->sq.addr < map_end)) { 1229 unmap_qp(qpair); 1230 qpair->state = VFIO_USER_QPAIR_INACTIVE; 1231 } 1232 } 1233 1234 return 0; 1235 } 1236 1237 static int 1238 nvmf_vfio_user_prop_req_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1239 { 1240 struct nvmf_vfio_user_qpair *qpair = cb_arg; 1241 int ret; 1242 1243 assert(qpair != NULL); 1244 
assert(req != NULL); 1245 1246 if (req->req.cmd->prop_get_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET) { 1247 assert(qpair->ctrlr != NULL); 1248 assert(req != NULL); 1249 1250 memcpy(req->req.data, 1251 &req->req.rsp->prop_get_rsp.value.u64, 1252 req->req.length); 1253 } else { 1254 assert(req->req.cmd->prop_set_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET); 1255 assert(qpair->ctrlr != NULL); 1256 1257 if (req->req.cmd->prop_set_cmd.ofst == offsetof(struct spdk_nvme_registers, cc)) { 1258 union spdk_nvme_cc_register *cc; 1259 1260 cc = (union spdk_nvme_cc_register *)&req->req.cmd->prop_set_cmd.value.u64; 1261 1262 if (cc->bits.en == 1 && cc->bits.shn == 0) { 1263 SPDK_DEBUGLOG(nvmf_vfio, 1264 "%s: MAP Admin queue\n", 1265 ctrlr_id(qpair->ctrlr)); 1266 ret = map_admin_queue(qpair->ctrlr); 1267 if (ret) { 1268 SPDK_ERRLOG("%s: failed to map Admin queue\n", ctrlr_id(qpair->ctrlr)); 1269 return ret; 1270 } 1271 qpair->state = VFIO_USER_QPAIR_ACTIVE; 1272 } else if ((cc->bits.en == 0 && cc->bits.shn == 0) || 1273 (cc->bits.en == 1 && cc->bits.shn != 0)) { 1274 SPDK_DEBUGLOG(nvmf_vfio, 1275 "%s: UNMAP Admin queue\n", 1276 ctrlr_id(qpair->ctrlr)); 1277 unmap_admin_queue(qpair->ctrlr); 1278 qpair->state = VFIO_USER_QPAIR_INACTIVE; 1279 } 1280 } 1281 } 1282 1283 return 0; 1284 } 1285 1286 /* 1287 * XXX Do NOT remove, see comment in access_bar0_fn. 1288 * 1289 * Handles a write at offset 0x1000 or more. 1290 * 1291 * DSTRD is set to fixed value 0 for NVMf. 1292 * 1293 */ 1294 static int 1295 handle_dbl_access(struct nvmf_vfio_user_ctrlr *ctrlr, uint32_t *buf, 1296 const size_t count, loff_t pos, const bool is_write) 1297 { 1298 assert(ctrlr != NULL); 1299 assert(buf != NULL); 1300 1301 if (count != sizeof(uint32_t)) { 1302 SPDK_ERRLOG("%s: bad doorbell buffer size %ld\n", 1303 ctrlr_id(ctrlr), count); 1304 errno = EINVAL; 1305 return -1; 1306 } 1307 1308 pos -= NVMF_VFIO_USER_DOORBELLS_OFFSET; 1309 1310 /* pos must be dword aligned */ 1311 if ((pos & 0x3) != 0) { 1312 SPDK_ERRLOG("%s: bad doorbell offset %#lx\n", ctrlr_id(ctrlr), pos); 1313 errno = EINVAL; 1314 return -1; 1315 } 1316 1317 /* convert byte offset to array index */ 1318 pos >>= 2; 1319 1320 if (pos > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR * 2) { 1321 /* 1322 * TODO: need to emit a "Write to Invalid Doorbell Register" 1323 * asynchronous event 1324 */ 1325 SPDK_ERRLOG("%s: bad doorbell index %#lx\n", ctrlr_id(ctrlr), pos); 1326 errno = EINVAL; 1327 return -1; 1328 } 1329 1330 if (is_write) { 1331 ctrlr->doorbells[pos] = *buf; 1332 spdk_wmb(); 1333 } else { 1334 spdk_rmb(); 1335 *buf = ctrlr->doorbells[pos]; 1336 } 1337 return 0; 1338 } 1339 1340 static ssize_t 1341 access_bar0_fn(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos, 1342 bool is_write) 1343 { 1344 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1345 struct nvmf_vfio_user_ctrlr *ctrlr; 1346 struct nvmf_vfio_user_req *req; 1347 int ret; 1348 1349 ctrlr = endpoint->ctrlr; 1350 1351 SPDK_DEBUGLOG(nvmf_vfio, 1352 "%s: bar0 %s ctrlr: %p, count=%zu, pos=%"PRIX64"\n", 1353 endpoint_id(endpoint), is_write ? "write" : "read", 1354 ctrlr, count, pos); 1355 1356 if (pos >= NVMF_VFIO_USER_DOORBELLS_OFFSET) { 1357 /* 1358 * The fact that the doorbells can be memory mapped doesn't mean 1359 * that the client (VFIO in QEMU) is obliged to memory map them, 1360 * it might still elect to access them via regular read/write; 1361 * we might also have had disable_mappable_bar0 set. 
1362 */ 1363 ret = handle_dbl_access(ctrlr, (uint32_t *)buf, count, 1364 pos, is_write); 1365 if (ret == 0) { 1366 return count; 1367 } 1368 assert(errno != 0); 1369 return ret; 1370 } 1371 1372 /* Construct a Fabric Property Get/Set command and send it */ 1373 req = get_nvmf_vfio_user_req(ctrlr->qp[0]); 1374 if (req == NULL) { 1375 errno = ENOBUFS; 1376 return -1; 1377 } 1378 1379 req->cb_fn = nvmf_vfio_user_prop_req_rsp; 1380 req->cb_arg = ctrlr->qp[0]; 1381 req->req.cmd->prop_set_cmd.opcode = SPDK_NVME_OPC_FABRIC; 1382 req->req.cmd->prop_set_cmd.cid = 0; 1383 req->req.cmd->prop_set_cmd.attrib.size = (count / 4) - 1; 1384 req->req.cmd->prop_set_cmd.ofst = pos; 1385 if (is_write) { 1386 req->req.cmd->prop_set_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET; 1387 if (req->req.cmd->prop_set_cmd.attrib.size) { 1388 req->req.cmd->prop_set_cmd.value.u64 = *(uint64_t *)buf; 1389 } else { 1390 req->req.cmd->prop_set_cmd.value.u32.high = 0; 1391 req->req.cmd->prop_set_cmd.value.u32.low = *(uint32_t *)buf; 1392 } 1393 } else { 1394 req->req.cmd->prop_get_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET; 1395 } 1396 req->req.length = count; 1397 req->req.data = buf; 1398 1399 spdk_nvmf_request_exec_fabrics(&req->req); 1400 1401 return count; 1402 } 1403 1404 /* 1405 * NVMe driver reads 4096 bytes, which is the extended PCI configuration space 1406 * available on PCI-X 2.0 and PCI Express buses 1407 */ 1408 static ssize_t 1409 access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, 1410 bool is_write) 1411 { 1412 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1413 1414 if (is_write) { 1415 SPDK_ERRLOG("%s: write %#lx-%#lx not supported\n", 1416 endpoint_id(endpoint), offset, offset + count); 1417 errno = EINVAL; 1418 return -1; 1419 } 1420 1421 if (offset + count > PCI_CFG_SPACE_EXP_SIZE) { 1422 SPDK_ERRLOG("%s: access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n", 1423 endpoint_id(endpoint), offset, count, 1424 PCI_CFG_SPACE_EXP_SIZE); 1425 errno = ERANGE; 1426 return -1; 1427 } 1428 1429 memcpy(buf, ((unsigned char *)endpoint->pci_config_space) + offset, count); 1430 1431 return count; 1432 } 1433 1434 static void 1435 vfio_user_log(vfu_ctx_t *vfu_ctx, int level, char const *msg) 1436 { 1437 struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx); 1438 1439 if (level >= LOG_DEBUG) { 1440 SPDK_DEBUGLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1441 } else if (level >= LOG_INFO) { 1442 SPDK_INFOLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg); 1443 } else if (level >= LOG_NOTICE) { 1444 SPDK_NOTICELOG("%s: %s\n", endpoint_id(endpoint), msg); 1445 } else if (level >= LOG_WARNING) { 1446 SPDK_WARNLOG("%s: %s\n", endpoint_id(endpoint), msg); 1447 } else { 1448 SPDK_ERRLOG("%s: %s\n", endpoint_id(endpoint), msg); 1449 } 1450 } 1451 1452 static void 1453 init_pci_config_space(vfu_pci_config_space_t *p) 1454 { 1455 /* MLBAR */ 1456 p->hdr.bars[0].raw = 0x0; 1457 /* MUBAR */ 1458 p->hdr.bars[1].raw = 0x0; 1459 1460 /* vendor specific, let's set them to zero for now */ 1461 p->hdr.bars[3].raw = 0x0; 1462 p->hdr.bars[4].raw = 0x0; 1463 p->hdr.bars[5].raw = 0x0; 1464 1465 /* enable INTx */ 1466 p->hdr.intr.ipin = 0x1; 1467 } 1468 1469 static int 1470 vfio_user_dev_info_fill(struct nvmf_vfio_user_transport *vu_transport, 1471 struct nvmf_vfio_user_endpoint *endpoint) 1472 { 1473 int ret; 1474 ssize_t cap_offset; 1475 vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx; 1476 1477 struct pmcap pmcap = { .hdr.id = 
PCI_CAP_ID_PM, .pmcs.nsfrst = 0x1 }; 1478 struct pxcap pxcap = { 1479 .hdr.id = PCI_CAP_ID_EXP, 1480 .pxcaps.ver = 0x2, 1481 .pxdcap = {.per = 0x1, .flrc = 0x1}, 1482 .pxdcap2.ctds = 0x1 1483 }; 1484 1485 struct msixcap msixcap = { 1486 .hdr.id = PCI_CAP_ID_MSIX, 1487 .mxc.ts = NVME_IRQ_MSIX_NUM - 1, 1488 .mtab = {.tbir = 0x4, .to = 0x0}, 1489 .mpba = {.pbir = 0x5, .pbao = 0x0} 1490 }; 1491 1492 static struct iovec sparse_mmap[] = { 1493 { 1494 .iov_base = (void *)NVMF_VFIO_USER_DOORBELLS_OFFSET, 1495 .iov_len = NVMF_VFIO_USER_DOORBELLS_SIZE, 1496 }, 1497 }; 1498 1499 ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0); 1500 if (ret < 0) { 1501 SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", vfu_ctx); 1502 return ret; 1503 } 1504 vfu_pci_set_id(vfu_ctx, 0x4e58, 0x0001, 0, 0); 1505 /* 1506 * 0x02, controller uses the NVM Express programming interface 1507 * 0x08, non-volatile memory controller 1508 * 0x01, mass storage controller 1509 */ 1510 vfu_pci_set_class(vfu_ctx, 0x01, 0x08, 0x02); 1511 1512 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pmcap); 1513 if (cap_offset < 0) { 1514 SPDK_ERRLOG("vfu_ctx %p failed add pmcap\n", vfu_ctx); 1515 return ret; 1516 } 1517 1518 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pxcap); 1519 if (cap_offset < 0) { 1520 SPDK_ERRLOG("vfu_ctx %p failed add pxcap\n", vfu_ctx); 1521 return ret; 1522 } 1523 1524 cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &msixcap); 1525 if (cap_offset < 0) { 1526 SPDK_ERRLOG("vfu_ctx %p failed add msixcap\n", vfu_ctx); 1527 return ret; 1528 } 1529 1530 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, NVME_REG_CFG_SIZE, 1531 access_pci_config, VFU_REGION_FLAG_RW, NULL, 0, -1); 1532 if (ret < 0) { 1533 SPDK_ERRLOG("vfu_ctx %p failed to setup cfg\n", vfu_ctx); 1534 return ret; 1535 } 1536 1537 if (vu_transport->transport_opts.disable_mappable_bar0) { 1538 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1539 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1540 NULL, 0, -1); 1541 } else { 1542 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE, 1543 access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, 1544 sparse_mmap, 1, endpoint->fd); 1545 } 1546 1547 if (ret < 0) { 1548 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 0\n", vfu_ctx); 1549 return ret; 1550 } 1551 1552 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR4_REGION_IDX, PAGE_SIZE, 1553 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1); 1554 if (ret < 0) { 1555 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 4\n", vfu_ctx); 1556 return ret; 1557 } 1558 1559 ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR5_REGION_IDX, PAGE_SIZE, 1560 NULL, VFU_REGION_FLAG_RW, NULL, 0, -1); 1561 if (ret < 0) { 1562 SPDK_ERRLOG("vfu_ctx %p failed to setup bar 5\n", vfu_ctx); 1563 return ret; 1564 } 1565 1566 ret = vfu_setup_device_dma(vfu_ctx, memory_region_add_cb, memory_region_remove_cb); 1567 if (ret < 0) { 1568 SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", vfu_ctx); 1569 return ret; 1570 } 1571 1572 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); 1573 if (ret < 0) { 1574 SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", vfu_ctx); 1575 return ret; 1576 } 1577 1578 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, NVME_IRQ_MSIX_NUM); 1579 if (ret < 0) { 1580 SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", vfu_ctx); 1581 return ret; 1582 } 1583 1584 ret = vfu_realize_ctx(vfu_ctx); 1585 if (ret < 0) { 1586 SPDK_ERRLOG("vfu_ctx %p failed to 
realize\n", vfu_ctx); 1587 return ret; 1588 } 1589 1590 endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx); 1591 assert(endpoint->pci_config_space != NULL); 1592 init_pci_config_space(endpoint->pci_config_space); 1593 1594 assert(cap_offset != 0); 1595 endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset); 1596 1597 return 0; 1598 } 1599 1600 static void 1601 _destroy_ctrlr(void *ctx) 1602 { 1603 struct nvmf_vfio_user_ctrlr *ctrlr = ctx; 1604 int i; 1605 1606 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1607 destroy_qp(ctrlr, i); 1608 } 1609 1610 if (ctrlr->endpoint) { 1611 ctrlr->endpoint->ctrlr = NULL; 1612 } 1613 1614 spdk_poller_unregister(&ctrlr->mmio_poller); 1615 free(ctrlr); 1616 } 1617 1618 static int 1619 destroy_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 1620 { 1621 assert(ctrlr != NULL); 1622 1623 SPDK_DEBUGLOG(nvmf_vfio, "destroy %s\n", ctrlr_id(ctrlr)); 1624 1625 if (ctrlr->thread == spdk_get_thread()) { 1626 _destroy_ctrlr(ctrlr); 1627 } else { 1628 spdk_thread_send_msg(ctrlr->thread, _destroy_ctrlr, ctrlr); 1629 } 1630 1631 return 0; 1632 } 1633 1634 static void 1635 nvmf_vfio_user_create_ctrlr(struct nvmf_vfio_user_transport *transport, 1636 struct nvmf_vfio_user_endpoint *endpoint) 1637 { 1638 struct nvmf_vfio_user_ctrlr *ctrlr; 1639 int err; 1640 1641 /* First, construct a vfio-user CUSTOM transport controller */ 1642 ctrlr = calloc(1, sizeof(*ctrlr)); 1643 if (ctrlr == NULL) { 1644 err = -ENOMEM; 1645 goto out; 1646 } 1647 ctrlr->cntlid = 0xffff; 1648 ctrlr->transport = transport; 1649 ctrlr->endpoint = endpoint; 1650 ctrlr->doorbells = endpoint->doorbells; 1651 1652 /* Then, construct an admin queue pair */ 1653 err = init_qp(ctrlr, &transport->transport, NVMF_VFIO_USER_DEFAULT_AQ_DEPTH, 0); 1654 if (err != 0) { 1655 goto out; 1656 } 1657 endpoint->ctrlr = ctrlr; 1658 ctrlr->ready = true; 1659 1660 /* Notify the generic layer about the new admin queue pair */ 1661 TAILQ_INSERT_TAIL(&ctrlr->transport->new_qps, ctrlr->qp[0], link); 1662 1663 out: 1664 if (err != 0) { 1665 SPDK_ERRLOG("%s: failed to create vfio-user controller: %s\n", 1666 endpoint_id(endpoint), strerror(-err)); 1667 if (destroy_ctrlr(ctrlr) != 0) { 1668 SPDK_ERRLOG("%s: failed to clean up\n", 1669 endpoint_id(endpoint)); 1670 } 1671 } 1672 } 1673 1674 static int 1675 nvmf_vfio_user_listen(struct spdk_nvmf_transport *transport, 1676 const struct spdk_nvme_transport_id *trid, 1677 struct spdk_nvmf_listen_opts *listen_opts) 1678 { 1679 struct nvmf_vfio_user_transport *vu_transport; 1680 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 1681 char *path = NULL; 1682 char uuid[PATH_MAX] = {}; 1683 int fd; 1684 int err; 1685 1686 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 1687 transport); 1688 1689 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 1690 /* Only compare traddr */ 1691 if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) { 1692 return -EEXIST; 1693 } 1694 } 1695 1696 endpoint = calloc(1, sizeof(*endpoint)); 1697 if (!endpoint) { 1698 return -ENOMEM; 1699 } 1700 1701 endpoint->fd = -1; 1702 memcpy(&endpoint->trid, trid, sizeof(endpoint->trid)); 1703 1704 err = asprintf(&path, "%s/bar0", endpoint_id(endpoint)); 1705 if (err == -1) { 1706 goto out; 1707 } 1708 1709 fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); 1710 if (fd == -1) { 1711 SPDK_ERRLOG("%s: failed to open device memory at %s: %m\n", 
1712 endpoint_id(endpoint), path); 1713 err = fd; 1714 free(path); 1715 goto out; 1716 } 1717 free(path); 1718 1719 err = ftruncate(fd, NVMF_VFIO_USER_DOORBELLS_OFFSET + NVMF_VFIO_USER_DOORBELLS_SIZE); 1720 if (err != 0) { 1721 goto out; 1722 } 1723 1724 endpoint->doorbells = mmap(NULL, NVMF_VFIO_USER_DOORBELLS_SIZE, 1725 PROT_READ | PROT_WRITE, MAP_SHARED, fd, NVMF_VFIO_USER_DOORBELLS_OFFSET); 1726 if (endpoint->doorbells == MAP_FAILED) { 1727 endpoint->doorbells = NULL; 1728 err = -errno; 1729 goto out; 1730 } 1731 1732 endpoint->fd = fd; 1733 1734 snprintf(uuid, PATH_MAX, "%s/cntrl", endpoint_id(endpoint)); 1735 1736 endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, uuid, LIBVFIO_USER_FLAG_ATTACH_NB, 1737 endpoint, VFU_DEV_TYPE_PCI); 1738 if (endpoint->vfu_ctx == NULL) { 1739 SPDK_ERRLOG("%s: error creating libmuser context: %m\n", 1740 endpoint_id(endpoint)); 1741 err = -1; 1742 goto out; 1743 } 1744 vfu_setup_log(endpoint->vfu_ctx, vfio_user_log, 1745 SPDK_DEBUGLOG_FLAG_ENABLED("nvmf_vfio") ? LOG_DEBUG : LOG_ERR); 1746 1747 err = vfio_user_dev_info_fill(vu_transport, endpoint); 1748 if (err < 0) { 1749 goto out; 1750 } 1751 1752 pthread_mutex_init(&endpoint->lock, NULL); 1753 TAILQ_INSERT_TAIL(&vu_transport->endpoints, endpoint, link); 1754 SPDK_DEBUGLOG(nvmf_vfio, "%s: doorbells %p\n", uuid, endpoint->doorbells); 1755 1756 out: 1757 if (err != 0) { 1758 nvmf_vfio_user_destroy_endpoint(endpoint); 1759 } 1760 1761 return err; 1762 } 1763 1764 static void 1765 nvmf_vfio_user_stop_listen(struct spdk_nvmf_transport *transport, 1766 const struct spdk_nvme_transport_id *trid) 1767 { 1768 struct nvmf_vfio_user_transport *vu_transport; 1769 struct nvmf_vfio_user_endpoint *endpoint, *tmp; 1770 int err; 1771 1772 assert(trid != NULL); 1773 assert(trid->traddr != NULL); 1774 1775 SPDK_DEBUGLOG(nvmf_vfio, "%s: stop listen\n", trid->traddr); 1776 1777 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 1778 transport); 1779 1780 pthread_mutex_lock(&vu_transport->lock); 1781 TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) { 1782 if (strcmp(trid->traddr, endpoint->trid.traddr) == 0) { 1783 TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link); 1784 if (endpoint->ctrlr) { 1785 err = destroy_ctrlr(endpoint->ctrlr); 1786 if (err != 0) { 1787 SPDK_ERRLOG("%s: failed destroy controller: %s\n", 1788 endpoint_id(endpoint), strerror(-err)); 1789 } 1790 } 1791 nvmf_vfio_user_destroy_endpoint(endpoint); 1792 pthread_mutex_unlock(&vu_transport->lock); 1793 1794 return; 1795 } 1796 } 1797 pthread_mutex_unlock(&vu_transport->lock); 1798 1799 SPDK_DEBUGLOG(nvmf_vfio, "%s: not found\n", trid->traddr); 1800 } 1801 1802 static void 1803 nvmf_vfio_user_cdata_init(struct spdk_nvmf_transport *transport, 1804 struct spdk_nvmf_subsystem *subsystem, 1805 struct spdk_nvmf_ctrlr_data *cdata) 1806 { 1807 memset(&cdata->sgls, 0, sizeof(struct spdk_nvme_cdata_sgls)); 1808 cdata->sgls.supported = SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED; 1809 } 1810 1811 static int 1812 nvmf_vfio_user_listen_associate(struct spdk_nvmf_transport *transport, 1813 const struct spdk_nvmf_subsystem *subsystem, 1814 const struct spdk_nvme_transport_id *trid) 1815 { 1816 struct nvmf_vfio_user_transport *vu_transport; 1817 struct nvmf_vfio_user_endpoint *endpoint; 1818 1819 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, transport); 1820 1821 TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) { 1822 if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) 
== 0) { 1823 break; 1824 } 1825 } 1826 1827 if (endpoint == NULL) { 1828 return -ENOENT; 1829 } 1830 1831 endpoint->subsystem = subsystem; 1832 1833 return 0; 1834 } 1835 1836 /* 1837 * Executed periodically. 1838 * 1839 * XXX SPDK thread context. 1840 */ 1841 static uint32_t 1842 nvmf_vfio_user_accept(struct spdk_nvmf_transport *transport) 1843 { 1844 int err; 1845 struct nvmf_vfio_user_transport *vu_transport; 1846 struct nvmf_vfio_user_qpair *qp, *tmp_qp; 1847 struct nvmf_vfio_user_endpoint *endpoint; 1848 1849 vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, 1850 transport); 1851 1852 pthread_mutex_lock(&vu_transport->lock); 1853 1854 TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) { 1855 /* we need try to attach the controller again after reset or shutdown */ 1856 if (endpoint->ctrlr != NULL && endpoint->ctrlr->ready) { 1857 continue; 1858 } 1859 1860 err = vfu_attach_ctx(endpoint->vfu_ctx); 1861 if (err != 0) { 1862 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1863 continue; 1864 } 1865 1866 pthread_mutex_unlock(&vu_transport->lock); 1867 return -EFAULT; 1868 } 1869 1870 /* Construct a controller */ 1871 nvmf_vfio_user_create_ctrlr(vu_transport, endpoint); 1872 } 1873 1874 TAILQ_FOREACH_SAFE(qp, &vu_transport->new_qps, link, tmp_qp) { 1875 TAILQ_REMOVE(&vu_transport->new_qps, qp, link); 1876 spdk_nvmf_tgt_new_qpair(transport->tgt, &qp->qpair); 1877 } 1878 1879 pthread_mutex_unlock(&vu_transport->lock); 1880 1881 return 0; 1882 } 1883 1884 static void 1885 nvmf_vfio_user_discover(struct spdk_nvmf_transport *transport, 1886 struct spdk_nvme_transport_id *trid, 1887 struct spdk_nvmf_discovery_log_page_entry *entry) 1888 { } 1889 1890 static struct spdk_nvmf_transport_poll_group * 1891 nvmf_vfio_user_poll_group_create(struct spdk_nvmf_transport *transport) 1892 { 1893 struct nvmf_vfio_user_poll_group *vu_group; 1894 1895 SPDK_DEBUGLOG(nvmf_vfio, "create poll group\n"); 1896 1897 vu_group = calloc(1, sizeof(*vu_group)); 1898 if (vu_group == NULL) { 1899 SPDK_ERRLOG("Error allocating poll group: %m"); 1900 return NULL; 1901 } 1902 1903 TAILQ_INIT(&vu_group->qps); 1904 1905 return &vu_group->group; 1906 } 1907 1908 /* called when process exits */ 1909 static void 1910 nvmf_vfio_user_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) 1911 { 1912 struct nvmf_vfio_user_poll_group *vu_group; 1913 1914 SPDK_DEBUGLOG(nvmf_vfio, "destroy poll group\n"); 1915 1916 vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group); 1917 1918 free(vu_group); 1919 } 1920 1921 static void 1922 vfio_user_qpair_disconnect_cb(void *ctx) 1923 { 1924 struct nvmf_vfio_user_endpoint *endpoint = ctx; 1925 struct nvmf_vfio_user_ctrlr *ctrlr; 1926 1927 pthread_mutex_lock(&endpoint->lock); 1928 ctrlr = endpoint->ctrlr; 1929 if (!ctrlr) { 1930 pthread_mutex_unlock(&endpoint->lock); 1931 return; 1932 } 1933 1934 if (!ctrlr->num_connected_qps) { 1935 destroy_ctrlr(ctrlr); 1936 pthread_mutex_unlock(&endpoint->lock); 1937 return; 1938 } 1939 pthread_mutex_unlock(&endpoint->lock); 1940 } 1941 1942 static int 1943 vfio_user_stop_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr) 1944 { 1945 uint32_t i; 1946 struct nvmf_vfio_user_qpair *qpair; 1947 struct nvmf_vfio_user_endpoint *endpoint; 1948 1949 SPDK_DEBUGLOG(nvmf_vfio, "%s stop processing\n", ctrlr_id(ctrlr)); 1950 1951 ctrlr->ready = false; 1952 endpoint = ctrlr->endpoint; 1953 assert(endpoint != NULL); 1954 1955 for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) { 1956 qpair = ctrlr->qp[i]; 1957 if 
(qpair == NULL) { 1958 continue; 1959 } 1960 spdk_nvmf_qpair_disconnect(&qpair->qpair, vfio_user_qpair_disconnect_cb, endpoint); 1961 } 1962 1963 return 0; 1964 } 1965 1966 static int 1967 vfio_user_poll_mmio(void *ctx) 1968 { 1969 struct nvmf_vfio_user_ctrlr *ctrlr = ctx; 1970 int ret; 1971 1972 assert(ctrlr != NULL); 1973 1974 /* This will call access_bar0_fn() if there are any writes 1975 * to the portion of the BAR that is not mmap'd */ 1976 ret = vfu_run_ctx(ctrlr->endpoint->vfu_ctx); 1977 if (spdk_unlikely(ret != 0)) { 1978 spdk_poller_unregister(&ctrlr->mmio_poller); 1979 1980 /* initiator shutdown or reset, waiting for another re-connect */ 1981 if (errno == ENOTCONN) { 1982 vfio_user_stop_ctrlr(ctrlr); 1983 return SPDK_POLLER_BUSY; 1984 } 1985 1986 fail_ctrlr(ctrlr); 1987 } 1988 1989 return SPDK_POLLER_BUSY; 1990 } 1991 1992 static int 1993 handle_queue_connect_rsp(struct nvmf_vfio_user_req *req, void *cb_arg) 1994 { 1995 struct nvmf_vfio_user_poll_group *vu_group; 1996 struct nvmf_vfio_user_qpair *qpair = cb_arg; 1997 struct nvmf_vfio_user_ctrlr *ctrlr; 1998 struct nvmf_vfio_user_endpoint *endpoint; 1999 2000 assert(qpair != NULL); 2001 assert(req != NULL); 2002 2003 ctrlr = qpair->ctrlr; 2004 endpoint = ctrlr->endpoint; 2005 assert(ctrlr != NULL); 2006 assert(endpoint != NULL); 2007 2008 if (spdk_nvme_cpl_is_error(&req->req.rsp->nvme_cpl)) { 2009 SPDK_ERRLOG("SC %u, SCT %u\n", req->req.rsp->nvme_cpl.status.sc, req->req.rsp->nvme_cpl.status.sct); 2010 destroy_qp(ctrlr, qpair->qpair.qid); 2011 destroy_ctrlr(ctrlr); 2012 return -1; 2013 } 2014 2015 vu_group = SPDK_CONTAINEROF(qpair->group, struct nvmf_vfio_user_poll_group, group); 2016 TAILQ_INSERT_TAIL(&vu_group->qps, qpair, link); 2017 qpair->state = VFIO_USER_QPAIR_ACTIVE; 2018 2019 pthread_mutex_lock(&endpoint->lock); 2020 if (nvmf_qpair_is_admin_queue(&qpair->qpair)) { 2021 ctrlr->cntlid = qpair->qpair.ctrlr->cntlid; 2022 ctrlr->thread = spdk_get_thread(); 2023 ctrlr->mmio_poller = SPDK_POLLER_REGISTER(vfio_user_poll_mmio, ctrlr, 0); 2024 } 2025 ctrlr->num_connected_qps++; 2026 pthread_mutex_unlock(&endpoint->lock); 2027 2028 free(req->req.data); 2029 req->req.data = NULL; 2030 2031 return 0; 2032 } 2033 2034 /* 2035 * Called by spdk_nvmf_transport_poll_group_add. 2036 */ 2037 static int 2038 nvmf_vfio_user_poll_group_add(struct spdk_nvmf_transport_poll_group *group, 2039 struct spdk_nvmf_qpair *qpair) 2040 { 2041 struct nvmf_vfio_user_qpair *vu_qpair; 2042 struct nvmf_vfio_user_req *vu_req; 2043 struct nvmf_vfio_user_ctrlr *ctrlr; 2044 struct spdk_nvmf_request *req; 2045 struct spdk_nvmf_fabric_connect_data *data; 2046 bool admin; 2047 2048 vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair); 2049 vu_qpair->group = group; 2050 ctrlr = vu_qpair->ctrlr; 2051 2052 SPDK_DEBUGLOG(nvmf_vfio, "%s: add QP%d=%p(%p) to poll_group=%p\n", 2053 ctrlr_id(ctrlr), vu_qpair->qpair.qid, 2054 vu_qpair, qpair, group); 2055 2056 admin = nvmf_qpair_is_admin_queue(&vu_qpair->qpair); 2057 2058 vu_req = get_nvmf_vfio_user_req(vu_qpair); 2059 if (vu_req == NULL) { 2060 return -1; 2061 } 2062 2063 req = &vu_req->req; 2064 req->cmd->connect_cmd.opcode = SPDK_NVME_OPC_FABRIC; 2065 req->cmd->connect_cmd.cid = 0; 2066 req->cmd->connect_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT; 2067 req->cmd->connect_cmd.recfmt = 0; 2068 req->cmd->connect_cmd.sqsize = vu_qpair->qsize - 1; 2069 req->cmd->connect_cmd.qid = admin ? 
/*
 * Called by spdk_nvmf_transport_poll_group_add.
 */
static int
nvmf_vfio_user_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
			      struct spdk_nvmf_qpair *qpair)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;
	struct nvmf_vfio_user_ctrlr *ctrlr;
	struct spdk_nvmf_request *req;
	struct spdk_nvmf_fabric_connect_data *data;
	bool admin;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	vu_qpair->group = group;
	ctrlr = vu_qpair->ctrlr;

	SPDK_DEBUGLOG(nvmf_vfio, "%s: add QP%d=%p(%p) to poll_group=%p\n",
		      ctrlr_id(ctrlr), vu_qpair->qpair.qid,
		      vu_qpair, qpair, group);

	admin = nvmf_qpair_is_admin_queue(&vu_qpair->qpair);

	vu_req = get_nvmf_vfio_user_req(vu_qpair);
	if (vu_req == NULL) {
		return -1;
	}

	req = &vu_req->req;
	req->cmd->connect_cmd.opcode = SPDK_NVME_OPC_FABRIC;
	req->cmd->connect_cmd.cid = 0;
	req->cmd->connect_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT;
	req->cmd->connect_cmd.recfmt = 0;
	req->cmd->connect_cmd.sqsize = vu_qpair->qsize - 1;
	req->cmd->connect_cmd.qid = admin ? 0 : qpair->qid;

	req->length = sizeof(struct spdk_nvmf_fabric_connect_data);
	req->data = calloc(1, req->length);
	if (req->data == NULL) {
		nvmf_vfio_user_req_free(req);
		return -ENOMEM;
	}

	data = (struct spdk_nvmf_fabric_connect_data *)req->data;
	data->cntlid = admin ? 0xFFFF : ctrlr->cntlid;
	snprintf(data->subnqn, sizeof(data->subnqn), "%s",
		 spdk_nvmf_subsystem_get_nqn(ctrlr->endpoint->subsystem));

	vu_req->cb_fn = handle_queue_connect_rsp;
	vu_req->cb_arg = vu_qpair;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: sending connect fabrics command for QID=%#x cntlid=%#x\n",
		      ctrlr_id(ctrlr), qpair->qid, data->cntlid);

	spdk_nvmf_request_exec_fabrics(req);
	return 0;
}

static int
nvmf_vfio_user_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
				 struct spdk_nvmf_qpair *qpair)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *vu_ctrlr;
	struct nvmf_vfio_user_endpoint *endpoint;
	struct nvmf_vfio_user_poll_group *vu_group;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	vu_ctrlr = vu_qpair->ctrlr;
	endpoint = vu_ctrlr->endpoint;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: remove NVMf QP%d=%p from NVMf poll_group=%p\n",
		      ctrlr_id(vu_qpair->ctrlr), qpair->qid, qpair, group);

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);
	TAILQ_REMOVE(&vu_group->qps, vu_qpair, link);

	pthread_mutex_lock(&endpoint->lock);
	assert(vu_ctrlr->num_connected_qps);
	vu_ctrlr->num_connected_qps--;
	pthread_mutex_unlock(&endpoint->lock);

	return 0;
}

static void
_nvmf_vfio_user_req_free(struct nvmf_vfio_user_qpair *vu_qpair, struct nvmf_vfio_user_req *vu_req)
{
	memset(&vu_req->cmd, 0, sizeof(vu_req->cmd));
	memset(&vu_req->rsp, 0, sizeof(vu_req->rsp));
	vu_req->iovcnt = 0;
	vu_req->state = VFIO_USER_REQUEST_STATE_FREE;

	TAILQ_INSERT_TAIL(&vu_qpair->reqs, vu_req, link);
}

static int
nvmf_vfio_user_req_free(struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;

	assert(req != NULL);

	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);

	_nvmf_vfio_user_req_free(vu_qpair, vu_req);

	return 0;
}

static int
nvmf_vfio_user_req_complete(struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;

	assert(req != NULL);

	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);

	if (vu_req->cb_fn != NULL) {
		if (vu_req->cb_fn(vu_req, vu_req->cb_arg) != 0) {
			fail_ctrlr(vu_qpair->ctrlr);
		}
	}

	_nvmf_vfio_user_req_free(vu_qpair, vu_req);

	return 0;
}

static void
nvmf_vfio_user_close_qpair(struct spdk_nvmf_qpair *qpair,
			   spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
{
	struct nvmf_vfio_user_qpair *vu_qpair;

	assert(qpair != NULL);
	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	destroy_qp(vu_qpair->ctrlr, qpair->qid);

	if (cb_fn) {
		cb_fn(cb_arg);
	}
}
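/*
 * Added descriptive note on the request pool life cycle: each queue pair owns
 * a fixed array of nvmf_vfio_user_req objects strung on its free list.
 * get_nvmf_vfio_user_req() below pops one for every submission queue entry
 * (or for the internal CONNECT), and _nvmf_vfio_user_req_free() above clears
 * it and pushes it back once the request completes or is freed.
 */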
/**
 * Returns a preallocated spdk_nvmf_request or NULL if there isn't one available.
 */
static struct nvmf_vfio_user_req *
get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_req *req;

	assert(qpair != NULL);

	if (TAILQ_EMPTY(&qpair->reqs)) {
		return NULL;
	}

	req = TAILQ_FIRST(&qpair->reqs);
	TAILQ_REMOVE(&qpair->reqs, req, link);

	return req;
}

static struct spdk_nvmf_request *
get_nvmf_req(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_req *req = get_nvmf_vfio_user_req(qpair);

	if (req == NULL) {
		return NULL;
	}
	return &req->req;
}

static int
get_nvmf_io_req_length(struct spdk_nvmf_request *req)
{
	uint16_t nr;
	uint32_t nlb, nsid;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
	struct spdk_nvmf_ns *ns;

	nsid = cmd->nsid;
	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
	if (ns == NULL || ns->bdev == NULL) {
		SPDK_ERRLOG("unsuccessful query for nsid %u\n", cmd->nsid);
		return -EINVAL;
	}

	if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
		nr = cmd->cdw10_bits.dsm.nr + 1;
		return nr * sizeof(struct spdk_nvme_dsm_range);
	}

	/* NLB is a 0's based value; use a 32-bit local so the +1 cannot wrap. */
	nlb = (cmd->cdw12 & 0x0000ffffu) + 1;
	return nlb * spdk_bdev_get_block_size(ns->bdev);
}
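/*
 * Worked example for get_nvmf_io_req_length() above (illustrative numbers
 * only): a READ whose NLB field in CDW12 is 7 addresses 8 blocks, so on a
 * namespace with 512-byte blocks the transfer length is 8 * 512 = 4096 bytes.
 * A Dataset Management command with NR = 2 carries
 * (2 + 1) * sizeof(struct spdk_nvme_dsm_range) bytes of range descriptors.
 */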
static int
map_admin_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint32_t len = 0;
	int iovcnt;

	req->xfer = cmd->opc & 0x3;
	req->length = 0;
	req->data = NULL;

	switch (cmd->opc) {
	case SPDK_NVME_OPC_IDENTIFY:
		len = 4096; /* TODO: there should be a define somewhere for this */
		break;
	case SPDK_NVME_OPC_GET_LOG_PAGE:
		len = (cmd->cdw10_bits.get_log_page.numdl + 1) * 4;
		break;
	}

	if (!cmd->dptr.prp.prp1 || !len) {
		return 0;
	}
	/* Admin commands do not use SGLs */
	assert(req->cmd->nvme_cmd.psdt == 0);
	iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, len);
	if (iovcnt < 0) {
		SPDK_ERRLOG("%s: map Admin Opc %x failed\n",
			    ctrlr_id(ctrlr), cmd->opc);
		return -1;
	}

	req->length = len;
	req->data = req->iov[0].iov_base;

	return 0;
}

/*
 * Handles an I/O command by mapping its data buffer into req->iov.
 *
 * Returns 0 on success and -errno on failure.
 */
static int
map_io_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
{
	int err = 0;
	struct spdk_nvme_cmd *cmd;

	assert(ctrlr != NULL);
	assert(req != NULL);

	cmd = &req->cmd->nvme_cmd;
	req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);

	if (spdk_unlikely(req->xfer == SPDK_NVME_DATA_NONE)) {
		return 0;
	}

	err = get_nvmf_io_req_length(req);
	if (err < 0) {
		return -EINVAL;
	}

	req->length = err;
	err = vfio_user_map_cmd(ctrlr, req, req->iov, req->length);
	if (err < 0) {
		SPDK_ERRLOG("%s: failed to map IO OPC %u\n", ctrlr_id(ctrlr), cmd->opc);
		return -EFAULT;
	}

	req->data = req->iov[0].iov_base;
	req->iovcnt = err;

	return 0;
}

static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct spdk_nvmf_request *req)
{
	int err;
	struct nvmf_vfio_user_req *vu_req;

	assert(ctrlr != NULL);
	assert(cmd != NULL);

	/*
	 * TODO: this means that there are no free requests available,
	 * returning -1 will fail the controller. Theoretically this error can
	 * be avoided completely by ensuring we have as many requests as slots
	 * in the SQ, plus one for the property request.
	 */
	if (spdk_unlikely(req == NULL)) {
		return -1;
	}

	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vu_req->cb_fn = handle_cmd_rsp;
	vu_req->cb_arg = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);
	req->cmd->nvme_cmd = *cmd;
	if (nvmf_qpair_is_admin_queue(req->qpair)) {
		err = map_admin_cmd_req(ctrlr, req);
	} else {
		err = map_io_cmd_req(ctrlr, req);
	}

	if (spdk_unlikely(err < 0)) {
		SPDK_ERRLOG("%s: map NVMe command opc 0x%x failed\n",
			    ctrlr_id(ctrlr), cmd->opc);
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		return handle_cmd_rsp(vu_req, vu_req->cb_arg);
	}

	vu_req->state = VFIO_USER_REQUEST_STATE_EXECUTING;
	spdk_nvmf_request_exec(req);

	return 0;
}

static void
nvmf_vfio_user_qpair_poll(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_ctrlr *ctrlr;
	uint32_t new_tail;

	assert(qpair != NULL);

	ctrlr = qpair->ctrlr;

	new_tail = *tdbl(ctrlr, &qpair->sq);
	if (sq_head(qpair) != new_tail) {
		int err = handle_sq_tdbl_write(ctrlr, new_tail, qpair);
		if (err != 0) {
			fail_ctrlr(ctrlr);
			return;
		}
	}
}
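/*
 * Illustration of the doorbell check above (example values only): if the SQ
 * head is 3 and the guest writes 7 to the submission queue tail doorbell,
 * handle_sq_tdbl_write() consumes entries 3 through 6 and advances the head
 * until it catches up with the new tail.
 */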
/*
 * Called unconditionally, periodically, very frequently from SPDK to ask
 * whether there's work to be done. New work is discovered by reading each
 * active queue pair's submission queue tail doorbell; any new entries are
 * then handed to handle_sq_tdbl_write() via nvmf_vfio_user_qpair_poll().
 */
static int
nvmf_vfio_user_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
{
	struct nvmf_vfio_user_poll_group *vu_group;
	struct nvmf_vfio_user_qpair *vu_qpair, *tmp;

	assert(group != NULL);

	spdk_rmb();

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);

	TAILQ_FOREACH_SAFE(vu_qpair, &vu_group->qps, link, tmp) {
		if (spdk_unlikely(vu_qpair->state != VFIO_USER_QPAIR_ACTIVE || !vu_qpair->sq.size)) {
			continue;
		}
		nvmf_vfio_user_qpair_poll(vu_qpair);
	}

	return 0;
}

static int
nvmf_vfio_user_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
				    struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	ctrlr = vu_qpair->ctrlr;

	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
	return 0;
}

static int
nvmf_vfio_user_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
				   struct spdk_nvme_transport_id *trid)
{
	return 0;
}

static int
nvmf_vfio_user_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
				     struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	ctrlr = vu_qpair->ctrlr;

	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
	return 0;
}

static void
nvmf_vfio_user_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
				   struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req, *vu_req_to_abort = NULL;
	uint16_t i, cid;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);

	cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
	for (i = 0; i < vu_qpair->qsize; i++) {
		vu_req = &vu_qpair->reqs_internal[i];
		if (vu_req->state == VFIO_USER_REQUEST_STATE_EXECUTING && vu_req->cmd.cid == cid) {
			vu_req_to_abort = vu_req;
			break;
		}
	}

	if (vu_req_to_abort == NULL) {
		spdk_nvmf_request_complete(req);
		return;
	}

	req->req_to_abort = &vu_req_to_abort->req;
	nvmf_ctrlr_abort_request(req);
}

static void
nvmf_vfio_user_opts_init(struct spdk_nvmf_transport_opts *opts)
{
	opts->max_queue_depth = NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH;
	opts->max_qpairs_per_ctrlr = NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR;
	opts->in_capsule_data_size = NVMF_VFIO_USER_DEFAULT_IN_CAPSULE_DATA_SIZE;
	opts->max_io_size = NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE;
	opts->io_unit_size = NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE;
	opts->max_aq_depth = NVMF_VFIO_USER_DEFAULT_AQ_DEPTH;
	opts->num_shared_buffers = NVMF_VFIO_USER_DEFAULT_NUM_SHARED_BUFFERS;
	opts->buf_cache_size = NVMF_VFIO_USER_DEFAULT_BUFFER_CACHE_SIZE;
	opts->transport_specific = NULL;
}
const struct spdk_nvmf_transport_ops spdk_nvmf_transport_vfio_user = {
	.name = "VFIOUSER",
	.type = SPDK_NVME_TRANSPORT_VFIOUSER,
	.opts_init = nvmf_vfio_user_opts_init,
	.create = nvmf_vfio_user_create,
	.destroy = nvmf_vfio_user_destroy,

	.listen = nvmf_vfio_user_listen,
	.stop_listen = nvmf_vfio_user_stop_listen,
	.accept = nvmf_vfio_user_accept,
	.cdata_init = nvmf_vfio_user_cdata_init,
	.listen_associate = nvmf_vfio_user_listen_associate,

	.listener_discover = nvmf_vfio_user_discover,

	.poll_group_create = nvmf_vfio_user_poll_group_create,
	.poll_group_destroy = nvmf_vfio_user_poll_group_destroy,
	.poll_group_add = nvmf_vfio_user_poll_group_add,
	.poll_group_remove = nvmf_vfio_user_poll_group_remove,
	.poll_group_poll = nvmf_vfio_user_poll_group_poll,

	.req_free = nvmf_vfio_user_req_free,
	.req_complete = nvmf_vfio_user_req_complete,

	.qpair_fini = nvmf_vfio_user_close_qpair,
	.qpair_get_local_trid = nvmf_vfio_user_qpair_get_local_trid,
	.qpair_get_peer_trid = nvmf_vfio_user_qpair_get_peer_trid,
	.qpair_get_listen_trid = nvmf_vfio_user_qpair_get_listen_trid,
	.qpair_abort_request = nvmf_vfio_user_qpair_abort_request,
};

SPDK_NVMF_TRANSPORT_REGISTER(muser, &spdk_nvmf_transport_vfio_user);
SPDK_LOG_REGISTER_COMPONENT(nvmf_vfio)
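/*
 * Illustrative usage only (the NQN, bdev name, and path below are
 * placeholders, not part of this file): once SPDK is built with vfio-user
 * support, the transport registered above is typically instantiated and
 * exposed via JSON-RPC, roughly:
 *
 *   scripts/rpc.py nvmf_create_transport -t VFIOUSER
 *   scripts/rpc.py nvmf_create_subsystem nqn.2019-07.io.spdk:cnode0 -a
 *   scripts/rpc.py nvmf_subsystem_add_ns nqn.2019-07.io.spdk:cnode0 Malloc0
 *   scripts/rpc.py nvmf_subsystem_add_listener nqn.2019-07.io.spdk:cnode0 \
 *       -t VFIOUSER -a /var/run/vfio-user -s 0
 *
 * The listener's traddr names a directory under which the vfio-user socket
 * for the emulated NVMe controller is created; a guest (or any other
 * vfio-user client) then attaches to that socket.
 */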