/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2021 Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/*
 * NVMe over PCIe common library
 */

#include "spdk/stdinc.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "nvme_internal.h"
#include "nvme_pcie_internal.h"
#include "spdk/trace.h"

#include "spdk_internal/trace_defs.h"

__thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL;

static struct spdk_nvme_pcie_stat g_dummy_stat = {};

static void nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair,
		struct nvme_tracker *tr);

static inline uint64_t
nvme_pcie_vtophys(struct spdk_nvme_ctrlr *ctrlr, const void *buf, uint64_t *size)
{
	if (spdk_likely(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
		return spdk_vtophys(buf, size);
	} else {
		/* vfio-user address translation with IOVA=VA mode */
		return (uint64_t)(uintptr_t)buf;
	}
}

int
nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	uint32_t i;

	/* all head/tail vals are set to 0 */
	pqpair->last_sq_tail = pqpair->sq_tail = pqpair->sq_head = pqpair->cq_head = 0;

	/*
	 * First time through the completion queue, HW will set the phase
	 * bit on completions to 1. So set this to 1 here, indicating
	 * we're looking for a 1 to know which entries have completed.
	 * We'll toggle the bit each time the completion queue rolls over.
	 */
	pqpair->flags.phase = 1;
	for (i = 0; i < pqpair->num_entries; i++) {
		pqpair->cpl[i].status.p = 0;
	}

	return 0;
}

static void
nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr)
{
	tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp);
	tr->cid = cid;
	tr->req = NULL;
}

static void *
nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t size, uint64_t alignment,
			  uint64_t *phys_addr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	uintptr_t addr;

	if (pctrlr->cmb.mem_register_addr != NULL) {
		/* BAR is mapped for data */
		return NULL;
	}

	addr = (uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset;
	addr = (addr + (alignment - 1)) & ~(alignment - 1);
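	/*
	 * The line above rounds the candidate address up to the requested alignment.
	 * Illustrative example (hypothetical values): if current_offset leaves addr
	 * ending in ...2030 and alignment == 0x1000, the mask arithmetic yields
	 * ...3000, so the allocation starts on the next 4 KiB boundary and the bytes
	 * skipped by the round-up are simply left unused in the CMB.
	 */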

	/* CMB may only consume part of the BAR, calculate accordingly */
	if (addr + size > ((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.size)) {
		SPDK_ERRLOG("Tried to allocate past valid CMB range!\n");
		return NULL;
	}
	*phys_addr = pctrlr->cmb.bar_pa + addr - (uintptr_t)pctrlr->cmb.bar_va;

	pctrlr->cmb.current_offset = (addr + size) - (uintptr_t)pctrlr->cmb.bar_va;

	return (void *)addr;
}

int
nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
			  const struct spdk_nvme_io_qpair_opts *opts)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr;
	uint16_t i;
	uint16_t num_trackers;
	size_t page_align = sysconf(_SC_PAGESIZE);
	size_t queue_align, queue_len;
	uint32_t flags = SPDK_MALLOC_DMA;
	uint64_t sq_paddr = 0;
	uint64_t cq_paddr = 0;

	if (opts) {
		pqpair->sq_vaddr = opts->sq.vaddr;
		pqpair->cq_vaddr = opts->cq.vaddr;
		pqpair->flags.disable_pcie_sgl_merge = opts->disable_pcie_sgl_merge;
		sq_paddr = opts->sq.paddr;
		cq_paddr = opts->cq.paddr;
	}

	pqpair->retry_count = ctrlr->opts.transport_retry_count;

	/*
	 * Limit the maximum number of completions to return per call to prevent wraparound,
	 * and calculate how many trackers can be submitted at once without overflowing the
	 * completion queue.
	 */
	pqpair->max_completions_cap = pqpair->num_entries / 4;
	pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS);
	pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS);
	num_trackers = pqpair->num_entries - pqpair->max_completions_cap;

	SPDK_INFOLOG(nvme, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n",
		     pqpair->max_completions_cap, num_trackers);

	assert(num_trackers != 0);

	pqpair->sq_in_cmb = false;

	if (nvme_qpair_is_admin_queue(&pqpair->qpair)) {
		flags |= SPDK_MALLOC_SHARE;
	}

	/* cmd and cpl rings must be aligned on page size boundaries. */
	if (ctrlr->opts.use_cmb_sqs) {
		pqpair->cmd = nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd),
							page_align, &pqpair->cmd_bus_addr);
		if (pqpair->cmd != NULL) {
			pqpair->sq_in_cmb = true;
		}
	}

	if (pqpair->sq_in_cmb == false) {
		if (pqpair->sq_vaddr) {
			pqpair->cmd = pqpair->sq_vaddr;
		} else {
			/* To ensure physical address contiguity we make each ring occupy
			 * a single hugepage only. See MAX_IO_QUEUE_ENTRIES.
			 */
			queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cmd);
			queue_align = spdk_max(spdk_align32pow2(queue_len), page_align);
			pqpair->cmd = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags);
			if (pqpair->cmd == NULL) {
				SPDK_ERRLOG("alloc qpair_cmd failed\n");
				return -ENOMEM;
			}
		}
		if (sq_paddr) {
			assert(pqpair->sq_vaddr != NULL);
			pqpair->cmd_bus_addr = sq_paddr;
		} else {
			pqpair->cmd_bus_addr = nvme_pcie_vtophys(ctrlr, pqpair->cmd, NULL);
			if (pqpair->cmd_bus_addr == SPDK_VTOPHYS_ERROR) {
				SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n");
				return -EFAULT;
			}
		}
	}

	if (pqpair->cq_vaddr) {
		pqpair->cpl = pqpair->cq_vaddr;
	} else {
		queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cpl);
		queue_align = spdk_max(spdk_align32pow2(queue_len), page_align);
		pqpair->cpl = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags);
		if (pqpair->cpl == NULL) {
			SPDK_ERRLOG("alloc qpair_cpl failed\n");
			return -ENOMEM;
		}
	}
	if (cq_paddr) {
		assert(pqpair->cq_vaddr != NULL);
		pqpair->cpl_bus_addr = cq_paddr;
	} else {
		pqpair->cpl_bus_addr = nvme_pcie_vtophys(ctrlr, pqpair->cpl, NULL);
		if (pqpair->cpl_bus_addr == SPDK_VTOPHYS_ERROR) {
			SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n");
			return -EFAULT;
		}
	}

	pqpair->sq_tdbl = pctrlr->doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32;
	pqpair->cq_hdbl = pctrlr->doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32;
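
	/*
	 * Worked example of the doorbell arithmetic above, assuming a controller
	 * with CAP.DSTRD == 0 (i.e. doorbell_stride_u32 == 1): qpair id 1 gets its
	 * SQ tail doorbell at doorbell_base + 2 dwords and its CQ head doorbell at
	 * doorbell_base + 3 dwords, matching the per-queue SQyTDBL/CQyHDBL register
	 * layout defined by the NVMe specification.
	 */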

	/*
	 * Reserve space for all of the trackers in a single allocation.
	 * struct nvme_tracker must be padded so that its size is already a power of 2.
	 * This ensures the PRP list embedded in the nvme_tracker object will not span a
	 * 4KB boundary, while allowing access to trackers in tr[] via normal array indexing.
	 */
	pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL,
				  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pqpair->tr == NULL) {
		SPDK_ERRLOG("nvme_tr failed\n");
		return -ENOMEM;
	}

	TAILQ_INIT(&pqpair->free_tr);
	TAILQ_INIT(&pqpair->outstanding_tr);
	pqpair->qpair.queue_depth = 0;

	for (i = 0; i < num_trackers; i++) {
		tr = &pqpair->tr[i];
		nvme_qpair_construct_tracker(tr, i, nvme_pcie_vtophys(ctrlr, tr, NULL));
		TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list);
	}

	nvme_pcie_qpair_reset(qpair);

	return 0;
}

int
nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries)
{
	struct nvme_pcie_qpair *pqpair;
	int rc;

	pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pqpair == NULL) {
		return -ENOMEM;
	}

	pqpair->num_entries = num_entries;
	pqpair->flags.delay_cmd_submit = 0;
	pqpair->pcie_state = NVME_PCIE_QPAIR_READY;

	ctrlr->adminq = &pqpair->qpair;

	rc = nvme_qpair_init(ctrlr->adminq,
			     0, /* qpair ID */
			     ctrlr,
			     SPDK_NVME_QPRIO_URGENT,
			     num_entries,
			     false);
	if (rc != 0) {
		return rc;
	}

	pqpair->stat = spdk_zmalloc(sizeof(*pqpair->stat), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
				    SPDK_MALLOC_SHARE);
	if (!pqpair->stat) {
		SPDK_ERRLOG("Failed to allocate admin qpair statistics\n");
		return -ENOMEM;
	}

	return nvme_pcie_qpair_construct(ctrlr->adminq, NULL);
}

/**
 * Note: the ctrlr_lock must be held when calling this function.
 */
void
nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair,
		struct nvme_request *req, struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_request *active_req = req;
	struct spdk_nvme_ctrlr_process *active_proc;

	/*
	 * The admin request is from another process. Move to the per
	 * process list for that process to handle it later.
	 */
	assert(nvme_qpair_is_admin_queue(qpair));
	assert(active_req->pid != getpid());

	active_proc = nvme_ctrlr_get_process(ctrlr, active_req->pid);
	if (active_proc) {
		/* Save the original completion information */
		memcpy(&active_req->cpl, cpl, sizeof(*cpl));
		STAILQ_INSERT_TAIL(&active_proc->active_reqs, active_req, stailq);
	} else {
		SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n",
			    active_req->pid);
		nvme_cleanup_user_req(active_req);
		nvme_free_request(active_req);
	}
}

/**
 * Note: the ctrlr_lock must be held when calling this function.
 */
void
nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_request *req, *tmp_req;
	pid_t pid = getpid();
	struct spdk_nvme_ctrlr_process *proc;

	/*
	 * Check whether there is any pending admin request from
	 * other active processes.
	 */
	assert(nvme_qpair_is_admin_queue(qpair));

	proc = nvme_ctrlr_get_current_process(ctrlr);
	if (!proc) {
		SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid);
		assert(proc);
		return;
	}

	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);

		assert(req->pid == pid);

		nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &req->cpl);
	}
}

int
nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr,
				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
				 void *cb_arg)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que);
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;

	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (req == NULL) {
		return -ENOMEM;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_CREATE_IO_CQ;

	cmd->cdw10_bits.create_io_q.qid = io_que->id;
	cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1;

	cmd->cdw11_bits.create_io_cq.pc = 1;
	cmd->dptr.prp.prp1 = pqpair->cpl_bus_addr;

	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}

int
nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr,
				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que);
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;

	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (req == NULL) {
		return -ENOMEM;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_CREATE_IO_SQ;

	cmd->cdw10_bits.create_io_q.qid = io_que->id;
	cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1;
	cmd->cdw11_bits.create_io_sq.pc = 1;
	cmd->cdw11_bits.create_io_sq.qprio = io_que->qprio;
	cmd->cdw11_bits.create_io_sq.cqid = io_que->id;
	cmd->dptr.prp.prp1 = pqpair->cmd_bus_addr;

	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}

int
nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;

	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (req == NULL) {
		return -ENOMEM;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_DELETE_IO_CQ;
	cmd->cdw10_bits.delete_io_q.qid = qpair->id;

	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}

int
nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;

	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (req == NULL) {
		return -ENOMEM;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_DELETE_IO_SQ;
	cmd->cdw10_bits.delete_io_q.qid = qpair->id;

	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}

static void
nvme_completion_sq_error_delete_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_qpair *qpair = arg;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

	if (spdk_nvme_cpl_is_error(cpl)) {
		SPDK_ERRLOG("delete_io_cq failed!\n");
	}

	pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
}
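
/*
 * I/O qpair connection is asynchronous: _nvme_pcie_ctrlr_create_io_qpair() sends
 * Create I/O CQ and moves pcie_state to WAIT_FOR_CQ; its completion callback sends
 * Create I/O SQ and moves to WAIT_FOR_SQ; the SQ completion callback below finally
 * moves to READY (or FAILED, after deleting the CQ on error). The qpair is driven
 * through these states by polling in nvme_pcie_qpair_process_completions().
 */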
static void
nvme_completion_create_sq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_qpair *qpair = arg;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	int rc;

	if (pqpair->flags.defer_destruction) {
		/* This qpair was deleted by the application while the
		 * connection was still in progress. We had to wait
		 * to free the qpair resources until this outstanding
		 * command was completed. Now that we have the completion,
		 * free it.
		 */
		nvme_pcie_qpair_destroy(qpair);
		return;
	}

	if (spdk_nvme_cpl_is_error(cpl)) {
		SPDK_ERRLOG("nvme_create_io_sq failed, deleting cq!\n");
		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_sq_error_delete_cq_cb,
						      qpair);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
			pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
		}
		return;
	}
	pqpair->pcie_state = NVME_PCIE_QPAIR_READY;
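
	/*
	 * If the controller supports the Doorbell Buffer Config feature (typically
	 * paravirtualized controllers), ctrlr->shadow_doorbell and ctrlr->eventidx
	 * point at host-memory arrays laid out like the real doorbell registers
	 * (two entries per qpair, spaced by the doorbell stride). The fast path can
	 * then update the shadow entry and only touch the MMIO doorbell when the
	 * eventidx value indicates the device needs a wakeup.
	 */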
	if (ctrlr->shadow_doorbell) {
		pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
						  pctrlr->doorbell_stride_u32;
		pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) *
						  pctrlr->doorbell_stride_u32;
		pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) *
						      pctrlr->doorbell_stride_u32;
		pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) *
						      pctrlr->doorbell_stride_u32;
		pqpair->flags.has_shadow_doorbell = 1;
	} else {
		pqpair->flags.has_shadow_doorbell = 0;
	}
	nvme_pcie_qpair_reset(qpair);
}

static void
nvme_completion_create_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_qpair *qpair = arg;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	int rc;

	if (pqpair->flags.defer_destruction) {
		/* This qpair was deleted by the application while the
		 * connection was still in progress. We had to wait
		 * to free the qpair resources until this outstanding
		 * command was completed. Now that we have the completion,
		 * free it.
		 */
		nvme_pcie_qpair_destroy(qpair);
		return;
	}

	if (spdk_nvme_cpl_is_error(cpl)) {
		pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
		SPDK_ERRLOG("nvme_create_io_cq failed!\n");
		return;
	}

	rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_create_sq_cb, qpair);

	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to create_io_sq, deleting cq!\n");
		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_sq_error_delete_cq_cb,
						      qpair);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
			pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
		}
		return;
	}
	pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_SQ;
}

static int
_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 uint16_t qid)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	int rc;

	/* Statistics may already be allocated in the case of controller reset */
	if (qpair->poll_group) {
		struct nvme_pcie_poll_group *group = SPDK_CONTAINEROF(qpair->poll_group,
						     struct nvme_pcie_poll_group, group);

		pqpair->stat = &group->stats;
		pqpair->shared_stats = true;
	} else {
		if (pqpair->stat == NULL) {
			pqpair->stat = calloc(1, sizeof(*pqpair->stat));
			if (!pqpair->stat) {
				SPDK_ERRLOG("Failed to allocate qpair statistics\n");
				nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
				return -ENOMEM;
			}
		}
	}

	rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_create_cq_cb, qpair);

	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to create_io_cq\n");
		nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
		return rc;
	}
	pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_CQ;
	return 0;
}

int
nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	int rc = 0;

	if (!nvme_qpair_is_admin_queue(qpair)) {
		rc = _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id);
	} else {
		nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
	}

	return rc;
}

void
nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	if (!nvme_qpair_is_admin_queue(qpair) || !ctrlr->is_disconnecting) {
		nvme_transport_ctrlr_disconnect_qpair_done(qpair);
	} else {
		/* If this function is called for the admin qpair via spdk_nvme_ctrlr_reset()
		 * or spdk_nvme_ctrlr_disconnect(), initiate a Controller Level Reset.
		 * Then we can abort trackers safely because the Controller Level Reset deletes
		 * all I/O SQ/CQs.
		 */
		nvme_ctrlr_disable(ctrlr);
	}
}

/* Used when dst points to MMIO (i.e. CMB) in a virtual machine - in these cases we must
 * not use wide instructions because QEMU will not emulate such instructions to MMIO space.
 * So this function ensures we only copy 8 bytes at a time.
 */
static inline void
nvme_pcie_copy_command_mmio(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
{
	uint64_t *dst64 = (uint64_t *)dst;
	const uint64_t *src64 = (const uint64_t *)src;
	uint32_t i;

	for (i = 0; i < sizeof(*dst) / 8; i++) {
		dst64[i] = src64[i];
	}
}

static inline void
nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
{
	/* dst and src are known to be non-overlapping and 64-byte aligned. */
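	/* Design note: on x86 the copy below uses non-temporal (streaming) stores.
	 * A submission queue entry is written once and then read only by the device,
	 * so bypassing the CPU cache avoids polluting it with data the host will not
	 * read back.
	 */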
#if defined(__SSE2__)
	__m128i *d128 = (__m128i *)dst;
	const __m128i *s128 = (const __m128i *)src;

	_mm_stream_si128(&d128[0], _mm_load_si128(&s128[0]));
	_mm_stream_si128(&d128[1], _mm_load_si128(&s128[1]));
	_mm_stream_si128(&d128[2], _mm_load_si128(&s128[2]));
	_mm_stream_si128(&d128[3], _mm_load_si128(&s128[3]));
#else
	*dst = *src;
#endif
}

void
nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_request *req;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	req = tr->req;
	assert(req != NULL);

	spdk_trace_record(TRACE_NVME_PCIE_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg,
			  (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc,
			  req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12,
			  pqpair->qpair.queue_depth);

	if (req->cmd.fuse) {
		/*
		 * Keep track of the fuse operation sequence so that we ring the doorbell only
		 * after the second fuse is submitted.
		 */
		qpair->last_fuse = req->cmd.fuse;
	}

	/* Don't use wide instructions to copy the NVMe command; QEMU's virtual NVMe
	 * controller limits the maximum access width to 8 bytes at a time.
	 */
	if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_MAXIMUM_PCI_ACCESS_WIDTH) && pqpair->sq_in_cmb)) {
		nvme_pcie_copy_command_mmio(&pqpair->cmd[pqpair->sq_tail], &req->cmd);
	} else {
		/* Copy the command from the tracker to the submission queue. */
		nvme_pcie_copy_command(&pqpair->cmd[pqpair->sq_tail], &req->cmd);
	}

	if (spdk_unlikely(++pqpair->sq_tail == pqpair->num_entries)) {
		pqpair->sq_tail = 0;
	}

	if (spdk_unlikely(pqpair->sq_tail == pqpair->sq_head)) {
		SPDK_ERRLOG("sq_tail is passing sq_head!\n");
	}

	if (!pqpair->flags.delay_cmd_submit) {
		nvme_pcie_qpair_ring_sq_doorbell(qpair);
	}
}

void
nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
				 struct spdk_nvme_cpl *cpl, bool print_on_error)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_request *req;
	bool retry, error;
	bool print_error;

	req = tr->req;

	spdk_trace_record(TRACE_NVME_PCIE_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg,
			  (uint32_t)req->cmd.cid, (uint32_t)cpl->status_raw, pqpair->qpair.queue_depth);

	assert(req != NULL);

	error = spdk_nvme_cpl_is_error(cpl);
	retry = error && nvme_completion_is_retry(cpl) &&
		req->retries < pqpair->retry_count;
	print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging;

	if (print_error) {
		spdk_nvme_qpair_print_command(qpair, &req->cmd);
	}

	if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) {
		spdk_nvme_qpair_print_completion(qpair, cpl);
	}

	assert(cpl->cid == req->cmd.cid);

	if (retry) {
		req->retries++;
		nvme_pcie_qpair_submit_tracker(qpair, tr);
	} else {
		TAILQ_REMOVE(&pqpair->outstanding_tr, tr, tq_list);
		pqpair->qpair.queue_depth--;

		/* Only check admin requests from different processes. */
		if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) {
			nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl);
		} else {
			nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl);
		}

		tr->req = NULL;

		TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list);
	}
}

void
nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
					struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
					bool print_on_error)
{
	struct spdk_nvme_cpl cpl;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.status.sct = sct;
	cpl.status.sc = sc;
	cpl.status.dnr = dnr;
	nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}

void
nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr, *temp, *last;

	last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head);

	/* Abort previously submitted (outstanding) trs */
	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) {
		if (!qpair->ctrlr->opts.disable_error_logging) {
			SPDK_ERRLOG("aborting outstanding command\n");
		}
		nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
							SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true);

		if (tr == last) {
			break;
		}
	}
}

void
nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr;

	tr = TAILQ_FIRST(&pqpair->outstanding_tr);
	while (tr != NULL) {
		assert(tr->req != NULL);
		if (tr->req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			nvme_pcie_qpair_manual_complete_tracker(qpair, tr,
								SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0,
								false);
			tr = TAILQ_FIRST(&pqpair->outstanding_tr);
		} else {
			tr = TAILQ_NEXT(tr, tq_list);
		}
	}
}

void
nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	nvme_pcie_admin_qpair_abort_aers(qpair);
}

void
nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	nvme_pcie_qpair_abort_trackers(qpair, dnr);
}

static void
nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
{
	uint64_t t02;
	struct nvme_tracker *tr, *tmp;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_ctrlr_process *active_proc;

	/* Don't check timeouts during controller initialization. */
	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	if (nvme_qpair_is_admin_queue(qpair)) {
		active_proc = nvme_ctrlr_get_current_process(ctrlr);
	} else {
		active_proc = qpair->active_proc;
	}

	/* Only check timeouts if the current process has a timeout callback. */
	if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
		return;
	}

	t02 = spdk_get_ticks();
	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) {
		assert(tr->req != NULL);

		if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, t02)) {
			/*
			 * The requests are in order, so as soon as one has not timed out,
			 * stop iterating.
			 */
			break;
		}
	}
}

int32_t
nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr;
	struct spdk_nvme_cpl *cpl, *next_cpl;
	uint32_t num_completions = 0;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	uint16_t next_cq_head;
	uint8_t next_phase;
	bool next_is_valid = false;
	int rc;

	if (spdk_unlikely(pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED)) {
		return -ENXIO;
	}

	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
		if (pqpair->pcie_state == NVME_PCIE_QPAIR_READY) {
			/* It is possible that another thread set the pcie_state to
			 * QPAIR_READY, if it polled the adminq and processed the SQ
			 * completion for this qpair. So check for that condition
			 * here and then update the qpair's state to CONNECTED, since
			 * we can only set the qpair state from the qpair's thread.
			 * (Note: this fixed issue #2157.)
			 */
			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
		} else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) {
			nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
			return -ENXIO;
		} else {
			rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
			if (rc < 0) {
				return rc;
			} else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) {
				nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
				return -ENXIO;
			}
		}
		return 0;
	}

	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
		nvme_ctrlr_lock(ctrlr);
	}

	if (max_completions == 0 || max_completions > pqpair->max_completions_cap) {
		/*
		 * max_completions == 0 means unlimited, but complete at most a
		 * max_completions_cap batch of I/O at a time so that the completion
		 * queue doorbells don't wrap around.
		 */
		max_completions = pqpair->max_completions_cap;
	}

	pqpair->stat->polls++;

	while (1) {
		cpl = &pqpair->cpl[pqpair->cq_head];

		if (!next_is_valid && cpl->status.p != pqpair->flags.phase) {
			break;
		}
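
		/* Look one entry ahead: if the next completion already carries the expected
		 * phase, remember that so the next loop iteration can skip the phase check,
		 * and prefetch its tracker now to hide the memory latency of the tr[] lookup.
		 */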
		if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) {
			next_cq_head = pqpair->cq_head + 1;
			next_phase = pqpair->flags.phase;
		} else {
			next_cq_head = 0;
			next_phase = !pqpair->flags.phase;
		}
		next_cpl = &pqpair->cpl[next_cq_head];
		next_is_valid = (next_cpl->status.p == next_phase);
		if (next_is_valid) {
			__builtin_prefetch(&pqpair->tr[next_cpl->cid]);
		}

#if defined(__PPC64__) || defined(__riscv) || defined(__loongarch__)
		/*
		 * This memory barrier prevents reordering of:
		 * - load after store from/to tr
		 * - load after load cpl phase and cpl cid
		 */
		spdk_mb();
#elif defined(__aarch64__)
		__asm volatile("dmb oshld" ::: "memory");
#endif

		if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) {
			pqpair->cq_head = 0;
			pqpair->flags.phase = !pqpair->flags.phase;
		}

		tr = &pqpair->tr[cpl->cid];
		pqpair->sq_head = cpl->sqhd;

		if (tr->req) {
			/* Prefetch the req's STAILQ_ENTRY since we'll need to access it
			 * as part of putting the req back on the qpair's free list.
			 */
			__builtin_prefetch(&tr->req->stailq);
			nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true);
		} else {
			SPDK_ERRLOG("cpl does not map to outstanding cmd\n");
			spdk_nvme_qpair_print_completion(qpair, cpl);
			assert(0);
		}

		if (++num_completions == max_completions) {
			break;
		}
	}

	if (num_completions > 0) {
		pqpair->stat->completions += num_completions;
		nvme_pcie_qpair_ring_cq_doorbell(qpair);
	} else {
		pqpair->stat->idle_polls++;
	}

	if (pqpair->flags.delay_cmd_submit) {
		if (pqpair->last_sq_tail != pqpair->sq_tail) {
			nvme_pcie_qpair_ring_sq_doorbell(qpair);
			pqpair->last_sq_tail = pqpair->sq_tail;
		}
	}

	if (spdk_unlikely(ctrlr->timeout_enabled)) {
		/*
		 * User registered for timeout callback
		 */
		nvme_pcie_qpair_check_timeout(qpair);
	}

	/* Before returning, complete any pending admin request or
	 * process the admin qpair disconnection.
	 */
	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
		nvme_pcie_qpair_complete_pending_admin_request(qpair);

		if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
			rc = nvme_ctrlr_disable_poll(qpair->ctrlr);
			if (rc != -EAGAIN) {
				nvme_transport_ctrlr_disconnect_qpair_done(qpair);
			}
		}

		nvme_ctrlr_unlock(ctrlr);
	}

	if (spdk_unlikely(pqpair->flags.has_pending_vtophys_failures)) {
		struct nvme_tracker *tr, *tmp;

		TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) {
			if (tr->bad_vtophys) {
				tr->bad_vtophys = 0;
				nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			}
		}
		pqpair->flags.has_pending_vtophys_failures = 0;
	}

	return num_completions;
}

int
nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_pcie_admin_qpair_destroy(qpair);
	}
	/*
	 * We check sq_vaddr and cq_vaddr to see if the user specified the memory
	 * buffers when creating the I/O queue.
	 * If the user specified them, we cannot free that memory.
	 * Nor do we free it if it's in the CMB.
	 */
	if (!pqpair->sq_vaddr && pqpair->cmd && !pqpair->sq_in_cmb) {
		spdk_free(pqpair->cmd);
	}
	if (!pqpair->cq_vaddr && pqpair->cpl) {
		spdk_free(pqpair->cpl);
	}
	if (pqpair->tr) {
		spdk_free(pqpair->tr);
	}

	nvme_qpair_deinit(qpair);

	if (!pqpair->shared_stats && (!qpair->active_proc ||
				      qpair->active_proc == nvme_ctrlr_get_current_process(qpair->ctrlr))) {
		if (qpair->id) {
			free(pqpair->stat);
		} else {
			/* Statistics of the admin qpair are allocated from huge pages
			 * because the admin qpair is shared across processes.
			 */
			spdk_free(pqpair->stat);
		}
	}

	spdk_free(pqpair);

	return 0;
}

struct spdk_nvme_qpair *
nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
				const struct spdk_nvme_io_qpair_opts *opts)
{
	struct nvme_pcie_qpair *pqpair;
	struct spdk_nvme_qpair *qpair;
	int rc;

	assert(ctrlr != NULL);

	pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL,
			      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pqpair == NULL) {
		return NULL;
	}

	pqpair->num_entries = opts->io_queue_size;
	pqpair->flags.delay_cmd_submit = opts->delay_cmd_submit;

	qpair = &pqpair->qpair;

	rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests, opts->async_mode);
	if (rc != 0) {
		nvme_pcie_qpair_destroy(qpair);
		return NULL;
	}

	rc = nvme_pcie_qpair_construct(qpair, opts);

	if (rc != 0) {
		nvme_pcie_qpair_destroy(qpair);
		return NULL;
	}

	return qpair;
}

int
nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_completion_poll_status *status;
	int rc;

	assert(ctrlr != NULL);

	if (ctrlr->is_removed) {
		goto free;
	}

	if (ctrlr->prepare_for_reset) {
		if (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING) {
			pqpair->flags.defer_destruction = true;
		}
		goto clear_shadow_doorbells;
	}

	/* If attempting to delete a qpair that's still being connected, we have to wait until it's
	 * finished, so that we don't free it while it's waiting for the create cq/sq callbacks.
	 */
	while (pqpair->pcie_state == NVME_PCIE_QPAIR_WAIT_FOR_CQ ||
	       pqpair->pcie_state == NVME_PCIE_QPAIR_WAIT_FOR_SQ) {
		rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
		if (rc < 0) {
			break;
		}
	}

	status = calloc(1, sizeof(*status));
	if (!status) {
		SPDK_ERRLOG("Failed to allocate status tracker\n");
		goto free;
	}

	/* Delete the I/O submission queue */
	rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, status);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc);
		free(status);
		goto free;
	}
	if (nvme_wait_for_completion(ctrlr->adminq, status)) {
		if (!status->timed_out) {
			free(status);
		}
		goto free;
	}

	/* Now that the submission queue is deleted, the device is supposed to have
	 * completed any outstanding I/O. Try to complete them. If they don't complete,
	 * they'll be marked as aborted and completed below.
	 */
	if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) {
		nvme_pcie_qpair_process_completions(qpair, 0);
	}

	memset(status, 0, sizeof(*status));
	/* Delete the completion queue */
	rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
		free(status);
		goto free;
	}
	if (nvme_wait_for_completion(ctrlr->adminq, status)) {
		if (!status->timed_out) {
			free(status);
		}
		goto free;
	}
	free(status);

clear_shadow_doorbells:
	if (pqpair->flags.has_shadow_doorbell && ctrlr->shadow_doorbell) {
		*pqpair->shadow_doorbell.sq_tdbl = 0;
		*pqpair->shadow_doorbell.cq_hdbl = 0;
		*pqpair->shadow_doorbell.sq_eventidx = 0;
		*pqpair->shadow_doorbell.cq_eventidx = 0;
	}
free:
	if (qpair->no_deletion_notification_needed == 0) {
		/* Abort the rest of the I/O */
		nvme_pcie_qpair_abort_trackers(qpair, 1);
	}

	if (!pqpair->flags.defer_destruction) {
		nvme_pcie_qpair_destroy(qpair);
	}
	return 0;
}

static void
nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
{
	if (!qpair->in_completion_context) {
		struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

		tr->bad_vtophys = 1;
		pqpair->flags.has_pending_vtophys_failures = 1;
		return;
	}

	/*
	 * Bad vtophys translation, so abort this request and return
	 * immediately.
	 */
	SPDK_ERRLOG("vtophys or other payload buffer related error\n");
	nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
						SPDK_NVME_SC_INVALID_FIELD,
						1 /* do not retry */, true);
}

/*
 * Append PRP list entries to describe a virtually contiguous buffer starting at virt_addr of len bytes.
 *
 * *prp_index will be updated to account for the number of PRP entries used.
 */
static inline int
nvme_pcie_prp_list_append(struct spdk_nvme_ctrlr *ctrlr, struct nvme_tracker *tr,
			  uint32_t *prp_index, void *virt_addr, size_t len,
			  uint32_t page_size)
{
	struct spdk_nvme_cmd *cmd = &tr->req->cmd;
	uintptr_t page_mask = page_size - 1;
	uint64_t phys_addr;
	uint32_t i;

	SPDK_DEBUGLOG(nvme, "prp_index:%u virt_addr:%p len:%u\n",
		      *prp_index, virt_addr, (uint32_t)len);

	if (spdk_unlikely(((uintptr_t)virt_addr & 3) != 0)) {
		SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
		return -EFAULT;
	}

	i = *prp_index;
	while (len) {
		uint32_t seg_len;

		/*
		 * prp_index 0 is stored in prp1, and the rest are stored in the prp[] array,
		 * so prp_index == count is valid.
		 */
		if (spdk_unlikely(i > SPDK_COUNTOF(tr->u.prp))) {
			SPDK_ERRLOG("out of PRP entries\n");
			return -EFAULT;
		}

		phys_addr = nvme_pcie_vtophys(ctrlr, virt_addr, NULL);
		if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR)) {
			SPDK_ERRLOG("vtophys(%p) failed\n", virt_addr);
			return -EFAULT;
		}

		if (i == 0) {
			SPDK_DEBUGLOG(nvme, "prp1 = %p\n", (void *)phys_addr);
			cmd->dptr.prp.prp1 = phys_addr;
			seg_len = page_size - ((uintptr_t)virt_addr & page_mask);
		} else {
			if ((phys_addr & page_mask) != 0) {
				SPDK_ERRLOG("PRP %u not page aligned (%p)\n", i, virt_addr);
				return -EFAULT;
			}

			SPDK_DEBUGLOG(nvme, "prp[%u] = %p\n", i - 1, (void *)phys_addr);
			tr->u.prp[i - 1] = phys_addr;
			seg_len = page_size;
		}

		seg_len = spdk_min(seg_len, len);
		virt_addr = (uint8_t *)virt_addr + seg_len;
		len -= seg_len;
		i++;
	}

	cmd->psdt = SPDK_NVME_PSDT_PRP;
	if (i <= 1) {
		cmd->dptr.prp.prp2 = 0;
	} else if (i == 2) {
		cmd->dptr.prp.prp2 = tr->u.prp[0];
		SPDK_DEBUGLOG(nvme, "prp2 = %p\n", (void *)cmd->dptr.prp.prp2);
	} else {
		cmd->dptr.prp.prp2 = tr->prp_sgl_bus_addr;
		SPDK_DEBUGLOG(nvme, "prp2 = %p (PRP list)\n", (void *)cmd->dptr.prp.prp2);
	}

	*prp_index = i;
	return 0;
}
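
/*
 * Worked example of the PRP construction above, assuming a 4 KiB page size:
 * a 16 KiB virtually contiguous buffer that starts 512 bytes into a page needs
 * five PRP entries. prp1 covers the first 3584 bytes, the remaining four
 * page-aligned entries land in tr->u.prp[], and since more than two entries are
 * needed, prp2 is set to prp_sgl_bus_addr so the controller fetches the list
 * from the tracker.
 */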

static int
nvme_pcie_qpair_build_request_invalid(struct spdk_nvme_qpair *qpair,
				      struct nvme_request *req, struct nvme_tracker *tr, bool dword_aligned)
{
	assert(0);
	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
	return -EINVAL;
}

/**
 * Build PRP list describing physically contiguous payload buffer.
 */
static int
nvme_pcie_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
				     struct nvme_tracker *tr, bool dword_aligned)
{
	uint32_t prp_index = 0;
	int rc;

	rc = nvme_pcie_prp_list_append(qpair->ctrlr, tr, &prp_index,
				       (uint8_t *)req->payload.contig_or_cb_arg + req->payload_offset,
				       req->payload_size, qpair->ctrlr->page_size);
	if (rc) {
		nvme_pcie_fail_request_bad_vtophys(qpair, tr);
	} else {
		SPDK_DEBUGLOG(nvme, "Number of PRP entries: %" PRIu32 "\n", prp_index);
	}

	return rc;
}

/**
 * Build an SGL describing a physically contiguous payload buffer.
 *
 * This is more efficient than using PRP because large buffers can be
 * described this way.
 */
static int
nvme_pcie_qpair_build_contig_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
		struct nvme_tracker *tr, bool dword_aligned)
{
	uint8_t *virt_addr;
	uint64_t phys_addr, mapping_length;
	uint32_t length;
	struct spdk_nvme_sgl_descriptor *sgl;
	uint32_t nseg = 0;

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);

	sgl = tr->u.sgl;
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.subtype = 0;

	length = req->payload_size;
	/* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL,
	 * so just double cast it to make it go away */
	virt_addr = (uint8_t *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset);

	while (length > 0) {
		if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			return -EFAULT;
		}

		if (dword_aligned && ((uintptr_t)virt_addr & 3)) {
			SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			return -EFAULT;
		}
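
		/* nvme_pcie_vtophys() may shorten mapping_length to the size of the
		 * physically contiguous mapping that starts at virt_addr, so a buffer
		 * that is virtually contiguous but physically fragmented is emitted
		 * as multiple data-block descriptors.
		 */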
		mapping_length = length;
		phys_addr = nvme_pcie_vtophys(qpair->ctrlr, virt_addr, &mapping_length);
		if (phys_addr == SPDK_VTOPHYS_ERROR) {
			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			return -EFAULT;
		}

		mapping_length = spdk_min(length, mapping_length);

		length -= mapping_length;
		virt_addr += mapping_length;

		sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
		sgl->unkeyed.length = mapping_length;
		sgl->address = phys_addr;
		sgl->unkeyed.subtype = 0;

		sgl++;
		nseg++;
	}

	if (nseg == 1) {
		/*
		 * The whole transfer can be described by a single SGL descriptor.
		 * Use the special case described by the spec where SGL1's type is Data Block.
		 * This means the SGL in the tracker is not used at all, so copy the first (and only)
		 * SGL element into SGL1.
		 */
		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
	} else {
		/* The SPDK NVMe driver supports only one SGL segment for now; this is enough
		 * because NVME_MAX_SGL_DESCRIPTORS * 16 is less than one page.
		 */
		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
		req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor);
	}

	SPDK_DEBUGLOG(nvme, "Number of SGL descriptors: %" PRIu32 "\n", nseg);
	return 0;
}

/**
 * Build SGL list describing scattered payload buffer.
 */
static int
nvme_pcie_qpair_build_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
				     struct nvme_tracker *tr, bool dword_aligned)
{
	int rc;
	void *virt_addr;
	uint64_t phys_addr, mapping_length;
	uint32_t remaining_transfer_len, remaining_user_sge_len, length;
	struct spdk_nvme_sgl_descriptor *sgl;
	uint32_t nseg = 0;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

	/*
	 * Build scattered payloads.
	 */
	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	sgl = tr->u.sgl;
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.subtype = 0;

	remaining_transfer_len = req->payload_size;

	while (remaining_transfer_len > 0) {
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg,
					      &virt_addr, &remaining_user_sge_len);
		if (rc) {
			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			return -EFAULT;
		}

		/* Bit Bucket SGL descriptor */
		if ((uint64_t)virt_addr == UINT64_MAX) {
			/* TODO: enable WRITE and COMPARE when necessary */
			if (req->cmd.opc != SPDK_NVME_OPC_READ) {
				SPDK_ERRLOG("Only READ command can be supported\n");
				goto exit;
			}
			if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
				SPDK_ERRLOG("Too many SGL entries\n");
				goto exit;
			}

			sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_BIT_BUCKET;
			/* If the SGL describes a destination data buffer, the length of the data
			 * buffer shall be discarded by the controller, and the length is included
			 * in the Number of Logical Blocks (NLB) parameter. Otherwise, the length
			 * is not included in the NLB parameter.
			 */
			remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len);
			remaining_transfer_len -= remaining_user_sge_len;

			sgl->unkeyed.length = remaining_user_sge_len;
			sgl->address = 0;
			sgl->unkeyed.subtype = 0;

			sgl++;
			nseg++;

			continue;
		}

		remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len);
		remaining_transfer_len -= remaining_user_sge_len;
		while (remaining_user_sge_len > 0) {
			if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
				SPDK_ERRLOG("Too many SGL entries\n");
				goto exit;
			}

			if (dword_aligned && ((uintptr_t)virt_addr & 3)) {
				SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
				goto exit;
			}

			mapping_length = remaining_user_sge_len;
			phys_addr = nvme_pcie_vtophys(qpair->ctrlr, virt_addr, &mapping_length);
			if (phys_addr == SPDK_VTOPHYS_ERROR) {
				goto exit;
			}

			length = spdk_min(remaining_user_sge_len, mapping_length);
			remaining_user_sge_len -= length;
			virt_addr = (uint8_t *)virt_addr + length;
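
			/* If this mapping is physically adjacent to the previous descriptor,
			 * merge the two into one descriptor instead of emitting a new one.
			 * Callers can opt out of this behavior via the disable_pcie_sgl_merge
			 * qpair option.
			 */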
			if (!pqpair->flags.disable_pcie_sgl_merge && nseg > 0 &&
			    phys_addr == (*(sgl - 1)).address + (*(sgl - 1)).unkeyed.length) {
				/* extend previous entry */
				(*(sgl - 1)).unkeyed.length += length;
				continue;
			}

			sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
			sgl->unkeyed.length = length;
			sgl->address = phys_addr;
			sgl->unkeyed.subtype = 0;

			sgl++;
			nseg++;
		}
	}

	if (nseg == 1) {
		/*
		 * The whole transfer can be described by a single SGL descriptor.
		 * Use the special case described by the spec where SGL1's type is Data Block.
		 * This means the SGL in the tracker is not used at all, so copy the first (and only)
		 * SGL element into SGL1.
		 */
		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
	} else {
		/* The SPDK NVMe driver supports only one SGL segment for now; this is enough
		 * because NVME_MAX_SGL_DESCRIPTORS * 16 is less than one page.
		 */
		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
		req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor);
	}

	SPDK_DEBUGLOG(nvme, "Number of SGL descriptors: %" PRIu32 "\n", nseg);
	return 0;

exit:
	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
	return -EFAULT;
}

/**
 * Build PRP list describing scattered payload buffer.
 */
static int
nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
				       struct nvme_tracker *tr, bool dword_aligned)
{
	int rc;
	void *virt_addr;
	uint32_t remaining_transfer_len, length;
	uint32_t prp_index = 0;
	uint32_t page_size = qpair->ctrlr->page_size;

	/*
	 * Build scattered payloads.
	 */
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.reset_sgl_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	remaining_transfer_len = req->payload_size;
	while (remaining_transfer_len > 0) {
		assert(req->payload.next_sge_fn != NULL);
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length);
		if (rc) {
			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			return -EFAULT;
		}

		length = spdk_min(remaining_transfer_len, length);

		/*
		 * Any incompatible sges should have been handled up in the splitting routine,
		 * but assert here as an additional check.
		 *
		 * All SGEs except the last must end on a page boundary.
		 */
		assert((length == remaining_transfer_len) ||
		       _is_page_aligned((uintptr_t)virt_addr + length, page_size));

		rc = nvme_pcie_prp_list_append(qpair->ctrlr, tr, &prp_index, virt_addr, length, page_size);
		if (rc) {
			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			return rc;
		}

		remaining_transfer_len -= length;
	}

	SPDK_DEBUGLOG(nvme, "Number of PRP entries: %" PRIu32 "\n", prp_index);
	return 0;
}

typedef int(*build_req_fn)(struct spdk_nvme_qpair *, struct nvme_request *, struct nvme_tracker *,
			   bool);

static build_req_fn const g_nvme_pcie_build_req_table[][2] = {
	[NVME_PAYLOAD_TYPE_INVALID] = {
		nvme_pcie_qpair_build_request_invalid,		/* PRP */
		nvme_pcie_qpair_build_request_invalid		/* SGL */
	},
	[NVME_PAYLOAD_TYPE_CONTIG] = {
		nvme_pcie_qpair_build_contig_request,		/* PRP */
		nvme_pcie_qpair_build_contig_hw_sgl_request	/* SGL */
	},
	[NVME_PAYLOAD_TYPE_SGL] = {
		nvme_pcie_qpair_build_prps_sgl_request,		/* PRP */
		nvme_pcie_qpair_build_hw_sgl_request		/* SGL */
	}
};

static int
nvme_pcie_qpair_build_metadata(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
			       bool sgl_supported, bool mptr_sgl_supported, bool dword_aligned)
{
	void *md_payload;
	struct nvme_request *req = tr->req;
	uint64_t mapping_length;

	if (req->payload.md) {
		md_payload = (uint8_t *)req->payload.md + req->md_offset;
		if (dword_aligned && ((uintptr_t)md_payload & 3)) {
			SPDK_ERRLOG("virt_addr %p not dword aligned\n", md_payload);
			goto exit;
		}

		mapping_length = req->md_size;
		if (sgl_supported && mptr_sgl_supported && dword_aligned) {
			assert(req->cmd.psdt == SPDK_NVME_PSDT_SGL_MPTR_CONTIG);
			req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL;

			tr->meta_sgl.address = nvme_pcie_vtophys(qpair->ctrlr, md_payload, &mapping_length);
			if (tr->meta_sgl.address == SPDK_VTOPHYS_ERROR || mapping_length != req->md_size) {
				goto exit;
			}
			tr->meta_sgl.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
			tr->meta_sgl.unkeyed.length = req->md_size;
			tr->meta_sgl.unkeyed.subtype = 0;
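			/* MPTR must point at an SGL descriptor when PSDT is SGL_MPTR_SGL.
			 * The subtraction below assumes the tracker layout places meta_sgl
			 * immediately before the u.prp/u.sgl union, so the descriptor's bus
			 * address is prp_sgl_bus_addr minus one descriptor's size.
			 */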
			req->cmd.mptr = tr->prp_sgl_bus_addr - sizeof(struct spdk_nvme_sgl_descriptor);
		} else {
			req->cmd.mptr = nvme_pcie_vtophys(qpair->ctrlr, md_payload, &mapping_length);
			if (req->cmd.mptr == SPDK_VTOPHYS_ERROR || mapping_length != req->md_size) {
				goto exit;
			}
		}
	}

	return 0;

exit:
	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
	return -EINVAL;
}

int
nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
{
	struct nvme_tracker *tr;
	int rc = 0;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	enum nvme_payload_type payload_type;
	bool sgl_supported;
	bool mptr_sgl_supported;
	bool dword_aligned = true;

	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
		nvme_ctrlr_lock(ctrlr);
	}

	tr = TAILQ_FIRST(&pqpair->free_tr);

	if (tr == NULL) {
		pqpair->stat->queued_requests++;
		/* Inform the upper layer to try again later. */
		rc = -EAGAIN;
		goto exit;
	}

	pqpair->stat->submitted_requests++;
	TAILQ_REMOVE(&pqpair->free_tr, tr, tq_list); /* remove tr from free_tr */
	TAILQ_INSERT_TAIL(&pqpair->outstanding_tr, tr, tq_list);
	pqpair->qpair.queue_depth++;
	tr->req = req;
	tr->cb_fn = req->cb_fn;
	tr->cb_arg = req->cb_arg;
	req->cmd.cid = tr->cid;
	/* Use PRP by default. This bit will be overridden below if needed. */
	req->cmd.psdt = SPDK_NVME_PSDT_PRP;

	if (req->payload_size != 0) {
		payload_type = nvme_payload_type(&req->payload);
		/* According to the specification, PRPs shall be used for all
		 * Admin commands for NVMe over PCIe implementations.
		 */
		sgl_supported = (ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) != 0 &&
				!nvme_qpair_is_admin_queue(qpair);
		mptr_sgl_supported = (ctrlr->flags & SPDK_NVME_CTRLR_MPTR_SGL_SUPPORTED) != 0 &&
				     !nvme_qpair_is_admin_queue(qpair);

		if (sgl_supported) {
			/* Don't use SGL for DSM command */
			if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_NO_SGL_FOR_DSM) &&
					  (req->cmd.opc == SPDK_NVME_OPC_DATASET_MANAGEMENT))) {
				sgl_supported = false;
			}
		}

		if (sgl_supported && !(ctrlr->flags & SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT)) {
			dword_aligned = false;
		}

		/* If we fail to build the request or the metadata, do not return the -EFAULT back up
		 * the stack. This ensures that we always fail these types of requests via a
		 * completion callback, and never in the context of the submission.
		 */
		rc = g_nvme_pcie_build_req_table[payload_type][sgl_supported](qpair, req, tr, dword_aligned);
		if (rc < 0) {
			assert(rc == -EFAULT);
			rc = 0;
			goto exit;
		}

		rc = nvme_pcie_qpair_build_metadata(qpair, tr, sgl_supported, mptr_sgl_supported, dword_aligned);
		if (rc < 0) {
			assert(rc == -EFAULT);
			rc = 0;
			goto exit;
		}
	}

	nvme_pcie_qpair_submit_tracker(qpair, tr);

exit:
	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
		nvme_ctrlr_unlock(ctrlr);
	}

	return rc;
}

struct spdk_nvme_transport_poll_group *
nvme_pcie_poll_group_create(void)
{
	struct nvme_pcie_poll_group *group = calloc(1, sizeof(*group));

	if (group == NULL) {
		SPDK_ERRLOG("Unable to allocate poll group.\n");
		return NULL;
	}

	return &group->group;
}

int
nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
	return 0;
}

int
nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	return 0;
}

int
nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
			 struct spdk_nvme_qpair *qpair)
{
	return 0;
}

int
nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			    struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

	pqpair->stat = &g_dummy_stat;
	return 0;
}

int64_t
nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
		uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct spdk_nvme_qpair *qpair, *tmp_qpair;
	int32_t local_completions = 0;
	int64_t total_completions = 0;

	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
		disconnected_qpair_cb(qpair, tgroup->group->ctx);
	}

	STAILQ_FOREACH_SAFE(qpair, &tgroup->connected_qpairs, poll_group_stailq, tmp_qpair) {
		local_completions = spdk_nvme_qpair_process_completions(qpair, completions_per_qpair);
		if (spdk_unlikely(local_completions < 0)) {
			disconnected_qpair_cb(qpair, tgroup->group->ctx);
			total_completions = -ENXIO;
		} else if (spdk_likely(total_completions >= 0)) {
			total_completions += local_completions;
		}
	}

	return total_completions;
}

int
nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		return -EBUSY;
	}

	free(tgroup);

	return 0;
}

int
nvme_pcie_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
			       struct spdk_nvme_transport_poll_group_stat **_stats)
{
	struct nvme_pcie_poll_group *group;
	struct spdk_nvme_transport_poll_group_stat *stats;

	if (tgroup == NULL || _stats == NULL) {
		SPDK_ERRLOG("Invalid stats or group pointer\n");
		return -EINVAL;
	}

	stats = calloc(1, sizeof(*stats));
	if (!stats) {
		SPDK_ERRLOG("Can't allocate memory for stats\n");
		return -ENOMEM;
	}
	stats->trtype = SPDK_NVME_TRANSPORT_PCIE;
	group = SPDK_CONTAINEROF(tgroup, struct nvme_pcie_poll_group, group);
	memcpy(&stats->pcie, &group->stats, sizeof(group->stats));

	*_stats = stats;

	return 0;
}

void
nvme_pcie_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
				struct spdk_nvme_transport_poll_group_stat *stats)
{
	free(stats);
}

SPDK_TRACE_REGISTER_FN(nvme_pcie, "nvme_pcie", TRACE_GROUP_NVME_PCIE)
{
	struct spdk_trace_tpoint_opts opts[] = {
		{
			"NVME_PCIE_SUBMIT", TRACE_NVME_PCIE_SUBMIT,
			OWNER_TYPE_NVME_PCIE_QP, OBJECT_NVME_PCIE_REQ, 1,
			{	{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "opc", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
			}
		},
		{
			"NVME_PCIE_COMPLETE", TRACE_NVME_PCIE_COMPLETE,
			OWNER_TYPE_NVME_PCIE_QP, OBJECT_NVME_PCIE_REQ, 0,
			{	{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
				{ "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 },
				{ "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
			}
		},
	};

	spdk_trace_register_object(OBJECT_NVME_PCIE_REQ, 'p');
	spdk_trace_register_owner_type(OWNER_TYPE_NVME_PCIE_QP, 'q');
	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
}