1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 2a6dbe372Spaul luse * Copyright (C) 2021 Intel Corporation. All rights reserved. 3558be98fSAlexey Marchuk * Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved. 4ea2db5bbSShuhei Matsumoto * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 56b4b2d29SChangpeng Liu */ 66b4b2d29SChangpeng Liu 76b4b2d29SChangpeng Liu /* 86b4b2d29SChangpeng Liu * NVMe over PCIe common library 96b4b2d29SChangpeng Liu */ 106b4b2d29SChangpeng Liu 116b4b2d29SChangpeng Liu #include "spdk/stdinc.h" 126b4b2d29SChangpeng Liu #include "spdk/likely.h" 136b4b2d29SChangpeng Liu #include "spdk/string.h" 146b4b2d29SChangpeng Liu #include "nvme_internal.h" 156b4b2d29SChangpeng Liu #include "nvme_pcie_internal.h" 16971f07b9SJim Harris #include "spdk/trace.h" 17971f07b9SJim Harris 18c37e776eSKrzysztof Karas #include "spdk_internal/trace_defs.h" 196b4b2d29SChangpeng Liu 208b203d19SChangpeng Liu __thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL; 218b203d19SChangpeng Liu 22ea2db5bbSShuhei Matsumoto static struct spdk_nvme_pcie_stat g_dummy_stat = {}; 23ea2db5bbSShuhei Matsumoto 248dd1cd21SBen Walker static void nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, 258dd1cd21SBen Walker struct nvme_tracker *tr); 2615b7d3baSJim Harris 275e4fe0adSChangpeng Liu static inline uint64_t 285e4fe0adSChangpeng Liu nvme_pcie_vtophys(struct spdk_nvme_ctrlr *ctrlr, const void *buf, uint64_t *size) 291b0ec0b7SChangpeng Liu { 301b0ec0b7SChangpeng Liu if (spdk_likely(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) { 315e4fe0adSChangpeng Liu return spdk_vtophys(buf, size); 321b0ec0b7SChangpeng Liu } else { 331b0ec0b7SChangpeng Liu /* vfio-user address translation with IOVA=VA mode */ 341b0ec0b7SChangpeng Liu return (uint64_t)(uintptr_t)buf; 351b0ec0b7SChangpeng Liu } 361b0ec0b7SChangpeng Liu } 371b0ec0b7SChangpeng Liu 386b4b2d29SChangpeng Liu int 396b4b2d29SChangpeng Liu nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair) 406b4b2d29SChangpeng Liu { 416b4b2d29SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 426b4b2d29SChangpeng Liu uint32_t i; 436b4b2d29SChangpeng Liu 446b4b2d29SChangpeng Liu /* all head/tail vals are set to 0 */ 456b4b2d29SChangpeng Liu pqpair->last_sq_tail = pqpair->sq_tail = pqpair->sq_head = pqpair->cq_head = 0; 466b4b2d29SChangpeng Liu 476b4b2d29SChangpeng Liu /* 486b4b2d29SChangpeng Liu * First time through the completion queue, HW will set phase 496b4b2d29SChangpeng Liu * bit on completions to 1. So set this to 1 here, indicating 506b4b2d29SChangpeng Liu * we're looking for a 1 to know which entries have completed. 516b4b2d29SChangpeng Liu * we'll toggle the bit each time when the completion queue 526b4b2d29SChangpeng Liu * rolls over. 
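 *
 * For example, with num_entries == 4 the controller posts the first four
 * completions with the Phase bit set to 1.  Once cq_head wraps back to
 * entry 0, flags.phase is flipped, so on the next pass only entries whose
 * Phase bit reads 0 are treated as new, and stale entries left over from
 * the previous pass (still reading 1) are ignored.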
536b4b2d29SChangpeng Liu */ 546b4b2d29SChangpeng Liu pqpair->flags.phase = 1; 556b4b2d29SChangpeng Liu for (i = 0; i < pqpair->num_entries; i++) { 566b4b2d29SChangpeng Liu pqpair->cpl[i].status.p = 0; 576b4b2d29SChangpeng Liu } 586b4b2d29SChangpeng Liu 596b4b2d29SChangpeng Liu return 0; 606b4b2d29SChangpeng Liu } 616b4b2d29SChangpeng Liu 6228b353a5SAnkit Kumar int 6328b353a5SAnkit Kumar nvme_pcie_qpair_get_fd(struct spdk_nvme_qpair *qpair, struct spdk_event_handler_opts *opts) 6428b353a5SAnkit Kumar { 6528b353a5SAnkit Kumar struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 6628b353a5SAnkit Kumar struct spdk_pci_device *devhandle = nvme_ctrlr_proc_get_devhandle(ctrlr); 6728b353a5SAnkit Kumar 6828b353a5SAnkit Kumar assert(devhandle != NULL); 6928b353a5SAnkit Kumar if (!ctrlr->opts.enable_interrupts) { 7028b353a5SAnkit Kumar return -1; 7128b353a5SAnkit Kumar } 7228b353a5SAnkit Kumar 7328b353a5SAnkit Kumar if (!opts) { 7428b353a5SAnkit Kumar return spdk_pci_device_get_interrupt_efd_by_index(devhandle, qpair->id); 7528b353a5SAnkit Kumar } 7628b353a5SAnkit Kumar 7728b353a5SAnkit Kumar if (!SPDK_FIELD_VALID(opts, fd_type, opts->opts_size)) { 7828b353a5SAnkit Kumar return -EINVAL; 7928b353a5SAnkit Kumar } 8028b353a5SAnkit Kumar 8128b353a5SAnkit Kumar spdk_fd_group_get_default_event_handler_opts(opts, opts->opts_size); 8228b353a5SAnkit Kumar opts->fd_type = SPDK_FD_TYPE_EVENTFD; 8328b353a5SAnkit Kumar 8428b353a5SAnkit Kumar return spdk_pci_device_get_interrupt_efd_by_index(devhandle, qpair->id); 8528b353a5SAnkit Kumar } 8628b353a5SAnkit Kumar 876b4b2d29SChangpeng Liu static void 886b4b2d29SChangpeng Liu nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr) 896b4b2d29SChangpeng Liu { 906b4b2d29SChangpeng Liu tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp); 916b4b2d29SChangpeng Liu tr->cid = cid; 926b4b2d29SChangpeng Liu tr->req = NULL; 936b4b2d29SChangpeng Liu } 946b4b2d29SChangpeng Liu 956b4b2d29SChangpeng Liu static void * 966b4b2d29SChangpeng Liu nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t size, uint64_t alignment, 976b4b2d29SChangpeng Liu uint64_t *phys_addr) 986b4b2d29SChangpeng Liu { 996b4b2d29SChangpeng Liu struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 1006b4b2d29SChangpeng Liu uintptr_t addr; 1016b4b2d29SChangpeng Liu 1026b4b2d29SChangpeng Liu if (pctrlr->cmb.mem_register_addr != NULL) { 1036b4b2d29SChangpeng Liu /* BAR is mapped for data */ 1046b4b2d29SChangpeng Liu return NULL; 1056b4b2d29SChangpeng Liu } 1066b4b2d29SChangpeng Liu 1076b4b2d29SChangpeng Liu addr = (uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset; 1086b4b2d29SChangpeng Liu addr = (addr + (alignment - 1)) & ~(alignment - 1); 1096b4b2d29SChangpeng Liu 1106b4b2d29SChangpeng Liu /* CMB may only consume part of the BAR, calculate accordingly */ 1116b4b2d29SChangpeng Liu if (addr + size > ((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.size)) { 1126b4b2d29SChangpeng Liu SPDK_ERRLOG("Tried to allocate past valid CMB range!\n"); 1136b4b2d29SChangpeng Liu return NULL; 1146b4b2d29SChangpeng Liu } 1156b4b2d29SChangpeng Liu *phys_addr = pctrlr->cmb.bar_pa + addr - (uintptr_t)pctrlr->cmb.bar_va; 1166b4b2d29SChangpeng Liu 1176b4b2d29SChangpeng Liu pctrlr->cmb.current_offset = (addr + size) - (uintptr_t)pctrlr->cmb.bar_va; 1186b4b2d29SChangpeng Liu 1196b4b2d29SChangpeng Liu return (void *)addr; 1206b4b2d29SChangpeng Liu } 1216b4b2d29SChangpeng Liu 1226b4b2d29SChangpeng Liu int 1236b4b2d29SChangpeng Liu nvme_pcie_qpair_construct(struct 
spdk_nvme_qpair *qpair, 1246b4b2d29SChangpeng Liu const struct spdk_nvme_io_qpair_opts *opts) 1256b4b2d29SChangpeng Liu { 1266b4b2d29SChangpeng Liu struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1276b4b2d29SChangpeng Liu struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 1286b4b2d29SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 1296b4b2d29SChangpeng Liu struct nvme_tracker *tr; 1306b4b2d29SChangpeng Liu uint16_t i; 1316b4b2d29SChangpeng Liu uint16_t num_trackers; 1326b4b2d29SChangpeng Liu size_t page_align = sysconf(_SC_PAGESIZE); 1336b4b2d29SChangpeng Liu size_t queue_align, queue_len; 1346b4b2d29SChangpeng Liu uint32_t flags = SPDK_MALLOC_DMA; 135fcc1652cSJim Harris int32_t numa_id; 1366b4b2d29SChangpeng Liu uint64_t sq_paddr = 0; 1376b4b2d29SChangpeng Liu uint64_t cq_paddr = 0; 1386b4b2d29SChangpeng Liu 1396b4b2d29SChangpeng Liu if (opts) { 1406b4b2d29SChangpeng Liu pqpair->sq_vaddr = opts->sq.vaddr; 1416b4b2d29SChangpeng Liu pqpair->cq_vaddr = opts->cq.vaddr; 142e431ba2eSVincent Fu pqpair->flags.disable_pcie_sgl_merge = opts->disable_pcie_sgl_merge; 1436b4b2d29SChangpeng Liu sq_paddr = opts->sq.paddr; 1446b4b2d29SChangpeng Liu cq_paddr = opts->cq.paddr; 1456b4b2d29SChangpeng Liu } 1466b4b2d29SChangpeng Liu 1476b4b2d29SChangpeng Liu pqpair->retry_count = ctrlr->opts.transport_retry_count; 1486b4b2d29SChangpeng Liu 1496b4b2d29SChangpeng Liu /* 1506b4b2d29SChangpeng Liu * Limit the maximum number of completions to return per call to prevent wraparound, 1516b4b2d29SChangpeng Liu * and calculate how many trackers can be submitted at once without overflowing the 1526b4b2d29SChangpeng Liu * completion queue. 1536b4b2d29SChangpeng Liu */ 1546b4b2d29SChangpeng Liu pqpair->max_completions_cap = pqpair->num_entries / 4; 1556b4b2d29SChangpeng Liu pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS); 1566b4b2d29SChangpeng Liu pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS); 1576b4b2d29SChangpeng Liu num_trackers = pqpair->num_entries - pqpair->max_completions_cap; 1586b4b2d29SChangpeng Liu 1596b4b2d29SChangpeng Liu SPDK_INFOLOG(nvme, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n", 1606b4b2d29SChangpeng Liu pqpair->max_completions_cap, num_trackers); 1616b4b2d29SChangpeng Liu 1626b4b2d29SChangpeng Liu assert(num_trackers != 0); 1636b4b2d29SChangpeng Liu 1646b4b2d29SChangpeng Liu pqpair->sq_in_cmb = false; 1656b4b2d29SChangpeng Liu 1666b4b2d29SChangpeng Liu if (nvme_qpair_is_admin_queue(&pqpair->qpair)) { 1676b4b2d29SChangpeng Liu flags |= SPDK_MALLOC_SHARE; 1686b4b2d29SChangpeng Liu } 1696b4b2d29SChangpeng Liu 1706b4b2d29SChangpeng Liu /* cmd and cpl rings must be aligned on page size boundaries. */ 1716b4b2d29SChangpeng Liu if (ctrlr->opts.use_cmb_sqs) { 1726b4b2d29SChangpeng Liu pqpair->cmd = nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd), 1736b4b2d29SChangpeng Liu page_align, &pqpair->cmd_bus_addr); 1746b4b2d29SChangpeng Liu if (pqpair->cmd != NULL) { 1756b4b2d29SChangpeng Liu pqpair->sq_in_cmb = true; 1766b4b2d29SChangpeng Liu } 1776b4b2d29SChangpeng Liu } 1786b4b2d29SChangpeng Liu 1796b4b2d29SChangpeng Liu if (pqpair->sq_in_cmb == false) { 1806b4b2d29SChangpeng Liu if (pqpair->sq_vaddr) { 1816b4b2d29SChangpeng Liu pqpair->cmd = pqpair->sq_vaddr; 1826b4b2d29SChangpeng Liu } else { 1836b4b2d29SChangpeng Liu /* To ensure physical address contiguity we make each ring occupy 1846b4b2d29SChangpeng Liu * a single hugepage only. 
See MAX_IO_QUEUE_ENTRIES. 1856b4b2d29SChangpeng Liu */ 1866b4b2d29SChangpeng Liu queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cmd); 1876b4b2d29SChangpeng Liu queue_align = spdk_max(spdk_align32pow2(queue_len), page_align); 188186b109dSJim Harris pqpair->cmd = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_NUMA_ID_ANY, flags); 1896b4b2d29SChangpeng Liu if (pqpair->cmd == NULL) { 1906b4b2d29SChangpeng Liu SPDK_ERRLOG("alloc qpair_cmd failed\n"); 1916b4b2d29SChangpeng Liu return -ENOMEM; 1926b4b2d29SChangpeng Liu } 1936b4b2d29SChangpeng Liu } 1946b4b2d29SChangpeng Liu if (sq_paddr) { 1956b4b2d29SChangpeng Liu assert(pqpair->sq_vaddr != NULL); 1966b4b2d29SChangpeng Liu pqpair->cmd_bus_addr = sq_paddr; 1976b4b2d29SChangpeng Liu } else { 1985e4fe0adSChangpeng Liu pqpair->cmd_bus_addr = nvme_pcie_vtophys(ctrlr, pqpair->cmd, NULL); 1996b4b2d29SChangpeng Liu if (pqpair->cmd_bus_addr == SPDK_VTOPHYS_ERROR) { 2006b4b2d29SChangpeng Liu SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n"); 2016b4b2d29SChangpeng Liu return -EFAULT; 2026b4b2d29SChangpeng Liu } 2036b4b2d29SChangpeng Liu } 2046b4b2d29SChangpeng Liu } 2056b4b2d29SChangpeng Liu 2066b4b2d29SChangpeng Liu if (pqpair->cq_vaddr) { 2076b4b2d29SChangpeng Liu pqpair->cpl = pqpair->cq_vaddr; 2086b4b2d29SChangpeng Liu } else { 2096b4b2d29SChangpeng Liu queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cpl); 2106b4b2d29SChangpeng Liu queue_align = spdk_max(spdk_align32pow2(queue_len), page_align); 211fcc1652cSJim Harris numa_id = spdk_nvme_ctrlr_get_numa_id(ctrlr); 212fcc1652cSJim Harris pqpair->cpl = spdk_zmalloc(queue_len, queue_align, NULL, numa_id, flags); 2136b4b2d29SChangpeng Liu if (pqpair->cpl == NULL) { 2146b4b2d29SChangpeng Liu SPDK_ERRLOG("alloc qpair_cpl failed\n"); 2156b4b2d29SChangpeng Liu return -ENOMEM; 2166b4b2d29SChangpeng Liu } 2176b4b2d29SChangpeng Liu } 2186b4b2d29SChangpeng Liu if (cq_paddr) { 2196b4b2d29SChangpeng Liu assert(pqpair->cq_vaddr != NULL); 2206b4b2d29SChangpeng Liu pqpair->cpl_bus_addr = cq_paddr; 2216b4b2d29SChangpeng Liu } else { 2225e4fe0adSChangpeng Liu pqpair->cpl_bus_addr = nvme_pcie_vtophys(ctrlr, pqpair->cpl, NULL); 2236b4b2d29SChangpeng Liu if (pqpair->cpl_bus_addr == SPDK_VTOPHYS_ERROR) { 2246b4b2d29SChangpeng Liu SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n"); 2256b4b2d29SChangpeng Liu return -EFAULT; 2266b4b2d29SChangpeng Liu } 2276b4b2d29SChangpeng Liu } 2286b4b2d29SChangpeng Liu 229349895a2SChangpeng Liu pqpair->sq_tdbl = pctrlr->doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32; 230349895a2SChangpeng Liu pqpair->cq_hdbl = pctrlr->doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32; 2316b4b2d29SChangpeng Liu 2326b4b2d29SChangpeng Liu /* 2336b4b2d29SChangpeng Liu * Reserve space for all of the trackers in a single allocation. 2346b4b2d29SChangpeng Liu * struct nvme_tracker must be padded so that its size is already a power of 2. 2356b4b2d29SChangpeng Liu * This ensures the PRP list embedded in the nvme_tracker object will not span a 2366b4b2d29SChangpeng Liu * 4KB boundary, while allowing access to trackers in tr[] via normal array indexing. 
2376b4b2d29SChangpeng Liu */ 2386b4b2d29SChangpeng Liu pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL, 239186b109dSJim Harris SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_SHARE); 2406b4b2d29SChangpeng Liu if (pqpair->tr == NULL) { 2416b4b2d29SChangpeng Liu SPDK_ERRLOG("nvme_tr failed\n"); 2426b4b2d29SChangpeng Liu return -ENOMEM; 2436b4b2d29SChangpeng Liu } 2446b4b2d29SChangpeng Liu 2456b4b2d29SChangpeng Liu TAILQ_INIT(&pqpair->free_tr); 2466b4b2d29SChangpeng Liu TAILQ_INIT(&pqpair->outstanding_tr); 24788739040SAtul Malakar pqpair->qpair.queue_depth = 0; 2486b4b2d29SChangpeng Liu 2496b4b2d29SChangpeng Liu for (i = 0; i < num_trackers; i++) { 2506b4b2d29SChangpeng Liu tr = &pqpair->tr[i]; 2515e4fe0adSChangpeng Liu nvme_qpair_construct_tracker(tr, i, nvme_pcie_vtophys(ctrlr, tr, NULL)); 2526b4b2d29SChangpeng Liu TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list); 2536b4b2d29SChangpeng Liu } 2546b4b2d29SChangpeng Liu 2556b4b2d29SChangpeng Liu nvme_pcie_qpair_reset(qpair); 2566b4b2d29SChangpeng Liu 2576b4b2d29SChangpeng Liu return 0; 2586b4b2d29SChangpeng Liu } 2596b4b2d29SChangpeng Liu 2606b4b2d29SChangpeng Liu int 2616b4b2d29SChangpeng Liu nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries) 2626b4b2d29SChangpeng Liu { 2636b4b2d29SChangpeng Liu struct nvme_pcie_qpair *pqpair; 2646b4b2d29SChangpeng Liu int rc; 2656b4b2d29SChangpeng Liu 266186b109dSJim Harris pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_SHARE); 2676b4b2d29SChangpeng Liu if (pqpair == NULL) { 2686b4b2d29SChangpeng Liu return -ENOMEM; 2696b4b2d29SChangpeng Liu } 2706b4b2d29SChangpeng Liu 2716b4b2d29SChangpeng Liu pqpair->num_entries = num_entries; 2726b4b2d29SChangpeng Liu pqpair->flags.delay_cmd_submit = 0; 273455a5d78SMonica Kenguva pqpair->pcie_state = NVME_PCIE_QPAIR_READY; 2746b4b2d29SChangpeng Liu 2756b4b2d29SChangpeng Liu ctrlr->adminq = &pqpair->qpair; 2766b4b2d29SChangpeng Liu 2776b4b2d29SChangpeng Liu rc = nvme_qpair_init(ctrlr->adminq, 2786b4b2d29SChangpeng Liu 0, /* qpair ID */ 2796b4b2d29SChangpeng Liu ctrlr, 2806b4b2d29SChangpeng Liu SPDK_NVME_QPRIO_URGENT, 281771f65bbSMonica Kenguva num_entries, 282771f65bbSMonica Kenguva false); 2836b4b2d29SChangpeng Liu if (rc != 0) { 2846b4b2d29SChangpeng Liu return rc; 2856b4b2d29SChangpeng Liu } 2866b4b2d29SChangpeng Liu 287186b109dSJim Harris pqpair->stat = spdk_zmalloc(sizeof(*pqpair->stat), 64, NULL, SPDK_ENV_NUMA_ID_ANY, 288558be98fSAlexey Marchuk SPDK_MALLOC_SHARE); 289558be98fSAlexey Marchuk if (!pqpair->stat) { 290558be98fSAlexey Marchuk SPDK_ERRLOG("Failed to allocate admin qpair statistics\n"); 291558be98fSAlexey Marchuk return -ENOMEM; 292558be98fSAlexey Marchuk } 293558be98fSAlexey Marchuk 2946b4b2d29SChangpeng Liu return nvme_pcie_qpair_construct(ctrlr->adminq, NULL); 2956b4b2d29SChangpeng Liu } 2966b4b2d29SChangpeng Liu 2976b4b2d29SChangpeng Liu /** 2986b4b2d29SChangpeng Liu * Note: the ctrlr_lock must be held when calling this function. 
2996b4b2d29SChangpeng Liu */ 3006b4b2d29SChangpeng Liu void 3016b4b2d29SChangpeng Liu nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair, 3026b4b2d29SChangpeng Liu struct nvme_request *req, struct spdk_nvme_cpl *cpl) 3036b4b2d29SChangpeng Liu { 3046b4b2d29SChangpeng Liu struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 3056b4b2d29SChangpeng Liu struct nvme_request *active_req = req; 3066b4b2d29SChangpeng Liu struct spdk_nvme_ctrlr_process *active_proc; 3076b4b2d29SChangpeng Liu 3086b4b2d29SChangpeng Liu /* 3096b4b2d29SChangpeng Liu * The admin request is from another process. Move to the per 3106b4b2d29SChangpeng Liu * process list for that process to handle it later. 3116b4b2d29SChangpeng Liu */ 3126b4b2d29SChangpeng Liu assert(nvme_qpair_is_admin_queue(qpair)); 3136b4b2d29SChangpeng Liu assert(active_req->pid != getpid()); 3146b4b2d29SChangpeng Liu 3156b4b2d29SChangpeng Liu active_proc = nvme_ctrlr_get_process(ctrlr, active_req->pid); 3166b4b2d29SChangpeng Liu if (active_proc) { 3176b4b2d29SChangpeng Liu /* Save the original completion information */ 3186b4b2d29SChangpeng Liu memcpy(&active_req->cpl, cpl, sizeof(*cpl)); 3196b4b2d29SChangpeng Liu STAILQ_INSERT_TAIL(&active_proc->active_reqs, active_req, stailq); 3206b4b2d29SChangpeng Liu } else { 3216b4b2d29SChangpeng Liu SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n", 3226b4b2d29SChangpeng Liu active_req->pid); 3234b1aa5daSDeepak Abraham Tom nvme_cleanup_user_req(active_req); 3246b4b2d29SChangpeng Liu nvme_free_request(active_req); 3256b4b2d29SChangpeng Liu } 3266b4b2d29SChangpeng Liu } 3276b4b2d29SChangpeng Liu 3286b4b2d29SChangpeng Liu /** 3296b4b2d29SChangpeng Liu * Note: the ctrlr_lock must be held when calling this function. 3306b4b2d29SChangpeng Liu */ 3316b4b2d29SChangpeng Liu void 3326b4b2d29SChangpeng Liu nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair) 3336b4b2d29SChangpeng Liu { 3346b4b2d29SChangpeng Liu struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 3356b4b2d29SChangpeng Liu struct nvme_request *req, *tmp_req; 3366b4b2d29SChangpeng Liu pid_t pid = getpid(); 3376b4b2d29SChangpeng Liu struct spdk_nvme_ctrlr_process *proc; 3386b4b2d29SChangpeng Liu 3396b4b2d29SChangpeng Liu /* 3406b4b2d29SChangpeng Liu * Check whether there is any pending admin request from 3416b4b2d29SChangpeng Liu * other active processes. 
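 *
 * For context: when a process polling the admin queue sees a completion
 * that belongs to a request submitted by a different process sharing the
 * controller, nvme_pcie_qpair_insert_pending_admin_request() parks that
 * request on the owning process's active_reqs list.  The owning process
 * then finishes it here the next time it polls the admin queue.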
3426b4b2d29SChangpeng Liu */ 3436b4b2d29SChangpeng Liu assert(nvme_qpair_is_admin_queue(qpair)); 3446b4b2d29SChangpeng Liu 3456b4b2d29SChangpeng Liu proc = nvme_ctrlr_get_current_process(ctrlr); 3466b4b2d29SChangpeng Liu if (!proc) { 3476b4b2d29SChangpeng Liu SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid); 3486b4b2d29SChangpeng Liu assert(proc); 3496b4b2d29SChangpeng Liu return; 3506b4b2d29SChangpeng Liu } 3516b4b2d29SChangpeng Liu 3526b4b2d29SChangpeng Liu STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) { 3536b4b2d29SChangpeng Liu STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq); 3546b4b2d29SChangpeng Liu 3556b4b2d29SChangpeng Liu assert(req->pid == pid); 3566b4b2d29SChangpeng Liu 3576b4b2d29SChangpeng Liu nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &req->cpl); 3586b4b2d29SChangpeng Liu } 3596b4b2d29SChangpeng Liu } 3606b4b2d29SChangpeng Liu 3616b4b2d29SChangpeng Liu int 3626b4b2d29SChangpeng Liu nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr, 3636b4b2d29SChangpeng Liu struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, 3646b4b2d29SChangpeng Liu void *cb_arg) 3656b4b2d29SChangpeng Liu { 3666b4b2d29SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que); 3676b4b2d29SChangpeng Liu struct nvme_request *req; 3686b4b2d29SChangpeng Liu struct spdk_nvme_cmd *cmd; 3693ab7a1f6SAnkit Kumar bool ien = ctrlr->opts.enable_interrupts; 3706b4b2d29SChangpeng Liu 3716b4b2d29SChangpeng Liu req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); 3726b4b2d29SChangpeng Liu if (req == NULL) { 3736b4b2d29SChangpeng Liu return -ENOMEM; 3746b4b2d29SChangpeng Liu } 3756b4b2d29SChangpeng Liu 3766b4b2d29SChangpeng Liu cmd = &req->cmd; 3776b4b2d29SChangpeng Liu cmd->opc = SPDK_NVME_OPC_CREATE_IO_CQ; 3786b4b2d29SChangpeng Liu 3796b4b2d29SChangpeng Liu cmd->cdw10_bits.create_io_q.qid = io_que->id; 3806b4b2d29SChangpeng Liu cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1; 3816b4b2d29SChangpeng Liu 3826b4b2d29SChangpeng Liu cmd->cdw11_bits.create_io_cq.pc = 1; 3833ab7a1f6SAnkit Kumar if (ien) { 3843ab7a1f6SAnkit Kumar cmd->cdw11_bits.create_io_cq.ien = 1; 3853ab7a1f6SAnkit Kumar /* The interrupt vector offset starts from 1. We directly map the 3863ab7a1f6SAnkit Kumar * queue id to interrupt vector. 
3873ab7a1f6SAnkit Kumar */ 3883ab7a1f6SAnkit Kumar cmd->cdw11_bits.create_io_cq.iv = io_que->id; 3893ab7a1f6SAnkit Kumar } 3903ab7a1f6SAnkit Kumar 3916b4b2d29SChangpeng Liu cmd->dptr.prp.prp1 = pqpair->cpl_bus_addr; 3926b4b2d29SChangpeng Liu 3936b4b2d29SChangpeng Liu return nvme_ctrlr_submit_admin_request(ctrlr, req); 3946b4b2d29SChangpeng Liu } 3956b4b2d29SChangpeng Liu 3966b4b2d29SChangpeng Liu int 3976b4b2d29SChangpeng Liu nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr, 3986b4b2d29SChangpeng Liu struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg) 3996b4b2d29SChangpeng Liu { 4006b4b2d29SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que); 4016b4b2d29SChangpeng Liu struct nvme_request *req; 4026b4b2d29SChangpeng Liu struct spdk_nvme_cmd *cmd; 4036b4b2d29SChangpeng Liu 4046b4b2d29SChangpeng Liu req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); 4056b4b2d29SChangpeng Liu if (req == NULL) { 4066b4b2d29SChangpeng Liu return -ENOMEM; 4076b4b2d29SChangpeng Liu } 4086b4b2d29SChangpeng Liu 4096b4b2d29SChangpeng Liu cmd = &req->cmd; 4106b4b2d29SChangpeng Liu cmd->opc = SPDK_NVME_OPC_CREATE_IO_SQ; 4116b4b2d29SChangpeng Liu 4126b4b2d29SChangpeng Liu cmd->cdw10_bits.create_io_q.qid = io_que->id; 4136b4b2d29SChangpeng Liu cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1; 4146b4b2d29SChangpeng Liu cmd->cdw11_bits.create_io_sq.pc = 1; 4156b4b2d29SChangpeng Liu cmd->cdw11_bits.create_io_sq.qprio = io_que->qprio; 4166b4b2d29SChangpeng Liu cmd->cdw11_bits.create_io_sq.cqid = io_que->id; 4176b4b2d29SChangpeng Liu cmd->dptr.prp.prp1 = pqpair->cmd_bus_addr; 4186b4b2d29SChangpeng Liu 4196b4b2d29SChangpeng Liu return nvme_ctrlr_submit_admin_request(ctrlr, req); 4206b4b2d29SChangpeng Liu } 4216b4b2d29SChangpeng Liu 4226b4b2d29SChangpeng Liu int 4236b4b2d29SChangpeng Liu nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, 4246b4b2d29SChangpeng Liu spdk_nvme_cmd_cb cb_fn, void *cb_arg) 4256b4b2d29SChangpeng Liu { 4266b4b2d29SChangpeng Liu struct nvme_request *req; 4276b4b2d29SChangpeng Liu struct spdk_nvme_cmd *cmd; 4286b4b2d29SChangpeng Liu 4296b4b2d29SChangpeng Liu req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); 4306b4b2d29SChangpeng Liu if (req == NULL) { 4316b4b2d29SChangpeng Liu return -ENOMEM; 4326b4b2d29SChangpeng Liu } 4336b4b2d29SChangpeng Liu 4346b4b2d29SChangpeng Liu cmd = &req->cmd; 4356b4b2d29SChangpeng Liu cmd->opc = SPDK_NVME_OPC_DELETE_IO_CQ; 4366b4b2d29SChangpeng Liu cmd->cdw10_bits.delete_io_q.qid = qpair->id; 4376b4b2d29SChangpeng Liu 4386b4b2d29SChangpeng Liu return nvme_ctrlr_submit_admin_request(ctrlr, req); 4396b4b2d29SChangpeng Liu } 4406b4b2d29SChangpeng Liu 4416b4b2d29SChangpeng Liu int 4426b4b2d29SChangpeng Liu nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, 4436b4b2d29SChangpeng Liu spdk_nvme_cmd_cb cb_fn, void *cb_arg) 4446b4b2d29SChangpeng Liu { 4456b4b2d29SChangpeng Liu struct nvme_request *req; 4466b4b2d29SChangpeng Liu struct spdk_nvme_cmd *cmd; 4476b4b2d29SChangpeng Liu 4486b4b2d29SChangpeng Liu req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg); 4496b4b2d29SChangpeng Liu if (req == NULL) { 4506b4b2d29SChangpeng Liu return -ENOMEM; 4516b4b2d29SChangpeng Liu } 4526b4b2d29SChangpeng Liu 4536b4b2d29SChangpeng Liu cmd = &req->cmd; 4546b4b2d29SChangpeng Liu cmd->opc = SPDK_NVME_OPC_DELETE_IO_SQ; 4556b4b2d29SChangpeng Liu cmd->cdw10_bits.delete_io_q.qid = qpair->id; 4566b4b2d29SChangpeng Liu 
4576b4b2d29SChangpeng Liu return nvme_ctrlr_submit_admin_request(ctrlr, req); 4586b4b2d29SChangpeng Liu } 4596b4b2d29SChangpeng Liu 460455a5d78SMonica Kenguva static void 461455a5d78SMonica Kenguva nvme_completion_sq_error_delete_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl) 4626b4b2d29SChangpeng Liu { 463455a5d78SMonica Kenguva struct spdk_nvme_qpair *qpair = arg; 4646b4b2d29SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 465455a5d78SMonica Kenguva 466455a5d78SMonica Kenguva if (spdk_nvme_cpl_is_error(cpl)) { 467455a5d78SMonica Kenguva SPDK_ERRLOG("delete_io_cq failed!\n"); 468455a5d78SMonica Kenguva } 469455a5d78SMonica Kenguva 470455a5d78SMonica Kenguva pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED; 471455a5d78SMonica Kenguva } 472455a5d78SMonica Kenguva 473455a5d78SMonica Kenguva static void 474455a5d78SMonica Kenguva nvme_completion_create_sq_cb(void *arg, const struct spdk_nvme_cpl *cpl) 475455a5d78SMonica Kenguva { 476455a5d78SMonica Kenguva struct spdk_nvme_qpair *qpair = arg; 477455a5d78SMonica Kenguva struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 478455a5d78SMonica Kenguva struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 479455a5d78SMonica Kenguva struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); 4806b4b2d29SChangpeng Liu int rc; 4816b4b2d29SChangpeng Liu 4823cb9bc25SJim Harris if (pqpair->flags.defer_destruction) { 4833cb9bc25SJim Harris /* This qpair was deleted by the application while the 4843cb9bc25SJim Harris * connection was still in progress. We had to wait 4853cb9bc25SJim Harris * to free the qpair resources until this outstanding 4863cb9bc25SJim Harris * command was completed. Now that we have the completion 4873cb9bc25SJim Harris * free it now. 4883cb9bc25SJim Harris */ 4893cb9bc25SJim Harris nvme_pcie_qpair_destroy(qpair); 4903cb9bc25SJim Harris return; 4913cb9bc25SJim Harris } 4923cb9bc25SJim Harris 493455a5d78SMonica Kenguva if (spdk_nvme_cpl_is_error(cpl)) { 494455a5d78SMonica Kenguva SPDK_ERRLOG("nvme_create_io_sq failed, deleting cq!\n"); 495455a5d78SMonica Kenguva rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_sq_error_delete_cq_cb, 496455a5d78SMonica Kenguva qpair); 4976b4b2d29SChangpeng Liu if (rc != 0) { 498455a5d78SMonica Kenguva SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc); 499455a5d78SMonica Kenguva pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED; 5006b4b2d29SChangpeng Liu } 501455a5d78SMonica Kenguva return; 5026b4b2d29SChangpeng Liu } 503455a5d78SMonica Kenguva pqpair->pcie_state = NVME_PCIE_QPAIR_READY; 5046b4b2d29SChangpeng Liu if (ctrlr->shadow_doorbell) { 5056b4b2d29SChangpeng Liu pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) * 5066b4b2d29SChangpeng Liu pctrlr->doorbell_stride_u32; 5076b4b2d29SChangpeng Liu pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) * 5086b4b2d29SChangpeng Liu pctrlr->doorbell_stride_u32; 5096b4b2d29SChangpeng Liu pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) * 5106b4b2d29SChangpeng Liu pctrlr->doorbell_stride_u32; 5116b4b2d29SChangpeng Liu pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) * 5126b4b2d29SChangpeng Liu pctrlr->doorbell_stride_u32; 5136b4b2d29SChangpeng Liu pqpair->flags.has_shadow_doorbell = 1; 5146b4b2d29SChangpeng Liu } else { 5156b4b2d29SChangpeng Liu pqpair->flags.has_shadow_doorbell = 0; 5166b4b2d29SChangpeng Liu } 5176b4b2d29SChangpeng Liu nvme_pcie_qpair_reset(qpair); 5186b4b2d29SChangpeng Liu 
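	/*
	 * Note on the shadow doorbell setup above: when the controller exposes
	 * shadow doorbell / eventidx buffers (common with paravirtualized
	 * controllers), the driver publishes its sq_tdbl/cq_hdbl values in host
	 * memory and only writes the real MMIO doorbell when the eventidx says
	 * the controller needs to be notified.  The wrap-aware "do we need to
	 * ring?" test is conventionally written as in the sketch below; the name
	 * is illustrative, not necessarily a helper defined in this driver:
	 *
	 *   static inline bool
	 *   doorbell_update_needed(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
	 *   {
	 *           // True iff event_idx lies in [old_idx, new_idx), evaluated
	 *           // with unsigned 16-bit wraparound arithmetic.
	 *           return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx);
	 *   }
	 */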
519455a5d78SMonica Kenguva } 520455a5d78SMonica Kenguva 521455a5d78SMonica Kenguva static void 522455a5d78SMonica Kenguva nvme_completion_create_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl) 523455a5d78SMonica Kenguva { 524455a5d78SMonica Kenguva struct spdk_nvme_qpair *qpair = arg; 525455a5d78SMonica Kenguva struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 526455a5d78SMonica Kenguva int rc; 527455a5d78SMonica Kenguva 5283cb9bc25SJim Harris if (pqpair->flags.defer_destruction) { 5293cb9bc25SJim Harris /* This qpair was deleted by the application while the 5303cb9bc25SJim Harris * connection was still in progress. We had to wait 5313cb9bc25SJim Harris * to free the qpair resources until this outstanding 5323cb9bc25SJim Harris * command was completed. Now that we have the completion 5333cb9bc25SJim Harris * free it now. 5343cb9bc25SJim Harris */ 5353cb9bc25SJim Harris nvme_pcie_qpair_destroy(qpair); 5363cb9bc25SJim Harris return; 5373cb9bc25SJim Harris } 5383cb9bc25SJim Harris 539455a5d78SMonica Kenguva if (spdk_nvme_cpl_is_error(cpl)) { 540455a5d78SMonica Kenguva pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED; 541455a5d78SMonica Kenguva SPDK_ERRLOG("nvme_create_io_cq failed!\n"); 542455a5d78SMonica Kenguva return; 543455a5d78SMonica Kenguva } 544455a5d78SMonica Kenguva 545455a5d78SMonica Kenguva rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_create_sq_cb, qpair); 546455a5d78SMonica Kenguva 547455a5d78SMonica Kenguva if (rc != 0) { 548455a5d78SMonica Kenguva SPDK_ERRLOG("Failed to send request to create_io_sq, deleting cq!\n"); 549455a5d78SMonica Kenguva rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_sq_error_delete_cq_cb, 550455a5d78SMonica Kenguva qpair); 551455a5d78SMonica Kenguva if (rc != 0) { 552455a5d78SMonica Kenguva SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc); 553455a5d78SMonica Kenguva pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED; 554455a5d78SMonica Kenguva } 555455a5d78SMonica Kenguva return; 556455a5d78SMonica Kenguva } 557455a5d78SMonica Kenguva pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_SQ; 558455a5d78SMonica Kenguva } 559455a5d78SMonica Kenguva 560455a5d78SMonica Kenguva static int 561455a5d78SMonica Kenguva _nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, 562455a5d78SMonica Kenguva uint16_t qid) 563455a5d78SMonica Kenguva { 564455a5d78SMonica Kenguva struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 565455a5d78SMonica Kenguva int rc; 566455a5d78SMonica Kenguva 567455a5d78SMonica Kenguva /* Statistics may already be allocated in the case of controller reset */ 568455a5d78SMonica Kenguva if (qpair->poll_group) { 569455a5d78SMonica Kenguva struct nvme_pcie_poll_group *group = SPDK_CONTAINEROF(qpair->poll_group, 570455a5d78SMonica Kenguva struct nvme_pcie_poll_group, group); 571455a5d78SMonica Kenguva 572455a5d78SMonica Kenguva pqpair->stat = &group->stats; 573455a5d78SMonica Kenguva pqpair->shared_stats = true; 574455a5d78SMonica Kenguva } else { 575df4600f4SRichael Zhuang if (pqpair->stat == NULL) { 576455a5d78SMonica Kenguva pqpair->stat = calloc(1, sizeof(*pqpair->stat)); 577455a5d78SMonica Kenguva if (!pqpair->stat) { 578455a5d78SMonica Kenguva SPDK_ERRLOG("Failed to allocate qpair statistics\n"); 579455a5d78SMonica Kenguva nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED); 580455a5d78SMonica Kenguva return -ENOMEM; 581455a5d78SMonica Kenguva } 582455a5d78SMonica Kenguva } 583455a5d78SMonica Kenguva } 584455a5d78SMonica Kenguva 
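	/*
	 * What follows is the asynchronous connect sequence: submit Create I/O
	 * Completion Queue, create the submission queue from that command's
	 * completion callback, and only then mark the qpair ready (pcie_state
	 * moves WAIT_FOR_CQ -> WAIT_FOR_SQ -> READY, or FAILED on error).  A
	 * host-side sketch of driving this through the public API, assuming an
	 * asynchronously connected qpair (illustrative only):
	 *
	 *   struct spdk_nvme_io_qpair_opts opts;
	 *   struct spdk_nvme_qpair *qpair;
	 *
	 *   spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	 *   opts.async_mode = true;
	 *   qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
	 *   // With async_mode the qpair may still be connecting when this
	 *   // returns; polling it keeps the admin completions flowing until the
	 *   // state machine above reaches READY and the qpair becomes usable.
	 *   spdk_nvme_qpair_process_completions(qpair, 0);
	 */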
585455a5d78SMonica Kenguva rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_create_cq_cb, qpair); 586455a5d78SMonica Kenguva 587455a5d78SMonica Kenguva if (rc != 0) { 588455a5d78SMonica Kenguva SPDK_ERRLOG("Failed to send request to create_io_cq\n"); 589455a5d78SMonica Kenguva nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED); 590455a5d78SMonica Kenguva return rc; 591455a5d78SMonica Kenguva } 592455a5d78SMonica Kenguva pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_CQ; 5936b4b2d29SChangpeng Liu return 0; 5946b4b2d29SChangpeng Liu } 5956b4b2d29SChangpeng Liu 5966b4b2d29SChangpeng Liu int 5976b4b2d29SChangpeng Liu nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 5986b4b2d29SChangpeng Liu { 599ea0aaf5eSBen Walker int rc = 0; 600ea0aaf5eSBen Walker 601ea0aaf5eSBen Walker if (!nvme_qpair_is_admin_queue(qpair)) { 602ea0aaf5eSBen Walker rc = _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id); 603455a5d78SMonica Kenguva } else { 604ea0aaf5eSBen Walker nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); 605ea0aaf5eSBen Walker } 606ea0aaf5eSBen Walker 607ea0aaf5eSBen Walker return rc; 6086b4b2d29SChangpeng Liu } 6096b4b2d29SChangpeng Liu 6106b4b2d29SChangpeng Liu void 6116b4b2d29SChangpeng Liu nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 6126b4b2d29SChangpeng Liu { 613736b9da0SShuhei Matsumoto if (!nvme_qpair_is_admin_queue(qpair) || !ctrlr->is_disconnecting) { 614cfe11bd1SShuhei Matsumoto nvme_transport_ctrlr_disconnect_qpair_done(qpair); 615736b9da0SShuhei Matsumoto } else { 616736b9da0SShuhei Matsumoto /* If this function is called for the admin qpair via spdk_nvme_ctrlr_reset() 617736b9da0SShuhei Matsumoto * or spdk_nvme_ctrlr_disconnect(), initiate a Controller Level Reset. 618736b9da0SShuhei Matsumoto * Then we can abort trackers safely because the Controller Level Reset deletes 619736b9da0SShuhei Matsumoto * all I/O SQ/CQs. 620736b9da0SShuhei Matsumoto */ 621736b9da0SShuhei Matsumoto nvme_ctrlr_disable(ctrlr); 622736b9da0SShuhei Matsumoto } 6236b4b2d29SChangpeng Liu } 6246b4b2d29SChangpeng Liu 6258b203d19SChangpeng Liu /* Used when dst points to MMIO (i.e. CMB) in a virtual machine - in these cases we must 6268b203d19SChangpeng Liu * not use wide instructions because QEMU will not emulate such instructions to MMIO space. 6278b203d19SChangpeng Liu * So this function ensures we only copy 8 bytes at a time. 6288b203d19SChangpeng Liu */ 6298b203d19SChangpeng Liu static inline void 6308b203d19SChangpeng Liu nvme_pcie_copy_command_mmio(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src) 6318b203d19SChangpeng Liu { 6328b203d19SChangpeng Liu uint64_t *dst64 = (uint64_t *)dst; 6338b203d19SChangpeng Liu const uint64_t *src64 = (const uint64_t *)src; 6348b203d19SChangpeng Liu uint32_t i; 6358b203d19SChangpeng Liu 6368b203d19SChangpeng Liu for (i = 0; i < sizeof(*dst) / 8; i++) { 6378b203d19SChangpeng Liu dst64[i] = src64[i]; 6388b203d19SChangpeng Liu } 6398b203d19SChangpeng Liu } 6408b203d19SChangpeng Liu 6418b203d19SChangpeng Liu static inline void 6428b203d19SChangpeng Liu nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src) 6438b203d19SChangpeng Liu { 6448b203d19SChangpeng Liu /* dst and src are known to be non-overlapping and 64-byte aligned. 
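 * On x86 the copy uses four 16-byte non-temporal stores (_mm_stream_si128)
 * to move the 64-byte submission queue entry: the CPU will not read the SQ
 * slot back, so bypassing the cache avoids polluting it.  The 16-byte
 * alignment required by _mm_load_si128()/_mm_stream_si128() follows from
 * the 64-byte alignment noted above.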
*/ 6458b203d19SChangpeng Liu #if defined(__SSE2__) 6468b203d19SChangpeng Liu __m128i *d128 = (__m128i *)dst; 6478b203d19SChangpeng Liu const __m128i *s128 = (const __m128i *)src; 6488b203d19SChangpeng Liu 6498b203d19SChangpeng Liu _mm_stream_si128(&d128[0], _mm_load_si128(&s128[0])); 6508b203d19SChangpeng Liu _mm_stream_si128(&d128[1], _mm_load_si128(&s128[1])); 6518b203d19SChangpeng Liu _mm_stream_si128(&d128[2], _mm_load_si128(&s128[2])); 6528b203d19SChangpeng Liu _mm_stream_si128(&d128[3], _mm_load_si128(&s128[3])); 6538b203d19SChangpeng Liu #else 6548b203d19SChangpeng Liu *dst = *src; 6558b203d19SChangpeng Liu #endif 6568b203d19SChangpeng Liu } 6578b203d19SChangpeng Liu 6588b203d19SChangpeng Liu void 6598b203d19SChangpeng Liu nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) 6608b203d19SChangpeng Liu { 6618b203d19SChangpeng Liu struct nvme_request *req; 6628b203d19SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 6638b203d19SChangpeng Liu struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 6648b203d19SChangpeng Liu 6658b203d19SChangpeng Liu req = tr->req; 6668b203d19SChangpeng Liu assert(req != NULL); 6678b203d19SChangpeng Liu 668e36f0d36SJim Harris spdk_trace_record(TRACE_NVME_PCIE_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg, 6697b05b29dSJim Harris (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc, 67088739040SAtul Malakar req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12, 67188739040SAtul Malakar pqpair->qpair.queue_depth); 672971f07b9SJim Harris 673f89cf818SAlex Michon if (req->cmd.fuse) { 674f89cf818SAlex Michon /* 675f89cf818SAlex Michon * Keep track of the fuse operation sequence so that we ring the doorbell only 676f89cf818SAlex Michon * after the second fuse is submitted. 677f89cf818SAlex Michon */ 678f89cf818SAlex Michon qpair->last_fuse = req->cmd.fuse; 6798b203d19SChangpeng Liu } 6808b203d19SChangpeng Liu 6818b203d19SChangpeng Liu /* Don't use wide instructions to copy NVMe command, this is limited by QEMU 6828b203d19SChangpeng Liu * virtual NVMe controller, the maximum access width is 8 Bytes for one time. 6838b203d19SChangpeng Liu */ 6848b203d19SChangpeng Liu if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_MAXIMUM_PCI_ACCESS_WIDTH) && pqpair->sq_in_cmb)) { 6858b203d19SChangpeng Liu nvme_pcie_copy_command_mmio(&pqpair->cmd[pqpair->sq_tail], &req->cmd); 6868b203d19SChangpeng Liu } else { 6878b203d19SChangpeng Liu /* Copy the command from the tracker to the submission queue. 
*/ 6888b203d19SChangpeng Liu nvme_pcie_copy_command(&pqpair->cmd[pqpair->sq_tail], &req->cmd); 6898b203d19SChangpeng Liu } 6908b203d19SChangpeng Liu 6918b203d19SChangpeng Liu if (spdk_unlikely(++pqpair->sq_tail == pqpair->num_entries)) { 6928b203d19SChangpeng Liu pqpair->sq_tail = 0; 6938b203d19SChangpeng Liu } 6948b203d19SChangpeng Liu 6958b203d19SChangpeng Liu if (spdk_unlikely(pqpair->sq_tail == pqpair->sq_head)) { 6968b203d19SChangpeng Liu SPDK_ERRLOG("sq_tail is passing sq_head!\n"); 6978b203d19SChangpeng Liu } 6988b203d19SChangpeng Liu 6998b203d19SChangpeng Liu if (!pqpair->flags.delay_cmd_submit) { 7008b203d19SChangpeng Liu nvme_pcie_qpair_ring_sq_doorbell(qpair); 7018b203d19SChangpeng Liu } 7028b203d19SChangpeng Liu } 7038b203d19SChangpeng Liu 7048b203d19SChangpeng Liu void 7058b203d19SChangpeng Liu nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr, 7068b203d19SChangpeng Liu struct spdk_nvme_cpl *cpl, bool print_on_error) 7078b203d19SChangpeng Liu { 7088b203d19SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 7098b203d19SChangpeng Liu struct nvme_request *req; 7108b203d19SChangpeng Liu bool retry, error; 71115beaa20SChangpeng Liu bool print_error; 7128b203d19SChangpeng Liu 7138b203d19SChangpeng Liu req = tr->req; 7148b203d19SChangpeng Liu 715e36f0d36SJim Harris spdk_trace_record(TRACE_NVME_PCIE_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg, 71688739040SAtul Malakar (uint32_t)req->cmd.cid, (uint32_t)cpl->status_raw, pqpair->qpair.queue_depth); 717971f07b9SJim Harris 7188b203d19SChangpeng Liu assert(req != NULL); 7198b203d19SChangpeng Liu 7208b203d19SChangpeng Liu error = spdk_nvme_cpl_is_error(cpl); 7218b203d19SChangpeng Liu retry = error && nvme_completion_is_retry(cpl) && 7228b203d19SChangpeng Liu req->retries < pqpair->retry_count; 72315beaa20SChangpeng Liu print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging; 7248b203d19SChangpeng Liu 72515beaa20SChangpeng Liu if (print_error) { 7268b203d19SChangpeng Liu spdk_nvme_qpair_print_command(qpair, &req->cmd); 72715beaa20SChangpeng Liu } 72815beaa20SChangpeng Liu 72915beaa20SChangpeng Liu if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) { 7308b203d19SChangpeng Liu spdk_nvme_qpair_print_completion(qpair, cpl); 7318b203d19SChangpeng Liu } 7328b203d19SChangpeng Liu 7338b203d19SChangpeng Liu assert(cpl->cid == req->cmd.cid); 7348b203d19SChangpeng Liu 7358b203d19SChangpeng Liu if (retry) { 7368b203d19SChangpeng Liu req->retries++; 7378b203d19SChangpeng Liu nvme_pcie_qpair_submit_tracker(qpair, tr); 7388b203d19SChangpeng Liu } else { 7398b203d19SChangpeng Liu TAILQ_REMOVE(&pqpair->outstanding_tr, tr, tq_list); 74088739040SAtul Malakar pqpair->qpair.queue_depth--; 7418b203d19SChangpeng Liu 7428b203d19SChangpeng Liu /* Only check admin requests from different processes. 
*/ 7438b203d19SChangpeng Liu if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) { 7448b203d19SChangpeng Liu nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl); 7458b203d19SChangpeng Liu } else { 7468b203d19SChangpeng Liu nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl); 7478b203d19SChangpeng Liu } 7488b203d19SChangpeng Liu 7498b203d19SChangpeng Liu tr->req = NULL; 7508b203d19SChangpeng Liu 7518b203d19SChangpeng Liu TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list); 7528b203d19SChangpeng Liu } 7538b203d19SChangpeng Liu } 7548b203d19SChangpeng Liu 7558b203d19SChangpeng Liu void 7568b203d19SChangpeng Liu nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair, 7578b203d19SChangpeng Liu struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr, 7588b203d19SChangpeng Liu bool print_on_error) 7598b203d19SChangpeng Liu { 7608b203d19SChangpeng Liu struct spdk_nvme_cpl cpl; 7618b203d19SChangpeng Liu 7628b203d19SChangpeng Liu memset(&cpl, 0, sizeof(cpl)); 7638b203d19SChangpeng Liu cpl.sqid = qpair->id; 7648b203d19SChangpeng Liu cpl.cid = tr->cid; 7658b203d19SChangpeng Liu cpl.status.sct = sct; 7668b203d19SChangpeng Liu cpl.status.sc = sc; 7678b203d19SChangpeng Liu cpl.status.dnr = dnr; 7688b203d19SChangpeng Liu nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error); 7698b203d19SChangpeng Liu } 7708b203d19SChangpeng Liu 7718b203d19SChangpeng Liu void 7728b203d19SChangpeng Liu nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) 7738b203d19SChangpeng Liu { 7748b203d19SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 7758b203d19SChangpeng Liu struct nvme_tracker *tr, *temp, *last; 7768b203d19SChangpeng Liu 7778b203d19SChangpeng Liu last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head); 7788b203d19SChangpeng Liu 7798b203d19SChangpeng Liu /* Abort previously submitted (outstanding) trs */ 7808b203d19SChangpeng Liu TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) { 7818b203d19SChangpeng Liu if (!qpair->ctrlr->opts.disable_error_logging) { 7828b203d19SChangpeng Liu SPDK_ERRLOG("aborting outstanding command\n"); 7838b203d19SChangpeng Liu } 7848b203d19SChangpeng Liu nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, 7858b203d19SChangpeng Liu SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true); 7868b203d19SChangpeng Liu 7878b203d19SChangpeng Liu if (tr == last) { 7888b203d19SChangpeng Liu break; 7898b203d19SChangpeng Liu } 7908b203d19SChangpeng Liu } 7918b203d19SChangpeng Liu } 7928b203d19SChangpeng Liu 7938b203d19SChangpeng Liu void 7948b203d19SChangpeng Liu nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair) 7958b203d19SChangpeng Liu { 7968b203d19SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 7978b203d19SChangpeng Liu struct nvme_tracker *tr; 7988b203d19SChangpeng Liu 7998b203d19SChangpeng Liu tr = TAILQ_FIRST(&pqpair->outstanding_tr); 8008b203d19SChangpeng Liu while (tr != NULL) { 8018b203d19SChangpeng Liu assert(tr->req != NULL); 8028b203d19SChangpeng Liu if (tr->req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) { 8038b203d19SChangpeng Liu nvme_pcie_qpair_manual_complete_tracker(qpair, tr, 8048b203d19SChangpeng Liu SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0, 8058b203d19SChangpeng Liu false); 8068b203d19SChangpeng Liu tr = TAILQ_FIRST(&pqpair->outstanding_tr); 8078b203d19SChangpeng Liu } else { 8088b203d19SChangpeng Liu tr = TAILQ_NEXT(tr, tq_list); 8098b203d19SChangpeng Liu } 8108b203d19SChangpeng 
Liu } 8118b203d19SChangpeng Liu } 8128b203d19SChangpeng Liu 8138b203d19SChangpeng Liu void 8148b203d19SChangpeng Liu nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair) 8158b203d19SChangpeng Liu { 8168b203d19SChangpeng Liu nvme_pcie_admin_qpair_abort_aers(qpair); 8178b203d19SChangpeng Liu } 8188b203d19SChangpeng Liu 8198b203d19SChangpeng Liu void 8208b203d19SChangpeng Liu nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr) 8218b203d19SChangpeng Liu { 8228b203d19SChangpeng Liu nvme_pcie_qpair_abort_trackers(qpair, dnr); 8238b203d19SChangpeng Liu } 8248b203d19SChangpeng Liu 8258b203d19SChangpeng Liu static void 8268b203d19SChangpeng Liu nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair) 8278b203d19SChangpeng Liu { 8288b203d19SChangpeng Liu uint64_t t02; 8298b203d19SChangpeng Liu struct nvme_tracker *tr, *tmp; 8308b203d19SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 8318b203d19SChangpeng Liu struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 8328b203d19SChangpeng Liu struct spdk_nvme_ctrlr_process *active_proc; 8338b203d19SChangpeng Liu 8348b203d19SChangpeng Liu /* Don't check timeouts during controller initialization. */ 8358b203d19SChangpeng Liu if (ctrlr->state != NVME_CTRLR_STATE_READY) { 8368b203d19SChangpeng Liu return; 8378b203d19SChangpeng Liu } 8388b203d19SChangpeng Liu 8398b203d19SChangpeng Liu if (nvme_qpair_is_admin_queue(qpair)) { 8408b203d19SChangpeng Liu active_proc = nvme_ctrlr_get_current_process(ctrlr); 8418b203d19SChangpeng Liu } else { 8428b203d19SChangpeng Liu active_proc = qpair->active_proc; 8438b203d19SChangpeng Liu } 8448b203d19SChangpeng Liu 8458b203d19SChangpeng Liu /* Only check timeouts if the current process has a timeout callback. */ 8468b203d19SChangpeng Liu if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) { 8478b203d19SChangpeng Liu return; 8488b203d19SChangpeng Liu } 8498b203d19SChangpeng Liu 8508b203d19SChangpeng Liu t02 = spdk_get_ticks(); 8518b203d19SChangpeng Liu TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) { 8528b203d19SChangpeng Liu assert(tr->req != NULL); 8538b203d19SChangpeng Liu 8548b203d19SChangpeng Liu if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, t02)) { 8558b203d19SChangpeng Liu /* 8568b203d19SChangpeng Liu * The requests are in order, so as soon as one has not timed out, 8578b203d19SChangpeng Liu * stop iterating. 
8588b203d19SChangpeng Liu */ 8598b203d19SChangpeng Liu break; 8608b203d19SChangpeng Liu } 8618b203d19SChangpeng Liu } 8628b203d19SChangpeng Liu } 8638b203d19SChangpeng Liu 8648b203d19SChangpeng Liu int32_t 8658b203d19SChangpeng Liu nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) 8668b203d19SChangpeng Liu { 8678b203d19SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 8688b203d19SChangpeng Liu struct nvme_tracker *tr; 8698b203d19SChangpeng Liu struct spdk_nvme_cpl *cpl, *next_cpl; 8708b203d19SChangpeng Liu uint32_t num_completions = 0; 8718b203d19SChangpeng Liu struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 8728b203d19SChangpeng Liu uint16_t next_cq_head; 8738b203d19SChangpeng Liu uint8_t next_phase; 8748b203d19SChangpeng Liu bool next_is_valid = false; 875455a5d78SMonica Kenguva int rc; 876455a5d78SMonica Kenguva 877455a5d78SMonica Kenguva if (spdk_unlikely(pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED)) { 878455a5d78SMonica Kenguva return -ENXIO; 879455a5d78SMonica Kenguva } 880455a5d78SMonica Kenguva 881e40bd531SJim Harris if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) { 882e40bd531SJim Harris if (pqpair->pcie_state == NVME_PCIE_QPAIR_READY) { 883e40bd531SJim Harris /* It is possible that another thread set the pcie_state to 884e40bd531SJim Harris * QPAIR_READY, if it polled the adminq and processed the SQ 885e40bd531SJim Harris * completion for this qpair. So check for that condition 886e40bd531SJim Harris * here and then update the qpair's state to CONNECTED, since 887e40bd531SJim Harris * we can only set the qpair state from the qpair's thread. 888e40bd531SJim Harris * (Note: this fixed issue #2157.) 889e40bd531SJim Harris */ 890e40bd531SJim Harris nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED); 891e40bd531SJim Harris } else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) { 892e40bd531SJim Harris nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED); 893e40bd531SJim Harris return -ENXIO; 894e40bd531SJim Harris } else { 895455a5d78SMonica Kenguva rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 896455a5d78SMonica Kenguva if (rc < 0) { 897455a5d78SMonica Kenguva return rc; 898e8dd82e5SMonica Kenguva } else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) { 899e40bd531SJim Harris nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED); 900e8dd82e5SMonica Kenguva return -ENXIO; 901455a5d78SMonica Kenguva } 902e40bd531SJim Harris } 903455a5d78SMonica Kenguva return 0; 904455a5d78SMonica Kenguva } 9058b203d19SChangpeng Liu 9068b203d19SChangpeng Liu if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { 907e10b4806SJim Harris nvme_ctrlr_lock(ctrlr); 9088b203d19SChangpeng Liu } 9098b203d19SChangpeng Liu 9108b203d19SChangpeng Liu if (max_completions == 0 || max_completions > pqpair->max_completions_cap) { 9118b203d19SChangpeng Liu /* 9128b203d19SChangpeng Liu * max_completions == 0 means unlimited, but complete at most 9138b203d19SChangpeng Liu * max_completions_cap batch of I/O at a time so that the completion 9148b203d19SChangpeng Liu * queue doorbells don't wrap around. 
9158b203d19SChangpeng Liu */ 9168b203d19SChangpeng Liu max_completions = pqpair->max_completions_cap; 9178b203d19SChangpeng Liu } 9188b203d19SChangpeng Liu 919558be98fSAlexey Marchuk pqpair->stat->polls++; 920558be98fSAlexey Marchuk 9218b203d19SChangpeng Liu while (1) { 9228b203d19SChangpeng Liu cpl = &pqpair->cpl[pqpair->cq_head]; 9238b203d19SChangpeng Liu 9248b203d19SChangpeng Liu if (!next_is_valid && cpl->status.p != pqpair->flags.phase) { 9258b203d19SChangpeng Liu break; 9268b203d19SChangpeng Liu } 9278b203d19SChangpeng Liu 9288b203d19SChangpeng Liu if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) { 9298b203d19SChangpeng Liu next_cq_head = pqpair->cq_head + 1; 9308b203d19SChangpeng Liu next_phase = pqpair->flags.phase; 9318b203d19SChangpeng Liu } else { 9328b203d19SChangpeng Liu next_cq_head = 0; 9338b203d19SChangpeng Liu next_phase = !pqpair->flags.phase; 9348b203d19SChangpeng Liu } 9358b203d19SChangpeng Liu next_cpl = &pqpair->cpl[next_cq_head]; 9368b203d19SChangpeng Liu next_is_valid = (next_cpl->status.p == next_phase); 9378b203d19SChangpeng Liu if (next_is_valid) { 9388b203d19SChangpeng Liu __builtin_prefetch(&pqpair->tr[next_cpl->cid]); 9398b203d19SChangpeng Liu } 9408b203d19SChangpeng Liu 941e9a94122SXue Liu #if defined(__PPC64__) || defined(__riscv) || defined(__loongarch__) 9428b203d19SChangpeng Liu /* 9438b203d19SChangpeng Liu * This memory barrier prevents reordering of: 9448b203d19SChangpeng Liu * - load after store from/to tr 9458b203d19SChangpeng Liu * - load after load cpl phase and cpl cid 9468b203d19SChangpeng Liu */ 9478b203d19SChangpeng Liu spdk_mb(); 9488b203d19SChangpeng Liu #elif defined(__aarch64__) 9498b203d19SChangpeng Liu __asm volatile("dmb oshld" ::: "memory"); 9508b203d19SChangpeng Liu #endif 9518b203d19SChangpeng Liu 9528b203d19SChangpeng Liu if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) { 9538b203d19SChangpeng Liu pqpair->cq_head = 0; 9548b203d19SChangpeng Liu pqpair->flags.phase = !pqpair->flags.phase; 9558b203d19SChangpeng Liu } 9568b203d19SChangpeng Liu 9578b203d19SChangpeng Liu tr = &pqpair->tr[cpl->cid]; 9587d44b36eSJim Harris pqpair->sq_head = cpl->sqhd; 9597d44b36eSJim Harris 9607d44b36eSJim Harris if (tr->req) { 9618b203d19SChangpeng Liu /* Prefetch the req's STAILQ_ENTRY since we'll need to access it 9628b203d19SChangpeng Liu * as part of putting the req back on the qpair's free list. 
9638b203d19SChangpeng Liu */ 9648b203d19SChangpeng Liu __builtin_prefetch(&tr->req->stailq); 9658b203d19SChangpeng Liu nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true); 9668b203d19SChangpeng Liu } else { 9678b203d19SChangpeng Liu SPDK_ERRLOG("cpl does not map to outstanding cmd\n"); 9688b203d19SChangpeng Liu spdk_nvme_qpair_print_completion(qpair, cpl); 9698b203d19SChangpeng Liu assert(0); 9708b203d19SChangpeng Liu } 9718b203d19SChangpeng Liu 9728b203d19SChangpeng Liu if (++num_completions == max_completions) { 9738b203d19SChangpeng Liu break; 9748b203d19SChangpeng Liu } 9758b203d19SChangpeng Liu } 9768b203d19SChangpeng Liu 9778b203d19SChangpeng Liu if (num_completions > 0) { 978558be98fSAlexey Marchuk pqpair->stat->completions += num_completions; 9798b203d19SChangpeng Liu nvme_pcie_qpair_ring_cq_doorbell(qpair); 980558be98fSAlexey Marchuk } else { 981558be98fSAlexey Marchuk pqpair->stat->idle_polls++; 9828b203d19SChangpeng Liu } 9838b203d19SChangpeng Liu 9848b203d19SChangpeng Liu if (pqpair->flags.delay_cmd_submit) { 9858b203d19SChangpeng Liu if (pqpair->last_sq_tail != pqpair->sq_tail) { 9868b203d19SChangpeng Liu nvme_pcie_qpair_ring_sq_doorbell(qpair); 9878b203d19SChangpeng Liu pqpair->last_sq_tail = pqpair->sq_tail; 9888b203d19SChangpeng Liu } 9898b203d19SChangpeng Liu } 9908b203d19SChangpeng Liu 9918b203d19SChangpeng Liu if (spdk_unlikely(ctrlr->timeout_enabled)) { 9928b203d19SChangpeng Liu /* 9938b203d19SChangpeng Liu * User registered for timeout callback 9948b203d19SChangpeng Liu */ 9958b203d19SChangpeng Liu nvme_pcie_qpair_check_timeout(qpair); 9968b203d19SChangpeng Liu } 9978b203d19SChangpeng Liu 998736b9da0SShuhei Matsumoto /* Before returning, complete any pending admin request or 999736b9da0SShuhei Matsumoto * process the admin qpair disconnection. 
1000736b9da0SShuhei Matsumoto */ 10018b203d19SChangpeng Liu if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { 10028b203d19SChangpeng Liu nvme_pcie_qpair_complete_pending_admin_request(qpair); 10038b203d19SChangpeng Liu 1004736b9da0SShuhei Matsumoto if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) { 1005736b9da0SShuhei Matsumoto rc = nvme_ctrlr_disable_poll(qpair->ctrlr); 1006a475aed5SShuhei Matsumoto if (rc != -EAGAIN) { 1007736b9da0SShuhei Matsumoto nvme_transport_ctrlr_disconnect_qpair_done(qpair); 1008736b9da0SShuhei Matsumoto } 1009736b9da0SShuhei Matsumoto } 1010736b9da0SShuhei Matsumoto 1011e10b4806SJim Harris nvme_ctrlr_unlock(ctrlr); 10128b203d19SChangpeng Liu } 10138b203d19SChangpeng Liu 101415b7d3baSJim Harris if (spdk_unlikely(pqpair->flags.has_pending_vtophys_failures)) { 101515b7d3baSJim Harris struct nvme_tracker *tr, *tmp; 101615b7d3baSJim Harris 101715b7d3baSJim Harris TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) { 101815b7d3baSJim Harris if (tr->bad_vtophys) { 101915b7d3baSJim Harris tr->bad_vtophys = 0; 102015b7d3baSJim Harris nvme_pcie_fail_request_bad_vtophys(qpair, tr); 102115b7d3baSJim Harris } 102215b7d3baSJim Harris } 102315b7d3baSJim Harris pqpair->flags.has_pending_vtophys_failures = 0; 102415b7d3baSJim Harris } 102515b7d3baSJim Harris 10268b203d19SChangpeng Liu return num_completions; 10278b203d19SChangpeng Liu } 10288b203d19SChangpeng Liu 10298b203d19SChangpeng Liu int 10308b203d19SChangpeng Liu nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair) 10318b203d19SChangpeng Liu { 10328b203d19SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 10338b203d19SChangpeng Liu 10348b203d19SChangpeng Liu if (nvme_qpair_is_admin_queue(qpair)) { 10358b203d19SChangpeng Liu nvme_pcie_admin_qpair_destroy(qpair); 10368b203d19SChangpeng Liu } 10378b203d19SChangpeng Liu /* 10388b203d19SChangpeng Liu * We check sq_vaddr and cq_vaddr to see if the user specified the memory 10398b203d19SChangpeng Liu * buffers when creating the I/O queue. 10408b203d19SChangpeng Liu * If the user specified them, we cannot free that memory. 10418b203d19SChangpeng Liu * Nor do we free it if it's in the CMB. 
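 * (A submission queue placed in the CMB is a carve-out of the controller's
 * BAR mapping handed out by nvme_pcie_ctrlr_alloc_cmb(), so it is released
 * when the BAR is unmapped rather than through spdk_free().)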
10428b203d19SChangpeng Liu */ 10438b203d19SChangpeng Liu if (!pqpair->sq_vaddr && pqpair->cmd && !pqpair->sq_in_cmb) { 10448b203d19SChangpeng Liu spdk_free(pqpair->cmd); 10458b203d19SChangpeng Liu } 10468b203d19SChangpeng Liu if (!pqpair->cq_vaddr && pqpair->cpl) { 10478b203d19SChangpeng Liu spdk_free(pqpair->cpl); 10488b203d19SChangpeng Liu } 10498b203d19SChangpeng Liu if (pqpair->tr) { 10508b203d19SChangpeng Liu spdk_free(pqpair->tr); 10518b203d19SChangpeng Liu } 10528b203d19SChangpeng Liu 10538b203d19SChangpeng Liu nvme_qpair_deinit(qpair); 10548b203d19SChangpeng Liu 1055423f72daSAlexey Marchuk if (!pqpair->shared_stats && (!qpair->active_proc || 1056423f72daSAlexey Marchuk qpair->active_proc == nvme_ctrlr_get_current_process(qpair->ctrlr))) { 1057558be98fSAlexey Marchuk if (qpair->id) { 1058558be98fSAlexey Marchuk free(pqpair->stat); 1059558be98fSAlexey Marchuk } else { 1060558be98fSAlexey Marchuk /* statistics of admin qpair are allocates from huge pages because 1061558be98fSAlexey Marchuk * admin qpair is shared for multi-process */ 1062558be98fSAlexey Marchuk spdk_free(pqpair->stat); 1063558be98fSAlexey Marchuk } 1064558be98fSAlexey Marchuk 1065558be98fSAlexey Marchuk } 1066558be98fSAlexey Marchuk 10678b203d19SChangpeng Liu spdk_free(pqpair); 10688b203d19SChangpeng Liu 10698b203d19SChangpeng Liu return 0; 10708b203d19SChangpeng Liu } 10718b203d19SChangpeng Liu 10728b203d19SChangpeng Liu struct spdk_nvme_qpair * 10738b203d19SChangpeng Liu nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, 10748b203d19SChangpeng Liu const struct spdk_nvme_io_qpair_opts *opts) 10758b203d19SChangpeng Liu { 10768b203d19SChangpeng Liu struct nvme_pcie_qpair *pqpair; 10778b203d19SChangpeng Liu struct spdk_nvme_qpair *qpair; 10788b203d19SChangpeng Liu int rc; 10798b203d19SChangpeng Liu 10808b203d19SChangpeng Liu assert(ctrlr != NULL); 10818b203d19SChangpeng Liu 10828b203d19SChangpeng Liu pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, 1083186b109dSJim Harris SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_SHARE); 10848b203d19SChangpeng Liu if (pqpair == NULL) { 10858b203d19SChangpeng Liu return NULL; 10868b203d19SChangpeng Liu } 10878b203d19SChangpeng Liu 10888b203d19SChangpeng Liu pqpair->num_entries = opts->io_queue_size; 10898b203d19SChangpeng Liu pqpair->flags.delay_cmd_submit = opts->delay_cmd_submit; 10908b203d19SChangpeng Liu 10918b203d19SChangpeng Liu qpair = &pqpair->qpair; 10928b203d19SChangpeng Liu 1093771f65bbSMonica Kenguva rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests, opts->async_mode); 10948b203d19SChangpeng Liu if (rc != 0) { 10958b203d19SChangpeng Liu nvme_pcie_qpair_destroy(qpair); 10968b203d19SChangpeng Liu return NULL; 10978b203d19SChangpeng Liu } 10988b203d19SChangpeng Liu 10998b203d19SChangpeng Liu rc = nvme_pcie_qpair_construct(qpair, opts); 11008b203d19SChangpeng Liu 11018b203d19SChangpeng Liu if (rc != 0) { 11028b203d19SChangpeng Liu nvme_pcie_qpair_destroy(qpair); 11038b203d19SChangpeng Liu return NULL; 11048b203d19SChangpeng Liu } 11058b203d19SChangpeng Liu 11068b203d19SChangpeng Liu return qpair; 11078b203d19SChangpeng Liu } 11088b203d19SChangpeng Liu 11098b203d19SChangpeng Liu int 11108b203d19SChangpeng Liu nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) 11118b203d19SChangpeng Liu { 111238736d99SJim Harris struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 11138b203d19SChangpeng Liu struct nvme_completion_poll_status *status; 11148b203d19SChangpeng Liu int rc; 11158b203d19SChangpeng 
Liu 11168b203d19SChangpeng Liu assert(ctrlr != NULL); 11178b203d19SChangpeng Liu 11188b203d19SChangpeng Liu if (ctrlr->is_removed) { 11198b203d19SChangpeng Liu goto free; 11208b203d19SChangpeng Liu } 11218b203d19SChangpeng Liu 11223cb9bc25SJim Harris if (ctrlr->prepare_for_reset) { 11233cb9bc25SJim Harris if (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING) { 11243cb9bc25SJim Harris pqpair->flags.defer_destruction = true; 11253cb9bc25SJim Harris } 11263cb9bc25SJim Harris goto clear_shadow_doorbells; 11273cb9bc25SJim Harris } 11283cb9bc25SJim Harris 1129662c0200SKonrad Sztyber /* If attempting to delete a qpair that's still being connected, we have to wait until it's 1130662c0200SKonrad Sztyber * finished, so that we don't free it while it's waiting for the create cq/sq callbacks. 1131662c0200SKonrad Sztyber */ 113297277e14SChangpeng Liu while (pqpair->pcie_state == NVME_PCIE_QPAIR_WAIT_FOR_CQ || 113397277e14SChangpeng Liu pqpair->pcie_state == NVME_PCIE_QPAIR_WAIT_FOR_SQ) { 1134662c0200SKonrad Sztyber rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); 1135662c0200SKonrad Sztyber if (rc < 0) { 1136662c0200SKonrad Sztyber break; 1137662c0200SKonrad Sztyber } 1138662c0200SKonrad Sztyber } 1139662c0200SKonrad Sztyber 11408b203d19SChangpeng Liu status = calloc(1, sizeof(*status)); 11418b203d19SChangpeng Liu if (!status) { 11428b203d19SChangpeng Liu SPDK_ERRLOG("Failed to allocate status tracker\n"); 1143c081a84cSJim Harris goto free; 11448b203d19SChangpeng Liu } 11458b203d19SChangpeng Liu 11468b203d19SChangpeng Liu /* Delete the I/O submission queue */ 11478b203d19SChangpeng Liu rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, status); 11488b203d19SChangpeng Liu if (rc != 0) { 11498b203d19SChangpeng Liu SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc); 11508b203d19SChangpeng Liu free(status); 1151c081a84cSJim Harris goto free; 11528b203d19SChangpeng Liu } 11538b203d19SChangpeng Liu if (nvme_wait_for_completion(ctrlr->adminq, status)) { 11548b203d19SChangpeng Liu if (!status->timed_out) { 11558b203d19SChangpeng Liu free(status); 11568b203d19SChangpeng Liu } 1157c081a84cSJim Harris goto free; 11588b203d19SChangpeng Liu } 11598b203d19SChangpeng Liu 11608b203d19SChangpeng Liu /* Now that the submission queue is deleted, the device is supposed to have 11618b203d19SChangpeng Liu * completed any outstanding I/O. Try to complete them. If they don't complete, 11628b203d19SChangpeng Liu * they'll be marked as aborted and completed below. 
*/ 1163dbecab8dSChangpeng Liu if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) { 11648b203d19SChangpeng Liu nvme_pcie_qpair_process_completions(qpair, 0); 1165dbecab8dSChangpeng Liu } 11668b203d19SChangpeng Liu 11678b203d19SChangpeng Liu memset(status, 0, sizeof(*status)); 11688b203d19SChangpeng Liu /* Delete the completion queue */ 11698b203d19SChangpeng Liu rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status); 11708b203d19SChangpeng Liu if (rc != 0) { 11718b203d19SChangpeng Liu SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc); 11728b203d19SChangpeng Liu free(status); 1173c081a84cSJim Harris goto free; 11748b203d19SChangpeng Liu } 11758b203d19SChangpeng Liu if (nvme_wait_for_completion(ctrlr->adminq, status)) { 11768b203d19SChangpeng Liu if (!status->timed_out) { 11778b203d19SChangpeng Liu free(status); 11788b203d19SChangpeng Liu } 1179c081a84cSJim Harris goto free; 11808b203d19SChangpeng Liu } 11818b203d19SChangpeng Liu free(status); 11828b203d19SChangpeng Liu 11833cb9bc25SJim Harris clear_shadow_doorbells: 118446e531bdSSebastian Brzezinka if (pqpair->flags.has_shadow_doorbell && ctrlr->shadow_doorbell) { 118538736d99SJim Harris *pqpair->shadow_doorbell.sq_tdbl = 0; 118638736d99SJim Harris *pqpair->shadow_doorbell.cq_hdbl = 0; 118738736d99SJim Harris *pqpair->shadow_doorbell.sq_eventidx = 0; 118838736d99SJim Harris *pqpair->shadow_doorbell.cq_eventidx = 0; 118938736d99SJim Harris } 11908b203d19SChangpeng Liu free: 11918b203d19SChangpeng Liu if (qpair->no_deletion_notification_needed == 0) { 11928b203d19SChangpeng Liu /* Abort the rest of the I/O */ 11938b203d19SChangpeng Liu nvme_pcie_qpair_abort_trackers(qpair, 1); 11948b203d19SChangpeng Liu } 11958b203d19SChangpeng Liu 11963cb9bc25SJim Harris if (!pqpair->flags.defer_destruction) { 11978b203d19SChangpeng Liu nvme_pcie_qpair_destroy(qpair); 11983cb9bc25SJim Harris } 11998b203d19SChangpeng Liu return 0; 12008b203d19SChangpeng Liu } 12018b203d19SChangpeng Liu 1202b69827a3SChangpeng Liu static void 1203b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr) 1204b69827a3SChangpeng Liu { 120515b7d3baSJim Harris if (!qpair->in_completion_context) { 120615b7d3baSJim Harris struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 120715b7d3baSJim Harris 120815b7d3baSJim Harris tr->bad_vtophys = 1; 120915b7d3baSJim Harris pqpair->flags.has_pending_vtophys_failures = 1; 121015b7d3baSJim Harris return; 121115b7d3baSJim Harris } 121215b7d3baSJim Harris 1213b69827a3SChangpeng Liu /* 1214b69827a3SChangpeng Liu * Bad vtophys translation, so abort this request and return 1215b69827a3SChangpeng Liu * immediately. 1216b69827a3SChangpeng Liu */ 121715b7d3baSJim Harris SPDK_ERRLOG("vtophys or other payload buffer related error\n"); 1218b69827a3SChangpeng Liu nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, 1219b69827a3SChangpeng Liu SPDK_NVME_SC_INVALID_FIELD, 1220b69827a3SChangpeng Liu 1 /* do not retry */, true); 1221b69827a3SChangpeng Liu } 1222b69827a3SChangpeng Liu 1223b69827a3SChangpeng Liu /* 1224b69827a3SChangpeng Liu * Append PRP list entries to describe a virtually contiguous buffer starting at virt_addr of len bytes. 1225b69827a3SChangpeng Liu * 1226b69827a3SChangpeng Liu * *prp_index will be updated to account for the number of PRP entries used. 
1227b69827a3SChangpeng Liu */ 1228b69827a3SChangpeng Liu static inline int 1229d4844d5bSChangpeng Liu nvme_pcie_prp_list_append(struct spdk_nvme_ctrlr *ctrlr, struct nvme_tracker *tr, 1230d4844d5bSChangpeng Liu uint32_t *prp_index, void *virt_addr, size_t len, 1231b69827a3SChangpeng Liu uint32_t page_size) 1232b69827a3SChangpeng Liu { 1233b69827a3SChangpeng Liu struct spdk_nvme_cmd *cmd = &tr->req->cmd; 1234b69827a3SChangpeng Liu uintptr_t page_mask = page_size - 1; 1235b69827a3SChangpeng Liu uint64_t phys_addr; 1236b69827a3SChangpeng Liu uint32_t i; 1237b69827a3SChangpeng Liu 1238b69827a3SChangpeng Liu SPDK_DEBUGLOG(nvme, "prp_index:%u virt_addr:%p len:%u\n", 1239b69827a3SChangpeng Liu *prp_index, virt_addr, (uint32_t)len); 1240b69827a3SChangpeng Liu 1241b69827a3SChangpeng Liu if (spdk_unlikely(((uintptr_t)virt_addr & 3) != 0)) { 1242b69827a3SChangpeng Liu SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr); 1243b69827a3SChangpeng Liu return -EFAULT; 1244b69827a3SChangpeng Liu } 1245b69827a3SChangpeng Liu 1246b69827a3SChangpeng Liu i = *prp_index; 1247b69827a3SChangpeng Liu while (len) { 1248b69827a3SChangpeng Liu uint32_t seg_len; 1249b69827a3SChangpeng Liu 1250b69827a3SChangpeng Liu /* 1251b69827a3SChangpeng Liu * prp_index 0 is stored in prp1, and the rest are stored in the prp[] array, 1252b69827a3SChangpeng Liu * so prp_index == count is valid. 1253b69827a3SChangpeng Liu */ 1254b69827a3SChangpeng Liu if (spdk_unlikely(i > SPDK_COUNTOF(tr->u.prp))) { 1255b69827a3SChangpeng Liu SPDK_ERRLOG("out of PRP entries\n"); 1256b69827a3SChangpeng Liu return -EFAULT; 1257b69827a3SChangpeng Liu } 1258b69827a3SChangpeng Liu 1259d4844d5bSChangpeng Liu phys_addr = nvme_pcie_vtophys(ctrlr, virt_addr, NULL); 1260b69827a3SChangpeng Liu if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR)) { 1261b69827a3SChangpeng Liu SPDK_ERRLOG("vtophys(%p) failed\n", virt_addr); 1262b69827a3SChangpeng Liu return -EFAULT; 1263b69827a3SChangpeng Liu } 1264b69827a3SChangpeng Liu 1265b69827a3SChangpeng Liu if (i == 0) { 1266b69827a3SChangpeng Liu SPDK_DEBUGLOG(nvme, "prp1 = %p\n", (void *)phys_addr); 1267b69827a3SChangpeng Liu cmd->dptr.prp.prp1 = phys_addr; 1268b69827a3SChangpeng Liu seg_len = page_size - ((uintptr_t)virt_addr & page_mask); 1269b69827a3SChangpeng Liu } else { 1270b69827a3SChangpeng Liu if ((phys_addr & page_mask) != 0) { 1271b69827a3SChangpeng Liu SPDK_ERRLOG("PRP %u not page aligned (%p)\n", i, virt_addr); 1272b69827a3SChangpeng Liu return -EFAULT; 1273b69827a3SChangpeng Liu } 1274b69827a3SChangpeng Liu 1275b69827a3SChangpeng Liu SPDK_DEBUGLOG(nvme, "prp[%u] = %p\n", i - 1, (void *)phys_addr); 1276b69827a3SChangpeng Liu tr->u.prp[i - 1] = phys_addr; 1277b69827a3SChangpeng Liu seg_len = page_size; 1278b69827a3SChangpeng Liu } 1279b69827a3SChangpeng Liu 1280b69827a3SChangpeng Liu seg_len = spdk_min(seg_len, len); 1281075d422fSKonrad Sztyber virt_addr = (uint8_t *)virt_addr + seg_len; 1282b69827a3SChangpeng Liu len -= seg_len; 1283b69827a3SChangpeng Liu i++; 1284b69827a3SChangpeng Liu } 1285b69827a3SChangpeng Liu 1286b69827a3SChangpeng Liu cmd->psdt = SPDK_NVME_PSDT_PRP; 1287b69827a3SChangpeng Liu if (i <= 1) { 1288b69827a3SChangpeng Liu cmd->dptr.prp.prp2 = 0; 1289b69827a3SChangpeng Liu } else if (i == 2) { 1290b69827a3SChangpeng Liu cmd->dptr.prp.prp2 = tr->u.prp[0]; 1291b69827a3SChangpeng Liu SPDK_DEBUGLOG(nvme, "prp2 = %p\n", (void *)cmd->dptr.prp.prp2); 1292b69827a3SChangpeng Liu } else { 1293b69827a3SChangpeng Liu cmd->dptr.prp.prp2 = tr->prp_sgl_bus_addr; 1294b69827a3SChangpeng Liu 
SPDK_DEBUGLOG(nvme, "prp2 = %p (PRP list)\n", (void *)cmd->dptr.prp.prp2); 1295b69827a3SChangpeng Liu } 1296b69827a3SChangpeng Liu 1297b69827a3SChangpeng Liu *prp_index = i; 1298b69827a3SChangpeng Liu return 0; 1299b69827a3SChangpeng Liu } 1300b69827a3SChangpeng Liu 1301b69827a3SChangpeng Liu static int 1302b69827a3SChangpeng Liu nvme_pcie_qpair_build_request_invalid(struct spdk_nvme_qpair *qpair, 1303b69827a3SChangpeng Liu struct nvme_request *req, struct nvme_tracker *tr, bool dword_aligned) 1304b69827a3SChangpeng Liu { 1305b69827a3SChangpeng Liu assert(0); 1306b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1307b69827a3SChangpeng Liu return -EINVAL; 1308b69827a3SChangpeng Liu } 1309b69827a3SChangpeng Liu 1310b69827a3SChangpeng Liu /** 1311b69827a3SChangpeng Liu * Build PRP list describing physically contiguous payload buffer. 1312b69827a3SChangpeng Liu */ 1313b69827a3SChangpeng Liu static int 1314b69827a3SChangpeng Liu nvme_pcie_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, 1315b69827a3SChangpeng Liu struct nvme_tracker *tr, bool dword_aligned) 1316b69827a3SChangpeng Liu { 1317b69827a3SChangpeng Liu uint32_t prp_index = 0; 1318b69827a3SChangpeng Liu int rc; 1319b69827a3SChangpeng Liu 1320d4844d5bSChangpeng Liu rc = nvme_pcie_prp_list_append(qpair->ctrlr, tr, &prp_index, 1321075d422fSKonrad Sztyber (uint8_t *)req->payload.contig_or_cb_arg + req->payload_offset, 1322b69827a3SChangpeng Liu req->payload_size, qpair->ctrlr->page_size); 1323b69827a3SChangpeng Liu if (rc) { 1324b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1325a1a2e2b4SVincent Fu } else { 1326a1a2e2b4SVincent Fu SPDK_DEBUGLOG(nvme, "Number of PRP entries: %" PRIu32 "\n", prp_index); 1327b69827a3SChangpeng Liu } 1328b69827a3SChangpeng Liu 1329b69827a3SChangpeng Liu return rc; 1330b69827a3SChangpeng Liu } 1331b69827a3SChangpeng Liu 1332b69827a3SChangpeng Liu /** 1333b69827a3SChangpeng Liu * Build an SGL describing a physically contiguous payload buffer. 1334b69827a3SChangpeng Liu * 1335b69827a3SChangpeng Liu * This is more efficient than using PRP because large buffers can be 1336b69827a3SChangpeng Liu * described this way. 
1337b69827a3SChangpeng Liu */ 1338b69827a3SChangpeng Liu static int 1339b69827a3SChangpeng Liu nvme_pcie_qpair_build_contig_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, 1340b69827a3SChangpeng Liu struct nvme_tracker *tr, bool dword_aligned) 1341b69827a3SChangpeng Liu { 1342075d422fSKonrad Sztyber uint8_t *virt_addr; 1343b69827a3SChangpeng Liu uint64_t phys_addr, mapping_length; 1344b69827a3SChangpeng Liu uint32_t length; 1345b69827a3SChangpeng Liu struct spdk_nvme_sgl_descriptor *sgl; 1346b69827a3SChangpeng Liu uint32_t nseg = 0; 1347b69827a3SChangpeng Liu 1348b69827a3SChangpeng Liu assert(req->payload_size != 0); 1349b69827a3SChangpeng Liu assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG); 1350b69827a3SChangpeng Liu 1351b69827a3SChangpeng Liu sgl = tr->u.sgl; 1352b69827a3SChangpeng Liu req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 1353b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.subtype = 0; 1354b69827a3SChangpeng Liu 1355b69827a3SChangpeng Liu length = req->payload_size; 1356075d422fSKonrad Sztyber /* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL, 1357075d422fSKonrad Sztyber * so just double cast it to make it go away */ 1358075d422fSKonrad Sztyber virt_addr = (uint8_t *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset); 1359b69827a3SChangpeng Liu 1360b69827a3SChangpeng Liu while (length > 0) { 1361b69827a3SChangpeng Liu if (nseg >= NVME_MAX_SGL_DESCRIPTORS) { 1362b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1363b69827a3SChangpeng Liu return -EFAULT; 1364b69827a3SChangpeng Liu } 1365b69827a3SChangpeng Liu 1366b69827a3SChangpeng Liu if (dword_aligned && ((uintptr_t)virt_addr & 3)) { 1367b69827a3SChangpeng Liu SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr); 1368b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1369b69827a3SChangpeng Liu return -EFAULT; 1370b69827a3SChangpeng Liu } 1371b69827a3SChangpeng Liu 1372b69827a3SChangpeng Liu mapping_length = length; 1373d4844d5bSChangpeng Liu phys_addr = nvme_pcie_vtophys(qpair->ctrlr, virt_addr, &mapping_length); 1374b69827a3SChangpeng Liu if (phys_addr == SPDK_VTOPHYS_ERROR) { 1375b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1376b69827a3SChangpeng Liu return -EFAULT; 1377b69827a3SChangpeng Liu } 1378b69827a3SChangpeng Liu 1379b69827a3SChangpeng Liu mapping_length = spdk_min(length, mapping_length); 1380b69827a3SChangpeng Liu 1381b69827a3SChangpeng Liu length -= mapping_length; 1382b69827a3SChangpeng Liu virt_addr += mapping_length; 1383b69827a3SChangpeng Liu 1384b69827a3SChangpeng Liu sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 1385b69827a3SChangpeng Liu sgl->unkeyed.length = mapping_length; 1386b69827a3SChangpeng Liu sgl->address = phys_addr; 1387b69827a3SChangpeng Liu sgl->unkeyed.subtype = 0; 1388b69827a3SChangpeng Liu 1389b69827a3SChangpeng Liu sgl++; 1390b69827a3SChangpeng Liu nseg++; 1391b69827a3SChangpeng Liu } 1392b69827a3SChangpeng Liu 1393b69827a3SChangpeng Liu if (nseg == 1) { 1394b69827a3SChangpeng Liu /* 1395b69827a3SChangpeng Liu * The whole transfer can be described by a single SGL descriptor. 1396b69827a3SChangpeng Liu * Use the special case described by the spec where SGL1's type is Data Block. 1397b69827a3SChangpeng Liu * This means the SGL in the tracker is not used at all, so copy the first (and only) 1398b69827a3SChangpeng Liu * SGL element into SGL1. 
1399b69827a3SChangpeng Liu */ 1400b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 1401b69827a3SChangpeng Liu req->cmd.dptr.sgl1.address = tr->u.sgl[0].address; 1402b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length; 1403b69827a3SChangpeng Liu } else { 1404b69827a3SChangpeng Liu /* SPDK NVMe driver supports only 1 SGL segment for now, it is enough because 1405b69827a3SChangpeng Liu * NVME_MAX_SGL_DESCRIPTORS * 16 is less than one page. 1406b69827a3SChangpeng Liu */ 1407b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT; 1408b69827a3SChangpeng Liu req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr; 1409b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor); 1410b69827a3SChangpeng Liu } 1411b69827a3SChangpeng Liu 1412a1a2e2b4SVincent Fu SPDK_DEBUGLOG(nvme, "Number of SGL descriptors: %" PRIu32 "\n", nseg); 1413b69827a3SChangpeng Liu return 0; 1414b69827a3SChangpeng Liu } 1415b69827a3SChangpeng Liu 1416b69827a3SChangpeng Liu /** 1417b69827a3SChangpeng Liu * Build SGL list describing scattered payload buffer. 1418b69827a3SChangpeng Liu */ 1419b69827a3SChangpeng Liu static int 1420b69827a3SChangpeng Liu nvme_pcie_qpair_build_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, 1421b69827a3SChangpeng Liu struct nvme_tracker *tr, bool dword_aligned) 1422b69827a3SChangpeng Liu { 1423b69827a3SChangpeng Liu int rc; 1424b69827a3SChangpeng Liu void *virt_addr; 1425b69827a3SChangpeng Liu uint64_t phys_addr, mapping_length; 1426b69827a3SChangpeng Liu uint32_t remaining_transfer_len, remaining_user_sge_len, length; 1427b69827a3SChangpeng Liu struct spdk_nvme_sgl_descriptor *sgl; 1428b69827a3SChangpeng Liu uint32_t nseg = 0; 1429e431ba2eSVincent Fu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 1430b69827a3SChangpeng Liu 1431b69827a3SChangpeng Liu /* 1432b69827a3SChangpeng Liu * Build scattered payloads. 
1433b69827a3SChangpeng Liu */ 1434b69827a3SChangpeng Liu assert(req->payload_size != 0); 1435b69827a3SChangpeng Liu assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 1436b69827a3SChangpeng Liu assert(req->payload.reset_sgl_fn != NULL); 1437b69827a3SChangpeng Liu assert(req->payload.next_sge_fn != NULL); 1438b69827a3SChangpeng Liu req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 1439b69827a3SChangpeng Liu 1440b69827a3SChangpeng Liu sgl = tr->u.sgl; 1441b69827a3SChangpeng Liu req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; 1442b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.subtype = 0; 1443b69827a3SChangpeng Liu 1444b69827a3SChangpeng Liu remaining_transfer_len = req->payload_size; 1445b69827a3SChangpeng Liu 1446b69827a3SChangpeng Liu while (remaining_transfer_len > 0) { 1447b69827a3SChangpeng Liu rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, 1448b69827a3SChangpeng Liu &virt_addr, &remaining_user_sge_len); 1449b69827a3SChangpeng Liu if (rc) { 1450b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1451b69827a3SChangpeng Liu return -EFAULT; 1452b69827a3SChangpeng Liu } 1453b69827a3SChangpeng Liu 1454b69827a3SChangpeng Liu /* Bit Bucket SGL descriptor */ 1455b69827a3SChangpeng Liu if ((uint64_t)virt_addr == UINT64_MAX) { 1456b69827a3SChangpeng Liu /* TODO: enable WRITE and COMPARE when necessary */ 1457b69827a3SChangpeng Liu if (req->cmd.opc != SPDK_NVME_OPC_READ) { 1458b69827a3SChangpeng Liu SPDK_ERRLOG("Only READ command can be supported\n"); 1459b69827a3SChangpeng Liu goto exit; 1460b69827a3SChangpeng Liu } 1461b69827a3SChangpeng Liu if (nseg >= NVME_MAX_SGL_DESCRIPTORS) { 1462b69827a3SChangpeng Liu SPDK_ERRLOG("Too many SGL entries\n"); 1463b69827a3SChangpeng Liu goto exit; 1464b69827a3SChangpeng Liu } 1465b69827a3SChangpeng Liu 1466b69827a3SChangpeng Liu sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_BIT_BUCKET; 1467b69827a3SChangpeng Liu /* If the SGL describes a destination data buffer, the length of data 1468b69827a3SChangpeng Liu * buffer shall be discarded by controller, and the length is included 1469b69827a3SChangpeng Liu * in Number of Logical Blocks (NLB) parameter. Otherwise, the length 1470b69827a3SChangpeng Liu * is not included in the NLB parameter. 
1471b69827a3SChangpeng Liu */ 1472b69827a3SChangpeng Liu remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len); 1473b69827a3SChangpeng Liu remaining_transfer_len -= remaining_user_sge_len; 1474b69827a3SChangpeng Liu 1475b69827a3SChangpeng Liu sgl->unkeyed.length = remaining_user_sge_len; 1476b69827a3SChangpeng Liu sgl->address = 0; 1477b69827a3SChangpeng Liu sgl->unkeyed.subtype = 0; 1478b69827a3SChangpeng Liu 1479b69827a3SChangpeng Liu sgl++; 1480b69827a3SChangpeng Liu nseg++; 1481b69827a3SChangpeng Liu 1482b69827a3SChangpeng Liu continue; 1483b69827a3SChangpeng Liu } 1484b69827a3SChangpeng Liu 1485b69827a3SChangpeng Liu remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len); 1486b69827a3SChangpeng Liu remaining_transfer_len -= remaining_user_sge_len; 1487b69827a3SChangpeng Liu while (remaining_user_sge_len > 0) { 1488b69827a3SChangpeng Liu if (nseg >= NVME_MAX_SGL_DESCRIPTORS) { 1489b69827a3SChangpeng Liu SPDK_ERRLOG("Too many SGL entries\n"); 1490b69827a3SChangpeng Liu goto exit; 1491b69827a3SChangpeng Liu } 1492b69827a3SChangpeng Liu 1493b69827a3SChangpeng Liu if (dword_aligned && ((uintptr_t)virt_addr & 3)) { 1494b69827a3SChangpeng Liu SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr); 1495b69827a3SChangpeng Liu goto exit; 1496b69827a3SChangpeng Liu } 1497b69827a3SChangpeng Liu 1498b69827a3SChangpeng Liu mapping_length = remaining_user_sge_len; 1499d4844d5bSChangpeng Liu phys_addr = nvme_pcie_vtophys(qpair->ctrlr, virt_addr, &mapping_length); 1500b69827a3SChangpeng Liu if (phys_addr == SPDK_VTOPHYS_ERROR) { 1501b69827a3SChangpeng Liu goto exit; 1502b69827a3SChangpeng Liu } 1503b69827a3SChangpeng Liu 1504b69827a3SChangpeng Liu length = spdk_min(remaining_user_sge_len, mapping_length); 1505b69827a3SChangpeng Liu remaining_user_sge_len -= length; 1506075d422fSKonrad Sztyber virt_addr = (uint8_t *)virt_addr + length; 1507b69827a3SChangpeng Liu 1508e431ba2eSVincent Fu if (!pqpair->flags.disable_pcie_sgl_merge && nseg > 0 && 1509e431ba2eSVincent Fu phys_addr == (*(sgl - 1)).address + (*(sgl - 1)).unkeyed.length) { 1510b69827a3SChangpeng Liu /* extend previous entry */ 1511b69827a3SChangpeng Liu (*(sgl - 1)).unkeyed.length += length; 1512b69827a3SChangpeng Liu continue; 1513b69827a3SChangpeng Liu } 1514b69827a3SChangpeng Liu 1515b69827a3SChangpeng Liu sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 1516b69827a3SChangpeng Liu sgl->unkeyed.length = length; 1517b69827a3SChangpeng Liu sgl->address = phys_addr; 1518b69827a3SChangpeng Liu sgl->unkeyed.subtype = 0; 1519b69827a3SChangpeng Liu 1520b69827a3SChangpeng Liu sgl++; 1521b69827a3SChangpeng Liu nseg++; 1522b69827a3SChangpeng Liu } 1523b69827a3SChangpeng Liu } 1524b69827a3SChangpeng Liu 1525b69827a3SChangpeng Liu if (nseg == 1) { 1526b69827a3SChangpeng Liu /* 1527b69827a3SChangpeng Liu * The whole transfer can be described by a single SGL descriptor. 1528b69827a3SChangpeng Liu * Use the special case described by the spec where SGL1's type is Data Block. 1529b69827a3SChangpeng Liu * This means the SGL in the tracker is not used at all, so copy the first (and only) 1530b69827a3SChangpeng Liu * SGL element into SGL1. 
1531b69827a3SChangpeng Liu */ 1532b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 1533b69827a3SChangpeng Liu req->cmd.dptr.sgl1.address = tr->u.sgl[0].address; 1534b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length; 1535b69827a3SChangpeng Liu } else { 1536b69827a3SChangpeng Liu /* SPDK NVMe driver supports only 1 SGL segment for now, it is enough because 1537b69827a3SChangpeng Liu * NVME_MAX_SGL_DESCRIPTORS * 16 is less than one page. 1538b69827a3SChangpeng Liu */ 1539b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT; 1540b69827a3SChangpeng Liu req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr; 1541b69827a3SChangpeng Liu req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor); 1542b69827a3SChangpeng Liu } 1543b69827a3SChangpeng Liu 1544a1a2e2b4SVincent Fu SPDK_DEBUGLOG(nvme, "Number of SGL descriptors: %" PRIu32 "\n", nseg); 1545b69827a3SChangpeng Liu return 0; 1546b69827a3SChangpeng Liu 1547b69827a3SChangpeng Liu exit: 1548b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1549b69827a3SChangpeng Liu return -EFAULT; 1550b69827a3SChangpeng Liu } 1551b69827a3SChangpeng Liu 1552b69827a3SChangpeng Liu /** 1553b69827a3SChangpeng Liu * Build PRP list describing scattered payload buffer. 1554b69827a3SChangpeng Liu */ 1555b69827a3SChangpeng Liu static int 1556b69827a3SChangpeng Liu nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, 1557b69827a3SChangpeng Liu struct nvme_tracker *tr, bool dword_aligned) 1558b69827a3SChangpeng Liu { 1559b69827a3SChangpeng Liu int rc; 1560b69827a3SChangpeng Liu void *virt_addr; 1561b69827a3SChangpeng Liu uint32_t remaining_transfer_len, length; 1562b69827a3SChangpeng Liu uint32_t prp_index = 0; 1563b69827a3SChangpeng Liu uint32_t page_size = qpair->ctrlr->page_size; 1564b69827a3SChangpeng Liu 1565b69827a3SChangpeng Liu /* 1566b69827a3SChangpeng Liu * Build scattered payloads. 1567b69827a3SChangpeng Liu */ 1568b69827a3SChangpeng Liu assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); 1569b69827a3SChangpeng Liu assert(req->payload.reset_sgl_fn != NULL); 1570b69827a3SChangpeng Liu req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); 1571b69827a3SChangpeng Liu 1572b69827a3SChangpeng Liu remaining_transfer_len = req->payload_size; 1573b69827a3SChangpeng Liu while (remaining_transfer_len > 0) { 1574b69827a3SChangpeng Liu assert(req->payload.next_sge_fn != NULL); 1575b69827a3SChangpeng Liu rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length); 1576b69827a3SChangpeng Liu if (rc) { 1577b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1578b69827a3SChangpeng Liu return -EFAULT; 1579b69827a3SChangpeng Liu } 1580b69827a3SChangpeng Liu 1581b69827a3SChangpeng Liu length = spdk_min(remaining_transfer_len, length); 1582b69827a3SChangpeng Liu 1583b69827a3SChangpeng Liu /* 1584b69827a3SChangpeng Liu * Any incompatible sges should have been handled up in the splitting routine, 1585b69827a3SChangpeng Liu * but assert here as an additional check. 1586b69827a3SChangpeng Liu * 1587b69827a3SChangpeng Liu * All SGEs except last must end on a page boundary. 
1588b69827a3SChangpeng Liu */ 1589b69827a3SChangpeng Liu assert((length == remaining_transfer_len) || 1590b69827a3SChangpeng Liu _is_page_aligned((uintptr_t)virt_addr + length, page_size)); 1591b69827a3SChangpeng Liu 1592d4844d5bSChangpeng Liu rc = nvme_pcie_prp_list_append(qpair->ctrlr, tr, &prp_index, virt_addr, length, page_size); 1593b69827a3SChangpeng Liu if (rc) { 1594b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1595b69827a3SChangpeng Liu return rc; 1596b69827a3SChangpeng Liu } 1597b69827a3SChangpeng Liu 1598b69827a3SChangpeng Liu remaining_transfer_len -= length; 1599b69827a3SChangpeng Liu } 1600b69827a3SChangpeng Liu 1601a1a2e2b4SVincent Fu SPDK_DEBUGLOG(nvme, "Number of PRP entries: %" PRIu32 "\n", prp_index); 1602b69827a3SChangpeng Liu return 0; 1603b69827a3SChangpeng Liu } 1604b69827a3SChangpeng Liu 1605b69827a3SChangpeng Liu typedef int(*build_req_fn)(struct spdk_nvme_qpair *, struct nvme_request *, struct nvme_tracker *, 1606b69827a3SChangpeng Liu bool); 1607b69827a3SChangpeng Liu 1608b69827a3SChangpeng Liu static build_req_fn const g_nvme_pcie_build_req_table[][2] = { 1609b69827a3SChangpeng Liu [NVME_PAYLOAD_TYPE_INVALID] = { 1610b69827a3SChangpeng Liu nvme_pcie_qpair_build_request_invalid, /* PRP */ 1611b69827a3SChangpeng Liu nvme_pcie_qpair_build_request_invalid /* SGL */ 1612b69827a3SChangpeng Liu }, 1613b69827a3SChangpeng Liu [NVME_PAYLOAD_TYPE_CONTIG] = { 1614b69827a3SChangpeng Liu nvme_pcie_qpair_build_contig_request, /* PRP */ 1615b69827a3SChangpeng Liu nvme_pcie_qpair_build_contig_hw_sgl_request /* SGL */ 1616b69827a3SChangpeng Liu }, 1617b69827a3SChangpeng Liu [NVME_PAYLOAD_TYPE_SGL] = { 1618b69827a3SChangpeng Liu nvme_pcie_qpair_build_prps_sgl_request, /* PRP */ 1619b69827a3SChangpeng Liu nvme_pcie_qpair_build_hw_sgl_request /* SGL */ 1620b69827a3SChangpeng Liu } 1621b69827a3SChangpeng Liu }; 1622b69827a3SChangpeng Liu 1623b69827a3SChangpeng Liu static int 1624b69827a3SChangpeng Liu nvme_pcie_qpair_build_metadata(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr, 1625074c62d0SMarcin Spiewak bool sgl_supported, bool mptr_sgl_supported, bool dword_aligned) 1626b69827a3SChangpeng Liu { 1627b69827a3SChangpeng Liu void *md_payload; 1628b69827a3SChangpeng Liu struct nvme_request *req = tr->req; 16292be196c6SJim Harris uint64_t mapping_length; 1630b69827a3SChangpeng Liu 1631b69827a3SChangpeng Liu if (req->payload.md) { 1632075d422fSKonrad Sztyber md_payload = (uint8_t *)req->payload.md + req->md_offset; 1633b69827a3SChangpeng Liu if (dword_aligned && ((uintptr_t)md_payload & 3)) { 1634b69827a3SChangpeng Liu SPDK_ERRLOG("virt_addr %p not dword aligned\n", md_payload); 1635b69827a3SChangpeng Liu goto exit; 1636b69827a3SChangpeng Liu } 1637b69827a3SChangpeng Liu 16382be196c6SJim Harris mapping_length = req->md_size; 1639074c62d0SMarcin Spiewak if (sgl_supported && mptr_sgl_supported && dword_aligned) { 1640b69827a3SChangpeng Liu assert(req->cmd.psdt == SPDK_NVME_PSDT_SGL_MPTR_CONTIG); 1641b69827a3SChangpeng Liu req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL; 16422be196c6SJim Harris 16432be196c6SJim Harris tr->meta_sgl.address = nvme_pcie_vtophys(qpair->ctrlr, md_payload, &mapping_length); 16442be196c6SJim Harris if (tr->meta_sgl.address == SPDK_VTOPHYS_ERROR || mapping_length != req->md_size) { 1645b69827a3SChangpeng Liu goto exit; 1646b69827a3SChangpeng Liu } 1647b69827a3SChangpeng Liu tr->meta_sgl.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; 1648b69827a3SChangpeng Liu tr->meta_sgl.unkeyed.length = req->md_size; 1649b69827a3SChangpeng Liu 
tr->meta_sgl.unkeyed.subtype = 0; 1650b69827a3SChangpeng Liu req->cmd.mptr = tr->prp_sgl_bus_addr - sizeof(struct spdk_nvme_sgl_descriptor); 1651b69827a3SChangpeng Liu } else { 16522be196c6SJim Harris req->cmd.mptr = nvme_pcie_vtophys(qpair->ctrlr, md_payload, &mapping_length); 16532be196c6SJim Harris if (req->cmd.mptr == SPDK_VTOPHYS_ERROR || mapping_length != req->md_size) { 1654b69827a3SChangpeng Liu goto exit; 1655b69827a3SChangpeng Liu } 1656b69827a3SChangpeng Liu } 1657b69827a3SChangpeng Liu } 1658b69827a3SChangpeng Liu 1659b69827a3SChangpeng Liu return 0; 1660b69827a3SChangpeng Liu 1661b69827a3SChangpeng Liu exit: 1662b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(qpair, tr); 1663b69827a3SChangpeng Liu return -EINVAL; 1664b69827a3SChangpeng Liu } 1665b69827a3SChangpeng Liu 1666b69827a3SChangpeng Liu int 1667b69827a3SChangpeng Liu nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req) 1668b69827a3SChangpeng Liu { 1669b69827a3SChangpeng Liu struct nvme_tracker *tr; 1670b69827a3SChangpeng Liu int rc = 0; 1671b69827a3SChangpeng Liu struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; 1672b69827a3SChangpeng Liu struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 1673b69827a3SChangpeng Liu enum nvme_payload_type payload_type; 1674b69827a3SChangpeng Liu bool sgl_supported; 1675074c62d0SMarcin Spiewak bool mptr_sgl_supported; 1676b69827a3SChangpeng Liu bool dword_aligned = true; 1677b69827a3SChangpeng Liu 1678b69827a3SChangpeng Liu if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { 1679e10b4806SJim Harris nvme_ctrlr_lock(ctrlr); 1680b69827a3SChangpeng Liu } 1681b69827a3SChangpeng Liu 1682b69827a3SChangpeng Liu tr = TAILQ_FIRST(&pqpair->free_tr); 1683b69827a3SChangpeng Liu 1684b69827a3SChangpeng Liu if (tr == NULL) { 1685b69827a3SChangpeng Liu pqpair->stat->queued_requests++; 1686b69827a3SChangpeng Liu /* Inform the upper layer to try again later. */ 1687b69827a3SChangpeng Liu rc = -EAGAIN; 1688b69827a3SChangpeng Liu goto exit; 1689b69827a3SChangpeng Liu } 1690b69827a3SChangpeng Liu 1691b69827a3SChangpeng Liu pqpair->stat->submitted_requests++; 1692b69827a3SChangpeng Liu TAILQ_REMOVE(&pqpair->free_tr, tr, tq_list); /* remove tr from free_tr */ 1693b69827a3SChangpeng Liu TAILQ_INSERT_TAIL(&pqpair->outstanding_tr, tr, tq_list); 169488739040SAtul Malakar pqpair->qpair.queue_depth++; 1695b69827a3SChangpeng Liu tr->req = req; 1696b69827a3SChangpeng Liu tr->cb_fn = req->cb_fn; 1697b69827a3SChangpeng Liu tr->cb_arg = req->cb_arg; 1698b69827a3SChangpeng Liu req->cmd.cid = tr->cid; 169974c16d8eSAlex Michon /* Use PRP by default. This bit will be overridden below if needed. */ 170074c16d8eSAlex Michon req->cmd.psdt = SPDK_NVME_PSDT_PRP; 1701b69827a3SChangpeng Liu 1702b69827a3SChangpeng Liu if (req->payload_size != 0) { 1703b69827a3SChangpeng Liu payload_type = nvme_payload_type(&req->payload); 1704b69827a3SChangpeng Liu /* According to the specification, PRPs shall be used for all 1705b69827a3SChangpeng Liu * Admin commands for NVMe over PCIe implementations. 
1706b69827a3SChangpeng Liu */ 1707b69827a3SChangpeng Liu sgl_supported = (ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) != 0 && 1708b69827a3SChangpeng Liu !nvme_qpair_is_admin_queue(qpair); 1709074c62d0SMarcin Spiewak mptr_sgl_supported = (ctrlr->flags & SPDK_NVME_CTRLR_MPTR_SGL_SUPPORTED) != 0 && 1710074c62d0SMarcin Spiewak !nvme_qpair_is_admin_queue(qpair); 1711b69827a3SChangpeng Liu 1712b69827a3SChangpeng Liu if (sgl_supported) { 1713b69827a3SChangpeng Liu /* Don't use SGL for DSM command */ 1714b69827a3SChangpeng Liu if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_NO_SGL_FOR_DSM) && 1715b69827a3SChangpeng Liu (req->cmd.opc == SPDK_NVME_OPC_DATASET_MANAGEMENT))) { 1716b69827a3SChangpeng Liu sgl_supported = false; 1717b69827a3SChangpeng Liu } 1718b69827a3SChangpeng Liu } 1719b69827a3SChangpeng Liu 1720b69827a3SChangpeng Liu if (sgl_supported && !(ctrlr->flags & SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT)) { 1721b69827a3SChangpeng Liu dword_aligned = false; 1722b69827a3SChangpeng Liu } 172315b7d3baSJim Harris 172415b7d3baSJim Harris /* If we fail to build the request or the metadata, do not return the -EFAULT back up 172515b7d3baSJim Harris * the stack. This ensures that we always fail these types of requests via a 172615b7d3baSJim Harris * completion callback, and never in the context of the submission. 172715b7d3baSJim Harris */ 1728b69827a3SChangpeng Liu rc = g_nvme_pcie_build_req_table[payload_type][sgl_supported](qpair, req, tr, dword_aligned); 1729b69827a3SChangpeng Liu if (rc < 0) { 173015b7d3baSJim Harris assert(rc == -EFAULT); 173115b7d3baSJim Harris rc = 0; 1732b69827a3SChangpeng Liu goto exit; 1733b69827a3SChangpeng Liu } 1734b69827a3SChangpeng Liu 1735074c62d0SMarcin Spiewak rc = nvme_pcie_qpair_build_metadata(qpair, tr, sgl_supported, mptr_sgl_supported, dword_aligned); 1736b69827a3SChangpeng Liu if (rc < 0) { 173715b7d3baSJim Harris assert(rc == -EFAULT); 173815b7d3baSJim Harris rc = 0; 1739b69827a3SChangpeng Liu goto exit; 1740b69827a3SChangpeng Liu } 1741b69827a3SChangpeng Liu } 1742b69827a3SChangpeng Liu 1743b69827a3SChangpeng Liu nvme_pcie_qpair_submit_tracker(qpair, tr); 1744b69827a3SChangpeng Liu 1745b69827a3SChangpeng Liu exit: 1746b69827a3SChangpeng Liu if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { 1747e10b4806SJim Harris nvme_ctrlr_unlock(ctrlr); 1748b69827a3SChangpeng Liu } 1749b69827a3SChangpeng Liu 1750b69827a3SChangpeng Liu return rc; 1751b69827a3SChangpeng Liu } 1752b69827a3SChangpeng Liu 17536b4b2d29SChangpeng Liu struct spdk_nvme_transport_poll_group * 17546b4b2d29SChangpeng Liu nvme_pcie_poll_group_create(void) 17556b4b2d29SChangpeng Liu { 17566b4b2d29SChangpeng Liu struct nvme_pcie_poll_group *group = calloc(1, sizeof(*group)); 17576b4b2d29SChangpeng Liu 17586b4b2d29SChangpeng Liu if (group == NULL) { 17596b4b2d29SChangpeng Liu SPDK_ERRLOG("Unable to allocate poll group.\n"); 17606b4b2d29SChangpeng Liu return NULL; 17616b4b2d29SChangpeng Liu } 17626b4b2d29SChangpeng Liu 17636b4b2d29SChangpeng Liu return &group->group; 17646b4b2d29SChangpeng Liu } 17656b4b2d29SChangpeng Liu 17666b4b2d29SChangpeng Liu int 17676b4b2d29SChangpeng Liu nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair) 17686b4b2d29SChangpeng Liu { 17696b4b2d29SChangpeng Liu return 0; 17706b4b2d29SChangpeng Liu } 17716b4b2d29SChangpeng Liu 17726b4b2d29SChangpeng Liu int 17736b4b2d29SChangpeng Liu nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair) 17746b4b2d29SChangpeng Liu { 17756b4b2d29SChangpeng Liu return 0; 17766b4b2d29SChangpeng Liu } 
17776b4b2d29SChangpeng Liu 17786b4b2d29SChangpeng Liu int 17796b4b2d29SChangpeng Liu nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup, 17806b4b2d29SChangpeng Liu struct spdk_nvme_qpair *qpair) 17816b4b2d29SChangpeng Liu { 17826b4b2d29SChangpeng Liu return 0; 17836b4b2d29SChangpeng Liu } 17846b4b2d29SChangpeng Liu 17856b4b2d29SChangpeng Liu int 17866b4b2d29SChangpeng Liu nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup, 17876b4b2d29SChangpeng Liu struct spdk_nvme_qpair *qpair) 17886b4b2d29SChangpeng Liu { 1789ea2db5bbSShuhei Matsumoto struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); 1790ea2db5bbSShuhei Matsumoto 1791ea2db5bbSShuhei Matsumoto pqpair->stat = &g_dummy_stat; 17926b4b2d29SChangpeng Liu return 0; 17936b4b2d29SChangpeng Liu } 17946b4b2d29SChangpeng Liu 17956b4b2d29SChangpeng Liu int64_t 17966b4b2d29SChangpeng Liu nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup, 17976b4b2d29SChangpeng Liu uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb) 17986b4b2d29SChangpeng Liu { 17996b4b2d29SChangpeng Liu struct spdk_nvme_qpair *qpair, *tmp_qpair; 18006b4b2d29SChangpeng Liu int32_t local_completions = 0; 18016b4b2d29SChangpeng Liu int64_t total_completions = 0; 18026b4b2d29SChangpeng Liu 18036b4b2d29SChangpeng Liu STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) { 18046b4b2d29SChangpeng Liu disconnected_qpair_cb(qpair, tgroup->group->ctx); 18056b4b2d29SChangpeng Liu } 18066b4b2d29SChangpeng Liu 18076b4b2d29SChangpeng Liu STAILQ_FOREACH_SAFE(qpair, &tgroup->connected_qpairs, poll_group_stailq, tmp_qpair) { 18086b4b2d29SChangpeng Liu local_completions = spdk_nvme_qpair_process_completions(qpair, completions_per_qpair); 180975d38a30SShuhei Matsumoto if (spdk_unlikely(local_completions < 0)) { 18106b4b2d29SChangpeng Liu disconnected_qpair_cb(qpair, tgroup->group->ctx); 181175d38a30SShuhei Matsumoto total_completions = -ENXIO; 181275d38a30SShuhei Matsumoto } else if (spdk_likely(total_completions >= 0)) { 18136b4b2d29SChangpeng Liu total_completions += local_completions; 18146b4b2d29SChangpeng Liu } 181575d38a30SShuhei Matsumoto } 18166b4b2d29SChangpeng Liu 18176b4b2d29SChangpeng Liu return total_completions; 18186b4b2d29SChangpeng Liu } 18196b4b2d29SChangpeng Liu 1820*1efa1b16SAnkit Kumar void 1821*1efa1b16SAnkit Kumar nvme_pcie_poll_group_check_disconnected_qpairs(struct spdk_nvme_transport_poll_group *tgroup, 1822*1efa1b16SAnkit Kumar spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb) 1823*1efa1b16SAnkit Kumar { 1824*1efa1b16SAnkit Kumar struct spdk_nvme_qpair *qpair, *tmp_qpair; 1825*1efa1b16SAnkit Kumar 1826*1efa1b16SAnkit Kumar STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) { 1827*1efa1b16SAnkit Kumar disconnected_qpair_cb(qpair, tgroup->group->ctx); 1828*1efa1b16SAnkit Kumar } 1829*1efa1b16SAnkit Kumar } 1830*1efa1b16SAnkit Kumar 18316b4b2d29SChangpeng Liu int 18326b4b2d29SChangpeng Liu nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup) 18336b4b2d29SChangpeng Liu { 18346b4b2d29SChangpeng Liu if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { 18356b4b2d29SChangpeng Liu return -EBUSY; 18366b4b2d29SChangpeng Liu } 18376b4b2d29SChangpeng Liu 18386b4b2d29SChangpeng Liu free(tgroup); 18396b4b2d29SChangpeng Liu 18406b4b2d29SChangpeng Liu return 0; 18416b4b2d29SChangpeng Liu } 1842971f07b9SJim Harris 1843c88345abSChangpeng 
Liu int 1844c88345abSChangpeng Liu nvme_pcie_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup, 1845c88345abSChangpeng Liu struct spdk_nvme_transport_poll_group_stat **_stats) 1846c88345abSChangpeng Liu { 1847c88345abSChangpeng Liu struct nvme_pcie_poll_group *group; 1848c88345abSChangpeng Liu struct spdk_nvme_transport_poll_group_stat *stats; 1849c88345abSChangpeng Liu 1850c88345abSChangpeng Liu if (tgroup == NULL || _stats == NULL) { 1851c88345abSChangpeng Liu SPDK_ERRLOG("Invalid stats or group pointer\n"); 1852c88345abSChangpeng Liu return -EINVAL; 1853c88345abSChangpeng Liu } 1854c88345abSChangpeng Liu 1855c88345abSChangpeng Liu stats = calloc(1, sizeof(*stats)); 1856c88345abSChangpeng Liu if (!stats) { 1857e9ff4753SJim Harris SPDK_ERRLOG("Can't allocate memory for stats\n"); 1858c88345abSChangpeng Liu return -ENOMEM; 1859c88345abSChangpeng Liu } 1860c88345abSChangpeng Liu stats->trtype = SPDK_NVME_TRANSPORT_PCIE; 1861b813f998Syidong0635 group = SPDK_CONTAINEROF(tgroup, struct nvme_pcie_poll_group, group); 1862c88345abSChangpeng Liu memcpy(&stats->pcie, &group->stats, sizeof(group->stats)); 1863c88345abSChangpeng Liu 1864c88345abSChangpeng Liu *_stats = stats; 1865c88345abSChangpeng Liu 1866c88345abSChangpeng Liu return 0; 1867c88345abSChangpeng Liu } 1868c88345abSChangpeng Liu 1869c88345abSChangpeng Liu void 1870c88345abSChangpeng Liu nvme_pcie_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup, 1871c88345abSChangpeng Liu struct spdk_nvme_transport_poll_group_stat *stats) 1872c88345abSChangpeng Liu { 1873c88345abSChangpeng Liu free(stats); 1874c88345abSChangpeng Liu } 1875c88345abSChangpeng Liu 18760eae0106SJim Harris static void 18770eae0106SJim Harris nvme_pcie_trace(void) 1878971f07b9SJim Harris { 1879971f07b9SJim Harris struct spdk_trace_tpoint_opts opts[] = { 1880971f07b9SJim Harris { 1881971f07b9SJim Harris "NVME_PCIE_SUBMIT", TRACE_NVME_PCIE_SUBMIT, 188226d44a12SJim Harris OWNER_TYPE_NVME_PCIE_QP, OBJECT_NVME_PCIE_REQ, 1, 1883e36f0d36SJim Harris { { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }, 1884e36f0d36SJim Harris { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 }, 18857b05b29dSJim Harris { "opc", SPDK_TRACE_ARG_TYPE_INT, 4 }, 18867b05b29dSJim Harris { "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 }, 18877b05b29dSJim Harris { "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 }, 188888739040SAtul Malakar { "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 }, 188988739040SAtul Malakar { "qd", SPDK_TRACE_ARG_TYPE_INT, 4 } 1890971f07b9SJim Harris } 1891971f07b9SJim Harris }, 1892971f07b9SJim Harris { 1893971f07b9SJim Harris "NVME_PCIE_COMPLETE", TRACE_NVME_PCIE_COMPLETE, 189426d44a12SJim Harris OWNER_TYPE_NVME_PCIE_QP, OBJECT_NVME_PCIE_REQ, 0, 1895e36f0d36SJim Harris { { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }, 1896e36f0d36SJim Harris { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 }, 189788739040SAtul Malakar { "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 }, 189888739040SAtul Malakar { "qd", SPDK_TRACE_ARG_TYPE_INT, 4 } 189997661e86SJim Harris } 1900971f07b9SJim Harris }, 1901971f07b9SJim Harris }; 1902971f07b9SJim Harris 1903b0396da0SJim Harris spdk_trace_register_object(OBJECT_NVME_PCIE_REQ, 'p'); 190426d44a12SJim Harris spdk_trace_register_owner_type(OWNER_TYPE_NVME_PCIE_QP, 'q'); 1905971f07b9SJim Harris spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts)); 1906971f07b9SJim Harris } 19070eae0106SJim Harris SPDK_TRACE_REGISTER_FN(nvme_pcie_trace, "nvme_pcie", TRACE_GROUP_NVME_PCIE) 1908
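/*
 * Illustrative sketch (editor's addition): how a caller might consume the statistics
 * copied out by nvme_pcie_poll_group_get_stats() above and release them again with
 * nvme_pcie_poll_group_free_stats(). The helper name and print format are hypothetical;
 * trtype, the pcie member, and the submitted_requests/queued_requests counters are the
 * fields populated earlier in this file (see nvme_pcie_qpair_submit_request()).
 */
static void
example_dump_pcie_poll_group_stats(struct spdk_nvme_transport_poll_group *tgroup)
{
	struct spdk_nvme_transport_poll_group_stat *stats = NULL;

	if (nvme_pcie_poll_group_get_stats(tgroup, &stats) != 0) {
		return;
	}

	assert(stats->trtype == SPDK_NVME_TRANSPORT_PCIE);
	printf("pcie poll group: submitted=%" PRIu64 " queued=%" PRIu64 "\n",
	       stats->pcie.submitted_requests, stats->pcie.queued_requests);

	nvme_pcie_poll_group_free_stats(tgroup, stats);
}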