xref: /spdk/lib/nvme/nvme_pcie_common.c (revision 1efa1b16d579b0c09bcbf26a84140cbbcf88d9df)
1488570ebSJim Harris /*   SPDX-License-Identifier: BSD-3-Clause
2a6dbe372Spaul luse  *   Copyright (C) 2021 Intel Corporation. All rights reserved.
3558be98fSAlexey Marchuk  *   Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved.
4ea2db5bbSShuhei Matsumoto  *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
56b4b2d29SChangpeng Liu  */
66b4b2d29SChangpeng Liu 
76b4b2d29SChangpeng Liu /*
86b4b2d29SChangpeng Liu  * NVMe over PCIe common library
96b4b2d29SChangpeng Liu  */
106b4b2d29SChangpeng Liu 
116b4b2d29SChangpeng Liu #include "spdk/stdinc.h"
126b4b2d29SChangpeng Liu #include "spdk/likely.h"
136b4b2d29SChangpeng Liu #include "spdk/string.h"
146b4b2d29SChangpeng Liu #include "nvme_internal.h"
156b4b2d29SChangpeng Liu #include "nvme_pcie_internal.h"
16971f07b9SJim Harris #include "spdk/trace.h"
17971f07b9SJim Harris 
18c37e776eSKrzysztof Karas #include "spdk_internal/trace_defs.h"
196b4b2d29SChangpeng Liu 
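/*
 * Thread-local pointer to the controller whose MMIO registers are being accessed on
 * this thread; the PCIe hot-remove (SIGBUS) handling in nvme_pcie.c uses it to tell
 * which controller's BAR access faulted.
 */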
208b203d19SChangpeng Liu __thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL;
218b203d19SChangpeng Liu 
22ea2db5bbSShuhei Matsumoto static struct spdk_nvme_pcie_stat g_dummy_stat = {};
23ea2db5bbSShuhei Matsumoto 
248dd1cd21SBen Walker static void nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair,
258dd1cd21SBen Walker 		struct nvme_tracker *tr);
2615b7d3baSJim Harris 
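/*
 * Translate a payload virtual address for the transport: real PCIe controllers need
 * the physical address from spdk_vtophys(), while vfio-user controllers run with
 * IOVA=VA, so the virtual address can be handed to the device directly.
 */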
275e4fe0adSChangpeng Liu static inline uint64_t
285e4fe0adSChangpeng Liu nvme_pcie_vtophys(struct spdk_nvme_ctrlr *ctrlr, const void *buf, uint64_t *size)
291b0ec0b7SChangpeng Liu {
301b0ec0b7SChangpeng Liu 	if (spdk_likely(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
315e4fe0adSChangpeng Liu 		return spdk_vtophys(buf, size);
321b0ec0b7SChangpeng Liu 	} else {
331b0ec0b7SChangpeng Liu 		/* vfio-user address translation with IOVA=VA mode */
341b0ec0b7SChangpeng Liu 		return (uint64_t)(uintptr_t)buf;
351b0ec0b7SChangpeng Liu 	}
361b0ec0b7SChangpeng Liu }
371b0ec0b7SChangpeng Liu 
386b4b2d29SChangpeng Liu int
396b4b2d29SChangpeng Liu nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair)
406b4b2d29SChangpeng Liu {
416b4b2d29SChangpeng Liu 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
426b4b2d29SChangpeng Liu 	uint32_t i;
436b4b2d29SChangpeng Liu 
446b4b2d29SChangpeng Liu 	/* all head/tail vals are set to 0 */
456b4b2d29SChangpeng Liu 	pqpair->last_sq_tail = pqpair->sq_tail = pqpair->sq_head = pqpair->cq_head = 0;
466b4b2d29SChangpeng Liu 
476b4b2d29SChangpeng Liu 	/*
486b4b2d29SChangpeng Liu 	 * First time through the completion queue, HW will set phase
496b4b2d29SChangpeng Liu 	 *  bit on completions to 1.  So set this to 1 here, indicating
506b4b2d29SChangpeng Liu 	 *  we're looking for a 1 to know which entries have completed.
516b4b2d29SChangpeng Liu 	 *  We'll toggle the expected phase bit each time the completion
526b4b2d29SChangpeng Liu 	 *  queue rolls over.
536b4b2d29SChangpeng Liu 	 */
546b4b2d29SChangpeng Liu 	pqpair->flags.phase = 1;
556b4b2d29SChangpeng Liu 	for (i = 0; i < pqpair->num_entries; i++) {
566b4b2d29SChangpeng Liu 		pqpair->cpl[i].status.p = 0;
576b4b2d29SChangpeng Liu 	}
586b4b2d29SChangpeng Liu 
596b4b2d29SChangpeng Liu 	return 0;
606b4b2d29SChangpeng Liu }
616b4b2d29SChangpeng Liu 
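/*
 * Return the eventfd backing this qpair's interrupt vector (indexed by qpair id), or
 * -1 when interrupts are not enabled.  When opts is supplied it is validated and
 * filled with the default event handler options, with fd_type set to eventfd.
 */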
6228b353a5SAnkit Kumar int
6328b353a5SAnkit Kumar nvme_pcie_qpair_get_fd(struct spdk_nvme_qpair *qpair, struct spdk_event_handler_opts *opts)
6428b353a5SAnkit Kumar {
6528b353a5SAnkit Kumar 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
6628b353a5SAnkit Kumar 	struct spdk_pci_device *devhandle = nvme_ctrlr_proc_get_devhandle(ctrlr);
6728b353a5SAnkit Kumar 
6828b353a5SAnkit Kumar 	assert(devhandle != NULL);
6928b353a5SAnkit Kumar 	if (!ctrlr->opts.enable_interrupts) {
7028b353a5SAnkit Kumar 		return -1;
7128b353a5SAnkit Kumar 	}
7228b353a5SAnkit Kumar 
7328b353a5SAnkit Kumar 	if (!opts) {
7428b353a5SAnkit Kumar 		return spdk_pci_device_get_interrupt_efd_by_index(devhandle, qpair->id);
7528b353a5SAnkit Kumar 	}
7628b353a5SAnkit Kumar 
7728b353a5SAnkit Kumar 	if (!SPDK_FIELD_VALID(opts, fd_type, opts->opts_size)) {
7828b353a5SAnkit Kumar 		return -EINVAL;
7928b353a5SAnkit Kumar 	}
8028b353a5SAnkit Kumar 
8128b353a5SAnkit Kumar 	spdk_fd_group_get_default_event_handler_opts(opts, opts->opts_size);
8228b353a5SAnkit Kumar 	opts->fd_type = SPDK_FD_TYPE_EVENTFD;
8328b353a5SAnkit Kumar 
8428b353a5SAnkit Kumar 	return spdk_pci_device_get_interrupt_efd_by_index(devhandle, qpair->id);
8528b353a5SAnkit Kumar }
8628b353a5SAnkit Kumar 
876b4b2d29SChangpeng Liu static void
886b4b2d29SChangpeng Liu nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr)
896b4b2d29SChangpeng Liu {
906b4b2d29SChangpeng Liu 	tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp);
916b4b2d29SChangpeng Liu 	tr->cid = cid;
926b4b2d29SChangpeng Liu 	tr->req = NULL;
936b4b2d29SChangpeng Liu }
946b4b2d29SChangpeng Liu 
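/*
 * Bump allocator over the controller memory buffer (CMB): hand out an aligned chunk
 * of the CMB BAR and advance current_offset.  Fails if the BAR is already mapped for
 * data buffers or if the request does not fit in the remaining CMB space.
 */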
956b4b2d29SChangpeng Liu static void *
966b4b2d29SChangpeng Liu nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t size, uint64_t alignment,
976b4b2d29SChangpeng Liu 			  uint64_t *phys_addr)
986b4b2d29SChangpeng Liu {
996b4b2d29SChangpeng Liu 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
1006b4b2d29SChangpeng Liu 	uintptr_t addr;
1016b4b2d29SChangpeng Liu 
1026b4b2d29SChangpeng Liu 	if (pctrlr->cmb.mem_register_addr != NULL) {
1036b4b2d29SChangpeng Liu 		/* BAR is mapped for data */
1046b4b2d29SChangpeng Liu 		return NULL;
1056b4b2d29SChangpeng Liu 	}
1066b4b2d29SChangpeng Liu 
1076b4b2d29SChangpeng Liu 	addr = (uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset;
1086b4b2d29SChangpeng Liu 	addr = (addr + (alignment - 1)) & ~(alignment - 1);
1096b4b2d29SChangpeng Liu 
1106b4b2d29SChangpeng Liu 	/* CMB may only consume part of the BAR, calculate accordingly */
1116b4b2d29SChangpeng Liu 	if (addr + size > ((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.size)) {
1126b4b2d29SChangpeng Liu 		SPDK_ERRLOG("Tried to allocate past valid CMB range!\n");
1136b4b2d29SChangpeng Liu 		return NULL;
1146b4b2d29SChangpeng Liu 	}
1156b4b2d29SChangpeng Liu 	*phys_addr = pctrlr->cmb.bar_pa + addr - (uintptr_t)pctrlr->cmb.bar_va;
1166b4b2d29SChangpeng Liu 
1176b4b2d29SChangpeng Liu 	pctrlr->cmb.current_offset = (addr + size) - (uintptr_t)pctrlr->cmb.bar_va;
1186b4b2d29SChangpeng Liu 
1196b4b2d29SChangpeng Liu 	return (void *)addr;
1206b4b2d29SChangpeng Liu }
1216b4b2d29SChangpeng Liu 
1226b4b2d29SChangpeng Liu int
1236b4b2d29SChangpeng Liu nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
1246b4b2d29SChangpeng Liu 			  const struct spdk_nvme_io_qpair_opts *opts)
1256b4b2d29SChangpeng Liu {
1266b4b2d29SChangpeng Liu 	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
1276b4b2d29SChangpeng Liu 	struct nvme_pcie_ctrlr	*pctrlr = nvme_pcie_ctrlr(ctrlr);
1286b4b2d29SChangpeng Liu 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
1296b4b2d29SChangpeng Liu 	struct nvme_tracker	*tr;
1306b4b2d29SChangpeng Liu 	uint16_t		i;
1316b4b2d29SChangpeng Liu 	uint16_t		num_trackers;
1326b4b2d29SChangpeng Liu 	size_t			page_align = sysconf(_SC_PAGESIZE);
1336b4b2d29SChangpeng Liu 	size_t			queue_align, queue_len;
1346b4b2d29SChangpeng Liu 	uint32_t                flags = SPDK_MALLOC_DMA;
135fcc1652cSJim Harris 	int32_t			numa_id;
1366b4b2d29SChangpeng Liu 	uint64_t		sq_paddr = 0;
1376b4b2d29SChangpeng Liu 	uint64_t		cq_paddr = 0;
1386b4b2d29SChangpeng Liu 
1396b4b2d29SChangpeng Liu 	if (opts) {
1406b4b2d29SChangpeng Liu 		pqpair->sq_vaddr = opts->sq.vaddr;
1416b4b2d29SChangpeng Liu 		pqpair->cq_vaddr = opts->cq.vaddr;
142e431ba2eSVincent Fu 		pqpair->flags.disable_pcie_sgl_merge = opts->disable_pcie_sgl_merge;
1436b4b2d29SChangpeng Liu 		sq_paddr = opts->sq.paddr;
1446b4b2d29SChangpeng Liu 		cq_paddr = opts->cq.paddr;
1456b4b2d29SChangpeng Liu 	}
1466b4b2d29SChangpeng Liu 
1476b4b2d29SChangpeng Liu 	pqpair->retry_count = ctrlr->opts.transport_retry_count;
1486b4b2d29SChangpeng Liu 
1496b4b2d29SChangpeng Liu 	/*
1506b4b2d29SChangpeng Liu 	 * Limit the maximum number of completions to return per call to prevent wraparound,
1516b4b2d29SChangpeng Liu 	 * and calculate how many trackers can be submitted at once without overflowing the
1526b4b2d29SChangpeng Liu 	 * completion queue.
1536b4b2d29SChangpeng Liu 	 */
1546b4b2d29SChangpeng Liu 	pqpair->max_completions_cap = pqpair->num_entries / 4;
1556b4b2d29SChangpeng Liu 	pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS);
1566b4b2d29SChangpeng Liu 	pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS);
1576b4b2d29SChangpeng Liu 	num_trackers = pqpair->num_entries - pqpair->max_completions_cap;
1586b4b2d29SChangpeng Liu 
1596b4b2d29SChangpeng Liu 	SPDK_INFOLOG(nvme, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n",
1606b4b2d29SChangpeng Liu 		     pqpair->max_completions_cap, num_trackers);
1616b4b2d29SChangpeng Liu 
1626b4b2d29SChangpeng Liu 	assert(num_trackers != 0);
1636b4b2d29SChangpeng Liu 
1646b4b2d29SChangpeng Liu 	pqpair->sq_in_cmb = false;
1656b4b2d29SChangpeng Liu 
1666b4b2d29SChangpeng Liu 	if (nvme_qpair_is_admin_queue(&pqpair->qpair)) {
1676b4b2d29SChangpeng Liu 		flags |= SPDK_MALLOC_SHARE;
1686b4b2d29SChangpeng Liu 	}
1696b4b2d29SChangpeng Liu 
1706b4b2d29SChangpeng Liu 	/* cmd and cpl rings must be aligned on page size boundaries. */
1716b4b2d29SChangpeng Liu 	if (ctrlr->opts.use_cmb_sqs) {
1726b4b2d29SChangpeng Liu 		pqpair->cmd = nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd),
1736b4b2d29SChangpeng Liu 							page_align, &pqpair->cmd_bus_addr);
1746b4b2d29SChangpeng Liu 		if (pqpair->cmd != NULL) {
1756b4b2d29SChangpeng Liu 			pqpair->sq_in_cmb = true;
1766b4b2d29SChangpeng Liu 		}
1776b4b2d29SChangpeng Liu 	}
1786b4b2d29SChangpeng Liu 
1796b4b2d29SChangpeng Liu 	if (pqpair->sq_in_cmb == false) {
1806b4b2d29SChangpeng Liu 		if (pqpair->sq_vaddr) {
1816b4b2d29SChangpeng Liu 			pqpair->cmd = pqpair->sq_vaddr;
1826b4b2d29SChangpeng Liu 		} else {
1836b4b2d29SChangpeng Liu 			/* To ensure physical address contiguity we make each ring occupy
1846b4b2d29SChangpeng Liu 			 * a single hugepage only. See MAX_IO_QUEUE_ENTRIES.
1856b4b2d29SChangpeng Liu 			 */
1866b4b2d29SChangpeng Liu 			queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cmd);
1876b4b2d29SChangpeng Liu 			queue_align = spdk_max(spdk_align32pow2(queue_len), page_align);
188186b109dSJim Harris 			pqpair->cmd = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_NUMA_ID_ANY, flags);
1896b4b2d29SChangpeng Liu 			if (pqpair->cmd == NULL) {
1906b4b2d29SChangpeng Liu 				SPDK_ERRLOG("alloc qpair_cmd failed\n");
1916b4b2d29SChangpeng Liu 				return -ENOMEM;
1926b4b2d29SChangpeng Liu 			}
1936b4b2d29SChangpeng Liu 		}
1946b4b2d29SChangpeng Liu 		if (sq_paddr) {
1956b4b2d29SChangpeng Liu 			assert(pqpair->sq_vaddr != NULL);
1966b4b2d29SChangpeng Liu 			pqpair->cmd_bus_addr = sq_paddr;
1976b4b2d29SChangpeng Liu 		} else {
1985e4fe0adSChangpeng Liu 			pqpair->cmd_bus_addr = nvme_pcie_vtophys(ctrlr, pqpair->cmd, NULL);
1996b4b2d29SChangpeng Liu 			if (pqpair->cmd_bus_addr == SPDK_VTOPHYS_ERROR) {
2006b4b2d29SChangpeng Liu 				SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n");
2016b4b2d29SChangpeng Liu 				return -EFAULT;
2026b4b2d29SChangpeng Liu 			}
2036b4b2d29SChangpeng Liu 		}
2046b4b2d29SChangpeng Liu 	}
2056b4b2d29SChangpeng Liu 
2066b4b2d29SChangpeng Liu 	if (pqpair->cq_vaddr) {
2076b4b2d29SChangpeng Liu 		pqpair->cpl = pqpair->cq_vaddr;
2086b4b2d29SChangpeng Liu 	} else {
2096b4b2d29SChangpeng Liu 		queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cpl);
2106b4b2d29SChangpeng Liu 		queue_align = spdk_max(spdk_align32pow2(queue_len), page_align);
211fcc1652cSJim Harris 		numa_id = spdk_nvme_ctrlr_get_numa_id(ctrlr);
212fcc1652cSJim Harris 		pqpair->cpl = spdk_zmalloc(queue_len, queue_align, NULL, numa_id, flags);
2136b4b2d29SChangpeng Liu 		if (pqpair->cpl == NULL) {
2146b4b2d29SChangpeng Liu 			SPDK_ERRLOG("alloc qpair_cpl failed\n");
2156b4b2d29SChangpeng Liu 			return -ENOMEM;
2166b4b2d29SChangpeng Liu 		}
2176b4b2d29SChangpeng Liu 	}
2186b4b2d29SChangpeng Liu 	if (cq_paddr) {
2196b4b2d29SChangpeng Liu 		assert(pqpair->cq_vaddr != NULL);
2206b4b2d29SChangpeng Liu 		pqpair->cpl_bus_addr = cq_paddr;
2216b4b2d29SChangpeng Liu 	} else {
2225e4fe0adSChangpeng Liu 		pqpair->cpl_bus_addr = nvme_pcie_vtophys(ctrlr, pqpair->cpl, NULL);
2236b4b2d29SChangpeng Liu 		if (pqpair->cpl_bus_addr == SPDK_VTOPHYS_ERROR) {
2246b4b2d29SChangpeng Liu 			SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n");
2256b4b2d29SChangpeng Liu 			return -EFAULT;
2266b4b2d29SChangpeng Liu 		}
2276b4b2d29SChangpeng Liu 	}
2286b4b2d29SChangpeng Liu 
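	/*
	 * Per the NVMe spec the doorbell registers are laid out in stride-sized slots
	 * starting at offset 0x1000: slot 2y holds submission queue y's tail doorbell
	 * and slot 2y + 1 holds completion queue y's head doorbell.  doorbell_base
	 * points at the first of those slots.
	 */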
229349895a2SChangpeng Liu 	pqpair->sq_tdbl = pctrlr->doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32;
230349895a2SChangpeng Liu 	pqpair->cq_hdbl = pctrlr->doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32;
2316b4b2d29SChangpeng Liu 
2326b4b2d29SChangpeng Liu 	/*
2336b4b2d29SChangpeng Liu 	 * Reserve space for all of the trackers in a single allocation.
2346b4b2d29SChangpeng Liu 	 *   struct nvme_tracker must be padded so that its size is a power of 2.
2356b4b2d29SChangpeng Liu 	 *   This ensures the PRP list embedded in the nvme_tracker object will not span a
2366b4b2d29SChangpeng Liu 	 *   4KB boundary, while allowing access to trackers in tr[] via normal array indexing.
2376b4b2d29SChangpeng Liu 	 */
2386b4b2d29SChangpeng Liu 	pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL,
239186b109dSJim Harris 				  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_SHARE);
2406b4b2d29SChangpeng Liu 	if (pqpair->tr == NULL) {
2416b4b2d29SChangpeng Liu 		SPDK_ERRLOG("nvme_tr failed\n");
2426b4b2d29SChangpeng Liu 		return -ENOMEM;
2436b4b2d29SChangpeng Liu 	}
2446b4b2d29SChangpeng Liu 
2456b4b2d29SChangpeng Liu 	TAILQ_INIT(&pqpair->free_tr);
2466b4b2d29SChangpeng Liu 	TAILQ_INIT(&pqpair->outstanding_tr);
24788739040SAtul Malakar 	pqpair->qpair.queue_depth = 0;
2486b4b2d29SChangpeng Liu 
2496b4b2d29SChangpeng Liu 	for (i = 0; i < num_trackers; i++) {
2506b4b2d29SChangpeng Liu 		tr = &pqpair->tr[i];
2515e4fe0adSChangpeng Liu 		nvme_qpair_construct_tracker(tr, i, nvme_pcie_vtophys(ctrlr, tr, NULL));
2526b4b2d29SChangpeng Liu 		TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list);
2536b4b2d29SChangpeng Liu 	}
2546b4b2d29SChangpeng Liu 
2556b4b2d29SChangpeng Liu 	nvme_pcie_qpair_reset(qpair);
2566b4b2d29SChangpeng Liu 
2576b4b2d29SChangpeng Liu 	return 0;
2586b4b2d29SChangpeng Liu }
2596b4b2d29SChangpeng Liu 
2606b4b2d29SChangpeng Liu int
2616b4b2d29SChangpeng Liu nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries)
2626b4b2d29SChangpeng Liu {
2636b4b2d29SChangpeng Liu 	struct nvme_pcie_qpair *pqpair;
2646b4b2d29SChangpeng Liu 	int rc;
2656b4b2d29SChangpeng Liu 
266186b109dSJim Harris 	pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_SHARE);
2676b4b2d29SChangpeng Liu 	if (pqpair == NULL) {
2686b4b2d29SChangpeng Liu 		return -ENOMEM;
2696b4b2d29SChangpeng Liu 	}
2706b4b2d29SChangpeng Liu 
2716b4b2d29SChangpeng Liu 	pqpair->num_entries = num_entries;
2726b4b2d29SChangpeng Liu 	pqpair->flags.delay_cmd_submit = 0;
273455a5d78SMonica Kenguva 	pqpair->pcie_state = NVME_PCIE_QPAIR_READY;
2746b4b2d29SChangpeng Liu 
2756b4b2d29SChangpeng Liu 	ctrlr->adminq = &pqpair->qpair;
2766b4b2d29SChangpeng Liu 
2776b4b2d29SChangpeng Liu 	rc = nvme_qpair_init(ctrlr->adminq,
2786b4b2d29SChangpeng Liu 			     0, /* qpair ID */
2796b4b2d29SChangpeng Liu 			     ctrlr,
2806b4b2d29SChangpeng Liu 			     SPDK_NVME_QPRIO_URGENT,
281771f65bbSMonica Kenguva 			     num_entries,
282771f65bbSMonica Kenguva 			     false);
2836b4b2d29SChangpeng Liu 	if (rc != 0) {
2846b4b2d29SChangpeng Liu 		return rc;
2856b4b2d29SChangpeng Liu 	}
2866b4b2d29SChangpeng Liu 
287186b109dSJim Harris 	pqpair->stat = spdk_zmalloc(sizeof(*pqpair->stat), 64, NULL, SPDK_ENV_NUMA_ID_ANY,
288558be98fSAlexey Marchuk 				    SPDK_MALLOC_SHARE);
289558be98fSAlexey Marchuk 	if (!pqpair->stat) {
290558be98fSAlexey Marchuk 		SPDK_ERRLOG("Failed to allocate admin qpair statistics\n");
291558be98fSAlexey Marchuk 		return -ENOMEM;
292558be98fSAlexey Marchuk 	}
293558be98fSAlexey Marchuk 
2946b4b2d29SChangpeng Liu 	return nvme_pcie_qpair_construct(ctrlr->adminq, NULL);
2956b4b2d29SChangpeng Liu }
2966b4b2d29SChangpeng Liu 
2976b4b2d29SChangpeng Liu /**
2986b4b2d29SChangpeng Liu  * Note: the ctrlr_lock must be held when calling this function.
2996b4b2d29SChangpeng Liu  */
3006b4b2d29SChangpeng Liu void
3016b4b2d29SChangpeng Liu nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair,
3026b4b2d29SChangpeng Liu 		struct nvme_request *req, struct spdk_nvme_cpl *cpl)
3036b4b2d29SChangpeng Liu {
3046b4b2d29SChangpeng Liu 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
3056b4b2d29SChangpeng Liu 	struct nvme_request		*active_req = req;
3066b4b2d29SChangpeng Liu 	struct spdk_nvme_ctrlr_process	*active_proc;
3076b4b2d29SChangpeng Liu 
3086b4b2d29SChangpeng Liu 	/*
3096b4b2d29SChangpeng Liu 	 * The admin request is from another process.  Move it to the per
3106b4b2d29SChangpeng Liu 	 *  process list for that process to handle later.
3116b4b2d29SChangpeng Liu 	 */
3126b4b2d29SChangpeng Liu 	assert(nvme_qpair_is_admin_queue(qpair));
3136b4b2d29SChangpeng Liu 	assert(active_req->pid != getpid());
3146b4b2d29SChangpeng Liu 
3156b4b2d29SChangpeng Liu 	active_proc = nvme_ctrlr_get_process(ctrlr, active_req->pid);
3166b4b2d29SChangpeng Liu 	if (active_proc) {
3176b4b2d29SChangpeng Liu 		/* Save the original completion information */
3186b4b2d29SChangpeng Liu 		memcpy(&active_req->cpl, cpl, sizeof(*cpl));
3196b4b2d29SChangpeng Liu 		STAILQ_INSERT_TAIL(&active_proc->active_reqs, active_req, stailq);
3206b4b2d29SChangpeng Liu 	} else {
3216b4b2d29SChangpeng Liu 		SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n",
3226b4b2d29SChangpeng Liu 			    active_req->pid);
3234b1aa5daSDeepak Abraham Tom 		nvme_cleanup_user_req(active_req);
3246b4b2d29SChangpeng Liu 		nvme_free_request(active_req);
3256b4b2d29SChangpeng Liu 	}
3266b4b2d29SChangpeng Liu }
3276b4b2d29SChangpeng Liu 
3286b4b2d29SChangpeng Liu /**
3296b4b2d29SChangpeng Liu  * Note: the ctrlr_lock must be held when calling this function.
3306b4b2d29SChangpeng Liu  */
3316b4b2d29SChangpeng Liu void
3326b4b2d29SChangpeng Liu nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair)
3336b4b2d29SChangpeng Liu {
3346b4b2d29SChangpeng Liu 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
3356b4b2d29SChangpeng Liu 	struct nvme_request		*req, *tmp_req;
3366b4b2d29SChangpeng Liu 	pid_t				pid = getpid();
3376b4b2d29SChangpeng Liu 	struct spdk_nvme_ctrlr_process	*proc;
3386b4b2d29SChangpeng Liu 
3396b4b2d29SChangpeng Liu 	/*
3406b4b2d29SChangpeng Liu 	 * Check whether there is any pending admin request from
3416b4b2d29SChangpeng Liu 	 * other active processes.
3426b4b2d29SChangpeng Liu 	 */
3436b4b2d29SChangpeng Liu 	assert(nvme_qpair_is_admin_queue(qpair));
3446b4b2d29SChangpeng Liu 
3456b4b2d29SChangpeng Liu 	proc = nvme_ctrlr_get_current_process(ctrlr);
3466b4b2d29SChangpeng Liu 	if (!proc) {
3476b4b2d29SChangpeng Liu 		SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid);
3486b4b2d29SChangpeng Liu 		assert(proc);
3496b4b2d29SChangpeng Liu 		return;
3506b4b2d29SChangpeng Liu 	}
3516b4b2d29SChangpeng Liu 
3526b4b2d29SChangpeng Liu 	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
3536b4b2d29SChangpeng Liu 		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
3546b4b2d29SChangpeng Liu 
3556b4b2d29SChangpeng Liu 		assert(req->pid == pid);
3566b4b2d29SChangpeng Liu 
3576b4b2d29SChangpeng Liu 		nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &req->cpl);
3586b4b2d29SChangpeng Liu 	}
3596b4b2d29SChangpeng Liu }
3606b4b2d29SChangpeng Liu 
3616b4b2d29SChangpeng Liu int
3626b4b2d29SChangpeng Liu nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr,
3636b4b2d29SChangpeng Liu 				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
3646b4b2d29SChangpeng Liu 				 void *cb_arg)
3656b4b2d29SChangpeng Liu {
3666b4b2d29SChangpeng Liu 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que);
3676b4b2d29SChangpeng Liu 	struct nvme_request *req;
3686b4b2d29SChangpeng Liu 	struct spdk_nvme_cmd *cmd;
3693ab7a1f6SAnkit Kumar 	bool ien = ctrlr->opts.enable_interrupts;
3706b4b2d29SChangpeng Liu 
3716b4b2d29SChangpeng Liu 	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
3726b4b2d29SChangpeng Liu 	if (req == NULL) {
3736b4b2d29SChangpeng Liu 		return -ENOMEM;
3746b4b2d29SChangpeng Liu 	}
3756b4b2d29SChangpeng Liu 
3766b4b2d29SChangpeng Liu 	cmd = &req->cmd;
3776b4b2d29SChangpeng Liu 	cmd->opc = SPDK_NVME_OPC_CREATE_IO_CQ;
3786b4b2d29SChangpeng Liu 
3796b4b2d29SChangpeng Liu 	cmd->cdw10_bits.create_io_q.qid = io_que->id;
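	/* qsize is a 0's based value: the number of queue entries minus one, per the NVMe spec. */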
3806b4b2d29SChangpeng Liu 	cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1;
3816b4b2d29SChangpeng Liu 
3826b4b2d29SChangpeng Liu 	cmd->cdw11_bits.create_io_cq.pc = 1;
3833ab7a1f6SAnkit Kumar 	if (ien) {
3843ab7a1f6SAnkit Kumar 		cmd->cdw11_bits.create_io_cq.ien = 1;
3853ab7a1f6SAnkit Kumar 		/* I/O queue interrupt vectors start at 1 (vector 0 is used by the
3863ab7a1f6SAnkit Kumar 		 * admin queue), so map the queue id directly to the interrupt vector.
3873ab7a1f6SAnkit Kumar 		 */
3883ab7a1f6SAnkit Kumar 		cmd->cdw11_bits.create_io_cq.iv = io_que->id;
3893ab7a1f6SAnkit Kumar 	}
3903ab7a1f6SAnkit Kumar 
3916b4b2d29SChangpeng Liu 	cmd->dptr.prp.prp1 = pqpair->cpl_bus_addr;
3926b4b2d29SChangpeng Liu 
3936b4b2d29SChangpeng Liu 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
3946b4b2d29SChangpeng Liu }
3956b4b2d29SChangpeng Liu 
3966b4b2d29SChangpeng Liu int
3976b4b2d29SChangpeng Liu nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr,
3986b4b2d29SChangpeng Liu 				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
3996b4b2d29SChangpeng Liu {
4006b4b2d29SChangpeng Liu 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que);
4016b4b2d29SChangpeng Liu 	struct nvme_request *req;
4026b4b2d29SChangpeng Liu 	struct spdk_nvme_cmd *cmd;
4036b4b2d29SChangpeng Liu 
4046b4b2d29SChangpeng Liu 	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
4056b4b2d29SChangpeng Liu 	if (req == NULL) {
4066b4b2d29SChangpeng Liu 		return -ENOMEM;
4076b4b2d29SChangpeng Liu 	}
4086b4b2d29SChangpeng Liu 
4096b4b2d29SChangpeng Liu 	cmd = &req->cmd;
4106b4b2d29SChangpeng Liu 	cmd->opc = SPDK_NVME_OPC_CREATE_IO_SQ;
4116b4b2d29SChangpeng Liu 
4126b4b2d29SChangpeng Liu 	cmd->cdw10_bits.create_io_q.qid = io_que->id;
4136b4b2d29SChangpeng Liu 	cmd->cdw10_bits.create_io_q.qsize = pqpair->num_entries - 1;
4146b4b2d29SChangpeng Liu 	cmd->cdw11_bits.create_io_sq.pc = 1;
4156b4b2d29SChangpeng Liu 	cmd->cdw11_bits.create_io_sq.qprio = io_que->qprio;
4166b4b2d29SChangpeng Liu 	cmd->cdw11_bits.create_io_sq.cqid = io_que->id;
4176b4b2d29SChangpeng Liu 	cmd->dptr.prp.prp1 = pqpair->cmd_bus_addr;
4186b4b2d29SChangpeng Liu 
4196b4b2d29SChangpeng Liu 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
4206b4b2d29SChangpeng Liu }
4216b4b2d29SChangpeng Liu 
4226b4b2d29SChangpeng Liu int
4236b4b2d29SChangpeng Liu nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
4246b4b2d29SChangpeng Liu 				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
4256b4b2d29SChangpeng Liu {
4266b4b2d29SChangpeng Liu 	struct nvme_request *req;
4276b4b2d29SChangpeng Liu 	struct spdk_nvme_cmd *cmd;
4286b4b2d29SChangpeng Liu 
4296b4b2d29SChangpeng Liu 	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
4306b4b2d29SChangpeng Liu 	if (req == NULL) {
4316b4b2d29SChangpeng Liu 		return -ENOMEM;
4326b4b2d29SChangpeng Liu 	}
4336b4b2d29SChangpeng Liu 
4346b4b2d29SChangpeng Liu 	cmd = &req->cmd;
4356b4b2d29SChangpeng Liu 	cmd->opc = SPDK_NVME_OPC_DELETE_IO_CQ;
4366b4b2d29SChangpeng Liu 	cmd->cdw10_bits.delete_io_q.qid = qpair->id;
4376b4b2d29SChangpeng Liu 
4386b4b2d29SChangpeng Liu 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
4396b4b2d29SChangpeng Liu }
4406b4b2d29SChangpeng Liu 
4416b4b2d29SChangpeng Liu int
4426b4b2d29SChangpeng Liu nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
4436b4b2d29SChangpeng Liu 				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
4446b4b2d29SChangpeng Liu {
4456b4b2d29SChangpeng Liu 	struct nvme_request *req;
4466b4b2d29SChangpeng Liu 	struct spdk_nvme_cmd *cmd;
4476b4b2d29SChangpeng Liu 
4486b4b2d29SChangpeng Liu 	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
4496b4b2d29SChangpeng Liu 	if (req == NULL) {
4506b4b2d29SChangpeng Liu 		return -ENOMEM;
4516b4b2d29SChangpeng Liu 	}
4526b4b2d29SChangpeng Liu 
4536b4b2d29SChangpeng Liu 	cmd = &req->cmd;
4546b4b2d29SChangpeng Liu 	cmd->opc = SPDK_NVME_OPC_DELETE_IO_SQ;
4556b4b2d29SChangpeng Liu 	cmd->cdw10_bits.delete_io_q.qid = qpair->id;
4566b4b2d29SChangpeng Liu 
4576b4b2d29SChangpeng Liu 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
4586b4b2d29SChangpeng Liu }
4596b4b2d29SChangpeng Liu 
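/*
 * I/O qpair creation is asynchronous: _nvme_pcie_ctrlr_create_io_qpair() submits
 * CREATE IO CQ, its completion callback submits CREATE IO SQ, and the qpair only
 * becomes NVME_PCIE_QPAIR_READY once the SQ completes.  On any failure the CQ is
 * deleted (if it was created) and the qpair is marked NVME_PCIE_QPAIR_FAILED.
 */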
460455a5d78SMonica Kenguva static void
461455a5d78SMonica Kenguva nvme_completion_sq_error_delete_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
4626b4b2d29SChangpeng Liu {
463455a5d78SMonica Kenguva 	struct spdk_nvme_qpair *qpair = arg;
4646b4b2d29SChangpeng Liu 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
465455a5d78SMonica Kenguva 
466455a5d78SMonica Kenguva 	if (spdk_nvme_cpl_is_error(cpl)) {
467455a5d78SMonica Kenguva 		SPDK_ERRLOG("delete_io_cq failed!\n");
468455a5d78SMonica Kenguva 	}
469455a5d78SMonica Kenguva 
470455a5d78SMonica Kenguva 	pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
471455a5d78SMonica Kenguva }
472455a5d78SMonica Kenguva 
473455a5d78SMonica Kenguva static void
474455a5d78SMonica Kenguva nvme_completion_create_sq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
475455a5d78SMonica Kenguva {
476455a5d78SMonica Kenguva 	struct spdk_nvme_qpair *qpair = arg;
477455a5d78SMonica Kenguva 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
478455a5d78SMonica Kenguva 	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
479455a5d78SMonica Kenguva 	struct nvme_pcie_ctrlr	*pctrlr = nvme_pcie_ctrlr(ctrlr);
4806b4b2d29SChangpeng Liu 	int rc;
4816b4b2d29SChangpeng Liu 
4823cb9bc25SJim Harris 	if (pqpair->flags.defer_destruction) {
4833cb9bc25SJim Harris 		/* This qpair was deleted by the application while the
4843cb9bc25SJim Harris 		 * connection was still in progress.  We had to wait
4853cb9bc25SJim Harris 		 * to free the qpair resources until this outstanding
4863cb9bc25SJim Harris 		 * command was completed.  Now that the completion has
4873cb9bc25SJim Harris 		 * arrived, free the qpair.
4883cb9bc25SJim Harris 		 */
4893cb9bc25SJim Harris 		nvme_pcie_qpair_destroy(qpair);
4903cb9bc25SJim Harris 		return;
4913cb9bc25SJim Harris 	}
4923cb9bc25SJim Harris 
493455a5d78SMonica Kenguva 	if (spdk_nvme_cpl_is_error(cpl)) {
494455a5d78SMonica Kenguva 		SPDK_ERRLOG("nvme_create_io_sq failed, deleting cq!\n");
495455a5d78SMonica Kenguva 		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_sq_error_delete_cq_cb,
496455a5d78SMonica Kenguva 						      qpair);
4976b4b2d29SChangpeng Liu 		if (rc != 0) {
498455a5d78SMonica Kenguva 			SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
499455a5d78SMonica Kenguva 			pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
5006b4b2d29SChangpeng Liu 		}
501455a5d78SMonica Kenguva 		return;
5026b4b2d29SChangpeng Liu 	}
503455a5d78SMonica Kenguva 	pqpair->pcie_state = NVME_PCIE_QPAIR_READY;
5046b4b2d29SChangpeng Liu 	if (ctrlr->shadow_doorbell) {
5056b4b2d29SChangpeng Liu 		pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
5066b4b2d29SChangpeng Liu 						  pctrlr->doorbell_stride_u32;
5076b4b2d29SChangpeng Liu 		pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) *
5086b4b2d29SChangpeng Liu 						  pctrlr->doorbell_stride_u32;
5096b4b2d29SChangpeng Liu 		pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) *
5106b4b2d29SChangpeng Liu 						      pctrlr->doorbell_stride_u32;
5116b4b2d29SChangpeng Liu 		pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) *
5126b4b2d29SChangpeng Liu 						      pctrlr->doorbell_stride_u32;
5136b4b2d29SChangpeng Liu 		pqpair->flags.has_shadow_doorbell = 1;
5146b4b2d29SChangpeng Liu 	} else {
5156b4b2d29SChangpeng Liu 		pqpair->flags.has_shadow_doorbell = 0;
5166b4b2d29SChangpeng Liu 	}
5176b4b2d29SChangpeng Liu 	nvme_pcie_qpair_reset(qpair);
519455a5d78SMonica Kenguva }
520455a5d78SMonica Kenguva 
521455a5d78SMonica Kenguva static void
522455a5d78SMonica Kenguva nvme_completion_create_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
523455a5d78SMonica Kenguva {
524455a5d78SMonica Kenguva 	struct spdk_nvme_qpair *qpair = arg;
525455a5d78SMonica Kenguva 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
526455a5d78SMonica Kenguva 	int rc;
527455a5d78SMonica Kenguva 
5283cb9bc25SJim Harris 	if (pqpair->flags.defer_destruction) {
5293cb9bc25SJim Harris 		/* This qpair was deleted by the application while the
5303cb9bc25SJim Harris 		 * connection was still in progress.  We had to wait
5313cb9bc25SJim Harris 		 * to free the qpair resources until this outstanding
5323cb9bc25SJim Harris 		 * command was completed.  Now that the completion has
5333cb9bc25SJim Harris 		 * arrived, free the qpair.
5343cb9bc25SJim Harris 		 */
5353cb9bc25SJim Harris 		nvme_pcie_qpair_destroy(qpair);
5363cb9bc25SJim Harris 		return;
5373cb9bc25SJim Harris 	}
5383cb9bc25SJim Harris 
539455a5d78SMonica Kenguva 	if (spdk_nvme_cpl_is_error(cpl)) {
540455a5d78SMonica Kenguva 		pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
541455a5d78SMonica Kenguva 		SPDK_ERRLOG("nvme_create_io_cq failed!\n");
542455a5d78SMonica Kenguva 		return;
543455a5d78SMonica Kenguva 	}
544455a5d78SMonica Kenguva 
545455a5d78SMonica Kenguva 	rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_create_sq_cb, qpair);
546455a5d78SMonica Kenguva 
547455a5d78SMonica Kenguva 	if (rc != 0) {
548455a5d78SMonica Kenguva 		SPDK_ERRLOG("Failed to send request to create_io_sq, deleting cq!\n");
549455a5d78SMonica Kenguva 		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_sq_error_delete_cq_cb,
550455a5d78SMonica Kenguva 						      qpair);
551455a5d78SMonica Kenguva 		if (rc != 0) {
552455a5d78SMonica Kenguva 			SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
553455a5d78SMonica Kenguva 			pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
554455a5d78SMonica Kenguva 		}
555455a5d78SMonica Kenguva 		return;
556455a5d78SMonica Kenguva 	}
557455a5d78SMonica Kenguva 	pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_SQ;
558455a5d78SMonica Kenguva }
559455a5d78SMonica Kenguva 
560455a5d78SMonica Kenguva static int
561455a5d78SMonica Kenguva _nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
562455a5d78SMonica Kenguva 				 uint16_t qid)
563455a5d78SMonica Kenguva {
564455a5d78SMonica Kenguva 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
565455a5d78SMonica Kenguva 	int	rc;
566455a5d78SMonica Kenguva 
567455a5d78SMonica Kenguva 	/* Statistics may already be allocated in the case of controller reset */
568455a5d78SMonica Kenguva 	if (qpair->poll_group) {
569455a5d78SMonica Kenguva 		struct nvme_pcie_poll_group *group = SPDK_CONTAINEROF(qpair->poll_group,
570455a5d78SMonica Kenguva 						     struct nvme_pcie_poll_group, group);
571455a5d78SMonica Kenguva 
572455a5d78SMonica Kenguva 		pqpair->stat = &group->stats;
573455a5d78SMonica Kenguva 		pqpair->shared_stats = true;
574455a5d78SMonica Kenguva 	} else {
575df4600f4SRichael Zhuang 		if (pqpair->stat == NULL) {
576455a5d78SMonica Kenguva 			pqpair->stat = calloc(1, sizeof(*pqpair->stat));
577455a5d78SMonica Kenguva 			if (!pqpair->stat) {
578455a5d78SMonica Kenguva 				SPDK_ERRLOG("Failed to allocate qpair statistics\n");
579455a5d78SMonica Kenguva 				nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
580455a5d78SMonica Kenguva 				return -ENOMEM;
581455a5d78SMonica Kenguva 			}
582455a5d78SMonica Kenguva 		}
583455a5d78SMonica Kenguva 	}
584455a5d78SMonica Kenguva 
585455a5d78SMonica Kenguva 	rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_create_cq_cb, qpair);
586455a5d78SMonica Kenguva 
587455a5d78SMonica Kenguva 	if (rc != 0) {
588455a5d78SMonica Kenguva 		SPDK_ERRLOG("Failed to send request to create_io_cq\n");
589455a5d78SMonica Kenguva 		nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
590455a5d78SMonica Kenguva 		return rc;
591455a5d78SMonica Kenguva 	}
592455a5d78SMonica Kenguva 	pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_CQ;
5936b4b2d29SChangpeng Liu 	return 0;
5946b4b2d29SChangpeng Liu }
5956b4b2d29SChangpeng Liu 
5966b4b2d29SChangpeng Liu int
5976b4b2d29SChangpeng Liu nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
5986b4b2d29SChangpeng Liu {
599ea0aaf5eSBen Walker 	int rc = 0;
600ea0aaf5eSBen Walker 
601ea0aaf5eSBen Walker 	if (!nvme_qpair_is_admin_queue(qpair)) {
602ea0aaf5eSBen Walker 		rc = _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id);
603455a5d78SMonica Kenguva 	} else {
604ea0aaf5eSBen Walker 		nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
605ea0aaf5eSBen Walker 	}
606ea0aaf5eSBen Walker 
607ea0aaf5eSBen Walker 	return rc;
6086b4b2d29SChangpeng Liu }
6096b4b2d29SChangpeng Liu 
6106b4b2d29SChangpeng Liu void
6116b4b2d29SChangpeng Liu nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
6126b4b2d29SChangpeng Liu {
613736b9da0SShuhei Matsumoto 	if (!nvme_qpair_is_admin_queue(qpair) || !ctrlr->is_disconnecting) {
614cfe11bd1SShuhei Matsumoto 		nvme_transport_ctrlr_disconnect_qpair_done(qpair);
615736b9da0SShuhei Matsumoto 	} else {
616736b9da0SShuhei Matsumoto 		/* If this function is called for the admin qpair via spdk_nvme_ctrlr_reset()
617736b9da0SShuhei Matsumoto 		 * or spdk_nvme_ctrlr_disconnect(), initiate a Controller Level Reset.
618736b9da0SShuhei Matsumoto 		 * Then we can abort trackers safely because the Controller Level Reset deletes
619736b9da0SShuhei Matsumoto 		 * all I/O SQ/CQs.
620736b9da0SShuhei Matsumoto 		 */
621736b9da0SShuhei Matsumoto 		nvme_ctrlr_disable(ctrlr);
622736b9da0SShuhei Matsumoto 	}
6236b4b2d29SChangpeng Liu }
6246b4b2d29SChangpeng Liu 
6258b203d19SChangpeng Liu /* Used when dst points to MMIO (i.e. CMB) in a virtual machine - in these cases we must
6268b203d19SChangpeng Liu  * not use wide instructions because QEMU will not emulate such instructions to MMIO space.
6278b203d19SChangpeng Liu  * So this function ensures we only copy 8 bytes at a time.
6288b203d19SChangpeng Liu  */
6298b203d19SChangpeng Liu static inline void
6308b203d19SChangpeng Liu nvme_pcie_copy_command_mmio(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
6318b203d19SChangpeng Liu {
6328b203d19SChangpeng Liu 	uint64_t *dst64 = (uint64_t *)dst;
6338b203d19SChangpeng Liu 	const uint64_t *src64 = (const uint64_t *)src;
6348b203d19SChangpeng Liu 	uint32_t i;
6358b203d19SChangpeng Liu 
6368b203d19SChangpeng Liu 	for (i = 0; i < sizeof(*dst) / 8; i++) {
6378b203d19SChangpeng Liu 		dst64[i] = src64[i];
6388b203d19SChangpeng Liu 	}
6398b203d19SChangpeng Liu }
6408b203d19SChangpeng Liu 
6418b203d19SChangpeng Liu static inline void
6428b203d19SChangpeng Liu nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
6438b203d19SChangpeng Liu {
6448b203d19SChangpeng Liu 	/* dst and src are known to be non-overlapping and 64-byte aligned. */
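	/* With SSE2, use non-temporal 128-bit stores so the 64-byte command streams to
	 * the submission queue without polluting the cache.
	 */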
6458b203d19SChangpeng Liu #if defined(__SSE2__)
6468b203d19SChangpeng Liu 	__m128i *d128 = (__m128i *)dst;
6478b203d19SChangpeng Liu 	const __m128i *s128 = (const __m128i *)src;
6488b203d19SChangpeng Liu 
6498b203d19SChangpeng Liu 	_mm_stream_si128(&d128[0], _mm_load_si128(&s128[0]));
6508b203d19SChangpeng Liu 	_mm_stream_si128(&d128[1], _mm_load_si128(&s128[1]));
6518b203d19SChangpeng Liu 	_mm_stream_si128(&d128[2], _mm_load_si128(&s128[2]));
6528b203d19SChangpeng Liu 	_mm_stream_si128(&d128[3], _mm_load_si128(&s128[3]));
6538b203d19SChangpeng Liu #else
6548b203d19SChangpeng Liu 	*dst = *src;
6558b203d19SChangpeng Liu #endif
6568b203d19SChangpeng Liu }
6578b203d19SChangpeng Liu 
6588b203d19SChangpeng Liu void
6598b203d19SChangpeng Liu nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
6608b203d19SChangpeng Liu {
6618b203d19SChangpeng Liu 	struct nvme_request	*req;
6628b203d19SChangpeng Liu 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
6638b203d19SChangpeng Liu 	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
6648b203d19SChangpeng Liu 
6658b203d19SChangpeng Liu 	req = tr->req;
6668b203d19SChangpeng Liu 	assert(req != NULL);
6678b203d19SChangpeng Liu 
668e36f0d36SJim Harris 	spdk_trace_record(TRACE_NVME_PCIE_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg,
6697b05b29dSJim Harris 			  (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc,
67088739040SAtul Malakar 			  req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12,
67188739040SAtul Malakar 			  pqpair->qpair.queue_depth);
672971f07b9SJim Harris 
673f89cf818SAlex Michon 	if (req->cmd.fuse) {
674f89cf818SAlex Michon 		/*
675f89cf818SAlex Michon 		 * Keep track of the fuse operation sequence so that we ring the doorbell only
676f89cf818SAlex Michon 		 * after the second fuse is submitted.
677f89cf818SAlex Michon 		 */
678f89cf818SAlex Michon 		qpair->last_fuse = req->cmd.fuse;
6798b203d19SChangpeng Liu 	}
6808b203d19SChangpeng Liu 
6818b203d19SChangpeng Liu 	/* Don't use wide instructions to copy the NVMe command; the QEMU virtual
6828b203d19SChangpeng Liu 	 * NVMe controller limits the maximum access width to 8 bytes at a time.
6838b203d19SChangpeng Liu 	 */
6848b203d19SChangpeng Liu 	if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_MAXIMUM_PCI_ACCESS_WIDTH) && pqpair->sq_in_cmb)) {
6858b203d19SChangpeng Liu 		nvme_pcie_copy_command_mmio(&pqpair->cmd[pqpair->sq_tail], &req->cmd);
6868b203d19SChangpeng Liu 	} else {
6878b203d19SChangpeng Liu 		/* Copy the command from the tracker to the submission queue. */
6888b203d19SChangpeng Liu 		nvme_pcie_copy_command(&pqpair->cmd[pqpair->sq_tail], &req->cmd);
6898b203d19SChangpeng Liu 	}
6908b203d19SChangpeng Liu 
6918b203d19SChangpeng Liu 	if (spdk_unlikely(++pqpair->sq_tail == pqpair->num_entries)) {
6928b203d19SChangpeng Liu 		pqpair->sq_tail = 0;
6938b203d19SChangpeng Liu 	}
6948b203d19SChangpeng Liu 
6958b203d19SChangpeng Liu 	if (spdk_unlikely(pqpair->sq_tail == pqpair->sq_head)) {
6968b203d19SChangpeng Liu 		SPDK_ERRLOG("sq_tail is passing sq_head!\n");
6978b203d19SChangpeng Liu 	}
6988b203d19SChangpeng Liu 
6998b203d19SChangpeng Liu 	if (!pqpair->flags.delay_cmd_submit) {
7008b203d19SChangpeng Liu 		nvme_pcie_qpair_ring_sq_doorbell(qpair);
7018b203d19SChangpeng Liu 	}
7028b203d19SChangpeng Liu }
7038b203d19SChangpeng Liu 
7048b203d19SChangpeng Liu void
7058b203d19SChangpeng Liu nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
7068b203d19SChangpeng Liu 				 struct spdk_nvme_cpl *cpl, bool print_on_error)
7078b203d19SChangpeng Liu {
7088b203d19SChangpeng Liu 	struct nvme_pcie_qpair		*pqpair = nvme_pcie_qpair(qpair);
7098b203d19SChangpeng Liu 	struct nvme_request		*req;
7108b203d19SChangpeng Liu 	bool				retry, error;
71115beaa20SChangpeng Liu 	bool				print_error;
7128b203d19SChangpeng Liu 
7138b203d19SChangpeng Liu 	req = tr->req;
7148b203d19SChangpeng Liu 
715e36f0d36SJim Harris 	spdk_trace_record(TRACE_NVME_PCIE_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg,
71688739040SAtul Malakar 			  (uint32_t)req->cmd.cid, (uint32_t)cpl->status_raw, pqpair->qpair.queue_depth);
717971f07b9SJim Harris 
7188b203d19SChangpeng Liu 	assert(req != NULL);
7198b203d19SChangpeng Liu 
7208b203d19SChangpeng Liu 	error = spdk_nvme_cpl_is_error(cpl);
7218b203d19SChangpeng Liu 	retry = error && nvme_completion_is_retry(cpl) &&
7228b203d19SChangpeng Liu 		req->retries < pqpair->retry_count;
72315beaa20SChangpeng Liu 	print_error = error && print_on_error && !qpair->ctrlr->opts.disable_error_logging;
7248b203d19SChangpeng Liu 
72515beaa20SChangpeng Liu 	if (print_error) {
7268b203d19SChangpeng Liu 		spdk_nvme_qpair_print_command(qpair, &req->cmd);
72715beaa20SChangpeng Liu 	}
72815beaa20SChangpeng Liu 
72915beaa20SChangpeng Liu 	if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) {
7308b203d19SChangpeng Liu 		spdk_nvme_qpair_print_completion(qpair, cpl);
7318b203d19SChangpeng Liu 	}
7328b203d19SChangpeng Liu 
7338b203d19SChangpeng Liu 	assert(cpl->cid == req->cmd.cid);
7348b203d19SChangpeng Liu 
7358b203d19SChangpeng Liu 	if (retry) {
7368b203d19SChangpeng Liu 		req->retries++;
7378b203d19SChangpeng Liu 		nvme_pcie_qpair_submit_tracker(qpair, tr);
7388b203d19SChangpeng Liu 	} else {
7398b203d19SChangpeng Liu 		TAILQ_REMOVE(&pqpair->outstanding_tr, tr, tq_list);
74088739040SAtul Malakar 		pqpair->qpair.queue_depth--;
7418b203d19SChangpeng Liu 
7428b203d19SChangpeng Liu 		/* Only check admin requests from different processes. */
7438b203d19SChangpeng Liu 		if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) {
7448b203d19SChangpeng Liu 			nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl);
7458b203d19SChangpeng Liu 		} else {
7468b203d19SChangpeng Liu 			nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl);
7478b203d19SChangpeng Liu 		}
7488b203d19SChangpeng Liu 
7498b203d19SChangpeng Liu 		tr->req = NULL;
7508b203d19SChangpeng Liu 
7518b203d19SChangpeng Liu 		TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list);
7528b203d19SChangpeng Liu 	}
7538b203d19SChangpeng Liu }
7548b203d19SChangpeng Liu 
7558b203d19SChangpeng Liu void
7568b203d19SChangpeng Liu nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
7578b203d19SChangpeng Liu 					struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
7588b203d19SChangpeng Liu 					bool print_on_error)
7598b203d19SChangpeng Liu {
7608b203d19SChangpeng Liu 	struct spdk_nvme_cpl	cpl;
7618b203d19SChangpeng Liu 
7628b203d19SChangpeng Liu 	memset(&cpl, 0, sizeof(cpl));
7638b203d19SChangpeng Liu 	cpl.sqid = qpair->id;
7648b203d19SChangpeng Liu 	cpl.cid = tr->cid;
7658b203d19SChangpeng Liu 	cpl.status.sct = sct;
7668b203d19SChangpeng Liu 	cpl.status.sc = sc;
7678b203d19SChangpeng Liu 	cpl.status.dnr = dnr;
7688b203d19SChangpeng Liu 	nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
7698b203d19SChangpeng Liu }
7708b203d19SChangpeng Liu 
7718b203d19SChangpeng Liu void
7728b203d19SChangpeng Liu nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr)
7738b203d19SChangpeng Liu {
7748b203d19SChangpeng Liu 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
7758b203d19SChangpeng Liu 	struct nvme_tracker *tr, *temp, *last;
7768b203d19SChangpeng Liu 
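	/*
	 * Remember the tracker currently at the tail of the list; completing trackers
	 * below may cause new ones to be appended to outstanding_tr, and those must
	 * not be aborted by this loop.
	 */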
7778b203d19SChangpeng Liu 	last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head);
7788b203d19SChangpeng Liu 
7798b203d19SChangpeng Liu 	/* Abort previously submitted (outstanding) trs */
7808b203d19SChangpeng Liu 	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) {
7818b203d19SChangpeng Liu 		if (!qpair->ctrlr->opts.disable_error_logging) {
7828b203d19SChangpeng Liu 			SPDK_ERRLOG("aborting outstanding command\n");
7838b203d19SChangpeng Liu 		}
7848b203d19SChangpeng Liu 		nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
7858b203d19SChangpeng Liu 							SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true);
7868b203d19SChangpeng Liu 
7878b203d19SChangpeng Liu 		if (tr == last) {
7888b203d19SChangpeng Liu 			break;
7898b203d19SChangpeng Liu 		}
7908b203d19SChangpeng Liu 	}
7918b203d19SChangpeng Liu }
7928b203d19SChangpeng Liu 
7938b203d19SChangpeng Liu void
7948b203d19SChangpeng Liu nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
7958b203d19SChangpeng Liu {
7968b203d19SChangpeng Liu 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
7978b203d19SChangpeng Liu 	struct nvme_tracker	*tr;
7988b203d19SChangpeng Liu 
7998b203d19SChangpeng Liu 	tr = TAILQ_FIRST(&pqpair->outstanding_tr);
8008b203d19SChangpeng Liu 	while (tr != NULL) {
8018b203d19SChangpeng Liu 		assert(tr->req != NULL);
8028b203d19SChangpeng Liu 		if (tr->req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
8038b203d19SChangpeng Liu 			nvme_pcie_qpair_manual_complete_tracker(qpair, tr,
8048b203d19SChangpeng Liu 								SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0,
8058b203d19SChangpeng Liu 								false);
8068b203d19SChangpeng Liu 			tr = TAILQ_FIRST(&pqpair->outstanding_tr);
8078b203d19SChangpeng Liu 		} else {
8088b203d19SChangpeng Liu 			tr = TAILQ_NEXT(tr, tq_list);
8098b203d19SChangpeng Liu 		}
8108b203d19SChangpeng Liu 	}
8118b203d19SChangpeng Liu }
8128b203d19SChangpeng Liu 
8138b203d19SChangpeng Liu void
8148b203d19SChangpeng Liu nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair)
8158b203d19SChangpeng Liu {
8168b203d19SChangpeng Liu 	nvme_pcie_admin_qpair_abort_aers(qpair);
8178b203d19SChangpeng Liu }
8188b203d19SChangpeng Liu 
8198b203d19SChangpeng Liu void
8208b203d19SChangpeng Liu nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
8218b203d19SChangpeng Liu {
8228b203d19SChangpeng Liu 	nvme_pcie_qpair_abort_trackers(qpair, dnr);
8238b203d19SChangpeng Liu }
8248b203d19SChangpeng Liu 
8258b203d19SChangpeng Liu static void
8268b203d19SChangpeng Liu nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
8278b203d19SChangpeng Liu {
8288b203d19SChangpeng Liu 	uint64_t t02;
8298b203d19SChangpeng Liu 	struct nvme_tracker *tr, *tmp;
8308b203d19SChangpeng Liu 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
8318b203d19SChangpeng Liu 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
8328b203d19SChangpeng Liu 	struct spdk_nvme_ctrlr_process *active_proc;
8338b203d19SChangpeng Liu 
8348b203d19SChangpeng Liu 	/* Don't check timeouts during controller initialization. */
8358b203d19SChangpeng Liu 	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
8368b203d19SChangpeng Liu 		return;
8378b203d19SChangpeng Liu 	}
8388b203d19SChangpeng Liu 
8398b203d19SChangpeng Liu 	if (nvme_qpair_is_admin_queue(qpair)) {
8408b203d19SChangpeng Liu 		active_proc = nvme_ctrlr_get_current_process(ctrlr);
8418b203d19SChangpeng Liu 	} else {
8428b203d19SChangpeng Liu 		active_proc = qpair->active_proc;
8438b203d19SChangpeng Liu 	}
8448b203d19SChangpeng Liu 
8458b203d19SChangpeng Liu 	/* Only check timeouts if the current process has a timeout callback. */
8468b203d19SChangpeng Liu 	if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
8478b203d19SChangpeng Liu 		return;
8488b203d19SChangpeng Liu 	}
8498b203d19SChangpeng Liu 
8508b203d19SChangpeng Liu 	t02 = spdk_get_ticks();
8518b203d19SChangpeng Liu 	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) {
8528b203d19SChangpeng Liu 		assert(tr->req != NULL);
8538b203d19SChangpeng Liu 
8548b203d19SChangpeng Liu 		if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, t02)) {
8558b203d19SChangpeng Liu 			/*
8568b203d19SChangpeng Liu 			 * The requests are in order, so as soon as one has not timed out,
8578b203d19SChangpeng Liu 			 * stop iterating.
8588b203d19SChangpeng Liu 			 */
8598b203d19SChangpeng Liu 			break;
8608b203d19SChangpeng Liu 		}
8618b203d19SChangpeng Liu 	}
8628b203d19SChangpeng Liu }
8638b203d19SChangpeng Liu 
8648b203d19SChangpeng Liu int32_t
8658b203d19SChangpeng Liu nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
8668b203d19SChangpeng Liu {
8678b203d19SChangpeng Liu 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
8688b203d19SChangpeng Liu 	struct nvme_tracker	*tr;
8698b203d19SChangpeng Liu 	struct spdk_nvme_cpl	*cpl, *next_cpl;
8708b203d19SChangpeng Liu 	uint32_t		 num_completions = 0;
8718b203d19SChangpeng Liu 	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
8728b203d19SChangpeng Liu 	uint16_t		 next_cq_head;
8738b203d19SChangpeng Liu 	uint8_t			 next_phase;
8748b203d19SChangpeng Liu 	bool			 next_is_valid = false;
875455a5d78SMonica Kenguva 	int			 rc;
876455a5d78SMonica Kenguva 
877455a5d78SMonica Kenguva 	if (spdk_unlikely(pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED)) {
878455a5d78SMonica Kenguva 		return -ENXIO;
879455a5d78SMonica Kenguva 	}
880455a5d78SMonica Kenguva 
881e40bd531SJim Harris 	if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
882e40bd531SJim Harris 		if (pqpair->pcie_state == NVME_PCIE_QPAIR_READY) {
883e40bd531SJim Harris 			/* It is possible that another thread set the pcie_state to
884e40bd531SJim Harris 			 * QPAIR_READY, if it polled the adminq and processed the SQ
885e40bd531SJim Harris 			 * completion for this qpair.  So check for that condition
886e40bd531SJim Harris 			 * here and then update the qpair's state to CONNECTED, since
887e40bd531SJim Harris 			 * we can only set the qpair state from the qpair's thread.
888e40bd531SJim Harris 			 * (Note: this fixed issue #2157.)
889e40bd531SJim Harris 			 */
890e40bd531SJim Harris 			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
891e40bd531SJim Harris 		} else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) {
892e40bd531SJim Harris 			nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
893e40bd531SJim Harris 			return -ENXIO;
894e40bd531SJim Harris 		} else {
895455a5d78SMonica Kenguva 			rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
896455a5d78SMonica Kenguva 			if (rc < 0) {
897455a5d78SMonica Kenguva 				return rc;
898e8dd82e5SMonica Kenguva 			} else if (pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED) {
899e40bd531SJim Harris 				nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
900e8dd82e5SMonica Kenguva 				return -ENXIO;
901455a5d78SMonica Kenguva 			}
902e40bd531SJim Harris 		}
903455a5d78SMonica Kenguva 		return 0;
904455a5d78SMonica Kenguva 	}
9058b203d19SChangpeng Liu 
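	/* The admin queue is shared across threads and processes, so serialize its
	 * completion processing with the controller lock.
	 */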
9068b203d19SChangpeng Liu 	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
907e10b4806SJim Harris 		nvme_ctrlr_lock(ctrlr);
9088b203d19SChangpeng Liu 	}
9098b203d19SChangpeng Liu 
9108b203d19SChangpeng Liu 	if (max_completions == 0 || max_completions > pqpair->max_completions_cap) {
9118b203d19SChangpeng Liu 		/*
9128b203d19SChangpeng Liu 		 * max_completions == 0 means unlimited, but complete at most one
9138b203d19SChangpeng Liu 		 * max_completions_cap-sized batch of I/O at a time so that the
9148b203d19SChangpeng Liu 		 * completion queue doorbell doesn't wrap around.
9158b203d19SChangpeng Liu 		 */
9168b203d19SChangpeng Liu 		max_completions = pqpair->max_completions_cap;
9178b203d19SChangpeng Liu 	}
9188b203d19SChangpeng Liu 
919558be98fSAlexey Marchuk 	pqpair->stat->polls++;
920558be98fSAlexey Marchuk 
9218b203d19SChangpeng Liu 	while (1) {
9228b203d19SChangpeng Liu 		cpl = &pqpair->cpl[pqpair->cq_head];
9238b203d19SChangpeng Liu 
9248b203d19SChangpeng Liu 		if (!next_is_valid && cpl->status.p != pqpair->flags.phase) {
9258b203d19SChangpeng Liu 			break;
9268b203d19SChangpeng Liu 		}
9278b203d19SChangpeng Liu 
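		/*
		 * Peek at the entry after this one; if it is already valid, prefetch its
		 * tracker while the current completion is being processed.
		 */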
9288b203d19SChangpeng Liu 		if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) {
9298b203d19SChangpeng Liu 			next_cq_head = pqpair->cq_head + 1;
9308b203d19SChangpeng Liu 			next_phase = pqpair->flags.phase;
9318b203d19SChangpeng Liu 		} else {
9328b203d19SChangpeng Liu 			next_cq_head = 0;
9338b203d19SChangpeng Liu 			next_phase = !pqpair->flags.phase;
9348b203d19SChangpeng Liu 		}
9358b203d19SChangpeng Liu 		next_cpl = &pqpair->cpl[next_cq_head];
9368b203d19SChangpeng Liu 		next_is_valid = (next_cpl->status.p == next_phase);
9378b203d19SChangpeng Liu 		if (next_is_valid) {
9388b203d19SChangpeng Liu 			__builtin_prefetch(&pqpair->tr[next_cpl->cid]);
9398b203d19SChangpeng Liu 		}
9408b203d19SChangpeng Liu 
941e9a94122SXue Liu #if defined(__PPC64__) || defined(__riscv) || defined(__loongarch__)
9428b203d19SChangpeng Liu 		/*
9438b203d19SChangpeng Liu 		 * This memory barrier prevents reordering of:
9448b203d19SChangpeng Liu 		 * - loads after stores from/to tr
9458b203d19SChangpeng Liu 		 * - loads after the loads of the cpl phase and cpl cid
9468b203d19SChangpeng Liu 		 */
9478b203d19SChangpeng Liu 		spdk_mb();
9488b203d19SChangpeng Liu #elif defined(__aarch64__)
9498b203d19SChangpeng Liu 		__asm volatile("dmb oshld" ::: "memory");
9508b203d19SChangpeng Liu #endif
9518b203d19SChangpeng Liu 
9528b203d19SChangpeng Liu 		if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) {
9538b203d19SChangpeng Liu 			pqpair->cq_head = 0;
9548b203d19SChangpeng Liu 			pqpair->flags.phase = !pqpair->flags.phase;
9558b203d19SChangpeng Liu 		}
9568b203d19SChangpeng Liu 
9578b203d19SChangpeng Liu 		tr = &pqpair->tr[cpl->cid];
9587d44b36eSJim Harris 		pqpair->sq_head = cpl->sqhd;
9597d44b36eSJim Harris 
9607d44b36eSJim Harris 		if (tr->req) {
9618b203d19SChangpeng Liu 			/* Prefetch the req's STAILQ_ENTRY since we'll need to access it
9628b203d19SChangpeng Liu 			 * as part of putting the req back on the qpair's free list.
9638b203d19SChangpeng Liu 			 */
9648b203d19SChangpeng Liu 			__builtin_prefetch(&tr->req->stailq);
9658b203d19SChangpeng Liu 			nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true);
9668b203d19SChangpeng Liu 		} else {
9678b203d19SChangpeng Liu 			SPDK_ERRLOG("cpl does not map to outstanding cmd\n");
9688b203d19SChangpeng Liu 			spdk_nvme_qpair_print_completion(qpair, cpl);
9698b203d19SChangpeng Liu 			assert(0);
9708b203d19SChangpeng Liu 		}
9718b203d19SChangpeng Liu 
9728b203d19SChangpeng Liu 		if (++num_completions == max_completions) {
9738b203d19SChangpeng Liu 			break;
9748b203d19SChangpeng Liu 		}
9758b203d19SChangpeng Liu 	}
9768b203d19SChangpeng Liu 
9778b203d19SChangpeng Liu 	if (num_completions > 0) {
978558be98fSAlexey Marchuk 		pqpair->stat->completions += num_completions;
9798b203d19SChangpeng Liu 		nvme_pcie_qpair_ring_cq_doorbell(qpair);
980558be98fSAlexey Marchuk 	} else {
981558be98fSAlexey Marchuk 		pqpair->stat->idle_polls++;
9828b203d19SChangpeng Liu 	}
9838b203d19SChangpeng Liu 
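	/* With delay_cmd_submit, doorbell writes are batched: ring the SQ doorbell once
	 * here for everything queued since it was last rung.
	 */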
9848b203d19SChangpeng Liu 	if (pqpair->flags.delay_cmd_submit) {
9858b203d19SChangpeng Liu 		if (pqpair->last_sq_tail != pqpair->sq_tail) {
9868b203d19SChangpeng Liu 			nvme_pcie_qpair_ring_sq_doorbell(qpair);
9878b203d19SChangpeng Liu 			pqpair->last_sq_tail = pqpair->sq_tail;
9888b203d19SChangpeng Liu 		}
9898b203d19SChangpeng Liu 	}
9908b203d19SChangpeng Liu 
9918b203d19SChangpeng Liu 	if (spdk_unlikely(ctrlr->timeout_enabled)) {
9928b203d19SChangpeng Liu 		/*
9938b203d19SChangpeng Liu 		 * User registered for timeout callback
9948b203d19SChangpeng Liu 		 */
9958b203d19SChangpeng Liu 		nvme_pcie_qpair_check_timeout(qpair);
9968b203d19SChangpeng Liu 	}
9978b203d19SChangpeng Liu 
998736b9da0SShuhei Matsumoto 	/* Before returning, complete any pending admin request or
999736b9da0SShuhei Matsumoto 	 * process the admin qpair disconnection.
1000736b9da0SShuhei Matsumoto 	 */
10018b203d19SChangpeng Liu 	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
10028b203d19SChangpeng Liu 		nvme_pcie_qpair_complete_pending_admin_request(qpair);
10038b203d19SChangpeng Liu 
1004736b9da0SShuhei Matsumoto 		if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
1005736b9da0SShuhei Matsumoto 			rc = nvme_ctrlr_disable_poll(qpair->ctrlr);
1006a475aed5SShuhei Matsumoto 			if (rc != -EAGAIN) {
1007736b9da0SShuhei Matsumoto 				nvme_transport_ctrlr_disconnect_qpair_done(qpair);
1008736b9da0SShuhei Matsumoto 			}
1009736b9da0SShuhei Matsumoto 		}
1010736b9da0SShuhei Matsumoto 
1011e10b4806SJim Harris 		nvme_ctrlr_unlock(ctrlr);
10128b203d19SChangpeng Liu 	}
10138b203d19SChangpeng Liu 
101415b7d3baSJim Harris 	if (spdk_unlikely(pqpair->flags.has_pending_vtophys_failures)) {
101515b7d3baSJim Harris 		struct nvme_tracker *tr, *tmp;
101615b7d3baSJim Harris 
101715b7d3baSJim Harris 		TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) {
101815b7d3baSJim Harris 			if (tr->bad_vtophys) {
101915b7d3baSJim Harris 				tr->bad_vtophys = 0;
102015b7d3baSJim Harris 				nvme_pcie_fail_request_bad_vtophys(qpair, tr);
102115b7d3baSJim Harris 			}
102215b7d3baSJim Harris 		}
102315b7d3baSJim Harris 		pqpair->flags.has_pending_vtophys_failures = 0;
102415b7d3baSJim Harris 	}
102515b7d3baSJim Harris 
10268b203d19SChangpeng Liu 	return num_completions;
10278b203d19SChangpeng Liu }
10288b203d19SChangpeng Liu 
10298b203d19SChangpeng Liu int
10308b203d19SChangpeng Liu nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair)
10318b203d19SChangpeng Liu {
10328b203d19SChangpeng Liu 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
10338b203d19SChangpeng Liu 
10348b203d19SChangpeng Liu 	if (nvme_qpair_is_admin_queue(qpair)) {
10358b203d19SChangpeng Liu 		nvme_pcie_admin_qpair_destroy(qpair);
10368b203d19SChangpeng Liu 	}
10378b203d19SChangpeng Liu 	/*
10388b203d19SChangpeng Liu 	 * We check sq_vaddr and cq_vaddr to see if the user specified the memory
10398b203d19SChangpeng Liu 	 * buffers when creating the I/O queue.
10408b203d19SChangpeng Liu 	 * If the user specified them, we cannot free that memory.
10418b203d19SChangpeng Liu 	 * Nor do we free it if it's in the CMB.
10428b203d19SChangpeng Liu 	 */
10438b203d19SChangpeng Liu 	if (!pqpair->sq_vaddr && pqpair->cmd && !pqpair->sq_in_cmb) {
10448b203d19SChangpeng Liu 		spdk_free(pqpair->cmd);
10458b203d19SChangpeng Liu 	}
10468b203d19SChangpeng Liu 	if (!pqpair->cq_vaddr && pqpair->cpl) {
10478b203d19SChangpeng Liu 		spdk_free(pqpair->cpl);
10488b203d19SChangpeng Liu 	}
10498b203d19SChangpeng Liu 	if (pqpair->tr) {
10508b203d19SChangpeng Liu 		spdk_free(pqpair->tr);
10518b203d19SChangpeng Liu 	}
10528b203d19SChangpeng Liu 
10538b203d19SChangpeng Liu 	nvme_qpair_deinit(qpair);
10548b203d19SChangpeng Liu 
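	/* Free per-qpair statistics only when they are not shared with a poll group and this
	 * process owns the qpair (or no owning process was recorded).
	 */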
1055423f72daSAlexey Marchuk 	if (!pqpair->shared_stats && (!qpair->active_proc ||
1056423f72daSAlexey Marchuk 				      qpair->active_proc == nvme_ctrlr_get_current_process(qpair->ctrlr))) {
1057558be98fSAlexey Marchuk 		if (qpair->id) {
1058558be98fSAlexey Marchuk 			free(pqpair->stat);
1059558be98fSAlexey Marchuk 		} else {
1060558be98fSAlexey Marchuk 			/* statistics of the admin qpair are allocated from huge pages because
1061558be98fSAlexey Marchuk 			 * the admin qpair is shared for multi-process use */
1062558be98fSAlexey Marchuk 			spdk_free(pqpair->stat);
1063558be98fSAlexey Marchuk 		}
1064558be98fSAlexey Marchuk 
1065558be98fSAlexey Marchuk 	}
1066558be98fSAlexey Marchuk 
10678b203d19SChangpeng Liu 	spdk_free(pqpair);
10688b203d19SChangpeng Liu 
10698b203d19SChangpeng Liu 	return 0;
10708b203d19SChangpeng Liu }
10718b203d19SChangpeng Liu 
10728b203d19SChangpeng Liu struct spdk_nvme_qpair *
10738b203d19SChangpeng Liu nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
10748b203d19SChangpeng Liu 				const struct spdk_nvme_io_qpair_opts *opts)
10758b203d19SChangpeng Liu {
10768b203d19SChangpeng Liu 	struct nvme_pcie_qpair *pqpair;
10778b203d19SChangpeng Liu 	struct spdk_nvme_qpair *qpair;
10788b203d19SChangpeng Liu 	int rc;
10798b203d19SChangpeng Liu 
10808b203d19SChangpeng Liu 	assert(ctrlr != NULL);
10818b203d19SChangpeng Liu 
10828b203d19SChangpeng Liu 	pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL,
1083186b109dSJim Harris 			      SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_SHARE);
10848b203d19SChangpeng Liu 	if (pqpair == NULL) {
10858b203d19SChangpeng Liu 		return NULL;
10868b203d19SChangpeng Liu 	}
10878b203d19SChangpeng Liu 
10888b203d19SChangpeng Liu 	pqpair->num_entries = opts->io_queue_size;
10898b203d19SChangpeng Liu 	pqpair->flags.delay_cmd_submit = opts->delay_cmd_submit;
10908b203d19SChangpeng Liu 
10918b203d19SChangpeng Liu 	qpair = &pqpair->qpair;
10928b203d19SChangpeng Liu 
1093771f65bbSMonica Kenguva 	rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests, opts->async_mode);
10948b203d19SChangpeng Liu 	if (rc != 0) {
10958b203d19SChangpeng Liu 		nvme_pcie_qpair_destroy(qpair);
10968b203d19SChangpeng Liu 		return NULL;
10978b203d19SChangpeng Liu 	}
10988b203d19SChangpeng Liu 
10998b203d19SChangpeng Liu 	rc = nvme_pcie_qpair_construct(qpair, opts);
11008b203d19SChangpeng Liu 
11018b203d19SChangpeng Liu 	if (rc != 0) {
11028b203d19SChangpeng Liu 		nvme_pcie_qpair_destroy(qpair);
11038b203d19SChangpeng Liu 		return NULL;
11048b203d19SChangpeng Liu 	}
11058b203d19SChangpeng Liu 
11068b203d19SChangpeng Liu 	return qpair;
11078b203d19SChangpeng Liu }
11088b203d19SChangpeng Liu 
11098b203d19SChangpeng Liu int
11108b203d19SChangpeng Liu nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
11118b203d19SChangpeng Liu {
111238736d99SJim Harris 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
11138b203d19SChangpeng Liu 	struct nvme_completion_poll_status *status;
11148b203d19SChangpeng Liu 	int rc;
11158b203d19SChangpeng Liu 
11168b203d19SChangpeng Liu 	assert(ctrlr != NULL);
11178b203d19SChangpeng Liu 
11188b203d19SChangpeng Liu 	if (ctrlr->is_removed) {
11198b203d19SChangpeng Liu 		goto free;
11208b203d19SChangpeng Liu 	}
11218b203d19SChangpeng Liu 
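	/* When the controller is being prepared for a reset, skip the admin commands that delete
	 * the SQ/CQ and only clear the shadow doorbells; the upcoming reset tears the queues down.
	 * If the qpair is still connecting, defer its destruction until the connect path is done
	 * with it.
	 */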
11223cb9bc25SJim Harris 	if (ctrlr->prepare_for_reset) {
11233cb9bc25SJim Harris 		if (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING) {
11243cb9bc25SJim Harris 			pqpair->flags.defer_destruction = true;
11253cb9bc25SJim Harris 		}
11263cb9bc25SJim Harris 		goto clear_shadow_doorbells;
11273cb9bc25SJim Harris 	}
11283cb9bc25SJim Harris 
1129662c0200SKonrad Sztyber 	/* If attempting to delete a qpair that's still being connected, we have to wait until it's
1130662c0200SKonrad Sztyber 	 * finished, so that we don't free it while it's waiting for the create cq/sq callbacks.
1131662c0200SKonrad Sztyber 	 */
113297277e14SChangpeng Liu 	while (pqpair->pcie_state == NVME_PCIE_QPAIR_WAIT_FOR_CQ ||
113397277e14SChangpeng Liu 	       pqpair->pcie_state == NVME_PCIE_QPAIR_WAIT_FOR_SQ) {
1134662c0200SKonrad Sztyber 		rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1135662c0200SKonrad Sztyber 		if (rc < 0) {
1136662c0200SKonrad Sztyber 			break;
1137662c0200SKonrad Sztyber 		}
1138662c0200SKonrad Sztyber 	}
1139662c0200SKonrad Sztyber 
11408b203d19SChangpeng Liu 	status = calloc(1, sizeof(*status));
11418b203d19SChangpeng Liu 	if (!status) {
11428b203d19SChangpeng Liu 		SPDK_ERRLOG("Failed to allocate status tracker\n");
1143c081a84cSJim Harris 		goto free;
11448b203d19SChangpeng Liu 	}
11458b203d19SChangpeng Liu 
11468b203d19SChangpeng Liu 	/* Delete the I/O submission queue */
11478b203d19SChangpeng Liu 	rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, status);
11488b203d19SChangpeng Liu 	if (rc != 0) {
11498b203d19SChangpeng Liu 		SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc);
11508b203d19SChangpeng Liu 		free(status);
1151c081a84cSJim Harris 		goto free;
11528b203d19SChangpeng Liu 	}
11538b203d19SChangpeng Liu 	if (nvme_wait_for_completion(ctrlr->adminq, status)) {
11548b203d19SChangpeng Liu 		if (!status->timed_out) {
11558b203d19SChangpeng Liu 			free(status);
11568b203d19SChangpeng Liu 		}
1157c081a84cSJim Harris 		goto free;
11588b203d19SChangpeng Liu 	}
11598b203d19SChangpeng Liu 
11608b203d19SChangpeng Liu 	/* Now that the submission queue is deleted, the device is supposed to have
11618b203d19SChangpeng Liu 	 * completed any outstanding I/O. Try to complete them. If they don't complete,
11628b203d19SChangpeng Liu 	 * they'll be marked as aborted and completed below. */
1163dbecab8dSChangpeng Liu 	if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) {
11648b203d19SChangpeng Liu 		nvme_pcie_qpair_process_completions(qpair, 0);
1165dbecab8dSChangpeng Liu 	}
11668b203d19SChangpeng Liu 
11678b203d19SChangpeng Liu 	memset(status, 0, sizeof(*status));
11688b203d19SChangpeng Liu 	/* Delete the completion queue */
11698b203d19SChangpeng Liu 	rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status);
11708b203d19SChangpeng Liu 	if (rc != 0) {
11718b203d19SChangpeng Liu 		SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
11728b203d19SChangpeng Liu 		free(status);
1173c081a84cSJim Harris 		goto free;
11748b203d19SChangpeng Liu 	}
11758b203d19SChangpeng Liu 	if (nvme_wait_for_completion(ctrlr->adminq, status)) {
11768b203d19SChangpeng Liu 		if (!status->timed_out) {
11778b203d19SChangpeng Liu 			free(status);
11788b203d19SChangpeng Liu 		}
1179c081a84cSJim Harris 		goto free;
11808b203d19SChangpeng Liu 	}
11818b203d19SChangpeng Liu 	free(status);
11828b203d19SChangpeng Liu 
11833cb9bc25SJim Harris clear_shadow_doorbells:
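	/* Reset the shadow doorbell values so a queue later created with the same id does not
	 * observe stale tail/head or event index values.
	 */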
118446e531bdSSebastian Brzezinka 	if (pqpair->flags.has_shadow_doorbell && ctrlr->shadow_doorbell) {
118538736d99SJim Harris 		*pqpair->shadow_doorbell.sq_tdbl = 0;
118638736d99SJim Harris 		*pqpair->shadow_doorbell.cq_hdbl = 0;
118738736d99SJim Harris 		*pqpair->shadow_doorbell.sq_eventidx = 0;
118838736d99SJim Harris 		*pqpair->shadow_doorbell.cq_eventidx = 0;
118938736d99SJim Harris 	}
11908b203d19SChangpeng Liu free:
11918b203d19SChangpeng Liu 	if (qpair->no_deletion_notification_needed == 0) {
11928b203d19SChangpeng Liu 		/* Abort the rest of the I/O */
11938b203d19SChangpeng Liu 		nvme_pcie_qpair_abort_trackers(qpair, 1);
11948b203d19SChangpeng Liu 	}
11958b203d19SChangpeng Liu 
11963cb9bc25SJim Harris 	if (!pqpair->flags.defer_destruction) {
11978b203d19SChangpeng Liu 		nvme_pcie_qpair_destroy(qpair);
11983cb9bc25SJim Harris 	}
11998b203d19SChangpeng Liu 	return 0;
12008b203d19SChangpeng Liu }
12018b203d19SChangpeng Liu 
1202b69827a3SChangpeng Liu static void
1203b69827a3SChangpeng Liu nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
1204b69827a3SChangpeng Liu {
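	/* Outside the completion context (i.e. on the submission path), defer the failure: mark
	 * the tracker and let the next completion poll fail it, so the request is always
	 * completed via its callback rather than synchronously during submit.
	 */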
120515b7d3baSJim Harris 	if (!qpair->in_completion_context) {
120615b7d3baSJim Harris 		struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
120715b7d3baSJim Harris 
120815b7d3baSJim Harris 		tr->bad_vtophys = 1;
120915b7d3baSJim Harris 		pqpair->flags.has_pending_vtophys_failures = 1;
121015b7d3baSJim Harris 		return;
121115b7d3baSJim Harris 	}
121215b7d3baSJim Harris 
1213b69827a3SChangpeng Liu 	/*
1214b69827a3SChangpeng Liu 	 * Bad vtophys translation, so abort this request and return
1215b69827a3SChangpeng Liu 	 *  immediately.
1216b69827a3SChangpeng Liu 	 */
121715b7d3baSJim Harris 	SPDK_ERRLOG("vtophys or other payload buffer related error\n");
1218b69827a3SChangpeng Liu 	nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
1219b69827a3SChangpeng Liu 						SPDK_NVME_SC_INVALID_FIELD,
1220b69827a3SChangpeng Liu 						1 /* do not retry */, true);
1221b69827a3SChangpeng Liu }
1222b69827a3SChangpeng Liu 
1223b69827a3SChangpeng Liu /*
1224b69827a3SChangpeng Liu  * Append PRP list entries to describe a virtually contiguous buffer starting at virt_addr of len bytes.
1225b69827a3SChangpeng Liu  *
1226b69827a3SChangpeng Liu  * *prp_index will be updated to account for the number of PRP entries used.
1227b69827a3SChangpeng Liu  */
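/*
 * Illustrative example (assuming a 4 KiB page size): a page-aligned 10 KiB buffer consumes
 * three PRP entries. Entry 0 goes into prp1, entries 1 and 2 are written to tr->u.prp[],
 * and prp2 is set to the bus address of that PRP list.
 */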
1228b69827a3SChangpeng Liu static inline int
1229d4844d5bSChangpeng Liu nvme_pcie_prp_list_append(struct spdk_nvme_ctrlr *ctrlr, struct nvme_tracker *tr,
1230d4844d5bSChangpeng Liu 			  uint32_t *prp_index, void *virt_addr, size_t len,
1231b69827a3SChangpeng Liu 			  uint32_t page_size)
1232b69827a3SChangpeng Liu {
1233b69827a3SChangpeng Liu 	struct spdk_nvme_cmd *cmd = &tr->req->cmd;
1234b69827a3SChangpeng Liu 	uintptr_t page_mask = page_size - 1;
1235b69827a3SChangpeng Liu 	uint64_t phys_addr;
1236b69827a3SChangpeng Liu 	uint32_t i;
1237b69827a3SChangpeng Liu 
1238b69827a3SChangpeng Liu 	SPDK_DEBUGLOG(nvme, "prp_index:%u virt_addr:%p len:%u\n",
1239b69827a3SChangpeng Liu 		      *prp_index, virt_addr, (uint32_t)len);
1240b69827a3SChangpeng Liu 
1241b69827a3SChangpeng Liu 	if (spdk_unlikely(((uintptr_t)virt_addr & 3) != 0)) {
1242b69827a3SChangpeng Liu 		SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
1243b69827a3SChangpeng Liu 		return -EFAULT;
1244b69827a3SChangpeng Liu 	}
1245b69827a3SChangpeng Liu 
1246b69827a3SChangpeng Liu 	i = *prp_index;
1247b69827a3SChangpeng Liu 	while (len) {
1248b69827a3SChangpeng Liu 		uint32_t seg_len;
1249b69827a3SChangpeng Liu 
1250b69827a3SChangpeng Liu 		/*
1251b69827a3SChangpeng Liu 		 * prp_index 0 is stored in prp1, and the rest are stored in the prp[] array,
1252b69827a3SChangpeng Liu 		 * so a prp_index equal to the entry count (SPDK_COUNTOF(tr->u.prp)) is still valid.
1253b69827a3SChangpeng Liu 		 */
1254b69827a3SChangpeng Liu 		if (spdk_unlikely(i > SPDK_COUNTOF(tr->u.prp))) {
1255b69827a3SChangpeng Liu 			SPDK_ERRLOG("out of PRP entries\n");
1256b69827a3SChangpeng Liu 			return -EFAULT;
1257b69827a3SChangpeng Liu 		}
1258b69827a3SChangpeng Liu 
1259d4844d5bSChangpeng Liu 		phys_addr = nvme_pcie_vtophys(ctrlr, virt_addr, NULL);
1260b69827a3SChangpeng Liu 		if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR)) {
1261b69827a3SChangpeng Liu 			SPDK_ERRLOG("vtophys(%p) failed\n", virt_addr);
1262b69827a3SChangpeng Liu 			return -EFAULT;
1263b69827a3SChangpeng Liu 		}
1264b69827a3SChangpeng Liu 
1265b69827a3SChangpeng Liu 		if (i == 0) {
1266b69827a3SChangpeng Liu 			SPDK_DEBUGLOG(nvme, "prp1 = %p\n", (void *)phys_addr);
1267b69827a3SChangpeng Liu 			cmd->dptr.prp.prp1 = phys_addr;
1268b69827a3SChangpeng Liu 			seg_len = page_size - ((uintptr_t)virt_addr & page_mask);
1269b69827a3SChangpeng Liu 		} else {
1270b69827a3SChangpeng Liu 			if ((phys_addr & page_mask) != 0) {
1271b69827a3SChangpeng Liu 				SPDK_ERRLOG("PRP %u not page aligned (%p)\n", i, virt_addr);
1272b69827a3SChangpeng Liu 				return -EFAULT;
1273b69827a3SChangpeng Liu 			}
1274b69827a3SChangpeng Liu 
1275b69827a3SChangpeng Liu 			SPDK_DEBUGLOG(nvme, "prp[%u] = %p\n", i - 1, (void *)phys_addr);
1276b69827a3SChangpeng Liu 			tr->u.prp[i - 1] = phys_addr;
1277b69827a3SChangpeng Liu 			seg_len = page_size;
1278b69827a3SChangpeng Liu 		}
1279b69827a3SChangpeng Liu 
1280b69827a3SChangpeng Liu 		seg_len = spdk_min(seg_len, len);
1281075d422fSKonrad Sztyber 		virt_addr = (uint8_t *)virt_addr + seg_len;
1282b69827a3SChangpeng Liu 		len -= seg_len;
1283b69827a3SChangpeng Liu 		i++;
1284b69827a3SChangpeng Liu 	}
1285b69827a3SChangpeng Liu 
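	/* Fill DPTR per the PRP rules: prp2 is unused when a single entry covers the transfer,
	 * holds the second page address when exactly two entries are needed, and otherwise
	 * points to the PRP list in the tracker.
	 */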
1286b69827a3SChangpeng Liu 	cmd->psdt = SPDK_NVME_PSDT_PRP;
1287b69827a3SChangpeng Liu 	if (i <= 1) {
1288b69827a3SChangpeng Liu 		cmd->dptr.prp.prp2 = 0;
1289b69827a3SChangpeng Liu 	} else if (i == 2) {
1290b69827a3SChangpeng Liu 		cmd->dptr.prp.prp2 = tr->u.prp[0];
1291b69827a3SChangpeng Liu 		SPDK_DEBUGLOG(nvme, "prp2 = %p\n", (void *)cmd->dptr.prp.prp2);
1292b69827a3SChangpeng Liu 	} else {
1293b69827a3SChangpeng Liu 		cmd->dptr.prp.prp2 = tr->prp_sgl_bus_addr;
1294b69827a3SChangpeng Liu 		SPDK_DEBUGLOG(nvme, "prp2 = %p (PRP list)\n", (void *)cmd->dptr.prp.prp2);
1295b69827a3SChangpeng Liu 	}
1296b69827a3SChangpeng Liu 
1297b69827a3SChangpeng Liu 	*prp_index = i;
1298b69827a3SChangpeng Liu 	return 0;
1299b69827a3SChangpeng Liu }
1300b69827a3SChangpeng Liu 
1301b69827a3SChangpeng Liu static int
1302b69827a3SChangpeng Liu nvme_pcie_qpair_build_request_invalid(struct spdk_nvme_qpair *qpair,
1303b69827a3SChangpeng Liu 				      struct nvme_request *req, struct nvme_tracker *tr, bool dword_aligned)
1304b69827a3SChangpeng Liu {
1305b69827a3SChangpeng Liu 	assert(0);
1306b69827a3SChangpeng Liu 	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1307b69827a3SChangpeng Liu 	return -EINVAL;
1308b69827a3SChangpeng Liu }
1309b69827a3SChangpeng Liu 
1310b69827a3SChangpeng Liu /**
1311b69827a3SChangpeng Liu  * Build a PRP list describing a physically contiguous payload buffer.
1312b69827a3SChangpeng Liu  */
1313b69827a3SChangpeng Liu static int
1314b69827a3SChangpeng Liu nvme_pcie_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
1315b69827a3SChangpeng Liu 				     struct nvme_tracker *tr, bool dword_aligned)
1316b69827a3SChangpeng Liu {
1317b69827a3SChangpeng Liu 	uint32_t prp_index = 0;
1318b69827a3SChangpeng Liu 	int rc;
1319b69827a3SChangpeng Liu 
1320d4844d5bSChangpeng Liu 	rc = nvme_pcie_prp_list_append(qpair->ctrlr, tr, &prp_index,
1321075d422fSKonrad Sztyber 				       (uint8_t *)req->payload.contig_or_cb_arg + req->payload_offset,
1322b69827a3SChangpeng Liu 				       req->payload_size, qpair->ctrlr->page_size);
1323b69827a3SChangpeng Liu 	if (rc) {
1324b69827a3SChangpeng Liu 		nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1325a1a2e2b4SVincent Fu 	} else {
1326a1a2e2b4SVincent Fu 		SPDK_DEBUGLOG(nvme, "Number of PRP entries: %" PRIu32 "\n", prp_index);
1327b69827a3SChangpeng Liu 	}
1328b69827a3SChangpeng Liu 
1329b69827a3SChangpeng Liu 	return rc;
1330b69827a3SChangpeng Liu }
1331b69827a3SChangpeng Liu 
1332b69827a3SChangpeng Liu /**
1333b69827a3SChangpeng Liu  * Build an SGL describing a physically contiguous payload buffer.
1334b69827a3SChangpeng Liu  *
1335b69827a3SChangpeng Liu  * This is more efficient than using PRPs because a large physically contiguous
1336b69827a3SChangpeng Liu  * buffer can be described with a single descriptor.
1337b69827a3SChangpeng Liu  */
1338b69827a3SChangpeng Liu static int
1339b69827a3SChangpeng Liu nvme_pcie_qpair_build_contig_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
1340b69827a3SChangpeng Liu 		struct nvme_tracker *tr, bool dword_aligned)
1341b69827a3SChangpeng Liu {
1342075d422fSKonrad Sztyber 	uint8_t *virt_addr;
1343b69827a3SChangpeng Liu 	uint64_t phys_addr, mapping_length;
1344b69827a3SChangpeng Liu 	uint32_t length;
1345b69827a3SChangpeng Liu 	struct spdk_nvme_sgl_descriptor *sgl;
1346b69827a3SChangpeng Liu 	uint32_t nseg = 0;
1347b69827a3SChangpeng Liu 
1348b69827a3SChangpeng Liu 	assert(req->payload_size != 0);
1349b69827a3SChangpeng Liu 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
1350b69827a3SChangpeng Liu 
1351b69827a3SChangpeng Liu 	sgl = tr->u.sgl;
1352b69827a3SChangpeng Liu 	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
1353b69827a3SChangpeng Liu 	req->cmd.dptr.sgl1.unkeyed.subtype = 0;
1354b69827a3SChangpeng Liu 
1355b69827a3SChangpeng Liu 	length = req->payload_size;
1356075d422fSKonrad Sztyber 	/* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL,
1357075d422fSKonrad Sztyber 	 * so just double cast it to make it go away */
1358075d422fSKonrad Sztyber 	virt_addr = (uint8_t *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset);
1359b69827a3SChangpeng Liu 
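	/* nvme_pcie_vtophys() may return a mapping shorter than requested, so walk the buffer
	 * and emit one data block descriptor per physically contiguous chunk.
	 */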
1360b69827a3SChangpeng Liu 	while (length > 0) {
1361b69827a3SChangpeng Liu 		if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
1362b69827a3SChangpeng Liu 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1363b69827a3SChangpeng Liu 			return -EFAULT;
1364b69827a3SChangpeng Liu 		}
1365b69827a3SChangpeng Liu 
1366b69827a3SChangpeng Liu 		if (dword_aligned && ((uintptr_t)virt_addr & 3)) {
1367b69827a3SChangpeng Liu 			SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
1368b69827a3SChangpeng Liu 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1369b69827a3SChangpeng Liu 			return -EFAULT;
1370b69827a3SChangpeng Liu 		}
1371b69827a3SChangpeng Liu 
1372b69827a3SChangpeng Liu 		mapping_length = length;
1373d4844d5bSChangpeng Liu 		phys_addr = nvme_pcie_vtophys(qpair->ctrlr, virt_addr, &mapping_length);
1374b69827a3SChangpeng Liu 		if (phys_addr == SPDK_VTOPHYS_ERROR) {
1375b69827a3SChangpeng Liu 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1376b69827a3SChangpeng Liu 			return -EFAULT;
1377b69827a3SChangpeng Liu 		}
1378b69827a3SChangpeng Liu 
1379b69827a3SChangpeng Liu 		mapping_length = spdk_min(length, mapping_length);
1380b69827a3SChangpeng Liu 
1381b69827a3SChangpeng Liu 		length -= mapping_length;
1382b69827a3SChangpeng Liu 		virt_addr += mapping_length;
1383b69827a3SChangpeng Liu 
1384b69827a3SChangpeng Liu 		sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1385b69827a3SChangpeng Liu 		sgl->unkeyed.length = mapping_length;
1386b69827a3SChangpeng Liu 		sgl->address = phys_addr;
1387b69827a3SChangpeng Liu 		sgl->unkeyed.subtype = 0;
1388b69827a3SChangpeng Liu 
1389b69827a3SChangpeng Liu 		sgl++;
1390b69827a3SChangpeng Liu 		nseg++;
1391b69827a3SChangpeng Liu 	}
1392b69827a3SChangpeng Liu 
1393b69827a3SChangpeng Liu 	if (nseg == 1) {
1394b69827a3SChangpeng Liu 		/*
1395b69827a3SChangpeng Liu 		 * The whole transfer can be described by a single SGL descriptor.
1396b69827a3SChangpeng Liu 		 *  Use the special case described by the spec where SGL1's type is Data Block.
1397b69827a3SChangpeng Liu 		 *  This means the SGL in the tracker is not used at all, so copy the first (and only)
1398b69827a3SChangpeng Liu 		 *  SGL element into SGL1.
1399b69827a3SChangpeng Liu 		 */
1400b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1401b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
1402b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
1403b69827a3SChangpeng Liu 	} else {
1404b69827a3SChangpeng Liu 		/* The SPDK NVMe driver supports only one SGL segment for now; this is enough because
1405b69827a3SChangpeng Liu 		 *  NVME_MAX_SGL_DESCRIPTORS * 16 bytes is less than one page.
1406b69827a3SChangpeng Liu 		 */
1407b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
1408b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
1409b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor);
1410b69827a3SChangpeng Liu 	}
1411b69827a3SChangpeng Liu 
1412a1a2e2b4SVincent Fu 	SPDK_DEBUGLOG(nvme, "Number of SGL descriptors: %" PRIu32 "\n", nseg);
1413b69827a3SChangpeng Liu 	return 0;
1414b69827a3SChangpeng Liu }
1415b69827a3SChangpeng Liu 
1416b69827a3SChangpeng Liu /**
1417b69827a3SChangpeng Liu  * Build an SGL describing a scattered payload buffer.
1418b69827a3SChangpeng Liu  */
1419b69827a3SChangpeng Liu static int
1420b69827a3SChangpeng Liu nvme_pcie_qpair_build_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
1421b69827a3SChangpeng Liu 				     struct nvme_tracker *tr, bool dword_aligned)
1422b69827a3SChangpeng Liu {
1423b69827a3SChangpeng Liu 	int rc;
1424b69827a3SChangpeng Liu 	void *virt_addr;
1425b69827a3SChangpeng Liu 	uint64_t phys_addr, mapping_length;
1426b69827a3SChangpeng Liu 	uint32_t remaining_transfer_len, remaining_user_sge_len, length;
1427b69827a3SChangpeng Liu 	struct spdk_nvme_sgl_descriptor *sgl;
1428b69827a3SChangpeng Liu 	uint32_t nseg = 0;
1429e431ba2eSVincent Fu 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
1430b69827a3SChangpeng Liu 
1431b69827a3SChangpeng Liu 	/*
1432b69827a3SChangpeng Liu 	 * Build scattered payloads.
1433b69827a3SChangpeng Liu 	 */
1434b69827a3SChangpeng Liu 	assert(req->payload_size != 0);
1435b69827a3SChangpeng Liu 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
1436b69827a3SChangpeng Liu 	assert(req->payload.reset_sgl_fn != NULL);
1437b69827a3SChangpeng Liu 	assert(req->payload.next_sge_fn != NULL);
1438b69827a3SChangpeng Liu 	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
1439b69827a3SChangpeng Liu 
1440b69827a3SChangpeng Liu 	sgl = tr->u.sgl;
1441b69827a3SChangpeng Liu 	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
1442b69827a3SChangpeng Liu 	req->cmd.dptr.sgl1.unkeyed.subtype = 0;
1443b69827a3SChangpeng Liu 
1444b69827a3SChangpeng Liu 	remaining_transfer_len = req->payload_size;
1445b69827a3SChangpeng Liu 
1446b69827a3SChangpeng Liu 	while (remaining_transfer_len > 0) {
1447b69827a3SChangpeng Liu 		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg,
1448b69827a3SChangpeng Liu 					      &virt_addr, &remaining_user_sge_len);
1449b69827a3SChangpeng Liu 		if (rc) {
1450b69827a3SChangpeng Liu 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1451b69827a3SChangpeng Liu 			return -EFAULT;
1452b69827a3SChangpeng Liu 		}
1453b69827a3SChangpeng Liu 
1454b69827a3SChangpeng Liu 		/* Bit Bucket SGL descriptor */
1455b69827a3SChangpeng Liu 		if ((uint64_t)virt_addr == UINT64_MAX) {
1456b69827a3SChangpeng Liu 			/* TODO: enable WRITE and COMPARE when necessary */
1457b69827a3SChangpeng Liu 			if (req->cmd.opc != SPDK_NVME_OPC_READ) {
1458b69827a3SChangpeng Liu 				SPDK_ERRLOG("Only the READ command is supported\n");
1459b69827a3SChangpeng Liu 				goto exit;
1460b69827a3SChangpeng Liu 			}
1461b69827a3SChangpeng Liu 			if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
1462b69827a3SChangpeng Liu 				SPDK_ERRLOG("Too many SGL entries\n");
1463b69827a3SChangpeng Liu 				goto exit;
1464b69827a3SChangpeng Liu 			}
1465b69827a3SChangpeng Liu 
1466b69827a3SChangpeng Liu 			sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_BIT_BUCKET;
1467b69827a3SChangpeng Liu 			/* If the SGL describes a destination data buffer, the data of the specified
1468b69827a3SChangpeng Liu 			 * length shall be discarded by the controller, and that length is included
1469b69827a3SChangpeng Liu 			 * in the Number of Logical Blocks (NLB) parameter. Otherwise, the length
1470b69827a3SChangpeng Liu 			 * is not included in the NLB parameter.
1471b69827a3SChangpeng Liu 			 */
1472b69827a3SChangpeng Liu 			remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len);
1473b69827a3SChangpeng Liu 			remaining_transfer_len -= remaining_user_sge_len;
1474b69827a3SChangpeng Liu 
1475b69827a3SChangpeng Liu 			sgl->unkeyed.length = remaining_user_sge_len;
1476b69827a3SChangpeng Liu 			sgl->address = 0;
1477b69827a3SChangpeng Liu 			sgl->unkeyed.subtype = 0;
1478b69827a3SChangpeng Liu 
1479b69827a3SChangpeng Liu 			sgl++;
1480b69827a3SChangpeng Liu 			nseg++;
1481b69827a3SChangpeng Liu 
1482b69827a3SChangpeng Liu 			continue;
1483b69827a3SChangpeng Liu 		}
1484b69827a3SChangpeng Liu 
1485b69827a3SChangpeng Liu 		remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len);
1486b69827a3SChangpeng Liu 		remaining_transfer_len -= remaining_user_sge_len;
1487b69827a3SChangpeng Liu 		while (remaining_user_sge_len > 0) {
1488b69827a3SChangpeng Liu 			if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
1489b69827a3SChangpeng Liu 				SPDK_ERRLOG("Too many SGL entries\n");
1490b69827a3SChangpeng Liu 				goto exit;
1491b69827a3SChangpeng Liu 			}
1492b69827a3SChangpeng Liu 
1493b69827a3SChangpeng Liu 			if (dword_aligned && ((uintptr_t)virt_addr & 3)) {
1494b69827a3SChangpeng Liu 				SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
1495b69827a3SChangpeng Liu 				goto exit;
1496b69827a3SChangpeng Liu 			}
1497b69827a3SChangpeng Liu 
1498b69827a3SChangpeng Liu 			mapping_length = remaining_user_sge_len;
1499d4844d5bSChangpeng Liu 			phys_addr = nvme_pcie_vtophys(qpair->ctrlr, virt_addr, &mapping_length);
1500b69827a3SChangpeng Liu 			if (phys_addr == SPDK_VTOPHYS_ERROR) {
1501b69827a3SChangpeng Liu 				goto exit;
1502b69827a3SChangpeng Liu 			}
1503b69827a3SChangpeng Liu 
1504b69827a3SChangpeng Liu 			length = spdk_min(remaining_user_sge_len, mapping_length);
1505b69827a3SChangpeng Liu 			remaining_user_sge_len -= length;
1506075d422fSKonrad Sztyber 			virt_addr = (uint8_t *)virt_addr + length;
1507b69827a3SChangpeng Liu 
1508e431ba2eSVincent Fu 			if (!pqpair->flags.disable_pcie_sgl_merge && nseg > 0 &&
1509e431ba2eSVincent Fu 			    phys_addr == (*(sgl - 1)).address + (*(sgl - 1)).unkeyed.length) {
1510b69827a3SChangpeng Liu 				/* extend previous entry */
1511b69827a3SChangpeng Liu 				(*(sgl - 1)).unkeyed.length += length;
1512b69827a3SChangpeng Liu 				continue;
1513b69827a3SChangpeng Liu 			}
1514b69827a3SChangpeng Liu 
1515b69827a3SChangpeng Liu 			sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1516b69827a3SChangpeng Liu 			sgl->unkeyed.length = length;
1517b69827a3SChangpeng Liu 			sgl->address = phys_addr;
1518b69827a3SChangpeng Liu 			sgl->unkeyed.subtype = 0;
1519b69827a3SChangpeng Liu 
1520b69827a3SChangpeng Liu 			sgl++;
1521b69827a3SChangpeng Liu 			nseg++;
1522b69827a3SChangpeng Liu 		}
1523b69827a3SChangpeng Liu 	}
1524b69827a3SChangpeng Liu 
1525b69827a3SChangpeng Liu 	if (nseg == 1) {
1526b69827a3SChangpeng Liu 		/*
1527b69827a3SChangpeng Liu 		 * The whole transfer can be described by a single SGL descriptor.
1528b69827a3SChangpeng Liu 		 *  Use the special case described by the spec where SGL1's type is Data Block.
1529b69827a3SChangpeng Liu 		 *  This means the SGL in the tracker is not used at all, so copy the first (and only)
1530b69827a3SChangpeng Liu 		 *  SGL element into SGL1.
1531b69827a3SChangpeng Liu 		 */
1532b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1533b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
1534b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
1535b69827a3SChangpeng Liu 	} else {
1536b69827a3SChangpeng Liu 		/* The SPDK NVMe driver supports only one SGL segment for now; this is enough because
1537b69827a3SChangpeng Liu 		 *  NVME_MAX_SGL_DESCRIPTORS * 16 bytes is less than one page.
1538b69827a3SChangpeng Liu 		 */
1539b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
1540b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
1541b69827a3SChangpeng Liu 		req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor);
1542b69827a3SChangpeng Liu 	}
1543b69827a3SChangpeng Liu 
1544a1a2e2b4SVincent Fu 	SPDK_DEBUGLOG(nvme, "Number of SGL descriptors: %" PRIu32 "\n", nseg);
1545b69827a3SChangpeng Liu 	return 0;
1546b69827a3SChangpeng Liu 
1547b69827a3SChangpeng Liu exit:
1548b69827a3SChangpeng Liu 	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1549b69827a3SChangpeng Liu 	return -EFAULT;
1550b69827a3SChangpeng Liu }
1551b69827a3SChangpeng Liu 
1552b69827a3SChangpeng Liu /**
1553b69827a3SChangpeng Liu  * Build a PRP list describing a scattered payload buffer.
1554b69827a3SChangpeng Liu  */
1555b69827a3SChangpeng Liu static int
1556b69827a3SChangpeng Liu nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
1557b69827a3SChangpeng Liu 				       struct nvme_tracker *tr, bool dword_aligned)
1558b69827a3SChangpeng Liu {
1559b69827a3SChangpeng Liu 	int rc;
1560b69827a3SChangpeng Liu 	void *virt_addr;
1561b69827a3SChangpeng Liu 	uint32_t remaining_transfer_len, length;
1562b69827a3SChangpeng Liu 	uint32_t prp_index = 0;
1563b69827a3SChangpeng Liu 	uint32_t page_size = qpair->ctrlr->page_size;
1564b69827a3SChangpeng Liu 
1565b69827a3SChangpeng Liu 	/*
1566b69827a3SChangpeng Liu 	 * Build scattered payloads.
1567b69827a3SChangpeng Liu 	 */
1568b69827a3SChangpeng Liu 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
1569b69827a3SChangpeng Liu 	assert(req->payload.reset_sgl_fn != NULL);
1570b69827a3SChangpeng Liu 	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
1571b69827a3SChangpeng Liu 
1572b69827a3SChangpeng Liu 	remaining_transfer_len = req->payload_size;
1573b69827a3SChangpeng Liu 	while (remaining_transfer_len > 0) {
1574b69827a3SChangpeng Liu 		assert(req->payload.next_sge_fn != NULL);
1575b69827a3SChangpeng Liu 		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length);
1576b69827a3SChangpeng Liu 		if (rc) {
1577b69827a3SChangpeng Liu 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1578b69827a3SChangpeng Liu 			return -EFAULT;
1579b69827a3SChangpeng Liu 		}
1580b69827a3SChangpeng Liu 
1581b69827a3SChangpeng Liu 		length = spdk_min(remaining_transfer_len, length);
1582b69827a3SChangpeng Liu 
1583b69827a3SChangpeng Liu 		/*
1584b69827a3SChangpeng Liu 		 * Any incompatible sges should have been handled up in the splitting routine,
1585b69827a3SChangpeng Liu 		 *  but assert here as an additional check.
1586b69827a3SChangpeng Liu 		 *
1587b69827a3SChangpeng Liu 		 * All SGEs except last must end on a page boundary.
1588b69827a3SChangpeng Liu 		 */
1589b69827a3SChangpeng Liu 		assert((length == remaining_transfer_len) ||
1590b69827a3SChangpeng Liu 		       _is_page_aligned((uintptr_t)virt_addr + length, page_size));
1591b69827a3SChangpeng Liu 
1592d4844d5bSChangpeng Liu 		rc = nvme_pcie_prp_list_append(qpair->ctrlr, tr, &prp_index, virt_addr, length, page_size);
1593b69827a3SChangpeng Liu 		if (rc) {
1594b69827a3SChangpeng Liu 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1595b69827a3SChangpeng Liu 			return rc;
1596b69827a3SChangpeng Liu 		}
1597b69827a3SChangpeng Liu 
1598b69827a3SChangpeng Liu 		remaining_transfer_len -= length;
1599b69827a3SChangpeng Liu 	}
1600b69827a3SChangpeng Liu 
1601a1a2e2b4SVincent Fu 	SPDK_DEBUGLOG(nvme, "Number of PRP entries: %" PRIu32 "\n", prp_index);
1602b69827a3SChangpeng Liu 	return 0;
1603b69827a3SChangpeng Liu }
1604b69827a3SChangpeng Liu 
1605b69827a3SChangpeng Liu typedef int(*build_req_fn)(struct spdk_nvme_qpair *, struct nvme_request *, struct nvme_tracker *,
1606b69827a3SChangpeng Liu 			   bool);
1607b69827a3SChangpeng Liu 
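/* Request builders indexed by [payload type][use hardware SGL]: column 0 builds PRP-based
 * requests, column 1 builds SGL-based requests.
 */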
1608b69827a3SChangpeng Liu static build_req_fn const g_nvme_pcie_build_req_table[][2] = {
1609b69827a3SChangpeng Liu 	[NVME_PAYLOAD_TYPE_INVALID] = {
1610b69827a3SChangpeng Liu 		nvme_pcie_qpair_build_request_invalid,			/* PRP */
1611b69827a3SChangpeng Liu 		nvme_pcie_qpair_build_request_invalid			/* SGL */
1612b69827a3SChangpeng Liu 	},
1613b69827a3SChangpeng Liu 	[NVME_PAYLOAD_TYPE_CONTIG] = {
1614b69827a3SChangpeng Liu 		nvme_pcie_qpair_build_contig_request,			/* PRP */
1615b69827a3SChangpeng Liu 		nvme_pcie_qpair_build_contig_hw_sgl_request		/* SGL */
1616b69827a3SChangpeng Liu 	},
1617b69827a3SChangpeng Liu 	[NVME_PAYLOAD_TYPE_SGL] = {
1618b69827a3SChangpeng Liu 		nvme_pcie_qpair_build_prps_sgl_request,			/* PRP */
1619b69827a3SChangpeng Liu 		nvme_pcie_qpair_build_hw_sgl_request			/* SGL */
1620b69827a3SChangpeng Liu 	}
1621b69827a3SChangpeng Liu };
1622b69827a3SChangpeng Liu 
1623b69827a3SChangpeng Liu static int
1624b69827a3SChangpeng Liu nvme_pcie_qpair_build_metadata(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
1625074c62d0SMarcin Spiewak 			       bool sgl_supported, bool mptr_sgl_supported, bool dword_aligned)
1626b69827a3SChangpeng Liu {
1627b69827a3SChangpeng Liu 	void *md_payload;
1628b69827a3SChangpeng Liu 	struct nvme_request *req = tr->req;
16292be196c6SJim Harris 	uint64_t mapping_length;
1630b69827a3SChangpeng Liu 
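	/* When data SGLs, metadata-pointer SGLs and dword alignment are all available, describe
	 * the metadata with the tracker's meta_sgl descriptor (located immediately before its
	 * PRP/SGL area) and point MPTR at it. Otherwise MPTR carries the physical address
	 * directly, which requires the metadata buffer to be physically contiguous.
	 */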
1631b69827a3SChangpeng Liu 	if (req->payload.md) {
1632075d422fSKonrad Sztyber 		md_payload = (uint8_t *)req->payload.md + req->md_offset;
1633b69827a3SChangpeng Liu 		if (dword_aligned && ((uintptr_t)md_payload & 3)) {
1634b69827a3SChangpeng Liu 			SPDK_ERRLOG("virt_addr %p not dword aligned\n", md_payload);
1635b69827a3SChangpeng Liu 			goto exit;
1636b69827a3SChangpeng Liu 		}
1637b69827a3SChangpeng Liu 
16382be196c6SJim Harris 		mapping_length = req->md_size;
1639074c62d0SMarcin Spiewak 		if (sgl_supported && mptr_sgl_supported && dword_aligned) {
1640b69827a3SChangpeng Liu 			assert(req->cmd.psdt == SPDK_NVME_PSDT_SGL_MPTR_CONTIG);
1641b69827a3SChangpeng Liu 			req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL;
16422be196c6SJim Harris 
16432be196c6SJim Harris 			tr->meta_sgl.address = nvme_pcie_vtophys(qpair->ctrlr, md_payload, &mapping_length);
16442be196c6SJim Harris 			if (tr->meta_sgl.address == SPDK_VTOPHYS_ERROR || mapping_length != req->md_size) {
1645b69827a3SChangpeng Liu 				goto exit;
1646b69827a3SChangpeng Liu 			}
1647b69827a3SChangpeng Liu 			tr->meta_sgl.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1648b69827a3SChangpeng Liu 			tr->meta_sgl.unkeyed.length = req->md_size;
1649b69827a3SChangpeng Liu 			tr->meta_sgl.unkeyed.subtype = 0;
1650b69827a3SChangpeng Liu 			req->cmd.mptr = tr->prp_sgl_bus_addr - sizeof(struct spdk_nvme_sgl_descriptor);
1651b69827a3SChangpeng Liu 		} else {
16522be196c6SJim Harris 			req->cmd.mptr = nvme_pcie_vtophys(qpair->ctrlr, md_payload, &mapping_length);
16532be196c6SJim Harris 			if (req->cmd.mptr == SPDK_VTOPHYS_ERROR || mapping_length != req->md_size) {
1654b69827a3SChangpeng Liu 				goto exit;
1655b69827a3SChangpeng Liu 			}
1656b69827a3SChangpeng Liu 		}
1657b69827a3SChangpeng Liu 	}
1658b69827a3SChangpeng Liu 
1659b69827a3SChangpeng Liu 	return 0;
1660b69827a3SChangpeng Liu 
1661b69827a3SChangpeng Liu exit:
1662b69827a3SChangpeng Liu 	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1663b69827a3SChangpeng Liu 	return -EINVAL;
1664b69827a3SChangpeng Liu }
1665b69827a3SChangpeng Liu 
1666b69827a3SChangpeng Liu int
1667b69827a3SChangpeng Liu nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
1668b69827a3SChangpeng Liu {
1669b69827a3SChangpeng Liu 	struct nvme_tracker	*tr;
1670b69827a3SChangpeng Liu 	int			rc = 0;
1671b69827a3SChangpeng Liu 	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
1672b69827a3SChangpeng Liu 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
1673b69827a3SChangpeng Liu 	enum nvme_payload_type	payload_type;
1674b69827a3SChangpeng Liu 	bool			sgl_supported;
1675074c62d0SMarcin Spiewak 	bool			mptr_sgl_supported;
1676b69827a3SChangpeng Liu 	bool			dword_aligned = true;
1677b69827a3SChangpeng Liu 
1678b69827a3SChangpeng Liu 	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
1679e10b4806SJim Harris 		nvme_ctrlr_lock(ctrlr);
1680b69827a3SChangpeng Liu 	}
1681b69827a3SChangpeng Liu 
1682b69827a3SChangpeng Liu 	tr = TAILQ_FIRST(&pqpair->free_tr);
1683b69827a3SChangpeng Liu 
1684b69827a3SChangpeng Liu 	if (tr == NULL) {
1685b69827a3SChangpeng Liu 		pqpair->stat->queued_requests++;
1686b69827a3SChangpeng Liu 		/* Inform the upper layer to try again later. */
1687b69827a3SChangpeng Liu 		rc = -EAGAIN;
1688b69827a3SChangpeng Liu 		goto exit;
1689b69827a3SChangpeng Liu 	}
1690b69827a3SChangpeng Liu 
1691b69827a3SChangpeng Liu 	pqpair->stat->submitted_requests++;
1692b69827a3SChangpeng Liu 	TAILQ_REMOVE(&pqpair->free_tr, tr, tq_list); /* remove tr from free_tr */
1693b69827a3SChangpeng Liu 	TAILQ_INSERT_TAIL(&pqpair->outstanding_tr, tr, tq_list);
169488739040SAtul Malakar 	pqpair->qpair.queue_depth++;
1695b69827a3SChangpeng Liu 	tr->req = req;
1696b69827a3SChangpeng Liu 	tr->cb_fn = req->cb_fn;
1697b69827a3SChangpeng Liu 	tr->cb_arg = req->cb_arg;
1698b69827a3SChangpeng Liu 	req->cmd.cid = tr->cid;
169974c16d8eSAlex Michon 	/* Use PRP by default. This field will be overridden below if needed. */
170074c16d8eSAlex Michon 	req->cmd.psdt = SPDK_NVME_PSDT_PRP;
1701b69827a3SChangpeng Liu 
1702b69827a3SChangpeng Liu 	if (req->payload_size != 0) {
1703b69827a3SChangpeng Liu 		payload_type = nvme_payload_type(&req->payload);
1704b69827a3SChangpeng Liu 		/* According to the specification, PRPs shall be used for all
1705b69827a3SChangpeng Liu 		 *  Admin commands for NVMe over PCIe implementations.
1706b69827a3SChangpeng Liu 		 */
1707b69827a3SChangpeng Liu 		sgl_supported = (ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) != 0 &&
1708b69827a3SChangpeng Liu 				!nvme_qpair_is_admin_queue(qpair);
1709074c62d0SMarcin Spiewak 		mptr_sgl_supported = (ctrlr->flags & SPDK_NVME_CTRLR_MPTR_SGL_SUPPORTED) != 0 &&
1710074c62d0SMarcin Spiewak 				     !nvme_qpair_is_admin_queue(qpair);
1711b69827a3SChangpeng Liu 
1712b69827a3SChangpeng Liu 		if (sgl_supported) {
1713b69827a3SChangpeng Liu 			/* Don't use SGL for DSM command */
1714b69827a3SChangpeng Liu 			if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_NO_SGL_FOR_DSM) &&
1715b69827a3SChangpeng Liu 					  (req->cmd.opc == SPDK_NVME_OPC_DATASET_MANAGEMENT))) {
1716b69827a3SChangpeng Liu 				sgl_supported = false;
1717b69827a3SChangpeng Liu 			}
1718b69827a3SChangpeng Liu 		}
1719b69827a3SChangpeng Liu 
1720b69827a3SChangpeng Liu 		if (sgl_supported && !(ctrlr->flags & SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT)) {
1721b69827a3SChangpeng Liu 			dword_aligned = false;
1722b69827a3SChangpeng Liu 		}
172315b7d3baSJim Harris 
172415b7d3baSJim Harris 		/* If we fail to build the request or the metadata, do not return the -EFAULT back up
172515b7d3baSJim Harris 		 * the stack.  This ensures that we always fail these types of requests via a
172615b7d3baSJim Harris 		 * completion callback, and never in the context of the submission.
172715b7d3baSJim Harris 		 */
1728b69827a3SChangpeng Liu 		rc = g_nvme_pcie_build_req_table[payload_type][sgl_supported](qpair, req, tr, dword_aligned);
1729b69827a3SChangpeng Liu 		if (rc < 0) {
173015b7d3baSJim Harris 			assert(rc == -EFAULT);
173115b7d3baSJim Harris 			rc = 0;
1732b69827a3SChangpeng Liu 			goto exit;
1733b69827a3SChangpeng Liu 		}
1734b69827a3SChangpeng Liu 
1735074c62d0SMarcin Spiewak 		rc = nvme_pcie_qpair_build_metadata(qpair, tr, sgl_supported, mptr_sgl_supported, dword_aligned);
1736b69827a3SChangpeng Liu 		if (rc < 0) {
173715b7d3baSJim Harris 			assert(rc == -EFAULT);
173815b7d3baSJim Harris 			rc = 0;
1739b69827a3SChangpeng Liu 			goto exit;
1740b69827a3SChangpeng Liu 		}
1741b69827a3SChangpeng Liu 	}
1742b69827a3SChangpeng Liu 
1743b69827a3SChangpeng Liu 	nvme_pcie_qpair_submit_tracker(qpair, tr);
1744b69827a3SChangpeng Liu 
1745b69827a3SChangpeng Liu exit:
1746b69827a3SChangpeng Liu 	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
1747e10b4806SJim Harris 		nvme_ctrlr_unlock(ctrlr);
1748b69827a3SChangpeng Liu 	}
1749b69827a3SChangpeng Liu 
1750b69827a3SChangpeng Liu 	return rc;
1751b69827a3SChangpeng Liu }
1752b69827a3SChangpeng Liu 
17536b4b2d29SChangpeng Liu struct spdk_nvme_transport_poll_group *
17546b4b2d29SChangpeng Liu nvme_pcie_poll_group_create(void)
17556b4b2d29SChangpeng Liu {
17566b4b2d29SChangpeng Liu 	struct nvme_pcie_poll_group *group = calloc(1, sizeof(*group));
17576b4b2d29SChangpeng Liu 
17586b4b2d29SChangpeng Liu 	if (group == NULL) {
17596b4b2d29SChangpeng Liu 		SPDK_ERRLOG("Unable to allocate poll group.\n");
17606b4b2d29SChangpeng Liu 		return NULL;
17616b4b2d29SChangpeng Liu 	}
17626b4b2d29SChangpeng Liu 
17636b4b2d29SChangpeng Liu 	return &group->group;
17646b4b2d29SChangpeng Liu }
17656b4b2d29SChangpeng Liu 
17666b4b2d29SChangpeng Liu int
17676b4b2d29SChangpeng Liu nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
17686b4b2d29SChangpeng Liu {
17696b4b2d29SChangpeng Liu 	return 0;
17706b4b2d29SChangpeng Liu }
17716b4b2d29SChangpeng Liu 
17726b4b2d29SChangpeng Liu int
17736b4b2d29SChangpeng Liu nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
17746b4b2d29SChangpeng Liu {
17756b4b2d29SChangpeng Liu 	return 0;
17766b4b2d29SChangpeng Liu }
17776b4b2d29SChangpeng Liu 
17786b4b2d29SChangpeng Liu int
17796b4b2d29SChangpeng Liu nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
17806b4b2d29SChangpeng Liu 			 struct spdk_nvme_qpair *qpair)
17816b4b2d29SChangpeng Liu {
17826b4b2d29SChangpeng Liu 	return 0;
17836b4b2d29SChangpeng Liu }
17846b4b2d29SChangpeng Liu 
17856b4b2d29SChangpeng Liu int
17866b4b2d29SChangpeng Liu nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
17876b4b2d29SChangpeng Liu 			    struct spdk_nvme_qpair *qpair)
17886b4b2d29SChangpeng Liu {
1789ea2db5bbSShuhei Matsumoto 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
1790ea2db5bbSShuhei Matsumoto 
1791ea2db5bbSShuhei Matsumoto 	pqpair->stat = &g_dummy_stat;
17926b4b2d29SChangpeng Liu 	return 0;
17936b4b2d29SChangpeng Liu }
17946b4b2d29SChangpeng Liu 
17956b4b2d29SChangpeng Liu int64_t
17966b4b2d29SChangpeng Liu nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
17976b4b2d29SChangpeng Liu 		uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
17986b4b2d29SChangpeng Liu {
17996b4b2d29SChangpeng Liu 	struct spdk_nvme_qpair *qpair, *tmp_qpair;
18006b4b2d29SChangpeng Liu 	int32_t local_completions = 0;
18016b4b2d29SChangpeng Liu 	int64_t total_completions = 0;
18026b4b2d29SChangpeng Liu 
18036b4b2d29SChangpeng Liu 	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
18046b4b2d29SChangpeng Liu 		disconnected_qpair_cb(qpair, tgroup->group->ctx);
18056b4b2d29SChangpeng Liu 	}
18066b4b2d29SChangpeng Liu 
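	/* Poll every connected qpair. If any qpair fails, invoke disconnected_qpair_cb for it and
	 * latch -ENXIO as the return value, but keep polling the remaining qpairs.
	 */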
18076b4b2d29SChangpeng Liu 	STAILQ_FOREACH_SAFE(qpair, &tgroup->connected_qpairs, poll_group_stailq, tmp_qpair) {
18086b4b2d29SChangpeng Liu 		local_completions = spdk_nvme_qpair_process_completions(qpair, completions_per_qpair);
180975d38a30SShuhei Matsumoto 		if (spdk_unlikely(local_completions < 0)) {
18106b4b2d29SChangpeng Liu 			disconnected_qpair_cb(qpair, tgroup->group->ctx);
181175d38a30SShuhei Matsumoto 			total_completions = -ENXIO;
181275d38a30SShuhei Matsumoto 		} else if (spdk_likely(total_completions >= 0)) {
18136b4b2d29SChangpeng Liu 			total_completions += local_completions;
18146b4b2d29SChangpeng Liu 		}
181575d38a30SShuhei Matsumoto 	}
18166b4b2d29SChangpeng Liu 
18176b4b2d29SChangpeng Liu 	return total_completions;
18186b4b2d29SChangpeng Liu }
18196b4b2d29SChangpeng Liu 
1820*1efa1b16SAnkit Kumar void
1821*1efa1b16SAnkit Kumar nvme_pcie_poll_group_check_disconnected_qpairs(struct spdk_nvme_transport_poll_group *tgroup,
1822*1efa1b16SAnkit Kumar 		spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
1823*1efa1b16SAnkit Kumar {
1824*1efa1b16SAnkit Kumar 	struct spdk_nvme_qpair *qpair, *tmp_qpair;
1825*1efa1b16SAnkit Kumar 
1826*1efa1b16SAnkit Kumar 	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
1827*1efa1b16SAnkit Kumar 		disconnected_qpair_cb(qpair, tgroup->group->ctx);
1828*1efa1b16SAnkit Kumar 	}
1829*1efa1b16SAnkit Kumar }
1830*1efa1b16SAnkit Kumar 
18316b4b2d29SChangpeng Liu int
18326b4b2d29SChangpeng Liu nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
18336b4b2d29SChangpeng Liu {
18346b4b2d29SChangpeng Liu 	if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
18356b4b2d29SChangpeng Liu 		return -EBUSY;
18366b4b2d29SChangpeng Liu 	}
18376b4b2d29SChangpeng Liu 
18386b4b2d29SChangpeng Liu 	free(tgroup);
18396b4b2d29SChangpeng Liu 
18406b4b2d29SChangpeng Liu 	return 0;
18416b4b2d29SChangpeng Liu }
1842971f07b9SJim Harris 
1843c88345abSChangpeng Liu int
1844c88345abSChangpeng Liu nvme_pcie_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
1845c88345abSChangpeng Liu 			       struct spdk_nvme_transport_poll_group_stat **_stats)
1846c88345abSChangpeng Liu {
1847c88345abSChangpeng Liu 	struct nvme_pcie_poll_group *group;
1848c88345abSChangpeng Liu 	struct spdk_nvme_transport_poll_group_stat *stats;
1849c88345abSChangpeng Liu 
1850c88345abSChangpeng Liu 	if (tgroup == NULL || _stats == NULL) {
1851c88345abSChangpeng Liu 		SPDK_ERRLOG("Invalid stats or group pointer\n");
1852c88345abSChangpeng Liu 		return -EINVAL;
1853c88345abSChangpeng Liu 	}
1854c88345abSChangpeng Liu 
1855c88345abSChangpeng Liu 	stats = calloc(1, sizeof(*stats));
1856c88345abSChangpeng Liu 	if (!stats) {
1857e9ff4753SJim Harris 		SPDK_ERRLOG("Can't allocate memory for stats\n");
1858c88345abSChangpeng Liu 		return -ENOMEM;
1859c88345abSChangpeng Liu 	}
1860c88345abSChangpeng Liu 	stats->trtype = SPDK_NVME_TRANSPORT_PCIE;
1861b813f998Syidong0635 	group = SPDK_CONTAINEROF(tgroup, struct nvme_pcie_poll_group, group);
1862c88345abSChangpeng Liu 	memcpy(&stats->pcie, &group->stats, sizeof(group->stats));
1863c88345abSChangpeng Liu 
1864c88345abSChangpeng Liu 	*_stats = stats;
1865c88345abSChangpeng Liu 
1866c88345abSChangpeng Liu 	return 0;
1867c88345abSChangpeng Liu }
1868c88345abSChangpeng Liu 
1869c88345abSChangpeng Liu void
1870c88345abSChangpeng Liu nvme_pcie_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
1871c88345abSChangpeng Liu 				struct spdk_nvme_transport_poll_group_stat *stats)
1872c88345abSChangpeng Liu {
1873c88345abSChangpeng Liu 	free(stats);
1874c88345abSChangpeng Liu }
1875c88345abSChangpeng Liu 
18760eae0106SJim Harris static void
18770eae0106SJim Harris nvme_pcie_trace(void)
1878971f07b9SJim Harris {
1879971f07b9SJim Harris 	struct spdk_trace_tpoint_opts opts[] = {
1880971f07b9SJim Harris 		{
1881971f07b9SJim Harris 			"NVME_PCIE_SUBMIT", TRACE_NVME_PCIE_SUBMIT,
188226d44a12SJim Harris 			OWNER_TYPE_NVME_PCIE_QP, OBJECT_NVME_PCIE_REQ, 1,
1883e36f0d36SJim Harris 			{	{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
1884e36f0d36SJim Harris 				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
18857b05b29dSJim Harris 				{ "opc", SPDK_TRACE_ARG_TYPE_INT, 4 },
18867b05b29dSJim Harris 				{ "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 },
18877b05b29dSJim Harris 				{ "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 },
188888739040SAtul Malakar 				{ "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 },
188988739040SAtul Malakar 				{ "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
1890971f07b9SJim Harris 			}
1891971f07b9SJim Harris 		},
1892971f07b9SJim Harris 		{
1893971f07b9SJim Harris 			"NVME_PCIE_COMPLETE", TRACE_NVME_PCIE_COMPLETE,
189426d44a12SJim Harris 			OWNER_TYPE_NVME_PCIE_QP, OBJECT_NVME_PCIE_REQ, 0,
1895e36f0d36SJim Harris 			{	{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
1896e36f0d36SJim Harris 				{ "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
189788739040SAtul Malakar 				{ "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 },
189888739040SAtul Malakar 				{ "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
189997661e86SJim Harris 			}
1900971f07b9SJim Harris 		},
1901971f07b9SJim Harris 	};
1902971f07b9SJim Harris 
1903b0396da0SJim Harris 	spdk_trace_register_object(OBJECT_NVME_PCIE_REQ, 'p');
190426d44a12SJim Harris 	spdk_trace_register_owner_type(OWNER_TYPE_NVME_PCIE_QP, 'q');
1905971f07b9SJim Harris 	spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
1906971f07b9SJim Harris }
19070eae0106SJim Harris SPDK_TRACE_REGISTER_FN(nvme_pcie_trace, "nvme_pcie", TRACE_GROUP_NVME_PCIE)
1908