xref: /spdk/lib/nvme/nvme_pcie.c (revision 32999ab917f67af61872f868585fd3d78ad6fb8a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2017, IBM Corporation. All rights reserved.
6  *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * NVMe over PCIe transport
37  */
38 
39 #include "spdk/stdinc.h"
40 #include "spdk/env.h"
41 #include "spdk/likely.h"
42 #include "spdk/string.h"
43 #include "nvme_internal.h"
44 #include "nvme_pcie_internal.h"
45 
46 struct nvme_pcie_enum_ctx {
47 	struct spdk_nvme_probe_ctx *probe_ctx;
48 	struct spdk_pci_addr pci_addr;
49 	bool has_pci_addr;
50 };
51 
52 static uint16_t g_signal_lock;
53 static bool g_sigset = false;
54 static spdk_nvme_pcie_hotplug_filter_cb g_hotplug_filter_cb;
55 
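/*
 * SIGBUS handling for surprise hot-remove: once a controller is physically
 * removed, MMIO accesses to its BAR fault. The handler below remaps the
 * register window to an anonymous page filled with 0xFF, so subsequent
 * register accesses complete and reads return all 1s, which the register
 * accessors further down treat as a failed access.
 */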
56 static void
57 nvme_sigbus_fault_sighandler(siginfo_t *info, void *ctx)
58 {
59 	void *map_address;
60 	uint16_t flag = 0;
61 
62 	if (!__atomic_compare_exchange_n(&g_signal_lock, &flag, 1, false, __ATOMIC_ACQUIRE,
63 					 __ATOMIC_RELAXED)) {
64 		SPDK_DEBUGLOG(nvme, "could not acquire g_signal_lock\n");
65 		return;
66 	}
67 
68 	if (g_thread_mmio_ctrlr == NULL) {
		/* Release the lock taken above; nothing to remap on this thread. */
		__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
69 		return;
70 	}
71 
72 	if (!g_thread_mmio_ctrlr->is_remapped) {
73 		map_address = mmap((void *)g_thread_mmio_ctrlr->regs, g_thread_mmio_ctrlr->regs_size,
74 				   PROT_READ | PROT_WRITE,
75 				   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
76 		if (map_address == MAP_FAILED) {
77 			SPDK_ERRLOG("mmap failed\n");
78 			__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
79 			return;
80 		}
81 		memset(map_address, 0xFF, sizeof(struct spdk_nvme_registers));
82 		g_thread_mmio_ctrlr->regs = (volatile struct spdk_nvme_registers *)map_address;
83 		g_thread_mmio_ctrlr->is_remapped = true;
84 	}
85 	__atomic_store_n(&g_signal_lock, 0, __ATOMIC_RELEASE);
86 }
87 
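/*
 * Handle a PCI uevent: on SPDK_UEVENT_ADD the device is allowed so a later
 * scan can attach it (primary process only); on SPDK_UEVENT_REMOVE the
 * controller is failed and the application's remove_cb is invoked.
 */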
88 static void
89 _nvme_pcie_event_process(struct spdk_pci_event *event, void *cb_ctx)
90 {
91 	struct spdk_nvme_transport_id trid;
92 	struct spdk_nvme_ctrlr *ctrlr;
93 
94 	if (event->action == SPDK_UEVENT_ADD) {
95 		if (spdk_process_is_primary()) {
96 			if (g_hotplug_filter_cb == NULL || g_hotplug_filter_cb(&event->traddr)) {
97 				/* The enumerate path implements the actual add; here we only allow the device */
98 				spdk_pci_device_allow(&event->traddr);
99 			}
100 		}
101 	} else if (event->action == SPDK_UEVENT_REMOVE) {
102 		memset(&trid, 0, sizeof(trid));
103 		spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
104 
105 		if (spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &event->traddr) < 0) {
106 			SPDK_ERRLOG("Failed to format pci address\n");
107 			return;
108 		}
109 
110 		ctrlr = nvme_get_ctrlr_by_trid_unsafe(&trid);
111 		if (ctrlr == NULL) {
112 			return;
113 		}
114 		SPDK_DEBUGLOG(nvme, "remove nvme address: %s\n", trid.traddr);
115 
116 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
117 		nvme_ctrlr_fail(ctrlr, true);
118 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
119 
120 		/* get the user app to clean up and stop I/O */
121 		if (ctrlr->remove_cb) {
122 			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
123 			ctrlr->remove_cb(cb_ctx, ctrlr);
124 			nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
125 		}
126 	}
127 }
128 
129 static int
130 _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
131 {
132 	struct spdk_nvme_ctrlr *ctrlr, *tmp;
133 	struct spdk_pci_event event;
134 
135 	if (g_spdk_nvme_driver->hotplug_fd < 0) {
136 		return 0;
137 	}
138 
139 	while (spdk_pci_get_event(g_spdk_nvme_driver->hotplug_fd, &event) > 0) {
140 		_nvme_pcie_event_process(&event, probe_ctx->cb_ctx);
141 	}
142 
143 	/* Initiate removal of physically hot-removed PCI controllers. Even after
144 	 * they're hot-removed from the system, SPDK might still report them via RPC.
145 	 */
146 	TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) {
147 		bool do_remove = false;
148 		struct nvme_pcie_ctrlr *pctrlr;
149 
150 		if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
151 			continue;
152 		}
153 
154 		pctrlr = nvme_pcie_ctrlr(ctrlr);
155 		if (spdk_pci_device_is_removed(pctrlr->devhandle)) {
156 			do_remove = true;
157 		}
158 
159 		if (do_remove) {
160 			nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
161 			nvme_ctrlr_fail(ctrlr, true);
162 			nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
163 			if (ctrlr->remove_cb) {
164 				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
165 				ctrlr->remove_cb(ctrlr->cb_ctx, ctrlr);
166 				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
167 			}
168 		}
169 	}
170 	return 0;
171 }
172 
173 static volatile void *
174 nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset)
175 {
176 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
177 
178 	return (volatile void *)((uintptr_t)pctrlr->regs + offset);
179 }
180 
181 static int
182 nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value)
183 {
184 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
185 
186 	assert(offset <= sizeof(struct spdk_nvme_registers) - 4);
187 	g_thread_mmio_ctrlr = pctrlr;
188 	spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr, offset), value);
189 	g_thread_mmio_ctrlr = NULL;
190 	return 0;
191 }
192 
193 static int
194 nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value)
195 {
196 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
197 
198 	assert(offset <= sizeof(struct spdk_nvme_registers) - 8);
199 	g_thread_mmio_ctrlr = pctrlr;
200 	spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr, offset), value);
201 	g_thread_mmio_ctrlr = NULL;
202 	return 0;
203 }
204 
205 static int
206 nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value)
207 {
208 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
209 
210 	assert(offset <= sizeof(struct spdk_nvme_registers) - 4);
211 	assert(value != NULL);
212 	g_thread_mmio_ctrlr = pctrlr;
213 	*value = spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr, offset));
214 	g_thread_mmio_ctrlr = NULL;
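	/* A read of all 1s indicates the access hit a removed device (the SIGBUS
	 * handler remaps the BAR to 0xFF-filled memory), so report it as a failure. */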
215 	if (~(*value) == 0) {
216 		return -1;
217 	}
218 
219 	return 0;
220 }
221 
222 static int
223 nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value)
224 {
225 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
226 
227 	assert(offset <= sizeof(struct spdk_nvme_registers) - 8);
228 	assert(value != NULL);
229 	g_thread_mmio_ctrlr = pctrlr;
230 	*value = spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr, offset));
231 	g_thread_mmio_ctrlr = NULL;
232 	if (~(*value) == 0) {
233 		return -1;
234 	}
235 
236 	return 0;
237 }
238 
239 static int
240 nvme_pcie_ctrlr_set_asq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value)
241 {
242 	return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, asq),
243 					 value);
244 }
245 
246 static int
247 nvme_pcie_ctrlr_set_acq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value)
248 {
249 	return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, acq),
250 					 value);
251 }
252 
253 static int
254 nvme_pcie_ctrlr_set_aqa(struct nvme_pcie_ctrlr *pctrlr, const union spdk_nvme_aqa_register *aqa)
255 {
256 	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, aqa.raw),
257 					 aqa->raw);
258 }
259 
260 static int
261 nvme_pcie_ctrlr_get_cmbloc(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbloc_register *cmbloc)
262 {
263 	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbloc.raw),
264 					 &cmbloc->raw);
265 }
266 
267 static int
268 nvme_pcie_ctrlr_get_cmbsz(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbsz_register *cmbsz)
269 {
270 	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
271 					 &cmbsz->raw);
272 }
273 
274 static int
275 nvme_pcie_ctrlr_get_pmrcap(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrcap_register *pmrcap)
276 {
277 	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw),
278 					 &pmrcap->raw);
279 }
280 
281 static int
282 nvme_pcie_ctrlr_set_pmrctl(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrctl_register *pmrctl)
283 {
284 	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrctl.raw),
285 					 pmrctl->raw);
286 }
287 
288 static int
289 nvme_pcie_ctrlr_get_pmrctl(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrctl_register *pmrctl)
290 {
291 	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrctl.raw),
292 					 &pmrctl->raw);
293 }
294 
295 static int
296 nvme_pcie_ctrlr_get_pmrsts(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_pmrsts_register *pmrsts)
297 {
298 	return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrsts.raw),
299 					 &pmrsts->raw);
300 }
301 
302 static int
303 nvme_pcie_ctrlr_set_pmrmscl(struct nvme_pcie_ctrlr *pctrlr, uint32_t value)
304 {
305 	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrmscl.raw),
306 					 value);
307 }
308 
309 static int
310 nvme_pcie_ctrlr_set_pmrmscu(struct nvme_pcie_ctrlr *pctrlr, uint32_t value)
311 {
312 	return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, pmrmscu),
313 					 value);
314 }
315 
316 static uint32_t
317 nvme_pcie_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
318 {
319 	/*
320 	 * For commands requiring more than 2 PRP entries, one PRP will be
321 	 *  embedded in the command (prp1), and the rest of the PRP entries
322 	 *  will be in a list pointed to by the command (prp2).  The number
323 	 *  of PRP entries in the list is defined by
324 	 *  NVME_MAX_PRP_LIST_ENTRIES.
325 	 *
326 	 *  Note that the max xfer size is not (MAX_ENTRIES + 1) * page_size
327 	 *  because the first PRP entry may not be aligned on a 4KiB
328 	 *  boundary.
329 	 */
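	/* Worked example (values assumed for illustration): with a 4 KiB page size
	 * and NVME_MAX_PRP_LIST_ENTRIES == 503, the maximum transfer size would be
	 * 503 * 4096 bytes, i.e. slightly under 2 MiB. */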
330 	return NVME_MAX_PRP_LIST_ENTRIES * ctrlr->page_size;
331 }
332 
333 static uint16_t
334 nvme_pcie_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
335 {
336 	return NVME_MAX_SGL_DESCRIPTORS;
337 }
338 
339 static void
340 nvme_pcie_ctrlr_map_cmb(struct nvme_pcie_ctrlr *pctrlr)
341 {
342 	int rc;
343 	void *addr = NULL;
344 	uint32_t bir;
345 	union spdk_nvme_cmbsz_register cmbsz;
346 	union spdk_nvme_cmbloc_register cmbloc;
347 	uint64_t size, unit_size, offset, bar_size = 0, bar_phys_addr = 0;
348 
349 	if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) ||
350 	    nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
351 		SPDK_ERRLOG("get registers failed\n");
352 		goto exit;
353 	}
354 
355 	if (!cmbsz.bits.sz) {
356 		goto exit;
357 	}
358 
359 	bir = cmbloc.bits.bir;
360 	/* Valid Base Address Register Indicator (BIR) values are 0 and 2 through 5 */
361 	if (bir > 5 || bir == 1) {
362 		goto exit;
363 	}
364 
365 	/* CMBSZ.SZU selects the size unit: 4 KiB/64 KiB/1 MiB/16 MiB/256 MiB/4 GiB/64 GiB */
366 	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
367 	/* controller memory buffer size in bytes */
368 	size = unit_size * cmbsz.bits.sz;
369 	/* controller memory buffer offset from the BAR base, in bytes */
370 	offset = unit_size * cmbloc.bits.ofst;
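	/* Worked example (register values assumed for illustration): with
	 * CMBSZ.SZU == 2 the unit is 1 << (12 + 8) = 1 MiB, so CMBSZ.SZ == 16
	 * yields a 16 MiB CMB, and CMBLOC.OFST == 4 places it 4 MiB into the BAR. */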
371 
372 	rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr,
373 				     &bar_phys_addr, &bar_size);
374 	if ((rc != 0) || addr == NULL) {
375 		goto exit;
376 	}
377 
378 	if (offset > bar_size) {
379 		goto exit;
380 	}
381 
382 	if (size > bar_size - offset) {
383 		goto exit;
384 	}
385 
386 	pctrlr->cmb.bar_va = addr;
387 	pctrlr->cmb.bar_pa = bar_phys_addr;
388 	pctrlr->cmb.size = size;
389 	pctrlr->cmb.current_offset = offset;
390 
391 	if (!cmbsz.bits.sqs) {
392 		pctrlr->ctrlr.opts.use_cmb_sqs = false;
393 	}
394 
395 	return;
396 exit:
397 	pctrlr->ctrlr.opts.use_cmb_sqs = false;
398 	return;
399 }
400 
401 static int
402 nvme_pcie_ctrlr_unmap_cmb(struct nvme_pcie_ctrlr *pctrlr)
403 {
404 	int rc = 0;
405 	union spdk_nvme_cmbloc_register cmbloc;
406 	void *addr = pctrlr->cmb.bar_va;
407 
408 	if (addr) {
409 		if (pctrlr->cmb.mem_register_addr) {
410 			spdk_mem_unregister(pctrlr->cmb.mem_register_addr, pctrlr->cmb.mem_register_size);
411 		}
412 
413 		if (nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
414 			SPDK_ERRLOG("get_cmbloc() failed\n");
415 			return -EIO;
416 		}
417 		rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, cmbloc.bits.bir, addr);
418 	}
419 	return rc;
420 }
421 
422 static int
423 nvme_pcie_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr)
424 {
425 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
426 
427 	if (pctrlr->cmb.bar_va == NULL) {
428 		SPDK_DEBUGLOG(nvme, "CMB not available\n");
429 		return -ENOTSUP;
430 	}
431 
432 	if (ctrlr->opts.use_cmb_sqs) {
433 		SPDK_ERRLOG("CMB is already in use for submission queues.\n");
434 		return -ENOTSUP;
435 	}
436 
437 	return 0;
438 }
439 
440 static void *
441 nvme_pcie_ctrlr_map_io_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
442 {
443 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
444 	union spdk_nvme_cmbsz_register cmbsz;
445 	union spdk_nvme_cmbloc_register cmbloc;
446 	uint64_t mem_register_start, mem_register_end;
447 	int rc;
448 
449 	if (pctrlr->cmb.mem_register_addr != NULL) {
450 		*size = pctrlr->cmb.mem_register_size;
451 		return pctrlr->cmb.mem_register_addr;
452 	}
453 
454 	*size = 0;
455 
456 	if (pctrlr->cmb.bar_va == NULL) {
457 		SPDK_DEBUGLOG(nvme, "CMB not available\n");
458 		return NULL;
459 	}
460 
461 	if (ctrlr->opts.use_cmb_sqs) {
462 		SPDK_ERRLOG("CMB is already in use for submission queues.\n");
463 		return NULL;
464 	}
465 
466 	if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) ||
467 	    nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
468 		SPDK_ERRLOG("get registers failed\n");
469 		return NULL;
470 	}
471 
472 	/* If neither WDS nor RDS is set, the CMB only supports submission queues and cannot be mapped for I/O buffers */
473 	if (!(cmbsz.bits.wds || cmbsz.bits.rds)) {
474 		return NULL;
475 	}
476 
477 	/* If CMB is less than 4MiB in size then abort CMB mapping */
478 	if (pctrlr->cmb.size < (1ULL << 22)) {
479 		return NULL;
480 	}
481 
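	/* Round the usable CMB range inward to 2 MiB boundaries (start up, end down)
	 * before registering it with spdk_mem_register(). */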
482 	mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset +
483 				       VALUE_2MB - 1);
484 	mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->cmb.bar_va + pctrlr->cmb.current_offset +
485 				     pctrlr->cmb.size);
486 
487 	rc = spdk_mem_register((void *)mem_register_start, mem_register_end - mem_register_start);
488 	if (rc) {
489 		SPDK_ERRLOG("spdk_mem_register() failed\n");
490 		return NULL;
491 	}
492 
493 	pctrlr->cmb.mem_register_addr = (void *)mem_register_start;
494 	pctrlr->cmb.mem_register_size = mem_register_end - mem_register_start;
495 
496 	*size = pctrlr->cmb.mem_register_size;
497 	return pctrlr->cmb.mem_register_addr;
498 }
499 
500 static int
501 nvme_pcie_ctrlr_unmap_io_cmb(struct spdk_nvme_ctrlr *ctrlr)
502 {
503 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
504 	int rc;
505 
506 	if (pctrlr->cmb.mem_register_addr == NULL) {
507 		return 0;
508 	}
509 
510 	rc = spdk_mem_unregister(pctrlr->cmb.mem_register_addr, pctrlr->cmb.mem_register_size);
511 
512 	if (rc == 0) {
513 		pctrlr->cmb.mem_register_addr = NULL;
514 		pctrlr->cmb.mem_register_size = 0;
515 	}
516 
517 	return rc;
518 }
519 
520 static void
521 nvme_pcie_ctrlr_map_pmr(struct nvme_pcie_ctrlr *pctrlr)
522 {
523 	int rc;
524 	void *addr = NULL;
525 	uint32_t bir;
526 	union spdk_nvme_pmrcap_register pmrcap;
527 	uint64_t bar_size = 0, bar_phys_addr = 0;
528 
529 	if (!pctrlr->regs->cap.bits.pmrs) {
530 		return;
531 	}
532 
533 	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
534 		SPDK_ERRLOG("get registers failed\n");
535 		return;
536 	}
537 
538 	bir = pmrcap.bits.bir;
539 	/* Valid Base Address Register Indicator (BIR) values are 2 through 5 */
540 	if (bir > 5 || bir < 2) {
541 		SPDK_ERRLOG("invalid base indicator register value\n");
542 		return;
543 	}
544 
545 	rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr, &bar_phys_addr, &bar_size);
546 	if ((rc != 0) || addr == NULL) {
547 		SPDK_ERRLOG("could not map the bar %d\n", bir);
548 		return;
549 	}
550 
551 	if (pmrcap.bits.cmss) {
552 		uint32_t pmrmscl, pmrmscu, cmse = 1;
553 		union spdk_nvme_pmrsts_register pmrsts;
554 
555 		/* Enable Controller Memory Space */
556 		pmrmscl = (uint32_t)((bar_phys_addr & 0xFFFFF000ULL) | (cmse << 1));
557 		pmrmscu = (uint32_t)((bar_phys_addr >> 32ULL) & 0xFFFFFFFFULL);
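		/* The PMR controller base address is split across two registers:
		 * PMRMSCU takes the upper 32 bits, PMRMSCL takes bits 31:12 of the
		 * BAR physical address plus the CMSE enable bit (bit 1). */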
558 
559 		if (nvme_pcie_ctrlr_set_pmrmscu(pctrlr, pmrmscu)) {
560 			SPDK_ERRLOG("set_pmrmscu() failed\n");
561 			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
562 			return;
563 		}
564 
565 		if (nvme_pcie_ctrlr_set_pmrmscl(pctrlr, pmrmscl)) {
566 			SPDK_ERRLOG("set_pmrmscl() failed\n");
567 			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
568 			return;
569 		}
570 
571 		if (nvme_pcie_ctrlr_get_pmrsts(pctrlr, &pmrsts)) {
572 			SPDK_ERRLOG("get pmrsts failed\n");
573 			spdk_pci_device_unmap_bar(pctrlr->devhandle, bir, addr);
574 			return;
575 		}
576 
577 		if (pmrsts.bits.cbai) {
578 			SPDK_ERRLOG("Controller Memory Space Enable Failure\n");
579 			SPDK_ERRLOG("CBA Invalid - Host Addresses cannot reference PMR\n");
580 		} else {
581 			SPDK_DEBUGLOG(nvme, "Controller Memory Space Enable Success\n");
582 			SPDK_DEBUGLOG(nvme, "Host Addresses can reference PMR\n");
583 		}
584 	}
585 
586 	pctrlr->pmr.bar_va = addr;
587 	pctrlr->pmr.bar_pa = bar_phys_addr;
588 	pctrlr->pmr.size = pctrlr->ctrlr.pmr_size = bar_size;
589 }
590 
591 static int
592 nvme_pcie_ctrlr_unmap_pmr(struct nvme_pcie_ctrlr *pctrlr)
593 {
594 	int rc = 0;
595 	union spdk_nvme_pmrcap_register pmrcap;
596 	void *addr = pctrlr->pmr.bar_va;
597 
598 	if (addr == NULL) {
599 		return rc;
600 	}
601 
602 	if (pctrlr->pmr.mem_register_addr) {
603 		spdk_mem_unregister(pctrlr->pmr.mem_register_addr, pctrlr->pmr.mem_register_size);
604 	}
605 
606 	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
607 		SPDK_ERRLOG("get_pmrcap() failed\n");
608 		return -EIO;
609 	}
610 
611 	if (pmrcap.bits.cmss) {
612 		if (nvme_pcie_ctrlr_set_pmrmscu(pctrlr, 0)) {
613 			SPDK_ERRLOG("set_pmrmscu() failed\n");
614 		}
615 
616 		if (nvme_pcie_ctrlr_set_pmrmscl(pctrlr, 0)) {
617 			SPDK_ERRLOG("set_pmrmscl() failed\n");
618 		}
619 	}
620 
621 	rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, pmrcap.bits.bir, addr);
622 
623 	return rc;
624 }
625 
626 static int
627 nvme_pcie_ctrlr_config_pmr(struct spdk_nvme_ctrlr *ctrlr, bool enable)
628 {
629 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
630 	union spdk_nvme_pmrcap_register pmrcap;
631 	union spdk_nvme_pmrctl_register pmrctl;
632 	union spdk_nvme_pmrsts_register pmrsts;
633 	uint8_t pmrto, pmrtu;
634 	uint64_t timeout_in_ms, ticks_per_ms, timeout_in_ticks, now_ticks;
635 
636 	if (!pctrlr->regs->cap.bits.pmrs) {
637 		SPDK_ERRLOG("PMR is not supported by the controller\n");
638 		return -ENOTSUP;
639 	}
640 
641 	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
642 		SPDK_ERRLOG("get registers failed\n");
643 		return -EIO;
644 	}
645 
646 	pmrto = pmrcap.bits.pmrto;
647 	pmrtu = pmrcap.bits.pmrtu;
648 
649 	if (pmrtu > 1) {
650 		SPDK_ERRLOG("PMR Time Units Invalid\n");
651 		return -EINVAL;
652 	}
653 
654 	ticks_per_ms = spdk_get_ticks_hz() / 1000;
655 	timeout_in_ms = pmrto * (pmrtu ? (60 * 1000) : 500);
656 	timeout_in_ticks = timeout_in_ms * ticks_per_ms;
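	/* Example (values assumed for illustration): PMRTO == 10 with PMRTU == 0
	 * (500 ms units) gives a 5000 ms timeout; PMRTU == 1 means 1 minute units. */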
657 
658 	if (nvme_pcie_ctrlr_get_pmrctl(pctrlr, &pmrctl)) {
659 		SPDK_ERRLOG("get pmrctl failed\n");
660 		return -EIO;
661 	}
662 
663 	if (enable && pmrctl.bits.en != 0) {
664 		SPDK_ERRLOG("PMR is already enabled\n");
665 		return -EINVAL;
666 	} else if (!enable && pmrctl.bits.en != 1) {
667 		SPDK_ERRLOG("PMR is already disabled\n");
668 		return -EINVAL;
669 	}
670 
671 	pmrctl.bits.en = enable;
672 
673 	if (nvme_pcie_ctrlr_set_pmrctl(pctrlr, &pmrctl)) {
674 		SPDK_ERRLOG("set pmrctl failed\n");
675 		return -EIO;
676 	}
677 
678 	now_ticks = spdk_get_ticks();
679 
680 	do {
681 		if (nvme_pcie_ctrlr_get_pmrsts(pctrlr, &pmrsts)) {
682 			SPDK_ERRLOG("get pmrsts failed\n");
683 			return -EIO;
684 		}
685 
686 		if (pmrsts.bits.nrdy == enable &&
687 		    spdk_get_ticks() > now_ticks + timeout_in_ticks) {
688 			SPDK_ERRLOG("PMR %s - Timed Out\n", enable ? "Enable" : "Disable");
689 			return -ETIMEDOUT;
690 		}
691 	} while (pmrsts.bits.nrdy == enable);
692 
693 	SPDK_DEBUGLOG(nvme, "PMR %s\n", enable ? "Enabled" : "Disabled");
694 
695 	return 0;
696 }
697 
698 static int
699 nvme_pcie_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr)
700 {
701 	return nvme_pcie_ctrlr_config_pmr(ctrlr, true);
702 }
703 
704 static int
705 nvme_pcie_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr)
706 {
707 	return nvme_pcie_ctrlr_config_pmr(ctrlr, false);
708 }
709 
710 static void *
711 nvme_pcie_ctrlr_map_io_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
712 {
713 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
714 	union spdk_nvme_pmrcap_register pmrcap;
715 	uint64_t mem_register_start, mem_register_end;
716 	int rc;
717 
718 	if (!pctrlr->regs->cap.bits.pmrs) {
719 		SPDK_ERRLOG("PMR is not supported by the controller\n");
720 		return NULL;
721 	}
722 
723 	if (pctrlr->pmr.mem_register_addr != NULL) {
724 		*size = pctrlr->pmr.mem_register_size;
725 		return pctrlr->pmr.mem_register_addr;
726 	}
727 
728 	*size = 0;
729 
730 	if (pctrlr->pmr.bar_va == NULL) {
731 		SPDK_DEBUGLOG(nvme, "PMR not available\n");
732 		return NULL;
733 	}
734 
735 	if (nvme_pcie_ctrlr_get_pmrcap(pctrlr, &pmrcap)) {
736 		SPDK_ERRLOG("get registers failed\n");
737 		return NULL;
738 	}
739 
740 	/* Check if WDS / RDS is supported */
741 	if (!(pmrcap.bits.wds || pmrcap.bits.rds)) {
742 		return NULL;
743 	}
744 
745 	/* If PMR is less than 4MiB in size then abort PMR mapping */
746 	if (pctrlr->pmr.size < (1ULL << 22)) {
747 		return NULL;
748 	}
749 
750 	mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->pmr.bar_va + VALUE_2MB - 1);
751 	mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->pmr.bar_va + pctrlr->pmr.size);
752 
753 	rc = spdk_mem_register((void *)mem_register_start, mem_register_end - mem_register_start);
754 	if (rc) {
755 		SPDK_ERRLOG("spdk_mem_register() failed\n");
756 		return NULL;
757 	}
758 
759 	pctrlr->pmr.mem_register_addr = (void *)mem_register_start;
760 	pctrlr->pmr.mem_register_size = mem_register_end - mem_register_start;
761 
762 	*size = pctrlr->pmr.mem_register_size;
763 	return pctrlr->pmr.mem_register_addr;
764 }
765 
766 static int
767 nvme_pcie_ctrlr_unmap_io_pmr(struct spdk_nvme_ctrlr *ctrlr)
768 {
769 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
770 	int rc;
771 
772 	if (pctrlr->pmr.mem_register_addr == NULL) {
773 		return -ENXIO;
774 	}
775 
776 	rc = spdk_mem_unregister(pctrlr->pmr.mem_register_addr, pctrlr->pmr.mem_register_size);
777 
778 	if (rc == 0) {
779 		pctrlr->pmr.mem_register_addr = NULL;
780 		pctrlr->pmr.mem_register_size = 0;
781 	}
782 
783 	return rc;
784 }
785 
786 static int
787 nvme_pcie_ctrlr_allocate_bars(struct nvme_pcie_ctrlr *pctrlr)
788 {
789 	int rc;
790 	void *addr = NULL;
791 	uint64_t phys_addr = 0, size = 0;
792 
793 	rc = spdk_pci_device_map_bar(pctrlr->devhandle, 0, &addr,
794 				     &phys_addr, &size);
795 
796 	if ((addr == NULL) || (rc != 0)) {
797 		SPDK_ERRLOG("nvme_pcicfg_map_bar failed with rc %d or bar %p\n",
798 			    rc, addr);
799 		return -1;
800 	}
801 
802 	pctrlr->regs = (volatile struct spdk_nvme_registers *)addr;
803 	pctrlr->regs_size = size;
804 	pctrlr->doorbell_base = (volatile uint32_t *)&pctrlr->regs->doorbell[0].sq_tdbl;
805 	nvme_pcie_ctrlr_map_cmb(pctrlr);
806 	nvme_pcie_ctrlr_map_pmr(pctrlr);
807 
808 	return 0;
809 }
810 
811 static int
812 nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr)
813 {
814 	int rc = 0;
815 	void *addr = (void *)pctrlr->regs;
816 
817 	if (pctrlr->ctrlr.is_removed) {
818 		return rc;
819 	}
820 
821 	rc = nvme_pcie_ctrlr_unmap_pmr(pctrlr);
822 	if (rc != 0) {
823 		SPDK_ERRLOG("nvme_ctrlr_unmap_pmr failed with error code %d\n", rc);
824 		return -1;
825 	}
826 
827 	rc = nvme_pcie_ctrlr_unmap_cmb(pctrlr);
828 	if (rc != 0) {
829 		SPDK_ERRLOG("nvme_ctrlr_unmap_cmb failed with error code %d\n", rc);
830 		return -1;
831 	}
832 
833 	if (addr) {
834 		/* NOTE: addr may have been remapped here. We're relying on DPDK to call
835 		 * munmap internally.
836 		 */
837 		rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, 0, addr);
838 	}
839 	return rc;
840 }
841 
842 /* This function must only be called while holding g_spdk_nvme_driver->lock */
843 static int
844 pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
845 {
846 	struct spdk_nvme_transport_id trid = {};
847 	struct nvme_pcie_enum_ctx *enum_ctx = ctx;
848 	struct spdk_nvme_ctrlr *ctrlr;
849 	struct spdk_pci_addr pci_addr;
850 
851 	pci_addr = spdk_pci_device_get_addr(pci_dev);
852 
853 	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
854 	spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr);
855 
856 	ctrlr = nvme_get_ctrlr_by_trid_unsafe(&trid);
857 	if (!spdk_process_is_primary()) {
858 		if (!ctrlr) {
859 			SPDK_ERRLOG("Controller must be constructed in the primary process first.\n");
860 			return -1;
861 		}
862 
863 		return nvme_ctrlr_add_process(ctrlr, pci_dev);
864 	}
865 
866 	/* Check whether the user passed a specific PCI address */
867 	if (enum_ctx->has_pci_addr &&
868 	    (spdk_pci_addr_compare(&pci_addr, &enum_ctx->pci_addr) != 0)) {
869 		return 1;
870 	}
871 
872 	return nvme_ctrlr_probe(&trid, enum_ctx->probe_ctx, pci_dev);
873 }
874 
875 static int
876 nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx,
877 		     bool direct_connect)
878 {
879 	struct nvme_pcie_enum_ctx enum_ctx = {};
880 
881 	enum_ctx.probe_ctx = probe_ctx;
882 
883 	if (strlen(probe_ctx->trid.traddr) != 0) {
884 		if (spdk_pci_addr_parse(&enum_ctx.pci_addr, probe_ctx->trid.traddr)) {
885 			return -1;
886 		}
887 		enum_ctx.has_pci_addr = true;
888 	}
889 
890 	/* Only the primary process can monitor hotplug. */
891 	if (spdk_process_is_primary()) {
892 		_nvme_pcie_hotplug_monitor(probe_ctx);
893 	}
894 
895 	if (enum_ctx.has_pci_addr == false) {
896 		return spdk_pci_enumerate(spdk_pci_nvme_get_driver(),
897 					  pcie_nvme_enum_cb, &enum_ctx);
898 	} else {
899 		return spdk_pci_device_attach(spdk_pci_nvme_get_driver(),
900 					      pcie_nvme_enum_cb, &enum_ctx, &enum_ctx.pci_addr);
901 	}
902 }
903 
904 static struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
905 		const struct spdk_nvme_ctrlr_opts *opts,
906 		void *devhandle)
907 {
908 	struct spdk_pci_device *pci_dev = devhandle;
909 	struct nvme_pcie_ctrlr *pctrlr;
910 	union spdk_nvme_cap_register cap;
911 	union spdk_nvme_vs_register vs;
912 	uint16_t cmd_reg;
913 	int rc;
914 	struct spdk_pci_id pci_id;
915 
916 	rc = spdk_pci_device_claim(pci_dev);
917 	if (rc < 0) {
918 		SPDK_ERRLOG("could not claim device %s (%s)\n",
919 			    trid->traddr, spdk_strerror(-rc));
920 		return NULL;
921 	}
922 
923 	pctrlr = spdk_zmalloc(sizeof(struct nvme_pcie_ctrlr), 64, NULL,
924 			      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
925 	if (pctrlr == NULL) {
926 		spdk_pci_device_unclaim(pci_dev);
927 		SPDK_ERRLOG("could not allocate ctrlr\n");
928 		return NULL;
929 	}
930 
931 	pctrlr->is_remapped = false;
932 	pctrlr->ctrlr.is_removed = false;
933 	pctrlr->devhandle = devhandle;
934 	pctrlr->ctrlr.opts = *opts;
935 	pctrlr->ctrlr.trid = *trid;
936 
937 	rc = nvme_ctrlr_construct(&pctrlr->ctrlr);
938 	if (rc != 0) {
939 		spdk_pci_device_unclaim(pci_dev);
940 		spdk_free(pctrlr);
941 		return NULL;
942 	}
943 
944 	rc = nvme_pcie_ctrlr_allocate_bars(pctrlr);
945 	if (rc != 0) {
946 		spdk_pci_device_unclaim(pci_dev);
947 		spdk_free(pctrlr);
948 		return NULL;
949 	}
950 
951 	/* Enable PCI busmaster and disable INTx */
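	/* Config offset 4 is the PCI Command register: bit 2 is Bus Master Enable
	 * and bit 10 is Interrupt Disable, hence the 0x404 mask below. */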
952 	spdk_pci_device_cfg_read16(pci_dev, &cmd_reg, 4);
953 	cmd_reg |= 0x404;
954 	spdk_pci_device_cfg_write16(pci_dev, cmd_reg, 4);
955 
956 	if (nvme_ctrlr_get_cap(&pctrlr->ctrlr, &cap)) {
957 		SPDK_ERRLOG("get_cap() failed\n");
958 		spdk_pci_device_unclaim(pci_dev);
959 		spdk_free(pctrlr);
960 		return NULL;
961 	}
962 
963 	if (nvme_ctrlr_get_vs(&pctrlr->ctrlr, &vs)) {
964 		SPDK_ERRLOG("get_vs() failed\n");
965 		spdk_pci_device_unclaim(pci_dev);
966 		spdk_free(pctrlr);
967 		return NULL;
968 	}
969 
970 	nvme_ctrlr_init_cap(&pctrlr->ctrlr, &cap, &vs);
971 
972 	/* The doorbell stride is 2 ^ (2 + DSTRD) bytes; we want the stride in
973 	 * uint32_t units (multiples of 4 bytes), so drop the + 2. */
974 	pctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;
975 
976 	pci_id = spdk_pci_device_get_id(pci_dev);
977 	pctrlr->ctrlr.quirks = nvme_get_quirks(&pci_id);
978 
979 	rc = nvme_pcie_ctrlr_construct_admin_qpair(&pctrlr->ctrlr, pctrlr->ctrlr.opts.admin_queue_size);
980 	if (rc != 0) {
981 		nvme_ctrlr_destruct(&pctrlr->ctrlr);
982 		return NULL;
983 	}
984 
985 	/* Construct the primary process properties */
986 	rc = nvme_ctrlr_add_process(&pctrlr->ctrlr, pci_dev);
987 	if (rc != 0) {
988 		nvme_ctrlr_destruct(&pctrlr->ctrlr);
989 		return NULL;
990 	}
991 
992 	if (g_sigset != true) {
993 		spdk_pci_register_error_handler(nvme_sigbus_fault_sighandler,
994 						NULL);
995 		g_sigset = true;
996 	}
997 
998 	return &pctrlr->ctrlr;
999 }
1000 
1001 static int
1002 nvme_pcie_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
1003 {
1004 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
1005 	struct nvme_pcie_qpair *padminq = nvme_pcie_qpair(ctrlr->adminq);
1006 	union spdk_nvme_aqa_register aqa;
1007 
1008 	if (nvme_pcie_ctrlr_set_asq(pctrlr, padminq->cmd_bus_addr)) {
1009 		SPDK_ERRLOG("set_asq() failed\n");
1010 		return -EIO;
1011 	}
1012 
1013 	if (nvme_pcie_ctrlr_set_acq(pctrlr, padminq->cpl_bus_addr)) {
1014 		SPDK_ERRLOG("set_acq() failed\n");
1015 		return -EIO;
1016 	}
1017 
1018 	aqa.raw = 0;
1019 	/* acqs and asqs are 0-based. */
1020 	aqa.bits.acqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1;
1021 	aqa.bits.asqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1;
1022 
1023 	if (nvme_pcie_ctrlr_set_aqa(pctrlr, &aqa)) {
1024 		SPDK_ERRLOG("set_aqa() failed\n");
1025 		return -EIO;
1026 	}
1027 
1028 	return 0;
1029 }
1030 
1031 static int
1032 nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
1033 {
1034 	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
1035 	struct spdk_pci_device *devhandle = nvme_ctrlr_proc_get_devhandle(ctrlr);
1036 
1037 	if (ctrlr->adminq) {
1038 		nvme_pcie_qpair_destroy(ctrlr->adminq);
1039 	}
1040 
1041 	nvme_ctrlr_destruct_finish(ctrlr);
1042 
1043 	nvme_ctrlr_free_processes(ctrlr);
1044 
1045 	nvme_pcie_ctrlr_free_bars(pctrlr);
1046 
1047 	if (devhandle) {
1048 		spdk_pci_device_unclaim(devhandle);
1049 		spdk_pci_device_detach(devhandle);
1050 	}
1051 
1052 	spdk_free(pctrlr);
1053 
1054 	return 0;
1055 }
1056 
1057 static int
1058 nvme_pcie_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
1059 				 int (*iter_fn)(struct nvme_request *req, void *arg),
1060 				 void *arg)
1061 {
1062 	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
1063 	struct nvme_tracker *tr, *tmp;
1064 	int rc;
1065 
1066 	assert(iter_fn != NULL);
1067 
1068 	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) {
1069 		assert(tr->req != NULL);
1070 
1071 		rc = iter_fn(tr->req, arg);
1072 		if (rc != 0) {
1073 			return rc;
1074 		}
1075 	}
1076 
1077 	return 0;
1078 }
1079 
1080 static void
1081 nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
1082 {
1083 	/*
1084 	 * Bad vtophys translation, so abort this request and return
1085 	 *  immediately.
1086 	 */
1087 	nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
1088 						SPDK_NVME_SC_INVALID_FIELD,
1089 						1 /* do not retry */, true);
1090 }
1091 
1092 /*
1093  * Append PRP list entries to describe a virtually contiguous buffer starting at virt_addr of len bytes.
1094  *
1095  * *prp_index will be updated to account for the number of PRP entries used.
1096  */
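/*
 * Example of the resulting data pointer layout, assuming 4 KiB pages: an 8 KiB
 * page-aligned buffer consumes two entries (PRP1 plus PRP2 used directly as the
 * second page address), while a 12 KiB buffer consumes three, so PRP2 instead
 * points at the per-tracker PRP list (tr->u.prp) holding the 2nd and 3rd page
 * addresses.
 */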
1097 static inline int
1098 nvme_pcie_prp_list_append(struct nvme_tracker *tr, uint32_t *prp_index, void *virt_addr, size_t len,
1099 			  uint32_t page_size)
1100 {
1101 	struct spdk_nvme_cmd *cmd = &tr->req->cmd;
1102 	uintptr_t page_mask = page_size - 1;
1103 	uint64_t phys_addr;
1104 	uint32_t i;
1105 
1106 	SPDK_DEBUGLOG(nvme, "prp_index:%u virt_addr:%p len:%u\n",
1107 		      *prp_index, virt_addr, (uint32_t)len);
1108 
1109 	if (spdk_unlikely(((uintptr_t)virt_addr & 3) != 0)) {
1110 		SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
1111 		return -EFAULT;
1112 	}
1113 
1114 	i = *prp_index;
1115 	while (len) {
1116 		uint32_t seg_len;
1117 
1118 		/*
1119 		 * prp_index 0 is stored in prp1, and the rest are stored in the prp[] array,
1120 		 * so prp_index == SPDK_COUNTOF(tr->u.prp) is still valid; only larger values overflow.
1121 		 */
1122 		if (spdk_unlikely(i > SPDK_COUNTOF(tr->u.prp))) {
1123 			SPDK_ERRLOG("out of PRP entries\n");
1124 			return -EFAULT;
1125 		}
1126 
1127 		phys_addr = spdk_vtophys(virt_addr, NULL);
1128 		if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR)) {
1129 			SPDK_ERRLOG("vtophys(%p) failed\n", virt_addr);
1130 			return -EFAULT;
1131 		}
1132 
1133 		if (i == 0) {
1134 			SPDK_DEBUGLOG(nvme, "prp1 = %p\n", (void *)phys_addr);
1135 			cmd->dptr.prp.prp1 = phys_addr;
1136 			seg_len = page_size - ((uintptr_t)virt_addr & page_mask);
1137 		} else {
1138 			if ((phys_addr & page_mask) != 0) {
1139 				SPDK_ERRLOG("PRP %u not page aligned (%p)\n", i, virt_addr);
1140 				return -EFAULT;
1141 			}
1142 
1143 			SPDK_DEBUGLOG(nvme, "prp[%u] = %p\n", i - 1, (void *)phys_addr);
1144 			tr->u.prp[i - 1] = phys_addr;
1145 			seg_len = page_size;
1146 		}
1147 
1148 		seg_len = spdk_min(seg_len, len);
1149 		virt_addr += seg_len;
1150 		len -= seg_len;
1151 		i++;
1152 	}
1153 
1154 	cmd->psdt = SPDK_NVME_PSDT_PRP;
1155 	if (i <= 1) {
1156 		cmd->dptr.prp.prp2 = 0;
1157 	} else if (i == 2) {
1158 		cmd->dptr.prp.prp2 = tr->u.prp[0];
1159 		SPDK_DEBUGLOG(nvme, "prp2 = %p\n", (void *)cmd->dptr.prp.prp2);
1160 	} else {
1161 		cmd->dptr.prp.prp2 = tr->prp_sgl_bus_addr;
1162 		SPDK_DEBUGLOG(nvme, "prp2 = %p (PRP list)\n", (void *)cmd->dptr.prp.prp2);
1163 	}
1164 
1165 	*prp_index = i;
1166 	return 0;
1167 }
1168 
1169 static int
1170 nvme_pcie_qpair_build_request_invalid(struct spdk_nvme_qpair *qpair,
1171 				      struct nvme_request *req, struct nvme_tracker *tr, bool dword_aligned)
1172 {
1173 	assert(0);
1174 	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1175 	return -EINVAL;
1176 }
1177 
1178 /**
1179  * Build PRP list describing physically contiguous payload buffer.
1180  */
1181 static int
1182 nvme_pcie_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
1183 				     struct nvme_tracker *tr, bool dword_aligned)
1184 {
1185 	uint32_t prp_index = 0;
1186 	int rc;
1187 
1188 	rc = nvme_pcie_prp_list_append(tr, &prp_index, req->payload.contig_or_cb_arg + req->payload_offset,
1189 				       req->payload_size, qpair->ctrlr->page_size);
1190 	if (rc) {
1191 		nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1192 	}
1193 
1194 	return rc;
1195 }
1196 
1197 /**
1198  * Build an SGL describing a physically contiguous payload buffer.
1199  *
1200  * This is more efficient than using PRPs: one SGL descriptor can describe an
1201  * arbitrarily large contiguous buffer, while PRPs need one entry per page.
1202  */
1203 static int
1204 nvme_pcie_qpair_build_contig_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
1205 		struct nvme_tracker *tr, bool dword_aligned)
1206 {
1207 	void *virt_addr;
1208 	uint64_t phys_addr, mapping_length;
1209 	uint32_t length;
1210 	struct spdk_nvme_sgl_descriptor *sgl;
1211 	uint32_t nseg = 0;
1212 
1213 	assert(req->payload_size != 0);
1214 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
1215 
1216 	sgl = tr->u.sgl;
1217 	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
1218 	req->cmd.dptr.sgl1.unkeyed.subtype = 0;
1219 
1220 	length = req->payload_size;
1221 	virt_addr = req->payload.contig_or_cb_arg + req->payload_offset;
1222 
1223 	while (length > 0) {
1224 		if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
1225 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1226 			return -EFAULT;
1227 		}
1228 
1229 		if (dword_aligned && ((uintptr_t)virt_addr & 3)) {
1230 			SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
1231 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1232 			return -EFAULT;
1233 		}
1234 
1235 		mapping_length = length;
1236 		phys_addr = spdk_vtophys(virt_addr, &mapping_length);
1237 		if (phys_addr == SPDK_VTOPHYS_ERROR) {
1238 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1239 			return -EFAULT;
1240 		}
1241 
1242 		mapping_length = spdk_min(length, mapping_length);
1243 
1244 		length -= mapping_length;
1245 		virt_addr += mapping_length;
1246 
1247 		sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1248 		sgl->unkeyed.length = mapping_length;
1249 		sgl->address = phys_addr;
1250 		sgl->unkeyed.subtype = 0;
1251 
1252 		sgl++;
1253 		nseg++;
1254 	}
1255 
1256 	if (nseg == 1) {
1257 		/*
1258 		 * The whole transfer can be described by a single SGL descriptor.
1259 		 *  Use the special case described by the spec where SGL1's type is Data Block.
1260 		 *  This means the SGL in the tracker is not used at all, so copy the first (and only)
1261 		 *  SGL element into SGL1.
1262 		 */
1263 		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1264 		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
1265 		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
1266 	} else {
1267 		/* The SPDK NVMe driver currently supports only one SGL segment; this is
1268 		 *  enough because NVME_MAX_SGL_DESCRIPTORS * 16 bytes is less than one page.
1269 		 */
1270 		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
1271 		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
1272 		req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor);
1273 	}
1274 
1275 	return 0;
1276 }
1277 
1278 /**
1279  * Build SGL list describing scattered payload buffer.
1280  */
1281 static int
1282 nvme_pcie_qpair_build_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
1283 				     struct nvme_tracker *tr, bool dword_aligned)
1284 {
1285 	int rc;
1286 	void *virt_addr;
1287 	uint64_t phys_addr, mapping_length;
1288 	uint32_t remaining_transfer_len, remaining_user_sge_len, length;
1289 	struct spdk_nvme_sgl_descriptor *sgl;
1290 	uint32_t nseg = 0;
1291 
1292 	/*
1293 	 * Build scattered payloads.
1294 	 */
1295 	assert(req->payload_size != 0);
1296 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
1297 	assert(req->payload.reset_sgl_fn != NULL);
1298 	assert(req->payload.next_sge_fn != NULL);
1299 	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
1300 
1301 	sgl = tr->u.sgl;
1302 	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
1303 	req->cmd.dptr.sgl1.unkeyed.subtype = 0;
1304 
1305 	remaining_transfer_len = req->payload_size;
1306 
1307 	while (remaining_transfer_len > 0) {
1308 		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg,
1309 					      &virt_addr, &remaining_user_sge_len);
1310 		if (rc) {
1311 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1312 			return -EFAULT;
1313 		}
1314 
1315 		/* Bit Bucket SGL descriptor */
1316 		if ((uint64_t)virt_addr == UINT64_MAX) {
1317 			/* TODO: enable WRITE and COMPARE when necessary */
1318 			if (req->cmd.opc != SPDK_NVME_OPC_READ) {
1319 				SPDK_ERRLOG("Bit bucket SGLs are only supported for READ commands\n");
1320 				goto exit;
1321 			}
1322 			if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
1323 				SPDK_ERRLOG("Too many SGL entries\n");
1324 				goto exit;
1325 			}
1326 
1327 			sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_BIT_BUCKET;
1328 			/* If the SGL describes a destination data buffer (a read), the controller
1329 			 * discards that portion of the data, and the discarded length is still
1330 			 * counted in the Number of Logical Blocks (NLB) parameter. For a source
1331 			 * data buffer, the length is not included in NLB.
1332 			 */
1333 			remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len);
1334 			remaining_transfer_len -= remaining_user_sge_len;
1335 
1336 			sgl->unkeyed.length = remaining_user_sge_len;
1337 			sgl->address = 0;
1338 			sgl->unkeyed.subtype = 0;
1339 
1340 			sgl++;
1341 			nseg++;
1342 
1343 			continue;
1344 		}
1345 
1346 		remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len);
1347 		remaining_transfer_len -= remaining_user_sge_len;
1348 		while (remaining_user_sge_len > 0) {
1349 			if (nseg >= NVME_MAX_SGL_DESCRIPTORS) {
1350 				SPDK_ERRLOG("Too many SGL entries\n");
1351 				goto exit;
1352 			}
1353 
1354 			if (dword_aligned && ((uintptr_t)virt_addr & 3)) {
1355 				SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr);
1356 				goto exit;
1357 			}
1358 
1359 			mapping_length = remaining_user_sge_len;
1360 			phys_addr = spdk_vtophys(virt_addr, &mapping_length);
1361 			if (phys_addr == SPDK_VTOPHYS_ERROR) {
1362 				goto exit;
1363 			}
1364 
1365 			length = spdk_min(remaining_user_sge_len, mapping_length);
1366 			remaining_user_sge_len -= length;
1367 			virt_addr += length;
1368 
1369 			if (nseg > 0 && phys_addr ==
1370 			    (*(sgl - 1)).address + (*(sgl - 1)).unkeyed.length) {
1371 				/* extend previous entry */
1372 				(*(sgl - 1)).unkeyed.length += length;
1373 				continue;
1374 			}
1375 
1376 			sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1377 			sgl->unkeyed.length = length;
1378 			sgl->address = phys_addr;
1379 			sgl->unkeyed.subtype = 0;
1380 
1381 			sgl++;
1382 			nseg++;
1383 		}
1384 	}
1385 
1386 	if (nseg == 1) {
1387 		/*
1388 		 * The whole transfer can be described by a single SGL descriptor.
1389 		 *  Use the special case described by the spec where SGL1's type is Data Block.
1390 		 *  This means the SGL in the tracker is not used at all, so copy the first (and only)
1391 		 *  SGL element into SGL1.
1392 		 */
1393 		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1394 		req->cmd.dptr.sgl1.address = tr->u.sgl[0].address;
1395 		req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length;
1396 	} else {
1397 		/* The SPDK NVMe driver currently supports only one SGL segment; this is
1398 		 *  enough because NVME_MAX_SGL_DESCRIPTORS * 16 bytes is less than one page.
1399 		 */
1400 		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
1401 		req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr;
1402 		req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor);
1403 	}
1404 
1405 	return 0;
1406 
1407 exit:
1408 	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1409 	return -EFAULT;
1410 }
1411 
1412 /**
1413  * Build PRP list describing scattered payload buffer.
1414  */
1415 static int
1416 nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req,
1417 				       struct nvme_tracker *tr, bool dword_aligned)
1418 {
1419 	int rc;
1420 	void *virt_addr;
1421 	uint32_t remaining_transfer_len, length;
1422 	uint32_t prp_index = 0;
1423 	uint32_t page_size = qpair->ctrlr->page_size;
1424 
1425 	/*
1426 	 * Build scattered payloads.
1427 	 */
1428 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
1429 	assert(req->payload.reset_sgl_fn != NULL);
1430 	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
1431 
1432 	remaining_transfer_len = req->payload_size;
1433 	while (remaining_transfer_len > 0) {
1434 		assert(req->payload.next_sge_fn != NULL);
1435 		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length);
1436 		if (rc) {
1437 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1438 			return -EFAULT;
1439 		}
1440 
1441 		length = spdk_min(remaining_transfer_len, length);
1442 
1443 		/*
1444 		 * Any incompatible SGEs should already have been handled by the splitting
1445 		 *  routine higher up, but assert here as an additional check.
1446 		 *
1447 		 * All SGEs except the last must end on a page boundary.
1448 		 */
1449 		assert((length == remaining_transfer_len) ||
1450 		       _is_page_aligned((uintptr_t)virt_addr + length, page_size));
1451 
1452 		rc = nvme_pcie_prp_list_append(tr, &prp_index, virt_addr, length, page_size);
1453 		if (rc) {
1454 			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1455 			return rc;
1456 		}
1457 
1458 		remaining_transfer_len -= length;
1459 	}
1460 
1461 	return 0;
1462 }
1463 
1464 typedef int(*build_req_fn)(struct spdk_nvme_qpair *, struct nvme_request *, struct nvme_tracker *,
1465 			   bool);
1466 
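/*
 * Request builders indexed by [payload type][0 = PRP, 1 = hardware SGL]; the
 * second index is the sgl_supported flag computed in
 * nvme_pcie_qpair_submit_request().
 */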
1467 static build_req_fn const g_nvme_pcie_build_req_table[][2] = {
1468 	[NVME_PAYLOAD_TYPE_INVALID] = {
1469 		nvme_pcie_qpair_build_request_invalid,			/* PRP */
1470 		nvme_pcie_qpair_build_request_invalid			/* SGL */
1471 	},
1472 	[NVME_PAYLOAD_TYPE_CONTIG] = {
1473 		nvme_pcie_qpair_build_contig_request,			/* PRP */
1474 		nvme_pcie_qpair_build_contig_hw_sgl_request		/* SGL */
1475 	},
1476 	[NVME_PAYLOAD_TYPE_SGL] = {
1477 		nvme_pcie_qpair_build_prps_sgl_request,			/* PRP */
1478 		nvme_pcie_qpair_build_hw_sgl_request			/* SGL */
1479 	}
1480 };
1481 
1482 static int
1483 nvme_pcie_qpair_build_metadata(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
1484 			       bool sgl_supported, bool dword_aligned)
1485 {
1486 	void *md_payload;
1487 	struct nvme_request *req = tr->req;
1488 
1489 	if (req->payload.md) {
1490 		md_payload = req->payload.md + req->md_offset;
1491 		if (dword_aligned && ((uintptr_t)md_payload & 3)) {
1492 			SPDK_ERRLOG("virt_addr %p not dword aligned\n", md_payload);
1493 			goto exit;
1494 		}
1495 
1496 		if (sgl_supported && dword_aligned) {
1497 			assert(req->cmd.psdt == SPDK_NVME_PSDT_SGL_MPTR_CONTIG);
1498 			req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL;
1499 			tr->meta_sgl.address = spdk_vtophys(md_payload, NULL);
1500 			if (tr->meta_sgl.address == SPDK_VTOPHYS_ERROR) {
1501 				goto exit;
1502 			}
1503 			tr->meta_sgl.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
1504 			tr->meta_sgl.unkeyed.length = req->md_size;
1505 			tr->meta_sgl.unkeyed.subtype = 0;
1506 			req->cmd.mptr = tr->prp_sgl_bus_addr - sizeof(struct spdk_nvme_sgl_descriptor);
1507 		} else {
1508 			req->cmd.mptr = spdk_vtophys(md_payload, NULL);
1509 			if (req->cmd.mptr == SPDK_VTOPHYS_ERROR) {
1510 				goto exit;
1511 			}
1512 		}
1513 	}
1514 
1515 	return 0;
1516 
1517 exit:
1518 	nvme_pcie_fail_request_bad_vtophys(qpair, tr);
1519 	return -EINVAL;
1520 }
1521 
1522 static int
1523 nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
1524 {
1525 	struct nvme_tracker	*tr;
1526 	int			rc = 0;
1527 	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
1528 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
1529 	enum nvme_payload_type	payload_type;
1530 	bool			sgl_supported;
1531 	bool			dword_aligned = true;
1532 
1533 	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
1534 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1535 	}
1536 
1537 	tr = TAILQ_FIRST(&pqpair->free_tr);
1538 
1539 	if (tr == NULL) {
1540 		pqpair->stat->queued_requests++;
1541 		/* Inform the upper layer to try again later. */
1542 		rc = -EAGAIN;
1543 		goto exit;
1544 	}
1545 
1546 	pqpair->stat->submitted_requests++;
1547 	TAILQ_REMOVE(&pqpair->free_tr, tr, tq_list); /* remove tr from free_tr */
1548 	TAILQ_INSERT_TAIL(&pqpair->outstanding_tr, tr, tq_list);
1549 	tr->req = req;
1550 	tr->cb_fn = req->cb_fn;
1551 	tr->cb_arg = req->cb_arg;
1552 	req->cmd.cid = tr->cid;
1553 
1554 	if (req->payload_size != 0) {
1555 		payload_type = nvme_payload_type(&req->payload);
1556 		/* According to the specification, PRPs shall be used for all
1557 		 *  Admin commands for NVMe over PCIe implementations.
1558 		 */
1559 		sgl_supported = (ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) != 0 &&
1560 				!nvme_qpair_is_admin_queue(qpair);
1561 
1562 		if (sgl_supported) {
1563 			/* Don't use SGL for DSM command */
1564 			if (spdk_unlikely((ctrlr->quirks & NVME_QUIRK_NO_SGL_FOR_DSM) &&
1565 					  (req->cmd.opc == SPDK_NVME_OPC_DATASET_MANAGEMENT))) {
1566 				sgl_supported = false;
1567 			}
1568 		}
1569 
1570 		if (sgl_supported && !(ctrlr->flags & SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT)) {
1571 			dword_aligned = false;
1572 		}
1573 		rc = g_nvme_pcie_build_req_table[payload_type][sgl_supported](qpair, req, tr, dword_aligned);
1574 		if (rc < 0) {
1575 			goto exit;
1576 		}
1577 
1578 		rc = nvme_pcie_qpair_build_metadata(qpair, tr, sgl_supported, dword_aligned);
1579 		if (rc < 0) {
1580 			goto exit;
1581 		}
1582 	}
1583 
1584 	nvme_pcie_qpair_submit_tracker(qpair, tr);
1585 
1586 exit:
1587 	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
1588 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1589 	}
1590 
1591 	return rc;
1592 }
1593 
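/*
 * Usage sketch for the hotplug filter (hypothetical application code, not part
 * of this driver; the BDF "0000:81:00.0" is made up for illustration):
 *
 *	static bool
 *	hotplug_filter_example(const struct spdk_pci_addr *addr)
 *	{
 *		struct spdk_pci_addr allowed;
 *
 *		if (spdk_pci_addr_parse(&allowed, "0000:81:00.0") != 0) {
 *			return false;
 *		}
 *		return spdk_pci_addr_compare(addr, &allowed) == 0;
 *	}
 *
 *	spdk_nvme_pcie_set_hotplug_filter(hotplug_filter_example);
 */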
1594 void
1595 spdk_nvme_pcie_set_hotplug_filter(spdk_nvme_pcie_hotplug_filter_cb filter_cb)
1596 {
1597 	g_hotplug_filter_cb = filter_cb;
1598 }
1599 
1600 static int
1601 nvme_pcie_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
1602 			       struct spdk_nvme_transport_poll_group_stat **_stats)
1603 {
1604 	struct nvme_pcie_poll_group *group;
1605 	struct spdk_nvme_transport_poll_group_stat *stats;
1606 
1607 	if (tgroup == NULL || _stats == NULL) {
1608 		SPDK_ERRLOG("Invalid stats or group pointer\n");
1609 		return -EINVAL;
1610 	}
1611 
1612 	group = SPDK_CONTAINEROF(tgroup, struct nvme_pcie_poll_group, group);
1613 	stats = calloc(1, sizeof(*stats));
1614 	if (!stats) {
1615 		SPDK_ERRLOG("Can't allocate memory for PCIe stats\n");
1616 		return -ENOMEM;
1617 	}
1618 	stats->trtype = SPDK_NVME_TRANSPORT_PCIE;
1619 	memcpy(&stats->pcie, &group->stats, sizeof(group->stats));
1620 
1621 	*_stats = stats;
1622 
1623 	return 0;
1624 }
1625 
1626 static void
1627 nvme_pcie_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
1628 				struct spdk_nvme_transport_poll_group_stat *stats)
1629 {
1630 	free(stats);
1631 }
1632 
1633 static struct spdk_pci_id nvme_pci_driver_id[] = {
1634 	{
1635 		.class_id = SPDK_PCI_CLASS_NVME,
1636 		.vendor_id = SPDK_PCI_ANY_ID,
1637 		.device_id = SPDK_PCI_ANY_ID,
1638 		.subvendor_id = SPDK_PCI_ANY_ID,
1639 		.subdevice_id = SPDK_PCI_ANY_ID,
1640 	},
1641 	{ .vendor_id = 0, /* sentinel */ },
1642 };
1643 
1644 SPDK_PCI_DRIVER_REGISTER(nvme, nvme_pci_driver_id,
1645 			 SPDK_PCI_DRIVER_NEED_MAPPING | SPDK_PCI_DRIVER_WC_ACTIVATE);
1646 
1647 const struct spdk_nvme_transport_ops pcie_ops = {
1648 	.name = "PCIE",
1649 	.type = SPDK_NVME_TRANSPORT_PCIE,
1650 	.ctrlr_construct = nvme_pcie_ctrlr_construct,
1651 	.ctrlr_scan = nvme_pcie_ctrlr_scan,
1652 	.ctrlr_destruct = nvme_pcie_ctrlr_destruct,
1653 	.ctrlr_enable = nvme_pcie_ctrlr_enable,
1654 
1655 	.ctrlr_set_reg_4 = nvme_pcie_ctrlr_set_reg_4,
1656 	.ctrlr_set_reg_8 = nvme_pcie_ctrlr_set_reg_8,
1657 	.ctrlr_get_reg_4 = nvme_pcie_ctrlr_get_reg_4,
1658 	.ctrlr_get_reg_8 = nvme_pcie_ctrlr_get_reg_8,
1659 
1660 	.ctrlr_get_max_xfer_size = nvme_pcie_ctrlr_get_max_xfer_size,
1661 	.ctrlr_get_max_sges = nvme_pcie_ctrlr_get_max_sges,
1662 
1663 	.ctrlr_reserve_cmb = nvme_pcie_ctrlr_reserve_cmb,
1664 	.ctrlr_map_cmb = nvme_pcie_ctrlr_map_io_cmb,
1665 	.ctrlr_unmap_cmb = nvme_pcie_ctrlr_unmap_io_cmb,
1666 
1667 	.ctrlr_enable_pmr = nvme_pcie_ctrlr_enable_pmr,
1668 	.ctrlr_disable_pmr = nvme_pcie_ctrlr_disable_pmr,
1669 	.ctrlr_map_pmr = nvme_pcie_ctrlr_map_io_pmr,
1670 	.ctrlr_unmap_pmr = nvme_pcie_ctrlr_unmap_io_pmr,
1671 
1672 	.ctrlr_create_io_qpair = nvme_pcie_ctrlr_create_io_qpair,
1673 	.ctrlr_delete_io_qpair = nvme_pcie_ctrlr_delete_io_qpair,
1674 	.ctrlr_connect_qpair = nvme_pcie_ctrlr_connect_qpair,
1675 	.ctrlr_disconnect_qpair = nvme_pcie_ctrlr_disconnect_qpair,
1676 
1677 	.qpair_abort_reqs = nvme_pcie_qpair_abort_reqs,
1678 	.qpair_reset = nvme_pcie_qpair_reset,
1679 	.qpair_submit_request = nvme_pcie_qpair_submit_request,
1680 	.qpair_process_completions = nvme_pcie_qpair_process_completions,
1681 	.qpair_iterate_requests = nvme_pcie_qpair_iterate_requests,
1682 	.admin_qpair_abort_aers = nvme_pcie_admin_qpair_abort_aers,
1683 
1684 	.poll_group_create = nvme_pcie_poll_group_create,
1685 	.poll_group_connect_qpair = nvme_pcie_poll_group_connect_qpair,
1686 	.poll_group_disconnect_qpair = nvme_pcie_poll_group_disconnect_qpair,
1687 	.poll_group_add = nvme_pcie_poll_group_add,
1688 	.poll_group_remove = nvme_pcie_poll_group_remove,
1689 	.poll_group_process_completions = nvme_pcie_poll_group_process_completions,
1690 	.poll_group_destroy = nvme_pcie_poll_group_destroy,
1691 	.poll_group_get_stats = nvme_pcie_poll_group_get_stats,
1692 	.poll_group_free_stats = nvme_pcie_poll_group_free_stats
1693 };
1694 
1695 SPDK_NVME_TRANSPORT_REGISTER(pcie, &pcie_ops);
1696