xref: /spdk/lib/nvmf/vfio_user.c (revision cc0d05b427038cc877539c12fb0f8bc9053300c0)
1 /*-
2  *   BSD LICENSE
3  *   Copyright (c) Intel Corporation. All rights reserved.
4  *   Copyright (c) 2019, Nutanix Inc. All rights reserved.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Intel Corporation nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * NVMe over vfio-user transport
35  */
36 
37 #include <vfio-user/libvfio-user.h>
38 #include <vfio-user/pci_defs.h>
39 
40 #include "spdk/barrier.h"
41 #include "spdk/stdinc.h"
42 #include "spdk/assert.h"
43 #include "spdk/thread.h"
44 #include "spdk/nvmf_transport.h"
45 #include "spdk/sock.h"
46 #include "spdk/string.h"
47 #include "spdk/util.h"
48 #include "spdk/log.h"
49 
50 #include "transport.h"
51 
52 #include "nvmf_internal.h"
53 
54 #define NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH 256
55 #define NVMF_VFIO_USER_DEFAULT_AQ_DEPTH 32
56 #define NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 64
57 #define NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE ((NVMF_REQ_MAX_BUFFERS - 1) << SHIFT_4KB)
58 #define NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE
59 
60 #define NVMF_VFIO_USER_DOORBELLS_OFFSET	0x1000
61 #define NVMF_VFIO_USER_DOORBELLS_SIZE 0x1000
62 
63 #define NVME_REG_CFG_SIZE       0x1000
64 #define NVME_REG_BAR0_SIZE      0x4000
65 #define NVME_IRQ_INTX_NUM       1
66 #define NVME_IRQ_MSIX_NUM	NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR
67 
68 struct nvmf_vfio_user_req;
69 struct nvmf_vfio_user_qpair;
70 
71 typedef int (*nvmf_vfio_user_req_cb_fn)(struct nvmf_vfio_user_req *req, void *cb_arg);
72 
73 /* 1 more for PRP2 list itself */
74 #define NVMF_VFIO_USER_MAX_IOVECS	(NVMF_REQ_MAX_BUFFERS + 1)
75 
76 enum nvmf_vfio_user_req_state {
77 	VFIO_USER_REQUEST_STATE_FREE = 0,
78 	VFIO_USER_REQUEST_STATE_EXECUTING,
79 };
80 
81 struct nvmf_vfio_user_req  {
82 	struct spdk_nvmf_request		req;
83 	struct spdk_nvme_cpl			rsp;
84 	struct spdk_nvme_cmd			cmd;
85 
86 	enum nvmf_vfio_user_req_state		state;
87 	nvmf_vfio_user_req_cb_fn		cb_fn;
88 	void					*cb_arg;
89 
90 	/* old CC before prop_set_cc fabric command */
91 	union spdk_nvme_cc_register		cc;
92 
93 	/* placeholder for gpa_to_vva memory map table, the IO buffer doesn't use it */
94 	dma_sg_t				*sg;
95 	struct iovec				iov[NVMF_VFIO_USER_MAX_IOVECS];
96 	uint8_t					iovcnt;
97 
98 	TAILQ_ENTRY(nvmf_vfio_user_req)		link;
99 };
100 
101 /*
102  * A NVMe queue.
103  */
104 struct nvme_q {
105 	bool is_cq;
106 
107 	void *addr;
108 
109 	dma_sg_t *sg;
110 	struct iovec iov;
111 
112 	uint32_t size;
113 	uint64_t prp1;
114 
115 	union {
116 		struct {
117 			uint32_t head;
118 			/* multiple SQs can be mapped to the same CQ */
119 			uint16_t cqid;
120 		};
121 		struct {
122 			uint32_t tail;
123 			uint16_t iv;
124 			bool ien;
125 		};
126 	};
127 };
128 
129 enum nvmf_vfio_user_qpair_state {
130 	VFIO_USER_QPAIR_UNINITIALIZED = 0,
131 	VFIO_USER_QPAIR_ACTIVE,
132 	VFIO_USER_QPAIR_DELETED,
133 	VFIO_USER_QPAIR_INACTIVE,
134 	VFIO_USER_QPAIR_ERROR,
135 };
136 
137 struct nvmf_vfio_user_qpair {
138 	struct spdk_nvmf_qpair			qpair;
139 	struct spdk_nvmf_transport_poll_group	*group;
140 	struct nvmf_vfio_user_ctrlr		*ctrlr;
141 	struct nvmf_vfio_user_req		*reqs_internal;
142 	uint16_t				qsize;
143 	struct nvme_q				cq;
144 	struct nvme_q				sq;
145 	enum nvmf_vfio_user_qpair_state		state;
146 
147 	/* Copy of Create IO SQ command */
148 	struct spdk_nvme_cmd			create_io_sq_cmd;
149 
150 	TAILQ_HEAD(, nvmf_vfio_user_req)	reqs;
151 	TAILQ_ENTRY(nvmf_vfio_user_qpair)	link;
152 };
153 
154 struct nvmf_vfio_user_poll_group {
155 	struct spdk_nvmf_transport_poll_group	group;
156 	TAILQ_HEAD(, nvmf_vfio_user_qpair)	qps;
157 };
158 
159 struct nvmf_vfio_user_ctrlr {
160 	struct nvmf_vfio_user_endpoint		*endpoint;
161 	struct nvmf_vfio_user_transport		*transport;
162 
163 	/* Number of connected queue pairs */
164 	uint32_t				num_connected_qps;
165 
166 	struct spdk_thread			*thread;
167 	struct spdk_poller			*mmio_poller;
168 
169 	uint16_t				cntlid;
170 
171 	struct nvmf_vfio_user_qpair		*qp[NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR];
172 
173 	TAILQ_ENTRY(nvmf_vfio_user_ctrlr)	link;
174 
175 	volatile uint32_t			*doorbells;
176 
177 	/* internal CSTS.CFS register for vfio-user fatal errors */
178 	uint32_t				cfs : 1;
179 };
180 
181 struct nvmf_vfio_user_endpoint {
182 	vfu_ctx_t				*vfu_ctx;
183 	struct msixcap				*msix;
184 	vfu_pci_config_space_t			*pci_config_space;
185 	int					fd;
186 	volatile uint32_t			*doorbells;
187 
188 	struct spdk_nvme_transport_id		trid;
189 	const struct spdk_nvmf_subsystem	*subsystem;
190 
191 	struct nvmf_vfio_user_ctrlr		*ctrlr;
192 	pthread_mutex_t				lock;
193 
194 	TAILQ_ENTRY(nvmf_vfio_user_endpoint)	link;
195 };
196 
197 struct nvmf_vfio_user_transport_opts {
198 	bool					disable_mappable_bar0;
199 };
200 
201 struct nvmf_vfio_user_transport {
202 	struct spdk_nvmf_transport		transport;
203 	struct nvmf_vfio_user_transport_opts    transport_opts;
204 	pthread_mutex_t				lock;
205 	TAILQ_HEAD(, nvmf_vfio_user_endpoint)	endpoints;
206 
207 	TAILQ_HEAD(, nvmf_vfio_user_qpair)	new_qps;
208 };
209 
210 /*
211  * function prototypes
212  */
213 static volatile uint32_t *
214 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q);
215 
216 static volatile uint32_t *
217 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q);
218 
219 static int
220 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req);
221 
222 static struct nvmf_vfio_user_req *
223 get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair);
224 
225 static int
226 post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
227 		struct nvme_q *cq, uint32_t cdw0, uint16_t sc,
228 		uint16_t sct);
229 
230 static int
231 nvme_cmd_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs,
232 		  uint32_t max_iovcnt, uint32_t len, size_t mps,
233 		  void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot))
234 {
235 	uint64_t prp1, prp2;
236 	void *vva;
237 	uint32_t i;
238 	uint32_t residue_len, nents;
239 	uint64_t *prp_list;
240 	uint32_t iovcnt;
241 
242 	assert(max_iovcnt > 0);
243 
244 	prp1 = cmd->dptr.prp.prp1;
245 	prp2 = cmd->dptr.prp.prp2;
246 
247 	/* PRP1 may started with unaligned page address */
248 	residue_len = mps - (prp1 % mps);
249 	residue_len = spdk_min(len, residue_len);
250 
251 	vva = gpa_to_vva(prv, prp1, residue_len, PROT_READ | PROT_WRITE);
252 	if (spdk_unlikely(vva == NULL)) {
253 		SPDK_ERRLOG("GPA to VVA failed\n");
254 		return -EINVAL;
255 	}
256 	len -= residue_len;
257 	if (len && max_iovcnt < 2) {
258 		SPDK_ERRLOG("Too many page entries, at least two iovs are required\n");
259 		return -ERANGE;
260 	}
261 	iovs[0].iov_base = vva;
262 	iovs[0].iov_len = residue_len;
263 
264 	if (len) {
265 		if (spdk_unlikely(prp2 == 0)) {
266 			SPDK_ERRLOG("no PRP2, %d remaining\n", len);
267 			return -EINVAL;
268 		}
269 
270 		if (len <= mps) {
271 			/* 2 PRP used */
272 			iovcnt = 2;
273 			vva = gpa_to_vva(prv, prp2, len, PROT_READ | PROT_WRITE);
274 			if (spdk_unlikely(vva == NULL)) {
275 				SPDK_ERRLOG("no VVA for %#" PRIx64 ", len%#x\n",
276 					    prp2, len);
277 				return -EINVAL;
278 			}
279 			iovs[1].iov_base = vva;
280 			iovs[1].iov_len = len;
281 		} else {
282 			/* PRP list used */
283 			nents = (len + mps - 1) / mps;
284 			if (spdk_unlikely(nents + 1 > max_iovcnt)) {
285 				SPDK_ERRLOG("Too many page entries\n");
286 				return -ERANGE;
287 			}
288 
289 			vva = gpa_to_vva(prv, prp2, nents * sizeof(*prp_list), PROT_READ);
290 			if (spdk_unlikely(vva == NULL)) {
291 				SPDK_ERRLOG("no VVA for %#" PRIx64 ", nents=%#x\n",
292 					    prp2, nents);
293 				return -EINVAL;
294 			}
295 			prp_list = vva;
296 			i = 0;
297 			while (len != 0) {
298 				residue_len = spdk_min(len, mps);
299 				vva = gpa_to_vva(prv, prp_list[i], residue_len, PROT_READ | PROT_WRITE);
300 				if (spdk_unlikely(vva == NULL)) {
301 					SPDK_ERRLOG("no VVA for %#" PRIx64 ", residue_len=%#x\n",
302 						    prp_list[i], residue_len);
303 					return -EINVAL;
304 				}
305 				iovs[i + 1].iov_base = vva;
306 				iovs[i + 1].iov_len = residue_len;
307 				len -= residue_len;
308 				i++;
309 			}
310 			iovcnt = i + 1;
311 		}
312 	} else {
313 		/* 1 PRP used */
314 		iovcnt = 1;
315 	}
316 
317 	assert(iovcnt <= max_iovcnt);
318 	return iovcnt;
319 }
320 
321 static int
322 nvme_cmd_map_sgls_data(void *prv, struct spdk_nvme_sgl_descriptor *sgls, uint32_t num_sgls,
323 		       struct iovec *iovs, uint32_t max_iovcnt,
324 		       void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot))
325 {
326 	uint32_t i;
327 	void *vva;
328 
329 	if (spdk_unlikely(max_iovcnt < num_sgls)) {
330 		return -ERANGE;
331 	}
332 
333 	for (i = 0; i < num_sgls; i++) {
334 		if (spdk_unlikely(sgls[i].unkeyed.type != SPDK_NVME_SGL_TYPE_DATA_BLOCK)) {
335 			SPDK_ERRLOG("Invalid SGL type %u\n", sgls[i].unkeyed.type);
336 			return -EINVAL;
337 		}
338 		vva = gpa_to_vva(prv, sgls[i].address, sgls[i].unkeyed.length, PROT_READ | PROT_WRITE);
339 		if (spdk_unlikely(vva == NULL)) {
340 			SPDK_ERRLOG("GPA to VVA failed\n");
341 			return -EINVAL;
342 		}
343 		iovs[i].iov_base = vva;
344 		iovs[i].iov_len = sgls[i].unkeyed.length;
345 	}
346 
347 	return num_sgls;
348 }
349 
350 static int
351 nvme_cmd_map_sgls(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt,
352 		  uint32_t len, size_t mps,
353 		  void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot))
354 {
355 	struct spdk_nvme_sgl_descriptor *sgl, *last_sgl;
356 	uint32_t num_sgls, seg_len;
357 	void *vva;
358 	int ret;
359 	uint32_t total_iovcnt = 0;
360 
361 	/* SGL cases */
362 	sgl = &cmd->dptr.sgl1;
363 
364 	/* only one SGL segment */
365 	if (sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
366 		assert(max_iovcnt > 0);
367 		vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length, PROT_READ | PROT_WRITE);
368 		if (spdk_unlikely(vva == NULL)) {
369 			SPDK_ERRLOG("GPA to VVA failed\n");
370 			return -EINVAL;
371 		}
372 		iovs[0].iov_base = vva;
373 		iovs[0].iov_len = sgl->unkeyed.length;
374 		assert(sgl->unkeyed.length == len);
375 
376 		return 1;
377 	}
378 
379 	for (;;) {
380 		if (spdk_unlikely((sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_SEGMENT) &&
381 				  (sgl->unkeyed.type != SPDK_NVME_SGL_TYPE_LAST_SEGMENT))) {
382 			SPDK_ERRLOG("Invalid SGL type %u\n", sgl->unkeyed.type);
383 			return -EINVAL;
384 		}
385 
386 		seg_len = sgl->unkeyed.length;
387 		if (spdk_unlikely(seg_len % sizeof(struct spdk_nvme_sgl_descriptor))) {
388 			SPDK_ERRLOG("Invalid SGL segment len %u\n", seg_len);
389 			return -EINVAL;
390 		}
391 
392 		num_sgls = seg_len / sizeof(struct spdk_nvme_sgl_descriptor);
393 		vva = gpa_to_vva(prv, sgl->address, sgl->unkeyed.length, PROT_READ);
394 		if (spdk_unlikely(vva == NULL)) {
395 			SPDK_ERRLOG("GPA to VVA failed\n");
396 			return -EINVAL;
397 		}
398 
399 		/* sgl point to the first segment */
400 		sgl = (struct spdk_nvme_sgl_descriptor *)vva;
401 		last_sgl = &sgl[num_sgls - 1];
402 
403 		/* we are done */
404 		if (last_sgl->unkeyed.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
405 			/* map whole sgl list */
406 			ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls, &iovs[total_iovcnt],
407 						     max_iovcnt - total_iovcnt, gpa_to_vva);
408 			if (spdk_unlikely(ret < 0)) {
409 				return ret;
410 			}
411 			total_iovcnt += ret;
412 
413 			return total_iovcnt;
414 		}
415 
416 		if (num_sgls > 1) {
417 			/* map whole sgl exclude last_sgl */
418 			ret = nvme_cmd_map_sgls_data(prv, sgl, num_sgls - 1, &iovs[total_iovcnt],
419 						     max_iovcnt - total_iovcnt, gpa_to_vva);
420 			if (spdk_unlikely(ret < 0)) {
421 				return ret;
422 			}
423 			total_iovcnt += ret;
424 		}
425 
426 		/* move to next level's segments */
427 		sgl = last_sgl;
428 	}
429 
430 	return 0;
431 }
432 
433 static int
434 nvme_map_cmd(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs, uint32_t max_iovcnt,
435 	     uint32_t len, size_t mps,
436 	     void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len, int prot))
437 {
438 	if (cmd->psdt == SPDK_NVME_PSDT_PRP) {
439 		return nvme_cmd_map_prps(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva);
440 	}
441 
442 	return nvme_cmd_map_sgls(prv, cmd, iovs, max_iovcnt, len, mps, gpa_to_vva);
443 }
444 
445 static char *
446 endpoint_id(struct nvmf_vfio_user_endpoint *endpoint)
447 {
448 	return endpoint->trid.traddr;
449 }
450 
451 static char *
452 ctrlr_id(struct nvmf_vfio_user_ctrlr *ctrlr)
453 {
454 	if (!ctrlr || !ctrlr->endpoint) {
455 		return "Null Ctrlr";
456 	}
457 
458 	return endpoint_id(ctrlr->endpoint);
459 }
460 
461 static uint16_t
462 io_q_id(struct nvme_q *q)
463 {
464 
465 	struct nvmf_vfio_user_qpair *vfio_user_qpair;
466 
467 	assert(q);
468 
469 	if (q->is_cq) {
470 		vfio_user_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, cq);
471 	} else {
472 		vfio_user_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, sq);
473 	}
474 	assert(vfio_user_qpair);
475 	return vfio_user_qpair->qpair.qid;
476 }
477 
478 static void
479 fail_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr)
480 {
481 	assert(ctrlr != NULL);
482 
483 	if (ctrlr->cfs == 0) {
484 		SPDK_ERRLOG(":%s failing controller\n", ctrlr_id(ctrlr));
485 	}
486 
487 	ctrlr->cfs = 1U;
488 }
489 
490 static bool
491 ctrlr_interrupt_enabled(struct nvmf_vfio_user_ctrlr *ctrlr)
492 {
493 	assert(ctrlr != NULL);
494 	assert(ctrlr->endpoint != NULL);
495 
496 	vfu_pci_config_space_t *pci = ctrlr->endpoint->pci_config_space;
497 
498 	return (!pci->hdr.cmd.id || ctrlr->endpoint->msix->mxc.mxe);
499 }
500 
501 static void
502 nvmf_vfio_user_destroy_endpoint(struct nvmf_vfio_user_endpoint *endpoint)
503 {
504 	if (endpoint->doorbells) {
505 		munmap((void *)endpoint->doorbells, NVMF_VFIO_USER_DOORBELLS_SIZE);
506 	}
507 
508 	if (endpoint->fd > 0) {
509 		close(endpoint->fd);
510 	}
511 
512 	vfu_destroy_ctx(endpoint->vfu_ctx);
513 
514 	pthread_mutex_destroy(&endpoint->lock);
515 	free(endpoint);
516 }
517 
518 /* called when process exits */
519 static int
520 nvmf_vfio_user_destroy(struct spdk_nvmf_transport *transport,
521 		       spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg)
522 {
523 	struct nvmf_vfio_user_transport *vu_transport;
524 	struct nvmf_vfio_user_endpoint *endpoint, *tmp;
525 
526 	SPDK_DEBUGLOG(nvmf_vfio, "destroy transport\n");
527 
528 	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
529 					transport);
530 
531 	(void)pthread_mutex_destroy(&vu_transport->lock);
532 
533 	TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
534 		TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link);
535 		nvmf_vfio_user_destroy_endpoint(endpoint);
536 	}
537 
538 	free(vu_transport);
539 
540 	if (cb_fn) {
541 		cb_fn(cb_arg);
542 	}
543 
544 	return 0;
545 }
546 
547 static const struct spdk_json_object_decoder vfio_user_transport_opts_decoder[] = {
548 	{
549 		"disable-mappable-bar0",
550 		offsetof(struct nvmf_vfio_user_transport, transport_opts.disable_mappable_bar0),
551 		spdk_json_decode_bool, true
552 	},
553 };
554 
555 static struct spdk_nvmf_transport *
556 nvmf_vfio_user_create(struct spdk_nvmf_transport_opts *opts)
557 {
558 	struct nvmf_vfio_user_transport *vu_transport;
559 	int err;
560 
561 	vu_transport = calloc(1, sizeof(*vu_transport));
562 	if (vu_transport == NULL) {
563 		SPDK_ERRLOG("Transport alloc fail: %m\n");
564 		return NULL;
565 	}
566 
567 	err = pthread_mutex_init(&vu_transport->lock, NULL);
568 	if (err != 0) {
569 		SPDK_ERRLOG("Pthread initialisation failed (%d)\n", err);
570 		goto err;
571 	}
572 
573 	TAILQ_INIT(&vu_transport->endpoints);
574 	TAILQ_INIT(&vu_transport->new_qps);
575 
576 	if (opts->transport_specific != NULL &&
577 	    spdk_json_decode_object_relaxed(opts->transport_specific, vfio_user_transport_opts_decoder,
578 					    SPDK_COUNTOF(vfio_user_transport_opts_decoder),
579 					    vu_transport)) {
580 		SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n");
581 		free(vu_transport);
582 		return NULL;
583 	}
584 
585 	SPDK_DEBUGLOG(nvmf_vfio, "vfio_user transport: disable_mappable_bar0=%d\n",
586 		      vu_transport->transport_opts.disable_mappable_bar0);
587 
588 	return &vu_transport->transport;
589 
590 err:
591 	free(vu_transport);
592 
593 	return NULL;
594 }
595 
596 static uint16_t
597 max_queue_size(struct nvmf_vfio_user_ctrlr const *ctrlr)
598 {
599 	assert(ctrlr != NULL);
600 	assert(ctrlr->qp[0] != NULL);
601 	assert(ctrlr->qp[0]->qpair.ctrlr != NULL);
602 
603 	return ctrlr->qp[0]->qpair.ctrlr->vcprop.cap.bits.mqes + 1;
604 }
605 
606 static void *
607 map_one(vfu_ctx_t *ctx, uint64_t addr, uint64_t len, dma_sg_t *sg, struct iovec *iov, int prot)
608 {
609 	int ret;
610 
611 	assert(ctx != NULL);
612 	assert(sg != NULL);
613 	assert(iov != NULL);
614 
615 	ret = vfu_addr_to_sg(ctx, (void *)(uintptr_t)addr, len, sg, 1, prot);
616 	if (ret < 0) {
617 		return NULL;
618 	}
619 
620 	ret = vfu_map_sg(ctx, sg, iov, 1, 0);
621 	if (ret != 0) {
622 		return NULL;
623 	}
624 
625 	assert(iov->iov_base != NULL);
626 	return iov->iov_base;
627 }
628 
629 static uint32_t
630 sq_head(struct nvmf_vfio_user_qpair *qpair)
631 {
632 	assert(qpair != NULL);
633 	return qpair->sq.head;
634 }
635 
636 static void
637 sqhd_advance(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair)
638 {
639 	assert(ctrlr != NULL);
640 	assert(qpair != NULL);
641 	qpair->sq.head = (qpair->sq.head + 1) % qpair->sq.size;
642 }
643 
644 static int
645 asq_map(struct nvmf_vfio_user_ctrlr *ctrlr)
646 {
647 	struct nvme_q *sq;
648 	const struct spdk_nvmf_registers *regs;
649 
650 	assert(ctrlr != NULL);
651 	assert(ctrlr->qp[0] != NULL);
652 	assert(ctrlr->qp[0]->sq.addr == NULL);
653 	/* XXX ctrlr->asq == 0 is a valid memory address */
654 
655 	regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
656 	sq = &ctrlr->qp[0]->sq;
657 	sq->size = regs->aqa.bits.asqs + 1;
658 	sq->head = ctrlr->doorbells[0] = 0;
659 	sq->cqid = 0;
660 	sq->addr = map_one(ctrlr->endpoint->vfu_ctx, regs->asq,
661 			   sq->size * sizeof(struct spdk_nvme_cmd), sq->sg,
662 			   &sq->iov, PROT_READ);
663 	if (sq->addr == NULL) {
664 		return -1;
665 	}
666 	memset(sq->addr, 0, sq->size * sizeof(struct spdk_nvme_cmd));
667 	sq->is_cq = false;
668 	*tdbl(ctrlr, sq) = 0;
669 
670 	return 0;
671 }
672 
673 static uint16_t
674 cq_next(struct nvme_q *q)
675 {
676 	assert(q != NULL);
677 	assert(q->is_cq);
678 	return (q->tail + 1) % q->size;
679 }
680 
681 static int
682 queue_index(uint16_t qid, int is_cq)
683 {
684 	return (qid * 2) + is_cq;
685 }
686 
687 static volatile uint32_t *
688 tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q)
689 {
690 	assert(ctrlr != NULL);
691 	assert(q != NULL);
692 	assert(!q->is_cq);
693 
694 	return &ctrlr->doorbells[queue_index(io_q_id(q), false)];
695 }
696 
697 static volatile uint32_t *
698 hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q)
699 {
700 	assert(ctrlr != NULL);
701 	assert(q != NULL);
702 	assert(q->is_cq);
703 
704 	return &ctrlr->doorbells[queue_index(io_q_id(q), true)];
705 }
706 
707 static bool
708 cq_is_full(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q)
709 {
710 	assert(ctrlr != NULL);
711 	assert(q != NULL);
712 	return cq_next(q) == *hdbl(ctrlr, q);
713 }
714 
715 static void
716 cq_tail_advance(struct nvme_q *q)
717 {
718 	assert(q != NULL);
719 	q->tail = cq_next(q);
720 }
721 
722 static int
723 acq_map(struct nvmf_vfio_user_ctrlr *ctrlr)
724 {
725 	struct nvme_q *cq;
726 	const struct spdk_nvmf_registers *regs;
727 
728 	assert(ctrlr != NULL);
729 	assert(ctrlr->qp[0] != NULL);
730 	assert(ctrlr->qp[0]->cq.addr == NULL);
731 
732 	regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
733 	assert(regs != NULL);
734 	cq = &ctrlr->qp[0]->cq;
735 	cq->size = regs->aqa.bits.acqs + 1;
736 	cq->tail = 0;
737 	cq->addr = map_one(ctrlr->endpoint->vfu_ctx, regs->acq,
738 			   cq->size * sizeof(struct spdk_nvme_cpl), cq->sg,
739 			   &cq->iov, PROT_READ | PROT_WRITE);
740 	if (cq->addr == NULL) {
741 		return -1;
742 	}
743 	memset(cq->addr, 0, cq->size * sizeof(struct spdk_nvme_cpl));
744 	cq->is_cq = true;
745 	cq->ien = true;
746 	*hdbl(ctrlr, cq) = 0;
747 
748 	return 0;
749 }
750 
751 static inline dma_sg_t *
752 vu_req_to_sg_t(struct nvmf_vfio_user_req *vu_req, uint32_t iovcnt)
753 {
754 	return (dma_sg_t *)((uintptr_t)vu_req->sg + iovcnt * dma_sg_size());
755 }
756 
757 static void *
758 _map_one(void *prv, uint64_t addr, uint64_t len, int prot)
759 {
760 	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)prv;
761 	struct spdk_nvmf_qpair *qpair;
762 	struct nvmf_vfio_user_req *vu_req;
763 	struct nvmf_vfio_user_qpair *vu_qpair;
764 	void *ret;
765 
766 	assert(req != NULL);
767 	qpair = req->qpair;
768 	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
769 	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
770 
771 	assert(vu_req->iovcnt < NVMF_VFIO_USER_MAX_IOVECS);
772 	ret = map_one(vu_qpair->ctrlr->endpoint->vfu_ctx, addr, len,
773 		      vu_req_to_sg_t(vu_req, vu_req->iovcnt),
774 		      &vu_req->iov[vu_req->iovcnt], prot);
775 	if (spdk_likely(ret != NULL)) {
776 		vu_req->iovcnt++;
777 	}
778 	return ret;
779 }
780 
781 static int
782 vfio_user_map_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req,
783 		  struct iovec *iov, uint32_t length)
784 {
785 	/* Map PRP list to from Guest physical memory to
786 	 * virtual memory address.
787 	 */
788 	return nvme_map_cmd(req, &req->cmd->nvme_cmd, iov, NVMF_REQ_MAX_BUFFERS,
789 			    length, 4096, _map_one);
790 }
791 
792 static struct spdk_nvmf_request *
793 get_nvmf_req(struct nvmf_vfio_user_qpair *qp);
794 
795 static int
796 handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
797 	       struct spdk_nvmf_request *req);
798 
799 /*
800  * Posts a CQE in the completion queue.
801  *
802  * @ctrlr: the vfio-user controller
803  * @cmd: the NVMe command for which the completion is posted
804  * @cq: the completion queue
805  * @cdw0: cdw0 as reported by NVMf
806  * @sc: the NVMe CQE status code
807  * @sct: the NVMe CQE status code type
808  */
809 static int
810 post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
811 		struct nvme_q *cq, uint32_t cdw0, uint16_t sc,
812 		uint16_t sct)
813 {
814 	struct spdk_nvme_cpl *cpl;
815 	const struct spdk_nvmf_registers *regs;
816 	uint16_t qid;
817 	int err;
818 
819 	assert(ctrlr != NULL);
820 	assert(cmd != NULL);
821 
822 	if (spdk_unlikely(cq == NULL || cq->addr == NULL)) {
823 		return 0;
824 	}
825 
826 	qid = io_q_id(cq);
827 	regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
828 	if (regs->csts.bits.shst != SPDK_NVME_SHST_NORMAL) {
829 		SPDK_DEBUGLOG(nvmf_vfio,
830 			      "%s: ignore completion SQ%d cid=%d status=%#x\n",
831 			      ctrlr_id(ctrlr), qid, cmd->cid, sc);
832 		return 0;
833 	}
834 
835 	if (cq_is_full(ctrlr, cq)) {
836 		SPDK_ERRLOG("%s: CQ%d full (tail=%d, head=%d)\n",
837 			    ctrlr_id(ctrlr), qid, cq->tail, *hdbl(ctrlr, cq));
838 		return -1;
839 	}
840 
841 	cpl = ((struct spdk_nvme_cpl *)cq->addr) + cq->tail;
842 
843 	assert(ctrlr->qp[qid] != NULL);
844 	SPDK_DEBUGLOG(nvmf_vfio,
845 		      "%s: request complete SQ%d cid=%d status=%#x SQ head=%#x CQ tail=%#x\n",
846 		      ctrlr_id(ctrlr), qid, cmd->cid, sc, ctrlr->qp[qid]->sq.head,
847 		      cq->tail);
848 
849 	cpl->sqhd = ctrlr->qp[qid]->sq.head;
850 	cpl->cid = cmd->cid;
851 	cpl->cdw0 = cdw0;
852 	cpl->status.dnr = 0x0;
853 	cpl->status.m = 0x0;
854 	cpl->status.sct = sct;
855 	cpl->status.p = ~cpl->status.p;
856 	cpl->status.sc = sc;
857 
858 	cq_tail_advance(cq);
859 
860 	/*
861 	 * this function now executes at SPDK thread context, we
862 	 * might be triggerring interrupts from vfio-user thread context so
863 	 * check for race conditions.
864 	 */
865 	if (ctrlr_interrupt_enabled(ctrlr) && cq->ien) {
866 		err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv);
867 		if (err != 0) {
868 			SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n",
869 				    ctrlr_id(ctrlr));
870 			return err;
871 		}
872 	}
873 
874 	return 0;
875 }
876 
877 static struct nvme_q *
878 lookup_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, const uint16_t qid, const bool is_cq)
879 {
880 	struct nvme_q *q;
881 
882 	assert(ctrlr != NULL);
883 
884 	if (qid > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) {
885 		return NULL;
886 	}
887 
888 	if (ctrlr->qp[qid] == NULL) {
889 		return NULL;
890 	}
891 
892 	if (is_cq) {
893 		q = &ctrlr->qp[qid]->cq;
894 	} else {
895 		q = &ctrlr->qp[qid]->sq;
896 	}
897 
898 	if (q->addr == NULL) {
899 		return NULL;
900 	}
901 
902 	return q;
903 }
904 
905 static void
906 unmap_qp(struct nvmf_vfio_user_qpair *qp)
907 {
908 	struct nvmf_vfio_user_ctrlr *ctrlr;
909 
910 	if (qp->ctrlr == NULL) {
911 		return;
912 	}
913 	ctrlr = qp->ctrlr;
914 
915 	SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap QP%d\n",
916 		      ctrlr_id(ctrlr), qp->qpair.qid);
917 
918 	if (qp->sq.addr != NULL) {
919 		vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, qp->sq.sg, &qp->sq.iov, 1);
920 		qp->sq.addr = NULL;
921 	}
922 
923 	if (qp->cq.addr != NULL) {
924 		vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, qp->cq.sg, &qp->cq.iov, 1);
925 		qp->cq.addr = NULL;
926 	}
927 }
928 
929 static void
930 free_qp(struct nvmf_vfio_user_ctrlr *ctrlr, uint16_t qid)
931 {
932 	struct nvmf_vfio_user_qpair *qpair;
933 	struct nvmf_vfio_user_req *vu_req;
934 	uint32_t i;
935 
936 	if (ctrlr == NULL) {
937 		return;
938 	}
939 
940 	qpair = ctrlr->qp[qid];
941 	if (qpair == NULL) {
942 		return;
943 	}
944 
945 	SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy QP%d=%p\n", ctrlr_id(ctrlr),
946 		      qid, qpair);
947 
948 	unmap_qp(qpair);
949 
950 	for (i = 0; i < qpair->qsize; i++) {
951 		vu_req = &qpair->reqs_internal[i];
952 		free(vu_req->sg);
953 	}
954 	free(qpair->reqs_internal);
955 
956 	free(qpair->sq.sg);
957 	free(qpair->cq.sg);
958 	free(qpair);
959 
960 	ctrlr->qp[qid] = NULL;
961 }
962 
963 /* This function can only fail because of memory allocation errors. */
964 static int
965 init_qp(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_transport *transport,
966 	const uint16_t qsize, const uint16_t id)
967 {
968 	uint16_t i;
969 	struct nvmf_vfio_user_qpair *qpair;
970 	struct nvmf_vfio_user_req *vu_req, *tmp;
971 	struct spdk_nvmf_request *req;
972 
973 	assert(ctrlr != NULL);
974 	assert(transport != NULL);
975 
976 	qpair = calloc(1, sizeof(*qpair));
977 	if (qpair == NULL) {
978 		return -ENOMEM;
979 	}
980 	qpair->sq.sg = calloc(1, dma_sg_size());
981 	if (qpair->sq.sg == NULL) {
982 		free(qpair);
983 		return -ENOMEM;
984 	}
985 	qpair->cq.sg = calloc(1, dma_sg_size());
986 	if (qpair->cq.sg == NULL) {
987 		free(qpair->sq.sg);
988 		free(qpair);
989 		return -ENOMEM;
990 	}
991 
992 	qpair->qpair.qid = id;
993 	qpair->qpair.transport = transport;
994 	qpair->ctrlr = ctrlr;
995 	qpair->qsize = qsize;
996 
997 	TAILQ_INIT(&qpair->reqs);
998 
999 	qpair->reqs_internal = calloc(qsize, sizeof(struct nvmf_vfio_user_req));
1000 	if (qpair->reqs_internal == NULL) {
1001 		SPDK_ERRLOG("%s: error allocating reqs: %m\n", ctrlr_id(ctrlr));
1002 		goto reqs_err;
1003 	}
1004 
1005 	for (i = 0; i < qsize; i++) {
1006 		vu_req = &qpair->reqs_internal[i];
1007 		vu_req->sg = calloc(NVMF_VFIO_USER_MAX_IOVECS, dma_sg_size());
1008 		if (vu_req->sg == NULL) {
1009 			goto sg_err;
1010 		}
1011 
1012 		req = &vu_req->req;
1013 		req->qpair = &qpair->qpair;
1014 		req->rsp = (union nvmf_c2h_msg *)&vu_req->rsp;
1015 		req->cmd = (union nvmf_h2c_msg *)&vu_req->cmd;
1016 
1017 		TAILQ_INSERT_TAIL(&qpair->reqs, vu_req, link);
1018 	}
1019 
1020 	ctrlr->qp[id] = qpair;
1021 	return 0;
1022 
1023 sg_err:
1024 	TAILQ_FOREACH_SAFE(vu_req, &qpair->reqs, link, tmp) {
1025 		free(vu_req->sg);
1026 	}
1027 	free(qpair->reqs_internal);
1028 
1029 reqs_err:
1030 	free(qpair->sq.sg);
1031 	free(qpair->cq.sg);
1032 	free(qpair);
1033 	return -ENOMEM;
1034 }
1035 
1036 /*
1037  * Creates a completion or sumbission I/O queue. Returns 0 on success, -errno
1038  * on error.
1039  *
1040  * XXX SPDK thread context.
1041  */
1042 static int
1043 handle_create_io_q(struct nvmf_vfio_user_ctrlr *ctrlr,
1044 		   struct spdk_nvme_cmd *cmd, const bool is_cq)
1045 {
1046 	size_t entry_size;
1047 	uint16_t qsize;
1048 	uint16_t sc = SPDK_NVME_SC_SUCCESS;
1049 	uint16_t sct = SPDK_NVME_SCT_GENERIC;
1050 	int err = 0;
1051 	struct nvmf_vfio_user_qpair *vu_qpair;
1052 	struct nvme_q *io_q;
1053 	int prot;
1054 
1055 	assert(ctrlr != NULL);
1056 	assert(cmd != NULL);
1057 
1058 	SPDK_DEBUGLOG(nvmf_vfio,
1059 		      "%s: create I/O %cQ%d: QSIZE=%#x\n", ctrlr_id(ctrlr),
1060 		      is_cq ? 'C' : 'S', cmd->cdw10_bits.create_io_q.qid,
1061 		      cmd->cdw10_bits.create_io_q.qsize);
1062 
1063 	if (cmd->cdw10_bits.create_io_q.qid >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) {
1064 		SPDK_ERRLOG("%s: invalid QID=%d, max=%d\n", ctrlr_id(ctrlr),
1065 			    cmd->cdw10_bits.create_io_q.qid,
1066 			    NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR);
1067 		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1068 		sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
1069 		goto out;
1070 	}
1071 
1072 	if (lookup_io_q(ctrlr, cmd->cdw10_bits.create_io_q.qid, is_cq)) {
1073 		SPDK_ERRLOG("%s: %cQ%d already exists\n", ctrlr_id(ctrlr),
1074 			    is_cq ? 'C' : 'S', cmd->cdw10_bits.create_io_q.qid);
1075 		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1076 		sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
1077 		goto out;
1078 	}
1079 
1080 	qsize = cmd->cdw10_bits.create_io_q.qsize + 1;
1081 	if (qsize > max_queue_size(ctrlr)) {
1082 		SPDK_ERRLOG("%s: queue too big, want=%d, max=%d\n", ctrlr_id(ctrlr),
1083 			    qsize, max_queue_size(ctrlr));
1084 		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1085 		sc = SPDK_NVME_SC_INVALID_QUEUE_SIZE;
1086 		goto out;
1087 	}
1088 
1089 	if (is_cq) {
1090 		err = init_qp(ctrlr, ctrlr->qp[0]->qpair.transport, qsize,
1091 			      cmd->cdw10_bits.create_io_q.qid);
1092 		if (err != 0) {
1093 			sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
1094 			goto out;
1095 		}
1096 
1097 		io_q = &ctrlr->qp[cmd->cdw10_bits.create_io_q.qid]->cq;
1098 		entry_size = sizeof(struct spdk_nvme_cpl);
1099 		if (cmd->cdw11_bits.create_io_cq.pc != 0x1) {
1100 			SPDK_ERRLOG("%s: non-PC CQ not supporred\n", ctrlr_id(ctrlr));
1101 			sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
1102 			goto out;
1103 		}
1104 		io_q->ien = cmd->cdw11_bits.create_io_cq.ien;
1105 		io_q->iv = cmd->cdw11_bits.create_io_cq.iv;
1106 	} else {
1107 		/* CQ must be created before SQ */
1108 		if (!lookup_io_q(ctrlr, cmd->cdw11_bits.create_io_sq.cqid, true)) {
1109 			SPDK_ERRLOG("%s: CQ%d does not exist\n", ctrlr_id(ctrlr),
1110 				    cmd->cdw11_bits.create_io_sq.cqid);
1111 			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1112 			sc = SPDK_NVME_SC_COMPLETION_QUEUE_INVALID;
1113 			goto out;
1114 		}
1115 
1116 		io_q = &ctrlr->qp[cmd->cdw10_bits.create_io_q.qid]->sq;
1117 		entry_size = sizeof(struct spdk_nvme_cmd);
1118 		if (cmd->cdw11_bits.create_io_sq.pc != 0x1) {
1119 			SPDK_ERRLOG("%s: non-PC SQ not supported\n", ctrlr_id(ctrlr));
1120 			sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
1121 			goto out;
1122 		}
1123 
1124 		io_q->cqid = cmd->cdw11_bits.create_io_sq.cqid;
1125 		SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%d CQID=%d\n", ctrlr_id(ctrlr),
1126 			      cmd->cdw10_bits.create_io_q.qid, io_q->cqid);
1127 	}
1128 
1129 	io_q->is_cq = is_cq;
1130 	io_q->size = qsize;
1131 	prot = PROT_READ;
1132 	if (is_cq) {
1133 		prot |= PROT_WRITE;
1134 	}
1135 	io_q->addr = map_one(ctrlr->endpoint->vfu_ctx, cmd->dptr.prp.prp1,
1136 			     io_q->size * entry_size, io_q->sg, &io_q->iov, prot);
1137 	if (io_q->addr == NULL) {
1138 		sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
1139 		SPDK_ERRLOG("%s: failed to map I/O queue: %m\n", ctrlr_id(ctrlr));
1140 		goto out;
1141 	}
1142 	io_q->prp1 = cmd->dptr.prp.prp1;
1143 	memset(io_q->addr, 0, io_q->size * entry_size);
1144 
1145 	SPDK_DEBUGLOG(nvmf_vfio, "%s: mapped %cQ%d IOVA=%#lx vaddr=%#llx\n",
1146 		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
1147 		      cmd->cdw10_bits.create_io_q.qid, cmd->dptr.prp.prp1,
1148 		      (unsigned long long)io_q->addr);
1149 
1150 	if (is_cq) {
1151 		*hdbl(ctrlr, io_q) = 0;
1152 	} else {
1153 		/* After we've returned here, on the next time nvmf_vfio_user_accept executes it will
1154 		 * pick up this qpair and will eventually call nvmf_vfio_user_poll_group_add which will
1155 		 * call spdk_nvmf_request_exec_fabrics with a generated fabrics connect command. That
1156 		 * will then call handle_queue_connect_rsp, which is where we ultimately complete
1157 		 * this command.
1158 		 */
1159 		vu_qpair = ctrlr->qp[cmd->cdw10_bits.create_io_q.qid];
1160 		vu_qpair->create_io_sq_cmd = *cmd;
1161 		TAILQ_INSERT_TAIL(&ctrlr->transport->new_qps, vu_qpair, link);
1162 		*tdbl(ctrlr, io_q) = 0;
1163 		return 0;
1164 	}
1165 
1166 out:
1167 	return post_completion(ctrlr, cmd, &ctrlr->qp[0]->cq, 0, sc, sct);
1168 }
1169 
1170 /* For ADMIN I/O DELETE COMPLETION QUEUE the NVMf library will disconnect and free
1171  * queue pair, so save the command in a context.
1172  */
1173 struct vfio_user_delete_cq_ctx {
1174 	struct nvmf_vfio_user_ctrlr *vu_ctrlr;
1175 	struct spdk_nvme_cmd delete_io_cq_cmd;
1176 };
1177 
1178 static void
1179 vfio_user_qpair_delete_cb(void *cb_arg)
1180 {
1181 	struct vfio_user_delete_cq_ctx *ctx = cb_arg;
1182 	struct nvmf_vfio_user_ctrlr *vu_ctrlr = ctx->vu_ctrlr;
1183 
1184 	post_completion(vu_ctrlr, &ctx->delete_io_cq_cmd, &vu_ctrlr->qp[0]->cq, 0,
1185 			SPDK_NVME_SC_SUCCESS, SPDK_NVME_SCT_GENERIC);
1186 	free(ctx);
1187 }
1188 
1189 /*
1190  * Deletes a completion or sumbission I/O queue.
1191  */
1192 static int
1193 handle_del_io_q(struct nvmf_vfio_user_ctrlr *ctrlr,
1194 		struct spdk_nvme_cmd *cmd, const bool is_cq)
1195 {
1196 	uint16_t sct = SPDK_NVME_SCT_GENERIC;
1197 	uint16_t sc = SPDK_NVME_SC_SUCCESS;
1198 	struct nvmf_vfio_user_qpair *vu_qpair;
1199 	struct vfio_user_delete_cq_ctx *ctx;
1200 
1201 	SPDK_DEBUGLOG(nvmf_vfio, "%s: delete I/O %cQ: QID=%d\n",
1202 		      ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
1203 		      cmd->cdw10_bits.delete_io_q.qid);
1204 
1205 	if (lookup_io_q(ctrlr, cmd->cdw10_bits.delete_io_q.qid, is_cq) == NULL) {
1206 		SPDK_ERRLOG("%s: %cQ%d does not exist\n", ctrlr_id(ctrlr),
1207 			    is_cq ? 'C' : 'S', cmd->cdw10_bits.delete_io_q.qid);
1208 		sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1209 		sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
1210 		goto out;
1211 	}
1212 
1213 	vu_qpair = ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid];
1214 	if (is_cq) {
1215 		/* SQ must have been deleted first */
1216 		if (vu_qpair->state != VFIO_USER_QPAIR_DELETED) {
1217 			SPDK_ERRLOG("%s: the associated SQ must be deleted first\n", ctrlr_id(ctrlr));
1218 			sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
1219 			sc = SPDK_NVME_SC_INVALID_QUEUE_DELETION;
1220 			goto out;
1221 		}
1222 		ctx = calloc(1, sizeof(*ctx));
1223 		if (!ctx) {
1224 			sct = SPDK_NVME_SCT_GENERIC;
1225 			sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
1226 			goto out;
1227 		}
1228 		ctx->vu_ctrlr = ctrlr;
1229 		ctx->delete_io_cq_cmd = *cmd;
1230 		spdk_nvmf_qpair_disconnect(&vu_qpair->qpair, vfio_user_qpair_delete_cb, ctx);
1231 		return 0;
1232 	} else {
1233 		/*
1234 		 * This doesn't actually delete the SQ, We're merely telling the poll_group_poll
1235 		 * function to skip checking this SQ.  The queue pair will be disconnected in Delete
1236 		 * IO CQ command.
1237 		 */
1238 		assert(vu_qpair->state == VFIO_USER_QPAIR_ACTIVE);
1239 		vu_qpair->state = VFIO_USER_QPAIR_DELETED;
1240 	}
1241 
1242 out:
1243 	return post_completion(ctrlr, cmd, &ctrlr->qp[0]->cq, 0, sc, sct);
1244 }
1245 
1246 /*
1247  * Returns 0 on success and -errno on error.
1248  *
1249  * XXX SPDK thread context
1250  */
1251 static int
1252 consume_admin_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd)
1253 {
1254 	assert(ctrlr != NULL);
1255 	assert(cmd != NULL);
1256 
1257 	SPDK_DEBUGLOG(nvmf_vfio, "%s: handle admin req opc=%#x cid=%d\n",
1258 		      ctrlr_id(ctrlr), cmd->opc, cmd->cid);
1259 
1260 	switch (cmd->opc) {
1261 	case SPDK_NVME_OPC_CREATE_IO_CQ:
1262 	case SPDK_NVME_OPC_CREATE_IO_SQ:
1263 		return handle_create_io_q(ctrlr, cmd,
1264 					  cmd->opc == SPDK_NVME_OPC_CREATE_IO_CQ);
1265 	case SPDK_NVME_OPC_DELETE_IO_SQ:
1266 	case SPDK_NVME_OPC_DELETE_IO_CQ:
1267 		return handle_del_io_q(ctrlr, cmd,
1268 				       cmd->opc == SPDK_NVME_OPC_DELETE_IO_CQ);
1269 	default:
1270 		return handle_cmd_req(ctrlr, cmd, get_nvmf_req(ctrlr->qp[0]));
1271 	}
1272 }
1273 
1274 static int
1275 handle_cmd_rsp(struct nvmf_vfio_user_req *req, void *cb_arg)
1276 {
1277 	struct nvmf_vfio_user_qpair *qpair = cb_arg;
1278 
1279 	assert(qpair != NULL);
1280 	assert(req != NULL);
1281 
1282 	vfu_unmap_sg(qpair->ctrlr->endpoint->vfu_ctx, req->sg, req->iov, req->iovcnt);
1283 
1284 	return post_completion(qpair->ctrlr, &req->req.cmd->nvme_cmd,
1285 			       &qpair->ctrlr->qp[req->req.qpair->qid]->cq,
1286 			       req->req.rsp->nvme_cpl.cdw0,
1287 			       req->req.rsp->nvme_cpl.status.sc,
1288 			       req->req.rsp->nvme_cpl.status.sct);
1289 }
1290 
1291 static int
1292 consume_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair,
1293 	    struct spdk_nvme_cmd *cmd)
1294 {
1295 	assert(qpair != NULL);
1296 	if (nvmf_qpair_is_admin_queue(&qpair->qpair)) {
1297 		return consume_admin_cmd(ctrlr, cmd);
1298 	}
1299 
1300 	return handle_cmd_req(ctrlr, cmd, get_nvmf_req(qpair));
1301 }
1302 
1303 static ssize_t
1304 handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail,
1305 		     struct nvmf_vfio_user_qpair *qpair)
1306 {
1307 	struct spdk_nvme_cmd *queue;
1308 
1309 	assert(ctrlr != NULL);
1310 	assert(qpair != NULL);
1311 
1312 	queue = qpair->sq.addr;
1313 	while (sq_head(qpair) != new_tail) {
1314 		int err;
1315 		struct spdk_nvme_cmd *cmd = &queue[sq_head(qpair)];
1316 
1317 		/*
1318 		 * SQHD must contain the new head pointer, so we must increase
1319 		 * it before we generate a completion.
1320 		 */
1321 		sqhd_advance(ctrlr, qpair);
1322 
1323 		err = consume_cmd(ctrlr, qpair, cmd);
1324 		if (err != 0) {
1325 			return err;
1326 		}
1327 	}
1328 
1329 	return 0;
1330 }
1331 
1332 static int
1333 map_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr)
1334 {
1335 	int err;
1336 
1337 	assert(ctrlr != NULL);
1338 
1339 	err = acq_map(ctrlr);
1340 	if (err != 0) {
1341 		return err;
1342 	}
1343 
1344 	err = asq_map(ctrlr);
1345 	if (err != 0) {
1346 		return err;
1347 	}
1348 
1349 	return 0;
1350 }
1351 
1352 static void
1353 unmap_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr)
1354 {
1355 	assert(ctrlr->qp[0] != NULL);
1356 
1357 	unmap_qp(ctrlr->qp[0]);
1358 }
1359 
1360 static void
1361 memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
1362 {
1363 	struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
1364 	struct nvmf_vfio_user_ctrlr *ctrlr;
1365 	struct nvmf_vfio_user_qpair *qpair;
1366 	int i, ret;
1367 
1368 	/*
1369 	 * We're not interested in any DMA regions that aren't mappable (we don't
1370 	 * support clients that don't share their memory).
1371 	 */
1372 	if (!info->vaddr) {
1373 		return;
1374 	}
1375 
1376 	if (((uintptr_t)info->mapping.iov_base & MASK_2MB) ||
1377 	    (info->mapping.iov_len & MASK_2MB)) {
1378 		SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr,
1379 			      (uintptr_t)info->mapping.iov_base,
1380 			      (uintptr_t)info->mapping.iov_base + info->mapping.iov_len);
1381 		return;
1382 	}
1383 
1384 	assert(endpoint != NULL);
1385 	if (endpoint->ctrlr == NULL) {
1386 		return;
1387 	}
1388 	ctrlr = endpoint->ctrlr;
1389 
1390 	SPDK_DEBUGLOG(nvmf_vfio, "%s: map IOVA %#lx-%#lx\n", ctrlr_id(ctrlr),
1391 		      (uintptr_t)info->mapping.iov_base,
1392 		      (uintptr_t)info->mapping.iov_base + info->mapping.iov_len);
1393 
1394 	/* VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE are enabled when registering to VFIO, here we also
1395 	 * check the protection bits before registering.
1396 	 */
1397 	if ((info->prot == (PROT_WRITE | PROT_READ)) &&
1398 	    (spdk_mem_register(info->mapping.iov_base, info->mapping.iov_len))) {
1399 		SPDK_ERRLOG("Memory region register %#lx-%#lx failed\n",
1400 			    (uint64_t)(uintptr_t)info->mapping.iov_base,
1401 			    (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len);
1402 	}
1403 
1404 	for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) {
1405 		qpair = ctrlr->qp[i];
1406 		if (qpair == NULL) {
1407 			continue;
1408 		}
1409 
1410 		if (qpair->state != VFIO_USER_QPAIR_INACTIVE) {
1411 			continue;
1412 		}
1413 
1414 		if (nvmf_qpair_is_admin_queue(&qpair->qpair)) {
1415 			ret = map_admin_queue(ctrlr);
1416 			if (ret) {
1417 				SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap Admin queue\n");
1418 				continue;
1419 			}
1420 			qpair->state = VFIO_USER_QPAIR_ACTIVE;
1421 			SPDK_DEBUGLOG(nvmf_vfio, "Remap Admin queue\n");
1422 		} else {
1423 			struct nvme_q *sq = &qpair->sq;
1424 			struct nvme_q *cq = &qpair->cq;
1425 
1426 			sq->addr = map_one(ctrlr->endpoint->vfu_ctx, sq->prp1, sq->size * 64, sq->sg, &sq->iov,
1427 					   PROT_READ | PROT_WRITE);
1428 			if (!sq->addr) {
1429 				SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap SQID %d %#lx-%#lx\n",
1430 					      i, sq->prp1, sq->prp1 + sq->size * 64);
1431 				continue;
1432 			}
1433 			cq->addr = map_one(ctrlr->endpoint->vfu_ctx, cq->prp1, cq->size * 16, cq->sg, &cq->iov,
1434 					   PROT_READ | PROT_WRITE);
1435 			if (!cq->addr) {
1436 				SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap CQID %d %#lx-%#lx\n",
1437 					      i, cq->prp1, cq->prp1 + cq->size * 16);
1438 				continue;
1439 			}
1440 			qpair->state = VFIO_USER_QPAIR_ACTIVE;
1441 			SPDK_DEBUGLOG(nvmf_vfio, "Remap IO QP%u\n", i);
1442 		}
1443 	}
1444 }
1445 
1446 static int
1447 memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
1448 {
1449 
1450 	struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
1451 	struct nvmf_vfio_user_ctrlr *ctrlr;
1452 	struct nvmf_vfio_user_qpair *qpair;
1453 	void *map_start, *map_end;
1454 	int i;
1455 
1456 	if (!info->vaddr) {
1457 		return 0;
1458 	}
1459 
1460 	if (((uintptr_t)info->mapping.iov_base & MASK_2MB) ||
1461 	    (info->mapping.iov_len & MASK_2MB)) {
1462 		SPDK_DEBUGLOG(nvmf_vfio, "Invalid memory region vaddr %p, IOVA %#lx-%#lx\n", info->vaddr,
1463 			      (uintptr_t)info->mapping.iov_base,
1464 			      (uintptr_t)info->mapping.iov_base + info->mapping.iov_len);
1465 		return 0;
1466 	}
1467 
1468 	assert(endpoint != NULL);
1469 	if (endpoint->ctrlr == NULL) {
1470 		return 0;
1471 	}
1472 	ctrlr = endpoint->ctrlr;
1473 
1474 	SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap IOVA %#lx-%#lx\n", ctrlr_id(ctrlr),
1475 		      (uintptr_t)info->mapping.iov_base,
1476 		      (uintptr_t)info->mapping.iov_base + info->mapping.iov_len);
1477 
1478 	if ((info->prot == (PROT_WRITE | PROT_READ)) &&
1479 	    (spdk_mem_unregister(info->mapping.iov_base, info->mapping.iov_len))) {
1480 		SPDK_ERRLOG("Memory region unregister %#lx-%#lx failed\n",
1481 			    (uint64_t)(uintptr_t)info->mapping.iov_base,
1482 			    (uint64_t)(uintptr_t)info->mapping.iov_base + info->mapping.iov_len);
1483 	}
1484 
1485 	map_start = info->mapping.iov_base;
1486 	map_end = info->mapping.iov_base + info->mapping.iov_len;
1487 	for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) {
1488 		qpair = ctrlr->qp[i];
1489 		if (qpair == NULL) {
1490 			continue;
1491 		}
1492 
1493 		if ((qpair->cq.addr >= map_start && qpair->cq.addr < map_end) ||
1494 		    (qpair->sq.addr >= map_start && qpair->sq.addr < map_end)) {
1495 			unmap_qp(qpair);
1496 			qpair->state = VFIO_USER_QPAIR_INACTIVE;
1497 		}
1498 	}
1499 
1500 	return 0;
1501 }
1502 
1503 static int
1504 nvmf_vfio_user_prop_req_rsp(struct nvmf_vfio_user_req *req, void *cb_arg)
1505 {
1506 	struct nvmf_vfio_user_qpair *vu_qpair = cb_arg;
1507 	struct nvmf_vfio_user_ctrlr *vu_ctrlr;
1508 	bool unmap_admin = false;
1509 	int ret;
1510 
1511 	assert(vu_qpair != NULL);
1512 	assert(req != NULL);
1513 
1514 	if (req->req.cmd->prop_get_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET) {
1515 		assert(vu_qpair->ctrlr != NULL);
1516 		assert(req != NULL);
1517 
1518 		memcpy(req->req.data,
1519 		       &req->req.rsp->prop_get_rsp.value.u64,
1520 		       req->req.length);
1521 	} else {
1522 		assert(req->req.cmd->prop_set_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET);
1523 		assert(vu_qpair->ctrlr != NULL);
1524 		vu_ctrlr = vu_qpair->ctrlr;
1525 
1526 		if (req->req.cmd->prop_set_cmd.ofst == offsetof(struct spdk_nvme_registers, cc)) {
1527 			union spdk_nvme_cc_register cc, diff;
1528 
1529 			cc.raw = req->req.cmd->prop_set_cmd.value.u64;
1530 			diff.raw = cc.raw ^ req->cc.raw;
1531 
1532 			if (diff.bits.en) {
1533 				if (cc.bits.en) {
1534 					SPDK_DEBUGLOG(nvmf_vfio, "%s: MAP Admin queue\n", ctrlr_id(vu_ctrlr));
1535 					ret = map_admin_queue(vu_ctrlr);
1536 					if (ret) {
1537 						SPDK_ERRLOG("%s: failed to map Admin queue\n", ctrlr_id(vu_ctrlr));
1538 						return ret;
1539 					}
1540 					vu_qpair->state = VFIO_USER_QPAIR_ACTIVE;
1541 				} else {
1542 					unmap_admin = true;
1543 				}
1544 			}
1545 
1546 			if (diff.bits.shn) {
1547 				if (cc.bits.shn == SPDK_NVME_SHN_NORMAL || cc.bits.shn == SPDK_NVME_SHN_ABRUPT) {
1548 					unmap_admin = true;
1549 				}
1550 			}
1551 
1552 			if (unmap_admin) {
1553 				SPDK_DEBUGLOG(nvmf_vfio,
1554 					      "%s: UNMAP Admin queue\n",
1555 					      ctrlr_id(vu_ctrlr));
1556 				unmap_admin_queue(vu_ctrlr);
1557 				vu_qpair->state = VFIO_USER_QPAIR_INACTIVE;
1558 				/* For PCIe controller reset or shutdown, we will drop all AER responses */
1559 				nvmf_ctrlr_abort_aer(vu_qpair->qpair.ctrlr);
1560 			}
1561 		}
1562 	}
1563 
1564 	return 0;
1565 }
1566 
1567 static int
1568 handle_dbl_access(struct nvmf_vfio_user_ctrlr *ctrlr, uint32_t *buf,
1569 		  const size_t count, loff_t pos, const bool is_write)
1570 {
1571 	assert(ctrlr != NULL);
1572 	assert(buf != NULL);
1573 
1574 	if (count != sizeof(uint32_t)) {
1575 		SPDK_ERRLOG("%s: bad doorbell buffer size %ld\n",
1576 			    ctrlr_id(ctrlr), count);
1577 		errno = EINVAL;
1578 		return -1;
1579 	}
1580 
1581 	pos -= NVMF_VFIO_USER_DOORBELLS_OFFSET;
1582 
1583 	/* pos must be dword aligned */
1584 	if ((pos & 0x3) != 0) {
1585 		SPDK_ERRLOG("%s: bad doorbell offset %#lx\n", ctrlr_id(ctrlr), pos);
1586 		errno = EINVAL;
1587 		return -1;
1588 	}
1589 
1590 	/* convert byte offset to array index */
1591 	pos >>= 2;
1592 
1593 	if (pos > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR * 2) {
1594 		SPDK_ERRLOG("%s: bad doorbell index %#lx\n", ctrlr_id(ctrlr), pos);
1595 		errno = EINVAL;
1596 		return -1;
1597 	}
1598 
1599 	if (is_write) {
1600 		ctrlr->doorbells[pos] = *buf;
1601 		spdk_wmb();
1602 	} else {
1603 		spdk_rmb();
1604 		*buf = ctrlr->doorbells[pos];
1605 	}
1606 	return 0;
1607 }
1608 
1609 static ssize_t
1610 access_bar0_fn(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos,
1611 	       bool is_write)
1612 {
1613 	struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
1614 	struct nvmf_vfio_user_ctrlr *ctrlr;
1615 	struct nvmf_vfio_user_req *req;
1616 	const struct spdk_nvmf_registers *regs;
1617 	int ret;
1618 
1619 	ctrlr = endpoint->ctrlr;
1620 
1621 	SPDK_DEBUGLOG(nvmf_vfio,
1622 		      "%s: bar0 %s ctrlr: %p, count=%zu, pos=%"PRIX64"\n",
1623 		      endpoint_id(endpoint), is_write ? "write" : "read",
1624 		      ctrlr, count, pos);
1625 
1626 	if (pos >= NVMF_VFIO_USER_DOORBELLS_OFFSET) {
1627 		/*
1628 		 * The fact that the doorbells can be memory mapped doesn't mean
1629 		 * that the client (VFIO in QEMU) is obliged to memory map them,
1630 		 * it might still elect to access them via regular read/write;
1631 		 * we might also have had disable_mappable_bar0 set.
1632 		 */
1633 		ret = handle_dbl_access(ctrlr, (uint32_t *)buf, count,
1634 					pos, is_write);
1635 		if (ret == 0) {
1636 			return count;
1637 		}
1638 		return ret;
1639 	}
1640 
1641 	/* Construct a Fabric Property Get/Set command and send it */
1642 	req = get_nvmf_vfio_user_req(ctrlr->qp[0]);
1643 	if (req == NULL) {
1644 		errno = ENOBUFS;
1645 		return -1;
1646 	}
1647 	regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
1648 	req->cc.raw = regs->cc.raw;
1649 
1650 	req->cb_fn = nvmf_vfio_user_prop_req_rsp;
1651 	req->cb_arg = ctrlr->qp[0];
1652 	req->req.cmd->prop_set_cmd.opcode = SPDK_NVME_OPC_FABRIC;
1653 	req->req.cmd->prop_set_cmd.cid = 0;
1654 	req->req.cmd->prop_set_cmd.attrib.size = (count / 4) - 1;
1655 	req->req.cmd->prop_set_cmd.ofst = pos;
1656 	if (is_write) {
1657 		req->req.cmd->prop_set_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET;
1658 		if (req->req.cmd->prop_set_cmd.attrib.size) {
1659 			req->req.cmd->prop_set_cmd.value.u64 = *(uint64_t *)buf;
1660 		} else {
1661 			req->req.cmd->prop_set_cmd.value.u32.high = 0;
1662 			req->req.cmd->prop_set_cmd.value.u32.low = *(uint32_t *)buf;
1663 		}
1664 	} else {
1665 		req->req.cmd->prop_get_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET;
1666 	}
1667 	req->req.length = count;
1668 	req->req.data = buf;
1669 
1670 	spdk_nvmf_request_exec_fabrics(&req->req);
1671 
1672 	return count;
1673 }
1674 
1675 /*
1676  * NVMe driver reads 4096 bytes, which is the extended PCI configuration space
1677  * available on PCI-X 2.0 and PCI Express buses
1678  */
1679 static ssize_t
1680 access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset,
1681 		  bool is_write)
1682 {
1683 	struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
1684 
1685 	if (is_write) {
1686 		SPDK_ERRLOG("%s: write %#lx-%#lx not supported\n",
1687 			    endpoint_id(endpoint), offset, offset + count);
1688 		errno = EINVAL;
1689 		return -1;
1690 	}
1691 
1692 	if (offset + count > PCI_CFG_SPACE_EXP_SIZE) {
1693 		SPDK_ERRLOG("%s: access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n",
1694 			    endpoint_id(endpoint), offset, count,
1695 			    PCI_CFG_SPACE_EXP_SIZE);
1696 		errno = ERANGE;
1697 		return -1;
1698 	}
1699 
1700 	memcpy(buf, ((unsigned char *)endpoint->pci_config_space) + offset, count);
1701 
1702 	return count;
1703 }
1704 
1705 static void
1706 vfio_user_log(vfu_ctx_t *vfu_ctx, int level, char const *msg)
1707 {
1708 	struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
1709 
1710 	if (level >= LOG_DEBUG) {
1711 		SPDK_DEBUGLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg);
1712 	} else if (level >= LOG_INFO) {
1713 		SPDK_INFOLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg);
1714 	} else if (level >= LOG_NOTICE) {
1715 		SPDK_NOTICELOG("%s: %s\n", endpoint_id(endpoint), msg);
1716 	} else if (level >= LOG_WARNING) {
1717 		SPDK_WARNLOG("%s: %s\n", endpoint_id(endpoint), msg);
1718 	} else {
1719 		SPDK_ERRLOG("%s: %s\n", endpoint_id(endpoint), msg);
1720 	}
1721 }
1722 
1723 static void
1724 init_pci_config_space(vfu_pci_config_space_t *p)
1725 {
1726 	/* MLBAR */
1727 	p->hdr.bars[0].raw = 0x0;
1728 	/* MUBAR */
1729 	p->hdr.bars[1].raw = 0x0;
1730 
1731 	/* vendor specific, let's set them to zero for now */
1732 	p->hdr.bars[3].raw = 0x0;
1733 	p->hdr.bars[4].raw = 0x0;
1734 	p->hdr.bars[5].raw = 0x0;
1735 
1736 	/* enable INTx */
1737 	p->hdr.intr.ipin = 0x1;
1738 }
1739 
1740 static int
1741 vfio_user_dev_info_fill(struct nvmf_vfio_user_transport *vu_transport,
1742 			struct nvmf_vfio_user_endpoint *endpoint)
1743 {
1744 	int ret;
1745 	ssize_t cap_offset;
1746 	vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx;
1747 
1748 	struct pmcap pmcap = { .hdr.id = PCI_CAP_ID_PM, .pmcs.nsfrst = 0x1 };
1749 	struct pxcap pxcap = {
1750 		.hdr.id = PCI_CAP_ID_EXP,
1751 		.pxcaps.ver = 0x2,
1752 		.pxdcap = {.rer = 0x1, .flrc = 0x1},
1753 		.pxdcap2.ctds = 0x1
1754 	};
1755 
1756 	struct msixcap msixcap = {
1757 		.hdr.id = PCI_CAP_ID_MSIX,
1758 		.mxc.ts = NVME_IRQ_MSIX_NUM - 1,
1759 		.mtab = {.tbir = 0x4, .to = 0x0},
1760 		.mpba = {.pbir = 0x5, .pbao = 0x0}
1761 	};
1762 
1763 	static struct iovec sparse_mmap[] = {
1764 		{
1765 			.iov_base = (void *)NVMF_VFIO_USER_DOORBELLS_OFFSET,
1766 			.iov_len = NVMF_VFIO_USER_DOORBELLS_SIZE,
1767 		},
1768 	};
1769 
1770 	ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0);
1771 	if (ret < 0) {
1772 		SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", vfu_ctx);
1773 		return ret;
1774 	}
1775 	vfu_pci_set_id(vfu_ctx, 0x4e58, 0x0001, 0, 0);
1776 	/*
1777 	 * 0x02, controller uses the NVM Express programming interface
1778 	 * 0x08, non-volatile memory controller
1779 	 * 0x01, mass storage controller
1780 	 */
1781 	vfu_pci_set_class(vfu_ctx, 0x01, 0x08, 0x02);
1782 
1783 	cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pmcap);
1784 	if (cap_offset < 0) {
1785 		SPDK_ERRLOG("vfu_ctx %p failed add pmcap\n", vfu_ctx);
1786 		return ret;
1787 	}
1788 
1789 	cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pxcap);
1790 	if (cap_offset < 0) {
1791 		SPDK_ERRLOG("vfu_ctx %p failed add pxcap\n", vfu_ctx);
1792 		return ret;
1793 	}
1794 
1795 	cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &msixcap);
1796 	if (cap_offset < 0) {
1797 		SPDK_ERRLOG("vfu_ctx %p failed add msixcap\n", vfu_ctx);
1798 		return ret;
1799 	}
1800 
1801 	ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, NVME_REG_CFG_SIZE,
1802 			       access_pci_config, VFU_REGION_FLAG_RW, NULL, 0, -1, 0);
1803 	if (ret < 0) {
1804 		SPDK_ERRLOG("vfu_ctx %p failed to setup cfg\n", vfu_ctx);
1805 		return ret;
1806 	}
1807 
1808 	if (vu_transport->transport_opts.disable_mappable_bar0) {
1809 		ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE,
1810 				       access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM,
1811 				       NULL, 0, -1, 0);
1812 	} else {
1813 		ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE,
1814 				       access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM,
1815 				       sparse_mmap, 1, endpoint->fd, 0);
1816 	}
1817 
1818 	if (ret < 0) {
1819 		SPDK_ERRLOG("vfu_ctx %p failed to setup bar 0\n", vfu_ctx);
1820 		return ret;
1821 	}
1822 
1823 	ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR4_REGION_IDX, PAGE_SIZE,
1824 			       NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0);
1825 	if (ret < 0) {
1826 		SPDK_ERRLOG("vfu_ctx %p failed to setup bar 4\n", vfu_ctx);
1827 		return ret;
1828 	}
1829 
1830 	ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR5_REGION_IDX, PAGE_SIZE,
1831 			       NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0);
1832 	if (ret < 0) {
1833 		SPDK_ERRLOG("vfu_ctx %p failed to setup bar 5\n", vfu_ctx);
1834 		return ret;
1835 	}
1836 
1837 	ret = vfu_setup_device_dma(vfu_ctx, memory_region_add_cb, memory_region_remove_cb);
1838 	if (ret < 0) {
1839 		SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", vfu_ctx);
1840 		return ret;
1841 	}
1842 
1843 	ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
1844 	if (ret < 0) {
1845 		SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", vfu_ctx);
1846 		return ret;
1847 	}
1848 
1849 	ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, NVME_IRQ_MSIX_NUM);
1850 	if (ret < 0) {
1851 		SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", vfu_ctx);
1852 		return ret;
1853 	}
1854 
1855 	ret = vfu_realize_ctx(vfu_ctx);
1856 	if (ret < 0) {
1857 		SPDK_ERRLOG("vfu_ctx %p failed to realize\n", vfu_ctx);
1858 		return ret;
1859 	}
1860 
1861 	endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx);
1862 	assert(endpoint->pci_config_space != NULL);
1863 	init_pci_config_space(endpoint->pci_config_space);
1864 
1865 	assert(cap_offset != 0);
1866 	endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset);
1867 
1868 	return 0;
1869 }
1870 
1871 static void
1872 _free_ctrlr(void *ctx)
1873 {
1874 	struct nvmf_vfio_user_ctrlr *ctrlr = ctx;
1875 	int i;
1876 
1877 	for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) {
1878 		free_qp(ctrlr, i);
1879 	}
1880 
1881 	spdk_poller_unregister(&ctrlr->mmio_poller);
1882 	free(ctrlr);
1883 }
1884 
1885 static void
1886 free_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr)
1887 {
1888 	assert(ctrlr != NULL);
1889 
1890 	SPDK_DEBUGLOG(nvmf_vfio, "free %s\n", ctrlr_id(ctrlr));
1891 
1892 	if (ctrlr->thread == spdk_get_thread()) {
1893 		_free_ctrlr(ctrlr);
1894 	} else {
1895 		spdk_thread_send_msg(ctrlr->thread, _free_ctrlr, ctrlr);
1896 	}
1897 }
1898 
1899 static void
1900 nvmf_vfio_user_create_ctrlr(struct nvmf_vfio_user_transport *transport,
1901 			    struct nvmf_vfio_user_endpoint *endpoint)
1902 {
1903 	struct nvmf_vfio_user_ctrlr *ctrlr;
1904 	int err;
1905 
1906 	/* First, construct a vfio-user CUSTOM transport controller */
1907 	ctrlr = calloc(1, sizeof(*ctrlr));
1908 	if (ctrlr == NULL) {
1909 		err = -ENOMEM;
1910 		goto out;
1911 	}
1912 	ctrlr->cntlid = 0xffff;
1913 	ctrlr->transport = transport;
1914 	ctrlr->endpoint = endpoint;
1915 	ctrlr->doorbells = endpoint->doorbells;
1916 
1917 	/* Then, construct an admin queue pair */
1918 	err = init_qp(ctrlr, &transport->transport, NVMF_VFIO_USER_DEFAULT_AQ_DEPTH, 0);
1919 	if (err != 0) {
1920 		goto out;
1921 	}
1922 	endpoint->ctrlr = ctrlr;
1923 
1924 	/* Notify the generic layer about the new admin queue pair */
1925 	TAILQ_INSERT_TAIL(&ctrlr->transport->new_qps, ctrlr->qp[0], link);
1926 
1927 out:
1928 	if (err != 0) {
1929 		SPDK_ERRLOG("%s: failed to create vfio-user controller: %s\n",
1930 			    endpoint_id(endpoint), strerror(-err));
1931 		free_ctrlr(ctrlr);
1932 	}
1933 }
1934 
1935 static int
1936 nvmf_vfio_user_listen(struct spdk_nvmf_transport *transport,
1937 		      const struct spdk_nvme_transport_id *trid,
1938 		      struct spdk_nvmf_listen_opts *listen_opts)
1939 {
1940 	struct nvmf_vfio_user_transport *vu_transport;
1941 	struct nvmf_vfio_user_endpoint *endpoint, *tmp;
1942 	char *path = NULL;
1943 	char uuid[PATH_MAX] = {};
1944 	int fd;
1945 	int err;
1946 
1947 	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
1948 					transport);
1949 
1950 	TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
1951 		/* Only compare traddr */
1952 		if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) {
1953 			return -EEXIST;
1954 		}
1955 	}
1956 
1957 	endpoint = calloc(1, sizeof(*endpoint));
1958 	if (!endpoint) {
1959 		return -ENOMEM;
1960 	}
1961 
1962 	endpoint->fd = -1;
1963 	memcpy(&endpoint->trid, trid, sizeof(endpoint->trid));
1964 
1965 	err = asprintf(&path, "%s/bar0", endpoint_id(endpoint));
1966 	if (err == -1) {
1967 		goto out;
1968 	}
1969 
1970 	fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
1971 	if (fd == -1) {
1972 		SPDK_ERRLOG("%s: failed to open device memory at %s: %m\n",
1973 			    endpoint_id(endpoint), path);
1974 		err = fd;
1975 		free(path);
1976 		goto out;
1977 	}
1978 	free(path);
1979 
1980 	endpoint->fd = fd;
1981 	err = ftruncate(fd, NVMF_VFIO_USER_DOORBELLS_OFFSET + NVMF_VFIO_USER_DOORBELLS_SIZE);
1982 	if (err != 0) {
1983 		goto out;
1984 	}
1985 
1986 	endpoint->doorbells = mmap(NULL, NVMF_VFIO_USER_DOORBELLS_SIZE,
1987 				   PROT_READ | PROT_WRITE, MAP_SHARED, fd, NVMF_VFIO_USER_DOORBELLS_OFFSET);
1988 	if (endpoint->doorbells == MAP_FAILED) {
1989 		endpoint->doorbells = NULL;
1990 		err = -errno;
1991 		goto out;
1992 	}
1993 
1994 	snprintf(uuid, PATH_MAX, "%s/cntrl", endpoint_id(endpoint));
1995 
1996 	endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, uuid, LIBVFIO_USER_FLAG_ATTACH_NB,
1997 					   endpoint, VFU_DEV_TYPE_PCI);
1998 	if (endpoint->vfu_ctx == NULL) {
1999 		SPDK_ERRLOG("%s: error creating libmuser context: %m\n",
2000 			    endpoint_id(endpoint));
2001 		err = -1;
2002 		goto out;
2003 	}
2004 	vfu_setup_log(endpoint->vfu_ctx, vfio_user_log, LOG_DEBUG);
2005 
2006 	err = vfio_user_dev_info_fill(vu_transport, endpoint);
2007 	if (err < 0) {
2008 		goto out;
2009 	}
2010 
2011 	pthread_mutex_init(&endpoint->lock, NULL);
2012 	TAILQ_INSERT_TAIL(&vu_transport->endpoints, endpoint, link);
2013 	SPDK_DEBUGLOG(nvmf_vfio, "%s: doorbells %p\n", uuid, endpoint->doorbells);
2014 
2015 out:
2016 	if (err != 0) {
2017 		nvmf_vfio_user_destroy_endpoint(endpoint);
2018 	}
2019 
2020 	return err;
2021 }
2022 
2023 static void
2024 nvmf_vfio_user_stop_listen(struct spdk_nvmf_transport *transport,
2025 			   const struct spdk_nvme_transport_id *trid)
2026 {
2027 	struct nvmf_vfio_user_transport *vu_transport;
2028 	struct nvmf_vfio_user_endpoint *endpoint, *tmp;
2029 
2030 	assert(trid != NULL);
2031 	assert(trid->traddr != NULL);
2032 
2033 	SPDK_DEBUGLOG(nvmf_vfio, "%s: stop listen\n", trid->traddr);
2034 
2035 	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
2036 					transport);
2037 
2038 	pthread_mutex_lock(&vu_transport->lock);
2039 	TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
2040 		if (strcmp(trid->traddr, endpoint->trid.traddr) == 0) {
2041 			TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link);
2042 			if (endpoint->ctrlr) {
2043 				free_ctrlr(endpoint->ctrlr);
2044 			}
2045 			nvmf_vfio_user_destroy_endpoint(endpoint);
2046 			pthread_mutex_unlock(&vu_transport->lock);
2047 
2048 			return;
2049 		}
2050 	}
2051 	pthread_mutex_unlock(&vu_transport->lock);
2052 
2053 	SPDK_DEBUGLOG(nvmf_vfio, "%s: not found\n", trid->traddr);
2054 }
2055 
2056 static void
2057 nvmf_vfio_user_cdata_init(struct spdk_nvmf_transport *transport,
2058 			  struct spdk_nvmf_subsystem *subsystem,
2059 			  struct spdk_nvmf_ctrlr_data *cdata)
2060 {
2061 	memset(&cdata->sgls, 0, sizeof(struct spdk_nvme_cdata_sgls));
2062 	cdata->sgls.supported = SPDK_NVME_SGLS_SUPPORTED_DWORD_ALIGNED;
2063 }
2064 
2065 static int
2066 nvmf_vfio_user_listen_associate(struct spdk_nvmf_transport *transport,
2067 				const struct spdk_nvmf_subsystem *subsystem,
2068 				const struct spdk_nvme_transport_id *trid)
2069 {
2070 	struct nvmf_vfio_user_transport *vu_transport;
2071 	struct nvmf_vfio_user_endpoint *endpoint;
2072 
2073 	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, transport);
2074 
2075 	TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) {
2076 		if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) {
2077 			break;
2078 		}
2079 	}
2080 
2081 	if (endpoint == NULL) {
2082 		return -ENOENT;
2083 	}
2084 
2085 	endpoint->subsystem = subsystem;
2086 
2087 	return 0;
2088 }
2089 
2090 /*
2091  * Executed periodically.
2092  *
2093  * XXX SPDK thread context.
2094  */
2095 static uint32_t
2096 nvmf_vfio_user_accept(struct spdk_nvmf_transport *transport)
2097 {
2098 	int err;
2099 	struct nvmf_vfio_user_transport *vu_transport;
2100 	struct nvmf_vfio_user_qpair *qp, *tmp_qp;
2101 	struct nvmf_vfio_user_endpoint *endpoint;
2102 
2103 	vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
2104 					transport);
2105 
2106 	pthread_mutex_lock(&vu_transport->lock);
2107 
2108 	TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) {
2109 		/* try to attach a new controller  */
2110 		if (endpoint->ctrlr != NULL) {
2111 			continue;
2112 		}
2113 
2114 		err = vfu_attach_ctx(endpoint->vfu_ctx);
2115 		if (err != 0) {
2116 			if (errno == EAGAIN || errno == EWOULDBLOCK) {
2117 				continue;
2118 			}
2119 
2120 			pthread_mutex_unlock(&vu_transport->lock);
2121 			return -EFAULT;
2122 		}
2123 
2124 		/* Construct a controller */
2125 		nvmf_vfio_user_create_ctrlr(vu_transport, endpoint);
2126 	}
2127 
2128 	TAILQ_FOREACH_SAFE(qp, &vu_transport->new_qps, link, tmp_qp) {
2129 		TAILQ_REMOVE(&vu_transport->new_qps, qp, link);
2130 		spdk_nvmf_tgt_new_qpair(transport->tgt, &qp->qpair);
2131 	}
2132 
2133 	pthread_mutex_unlock(&vu_transport->lock);
2134 
2135 	return 0;
2136 }
2137 
2138 static void
2139 nvmf_vfio_user_discover(struct spdk_nvmf_transport *transport,
2140 			struct spdk_nvme_transport_id *trid,
2141 			struct spdk_nvmf_discovery_log_page_entry *entry)
2142 { }
2143 
2144 static struct spdk_nvmf_transport_poll_group *
2145 nvmf_vfio_user_poll_group_create(struct spdk_nvmf_transport *transport)
2146 {
2147 	struct nvmf_vfio_user_poll_group *vu_group;
2148 
2149 	SPDK_DEBUGLOG(nvmf_vfio, "create poll group\n");
2150 
2151 	vu_group = calloc(1, sizeof(*vu_group));
2152 	if (vu_group == NULL) {
2153 		SPDK_ERRLOG("Error allocating poll group: %m");
2154 		return NULL;
2155 	}
2156 
2157 	TAILQ_INIT(&vu_group->qps);
2158 
2159 	return &vu_group->group;
2160 }
2161 
2162 /* called when process exits */
2163 static void
2164 nvmf_vfio_user_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
2165 {
2166 	struct nvmf_vfio_user_poll_group *vu_group;
2167 
2168 	SPDK_DEBUGLOG(nvmf_vfio, "destroy poll group\n");
2169 
2170 	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);
2171 
2172 	free(vu_group);
2173 }
2174 
2175 static void
2176 vfio_user_qpair_disconnect_cb(void *ctx)
2177 {
2178 	struct nvmf_vfio_user_endpoint *endpoint = ctx;
2179 	struct nvmf_vfio_user_ctrlr *ctrlr;
2180 
2181 	pthread_mutex_lock(&endpoint->lock);
2182 	ctrlr = endpoint->ctrlr;
2183 	if (!ctrlr) {
2184 		pthread_mutex_unlock(&endpoint->lock);
2185 		return;
2186 	}
2187 
2188 	if (!ctrlr->num_connected_qps) {
2189 		endpoint->ctrlr = NULL;
2190 		free_ctrlr(ctrlr);
2191 		pthread_mutex_unlock(&endpoint->lock);
2192 		return;
2193 	}
2194 	pthread_mutex_unlock(&endpoint->lock);
2195 }
2196 
2197 static int
2198 vfio_user_destroy_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr)
2199 {
2200 	uint32_t i;
2201 	struct nvmf_vfio_user_qpair *qpair;
2202 	struct nvmf_vfio_user_endpoint *endpoint;
2203 
2204 	SPDK_DEBUGLOG(nvmf_vfio, "%s stop processing\n", ctrlr_id(ctrlr));
2205 
2206 	endpoint = ctrlr->endpoint;
2207 	assert(endpoint != NULL);
2208 
2209 	pthread_mutex_lock(&endpoint->lock);
2210 	if (ctrlr->num_connected_qps == 0) {
2211 		endpoint->ctrlr = NULL;
2212 		free_ctrlr(ctrlr);
2213 		pthread_mutex_unlock(&endpoint->lock);
2214 		return 0;
2215 	}
2216 	pthread_mutex_unlock(&endpoint->lock);
2217 
2218 	for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) {
2219 		qpair = ctrlr->qp[i];
2220 		if (qpair == NULL) {
2221 			continue;
2222 		}
2223 		spdk_nvmf_qpair_disconnect(&qpair->qpair, vfio_user_qpair_disconnect_cb, endpoint);
2224 	}
2225 
2226 	return 0;
2227 }
2228 
2229 static int
2230 vfio_user_poll_mmio(void *ctx)
2231 {
2232 	struct nvmf_vfio_user_ctrlr *ctrlr = ctx;
2233 	int ret;
2234 
2235 	assert(ctrlr != NULL);
2236 
2237 	/* This will call access_bar0_fn() if there are any writes
2238 	 * to the portion of the BAR that is not mmap'd */
2239 	ret = vfu_run_ctx(ctrlr->endpoint->vfu_ctx);
2240 	if (spdk_unlikely(ret != 0)) {
2241 		spdk_poller_unregister(&ctrlr->mmio_poller);
2242 
2243 		/* initiator shutdown or reset, waiting for another re-connect */
2244 		if (errno == ENOTCONN) {
2245 			vfio_user_destroy_ctrlr(ctrlr);
2246 			return SPDK_POLLER_BUSY;
2247 		}
2248 
2249 		fail_ctrlr(ctrlr);
2250 	}
2251 
2252 	return SPDK_POLLER_BUSY;
2253 }
2254 
2255 static int
2256 handle_queue_connect_rsp(struct nvmf_vfio_user_req *req, void *cb_arg)
2257 {
2258 	struct nvmf_vfio_user_poll_group *vu_group;
2259 	struct nvmf_vfio_user_qpair *qpair = cb_arg;
2260 	struct nvmf_vfio_user_ctrlr *ctrlr;
2261 	struct nvmf_vfio_user_endpoint *endpoint;
2262 
2263 	assert(qpair != NULL);
2264 	assert(req != NULL);
2265 
2266 	ctrlr = qpair->ctrlr;
2267 	endpoint = ctrlr->endpoint;
2268 	assert(ctrlr != NULL);
2269 	assert(endpoint != NULL);
2270 
2271 	if (spdk_nvme_cpl_is_error(&req->req.rsp->nvme_cpl)) {
2272 		SPDK_ERRLOG("SC %u, SCT %u\n", req->req.rsp->nvme_cpl.status.sc, req->req.rsp->nvme_cpl.status.sct);
2273 		endpoint->ctrlr = NULL;
2274 		free_ctrlr(ctrlr);
2275 		return -1;
2276 	}
2277 
2278 	vu_group = SPDK_CONTAINEROF(qpair->group, struct nvmf_vfio_user_poll_group, group);
2279 	TAILQ_INSERT_TAIL(&vu_group->qps, qpair, link);
2280 	qpair->state = VFIO_USER_QPAIR_ACTIVE;
2281 
2282 	pthread_mutex_lock(&endpoint->lock);
2283 	if (nvmf_qpair_is_admin_queue(&qpair->qpair)) {
2284 		ctrlr->cntlid = qpair->qpair.ctrlr->cntlid;
2285 		ctrlr->thread = spdk_get_thread();
2286 		ctrlr->mmio_poller = SPDK_POLLER_REGISTER(vfio_user_poll_mmio, ctrlr, 0);
2287 	} else {
2288 		/* For I/O queues this command was generated in response to an
2289 		 * ADMIN I/O CREATE SUBMISSION QUEUE command which has not yet
2290 		 * been completed. Complete it now.
2291 		 */
2292 		post_completion(ctrlr, &qpair->create_io_sq_cmd, &ctrlr->qp[0]->cq, 0,
2293 				SPDK_NVME_SC_SUCCESS, SPDK_NVME_SCT_GENERIC);
2294 	}
2295 	ctrlr->num_connected_qps++;
2296 	pthread_mutex_unlock(&endpoint->lock);
2297 
2298 	free(req->req.data);
2299 	req->req.data = NULL;
2300 
2301 	return 0;
2302 }
2303 
2304 /*
2305  * Add the given qpair to the given poll group. New qpairs are added to
2306  * ->new_qps; they are processed via nvmf_vfio_user_accept(), calling
2307  * spdk_nvmf_tgt_new_qpair(), which picks a poll group, then calls back
2308  * here via nvmf_transport_poll_group_add().
2309  */
2310 static int
2311 nvmf_vfio_user_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
2312 			      struct spdk_nvmf_qpair *qpair)
2313 {
2314 	struct nvmf_vfio_user_qpair *vu_qpair;
2315 	struct nvmf_vfio_user_req *vu_req;
2316 	struct nvmf_vfio_user_ctrlr *ctrlr;
2317 	struct spdk_nvmf_request *req;
2318 	struct spdk_nvmf_fabric_connect_data *data;
2319 	bool admin;
2320 
2321 	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
2322 	vu_qpair->group = group;
2323 	ctrlr = vu_qpair->ctrlr;
2324 
2325 	SPDK_DEBUGLOG(nvmf_vfio, "%s: add QP%d=%p(%p) to poll_group=%p\n",
2326 		      ctrlr_id(ctrlr), vu_qpair->qpair.qid,
2327 		      vu_qpair, qpair, group);
2328 
2329 	admin = nvmf_qpair_is_admin_queue(&vu_qpair->qpair);
2330 
2331 	vu_req = get_nvmf_vfio_user_req(vu_qpair);
2332 	if (vu_req == NULL) {
2333 		return -1;
2334 	}
2335 
2336 	req = &vu_req->req;
2337 	req->cmd->connect_cmd.opcode = SPDK_NVME_OPC_FABRIC;
2338 	req->cmd->connect_cmd.cid = 0;
2339 	req->cmd->connect_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT;
2340 	req->cmd->connect_cmd.recfmt = 0;
2341 	req->cmd->connect_cmd.sqsize = vu_qpair->qsize - 1;
2342 	req->cmd->connect_cmd.qid = admin ? 0 : qpair->qid;
2343 
2344 	req->length = sizeof(struct spdk_nvmf_fabric_connect_data);
2345 	req->data = calloc(1, req->length);
2346 	if (req->data == NULL) {
2347 		nvmf_vfio_user_req_free(req);
2348 		return -ENOMEM;
2349 	}
2350 
2351 	data = (struct spdk_nvmf_fabric_connect_data *)req->data;
2352 	data->cntlid = admin ? 0xFFFF : ctrlr->cntlid;
2353 	snprintf(data->subnqn, sizeof(data->subnqn), "%s",
2354 		 spdk_nvmf_subsystem_get_nqn(ctrlr->endpoint->subsystem));
2355 
2356 	vu_req->cb_fn = handle_queue_connect_rsp;
2357 	vu_req->cb_arg = vu_qpair;
2358 
2359 	SPDK_DEBUGLOG(nvmf_vfio,
2360 		      "%s: sending connect fabrics command for QID=%#x cntlid=%#x\n",
2361 		      ctrlr_id(ctrlr), qpair->qid, data->cntlid);
2362 
2363 	spdk_nvmf_request_exec_fabrics(req);
2364 	return 0;
2365 }
2366 
2367 static int
2368 nvmf_vfio_user_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
2369 				 struct spdk_nvmf_qpair *qpair)
2370 {
2371 	struct nvmf_vfio_user_qpair *vu_qpair;
2372 	struct nvmf_vfio_user_ctrlr *vu_ctrlr;
2373 	struct nvmf_vfio_user_endpoint *endpoint;
2374 	struct nvmf_vfio_user_poll_group *vu_group;
2375 
2376 	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
2377 	vu_ctrlr = vu_qpair->ctrlr;
2378 	endpoint = vu_ctrlr->endpoint;
2379 
2380 	SPDK_DEBUGLOG(nvmf_vfio,
2381 		      "%s: remove NVMf QP%d=%p from NVMf poll_group=%p\n",
2382 		      ctrlr_id(vu_qpair->ctrlr), qpair->qid, qpair, group);
2383 
2384 
2385 	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);
2386 	TAILQ_REMOVE(&vu_group->qps, vu_qpair, link);
2387 
2388 	pthread_mutex_lock(&endpoint->lock);
2389 	assert(vu_ctrlr->num_connected_qps);
2390 	vu_ctrlr->num_connected_qps--;
2391 	pthread_mutex_unlock(&endpoint->lock);
2392 
2393 	return 0;
2394 }
2395 
2396 static void
2397 _nvmf_vfio_user_req_free(struct nvmf_vfio_user_qpair *vu_qpair, struct nvmf_vfio_user_req *vu_req)
2398 {
2399 	memset(&vu_req->cmd, 0, sizeof(vu_req->cmd));
2400 	memset(&vu_req->rsp, 0, sizeof(vu_req->rsp));
2401 	vu_req->iovcnt = 0;
2402 	vu_req->state = VFIO_USER_REQUEST_STATE_FREE;
2403 
2404 	TAILQ_INSERT_TAIL(&vu_qpair->reqs, vu_req, link);
2405 }
2406 
2407 static int
2408 nvmf_vfio_user_req_free(struct spdk_nvmf_request *req)
2409 {
2410 	struct nvmf_vfio_user_qpair *vu_qpair;
2411 	struct nvmf_vfio_user_req *vu_req;
2412 
2413 	assert(req != NULL);
2414 
2415 	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
2416 	vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);
2417 
2418 	_nvmf_vfio_user_req_free(vu_qpair, vu_req);
2419 
2420 	return 0;
2421 }
2422 
2423 static int
2424 nvmf_vfio_user_req_complete(struct spdk_nvmf_request *req)
2425 {
2426 	struct nvmf_vfio_user_qpair *vu_qpair;
2427 	struct nvmf_vfio_user_req *vu_req;
2428 
2429 	assert(req != NULL);
2430 
2431 	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
2432 	vu_qpair = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);
2433 
2434 	if (vu_req->cb_fn != NULL) {
2435 		if (vu_req->cb_fn(vu_req, vu_req->cb_arg) != 0) {
2436 			fail_ctrlr(vu_qpair->ctrlr);
2437 		}
2438 	}
2439 
2440 	_nvmf_vfio_user_req_free(vu_qpair, vu_req);
2441 
2442 	return 0;
2443 }
2444 
2445 static void
2446 nvmf_vfio_user_close_qpair(struct spdk_nvmf_qpair *qpair,
2447 			   spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
2448 {
2449 	struct nvmf_vfio_user_qpair *vu_qpair;
2450 
2451 	assert(qpair != NULL);
2452 	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
2453 	free_qp(vu_qpair->ctrlr, qpair->qid);
2454 
2455 	if (cb_fn) {
2456 		cb_fn(cb_arg);
2457 	}
2458 }
2459 
2460 /**
2461  * Returns a preallocated spdk_nvmf_request or NULL if there isn't one available.
2462  */
2463 static struct nvmf_vfio_user_req *
2464 get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair)
2465 {
2466 	struct nvmf_vfio_user_req *req;
2467 
2468 	assert(qpair != NULL);
2469 
2470 	if (TAILQ_EMPTY(&qpair->reqs)) {
2471 		return NULL;
2472 	}
2473 
2474 	req = TAILQ_FIRST(&qpair->reqs);
2475 	TAILQ_REMOVE(&qpair->reqs, req, link);
2476 
2477 	return req;
2478 }
2479 
2480 static struct spdk_nvmf_request *
2481 get_nvmf_req(struct nvmf_vfio_user_qpair *qpair)
2482 {
2483 	struct nvmf_vfio_user_req *req = get_nvmf_vfio_user_req(qpair);
2484 
2485 	if (req == NULL) {
2486 		return NULL;
2487 	}
2488 	return &req->req;
2489 }
2490 
2491 static int
2492 get_nvmf_io_req_length(struct spdk_nvmf_request *req)
2493 {
2494 	uint16_t nlb, nr;
2495 	uint32_t nsid;
2496 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2497 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
2498 	struct spdk_nvmf_ns *ns;
2499 
2500 	nsid = cmd->nsid;
2501 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
2502 	if (ns == NULL || ns->bdev == NULL) {
2503 		SPDK_ERRLOG("unsuccessful query for nsid %u\n", cmd->nsid);
2504 		return -EINVAL;
2505 	}
2506 
2507 	if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
2508 		nr = cmd->cdw10_bits.dsm.nr + 1;
2509 		return nr * sizeof(struct spdk_nvme_dsm_range);
2510 	}
2511 
2512 	nlb = (cmd->cdw12 & 0x0000ffffu) + 1;
2513 	return nlb * spdk_bdev_get_block_size(ns->bdev);
2514 }
2515 
2516 static int
2517 map_admin_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
2518 {
2519 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2520 	uint32_t len = 0;
2521 	int iovcnt;
2522 
2523 	req->xfer = cmd->opc & 0x3;
2524 	req->length = 0;
2525 	req->data = NULL;
2526 
2527 	switch (cmd->opc) {
2528 	case SPDK_NVME_OPC_IDENTIFY:
2529 		len = 4096; /* TODO: there should be a define somewhere for this */
2530 		break;
2531 	case SPDK_NVME_OPC_GET_LOG_PAGE:
2532 		len = (cmd->cdw10_bits.get_log_page.numdl + 1) * 4;
2533 		break;
2534 	}
2535 
2536 	if (!cmd->dptr.prp.prp1 || !len) {
2537 		return 0;
2538 	}
2539 	/* ADMIN command will not use SGL */
2540 	assert(req->cmd->nvme_cmd.psdt == 0);
2541 	iovcnt = vfio_user_map_cmd(ctrlr, req, req->iov, len);
2542 	if (iovcnt < 0) {
2543 		SPDK_ERRLOG("%s: map Admin Opc %x failed\n",
2544 			    ctrlr_id(ctrlr), cmd->opc);
2545 		return -1;
2546 	}
2547 
2548 	req->length = len;
2549 	req->data = req->iov[0].iov_base;
2550 
2551 	return 0;
2552 }
2553 
2554 /*
2555  * Handles an I/O command.
2556  *
2557  * Returns 0 on success and -errno on failure. Sets @submit on whether or not
2558  * the request must be forwarded to NVMf.
2559  */
2560 static int
2561 map_io_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
2562 {
2563 	int err = 0;
2564 	struct spdk_nvme_cmd *cmd;
2565 
2566 	assert(ctrlr != NULL);
2567 	assert(req != NULL);
2568 
2569 	cmd = &req->cmd->nvme_cmd;
2570 	req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
2571 
2572 	if (spdk_unlikely(req->xfer == SPDK_NVME_DATA_NONE)) {
2573 		return 0;
2574 	}
2575 
2576 	err = get_nvmf_io_req_length(req);
2577 	if (err < 0) {
2578 		return -EINVAL;
2579 	}
2580 
2581 	req->length = err;
2582 	err = vfio_user_map_cmd(ctrlr, req, req->iov, req->length);
2583 	if (err < 0) {
2584 		SPDK_ERRLOG("%s: failed to map IO OPC %u\n", ctrlr_id(ctrlr), cmd->opc);
2585 		return -EFAULT;
2586 	}
2587 
2588 	req->data = req->iov[0].iov_base;
2589 	req->iovcnt = err;
2590 
2591 	return 0;
2592 }
2593 
2594 static int
2595 handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
2596 	       struct spdk_nvmf_request *req)
2597 {
2598 	int err;
2599 	struct nvmf_vfio_user_req *vu_req;
2600 
2601 	assert(ctrlr != NULL);
2602 	assert(cmd != NULL);
2603 
2604 	/*
2605 	 * TODO: this means that there are no free requests available,
2606 	 * returning -1 will fail the controller. Theoretically this error can
2607 	 * be avoided completely by ensuring we have as many requests as slots
2608 	 * in the SQ, plus one for the the property request.
2609 	 */
2610 	if (spdk_unlikely(req == NULL)) {
2611 		return -1;
2612 	}
2613 
2614 	vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
2615 	vu_req->cb_fn = handle_cmd_rsp;
2616 	vu_req->cb_arg = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);
2617 	req->cmd->nvme_cmd = *cmd;
2618 	if (nvmf_qpair_is_admin_queue(req->qpair)) {
2619 		err = map_admin_cmd_req(ctrlr, req);
2620 	} else {
2621 		err = map_io_cmd_req(ctrlr, req);
2622 	}
2623 
2624 	if (spdk_unlikely(err < 0)) {
2625 		SPDK_ERRLOG("%s: map NVMe command opc 0x%x failed\n",
2626 			    ctrlr_id(ctrlr), cmd->opc);
2627 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2628 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2629 		return handle_cmd_rsp(vu_req, vu_req->cb_arg);
2630 	}
2631 
2632 	vu_req->state = VFIO_USER_REQUEST_STATE_EXECUTING;
2633 	spdk_nvmf_request_exec(req);
2634 
2635 	return 0;
2636 }
2637 
2638 static void
2639 nvmf_vfio_user_qpair_poll(struct nvmf_vfio_user_qpair *qpair)
2640 {
2641 	struct nvmf_vfio_user_ctrlr *ctrlr;
2642 	uint32_t new_tail;
2643 
2644 	assert(qpair != NULL);
2645 
2646 	ctrlr = qpair->ctrlr;
2647 
2648 	new_tail = *tdbl(ctrlr, &qpair->sq);
2649 	if (sq_head(qpair) != new_tail) {
2650 		int err = handle_sq_tdbl_write(ctrlr, new_tail, qpair);
2651 		if (err != 0) {
2652 			fail_ctrlr(ctrlr);
2653 			return;
2654 		}
2655 	}
2656 }
2657 
2658 static int
2659 nvmf_vfio_user_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
2660 {
2661 	struct nvmf_vfio_user_poll_group *vu_group;
2662 	struct nvmf_vfio_user_qpair *vu_qpair, *tmp;
2663 
2664 	assert(group != NULL);
2665 
2666 	spdk_rmb();
2667 
2668 	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);
2669 
2670 	TAILQ_FOREACH_SAFE(vu_qpair, &vu_group->qps, link, tmp) {
2671 		if (spdk_unlikely(vu_qpair->state != VFIO_USER_QPAIR_ACTIVE || !vu_qpair->sq.size)) {
2672 			continue;
2673 		}
2674 		nvmf_vfio_user_qpair_poll(vu_qpair);
2675 	}
2676 
2677 	return 0;
2678 }
2679 
2680 static int
2681 nvmf_vfio_user_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
2682 				    struct spdk_nvme_transport_id *trid)
2683 {
2684 	struct nvmf_vfio_user_qpair *vu_qpair;
2685 	struct nvmf_vfio_user_ctrlr *ctrlr;
2686 
2687 	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
2688 	ctrlr = vu_qpair->ctrlr;
2689 
2690 	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
2691 	return 0;
2692 }
2693 
2694 static int
2695 nvmf_vfio_user_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
2696 				   struct spdk_nvme_transport_id *trid)
2697 {
2698 	return 0;
2699 }
2700 
2701 static int
2702 nvmf_vfio_user_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
2703 				     struct spdk_nvme_transport_id *trid)
2704 {
2705 	struct nvmf_vfio_user_qpair *vu_qpair;
2706 	struct nvmf_vfio_user_ctrlr *ctrlr;
2707 
2708 	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
2709 	ctrlr = vu_qpair->ctrlr;
2710 
2711 	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
2712 	return 0;
2713 }
2714 
2715 static void
2716 nvmf_vfio_user_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
2717 				   struct spdk_nvmf_request *req)
2718 {
2719 	struct nvmf_vfio_user_qpair *vu_qpair;
2720 	struct nvmf_vfio_user_req *vu_req, *vu_req_to_abort = NULL;
2721 	uint16_t i, cid;
2722 
2723 	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
2724 
2725 	cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
2726 	for (i = 0; i < vu_qpair->qsize; i++) {
2727 		vu_req = &vu_qpair->reqs_internal[i];
2728 		if (vu_req->state == VFIO_USER_REQUEST_STATE_EXECUTING && vu_req->cmd.cid == cid) {
2729 			vu_req_to_abort = vu_req;
2730 			break;
2731 		}
2732 	}
2733 
2734 	if (vu_req_to_abort == NULL) {
2735 		spdk_nvmf_request_complete(req);
2736 		return;
2737 	}
2738 
2739 	req->req_to_abort = &vu_req_to_abort->req;
2740 	nvmf_ctrlr_abort_request(req);
2741 }
2742 
2743 static void
2744 nvmf_vfio_user_opts_init(struct spdk_nvmf_transport_opts *opts)
2745 {
2746 	opts->max_queue_depth =		NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH;
2747 	opts->max_qpairs_per_ctrlr =	NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2748 	opts->in_capsule_data_size =	0;
2749 	opts->max_io_size =		NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE;
2750 	opts->io_unit_size =		NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE;
2751 	opts->max_aq_depth =		NVMF_VFIO_USER_DEFAULT_AQ_DEPTH;
2752 	opts->num_shared_buffers =	0;
2753 	opts->buf_cache_size =		0;
2754 	opts->association_timeout =	0;
2755 	opts->transport_specific =      NULL;
2756 }
2757 
2758 const struct spdk_nvmf_transport_ops spdk_nvmf_transport_vfio_user = {
2759 	.name = "VFIOUSER",
2760 	.type = SPDK_NVME_TRANSPORT_VFIOUSER,
2761 	.opts_init = nvmf_vfio_user_opts_init,
2762 	.create = nvmf_vfio_user_create,
2763 	.destroy = nvmf_vfio_user_destroy,
2764 
2765 	.listen = nvmf_vfio_user_listen,
2766 	.stop_listen = nvmf_vfio_user_stop_listen,
2767 	.accept = nvmf_vfio_user_accept,
2768 	.cdata_init = nvmf_vfio_user_cdata_init,
2769 	.listen_associate = nvmf_vfio_user_listen_associate,
2770 
2771 	.listener_discover = nvmf_vfio_user_discover,
2772 
2773 	.poll_group_create = nvmf_vfio_user_poll_group_create,
2774 	.poll_group_destroy = nvmf_vfio_user_poll_group_destroy,
2775 	.poll_group_add = nvmf_vfio_user_poll_group_add,
2776 	.poll_group_remove = nvmf_vfio_user_poll_group_remove,
2777 	.poll_group_poll = nvmf_vfio_user_poll_group_poll,
2778 
2779 	.req_free = nvmf_vfio_user_req_free,
2780 	.req_complete = nvmf_vfio_user_req_complete,
2781 
2782 	.qpair_fini = nvmf_vfio_user_close_qpair,
2783 	.qpair_get_local_trid = nvmf_vfio_user_qpair_get_local_trid,
2784 	.qpair_get_peer_trid = nvmf_vfio_user_qpair_get_peer_trid,
2785 	.qpair_get_listen_trid = nvmf_vfio_user_qpair_get_listen_trid,
2786 	.qpair_abort_request = nvmf_vfio_user_qpair_abort_request,
2787 };
2788 
2789 SPDK_NVMF_TRANSPORT_REGISTER(muser, &spdk_nvmf_transport_vfio_user);
2790 SPDK_LOG_REGISTER_COMPONENT(nvmf_vfio)
2791