xref: /spdk/lib/nvme/nvme_cuse.c (revision a1246d4df98c50190ddaa9be43d18e00f2a83c12)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2019 Intel Corporation.
3  *   All rights reserved.
4  */
5 #include "spdk/stdinc.h"
6 #include "spdk/config.h"
7 #include "spdk/log.h"
8 #include "spdk/nvme.h"
9 
10 #ifdef SPDK_CONFIG_NVME_CUSE
11 #define FUSE_USE_VERSION 31
12 
13 #include <fuse3/cuse_lowlevel.h>
14 
15 #include <linux/nvme_ioctl.h>
16 #include <linux/fs.h>
17 
18 #include "nvme_internal.h"
19 #include "nvme_io_msg.h"
20 #include "nvme_cuse.h"
21 
/* State for one CUSE character device node.  A controller node has
 * nsid == 0 and owns a list of namespace nodes (nsid != 0); see
 * cuse_nvme_ns_start()/cuse_nvme_ns_stop(). */
struct cuse_device {
	char				dev_name[128];	/* device node name passed to CUSE via DEVNAME= */
	uint32_t			index;		/* controller index used in the claim lock file name */
	int				claim_fd;	/* fd of the claim lock file, kept open to hold the lock */
	char				lock_name[64];	/* path of the claim lock file under /var/tmp */

	struct spdk_nvme_ctrlr		*ctrlr;		/**< NVMe controller */
	uint32_t			nsid;		/**< NVMe name space id, or 0 */

	pthread_t			tid;		/* cuse_thread() servicing this device's fuse session */
	struct fuse_session		*session;

	struct cuse_device		*ctrlr_device;	/* parent controller device; set for namespace nodes */
	TAILQ_HEAD(, cuse_device)	ns_devices;	/* namespace nodes owned by this controller node */

	TAILQ_ENTRY(cuse_device)	tailq;		/* link in g_ctrlr_ctx_head or in ns_devices */
};
39 
/* Mutex protecting the global CUSE state below — NOTE(review): lock/unlock
 * sites are outside this view; confirm it guards g_ctrlr_ctx_head and
 * g_ctrlr_started. */
static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER;
/* All controller-level cuse_device instances that have been started. */
static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
/* Bit per claimed controller index (see nvme_cuse_claim()). */
static struct spdk_bit_array *g_ctrlr_started;
43 
/* Per-request context for an asynchronous ioctl (passthru or submit-io).
 * Allocated in the fuse handler thread, handed to the SPDK thread via
 * nvme_io_msg_send(), and freed by cuse_io_ctx_free() on completion. */
struct cuse_io_ctx {
	struct spdk_nvme_cmd		nvme_cmd;	/* command built from the user ioctl payload */
	enum spdk_nvme_data_transfer	data_transfer;	/* direction derived from the opcode */

	uint64_t			lba;		/* starting LBA (submit-io path) */
	uint32_t			lba_count;	/* number of LBAs (user nblocks + 1) */
	uint16_t			apptag;
	uint16_t			appmask;

	void				*data;		/* DMA-able payload buffer, or NULL */
	void				*metadata;	/* DMA-able metadata buffer, or NULL */

	int				data_len;
	int				metadata_len;

	fuse_req_t			req;		/* fuse request completed when the command finishes */
};
61 
62 static void
63 cuse_io_ctx_free(struct cuse_io_ctx *ctx)
64 {
65 	spdk_free(ctx->data);
66 	spdk_free(ctx->metadata);
67 	free(ctx);
68 }
69 
/*
 * If the kernel supplied no output buffer (out_bufsz == 0), ask CUSE to
 * retry the ioctl with a buffer big enough for `val`, then return from the
 * calling function.  Wrapped in do/while(0) so the macro expands to exactly
 * one statement and cannot capture a dangling `else` at its call site.
 */
#define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val)		\
	do {								\
		if (out_bufsz == 0) {					\
			struct iovec out_iov;				\
			out_iov.iov_base = (void *)arg;			\
			out_iov.iov_len = sizeof(val);			\
			fuse_reply_ioctl_retry(req, NULL, 0, &out_iov, 1); \
			return;						\
		}							\
	} while (0)
78 
/* Upper bound on a single FUSE ioctl transfer: 128 KiB.  Parenthesized so
 * the macro expands safely inside larger expressions. */
#define FUSE_MAX_SIZE (128 * 1024)
80 
81 static bool
82 fuse_check_req_size(fuse_req_t req, struct iovec iov[], int iovcnt)
83 {
84 	int total_iov_len = 0;
85 	for (int i = 0; i < iovcnt; i++) {
86 		total_iov_len += iov[i].iov_len;
87 		if (total_iov_len > FUSE_MAX_SIZE) {
88 			fuse_reply_err(req, ENOMEM);
89 			SPDK_ERRLOG("FUSE request cannot be larger that %d\n", FUSE_MAX_SIZE);
90 			return false;
91 		}
92 	}
93 	return true;
94 }
95 
96 static void
97 cuse_nvme_passthru_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl)
98 {
99 	struct cuse_io_ctx *ctx = arg;
100 	struct iovec out_iov[3];
101 	struct spdk_nvme_cpl _cpl;
102 	int out_iovcnt = 0;
103 	uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */
104 
105 	memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl));
106 	out_iov[out_iovcnt].iov_base = &_cpl.cdw0;
107 	out_iov[out_iovcnt].iov_len = sizeof(_cpl.cdw0);
108 	out_iovcnt += 1;
109 
110 	if (ctx->data_transfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
111 		if (ctx->data_len > 0) {
112 			out_iov[out_iovcnt].iov_base = ctx->data;
113 			out_iov[out_iovcnt].iov_len = ctx->data_len;
114 			out_iovcnt += 1;
115 		}
116 		if (ctx->metadata_len > 0) {
117 			out_iov[out_iovcnt].iov_base = ctx->metadata;
118 			out_iov[out_iovcnt].iov_len = ctx->metadata_len;
119 			out_iovcnt += 1;
120 		}
121 	}
122 
123 	fuse_reply_ioctl_iov(ctx->req, status_field, out_iov, out_iovcnt);
124 	cuse_io_ctx_free(ctx);
125 }
126 
127 static void
128 cuse_nvme_passthru_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
129 {
130 	int rc;
131 	struct cuse_io_ctx *ctx = arg;
132 
133 	if (nsid != 0) {
134 		rc = spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, ctrlr->external_io_msgs_qpair, &ctx->nvme_cmd,
135 							ctx->data,
136 							ctx->data_len, ctx->metadata, cuse_nvme_passthru_cmd_cb, (void *)ctx);
137 	} else {
138 		rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len,
139 						   cuse_nvme_passthru_cmd_cb, (void *)ctx);
140 	}
141 	if (rc < 0) {
142 		fuse_reply_err(ctx->req, EINVAL);
143 		cuse_io_ctx_free(ctx);
144 	}
145 }
146 
/*
 * Build a cuse_io_ctx from the user's passthru ioctl, copy in any
 * host-to-controller payload, and hand the command to the controller's
 * SPDK thread via nvme_io_msg_send().
 *
 * On failure the fuse request is completed here with an error; on success
 * cuse_nvme_passthru_cmd_cb() completes it after the command finishes.
 *
 * \param req fuse request being serviced.
 * \param passthru_cmd ioctl payload as copied in by CUSE.
 * \param data host-to-controller data payload, or NULL.
 * \param metadata host-to-controller metadata payload, or NULL.
 * \param cmd the ioctl number (NVME_IOCTL_ADMIN_CMD or an I/O ioctl).
 */
static void
cuse_nvme_passthru_cmd_send(fuse_req_t req, struct nvme_passthru_cmd *passthru_cmd,
			    const void *data, const void *metadata, int cmd)
{
	struct cuse_io_ctx *ctx;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	int rv;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode);

	/* Translate the Linux ioctl layout into an spdk_nvme_cmd. */
	memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd));
	ctx->nvme_cmd.opc = passthru_cmd->opcode;
	ctx->nvme_cmd.nsid = passthru_cmd->nsid;
	ctx->nvme_cmd.cdw10 = passthru_cmd->cdw10;
	ctx->nvme_cmd.cdw11 = passthru_cmd->cdw11;
	ctx->nvme_cmd.cdw12 = passthru_cmd->cdw12;
	ctx->nvme_cmd.cdw13 = passthru_cmd->cdw13;
	ctx->nvme_cmd.cdw14 = passthru_cmd->cdw14;
	ctx->nvme_cmd.cdw15 = passthru_cmd->cdw15;

	ctx->data_len = passthru_cmd->data_len;
	ctx->metadata_len = passthru_cmd->metadata_len;

	/* Payload buffers must be DMA-able, so they are bounce buffers
	 * allocated here rather than the user's memory. */
	if (ctx->data_len > 0) {
		ctx->data = spdk_malloc(ctx->data_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!ctx->data) {
			SPDK_ERRLOG("Cannot allocate memory for data\n");
			fuse_reply_err(req, ENOMEM);
			free(ctx);
			return;
		}
		if (data != NULL) {
			memcpy(ctx->data, data, ctx->data_len);
		}
	}

	if (ctx->metadata_len > 0) {
		ctx->metadata = spdk_malloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!ctx->metadata) {
			SPDK_ERRLOG("Cannot allocate memory for metadata\n");
			fuse_reply_err(req, ENOMEM);
			cuse_io_ctx_free(ctx);
			return;
		}
		if (metadata != NULL) {
			memcpy(ctx->metadata, metadata, ctx->metadata_len);
		}
	}

	if ((unsigned int)cmd != NVME_IOCTL_ADMIN_CMD) {
		/* Send NS for IO IOCTLs */
		rv = nvme_io_msg_send(cuse_device->ctrlr, passthru_cmd->nsid, cuse_nvme_passthru_cmd_execute, ctx);
	} else {
		/* NS == 0 for Admin IOCTLs */
		rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_passthru_cmd_execute, ctx);
	}
	if (rv) {
		SPDK_ERRLOG("Cannot send io msg to the controller\n");
		/* nvme_io_msg_send returns a negative errno; negate for fuse. */
		fuse_reply_err(req, -rv);
		cuse_io_ctx_free(ctx);
		return;
	}
}
218 
/*
 * Handle NVME_IOCTL_ADMIN_CMD / NVME_IOCTL_IO_CMD.
 *
 * CUSE unrestricted ioctls are a multi-pass protocol: the handler describes
 * the memory it needs with fuse_reply_ioctl_retry(), the kernel copies the
 * ranges and re-invokes the handler with in_buf/in_bufsz/out_bufsz filled
 * in.  This function therefore runs up to three times per ioctl:
 *   1. in_bufsz == 0  -> ask for the nvme_passthru_cmd struct itself;
 *   2. out_bufsz == 0 -> ask for the data/metadata ranges named in it;
 *   3. both available -> forward to cuse_nvme_passthru_cmd_send().
 */
static void
cuse_nvme_passthru_cmd(fuse_req_t req, int cmd, void *arg,
		       struct fuse_file_info *fi, unsigned flags,
		       const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct nvme_passthru_cmd *passthru_cmd;
	struct iovec in_iov[3], out_iov[3];
	int in_iovcnt = 0, out_iovcnt = 0;
	const void *dptr = NULL, *mdptr = NULL;
	enum spdk_nvme_data_transfer data_transfer;

	/* Pass 1: fetch the command struct pointed to by arg. */
	in_iov[in_iovcnt].iov_base = (void *)arg;
	in_iov[in_iovcnt].iov_len = sizeof(*passthru_cmd);
	in_iovcnt += 1;
	if (in_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, out_iovcnt);
		return;
	}

	passthru_cmd = (struct nvme_passthru_cmd *)in_buf;
	data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode);

	if (data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		/* Make data pointer accessible (RO) */
		if (passthru_cmd->addr != 0) {
			in_iov[in_iovcnt].iov_base = (void *)passthru_cmd->addr;
			in_iov[in_iovcnt].iov_len = passthru_cmd->data_len;
			in_iovcnt += 1;
		}
		/* Make metadata pointer accessible (RO) */
		if (passthru_cmd->metadata != 0) {
			in_iov[in_iovcnt].iov_base = (void *)passthru_cmd->metadata;
			in_iov[in_iovcnt].iov_len = passthru_cmd->metadata_len;
			in_iovcnt += 1;
		}
	}

	if (!fuse_check_req_size(req, in_iov, in_iovcnt)) {
		return;
	}
	/* Always make result field writeable regardless of data transfer bits */
	out_iov[out_iovcnt].iov_base = &((struct nvme_passthru_cmd *)arg)->result;
	out_iov[out_iovcnt].iov_len = sizeof(uint32_t);
	out_iovcnt += 1;

	if (data_transfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
		/* Make data pointer accessible (WO) */
		if (passthru_cmd->data_len > 0) {
			out_iov[out_iovcnt].iov_base = (void *)passthru_cmd->addr;
			out_iov[out_iovcnt].iov_len = passthru_cmd->data_len;
			out_iovcnt += 1;
		}
		/* Make metadata pointer accessible (WO) */
		if (passthru_cmd->metadata_len > 0) {
			out_iov[out_iovcnt].iov_base = (void *)passthru_cmd->metadata;
			out_iov[out_iovcnt].iov_len = passthru_cmd->metadata_len;
			out_iovcnt += 1;
		}
	}

	if (!fuse_check_req_size(req, out_iov, out_iovcnt)) {
		return;
	}

	/* Pass 2: ask the kernel to map the payload ranges described above. */
	if (out_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, out_iov, out_iovcnt);
		return;
	}

	if (data_transfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
		fuse_reply_err(req, EINVAL);
		return;
	}

	/* H2C payloads were copied in right after the command struct. */
	if (data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		dptr = (passthru_cmd->addr == 0) ? NULL : (uint8_t *)in_buf + sizeof(*passthru_cmd);
		mdptr = (passthru_cmd->metadata == 0) ? NULL : (uint8_t *)in_buf + sizeof(*passthru_cmd) +
			passthru_cmd->data_len;
	}

	cuse_nvme_passthru_cmd_send(req, passthru_cmd, dptr, mdptr, cmd);
}
301 
302 static void
303 cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
304 {
305 	int rc;
306 	fuse_req_t req = arg;
307 
308 	rc = spdk_nvme_ctrlr_reset(ctrlr);
309 	if (rc) {
310 		fuse_reply_err(req, rc);
311 		return;
312 	}
313 
314 	fuse_reply_ioctl_iov(req, 0, NULL, 0);
315 }
316 
317 static void
318 cuse_nvme_subsys_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
319 {
320 	int rc;
321 	fuse_req_t req = arg;
322 
323 	rc = spdk_nvme_ctrlr_reset_subsystem(ctrlr);
324 	if (rc) {
325 		fuse_reply_err(req, rc);
326 		return;
327 	}
328 
329 	fuse_reply_ioctl_iov(req, 0, NULL, 0);
330 }
331 
332 static void
333 cuse_nvme_reset(fuse_req_t req, int cmd, void *arg,
334 		struct fuse_file_info *fi, unsigned flags,
335 		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
336 {
337 	int rv;
338 	struct cuse_device *cuse_device = fuse_req_userdata(req);
339 
340 	if (cuse_device->nsid) {
341 		SPDK_ERRLOG("Namespace reset not supported\n");
342 		fuse_reply_err(req, EINVAL);
343 		return;
344 	}
345 
346 	if (cmd == NVME_IOCTL_SUBSYS_RESET) {
347 		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBSYS_RESET\n");
348 		rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_subsys_reset_execute,
349 				      (void *)req);
350 	} else {
351 		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESET\n");
352 		rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req);
353 	}
354 	if (rv) {
355 		SPDK_ERRLOG("Cannot send reset\n");
356 		fuse_reply_err(req, EINVAL);
357 	}
358 }
359 
360 static void
361 cuse_nvme_rescan_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
362 {
363 	fuse_req_t req = arg;
364 
365 	nvme_ctrlr_update_namespaces(ctrlr);
366 	fuse_reply_ioctl_iov(req, 0, NULL, 0);
367 }
368 
369 static void
370 cuse_nvme_rescan(fuse_req_t req, int cmd, void *arg,
371 		 struct fuse_file_info *fi, unsigned flags,
372 		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
373 {
374 	int rv;
375 	struct cuse_device *cuse_device = fuse_req_userdata(req);
376 
377 	if (cuse_device->nsid) {
378 		SPDK_ERRLOG("Namespace rescan not supported\n");
379 		fuse_reply_err(req, EINVAL);
380 		return;
381 	}
382 
383 	rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_rescan_execute, (void *)req);
384 	if (rv) {
385 		SPDK_ERRLOG("Cannot send rescan\n");
386 		fuse_reply_err(req, EINVAL);
387 	}
388 }
389 
390 /*****************************************************************************
391  * Namespace IO requests
392  */
393 
394 static void
395 cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl)
396 {
397 	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
398 	uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */
399 
400 	fuse_reply_ioctl_iov(ctx->req, status_field, NULL, 0);
401 
402 	cuse_io_ctx_free(ctx);
403 }
404 
405 static void
406 cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
407 {
408 	int rc;
409 	struct cuse_io_ctx *ctx = arg;
410 	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
411 
412 	rc = spdk_nvme_ns_cmd_write_with_md(ns, ctrlr->external_io_msgs_qpair, ctx->data, ctx->metadata,
413 					    ctx->lba, /* LBA start */
414 					    ctx->lba_count, /* number of LBAs */
415 					    cuse_nvme_submit_io_write_done, ctx, 0,
416 					    ctx->appmask, ctx->apptag);
417 
418 	if (rc != 0) {
419 		SPDK_ERRLOG("write failed: rc = %d\n", rc);
420 		fuse_reply_err(ctx->req, rc);
421 		cuse_io_ctx_free(ctx);
422 		return;
423 	}
424 }
425 
/*
 * NVME_IOCTL_SUBMIT_IO write path: copy the user's data (and optional
 * metadata) out of the CUSE input buffer into DMA-able bounce buffers and
 * submit the write via the controller's io-msg channel.
 *
 * Layout of in_buf (established by the retry iovecs in cuse_nvme_submit_io):
 *   [struct nvme_user_io][data payload][metadata payload (optional)]
 */
static void
cuse_nvme_submit_io_write(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
			  struct fuse_file_info *fi, unsigned flags, uint32_t block_size, uint32_t md_size,
			  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io = in_buf;
	struct cuse_io_ctx *ctx;
	int rc;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	/* nblocks is zero-based per the NVMe spec. */
	ctx->lba_count = user_io->nblocks + 1;
	ctx->data_len = ctx->lba_count * block_size;

	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Write buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	/* Data payload immediately follows the nvme_user_io header. */
	memcpy(ctx->data, (uint8_t *)in_buf + sizeof(*user_io), ctx->data_len);

	if (user_io->metadata) {
		ctx->apptag = user_io->apptag;
		ctx->appmask = user_io->appmask;
		ctx->metadata_len = md_size * ctx->lba_count;
		ctx->metadata = spdk_zmalloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);

		if (ctx->metadata == NULL) {
			SPDK_ERRLOG("Cannot allocate memory for metadata\n");
			/* md_size == 0 means the namespace format has no
			 * metadata, so the zero-sized alloc returned NULL. */
			if (ctx->metadata_len == 0) {
				SPDK_ERRLOG("Device format does not support metadata\n");
			}
			fuse_reply_err(req, ENOMEM);
			cuse_io_ctx_free(ctx);
			return;
		}

		/* Metadata payload follows the data payload in in_buf. */
		memcpy(ctx->metadata, (uint8_t *)in_buf + sizeof(*user_io) + ctx->data_len,
		       ctx->metadata_len);
	}

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb,
			      ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send write io\n");
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
	}
}
486 
487 static void
488 cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl)
489 {
490 	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
491 	struct iovec iov[2];
492 	int iovcnt = 0;
493 	uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */
494 
495 	iov[iovcnt].iov_base = ctx->data;
496 	iov[iovcnt].iov_len = ctx->data_len;
497 	iovcnt += 1;
498 
499 	if (ctx->metadata) {
500 		iov[iovcnt].iov_base = ctx->metadata;
501 		iov[iovcnt].iov_len = ctx->metadata_len;
502 		iovcnt += 1;
503 	}
504 
505 	fuse_reply_ioctl_iov(ctx->req, status_field, iov, iovcnt);
506 
507 	cuse_io_ctx_free(ctx);
508 }
509 
510 static void
511 cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
512 {
513 	int rc;
514 	struct cuse_io_ctx *ctx = arg;
515 	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
516 
517 	rc = spdk_nvme_ns_cmd_read_with_md(ns, ctrlr->external_io_msgs_qpair, ctx->data, ctx->metadata,
518 					   ctx->lba, /* LBA start */
519 					   ctx->lba_count, /* number of LBAs */
520 					   cuse_nvme_submit_io_read_done, ctx, 0,
521 					   ctx->appmask, ctx->apptag);
522 
523 	if (rc != 0) {
524 		SPDK_ERRLOG("read failed: rc = %d\n", rc);
525 		fuse_reply_err(ctx->req, rc);
526 		cuse_io_ctx_free(ctx);
527 		return;
528 	}
529 }
530 
/*
 * NVME_IOCTL_SUBMIT_IO read path: allocate DMA-able bounce buffers for the
 * data (and optional metadata) and submit the read via the controller's
 * io-msg channel.  The buffers are copied back to the user by
 * cuse_nvme_submit_io_read_done() through the out iovecs set up in
 * cuse_nvme_submit_io().
 */
static void
cuse_nvme_submit_io_read(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
			 struct fuse_file_info *fi, unsigned flags, uint32_t block_size, uint32_t md_size,
			 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rc;
	struct cuse_io_ctx *ctx;
	const struct nvme_user_io *user_io = in_buf;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	/* nblocks is zero-based per the NVMe spec. */
	ctx->lba_count = user_io->nblocks + 1;

	ctx->data_len = ctx->lba_count * block_size;
	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Read buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	if (user_io->metadata) {
		ctx->apptag = user_io->apptag;
		ctx->appmask = user_io->appmask;
		ctx->metadata_len = md_size * ctx->lba_count;
		ctx->metadata = spdk_zmalloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);

		if (ctx->metadata == NULL) {
			SPDK_ERRLOG("Cannot allocate memory for metadata\n");
			/* md_size == 0 means the namespace format has no
			 * metadata, so the zero-sized alloc returned NULL. */
			if (ctx->metadata_len == 0) {
				SPDK_ERRLOG("Device format does not support metadata\n");
			}
			fuse_reply_err(req, ENOMEM);
			cuse_io_ctx_free(ctx);
			return;
		}
	}

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send read io\n");
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
	}
}
585 
586 
/*
 * Handle NVME_IOCTL_SUBMIT_IO.  Like the passthru handler, this uses the
 * CUSE unrestricted-ioctl retry protocol: first fetch the nvme_user_io
 * struct, then ask the kernel to map the data/metadata ranges it names
 * (input ranges for writes, output ranges for reads), and only then
 * dispatch to the read/write helper.
 */
static void
cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg,
		    struct fuse_file_info *fi, unsigned flags,
		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io;
	struct iovec in_iov[3], out_iov[2];
	int in_iovcnt = 0, out_iovcnt = 0;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	struct spdk_nvme_ns *ns;
	uint32_t block_size;
	uint32_t md_size;

	/* Pass 1: fetch the nvme_user_io struct pointed to by arg. */
	in_iov[in_iovcnt].iov_base = (void *)arg;
	in_iov[in_iovcnt].iov_len = sizeof(*user_io);
	in_iovcnt += 1;
	if (in_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, 0);
		return;
	}

	user_io = in_buf;

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	block_size = spdk_nvme_ns_get_sector_size(ns);
	md_size = spdk_nvme_ns_get_md_size(ns);

	switch (user_io->opcode) {
	case SPDK_NVME_OPC_READ:
		/* Reads need the user's data/metadata ranges mapped for output. */
		out_iov[out_iovcnt].iov_base = (void *)user_io->addr;
		out_iov[out_iovcnt].iov_len = (user_io->nblocks + 1) * block_size;
		out_iovcnt += 1;
		if (user_io->metadata != 0) {
			out_iov[out_iovcnt].iov_base = (void *)user_io->metadata;
			out_iov[out_iovcnt].iov_len = (user_io->nblocks + 1) * md_size;
			out_iovcnt += 1;
		}
		if (!fuse_check_req_size(req, out_iov, out_iovcnt)) {
			return;
		}
		if (out_bufsz == 0) {
			fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, out_iov, out_iovcnt);
			return;
		}

		cuse_nvme_submit_io_read(cuse_device, req, cmd, arg, fi, flags,
					 block_size, md_size, in_buf, in_bufsz, out_bufsz);
		break;
	case SPDK_NVME_OPC_WRITE:
		/* Writes need the user's data/metadata ranges copied in. */
		in_iov[in_iovcnt].iov_base = (void *)user_io->addr;
		in_iov[in_iovcnt].iov_len = (user_io->nblocks + 1) * block_size;
		in_iovcnt += 1;
		if (user_io->metadata != 0) {
			in_iov[in_iovcnt].iov_base = (void *)user_io->metadata;
			in_iov[in_iovcnt].iov_len = (user_io->nblocks + 1) * md_size;
			in_iovcnt += 1;
		}
		if (!fuse_check_req_size(req, in_iov, in_iovcnt)) {
			return;
		}
		/* Only the header arrived so far: retry to pull the payload too. */
		if (in_bufsz == sizeof(*user_io)) {
			fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, out_iovcnt);
			return;
		}

		cuse_nvme_submit_io_write(cuse_device, req, cmd, arg, fi, flags,
					  block_size, md_size, in_buf, in_bufsz, out_bufsz);
		break;
	default:
		SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode);
		fuse_reply_err(req, EINVAL);
		return;
	}

}
662 
663 /*****************************************************************************
664  * Other namespace IOCTLs
665  */
666 static void
667 cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg,
668 		  struct fuse_file_info *fi, unsigned flags,
669 		  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
670 {
671 	uint64_t size;
672 	struct spdk_nvme_ns *ns;
673 	struct cuse_device *cuse_device = fuse_req_userdata(req);
674 
675 	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);
676 
677 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
678 	size = spdk_nvme_ns_get_num_sectors(ns);
679 	fuse_reply_ioctl(req, 0, &size, sizeof(size));
680 }
681 
682 static void
683 cuse_blkpbszget(fuse_req_t req, int cmd, void *arg,
684 		struct fuse_file_info *fi, unsigned flags,
685 		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
686 {
687 	int pbsz;
688 	struct spdk_nvme_ns *ns;
689 	struct cuse_device *cuse_device = fuse_req_userdata(req);
690 
691 	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz);
692 
693 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
694 	pbsz = spdk_nvme_ns_get_sector_size(ns);
695 	fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz));
696 }
697 
698 static void
699 cuse_blkgetsize(fuse_req_t req, int cmd, void *arg,
700 		struct fuse_file_info *fi, unsigned flags,
701 		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
702 {
703 	long size;
704 	struct spdk_nvme_ns *ns;
705 	struct cuse_device *cuse_device = fuse_req_userdata(req);
706 
707 	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);
708 
709 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
710 
711 	/* return size in 512 bytes blocks */
712 	size = spdk_nvme_ns_get_num_sectors(ns) * 512 / spdk_nvme_ns_get_sector_size(ns);
713 	fuse_reply_ioctl(req, 0, &size, sizeof(size));
714 }
715 
716 static void
717 cuse_blkgetsectorsize(fuse_req_t req, int cmd, void *arg,
718 		      struct fuse_file_info *fi, unsigned flags,
719 		      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
720 {
721 	int ssize;
722 	struct spdk_nvme_ns *ns;
723 	struct cuse_device *cuse_device = fuse_req_userdata(req);
724 
725 	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, ssize);
726 
727 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
728 	ssize = spdk_nvme_ns_get_sector_size(ns);
729 	fuse_reply_ioctl(req, 0, &ssize, sizeof(ssize));
730 }
731 
/* Handle NVME_IOCTL_ID: the namespace id is returned directly as the ioctl
 * result value (no output buffer), matching the kernel nvme driver. */
static void
cuse_getid(fuse_req_t req, int cmd, void *arg,
	   struct fuse_file_info *fi, unsigned flags,
	   const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0);
}
741 
/* Payload for the SPDK-private "get transport" ioctl: the transport string
 * and address of the controller backing this CUSE node. */
struct cuse_transport {
	char trstring[SPDK_NVMF_TRSTRING_MAX_LEN + 1];
	char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1];
};

/* SPDK-specific ioctl (not part of the kernel nvme ABI). */
#define SPDK_CUSE_GET_TRANSPORT _IOWR('n', 0x1, struct cuse_transport)
748 
749 static void
750 cuse_get_transport(fuse_req_t req, int cmd, void *arg,
751 		   struct fuse_file_info *fi, unsigned flags,
752 		   const void *in_buf, size_t in_bufsz, size_t out_bufsz)
753 {
754 	struct cuse_device *cuse_device = fuse_req_userdata(req);
755 	struct cuse_transport tr = {};
756 
757 	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, tr);
758 
759 	memcpy(tr.trstring, cuse_device->ctrlr->trid.trstring, SPDK_NVMF_TRSTRING_MAX_LEN + 1);
760 	memcpy(tr.traddr, cuse_device->ctrlr->trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1);
761 
762 	fuse_reply_ioctl(req, 0, &tr, sizeof(tr));
763 }
764 
/*
 * ioctl dispatcher for controller nodes (/dev/spdk/nvmeX).  32-bit compat
 * requests are rejected; unknown commands get ENOTTY like a real chardev.
 */
static void
cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg,
		 struct fuse_file_info *fi, unsigned flags,
		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	/* cmd is cast to unsigned: ioctl numbers with the direction bits set
	 * are negative when kept in a signed int. */
	switch ((unsigned int)cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n");
		cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_RESET:
	case NVME_IOCTL_SUBSYS_RESET:
		cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_RESCAN:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESCAN\n");
		cuse_nvme_rescan(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case SPDK_CUSE_GET_TRANSPORT:
		SPDK_DEBUGLOG(nvme_cuse, "SPDK_CUSE_GET_TRANSPORT\n");
		cuse_get_transport(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, ENOTTY);
	}
}
801 
/*
 * ioctl dispatcher for namespace nodes (/dev/spdk/nvmeXnY).  Supports the
 * NVMe char-device ioctls plus the usual block-device size queries.
 */
static void
cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg,
	      struct fuse_file_info *fi, unsigned flags,
	      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	/* cmd is cast to unsigned: ioctl numbers with the direction bits set
	 * are negative when kept in a signed int. */
	switch ((unsigned int)cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n");
		cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_SUBMIT_IO:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBMIT_IO\n");
		cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_IO_CMD:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_IO_CMD\n");
		cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_ID:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ID\n");
		cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKPBSZGET:
		SPDK_DEBUGLOG(nvme_cuse, "BLKPBSZGET\n");
		cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKSSZGET:
		SPDK_DEBUGLOG(nvme_cuse, "BLKSSZGET\n");
		cuse_blkgetsectorsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE:
		SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE\n");
		/* Returns the device size as a number of 512-byte blocks (returns pointer to long) */
		cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE64:
		SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE64\n");
		/* Returns the device size in sectors (returns pointer to uint64_t) */
		cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, ENOTTY);
	}
}
860 
861 /*****************************************************************************
862  * CUSE threads initialization.
863  */
864 
/* open() handler: no per-open state, accept every open unconditionally. */
static void
cuse_open(fuse_req_t req, struct fuse_file_info *fi)
{
	fuse_reply_open(req, fi);
}
870 
/* CUSE callbacks for controller nodes (/dev/spdk/nvmeX). */
static const struct cuse_lowlevel_ops cuse_ctrlr_clop = {
	.open		= cuse_open,
	.ioctl		= cuse_ctrlr_ioctl,
};

/* CUSE callbacks for namespace nodes (/dev/spdk/nvmeXnY). */
static const struct cuse_lowlevel_ops cuse_ns_clop = {
	.open		= cuse_open,
	.ioctl		= cuse_ns_clop.open == NULL ? cuse_ns_ioctl : cuse_ns_ioctl,
};
880 
/*
 * Create the fuse session backing a CUSE device node.  Chooses the
 * controller or namespace callback table based on cuse_device->nsid and
 * stores the session in cuse_device->session.
 *
 * Returns 0 on success, -1 if the session could not be set up.
 */
static int
cuse_session_create(struct cuse_device *cuse_device)
{
	char *cuse_argv[] = { "cuse", "-f" };
	int multithreaded;
	int cuse_argc = SPDK_COUNTOF(cuse_argv);
	struct cuse_info ci;
	/* Room for "DEVNAME=" (8 chars) plus the 128-byte dev_name. */
	char devname_arg[128 + 8];
	const char *dev_info_argv[] = { devname_arg };

	snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name);

	memset(&ci, 0, sizeof(ci));
	ci.dev_info_argc = 1;
	ci.dev_info_argv = dev_info_argv;
	/* Unrestricted ioctls are required for the retry-based data transfer. */
	ci.flags = CUSE_UNRESTRICTED_IOCTL;

	if (cuse_device->nsid) {
		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop,
				       &multithreaded, cuse_device);
	} else {
		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
				       &multithreaded, cuse_device);
	}

	if (!cuse_device->session) {
		SPDK_ERRLOG("Cannot create cuse session\n");
		return -1;
	}
	SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);
	return 0;
}
913 
/*
 * Per-device thread: poll the fuse session fd and process incoming requests
 * until fuse_session_exit() is called (see cuse_nvme_ns_stop).  Polling with
 * a timeout lets the loop notice the exit flag even when no requests arrive.
 */
static void *
cuse_thread(void *arg)
{
	struct cuse_device *cuse_device = arg;
	int rc;
	struct fuse_buf buf = { .mem = NULL };
	struct pollfd fds;
	int timeout_msecs = 500;

	/* Don't inherit the SPDK core mask; this thread only does fuse I/O. */
	spdk_unaffinitize_thread();

	/* Receive and process fuse requests */
	fds.fd = fuse_session_fd(cuse_device->session);
	fds.events = POLLIN;
	while (!fuse_session_exited(cuse_device->session)) {
		rc = poll(&fds, 1, timeout_msecs);
		if (rc <= 0) {
			/* Timeout or transient poll error: re-check exit flag. */
			continue;
		}
		/* fuse reuses/grows buf.mem across iterations; freed on exit. */
		rc = fuse_session_receive_buf(cuse_device->session, &buf);
		if (rc > 0) {
			fuse_session_process_buf(cuse_device->session, &buf);
		}
	}
	free(buf.mem);
	fuse_session_reset(cuse_device->session);
	pthread_exit(NULL);
}
942 
943 static struct cuse_device *nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr,
944 		uint32_t nsid);
945 
946 /*****************************************************************************
947  * CUSE devices management
948  */
949 
950 static int
951 cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
952 {
953 	struct cuse_device *ns_device;
954 	int rv;
955 
956 	ns_device = nvme_cuse_get_cuse_ns_device(ctrlr_device->ctrlr, nsid);
957 	if (ns_device != NULL) {
958 		return 0;
959 	}
960 
961 	ns_device = calloc(1, sizeof(struct cuse_device));
962 	if (ns_device == NULL) {
963 		return -ENOMEM;
964 	}
965 
966 	ns_device->ctrlr = ctrlr_device->ctrlr;
967 	ns_device->ctrlr_device = ctrlr_device;
968 	ns_device->nsid = nsid;
969 	rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d",
970 		      ctrlr_device->dev_name, ns_device->nsid);
971 	if (rv < 0) {
972 		SPDK_ERRLOG("Device name too long.\n");
973 		free(ns_device);
974 		return -ENAMETOOLONG;
975 	}
976 	rv = cuse_session_create(ns_device);
977 	if (rv != 0) {
978 		free(ns_device);
979 		return rv;
980 	}
981 	rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device);
982 	if (rv != 0) {
983 		SPDK_ERRLOG("pthread_create failed\n");
984 		free(ns_device);
985 		return -rv;
986 	}
987 	TAILQ_INSERT_TAIL(&ctrlr_device->ns_devices, ns_device, tailq);
988 
989 	return 0;
990 }
991 
/*
 * Stop and free a namespace CUSE device: signal its fuse session to exit,
 * wait for cuse_thread() to finish, unlink it from the controller's list,
 * and tear the session down.
 */
static void
cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, struct cuse_device *ns_device)
{
	if (ns_device->session != NULL) {
		/* Wakes cuse_thread()'s poll loop via the exit flag. */
		fuse_session_exit(ns_device->session);
	}
	pthread_join(ns_device->tid, NULL);
	TAILQ_REMOVE(&ctrlr_device->ns_devices, ns_device, tailq);
	if (ns_device->session != NULL) {
		cuse_lowlevel_teardown(ns_device->session);
	}
	free(ns_device);
}
1005 
1006 static int
1007 nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index)
1008 {
1009 	int dev_fd;
1010 	int pid;
1011 	void *dev_map;
1012 	struct flock cusedev_lock = {
1013 		.l_type = F_WRLCK,
1014 		.l_whence = SEEK_SET,
1015 		.l_start = 0,
1016 		.l_len = 0,
1017 	};
1018 
1019 	snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name),
1020 		 "/var/tmp/spdk_nvme_cuse_lock_%" PRIu32, index);
1021 
1022 	dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
1023 	if (dev_fd == -1) {
1024 		SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name);
1025 		return -errno;
1026 	}
1027 
1028 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
1029 		SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name);
1030 		close(dev_fd);
1031 		return -errno;
1032 	}
1033 
1034 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
1035 		       MAP_SHARED, dev_fd, 0);
1036 	if (dev_map == MAP_FAILED) {
1037 		SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno);
1038 		close(dev_fd);
1039 		return -errno;
1040 	}
1041 
1042 	if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) {
1043 		pid = *(int *)dev_map;
1044 		SPDK_ERRLOG("Cannot create lock on device %s, probably"
1045 			    " process %d has claimed it\n", ctrlr_device->lock_name, pid);
1046 		munmap(dev_map, sizeof(int));
1047 		close(dev_fd);
1048 		/* F_SETLK returns unspecified errnos, normalize them */
1049 		return -EACCES;
1050 	}
1051 
1052 	*(int *)dev_map = (int)getpid();
1053 	munmap(dev_map, sizeof(int));
1054 	ctrlr_device->claim_fd = dev_fd;
1055 	ctrlr_device->index = index;
1056 	/* Keep dev_fd open to maintain the lock. */
1057 	return 0;
1058 }
1059 
/* Release the claim taken by nvme_cuse_claim(): closing claim_fd drops the
 * fcntl() advisory lock, then the lock file is removed.  Order matters —
 * the lock must be released via close() before the name is unlinked. */
static void
nvme_cuse_unclaim(struct cuse_device *ctrlr_device)
{
	close(ctrlr_device->claim_fd);
	ctrlr_device->claim_fd = -1;
	unlink(ctrlr_device->lock_name);
}
1067 
1068 static void
1069 cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
1070 {
1071 	struct cuse_device *ns_device, *tmp;
1072 
1073 	TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) {
1074 		cuse_nvme_ns_stop(ctrlr_device, ns_device);
1075 	}
1076 
1077 	assert(TAILQ_EMPTY(&ctrlr_device->ns_devices));
1078 
1079 	fuse_session_exit(ctrlr_device->session);
1080 	pthread_join(ctrlr_device->tid, NULL);
1081 	TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
1082 	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
1083 	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
1084 		spdk_bit_array_free(&g_ctrlr_started);
1085 	}
1086 	nvme_cuse_unclaim(ctrlr_device);
1087 	if (ctrlr_device->session != NULL) {
1088 		cuse_lowlevel_teardown(ctrlr_device->session);
1089 	}
1090 	free(ctrlr_device);
1091 }
1092 
1093 static int
1094 cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device)
1095 {
1096 	struct cuse_device *ns_device, *tmp;
1097 	uint32_t nsid;
1098 
1099 	/* Remove namespaces that have disappeared */
1100 	TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) {
1101 		if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, ns_device->nsid)) {
1102 			cuse_nvme_ns_stop(ctrlr_device, ns_device);
1103 		}
1104 	}
1105 
1106 	/* Add new namespaces */
1107 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr_device->ctrlr);
1108 	while (nsid != 0) {
1109 		if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) {
1110 			SPDK_ERRLOG("Cannot start CUSE namespace device.");
1111 			return -1;
1112 		}
1113 
1114 		nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr_device->ctrlr, nsid);
1115 	}
1116 
1117 	return 0;
1118 }
1119 
1120 static int
1121 nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
1122 {
1123 	int rv = 0;
1124 	struct cuse_device *ctrlr_device;
1125 
1126 	SPDK_NOTICELOG("Creating cuse device for controller\n");
1127 
1128 	if (g_ctrlr_started == NULL) {
1129 		g_ctrlr_started = spdk_bit_array_create(128);
1130 		if (g_ctrlr_started == NULL) {
1131 			SPDK_ERRLOG("Cannot create bit array\n");
1132 			return -ENOMEM;
1133 		}
1134 	}
1135 
1136 	ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device));
1137 	if (!ctrlr_device) {
1138 		SPDK_ERRLOG("Cannot allocate memory for ctrlr_device.");
1139 		rv = -ENOMEM;
1140 		goto free_device;
1141 	}
1142 
1143 	ctrlr_device->ctrlr = ctrlr;
1144 
1145 	/* Check if device already exists, if not increment index until success */
1146 	ctrlr_device->index = 0;
1147 	while (1) {
1148 		ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index);
1149 		if (ctrlr_device->index == UINT32_MAX) {
1150 			SPDK_ERRLOG("Too many registered controllers\n");
1151 			goto free_device;
1152 		}
1153 
1154 		if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) {
1155 			break;
1156 		}
1157 		ctrlr_device->index++;
1158 	}
1159 	spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index);
1160 	snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
1161 		 ctrlr_device->index);
1162 
1163 	rv = cuse_session_create(ctrlr_device);
1164 	if (rv != 0) {
1165 		goto clear_and_free;
1166 	}
1167 
1168 	rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device);
1169 	if (rv != 0) {
1170 		SPDK_ERRLOG("pthread_create failed\n");
1171 		rv = -rv;
1172 		goto clear_and_free;
1173 	}
1174 
1175 	TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);
1176 
1177 	TAILQ_INIT(&ctrlr_device->ns_devices);
1178 
1179 	/* Start all active namespaces */
1180 	if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) {
1181 		SPDK_ERRLOG("Cannot start CUSE namespace devices.");
1182 		cuse_nvme_ctrlr_stop(ctrlr_device);
1183 		rv = -1;
1184 		goto clear_and_free;
1185 	}
1186 
1187 	return 0;
1188 
1189 clear_and_free:
1190 	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
1191 free_device:
1192 	free(ctrlr_device);
1193 	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
1194 		spdk_bit_array_free(&g_ctrlr_started);
1195 	}
1196 	return rv;
1197 }
1198 
1199 static struct cuse_device *
1200 nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr)
1201 {
1202 	struct cuse_device *ctrlr_device = NULL;
1203 
1204 	TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) {
1205 		if (ctrlr_device->ctrlr == ctrlr) {
1206 			break;
1207 		}
1208 	}
1209 
1210 	return ctrlr_device;
1211 }
1212 
1213 static struct cuse_device *
1214 nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
1215 {
1216 	struct cuse_device *ctrlr_device = NULL;
1217 	struct cuse_device *ns_device;
1218 
1219 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1220 	if (!ctrlr_device) {
1221 		return NULL;
1222 	}
1223 
1224 	TAILQ_FOREACH(ns_device, &ctrlr_device->ns_devices, tailq) {
1225 		if (ns_device->nsid == nsid) {
1226 			return ns_device;
1227 		}
1228 	}
1229 
1230 	return NULL;
1231 }
1232 
1233 static void
1234 nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr)
1235 {
1236 	struct cuse_device *ctrlr_device;
1237 
1238 	pthread_mutex_lock(&g_cuse_mtx);
1239 
1240 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1241 	if (!ctrlr_device) {
1242 		SPDK_ERRLOG("Cannot find associated CUSE device\n");
1243 		pthread_mutex_unlock(&g_cuse_mtx);
1244 		return;
1245 	}
1246 
1247 	cuse_nvme_ctrlr_stop(ctrlr_device);
1248 
1249 	pthread_mutex_unlock(&g_cuse_mtx);
1250 }
1251 
1252 static void
1253 nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr)
1254 {
1255 	struct cuse_device *ctrlr_device;
1256 
1257 	pthread_mutex_lock(&g_cuse_mtx);
1258 
1259 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1260 	if (!ctrlr_device) {
1261 		pthread_mutex_unlock(&g_cuse_mtx);
1262 		return;
1263 	}
1264 
1265 	cuse_nvme_ctrlr_update_namespaces(ctrlr_device);
1266 
1267 	pthread_mutex_unlock(&g_cuse_mtx);
1268 }
1269 
/* Hooks handed to the nvme_io_msg layer: .stop tears down all CUSE devices
 * for a controller (nvme_cuse_stop) and .update resyncs the per-namespace
 * devices (nvme_cuse_update). */
static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = {
	.name = "cuse",
	.stop = nvme_cuse_stop,
	.update = nvme_cuse_update,
};
1275 
1276 int
1277 spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
1278 {
1279 	int rc;
1280 
1281 	rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer);
1282 	if (rc) {
1283 		return rc;
1284 	}
1285 
1286 	pthread_mutex_lock(&g_cuse_mtx);
1287 
1288 	rc = nvme_cuse_start(ctrlr);
1289 	if (rc) {
1290 		nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
1291 	}
1292 
1293 	pthread_mutex_unlock(&g_cuse_mtx);
1294 
1295 	return rc;
1296 }
1297 
1298 int
1299 spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
1300 {
1301 	struct cuse_device *ctrlr_device;
1302 
1303 	pthread_mutex_lock(&g_cuse_mtx);
1304 
1305 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1306 	if (!ctrlr_device) {
1307 		SPDK_ERRLOG("Cannot find associated CUSE device\n");
1308 		pthread_mutex_unlock(&g_cuse_mtx);
1309 		return -ENODEV;
1310 	}
1311 
1312 	cuse_nvme_ctrlr_stop(ctrlr_device);
1313 
1314 	pthread_mutex_unlock(&g_cuse_mtx);
1315 
1316 	nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
1317 
1318 	return 0;
1319 }
1320 
/* Public wrapper: resync CUSE namespace devices with the controller's
 * active namespace set.  Locking is handled inside nvme_cuse_update(). */
void
spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_cuse_update(ctrlr);
}
1326 
1327 int
1328 spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size)
1329 {
1330 	struct cuse_device *ctrlr_device;
1331 	size_t req_len;
1332 
1333 	pthread_mutex_lock(&g_cuse_mtx);
1334 
1335 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1336 	if (!ctrlr_device) {
1337 		pthread_mutex_unlock(&g_cuse_mtx);
1338 		return -ENODEV;
1339 	}
1340 
1341 	req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name));
1342 	if (*size < req_len) {
1343 		*size = req_len;
1344 		pthread_mutex_unlock(&g_cuse_mtx);
1345 		return -ENOSPC;
1346 	}
1347 	snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name);
1348 
1349 	pthread_mutex_unlock(&g_cuse_mtx);
1350 
1351 	return 0;
1352 }
1353 
1354 int
1355 spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size)
1356 {
1357 	struct cuse_device *ns_device;
1358 	size_t req_len;
1359 
1360 	pthread_mutex_lock(&g_cuse_mtx);
1361 
1362 	ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid);
1363 	if (!ns_device) {
1364 		pthread_mutex_unlock(&g_cuse_mtx);
1365 		return -ENODEV;
1366 	}
1367 
1368 	req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name));
1369 	if (*size < req_len) {
1370 		*size = req_len;
1371 		pthread_mutex_unlock(&g_cuse_mtx);
1372 		return -ENOSPC;
1373 	}
1374 	snprintf(name, req_len + 1, "%s", ns_device->dev_name);
1375 
1376 	pthread_mutex_unlock(&g_cuse_mtx);
1377 
1378 	return 0;
1379 }
1380 
1381 SPDK_LOG_REGISTER_COMPONENT(nvme_cuse)
1382 
1383 #else /* SPDK_CONFIG_NVME_CUSE */
1384 
/* Stub used when SPDK is built without SPDK_CONFIG_NVME_CUSE: always fails
 * with -ENOTSUP. */
int
spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size)
{
	SPDK_ERRLOG("spdk_nvme_cuse_get_ctrlr_name() is unsupported\n");
	return -ENOTSUP;
}
1391 
/* Stub used when SPDK is built without SPDK_CONFIG_NVME_CUSE: always fails
 * with -ENOTSUP. */
int
spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size)
{
	SPDK_ERRLOG("spdk_nvme_cuse_get_ns_name() is unsupported\n");
	return -ENOTSUP;
}
1398 
/* Stub used when SPDK is built without SPDK_CONFIG_NVME_CUSE: always fails
 * with -ENOTSUP. */
int
spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
{
	SPDK_ERRLOG("spdk_nvme_cuse_register() is unsupported\n");
	return -ENOTSUP;
}
1405 
/* Stub used when SPDK is built without SPDK_CONFIG_NVME_CUSE: always fails
 * with -ENOTSUP. */
int
spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
{
	SPDK_ERRLOG("spdk_nvme_cuse_unregister() is unsupported\n");
	return -ENOTSUP;
}
1412 
/* Stub used when SPDK is built without SPDK_CONFIG_NVME_CUSE: logs and
 * returns without doing anything (void return, so no error code). */
void
spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	SPDK_ERRLOG("spdk_nvme_cuse_update_namespaces() is unsupported\n");
}
1418 
1419 #endif
1420