/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2019 Intel Corporation.
 *   All rights reserved.
 */
#include "spdk/stdinc.h"
#include "spdk/config.h"
#include "spdk/log.h"
#include "spdk/nvme.h"

#ifdef SPDK_CONFIG_NVME_CUSE
#define FUSE_USE_VERSION 31

#include <fuse3/cuse_lowlevel.h>

#include <linux/nvme_ioctl.h>
#include <linux/fs.h>

#include "nvme_internal.h"
#include "nvme_io_msg.h"
#include "nvme_cuse.h"

struct cuse_device {
	char				dev_name[128];
	uint32_t			index;
	int				claim_fd;
	char				lock_name[64];

	struct spdk_nvme_ctrlr		*ctrlr;		/**< NVMe controller */
	uint32_t			nsid;		/**< NVMe namespace ID, or 0 for the controller device */

	pthread_t			tid;
	struct fuse_session		*session;

	struct cuse_device		*ctrlr_device;
	TAILQ_HEAD(, cuse_device)	ns_devices;

	TAILQ_ENTRY(cuse_device)	tailq;
};

static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
static struct spdk_bit_array *g_ctrlr_started;

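/*
 * Context for a single ioctl, carried from the CUSE thread to the SPDK
 * thread that services the controller's external I/O message queue. The
 * data and metadata buffers are DMA-able copies of the caller's buffers;
 * the fuse request is replied to from the NVMe completion callback.
 */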
struct cuse_io_ctx {
	struct spdk_nvme_cmd		nvme_cmd;
	enum spdk_nvme_data_transfer	data_transfer;

	uint64_t			lba;
	uint32_t			lba_count;
	uint16_t			apptag;
	uint16_t			appmask;

	void				*data;
	void				*metadata;

	int				data_len;
	int				metadata_len;

	fuse_req_t			req;
};

static void
cuse_io_ctx_free(struct cuse_io_ctx *ctx)
{
	spdk_free(ctx->data);
	spdk_free(ctx->metadata);
	free(ctx);
}

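/*
 * CUSE transfers ioctl buffers on demand: a handler is first invoked with
 * out_bufsz == 0 and must reply with fuse_reply_ioctl_retry(), describing
 * the user-space ranges it needs; the kernel then re-issues the ioctl with
 * those buffers mapped. This macro implements that retry step for ioctls
 * that only write back a single fixed-size value.
 */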
#define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val)		\
	if (out_bufsz == 0) {						\
		struct iovec out_iov;					\
		out_iov.iov_base = (void *)arg;				\
		out_iov.iov_len = sizeof(val);				\
		fuse_reply_ioctl_retry(req, NULL, 0, &out_iov, 1);	\
		return;							\
	}

#define FUSE_MAX_SIZE (128 * 1024)

static bool
fuse_check_req_size(fuse_req_t req, struct iovec iov[], int iovcnt)
{
	int total_iov_len = 0;
	for (int i = 0; i < iovcnt; i++) {
		total_iov_len += iov[i].iov_len;
		if (total_iov_len > FUSE_MAX_SIZE) {
			fuse_reply_err(req, ENOMEM);
			SPDK_ERRLOG("FUSE request cannot be larger than %d\n", FUSE_MAX_SIZE);
			return false;
		}
	}
	return true;
}

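/*
 * Completion callback for passthru commands. The raw completion status
 * (minus the phase bit) becomes the ioctl return value, as the Linux NVMe
 * driver does for passthru commands; cdw0 and any controller-to-host data
 * and metadata are copied back to the caller's buffers.
 */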
static void
cuse_nvme_passthru_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = arg;
	struct iovec out_iov[3];
	struct spdk_nvme_cpl _cpl;
	int out_iovcnt = 0;
	uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */

	memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl));
	out_iov[out_iovcnt].iov_base = &_cpl.cdw0;
	out_iov[out_iovcnt].iov_len = sizeof(_cpl.cdw0);
	out_iovcnt += 1;

	if (ctx->data_transfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
		if (ctx->data_len > 0) {
			out_iov[out_iovcnt].iov_base = ctx->data;
			out_iov[out_iovcnt].iov_len = ctx->data_len;
			out_iovcnt += 1;
		}
		if (ctx->metadata_len > 0) {
			out_iov[out_iovcnt].iov_base = ctx->metadata;
			out_iov[out_iovcnt].iov_len = ctx->metadata_len;
			out_iovcnt += 1;
		}
	}

	fuse_reply_ioctl_iov(ctx->req, status_field, out_iov, out_iovcnt);
	cuse_io_ctx_free(ctx);
}

static void
cuse_nvme_passthru_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;

	if (nsid != 0) {
		rc = spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, ctrlr->external_io_msgs_qpair, &ctx->nvme_cmd,
							ctx->data,
							ctx->data_len, ctx->metadata, cuse_nvme_passthru_cmd_cb, (void *)ctx);
	} else {
		rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len,
						   cuse_nvme_passthru_cmd_cb, (void *)ctx);
	}
	if (rc < 0) {
		fuse_reply_err(ctx->req, EINVAL);
		cuse_io_ctx_free(ctx);
	}
}

static void
cuse_nvme_passthru_cmd_send(fuse_req_t req, struct nvme_passthru_cmd *passthru_cmd,
			    const void *data, const void *metadata, int cmd)
{
	struct cuse_io_ctx *ctx;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	int rv;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode);

	memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd));
	ctx->nvme_cmd.opc = passthru_cmd->opcode;
	ctx->nvme_cmd.nsid = passthru_cmd->nsid;
	ctx->nvme_cmd.cdw10 = passthru_cmd->cdw10;
	ctx->nvme_cmd.cdw11 = passthru_cmd->cdw11;
	ctx->nvme_cmd.cdw12 = passthru_cmd->cdw12;
	ctx->nvme_cmd.cdw13 = passthru_cmd->cdw13;
	ctx->nvme_cmd.cdw14 = passthru_cmd->cdw14;
	ctx->nvme_cmd.cdw15 = passthru_cmd->cdw15;

	ctx->data_len = passthru_cmd->data_len;
	ctx->metadata_len = passthru_cmd->metadata_len;

	if (ctx->data_len > 0) {
		ctx->data = spdk_malloc(ctx->data_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!ctx->data) {
			SPDK_ERRLOG("Cannot allocate memory for data\n");
			fuse_reply_err(req, ENOMEM);
			free(ctx);
			return;
		}
		if (data != NULL) {
			memcpy(ctx->data, data, ctx->data_len);
		}
	}

	if (ctx->metadata_len > 0) {
		ctx->metadata = spdk_malloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!ctx->metadata) {
			SPDK_ERRLOG("Cannot allocate memory for metadata\n");
			fuse_reply_err(req, ENOMEM);
			cuse_io_ctx_free(ctx);
			return;
		}
		if (metadata != NULL) {
			memcpy(ctx->metadata, metadata, ctx->metadata_len);
		}
	}

	if ((unsigned int)cmd != NVME_IOCTL_ADMIN_CMD) {
		/* Send NS for IO IOCTLs */
		rv = nvme_io_msg_send(cuse_device->ctrlr, passthru_cmd->nsid, cuse_nvme_passthru_cmd_execute, ctx);
	} else {
		/* NS == 0 for Admin IOCTLs */
		rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_passthru_cmd_execute, ctx);
	}
	if (rv) {
		SPDK_ERRLOG("Cannot send io msg to the controller\n");
		fuse_reply_err(req, -rv);
		cuse_io_ctx_free(ctx);
		return;
	}
}

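/*
 * Entry point for NVME_IOCTL_ADMIN_CMD and NVME_IOCTL_IO_CMD. The handler
 * may run up to three times for one ioctl: with no buffers mapped (retry
 * with the input ranges), with only the input mapped (retry with the
 * output ranges), and finally with both mapped, at which point the
 * command is sent to the controller.
 */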
static void
cuse_nvme_passthru_cmd(fuse_req_t req, int cmd, void *arg,
		       struct fuse_file_info *fi, unsigned flags,
		       const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct nvme_passthru_cmd *passthru_cmd;
	struct iovec in_iov[3], out_iov[3];
	int in_iovcnt = 0, out_iovcnt = 0;
	const void *dptr = NULL, *mdptr = NULL;
	enum spdk_nvme_data_transfer data_transfer;

	in_iov[in_iovcnt].iov_base = (void *)arg;
	in_iov[in_iovcnt].iov_len = sizeof(*passthru_cmd);
	in_iovcnt += 1;
	if (in_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, out_iovcnt);
		return;
	}

	passthru_cmd = (struct nvme_passthru_cmd *)in_buf;
	data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode);

	if (data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		/* Make data pointer accessible (RO) */
		if (passthru_cmd->addr != 0) {
			in_iov[in_iovcnt].iov_base = (void *)passthru_cmd->addr;
			in_iov[in_iovcnt].iov_len = passthru_cmd->data_len;
			in_iovcnt += 1;
		}
		/* Make metadata pointer accessible (RO) */
		if (passthru_cmd->metadata != 0) {
			in_iov[in_iovcnt].iov_base = (void *)passthru_cmd->metadata;
			in_iov[in_iovcnt].iov_len = passthru_cmd->metadata_len;
			in_iovcnt += 1;
		}
	}

	if (!fuse_check_req_size(req, in_iov, in_iovcnt)) {
		return;
	}
	/* Always make result field writeable regardless of data transfer bits */
	out_iov[out_iovcnt].iov_base = &((struct nvme_passthru_cmd *)arg)->result;
	out_iov[out_iovcnt].iov_len = sizeof(uint32_t);
	out_iovcnt += 1;

	if (data_transfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
		/* Make data pointer accessible (WO) */
		if (passthru_cmd->data_len > 0) {
			out_iov[out_iovcnt].iov_base = (void *)passthru_cmd->addr;
			out_iov[out_iovcnt].iov_len = passthru_cmd->data_len;
			out_iovcnt += 1;
		}
		/* Make metadata pointer accessible (WO) */
		if (passthru_cmd->metadata_len > 0) {
			out_iov[out_iovcnt].iov_base = (void *)passthru_cmd->metadata;
			out_iov[out_iovcnt].iov_len = passthru_cmd->metadata_len;
			out_iovcnt += 1;
		}
	}

	if (!fuse_check_req_size(req, out_iov, out_iovcnt)) {
		return;
	}

	if (out_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, out_iov, out_iovcnt);
		return;
	}

	if (data_transfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
		fuse_reply_err(req, EINVAL);
		return;
	}

	if (data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		dptr = (passthru_cmd->addr == 0) ? NULL : in_buf + sizeof(*passthru_cmd);
		mdptr = (passthru_cmd->metadata == 0) ? NULL : in_buf + sizeof(*passthru_cmd) +
			passthru_cmd->data_len;
	}

	cuse_nvme_passthru_cmd_send(req, passthru_cmd, dptr, mdptr, cmd);
}

static void
cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	fuse_req_t req = arg;

	rc = spdk_nvme_ctrlr_reset(ctrlr);
	if (rc) {
		fuse_reply_err(req, rc);
		return;
	}

	fuse_reply_ioctl_iov(req, 0, NULL, 0);
}

static void
cuse_nvme_subsys_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	fuse_req_t req = arg;

	rc = spdk_nvme_ctrlr_reset_subsystem(ctrlr);
	if (rc) {
		fuse_reply_err(req, rc);
		return;
	}

	fuse_reply_ioctl_iov(req, 0, NULL, 0);
}

static void
cuse_nvme_reset(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rv;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	if (cuse_device->nsid) {
		SPDK_ERRLOG("Namespace reset not supported\n");
		fuse_reply_err(req, EINVAL);
		return;
	}

	if (cmd == NVME_IOCTL_SUBSYS_RESET) {
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBSYS_RESET\n");
		rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_subsys_reset_execute,
				      (void *)req);
	} else {
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESET\n");
		rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req);
	}
	if (rv) {
		SPDK_ERRLOG("Cannot send reset\n");
		fuse_reply_err(req, EINVAL);
	}
}

static void
cuse_nvme_rescan_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	fuse_req_t req = arg;

	nvme_ctrlr_update_namespaces(ctrlr);
	fuse_reply_ioctl_iov(req, 0, NULL, 0);
}

static void
cuse_nvme_rescan(fuse_req_t req, int cmd, void *arg,
		 struct fuse_file_info *fi, unsigned flags,
		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rv;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	if (cuse_device->nsid) {
		SPDK_ERRLOG("Namespace rescan not supported\n");
		fuse_reply_err(req, EINVAL);
		return;
	}

	rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_rescan_execute, (void *)req);
	if (rv) {
		SPDK_ERRLOG("Cannot send rescan\n");
		fuse_reply_err(req, EINVAL);
	}
}

/*****************************************************************************
 * Namespace IO requests
 */

static void
cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
	uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */

	fuse_reply_ioctl_iov(ctx->req, status_field, NULL, 0);

	cuse_io_ctx_free(ctx);
}

static void
cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;
	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

	rc = spdk_nvme_ns_cmd_write_with_md(ns, ctrlr->external_io_msgs_qpair, ctx->data, ctx->metadata,
					    ctx->lba, /* LBA start */
					    ctx->lba_count, /* number of LBAs */
					    cuse_nvme_submit_io_write_done, ctx, 0,
					    ctx->appmask, ctx->apptag);

	if (rc != 0) {
		SPDK_ERRLOG("write failed: rc = %d\n", rc);
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
		return;
	}
}

static void
cuse_nvme_submit_io_write(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
			  struct fuse_file_info *fi, unsigned flags, uint32_t block_size, uint32_t md_size,
			  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io = in_buf;
	struct cuse_io_ctx *ctx;
	int rc;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	ctx->lba_count = user_io->nblocks + 1;
	ctx->data_len = ctx->lba_count * block_size;

	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Write buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	memcpy(ctx->data, in_buf + sizeof(*user_io), ctx->data_len);

	if (user_io->metadata) {
		ctx->apptag = user_io->apptag;
		ctx->appmask = user_io->appmask;
		ctx->metadata_len = md_size * ctx->lba_count;
		ctx->metadata = spdk_zmalloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);

		if (ctx->metadata == NULL) {
			SPDK_ERRLOG("Cannot allocate memory for metadata\n");
			if (ctx->metadata_len == 0) {
				SPDK_ERRLOG("Device format does not support metadata\n");
			}
			fuse_reply_err(req, ENOMEM);
			cuse_io_ctx_free(ctx);
			return;
		}

		memcpy(ctx->metadata, in_buf + sizeof(*user_io) + ctx->data_len, ctx->metadata_len);
	}

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb,
			      ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send write io\n");
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
	}
}

static void
cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
	struct iovec iov[2];
	int iovcnt = 0;
	uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */

	iov[iovcnt].iov_base = ctx->data;
	iov[iovcnt].iov_len = ctx->data_len;
	iovcnt += 1;

	if (ctx->metadata) {
		iov[iovcnt].iov_base = ctx->metadata;
		iov[iovcnt].iov_len = ctx->metadata_len;
		iovcnt += 1;
	}

	fuse_reply_ioctl_iov(ctx->req, status_field, iov, iovcnt);

	cuse_io_ctx_free(ctx);
}

static void
cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;
	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

	rc = spdk_nvme_ns_cmd_read_with_md(ns, ctrlr->external_io_msgs_qpair, ctx->data, ctx->metadata,
					   ctx->lba, /* LBA start */
					   ctx->lba_count, /* number of LBAs */
					   cuse_nvme_submit_io_read_done, ctx, 0,
					   ctx->appmask, ctx->apptag);

	if (rc != 0) {
		SPDK_ERRLOG("read failed: rc = %d\n", rc);
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
		return;
	}
}

static void
cuse_nvme_submit_io_read(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
			 struct fuse_file_info *fi, unsigned flags, uint32_t block_size, uint32_t md_size,
			 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rc;
	struct cuse_io_ctx *ctx;
	const struct nvme_user_io *user_io = in_buf;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	ctx->lba_count = user_io->nblocks + 1;

	ctx->data_len = ctx->lba_count * block_size;
	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Read buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	if (user_io->metadata) {
		ctx->apptag = user_io->apptag;
		ctx->appmask = user_io->appmask;
		ctx->metadata_len = md_size * ctx->lba_count;
		ctx->metadata = spdk_zmalloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);

		if (ctx->metadata == NULL) {
			SPDK_ERRLOG("Cannot allocate memory for metadata\n");
			if (ctx->metadata_len == 0) {
				SPDK_ERRLOG("Device format does not support metadata\n");
			}
			fuse_reply_err(req, ENOMEM);
			cuse_io_ctx_free(ctx);
			return;
		}
	}

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send read io\n");
		fuse_reply_err(ctx->req, rc);
		cuse_io_ctx_free(ctx);
	}
}

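/*
 * NVME_IOCTL_SUBMIT_IO entry point. The handler is re-invoked as buffers
 * become available: first to map the nvme_user_io header, then the data
 * (and optional metadata) buffers in the direction implied by the opcode,
 * before dispatching the read or write.
 */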
static void
cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg,
		    struct fuse_file_info *fi, unsigned flags,
		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io;
	struct iovec in_iov[3], out_iov[2];
	int in_iovcnt = 0, out_iovcnt = 0;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	struct spdk_nvme_ns *ns;
	uint32_t block_size;
	uint32_t md_size;

	in_iov[in_iovcnt].iov_base = (void *)arg;
	in_iov[in_iovcnt].iov_len = sizeof(*user_io);
	in_iovcnt += 1;
	if (in_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, 0);
		return;
	}

	user_io = in_buf;

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	block_size = spdk_nvme_ns_get_sector_size(ns);
	md_size = spdk_nvme_ns_get_md_size(ns);

	switch (user_io->opcode) {
	case SPDK_NVME_OPC_READ:
		out_iov[out_iovcnt].iov_base = (void *)user_io->addr;
		out_iov[out_iovcnt].iov_len = (user_io->nblocks + 1) * block_size;
		out_iovcnt += 1;
		if (user_io->metadata != 0) {
			out_iov[out_iovcnt].iov_base = (void *)user_io->metadata;
			out_iov[out_iovcnt].iov_len = (user_io->nblocks + 1) * md_size;
			out_iovcnt += 1;
		}
		if (!fuse_check_req_size(req, out_iov, out_iovcnt)) {
			return;
		}
		if (out_bufsz == 0) {
			fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, out_iov, out_iovcnt);
			return;
		}

		cuse_nvme_submit_io_read(cuse_device, req, cmd, arg, fi, flags,
					 block_size, md_size, in_buf, in_bufsz, out_bufsz);
		break;
	case SPDK_NVME_OPC_WRITE:
		in_iov[in_iovcnt].iov_base = (void *)user_io->addr;
		in_iov[in_iovcnt].iov_len = (user_io->nblocks + 1) * block_size;
		in_iovcnt += 1;
		if (user_io->metadata != 0) {
			in_iov[in_iovcnt].iov_base = (void *)user_io->metadata;
			in_iov[in_iovcnt].iov_len = (user_io->nblocks + 1) * md_size;
			in_iovcnt += 1;
		}
		if (!fuse_check_req_size(req, in_iov, in_iovcnt)) {
			return;
		}
		if (in_bufsz == sizeof(*user_io)) {
			fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, out_iovcnt);
			return;
		}

		cuse_nvme_submit_io_write(cuse_device, req, cmd, arg, fi, flags,
					  block_size, md_size, in_buf, in_bufsz, out_bufsz);
		break;
	default:
		SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode);
		fuse_reply_err(req, EINVAL);
		return;
	}
}

/*****************************************************************************
 * Other namespace IOCTLs
 */
static void
cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg,
		  struct fuse_file_info *fi, unsigned flags,
		  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	uint64_t size;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	/* BLKGETSIZE64 reports the device size in bytes */
	size = spdk_nvme_ns_get_num_sectors(ns) * spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &size, sizeof(size));
}

static void
cuse_blkpbszget(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int pbsz;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	pbsz = spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz));
}

static void
cuse_blkgetsize(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	long size;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);

	/* BLKGETSIZE reports the device size as a count of 512-byte blocks */
	size = spdk_nvme_ns_get_num_sectors(ns) * spdk_nvme_ns_get_sector_size(ns) / 512;
	fuse_reply_ioctl(req, 0, &size, sizeof(size));
}

static void
cuse_blkgetsectorsize(fuse_req_t req, int cmd, void *arg,
		      struct fuse_file_info *fi, unsigned flags,
		      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int ssize;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, ssize);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	ssize = spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &ssize, sizeof(ssize));
}

static void
cuse_getid(fuse_req_t req, int cmd, void *arg,
	   struct fuse_file_info *fi, unsigned flags,
	   const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0);
}

static void
cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg,
		 struct fuse_file_info *fi, unsigned flags,
		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	switch ((unsigned int)cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n");
		cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_RESET:
	case NVME_IOCTL_SUBSYS_RESET:
		cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_RESCAN:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESCAN\n");
		cuse_nvme_rescan(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, ENOTTY);
	}
}

static void
cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg,
	      struct fuse_file_info *fi, unsigned flags,
	      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	switch ((unsigned int)cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n");
		cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_SUBMIT_IO:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBMIT_IO\n");
		cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_IO_CMD:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_IO_CMD\n");
		cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_ID:
		SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ID\n");
		cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKPBSZGET:
		SPDK_DEBUGLOG(nvme_cuse, "BLKPBSZGET\n");
		cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKSSZGET:
		SPDK_DEBUGLOG(nvme_cuse, "BLKSSZGET\n");
		cuse_blkgetsectorsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE:
		SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE\n");
		/* Returns the device size as a number of 512-byte blocks (returns pointer to long) */
		cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE64:
		SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE64\n");
		/* Returns the device size in bytes (returns pointer to uint64_t) */
		cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, ENOTTY);
	}
}

/*****************************************************************************
 * CUSE threads initialization.
 */

static void
cuse_open(fuse_req_t req, struct fuse_file_info *fi)
{
	fuse_reply_open(req, fi);
}

static const struct cuse_lowlevel_ops cuse_ctrlr_clop = {
	.open		= cuse_open,
	.ioctl		= cuse_ctrlr_ioctl,
};

static const struct cuse_lowlevel_ops cuse_ns_clop = {
	.open		= cuse_open,
	.ioctl		= cuse_ns_ioctl,
};

static int
cuse_session_create(struct cuse_device *cuse_device)
{
	char *cuse_argv[] = { "cuse", "-f" };
	int multithreaded;
	int cuse_argc = SPDK_COUNTOF(cuse_argv);
	struct cuse_info ci;
	char devname_arg[128 + 8];
	const char *dev_info_argv[] = { devname_arg };

	snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name);

	memset(&ci, 0, sizeof(ci));
	ci.dev_info_argc = 1;
	ci.dev_info_argv = dev_info_argv;
	ci.flags = CUSE_UNRESTRICTED_IOCTL;

	if (cuse_device->nsid) {
		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop,
				       &multithreaded, cuse_device);
	} else {
		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
				       &multithreaded, cuse_device);
	}

	if (!cuse_device->session) {
		SPDK_ERRLOG("Cannot create cuse session\n");
		return -1;
	}
	SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);
	return 0;
}

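/*
 * Per-device session loop. poll() is used with a timeout instead of
 * blocking in fuse_session_receive_buf() so that the thread notices when
 * fuse_session_exit() is called from another thread during shutdown.
 */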
static void *
cuse_thread(void *arg)
{
	struct cuse_device *cuse_device = arg;
	int rc;
	struct fuse_buf buf = { .mem = NULL };
	struct pollfd fds;
	int timeout_msecs = 500;

	spdk_unaffinitize_thread();

	/* Receive and process fuse requests */
	fds.fd = fuse_session_fd(cuse_device->session);
	fds.events = POLLIN;
	while (!fuse_session_exited(cuse_device->session)) {
		rc = poll(&fds, 1, timeout_msecs);
		if (rc <= 0) {
			continue;
		}
		rc = fuse_session_receive_buf(cuse_device->session, &buf);
		if (rc > 0) {
			fuse_session_process_buf(cuse_device->session, &buf);
		}
	}
	free(buf.mem);
	fuse_session_reset(cuse_device->session);
	pthread_exit(NULL);
}

static struct cuse_device *nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr,
		uint32_t nsid);

/*****************************************************************************
 * CUSE devices management
 */

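/*
 * Create the per-namespace character device "<ctrlr_dev>nN" with its own
 * CUSE session and dispatch thread. Idempotent: if the namespace device
 * already exists, nothing is done.
 */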
static int
cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
{
	struct cuse_device *ns_device;
	int rv;

	ns_device = nvme_cuse_get_cuse_ns_device(ctrlr_device->ctrlr, nsid);
	if (ns_device != NULL) {
		return 0;
	}

	ns_device = calloc(1, sizeof(struct cuse_device));
	if (ns_device == NULL) {
		return -ENOMEM;
	}

	ns_device->ctrlr = ctrlr_device->ctrlr;
	ns_device->ctrlr_device = ctrlr_device;
	ns_device->nsid = nsid;
	rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d",
		      ctrlr_device->dev_name, ns_device->nsid);
	if (rv < 0 || rv >= (int)sizeof(ns_device->dev_name)) {
		SPDK_ERRLOG("Device name too long.\n");
		free(ns_device);
		return -ENAMETOOLONG;
	}
	rv = cuse_session_create(ns_device);
	if (rv != 0) {
		free(ns_device);
		return rv;
	}
	rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device);
	if (rv != 0) {
		SPDK_ERRLOG("pthread_create failed\n");
		cuse_lowlevel_teardown(ns_device->session);
		free(ns_device);
		return -rv;
	}
	TAILQ_INSERT_TAIL(&ctrlr_device->ns_devices, ns_device, tailq);

	return 0;
}

static void
cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, struct cuse_device *ns_device)
{
	if (ns_device->session != NULL) {
		fuse_session_exit(ns_device->session);
	}
	pthread_join(ns_device->tid, NULL);
	TAILQ_REMOVE(&ctrlr_device->ns_devices, ns_device, tailq);
	if (ns_device->session != NULL) {
		cuse_lowlevel_teardown(ns_device->session);
	}
	free(ns_device);
}

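/*
 * Claim a controller index by taking a write lock on a file under
 * /var/tmp, so that multiple SPDK processes do not expose the same
 * /dev/spdk/nvmeX name. The PID of the owner is stored in the file and
 * the descriptor stays open for as long as the claim is held.
 */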
static int
nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index)
{
	int dev_fd;
	int pid;
	void *dev_map;
	struct flock cusedev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name),
		 "/var/tmp/spdk_nvme_cuse_lock_%" PRIu32, index);

	dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) {
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", ctrlr_device->lock_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	ctrlr_device->claim_fd = dev_fd;
	ctrlr_device->index = index;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

static void
nvme_cuse_unclaim(struct cuse_device *ctrlr_device)
{
	close(ctrlr_device->claim_fd);
	ctrlr_device->claim_fd = -1;
	unlink(ctrlr_device->lock_name);
}

static void
cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
{
	struct cuse_device *ns_device, *tmp;

	TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) {
		cuse_nvme_ns_stop(ctrlr_device, ns_device);
	}

	assert(TAILQ_EMPTY(&ctrlr_device->ns_devices));

	fuse_session_exit(ctrlr_device->session);
	pthread_join(ctrlr_device->tid, NULL);
	TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
		spdk_bit_array_free(&g_ctrlr_started);
	}
	nvme_cuse_unclaim(ctrlr_device);
	if (ctrlr_device->session != NULL) {
		cuse_lowlevel_teardown(ctrlr_device->session);
	}
	free(ctrlr_device);
}

static int
cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device)
{
	struct cuse_device *ns_device, *tmp;
	uint32_t nsid;

	/* Remove namespaces that have disappeared */
	TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) {
		if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, ns_device->nsid)) {
			cuse_nvme_ns_stop(ctrlr_device, ns_device);
		}
	}

	/* Add new namespaces */
	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr_device->ctrlr);
	while (nsid != 0) {
		if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) {
			SPDK_ERRLOG("Cannot start CUSE namespace device.\n");
			return -1;
		}

		nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr_device->ctrlr, nsid);
	}

	return 0;
}

static int
nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
{
	int rv = 0;
	struct cuse_device *ctrlr_device;

	SPDK_NOTICELOG("Creating cuse device for controller\n");

	if (g_ctrlr_started == NULL) {
		g_ctrlr_started = spdk_bit_array_create(128);
		if (g_ctrlr_started == NULL) {
			SPDK_ERRLOG("Cannot create bit array\n");
			return -ENOMEM;
		}
	}

	ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device));
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot allocate memory for ctrlr_device.\n");
		rv = -ENOMEM;
		goto free_device;
	}

	ctrlr_device->ctrlr = ctrlr;

	/* Check if device already exists, if not increment index until success */
	ctrlr_device->index = 0;
	while (1) {
		ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index);
		if (ctrlr_device->index == UINT32_MAX) {
			SPDK_ERRLOG("Too many registered controllers\n");
			rv = -ENOSPC;
			goto free_device;
		}

		if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) {
			break;
		}
		ctrlr_device->index++;
	}
	spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index);
	snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
		 ctrlr_device->index);

	rv = cuse_session_create(ctrlr_device);
	if (rv != 0) {
		goto clear_and_free;
	}

	rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device);
	if (rv != 0) {
		SPDK_ERRLOG("pthread_create failed\n");
		rv = -rv;
		goto clear_and_free;
	}

	TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);

	TAILQ_INIT(&ctrlr_device->ns_devices);

	/* Start all active namespaces */
	if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) {
		SPDK_ERRLOG("Cannot start CUSE namespace devices.\n");
		/* cuse_nvme_ctrlr_stop() tears everything down, including the
		 * claim and the bit array slot, and frees ctrlr_device, so do
		 * not fall through to clear_and_free here. */
		cuse_nvme_ctrlr_stop(ctrlr_device);
		return -1;
	}

	return 0;

clear_and_free:
	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
free_device:
	free(ctrlr_device);
	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
		spdk_bit_array_free(&g_ctrlr_started);
	}
	return rv;
}

static struct cuse_device *
nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device = NULL;

	TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) {
		if (ctrlr_device->ctrlr == ctrlr) {
			break;
		}
	}

	return ctrlr_device;
}

static struct cuse_device *
nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	struct cuse_device *ctrlr_device = NULL;
	struct cuse_device *ns_device;

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		return NULL;
	}

	TAILQ_FOREACH(ns_device, &ctrlr_device->ns_devices, tailq) {
		if (ns_device->nsid == nsid) {
			return ns_device;
		}
	}

	return NULL;
}

static void
nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot find associated CUSE device\n");
		pthread_mutex_unlock(&g_cuse_mtx);
		return;
	}

	cuse_nvme_ctrlr_stop(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);
}

static void
nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return;
	}

	cuse_nvme_ctrlr_update_namespaces(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);
}

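/*
 * Hooks called by the nvme_io_msg layer: stop() when I/O message
 * processing for this controller is shut down, update() when the set of
 * active namespaces may have changed.
 */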
static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = {
	.name = "cuse",
	.stop = nvme_cuse_stop,
	.update = nvme_cuse_update,
};

int
spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer);
	if (rc) {
		return rc;
	}

	pthread_mutex_lock(&g_cuse_mtx);

	rc = nvme_cuse_start(ctrlr);
	if (rc) {
		nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
	}

	pthread_mutex_unlock(&g_cuse_mtx);

	return rc;
}

int
spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot find associated CUSE device\n");
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	cuse_nvme_ctrlr_stop(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);

	nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);

	return 0;
}

void
spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_cuse_update(ctrlr);
}

int
spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size)
{
	struct cuse_device *ctrlr_device;
	size_t req_len;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name));
	if (*size < req_len) {
		*size = req_len;
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENOSPC;
	}
	snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name);

	pthread_mutex_unlock(&g_cuse_mtx);

	return 0;
}

int
spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size)
{
	struct cuse_device *ns_device;
	size_t req_len;

	pthread_mutex_lock(&g_cuse_mtx);

	ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid);
	if (!ns_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name));
	if (*size < req_len) {
		*size = req_len;
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENOSPC;
	}
	snprintf(name, req_len + 1, "%s", ns_device->dev_name);

	pthread_mutex_unlock(&g_cuse_mtx);

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT(nvme_cuse)

#else /* SPDK_CONFIG_NVME_CUSE */

int
spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size)
{
	SPDK_ERRLOG("spdk_nvme_cuse_get_ctrlr_name() is unsupported\n");
	return -ENOTSUP;
}

int
spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size)
{
	SPDK_ERRLOG("spdk_nvme_cuse_get_ns_name() is unsupported\n");
	return -ENOTSUP;
}

int
spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
{
	SPDK_ERRLOG("spdk_nvme_cuse_register() is unsupported\n");
	return -ENOTSUP;
}

int
spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
{
	SPDK_ERRLOG("spdk_nvme_cuse_unregister() is unsupported\n");
	return -ENOTSUP;
}

void
spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	SPDK_ERRLOG("spdk_nvme_cuse_update_namespaces() is unsupported\n");
}

#endif /* SPDK_CONFIG_NVME_CUSE */