xref: /spdk/lib/nvme/nvme_cuse.c (revision 06b537bfdb4393dea857e204b85d8df46a351d8a)
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

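/*
 * Select the FUSE 3.x API; this must be defined before the first
 * libfuse/CUSE header is included.
 */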
#define FUSE_USE_VERSION 31

#include <fuse3/cuse_lowlevel.h>

#include <linux/nvme_ioctl.h>
#include <linux/fs.h>

#include "nvme_internal.h"
#include "nvme_io_msg.h"
#include "nvme_cuse.h"

struct cuse_device {
	bool				is_started;

	char				dev_name[128];
	uint32_t			index;
	int				claim_fd;
	char				lock_name[64];

	struct spdk_nvme_ctrlr		*ctrlr;		/**< NVMe controller */
	uint32_t			nsid;		/**< NVMe namespace id, or 0 for the controller node */

	pthread_t			tid;
	struct fuse_session		*session;

	struct cuse_device		*ctrlr_device;
	struct cuse_device		*ns_devices;	/**< Array of per-namespace cuse devices */

	TAILQ_ENTRY(cuse_device)	tailq;
};

static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
static struct spdk_bit_array *g_ctrlr_started;

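/*
 * Context for one in-flight passthrough command: the translated NVMe command,
 * its DMA-able payload buffer, and the fuse request to complete when the
 * command finishes.
 */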
struct cuse_io_ctx {
	struct spdk_nvme_cmd		nvme_cmd;
	enum spdk_nvme_data_transfer	data_transfer;

	uint64_t			lba;
	uint32_t			lba_count;

	void				*data;
	int				data_len;

	fuse_req_t			req;
};

static void
cuse_io_ctx_free(struct cuse_io_ctx *ctx)
{
	spdk_free(ctx->data);
	free(ctx);
}

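/*
 * With CUSE_UNRESTRICTED_IOCTL the first invocation of an ioctl may arrive
 * with out_bufsz == 0, i.e. the caller's buffer is not mapped yet.  In that
 * case describe the output buffer we need and ask the kernel to retry.
 */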
#define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val)		\
	if (out_bufsz == 0) {						\
		struct iovec out_iov;					\
		out_iov.iov_base = (void *)arg;				\
		out_iov.iov_len = sizeof(val);				\
		fuse_reply_ioctl_retry(req, NULL, 0, &out_iov, 1);	\
		return;							\
	}

static void
cuse_nvme_admin_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = arg;
	struct iovec out_iov[2];
	struct spdk_nvme_cpl _cpl;

	if (ctx->data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER ||
	    ctx->data_transfer == SPDK_NVME_DATA_NONE) {
		fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0);
	} else {
		memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl));

		out_iov[0].iov_base = &_cpl.cdw0;
		out_iov[0].iov_len = sizeof(_cpl.cdw0);

		if (ctx->data_len > 0) {
			out_iov[1].iov_base = ctx->data;
			out_iov[1].iov_len = ctx->data_len;
			fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 2);
		} else {
			fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 1);
		}
	}

	cuse_io_ctx_free(ctx);
}

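/*
 * Executed via the controller's io_msg queue (see nvme_io_msg_send()), i.e.
 * on the thread that processes the controller, not on the CUSE thread.
 */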
static void
cuse_nvme_admin_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;

	rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len,
					   cuse_nvme_admin_cmd_cb, (void *)ctx);
	if (rc < 0) {
		fuse_reply_err(ctx->req, EINVAL);
		cuse_io_ctx_free(ctx);
	}
}

static void
cuse_nvme_admin_cmd_send(fuse_req_t req, struct nvme_admin_cmd *admin_cmd,
			 const void *data)
{
	struct cuse_io_ctx *ctx;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	int rv;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->data_transfer = spdk_nvme_opc_get_data_transfer(admin_cmd->opcode);

	memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd));
	ctx->nvme_cmd.opc = admin_cmd->opcode;
	ctx->nvme_cmd.nsid = admin_cmd->nsid;
	ctx->nvme_cmd.cdw10 = admin_cmd->cdw10;
	ctx->nvme_cmd.cdw11 = admin_cmd->cdw11;
	ctx->nvme_cmd.cdw12 = admin_cmd->cdw12;
	ctx->nvme_cmd.cdw13 = admin_cmd->cdw13;
	ctx->nvme_cmd.cdw14 = admin_cmd->cdw14;
	ctx->nvme_cmd.cdw15 = admin_cmd->cdw15;

	ctx->data_len = admin_cmd->data_len;

	if (ctx->data_len > 0) {
		ctx->data = spdk_malloc(ctx->data_len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!ctx->data) {
			SPDK_ERRLOG("Cannot allocate memory for data\n");
			fuse_reply_err(req, ENOMEM);
			free(ctx);
			return;
		}
		if (data != NULL) {
			memcpy(ctx->data, data, ctx->data_len);
		}
	}

	rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_admin_cmd_execute, ctx);
	if (rv) {
		SPDK_ERRLOG("Cannot send io msg to the controller\n");
		fuse_reply_err(req, -rv);
		cuse_io_ctx_free(ctx);
		return;
	}
}

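/*
 * NVME_IOCTL_ADMIN_CMD handler.  The fixed-size struct nvme_admin_cmd is
 * fetched with a first ioctl retry; host-to-controller transfers need a
 * second retry to pull in the payload that admin_cmd->addr points at.
 */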
static void
cuse_nvme_admin_cmd(fuse_req_t req, int cmd, void *arg,
		    struct fuse_file_info *fi, unsigned flags,
		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct nvme_admin_cmd *admin_cmd;
	struct iovec in_iov[2], out_iov[2];

	in_iov[0].iov_base = (void *)arg;
	in_iov[0].iov_len = sizeof(*admin_cmd);
	if (in_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0);
		return;
	}

	admin_cmd = (struct nvme_admin_cmd *)in_buf;

	switch (spdk_nvme_opc_get_data_transfer(admin_cmd->opcode)) {
	case SPDK_NVME_DATA_HOST_TO_CONTROLLER:
		if (admin_cmd->addr != 0) {
			in_iov[1].iov_base = (void *)admin_cmd->addr;
			in_iov[1].iov_len = admin_cmd->data_len;
			if (in_bufsz == sizeof(*admin_cmd)) {
				fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0);
				return;
			}
			cuse_nvme_admin_cmd_send(req, admin_cmd, in_buf + sizeof(*admin_cmd));
		} else {
			cuse_nvme_admin_cmd_send(req, admin_cmd, NULL);
		}
		return;
	case SPDK_NVME_DATA_NONE:
	case SPDK_NVME_DATA_CONTROLLER_TO_HOST:
		if (out_bufsz == 0) {
			out_iov[0].iov_base = &((struct nvme_admin_cmd *)arg)->result;
			out_iov[0].iov_len = sizeof(uint32_t);
			if (admin_cmd->data_len > 0) {
				out_iov[1].iov_base = (void *)admin_cmd->addr;
				out_iov[1].iov_len = admin_cmd->data_len;
				fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 2);
			} else {
				fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 1);
			}
			return;
		}

		cuse_nvme_admin_cmd_send(req, admin_cmd, NULL);

		return;
	case SPDK_NVME_DATA_BIDIRECTIONAL:
		fuse_reply_err(req, EINVAL);
		return;
	}
}

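/*
 * Executed on the controller-processing thread; resets the whole controller,
 * which is why it is rejected for per-namespace device nodes below.
 */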
static void
cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	fuse_req_t req = arg;

	rc = spdk_nvme_ctrlr_reset(ctrlr);
	if (rc) {
		/* rc is a negative errno; fuse_reply_err() expects a positive one */
		fuse_reply_err(req, -rc);
		return;
	}

	fuse_reply_ioctl_iov(req, 0, NULL, 0);
}

static void
cuse_nvme_reset(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rv;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	if (cuse_device->nsid) {
		SPDK_ERRLOG("Namespace reset not supported\n");
		fuse_reply_err(req, EINVAL);
		return;
	}

	rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req);
	if (rv) {
		SPDK_ERRLOG("Cannot send reset\n");
		fuse_reply_err(req, EINVAL);
	}
}

/*****************************************************************************
 * Namespace IO requests
 */

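/*
 * The payload follows the kernel's struct nvme_user_io ABI: nblocks is
 * zero-based, so nblocks + 1 LBAs are transferred; a write's data arrives
 * appended directly after the header in the retried ioctl input buffer.
 */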
static void
cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;

	fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0);

	cuse_io_ctx_free(ctx);
}

static void
cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;
	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

	rc = spdk_nvme_ns_cmd_write(ns, ctrlr->external_io_msgs_qpair, ctx->data,
				    ctx->lba, /* LBA start */
				    ctx->lba_count, /* number of LBAs */
				    cuse_nvme_submit_io_write_done, ctx, 0);

	if (rc != 0) {
		SPDK_ERRLOG("write failed: rc = %d\n", rc);
		/* rc is a negative errno */
		fuse_reply_err(ctx->req, -rc);
		cuse_io_ctx_free(ctx);
		return;
	}
}

static void
cuse_nvme_submit_io_write(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
			  struct fuse_file_info *fi, unsigned flags, uint32_t block_size,
			  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io = in_buf;
	struct cuse_io_ctx *ctx;
	int rc;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	ctx->lba_count = user_io->nblocks + 1;
	ctx->data_len = ctx->lba_count * block_size;

	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Write buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	memcpy(ctx->data, in_buf + sizeof(*user_io), ctx->data_len);

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb,
			      ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send write io\n");
		/* rc is a negative errno */
		fuse_reply_err(ctx->req, -rc);
		cuse_io_ctx_free(ctx);
	}
}

static void
cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
	struct iovec iov;

	iov.iov_base = ctx->data;
	iov.iov_len = ctx->data_len;

	fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, &iov, 1);

	cuse_io_ctx_free(ctx);
}

static void
cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;
	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

	rc = spdk_nvme_ns_cmd_read(ns, ctrlr->external_io_msgs_qpair, ctx->data,
				   ctx->lba, /* LBA start */
				   ctx->lba_count, /* number of LBAs */
				   cuse_nvme_submit_io_read_done, ctx, 0);

	if (rc != 0) {
		SPDK_ERRLOG("read failed: rc = %d\n", rc);
		/* rc is a negative errno */
		fuse_reply_err(ctx->req, -rc);
		cuse_io_ctx_free(ctx);
		return;
	}
}

static void
cuse_nvme_submit_io_read(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
			 struct fuse_file_info *fi, unsigned flags, uint32_t block_size,
			 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rc;
	struct cuse_io_ctx *ctx;
	const struct nvme_user_io *user_io = in_buf;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	ctx->lba_count = user_io->nblocks + 1;

	ctx->data_len = ctx->lba_count * block_size;
	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Read buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send read io\n");
		/* rc is a negative errno */
		fuse_reply_err(ctx->req, -rc);
		cuse_io_ctx_free(ctx);
	}
}

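/*
 * NVME_IOCTL_SUBMIT_IO handler.  The fixed-size header is fetched first;
 * reads then describe an output buffer for the retry, while writes fetch
 * the payload together with the header in a second retry.
 */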
static void
cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg,
		    struct fuse_file_info *fi, unsigned flags,
		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io;
	struct iovec in_iov[2], out_iov;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	struct spdk_nvme_ns *ns;
	uint32_t block_size;

	in_iov[0].iov_base = (void *)arg;
	in_iov[0].iov_len = sizeof(*user_io);
	if (in_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0);
		return;
	}

	user_io = in_buf;

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	block_size = spdk_nvme_ns_get_sector_size(ns);

	switch (user_io->opcode) {
	case SPDK_NVME_OPC_READ:
		out_iov.iov_base = (void *)user_io->addr;
		out_iov.iov_len = (user_io->nblocks + 1) * block_size;
		if (out_bufsz == 0) {
			fuse_reply_ioctl_retry(req, in_iov, 1, &out_iov, 1);
			return;
		}

		cuse_nvme_submit_io_read(cuse_device, req, cmd, arg, fi, flags,
					 block_size, in_buf, in_bufsz, out_bufsz);
		break;
	case SPDK_NVME_OPC_WRITE:
		in_iov[1].iov_base = (void *)user_io->addr;
		in_iov[1].iov_len = (user_io->nblocks + 1) * block_size;
		if (in_bufsz == sizeof(*user_io)) {
			fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0);
			return;
		}

		cuse_nvme_submit_io_write(cuse_device, req, cmd, arg, fi, flags,
					  block_size, in_buf, in_bufsz, out_bufsz);
		break;
	default:
		SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode);
		fuse_reply_err(req, EINVAL);
		return;
	}
}

/*****************************************************************************
 * Other namespace IOCTLs
 */
static void
cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg,
		  struct fuse_file_info *fi, unsigned flags,
		  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	uint64_t size;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	/* BLKGETSIZE64 reports the device size in bytes */
	size = spdk_nvme_ns_get_num_sectors(ns) * spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &size, sizeof(size));
}

static void
cuse_blkpbszget(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int pbsz;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	pbsz = spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz));
}

static void
cuse_blkgetsize(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	long size;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);

	/* return the size as a number of 512-byte blocks */
	size = spdk_nvme_ns_get_num_sectors(ns) * spdk_nvme_ns_get_sector_size(ns) / 512;
	fuse_reply_ioctl(req, 0, &size, sizeof(size));
}

static void
cuse_getid(fuse_req_t req, int cmd, void *arg,
	   struct fuse_file_info *fi, unsigned flags,
	   const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0);
}

static void
cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg,
		 struct fuse_file_info *fi, unsigned flags,
		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_RESET:
		cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, EINVAL);
	}
}

static void
cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg,
	      struct fuse_file_info *fi, unsigned flags,
	      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_SUBMIT_IO:
		cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_ID:
		cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKPBSZGET:
		cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE:
		/* Returns the device size as a number of 512-byte blocks (returns pointer to long) */
		cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE64:
		/* Returns the device size in bytes (returns pointer to uint64_t) */
		cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, EINVAL);
	}
}

/*****************************************************************************
 * CUSE threads initialization.
 */

static void
cuse_open(fuse_req_t req, struct fuse_file_info *fi)
{
	fuse_reply_open(req, fi);
}

static const struct cuse_lowlevel_ops cuse_ctrlr_clop = {
	.open		= cuse_open,
	.ioctl		= cuse_ctrlr_ioctl,
};

static const struct cuse_lowlevel_ops cuse_ns_clop = {
	.open		= cuse_open,
	.ioctl		= cuse_ns_ioctl,
};

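/*
 * One CUSE session per device node; each node gets its own thread that polls
 * the session fd and dispatches requests until the session is told to exit.
 */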
static void *
cuse_thread(void *arg)
{
	struct cuse_device *cuse_device = arg;
	char *cuse_argv[] = { "cuse", "-f" };
	int cuse_argc = SPDK_COUNTOF(cuse_argv);
	char devname_arg[128 + 8];
	const char *dev_info_argv[] = { devname_arg };
	struct cuse_info ci;
	int multithreaded;
	int rc;
	struct fuse_buf buf = { .mem = NULL };
	struct pollfd fds;
	int timeout_msecs = 500;

	spdk_unaffinitize_thread();

	snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name);

	memset(&ci, 0, sizeof(ci));
	ci.dev_info_argc = 1;
	ci.dev_info_argv = dev_info_argv;
	ci.flags = CUSE_UNRESTRICTED_IOCTL;

	if (cuse_device->nsid) {
		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop,
				       &multithreaded, cuse_device);
	} else {
		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
				       &multithreaded, cuse_device);
	}
	if (!cuse_device->session) {
		SPDK_ERRLOG("Cannot create cuse session\n");
		goto err;
	}

	SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);

	/* Receive and process fuse requests.  Poll with a timeout so that
	 * fuse_session_exit() called from another thread is noticed even
	 * when no requests are arriving. */
	fds.fd = fuse_session_fd(cuse_device->session);
	fds.events = POLLIN;
	while (!fuse_session_exited(cuse_device->session)) {
		rc = poll(&fds, 1, timeout_msecs);
		if (rc <= 0) {
			continue;
		}
		rc = fuse_session_receive_buf(cuse_device->session, &buf);
		if (rc > 0) {
			fuse_session_process_buf(cuse_device->session, &buf);
		}
	}
	free(buf.mem);
	fuse_session_reset(cuse_device->session);
	cuse_lowlevel_teardown(cuse_device->session);
err:
	pthread_exit(NULL);
}

/*****************************************************************************
 * CUSE devices management
 */

static int
cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
{
	struct cuse_device *ns_device;
	int rv;

	ns_device = &ctrlr_device->ns_devices[nsid - 1];
	if (ns_device->is_started) {
		return 0;
	}

	ns_device->ctrlr = ctrlr_device->ctrlr;
	ns_device->ctrlr_device = ctrlr_device;
	ns_device->nsid = nsid;
	rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d",
		      ctrlr_device->dev_name, ns_device->nsid);
	if (rv < 0) {
		SPDK_ERRLOG("Device name too long.\n");
		/* ns_device points into the ns_devices array, so it must not be freed here */
		return -ENAMETOOLONG;
	}

	rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device);
	if (rv != 0) {
		SPDK_ERRLOG("pthread_create failed\n");
		return -rv;
	}

	ns_device->is_started = true;

	return 0;
}

static void
cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid)
{
	struct cuse_device *ns_device;

	ns_device = &ctrlr_device->ns_devices[nsid - 1];
	if (!ns_device->is_started) {
		return;
	}

	fuse_session_exit(ns_device->session);
	pthread_join(ns_device->tid, NULL);
	ns_device->is_started = false;
}

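/*
 * Claim a controller index across processes: take an advisory write lock on
 * /tmp/spdk_nvme_cuse_lock_<index> and store our pid in the file so that a
 * conflicting process can be reported.  claim_fd stays open to hold the lock.
 */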
static int
nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index)
{
	int dev_fd;
	int pid;
	void *dev_map;
	struct flock cusedev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name),
		 "/tmp/spdk_nvme_cuse_lock_%" PRIu32, index);

	dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) {
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", ctrlr_device->lock_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	ctrlr_device->claim_fd = dev_fd;
	ctrlr_device->index = index;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

static void
nvme_cuse_unclaim(struct cuse_device *ctrlr_device)
{
	close(ctrlr_device->claim_fd);
	ctrlr_device->claim_fd = -1;
	unlink(ctrlr_device->lock_name);
}

static void
cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
{
	uint32_t i;
	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr);

	/* ns_devices may be NULL if its allocation failed during start */
	if (ctrlr_device->ns_devices != NULL) {
		for (i = 1; i <= num_ns; i++) {
			cuse_nvme_ns_stop(ctrlr_device, i);
		}
	}

	fuse_session_exit(ctrlr_device->session);
	pthread_join(ctrlr_device->tid, NULL);
	TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
		spdk_bit_array_free(&g_ctrlr_started);
	}
	nvme_cuse_unclaim(ctrlr_device);
	free(ctrlr_device->ns_devices);
	free(ctrlr_device);
}

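/*
 * Bring the per-namespace device nodes in sync with the namespaces the
 * controller currently reports active: stop nodes for inactive namespaces
 * and start nodes for newly active ones.
 */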
static int
cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device)
{
	uint32_t nsid;
	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr);

	for (nsid = 1; nsid <= num_ns; nsid++) {
		if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, nsid)) {
			cuse_nvme_ns_stop(ctrlr_device, nsid);
			continue;
		}

		if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) {
			SPDK_ERRLOG("Cannot start CUSE namespace device.\n");
			return -1;
		}
	}

	return 0;
}

static int
nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
{
	int rv = 0;
	struct cuse_device *ctrlr_device;
	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);

	SPDK_NOTICELOG("Creating cuse device for controller\n");

	if (g_ctrlr_started == NULL) {
		g_ctrlr_started = spdk_bit_array_create(128);
		if (g_ctrlr_started == NULL) {
			SPDK_ERRLOG("Cannot create bit array\n");
			return -ENOMEM;
		}
	}

	ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device));
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot allocate memory for ctrlr_device.\n");
		rv = -ENOMEM;
		goto err2;
	}

	ctrlr_device->ctrlr = ctrlr;

	/* Check if device already exists, if not increment index until success */
	ctrlr_device->index = 0;
	while (1) {
		ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index);
		if (ctrlr_device->index == UINT32_MAX) {
			SPDK_ERRLOG("Too many registered controllers\n");
			rv = -ENOSPC;
			goto err2;
		}

		if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) {
			break;
		}
		ctrlr_device->index++;
	}
	spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index);
	snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
		 ctrlr_device->index);

	rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device);
	if (rv != 0) {
		SPDK_ERRLOG("pthread_create failed\n");
		rv = -rv;
		goto err3;
	}
	TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);

	ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device));
	if (ctrlr_device->ns_devices == NULL) {
		SPDK_ERRLOG("Cannot allocate memory for namespace devices.\n");
		/* cuse_nvme_ctrlr_stop() unwinds the thread, bit array, claim
		 * and frees ctrlr_device */
		cuse_nvme_ctrlr_stop(ctrlr_device);
		return -ENOMEM;
	}

	/* Start all active namespaces */
	if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) {
		SPDK_ERRLOG("Cannot start CUSE namespace devices.\n");
		/* cuse_nvme_ctrlr_stop() tears everything down and frees
		 * ctrlr_device, so return directly instead of goto err3 */
		cuse_nvme_ctrlr_stop(ctrlr_device);
		return -1;
	}

	return 0;

err3:
	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
	/* release the claim taken above */
	nvme_cuse_unclaim(ctrlr_device);
err2:
	free(ctrlr_device);
	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
		spdk_bit_array_free(&g_ctrlr_started);
	}
	return rv;
}

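/* Lookup helpers; the caller must hold g_cuse_mtx. */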
static struct cuse_device *
nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device = NULL;

	TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) {
		if (ctrlr_device->ctrlr == ctrlr) {
			break;
		}
	}

	return ctrlr_device;
}

static struct cuse_device *
nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	struct cuse_device *ctrlr_device = NULL;
	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);

	if (nsid < 1 || nsid > num_ns) {
		return NULL;
	}

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		return NULL;
	}

	if (!ctrlr_device->ns_devices[nsid - 1].is_started) {
		return NULL;
	}

	return &ctrlr_device->ns_devices[nsid - 1];
}

static void
nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot find associated CUSE device\n");
		pthread_mutex_unlock(&g_cuse_mtx);
		return;
	}

	cuse_nvme_ctrlr_stop(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);
}

static void
nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return;
	}

	cuse_nvme_ctrlr_update_namespaces(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);
}

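/*
 * Registered with the io_msg layer so that the CUSE devices are torn down
 * (stop) or refreshed (update) when the controller state changes.
 */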
static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = {
	.name = "cuse",
	.stop = nvme_cuse_stop,
	.update = nvme_cuse_update,
};

int
spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer);
	if (rc) {
		return rc;
	}

	pthread_mutex_lock(&g_cuse_mtx);

	rc = nvme_cuse_start(ctrlr);
	if (rc) {
		nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
	}

	pthread_mutex_unlock(&g_cuse_mtx);

	return rc;
}

int
spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot find associated CUSE device\n");
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	cuse_nvme_ctrlr_stop(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);

	nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);

	return 0;
}

void
spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_cuse_update(ctrlr);
}

int
spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size)
{
	struct cuse_device *ctrlr_device;
	size_t req_len;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name));
	/* snprintf() below writes req_len characters plus a terminating NUL */
	if (*size < req_len + 1) {
		*size = req_len + 1;
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENOSPC;
	}
	snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name);

	pthread_mutex_unlock(&g_cuse_mtx);

	return 0;
}

int
spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size)
{
	struct cuse_device *ns_device;
	size_t req_len;

	pthread_mutex_lock(&g_cuse_mtx);

	ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid);
	if (!ns_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name));
	/* snprintf() below writes req_len characters plus a terminating NUL */
	if (*size < req_len + 1) {
		*size = req_len + 1;
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENOSPC;
	}
	snprintf(name, req_len + 1, "%s", ns_device->dev_name);

	pthread_mutex_unlock(&g_cuse_mtx);

	return 0;
}