xref: /spdk/lib/nvme/nvme_cuse.c (revision ba23cec1820104cc710ad776f0127e1cf82033aa)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #define FUSE_USE_VERSION 31
35 
36 #include <fuse3/cuse_lowlevel.h>
37 
38 #include <linux/nvme_ioctl.h>
39 #include <linux/fs.h>
40 
41 #include "nvme_internal.h"
42 #include "nvme_io_msg.h"
43 #include "nvme_cuse.h"
44 
/**
 * State of one CUSE character device. The same structure describes both a
 * controller device (nsid == 0) and a namespace device (nsid != 0).
 */
struct cuse_device {
	/* Set for namespace devices once their CUSE thread has been created. */
	bool				is_started;

	/* Device name handed to CUSE as "DEVNAME=<dev_name>". */
	char				dev_name[128];
	/* Slot claimed in g_ctrlr_started; also the suffix of the lock file name. */
	uint32_t			index;
	/* Descriptor of the lock file; kept open to hold the claim. */
	int				claim_fd;
	/* Path of the lock file created by nvme_cuse_claim(). */
	char				lock_name[64];

	struct spdk_nvme_ctrlr		*ctrlr;		/**< NVMe controller */
	uint32_t			nsid;		/**< NVMe name space id, or 0 */

	/* Thread running cuse_thread() for this device, and its FUSE session. */
	pthread_t			tid;
	struct fuse_session		*session;

	/* For a namespace device: the owning controller device. */
	struct cuse_device		*ctrlr_device;
	struct cuse_device		*ns_devices;	/**< Array of cuse ns devices */

	/* Linkage in g_ctrlr_ctx_head (controller devices only are inserted). */
	TAILQ_ENTRY(cuse_device)	tailq;
};
64 
/* Taken by the register/unregister/update/name-lookup entry points in this file. */
static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER;
/* List of controller CUSE devices that have been started. */
static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
/* One bit per controller index in use; created on first start, freed when empty. */
static struct spdk_bit_array *g_ctrlr_started;
68 
/**
 * Per-request context carried from the FUSE ioctl handler to the NVMe
 * completion callback. Released with cuse_io_ctx_free().
 */
struct cuse_io_ctx {
	/* Admin command to submit (used by the admin passthrough path). */
	struct spdk_nvme_cmd		nvme_cmd;
	/* Transfer direction derived from the admin opcode. */
	enum spdk_nvme_data_transfer	data_transfer;

	/* Starting LBA and number of blocks (used by the read/write path). */
	uint64_t			lba;
	uint32_t			lba_count;

	/* Buffer obtained from spdk_malloc()/spdk_zmalloc() and its length in bytes. */
	void				*data;
	int				data_len;

	/* FUSE request to answer once the command completes. */
	fuse_req_t			req;
};
81 
82 static void
83 cuse_io_ctx_free(struct cuse_io_ctx *ctx)
84 {
85 	spdk_free(ctx->data);
86 	free(ctx);
87 }
88 
/*
 * If no output buffer has been provided yet (out_bufsz == 0), ask FUSE to
 * retry the ioctl with an output buffer of sizeof(val) bytes located at the
 * user address `arg`, and return from the calling (void) function.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement, and every
 * argument is parenthesized so that expressions can be passed safely.
 */
#define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val)		\
	do {								\
		if ((out_bufsz) == 0) {					\
			struct iovec out_iov;				\
			out_iov.iov_base = (void *)(arg);		\
			out_iov.iov_len = sizeof(val);			\
			fuse_reply_ioctl_retry((req), NULL, 0, &out_iov, 1);	\
			return;						\
		}							\
	} while (0)
97 
98 static void
99 cuse_nvme_admin_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl)
100 {
101 	struct cuse_io_ctx *ctx = arg;
102 	struct iovec out_iov[2];
103 	struct spdk_nvme_cpl _cpl;
104 
105 	if (ctx->data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
106 		fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0);
107 	} else {
108 		memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl));
109 
110 		out_iov[0].iov_base = &_cpl.cdw0;
111 		out_iov[0].iov_len = sizeof(_cpl.cdw0);
112 
113 		if (ctx->data_len > 0) {
114 			out_iov[1].iov_base = ctx->data;
115 			out_iov[1].iov_len = ctx->data_len;
116 			fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 2);
117 		} else {
118 			fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 1);
119 		}
120 	}
121 
122 	cuse_io_ctx_free(ctx);
123 }
124 
125 static void
126 cuse_nvme_admin_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
127 {
128 	int rc;
129 	struct cuse_io_ctx *ctx = arg;
130 
131 	rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len,
132 					   cuse_nvme_admin_cmd_cb, (void *)ctx);
133 	if (rc < 0) {
134 		fuse_reply_err(ctx->req, EINVAL);
135 		cuse_io_ctx_free(ctx);
136 	}
137 }
138 
139 static void
140 cuse_nvme_admin_cmd_send(fuse_req_t req, struct nvme_admin_cmd *admin_cmd,
141 			 const void *data)
142 {
143 	struct cuse_io_ctx *ctx;
144 	struct cuse_device *cuse_device = fuse_req_userdata(req);
145 	int rv;
146 
147 	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
148 	if (!ctx) {
149 		SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n");
150 		fuse_reply_err(req, ENOMEM);
151 		return;
152 	}
153 
154 	ctx->req = req;
155 	ctx->data_transfer = spdk_nvme_opc_get_data_transfer(admin_cmd->opcode);
156 
157 	memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd));
158 	ctx->nvme_cmd.opc = admin_cmd->opcode;
159 	ctx->nvme_cmd.nsid = admin_cmd->nsid;
160 	ctx->nvme_cmd.cdw10 = admin_cmd->cdw10;
161 	ctx->nvme_cmd.cdw11 = admin_cmd->cdw11;
162 	ctx->nvme_cmd.cdw12 = admin_cmd->cdw12;
163 	ctx->nvme_cmd.cdw13 = admin_cmd->cdw13;
164 	ctx->nvme_cmd.cdw14 = admin_cmd->cdw14;
165 	ctx->nvme_cmd.cdw15 = admin_cmd->cdw15;
166 
167 	ctx->data_len = admin_cmd->data_len;
168 
169 	if (ctx->data_len > 0) {
170 		ctx->data = spdk_malloc(ctx->data_len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
171 		if (!ctx->data) {
172 			SPDK_ERRLOG("Cannot allocate memory for data\n");
173 			fuse_reply_err(req, ENOMEM);
174 			free(ctx);
175 			return;
176 		}
177 		if (data != NULL) {
178 			memcpy(ctx->data, data, ctx->data_len);
179 		}
180 	}
181 
182 	rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_admin_cmd_execute, ctx);
183 	if (rv) {
184 		SPDK_ERRLOG("Cannot send io msg to the controller\n");
185 		fuse_reply_err(req, -rv);
186 		cuse_io_ctx_free(ctx);
187 		return;
188 	}
189 }
190 
191 static void
192 cuse_nvme_admin_cmd(fuse_req_t req, int cmd, void *arg,
193 		    struct fuse_file_info *fi, unsigned flags,
194 		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
195 {
196 	struct nvme_admin_cmd *admin_cmd;
197 	struct iovec in_iov[2], out_iov[2];
198 
199 	in_iov[0].iov_base = (void *)arg;
200 	in_iov[0].iov_len = sizeof(*admin_cmd);
201 	if (in_bufsz == 0) {
202 		fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0);
203 		return;
204 	}
205 
206 	admin_cmd = (struct nvme_admin_cmd *)in_buf;
207 
208 	switch (spdk_nvme_opc_get_data_transfer(admin_cmd->opcode)) {
209 	case SPDK_NVME_DATA_NONE:
210 		SPDK_ERRLOG("SPDK_NVME_DATA_NONE not implemented\n");
211 		fuse_reply_err(req, EINVAL);
212 		return;
213 	case SPDK_NVME_DATA_HOST_TO_CONTROLLER:
214 		if (admin_cmd->addr != 0) {
215 			in_iov[1].iov_base = (void *)admin_cmd->addr;
216 			in_iov[1].iov_len = admin_cmd->data_len;
217 			if (in_bufsz == sizeof(*admin_cmd)) {
218 				fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0);
219 				return;
220 			}
221 			cuse_nvme_admin_cmd_send(req, admin_cmd, in_buf + sizeof(*admin_cmd));
222 		} else {
223 			cuse_nvme_admin_cmd_send(req, admin_cmd, NULL);
224 		}
225 		return;
226 	case SPDK_NVME_DATA_CONTROLLER_TO_HOST:
227 		if (out_bufsz == 0) {
228 			out_iov[0].iov_base = &((struct nvme_admin_cmd *)arg)->result;
229 			out_iov[0].iov_len = sizeof(uint32_t);
230 			if (admin_cmd->data_len > 0) {
231 				out_iov[1].iov_base = (void *)admin_cmd->addr;
232 				out_iov[1].iov_len = admin_cmd->data_len;
233 				fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 2);
234 			} else {
235 				fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 1);
236 			}
237 			return;
238 		}
239 
240 		cuse_nvme_admin_cmd_send(req, admin_cmd, NULL);
241 
242 		return;
243 	case SPDK_NVME_DATA_BIDIRECTIONAL:
244 		fuse_reply_err(req, EINVAL);
245 		return;
246 	}
247 }
248 
249 static void
250 cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
251 {
252 	int rc;
253 	fuse_req_t req = arg;
254 
255 	rc = spdk_nvme_ctrlr_reset(ctrlr);
256 	if (rc) {
257 		fuse_reply_err(req, rc);
258 		return;
259 	}
260 
261 	fuse_reply_ioctl_iov(req, 0, NULL, 0);
262 }
263 
264 static void
265 cuse_nvme_reset(fuse_req_t req, int cmd, void *arg,
266 		struct fuse_file_info *fi, unsigned flags,
267 		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
268 {
269 	int rv;
270 	struct cuse_device *cuse_device = fuse_req_userdata(req);
271 
272 	if (cuse_device->nsid) {
273 		SPDK_ERRLOG("Namespace reset not supported\n");
274 		fuse_reply_err(req, EINVAL);
275 		return;
276 	}
277 
278 	rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req);
279 	if (rv) {
280 		SPDK_ERRLOG("Cannot send reset\n");
281 		fuse_reply_err(req, EINVAL);
282 	}
283 }
284 
285 /*****************************************************************************
286  * Namespace IO requests
287  */
288 
289 static void
290 cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl)
291 {
292 	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
293 
294 	fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0);
295 
296 	cuse_io_ctx_free(ctx);
297 }
298 
299 static void
300 cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
301 {
302 	int rc;
303 	struct cuse_io_ctx *ctx = arg;
304 	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
305 
306 	rc = spdk_nvme_ns_cmd_write(ns, ctrlr->external_io_msgs_qpair, ctx->data,
307 				    ctx->lba, /* LBA start */
308 				    ctx->lba_count, /* number of LBAs */
309 				    cuse_nvme_submit_io_write_done, ctx, 0);
310 
311 	if (rc != 0) {
312 		SPDK_ERRLOG("write failed: rc = %d\n", rc);
313 		fuse_reply_err(ctx->req, rc);
314 		cuse_io_ctx_free(ctx);
315 		return;
316 	}
317 }
318 
319 static void
320 cuse_nvme_submit_io_write(fuse_req_t req, int cmd, void *arg,
321 			  struct fuse_file_info *fi, unsigned flags,
322 			  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
323 {
324 	const struct nvme_user_io *user_io = in_buf;
325 	struct cuse_io_ctx *ctx;
326 	struct spdk_nvme_ns *ns;
327 	uint32_t block_size;
328 	int rc;
329 	struct cuse_device *cuse_device = fuse_req_userdata(req);
330 
331 	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
332 	if (!ctx) {
333 		SPDK_ERRLOG("Cannot allocate memory for context\n");
334 		fuse_reply_err(req, ENOMEM);
335 		return;
336 	}
337 
338 	ctx->req = req;
339 
340 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
341 	block_size = spdk_nvme_ns_get_sector_size(ns);
342 
343 	ctx->lba = user_io->slba;
344 	ctx->lba_count = user_io->nblocks + 1;
345 	ctx->data_len = ctx->lba_count * block_size;
346 
347 	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
348 				 SPDK_MALLOC_DMA);
349 	if (ctx->data == NULL) {
350 		SPDK_ERRLOG("Write buffer allocation failed\n");
351 		fuse_reply_err(ctx->req, ENOMEM);
352 		free(ctx);
353 		return;
354 	}
355 
356 	memcpy(ctx->data, in_buf + sizeof(*user_io), ctx->data_len);
357 
358 	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb,
359 			      ctx);
360 	if (rc < 0) {
361 		SPDK_ERRLOG("Cannot send write io\n");
362 		fuse_reply_err(ctx->req, rc);
363 		cuse_io_ctx_free(ctx);
364 	}
365 }
366 
367 static void
368 cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl)
369 {
370 	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
371 	struct iovec iov;
372 
373 	iov.iov_base = ctx->data;
374 	iov.iov_len = ctx->data_len;
375 
376 	fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, &iov, 1);
377 
378 	cuse_io_ctx_free(ctx);
379 }
380 
381 static void
382 cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
383 {
384 	int rc;
385 	struct cuse_io_ctx *ctx = arg;
386 	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
387 
388 	rc = spdk_nvme_ns_cmd_read(ns, ctrlr->external_io_msgs_qpair, ctx->data,
389 				   ctx->lba, /* LBA start */
390 				   ctx->lba_count, /* number of LBAs */
391 				   cuse_nvme_submit_io_read_done, ctx, 0);
392 
393 	if (rc != 0) {
394 		SPDK_ERRLOG("read failed: rc = %d\n", rc);
395 		fuse_reply_err(ctx->req, rc);
396 		cuse_io_ctx_free(ctx);
397 		return;
398 	}
399 }
400 
401 static void
402 cuse_nvme_submit_io_read(fuse_req_t req, int cmd, void *arg,
403 			 struct fuse_file_info *fi, unsigned flags,
404 			 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
405 {
406 	int rc;
407 	struct cuse_io_ctx *ctx;
408 	const struct nvme_user_io *user_io = in_buf;
409 	struct cuse_device *cuse_device = fuse_req_userdata(req);
410 	struct spdk_nvme_ns *ns;
411 	uint32_t block_size;
412 
413 	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
414 	if (!ctx) {
415 		SPDK_ERRLOG("Cannot allocate memory for context\n");
416 		fuse_reply_err(req, ENOMEM);
417 		return;
418 	}
419 
420 	ctx->req = req;
421 	ctx->lba = user_io->slba;
422 	ctx->lba_count = user_io->nblocks;
423 
424 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
425 	block_size = spdk_nvme_ns_get_sector_size(ns);
426 
427 	ctx->data_len = ctx->lba_count * block_size;
428 	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
429 				 SPDK_MALLOC_DMA);
430 	if (ctx->data == NULL) {
431 		SPDK_ERRLOG("Read buffer allocation failed\n");
432 		fuse_reply_err(ctx->req, ENOMEM);
433 		free(ctx);
434 		return;
435 	}
436 
437 	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx);
438 	if (rc < 0) {
439 		SPDK_ERRLOG("Cannot send read io\n");
440 		fuse_reply_err(ctx->req, rc);
441 		cuse_io_ctx_free(ctx);
442 	}
443 }
444 
445 
446 static void
447 cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg,
448 		    struct fuse_file_info *fi, unsigned flags,
449 		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
450 {
451 	const struct nvme_user_io *user_io;
452 	struct iovec in_iov[2], out_iov;
453 
454 	in_iov[0].iov_base = (void *)arg;
455 	in_iov[0].iov_len = sizeof(*user_io);
456 	if (in_bufsz == 0) {
457 		fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0);
458 		return;
459 	}
460 
461 	user_io = in_buf;
462 
463 	switch (user_io->opcode) {
464 	case SPDK_NVME_OPC_READ:
465 		out_iov.iov_base = (void *)user_io->addr;
466 		out_iov.iov_len = (user_io->nblocks + 1) * 512;
467 		if (out_bufsz == 0) {
468 			fuse_reply_ioctl_retry(req, in_iov, 1, &out_iov, 1);
469 			return;
470 		}
471 
472 		cuse_nvme_submit_io_read(req, cmd, arg, fi, flags, in_buf,
473 					 in_bufsz, out_bufsz);
474 		break;
475 	case SPDK_NVME_OPC_WRITE:
476 		in_iov[1].iov_base = (void *)user_io->addr;
477 		in_iov[1].iov_len = (user_io->nblocks + 1) * 512;
478 		if (in_bufsz == sizeof(*user_io)) {
479 			fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0);
480 			return;
481 		}
482 
483 		cuse_nvme_submit_io_write(req, cmd, arg, fi, flags, in_buf,
484 					  in_bufsz, out_bufsz);
485 
486 		break;
487 	default:
488 		SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode);
489 		fuse_reply_err(req, EINVAL);
490 		return;
491 	}
492 
493 }
494 
495 /*****************************************************************************
496  * Other namespace IOCTLs
497  */
498 static void
499 cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg,
500 		  struct fuse_file_info *fi, unsigned flags,
501 		  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
502 {
503 	uint64_t size;
504 	struct spdk_nvme_ns *ns;
505 	struct cuse_device *cuse_device = fuse_req_userdata(req);
506 
507 	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);
508 
509 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
510 	size = spdk_nvme_ns_get_num_sectors(ns);
511 	fuse_reply_ioctl(req, 0, &size, sizeof(size));
512 }
513 
514 static void
515 cuse_blkpbszget(fuse_req_t req, int cmd, void *arg,
516 		struct fuse_file_info *fi, unsigned flags,
517 		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
518 {
519 	int pbsz;
520 	struct spdk_nvme_ns *ns;
521 	struct cuse_device *cuse_device = fuse_req_userdata(req);
522 
523 	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz);
524 
525 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
526 	pbsz = spdk_nvme_ns_get_sector_size(ns);
527 	fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz));
528 }
529 
530 static void
531 cuse_blkgetsize(fuse_req_t req, int cmd, void *arg,
532 		struct fuse_file_info *fi, unsigned flags,
533 		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
534 {
535 	long size;
536 	struct spdk_nvme_ns *ns;
537 	struct cuse_device *cuse_device = fuse_req_userdata(req);
538 
539 	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);
540 
541 	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
542 
543 	/* return size in 512 bytes blocks */
544 	size = spdk_nvme_ns_get_num_sectors(ns) * 512 / spdk_nvme_ns_get_sector_size(ns);
545 	fuse_reply_ioctl(req, 0, &size, sizeof(size));
546 }
547 
548 static void
549 cuse_getid(fuse_req_t req, int cmd, void *arg,
550 	   struct fuse_file_info *fi, unsigned flags,
551 	   const void *in_buf, size_t in_bufsz, size_t out_bufsz)
552 {
553 	struct cuse_device *cuse_device = fuse_req_userdata(req);
554 
555 	fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0);
556 }
557 
558 static void
559 cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg,
560 		 struct fuse_file_info *fi, unsigned flags,
561 		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
562 {
563 	if (flags & FUSE_IOCTL_COMPAT) {
564 		fuse_reply_err(req, ENOSYS);
565 		return;
566 	}
567 
568 	switch (cmd) {
569 	case NVME_IOCTL_ADMIN_CMD:
570 		cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
571 		break;
572 
573 	case NVME_IOCTL_RESET:
574 		cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
575 		break;
576 
577 	default:
578 		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
579 		fuse_reply_err(req, EINVAL);
580 	}
581 }
582 
583 static void
584 cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg,
585 	      struct fuse_file_info *fi, unsigned flags,
586 	      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
587 {
588 	if (flags & FUSE_IOCTL_COMPAT) {
589 		fuse_reply_err(req, ENOSYS);
590 		return;
591 	}
592 
593 	switch (cmd) {
594 	case NVME_IOCTL_ADMIN_CMD:
595 		cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
596 		break;
597 
598 	case NVME_IOCTL_SUBMIT_IO:
599 		cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
600 		break;
601 
602 	case NVME_IOCTL_ID:
603 		cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
604 		break;
605 
606 	case BLKPBSZGET:
607 		cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
608 		break;
609 
610 	case BLKGETSIZE:
611 		/* Returns the device size as a number of 512-byte blocks (returns pointer to long) */
612 		cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
613 		break;
614 
615 	case BLKGETSIZE64:
616 		/* Returns the device size in sectors (returns pointer to uint64_t) */
617 		cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
618 		break;
619 
620 	default:
621 		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
622 		fuse_reply_err(req, EINVAL);
623 	}
624 }
625 
626 /*****************************************************************************
627  * CUSE threads initialization.
628  */
629 
630 static void cuse_open(fuse_req_t req, struct fuse_file_info *fi)
631 {
632 	fuse_reply_open(req, fi);
633 }
634 
/* CUSE operations used for controller devices (nsid == 0), see cuse_thread(). */
static const struct cuse_lowlevel_ops cuse_ctrlr_clop = {
	.open		= cuse_open,
	.ioctl		= cuse_ctrlr_ioctl,
};
639 
/* CUSE operations used for namespace devices (nsid != 0), see cuse_thread(). */
static const struct cuse_lowlevel_ops cuse_ns_clop = {
	.open		= cuse_open,
	.ioctl		= cuse_ns_ioctl,
};
644 
645 static void *
646 cuse_thread(void *arg)
647 {
648 	struct cuse_device *cuse_device = arg;
649 	char *cuse_argv[] = { "cuse", "-f" };
650 	int cuse_argc = SPDK_COUNTOF(cuse_argv);
651 	char devname_arg[128 + 8];
652 	const char *dev_info_argv[] = { devname_arg };
653 	struct cuse_info ci;
654 	int multithreaded;
655 	int rc;
656 	struct fuse_buf buf = { .mem = NULL };
657 	struct pollfd fds;
658 	int timeout_msecs = 500;
659 
660 	spdk_unaffinitize_thread();
661 
662 	snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name);
663 
664 	memset(&ci, 0, sizeof(ci));
665 	ci.dev_info_argc = 1;
666 	ci.dev_info_argv = dev_info_argv;
667 	ci.flags = CUSE_UNRESTRICTED_IOCTL;
668 
669 	if (cuse_device->nsid) {
670 		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop,
671 				       &multithreaded, cuse_device);
672 	} else {
673 		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
674 				       &multithreaded, cuse_device);
675 	}
676 	if (!cuse_device->session) {
677 		SPDK_ERRLOG("Cannot create cuse session\n");
678 		goto err;
679 	}
680 
681 	SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);
682 
683 	/* Receive and process fuse requests */
684 	fds.fd = fuse_session_fd(cuse_device->session);
685 	fds.events = POLLIN;
686 	while (!fuse_session_exited(cuse_device->session)) {
687 		rc = poll(&fds, 1, timeout_msecs);
688 		if (rc <= 0) {
689 			continue;
690 		}
691 		rc = fuse_session_receive_buf(cuse_device->session, &buf);
692 		if (rc > 0) {
693 			fuse_session_process_buf(cuse_device->session, &buf);
694 		}
695 	}
696 	free(buf.mem);
697 	fuse_session_reset(cuse_device->session);
698 	cuse_lowlevel_teardown(cuse_device->session);
699 err:
700 	pthread_exit(NULL);
701 }
702 
703 /*****************************************************************************
704  * CUSE devices management
705  */
706 
707 static int
708 cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
709 {
710 	struct cuse_device *ns_device;
711 	int rv;
712 
713 	ns_device = &ctrlr_device->ns_devices[nsid - 1];
714 	if (ns_device->is_started) {
715 		return 0;
716 	}
717 
718 	ns_device->ctrlr = ctrlr_device->ctrlr;
719 	ns_device->ctrlr_device = ctrlr_device;
720 	ns_device->nsid = nsid;
721 	rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d",
722 		      ctrlr_device->dev_name, ns_device->nsid);
723 	if (rv < 0) {
724 		SPDK_ERRLOG("Device name too long.\n");
725 		free(ns_device);
726 		return -ENAMETOOLONG;
727 	}
728 
729 	rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device);
730 	if (rv != 0) {
731 		SPDK_ERRLOG("pthread_create failed\n");
732 		return -rv;
733 	}
734 
735 	ns_device->is_started = true;
736 
737 	return 0;
738 }
739 
740 static void
741 cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid)
742 {
743 	struct cuse_device *ns_device;
744 
745 	ns_device = &ctrlr_device->ns_devices[nsid - 1];
746 	if (!ns_device->is_started) {
747 		return;
748 	}
749 
750 	fuse_session_exit(ns_device->session);
751 	pthread_join(ns_device->tid, NULL);
752 	ns_device->is_started = false;
753 }
754 
755 static int
756 nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index)
757 {
758 	int dev_fd;
759 	int pid;
760 	void *dev_map;
761 	struct flock cusedev_lock = {
762 		.l_type = F_WRLCK,
763 		.l_whence = SEEK_SET,
764 		.l_start = 0,
765 		.l_len = 0,
766 	};
767 
768 	snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name),
769 		 "/tmp/spdk_nvme_cuse_lock_%" PRIu32, index);
770 
771 	dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
772 	if (dev_fd == -1) {
773 		SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name);
774 		return -errno;
775 	}
776 
777 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
778 		SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name);
779 		close(dev_fd);
780 		return -errno;
781 	}
782 
783 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
784 		       MAP_SHARED, dev_fd, 0);
785 	if (dev_map == MAP_FAILED) {
786 		SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno);
787 		close(dev_fd);
788 		return -errno;
789 	}
790 
791 	if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) {
792 		pid = *(int *)dev_map;
793 		SPDK_ERRLOG("Cannot create lock on device %s, probably"
794 			    " process %d has claimed it\n", ctrlr_device->lock_name, pid);
795 		munmap(dev_map, sizeof(int));
796 		close(dev_fd);
797 		/* F_SETLK returns unspecified errnos, normalize them */
798 		return -EACCES;
799 	}
800 
801 	*(int *)dev_map = (int)getpid();
802 	munmap(dev_map, sizeof(int));
803 	ctrlr_device->claim_fd = dev_fd;
804 	ctrlr_device->index = index;
805 	/* Keep dev_fd open to maintain the lock. */
806 	return 0;
807 }
808 
809 static void
810 nvme_cuse_unclaim(struct cuse_device *ctrlr_device)
811 {
812 	close(ctrlr_device->claim_fd);
813 	ctrlr_device->claim_fd = -1;
814 	unlink(ctrlr_device->lock_name);
815 }
816 
817 static void
818 cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
819 {
820 	uint32_t i;
821 	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr);
822 
823 	for (i = 1; i <= num_ns; i++) {
824 		cuse_nvme_ns_stop(ctrlr_device, i);
825 	}
826 
827 	fuse_session_exit(ctrlr_device->session);
828 	pthread_join(ctrlr_device->tid, NULL);
829 	TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
830 	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
831 	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
832 		spdk_bit_array_free(&g_ctrlr_started);
833 	}
834 	nvme_cuse_unclaim(ctrlr_device);
835 	free(ctrlr_device->ns_devices);
836 	free(ctrlr_device);
837 }
838 
839 static int
840 cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device)
841 {
842 	uint32_t nsid;
843 	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr);
844 
845 	for (nsid = 1; nsid <= num_ns; nsid++) {
846 		if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, nsid)) {
847 			cuse_nvme_ns_stop(ctrlr_device, nsid);
848 			continue;
849 		}
850 
851 		if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) {
852 			SPDK_ERRLOG("Cannot start CUSE namespace device.");
853 			return -1;
854 		}
855 	}
856 
857 	return 0;
858 }
859 
860 static int
861 nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
862 {
863 	int rv = 0;
864 	struct cuse_device *ctrlr_device;
865 	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
866 
867 	SPDK_NOTICELOG("Creating cuse device for controller\n");
868 
869 	if (g_ctrlr_started == NULL) {
870 		g_ctrlr_started = spdk_bit_array_create(128);
871 		if (g_ctrlr_started == NULL) {
872 			SPDK_ERRLOG("Cannot create bit array\n");
873 			return -ENOMEM;
874 		}
875 	}
876 
877 	ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device));
878 	if (!ctrlr_device) {
879 		SPDK_ERRLOG("Cannot allocate memory for ctrlr_device.");
880 		rv = -ENOMEM;
881 		goto err2;
882 	}
883 
884 	ctrlr_device->ctrlr = ctrlr;
885 
886 	/* Check if device already exists, if not increment index until success */
887 	ctrlr_device->index = 0;
888 	while (1) {
889 		ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index);
890 		if (ctrlr_device->index == UINT32_MAX) {
891 			SPDK_ERRLOG("Too many registered controllers\n");
892 			goto err2;
893 		}
894 
895 		if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) {
896 			break;
897 		}
898 		ctrlr_device->index++;
899 	}
900 	spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index);
901 	snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
902 		 ctrlr_device->index);
903 
904 	rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device);
905 	if (rv != 0) {
906 		SPDK_ERRLOG("pthread_create failed\n");
907 		rv = -rv;
908 		goto err3;
909 	}
910 	TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);
911 
912 	ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device));
913 	/* Start all active namespaces */
914 	if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) {
915 		SPDK_ERRLOG("Cannot start CUSE namespace devices.");
916 		cuse_nvme_ctrlr_stop(ctrlr_device);
917 		rv = -1;
918 		goto err3;
919 	}
920 
921 	return 0;
922 
923 err3:
924 	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
925 err2:
926 	free(ctrlr_device);
927 	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
928 		spdk_bit_array_free(&g_ctrlr_started);
929 	}
930 	return rv;
931 }
932 
933 static struct cuse_device *
934 nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr)
935 {
936 	struct cuse_device *ctrlr_device = NULL;
937 
938 	TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) {
939 		if (ctrlr_device->ctrlr == ctrlr) {
940 			break;
941 		}
942 	}
943 
944 	return ctrlr_device;
945 }
946 
947 static struct cuse_device *
948 nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
949 {
950 	struct cuse_device *ctrlr_device = NULL;
951 	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
952 
953 	if (nsid < 1 || nsid > num_ns) {
954 		return NULL;
955 	}
956 
957 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
958 	if (!ctrlr_device) {
959 		return NULL;
960 	}
961 
962 	if (!ctrlr_device->ns_devices[nsid - 1].is_started) {
963 		return NULL;
964 	}
965 
966 	return &ctrlr_device->ns_devices[nsid - 1];
967 }
968 
969 static void
970 nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr)
971 {
972 	struct cuse_device *ctrlr_device;
973 
974 	pthread_mutex_lock(&g_cuse_mtx);
975 
976 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
977 	if (!ctrlr_device) {
978 		SPDK_ERRLOG("Cannot find associated CUSE device\n");
979 		pthread_mutex_unlock(&g_cuse_mtx);
980 		return;
981 	}
982 
983 	cuse_nvme_ctrlr_stop(ctrlr_device);
984 
985 	pthread_mutex_unlock(&g_cuse_mtx);
986 }
987 
988 static void
989 nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr)
990 {
991 	struct cuse_device *ctrlr_device;
992 
993 	pthread_mutex_lock(&g_cuse_mtx);
994 
995 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
996 	if (!ctrlr_device) {
997 		pthread_mutex_unlock(&g_cuse_mtx);
998 		return;
999 	}
1000 
1001 	cuse_nvme_ctrlr_update_namespaces(ctrlr_device);
1002 
1003 	pthread_mutex_unlock(&g_cuse_mtx);
1004 }
1005 
/*
 * io-msg producer descriptor passed to nvme_io_msg_ctrlr_register() and
 * nvme_io_msg_ctrlr_unregister() for every controller exposed through CUSE.
 */
static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = {
	.name = "cuse",
	.stop = nvme_cuse_stop,
	.update = nvme_cuse_update,
};
1011 
1012 int
1013 spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
1014 {
1015 	int rc;
1016 
1017 	rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer);
1018 	if (rc) {
1019 		return rc;
1020 	}
1021 
1022 	pthread_mutex_lock(&g_cuse_mtx);
1023 
1024 	rc = nvme_cuse_start(ctrlr);
1025 	if (rc) {
1026 		nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
1027 	}
1028 
1029 	pthread_mutex_unlock(&g_cuse_mtx);
1030 
1031 	return rc;
1032 }
1033 
1034 int
1035 spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
1036 {
1037 	struct cuse_device *ctrlr_device;
1038 
1039 	pthread_mutex_lock(&g_cuse_mtx);
1040 
1041 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1042 	if (!ctrlr_device) {
1043 		SPDK_ERRLOG("Cannot find associated CUSE device\n");
1044 		pthread_mutex_unlock(&g_cuse_mtx);
1045 		return -ENODEV;
1046 	}
1047 
1048 	cuse_nvme_ctrlr_stop(ctrlr_device);
1049 
1050 	pthread_mutex_unlock(&g_cuse_mtx);
1051 
1052 	nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
1053 
1054 	return 0;
1055 }
1056 
1057 void
1058 spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1059 {
1060 	nvme_cuse_update(ctrlr);
1061 }
1062 
1063 int
1064 spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size)
1065 {
1066 	struct cuse_device *ctrlr_device;
1067 	size_t req_len;
1068 
1069 	pthread_mutex_lock(&g_cuse_mtx);
1070 
1071 	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1072 	if (!ctrlr_device) {
1073 		pthread_mutex_unlock(&g_cuse_mtx);
1074 		return -ENODEV;
1075 	}
1076 
1077 	req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name));
1078 	if (*size < req_len) {
1079 		*size = req_len;
1080 		pthread_mutex_unlock(&g_cuse_mtx);
1081 		return -ENOSPC;
1082 	}
1083 	snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name);
1084 
1085 	pthread_mutex_unlock(&g_cuse_mtx);
1086 
1087 	return 0;
1088 }
1089 
1090 int
1091 spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size)
1092 {
1093 	struct cuse_device *ns_device;
1094 	size_t req_len;
1095 
1096 	pthread_mutex_lock(&g_cuse_mtx);
1097 
1098 	ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid);
1099 	if (!ns_device) {
1100 		pthread_mutex_unlock(&g_cuse_mtx);
1101 		return -ENODEV;
1102 	}
1103 
1104 	req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name));
1105 	if (*size < req_len) {
1106 		*size = req_len;
1107 		pthread_mutex_unlock(&g_cuse_mtx);
1108 		return -ENOSPC;
1109 	}
1110 	snprintf(name, req_len + 1, "%s", ns_device->dev_name);
1111 
1112 	pthread_mutex_unlock(&g_cuse_mtx);
1113 
1114 	return 0;
1115 }
1116