/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   Copyright (c) Samsung Electronics Co., Ltd.
 *   All rights reserved.
 */

#include "libxnvme.h"
#include "libxnvme_pp.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

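/* Per-thread I/O channel state: one xNVMe queue plus the poller that reaps its completions. */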
struct bdev_xnvme_io_channel {
	struct xnvme_queue	*queue;
	struct spdk_poller	*poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

struct bdev_xnvme {
	struct spdk_bdev	bdev;
	char			*filename;
	char			*io_mechanism;
	struct xnvme_dev	*dev;
	uint32_t		nsid;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

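/* Size of the per-I/O driver context (struct bdev_xnvme_task) carried in each spdk_bdev_io. */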
static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

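/* Emit a "bdev_xnvme_create" RPC entry for each registered bdev so the current
 * configuration can be saved and replayed.
 */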
static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}

static struct spdk_bdev_module xnvme_if = {
	.name		= "xnvme",
	.module_init	= bdev_xnvme_init,
	.module_fini	= bdev_xnvme_fini,
	.get_ctx_size	= bdev_xnvme_get_ctx_size,
	.config_json	= bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

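/* Only plain reads and writes are supported by this module. */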
static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

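/* Unregister the io_device; the actual teardown happens in bdev_xnvme_destruct_cb
 * once all I/O channels have been released.
 */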
static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}

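/* Called once an aligned buffer is available: translate the bdev I/O into an NVMe
 * read or write command and submit it on this channel's xNVMe queue.
 */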
static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;

	default:
		SPDK_ERRLOG("Unsupported I/O type\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission success! */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or no memory => let the bdev layer queue the I/O */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Queueing I/O for xnvme bdev\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command context back in the queue */
	default:
		SPDK_ERRLOG("xnvme_cmd_passv : Submission failed: unexpected error\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}

static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If the user specified unaligned buffers,
	 * get an aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct		= bdev_xnvme_destruct,
	.submit_request		= bdev_xnvme_submit_request,
	.io_type_supported	= bdev_xnvme_io_type_supported,
	.get_io_channel		= bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

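/* Completion callback invoked from xnvme_queue_poke() for each finished command. */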
static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O Failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

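/* Poller that reaps completions from the channel's xNVMe queue; reports busy while
 * commands remain outstanding.
 */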
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

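/* Per-thread I/O channel constructor: initialize the xNVMe queue (depth 512) and
 * register the completion poller.
 */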
static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512;

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}

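/* Open the device through xNVMe with the requested I/O mechanism, validate its
 * geometry and register it as a bdev.
 */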
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism)
{
	struct bdev_xnvme *xnvme;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
		opts.poll_sq = 1;
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2).\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %lu, block_size : %d\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

struct delete_xnvme_bdev_ctx {
	struct bdev_xnvme *xnvme;
	spdk_delete_xnvme_complete cb_fn;
	void *cb_arg;
};

static void
xnvme_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_xnvme_bdev_ctx *ctx = arg;

	bdev_xnvme_destruct(ctx->xnvme);
	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

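/* Unregister the bdev; the remaining cleanup is done in xnvme_bdev_unregister_cb. */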
void
delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg)
{
	struct delete_xnvme_bdev_ctx *ctx;
	struct bdev_xnvme *xnvme;

	if (!bdev || bdev->module != &xnvme_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	/* Only dereference bdev->ctxt after the NULL/module check above. */
	xnvme = (struct bdev_xnvme *)bdev->ctxt;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->xnvme = xnvme;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	spdk_bdev_unregister(bdev, xnvme_bdev_unregister_cb, ctx);
}

static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

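/* Module init/fini: register and unregister a module-level io_device ("xnvme_module"). */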
static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)