/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Samsung Electronics Co., Ltd.
 *   All rights reserved.
 */

#include "libxnvme.h"
#include "libxnvme_pp.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

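/* Per-thread I/O channel state: an xNVMe command queue plus the SPDK poller
 * that reaps completions from it. */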
struct bdev_xnvme_io_channel {
	struct xnvme_queue	*queue;
	struct spdk_poller	*poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

struct bdev_xnvme {
	struct spdk_bdev	bdev;
	char			*filename;
	char			*io_mechanism;
	struct xnvme_dev	*dev;
	uint32_t		nsid;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

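/* Emit, for every registered xNVMe bdev, the RPC call that would recreate it.
 * For a bdev created with name "xnvme0", the generated JSON looks roughly like
 * this (values are illustrative):
 *
 *   {
 *     "method": "bdev_xnvme_create",
 *     "params": {
 *       "name": "xnvme0",
 *       "filename": "/dev/nvme0n1",
 *       "io_mechanism": "io_uring"
 *     }
 *   }
 */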
static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}

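/* Module vtable handed to the bdev layer; get_ctx_size lets the layer reserve
 * room for one struct bdev_xnvme_task in every spdk_bdev_io it allocates. */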
static struct spdk_bdev_module xnvme_if = {
	.name		= "xnvme",
	.module_init	= bdev_xnvme_init,
	.module_fini	= bdev_xnvme_fini,
	.get_ctx_size	= bdev_xnvme_get_ctx_size,
	.config_json	= bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

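/* Teardown happens in two steps: destruct() asks the io_device framework to
 * unregister, and once all channels are released the framework invokes
 * destruct_cb(), where the bdev is unlinked from the global list and freed. */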
static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}

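/* Runs once the bdev layer has provided an aligned data buffer. Builds the
 * NVMe command by hand: nlb is a zero-based block count, hence num_blocks - 1,
 * and slba is the starting LBA taken from offset_blocks. The vectored payload
 * is then submitted asynchronously via xnvme_cmd_passv(). */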
static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;

	default:
		SPDK_ERRLOG("Unsupported I/O type\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission succeeded */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or out of memory => let the bdev layer queue the I/O */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Queueing I/O for xnvme bdev\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command-context back in the queue */
	default:
		SPDK_ERRLOG("xnvme_cmd_passv(): unexpected submission error: %d\n", err);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}

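/* Entry point for all I/O. Reads and writes are routed through
 * spdk_bdev_io_get_buf() so the payload satisfies required_alignment before
 * the command is built in bdev_xnvme_get_buf_cb(); all other I/O types fail. */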
static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If the user supplied unaligned buffers,
	 * get an aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct		= bdev_xnvme_destruct,
	.submit_request		= bdev_xnvme_submit_request,
	.io_type_supported	= bdev_xnvme_io_type_supported,
	.get_io_channel		= bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

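/* Completion callback invoked from the poller for every finished command.
 * A non-zero xNVMe completion status is logged (with a pretty-printed command
 * context) and mapped to SPDK_BDEV_IO_STATUS_FAILED before the context is
 * recycled into the queue. */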
static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command-context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

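/* Channel poller: poke the queue with max set to 0, which (per the xNVMe
 * convention) places no explicit cap on the number of completions reaped, then
 * report BUSY while commands remain outstanding so the reactor keeps polling. */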
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

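/* Per-channel setup: create an xNVMe queue of fixed depth 512 (xNVMe expects
 * the queue capacity to be a power of two), attach the completion callback,
 * and register a poller that drains the queue on every reactor tick. */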
static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512;

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}

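/* Open the device through xNVMe and register it with the bdev layer. The
 * io_mechanism string selects the xNVMe async backend (for example "libaio",
 * "io_uring", or "io_uring_cmd"; the last additionally enables
 * submission-queue polling below) and is passed to xNVMe via opts.async. */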
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism)
{
	struct bdev_xnvme *xnvme;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
		opts.poll_sq = 1;
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2).\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %lu, block_size : %d\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

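/* Deletion is asynchronous: the context below carries the caller's completion
 * callback across spdk_bdev_unregister(), which invokes
 * xnvme_bdev_unregister_cb() once the unregistration has finished. */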
struct delete_xnvme_bdev_ctx {
	struct bdev_xnvme *xnvme;
	spdk_delete_xnvme_complete cb_fn;
	void *cb_arg;
};

static void
xnvme_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_xnvme_bdev_ctx *ctx = arg;

	bdev_xnvme_destruct(ctx->xnvme);
	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

void
delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg)
{
	struct delete_xnvme_bdev_ctx *ctx;
	struct bdev_xnvme *xnvme;

	if (!bdev || bdev->module != &xnvme_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	/* Only dereference bdev->ctxt after bdev itself has been validated. */
	xnvme = (struct bdev_xnvme *)bdev->ctxt;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->xnvme = xnvme;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	spdk_bdev_unregister(bdev, xnvme_bdev_unregister_cb, ctx);
}

static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

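/* Module init registers a module-scoped io_device named "xnvme_module" with
 * no per-channel state (channel size 0, empty create/destroy callbacks); fini
 * tears it down again via spdk_io_device_unregister(). */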
static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)