/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Samsung Electronics Co., Ltd.
 *   All rights reserved.
 */

#include "libxnvme.h"
#include "libxnvme_pp.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

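/* Per-thread I/O channel state: an xNVMe queue whose completions are reaped
 * by an SPDK poller registered on the owning thread.
 */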
struct bdev_xnvme_io_channel {
	struct xnvme_queue	*queue;
	struct spdk_poller	*poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

struct bdev_xnvme {
	struct spdk_bdev	bdev;
	char			*filename;
	char			*io_mechanism;
	struct xnvme_dev	*dev;
	uint32_t		nsid;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

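/* Emit a "bdev_xnvme_create" RPC entry for each registered bdev so the current
 * configuration can be saved and replayed.
 */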
static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}

static struct spdk_bdev_module xnvme_if = {
	.name		= "xnvme",
	.module_init	= bdev_xnvme_init,
	.module_fini	= bdev_xnvme_fini,
	.get_ctx_size	= bdev_xnvme_get_ctx_size,
	.config_json	= bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	/* Defer freeing until the io_device unregistration completes, i.e. until
	 * all I/O channels (and their xNVMe queues) have been destroyed. */
	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}

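/* Buffer-ready callback: build the NVMe read/write command for the bdev_io and
 * submit it on the channel's xNVMe queue via xnvme_cmd_passv().
 */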
static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;

	default:
		SPDK_ERRLOG("Unsupported I/O type: %d\n", bdev_io->type);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission succeeded */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or out of memory => let the bdev layer retry the I/O */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Queueing I/O for xnvme bdev: queue is full or out of memory\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command context back in the queue */
	default:
		SPDK_ERRLOG("xnvme_cmd_passv(): unexpected submission error: %d\n", err);

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}

static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If user specified unaligned buffers,
	 * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct		= bdev_xnvme_destruct,
	.submit_request		= bdev_xnvme_submit_request,
	.io_type_supported	= bdev_xnvme_io_type_supported,
	.get_io_channel		= bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

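/* xNVMe completion callback, invoked from bdev_xnvme_poll() via xnvme_queue_poke():
 * complete the bdev_io and return the command context to the queue.
 */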
static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

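/* Channel poller: reap completions from the xNVMe queue without blocking. */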
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

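/* I/O channel create/destroy callbacks: each channel gets its own xNVMe queue
 * (fixed depth of 512) and a poller that drives completions on that thread.
 */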
static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512;

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}

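/* Open the device through xNVMe using the requested async I/O mechanism
 * (e.g. libaio or io_uring), validate its geometry and register it as a bdev.
 */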
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism)
{
	struct bdev_xnvme *xnvme;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2).\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %" PRIu64 ", block_size : %" PRIu32 "\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

struct delete_xnvme_bdev_ctx {
	struct bdev_xnvme *xnvme;
	spdk_delete_xnvme_complete cb_fn;
	void *cb_arg;
};

static void
xnvme_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_xnvme_bdev_ctx *ctx = arg;

	/* The bdev layer has already invoked bdev_xnvme_destruct() as part of
	 * spdk_bdev_unregister(); only the caller needs to be notified here. */
	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

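/* Unregister the bdev; the completion callback runs once the bdev layer has
 * finished tearing it down.
 */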
void
delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg)
{
	struct delete_xnvme_bdev_ctx *ctx;

	if (!bdev || bdev->module != &xnvme_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->xnvme = (struct bdev_xnvme *)bdev->ctxt;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	spdk_bdev_unregister(bdev, xnvme_bdev_unregister_cb, ctx);
}

static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)