/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   Copyright (c) Samsung Electronics Co., Ltd.
 *   All rights reserved.
 */

#include "libxnvme.h"
#include "libxnvme_pp.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

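/*
 * Per-channel context: one xNVMe command queue plus the SPDK poller that
 * reaps its completions.
 */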
struct bdev_xnvme_io_channel {
	struct xnvme_queue	*queue;
	struct spdk_poller	*poller;
};

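/* Per-I/O driver context, carved out of each spdk_bdev_io by the bdev layer. */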
struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

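/* One instance per exposed xNVMe bdev; wraps the underlying xnvme_dev handle. */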
struct bdev_xnvme {
	struct spdk_bdev	bdev;
	char			*filename;
	char			*io_mechanism;
	struct xnvme_dev	*dev;
	uint32_t		nsid;
	bool			conserve_cpu;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

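/* Size of the per-I/O driver context the bdev layer allocates for this module. */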
static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

static int
bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
{
	struct bdev_xnvme *xnvme;

	TAILQ_FOREACH(xnvme, &g_xnvme_bdev_head, link) {
		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "bdev_xnvme_create");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", xnvme->bdev.name);
		spdk_json_write_named_string(w, "filename", xnvme->filename);
		spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
		spdk_json_write_named_bool(w, "conserve_cpu", xnvme->conserve_cpu);
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	return 0;
}
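
/*
 * Example of one object emitted by bdev_xnvme_config_json() above; the
 * values are illustrative, not taken from a real configuration:
 *
 *   {
 *     "method": "bdev_xnvme_create",
 *     "params": {
 *       "name": "xnvme0",
 *       "filename": "/dev/nvme0n1",
 *       "io_mechanism": "io_uring",
 *       "conserve_cpu": false
 *     }
 *   }
 */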

static struct spdk_bdev_module xnvme_if = {
	.name		= "xnvme",
	.module_init	= bdev_xnvme_init,
	.module_fini	= bdev_xnvme_fini,
	.get_ctx_size	= bdev_xnvme_get_ctx_size,
	.config_json	= bdev_xnvme_config_json,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

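/*
 * Teardown happens in two steps: destruct unregisters the io_device, and the
 * unregister callback removes the bdev from the global list and frees it.
 */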
static void
bdev_xnvme_destruct_cb(void *io_device)
{
	struct bdev_xnvme *xnvme = io_device;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	bdev_xnvme_free(xnvme);
}

static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	spdk_io_device_unregister(xnvme, bdev_xnvme_destruct_cb);

	return 0;
}

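/*
 * Buffer-ready callback: build the NVMe read/write command and submit it on
 * this channel's xNVMe queue via the vectored passthrough helper.
 */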
static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;	/* NLB is a zero-based count */
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;	/* NLB is a zero-based count */
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;

	default:
		SPDK_ERRLOG("Unsupported I/O type\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission success! */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: the queue is full or out of memory, so let the bdev layer queue the I/O */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Queueing I/O for xnvme bdev\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command context back in the queue */
	default:
		SPDK_ERRLOG("xnvme_cmd_passv: submission failed with unexpected error\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}

static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If the user specified unaligned buffers,
	 * get an aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct		= bdev_xnvme_destruct,
	.submit_request		= bdev_xnvme_submit_request,
	.io_type_supported	= bdev_xnvme_io_type_supported,
	.get_io_channel		= bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	xnvme_dev_close(xnvme->dev);
	free(xnvme->io_mechanism);
	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

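/*
 * Completion callback, invoked while poking the queue for each finished
 * command: translate the xNVMe completion status into a bdev status and
 * recycle the command context.
 */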
static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O Failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

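/*
 * Channel poller: reap completions from the xNVMe queue; passing 0 as the
 * max lets xnvme_queue_poke() process all completions that are available.
 */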
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

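/*
 * Per-channel setup: create an xNVMe queue with a fixed depth of 512
 * entries, attach the completion callback, and register the poller.
 */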
static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512;

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}

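/*
 * Create an xNVMe bdev on top of the given device path. A minimal usage
 * sketch (name, path, and mechanism below are illustrative, not prescriptive):
 *
 *   struct spdk_bdev *bdev;
 *
 *   bdev = create_xnvme_bdev("xnvme0", "/dev/nvme0n1", "io_uring", false);
 *   if (bdev == NULL) {
 *           SPDK_ERRLOG("create_xnvme_bdev() failed\n");
 *   }
 */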
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism,
		  bool conserve_cpu)
{
	struct bdev_xnvme *xnvme;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}
	xnvme->io_mechanism = strdup(io_mechanism);
	if (!xnvme->io_mechanism) {
		goto error_return;
	}

	/* Unless asked to conserve CPU, enable the polling mode that fits the
	 * chosen I/O mechanism. */
	if (!conserve_cpu) {
		if (!strcmp(xnvme->io_mechanism, "libaio")) {
			opts.poll_io = 1;
		} else if (!strcmp(xnvme->io_mechanism, "io_uring")) {
			opts.poll_io = 1;
		} else if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
			opts.poll_sq = 1;
		}
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2).\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %lu, block_size : %u\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	bdev_xnvme_free(xnvme);
	return NULL;
}

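/*
 * Unregister the named bdev. If the lookup itself fails, invoke the caller's
 * callback directly with the error code.
 */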
void
delete_xnvme_bdev(const char *name, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(name, &xnvme_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}

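/*
 * The module registers itself as an io_device with zero-sized channels, so
 * the create/destroy callbacks below intentionally do nothing.
 */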
static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)