xref: /spdk/module/bdev/xnvme/bdev_xnvme.c (revision cdb0726b95631d46eaf4f2e39ddb6533f150fd27)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Samsung Electronics Co., Ltd.
 *   All rights reserved.
 */

#include "libxnvme.h"
#include "libxnvme_pp.h"

#include "bdev_xnvme.h"

#include "spdk/stdinc.h"

#include "spdk/barrier.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/fd.h"
#include "spdk/likely.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/util.h"
#include "spdk/string.h"

#include "spdk/log.h"

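/* Per-channel context: one xNVMe queue plus the SPDK poller that reaps its
 * completions. One instance is allocated per SPDK thread through the io_device
 * registered in create_xnvme_bdev(). */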
struct bdev_xnvme_io_channel {
	struct xnvme_queue	*queue;
	struct spdk_poller	*poller;
};

struct bdev_xnvme_task {
	struct bdev_xnvme_io_channel *ch;
	TAILQ_ENTRY(bdev_xnvme_task) link;
};

struct bdev_xnvme {
	struct spdk_bdev	bdev;
	char			*filename;
	struct xnvme_dev	*dev;
	uint32_t		nsid;

	TAILQ_ENTRY(bdev_xnvme) link;
};

static int bdev_xnvme_init(void);
static void bdev_xnvme_fini(void);
static void bdev_xnvme_free(struct bdev_xnvme *xnvme);
static TAILQ_HEAD(, bdev_xnvme) g_xnvme_bdev_head = TAILQ_HEAD_INITIALIZER(g_xnvme_bdev_head);

static int
bdev_xnvme_get_ctx_size(void)
{
	return sizeof(struct bdev_xnvme_task);
}

static struct spdk_bdev_module xnvme_if = {
	.name		= "xnvme",
	.module_init	= bdev_xnvme_init,
	.module_fini	= bdev_xnvme_fini,
	.get_ctx_size	= bdev_xnvme_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(xnvme, &xnvme_if)

static struct spdk_io_channel *
bdev_xnvme_get_io_channel(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;

	return spdk_get_io_channel(xnvme);
}

static bool
bdev_xnvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

static int
bdev_xnvme_destruct(void *ctx)
{
	struct bdev_xnvme *xnvme = ctx;
	int rc = 0;

	TAILQ_REMOVE(&g_xnvme_bdev_head, xnvme, link);
	spdk_io_device_unregister(xnvme, NULL);
	bdev_xnvme_free(xnvme);

	return rc;
}

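/* Buffer-ready callback for spdk_bdev_io_get_buf(): grab a command context from the
 * per-channel xNVMe queue, fill in an NVM read/write command (NLB is zero-based in
 * NVMe, hence num_blocks - 1) and submit it with xnvme_cmd_passv(). Completions are
 * reaped later by bdev_xnvme_poll(). */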
static void
bdev_xnvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
	struct bdev_xnvme_task *xnvme_task = (struct bdev_xnvme_task *)bdev_io->driver_ctx;
	struct bdev_xnvme *xnvme = (struct bdev_xnvme *)bdev_io->bdev->ctxt;
	struct bdev_xnvme_io_channel *xnvme_ch = spdk_io_channel_get_ctx(ch);
	struct xnvme_cmd_ctx *ctx = xnvme_queue_get_cmd_ctx(xnvme_ch->queue);
	int err;

	if (!success) {
		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_io : %p, iov_cnt : %d, bdev_xnvme_task : %p\n",
		      bdev_io, bdev_io->u.bdev.iovcnt, (struct bdev_xnvme_task *)bdev_io->driver_ctx);

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		ctx->cmd.common.nsid = xnvme->nsid;
		ctx->cmd.nvm.nlb = bdev_io->u.bdev.num_blocks - 1;
		ctx->cmd.nvm.slba = bdev_io->u.bdev.offset_blocks;
		break;

	default:
		SPDK_ERRLOG("Wrong io type\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	xnvme_task->ch = xnvme_ch;
	ctx->async.cb_arg = xnvme_task;

	err = xnvme_cmd_passv(ctx, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks * xnvme->bdev.blocklen, NULL, 0, 0);

	switch (err) {
	/* Submission success! */
	case 0:
		SPDK_DEBUGLOG(xnvme, "io_channel : %p, iovcnt:%d, nblks: %lu off: %#lx\n",
			      xnvme_ch, bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
		return;

	/* Submission failed: queue is full or out of memory => let the bdev layer queue the I/O */
	case -EBUSY:
	case -EAGAIN:
	case -ENOMEM:
		SPDK_WARNLOG("Queueing I/O for xnvme bdev\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;

	/* Submission failed: unexpected error, put the command context back in the queue */
	default:
		SPDK_ERRLOG("bdev_xnvme_cmd_passv : Submission failed: unexpected error\n");

		xnvme_queue_put_cmd_ctx(xnvme_ch->queue, ctx);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
}

static void
bdev_xnvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	/* Read and write operations must be performed on buffers aligned to
	 * bdev->required_alignment. If the user specified unaligned buffers,
	 * get an aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		spdk_bdev_io_get_buf(bdev_io, bdev_xnvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;

	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table xnvme_fn_table = {
	.destruct		= bdev_xnvme_destruct,
	.submit_request		= bdev_xnvme_submit_request,
	.io_type_supported	= bdev_xnvme_io_type_supported,
	.get_io_channel		= bdev_xnvme_get_io_channel,
};

static void
bdev_xnvme_free(struct bdev_xnvme *xnvme)
{
	assert(xnvme != NULL);

	free(xnvme->filename);
	free(xnvme->bdev.name);
	free(xnvme);
}

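/* Completion callback attached to the queue via xnvme_queue_set_cb(): translate the
 * xNVMe completion status into an SPDK bdev I/O status, complete the bdev_io, and
 * recycle the command context. */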
static void
bdev_xnvme_cmd_cb(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct bdev_xnvme_task *xnvme_task = ctx->async.cb_arg;
	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;

	SPDK_DEBUGLOG(xnvme, "xnvme_task : %p\n", xnvme_task);

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		SPDK_ERRLOG("xNVMe I/O Failed\n");
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(xnvme_task), status);

	/* Completed: put the command context back in the queue */
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

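/* Channel poller: poke the xNVMe queue for completions without blocking and report
 * BUSY while commands are still outstanding on the queue. */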
static int
bdev_xnvme_poll(void *arg)
{
	struct bdev_xnvme_io_channel *ch = arg;
	int rc;

	rc = xnvme_queue_poke(ch->queue, 0);
	if (rc < 0) {
		SPDK_ERRLOG("xnvme_queue_poke failure rc : %d\n", rc);
		return SPDK_POLLER_BUSY;
	}

	return xnvme_queue_get_outstanding(ch->queue) ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

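/* io_device channel-create callback: set up a per-thread xNVMe queue with a fixed
 * depth of 512 entries, attach the completion callback, and start the completion
 * poller for this channel. */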
static int
bdev_xnvme_queue_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme *xnvme = io_device;
	struct bdev_xnvme_io_channel *ch = ctx_buf;
	int rc;
	int qd = 512;

	rc = xnvme_queue_init(xnvme->dev, qd, 0, &ch->queue);
	if (rc) {
		SPDK_ERRLOG("xnvme_queue_init failure: %d\n", rc);
		return 1;
	}

	xnvme_queue_set_cb(ch->queue, bdev_xnvme_cmd_cb, ch);

	ch->poller = SPDK_POLLER_REGISTER(bdev_xnvme_poll, ch, 0);

	return 0;
}

static void
bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_xnvme_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);

	xnvme_queue_term(ch->queue);
}

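/* Construction entry point used by the RPC layer: open the device through xNVMe with
 * the requested I/O mechanism (e.g. "io_uring" or "libaio"), validate its geometry,
 * and register the resulting bdev with the SPDK bdev layer. */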
struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism)
{
	struct bdev_xnvme *xnvme;
	uint32_t block_size;
	uint64_t bdev_size;
	int rc;
	struct xnvme_opts opts = xnvme_opts_default();

	xnvme = calloc(1, sizeof(*xnvme));
	if (!xnvme) {
		SPDK_ERRLOG("Unable to allocate enough memory for xNVMe backend\n");
		return NULL;
	}

	opts.direct = 1;
	opts.async = io_mechanism;
	if (!opts.async) {
		goto error_return;
	}

	xnvme->filename = strdup(filename);
	if (!xnvme->filename) {
		goto error_return;
	}

	xnvme->dev = xnvme_dev_open(xnvme->filename, &opts);
	if (!xnvme->dev) {
		SPDK_ERRLOG("Unable to open xNVMe device %s\n", filename);
		goto error_return;
	}

	xnvme->nsid = xnvme_dev_get_nsid(xnvme->dev);

	bdev_size = xnvme_dev_get_geo(xnvme->dev)->tbytes;
	block_size = xnvme_dev_get_geo(xnvme->dev)->nbytes;

	xnvme->bdev.name = strdup(name);
	if (!xnvme->bdev.name) {
		goto error_return;
	}

	xnvme->bdev.product_name = "xNVMe bdev";
	xnvme->bdev.module = &xnvme_if;

	xnvme->bdev.write_cache = 0;

	if (block_size == 0) {
		SPDK_ERRLOG("Block size could not be auto-detected\n");
		goto error_return;
	}

	if (block_size < 512) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
		goto error_return;
	}

	if (!spdk_u32_is_pow2(block_size)) {
		SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
		goto error_return;
	}

	SPDK_DEBUGLOG(xnvme, "bdev_name : %s, bdev_size : %" PRIu64 ", block_size : %" PRIu32 "\n",
		      xnvme->bdev.name, bdev_size, block_size);

	xnvme->bdev.blocklen = block_size;
	xnvme->bdev.required_alignment = spdk_u32log2(block_size);

	if (bdev_size % xnvme->bdev.blocklen != 0) {
		SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
			    bdev_size, xnvme->bdev.blocklen);
		goto error_return;
	}

	xnvme->bdev.blockcnt = bdev_size / xnvme->bdev.blocklen;
	xnvme->bdev.ctxt = xnvme;

	xnvme->bdev.fn_table = &xnvme_fn_table;

	spdk_io_device_register(xnvme, bdev_xnvme_queue_create_cb, bdev_xnvme_queue_destroy_cb,
				sizeof(struct bdev_xnvme_io_channel),
				xnvme->bdev.name);
	rc = spdk_bdev_register(&xnvme->bdev);
	if (rc) {
		spdk_io_device_unregister(xnvme, NULL);
		goto error_return;
	}

	TAILQ_INSERT_TAIL(&g_xnvme_bdev_head, xnvme, link);

	return &xnvme->bdev;

error_return:
	xnvme_dev_close(xnvme->dev);

	bdev_xnvme_free(xnvme);
	return NULL;
}

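/* Deletion context: carries the caller's completion callback through
 * spdk_bdev_unregister() so it can be invoked with the unregister result. */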
struct delete_xnvme_bdev_ctx {
	spdk_delete_xnvme_complete cb_fn;
	void *cb_arg;
};

static void
xnvme_bdev_unregister_cb(void *arg, int bdeverrno)
{
	struct delete_xnvme_bdev_ctx *ctx = arg;

	ctx->cb_fn(ctx->cb_arg, bdeverrno);
	free(ctx);
}

void
delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg)
{
	struct delete_xnvme_bdev_ctx *ctx;
	struct bdev_xnvme *xnvme;

	if (!bdev || bdev->module != &xnvme_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	xnvme = (struct bdev_xnvme *)bdev->ctxt;

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	spdk_bdev_unregister(bdev, xnvme_bdev_unregister_cb, ctx);
	xnvme_dev_close(xnvme->dev);
}

static int
bdev_xnvme_module_create_cb(void *io_device, void *ctx_buf)
{
	return 0;
}

static void
bdev_xnvme_module_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
bdev_xnvme_init(void)
{
	spdk_io_device_register(&xnvme_if, bdev_xnvme_module_create_cb, bdev_xnvme_module_destroy_cb,
				0, "xnvme_module");

	return 0;
}

static void
bdev_xnvme_fini(void)
{
	spdk_io_device_unregister(&xnvme_if, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(xnvme)