xref: /spdk/module/bdev/null/bdev_null.c (revision bf30e09abe1667ae2769aa367cde39c550bcac00)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2017 Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/string.h"
#include "spdk/likely.h"

#include "spdk/bdev_module.h"
#include "spdk/log.h"

#include "bdev_null.h"

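/*
 * Per-I/O driver context for the null bdev. No per-I/O state is needed beyond
 * a list link, so a submitted I/O can sit on its channel's queue until the
 * poller completes it. bdev_null_get_ctx_size() reports this size to the bdev
 * layer.
 */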
struct null_bdev_io {
	TAILQ_ENTRY(null_bdev_io) link;
};

struct null_bdev {
	struct spdk_bdev	bdev;
	TAILQ_ENTRY(null_bdev)	tailq;
};

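/*
 * Per-thread I/O channel context: queued I/Os are accumulated on 'io' and
 * completed in batches by 'poller' on the thread that owns the channel.
 */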
struct null_io_channel {
	struct spdk_poller		*poller;
	TAILQ_HEAD(, null_bdev_io)	io;
};

static TAILQ_HEAD(, null_bdev) g_null_bdev_head = TAILQ_HEAD_INITIALIZER(g_null_bdev_head);
static void *g_null_read_buf;

static int bdev_null_initialize(void);
static void bdev_null_finish(void);

static int
bdev_null_get_ctx_size(void)
{
	return sizeof(struct null_bdev_io);
}

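/*
 * Module descriptor. async_fini is set because teardown has to wait for the
 * io_device unregistration callback (see bdev_null_finish()) before calling
 * spdk_bdev_module_fini_done().
 */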
static struct spdk_bdev_module null_if = {
	.name = "null",
	.module_init = bdev_null_initialize,
	.module_fini = bdev_null_finish,
	.async_fini = true,
	.get_ctx_size = bdev_null_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(null, &null_if)

static int
bdev_null_destruct(void *ctx)
{
	struct null_bdev *bdev = ctx;

	TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq);
	free(bdev->bdev.name);
	free(bdev);

	return 0;
}

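/*
 * Look for the I/O to abort on this channel's pending queue; if it is found,
 * remove it and complete it with ABORTED status.
 */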
static bool
bdev_null_abort_io(struct null_io_channel *ch, struct spdk_bdev_io *bio_to_abort)
{
	struct null_bdev_io *null_io;
	struct spdk_bdev_io *bdev_io;

	TAILQ_FOREACH(null_io, &ch->io, link) {
		bdev_io = spdk_bdev_io_from_ctx(null_io);

		if (bdev_io == bio_to_abort) {
			TAILQ_REMOVE(&ch->io, null_io, link);
			spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
			return true;
		}
	}

	return false;
}

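/*
 * I/O submission path. Reads are served from the shared zeroed buffer when no
 * buffer was supplied, DIF is generated (reads) or verified (writes) when the
 * bdev is DIF-enabled, and accepted I/Os are queued on the channel so the
 * poller completes them on a later poll rather than in the submission context.
 */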
static void
bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
{
	struct null_bdev_io *null_io = (struct null_bdev_io *)bdev_io->driver_ctx;
	struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch);
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

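	/*
	 * For reads and writes on a DIF-enabled bdev, build a DIF context up
	 * front; it is used below to generate protection information on reads
	 * and to verify it on writes.
	 */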
	if (SPDK_DIF_DISABLE != bdev->dif_type &&
	    (SPDK_BDEV_IO_TYPE_READ == bdev_io->type ||
	     SPDK_BDEV_IO_TYPE_WRITE == bdev_io->type)) {
		dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
		dif_opts.dif_pi_format = bdev->dif_pi_format;
		rc = spdk_dif_ctx_init(&dif_ctx,
				       bdev->blocklen,
				       bdev->md_len,
				       bdev->md_interleave,
				       bdev->dif_is_head_of_md,
				       bdev->dif_type,
				       bdev_io->u.bdev.dif_check_flags,
				       bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF,
				       0xFFFF, 0, 0, 0, &dif_opts);
		if (0 != rc) {
			SPDK_ERRLOG("Failed to initialize DIF context, error %d\n", rc);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
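		/*
		 * If the caller did not supply a data buffer, point the I/O at
		 * the shared zeroed read buffer allocated in
		 * bdev_null_initialize(), provided the request fits in it.
		 */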
		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
			assert(bdev_io->u.bdev.iovcnt == 1);
			if (spdk_likely(bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen <=
					SPDK_BDEV_LARGE_BUF_MAX_SIZE)) {
				bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf;
				bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
			} else {
				SPDK_ERRLOG("Overflow occurred. Read I/O size %" PRIu64 " was larger than permitted %d\n",
					    bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
					    SPDK_BDEV_LARGE_BUF_MAX_SIZE);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		if (SPDK_DIF_DISABLE != bdev->dif_type) {
			rc = spdk_dif_generate(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					       bdev_io->u.bdev.num_blocks, &dif_ctx);
			if (0 != rc) {
				SPDK_ERRLOG("IO DIF generation failed: lba %" PRIu64 ", num_block %" PRIu64 "\n",
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&ch->io, null_io, link);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (SPDK_DIF_DISABLE != bdev->dif_type) {
			rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
			if (0 != rc) {
				SPDK_ERRLOG("IO DIF verification failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", "
					    "err_type %u, expected %lu, actual %lu, err_offset %u\n",
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks,
					    err_blk.err_type,
					    err_blk.expected,
					    err_blk.actual,
					    err_blk.err_offset);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&ch->io, null_io, link);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_RESET:
		TAILQ_INSERT_TAIL(&ch->io, null_io, link);
		break;
	case SPDK_BDEV_IO_TYPE_ABORT:
		if (bdev_null_abort_io(ch, bdev_io->u.abort.bio_to_abort)) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		} else {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static bool
bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_ABORT:
		return true;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	default:
		return false;
	}
}

static struct spdk_io_channel *
bdev_null_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(&g_null_bdev_head);
}

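/*
 * Emit the bdev_null_create RPC call needed to recreate this bdev when the
 * configuration is saved. The generated entry looks roughly like the following
 * (illustrative values, abbreviated):
 *
 *   {
 *     "method": "bdev_null_create",
 *     "params": {
 *       "name": "Null0",
 *       "num_blocks": 131072,
 *       "block_size": 512,
 *       ...
 *     }
 *   }
 */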
static void
bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_null_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_json_write_named_uint32(w, "physical_block_size", bdev->phys_blocklen);
	spdk_json_write_named_uint32(w, "md_size", bdev->md_len);
	spdk_json_write_named_uint32(w, "dif_type", bdev->dif_type);
	spdk_json_write_named_bool(w, "dif_is_head_of_md", bdev->dif_is_head_of_md);
	spdk_json_write_named_uint32(w, "dif_pi_format", bdev->dif_pi_format);
	spdk_json_write_named_uuid(w, "uuid", &bdev->uuid);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table null_fn_table = {
	.destruct		= bdev_null_destruct,
	.submit_request		= bdev_null_submit_request,
	.io_type_supported	= bdev_null_io_type_supported,
	.get_io_channel		= bdev_null_get_io_channel,
	.write_config_json	= bdev_null_write_config_json,
};

/* Use a dummy DIF context to validate the DIF configuration of the
 * created bdev.
 */
static int
_bdev_validate_dif_config(struct spdk_bdev *bdev)
{
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = bdev->dif_pi_format;

	return spdk_dif_ctx_init(&dif_ctx,
				 bdev->blocklen,
				 bdev->md_len,
				 true,
				 bdev->dif_is_head_of_md,
				 bdev->dif_type,
				 bdev->dif_check_flags,
				 SPDK_DIF_REFTAG_IGNORE,
				 0xFFFF, SPDK_DIF_APPTAG_IGNORE,
				 0, 0, &dif_opts);
}

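/*
 * Create and register a null bdev from the given options. Validation covers
 * the metadata size, 512-byte multiples for the data and physical block sizes,
 * a non-zero block count and, when DIF is enabled, the DIF configuration.
 *
 * A minimal caller sketch (field values are illustrative only):
 *
 *   struct null_bdev_opts opts = {};
 *   struct spdk_bdev *bdev;
 *
 *   opts.name = "Null0";
 *   opts.num_blocks = 131072;
 *   opts.block_size = 512;
 *   opts.physical_block_size = 512;
 *   rc = bdev_null_create(&bdev, &opts);
 */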
int
bdev_null_create(struct spdk_bdev **bdev, const struct null_bdev_opts *opts)
{
	struct null_bdev *null_disk;
	uint32_t block_size;
	int rc;

	if (!opts) {
		SPDK_ERRLOG("No options provided for Null bdev.\n");
		return -EINVAL;
	}

	switch (opts->md_size) {
	case 0:
	case 8:
	case 16:
	case 32:
	case 64:
	case 128:
		break;
	default:
		SPDK_ERRLOG("metadata size %u is not supported\n", opts->md_size);
		return -EINVAL;
	}

	if (opts->block_size % 512 != 0) {
		SPDK_ERRLOG("Data block size %u is not a multiple of 512.\n", opts->block_size);
		return -EINVAL;
	}

	if (opts->physical_block_size % 512 != 0) {
		SPDK_ERRLOG("Physical block must be 512 bytes aligned\n");
		return -EINVAL;
	}

	block_size = opts->block_size + opts->md_size;

	if (opts->num_blocks == 0) {
		SPDK_ERRLOG("Disk must be more than 0 blocks\n");
		return -EINVAL;
	}

	null_disk = calloc(1, sizeof(*null_disk));
	if (!null_disk) {
		SPDK_ERRLOG("could not allocate null_bdev\n");
		return -ENOMEM;
	}

	null_disk->bdev.name = strdup(opts->name);
	if (!null_disk->bdev.name) {
		free(null_disk);
		return -ENOMEM;
	}
	null_disk->bdev.product_name = "Null disk";

	null_disk->bdev.write_cache = 0;
	null_disk->bdev.blocklen = block_size;
	null_disk->bdev.phys_blocklen = opts->physical_block_size;
	null_disk->bdev.blockcnt = opts->num_blocks;
	null_disk->bdev.md_len = opts->md_size;
	null_disk->bdev.md_interleave = true;
	null_disk->bdev.dif_type = opts->dif_type;
	null_disk->bdev.dif_is_head_of_md = opts->dif_is_head_of_md;
	/* The current block device layer API does not propagate any DIF-related
	 * information from the user, so we cannot generate or verify the
	 * Application Tag.
	 */
	switch (opts->dif_type) {
	case SPDK_DIF_TYPE1:
	case SPDK_DIF_TYPE2:
		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK |
						  SPDK_DIF_FLAGS_REFTAG_CHECK;
		break;
	case SPDK_DIF_TYPE3:
		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK;
		break;
	case SPDK_DIF_DISABLE:
		break;
	}
	null_disk->bdev.dif_pi_format = opts->dif_pi_format;

	if (opts->dif_type != SPDK_DIF_DISABLE) {
		rc = _bdev_validate_dif_config(&null_disk->bdev);
		if (rc != 0) {
			SPDK_ERRLOG("DIF configuration was wrong\n");
			free(null_disk->bdev.name);
			free(null_disk);
			return -EINVAL;
		}
	}

	if (!spdk_uuid_is_null(&opts->uuid)) {
		spdk_uuid_copy(&null_disk->bdev.uuid, &opts->uuid);
	}

	null_disk->bdev.ctxt = null_disk;
	null_disk->bdev.fn_table = &null_fn_table;
	null_disk->bdev.module = &null_if;

	rc = spdk_bdev_register(&null_disk->bdev);
	if (rc) {
		free(null_disk->bdev.name);
		free(null_disk);
		return rc;
	}

	*bdev = &(null_disk->bdev);

	TAILQ_INSERT_TAIL(&g_null_bdev_head, null_disk, tailq);

	return rc;
}

void
bdev_null_delete(const char *bdev_name, spdk_delete_null_complete cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(bdev_name, &null_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}

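/*
 * Channel poller: take everything queued on the channel since the last poll
 * and complete each I/O with success. Returns IDLE when nothing was pending so
 * the framework can account the poller as idle.
 */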
static int
null_io_poll(void *arg)
{
	struct null_io_channel		*ch = arg;
	TAILQ_HEAD(, null_bdev_io)	io;
	struct null_bdev_io		*null_io;

	TAILQ_INIT(&io);
	TAILQ_SWAP(&ch->io, &io, null_bdev_io, link);

	if (TAILQ_EMPTY(&io)) {
		return SPDK_POLLER_IDLE;
	}

	while (!TAILQ_EMPTY(&io)) {
		null_io = TAILQ_FIRST(&io);
		TAILQ_REMOVE(&io, null_io, link);
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(null_io), SPDK_BDEV_IO_STATUS_SUCCESS);
	}

	return SPDK_POLLER_BUSY;
}

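/*
 * I/O channel create/destroy callbacks: each channel gets its own pending-I/O
 * queue and a poller with period 0, i.e. one that runs on every reactor
 * iteration, to drain it.
 */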
static int
null_bdev_create_cb(void *io_device, void *ctx_buf)
{
	struct null_io_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->io);
	ch->poller = SPDK_POLLER_REGISTER(null_io_poll, ch, 0);

	return 0;
}

static void
null_bdev_destroy_cb(void *io_device, void *ctx_buf)
{
	struct null_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);
}

static int
bdev_null_initialize(void)
{
	/*
	 * This will be used if the upper layer expects us to allocate the read
	 * buffer. Instead of using a real rbuf from the bdev pool, just always
	 * point to this same zeroed buffer.
	 */
	g_null_read_buf = spdk_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL,
				       SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (g_null_read_buf == NULL) {
		return -1;
	}

	/*
	 * We need to pick some unique address as our "io device" - so just use the
	 *  address of the global tailq.
	 */
	spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb,
				sizeof(struct null_io_channel), "null_bdev");

	return 0;
}

static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}

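/*
 * Resize a null bdev. The new size is given in MiB and must not be smaller
 * than the current size; the resulting block count change is propagated
 * through spdk_bdev_notify_blockcnt_change().
 */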
int
bdev_null_resize(const char *bdev_name, const uint64_t new_size_in_mb)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	uint64_t current_size_in_mb;
	uint64_t new_size_in_byte;
	int rc = 0;

	rc = spdk_bdev_open_ext(bdev_name, false, dummy_bdev_event_cb, NULL, &desc);
	if (rc != 0) {
		SPDK_ERRLOG("failed to open bdev; %s.\n", bdev_name);
		return rc;
	}

	bdev = spdk_bdev_desc_get_bdev(desc);

	if (bdev->module != &null_if) {
		rc = -EINVAL;
		goto exit;
	}

	current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
	if (new_size_in_mb < current_size_in_mb) {
		SPDK_ERRLOG("The new bdev size must not be smaller than current bdev size.\n");
		rc = -EINVAL;
		goto exit;
	}

	new_size_in_byte = new_size_in_mb * 1024 * 1024;

	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
	if (rc != 0) {
		SPDK_ERRLOG("failed to notify block cnt change.\n");
	}

exit:
	spdk_bdev_close(desc);
	return rc;
}

static void
_bdev_null_finish_cb(void *arg)
{
	spdk_free(g_null_read_buf);
	spdk_bdev_module_fini_done();
}

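/*
 * Asynchronous module teardown: unregister the io_device and free the shared
 * read buffer from the unregistration callback. If initialization never
 * completed (no read buffer), finish immediately.
 */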
static void
bdev_null_finish(void)
{
	if (g_null_read_buf == NULL) {
		spdk_bdev_module_fini_done();
		return;
	}
	spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb);
}

SPDK_LOG_REGISTER_COMPONENT(bdev_null)