xref: /spdk/module/bdev/null/bdev_null.c (revision f8abbede89d30584d2a4f8427b13896f8591b873)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4  */
5 
6 #include "spdk/stdinc.h"
7 
8 #include "spdk/bdev.h"
9 #include "spdk/env.h"
10 #include "spdk/thread.h"
11 #include "spdk/json.h"
12 #include "spdk/string.h"
13 #include "spdk/likely.h"
14 
15 #include "spdk/bdev_module.h"
16 #include "spdk/log.h"
17 
18 #include "bdev_null.h"
19 
/* One null bdev instance: embeds the generic bdev plus a link into the
 * module-global list of all null bdevs. */
struct null_bdev {
	struct spdk_bdev	bdev;
	TAILQ_ENTRY(null_bdev)	tailq;
};

/* Per-thread channel state: the completion poller and the queue of
 * I/Os waiting for that poller to complete them. */
struct null_io_channel {
	struct spdk_poller		*poller;
	TAILQ_HEAD(, spdk_bdev_io)	io;
};

/* All null bdevs created by this module; also reused as the unique
 * io_device address (see bdev_null_initialize()). */
static TAILQ_HEAD(, null_bdev) g_null_bdev_head = TAILQ_HEAD_INITIALIZER(g_null_bdev_head);
/* Shared zeroed DMA buffer handed out for reads that arrive without a
 * data buffer; allocated in bdev_null_initialize(). */
static void *g_null_read_buf;

static int bdev_null_initialize(void);
static void bdev_null_finish(void);

/* Module descriptor.  async_fini is set because teardown completes only
 * after the io_device unregister callback runs. */
static struct spdk_bdev_module null_if = {
	.name = "null",
	.module_init = bdev_null_initialize,
	.module_fini = bdev_null_finish,
	.async_fini = true,
};

SPDK_BDEV_MODULE_REGISTER(null, &null_if)
44 
45 static int
46 bdev_null_destruct(void *ctx)
47 {
48 	struct null_bdev *bdev = ctx;
49 
50 	TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq);
51 	free(bdev->bdev.name);
52 	free(bdev);
53 
54 	return 0;
55 }
56 
57 static bool
58 bdev_null_abort_io(struct null_io_channel *ch, struct spdk_bdev_io *bio_to_abort)
59 {
60 	struct spdk_bdev_io *bdev_io;
61 
62 	TAILQ_FOREACH(bdev_io, &ch->io, module_link) {
63 		if (bdev_io == bio_to_abort) {
64 			TAILQ_REMOVE(&ch->io, bio_to_abort, module_link);
65 			spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
66 			return true;
67 		}
68 	}
69 
70 	return false;
71 }
72 
/*
 * Bdev layer submit_request callback.  Reads and writes are not executed
 * anywhere: they are queued on the channel and completed successfully by
 * null_io_poll() on the next poller iteration.  Only DIF metadata work is
 * actually performed here (generate on read, verify on write).
 */
static void
bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
{
	struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch);
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	/* For READ/WRITE on a DIF-enabled bdev, build the DIF context up
	 * front; the READ and WRITE arms of the switch below both use it. */
	if (SPDK_DIF_DISABLE != bdev->dif_type &&
	    (SPDK_BDEV_IO_TYPE_READ == bdev_io->type ||
	     SPDK_BDEV_IO_TYPE_WRITE == bdev_io->type)) {
		dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
		dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
		/* Init ref tag is the low 32 bits of the starting LBA;
		 * app tag mask 0xFFFF with app/ref tag values of 0. */
		rc = spdk_dif_ctx_init(&dif_ctx,
				       bdev->blocklen,
				       bdev->md_len,
				       bdev->md_interleave,
				       bdev->dif_is_head_of_md,
				       bdev->dif_type,
				       bdev->dif_check_flags,
				       bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF,
				       0xFFFF, 0, 0, 0, &dif_opts);
		if (0 != rc) {
			SPDK_ERRLOG("Failed to initialize DIF context, error %d\n", rc);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		/* A NULL iov_base means the upper layer expects us to supply
		 * the buffer; point it at the shared zeroed buffer instead of
		 * allocating.  That buffer is only
		 * SPDK_BDEV_LARGE_BUF_MAX_SIZE bytes, so larger reads fail. */
		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
			assert(bdev_io->u.bdev.iovcnt == 1);
			if (spdk_likely(bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen <=
					SPDK_BDEV_LARGE_BUF_MAX_SIZE)) {
				bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf;
				bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
			} else {
				SPDK_ERRLOG("Overflow occurred. Read I/O size %" PRIu64 " was larger than permitted %d\n",
					    bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
					    SPDK_BDEV_LARGE_BUF_MAX_SIZE);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		/* Generate protection info into the read buffer so the reader
		 * sees data that passes DIF checks. */
		if (SPDK_DIF_DISABLE != bdev->dif_type) {
			rc = spdk_dif_generate(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					       bdev_io->u.bdev.num_blocks, &dif_ctx);
			if (0 != rc) {
				SPDK_ERRLOG("IO DIF generation failed: lba %" PRIu64 ", num_block %" PRIu64 "\n",
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		/* Defer completion to null_io_poll(). */
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		/* Verify the writer's protection info; the data itself is
		 * discarded. */
		if (SPDK_DIF_DISABLE != bdev->dif_type) {
			rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
			if (0 != rc) {
				/* NOTE(review): expected/actual are printed with %lu —
				 * on 32-bit targets this may mismatch the field width;
				 * confirm against struct spdk_dif_error. */
				SPDK_ERRLOG("IO DIF verification failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", "
					    "err_type %u, expected %lu, actual %lu, err_offset %u\n",
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks,
					    err_blk.err_type,
					    err_blk.expected,
					    err_blk.actual,
					    err_blk.err_offset);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_RESET:
		/* Nothing to do; complete asynchronously via the poller. */
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_ABORT:
		if (bdev_null_abort_io(ch, bdev_io->u.abort.bio_to_abort)) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		} else {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	default:
		/* Unsupported types (matches bdev_null_io_type_supported()). */
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}
170 
171 static bool
172 bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
173 {
174 	switch (io_type) {
175 	case SPDK_BDEV_IO_TYPE_READ:
176 	case SPDK_BDEV_IO_TYPE_WRITE:
177 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
178 	case SPDK_BDEV_IO_TYPE_RESET:
179 	case SPDK_BDEV_IO_TYPE_ABORT:
180 		return true;
181 	case SPDK_BDEV_IO_TYPE_FLUSH:
182 	case SPDK_BDEV_IO_TYPE_UNMAP:
183 	default:
184 		return false;
185 	}
186 }
187 
/*
 * Bdev layer get_io_channel callback.  All null bdevs share a single
 * io_device keyed on the address of the global bdev list, so every null
 * bdev on a given thread shares one channel (and one poller).
 */
static struct spdk_io_channel *
bdev_null_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(&g_null_bdev_head);
}
193 
/*
 * Bdev layer write_config_json callback: emit the bdev_null_create RPC
 * (method + params) that would recreate this bdev on config replay.
 */
static void
bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_null_create");

	/* Mirror the options accepted by bdev_null_create(). */
	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_json_write_named_uint32(w, "physical_block_size", bdev->phys_blocklen);
	spdk_json_write_named_uint32(w, "md_size", bdev->md_len);
	spdk_json_write_named_uint32(w, "dif_type", bdev->dif_type);
	spdk_json_write_named_bool(w, "dif_is_head_of_md", bdev->dif_is_head_of_md);
	spdk_json_write_named_uuid(w, "uuid", &bdev->uuid);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
214 
/* Function table installed on every null bdev at registration time. */
static const struct spdk_bdev_fn_table null_fn_table = {
	.destruct		= bdev_null_destruct,
	.submit_request		= bdev_null_submit_request,
	.io_type_supported	= bdev_null_io_type_supported,
	.get_io_channel		= bdev_null_get_io_channel,
	.write_config_json	= bdev_null_write_config_json,
};
222 
223 int
224 bdev_null_create(struct spdk_bdev **bdev, const struct spdk_null_bdev_opts *opts)
225 {
226 	struct null_bdev *null_disk;
227 	uint32_t data_block_size;
228 	int rc;
229 
230 	if (!opts) {
231 		SPDK_ERRLOG("No options provided for Null bdev.\n");
232 		return -EINVAL;
233 	}
234 
235 	switch (opts->md_size) {
236 	case 0:
237 	case 8:
238 	case 16:
239 	case 32:
240 	case 64:
241 	case 128:
242 		break;
243 	default:
244 		SPDK_ERRLOG("metadata size %u is not supported\n", opts->md_size);
245 		return -EINVAL;
246 	}
247 
248 	if (opts->md_interleave) {
249 		if (opts->block_size < opts->md_size) {
250 			SPDK_ERRLOG("Interleaved metadata size can not be greater than block size.\n");
251 			return -EINVAL;
252 		}
253 		data_block_size = opts->block_size - opts->md_size;
254 	} else {
255 		if (opts->md_size != 0) {
256 			SPDK_ERRLOG("Metadata in separate buffer is not supported\n");
257 			return -ENOTSUP;
258 		}
259 		data_block_size = opts->block_size;
260 	}
261 
262 	if (data_block_size % 512 != 0) {
263 		SPDK_ERRLOG("Data block size %u is not a multiple of 512.\n", opts->block_size);
264 		return -EINVAL;
265 	}
266 
267 	if (opts->num_blocks == 0) {
268 		SPDK_ERRLOG("Disk must be more than 0 blocks\n");
269 		return -EINVAL;
270 	}
271 
272 	null_disk = calloc(1, sizeof(*null_disk));
273 	if (!null_disk) {
274 		SPDK_ERRLOG("could not allocate null_bdev\n");
275 		return -ENOMEM;
276 	}
277 
278 	null_disk->bdev.name = strdup(opts->name);
279 	if (!null_disk->bdev.name) {
280 		free(null_disk);
281 		return -ENOMEM;
282 	}
283 	null_disk->bdev.product_name = "Null disk";
284 
285 	null_disk->bdev.write_cache = 0;
286 	null_disk->bdev.blocklen = opts->block_size;
287 	null_disk->bdev.phys_blocklen = opts->physical_block_size;
288 	null_disk->bdev.blockcnt = opts->num_blocks;
289 	null_disk->bdev.md_len = opts->md_size;
290 	null_disk->bdev.md_interleave = opts->md_interleave;
291 	null_disk->bdev.dif_type = opts->dif_type;
292 	null_disk->bdev.dif_is_head_of_md = opts->dif_is_head_of_md;
293 	/* Current block device layer API does not propagate
294 	 * any DIF related information from user. So, we can
295 	 * not generate or verify Application Tag.
296 	 */
297 	switch (opts->dif_type) {
298 	case SPDK_DIF_TYPE1:
299 	case SPDK_DIF_TYPE2:
300 		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK |
301 						  SPDK_DIF_FLAGS_REFTAG_CHECK;
302 		break;
303 	case SPDK_DIF_TYPE3:
304 		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK;
305 		break;
306 	case SPDK_DIF_DISABLE:
307 		break;
308 	}
309 
310 	null_disk->bdev.uuid = *opts->uuid;
311 	null_disk->bdev.ctxt = null_disk;
312 	null_disk->bdev.fn_table = &null_fn_table;
313 	null_disk->bdev.module = &null_if;
314 
315 	rc = spdk_bdev_register(&null_disk->bdev);
316 	if (rc) {
317 		free(null_disk->bdev.name);
318 		free(null_disk);
319 		return rc;
320 	}
321 
322 	*bdev = &(null_disk->bdev);
323 
324 	TAILQ_INSERT_TAIL(&g_null_bdev_head, null_disk, tailq);
325 
326 	return rc;
327 }
328 
329 void
330 bdev_null_delete(const char *bdev_name, spdk_delete_null_complete cb_fn, void *cb_arg)
331 {
332 	int rc;
333 
334 	rc = spdk_bdev_unregister_by_name(bdev_name, &null_if, cb_fn, cb_arg);
335 	if (rc != 0) {
336 		cb_fn(cb_arg, rc);
337 	}
338 }
339 
340 static int
341 null_io_poll(void *arg)
342 {
343 	struct null_io_channel		*ch = arg;
344 	TAILQ_HEAD(, spdk_bdev_io)	io;
345 	struct spdk_bdev_io		*bdev_io;
346 
347 	TAILQ_INIT(&io);
348 	TAILQ_SWAP(&ch->io, &io, spdk_bdev_io, module_link);
349 
350 	if (TAILQ_EMPTY(&io)) {
351 		return SPDK_POLLER_IDLE;
352 	}
353 
354 	while (!TAILQ_EMPTY(&io)) {
355 		bdev_io = TAILQ_FIRST(&io);
356 		TAILQ_REMOVE(&io, bdev_io, module_link);
357 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
358 	}
359 
360 	return SPDK_POLLER_BUSY;
361 }
362 
363 static int
364 null_bdev_create_cb(void *io_device, void *ctx_buf)
365 {
366 	struct null_io_channel *ch = ctx_buf;
367 
368 	TAILQ_INIT(&ch->io);
369 	ch->poller = SPDK_POLLER_REGISTER(null_io_poll, ch, 0);
370 
371 	return 0;
372 }
373 
374 static void
375 null_bdev_destroy_cb(void *io_device, void *ctx_buf)
376 {
377 	struct null_io_channel *ch = ctx_buf;
378 
379 	spdk_poller_unregister(&ch->poller);
380 }
381 
382 static int
383 bdev_null_initialize(void)
384 {
385 	/*
386 	 * This will be used if upper layer expects us to allocate the read buffer.
387 	 *  Instead of using a real rbuf from the bdev pool, just always point to
388 	 *  this same zeroed buffer.
389 	 */
390 	g_null_read_buf = spdk_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL,
391 				       SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
392 	if (g_null_read_buf == NULL) {
393 		return -1;
394 	}
395 
396 	/*
397 	 * We need to pick some unique address as our "io device" - so just use the
398 	 *  address of the global tailq.
399 	 */
400 	spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb,
401 				sizeof(struct null_io_channel), "null_bdev");
402 
403 	return 0;
404 }
405 
/* No-op event callback for the descriptor opened read-only in
 * bdev_null_resize(); resize does not need to react to bdev events. */
static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}
410 
411 int
412 bdev_null_resize(const char *bdev_name, const uint64_t new_size_in_mb)
413 {
414 	struct spdk_bdev_desc *desc;
415 	struct spdk_bdev *bdev;
416 	uint64_t current_size_in_mb;
417 	uint64_t new_size_in_byte;
418 	int rc = 0;
419 
420 	rc = spdk_bdev_open_ext(bdev_name, false, dummy_bdev_event_cb, NULL, &desc);
421 	if (rc != 0) {
422 		SPDK_ERRLOG("failed to open bdev; %s.\n", bdev_name);
423 		return rc;
424 	}
425 
426 	bdev = spdk_bdev_desc_get_bdev(desc);
427 
428 	if (bdev->module != &null_if) {
429 		rc = -EINVAL;
430 		goto exit;
431 	}
432 
433 	current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
434 	if (new_size_in_mb < current_size_in_mb) {
435 		SPDK_ERRLOG("The new bdev size must not be smaller than current bdev size.\n");
436 		rc = -EINVAL;
437 		goto exit;
438 	}
439 
440 	new_size_in_byte = new_size_in_mb * 1024 * 1024;
441 
442 	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
443 	if (rc != 0) {
444 		SPDK_ERRLOG("failed to notify block cnt change.\n");
445 	}
446 
447 exit:
448 	spdk_bdev_close(desc);
449 	return rc;
450 }
451 
452 static void
453 _bdev_null_finish_cb(void *arg)
454 {
455 	spdk_free(g_null_read_buf);
456 	spdk_bdev_module_fini_done();
457 }
458 
459 static void
460 bdev_null_finish(void)
461 {
462 	if (g_null_read_buf == NULL) {
463 		spdk_bdev_module_fini_done();
464 		return;
465 	}
466 	spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb);
467 }
468 
469 SPDK_LOG_REGISTER_COMPONENT(bdev_null)
470