xref: /spdk/module/bdev/null/bdev_null.c (revision 95d6c9fac17572b107042103439aafd696d60b0e)
1  /*   SPDX-License-Identifier: BSD-3-Clause
2   *   Copyright (C) 2017 Intel Corporation. All rights reserved.
3   *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4   */
5  
6  #include "spdk/stdinc.h"
7  
8  #include "spdk/bdev.h"
9  #include "spdk/env.h"
10  #include "spdk/thread.h"
11  #include "spdk/json.h"
12  #include "spdk/string.h"
13  #include "spdk/likely.h"
14  
15  #include "spdk/bdev_module.h"
16  #include "spdk/log.h"
17  
18  #include "bdev_null.h"
19  
/* Per-I/O driver context; bdev_null_get_ctx_size() reports its size. */
struct null_bdev_io {
	/* Linkage on a channel's list of I/O waiting to be completed by null_io_poll(). */
	TAILQ_ENTRY(null_bdev_io) link;
};
23  
/* One null block device instance, allocated by bdev_null_create(). */
struct null_bdev {
	/* Generic bdev object that is registered with spdk_bdev_register(). */
	struct spdk_bdev	bdev;
	/* Linkage on g_null_bdev_head. */
	TAILQ_ENTRY(null_bdev)	tailq;
};
28  
/* Per-channel context; initialised in null_bdev_create_cb(). */
struct null_io_channel {
	/* Poller running null_io_poll() for this channel. */
	struct spdk_poller		*poller;
	/* I/O queued by bdev_null_submit_request() and not yet completed. */
	TAILQ_HEAD(, null_bdev_io)	io;
};
33  
/* List of created null bdevs; its address is also used as the io_device handle. */
static TAILQ_HEAD(, null_bdev) g_null_bdev_head = TAILQ_HEAD_INITIALIZER(g_null_bdev_head);
/* Zeroed buffer (allocated in bdev_null_initialize()) lent to reads that arrive without a data buffer. */
static void *g_null_read_buf;

/* Module init/fini hooks referenced by null_if; defined later in this file. */
static int bdev_null_initialize(void);
static void bdev_null_finish(void);
39  
40  static int
41  bdev_null_get_ctx_size(void)
42  {
43  	return sizeof(struct null_bdev_io);
44  }
45  
/* Module descriptor for the null bdev; registered by the macro below. */
static struct spdk_bdev_module null_if = {
	.name = "null",
	.module_init = bdev_null_initialize,
	.module_fini = bdev_null_finish,
	/* bdev_null_finish() signals completion through spdk_bdev_module_fini_done(). */
	.async_fini = true,
	.get_ctx_size = bdev_null_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(null, &null_if)
55  
56  static int
57  bdev_null_destruct(void *ctx)
58  {
59  	struct null_bdev *bdev = ctx;
60  
61  	TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq);
62  	free(bdev->bdev.name);
63  	free(bdev);
64  
65  	return 0;
66  }
67  
68  static bool
69  bdev_null_abort_io(struct null_io_channel *ch, struct spdk_bdev_io *bio_to_abort)
70  {
71  	struct null_bdev_io *null_io;
72  	struct spdk_bdev_io *bdev_io;
73  
74  	TAILQ_FOREACH(null_io, &ch->io, link) {
75  		bdev_io = spdk_bdev_io_from_ctx(null_io);
76  
77  		if (bdev_io == bio_to_abort) {
78  			TAILQ_REMOVE(&ch->io, null_io, link);
79  			spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
80  			return true;
81  		}
82  	}
83  
84  	return false;
85  }
86  
87  static void
88  bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
89  {
90  	struct null_bdev_io *null_io = (struct null_bdev_io *)bdev_io->driver_ctx;
91  	struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch);
92  	struct spdk_bdev *bdev = bdev_io->bdev;
93  	struct spdk_dif_ctx dif_ctx;
94  	struct spdk_dif_error err_blk;
95  	int rc;
96  	struct spdk_dif_ctx_init_ext_opts dif_opts;
97  
98  	if (SPDK_DIF_DISABLE != bdev->dif_type &&
99  	    (SPDK_BDEV_IO_TYPE_READ == bdev_io->type ||
100  	     SPDK_BDEV_IO_TYPE_WRITE == bdev_io->type)) {
101  		dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
102  		dif_opts.dif_pi_format = bdev->dif_pi_format;
103  		rc = spdk_dif_ctx_init(&dif_ctx,
104  				       bdev->blocklen,
105  				       bdev->md_len,
106  				       bdev->md_interleave,
107  				       bdev->dif_is_head_of_md,
108  				       bdev->dif_type,
109  				       bdev_io->u.bdev.dif_check_flags,
110  				       bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF,
111  				       0xFFFF, 0, 0, 0, &dif_opts);
112  		if (0 != rc) {
113  			SPDK_ERRLOG("Failed to initialize DIF context, error %d\n", rc);
114  			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
115  			return;
116  		}
117  	}
118  
119  	switch (bdev_io->type) {
120  	case SPDK_BDEV_IO_TYPE_READ:
121  		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
122  			assert(bdev_io->u.bdev.iovcnt == 1);
123  			if (spdk_likely(bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen <=
124  					SPDK_BDEV_LARGE_BUF_MAX_SIZE)) {
125  				bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf;
126  				bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
127  			} else {
128  				SPDK_ERRLOG("Overflow occurred. Read I/O size %" PRIu64 " was larger than permitted %d\n",
129  					    bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
130  					    SPDK_BDEV_LARGE_BUF_MAX_SIZE);
131  				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
132  				return;
133  			}
134  		}
135  		if (SPDK_DIF_DISABLE != bdev->dif_type) {
136  			rc = spdk_dif_generate(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
137  					       bdev_io->u.bdev.num_blocks, &dif_ctx);
138  			if (0 != rc) {
139  				SPDK_ERRLOG("IO DIF generation failed: lba %" PRIu64 ", num_block %" PRIu64 "\n",
140  					    bdev_io->u.bdev.offset_blocks,
141  					    bdev_io->u.bdev.num_blocks);
142  				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
143  				return;
144  			}
145  		}
146  		TAILQ_INSERT_TAIL(&ch->io, null_io, link);
147  		break;
148  	case SPDK_BDEV_IO_TYPE_WRITE:
149  		if (SPDK_DIF_DISABLE != bdev->dif_type) {
150  			rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
151  					     bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
152  			if (0 != rc) {
153  				SPDK_ERRLOG("IO DIF verification failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", "
154  					    "err_type %u, expected %lu, actual %lu, err_offset %u\n",
155  					    bdev_io->u.bdev.offset_blocks,
156  					    bdev_io->u.bdev.num_blocks,
157  					    err_blk.err_type,
158  					    err_blk.expected,
159  					    err_blk.actual,
160  					    err_blk.err_offset);
161  				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
162  				return;
163  			}
164  		}
165  		TAILQ_INSERT_TAIL(&ch->io, null_io, link);
166  		break;
167  	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
168  	case SPDK_BDEV_IO_TYPE_RESET:
169  		TAILQ_INSERT_TAIL(&ch->io, null_io, link);
170  		break;
171  	case SPDK_BDEV_IO_TYPE_ABORT:
172  		if (bdev_null_abort_io(ch, bdev_io->u.abort.bio_to_abort)) {
173  			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
174  		} else {
175  			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
176  		}
177  		break;
178  	case SPDK_BDEV_IO_TYPE_FLUSH:
179  	case SPDK_BDEV_IO_TYPE_UNMAP:
180  	default:
181  		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
182  		break;
183  	}
184  }
185  
186  static bool
187  bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
188  {
189  	switch (io_type) {
190  	case SPDK_BDEV_IO_TYPE_READ:
191  	case SPDK_BDEV_IO_TYPE_WRITE:
192  	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
193  	case SPDK_BDEV_IO_TYPE_RESET:
194  	case SPDK_BDEV_IO_TYPE_ABORT:
195  		return true;
196  	case SPDK_BDEV_IO_TYPE_FLUSH:
197  	case SPDK_BDEV_IO_TYPE_UNMAP:
198  	default:
199  		return false;
200  	}
201  }
202  
/*
 * Return an I/O channel for the null bdev io_device, which is identified by
 * the address of g_null_bdev_head (see bdev_null_initialize()). ctx is unused.
 */
static struct spdk_io_channel *
bdev_null_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(&g_null_bdev_head);
}
208  
209  static void
210  bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
211  {
212  	spdk_json_write_object_begin(w);
213  
214  	spdk_json_write_named_string(w, "method", "bdev_null_create");
215  
216  	spdk_json_write_named_object_begin(w, "params");
217  	spdk_json_write_named_string(w, "name", bdev->name);
218  	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
219  	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
220  	spdk_json_write_named_uint32(w, "physical_block_size", bdev->phys_blocklen);
221  	spdk_json_write_named_uint32(w, "md_size", bdev->md_len);
222  	spdk_json_write_named_uint32(w, "dif_type", bdev->dif_type);
223  	spdk_json_write_named_bool(w, "dif_is_head_of_md", bdev->dif_is_head_of_md);
224  	spdk_json_write_named_uint32(w, "dif_pi_format", bdev->dif_pi_format);
225  	spdk_json_write_named_uuid(w, "uuid", &bdev->uuid);
226  	spdk_json_write_object_end(w);
227  
228  	spdk_json_write_object_end(w);
229  }
230  
/* Callback table that bdev_null_create() installs in every null bdev. */
static const struct spdk_bdev_fn_table null_fn_table = {
	.destruct		= bdev_null_destruct,
	.submit_request		= bdev_null_submit_request,
	.io_type_supported	= bdev_null_io_type_supported,
	.get_io_channel		= bdev_null_get_io_channel,
	.write_config_json	= bdev_null_write_config_json,
};
238  
239  /* Use a dummy DIF context to validate DIF configuration of the
240   * craeted bdev.
241   */
242  static int
243  _bdev_validate_dif_config(struct spdk_bdev *bdev)
244  {
245  	struct spdk_dif_ctx dif_ctx;
246  	struct spdk_dif_ctx_init_ext_opts dif_opts;
247  
248  	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
249  	dif_opts.dif_pi_format = bdev->dif_pi_format;
250  
251  	return spdk_dif_ctx_init(&dif_ctx,
252  				 bdev->blocklen,
253  				 bdev->md_len,
254  				 true,
255  				 bdev->dif_is_head_of_md,
256  				 bdev->dif_type,
257  				 bdev->dif_check_flags,
258  				 SPDK_DIF_REFTAG_IGNORE,
259  				 0xFFFF, SPDK_DIF_APPTAG_IGNORE,
260  				 0, 0, &dif_opts);
261  }
262  
263  int
264  bdev_null_create(struct spdk_bdev **bdev, const struct null_bdev_opts *opts)
265  {
266  	struct null_bdev *null_disk;
267  	uint32_t block_size;
268  	int rc;
269  
270  	if (!opts) {
271  		SPDK_ERRLOG("No options provided for Null bdev.\n");
272  		return -EINVAL;
273  	}
274  
275  	switch (opts->md_size) {
276  	case 0:
277  	case 8:
278  	case 16:
279  	case 32:
280  	case 64:
281  	case 128:
282  		break;
283  	default:
284  		SPDK_ERRLOG("metadata size %u is not supported\n", opts->md_size);
285  		return -EINVAL;
286  	}
287  
288  	if (opts->block_size % 512 != 0) {
289  		SPDK_ERRLOG("Data block size %u is not a multiple of 512.\n", opts->block_size);
290  		return -EINVAL;
291  	}
292  
293  	if (opts->physical_block_size % 512 != 0) {
294  		SPDK_ERRLOG("Physical block must be 512 bytes aligned\n");
295  		return -EINVAL;
296  	}
297  
298  	block_size = opts->block_size + opts->md_size;
299  
300  	if (opts->num_blocks == 0) {
301  		SPDK_ERRLOG("Disk must be more than 0 blocks\n");
302  		return -EINVAL;
303  	}
304  
305  	null_disk = calloc(1, sizeof(*null_disk));
306  	if (!null_disk) {
307  		SPDK_ERRLOG("could not allocate null_bdev\n");
308  		return -ENOMEM;
309  	}
310  
311  	null_disk->bdev.name = strdup(opts->name);
312  	if (!null_disk->bdev.name) {
313  		free(null_disk);
314  		return -ENOMEM;
315  	}
316  	null_disk->bdev.product_name = "Null disk";
317  
318  	null_disk->bdev.write_cache = 0;
319  	null_disk->bdev.blocklen = block_size;
320  	null_disk->bdev.phys_blocklen = opts->physical_block_size;
321  	null_disk->bdev.blockcnt = opts->num_blocks;
322  	null_disk->bdev.md_len = opts->md_size;
323  	null_disk->bdev.md_interleave = true;
324  	null_disk->bdev.dif_type = opts->dif_type;
325  	null_disk->bdev.dif_is_head_of_md = opts->dif_is_head_of_md;
326  	/* Current block device layer API does not propagate
327  	 * any DIF related information from user. So, we can
328  	 * not generate or verify Application Tag.
329  	 */
330  	switch (opts->dif_type) {
331  	case SPDK_DIF_TYPE1:
332  	case SPDK_DIF_TYPE2:
333  		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK |
334  						  SPDK_DIF_FLAGS_REFTAG_CHECK;
335  		break;
336  	case SPDK_DIF_TYPE3:
337  		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK;
338  		break;
339  	case SPDK_DIF_DISABLE:
340  		break;
341  	}
342  	null_disk->bdev.dif_pi_format = opts->dif_pi_format;
343  
344  	if (opts->dif_type != SPDK_DIF_DISABLE) {
345  		rc = _bdev_validate_dif_config(&null_disk->bdev);
346  		if (rc != 0) {
347  			SPDK_ERRLOG("DIF configuration was wrong\n");
348  			free(null_disk);
349  			return -EINVAL;
350  		}
351  	}
352  
353  	if (!spdk_uuid_is_null(&opts->uuid)) {
354  		spdk_uuid_copy(&null_disk->bdev.uuid, &opts->uuid);
355  	}
356  
357  	null_disk->bdev.ctxt = null_disk;
358  	null_disk->bdev.fn_table = &null_fn_table;
359  	null_disk->bdev.module = &null_if;
360  
361  	rc = spdk_bdev_register(&null_disk->bdev);
362  	if (rc) {
363  		free(null_disk->bdev.name);
364  		free(null_disk);
365  		return rc;
366  	}
367  
368  	*bdev = &(null_disk->bdev);
369  
370  	TAILQ_INSERT_TAIL(&g_null_bdev_head, null_disk, tailq);
371  
372  	return rc;
373  }
374  
375  void
376  bdev_null_delete(const char *bdev_name, spdk_delete_null_complete cb_fn, void *cb_arg)
377  {
378  	int rc;
379  
380  	rc = spdk_bdev_unregister_by_name(bdev_name, &null_if, cb_fn, cb_arg);
381  	if (rc != 0) {
382  		cb_fn(cb_arg, rc);
383  	}
384  }
385  
386  static int
387  null_io_poll(void *arg)
388  {
389  	struct null_io_channel		*ch = arg;
390  	TAILQ_HEAD(, null_bdev_io)	io;
391  	struct null_bdev_io		*null_io;
392  
393  	TAILQ_INIT(&io);
394  	TAILQ_SWAP(&ch->io, &io, null_bdev_io, link);
395  
396  	if (TAILQ_EMPTY(&io)) {
397  		return SPDK_POLLER_IDLE;
398  	}
399  
400  	while (!TAILQ_EMPTY(&io)) {
401  		null_io = TAILQ_FIRST(&io);
402  		TAILQ_REMOVE(&io, null_io, link);
403  		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(null_io), SPDK_BDEV_IO_STATUS_SUCCESS);
404  	}
405  
406  	return SPDK_POLLER_BUSY;
407  }
408  
409  static int
410  null_bdev_create_cb(void *io_device, void *ctx_buf)
411  {
412  	struct null_io_channel *ch = ctx_buf;
413  
414  	TAILQ_INIT(&ch->io);
415  	ch->poller = SPDK_POLLER_REGISTER(null_io_poll, ch, 0);
416  
417  	return 0;
418  }
419  
/*
 * Channel destroy callback for the null bdev io_device: unregister the
 * poller that null_bdev_create_cb() set up. io_device is unused; ctx_buf is
 * the struct null_io_channel.
 */
static void
null_bdev_destroy_cb(void *io_device, void *ctx_buf)
{
	struct null_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);
}
427  
428  static int
429  bdev_null_initialize(void)
430  {
431  	/*
432  	 * This will be used if upper layer expects us to allocate the read buffer.
433  	 *  Instead of using a real rbuf from the bdev pool, just always point to
434  	 *  this same zeroed buffer.
435  	 */
436  	g_null_read_buf = spdk_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL,
437  				       SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
438  	if (g_null_read_buf == NULL) {
439  		return -1;
440  	}
441  
442  	/*
443  	 * We need to pick some unique address as our "io device" - so just use the
444  	 *  address of the global tailq.
445  	 */
446  	spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb,
447  				sizeof(struct null_io_channel), "null_bdev");
448  
449  	return 0;
450  }
451  
/*
 * Intentionally empty bdev event callback; bdev_null_resize() passes it to
 * spdk_bdev_open_ext() and ignores all events.
 */
static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}
456  
457  int
458  bdev_null_resize(const char *bdev_name, const uint64_t new_size_in_mb)
459  {
460  	struct spdk_bdev_desc *desc;
461  	struct spdk_bdev *bdev;
462  	uint64_t current_size_in_mb;
463  	uint64_t new_size_in_byte;
464  	int rc = 0;
465  
466  	rc = spdk_bdev_open_ext(bdev_name, false, dummy_bdev_event_cb, NULL, &desc);
467  	if (rc != 0) {
468  		SPDK_ERRLOG("failed to open bdev; %s.\n", bdev_name);
469  		return rc;
470  	}
471  
472  	bdev = spdk_bdev_desc_get_bdev(desc);
473  
474  	if (bdev->module != &null_if) {
475  		rc = -EINVAL;
476  		goto exit;
477  	}
478  
479  	current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
480  	if (new_size_in_mb < current_size_in_mb) {
481  		SPDK_ERRLOG("The new bdev size must not be smaller than current bdev size.\n");
482  		rc = -EINVAL;
483  		goto exit;
484  	}
485  
486  	new_size_in_byte = new_size_in_mb * 1024 * 1024;
487  
488  	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
489  	if (rc != 0) {
490  		SPDK_ERRLOG("failed to notify block cnt change.\n");
491  	}
492  
493  exit:
494  	spdk_bdev_close(desc);
495  	return rc;
496  }
497  
/*
 * Callback passed to spdk_io_device_unregister() by bdev_null_finish():
 * release the shared read buffer and report that module teardown is done.
 * arg is unused.
 */
static void
_bdev_null_finish_cb(void *arg)
{
	spdk_free(g_null_read_buf);
	spdk_bdev_module_fini_done();
}
504  
505  static void
506  bdev_null_finish(void)
507  {
508  	if (g_null_read_buf == NULL) {
509  		spdk_bdev_module_fini_done();
510  		return;
511  	}
512  	spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb);
513  }
514  
/* Register the "bdev_null" log component for this module. */
SPDK_LOG_REGISTER_COMPONENT(bdev_null)
516