/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2017 Intel Corporation. All rights reserved.
 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/string.h"
#include "spdk/likely.h"

#include "spdk/bdev_module.h"
#include "spdk/log.h"

#include "bdev_null.h"

struct null_bdev {
	struct spdk_bdev bdev;
	TAILQ_ENTRY(null_bdev) tailq;
};

struct null_io_channel {
	struct spdk_poller *poller;
	TAILQ_HEAD(, spdk_bdev_io) io;
};

static TAILQ_HEAD(, null_bdev) g_null_bdev_head = TAILQ_HEAD_INITIALIZER(g_null_bdev_head);
static void *g_null_read_buf;

static int bdev_null_initialize(void);
static void bdev_null_finish(void);

static struct spdk_bdev_module null_if = {
	.name = "null",
	.module_init = bdev_null_initialize,
	.module_fini = bdev_null_finish,
	.async_fini = true,
};

SPDK_BDEV_MODULE_REGISTER(null, &null_if)

static int
bdev_null_destruct(void *ctx)
{
	struct null_bdev *bdev = ctx;

	TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq);
	free(bdev->bdev.name);
	free(bdev);

	return 0;
}

static bool
bdev_null_abort_io(struct null_io_channel *ch, struct spdk_bdev_io *bio_to_abort)
{
	struct spdk_bdev_io *bdev_io;

	TAILQ_FOREACH(bdev_io, &ch->io, module_link) {
		if (bdev_io == bio_to_abort) {
			TAILQ_REMOVE(&ch->io, bio_to_abort, module_link);
			spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
			return true;
		}
	}

	return false;
}
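
/*
 * The null bdev never touches payload data: submitted read, write,
 * write-zeroes and reset requests are simply linked onto the per-channel
 * `io` list in bdev_null_submit_request() and later completed with
 * SPDK_BDEV_IO_STATUS_SUCCESS by the channel's poller, null_io_poll().
 * Aborting an I/O therefore only requires removing it from that list,
 * which is what bdev_null_abort_io() above does.  Flush and unmap are
 * rejected as unsupported.
 */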

static void
bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
{
	struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch);
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	if (SPDK_DIF_DISABLE != bdev->dif_type &&
	    (SPDK_BDEV_IO_TYPE_READ == bdev_io->type ||
	     SPDK_BDEV_IO_TYPE_WRITE == bdev_io->type)) {
		dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
		dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
		rc = spdk_dif_ctx_init(&dif_ctx,
				       bdev->blocklen,
				       bdev->md_len,
				       bdev->md_interleave,
				       bdev->dif_is_head_of_md,
				       bdev->dif_type,
				       bdev->dif_check_flags,
				       bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF,
				       0xFFFF, 0, 0, 0, &dif_opts);
		if (0 != rc) {
			SPDK_ERRLOG("Failed to initialize DIF context, error %d\n", rc);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
			assert(bdev_io->u.bdev.iovcnt == 1);
			if (spdk_likely(bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen <=
					SPDK_BDEV_LARGE_BUF_MAX_SIZE)) {
				bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf;
				bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
			} else {
				SPDK_ERRLOG("Overflow occurred. Read I/O size %" PRIu64 " was larger than permitted %d\n",
					    bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
					    SPDK_BDEV_LARGE_BUF_MAX_SIZE);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		if (SPDK_DIF_DISABLE != bdev->dif_type) {
			rc = spdk_dif_generate(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					       bdev_io->u.bdev.num_blocks, &dif_ctx);
			if (0 != rc) {
				SPDK_ERRLOG("IO DIF generation failed: lba %" PRIu64 ", num_block %" PRIu64 "\n",
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (SPDK_DIF_DISABLE != bdev->dif_type) {
			rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
			if (0 != rc) {
				SPDK_ERRLOG("IO DIF verification failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", "
					    "err_type %u, expected %lu, actual %lu, err_offset %u\n",
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks,
					    err_blk.err_type,
					    err_blk.expected,
					    err_blk.actual,
					    err_blk.err_offset);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_RESET:
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_ABORT:
		if (bdev_null_abort_io(ch, bdev_io->u.abort.bio_to_abort)) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		} else {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}
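
/*
 * Note on the DIF context initialized above: the initial reference tag is
 * derived from the low 32 bits of the starting LBA
 * (offset_blocks & 0xFFFFFFFF), so an I/O starting at LBA 0x100000004
 * would use an initial reference tag of 0x00000004.  Only the guard and,
 * for DIF types 1 and 2, the reference tag are ever checked, because
 * bdev_null_create() never sets SPDK_DIF_FLAGS_APPTAG_CHECK.  For reads,
 * protection information is generated directly into the returned buffer
 * (the shared zeroed buffer when the caller did not supply one) before the
 * I/O is queued; for writes, the caller-supplied protection information is
 * verified and the payload is then discarded.
 */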

static bool
bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_ABORT:
		return true;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	default:
		return false;
	}
}

static struct spdk_io_channel *
bdev_null_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(&g_null_bdev_head);
}

static void
bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_null_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_json_write_named_uint32(w, "physical_block_size", bdev->phys_blocklen);
	spdk_json_write_named_uint32(w, "md_size", bdev->md_len);
	spdk_json_write_named_uint32(w, "dif_type", bdev->dif_type);
	spdk_json_write_named_bool(w, "dif_is_head_of_md", bdev->dif_is_head_of_md);
	spdk_json_write_named_uuid(w, "uuid", &bdev->uuid);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table null_fn_table = {
	.destruct = bdev_null_destruct,
	.submit_request = bdev_null_submit_request,
	.io_type_supported = bdev_null_io_type_supported,
	.get_io_channel = bdev_null_get_io_channel,
	.write_config_json = bdev_null_write_config_json,
};
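
/*
 * For reference, bdev_null_write_config_json() above reproduces the
 * bdev_null_create RPC call for a configured bdev.  With illustrative
 * values (the key names come from the function above, the numbers and the
 * name are made up), the emitted JSON looks roughly like:
 *
 *   {
 *     "method": "bdev_null_create",
 *     "params": {
 *       "name": "Null0",
 *       "num_blocks": 262144,
 *       "block_size": 4096,
 *       "physical_block_size": 4096,
 *       "md_size": 0,
 *       "dif_type": 0,
 *       "dif_is_head_of_md": false,
 *       "uuid": "..."
 *     }
 *   }
 */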

int
bdev_null_create(struct spdk_bdev **bdev, const struct spdk_null_bdev_opts *opts)
{
	struct null_bdev *null_disk;
	uint32_t data_block_size;
	int rc;

	if (!opts) {
		SPDK_ERRLOG("No options provided for Null bdev.\n");
		return -EINVAL;
	}

	switch (opts->md_size) {
	case 0:
	case 8:
	case 16:
	case 32:
	case 64:
	case 128:
		break;
	default:
		SPDK_ERRLOG("metadata size %u is not supported\n", opts->md_size);
		return -EINVAL;
	}

	if (opts->md_interleave) {
		if (opts->block_size < opts->md_size) {
			SPDK_ERRLOG("Interleaved metadata size can not be greater than block size.\n");
			return -EINVAL;
		}
		data_block_size = opts->block_size - opts->md_size;
	} else {
		if (opts->md_size != 0) {
			SPDK_ERRLOG("Metadata in separate buffer is not supported\n");
			return -ENOTSUP;
		}
		data_block_size = opts->block_size;
	}

	if (data_block_size % 512 != 0) {
		SPDK_ERRLOG("Data block size %u is not a multiple of 512.\n", data_block_size);
		return -EINVAL;
	}

	if (opts->num_blocks == 0) {
		SPDK_ERRLOG("Disk must be more than 0 blocks\n");
		return -EINVAL;
	}

	null_disk = calloc(1, sizeof(*null_disk));
	if (!null_disk) {
		SPDK_ERRLOG("could not allocate null_bdev\n");
		return -ENOMEM;
	}

	null_disk->bdev.name = strdup(opts->name);
	if (!null_disk->bdev.name) {
		free(null_disk);
		return -ENOMEM;
	}
	null_disk->bdev.product_name = "Null disk";

	null_disk->bdev.write_cache = 0;
	null_disk->bdev.blocklen = opts->block_size;
	null_disk->bdev.phys_blocklen = opts->physical_block_size;
	null_disk->bdev.blockcnt = opts->num_blocks;
	null_disk->bdev.md_len = opts->md_size;
	null_disk->bdev.md_interleave = opts->md_interleave;
	null_disk->bdev.dif_type = opts->dif_type;
	null_disk->bdev.dif_is_head_of_md = opts->dif_is_head_of_md;
	/* Current block device layer API does not propagate
	 * any DIF related information from user. So, we can
	 * not generate or verify Application Tag.
	 */
	switch (opts->dif_type) {
	case SPDK_DIF_TYPE1:
	case SPDK_DIF_TYPE2:
		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK |
						  SPDK_DIF_FLAGS_REFTAG_CHECK;
		break;
	case SPDK_DIF_TYPE3:
		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK;
		break;
	case SPDK_DIF_DISABLE:
		break;
	}

	null_disk->bdev.uuid = *opts->uuid;
	null_disk->bdev.ctxt = null_disk;
	null_disk->bdev.fn_table = &null_fn_table;
	null_disk->bdev.module = &null_if;

	rc = spdk_bdev_register(&null_disk->bdev);
	if (rc) {
		free(null_disk->bdev.name);
		free(null_disk);
		return rc;
	}

	*bdev = &(null_disk->bdev);

	TAILQ_INSERT_TAIL(&g_null_bdev_head, null_disk, tailq);

	return rc;
}

void
bdev_null_delete(const char *bdev_name, spdk_delete_null_complete cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(bdev_name, &null_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}
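
/*
 * A minimal usage sketch for bdev_null_create().  The option fields used
 * below are the ones bdev_null_create() reads above; the full definition of
 * struct spdk_null_bdev_opts lives in bdev_null.h, and the concrete values
 * (name, sizes, UUID handling) are only illustrative.
 *
 *   struct spdk_null_bdev_opts opts = {};
 *   struct spdk_bdev *bdev = NULL;
 *   struct spdk_uuid uuid;
 *   int rc;
 *
 *   spdk_uuid_generate(&uuid);
 *   opts.name = "Null0";
 *   opts.uuid = &uuid;
 *   opts.num_blocks = 262144;          // 1 GiB with 4 KiB blocks
 *   opts.block_size = 4096;
 *   opts.physical_block_size = 4096;
 *   opts.md_size = 0;                  // no metadata
 *   opts.dif_type = SPDK_DIF_DISABLE;
 *
 *   rc = bdev_null_create(&bdev, &opts);
 *   if (rc != 0) {
 *           SPDK_ERRLOG("bdev_null_create() failed: %s\n", spdk_strerror(-rc));
 *   }
 */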

static int
null_io_poll(void *arg)
{
	struct null_io_channel *ch = arg;
	TAILQ_HEAD(, spdk_bdev_io) io;
	struct spdk_bdev_io *bdev_io;

	TAILQ_INIT(&io);
	TAILQ_SWAP(&ch->io, &io, spdk_bdev_io, module_link);

	if (TAILQ_EMPTY(&io)) {
		return SPDK_POLLER_IDLE;
	}

	while (!TAILQ_EMPTY(&io)) {
		bdev_io = TAILQ_FIRST(&io);
		TAILQ_REMOVE(&io, bdev_io, module_link);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	}

	return SPDK_POLLER_BUSY;
}

static int
null_bdev_create_cb(void *io_device, void *ctx_buf)
{
	struct null_io_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->io);
	ch->poller = SPDK_POLLER_REGISTER(null_io_poll, ch, 0);

	return 0;
}

static void
null_bdev_destroy_cb(void *io_device, void *ctx_buf)
{
	struct null_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);
}

static int
bdev_null_initialize(void)
{
	/*
	 * This will be used if upper layer expects us to allocate the read buffer.
	 * Instead of using a real rbuf from the bdev pool, just always point to
	 * this same zeroed buffer.
	 */
	g_null_read_buf = spdk_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL,
				       SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (g_null_read_buf == NULL) {
		return -1;
	}

	/*
	 * We need to pick some unique address as our "io device" - so just use the
	 * address of the global tailq.
	 */
	spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb,
				sizeof(struct null_io_channel), "null_bdev");

	return 0;
}

static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}

int
bdev_null_resize(const char *bdev_name, const uint64_t new_size_in_mb)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	uint64_t current_size_in_mb;
	uint64_t new_size_in_byte;
	int rc = 0;

	rc = spdk_bdev_open_ext(bdev_name, false, dummy_bdev_event_cb, NULL, &desc);
	if (rc != 0) {
		SPDK_ERRLOG("failed to open bdev; %s.\n", bdev_name);
		return rc;
	}

	bdev = spdk_bdev_desc_get_bdev(desc);

	if (bdev->module != &null_if) {
		rc = -EINVAL;
		goto exit;
	}

	current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
	if (new_size_in_mb < current_size_in_mb) {
		SPDK_ERRLOG("The new bdev size must not be smaller than current bdev size.\n");
		rc = -EINVAL;
		goto exit;
	}

	new_size_in_byte = new_size_in_mb * 1024 * 1024;

	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
	if (rc != 0) {
		SPDK_ERRLOG("failed to notify block cnt change.\n");
	}

exit:
	spdk_bdev_close(desc);
	return rc;
}

static void
_bdev_null_finish_cb(void *arg)
{
	spdk_free(g_null_read_buf);
	spdk_bdev_module_fini_done();
}

static void
bdev_null_finish(void)
{
	if (g_null_read_buf == NULL) {
		spdk_bdev_module_fini_done();
		return;
	}
	spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb);
}

SPDK_LOG_REGISTER_COMPONENT(bdev_null)
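
/*
 * Worked example for the size math in bdev_null_resize() above (the numbers
 * are illustrative): a null bdev with blocklen = 4096 and blockcnt = 262144
 * currently reports 262144 * 4096 / (1024 * 1024) = 1024 MiB.  Resizing it
 * to new_size_in_mb = 2048 converts to 2048 * 1024 * 1024 = 2147483648 bytes
 * and calls spdk_bdev_notify_blockcnt_change() with 2147483648 / 4096 =
 * 524288 blocks.  A request smaller than the current size is rejected with
 * -EINVAL before that call is made.
 */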