1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2017 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 4 */ 5 6 #include "spdk/stdinc.h" 7 8 #include "spdk/bdev.h" 9 #include "spdk/env.h" 10 #include "spdk/thread.h" 11 #include "spdk/json.h" 12 #include "spdk/string.h" 13 #include "spdk/likely.h" 14 15 #include "spdk/bdev_module.h" 16 #include "spdk/log.h" 17 18 #include "bdev_null.h" 19 20 struct null_bdev_io { 21 TAILQ_ENTRY(null_bdev_io) link; 22 }; 23 24 struct null_bdev { 25 struct spdk_bdev bdev; 26 TAILQ_ENTRY(null_bdev) tailq; 27 }; 28 29 struct null_io_channel { 30 struct spdk_poller *poller; 31 TAILQ_HEAD(, null_bdev_io) io; 32 }; 33 34 static TAILQ_HEAD(, null_bdev) g_null_bdev_head = TAILQ_HEAD_INITIALIZER(g_null_bdev_head); 35 static void *g_null_read_buf; 36 37 static int bdev_null_initialize(void); 38 static void bdev_null_finish(void); 39 40 static int 41 bdev_null_get_ctx_size(void) 42 { 43 return sizeof(struct null_bdev_io); 44 } 45 46 static struct spdk_bdev_module null_if = { 47 .name = "null", 48 .module_init = bdev_null_initialize, 49 .module_fini = bdev_null_finish, 50 .async_fini = true, 51 .get_ctx_size = bdev_null_get_ctx_size, 52 }; 53 54 SPDK_BDEV_MODULE_REGISTER(null, &null_if) 55 56 static int 57 bdev_null_destruct(void *ctx) 58 { 59 struct null_bdev *bdev = ctx; 60 61 TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq); 62 free(bdev->bdev.name); 63 free(bdev); 64 65 return 0; 66 } 67 68 static bool 69 bdev_null_abort_io(struct null_io_channel *ch, struct spdk_bdev_io *bio_to_abort) 70 { 71 struct null_bdev_io *null_io; 72 struct spdk_bdev_io *bdev_io; 73 74 TAILQ_FOREACH(null_io, &ch->io, link) { 75 bdev_io = spdk_bdev_io_from_ctx(null_io); 76 77 if (bdev_io == bio_to_abort) { 78 TAILQ_REMOVE(&ch->io, null_io, link); 79 spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED); 80 return true; 81 } 82 } 83 84 return false; 85 } 86 87 static void 88 bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) 89 { 90 struct null_bdev_io *null_io = (struct null_bdev_io *)bdev_io->driver_ctx; 91 struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch); 92 struct spdk_bdev *bdev = bdev_io->bdev; 93 struct spdk_dif_ctx dif_ctx; 94 struct spdk_dif_error err_blk; 95 int rc; 96 struct spdk_dif_ctx_init_ext_opts dif_opts; 97 98 if (SPDK_DIF_DISABLE != bdev->dif_type && 99 (SPDK_BDEV_IO_TYPE_READ == bdev_io->type || 100 SPDK_BDEV_IO_TYPE_WRITE == bdev_io->type)) { 101 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 102 dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16; 103 rc = spdk_dif_ctx_init(&dif_ctx, 104 bdev->blocklen, 105 bdev->md_len, 106 bdev->md_interleave, 107 bdev->dif_is_head_of_md, 108 bdev->dif_type, 109 bdev_io->u.bdev.dif_check_flags, 110 bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF, 111 0xFFFF, 0, 0, 0, &dif_opts); 112 if (0 != rc) { 113 SPDK_ERRLOG("Failed to initialize DIF context, error %d\n", rc); 114 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 115 return; 116 } 117 } 118 119 switch (bdev_io->type) { 120 case SPDK_BDEV_IO_TYPE_READ: 121 if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { 122 assert(bdev_io->u.bdev.iovcnt == 1); 123 if (spdk_likely(bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen <= 124 SPDK_BDEV_LARGE_BUF_MAX_SIZE)) { 125 bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf; 126 bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen; 127 } else { 128 SPDK_ERRLOG("Overflow occurred. Read I/O size %" PRIu64 " was larger than permitted %d\n", 129 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 130 SPDK_BDEV_LARGE_BUF_MAX_SIZE); 131 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 132 return; 133 } 134 } 135 if (SPDK_DIF_DISABLE != bdev->dif_type) { 136 rc = spdk_dif_generate(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 137 bdev_io->u.bdev.num_blocks, &dif_ctx); 138 if (0 != rc) { 139 SPDK_ERRLOG("IO DIF generation failed: lba %" PRIu64 ", num_block %" PRIu64 "\n", 140 bdev_io->u.bdev.offset_blocks, 141 bdev_io->u.bdev.num_blocks); 142 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 143 return; 144 } 145 } 146 TAILQ_INSERT_TAIL(&ch->io, null_io, link); 147 break; 148 case SPDK_BDEV_IO_TYPE_WRITE: 149 if (SPDK_DIF_DISABLE != bdev->dif_type) { 150 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 151 bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 152 if (0 != rc) { 153 SPDK_ERRLOG("IO DIF verification failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", " 154 "err_type %u, expected %lu, actual %lu, err_offset %u\n", 155 bdev_io->u.bdev.offset_blocks, 156 bdev_io->u.bdev.num_blocks, 157 err_blk.err_type, 158 err_blk.expected, 159 err_blk.actual, 160 err_blk.err_offset); 161 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 162 return; 163 } 164 } 165 TAILQ_INSERT_TAIL(&ch->io, null_io, link); 166 break; 167 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 168 case SPDK_BDEV_IO_TYPE_RESET: 169 TAILQ_INSERT_TAIL(&ch->io, null_io, link); 170 break; 171 case SPDK_BDEV_IO_TYPE_ABORT: 172 if (bdev_null_abort_io(ch, bdev_io->u.abort.bio_to_abort)) { 173 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 174 } else { 175 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 176 } 177 break; 178 case SPDK_BDEV_IO_TYPE_FLUSH: 179 case SPDK_BDEV_IO_TYPE_UNMAP: 180 default: 181 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 182 break; 183 } 184 } 185 186 static bool 187 bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 188 { 189 switch (io_type) { 190 case SPDK_BDEV_IO_TYPE_READ: 191 case SPDK_BDEV_IO_TYPE_WRITE: 192 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 193 case SPDK_BDEV_IO_TYPE_RESET: 194 case SPDK_BDEV_IO_TYPE_ABORT: 195 return true; 196 case SPDK_BDEV_IO_TYPE_FLUSH: 197 case SPDK_BDEV_IO_TYPE_UNMAP: 198 default: 199 return false; 200 } 201 } 202 203 static struct spdk_io_channel * 204 bdev_null_get_io_channel(void *ctx) 205 { 206 return spdk_get_io_channel(&g_null_bdev_head); 207 } 208 209 static void 210 bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 211 { 212 spdk_json_write_object_begin(w); 213 214 spdk_json_write_named_string(w, "method", "bdev_null_create"); 215 216 spdk_json_write_named_object_begin(w, "params"); 217 spdk_json_write_named_string(w, "name", bdev->name); 218 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 219 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 220 spdk_json_write_named_uint32(w, "physical_block_size", bdev->phys_blocklen); 221 spdk_json_write_named_uint32(w, "md_size", bdev->md_len); 222 spdk_json_write_named_uint32(w, "dif_type", bdev->dif_type); 223 spdk_json_write_named_bool(w, "dif_is_head_of_md", bdev->dif_is_head_of_md); 224 spdk_json_write_named_uuid(w, "uuid", &bdev->uuid); 225 spdk_json_write_object_end(w); 226 227 spdk_json_write_object_end(w); 228 } 229 230 static const struct spdk_bdev_fn_table null_fn_table = { 231 .destruct = bdev_null_destruct, 232 .submit_request = bdev_null_submit_request, 233 .io_type_supported = bdev_null_io_type_supported, 234 .get_io_channel = bdev_null_get_io_channel, 235 .write_config_json = bdev_null_write_config_json, 236 }; 237 238 int 239 bdev_null_create(struct spdk_bdev **bdev, const struct spdk_null_bdev_opts *opts) 240 { 241 struct null_bdev *null_disk; 242 uint32_t data_block_size; 243 int rc; 244 245 if (!opts) { 246 SPDK_ERRLOG("No options provided for Null bdev.\n"); 247 return -EINVAL; 248 } 249 250 switch (opts->md_size) { 251 case 0: 252 case 8: 253 case 16: 254 case 32: 255 case 64: 256 case 128: 257 break; 258 default: 259 SPDK_ERRLOG("metadata size %u is not supported\n", opts->md_size); 260 return -EINVAL; 261 } 262 263 if (opts->md_interleave) { 264 if (opts->block_size < opts->md_size) { 265 SPDK_ERRLOG("Interleaved metadata size can not be greater than block size.\n"); 266 return -EINVAL; 267 } 268 data_block_size = opts->block_size - opts->md_size; 269 } else { 270 if (opts->md_size != 0) { 271 SPDK_ERRLOG("Metadata in separate buffer is not supported\n"); 272 return -ENOTSUP; 273 } 274 data_block_size = opts->block_size; 275 } 276 277 if (data_block_size % 512 != 0) { 278 SPDK_ERRLOG("Data block size %u is not a multiple of 512.\n", opts->block_size); 279 return -EINVAL; 280 } 281 282 if (opts->num_blocks == 0) { 283 SPDK_ERRLOG("Disk must be more than 0 blocks\n"); 284 return -EINVAL; 285 } 286 287 null_disk = calloc(1, sizeof(*null_disk)); 288 if (!null_disk) { 289 SPDK_ERRLOG("could not allocate null_bdev\n"); 290 return -ENOMEM; 291 } 292 293 null_disk->bdev.name = strdup(opts->name); 294 if (!null_disk->bdev.name) { 295 free(null_disk); 296 return -ENOMEM; 297 } 298 null_disk->bdev.product_name = "Null disk"; 299 300 null_disk->bdev.write_cache = 0; 301 null_disk->bdev.blocklen = opts->block_size; 302 null_disk->bdev.phys_blocklen = opts->physical_block_size; 303 null_disk->bdev.blockcnt = opts->num_blocks; 304 null_disk->bdev.md_len = opts->md_size; 305 null_disk->bdev.md_interleave = opts->md_interleave; 306 null_disk->bdev.dif_type = opts->dif_type; 307 null_disk->bdev.dif_is_head_of_md = opts->dif_is_head_of_md; 308 /* Current block device layer API does not propagate 309 * any DIF related information from user. So, we can 310 * not generate or verify Application Tag. 311 */ 312 switch (opts->dif_type) { 313 case SPDK_DIF_TYPE1: 314 case SPDK_DIF_TYPE2: 315 null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK | 316 SPDK_DIF_FLAGS_REFTAG_CHECK; 317 break; 318 case SPDK_DIF_TYPE3: 319 null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK; 320 break; 321 case SPDK_DIF_DISABLE: 322 break; 323 } 324 325 null_disk->bdev.uuid = *opts->uuid; 326 null_disk->bdev.ctxt = null_disk; 327 null_disk->bdev.fn_table = &null_fn_table; 328 null_disk->bdev.module = &null_if; 329 330 rc = spdk_bdev_register(&null_disk->bdev); 331 if (rc) { 332 free(null_disk->bdev.name); 333 free(null_disk); 334 return rc; 335 } 336 337 *bdev = &(null_disk->bdev); 338 339 TAILQ_INSERT_TAIL(&g_null_bdev_head, null_disk, tailq); 340 341 return rc; 342 } 343 344 void 345 bdev_null_delete(const char *bdev_name, spdk_delete_null_complete cb_fn, void *cb_arg) 346 { 347 int rc; 348 349 rc = spdk_bdev_unregister_by_name(bdev_name, &null_if, cb_fn, cb_arg); 350 if (rc != 0) { 351 cb_fn(cb_arg, rc); 352 } 353 } 354 355 static int 356 null_io_poll(void *arg) 357 { 358 struct null_io_channel *ch = arg; 359 TAILQ_HEAD(, null_bdev_io) io; 360 struct null_bdev_io *null_io; 361 362 TAILQ_INIT(&io); 363 TAILQ_SWAP(&ch->io, &io, null_bdev_io, link); 364 365 if (TAILQ_EMPTY(&io)) { 366 return SPDK_POLLER_IDLE; 367 } 368 369 while (!TAILQ_EMPTY(&io)) { 370 null_io = TAILQ_FIRST(&io); 371 TAILQ_REMOVE(&io, null_io, link); 372 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(null_io), SPDK_BDEV_IO_STATUS_SUCCESS); 373 } 374 375 return SPDK_POLLER_BUSY; 376 } 377 378 static int 379 null_bdev_create_cb(void *io_device, void *ctx_buf) 380 { 381 struct null_io_channel *ch = ctx_buf; 382 383 TAILQ_INIT(&ch->io); 384 ch->poller = SPDK_POLLER_REGISTER(null_io_poll, ch, 0); 385 386 return 0; 387 } 388 389 static void 390 null_bdev_destroy_cb(void *io_device, void *ctx_buf) 391 { 392 struct null_io_channel *ch = ctx_buf; 393 394 spdk_poller_unregister(&ch->poller); 395 } 396 397 static int 398 bdev_null_initialize(void) 399 { 400 /* 401 * This will be used if upper layer expects us to allocate the read buffer. 402 * Instead of using a real rbuf from the bdev pool, just always point to 403 * this same zeroed buffer. 404 */ 405 g_null_read_buf = spdk_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL, 406 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 407 if (g_null_read_buf == NULL) { 408 return -1; 409 } 410 411 /* 412 * We need to pick some unique address as our "io device" - so just use the 413 * address of the global tailq. 414 */ 415 spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb, 416 sizeof(struct null_io_channel), "null_bdev"); 417 418 return 0; 419 } 420 421 static void 422 dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) 423 { 424 } 425 426 int 427 bdev_null_resize(const char *bdev_name, const uint64_t new_size_in_mb) 428 { 429 struct spdk_bdev_desc *desc; 430 struct spdk_bdev *bdev; 431 uint64_t current_size_in_mb; 432 uint64_t new_size_in_byte; 433 int rc = 0; 434 435 rc = spdk_bdev_open_ext(bdev_name, false, dummy_bdev_event_cb, NULL, &desc); 436 if (rc != 0) { 437 SPDK_ERRLOG("failed to open bdev; %s.\n", bdev_name); 438 return rc; 439 } 440 441 bdev = spdk_bdev_desc_get_bdev(desc); 442 443 if (bdev->module != &null_if) { 444 rc = -EINVAL; 445 goto exit; 446 } 447 448 current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024); 449 if (new_size_in_mb < current_size_in_mb) { 450 SPDK_ERRLOG("The new bdev size must not be smaller than current bdev size.\n"); 451 rc = -EINVAL; 452 goto exit; 453 } 454 455 new_size_in_byte = new_size_in_mb * 1024 * 1024; 456 457 rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen); 458 if (rc != 0) { 459 SPDK_ERRLOG("failed to notify block cnt change.\n"); 460 } 461 462 exit: 463 spdk_bdev_close(desc); 464 return rc; 465 } 466 467 static void 468 _bdev_null_finish_cb(void *arg) 469 { 470 spdk_free(g_null_read_buf); 471 spdk_bdev_module_fini_done(); 472 } 473 474 static void 475 bdev_null_finish(void) 476 { 477 if (g_null_read_buf == NULL) { 478 spdk_bdev_module_fini_done(); 479 return; 480 } 481 spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb); 482 } 483 484 SPDK_LOG_REGISTER_COMPONENT(bdev_null) 485