/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2017 Intel Corporation. All rights reserved.
 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/json.h"
#include "spdk/string.h"
#include "spdk/likely.h"

#include "spdk/bdev_module.h"
#include "spdk/log.h"

#include "bdev_null.h"

struct null_bdev {
	struct spdk_bdev bdev;
	TAILQ_ENTRY(null_bdev) tailq;
};

struct null_io_channel {
	struct spdk_poller *poller;
	TAILQ_HEAD(, spdk_bdev_io) io;
};

static TAILQ_HEAD(, null_bdev) g_null_bdev_head = TAILQ_HEAD_INITIALIZER(g_null_bdev_head);
static void *g_null_read_buf;

static int bdev_null_initialize(void);
static void bdev_null_finish(void);

static struct spdk_bdev_module null_if = {
	.name = "null",
	.module_init = bdev_null_initialize,
	.module_fini = bdev_null_finish,
	.async_fini = true,
};

SPDK_BDEV_MODULE_REGISTER(null, &null_if)

static int
bdev_null_destruct(void *ctx)
{
	struct null_bdev *bdev = ctx;

	TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq);
	free(bdev->bdev.name);
	free(bdev);

	return 0;
}

static bool
bdev_null_abort_io(struct null_io_channel *ch, struct spdk_bdev_io *bio_to_abort)
{
	struct spdk_bdev_io *bdev_io;

	TAILQ_FOREACH(bdev_io, &ch->io, module_link) {
		if (bdev_io == bio_to_abort) {
			TAILQ_REMOVE(&ch->io, bio_to_abort, module_link);
			spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
			return true;
		}
	}

	return false;
}
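
/*
 * Submission path (descriptive note): writes are acknowledged without storing
 * any data and reads are served from the shared zeroed buffer
 * (g_null_read_buf). When DIF is enabled, protection information is generated
 * for reads and verified for writes. Accepted I/Os are queued on the channel
 * and completed later by the per-channel poller (null_io_poll).
 */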
static void
bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
{
	struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch);
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	if (SPDK_DIF_DISABLE != bdev->dif_type &&
	    (SPDK_BDEV_IO_TYPE_READ == bdev_io->type ||
	     SPDK_BDEV_IO_TYPE_WRITE == bdev_io->type)) {
		dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
		dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
		rc = spdk_dif_ctx_init(&dif_ctx,
				       bdev->blocklen,
				       bdev->md_len,
				       bdev->md_interleave,
				       bdev->dif_is_head_of_md,
				       bdev->dif_type,
				       bdev->dif_check_flags,
				       bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF,
				       0xFFFF, 0, 0, 0, &dif_opts);
		if (0 != rc) {
			SPDK_ERRLOG("Failed to initialize DIF context, error %d\n", rc);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
			assert(bdev_io->u.bdev.iovcnt == 1);
			if (spdk_likely(bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen <=
					SPDK_BDEV_LARGE_BUF_MAX_SIZE)) {
				bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf;
				bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
			} else {
				SPDK_ERRLOG("Overflow occurred. Read I/O size %" PRIu64 " was larger than permitted %d\n",
					    bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
					    SPDK_BDEV_LARGE_BUF_MAX_SIZE);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		if (SPDK_DIF_DISABLE != bdev->dif_type) {
			rc = spdk_dif_generate(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					       bdev_io->u.bdev.num_blocks, &dif_ctx);
			if (0 != rc) {
				SPDK_ERRLOG("IO DIF generation failed: lba %" PRIu64 ", num_block %" PRIu64 "\n",
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (SPDK_DIF_DISABLE != bdev->dif_type) {
			rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
			if (0 != rc) {
				SPDK_ERRLOG("IO DIF verification failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", "
					    "err_type %u, expected %lu, actual %lu, err_offset %u\n",
					    bdev_io->u.bdev.offset_blocks,
					    bdev_io->u.bdev.num_blocks,
					    err_blk.err_type,
					    err_blk.expected,
					    err_blk.actual,
					    err_blk.err_offset);
				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_RESET:
		TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link);
		break;
	case SPDK_BDEV_IO_TYPE_ABORT:
		if (bdev_null_abort_io(ch, bdev_io->u.abort.bio_to_abort)) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		} else {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static bool
bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_ABORT:
		return true;
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	default:
		return false;
	}
}

static struct spdk_io_channel *
bdev_null_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(&g_null_bdev_head);
}

static void
bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	char uuid_str[SPDK_UUID_STRING_LEN];

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_null_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_json_write_named_uint32(w, "physical_block_size", bdev->phys_blocklen);
	spdk_json_write_named_uint32(w, "md_size", bdev->md_len);
	spdk_json_write_named_uint32(w, "dif_type", bdev->dif_type);
	spdk_json_write_named_bool(w, "dif_is_head_of_md", bdev->dif_is_head_of_md);
	spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
	spdk_json_write_named_string(w, "uuid", uuid_str);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}
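
/*
 * Illustrative only: the object emitted by bdev_null_write_config_json() above
 * would look roughly like the following when replayed as a "bdev_null_create"
 * RPC. The bdev name, sizes, and UUID are hypothetical values, not defaults
 * taken from this module.
 *
 * {
 *   "method": "bdev_null_create",
 *   "params": {
 *     "name": "Null0",
 *     "num_blocks": 2097152,
 *     "block_size": 512,
 *     "physical_block_size": 512,
 *     "md_size": 0,
 *     "dif_type": 0,
 *     "dif_is_head_of_md": false,
 *     "uuid": "11111111-2222-3333-4444-555555555555"
 *   }
 * }
 */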

static const struct spdk_bdev_fn_table null_fn_table = {
	.destruct = bdev_null_destruct,
	.submit_request = bdev_null_submit_request,
	.io_type_supported = bdev_null_io_type_supported,
	.get_io_channel = bdev_null_get_io_channel,
	.write_config_json = bdev_null_write_config_json,
};

int
bdev_null_create(struct spdk_bdev **bdev, const struct spdk_null_bdev_opts *opts)
{
	struct null_bdev *null_disk;
	uint32_t data_block_size;
	int rc;

	if (!opts) {
		SPDK_ERRLOG("No options provided for Null bdev.\n");
		return -EINVAL;
	}

	if (opts->md_interleave) {
		if (opts->block_size < opts->md_size) {
			SPDK_ERRLOG("Interleaved metadata size can not be greater than block size.\n");
			return -EINVAL;
		}
		data_block_size = opts->block_size - opts->md_size;
	} else {
		if (opts->md_size != 0) {
			SPDK_ERRLOG("Metadata in separate buffer is not supported\n");
			return -ENOTSUP;
		}
		data_block_size = opts->block_size;
	}

	if (data_block_size % 512 != 0) {
		SPDK_ERRLOG("Data block size %u is not a multiple of 512.\n", opts->block_size);
		return -EINVAL;
	}

	if (opts->num_blocks == 0) {
		SPDK_ERRLOG("Disk must be more than 0 blocks\n");
		return -EINVAL;
	}

	null_disk = calloc(1, sizeof(*null_disk));
	if (!null_disk) {
		SPDK_ERRLOG("could not allocate null_bdev\n");
		return -ENOMEM;
	}

	null_disk->bdev.name = strdup(opts->name);
	if (!null_disk->bdev.name) {
		free(null_disk);
		return -ENOMEM;
	}
	null_disk->bdev.product_name = "Null disk";

	null_disk->bdev.write_cache = 0;
	null_disk->bdev.blocklen = opts->block_size;
	null_disk->bdev.phys_blocklen = opts->physical_block_size;
	null_disk->bdev.blockcnt = opts->num_blocks;
	null_disk->bdev.md_len = opts->md_size;
	null_disk->bdev.md_interleave = opts->md_interleave;
	null_disk->bdev.dif_type = opts->dif_type;
	null_disk->bdev.dif_is_head_of_md = opts->dif_is_head_of_md;
	/* Current block device layer API does not propagate
	 * any DIF related information from user. So, we can
	 * not generate or verify Application Tag.
	 */
	switch (opts->dif_type) {
	case SPDK_DIF_TYPE1:
	case SPDK_DIF_TYPE2:
		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK |
						  SPDK_DIF_FLAGS_REFTAG_CHECK;
		break;
	case SPDK_DIF_TYPE3:
		null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK;
		break;
	case SPDK_DIF_DISABLE:
		break;
	}
	if (opts->uuid) {
		null_disk->bdev.uuid = *opts->uuid;
	}

	null_disk->bdev.ctxt = null_disk;
	null_disk->bdev.fn_table = &null_fn_table;
	null_disk->bdev.module = &null_if;

	rc = spdk_bdev_register(&null_disk->bdev);
	if (rc) {
		free(null_disk->bdev.name);
		free(null_disk);
		return rc;
	}

	*bdev = &(null_disk->bdev);

	TAILQ_INSERT_TAIL(&g_null_bdev_head, null_disk, tailq);

	return rc;
}

void
bdev_null_delete(const char *bdev_name, spdk_delete_null_complete cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(bdev_name, &null_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}

static int
null_io_poll(void *arg)
{
	struct null_io_channel *ch = arg;
	TAILQ_HEAD(, spdk_bdev_io) io;
	struct spdk_bdev_io *bdev_io;

	TAILQ_INIT(&io);
	TAILQ_SWAP(&ch->io, &io, spdk_bdev_io, module_link);

	if (TAILQ_EMPTY(&io)) {
		return SPDK_POLLER_IDLE;
	}

	while (!TAILQ_EMPTY(&io)) {
		bdev_io = TAILQ_FIRST(&io);
		TAILQ_REMOVE(&io, bdev_io, module_link);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	}

	return SPDK_POLLER_BUSY;
}

static int
null_bdev_create_cb(void *io_device, void *ctx_buf)
{
	struct null_io_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->io);
	ch->poller = SPDK_POLLER_REGISTER(null_io_poll, ch, 0);

	return 0;
}

static void
null_bdev_destroy_cb(void *io_device, void *ctx_buf)
{
	struct null_io_channel *ch = ctx_buf;

	spdk_poller_unregister(&ch->poller);
}

static int
bdev_null_initialize(void)
{
	/*
	 * This will be used if upper layer expects us to allocate the read buffer.
	 * Instead of using a real rbuf from the bdev pool, just always point to
	 * this same zeroed buffer.
	 */
	g_null_read_buf = spdk_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL,
				       SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (g_null_read_buf == NULL) {
		return -1;
	}

	/*
	 * We need to pick some unique address as our "io device" - so just use the
	 * address of the global tailq.
	 */
	spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb,
				sizeof(struct null_io_channel), "null_bdev");

	return 0;
}

static void
dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
}

int
bdev_null_resize(const char *bdev_name, const uint64_t new_size_in_mb)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	uint64_t current_size_in_mb;
	uint64_t new_size_in_byte;
	int rc = 0;

	rc = spdk_bdev_open_ext(bdev_name, false, dummy_bdev_event_cb, NULL, &desc);
	if (rc != 0) {
		SPDK_ERRLOG("failed to open bdev; %s.\n", bdev_name);
		return rc;
	}

	bdev = spdk_bdev_desc_get_bdev(desc);

	if (bdev->module != &null_if) {
		rc = -EINVAL;
		goto exit;
	}

	current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
	if (new_size_in_mb < current_size_in_mb) {
		SPDK_ERRLOG("The new bdev size must not be smaller than current bdev size.\n");
		rc = -EINVAL;
		goto exit;
	}

	new_size_in_byte = new_size_in_mb * 1024 * 1024;

	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
	if (rc != 0) {
		SPDK_ERRLOG("failed to notify block cnt change.\n");
	}

exit:
	spdk_bdev_close(desc);
	return rc;
}

static void
_bdev_null_finish_cb(void *arg)
{
	spdk_free(g_null_read_buf);
	spdk_bdev_module_fini_done();
}

static void
bdev_null_finish(void)
{
	if (g_null_read_buf == NULL) {
		spdk_bdev_module_fini_done();
		return;
	}
	spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb);
}

SPDK_LOG_REGISTER_COMPONENT(bdev_null)
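
/*
 * Minimal usage sketch (not part of this module): creating and later deleting
 * a null bdev from another SPDK component through the API declared in
 * bdev_null.h. The bdev name and geometry below are hypothetical; only the
 * option fields actually consumed by bdev_null_create() above are shown, and
 * all other fields are left zeroed (no metadata, DIF disabled).
 *
 * static void
 * delete_done(void *cb_arg, int rc)
 * {
 *         // rc is 0 on successful unregistration
 * }
 *
 * static int
 * create_example_null_bdev(void)
 * {
 *         struct spdk_null_bdev_opts opts = {
 *                 .name = "Null0",                // hypothetical bdev name
 *                 .num_blocks = 8 * 1024 * 1024,  // 4 GiB at 512-byte blocks
 *                 .block_size = 512,              // data block size, multiple of 512
 *                 .physical_block_size = 512,
 *         };
 *         struct spdk_bdev *bdev;
 *         int rc;
 *
 *         rc = bdev_null_create(&bdev, &opts);
 *         if (rc != 0) {
 *                 return rc;
 *         }
 *
 *         // ... use the bdev; tear it down asynchronously when done:
 *         bdev_null_delete("Null0", delete_done, NULL);
 *         return 0;
 * }
 */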