1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2017 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 4 */ 5 6 #include "spdk/stdinc.h" 7 8 #include "spdk/bdev.h" 9 #include "spdk/env.h" 10 #include "spdk/thread.h" 11 #include "spdk/json.h" 12 #include "spdk/string.h" 13 #include "spdk/likely.h" 14 15 #include "spdk/bdev_module.h" 16 #include "spdk/log.h" 17 18 #include "bdev_null.h" 19 20 struct null_bdev { 21 struct spdk_bdev bdev; 22 TAILQ_ENTRY(null_bdev) tailq; 23 }; 24 25 struct null_io_channel { 26 struct spdk_poller *poller; 27 TAILQ_HEAD(, spdk_bdev_io) io; 28 }; 29 30 static TAILQ_HEAD(, null_bdev) g_null_bdev_head = TAILQ_HEAD_INITIALIZER(g_null_bdev_head); 31 static void *g_null_read_buf; 32 33 static int bdev_null_initialize(void); 34 static void bdev_null_finish(void); 35 36 static struct spdk_bdev_module null_if = { 37 .name = "null", 38 .module_init = bdev_null_initialize, 39 .module_fini = bdev_null_finish, 40 .async_fini = true, 41 }; 42 43 SPDK_BDEV_MODULE_REGISTER(null, &null_if) 44 45 static int 46 bdev_null_destruct(void *ctx) 47 { 48 struct null_bdev *bdev = ctx; 49 50 TAILQ_REMOVE(&g_null_bdev_head, bdev, tailq); 51 free(bdev->bdev.name); 52 free(bdev); 53 54 return 0; 55 } 56 57 static bool 58 bdev_null_abort_io(struct null_io_channel *ch, struct spdk_bdev_io *bio_to_abort) 59 { 60 struct spdk_bdev_io *bdev_io; 61 62 TAILQ_FOREACH(bdev_io, &ch->io, module_link) { 63 if (bdev_io == bio_to_abort) { 64 TAILQ_REMOVE(&ch->io, bio_to_abort, module_link); 65 spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED); 66 return true; 67 } 68 } 69 70 return false; 71 } 72 73 static void 74 bdev_null_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io) 75 { 76 struct null_io_channel *ch = spdk_io_channel_get_ctx(_ch); 77 struct spdk_bdev *bdev = bdev_io->bdev; 78 struct spdk_dif_ctx dif_ctx; 79 struct spdk_dif_error err_blk; 80 int rc; 81 82 if (SPDK_DIF_DISABLE != bdev->dif_type && 83 (SPDK_BDEV_IO_TYPE_READ == bdev_io->type || 84 SPDK_BDEV_IO_TYPE_WRITE == bdev_io->type)) { 85 rc = spdk_dif_ctx_init(&dif_ctx, 86 bdev->blocklen, 87 bdev->md_len, 88 bdev->md_interleave, 89 bdev->dif_is_head_of_md, 90 bdev->dif_type, 91 bdev->dif_check_flags, 92 bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF, 93 0xFFFF, 0, 0, 0); 94 if (0 != rc) { 95 SPDK_ERRLOG("Failed to initialize DIF context, error %d\n", rc); 96 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 97 return; 98 } 99 } 100 101 switch (bdev_io->type) { 102 case SPDK_BDEV_IO_TYPE_READ: 103 if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { 104 assert(bdev_io->u.bdev.iovcnt == 1); 105 if (spdk_likely(bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen <= 106 SPDK_BDEV_LARGE_BUF_MAX_SIZE)) { 107 bdev_io->u.bdev.iovs[0].iov_base = g_null_read_buf; 108 bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen; 109 } else { 110 SPDK_ERRLOG("Overflow occurred. Read I/O size %" PRIu64 " was larger than permitted %d\n", 111 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 112 SPDK_BDEV_LARGE_BUF_MAX_SIZE); 113 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 114 return; 115 } 116 } 117 if (SPDK_DIF_DISABLE != bdev->dif_type) { 118 rc = spdk_dif_generate(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 119 bdev_io->u.bdev.num_blocks, &dif_ctx); 120 if (0 != rc) { 121 SPDK_ERRLOG("IO DIF generation failed: lba %" PRIu64 ", num_block %" PRIu64 "\n", 122 bdev_io->u.bdev.offset_blocks, 123 bdev_io->u.bdev.num_blocks); 124 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 125 return; 126 } 127 } 128 TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link); 129 break; 130 case SPDK_BDEV_IO_TYPE_WRITE: 131 if (SPDK_DIF_DISABLE != bdev->dif_type) { 132 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 133 bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); 134 if (0 != rc) { 135 SPDK_ERRLOG("IO DIF verification failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", " 136 "err_type %u, expected %u, actual %u, err_offset %u\n", 137 bdev_io->u.bdev.offset_blocks, 138 bdev_io->u.bdev.num_blocks, 139 err_blk.err_type, 140 err_blk.expected, 141 err_blk.actual, 142 err_blk.err_offset); 143 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 144 return; 145 } 146 } 147 TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link); 148 break; 149 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 150 case SPDK_BDEV_IO_TYPE_RESET: 151 TAILQ_INSERT_TAIL(&ch->io, bdev_io, module_link); 152 break; 153 case SPDK_BDEV_IO_TYPE_ABORT: 154 if (bdev_null_abort_io(ch, bdev_io->u.abort.bio_to_abort)) { 155 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 156 } else { 157 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 158 } 159 break; 160 case SPDK_BDEV_IO_TYPE_FLUSH: 161 case SPDK_BDEV_IO_TYPE_UNMAP: 162 default: 163 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 164 break; 165 } 166 } 167 168 static bool 169 bdev_null_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 170 { 171 switch (io_type) { 172 case SPDK_BDEV_IO_TYPE_READ: 173 case SPDK_BDEV_IO_TYPE_WRITE: 174 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 175 case SPDK_BDEV_IO_TYPE_RESET: 176 case SPDK_BDEV_IO_TYPE_ABORT: 177 return true; 178 case SPDK_BDEV_IO_TYPE_FLUSH: 179 case SPDK_BDEV_IO_TYPE_UNMAP: 180 default: 181 return false; 182 } 183 } 184 185 static struct spdk_io_channel * 186 bdev_null_get_io_channel(void *ctx) 187 { 188 return spdk_get_io_channel(&g_null_bdev_head); 189 } 190 191 static void 192 bdev_null_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 193 { 194 char uuid_str[SPDK_UUID_STRING_LEN]; 195 196 spdk_json_write_object_begin(w); 197 198 spdk_json_write_named_string(w, "method", "bdev_null_create"); 199 200 spdk_json_write_named_object_begin(w, "params"); 201 spdk_json_write_named_string(w, "name", bdev->name); 202 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 203 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 204 spdk_json_write_named_uint32(w, "physical_block_size", bdev->phys_blocklen); 205 spdk_json_write_named_uint32(w, "md_size", bdev->md_len); 206 spdk_json_write_named_uint32(w, "dif_type", bdev->dif_type); 207 spdk_json_write_named_bool(w, "dif_is_head_of_md", bdev->dif_is_head_of_md); 208 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 209 spdk_json_write_named_string(w, "uuid", uuid_str); 210 spdk_json_write_object_end(w); 211 212 spdk_json_write_object_end(w); 213 } 214 215 static const struct spdk_bdev_fn_table null_fn_table = { 216 .destruct = bdev_null_destruct, 217 .submit_request = bdev_null_submit_request, 218 .io_type_supported = bdev_null_io_type_supported, 219 .get_io_channel = bdev_null_get_io_channel, 220 .write_config_json = bdev_null_write_config_json, 221 }; 222 223 int 224 bdev_null_create(struct spdk_bdev **bdev, const struct spdk_null_bdev_opts *opts) 225 { 226 struct null_bdev *null_disk; 227 uint32_t data_block_size; 228 int rc; 229 230 if (!opts) { 231 SPDK_ERRLOG("No options provided for Null bdev.\n"); 232 return -EINVAL; 233 } 234 235 if (opts->md_interleave) { 236 if (opts->block_size < opts->md_size) { 237 SPDK_ERRLOG("Interleaved metadata size can not be greater than block size.\n"); 238 return -EINVAL; 239 } 240 data_block_size = opts->block_size - opts->md_size; 241 } else { 242 if (opts->md_size != 0) { 243 SPDK_ERRLOG("Metadata in separate buffer is not supported\n"); 244 return -ENOTSUP; 245 } 246 data_block_size = opts->block_size; 247 } 248 249 if (data_block_size % 512 != 0) { 250 SPDK_ERRLOG("Data block size %u is not a multiple of 512.\n", opts->block_size); 251 return -EINVAL; 252 } 253 254 if (opts->num_blocks == 0) { 255 SPDK_ERRLOG("Disk must be more than 0 blocks\n"); 256 return -EINVAL; 257 } 258 259 null_disk = calloc(1, sizeof(*null_disk)); 260 if (!null_disk) { 261 SPDK_ERRLOG("could not allocate null_bdev\n"); 262 return -ENOMEM; 263 } 264 265 null_disk->bdev.name = strdup(opts->name); 266 if (!null_disk->bdev.name) { 267 free(null_disk); 268 return -ENOMEM; 269 } 270 null_disk->bdev.product_name = "Null disk"; 271 272 null_disk->bdev.write_cache = 0; 273 null_disk->bdev.blocklen = opts->block_size; 274 null_disk->bdev.phys_blocklen = opts->physical_block_size; 275 null_disk->bdev.blockcnt = opts->num_blocks; 276 null_disk->bdev.md_len = opts->md_size; 277 null_disk->bdev.md_interleave = opts->md_interleave; 278 null_disk->bdev.dif_type = opts->dif_type; 279 null_disk->bdev.dif_is_head_of_md = opts->dif_is_head_of_md; 280 /* Current block device layer API does not propagate 281 * any DIF related information from user. So, we can 282 * not generate or verify Application Tag. 283 */ 284 switch (opts->dif_type) { 285 case SPDK_DIF_TYPE1: 286 case SPDK_DIF_TYPE2: 287 null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK | 288 SPDK_DIF_FLAGS_REFTAG_CHECK; 289 break; 290 case SPDK_DIF_TYPE3: 291 null_disk->bdev.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK; 292 break; 293 case SPDK_DIF_DISABLE: 294 break; 295 } 296 if (opts->uuid) { 297 null_disk->bdev.uuid = *opts->uuid; 298 } else { 299 spdk_uuid_generate(&null_disk->bdev.uuid); 300 } 301 302 null_disk->bdev.ctxt = null_disk; 303 null_disk->bdev.fn_table = &null_fn_table; 304 null_disk->bdev.module = &null_if; 305 306 rc = spdk_bdev_register(&null_disk->bdev); 307 if (rc) { 308 free(null_disk->bdev.name); 309 free(null_disk); 310 return rc; 311 } 312 313 *bdev = &(null_disk->bdev); 314 315 TAILQ_INSERT_TAIL(&g_null_bdev_head, null_disk, tailq); 316 317 return rc; 318 } 319 320 void 321 bdev_null_delete(const char *bdev_name, spdk_delete_null_complete cb_fn, void *cb_arg) 322 { 323 int rc; 324 325 rc = spdk_bdev_unregister_by_name(bdev_name, &null_if, cb_fn, cb_arg); 326 if (rc != 0) { 327 cb_fn(cb_arg, rc); 328 } 329 } 330 331 static int 332 null_io_poll(void *arg) 333 { 334 struct null_io_channel *ch = arg; 335 TAILQ_HEAD(, spdk_bdev_io) io; 336 struct spdk_bdev_io *bdev_io; 337 338 TAILQ_INIT(&io); 339 TAILQ_SWAP(&ch->io, &io, spdk_bdev_io, module_link); 340 341 if (TAILQ_EMPTY(&io)) { 342 return SPDK_POLLER_IDLE; 343 } 344 345 while (!TAILQ_EMPTY(&io)) { 346 bdev_io = TAILQ_FIRST(&io); 347 TAILQ_REMOVE(&io, bdev_io, module_link); 348 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 349 } 350 351 return SPDK_POLLER_BUSY; 352 } 353 354 static int 355 null_bdev_create_cb(void *io_device, void *ctx_buf) 356 { 357 struct null_io_channel *ch = ctx_buf; 358 359 TAILQ_INIT(&ch->io); 360 ch->poller = SPDK_POLLER_REGISTER(null_io_poll, ch, 0); 361 362 return 0; 363 } 364 365 static void 366 null_bdev_destroy_cb(void *io_device, void *ctx_buf) 367 { 368 struct null_io_channel *ch = ctx_buf; 369 370 spdk_poller_unregister(&ch->poller); 371 } 372 373 static int 374 bdev_null_initialize(void) 375 { 376 /* 377 * This will be used if upper layer expects us to allocate the read buffer. 378 * Instead of using a real rbuf from the bdev pool, just always point to 379 * this same zeroed buffer. 380 */ 381 g_null_read_buf = spdk_zmalloc(SPDK_BDEV_LARGE_BUF_MAX_SIZE, 0, NULL, 382 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); 383 if (g_null_read_buf == NULL) { 384 return -1; 385 } 386 387 /* 388 * We need to pick some unique address as our "io device" - so just use the 389 * address of the global tailq. 390 */ 391 spdk_io_device_register(&g_null_bdev_head, null_bdev_create_cb, null_bdev_destroy_cb, 392 sizeof(struct null_io_channel), "null_bdev"); 393 394 return 0; 395 } 396 397 static void 398 dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) 399 { 400 } 401 402 int 403 bdev_null_resize(const char *bdev_name, const uint64_t new_size_in_mb) 404 { 405 struct spdk_bdev_desc *desc; 406 struct spdk_bdev *bdev; 407 uint64_t current_size_in_mb; 408 uint64_t new_size_in_byte; 409 int rc = 0; 410 411 rc = spdk_bdev_open_ext(bdev_name, false, dummy_bdev_event_cb, NULL, &desc); 412 if (rc != 0) { 413 SPDK_ERRLOG("failed to open bdev; %s.\n", bdev_name); 414 return rc; 415 } 416 417 bdev = spdk_bdev_desc_get_bdev(desc); 418 419 if (bdev->module != &null_if) { 420 rc = -EINVAL; 421 goto exit; 422 } 423 424 current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024); 425 if (new_size_in_mb < current_size_in_mb) { 426 SPDK_ERRLOG("The new bdev size must not be smaller than current bdev size.\n"); 427 rc = -EINVAL; 428 goto exit; 429 } 430 431 new_size_in_byte = new_size_in_mb * 1024 * 1024; 432 433 rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen); 434 if (rc != 0) { 435 SPDK_ERRLOG("failed to notify block cnt change.\n"); 436 } 437 438 exit: 439 spdk_bdev_close(desc); 440 return rc; 441 } 442 443 static void 444 _bdev_null_finish_cb(void *arg) 445 { 446 spdk_free(g_null_read_buf); 447 spdk_bdev_module_fini_done(); 448 } 449 450 static void 451 bdev_null_finish(void) 452 { 453 if (g_null_read_buf == NULL) { 454 spdk_bdev_module_fini_done(); 455 return; 456 } 457 spdk_io_device_unregister(&g_null_bdev_head, _bdev_null_finish_cb); 458 } 459 460 SPDK_LOG_REGISTER_COMPONENT(bdev_null) 461