/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2017 Intel Corporation.
 * All rights reserved.
 * Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/blob_bdev.h"
#include "spdk/blob.h"
#include "spdk/thread.h"
#include "spdk/log.h"
#include "spdk/endian.h"
#define __SPDK_BDEV_MODULE_ONLY
#include "spdk/bdev_module.h"

struct blob_bdev {
	struct spdk_bs_dev bs_dev;
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	bool write;
	int32_t refs;
	struct spdk_spinlock lock;
};

struct blob_resubmit {
	struct spdk_bdev_io_wait_entry bdev_io_wait;
	enum spdk_bdev_io_type io_type;
	struct spdk_bs_dev *dev;
	struct spdk_io_channel *channel;
	void *payload;
	int iovcnt;
	uint64_t lba;
	uint64_t src_lba;
	uint32_t lba_count;
	struct spdk_bs_dev_cb_args *cb_args;
	struct spdk_blob_ext_io_opts *ext_io_opts;
};
static void bdev_blob_resubmit(void *);

static inline struct spdk_bdev_desc *
__get_desc(struct spdk_bs_dev *dev)
{
	return ((struct blob_bdev *)dev)->desc;
}

static inline struct spdk_bdev *
__get_bdev(struct spdk_bs_dev *dev)
{
	return ((struct blob_bdev *)dev)->bdev;
}

static void
bdev_blob_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *arg)
{
	struct spdk_bs_dev_cb_args *cb_args = arg;
	int bserrno;

	if (success) {
		bserrno = 0;
	} else {
		bserrno = -EIO;
	}
	cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, bserrno);
	spdk_bdev_free_io(bdev_io);
}
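
/*
 * Called when a bdev submission function returned -ENOMEM. Registers the request with
 * spdk_bdev_queue_io_wait() so that it is resubmitted, via bdev_blob_resubmit(), once
 * the bdev layer has I/O resources available again.
 */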
static void
bdev_blob_queue_io(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
		   int iovcnt, uint64_t lba, uint64_t src_lba, uint32_t lba_count,
		   enum spdk_bdev_io_type io_type, struct spdk_bs_dev_cb_args *cb_args,
		   struct spdk_blob_ext_io_opts *ext_io_opts)
{
	int rc;
	struct spdk_bdev *bdev = __get_bdev(dev);
	struct blob_resubmit *ctx;

	ctx = calloc(1, sizeof(struct blob_resubmit));

	if (ctx == NULL) {
		SPDK_ERRLOG("Not enough memory to queue io\n");
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -ENOMEM);
		return;
	}

	ctx->io_type = io_type;
	ctx->dev = dev;
	ctx->channel = channel;
	ctx->payload = payload;
	ctx->iovcnt = iovcnt;
	ctx->lba = lba;
	ctx->src_lba = src_lba;
	ctx->lba_count = lba_count;
	ctx->cb_args = cb_args;
	ctx->bdev_io_wait.bdev = bdev;
	ctx->bdev_io_wait.cb_fn = bdev_blob_resubmit;
	ctx->bdev_io_wait.cb_arg = ctx;
	ctx->ext_io_opts = ext_io_opts;

	rc = spdk_bdev_queue_io_wait(bdev, channel, &ctx->bdev_io_wait);
	if (rc != 0) {
		SPDK_ERRLOG("Queue io failed, rc=%d\n", rc);
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
		free(ctx);
		assert(false);
	}
}

static void
bdev_blob_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
	       uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_read_blocks(__get_desc(dev), channel, payload, lba,
				   lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, payload, 0, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_READ, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
		uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_write_blocks(__get_desc(dev), channel, payload, lba,
				    lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, payload, 0, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_WRITE, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		struct iovec *iov, int iovcnt,
		uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_readv_blocks(__get_desc(dev), channel, iov, iovcnt, lba,
				    lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_READ, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		 struct iovec *iov, int iovcnt,
		 uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_writev_blocks(__get_desc(dev), channel, iov, iovcnt, lba,
				     lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_WRITE, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static inline void
blob_ext_io_opts_to_bdev_opts(struct spdk_bdev_ext_io_opts *dst, struct spdk_blob_ext_io_opts *src)
{
	memset(dst, 0, sizeof(*dst));
	dst->size = sizeof(*dst);
	dst->memory_domain = src->memory_domain;
	dst->memory_domain_ctx = src->memory_domain_ctx;
}

static void
bdev_blob_readv_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		    struct iovec *iov, int iovcnt,
		    uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args,
		    struct spdk_blob_ext_io_opts *io_opts)
{
	struct spdk_bdev_ext_io_opts bdev_io_opts;
	int rc;

	blob_ext_io_opts_to_bdev_opts(&bdev_io_opts, io_opts);
	rc = spdk_bdev_readv_blocks_ext(__get_desc(dev), channel, iov, iovcnt, lba, lba_count,
					bdev_blob_io_complete, cb_args, &bdev_io_opts);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, 0, lba_count, SPDK_BDEV_IO_TYPE_READ, cb_args,
				   io_opts);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_writev_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		     struct iovec *iov, int iovcnt,
		     uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args,
		     struct spdk_blob_ext_io_opts *io_opts)
{
	struct spdk_bdev_ext_io_opts bdev_io_opts;
	int rc;

	blob_ext_io_opts_to_bdev_opts(&bdev_io_opts, io_opts);
	rc = spdk_bdev_writev_blocks_ext(__get_desc(dev), channel, iov, iovcnt, lba, lba_count,
					 bdev_blob_io_complete, cb_args, &bdev_io_opts);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, iov, iovcnt, lba, 0, lba_count, SPDK_BDEV_IO_TYPE_WRITE, cb_args,
				   io_opts);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba,
		       uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_write_zeroes_blocks(__get_desc(dev), channel, lba,
					   lba_count, bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, NULL, 0, lba, 0,
				   lba_count, SPDK_BDEV_IO_TYPE_WRITE_ZEROES, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba,
		uint64_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)dev;
	int rc;

	if (spdk_bdev_io_type_supported(blob_bdev->bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
		rc = spdk_bdev_unmap_blocks(__get_desc(dev), channel, lba, lba_count,
					    bdev_blob_io_complete, cb_args);
		if (rc == -ENOMEM) {
			bdev_blob_queue_io(dev, channel, NULL, 0, lba, 0,
					   lba_count, SPDK_BDEV_IO_TYPE_UNMAP, cb_args, NULL);
		} else if (rc != 0) {
			cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
		}
	} else {
		/*
		 * If the device doesn't support unmap, immediately complete
		 * the request. Blobstore does not rely on unmap zeroing
		 * data.
		 */
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, 0);
	}
}

static void
bdev_blob_copy(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
	       uint64_t dst_lba, uint64_t src_lba, uint64_t lba_count,
	       struct spdk_bs_dev_cb_args *cb_args)
{
	int rc;

	rc = spdk_bdev_copy_blocks(__get_desc(dev), channel,
				   dst_lba, src_lba, lba_count,
				   bdev_blob_io_complete, cb_args);
	if (rc == -ENOMEM) {
		bdev_blob_queue_io(dev, channel, NULL, 0, dst_lba, src_lba,
				   lba_count, SPDK_BDEV_IO_TYPE_COPY, cb_args, NULL);
	} else if (rc != 0) {
		cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc);
	}
}

static void
bdev_blob_resubmit(void *arg)
{
	struct blob_resubmit *ctx = (struct blob_resubmit *) arg;

	switch (ctx->io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (ctx->iovcnt > 0) {
			bdev_blob_readv_ext(ctx->dev, ctx->channel, (struct iovec *) ctx->payload, ctx->iovcnt,
					    ctx->lba, ctx->lba_count, ctx->cb_args, ctx->ext_io_opts);
		} else {
			bdev_blob_read(ctx->dev, ctx->channel, ctx->payload,
				       ctx->lba, ctx->lba_count, ctx->cb_args);
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (ctx->iovcnt > 0) {
			bdev_blob_writev_ext(ctx->dev, ctx->channel, (struct iovec *) ctx->payload, ctx->iovcnt,
					     ctx->lba, ctx->lba_count, ctx->cb_args, ctx->ext_io_opts);
		} else {
			bdev_blob_write(ctx->dev, ctx->channel, ctx->payload,
					ctx->lba, ctx->lba_count, ctx->cb_args);
		}
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		bdev_blob_unmap(ctx->dev, ctx->channel,
				ctx->lba, ctx->lba_count, ctx->cb_args);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		bdev_blob_write_zeroes(ctx->dev, ctx->channel,
				       ctx->lba, ctx->lba_count, ctx->cb_args);
		break;
	case SPDK_BDEV_IO_TYPE_COPY:
		bdev_blob_copy(ctx->dev, ctx->channel,
			       ctx->lba, ctx->src_lba, ctx->lba_count, ctx->cb_args);
		break;
	default:
		SPDK_ERRLOG("Unsupported io type %d\n", ctx->io_type);
		assert(false);
		break;
	}
	free(ctx);
}

int
spdk_bs_bdev_claim(struct spdk_bs_dev *bs_dev, struct spdk_bdev_module *module)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)bs_dev;
	struct spdk_bdev_desc *desc = blob_bdev->desc;
	enum spdk_bdev_claim_type claim_type;
	int rc;

	claim_type = blob_bdev->write ? SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE :
		     SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
	rc = spdk_bdev_module_claim_bdev_desc(desc, claim_type, NULL, module);
	if (rc != 0) {
		SPDK_ERRLOG("could not claim bs dev\n");
		return rc;
	}

	return rc;
}
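
/*
 * Illustrative sketch (not part of this file): a bdev module that builds a blobstore on
 * top of a bs_dev would typically open the bdev read/write and then claim it, e.g.:
 *
 *	rc = spdk_bdev_create_bs_dev_ext("Nvme0n1", base_bdev_event_cb, NULL, &bs_dev);
 *	if (rc == 0) {
 *		rc = spdk_bs_bdev_claim(bs_dev, &my_bdev_module);
 *	}
 *
 * "Nvme0n1", base_bdev_event_cb and my_bdev_module are placeholders. Because
 * spdk_bdev_create_bs_dev_ext() opens the bdev for writing, the claim taken here is
 * SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE.
 */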

static struct spdk_io_channel *
bdev_blob_create_channel(struct spdk_bs_dev *dev)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)dev;
	struct spdk_io_channel *ch;

	ch = spdk_bdev_get_io_channel(blob_bdev->desc);
	if (ch != NULL) {
		spdk_spin_lock(&blob_bdev->lock);
		blob_bdev->refs++;
		spdk_spin_unlock(&blob_bdev->lock);
	}

	return ch;
}

static void
bdev_blob_free(struct blob_bdev *blob_bdev)
{
	assert(blob_bdev->refs == 0);

	spdk_spin_destroy(&blob_bdev->lock);
	free(blob_bdev);
}

static void
bdev_blob_destroy_channel(struct spdk_bs_dev *dev, struct spdk_io_channel *channel)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)dev;
	int32_t refs;

	spdk_spin_lock(&blob_bdev->lock);

	assert(blob_bdev->refs > 0);
	blob_bdev->refs--;
	refs = blob_bdev->refs;

	spdk_spin_unlock(&blob_bdev->lock);

	spdk_put_io_channel(channel);

	/*
	 * If the value of blob_bdev->refs taken while holding blob_bdev->lock is zero, the blob
	 * and this channel have been destroyed. This means that dev->destroy() has been called and
	 * it would be an error (akin to use after free) if dev is dereferenced after destroying
	 * it. Thus, there should be no race with bdev_blob_create_channel().
	 *
	 * Because the value of blob_bdev->refs was taken while holding the lock here and the same
	 * is done in bdev_blob_destroy(), there is no race with bdev_blob_destroy().
	 */
	if (refs == 0) {
		bdev_blob_free(blob_bdev);
	}
}

static void
bdev_blob_destroy(struct spdk_bs_dev *bs_dev)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)bs_dev;
	struct spdk_bdev_desc *desc;
	int32_t refs;

	spdk_spin_lock(&blob_bdev->lock);

	desc = blob_bdev->desc;
	blob_bdev->desc = NULL;
	blob_bdev->refs--;
	refs = blob_bdev->refs;

	spdk_spin_unlock(&blob_bdev->lock);

	spdk_bdev_close(desc);

	/*
	 * If the value of blob_bdev->refs taken while holding blob_bdev->lock is zero,
	 * bs_dev->destroy() has been called and all the channels have been destroyed. It would be
	 * an error (akin to use after free) if bs_dev is dereferenced after destroying it. Thus,
	 * there should be no race with bdev_blob_create_channel().
	 *
	 * Because the value of blob_bdev->refs was taken while holding the lock here and the same
	 * is done in bdev_blob_destroy_channel(), there is no race with
	 * bdev_blob_destroy_channel().
	 */
	if (refs == 0) {
		bdev_blob_free(blob_bdev);
	}
}

static struct spdk_bdev *
bdev_blob_get_base_bdev(struct spdk_bs_dev *bs_dev)
{
	return __get_bdev(bs_dev);
}

static bool
bdev_blob_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count)
{
	return false;
}

static bool
bdev_blob_translate_lba(struct spdk_bs_dev *dev, uint64_t lba, uint64_t *base_lba)
{
	*base_lba = lba;
	return true;
}

static void
blob_bdev_init(struct blob_bdev *b, struct spdk_bdev_desc *desc)
{
	struct spdk_bdev *bdev;

	bdev = spdk_bdev_desc_get_bdev(desc);
	assert(bdev != NULL);

	b->bdev = bdev;
	b->desc = desc;
	b->bs_dev.blockcnt = spdk_bdev_get_num_blocks(bdev);
	b->bs_dev.blocklen = spdk_bdev_get_block_size(bdev);
	b->bs_dev.create_channel = bdev_blob_create_channel;
	b->bs_dev.destroy_channel = bdev_blob_destroy_channel;
	b->bs_dev.destroy = bdev_blob_destroy;
	b->bs_dev.read = bdev_blob_read;
	b->bs_dev.write = bdev_blob_write;
	b->bs_dev.readv = bdev_blob_readv;
	b->bs_dev.writev = bdev_blob_writev;
	b->bs_dev.readv_ext = bdev_blob_readv_ext;
	b->bs_dev.writev_ext = bdev_blob_writev_ext;
	b->bs_dev.write_zeroes = bdev_blob_write_zeroes;
	b->bs_dev.unmap = bdev_blob_unmap;
	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY)) {
		b->bs_dev.copy = bdev_blob_copy;
	}
	b->bs_dev.get_base_bdev = bdev_blob_get_base_bdev;
	b->bs_dev.is_zeroes = bdev_blob_is_zeroes;
	b->bs_dev.translate_lba = bdev_blob_translate_lba;
}

int
spdk_bdev_create_bs_dev(const char *bdev_name, bool write,
			struct spdk_bdev_bs_dev_opts *opts, size_t opts_size,
			spdk_bdev_event_cb_t event_cb, void *event_ctx,
			struct spdk_bs_dev **bs_dev)
{
	struct blob_bdev *b;
	struct spdk_bdev_desc *desc;
	int rc;

	assert(spdk_get_thread() != NULL);

	if (opts != NULL && opts_size != sizeof(*opts)) {
		SPDK_ERRLOG("bdev name '%s': unsupported options\n", bdev_name);
		return -EINVAL;
	}

	b = calloc(1, sizeof(*b));

	if (b == NULL) {
		SPDK_ERRLOG("could not allocate blob_bdev\n");
		return -ENOMEM;
	}

	rc = spdk_bdev_open_ext(bdev_name, write, event_cb, event_ctx, &desc);
	if (rc != 0) {
		free(b);
		return rc;
	}

	blob_bdev_init(b, desc);

	*bs_dev = &b->bs_dev;
	b->write = write;
	b->refs = 1;
	spdk_spin_init(&b->lock);

	return 0;
}

int
spdk_bdev_create_bs_dev_ext(const char *bdev_name, spdk_bdev_event_cb_t event_cb,
			    void *event_ctx, struct spdk_bs_dev **bs_dev)
{
	return spdk_bdev_create_bs_dev(bdev_name, true, NULL, 0, event_cb, event_ctx, bs_dev);
}
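
/*
 * Usage sketch (illustrative, not part of this file): a typical caller creates a bs_dev
 * from a named bdev and hands it to the blobstore, e.g. with spdk_bs_init() for a new
 * blobstore or spdk_bs_load() for an existing one. "Malloc0", bs_event_cb and
 * bs_init_done are placeholders.
 *
 *	struct spdk_bs_dev *bs_dev;
 *	int rc;
 *
 *	rc = spdk_bdev_create_bs_dev_ext("Malloc0", bs_event_cb, NULL, &bs_dev);
 *	if (rc != 0) {
 *		SPDK_ERRLOG("could not create bs_dev: %s\n", spdk_strerror(-rc));
 *		return;
 *	}
 *	spdk_bs_init(bs_dev, NULL, bs_init_done, NULL);
 *
 * bs_init_done() receives the new struct spdk_blob_store (or an error in bserrno). From
 * this point the blobstore owns bs_dev and releases it through the destroy() callback
 * installed by blob_bdev_init().
 */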