/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) croit GmbH.
 * All rights reserved.
 */

#include <sys/queue.h>

#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/endian.h"
#include "spdk/env.h"
#include "spdk/json.h"
#include "spdk/thread.h"
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/stdinc.h"
#include "spdk/log.h"

#include <daos.h>
#include <daos_event.h>
#include <daos_fs.h>
#include <daos_types.h>
#include <daos_pool.h>
#include <daos_cont.h>
#include <daos_errno.h>

#include "bdev_daos.h"

/* Largest number of iovecs a single read or write request may carry. */
#define BDEV_DAOS_IOVECS_MAX 32

/*
 * Per-I/O driver context. Its size is reported to the bdev layer through
 * bdev_daos_get_ctx_size(), and it is accessed via bdev_io->driver_ctx.
 */
struct bdev_daos_task {
	/* DAOS async event; the channel poller recovers the task from a completed event. */
	daos_event_t ev;
	/* Thread of the channel the request was submitted on; completions are delivered there. */
	struct spdk_thread *submit_td;
	/* The bdev_io this task belongs to. */
	struct spdk_bdev_io *bdev_io;

	/* Final status handed to spdk_bdev_io_complete(). */
	enum spdk_bdev_io_status status;

	/* Byte offset of the request (also printed by the completion debug log). */
	uint64_t offset;

	/* DAOS version of iovec and scatter/gather */
	/* Output argument of dfs_read(). */
	daos_size_t read_size;
	d_iov_t diovs[BDEV_DAOS_IOVECS_MAX];
	d_sg_list_t sgl;
};

/* One DAOS-backed block device; this pointer is also the io_device key. */
struct bdev_daos {
	/* Generic bdev descriptor registered with the bdev layer. */
	struct spdk_bdev disk;
	/* Object class passed to dfs_open() for the backing object. */
	daos_oclass_id_t oclass;

	/* NUL-terminated pool and container labels used when connecting. */
	char pool_name[DAOS_PROP_MAX_LABEL_BUF_LEN];
	char cont_name[DAOS_PROP_MAX_LABEL_BUF_LEN];

	/* Task of the reset request currently waiting for in-flight I/O to drain. */
	struct bdev_daos_task *reset_task;
	/* Poller that re-runs the in-flight check while a reset is pending. */
	struct spdk_poller *reset_retry_timer;
};

/* Per-channel context: every channel holds its own DAOS connection stack. */
struct bdev_daos_io_channel {
	/* Owning device. */
	struct bdev_daos *disk;
	/* Poller draining the DAOS event queue of this channel. */
	struct spdk_poller *poller;

	/* Pool and container handles. */
	daos_handle_t pool;
	daos_handle_t cont;

	/* Mounted DFS namespace and the object that stores the bdev data. */
	dfs_t *dfs;
	dfs_obj_t *obj;
	/* Event queue that async reads and writes are attached to. */
	daos_handle_t queue;
};

/*
 * Number of bdev_get_daos_engine() calls not yet matched by
 * bdev_daos_put_engine(); guarded by g_bdev_daos_init_mutex.
 */
static uint32_t g_bdev_daos_init_count = 0;
static pthread_mutex_t g_bdev_daos_init_mutex = PTHREAD_MUTEX_INITIALIZER;

static int bdev_daos_initialize(void);

/* Reference-counted DAOS client library init / fini. */
static int bdev_get_daos_engine(void);
static int bdev_daos_put_engine(void);

/* Tell the bdev layer how much driver context to reserve in each bdev_io. */
static int
bdev_daos_get_ctx_size(void)
{
	return sizeof(struct bdev_daos_task);
}

/* Module descriptor registered with the SPDK bdev layer. */
static struct spdk_bdev_module daos_if = {
	.name = "daos",
	.module_init = bdev_daos_initialize,
	.get_ctx_size = bdev_daos_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(daos, &daos_if) 90 91 static void 92 bdev_daos_free(struct bdev_daos *bdev_daos) 93 { 94 if (!bdev_daos) { 95 return; 96 } 97 98 free(bdev_daos->disk.name); 99 free(bdev_daos); 100 } 101 102 static void 103 bdev_daos_destruct_cb(void *io_device) 104 { 105 int rc; 106 struct bdev_daos *daos = io_device; 107 108 assert(daos != NULL); 109 110 bdev_daos_free(daos); 111 112 rc = bdev_daos_put_engine(); 113 if (rc) { 114 SPDK_ERRLOG("could not de-initialize DAOS engine: " DF_RC "\n", DP_RC(rc)); 115 } 116 } 117 118 static int 119 bdev_daos_destruct(void *ctx) 120 { 121 struct bdev_daos *daos = ctx; 122 123 SPDK_NOTICELOG("%s: destroying bdev_daos device\n", daos->disk.name); 124 125 spdk_io_device_unregister(daos, bdev_daos_destruct_cb); 126 127 return 0; 128 } 129 130 static void 131 _bdev_daos_io_complete(void *bdev_daos_task) 132 { 133 struct bdev_daos_task *task = bdev_daos_task; 134 135 SPDK_DEBUGLOG(bdev_daos, "completed IO at %#lx with status %s\n", task->offset, 136 task->status == SPDK_BDEV_IO_STATUS_SUCCESS ? 
"SUCCESS" : "FAILURE"); 137 138 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); 139 } 140 141 static void 142 bdev_daos_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status) 143 { 144 struct bdev_daos_task *task = (struct bdev_daos_task *)bdev_io->driver_ctx; 145 struct spdk_thread *current_thread = spdk_get_thread(); 146 147 assert(task->submit_td != NULL); 148 149 task->status = status; 150 if (task->submit_td != current_thread) { 151 spdk_thread_send_msg(task->submit_td, _bdev_daos_io_complete, task); 152 } else { 153 _bdev_daos_io_complete(task); 154 } 155 } 156 157 static int64_t 158 bdev_daos_writev(struct bdev_daos *daos, struct bdev_daos_io_channel *ch, 159 struct bdev_daos_task *task, 160 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 161 { 162 int rc; 163 164 SPDK_DEBUGLOG(bdev_daos, "write %d iovs size %lu to off: %#lx\n", 165 iovcnt, nbytes, offset); 166 167 assert(ch != NULL); 168 assert(daos != NULL); 169 assert(task != NULL); 170 assert(iov != NULL); 171 172 if (iovcnt > BDEV_DAOS_IOVECS_MAX) { 173 SPDK_ERRLOG("iovs number [%d] exceeds max allowed limit [%d]\n", iovcnt, 174 BDEV_DAOS_IOVECS_MAX); 175 return -E2BIG; 176 } 177 178 if ((rc = daos_event_init(&task->ev, ch->queue, NULL))) { 179 SPDK_ERRLOG("%s: could not initialize async event: " DF_RC "\n", 180 daos->disk.name, DP_RC(rc)); 181 return -EINVAL; 182 } 183 184 for (int i = 0; i < iovcnt; i++, iov++) { 185 d_iov_set(&(task->diovs[i]), iov->iov_base, iov->iov_len); 186 } 187 188 task->sgl.sg_nr = iovcnt; 189 task->sgl.sg_nr_out = 0; 190 task->sgl.sg_iovs = task->diovs; 191 task->offset = offset; 192 193 if ((rc = dfs_write(ch->dfs, ch->obj, &task->sgl, offset, &task->ev))) { 194 SPDK_ERRLOG("%s: could not start async write: " DF_RC "\n", 195 daos->disk.name, DP_RC(rc)); 196 daos_event_fini(&task->ev); 197 return -EINVAL; 198 } 199 200 return nbytes; 201 } 202 203 static int64_t 204 bdev_daos_readv(struct bdev_daos *daos, struct 
bdev_daos_io_channel *ch, 205 struct bdev_daos_task *task, 206 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 207 { 208 int rc; 209 210 SPDK_DEBUGLOG(bdev_daos, "read %d iovs size %lu to off: %#lx\n", 211 iovcnt, nbytes, offset); 212 213 assert(ch != NULL); 214 assert(daos != NULL); 215 assert(task != NULL); 216 assert(iov != NULL); 217 218 if (iovcnt > BDEV_DAOS_IOVECS_MAX) { 219 SPDK_ERRLOG("iovs number [%d] exceeds max allowed limit [%d]\n", iovcnt, 220 BDEV_DAOS_IOVECS_MAX); 221 return -E2BIG; 222 } 223 224 if ((rc = daos_event_init(&task->ev, ch->queue, NULL))) { 225 SPDK_ERRLOG("%s: could not initialize async event: " DF_RC "\n", 226 daos->disk.name, DP_RC(rc)); 227 return -EINVAL; 228 } 229 230 for (int i = 0; i < iovcnt; i++, iov++) { 231 d_iov_set(&(task->diovs[i]), iov->iov_base, iov->iov_len); 232 } 233 234 task->sgl.sg_nr = iovcnt; 235 task->sgl.sg_nr_out = 0; 236 task->sgl.sg_iovs = task->diovs; 237 task->offset = offset; 238 239 if ((rc = dfs_read(ch->dfs, ch->obj, &task->sgl, offset, &task->read_size, &task->ev))) { 240 SPDK_ERRLOG("%s: could not start async read: " DF_RC "\n", 241 daos->disk.name, DP_RC(rc)); 242 daos_event_fini(&task->ev); 243 return -EINVAL; 244 } 245 246 return nbytes; 247 } 248 249 static void 250 bdev_daos_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 251 bool success) 252 { 253 int64_t rc; 254 struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch); 255 256 if (!success) { 257 bdev_daos_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 258 return; 259 } 260 261 rc = bdev_daos_readv((struct bdev_daos *)bdev_io->bdev->ctxt, 262 dch, 263 (struct bdev_daos_task *)bdev_io->driver_ctx, 264 bdev_io->u.bdev.iovs, 265 bdev_io->u.bdev.iovcnt, 266 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 267 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 268 269 if (rc < 0) { 270 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 271 return; 272 } 273 } 274 275 static void 
276 _bdev_daos_get_io_inflight(struct spdk_io_channel_iter *i) 277 { 278 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 279 struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch); 280 int io_inflight = daos_eq_query(dch->queue, DAOS_EQR_WAITING, 0, NULL); 281 282 if (io_inflight > 0) { 283 spdk_for_each_channel_continue(i, -1); 284 return; 285 } 286 287 spdk_for_each_channel_continue(i, 0); 288 } 289 290 static int bdev_daos_reset_retry_timer(void *arg); 291 292 static void 293 _bdev_daos_get_io_inflight_done(struct spdk_io_channel_iter *i, int status) 294 { 295 struct bdev_daos *daos = spdk_io_channel_iter_get_ctx(i); 296 297 if (status == -1) { 298 daos->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_daos_reset_retry_timer, daos, 1000); 299 return; 300 } 301 302 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(daos->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS); 303 } 304 305 static int 306 bdev_daos_reset_retry_timer(void *arg) 307 { 308 struct bdev_daos *daos = arg; 309 310 if (daos->reset_retry_timer) { 311 spdk_poller_unregister(&daos->reset_retry_timer); 312 } 313 314 spdk_for_each_channel(daos, 315 _bdev_daos_get_io_inflight, 316 daos, 317 _bdev_daos_get_io_inflight_done); 318 319 return SPDK_POLLER_BUSY; 320 } 321 322 static void 323 bdev_daos_reset(struct bdev_daos *daos, struct bdev_daos_task *task) 324 { 325 assert(daos != NULL); 326 assert(task != NULL); 327 328 daos->reset_task = task; 329 bdev_daos_reset_retry_timer(daos); 330 } 331 332 333 static int64_t 334 bdev_daos_unmap(struct bdev_daos_io_channel *ch, uint64_t nbytes, 335 uint64_t offset) 336 { 337 SPDK_DEBUGLOG(bdev_daos, "unmap at %#lx with size %#lx\n", offset, nbytes); 338 return dfs_punch(ch->dfs, ch->obj, offset, nbytes); 339 } 340 341 static void 342 _bdev_daos_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 343 { 344 struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch); 345 346 int64_t rc; 347 switch (bdev_io->type) { 348 case 
SPDK_BDEV_IO_TYPE_READ: 349 spdk_bdev_io_get_buf(bdev_io, bdev_daos_get_buf_cb, 350 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 351 break; 352 353 case SPDK_BDEV_IO_TYPE_WRITE: 354 rc = bdev_daos_writev((struct bdev_daos *)bdev_io->bdev->ctxt, 355 dch, 356 (struct bdev_daos_task *)bdev_io->driver_ctx, 357 bdev_io->u.bdev.iovs, 358 bdev_io->u.bdev.iovcnt, 359 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 360 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 361 if (rc < 0) { 362 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 363 return; 364 } 365 break; 366 367 case SPDK_BDEV_IO_TYPE_RESET: 368 /* Can't cancel in-flight requests, but can wait for their completions */ 369 bdev_daos_reset((struct bdev_daos *)bdev_io->bdev->ctxt, 370 (struct bdev_daos_task *)bdev_io->driver_ctx); 371 break; 372 373 case SPDK_BDEV_IO_TYPE_FLUSH: 374 /* NOOP because DAOS requests land on PMEM and writes are persistent upon completion */ 375 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 376 break; 377 378 case SPDK_BDEV_IO_TYPE_UNMAP: 379 rc = bdev_daos_unmap(dch, 380 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 381 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 382 if (!rc) { 383 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 384 } else { 385 SPDK_DEBUGLOG(bdev_daos, "%s: could not unmap: " DF_RC "\n", 386 dch->disk->disk.name, DP_RC((int)rc)); 387 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 388 } 389 390 break; 391 392 default: 393 SPDK_ERRLOG("Wrong io type\n"); 394 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 395 break; 396 } 397 } 398 399 static void 400 bdev_daos_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 401 { 402 struct bdev_daos_task *task = (struct bdev_daos_task *)bdev_io->driver_ctx; 403 struct spdk_thread *submit_td = spdk_io_channel_get_thread(ch); 404 405 assert(task != NULL); 406 407 task->submit_td = submit_td; 408 
task->bdev_io = bdev_io; 409 410 _bdev_daos_submit_request(ch, bdev_io); 411 } 412 413 #define POLLING_EVENTS_NUM 64 414 415 static int 416 bdev_daos_channel_poll(void *arg) 417 { 418 daos_event_t *evp[POLLING_EVENTS_NUM]; 419 struct bdev_daos_io_channel *ch = arg; 420 421 assert(ch != NULL); 422 assert(ch->disk != NULL); 423 424 int rc = daos_eq_poll(ch->queue, 0, DAOS_EQ_NOWAIT, 425 POLLING_EVENTS_NUM, evp); 426 427 if (rc < 0) { 428 SPDK_DEBUGLOG(bdev_daos, "%s: could not poll daos event queue: " DF_RC "\n", 429 ch->disk->disk.name, DP_RC(rc)); 430 /* 431 * TODO: There are cases when this is self healing, e.g. 432 * brief network issues, DAOS agent restarting etc. 433 * However, if the issue persists over some time better would be 434 * to remove a bdev or the whole controller 435 */ 436 return SPDK_POLLER_BUSY; 437 } 438 439 for (int i = 0; i < rc; ++i) { 440 struct bdev_daos_task *task = SPDK_CONTAINEROF(evp[i], struct bdev_daos_task, ev); 441 enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS; 442 443 assert(task != NULL); 444 445 if (task->ev.ev_error != DER_SUCCESS) { 446 status = SPDK_BDEV_IO_STATUS_FAILED; 447 } 448 449 daos_event_fini(&task->ev); 450 bdev_daos_io_complete(task->bdev_io, status); 451 } 452 453 return rc > 0 ? 
SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 454 } 455 456 static bool 457 bdev_daos_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 458 { 459 switch (io_type) { 460 case SPDK_BDEV_IO_TYPE_READ: 461 case SPDK_BDEV_IO_TYPE_WRITE: 462 case SPDK_BDEV_IO_TYPE_RESET: 463 case SPDK_BDEV_IO_TYPE_FLUSH: 464 case SPDK_BDEV_IO_TYPE_UNMAP: 465 return true; 466 467 default: 468 return false; 469 } 470 } 471 472 static struct spdk_io_channel * 473 bdev_daos_get_io_channel(void *ctx) 474 { 475 return spdk_get_io_channel(ctx); 476 } 477 478 static void 479 bdev_daos_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 480 { 481 char uuid_str[SPDK_UUID_STRING_LEN]; 482 struct bdev_daos *daos = bdev->ctxt; 483 484 spdk_json_write_object_begin(w); 485 486 spdk_json_write_named_string(w, "method", "bdev_daos_create"); 487 488 spdk_json_write_named_object_begin(w, "params"); 489 spdk_json_write_named_string(w, "name", bdev->name); 490 spdk_json_write_named_string(w, "pool", daos->pool_name); 491 spdk_json_write_named_string(w, "cont", daos->cont_name); 492 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 493 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 494 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 495 spdk_json_write_named_string(w, "uuid", uuid_str); 496 497 spdk_json_write_object_end(w); 498 499 spdk_json_write_object_end(w); 500 } 501 502 static const struct spdk_bdev_fn_table daos_fn_table = { 503 .destruct = bdev_daos_destruct, 504 .submit_request = bdev_daos_submit_request, 505 .io_type_supported = bdev_daos_io_type_supported, 506 .get_io_channel = bdev_daos_get_io_channel, 507 .write_config_json = bdev_daos_write_json_config, 508 }; 509 510 static int 511 bdev_daos_io_channel_setup_daos(struct bdev_daos_io_channel *ch) 512 { 513 int rc = 0; 514 struct bdev_daos *daos = ch->disk; 515 daos_pool_info_t pinfo; 516 daos_cont_info_t cinfo; 517 518 int fd_oflag = O_CREAT | O_RDWR; 519 mode_t mode = S_IFREG | 
S_IRWXU | S_IRWXG | S_IRWXO; 520 521 rc = bdev_get_daos_engine(); 522 if (rc) { 523 SPDK_ERRLOG("could not initialize DAOS engine: " DF_RC "\n", DP_RC(rc)); 524 return rc; 525 } 526 527 SPDK_DEBUGLOG(bdev_daos, "connecting to daos pool '%s'\n", daos->pool_name); 528 if ((rc = daos_pool_connect(daos->pool_name, NULL, DAOS_PC_RW, &ch->pool, &pinfo, NULL))) { 529 SPDK_ERRLOG("%s: could not connect to daos pool: " DF_RC "\n", 530 daos->disk.name, DP_RC(rc)); 531 return rc; 532 } 533 SPDK_DEBUGLOG(bdev_daos, "connecting to daos container '%s'\n", daos->cont_name); 534 if ((rc = daos_cont_open(ch->pool, daos->cont_name, DAOS_COO_RW, &ch->cont, &cinfo, NULL))) { 535 SPDK_ERRLOG("%s: could not open daos container: " DF_RC "\n", 536 daos->disk.name, DP_RC(rc)); 537 goto cleanup_pool; 538 } 539 SPDK_DEBUGLOG(bdev_daos, "mounting daos dfs\n"); 540 if ((rc = dfs_mount(ch->pool, ch->cont, O_RDWR, &ch->dfs))) { 541 SPDK_ERRLOG("%s: could not mount daos dfs: " DF_RC "\n", 542 daos->disk.name, DP_RC(rc)); 543 goto cleanup_cont; 544 } 545 SPDK_DEBUGLOG(bdev_daos, "opening dfs object\n"); 546 if ((rc = dfs_open(ch->dfs, NULL, daos->disk.name, mode, fd_oflag, daos->oclass, 547 0, NULL, &ch->obj))) { 548 SPDK_ERRLOG("%s: could not open dfs object: " DF_RC "\n", 549 daos->disk.name, DP_RC(rc)); 550 goto cleanup_mount; 551 } 552 if ((rc = daos_eq_create(&ch->queue))) { 553 SPDK_ERRLOG("%s: could not create daos event queue: " DF_RC "\n", 554 daos->disk.name, DP_RC(rc)); 555 goto cleanup_obj; 556 } 557 558 return 0; 559 560 cleanup_obj: 561 dfs_release(ch->obj); 562 cleanup_mount: 563 dfs_umount(ch->dfs); 564 cleanup_cont: 565 daos_cont_close(ch->cont, NULL); 566 cleanup_pool: 567 daos_pool_disconnect(ch->pool, NULL); 568 569 return rc; 570 } 571 572 static int 573 bdev_daos_io_channel_create_cb(void *io_device, void *ctx_buf) 574 { 575 struct bdev_daos_io_channel *ch = ctx_buf; 576 577 ch->disk = io_device; 578 579 if (bdev_daos_io_channel_setup_daos(ch)) { 580 return -EINVAL; 581 } 582 
583 SPDK_DEBUGLOG(bdev_daos, "%s: starting daos event queue poller\n", 584 ch->disk->disk.name); 585 586 ch->poller = SPDK_POLLER_REGISTER(bdev_daos_channel_poll, ch, 0); 587 588 return 0; 589 } 590 591 static void 592 bdev_daos_io_channel_destroy_cb(void *io_device, void *ctx_buf) 593 { 594 int rc; 595 struct bdev_daos_io_channel *ch = ctx_buf; 596 597 SPDK_DEBUGLOG(bdev_daos, "stopping daos event queue poller\n"); 598 599 spdk_poller_unregister(&ch->poller); 600 601 if ((rc = daos_eq_destroy(ch->queue, DAOS_EQ_DESTROY_FORCE))) { 602 SPDK_ERRLOG("could not destroy daos event queue: " DF_RC "\n", DP_RC(rc)); 603 } 604 if ((rc = dfs_release(ch->obj))) { 605 SPDK_ERRLOG("could not release dfs object: " DF_RC "\n", DP_RC(rc)); 606 } 607 if ((rc = dfs_umount(ch->dfs))) { 608 SPDK_ERRLOG("could not unmount dfs: " DF_RC "\n", DP_RC(rc)); 609 } 610 if ((rc = daos_cont_close(ch->cont, NULL))) { 611 SPDK_ERRLOG("could not close container: " DF_RC "\n", DP_RC(rc)); 612 } 613 if ((rc = daos_pool_disconnect(ch->pool, NULL))) { 614 SPDK_ERRLOG("could not disconnect from pool: " DF_RC "\n", DP_RC(rc)); 615 } 616 rc = bdev_daos_put_engine(); 617 if (rc) { 618 SPDK_ERRLOG("could not de-initialize DAOS engine: " DF_RC "\n", DP_RC(rc)); 619 } 620 } 621 622 int 623 create_bdev_daos(struct spdk_bdev **bdev, 624 const char *name, const struct spdk_uuid *uuid, 625 const char *pool, const char *cont, const char *oclass, 626 uint64_t num_blocks, uint32_t block_size) 627 { 628 int rc; 629 size_t len; 630 struct bdev_daos *daos; 631 struct bdev_daos_io_channel ch = {}; 632 633 SPDK_NOTICELOG("%s: creating bdev_daos disk on '%s:%s'\n", name, pool, cont); 634 635 if (num_blocks == 0) { 636 SPDK_ERRLOG("Disk num_blocks must be greater than 0"); 637 return -EINVAL; 638 } 639 640 if (block_size % 512) { 641 SPDK_ERRLOG("block size must be 512 bytes aligned\n"); 642 return -EINVAL; 643 } 644 645 if (!name) { 646 SPDK_ERRLOG("device name cannot be empty\n"); 647 return -EINVAL; 648 } 649 650 if 
(!pool) { 651 SPDK_ERRLOG("daos pool cannot be empty\n"); 652 return -EINVAL; 653 } 654 if (!cont) { 655 SPDK_ERRLOG("daos cont cannot be empty\n"); 656 return -EINVAL; 657 } 658 659 daos = calloc(1, sizeof(*daos)); 660 if (!daos) { 661 SPDK_ERRLOG("calloc() failed\n"); 662 return -ENOMEM; 663 } 664 665 if (!oclass) { 666 oclass = "SX"; /* Max throughput by default */ 667 } 668 daos->oclass = daos_oclass_name2id(oclass); 669 if (daos->oclass == OC_UNKNOWN) { 670 SPDK_ERRLOG("could not parse daos oclass: '%s'\n", oclass); 671 free(daos); 672 return -EINVAL; 673 } 674 675 len = strlen(pool); 676 if (len > DAOS_PROP_LABEL_MAX_LEN) { 677 SPDK_ERRLOG("daos pool name is too long\n"); 678 free(daos); 679 return -EINVAL; 680 } 681 memcpy(daos->pool_name, pool, len); 682 683 len = strlen(cont); 684 if (len > DAOS_PROP_LABEL_MAX_LEN) { 685 SPDK_ERRLOG("daos cont name is too long\n"); 686 free(daos); 687 return -EINVAL; 688 } 689 memcpy(daos->cont_name, cont, len); 690 691 daos->disk.name = strdup(name); 692 daos->disk.product_name = "DAOS bdev"; 693 694 daos->disk.write_cache = 0; 695 daos->disk.blocklen = block_size; 696 daos->disk.blockcnt = num_blocks; 697 698 if (uuid) { 699 daos->disk.uuid = *uuid; 700 } 701 702 daos->disk.ctxt = daos; 703 daos->disk.fn_table = &daos_fn_table; 704 daos->disk.module = &daos_if; 705 706 rc = bdev_get_daos_engine(); 707 if (rc) { 708 SPDK_ERRLOG("could not initialize DAOS engine: " DF_RC "\n", DP_RC(rc)); 709 bdev_daos_free(daos); 710 return rc; 711 } 712 713 /* We try to connect to the DAOS container during channel creation, so simulate 714 * creating a channel here, so that we can return a failure when the DAOS bdev 715 * is created, instead of finding it out later when the first channel is created 716 * and leaving unusable bdev registered. 
717 */ 718 rc = bdev_daos_io_channel_create_cb(daos, &ch); 719 if (rc) { 720 SPDK_ERRLOG("'%s' could not initialize io-channel: %s", name, strerror(-rc)); 721 bdev_daos_free(daos); 722 return rc; 723 } 724 bdev_daos_io_channel_destroy_cb(daos, &ch); 725 726 spdk_io_device_register(daos, bdev_daos_io_channel_create_cb, 727 bdev_daos_io_channel_destroy_cb, 728 sizeof(struct bdev_daos_io_channel), 729 daos->disk.name); 730 731 732 rc = spdk_bdev_register(&daos->disk); 733 if (rc) { 734 spdk_io_device_unregister(daos, NULL); 735 bdev_daos_free(daos); 736 return rc; 737 } 738 739 *bdev = &(daos->disk); 740 741 return rc; 742 } 743 744 static void 745 dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) 746 { 747 } 748 749 int 750 bdev_daos_resize(const char *name, const uint64_t new_size_in_mb) 751 { 752 int rc = 0; 753 struct spdk_bdev_desc *desc; 754 struct spdk_bdev *bdev; 755 struct spdk_io_channel *ch; 756 struct bdev_daos_io_channel *dch; 757 uint64_t new_size_in_byte; 758 uint64_t current_size_in_mb; 759 760 rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc); 761 if (rc != 0) { 762 return rc; 763 } 764 765 bdev = spdk_bdev_desc_get_bdev(desc); 766 if (bdev->module != &daos_if) { 767 rc = -EINVAL; 768 goto exit; 769 } 770 771 current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024); 772 if (current_size_in_mb > new_size_in_mb) { 773 SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n"); 774 rc = -EINVAL; 775 goto exit; 776 } 777 778 ch = bdev_daos_get_io_channel(bdev); 779 dch = spdk_io_channel_get_ctx(ch); 780 new_size_in_byte = new_size_in_mb * 1024 * 1024; 781 782 rc = dfs_punch(dch->dfs, dch->obj, new_size_in_byte, DFS_MAX_FSIZE); 783 spdk_put_io_channel(ch); 784 if (rc != 0) { 785 SPDK_ERRLOG("failed to resize daos bdev: " DF_RC "\n", DP_RC(rc)); 786 rc = -EINTR; 787 goto exit; 788 } 789 790 SPDK_NOTICELOG("DAOS bdev device is resized: bdev name %s, old block count %" PRIu64 
791 ", new block count %" 792 PRIu64 "\n", 793 bdev->name, 794 bdev->blockcnt, 795 new_size_in_byte / bdev->blocklen); 796 rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen); 797 if (rc != 0) { 798 SPDK_ERRLOG("failed to notify block cnt change.\n"); 799 } 800 801 exit: 802 spdk_bdev_close(desc); 803 return rc; 804 } 805 806 void 807 delete_bdev_daos(const char *bdev_name, spdk_bdev_unregister_cb cb_fn, void *cb_arg) 808 { 809 int rc; 810 811 rc = spdk_bdev_unregister_by_name(bdev_name, &daos_if, cb_fn, cb_arg); 812 if (rc != 0) { 813 cb_fn(cb_arg, rc); 814 } 815 } 816 817 static int 818 bdev_get_daos_engine(void) 819 { 820 int rc = 0; 821 822 pthread_mutex_lock(&g_bdev_daos_init_mutex); 823 if (g_bdev_daos_init_count++ > 0) { 824 pthread_mutex_unlock(&g_bdev_daos_init_mutex); 825 return 0; 826 } 827 SPDK_DEBUGLOG(bdev_daos, "initializing DAOS engine\n"); 828 829 rc = daos_init(); 830 pthread_mutex_unlock(&g_bdev_daos_init_mutex); 831 832 if (rc != -DER_ALREADY && rc) { 833 return rc; 834 } 835 return 0; 836 } 837 838 static int 839 bdev_daos_put_engine(void) 840 { 841 int rc = 0; 842 843 pthread_mutex_lock(&g_bdev_daos_init_mutex); 844 if (--g_bdev_daos_init_count > 0) { 845 pthread_mutex_unlock(&g_bdev_daos_init_mutex); 846 return 0; 847 } 848 SPDK_DEBUGLOG(bdev_daos, "de-initializing DAOS engine\n"); 849 850 rc = daos_fini(); 851 pthread_mutex_unlock(&g_bdev_daos_init_mutex); 852 853 return rc; 854 } 855 856 static int 857 bdev_daos_initialize(void) 858 { 859 /* DAOS engine and client initialization happens 860 during the first bdev creation */ 861 return 0; 862 } 863 864 SPDK_LOG_REGISTER_COMPONENT(bdev_daos) 865