1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) croit GmbH. 5 * All rights reserved. 6 */ 7 8 #include <sys/queue.h> 9 10 #include "spdk/bdev.h" 11 #include "spdk/bdev_module.h" 12 #include "spdk/endian.h" 13 #include "spdk/env.h" 14 #include "spdk/json.h" 15 #include "spdk/thread.h" 16 #include "spdk/queue.h" 17 #include "spdk/string.h" 18 #include "spdk/stdinc.h" 19 #include "spdk/log.h" 20 21 #include <daos.h> 22 #include <daos_event.h> 23 #include <daos_fs.h> 24 #include <daos_types.h> 25 #include <daos_pool.h> 26 #include <daos_cont.h> 27 #include <daos_errno.h> 28 29 #include "bdev_daos.h" 30 31 #define BDEV_DAOS_IOVECS_MAX 32 32 33 struct bdev_daos_task { 34 daos_event_t ev; 35 struct spdk_thread *submit_td; 36 struct spdk_bdev_io *bdev_io; 37 38 enum spdk_bdev_io_status status; 39 40 uint64_t offset; 41 42 /* DAOS version of iovec and scatter/gather */ 43 daos_size_t read_size; 44 d_iov_t diovs[BDEV_DAOS_IOVECS_MAX]; 45 d_sg_list_t sgl; 46 }; 47 48 struct bdev_daos { 49 struct spdk_bdev disk; 50 daos_oclass_id_t oclass; 51 52 char pool_name[DAOS_PROP_MAX_LABEL_BUF_LEN]; 53 char cont_name[DAOS_PROP_MAX_LABEL_BUF_LEN]; 54 55 struct bdev_daos_task *reset_task; 56 struct spdk_poller *reset_retry_timer; 57 }; 58 59 struct bdev_daos_io_channel { 60 struct bdev_daos *disk; 61 struct spdk_poller *poller; 62 63 daos_handle_t pool; 64 daos_handle_t cont; 65 66 dfs_t *dfs; 67 dfs_obj_t *obj; 68 daos_handle_t queue; 69 }; 70 71 static uint32_t g_bdev_daos_init_count = 0; 72 static pthread_mutex_t g_bdev_daos_init_mutex = PTHREAD_MUTEX_INITIALIZER; 73 74 static int bdev_daos_initialize(void); 75 76 static int bdev_get_daos_engine(void); 77 static int bdev_daos_put_engine(void); 78 79 static int 80 bdev_daos_get_ctx_size(void) 81 { 82 return sizeof(struct bdev_daos_task); 83 } 84 85 static struct spdk_bdev_module daos_if = { 86 .name = "daos", 87 .module_init = bdev_daos_initialize, 88 .get_ctx_size = bdev_daos_get_ctx_size, 89 }; 90 91 SPDK_BDEV_MODULE_REGISTER(daos, &daos_if) 92 93 static void 94 bdev_daos_free(struct bdev_daos *bdev_daos) 95 { 96 if (!bdev_daos) { 97 return; 98 } 99 100 free(bdev_daos->disk.name); 101 free(bdev_daos); 102 } 103 104 static void 105 bdev_daos_destruct_cb(void *io_device) 106 { 107 int rc; 108 struct bdev_daos *daos = io_device; 109 110 assert(daos != NULL); 111 112 bdev_daos_free(daos); 113 114 rc = bdev_daos_put_engine(); 115 if (rc) { 116 SPDK_ERRLOG("could not de-initialize DAOS engine: " DF_RC "\n", DP_RC(rc)); 117 } 118 } 119 120 static int 121 bdev_daos_destruct(void *ctx) 122 { 123 struct bdev_daos *daos = ctx; 124 125 SPDK_NOTICELOG("%s: destroying bdev_daos device\n", daos->disk.name); 126 127 spdk_io_device_unregister(daos, bdev_daos_destruct_cb); 128 129 return 0; 130 } 131 132 static void 133 _bdev_daos_io_complete(void *bdev_daos_task) 134 { 135 struct bdev_daos_task *task = bdev_daos_task; 136 137 SPDK_DEBUGLOG(bdev_daos, "completed IO at %#lx with status %s\n", task->offset, 138 task->status == SPDK_BDEV_IO_STATUS_SUCCESS ? "SUCCESS" : "FAILURE"); 139 140 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); 141 } 142 143 static void 144 bdev_daos_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status) 145 { 146 struct bdev_daos_task *task = (struct bdev_daos_task *)bdev_io->driver_ctx; 147 struct spdk_thread *current_thread = spdk_get_thread(); 148 149 assert(task->submit_td != NULL); 150 151 task->status = status; 152 if (task->submit_td != current_thread) { 153 spdk_thread_send_msg(task->submit_td, _bdev_daos_io_complete, task); 154 } else { 155 _bdev_daos_io_complete(task); 156 } 157 } 158 159 static int64_t 160 bdev_daos_writev(struct bdev_daos *daos, struct bdev_daos_io_channel *ch, 161 struct bdev_daos_task *task, 162 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 163 { 164 int rc; 165 struct iovec *io = iov; 166 167 SPDK_DEBUGLOG(bdev_daos, "write %d iovs size %lu to off: %#lx\n", 168 iovcnt, nbytes, offset); 169 170 assert(ch != NULL); 171 assert(daos != NULL); 172 assert(task != NULL); 173 assert(iov != NULL); 174 175 if (iovcnt > BDEV_DAOS_IOVECS_MAX) { 176 SPDK_ERRLOG("iovs number [%d] exceeds max allowed limit [%d]\n", iovcnt, 177 BDEV_DAOS_IOVECS_MAX); 178 return -E2BIG; 179 } 180 181 if ((rc = daos_event_init(&task->ev, ch->queue, NULL))) { 182 SPDK_ERRLOG("%s: could not initialize async event: " DF_RC "\n", 183 daos->disk.name, DP_RC(rc)); 184 return -EINVAL; 185 } 186 187 for (int i = 0; i < iovcnt; i++, iov++) { 188 d_iov_set(&(task->diovs[i]), io->iov_base, io->iov_len); 189 } 190 191 task->sgl.sg_nr = iovcnt; 192 task->sgl.sg_nr_out = 0; 193 task->sgl.sg_iovs = task->diovs; 194 task->offset = offset; 195 196 if ((rc = dfs_write(ch->dfs, ch->obj, &task->sgl, offset, &task->ev))) { 197 SPDK_ERRLOG("%s: could not start async write: " DF_RC "\n", 198 daos->disk.name, DP_RC(rc)); 199 daos_event_fini(&task->ev); 200 return -EINVAL; 201 } 202 203 return nbytes; 204 } 205 206 static int64_t 207 bdev_daos_readv(struct bdev_daos *daos, struct bdev_daos_io_channel *ch, 208 struct bdev_daos_task *task, 209 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset) 210 { 211 int rc; 212 struct iovec *io = iov; 213 214 SPDK_DEBUGLOG(bdev_daos, "read %d iovs size %lu to off: %#lx\n", 215 iovcnt, nbytes, offset); 216 217 assert(ch != NULL); 218 assert(daos != NULL); 219 assert(task != NULL); 220 assert(iov != NULL); 221 222 if (iovcnt > BDEV_DAOS_IOVECS_MAX) { 223 SPDK_ERRLOG("iovs number [%d] exceeds max allowed limit [%d]\n", iovcnt, 224 BDEV_DAOS_IOVECS_MAX); 225 return -E2BIG; 226 } 227 228 if ((rc = daos_event_init(&task->ev, ch->queue, NULL))) { 229 SPDK_ERRLOG("%s: could not initialize async event: " DF_RC "\n", 230 daos->disk.name, DP_RC(rc)); 231 return -EINVAL; 232 } 233 234 for (int i = 0; i < iovcnt; i++, io++) { 235 d_iov_set(&(task->diovs[i]), io->iov_base, io->iov_len); 236 } 237 238 task->sgl.sg_nr = iovcnt; 239 task->sgl.sg_nr_out = 0; 240 task->sgl.sg_iovs = task->diovs; 241 task->offset = offset; 242 243 if ((rc = dfs_read(ch->dfs, ch->obj, &task->sgl, offset, &task->read_size, &task->ev))) { 244 SPDK_ERRLOG("%s: could not start async read: " DF_RC "\n", 245 daos->disk.name, DP_RC(rc)); 246 daos_event_fini(&task->ev); 247 return -EINVAL; 248 } 249 250 return nbytes; 251 } 252 253 static void 254 bdev_daos_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 255 bool success) 256 { 257 int64_t rc; 258 struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch); 259 260 if (!success) { 261 bdev_daos_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 262 return; 263 } 264 265 rc = bdev_daos_readv((struct bdev_daos *)bdev_io->bdev->ctxt, 266 dch, 267 (struct bdev_daos_task *)bdev_io->driver_ctx, 268 bdev_io->u.bdev.iovs, 269 bdev_io->u.bdev.iovcnt, 270 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 271 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 272 273 if (rc < 0) { 274 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 275 return; 276 } 277 } 278 279 static void 280 _bdev_daos_get_io_inflight(struct spdk_io_channel_iter *i) 281 { 282 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 283 struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch); 284 int io_inflight = daos_eq_query(dch->queue, DAOS_EQR_WAITING, 0, NULL); 285 286 if (io_inflight > 0) { 287 spdk_for_each_channel_continue(i, -1); 288 return; 289 } 290 291 spdk_for_each_channel_continue(i, 0); 292 } 293 294 static int bdev_daos_reset_retry_timer(void *arg); 295 296 static void 297 _bdev_daos_get_io_inflight_done(struct spdk_io_channel_iter *i, int status) 298 { 299 struct bdev_daos *daos = spdk_io_channel_iter_get_ctx(i); 300 301 if (status == -1) { 302 daos->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_daos_reset_retry_timer, daos, 1000); 303 return; 304 } 305 306 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(daos->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS); 307 } 308 309 static int 310 bdev_daos_reset_retry_timer(void *arg) 311 { 312 struct bdev_daos *daos = arg; 313 314 if (daos->reset_retry_timer) { 315 spdk_poller_unregister(&daos->reset_retry_timer); 316 } 317 318 spdk_for_each_channel(daos, 319 _bdev_daos_get_io_inflight, 320 daos, 321 _bdev_daos_get_io_inflight_done); 322 323 return SPDK_POLLER_BUSY; 324 } 325 326 static void 327 bdev_daos_reset(struct bdev_daos *daos, struct bdev_daos_task *task) 328 { 329 assert(daos != NULL); 330 assert(task != NULL); 331 332 daos->reset_task = task; 333 bdev_daos_reset_retry_timer(daos); 334 } 335 336 337 static int64_t 338 bdev_daos_unmap(struct bdev_daos_io_channel *ch, uint64_t nbytes, 339 uint64_t offset) 340 { 341 SPDK_DEBUGLOG(bdev_daos, "unmap at %#lx with size %#lx\n", offset, nbytes); 342 return dfs_punch(ch->dfs, ch->obj, offset, nbytes); 343 } 344 345 static void 346 _bdev_daos_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 347 { 348 struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch); 349 350 int64_t rc; 351 switch (bdev_io->type) { 352 case SPDK_BDEV_IO_TYPE_READ: 353 spdk_bdev_io_get_buf(bdev_io, bdev_daos_get_buf_cb, 354 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 355 break; 356 357 case SPDK_BDEV_IO_TYPE_WRITE: 358 rc = bdev_daos_writev((struct bdev_daos *)bdev_io->bdev->ctxt, 359 dch, 360 (struct bdev_daos_task *)bdev_io->driver_ctx, 361 bdev_io->u.bdev.iovs, 362 bdev_io->u.bdev.iovcnt, 363 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 364 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 365 if (rc < 0) { 366 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 367 return; 368 } 369 break; 370 371 case SPDK_BDEV_IO_TYPE_RESET: 372 /* Can't cancel in-flight requests, but can wait for their completions */ 373 bdev_daos_reset((struct bdev_daos *)bdev_io->bdev->ctxt, 374 (struct bdev_daos_task *)bdev_io->driver_ctx); 375 break; 376 377 case SPDK_BDEV_IO_TYPE_FLUSH: 378 /* NOOP because DAOS requests land on PMEM and writes are persistent upon completion */ 379 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 380 break; 381 382 case SPDK_BDEV_IO_TYPE_UNMAP: 383 rc = bdev_daos_unmap(dch, 384 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, 385 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); 386 if (!rc) { 387 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 388 } else { 389 SPDK_DEBUGLOG(bdev_daos, "%s: could not unmap: " DF_RC "\n", 390 dch->disk->disk.name, DP_RC((int)rc)); 391 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 392 } 393 394 break; 395 396 default: 397 SPDK_ERRLOG("Wrong io type\n"); 398 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 399 break; 400 } 401 } 402 403 static void 404 bdev_daos_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 405 { 406 struct bdev_daos_task *task = (struct bdev_daos_task *)bdev_io->driver_ctx; 407 struct spdk_thread *submit_td = spdk_io_channel_get_thread(ch); 408 409 assert(task != NULL); 410 411 task->submit_td = submit_td; 412 task->bdev_io = bdev_io; 413 414 _bdev_daos_submit_request(ch, bdev_io); 415 } 416 417 #define POLLING_EVENTS_NUM 64 418 419 static int 420 bdev_daos_channel_poll(void *arg) 421 { 422 daos_event_t *evp[POLLING_EVENTS_NUM]; 423 struct bdev_daos_io_channel *ch = arg; 424 425 assert(ch != NULL); 426 assert(ch->disk != NULL); 427 428 int rc = daos_eq_poll(ch->queue, 0, DAOS_EQ_NOWAIT, 429 POLLING_EVENTS_NUM, evp); 430 431 if (rc < 0) { 432 SPDK_DEBUGLOG(bdev_daos, "%s: could not poll daos event queue: " DF_RC "\n", 433 ch->disk->disk.name, DP_RC(rc)); 434 /* 435 * TODO: There are cases when this is self healing, e.g. 436 * brief network issues, DAOS agent restarting etc. 437 * However, if the issue persists over some time better would be 438 * to remove a bdev or the whole controller 439 */ 440 return SPDK_POLLER_BUSY; 441 } 442 443 for (int i = 0; i < rc; ++i) { 444 struct bdev_daos_task *task = container_of(evp[i], struct bdev_daos_task, ev); 445 enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS; 446 447 assert(task != NULL); 448 449 if (task->ev.ev_error != DER_SUCCESS) { 450 status = SPDK_BDEV_IO_STATUS_FAILED; 451 } 452 453 daos_event_fini(&task->ev); 454 bdev_daos_io_complete(task->bdev_io, status); 455 } 456 457 return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; 458 } 459 460 static bool 461 bdev_daos_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 462 { 463 switch (io_type) { 464 case SPDK_BDEV_IO_TYPE_READ: 465 case SPDK_BDEV_IO_TYPE_WRITE: 466 case SPDK_BDEV_IO_TYPE_RESET: 467 case SPDK_BDEV_IO_TYPE_FLUSH: 468 case SPDK_BDEV_IO_TYPE_UNMAP: 469 return true; 470 471 default: 472 return false; 473 } 474 } 475 476 static struct spdk_io_channel * 477 bdev_daos_get_io_channel(void *ctx) 478 { 479 return spdk_get_io_channel(ctx); 480 } 481 482 static void 483 bdev_daos_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 484 { 485 char uuid_str[SPDK_UUID_STRING_LEN]; 486 struct bdev_daos *daos = bdev->ctxt; 487 488 spdk_json_write_object_begin(w); 489 490 spdk_json_write_named_string(w, "method", "bdev_daos_create"); 491 492 spdk_json_write_named_object_begin(w, "params"); 493 spdk_json_write_named_string(w, "name", bdev->name); 494 spdk_json_write_named_string(w, "pool", daos->pool_name); 495 spdk_json_write_named_string(w, "cont", daos->cont_name); 496 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 497 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 498 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 499 spdk_json_write_named_string(w, "uuid", uuid_str); 500 501 spdk_json_write_object_end(w); 502 503 spdk_json_write_object_end(w); 504 } 505 506 static const struct spdk_bdev_fn_table daos_fn_table = { 507 .destruct = bdev_daos_destruct, 508 .submit_request = bdev_daos_submit_request, 509 .io_type_supported = bdev_daos_io_type_supported, 510 .get_io_channel = bdev_daos_get_io_channel, 511 .write_config_json = bdev_daos_write_json_config, 512 }; 513 514 static void * 515 _bdev_daos_io_channel_create_cb(void *ctx) 516 { 517 int rc = 0 ; 518 struct bdev_daos_io_channel *ch = ctx; 519 struct bdev_daos *daos = ch->disk; 520 521 daos_pool_info_t pinfo; 522 daos_cont_info_t cinfo; 523 524 int fd_oflag = O_CREAT | O_RDWR; 525 mode_t mode = S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO; 526 527 rc = bdev_get_daos_engine(); 528 if (rc) { 529 SPDK_ERRLOG("could not initialize DAOS engine: " DF_RC "\n", DP_RC(rc)); 530 return NULL; 531 } 532 533 SPDK_DEBUGLOG(bdev_daos, "connecting to daos pool '%s'\n", daos->pool_name); 534 if ((rc = daos_pool_connect(daos->pool_name, NULL, DAOS_PC_RW, &ch->pool, &pinfo, NULL))) { 535 SPDK_ERRLOG("%s: could not connect to daos pool: " DF_RC "\n", 536 daos->disk.name, DP_RC(rc)); 537 return NULL; 538 } 539 SPDK_DEBUGLOG(bdev_daos, "connecting to daos container '%s'\n", daos->cont_name); 540 if ((rc = daos_cont_open(ch->pool, daos->cont_name, DAOS_COO_RW, &ch->cont, &cinfo, NULL))) { 541 SPDK_ERRLOG("%s: could not open daos container: " DF_RC "\n", 542 daos->disk.name, DP_RC(rc)); 543 goto cleanup_pool; 544 } 545 SPDK_DEBUGLOG(bdev_daos, "mounting daos dfs\n"); 546 if ((rc = dfs_mount(ch->pool, ch->cont, O_RDWR, &ch->dfs))) { 547 SPDK_ERRLOG("%s: could not mount daos dfs: " DF_RC "\n", 548 daos->disk.name, DP_RC(rc)); 549 goto cleanup_cont; 550 } 551 SPDK_DEBUGLOG(bdev_daos, "opening dfs object\n"); 552 if ((rc = dfs_open(ch->dfs, NULL, daos->disk.name, mode, fd_oflag, daos->oclass, 553 0, NULL, &ch->obj))) { 554 SPDK_ERRLOG("%s: could not open dfs object: " DF_RC "\n", 555 daos->disk.name, DP_RC(rc)); 556 goto cleanup_mount; 557 } 558 if ((rc = daos_eq_create(&ch->queue))) { 559 SPDK_ERRLOG("%s: could not create daos event queue: " DF_RC "\n", 560 daos->disk.name, DP_RC(rc)); 561 goto cleanup_obj; 562 } 563 564 return ctx; 565 566 cleanup_obj: 567 dfs_release(ch->obj); 568 cleanup_mount: 569 dfs_umount(ch->dfs); 570 cleanup_cont: 571 daos_cont_close(ch->cont, NULL); 572 cleanup_pool: 573 daos_pool_disconnect(ch->pool, NULL); 574 575 return NULL; 576 } 577 578 static int 579 bdev_daos_io_channel_create_cb(void *io_device, void *ctx_buf) 580 { 581 struct bdev_daos_io_channel *ch = ctx_buf; 582 583 ch->disk = io_device; 584 585 if (spdk_call_unaffinitized(_bdev_daos_io_channel_create_cb, ch) == NULL) { 586 return -EINVAL; 587 } 588 589 SPDK_DEBUGLOG(bdev_daos, "%s: starting daos event queue poller\n", 590 ch->disk->disk.name); 591 592 ch->poller = SPDK_POLLER_REGISTER(bdev_daos_channel_poll, ch, 0); 593 594 return 0; 595 } 596 597 static void 598 bdev_daos_io_channel_destroy_cb(void *io_device, void *ctx_buf) 599 { 600 int rc; 601 struct bdev_daos_io_channel *ch = ctx_buf; 602 603 SPDK_DEBUGLOG(bdev_daos, "stopping daos event queue poller\n"); 604 605 spdk_poller_unregister(&ch->poller); 606 607 if ((rc = daos_eq_destroy(ch->queue, DAOS_EQ_DESTROY_FORCE))) { 608 SPDK_ERRLOG("could not destroy daos event queue: " DF_RC "\n", DP_RC(rc)); 609 } 610 if ((rc = dfs_release(ch->obj))) { 611 SPDK_ERRLOG("could not release dfs object: " DF_RC "\n", DP_RC(rc)); 612 } 613 if ((rc = dfs_umount(ch->dfs))) { 614 SPDK_ERRLOG("could not unmount dfs: " DF_RC "\n", DP_RC(rc)); 615 } 616 if ((rc = daos_cont_close(ch->cont, NULL))) { 617 SPDK_ERRLOG("could not close container: " DF_RC "\n", DP_RC(rc)); 618 } 619 if ((rc = daos_pool_disconnect(ch->pool, NULL))) { 620 SPDK_ERRLOG("could not disconnect from pool: " DF_RC "\n", DP_RC(rc)); 621 } 622 rc = bdev_daos_put_engine(); 623 if (rc) { 624 SPDK_ERRLOG("could not de-initialize DAOS engine: " DF_RC "\n", DP_RC(rc)); 625 } 626 } 627 628 int 629 create_bdev_daos(struct spdk_bdev **bdev, 630 const char *name, const struct spdk_uuid *uuid, 631 const char *pool, const char *cont, const char *oclass, 632 uint64_t num_blocks, uint32_t block_size) 633 { 634 int rc; 635 size_t len; 636 struct bdev_daos *daos; 637 struct bdev_daos_io_channel ch = {}; 638 639 SPDK_NOTICELOG("%s: creating bdev_daos disk on '%s:%s'\n", name, pool, cont); 640 641 if (num_blocks == 0) { 642 SPDK_ERRLOG("Disk num_blocks must be greater than 0"); 643 return -EINVAL; 644 } 645 646 if (block_size % 512) { 647 SPDK_ERRLOG("block size must be 512 bytes aligned\n"); 648 return -EINVAL; 649 } 650 651 if (!name) { 652 SPDK_ERRLOG("device name cannot be empty\n"); 653 return -EINVAL; 654 } 655 656 if (!pool) { 657 SPDK_ERRLOG("daos pool cannot be empty\n"); 658 return -EINVAL; 659 } 660 if (!cont) { 661 SPDK_ERRLOG("daos cont cannot be empty\n"); 662 return -EINVAL; 663 } 664 665 daos = calloc(1, sizeof(*daos)); 666 if (!daos) { 667 SPDK_ERRLOG("calloc() failed\n"); 668 return -ENOMEM; 669 } 670 671 if (!oclass) { 672 oclass = "SX"; /* Max throughput by default */ 673 } 674 daos->oclass = daos_oclass_name2id(oclass); 675 if (daos->oclass == OC_UNKNOWN) { 676 SPDK_ERRLOG("could not parse daos oclass: '%s'\n", oclass); 677 free(daos); 678 return -EINVAL; 679 } 680 681 len = strlen(pool); 682 if (len > DAOS_PROP_LABEL_MAX_LEN) { 683 SPDK_ERRLOG("daos pool name is too long\n"); 684 free(daos); 685 return -EINVAL; 686 } 687 memcpy(daos->pool_name, pool, len); 688 689 len = strlen(cont); 690 if (len > DAOS_PROP_LABEL_MAX_LEN) { 691 SPDK_ERRLOG("daos cont name is too long\n"); 692 free(daos); 693 return -EINVAL; 694 } 695 memcpy(daos->cont_name, cont, len); 696 697 daos->disk.name = strdup(name); 698 daos->disk.product_name = "DAOS bdev"; 699 700 daos->disk.write_cache = 0; 701 daos->disk.blocklen = block_size; 702 daos->disk.blockcnt = num_blocks; 703 704 if (uuid) { 705 daos->disk.uuid = *uuid; 706 } else { 707 spdk_uuid_generate(&daos->disk.uuid); 708 } 709 710 daos->disk.ctxt = daos; 711 daos->disk.fn_table = &daos_fn_table; 712 daos->disk.module = &daos_if; 713 714 rc = bdev_get_daos_engine(); 715 if (rc) { 716 SPDK_ERRLOG("could not initialize DAOS engine: " DF_RC "\n", DP_RC(rc)); 717 bdev_daos_free(daos); 718 return rc; 719 } 720 721 /* We try to connect to the DAOS container during channel creation, so simulate 722 * creating a channel here, so that we can return a failure when the DAOS bdev 723 * is created, instead of finding it out later when the first channel is created 724 * and leaving unusable bdev registered. 725 */ 726 rc = bdev_daos_io_channel_create_cb(daos, &ch); 727 if (rc) { 728 SPDK_ERRLOG("'%s' could not initialize io-channel: %s", name, strerror(-rc)); 729 bdev_daos_free(daos); 730 return rc; 731 } 732 bdev_daos_io_channel_destroy_cb(daos, &ch); 733 734 spdk_io_device_register(daos, bdev_daos_io_channel_create_cb, 735 bdev_daos_io_channel_destroy_cb, 736 sizeof(struct bdev_daos_io_channel), 737 daos->disk.name); 738 739 740 rc = spdk_bdev_register(&daos->disk); 741 if (rc) { 742 spdk_io_device_unregister(daos, NULL); 743 bdev_daos_free(daos); 744 return rc; 745 } 746 747 *bdev = &(daos->disk); 748 749 return rc; 750 } 751 752 static void 753 dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) 754 { 755 } 756 757 int 758 bdev_daos_resize(const char *name, const uint64_t new_size_in_mb) 759 { 760 int rc = 0; 761 struct spdk_bdev_desc *desc; 762 struct spdk_bdev *bdev; 763 struct spdk_io_channel *ch; 764 struct bdev_daos_io_channel *dch; 765 uint64_t new_size_in_byte; 766 uint64_t current_size_in_mb; 767 768 rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc); 769 if (rc != 0) { 770 return rc; 771 } 772 773 bdev = spdk_bdev_desc_get_bdev(desc); 774 if (bdev->module != &daos_if) { 775 rc = -EINVAL; 776 goto exit; 777 } 778 779 current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024); 780 if (current_size_in_mb > new_size_in_mb) { 781 SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n"); 782 rc = -EINVAL; 783 goto exit; 784 } 785 786 ch = bdev_daos_get_io_channel(bdev); 787 dch = spdk_io_channel_get_ctx(ch); 788 new_size_in_byte = new_size_in_mb * 1024 * 1024; 789 790 rc = dfs_punch(dch->dfs, dch->obj, new_size_in_byte, DFS_MAX_FSIZE); 791 spdk_put_io_channel(ch); 792 if (rc != 0) { 793 SPDK_ERRLOG("failed to resize daos bdev: " DF_RC "\n", DP_RC(rc)); 794 rc = -EINTR; 795 goto exit; 796 } 797 798 SPDK_NOTICELOG("DAOS bdev device is resized: bdev name %s, old block count %" PRIu64 799 ", new block count %" 800 PRIu64 "\n", 801 bdev->name, 802 bdev->blockcnt, 803 new_size_in_byte / bdev->blocklen); 804 rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen); 805 if (rc != 0) { 806 SPDK_ERRLOG("failed to notify block cnt change.\n"); 807 } 808 809 exit: 810 spdk_bdev_close(desc); 811 return rc; 812 } 813 814 void 815 delete_bdev_daos(struct spdk_bdev *bdev, spdk_delete_daos_complete cb_fn, void *cb_arg) 816 { 817 if (!bdev || bdev->module != &daos_if) { 818 cb_fn(cb_arg, -ENODEV); 819 return; 820 } 821 822 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 823 } 824 825 static int 826 bdev_get_daos_engine(void) 827 { 828 int rc = 0; 829 830 pthread_mutex_lock(&g_bdev_daos_init_mutex); 831 if (g_bdev_daos_init_count++ > 0) { 832 pthread_mutex_unlock(&g_bdev_daos_init_mutex); 833 return 0; 834 } 835 SPDK_DEBUGLOG(bdev_daos, "initializing DAOS engine\n"); 836 837 rc = daos_init(); 838 pthread_mutex_unlock(&g_bdev_daos_init_mutex); 839 840 if (rc != -DER_ALREADY && rc) { 841 return rc; 842 } 843 return 0; 844 } 845 846 static int 847 bdev_daos_put_engine(void) 848 { 849 int rc = 0; 850 851 pthread_mutex_lock(&g_bdev_daos_init_mutex); 852 if (--g_bdev_daos_init_count > 0) { 853 pthread_mutex_unlock(&g_bdev_daos_init_mutex); 854 return 0; 855 } 856 SPDK_DEBUGLOG(bdev_daos, "de-initializing DAOS engine\n"); 857 858 rc = daos_fini(); 859 pthread_mutex_unlock(&g_bdev_daos_init_mutex); 860 861 return rc; 862 } 863 864 static int 865 bdev_daos_initialize(void) 866 { 867 /* DAOS engine and client initialization happens 868 during the first bdev creation */ 869 return 0; 870 } 871 872 SPDK_LOG_REGISTER_COMPONENT(bdev_daos) 873