xref: /spdk/module/bdev/daos/bdev_daos.c (revision db8083d908563ff6f5fd97a519040dadbc7cbf8b)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) Intel Corporation.
3  *   All rights reserved.
4  *   Copyright (c) croit GmbH.
5  *   All rights reserved.
6  */
7 
8 #include <sys/queue.h>
9 
10 #include "spdk/bdev.h"
11 #include "spdk/bdev_module.h"
12 #include "spdk/endian.h"
13 #include "spdk/env.h"
14 #include "spdk/json.h"
15 #include "spdk/thread.h"
16 #include "spdk/queue.h"
17 #include "spdk/string.h"
18 #include "spdk/stdinc.h"
19 #include "spdk/log.h"
20 
21 #include <daos.h>
22 #include <daos_event.h>
23 #include <daos_fs.h>
24 #include <daos_types.h>
25 #include <daos_pool.h>
26 #include <daos_cont.h>
27 #include <daos_errno.h>
28 
29 #include "bdev_daos.h"
30 
/* Maximum number of iovecs accepted per read/write; sizes bdev_daos_task::diovs. */
#define BDEV_DAOS_IOVECS_MAX 32

/*
 * Per-I/O driver context. It lives in spdk_bdev_io::driver_ctx and its size is
 * reported through bdev_daos_get_ctx_size().
 */
struct bdev_daos_task {
	/* DAOS event handed to dfs_read()/dfs_write(); the poller maps it back
	 * to the owning task with container_of(). */
	daos_event_t ev;
	/* Thread of the channel the request was submitted on; completions are
	 * delivered on this thread. */
	struct spdk_thread *submit_td;
	/* Back-pointer to the bdev I/O, set in bdev_daos_submit_request(). */
	struct spdk_bdev_io *bdev_io;

	/* Completion status stashed by bdev_daos_io_complete(). */
	enum spdk_bdev_io_status status;

	/* Byte offset of the read/write; also printed in the completion debug log. */
	uint64_t offset;

	/* DAOS version of iovec and scatter/gather */
	/* Out-parameter passed to dfs_read(); presumably the byte count actually
	 * read -- it is not consulted anywhere in this file. */
	daos_size_t read_size;
	d_iov_t diovs[BDEV_DAOS_IOVECS_MAX];
	d_sg_list_t sgl;
};
47 
/*
 * One DAOS-backed block device. The same pointer is used as the SPDK
 * io_device and as spdk_bdev::ctxt (see create_bdev_daos()).
 */
struct bdev_daos {
	/* The bdev registered with spdk_bdev_register(). */
	struct spdk_bdev disk;
	/* Object class from daos_oclass_name2id(); passed to dfs_open(). */
	daos_oclass_id_t oclass;

	/* Pool and container labels copied from the create arguments. */
	char pool_name[DAOS_PROP_MAX_LABEL_BUF_LEN];
	char cont_name[DAOS_PROP_MAX_LABEL_BUF_LEN];

	/* Reset request being waited on; set in bdev_daos_reset(). */
	struct bdev_daos_task *reset_task;
	/* Poller that re-runs the in-flight check while a reset is pending. */
	struct spdk_poller    *reset_retry_timer;
};
58 
/*
 * Per-channel state. Each I/O channel opens its own pool/container/DFS/file
 * handles and its own event queue in _bdev_daos_io_channel_create_cb().
 */
struct bdev_daos_io_channel {
	/* Owning device (the io_device passed to the create callback). */
	struct bdev_daos *disk;
	/* Poller running bdev_daos_channel_poll() on this channel. */
	struct spdk_poller *poller;

	/* Handles from daos_pool_connect() and daos_cont_open(). */
	daos_handle_t pool;
	daos_handle_t cont;

	/* DFS mount and the file object that stores the bdev's data. */
	dfs_t *dfs;
	dfs_obj_t *obj;
	/* Event queue on which this channel's reads/writes complete. */
	daos_handle_t queue;
};
70 
/*
 * Engine use count: incremented by bdev_get_daos_engine() and decremented by
 * bdev_daos_put_engine(), both while holding g_bdev_daos_init_mutex.
 */
static uint32_t g_bdev_daos_init_count = 0;
static pthread_mutex_t g_bdev_daos_init_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Module init hook referenced by daos_if; defined later in this file. */
static int bdev_daos_initialize(void);

/* Counted wrappers around daos_init()/daos_fini(); defined later in this file. */
static int bdev_get_daos_engine(void);
static int bdev_daos_put_engine(void);
78 
79 static int
80 bdev_daos_get_ctx_size(void)
81 {
82 	return sizeof(struct bdev_daos_task);
83 }
84 
/* bdev module descriptor: module name, init hook and per-I/O context size callback. */
static struct spdk_bdev_module daos_if = {
	.name = "daos",
	.module_init = bdev_daos_initialize,
	.get_ctx_size = bdev_daos_get_ctx_size,
};

/* Hand daos_if to SPDK's bdev module registration macro. */
SPDK_BDEV_MODULE_REGISTER(daos, &daos_if)
92 
93 static void
94 bdev_daos_free(struct bdev_daos *bdev_daos)
95 {
96 	if (!bdev_daos) {
97 		return;
98 	}
99 
100 	free(bdev_daos->disk.name);
101 	free(bdev_daos);
102 }
103 
104 static void
105 bdev_daos_destruct_cb(void *io_device)
106 {
107 	int rc;
108 	struct bdev_daos *daos = io_device;
109 
110 	assert(daos != NULL);
111 
112 	bdev_daos_free(daos);
113 
114 	rc = bdev_daos_put_engine();
115 	if (rc) {
116 		SPDK_ERRLOG("could not de-initialize DAOS engine: " DF_RC "\n", DP_RC(rc));
117 	}
118 }
119 
120 static int
121 bdev_daos_destruct(void *ctx)
122 {
123 	struct bdev_daos *daos = ctx;
124 
125 	SPDK_NOTICELOG("%s: destroying bdev_daos device\n", daos->disk.name);
126 
127 	spdk_io_device_unregister(daos, bdev_daos_destruct_cb);
128 
129 	return 0;
130 }
131 
132 static void
133 _bdev_daos_io_complete(void *bdev_daos_task)
134 {
135 	struct bdev_daos_task *task = bdev_daos_task;
136 
137 	SPDK_DEBUGLOG(bdev_daos, "completed IO at %#lx with status %s\n", task->offset,
138 		      task->status == SPDK_BDEV_IO_STATUS_SUCCESS ? "SUCCESS" : "FAILURE");
139 
140 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
141 }
142 
143 static void
144 bdev_daos_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
145 {
146 	struct bdev_daos_task *task = (struct bdev_daos_task *)bdev_io->driver_ctx;
147 	struct spdk_thread *current_thread = spdk_get_thread();
148 
149 	assert(task->submit_td != NULL);
150 
151 	task->status = status;
152 	if (task->submit_td != current_thread) {
153 		spdk_thread_send_msg(task->submit_td, _bdev_daos_io_complete, task);
154 	} else {
155 		_bdev_daos_io_complete(task);
156 	}
157 }
158 
159 static int64_t
160 bdev_daos_writev(struct bdev_daos *daos, struct bdev_daos_io_channel *ch,
161 		 struct bdev_daos_task *task,
162 		 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
163 {
164 	int rc;
165 	struct iovec *io = iov;
166 
167 	SPDK_DEBUGLOG(bdev_daos, "write %d iovs size %lu to off: %#lx\n",
168 		      iovcnt, nbytes, offset);
169 
170 	assert(ch != NULL);
171 	assert(daos != NULL);
172 	assert(task != NULL);
173 	assert(iov != NULL);
174 
175 	if (iovcnt > BDEV_DAOS_IOVECS_MAX) {
176 		SPDK_ERRLOG("iovs number [%d] exceeds max allowed limit [%d]\n", iovcnt,
177 			    BDEV_DAOS_IOVECS_MAX);
178 		return -E2BIG;
179 	}
180 
181 	if ((rc = daos_event_init(&task->ev, ch->queue, NULL))) {
182 		SPDK_ERRLOG("%s: could not initialize async event: " DF_RC "\n",
183 			    daos->disk.name, DP_RC(rc));
184 		return -EINVAL;
185 	}
186 
187 	for (int i = 0; i < iovcnt; i++, iov++) {
188 		d_iov_set(&(task->diovs[i]), io->iov_base, io->iov_len);
189 	}
190 
191 	task->sgl.sg_nr = iovcnt;
192 	task->sgl.sg_nr_out = 0;
193 	task->sgl.sg_iovs = task->diovs;
194 	task->offset = offset;
195 
196 	if ((rc = dfs_write(ch->dfs, ch->obj, &task->sgl, offset, &task->ev))) {
197 		SPDK_ERRLOG("%s: could not start async write: " DF_RC "\n",
198 			    daos->disk.name, DP_RC(rc));
199 		daos_event_fini(&task->ev);
200 		return -EINVAL;
201 	}
202 
203 	return nbytes;
204 }
205 
206 static int64_t
207 bdev_daos_readv(struct bdev_daos *daos, struct bdev_daos_io_channel *ch,
208 		struct bdev_daos_task *task,
209 		struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
210 {
211 	int rc;
212 	struct iovec *io = iov;
213 
214 	SPDK_DEBUGLOG(bdev_daos, "read %d iovs size %lu to off: %#lx\n",
215 		      iovcnt, nbytes, offset);
216 
217 	assert(ch != NULL);
218 	assert(daos != NULL);
219 	assert(task != NULL);
220 	assert(iov != NULL);
221 
222 	if (iovcnt > BDEV_DAOS_IOVECS_MAX) {
223 		SPDK_ERRLOG("iovs number [%d] exceeds max allowed limit [%d]\n", iovcnt,
224 			    BDEV_DAOS_IOVECS_MAX);
225 		return -E2BIG;
226 	}
227 
228 	if ((rc = daos_event_init(&task->ev, ch->queue, NULL))) {
229 		SPDK_ERRLOG("%s: could not initialize async event: " DF_RC "\n",
230 			    daos->disk.name, DP_RC(rc));
231 		return -EINVAL;
232 	}
233 
234 	for (int i = 0; i < iovcnt; i++, io++) {
235 		d_iov_set(&(task->diovs[i]), io->iov_base, io->iov_len);
236 	}
237 
238 	task->sgl.sg_nr = iovcnt;
239 	task->sgl.sg_nr_out = 0;
240 	task->sgl.sg_iovs = task->diovs;
241 	task->offset = offset;
242 
243 	if ((rc = dfs_read(ch->dfs, ch->obj, &task->sgl, offset, &task->read_size, &task->ev))) {
244 		SPDK_ERRLOG("%s: could not start async read: " DF_RC "\n",
245 			    daos->disk.name, DP_RC(rc));
246 		daos_event_fini(&task->ev);
247 		return -EINVAL;
248 	}
249 
250 	return nbytes;
251 }
252 
253 static void
254 bdev_daos_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
255 		     bool success)
256 {
257 	int64_t rc;
258 	struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch);
259 
260 	if (!success) {
261 		bdev_daos_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
262 		return;
263 	}
264 
265 	rc = bdev_daos_readv((struct bdev_daos *)bdev_io->bdev->ctxt,
266 			     dch,
267 			     (struct bdev_daos_task *)bdev_io->driver_ctx,
268 			     bdev_io->u.bdev.iovs,
269 			     bdev_io->u.bdev.iovcnt,
270 			     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
271 			     bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
272 
273 	if (rc < 0) {
274 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
275 		return;
276 	}
277 }
278 
279 static void
280 _bdev_daos_get_io_inflight(struct spdk_io_channel_iter *i)
281 {
282 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
283 	struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch);
284 	int io_inflight = daos_eq_query(dch->queue, DAOS_EQR_WAITING, 0, NULL);
285 
286 	if (io_inflight > 0) {
287 		spdk_for_each_channel_continue(i, -1);
288 		return;
289 	}
290 
291 	spdk_for_each_channel_continue(i, 0);
292 }
293 
294 static int bdev_daos_reset_retry_timer(void *arg);
295 
296 static void
297 _bdev_daos_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
298 {
299 	struct bdev_daos *daos = spdk_io_channel_iter_get_ctx(i);
300 
301 	if (status == -1) {
302 		daos->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_daos_reset_retry_timer, daos, 1000);
303 		return;
304 	}
305 
306 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(daos->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
307 }
308 
309 static int
310 bdev_daos_reset_retry_timer(void *arg)
311 {
312 	struct bdev_daos *daos = arg;
313 
314 	if (daos->reset_retry_timer) {
315 		spdk_poller_unregister(&daos->reset_retry_timer);
316 	}
317 
318 	spdk_for_each_channel(daos,
319 			      _bdev_daos_get_io_inflight,
320 			      daos,
321 			      _bdev_daos_get_io_inflight_done);
322 
323 	return SPDK_POLLER_BUSY;
324 }
325 
326 static void
327 bdev_daos_reset(struct bdev_daos *daos, struct bdev_daos_task *task)
328 {
329 	assert(daos != NULL);
330 	assert(task != NULL);
331 
332 	daos->reset_task = task;
333 	bdev_daos_reset_retry_timer(daos);
334 }
335 
336 
337 static int64_t
338 bdev_daos_unmap(struct bdev_daos_io_channel *ch, uint64_t nbytes,
339 		uint64_t offset)
340 {
341 	SPDK_DEBUGLOG(bdev_daos, "unmap at %#lx with size %#lx\n", offset, nbytes);
342 	return dfs_punch(ch->dfs, ch->obj, offset, nbytes);
343 }
344 
345 static void
346 _bdev_daos_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
347 {
348 	struct bdev_daos_io_channel *dch = spdk_io_channel_get_ctx(ch);
349 
350 	int64_t rc;
351 	switch (bdev_io->type) {
352 	case SPDK_BDEV_IO_TYPE_READ:
353 		spdk_bdev_io_get_buf(bdev_io, bdev_daos_get_buf_cb,
354 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
355 		break;
356 
357 	case SPDK_BDEV_IO_TYPE_WRITE:
358 		rc = bdev_daos_writev((struct bdev_daos *)bdev_io->bdev->ctxt,
359 				      dch,
360 				      (struct bdev_daos_task *)bdev_io->driver_ctx,
361 				      bdev_io->u.bdev.iovs,
362 				      bdev_io->u.bdev.iovcnt,
363 				      bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
364 				      bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
365 		if (rc < 0) {
366 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
367 			return;
368 		}
369 		break;
370 
371 	case SPDK_BDEV_IO_TYPE_RESET:
372 		/* Can't cancel in-flight requests, but can wait for their completions */
373 		bdev_daos_reset((struct bdev_daos *)bdev_io->bdev->ctxt,
374 				(struct bdev_daos_task *)bdev_io->driver_ctx);
375 		break;
376 
377 	case SPDK_BDEV_IO_TYPE_FLUSH:
378 		/* NOOP because DAOS requests land on PMEM and writes are persistent upon completion */
379 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
380 		break;
381 
382 	case SPDK_BDEV_IO_TYPE_UNMAP:
383 		rc = bdev_daos_unmap(dch,
384 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
385 				     bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
386 		if (!rc) {
387 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
388 		} else {
389 			SPDK_DEBUGLOG(bdev_daos, "%s: could not unmap: " DF_RC "\n",
390 				      dch->disk->disk.name, DP_RC((int)rc));
391 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
392 		}
393 
394 		break;
395 
396 	default:
397 		SPDK_ERRLOG("Wrong io type\n");
398 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
399 		break;
400 	}
401 }
402 
403 static void
404 bdev_daos_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
405 {
406 	struct bdev_daos_task *task = (struct bdev_daos_task *)bdev_io->driver_ctx;
407 	struct spdk_thread *submit_td = spdk_io_channel_get_thread(ch);
408 
409 	assert(task != NULL);
410 
411 	task->submit_td = submit_td;
412 	task->bdev_io = bdev_io;
413 
414 	_bdev_daos_submit_request(ch, bdev_io);
415 }
416 
417 #define POLLING_EVENTS_NUM 64
418 
419 static int
420 bdev_daos_channel_poll(void *arg)
421 {
422 	daos_event_t *evp[POLLING_EVENTS_NUM];
423 	struct bdev_daos_io_channel *ch = arg;
424 
425 	assert(ch != NULL);
426 	assert(ch->disk != NULL);
427 
428 	int rc = daos_eq_poll(ch->queue, 0, DAOS_EQ_NOWAIT,
429 			      POLLING_EVENTS_NUM, evp);
430 
431 	if (rc < 0) {
432 		SPDK_DEBUGLOG(bdev_daos, "%s: could not poll daos event queue: " DF_RC "\n",
433 			      ch->disk->disk.name, DP_RC(rc));
434 		/*
435 		 * TODO: There are cases when this is self healing, e.g.
436 		 * brief network issues, DAOS agent restarting etc.
437 		 * However, if the issue persists over some time better would be
438 		 * to remove a bdev or the whole controller
439 		 */
440 		return SPDK_POLLER_BUSY;
441 	}
442 
443 	for (int i = 0; i < rc; ++i) {
444 		struct bdev_daos_task *task = container_of(evp[i], struct bdev_daos_task, ev);
445 		enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
446 
447 		assert(task != NULL);
448 
449 		if (task->ev.ev_error != DER_SUCCESS) {
450 			status = SPDK_BDEV_IO_STATUS_FAILED;
451 		}
452 
453 		daos_event_fini(&task->ev);
454 		bdev_daos_io_complete(task->bdev_io, status);
455 	}
456 
457 	return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
458 }
459 
460 static bool
461 bdev_daos_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
462 {
463 	switch (io_type) {
464 	case SPDK_BDEV_IO_TYPE_READ:
465 	case SPDK_BDEV_IO_TYPE_WRITE:
466 	case SPDK_BDEV_IO_TYPE_RESET:
467 	case SPDK_BDEV_IO_TYPE_FLUSH:
468 	case SPDK_BDEV_IO_TYPE_UNMAP:
469 		return true;
470 
471 	default:
472 		return false;
473 	}
474 }
475 
/* fn_table hook: obtain an I/O channel for the io_device identified by ctx. */
static struct spdk_io_channel *
bdev_daos_get_io_channel(void *ctx)
{
	struct spdk_io_channel *channel = spdk_get_io_channel(ctx);

	return channel;
}
481 
482 static void
483 bdev_daos_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
484 {
485 	char uuid_str[SPDK_UUID_STRING_LEN];
486 	struct bdev_daos *daos = bdev->ctxt;
487 
488 	spdk_json_write_object_begin(w);
489 
490 	spdk_json_write_named_string(w, "method", "bdev_daos_create");
491 
492 	spdk_json_write_named_object_begin(w, "params");
493 	spdk_json_write_named_string(w, "name", bdev->name);
494 	spdk_json_write_named_string(w, "pool", daos->pool_name);
495 	spdk_json_write_named_string(w, "cont", daos->cont_name);
496 	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
497 	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
498 	spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
499 	spdk_json_write_named_string(w, "uuid", uuid_str);
500 
501 	spdk_json_write_object_end(w);
502 
503 	spdk_json_write_object_end(w);
504 }
505 
/* bdev callback table; installed as disk.fn_table in create_bdev_daos(). */
static const struct spdk_bdev_fn_table daos_fn_table = {
	.destruct		= bdev_daos_destruct,
	.submit_request		= bdev_daos_submit_request,
	.io_type_supported	= bdev_daos_io_type_supported,
	.get_io_channel		= bdev_daos_get_io_channel,
	.write_config_json	= bdev_daos_write_json_config,
};
513 
514 static void *
515 _bdev_daos_io_channel_create_cb(void *ctx)
516 {
517 	int rc = 0 ;
518 	struct bdev_daos_io_channel *ch = ctx;
519 	struct bdev_daos *daos = ch->disk;
520 
521 	daos_pool_info_t pinfo;
522 	daos_cont_info_t cinfo;
523 
524 	int fd_oflag = O_CREAT | O_RDWR;
525 	mode_t mode = S_IFREG | S_IRWXU | S_IRWXG | S_IRWXO;
526 
527 	rc = bdev_get_daos_engine();
528 	if (rc) {
529 		SPDK_ERRLOG("could not initialize DAOS engine: " DF_RC "\n", DP_RC(rc));
530 		return NULL;
531 	}
532 
533 	SPDK_DEBUGLOG(bdev_daos, "connecting to daos pool '%s'\n", daos->pool_name);
534 	if ((rc = daos_pool_connect(daos->pool_name, NULL, DAOS_PC_RW, &ch->pool, &pinfo, NULL))) {
535 		SPDK_ERRLOG("%s: could not connect to daos pool: " DF_RC "\n",
536 			    daos->disk.name, DP_RC(rc));
537 		return NULL;
538 	}
539 	SPDK_DEBUGLOG(bdev_daos, "connecting to daos container '%s'\n", daos->cont_name);
540 	if ((rc = daos_cont_open(ch->pool, daos->cont_name, DAOS_COO_RW, &ch->cont, &cinfo, NULL))) {
541 		SPDK_ERRLOG("%s: could not open daos container: " DF_RC "\n",
542 			    daos->disk.name, DP_RC(rc));
543 		goto cleanup_pool;
544 	}
545 	SPDK_DEBUGLOG(bdev_daos, "mounting daos dfs\n");
546 	if ((rc = dfs_mount(ch->pool, ch->cont, O_RDWR, &ch->dfs))) {
547 		SPDK_ERRLOG("%s: could not mount daos dfs: " DF_RC "\n",
548 			    daos->disk.name, DP_RC(rc));
549 		goto cleanup_cont;
550 	}
551 	SPDK_DEBUGLOG(bdev_daos, "opening dfs object\n");
552 	if ((rc = dfs_open(ch->dfs, NULL, daos->disk.name, mode, fd_oflag, daos->oclass,
553 			   0, NULL, &ch->obj))) {
554 		SPDK_ERRLOG("%s: could not open dfs object: " DF_RC "\n",
555 			    daos->disk.name, DP_RC(rc));
556 		goto cleanup_mount;
557 	}
558 	if ((rc = daos_eq_create(&ch->queue))) {
559 		SPDK_ERRLOG("%s: could not create daos event queue: " DF_RC "\n",
560 			    daos->disk.name, DP_RC(rc));
561 		goto cleanup_obj;
562 	}
563 
564 	return ctx;
565 
566 cleanup_obj:
567 	dfs_release(ch->obj);
568 cleanup_mount:
569 	dfs_umount(ch->dfs);
570 cleanup_cont:
571 	daos_cont_close(ch->cont, NULL);
572 cleanup_pool:
573 	daos_pool_disconnect(ch->pool, NULL);
574 
575 	return NULL;
576 }
577 
578 static int
579 bdev_daos_io_channel_create_cb(void *io_device, void *ctx_buf)
580 {
581 	struct bdev_daos_io_channel *ch = ctx_buf;
582 
583 	ch->disk = io_device;
584 
585 	if (spdk_call_unaffinitized(_bdev_daos_io_channel_create_cb, ch) == NULL) {
586 		return EINVAL;
587 	}
588 
589 	SPDK_DEBUGLOG(bdev_daos, "%s: starting daos event queue poller\n",
590 		      ch->disk->disk.name);
591 
592 	ch->poller = SPDK_POLLER_REGISTER(bdev_daos_channel_poll, ch, 0);
593 
594 	return 0;
595 }
596 
597 static void
598 bdev_daos_io_channel_destroy_cb(void *io_device, void *ctx_buf)
599 {
600 	int rc;
601 	struct bdev_daos_io_channel *ch = ctx_buf;
602 
603 	SPDK_DEBUGLOG(bdev_daos, "stopping daos event queue poller\n");
604 
605 	spdk_poller_unregister(&ch->poller);
606 
607 	if ((rc = daos_eq_destroy(ch->queue, DAOS_EQ_DESTROY_FORCE))) {
608 		SPDK_ERRLOG("could not destroy daos event queue: " DF_RC "\n", DP_RC(rc));
609 	}
610 	if ((rc = dfs_release(ch->obj))) {
611 		SPDK_ERRLOG("could not release dfs object: " DF_RC "\n", DP_RC(rc));
612 	}
613 	if ((rc = dfs_umount(ch->dfs))) {
614 		SPDK_ERRLOG("could not unmount dfs: " DF_RC "\n", DP_RC(rc));
615 	}
616 	if ((rc = daos_cont_close(ch->cont, NULL))) {
617 		SPDK_ERRLOG("could not close container: " DF_RC "\n", DP_RC(rc));
618 	}
619 	if ((rc = daos_pool_disconnect(ch->pool, NULL))) {
620 		SPDK_ERRLOG("could not disconnect from pool: " DF_RC "\n", DP_RC(rc));
621 	}
622 	rc = bdev_daos_put_engine();
623 	if (rc) {
624 		SPDK_ERRLOG("could not de-initialize DAOS engine: " DF_RC "\n", DP_RC(rc));
625 	}
626 }
627 
628 int
629 create_bdev_daos(struct spdk_bdev **bdev,
630 		 const char *name, const struct spdk_uuid *uuid,
631 		 const char *pool, const char *cont, const char *oclass,
632 		 uint64_t num_blocks, uint32_t block_size)
633 {
634 	int rc;
635 	size_t len;
636 	struct bdev_daos *daos;
637 
638 	SPDK_NOTICELOG("%s: creating bdev_daos disk on '%s:%s'\n", name, pool, cont);
639 
640 	if (num_blocks == 0) {
641 		SPDK_ERRLOG("Disk num_blocks must be greater than 0");
642 		return -EINVAL;
643 	}
644 
645 	if (block_size % 512) {
646 		SPDK_ERRLOG("block size must be 512 bytes aligned\n");
647 		return -EINVAL;
648 	}
649 
650 	if (!name) {
651 		SPDK_ERRLOG("device name cannot be empty\n");
652 		return -EINVAL;
653 	}
654 
655 	if (!pool) {
656 		SPDK_ERRLOG("daos pool cannot be empty\n");
657 		return -EINVAL;
658 	}
659 	if (!cont) {
660 		SPDK_ERRLOG("daos cont cannot be empty\n");
661 		return -EINVAL;
662 	}
663 
664 	daos = calloc(1, sizeof(*daos));
665 	if (!daos) {
666 		SPDK_ERRLOG("calloc() failed\n");
667 		return -ENOMEM;
668 	}
669 
670 	if (!oclass) {
671 		oclass = "SX"; /* Max throughput by default */
672 	}
673 	daos->oclass = daos_oclass_name2id(oclass);
674 	if (daos->oclass == OC_UNKNOWN) {
675 		SPDK_ERRLOG("could not parse daos oclass: '%s'\n", oclass);
676 		free(daos);
677 		return -EINVAL;
678 	}
679 
680 	len = strlen(pool);
681 	if (len > DAOS_PROP_LABEL_MAX_LEN) {
682 		SPDK_ERRLOG("daos pool name is too long\n");
683 		free(daos);
684 		return -EINVAL;
685 	}
686 	memcpy(daos->pool_name, pool, len);
687 
688 	len = strlen(cont);
689 	if (len > DAOS_PROP_LABEL_MAX_LEN) {
690 		SPDK_ERRLOG("daos cont name is too long\n");
691 		free(daos);
692 		return -EINVAL;
693 	}
694 	memcpy(daos->cont_name, cont, len);
695 
696 	daos->disk.name = strdup(name);
697 	daos->disk.product_name = "DAOS bdev";
698 
699 	daos->disk.write_cache = 0;
700 	daos->disk.blocklen = block_size;
701 	daos->disk.blockcnt = num_blocks;
702 
703 	if (uuid) {
704 		daos->disk.uuid = *uuid;
705 	} else {
706 		spdk_uuid_generate(&daos->disk.uuid);
707 	}
708 
709 	daos->disk.ctxt = daos;
710 	daos->disk.fn_table = &daos_fn_table;
711 	daos->disk.module = &daos_if;
712 
713 	rc = bdev_get_daos_engine();
714 	if (rc) {
715 		SPDK_ERRLOG("could not initialize DAOS engine: " DF_RC "\n", DP_RC(rc));
716 		bdev_daos_free(daos);
717 		return rc;
718 	}
719 
720 	spdk_io_device_register(daos, bdev_daos_io_channel_create_cb,
721 				bdev_daos_io_channel_destroy_cb,
722 				sizeof(struct bdev_daos_io_channel),
723 				daos->disk.name);
724 
725 
726 	rc = spdk_bdev_register(&daos->disk);
727 	if (rc) {
728 		spdk_io_device_unregister(daos, NULL);
729 		bdev_daos_free(daos);
730 		return rc;
731 	}
732 
733 	*bdev = &(daos->disk);
734 
735 	return rc;
736 }
737 
738 static void
739 dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
740 {
741 }
742 
743 int
744 bdev_daos_resize(const char *name, const uint64_t new_size_in_mb)
745 {
746 	int rc = 0;
747 	struct spdk_bdev_desc *desc;
748 	struct spdk_bdev *bdev;
749 	struct spdk_io_channel *ch;
750 	struct bdev_daos_io_channel *dch;
751 	uint64_t new_size_in_byte;
752 	uint64_t current_size_in_mb;
753 
754 	rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
755 	if (rc != 0) {
756 		return rc;
757 	}
758 
759 	bdev = spdk_bdev_desc_get_bdev(desc);
760 	if (bdev->module != &daos_if) {
761 		rc = -EINVAL;
762 		goto exit;
763 	}
764 
765 	current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
766 	if (current_size_in_mb > new_size_in_mb) {
767 		SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n");
768 		rc = -EINVAL;
769 		goto exit;
770 	}
771 
772 	ch = bdev_daos_get_io_channel(bdev);
773 	dch = spdk_io_channel_get_ctx(ch);
774 	new_size_in_byte = new_size_in_mb * 1024 * 1024;
775 
776 	rc = dfs_punch(dch->dfs, dch->obj, new_size_in_byte, DFS_MAX_FSIZE);
777 	spdk_put_io_channel(ch);
778 	if (rc != 0) {
779 		SPDK_ERRLOG("failed to resize daos bdev: " DF_RC "\n", DP_RC(rc));
780 		rc = -EINTR;
781 		goto exit;
782 	}
783 
784 	SPDK_NOTICELOG("DAOS bdev device is resized: bdev name %s, old block count %" PRIu64
785 		       ", new block count %"
786 		       PRIu64 "\n",
787 		       bdev->name,
788 		       bdev->blockcnt,
789 		       new_size_in_byte / bdev->blocklen);
790 	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
791 	if (rc != 0) {
792 		SPDK_ERRLOG("failed to notify block cnt change.\n");
793 	}
794 
795 exit:
796 	spdk_bdev_close(desc);
797 	return rc;
798 }
799 
800 void
801 delete_bdev_daos(struct spdk_bdev *bdev, spdk_delete_daos_complete cb_fn, void *cb_arg)
802 {
803 	if (!bdev || bdev->module != &daos_if) {
804 		cb_fn(cb_arg, -ENODEV);
805 		return;
806 	}
807 
808 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
809 }
810 
811 static int
812 bdev_get_daos_engine(void)
813 {
814 	int rc = 0;
815 
816 	pthread_mutex_lock(&g_bdev_daos_init_mutex);
817 	if (g_bdev_daos_init_count++ > 0) {
818 		pthread_mutex_unlock(&g_bdev_daos_init_mutex);
819 		return 0;
820 	}
821 	SPDK_DEBUGLOG(bdev_daos, "initializing DAOS engine\n");
822 
823 	rc = daos_init();
824 	pthread_mutex_unlock(&g_bdev_daos_init_mutex);
825 
826 	if (rc != -DER_ALREADY && rc) {
827 		return rc;
828 	}
829 	return 0;
830 }
831 
832 static int
833 bdev_daos_put_engine(void)
834 {
835 	int rc = 0;
836 
837 	pthread_mutex_lock(&g_bdev_daos_init_mutex);
838 	if (--g_bdev_daos_init_count > 0) {
839 		pthread_mutex_unlock(&g_bdev_daos_init_mutex);
840 		return 0;
841 	}
842 	SPDK_DEBUGLOG(bdev_daos, "de-initializing DAOS engine\n");
843 
844 	rc = daos_fini();
845 	pthread_mutex_unlock(&g_bdev_daos_init_mutex);
846 
847 	return rc;
848 }
849 
/*
 * Module init hook. There is nothing to do here: the DAOS engine and client
 * are initialized when the first bdev is created. Always returns 0.
 */
static int
bdev_daos_initialize(void)
{
	const int init_ok = 0;

	return init_ok;
}
857 
/* Declares the "bdev_daos" log component named in this file's SPDK_DEBUGLOG() calls. */
SPDK_LOG_REGISTER_COMPONENT(bdev_daos)
859