xref: /spdk/module/bdev/compress/vbdev_compress.c (revision 45379ed84341f94a6e1ec3eab4cc1f9c219d3e90)
1488570ebSJim Harris /*   SPDX-License-Identifier: BSD-3-Clause
2a6dbe372Spaul luse  *   Copyright (C) 2018 Intel Corporation.
307fe6a43SSeth Howell  *   All rights reserved.
48d48071aSAlexey Marchuk  *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
507fe6a43SSeth Howell  */
607fe6a43SSeth Howell 
707fe6a43SSeth Howell #include "vbdev_compress.h"
807fe6a43SSeth Howell 
907fe6a43SSeth Howell #include "spdk/reduce.h"
1007fe6a43SSeth Howell #include "spdk/stdinc.h"
1107fe6a43SSeth Howell #include "spdk/rpc.h"
1207fe6a43SSeth Howell #include "spdk/env.h"
1307fe6a43SSeth Howell #include "spdk/endian.h"
1407fe6a43SSeth Howell #include "spdk/string.h"
1507fe6a43SSeth Howell #include "spdk/thread.h"
1607fe6a43SSeth Howell #include "spdk/util.h"
1707fe6a43SSeth Howell #include "spdk/bdev_module.h"
18ce49d2f9SAlexey Marchuk #include "spdk/likely.h"
194e8e97c8STomasz Zawadzki #include "spdk/log.h"
20bb5083a8Spaul luse #include "spdk/accel.h"
2107fe6a43SSeth Howell 
225d2d59beSKonrad Sztyber #include "spdk/accel_module.h"
23ec2e6e2bSpaul luse 
2407fe6a43SSeth Howell #define CHUNK_SIZE (1024 * 16)
2507fe6a43SSeth Howell #define COMP_BDEV_NAME "compress"
2607fe6a43SSeth Howell #define BACKING_IO_SZ (4 * 1024)
2707fe6a43SSeth Howell 
28d83e87f8SKrzysztof Karas /* This namespace UUID was generated using uuid_generate() method. */
29d83e87f8SKrzysztof Karas #define BDEV_COMPRESS_NAMESPACE_UUID "c3fad6da-832f-4cc0-9cdc-5c552b225e7b"
30d83e87f8SKrzysztof Karas 
/* Context carried through an asynchronous compress-bdev delete so the result
 * can be reported back to the caller when teardown finishes. */
struct vbdev_comp_delete_ctx {
	spdk_delete_compress_complete	cb_fn;		/* user completion callback */
	void				*cb_arg;	/* argument passed to cb_fn */
	int				cb_rc;		/* saved rc for deferred completion */
	struct spdk_thread		*orig_thread;	/* presumably the thread that issued the delete — confirm at alloc site */
};
37fcf8e454SShuhei Matsumoto 
3807fe6a43SSeth Howell /* List of virtual bdevs and associated info for each. */
/* List of virtual bdevs and associated info for each. */
struct vbdev_compress {
	struct spdk_bdev		*base_bdev;	/* the thing we're attaching to */
	struct spdk_bdev_desc		*base_desc;	/* its descriptor we get from open */
	struct spdk_io_channel		*base_ch;	/* IO channel of base device */
	struct spdk_bdev		comp_bdev;	/* the compression virtual bdev */
	struct comp_io_channel		*comp_ch;	/* channel associated with this bdev */
	struct spdk_io_channel		*accel_channel;	/* to communicate with the accel framework */
	struct spdk_thread		*reduce_thread;	/* thread used for reduce-volume submissions */
	pthread_mutex_t			reduce_lock;	/* guards reduce_thread checks (see delete path) */
	uint32_t			ch_count;	/* NOTE(review): appears to count open channels — confirm against ch create/destroy */
	TAILQ_HEAD(, spdk_bdev_io)	pending_comp_ios;	/* outstanding operations to a comp library */
	struct spdk_poller		*poller;	/* completion poller */
	struct spdk_reduce_vol_params	params;		/* params for the reduce volume */
	struct spdk_reduce_backing_dev	backing_dev;	/* backing device info for the reduce volume */
	struct spdk_reduce_vol		*vol;		/* the reduce volume */
	struct vbdev_comp_delete_ctx	*delete_ctx;	/* in-flight delete context, if any */
	bool				orphaned;	/* base bdev claimed but comp_bdev not registered */
	int				reduce_errno;
	TAILQ_HEAD(, vbdev_comp_op)	queued_comp_ops;
	TAILQ_ENTRY(vbdev_compress)	link;		/* entry on g_vbdev_comp */
	struct spdk_thread		*thread;	/* thread where base device is opened */
	enum spdk_accel_comp_algo       comp_algo;      /* compression algorithm for compress bdev */
	uint32_t                        comp_level;     /* compression algorithm level */
	bool				init_failed;	/* compress bdev initialization failed */
};
/* Global list of all compress vbdevs created by this module. */
static TAILQ_HEAD(, vbdev_compress) g_vbdev_comp = TAILQ_HEAD_INITIALIZER(g_vbdev_comp);
6507fe6a43SSeth Howell 
/* The comp vbdev channel struct. It is allocated and freed on my behalf by the io channel code.
 */
struct comp_io_channel {
	struct spdk_io_channel_iter	*iter;	/* used with for_each_channel in reset */
};
7107fe6a43SSeth Howell 
/* Per I/O context for the compression vbdev, stored in the bdev_io's driver_ctx. */
struct comp_bdev_io {
	struct comp_io_channel		*comp_ch;		/* used in completion handling */
	struct vbdev_compress		*comp_bdev;		/* vbdev associated with this IO */
	struct spdk_bdev_io_wait_entry	bdev_io_wait;		/* for bdev_io_wait */
	struct spdk_bdev_io		*orig_io;		/* the original IO */
	int				status;			/* save for completion on orig thread */
};
8007fe6a43SSeth Howell 
8107fe6a43SSeth Howell static void vbdev_compress_examine(struct spdk_bdev *bdev);
82c3ed33f4SShuhei Matsumoto static int vbdev_compress_claim(struct vbdev_compress *comp_bdev);
83ddd4603cSYankun Li struct vbdev_compress *_prepare_for_load_init(struct spdk_bdev_desc *bdev_desc, uint32_t lb_size,
84ddd4603cSYankun Li 		uint8_t comp_algo, uint32_t comp_level);
8507fe6a43SSeth Howell static void vbdev_compress_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
8607fe6a43SSeth Howell static void comp_bdev_ch_destroy_cb(void *io_device, void *ctx_buf);
87fcf8e454SShuhei Matsumoto static void vbdev_compress_delete_done(void *cb_arg, int bdeverrno);
884e94e54eSYankun Li static void _comp_reduce_resubmit_backing_io(void *_backing_io);
8907fe6a43SSeth Howell 
9007fe6a43SSeth Howell /* for completing rw requests on the orig IO thread. */
9107fe6a43SSeth Howell static void
92347499e7SSeth Howell _reduce_rw_blocks_cb(void *arg)
9307fe6a43SSeth Howell {
9407fe6a43SSeth Howell 	struct comp_bdev_io *io_ctx = arg;
9507fe6a43SSeth Howell 
96ce49d2f9SAlexey Marchuk 	if (spdk_likely(io_ctx->status == 0)) {
9707fe6a43SSeth Howell 		spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS);
98ce49d2f9SAlexey Marchuk 	} else if (io_ctx->status == -ENOMEM) {
993b207588SYankun Li 		spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_NOMEM);
10007fe6a43SSeth Howell 	} else {
1015251fc2dSYankun Li 		SPDK_ERRLOG("Failed to execute reduce api. %s\n", spdk_strerror(-io_ctx->status));
10207fe6a43SSeth Howell 		spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_FAILED);
10307fe6a43SSeth Howell 	}
10407fe6a43SSeth Howell }
10507fe6a43SSeth Howell 
10607fe6a43SSeth Howell /* Completion callback for r/w that were issued via reducelib. */
10707fe6a43SSeth Howell static void
108347499e7SSeth Howell reduce_rw_blocks_cb(void *arg, int reduce_errno)
10907fe6a43SSeth Howell {
11007fe6a43SSeth Howell 	struct spdk_bdev_io *bdev_io = arg;
11107fe6a43SSeth Howell 	struct comp_bdev_io *io_ctx = (struct comp_bdev_io *)bdev_io->driver_ctx;
11207fe6a43SSeth Howell 	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(io_ctx->comp_ch);
11329252a48SShuhei Matsumoto 	struct spdk_thread *orig_thread;
11407fe6a43SSeth Howell 
11507fe6a43SSeth Howell 	/* TODO: need to decide which error codes are bdev_io success vs failure;
11607fe6a43SSeth Howell 	 * example examine calls reading metadata */
11707fe6a43SSeth Howell 
11807fe6a43SSeth Howell 	io_ctx->status = reduce_errno;
11907fe6a43SSeth Howell 
12007fe6a43SSeth Howell 	/* Send this request to the orig IO thread. */
12129252a48SShuhei Matsumoto 	orig_thread = spdk_io_channel_get_thread(ch);
1225f270928SJohn Levon 
1235f270928SJohn Levon 	spdk_thread_exec_msg(orig_thread, _reduce_rw_blocks_cb, io_ctx);
12407fe6a43SSeth Howell }
12507fe6a43SSeth Howell 
126bfd7fcb8SAlexey Marchuk static int
12707fe6a43SSeth Howell _compress_operation(struct spdk_reduce_backing_dev *backing_dev, struct iovec *src_iovs,
12807fe6a43SSeth Howell 		    int src_iovcnt, struct iovec *dst_iovs,
12907fe6a43SSeth Howell 		    int dst_iovcnt, bool compress, void *cb_arg)
13007fe6a43SSeth Howell {
131bb5083a8Spaul luse 	struct spdk_reduce_vol_cb_args *reduce_cb_arg = cb_arg;
13207fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(backing_dev, struct vbdev_compress,
13307fe6a43SSeth Howell 					   backing_dev);
134bb5083a8Spaul luse 	int rc;
13507fe6a43SSeth Howell 
136bb5083a8Spaul luse 	if (compress) {
137bb5083a8Spaul luse 		assert(dst_iovcnt == 1);
138ddd4603cSYankun Li 		rc = spdk_accel_submit_compress_ext(comp_bdev->accel_channel, dst_iovs[0].iov_base,
139ddd4603cSYankun Li 						    dst_iovs[0].iov_len, src_iovs, src_iovcnt,
140ddd4603cSYankun Li 						    comp_bdev->comp_algo, comp_bdev->comp_level,
141ddd4603cSYankun Li 						    &reduce_cb_arg->output_size, reduce_cb_arg->cb_fn,
142ddd4603cSYankun Li 						    reduce_cb_arg->cb_arg);
143f530abcaSAlexey Marchuk 	} else {
144ddd4603cSYankun Li 		rc = spdk_accel_submit_decompress_ext(comp_bdev->accel_channel, dst_iovs, dst_iovcnt,
145ddd4603cSYankun Li 						      src_iovs, src_iovcnt, comp_bdev->comp_algo,
146ddd4603cSYankun Li 						      &reduce_cb_arg->output_size, reduce_cb_arg->cb_fn,
147ddd4603cSYankun Li 						      reduce_cb_arg->cb_arg);
148f530abcaSAlexey Marchuk 	}
14907fe6a43SSeth Howell 
150bfd7fcb8SAlexey Marchuk 	return rc;
151bfd7fcb8SAlexey Marchuk }
1527c77c292Spaul luse 
15307fe6a43SSeth Howell /* Entry point for reduce lib to issue a compress operation. */
15407fe6a43SSeth Howell static void
15507fe6a43SSeth Howell _comp_reduce_compress(struct spdk_reduce_backing_dev *dev,
15607fe6a43SSeth Howell 		      struct iovec *src_iovs, int src_iovcnt,
15707fe6a43SSeth Howell 		      struct iovec *dst_iovs, int dst_iovcnt,
15807fe6a43SSeth Howell 		      struct spdk_reduce_vol_cb_args *cb_arg)
15907fe6a43SSeth Howell {
16007fe6a43SSeth Howell 	int rc;
16107fe6a43SSeth Howell 
16207fe6a43SSeth Howell 	rc = _compress_operation(dev, src_iovs, src_iovcnt, dst_iovs, dst_iovcnt, true, cb_arg);
16307fe6a43SSeth Howell 	if (rc) {
16407fe6a43SSeth Howell 		SPDK_ERRLOG("with compress operation code %d (%s)\n", rc, spdk_strerror(-rc));
16507fe6a43SSeth Howell 		cb_arg->cb_fn(cb_arg->cb_arg, rc);
16607fe6a43SSeth Howell 	}
16707fe6a43SSeth Howell }
16807fe6a43SSeth Howell 
16907fe6a43SSeth Howell /* Entry point for reduce lib to issue a decompress operation. */
17007fe6a43SSeth Howell static void
17107fe6a43SSeth Howell _comp_reduce_decompress(struct spdk_reduce_backing_dev *dev,
17207fe6a43SSeth Howell 			struct iovec *src_iovs, int src_iovcnt,
17307fe6a43SSeth Howell 			struct iovec *dst_iovs, int dst_iovcnt,
17407fe6a43SSeth Howell 			struct spdk_reduce_vol_cb_args *cb_arg)
17507fe6a43SSeth Howell {
17607fe6a43SSeth Howell 	int rc;
17707fe6a43SSeth Howell 
17807fe6a43SSeth Howell 	rc = _compress_operation(dev, src_iovs, src_iovcnt, dst_iovs, dst_iovcnt, false, cb_arg);
17907fe6a43SSeth Howell 	if (rc) {
18007fe6a43SSeth Howell 		SPDK_ERRLOG("with decompress operation code %d (%s)\n", rc, spdk_strerror(-rc));
18107fe6a43SSeth Howell 		cb_arg->cb_fn(cb_arg->cb_arg, rc);
18207fe6a43SSeth Howell 	}
18307fe6a43SSeth Howell }
18407fe6a43SSeth Howell 
18510cb404aSKonrad Sztyber static void
18610cb404aSKonrad Sztyber _comp_submit_write(void *ctx)
18710cb404aSKonrad Sztyber {
18810cb404aSKonrad Sztyber 	struct spdk_bdev_io *bdev_io = ctx;
18910cb404aSKonrad Sztyber 	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress,
19010cb404aSKonrad Sztyber 					   comp_bdev);
19110cb404aSKonrad Sztyber 
19210cb404aSKonrad Sztyber 	spdk_reduce_vol_writev(comp_bdev->vol, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
19310cb404aSKonrad Sztyber 			       bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
19410cb404aSKonrad Sztyber 			       reduce_rw_blocks_cb, bdev_io);
19510cb404aSKonrad Sztyber }
19610cb404aSKonrad Sztyber 
19710cb404aSKonrad Sztyber static void
19810cb404aSKonrad Sztyber _comp_submit_read(void *ctx)
19910cb404aSKonrad Sztyber {
20010cb404aSKonrad Sztyber 	struct spdk_bdev_io *bdev_io = ctx;
20110cb404aSKonrad Sztyber 	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress,
20210cb404aSKonrad Sztyber 					   comp_bdev);
20310cb404aSKonrad Sztyber 
20410cb404aSKonrad Sztyber 	spdk_reduce_vol_readv(comp_bdev->vol, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
20510cb404aSKonrad Sztyber 			      bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
20610cb404aSKonrad Sztyber 			      reduce_rw_blocks_cb, bdev_io);
20710cb404aSKonrad Sztyber }
20810cb404aSKonrad Sztyber 
20910cb404aSKonrad Sztyber 
21007fe6a43SSeth Howell /* Callback for getting a buf from the bdev pool in the event that the caller passed
21107fe6a43SSeth Howell  * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module
21207fe6a43SSeth Howell  * beneath us before we're done with it.
21307fe6a43SSeth Howell  */
21407fe6a43SSeth Howell static void
21507fe6a43SSeth Howell comp_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
21607fe6a43SSeth Howell {
21707fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress,
21807fe6a43SSeth Howell 					   comp_bdev);
21907fe6a43SSeth Howell 
220ce49d2f9SAlexey Marchuk 	if (spdk_unlikely(!success)) {
221ce49d2f9SAlexey Marchuk 		SPDK_ERRLOG("Failed to get data buffer\n");
222ce49d2f9SAlexey Marchuk 		reduce_rw_blocks_cb(bdev_io, -ENOMEM);
223ce49d2f9SAlexey Marchuk 		return;
224ce49d2f9SAlexey Marchuk 	}
225ce49d2f9SAlexey Marchuk 
22610cb404aSKonrad Sztyber 	spdk_thread_exec_msg(comp_bdev->reduce_thread, _comp_submit_read, bdev_io);
227ce49d2f9SAlexey Marchuk }
22807fe6a43SSeth Howell 
/* Describes one chunk that is only partially covered by an unmap request:
 * a block range inside chunk 'chunk_idx'. */
struct partial_chunk_info {
	uint64_t chunk_idx;	/* index of the partially covered chunk */
	uint64_t block_offset;	/* first logical block to unmap within the chunk */
	uint64_t block_length;	/* number of logical blocks to unmap */
};

/*
 * It's a structure used to hold information needed during the execution of an unmap operation.
 */
struct compress_unmap_split_ctx {
	struct spdk_bdev_io *bdev_io;	/* the original unmap IO being split */
	int32_t status;			/* first error seen; stops further subcmds */
	uint32_t logical_blocks_per_chunk;
	/* The first chunk that can be fully covered by the unmap bdevio interval */
	uint64_t full_chunk_idx_b;
	/* The last chunk that can be fully covered by the unmap bdevio interval */
	uint64_t full_chunk_idx_e;
	uint64_t num_full_chunks;
	uint64_t num_full_chunks_consumed;	/* full-chunk subcmds submitted so far */
	uint32_t num_partial_chunks;
	uint32_t num_partial_chunks_consumed;	/* partial-chunk subcmds submitted so far */
	/* Used to hold the partial chunk information. There will only be less than or equal to two,
	because chunks that cannot be fully covered will only appear at the beginning or end or both two. */
	struct partial_chunk_info partial_chunk_info[2];
};
254412fced1SYalong Wang 
255412fced1SYalong Wang static void _comp_unmap_subcmd_done_cb(void *ctx, int error);
/*
 * This function processes the unmap operation for both full and partial chunks in a
 * compressed block device. It iteratively submits unmap requests until all the chunks
 * have been unmapped or an error occurs.
 */
static void
_comp_submit_unmap_split(void *ctx)
{
	struct compress_unmap_split_ctx *split_ctx = ctx;
	struct spdk_bdev_io *bdev_io = split_ctx->bdev_io;
	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress,
					   comp_bdev);
	struct partial_chunk_info *partial_chunk = NULL;
	uint64_t chunk_idx = 0;
	uint64_t block_offset = 0;
	uint64_t block_length = 0;

	/* Finished: either a subcmd failed, or every full and partial chunk has
	 * been consumed. Complete the original IO and free the split context. */
	if (split_ctx->status != 0 ||
	    (split_ctx->num_full_chunks_consumed == split_ctx->num_full_chunks &&
	     split_ctx->num_partial_chunks_consumed == split_ctx->num_partial_chunks)) {
		reduce_rw_blocks_cb(bdev_io, split_ctx->status);
		free(split_ctx);
		return;
	}

	if (split_ctx->num_full_chunks_consumed < split_ctx->num_full_chunks) {
		/* Unmap the next fully covered chunk in its entirety. */
		chunk_idx = split_ctx->full_chunk_idx_b + split_ctx->num_full_chunks_consumed;
		block_offset = chunk_idx * split_ctx->logical_blocks_per_chunk;
		block_length = split_ctx->logical_blocks_per_chunk;

		split_ctx->num_full_chunks_consumed++;
		spdk_reduce_vol_unmap(comp_bdev->vol,
				      block_offset, block_length,
				      _comp_unmap_subcmd_done_cb, split_ctx);
	} else if (split_ctx->num_partial_chunks_consumed < split_ctx->num_partial_chunks) {
		/* Full chunks done; unmap the covered sub-range of a partial chunk. */
		partial_chunk = &split_ctx->partial_chunk_info[split_ctx->num_partial_chunks_consumed];
		block_offset = partial_chunk->chunk_idx * split_ctx->logical_blocks_per_chunk +
			       partial_chunk->block_offset;
		block_length = partial_chunk->block_length;

		split_ctx->num_partial_chunks_consumed++;
		spdk_reduce_vol_unmap(comp_bdev->vol,
				      block_offset, block_length,
				      _comp_unmap_subcmd_done_cb, split_ctx);
	} else {
		/* Unreachable: the completion check above covers this state. */
		assert(false);
	}
}
305412fced1SYalong Wang 
/*
 * When mkfs or fstrim, large unmap requests may be generated.
 * Large request will be split into multiple subcmds and processed recursively.
 * Run too many subcmds recursively may cause stack overflow or monopolize the thread,
 * delaying other tasks. To avoid this, next subcmd need to be processed asynchronously
 * by 'spdk_thread_send_msg'.
 */
static void
_comp_unmap_subcmd_done_cb(void *ctx, int error)
{
	struct compress_unmap_split_ctx *split_ctx = ctx;

	/* Record the subcmd result; a nonzero status makes the next
	 * _comp_submit_unmap_split invocation terminate the sequence. */
	split_ctx->status = error;
	spdk_thread_send_msg(spdk_get_thread(), _comp_submit_unmap_split, split_ctx);
}
321412fced1SYalong Wang 
/*
 * This function splits the unmap operation into full and partial chunks based on the
 * block range specified in the 'spdk_bdev_io' structure. It calculates the start and end
 * chunks, as well as any partial chunks at the beginning or end of the range, and prepares
 * a context (compress_unmap_split_ctx) to handle these chunks. The unmap operation is
 * then submitted for processing through '_comp_submit_unmap_split'.
 * some cases to handle:
 * 1. start and end chunks are different
 * 1.1 start and end chunks are full
 * 1.2 start and end chunks are partial
 * 1.3 start or  end chunk  is full and the other is partial
 * 2. start and end chunks are the same
 * 2.1 full
 * 2.2 partial
 */
static void
_comp_submit_unmap(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;
	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress,
					   comp_bdev);
	const struct spdk_reduce_vol_params *vol_params = spdk_reduce_vol_get_params(comp_bdev->vol);
	struct compress_unmap_split_ctx *split_ctx;
	struct partial_chunk_info *partial_chunk;
	uint32_t logical_blocks_per_chunk;
	uint64_t start_chunk, end_chunk, start_offset, end_tail;

	/* Map the unmap block range onto reduce chunks: start_offset is the
	 * intra-chunk offset of the first block, end_tail the leftover blocks
	 * in the last chunk (0 means the range ends on a chunk boundary). */
	logical_blocks_per_chunk = vol_params->chunk_size / vol_params->logical_block_size;
	start_chunk = bdev_io->u.bdev.offset_blocks / logical_blocks_per_chunk;
	end_chunk = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) /
		    logical_blocks_per_chunk;
	start_offset = bdev_io->u.bdev.offset_blocks % logical_blocks_per_chunk;
	end_tail = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks) %
		   logical_blocks_per_chunk;

	split_ctx = calloc(1, sizeof(struct compress_unmap_split_ctx));
	if (split_ctx == NULL) {
		reduce_rw_blocks_cb(bdev_io, -ENOMEM);
		return;
	}
	partial_chunk = split_ctx->partial_chunk_info;
	split_ctx->bdev_io = bdev_io;
	split_ctx->logical_blocks_per_chunk = logical_blocks_per_chunk;

	if (start_chunk < end_chunk) {
		/* Case 1: range spans multiple chunks. Peel off a leading
		 * partial chunk if the range does not start on a boundary. */
		if (start_offset != 0) {
			partial_chunk[split_ctx->num_partial_chunks].chunk_idx = start_chunk;
			partial_chunk[split_ctx->num_partial_chunks].block_offset = start_offset;
			partial_chunk[split_ctx->num_partial_chunks].block_length = logical_blocks_per_chunk
					- start_offset;
			split_ctx->num_partial_chunks++;
			split_ctx->full_chunk_idx_b = start_chunk + 1;
		} else {
			split_ctx->full_chunk_idx_b = start_chunk;
		}

		/* Peel off a trailing partial chunk if the range does not end
		 * on a boundary. */
		if (end_tail != 0) {
			partial_chunk[split_ctx->num_partial_chunks].chunk_idx = end_chunk;
			partial_chunk[split_ctx->num_partial_chunks].block_offset = 0;
			partial_chunk[split_ctx->num_partial_chunks].block_length = end_tail;
			split_ctx->num_partial_chunks++;
			split_ctx->full_chunk_idx_e = end_chunk - 1;
		} else {
			split_ctx->full_chunk_idx_e = end_chunk;
		}

		split_ctx->num_full_chunks = end_chunk - start_chunk + 1 - split_ctx->num_partial_chunks;

		if (split_ctx->num_full_chunks) {
			assert(split_ctx->full_chunk_idx_b != UINT64_MAX && split_ctx->full_chunk_idx_e != UINT64_MAX);
			assert(split_ctx->full_chunk_idx_e - split_ctx->full_chunk_idx_b + 1 == split_ctx->num_full_chunks);
		} else {
			/* Both ends partial and adjacent: b ended up one past e. */
			assert(split_ctx->full_chunk_idx_b - split_ctx->full_chunk_idx_e == 1);
		}
	} else if (start_offset != 0 || end_tail != 0) {
		/* Case 2.2: single chunk, partially covered. */
		partial_chunk[0].chunk_idx = start_chunk;
		partial_chunk[0].block_offset = start_offset;
		partial_chunk[0].block_length =
			bdev_io->u.bdev.num_blocks;
		split_ctx->num_partial_chunks = 1;
	} else {
		/* Case 2.1: single chunk, fully covered. */
		split_ctx->full_chunk_idx_b = start_chunk;
		split_ctx->full_chunk_idx_e = end_chunk;
		split_ctx->num_full_chunks = 1;
	}
	assert(split_ctx->num_partial_chunks <= SPDK_COUNTOF(split_ctx->partial_chunk_info));

	_comp_submit_unmap_split(split_ctx);
}
411412fced1SYalong Wang 
/* Called when someone above submits IO to this vbdev. */
static void
vbdev_compress_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct comp_bdev_io *io_ctx = (struct comp_bdev_io *)bdev_io->driver_ctx;
	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress,
					   comp_bdev);
	struct comp_io_channel *comp_ch = spdk_io_channel_get_ctx(ch);

	/* Initialize the per-IO context used later by the completion path. */
	memset(io_ctx, 0, sizeof(struct comp_bdev_io));
	io_ctx->comp_bdev = comp_bdev;
	io_ctx->comp_ch = comp_ch;
	io_ctx->orig_io = bdev_io;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		/* Make sure we own a data buffer before submitting to reduce. */
		spdk_bdev_io_get_buf(bdev_io, comp_read_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return;
	case SPDK_BDEV_IO_TYPE_WRITE:
		/* Reduce volume calls must run on the reduce thread. */
		spdk_thread_exec_msg(comp_bdev->reduce_thread, _comp_submit_write, bdev_io);
		return;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		spdk_thread_exec_msg(comp_bdev->reduce_thread, _comp_submit_unmap, bdev_io);
		return;
	/* TODO support RESET in future patch in the series */
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	default:
		SPDK_ERRLOG("Unknown I/O type %d\n", bdev_io->type);
		spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}
44707fe6a43SSeth Howell 
44807fe6a43SSeth Howell static bool
44907fe6a43SSeth Howell vbdev_compress_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
45007fe6a43SSeth Howell {
45107fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx;
45207fe6a43SSeth Howell 
45307fe6a43SSeth Howell 	switch (io_type) {
45407fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_READ:
45507fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_WRITE:
45607fe6a43SSeth Howell 		return spdk_bdev_io_type_supported(comp_bdev->base_bdev, io_type);
45707fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_UNMAP:
458412fced1SYalong Wang 		return true;
45907fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_RESET:
46007fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_FLUSH:
46107fe6a43SSeth Howell 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
46207fe6a43SSeth Howell 	default:
46307fe6a43SSeth Howell 		return false;
46407fe6a43SSeth Howell 	}
46507fe6a43SSeth Howell }
46607fe6a43SSeth Howell 
/* Callback for unregistering the IO device; final release of the vbdev's memory. */
static void
_device_unregister_cb(void *io_device)
{
	struct vbdev_compress *comp_bdev = io_device;

	/* Done with this comp_bdev. Destroy the lock before freeing the
	 * structure that contains it. */
	pthread_mutex_destroy(&comp_bdev->reduce_lock);
	free(comp_bdev->comp_bdev.name);
	free(comp_bdev);
}
47807fe6a43SSeth Howell 
/* Final destruct step; must run on the thread where the base bdev was opened
 * (callers bounce here via spdk_thread_send_msg when needed). */
static void
_vbdev_compress_destruct_cb(void *ctx)
{
	struct vbdev_compress *comp_bdev = ctx;

	/* Close the underlying bdev on its same opened thread. */
	spdk_bdev_close(comp_bdev->base_desc);
	comp_bdev->vol = NULL;
	if (comp_bdev->init_failed) {
		/* Init never completed: the bdev was not registered or linked,
		 * so just free the structure directly. */
		free(comp_bdev);
		return;
	}

	TAILQ_REMOVE(&g_vbdev_comp, comp_bdev, link);
	spdk_bdev_module_release_bdev(comp_bdev->base_bdev);

	if (comp_bdev->orphaned == false) {
		spdk_io_device_unregister(comp_bdev, _device_unregister_cb);
	} else {
		/* Orphaned bdevs were never io_device-registered; complete the
		 * pending delete and free manually. */
		vbdev_compress_delete_done(comp_bdev->delete_ctx, 0);
		_device_unregister_cb(comp_bdev);
	}
}
502b3be320dSGangCao 
503b3be320dSGangCao static void
50407fe6a43SSeth Howell vbdev_compress_destruct_cb(void *cb_arg, int reduce_errno)
50507fe6a43SSeth Howell {
50607fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)cb_arg;
50707fe6a43SSeth Howell 
50807fe6a43SSeth Howell 	if (reduce_errno) {
50907fe6a43SSeth Howell 		SPDK_ERRLOG("number %d\n", reduce_errno);
51007fe6a43SSeth Howell 	} else {
511b3be320dSGangCao 		if (comp_bdev->thread && comp_bdev->thread != spdk_get_thread()) {
51231d26015SShuhei Matsumoto 			spdk_thread_send_msg(comp_bdev->thread,
51331d26015SShuhei Matsumoto 					     _vbdev_compress_destruct_cb, comp_bdev);
514b3be320dSGangCao 		} else {
51531d26015SShuhei Matsumoto 			_vbdev_compress_destruct_cb(comp_bdev);
51607fe6a43SSeth Howell 		}
51707fe6a43SSeth Howell 	}
51807fe6a43SSeth Howell }
51907fe6a43SSeth Howell 
/* Callback after the reduce volume has been destroyed during delete;
 * releases the base channel and continues teardown. */
static void
_reduce_destroy_cb(void *ctx, int reduce_errno)
{
	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx;

	/* Destroy errors are logged but teardown proceeds regardless. */
	if (reduce_errno) {
		SPDK_ERRLOG("number %d\n", reduce_errno);
	}

	comp_bdev->vol = NULL;
	spdk_put_io_channel(comp_bdev->base_ch);
	if (comp_bdev->init_failed || comp_bdev->orphaned) {
		/* No registered bdev to unregister; go straight to destruct. */
		vbdev_compress_destruct_cb((void *)comp_bdev, 0);
	} else {
		spdk_bdev_unregister(&comp_bdev->comp_bdev, vbdev_compress_delete_done,
				     comp_bdev->delete_ctx);
	}

}
53907fe6a43SSeth Howell 
5406a98b18fSShuhei Matsumoto static void
5416a98b18fSShuhei Matsumoto _delete_vol_unload_cb(void *ctx)
5426a98b18fSShuhei Matsumoto {
5436a98b18fSShuhei Matsumoto 	struct vbdev_compress *comp_bdev = ctx;
5446a98b18fSShuhei Matsumoto 
5451960ef16SJosh Soref 	/* FIXME: Assert if these conditions are not satisfied for now. */
5466a98b18fSShuhei Matsumoto 	assert(!comp_bdev->reduce_thread ||
5476a98b18fSShuhei Matsumoto 	       comp_bdev->reduce_thread == spdk_get_thread());
5486a98b18fSShuhei Matsumoto 
5496a98b18fSShuhei Matsumoto 	/* reducelib needs a channel to comm with the backing device */
5506a98b18fSShuhei Matsumoto 	comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc);
5516a98b18fSShuhei Matsumoto 
5526a98b18fSShuhei Matsumoto 	/* Clean the device before we free our resources. */
5536a98b18fSShuhei Matsumoto 	spdk_reduce_vol_destroy(&comp_bdev->backing_dev, _reduce_destroy_cb, comp_bdev);
5546a98b18fSShuhei Matsumoto }
5556a98b18fSShuhei Matsumoto 
55607fe6a43SSeth Howell /* Called by reduceLib after performing unload vol actions */
55707fe6a43SSeth Howell static void
55807fe6a43SSeth Howell delete_vol_unload_cb(void *cb_arg, int reduce_errno)
55907fe6a43SSeth Howell {
56007fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)cb_arg;
56107fe6a43SSeth Howell 
56207fe6a43SSeth Howell 	if (reduce_errno) {
56301b28622SYankun Li 		SPDK_ERRLOG("Failed to unload vol, error %s\n", spdk_strerror(-reduce_errno));
56401b28622SYankun Li 		vbdev_compress_delete_done(comp_bdev->delete_ctx, reduce_errno);
5656a98b18fSShuhei Matsumoto 		return;
5666a98b18fSShuhei Matsumoto 	}
56707fe6a43SSeth Howell 
5686a98b18fSShuhei Matsumoto 	pthread_mutex_lock(&comp_bdev->reduce_lock);
5696a98b18fSShuhei Matsumoto 	if (comp_bdev->reduce_thread && comp_bdev->reduce_thread != spdk_get_thread()) {
5706a98b18fSShuhei Matsumoto 		spdk_thread_send_msg(comp_bdev->reduce_thread,
5716a98b18fSShuhei Matsumoto 				     _delete_vol_unload_cb, comp_bdev);
5726a98b18fSShuhei Matsumoto 		pthread_mutex_unlock(&comp_bdev->reduce_lock);
5736a98b18fSShuhei Matsumoto 	} else {
5746a98b18fSShuhei Matsumoto 		pthread_mutex_unlock(&comp_bdev->reduce_lock);
5756a98b18fSShuhei Matsumoto 
5766a98b18fSShuhei Matsumoto 		_delete_vol_unload_cb(comp_bdev);
57707fe6a43SSeth Howell 	}
57807fe6a43SSeth Howell }
57907fe6a43SSeth Howell 
58007fe6a43SSeth Howell const char *
58107fe6a43SSeth Howell compress_get_name(const struct vbdev_compress *comp_bdev)
58207fe6a43SSeth Howell {
58307fe6a43SSeth Howell 	return comp_bdev->comp_bdev.name;
58407fe6a43SSeth Howell }
58507fe6a43SSeth Howell 
58607fe6a43SSeth Howell struct vbdev_compress *
58707fe6a43SSeth Howell compress_bdev_first(void)
58807fe6a43SSeth Howell {
58907fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev;
59007fe6a43SSeth Howell 
59107fe6a43SSeth Howell 	comp_bdev = TAILQ_FIRST(&g_vbdev_comp);
59207fe6a43SSeth Howell 
59307fe6a43SSeth Howell 	return comp_bdev;
59407fe6a43SSeth Howell }
59507fe6a43SSeth Howell 
59607fe6a43SSeth Howell struct vbdev_compress *
59707fe6a43SSeth Howell compress_bdev_next(struct vbdev_compress *prev)
59807fe6a43SSeth Howell {
59907fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev;
60007fe6a43SSeth Howell 
60107fe6a43SSeth Howell 	comp_bdev = TAILQ_NEXT(prev, link);
60207fe6a43SSeth Howell 
60307fe6a43SSeth Howell 	return comp_bdev;
60407fe6a43SSeth Howell }
60507fe6a43SSeth Howell 
60607fe6a43SSeth Howell bool
60707fe6a43SSeth Howell compress_has_orphan(const char *name)
60807fe6a43SSeth Howell {
60907fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev;
61007fe6a43SSeth Howell 
61107fe6a43SSeth Howell 	TAILQ_FOREACH(comp_bdev, &g_vbdev_comp, link) {
61207fe6a43SSeth Howell 		if (comp_bdev->orphaned && strcmp(name, comp_bdev->comp_bdev.name) == 0) {
61307fe6a43SSeth Howell 			return true;
61407fe6a43SSeth Howell 		}
61507fe6a43SSeth Howell 	}
61607fe6a43SSeth Howell 	return false;
61707fe6a43SSeth Howell }
61807fe6a43SSeth Howell 
61907fe6a43SSeth Howell /* Called after we've unregistered following a hot remove callback.
62007fe6a43SSeth Howell  * Our finish entry point will be called next.
62107fe6a43SSeth Howell  */
62207fe6a43SSeth Howell static int
62307fe6a43SSeth Howell vbdev_compress_destruct(void *ctx)
62407fe6a43SSeth Howell {
62507fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx;
62607fe6a43SSeth Howell 
62707fe6a43SSeth Howell 	if (comp_bdev->vol != NULL) {
62807fe6a43SSeth Howell 		/* Tell reducelib that we're done with this volume. */
62907fe6a43SSeth Howell 		spdk_reduce_vol_unload(comp_bdev->vol, vbdev_compress_destruct_cb, comp_bdev);
63007fe6a43SSeth Howell 	} else {
63107fe6a43SSeth Howell 		vbdev_compress_destruct_cb(comp_bdev, 0);
63207fe6a43SSeth Howell 	}
63307fe6a43SSeth Howell 
63407fe6a43SSeth Howell 	return 0;
63507fe6a43SSeth Howell }
63607fe6a43SSeth Howell 
63707fe6a43SSeth Howell /* We supplied this as an entry point for upper layers who want to communicate to this
63807fe6a43SSeth Howell  * bdev.  This is how they get a channel.
63907fe6a43SSeth Howell  */
64007fe6a43SSeth Howell static struct spdk_io_channel *
64107fe6a43SSeth Howell vbdev_compress_get_io_channel(void *ctx)
64207fe6a43SSeth Howell {
64307fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx;
64407fe6a43SSeth Howell 
64507fe6a43SSeth Howell 	/* The IO channel code will allocate a channel for us which consists of
64607fe6a43SSeth Howell 	 * the SPDK channel structure plus the size of our comp_io_channel struct
64707fe6a43SSeth Howell 	 * that we passed in when we registered our IO device. It will then call
64807fe6a43SSeth Howell 	 * our channel create callback to populate any elements that we need to
64907fe6a43SSeth Howell 	 * update.
65007fe6a43SSeth Howell 	 */
65107fe6a43SSeth Howell 	return spdk_get_io_channel(comp_bdev);
65207fe6a43SSeth Howell }
65307fe6a43SSeth Howell 
6542c49e910SMaciej Wawryk /* This is the output for bdev_get_bdevs() for this vbdev */
65507fe6a43SSeth Howell static int
65607fe6a43SSeth Howell vbdev_compress_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
65707fe6a43SSeth Howell {
65807fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx;
659d70bfa13SYankun Li 	const struct spdk_reduce_vol_info *vol_info;
66078f92084SYankun Li 	char *comp_algo = NULL;
66178f92084SYankun Li 
66278f92084SYankun Li 	if (comp_bdev->params.comp_algo == SPDK_ACCEL_COMP_ALGO_LZ4) {
66378f92084SYankun Li 		comp_algo = "lz4";
66478f92084SYankun Li 	} else if (comp_bdev->params.comp_algo == SPDK_ACCEL_COMP_ALGO_DEFLATE) {
66578f92084SYankun Li 		comp_algo = "deflate";
66678f92084SYankun Li 	} else {
66778f92084SYankun Li 		assert(false);
66878f92084SYankun Li 	}
66907fe6a43SSeth Howell 
67007fe6a43SSeth Howell 	spdk_json_write_name(w, "compress");
67107fe6a43SSeth Howell 	spdk_json_write_object_begin(w);
67207fe6a43SSeth Howell 	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&comp_bdev->comp_bdev));
67307fe6a43SSeth Howell 	spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(comp_bdev->base_bdev));
67489648519SYankun Li 	spdk_json_write_named_string(w, "pm_path", spdk_reduce_vol_get_pm_path(comp_bdev->vol));
67578f92084SYankun Li 	spdk_json_write_named_string(w, "comp_algo", comp_algo);
67678f92084SYankun Li 	spdk_json_write_named_uint32(w, "comp_level", comp_bdev->params.comp_level);
67778f92084SYankun Li 	spdk_json_write_named_uint32(w, "chunk_size", comp_bdev->params.chunk_size);
67878f92084SYankun Li 	spdk_json_write_named_uint32(w, "backing_io_unit_size", comp_bdev->params.backing_io_unit_size);
679d70bfa13SYankun Li 	vol_info = spdk_reduce_vol_get_info(comp_bdev->vol);
680d70bfa13SYankun Li 	spdk_json_write_named_uint64(w, "allocated_io_units", vol_info->allocated_io_units);
68107fe6a43SSeth Howell 	spdk_json_write_object_end(w);
68207fe6a43SSeth Howell 
68307fe6a43SSeth Howell 	return 0;
68407fe6a43SSeth Howell }
68507fe6a43SSeth Howell 
68607fe6a43SSeth Howell static int
68707fe6a43SSeth Howell vbdev_compress_config_json(struct spdk_json_write_ctx *w)
68807fe6a43SSeth Howell {
689adbac36fSYankun Li 	/* Nothing to dump as compress bdev configuration is saved on physical device. */
69007fe6a43SSeth Howell 	return 0;
69107fe6a43SSeth Howell }
69207fe6a43SSeth Howell 
693f3cda926SYankun Li struct vbdev_init_reduce_ctx {
694f3cda926SYankun Li 	struct vbdev_compress   *comp_bdev;
695f3cda926SYankun Li 	int                     status;
696f3cda926SYankun Li 	bdev_compress_create_cb cb_fn;
697f3cda926SYankun Li 	void                    *cb_ctx;
698f3cda926SYankun Li };
699f3cda926SYankun Li 
700b3be320dSGangCao static void
701*45379ed8SYankun Li _cleanup_vol_unload_cb(void *ctx)
7025734decaSYankun Li {
703*45379ed8SYankun Li 	struct vbdev_compress *comp_bdev = ctx;
704*45379ed8SYankun Li 
705*45379ed8SYankun Li 	assert(!comp_bdev->reduce_thread ||
706*45379ed8SYankun Li 	       comp_bdev->reduce_thread == spdk_get_thread());
707*45379ed8SYankun Li 
708*45379ed8SYankun Li 	comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc);
709*45379ed8SYankun Li 
710*45379ed8SYankun Li 	spdk_reduce_vol_destroy(&comp_bdev->backing_dev, _reduce_destroy_cb, comp_bdev);
711*45379ed8SYankun Li }
712*45379ed8SYankun Li 
713*45379ed8SYankun Li static void
714*45379ed8SYankun Li init_vol_unload_cb(void *ctx, int reduce_errno)
715*45379ed8SYankun Li {
716*45379ed8SYankun Li 	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx;
717*45379ed8SYankun Li 
718*45379ed8SYankun Li 	if (reduce_errno) {
719*45379ed8SYankun Li 		SPDK_ERRLOG("Failed to unload vol, error %s\n", spdk_strerror(-reduce_errno));
720*45379ed8SYankun Li 	}
721*45379ed8SYankun Li 
722*45379ed8SYankun Li 	pthread_mutex_lock(&comp_bdev->reduce_lock);
723*45379ed8SYankun Li 	if (comp_bdev->reduce_thread && comp_bdev->reduce_thread != spdk_get_thread()) {
724*45379ed8SYankun Li 		spdk_thread_send_msg(comp_bdev->reduce_thread,
725*45379ed8SYankun Li 				     _cleanup_vol_unload_cb, comp_bdev);
726*45379ed8SYankun Li 		pthread_mutex_unlock(&comp_bdev->reduce_lock);
727*45379ed8SYankun Li 	} else {
728*45379ed8SYankun Li 		pthread_mutex_unlock(&comp_bdev->reduce_lock);
729*45379ed8SYankun Li 
730*45379ed8SYankun Li 		_cleanup_vol_unload_cb(comp_bdev);
731*45379ed8SYankun Li 	}
7325734decaSYankun Li }
7335734decaSYankun Li 
7345734decaSYankun Li static void
735b3be320dSGangCao _vbdev_reduce_init_cb(void *ctx)
736b3be320dSGangCao {
737f3cda926SYankun Li 	struct vbdev_init_reduce_ctx *init_ctx = ctx;
738f3cda926SYankun Li 	struct vbdev_compress *comp_bdev = init_ctx->comp_bdev;
739*45379ed8SYankun Li 	int rc = init_ctx->status;
740c3ed33f4SShuhei Matsumoto 
741915dc5d6SYankun Li 	assert(comp_bdev->base_desc != NULL);
742b3be320dSGangCao 
743137684a8SShuhei Matsumoto 	/* We're done with metadata operations */
744915dc5d6SYankun Li 	spdk_put_io_channel(comp_bdev->base_ch);
745137684a8SShuhei Matsumoto 
746*45379ed8SYankun Li 	if (rc != 0) {
747*45379ed8SYankun Li 		goto err;
748*45379ed8SYankun Li 	}
749*45379ed8SYankun Li 
750*45379ed8SYankun Li 	assert(comp_bdev->vol != NULL);
751*45379ed8SYankun Li 
752915dc5d6SYankun Li 	rc = vbdev_compress_claim(comp_bdev);
753*45379ed8SYankun Li 	if (rc != 0) {
754*45379ed8SYankun Li 		comp_bdev->init_failed = true;
755*45379ed8SYankun Li 		spdk_reduce_vol_unload(comp_bdev->vol, init_vol_unload_cb, comp_bdev);
756*45379ed8SYankun Li 	}
757*45379ed8SYankun Li 
758f3cda926SYankun Li 	init_ctx->cb_fn(init_ctx->cb_ctx, rc);
759f3cda926SYankun Li 	free(init_ctx);
760c3ed33f4SShuhei Matsumoto 	return;
761b3be320dSGangCao 
762*45379ed8SYankun Li err:
763*45379ed8SYankun Li 	init_ctx->cb_fn(init_ctx->cb_ctx, rc);
764c3ed33f4SShuhei Matsumoto 	/* Close the underlying bdev on its same opened thread. */
765915dc5d6SYankun Li 	spdk_bdev_close(comp_bdev->base_desc);
766915dc5d6SYankun Li 	free(comp_bdev);
767f3cda926SYankun Li 	free(init_ctx);
768c3ed33f4SShuhei Matsumoto }
769c3ed33f4SShuhei Matsumoto 
77007fe6a43SSeth Howell /* Callback from reduce for when init is complete. We'll pass the vbdev_comp struct
77107fe6a43SSeth Howell  * used for initial metadata operations to claim where it will be further filled out
77207fe6a43SSeth Howell  * and added to the global list.
77307fe6a43SSeth Howell  */
77407fe6a43SSeth Howell static void
77507fe6a43SSeth Howell vbdev_reduce_init_cb(void *cb_arg, struct spdk_reduce_vol *vol, int reduce_errno)
77607fe6a43SSeth Howell {
777f3cda926SYankun Li 	struct vbdev_init_reduce_ctx *init_ctx = cb_arg;
778f3cda926SYankun Li 	struct vbdev_compress *comp_bdev = init_ctx->comp_bdev;
77907fe6a43SSeth Howell 
78007fe6a43SSeth Howell 	if (reduce_errno == 0) {
781915dc5d6SYankun Li 		comp_bdev->vol = vol;
78207fe6a43SSeth Howell 	} else {
78327b81860SYankun Li 		SPDK_ERRLOG("for vol %s, error %s\n",
78427b81860SYankun Li 			    spdk_bdev_get_name(comp_bdev->base_bdev), spdk_strerror(-reduce_errno));
785137684a8SShuhei Matsumoto 	}
786137684a8SShuhei Matsumoto 
787f3cda926SYankun Li 	init_ctx->status = reduce_errno;
788f3cda926SYankun Li 
789915dc5d6SYankun Li 	if (comp_bdev->thread && comp_bdev->thread != spdk_get_thread()) {
790f3cda926SYankun Li 		spdk_thread_send_msg(comp_bdev->thread, _vbdev_reduce_init_cb, init_ctx);
791137684a8SShuhei Matsumoto 	} else {
792f3cda926SYankun Li 		_vbdev_reduce_init_cb(init_ctx);
79307fe6a43SSeth Howell 	}
79407fe6a43SSeth Howell }
79507fe6a43SSeth Howell 
79607fe6a43SSeth Howell /* Callback for the function used by reduceLib to perform IO to/from the backing device. We just
79707fe6a43SSeth Howell  * call the callback provided by reduceLib when it called the read/write/unmap function and
79807fe6a43SSeth Howell  * free the bdev_io.
79907fe6a43SSeth Howell  */
80007fe6a43SSeth Howell static void
80107fe6a43SSeth Howell comp_reduce_io_cb(struct spdk_bdev_io *bdev_io, bool success, void *arg)
80207fe6a43SSeth Howell {
80307fe6a43SSeth Howell 	struct spdk_reduce_vol_cb_args *cb_args = arg;
80407fe6a43SSeth Howell 	int reduce_errno;
80507fe6a43SSeth Howell 
80607fe6a43SSeth Howell 	if (success) {
80707fe6a43SSeth Howell 		reduce_errno = 0;
80807fe6a43SSeth Howell 	} else {
80907fe6a43SSeth Howell 		reduce_errno = -EIO;
81007fe6a43SSeth Howell 	}
81107fe6a43SSeth Howell 	spdk_bdev_free_io(bdev_io);
81207fe6a43SSeth Howell 	cb_args->cb_fn(cb_args->cb_arg, reduce_errno);
81307fe6a43SSeth Howell }
81407fe6a43SSeth Howell 
81507fe6a43SSeth Howell static void
8164e94e54eSYankun Li _comp_backing_bdev_queue_io_wait(struct vbdev_compress *comp_bdev,
8174e94e54eSYankun Li 				 struct spdk_reduce_backing_io *backing_io)
8184e94e54eSYankun Li {
8194e94e54eSYankun Li 	struct spdk_bdev_io_wait_entry *waitq_entry;
8204e94e54eSYankun Li 	int rc;
8214e94e54eSYankun Li 
8224e94e54eSYankun Li 	waitq_entry = (struct spdk_bdev_io_wait_entry *) &backing_io->user_ctx;
8234e94e54eSYankun Li 	waitq_entry->bdev = spdk_bdev_desc_get_bdev(comp_bdev->base_desc);
8244e94e54eSYankun Li 	waitq_entry->cb_fn = _comp_reduce_resubmit_backing_io;
8254e94e54eSYankun Li 	waitq_entry->cb_arg = backing_io;
8264e94e54eSYankun Li 
8274e94e54eSYankun Li 	rc = spdk_bdev_queue_io_wait(waitq_entry->bdev, comp_bdev->base_ch, waitq_entry);
8284e94e54eSYankun Li 	if (rc) {
8294e94e54eSYankun Li 		SPDK_ERRLOG("Queue io failed in _comp_backing_bdev_queue_io_wait, rc=%d.\n", rc);
8304e94e54eSYankun Li 		assert(false);
8314e94e54eSYankun Li 		backing_io->backing_cb_args->cb_fn(backing_io->backing_cb_args->cb_arg, rc);
8324e94e54eSYankun Li 	}
8334e94e54eSYankun Li }
8344e94e54eSYankun Li 
8354e94e54eSYankun Li static void
836245271b6SYankun Li _comp_backing_bdev_read(struct spdk_reduce_backing_io *backing_io)
83707fe6a43SSeth Howell {
838245271b6SYankun Li 	struct spdk_reduce_vol_cb_args *backing_cb_args = backing_io->backing_cb_args;
839245271b6SYankun Li 	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(backing_io->dev, struct vbdev_compress,
84007fe6a43SSeth Howell 					   backing_dev);
84107fe6a43SSeth Howell 	int rc;
84207fe6a43SSeth Howell 
84307fe6a43SSeth Howell 	rc = spdk_bdev_readv_blocks(comp_bdev->base_desc, comp_bdev->base_ch,
844245271b6SYankun Li 				    backing_io->iov, backing_io->iovcnt,
845245271b6SYankun Li 				    backing_io->lba, backing_io->lba_count,
84607fe6a43SSeth Howell 				    comp_reduce_io_cb,
847245271b6SYankun Li 				    backing_cb_args);
848245271b6SYankun Li 
84907fe6a43SSeth Howell 	if (rc) {
85007fe6a43SSeth Howell 		if (rc == -ENOMEM) {
8514e94e54eSYankun Li 			_comp_backing_bdev_queue_io_wait(comp_bdev, backing_io);
8524e94e54eSYankun Li 			return;
85307fe6a43SSeth Howell 		} else {
854245271b6SYankun Li 			SPDK_ERRLOG("submitting readv request, rc=%d\n", rc);
85507fe6a43SSeth Howell 		}
856245271b6SYankun Li 		backing_cb_args->cb_fn(backing_cb_args->cb_arg, rc);
85707fe6a43SSeth Howell 	}
85807fe6a43SSeth Howell }
85907fe6a43SSeth Howell 
86007fe6a43SSeth Howell static void
861245271b6SYankun Li _comp_backing_bdev_write(struct spdk_reduce_backing_io  *backing_io)
86207fe6a43SSeth Howell {
863245271b6SYankun Li 	struct spdk_reduce_vol_cb_args *backing_cb_args = backing_io->backing_cb_args;
864245271b6SYankun Li 	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(backing_io->dev, struct vbdev_compress,
86507fe6a43SSeth Howell 					   backing_dev);
86607fe6a43SSeth Howell 	int rc;
86707fe6a43SSeth Howell 
86807fe6a43SSeth Howell 	rc = spdk_bdev_writev_blocks(comp_bdev->base_desc, comp_bdev->base_ch,
869245271b6SYankun Li 				     backing_io->iov, backing_io->iovcnt,
870245271b6SYankun Li 				     backing_io->lba, backing_io->lba_count,
87107fe6a43SSeth Howell 				     comp_reduce_io_cb,
872245271b6SYankun Li 				     backing_cb_args);
873245271b6SYankun Li 
87407fe6a43SSeth Howell 	if (rc) {
87507fe6a43SSeth Howell 		if (rc == -ENOMEM) {
8764e94e54eSYankun Li 			_comp_backing_bdev_queue_io_wait(comp_bdev, backing_io);
8774e94e54eSYankun Li 			return;
87807fe6a43SSeth Howell 		} else {
879245271b6SYankun Li 			SPDK_ERRLOG("error submitting writev request, rc=%d\n", rc);
88007fe6a43SSeth Howell 		}
881245271b6SYankun Li 		backing_cb_args->cb_fn(backing_cb_args->cb_arg, rc);
88207fe6a43SSeth Howell 	}
88307fe6a43SSeth Howell }
88407fe6a43SSeth Howell 
88507fe6a43SSeth Howell static void
886245271b6SYankun Li _comp_backing_bdev_unmap(struct spdk_reduce_backing_io *backing_io)
88707fe6a43SSeth Howell {
888245271b6SYankun Li 	struct spdk_reduce_vol_cb_args *backing_cb_args = backing_io->backing_cb_args;
889245271b6SYankun Li 	struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(backing_io->dev, struct vbdev_compress,
89007fe6a43SSeth Howell 					   backing_dev);
89107fe6a43SSeth Howell 	int rc;
89207fe6a43SSeth Howell 
89307fe6a43SSeth Howell 	rc = spdk_bdev_unmap_blocks(comp_bdev->base_desc, comp_bdev->base_ch,
894245271b6SYankun Li 				    backing_io->lba, backing_io->lba_count,
89507fe6a43SSeth Howell 				    comp_reduce_io_cb,
896245271b6SYankun Li 				    backing_cb_args);
89707fe6a43SSeth Howell 
89807fe6a43SSeth Howell 	if (rc) {
89907fe6a43SSeth Howell 		if (rc == -ENOMEM) {
9004e94e54eSYankun Li 			_comp_backing_bdev_queue_io_wait(comp_bdev, backing_io);
9014e94e54eSYankun Li 			return;
90207fe6a43SSeth Howell 		} else {
903245271b6SYankun Li 			SPDK_ERRLOG("submitting unmap request, rc=%d\n", rc);
90407fe6a43SSeth Howell 		}
905245271b6SYankun Li 		backing_cb_args->cb_fn(backing_cb_args->cb_arg, rc);
906245271b6SYankun Li 	}
907245271b6SYankun Li }
908245271b6SYankun Li 
909245271b6SYankun Li /* This is the function provided to the reduceLib for sending reads/writes/unmaps
910245271b6SYankun Li  * directly to the backing device.
911245271b6SYankun Li  */
912245271b6SYankun Li static void
913245271b6SYankun Li _comp_reduce_submit_backing_io(struct spdk_reduce_backing_io *backing_io)
914245271b6SYankun Li {
915245271b6SYankun Li 	switch (backing_io->backing_io_type) {
916245271b6SYankun Li 	case SPDK_REDUCE_BACKING_IO_WRITE:
917245271b6SYankun Li 		_comp_backing_bdev_write(backing_io);
918245271b6SYankun Li 		break;
919245271b6SYankun Li 	case SPDK_REDUCE_BACKING_IO_READ:
920245271b6SYankun Li 		_comp_backing_bdev_read(backing_io);
921245271b6SYankun Li 		break;
922245271b6SYankun Li 	case SPDK_REDUCE_BACKING_IO_UNMAP:
923245271b6SYankun Li 		_comp_backing_bdev_unmap(backing_io);
924245271b6SYankun Li 		break;
925245271b6SYankun Li 	default:
926245271b6SYankun Li 		SPDK_ERRLOG("Unknown I/O type %d\n", backing_io->backing_io_type);
927245271b6SYankun Li 		backing_io->backing_cb_args->cb_fn(backing_io->backing_cb_args->cb_arg, -EINVAL);
928245271b6SYankun Li 		break;
92907fe6a43SSeth Howell 	}
93007fe6a43SSeth Howell }
93107fe6a43SSeth Howell 
9324e94e54eSYankun Li static void
9334e94e54eSYankun Li _comp_reduce_resubmit_backing_io(void *_backing_io)
9344e94e54eSYankun Li {
9354e94e54eSYankun Li 	struct spdk_reduce_backing_io *backing_io = _backing_io;
9364e94e54eSYankun Li 
9374e94e54eSYankun Li 	_comp_reduce_submit_backing_io(backing_io);
9384e94e54eSYankun Li }
9394e94e54eSYankun Li 
94007fe6a43SSeth Howell /* Called by reduceLib after performing unload vol actions following base bdev hotremove */
94107fe6a43SSeth Howell static void
94207fe6a43SSeth Howell bdev_hotremove_vol_unload_cb(void *cb_arg, int reduce_errno)
94307fe6a43SSeth Howell {
94407fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = (struct vbdev_compress *)cb_arg;
94507fe6a43SSeth Howell 
94607fe6a43SSeth Howell 	if (reduce_errno) {
94707fe6a43SSeth Howell 		SPDK_ERRLOG("number %d\n", reduce_errno);
94807fe6a43SSeth Howell 	}
94907fe6a43SSeth Howell 
950f4e401eaSpaul luse 	comp_bdev->vol = NULL;
95107fe6a43SSeth Howell 	spdk_bdev_unregister(&comp_bdev->comp_bdev, NULL, NULL);
95207fe6a43SSeth Howell }
95307fe6a43SSeth Howell 
95407fe6a43SSeth Howell static void
955779a6bdfSShuhei Matsumoto vbdev_compress_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find)
95607fe6a43SSeth Howell {
95707fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev, *tmp;
95807fe6a43SSeth Howell 
95907fe6a43SSeth Howell 	TAILQ_FOREACH_SAFE(comp_bdev, &g_vbdev_comp, link, tmp) {
96007fe6a43SSeth Howell 		if (bdev_find == comp_bdev->base_bdev) {
961149f0f7eSpaul luse 			/* Tell reduceLib that we're done with this volume. */
96207fe6a43SSeth Howell 			spdk_reduce_vol_unload(comp_bdev->vol, bdev_hotremove_vol_unload_cb, comp_bdev);
96307fe6a43SSeth Howell 		}
96407fe6a43SSeth Howell 	}
96507fe6a43SSeth Howell }
96607fe6a43SSeth Howell 
967779a6bdfSShuhei Matsumoto /* Called when the underlying base bdev triggers asynchronous event such as bdev removal. */
968779a6bdfSShuhei Matsumoto static void
969779a6bdfSShuhei Matsumoto vbdev_compress_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
970779a6bdfSShuhei Matsumoto 				  void *event_ctx)
971779a6bdfSShuhei Matsumoto {
972779a6bdfSShuhei Matsumoto 	switch (type) {
973779a6bdfSShuhei Matsumoto 	case SPDK_BDEV_EVENT_REMOVE:
974779a6bdfSShuhei Matsumoto 		vbdev_compress_base_bdev_hotremove_cb(bdev);
975779a6bdfSShuhei Matsumoto 		break;
976779a6bdfSShuhei Matsumoto 	default:
977779a6bdfSShuhei Matsumoto 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
978779a6bdfSShuhei Matsumoto 		break;
979779a6bdfSShuhei Matsumoto 	}
980779a6bdfSShuhei Matsumoto }
981779a6bdfSShuhei Matsumoto 
98207fe6a43SSeth Howell /* TODO: determine which parms we want user configurable, HC for now
98307fe6a43SSeth Howell  * params.vol_size
98407fe6a43SSeth Howell  * params.chunk_size
98507fe6a43SSeth Howell  * compression PMD, algorithm, window size, comp level, etc.
98607fe6a43SSeth Howell  * DEV_MD_PATH
98707fe6a43SSeth Howell  */
98807fe6a43SSeth Howell 
98907fe6a43SSeth Howell /* Common function for init and load to allocate and populate the minimal
99007fe6a43SSeth Howell  * information for reducelib to init or load.
99107fe6a43SSeth Howell  */
99207fe6a43SSeth Howell struct vbdev_compress *
993ddd4603cSYankun Li _prepare_for_load_init(struct spdk_bdev_desc *bdev_desc, uint32_t lb_size, uint8_t comp_algo,
994ddd4603cSYankun Li 		       uint32_t comp_level)
99507fe6a43SSeth Howell {
996915dc5d6SYankun Li 	struct vbdev_compress *comp_bdev;
997aec30063SShuhei Matsumoto 	struct spdk_bdev *bdev;
99807fe6a43SSeth Howell 
999915dc5d6SYankun Li 	comp_bdev = calloc(1, sizeof(struct vbdev_compress));
1000915dc5d6SYankun Li 	if (comp_bdev == NULL) {
1001915dc5d6SYankun Li 		SPDK_ERRLOG("failed to alloc comp_bdev\n");
100207fe6a43SSeth Howell 		return NULL;
100307fe6a43SSeth Howell 	}
100407fe6a43SSeth Howell 
1005245271b6SYankun Li 	comp_bdev->backing_dev.submit_backing_io = _comp_reduce_submit_backing_io;
1006915dc5d6SYankun Li 	comp_bdev->backing_dev.compress = _comp_reduce_compress;
1007915dc5d6SYankun Li 	comp_bdev->backing_dev.decompress = _comp_reduce_decompress;
100807fe6a43SSeth Howell 
1009915dc5d6SYankun Li 	comp_bdev->base_desc = bdev_desc;
1010aec30063SShuhei Matsumoto 	bdev = spdk_bdev_desc_get_bdev(bdev_desc);
1011915dc5d6SYankun Li 	comp_bdev->base_bdev = bdev;
1012aec30063SShuhei Matsumoto 
1013915dc5d6SYankun Li 	comp_bdev->backing_dev.blocklen = bdev->blocklen;
1014915dc5d6SYankun Li 	comp_bdev->backing_dev.blockcnt = bdev->blockcnt;
101507fe6a43SSeth Howell 
10164e94e54eSYankun Li 	comp_bdev->backing_dev.user_ctx_size = sizeof(struct spdk_bdev_io_wait_entry);
10174e94e54eSYankun Li 
1018ddd4603cSYankun Li 	comp_bdev->comp_algo = comp_algo;
1019ddd4603cSYankun Li 	comp_bdev->comp_level = comp_level;
1020ddd4603cSYankun Li 	comp_bdev->params.comp_algo = comp_algo;
1021ddd4603cSYankun Li 	comp_bdev->params.comp_level = comp_level;
1022915dc5d6SYankun Li 	comp_bdev->params.chunk_size = CHUNK_SIZE;
102362b3b171Spaul luse 	if (lb_size == 0) {
1024915dc5d6SYankun Li 		comp_bdev->params.logical_block_size = bdev->blocklen;
102562b3b171Spaul luse 	} else {
1026915dc5d6SYankun Li 		comp_bdev->params.logical_block_size = lb_size;
102762b3b171Spaul luse 	}
102862b3b171Spaul luse 
1029915dc5d6SYankun Li 	comp_bdev->params.backing_io_unit_size = BACKING_IO_SZ;
1030915dc5d6SYankun Li 	return comp_bdev;
103107fe6a43SSeth Howell }
103207fe6a43SSeth Howell 
103307fe6a43SSeth Howell /* Call reducelib to initialize a new volume */
103407fe6a43SSeth Howell static int
1035ddd4603cSYankun Li vbdev_init_reduce(const char *bdev_name, const char *pm_path, uint32_t lb_size, uint8_t comp_algo,
1036ddd4603cSYankun Li 		  uint32_t comp_level, bdev_compress_create_cb cb_fn, void *cb_arg)
103707fe6a43SSeth Howell {
1038aec30063SShuhei Matsumoto 	struct spdk_bdev_desc *bdev_desc = NULL;
1039f3cda926SYankun Li 	struct vbdev_init_reduce_ctx *init_ctx;
1040915dc5d6SYankun Li 	struct vbdev_compress *comp_bdev;
104107fe6a43SSeth Howell 	int rc;
104207fe6a43SSeth Howell 
1043f3cda926SYankun Li 	init_ctx = calloc(1, sizeof(*init_ctx));
1044f3cda926SYankun Li 	if (init_ctx == NULL) {
1045f3cda926SYankun Li 		SPDK_ERRLOG("failed to alloc init contexts\n");
1046f3cda926SYankun Li 		return - ENOMEM;
1047f3cda926SYankun Li 	}
1048f3cda926SYankun Li 
1049f3cda926SYankun Li 	init_ctx->cb_fn = cb_fn;
1050f3cda926SYankun Li 	init_ctx->cb_ctx = cb_arg;
1051f3cda926SYankun Li 
1052aec30063SShuhei Matsumoto 	rc = spdk_bdev_open_ext(bdev_name, true, vbdev_compress_base_bdev_event_cb,
1053aec30063SShuhei Matsumoto 				NULL, &bdev_desc);
1054aec30063SShuhei Matsumoto 	if (rc) {
1055161f75d5SYankun Li 		SPDK_ERRLOG("could not open bdev %s, error %s\n", bdev_name, spdk_strerror(-rc));
1056f3cda926SYankun Li 		free(init_ctx);
1057aec30063SShuhei Matsumoto 		return rc;
1058aec30063SShuhei Matsumoto 	}
1059aec30063SShuhei Matsumoto 
1060ddd4603cSYankun Li 	comp_bdev = _prepare_for_load_init(bdev_desc, lb_size, comp_algo, comp_level);
1061915dc5d6SYankun Li 	if (comp_bdev == NULL) {
1062f3cda926SYankun Li 		free(init_ctx);
1063aec30063SShuhei Matsumoto 		spdk_bdev_close(bdev_desc);
106407fe6a43SSeth Howell 		return -EINVAL;
106507fe6a43SSeth Howell 	}
106607fe6a43SSeth Howell 
1067f3cda926SYankun Li 	init_ctx->comp_bdev = comp_bdev;
1068f3cda926SYankun Li 
1069b3be320dSGangCao 	/* Save the thread where the base device is opened */
1070915dc5d6SYankun Li 	comp_bdev->thread = spdk_get_thread();
1071b3be320dSGangCao 
1072915dc5d6SYankun Li 	comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc);
107307fe6a43SSeth Howell 
1074915dc5d6SYankun Li 	spdk_reduce_vol_init(&comp_bdev->params, &comp_bdev->backing_dev,
107507fe6a43SSeth Howell 			     pm_path,
107607fe6a43SSeth Howell 			     vbdev_reduce_init_cb,
1077f3cda926SYankun Li 			     init_ctx);
107807fe6a43SSeth Howell 	return 0;
107907fe6a43SSeth Howell }
108007fe6a43SSeth Howell 
108107fe6a43SSeth Howell /* We provide this callback for the SPDK channel code to create a channel using
108207fe6a43SSeth Howell  * the channel struct we provided in our module get_io_channel() entry point. Here
108307fe6a43SSeth Howell  * we get and save off an underlying base channel of the device below us so that
108407fe6a43SSeth Howell  * we can communicate with the base bdev on a per channel basis.  If we needed
108507fe6a43SSeth Howell  * our own poller for this vbdev, we'd register it here.
108607fe6a43SSeth Howell  */
108707fe6a43SSeth Howell static int
108807fe6a43SSeth Howell comp_bdev_ch_create_cb(void *io_device, void *ctx_buf)
108907fe6a43SSeth Howell {
109007fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = io_device;
1091976f8b09Spaul luse 
109282b8dd90SShuhei Matsumoto 	/* Now set the reduce channel if it's not already set. */
109382b8dd90SShuhei Matsumoto 	pthread_mutex_lock(&comp_bdev->reduce_lock);
109482b8dd90SShuhei Matsumoto 	if (comp_bdev->ch_count == 0) {
1095149f0f7eSpaul luse 		/* We use this queue to track outstanding IO in our layer. */
109607fe6a43SSeth Howell 		TAILQ_INIT(&comp_bdev->pending_comp_ios);
109707fe6a43SSeth Howell 
109807fe6a43SSeth Howell 		/* We use this to queue up compression operations as needed. */
109907fe6a43SSeth Howell 		TAILQ_INIT(&comp_bdev->queued_comp_ops);
110007fe6a43SSeth Howell 
110107fe6a43SSeth Howell 		comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc);
110207fe6a43SSeth Howell 		comp_bdev->reduce_thread = spdk_get_thread();
1103bb5083a8Spaul luse 		comp_bdev->accel_channel = spdk_accel_get_io_channel();
110407fe6a43SSeth Howell 	}
110507fe6a43SSeth Howell 	comp_bdev->ch_count++;
110607fe6a43SSeth Howell 	pthread_mutex_unlock(&comp_bdev->reduce_lock);
110707fe6a43SSeth Howell 
110807fe6a43SSeth Howell 	return 0;
110907fe6a43SSeth Howell }
111007fe6a43SSeth Howell 
111107fe6a43SSeth Howell static void
111207fe6a43SSeth Howell _channel_cleanup(struct vbdev_compress *comp_bdev)
111307fe6a43SSeth Howell {
111407fe6a43SSeth Howell 	spdk_put_io_channel(comp_bdev->base_ch);
1115bb5083a8Spaul luse 	spdk_put_io_channel(comp_bdev->accel_channel);
111607fe6a43SSeth Howell 	comp_bdev->reduce_thread = NULL;
111707fe6a43SSeth Howell }
111807fe6a43SSeth Howell 
111907fe6a43SSeth Howell /* Used to reroute destroy_ch to the correct thread */
112007fe6a43SSeth Howell static void
112107fe6a43SSeth Howell _comp_bdev_ch_destroy_cb(void *arg)
112207fe6a43SSeth Howell {
112307fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = arg;
112407fe6a43SSeth Howell 
112507fe6a43SSeth Howell 	pthread_mutex_lock(&comp_bdev->reduce_lock);
112607fe6a43SSeth Howell 	_channel_cleanup(comp_bdev);
112707fe6a43SSeth Howell 	pthread_mutex_unlock(&comp_bdev->reduce_lock);
112807fe6a43SSeth Howell }
112907fe6a43SSeth Howell 
113007fe6a43SSeth Howell /* We provide this callback for the SPDK channel code to destroy a channel
113107fe6a43SSeth Howell  * created with our create callback. We just need to undo anything we did
113207fe6a43SSeth Howell  * when we created. If this bdev used its own poller, we'd unregister it here.
113307fe6a43SSeth Howell  */
113407fe6a43SSeth Howell static void
113507fe6a43SSeth Howell comp_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
113607fe6a43SSeth Howell {
113707fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = io_device;
113807fe6a43SSeth Howell 
113907fe6a43SSeth Howell 	pthread_mutex_lock(&comp_bdev->reduce_lock);
114007fe6a43SSeth Howell 	comp_bdev->ch_count--;
114107fe6a43SSeth Howell 	if (comp_bdev->ch_count == 0) {
114207fe6a43SSeth Howell 		/* Send this request to the thread where the channel was created. */
114307fe6a43SSeth Howell 		if (comp_bdev->reduce_thread != spdk_get_thread()) {
114407fe6a43SSeth Howell 			spdk_thread_send_msg(comp_bdev->reduce_thread,
114507fe6a43SSeth Howell 					     _comp_bdev_ch_destroy_cb, comp_bdev);
114607fe6a43SSeth Howell 		} else {
114707fe6a43SSeth Howell 			_channel_cleanup(comp_bdev);
114807fe6a43SSeth Howell 		}
114907fe6a43SSeth Howell 	}
115007fe6a43SSeth Howell 	pthread_mutex_unlock(&comp_bdev->reduce_lock);
115107fe6a43SSeth Howell }
115207fe6a43SSeth Howell 
1153ddd4603cSYankun Li static int
1154ddd4603cSYankun Li _check_compress_bdev_comp_algo(enum spdk_accel_comp_algo algo, uint32_t comp_level)
1155ddd4603cSYankun Li {
1156ddd4603cSYankun Li 	uint32_t min_level, max_level;
1157ddd4603cSYankun Li 	int rc;
1158ddd4603cSYankun Li 
1159ddd4603cSYankun Li 	rc = spdk_accel_get_compress_level_range(algo, &min_level, &max_level);
1160ddd4603cSYankun Li 	if (rc != 0) {
1161ddd4603cSYankun Li 		return rc;
1162ddd4603cSYankun Li 	}
1163ddd4603cSYankun Li 
1164ddd4603cSYankun Li 	/* If both min_level and max_level are 0, the compression level can be ignored.
1165ddd4603cSYankun Li 	 * The back-end implementation hardcodes the compression level.
1166ddd4603cSYankun Li 	 */
1167ddd4603cSYankun Li 	if (min_level == 0 && max_level == 0) {
1168ddd4603cSYankun Li 		return 0;
1169ddd4603cSYankun Li 	}
1170ddd4603cSYankun Li 
1171ddd4603cSYankun Li 	if (comp_level > max_level || comp_level < min_level) {
1172ddd4603cSYankun Li 		return -EINVAL;
1173ddd4603cSYankun Li 	}
1174ddd4603cSYankun Li 
1175ddd4603cSYankun Li 	return 0;
1176ddd4603cSYankun Li }
1177ddd4603cSYankun Li 
117807fe6a43SSeth Howell /* RPC entry point for compression vbdev creation. */
117907fe6a43SSeth Howell int
1180f3cda926SYankun Li create_compress_bdev(const char *bdev_name, const char *pm_path, uint32_t lb_size,
1181ddd4603cSYankun Li 		     uint8_t comp_algo, uint32_t comp_level,
1182f3cda926SYankun Li 		     bdev_compress_create_cb cb_fn, void *cb_arg)
118307fe6a43SSeth Howell {
118489ee5a13Spaul luse 	struct vbdev_compress *comp_bdev = NULL;
118538b03952SYankun Li 	struct stat info;
1186ddd4603cSYankun Li 	int rc;
118738b03952SYankun Li 
118838b03952SYankun Li 	if (stat(pm_path, &info) != 0) {
118938b03952SYankun Li 		SPDK_ERRLOG("PM path %s does not exist.\n", pm_path);
119038b03952SYankun Li 		return -EINVAL;
119138b03952SYankun Li 	} else if (!S_ISDIR(info.st_mode)) {
119238b03952SYankun Li 		SPDK_ERRLOG("PM path %s is not a directory.\n", pm_path);
119338b03952SYankun Li 		return -EINVAL;
119438b03952SYankun Li 	}
119589ee5a13Spaul luse 
119662b3b171Spaul luse 	if ((lb_size != 0) && (lb_size != LB_SIZE_4K) && (lb_size != LB_SIZE_512B)) {
119762b3b171Spaul luse 		SPDK_ERRLOG("Logical block size must be 512 or 4096\n");
119862b3b171Spaul luse 		return -EINVAL;
119962b3b171Spaul luse 	}
120062b3b171Spaul luse 
1201ddd4603cSYankun Li 	rc = _check_compress_bdev_comp_algo(comp_algo, comp_level);
1202ddd4603cSYankun Li 	if (rc != 0) {
1203ddd4603cSYankun Li 		SPDK_ERRLOG("Compress bdev doesn't support compression algo(%u) or level(%u)\n",
1204ddd4603cSYankun Li 			    comp_algo, comp_level);
1205ddd4603cSYankun Li 		return rc;
1206ddd4603cSYankun Li 	}
1207ddd4603cSYankun Li 
120889ee5a13Spaul luse 	TAILQ_FOREACH(comp_bdev, &g_vbdev_comp, link) {
120989ee5a13Spaul luse 		if (strcmp(bdev_name, comp_bdev->base_bdev->name) == 0) {
121089ee5a13Spaul luse 			SPDK_ERRLOG("Bass bdev %s already being used for a compress bdev\n", bdev_name);
121189ee5a13Spaul luse 			return -EBUSY;
121289ee5a13Spaul luse 		}
121389ee5a13Spaul luse 	}
1214ddd4603cSYankun Li 	return vbdev_init_reduce(bdev_name, pm_path, lb_size, comp_algo, comp_level, cb_fn, cb_arg);
121507fe6a43SSeth Howell }
121607fe6a43SSeth Howell 
/* Module init callback: the compress module has no global state to set up. */
static int
vbdev_compress_init(void)
{
	return 0;
}
122207fe6a43SSeth Howell 
/* Called when the entire module is being torn down. Currently a no-op;
 * any still-loaded reduce volumes are not unloaded here yet.
 */
static void
vbdev_compress_finish(void)
{
	/* TODO: unload vol in a future patch */
}
122907fe6a43SSeth Howell 
/* During init we'll be asked how much memory we'd like passed to us
 * in bdev_io structures as context. Here's where we specify how
 * much context we want per IO: one struct comp_bdev_io.
 */
static int
vbdev_compress_get_ctx_size(void)
{
	return sizeof(struct comp_bdev_io);
}
123907fe6a43SSeth Howell 
/* When we register our bdev this is how we specify our entry points:
 * the per-bdev function table handed to spdk_bdev_register().
 */
static const struct spdk_bdev_fn_table vbdev_compress_fn_table = {
	.destruct		= vbdev_compress_destruct,
	.submit_request		= vbdev_compress_submit_request,
	.io_type_supported	= vbdev_compress_io_type_supported,
	.get_io_channel		= vbdev_compress_get_io_channel,
	.dump_info_json		= vbdev_compress_dump_info_json,
	.write_config_json	= NULL,	/* no per-bdev JSON config callback */
};
124907fe6a43SSeth Howell 
/* Module descriptor: init/fini hooks, per-IO context size, the examine
 * callback that auto-claims previously created compress volumes, and the
 * module-level JSON config dump.
 */
static struct spdk_bdev_module compress_if = {
	.name = "compress",
	.module_init = vbdev_compress_init,
	.get_ctx_size = vbdev_compress_get_ctx_size,
	.examine_disk = vbdev_compress_examine,
	.module_fini = vbdev_compress_finish,
	.config_json = vbdev_compress_config_json
};

SPDK_BDEV_MODULE_REGISTER(compress, &compress_if)
126007fe6a43SSeth Howell 
126107fe6a43SSeth Howell static int _set_compbdev_name(struct vbdev_compress *comp_bdev)
126207fe6a43SSeth Howell {
126307fe6a43SSeth Howell 	struct spdk_bdev_alias *aliases;
126407fe6a43SSeth Howell 
126507fe6a43SSeth Howell 	if (!TAILQ_EMPTY(spdk_bdev_get_aliases(comp_bdev->base_bdev))) {
126607fe6a43SSeth Howell 		aliases = TAILQ_FIRST(spdk_bdev_get_aliases(comp_bdev->base_bdev));
1267eabe783cSJiewei Ke 		comp_bdev->comp_bdev.name = spdk_sprintf_alloc("COMP_%s", aliases->alias.name);
126807fe6a43SSeth Howell 		if (!comp_bdev->comp_bdev.name) {
126907fe6a43SSeth Howell 			SPDK_ERRLOG("could not allocate comp_bdev name for alias\n");
127007fe6a43SSeth Howell 			return -ENOMEM;
127107fe6a43SSeth Howell 		}
127207fe6a43SSeth Howell 	} else {
127307fe6a43SSeth Howell 		comp_bdev->comp_bdev.name = spdk_sprintf_alloc("COMP_%s", comp_bdev->base_bdev->name);
127407fe6a43SSeth Howell 		if (!comp_bdev->comp_bdev.name) {
127507fe6a43SSeth Howell 			SPDK_ERRLOG("could not allocate comp_bdev name for unique name\n");
127607fe6a43SSeth Howell 			return -ENOMEM;
127707fe6a43SSeth Howell 		}
127807fe6a43SSeth Howell 	}
127907fe6a43SSeth Howell 	return 0;
128007fe6a43SSeth Howell }
128107fe6a43SSeth Howell 
/* Fill out and register the compress vbdev that sits on top of an opened,
 * reduce-loaded base bdev: set its name/geometry from the reduce volume
 * params, derive a deterministic UUID, register the io_device, claim the
 * base bdev, and finally register the new bdev with the bdev layer.
 *
 * Returns 0 on success, negative errno on failure (resources acquired
 * before the failure point are released via the goto unwind below).
 */
static int
vbdev_compress_claim(struct vbdev_compress *comp_bdev)
{
	struct spdk_uuid ns_uuid;
	int rc;

	if (_set_compbdev_name(comp_bdev)) {
		return -EINVAL;
	}

	/* Note: some of the fields below will change in the future - for example,
	 * blockcnt specifically will not match (the compressed volume size will
	 * be slightly less than the base bdev size)
	 */
	comp_bdev->comp_bdev.product_name = COMP_BDEV_NAME;
	comp_bdev->comp_bdev.write_cache = comp_bdev->base_bdev->write_cache;

	/* Split IO on chunk boundaries so each request maps to whole reduce chunks. */
	comp_bdev->comp_bdev.optimal_io_boundary =
		comp_bdev->params.chunk_size / comp_bdev->params.logical_block_size;

	comp_bdev->comp_bdev.split_on_optimal_io_boundary = true;

	comp_bdev->comp_bdev.blocklen = comp_bdev->params.logical_block_size;
	comp_bdev->comp_bdev.blockcnt = comp_bdev->params.vol_size / comp_bdev->comp_bdev.blocklen;
	assert(comp_bdev->comp_bdev.blockcnt > 0);

	/* This is the context that is passed to us when the bdev
	 * layer calls in so we'll save our comp_bdev node here.
	 */
	comp_bdev->comp_bdev.ctxt = comp_bdev;
	comp_bdev->comp_bdev.fn_table = &vbdev_compress_fn_table;
	comp_bdev->comp_bdev.module = &compress_if;

	/* Generate UUID based on namespace UUID + base bdev UUID. */
	spdk_uuid_parse(&ns_uuid, BDEV_COMPRESS_NAMESPACE_UUID);
	rc = spdk_uuid_generate_sha1(&comp_bdev->comp_bdev.uuid, &ns_uuid,
				     (const char *)&comp_bdev->base_bdev->uuid, sizeof(struct spdk_uuid));
	if (rc) {
		SPDK_ERRLOG("Unable to generate new UUID for compress bdev, error %s\n", spdk_strerror(-rc));
		return -EINVAL;
	}

	pthread_mutex_init(&comp_bdev->reduce_lock, NULL);

	/* Save the thread where the base device is opened */
	comp_bdev->thread = spdk_get_thread();

	spdk_io_device_register(comp_bdev, comp_bdev_ch_create_cb, comp_bdev_ch_destroy_cb,
				sizeof(struct comp_io_channel),
				comp_bdev->comp_bdev.name);

	rc = spdk_bdev_module_claim_bdev(comp_bdev->base_bdev, comp_bdev->base_desc,
					 comp_bdev->comp_bdev.module);
	if (rc) {
		SPDK_ERRLOG("could not claim bdev %s, error %s\n", spdk_bdev_get_name(comp_bdev->base_bdev),
			    spdk_strerror(-rc));
		goto error_claim;
	}

	rc = spdk_bdev_register(&comp_bdev->comp_bdev);
	if (rc < 0) {
		SPDK_ERRLOG("trying to register bdev, error %s\n", spdk_strerror(-rc));
		goto error_bdev_register;
	}

	TAILQ_INSERT_TAIL(&g_vbdev_comp, comp_bdev, link);

	SPDK_NOTICELOG("registered io_device and virtual bdev for: %s\n", comp_bdev->comp_bdev.name);

	return 0;

	/* Error cleanup paths. */
error_bdev_register:
	spdk_bdev_module_release_bdev(comp_bdev->base_bdev);
error_claim:
	spdk_io_device_unregister(comp_bdev, NULL);
	free(comp_bdev->comp_bdev.name);
	return rc;
}
136107fe6a43SSeth Howell 
1362fcf8e454SShuhei Matsumoto static void
1363fcf8e454SShuhei Matsumoto _vbdev_compress_delete_done(void *_ctx)
1364fcf8e454SShuhei Matsumoto {
1365fcf8e454SShuhei Matsumoto 	struct vbdev_comp_delete_ctx *ctx = _ctx;
1366fcf8e454SShuhei Matsumoto 
1367fcf8e454SShuhei Matsumoto 	ctx->cb_fn(ctx->cb_arg, ctx->cb_rc);
1368fcf8e454SShuhei Matsumoto 
1369fcf8e454SShuhei Matsumoto 	free(ctx);
1370fcf8e454SShuhei Matsumoto }
1371fcf8e454SShuhei Matsumoto 
1372fcf8e454SShuhei Matsumoto static void
1373fcf8e454SShuhei Matsumoto vbdev_compress_delete_done(void *cb_arg, int bdeverrno)
1374fcf8e454SShuhei Matsumoto {
1375fcf8e454SShuhei Matsumoto 	struct vbdev_comp_delete_ctx *ctx = cb_arg;
1376fcf8e454SShuhei Matsumoto 
1377fcf8e454SShuhei Matsumoto 	ctx->cb_rc = bdeverrno;
1378fcf8e454SShuhei Matsumoto 
1379fcf8e454SShuhei Matsumoto 	if (ctx->orig_thread != spdk_get_thread()) {
1380fcf8e454SShuhei Matsumoto 		spdk_thread_send_msg(ctx->orig_thread, _vbdev_compress_delete_done, ctx);
1381fcf8e454SShuhei Matsumoto 	} else {
1382fcf8e454SShuhei Matsumoto 		_vbdev_compress_delete_done(ctx);
1383fcf8e454SShuhei Matsumoto 	}
1384fcf8e454SShuhei Matsumoto }
1385fcf8e454SShuhei Matsumoto 
138607fe6a43SSeth Howell void
138707fe6a43SSeth Howell bdev_compress_delete(const char *name, spdk_delete_compress_complete cb_fn, void *cb_arg)
138807fe6a43SSeth Howell {
138907fe6a43SSeth Howell 	struct vbdev_compress *comp_bdev = NULL;
1390fcf8e454SShuhei Matsumoto 	struct vbdev_comp_delete_ctx *ctx;
139107fe6a43SSeth Howell 
139207fe6a43SSeth Howell 	TAILQ_FOREACH(comp_bdev, &g_vbdev_comp, link) {
139307fe6a43SSeth Howell 		if (strcmp(name, comp_bdev->comp_bdev.name) == 0) {
139407fe6a43SSeth Howell 			break;
139507fe6a43SSeth Howell 		}
139607fe6a43SSeth Howell 	}
139707fe6a43SSeth Howell 
139807fe6a43SSeth Howell 	if (comp_bdev == NULL) {
139907fe6a43SSeth Howell 		cb_fn(cb_arg, -ENODEV);
140007fe6a43SSeth Howell 		return;
140107fe6a43SSeth Howell 	}
140207fe6a43SSeth Howell 
1403fcf8e454SShuhei Matsumoto 	ctx = calloc(1, sizeof(*ctx));
1404fcf8e454SShuhei Matsumoto 	if (ctx == NULL) {
1405fcf8e454SShuhei Matsumoto 		SPDK_ERRLOG("Failed to allocate delete context\n");
1406fcf8e454SShuhei Matsumoto 		cb_fn(cb_arg, -ENOMEM);
1407fcf8e454SShuhei Matsumoto 		return;
1408fcf8e454SShuhei Matsumoto 	}
1409fcf8e454SShuhei Matsumoto 
141007fe6a43SSeth Howell 	/* Save these for after the vol is destroyed. */
1411fcf8e454SShuhei Matsumoto 	ctx->cb_fn = cb_fn;
1412fcf8e454SShuhei Matsumoto 	ctx->cb_arg = cb_arg;
1413fcf8e454SShuhei Matsumoto 	ctx->orig_thread = spdk_get_thread();
1414fcf8e454SShuhei Matsumoto 
1415fcf8e454SShuhei Matsumoto 	comp_bdev->delete_ctx = ctx;
141607fe6a43SSeth Howell 
141707fe6a43SSeth Howell 	/* Tell reducelib that we're done with this volume. */
141807fe6a43SSeth Howell 	if (comp_bdev->orphaned == false) {
141907fe6a43SSeth Howell 		spdk_reduce_vol_unload(comp_bdev->vol, delete_vol_unload_cb, comp_bdev);
142007fe6a43SSeth Howell 	} else {
142107fe6a43SSeth Howell 		delete_vol_unload_cb(comp_bdev, 0);
142207fe6a43SSeth Howell 	}
142307fe6a43SSeth Howell }
142407fe6a43SSeth Howell 
/* No-op unload completion used by _vbdev_reduce_load_cb() when a freshly
 * loaded volume must be released after a failed claim; there is nothing
 * further to clean up at that point.
 */
static void
_vbdev_reduce_load_unload_cb(void *ctx, int reduce_errno)
{
}
14297e10e593SYankun Li 
/* Continuation of vbdev_reduce_load_cb(), executed on the thread that opened
 * the base bdev. Depending on the recorded reduce load result it either:
 *   - claims the volume as a compress bdev (reduce_errno == 0),
 *   - keeps the base bdev claimed but "orphaned" when no volume metadata was
 *     found (-ENOENT), or
 *   - tears everything down on any other error.
 * Every path ends by signalling examine completion for this module.
 */
static void
_vbdev_reduce_load_cb(void *ctx)
{
	struct vbdev_compress *comp_bdev = ctx;
	int rc;

	assert(comp_bdev->base_desc != NULL);

	/* Done with metadata operations */
	spdk_put_io_channel(comp_bdev->base_ch);

	if (comp_bdev->reduce_errno == 0) {
		rc = vbdev_compress_claim(comp_bdev);
		if (rc != 0) {
			/* Claim failed: release the loaded volume before teardown. */
			spdk_reduce_vol_unload(comp_bdev->vol, _vbdev_reduce_load_unload_cb, NULL);
			goto err;
		}
	} else if (comp_bdev->reduce_errno == -ENOENT) {
		if (_set_compbdev_name(comp_bdev)) {
			goto err;
		}

		/* Save the thread where the base device is opened */
		comp_bdev->thread = spdk_get_thread();

		comp_bdev->comp_bdev.module = &compress_if;
		pthread_mutex_init(&comp_bdev->reduce_lock, NULL);
		rc = spdk_bdev_module_claim_bdev(comp_bdev->base_bdev, comp_bdev->base_desc,
						 comp_bdev->comp_bdev.module);
		if (rc) {
			SPDK_ERRLOG("could not claim bdev %s, error %s\n", spdk_bdev_get_name(comp_bdev->base_bdev),
				    spdk_strerror(-rc));
			free(comp_bdev->comp_bdev.name);
			goto err;
		}

		comp_bdev->orphaned = true;
		TAILQ_INSERT_TAIL(&g_vbdev_comp, comp_bdev, link);
	} else {
		/* -EILSEQ is deliberately not logged — presumably it means the
		 * base bdev simply doesn't hold a reduce volume; confirm against
		 * the reduce library's load semantics.
		 */
		if (comp_bdev->reduce_errno != -EILSEQ) {
			SPDK_ERRLOG("for vol %s, error %s\n", spdk_bdev_get_name(comp_bdev->base_bdev),
				    spdk_strerror(-comp_bdev->reduce_errno));
		}
		goto err;
	}

	spdk_bdev_module_examine_done(&compress_if);
	return;

err:
	/* Close the underlying bdev on its same opened thread. */
	spdk_bdev_close(comp_bdev->base_desc);
	free(comp_bdev);
	spdk_bdev_module_examine_done(&compress_if);
}
148507fe6a43SSeth Howell 
148685724ba2SShuhei Matsumoto /* Callback from reduce for then load is complete. We'll pass the vbdev_comp struct
148785724ba2SShuhei Matsumoto  * used for initial metadata operations to claim where it will be further filled out
148885724ba2SShuhei Matsumoto  * and added to the global list.
148985724ba2SShuhei Matsumoto  */
149085724ba2SShuhei Matsumoto static void
149185724ba2SShuhei Matsumoto vbdev_reduce_load_cb(void *cb_arg, struct spdk_reduce_vol *vol, int reduce_errno)
149285724ba2SShuhei Matsumoto {
1493915dc5d6SYankun Li 	struct vbdev_compress *comp_bdev = cb_arg;
149485724ba2SShuhei Matsumoto 
149585724ba2SShuhei Matsumoto 	if (reduce_errno == 0) {
149685724ba2SShuhei Matsumoto 		/* Update information following volume load. */
1497915dc5d6SYankun Li 		comp_bdev->vol = vol;
1498915dc5d6SYankun Li 		memcpy(&comp_bdev->params, spdk_reduce_vol_get_params(vol),
149985724ba2SShuhei Matsumoto 		       sizeof(struct spdk_reduce_vol_params));
1500ddd4603cSYankun Li 		comp_bdev->comp_algo = comp_bdev->params.comp_algo;
1501ddd4603cSYankun Li 		comp_bdev->comp_level = comp_bdev->params.comp_level;
150285724ba2SShuhei Matsumoto 	}
150385724ba2SShuhei Matsumoto 
1504915dc5d6SYankun Li 	comp_bdev->reduce_errno = reduce_errno;
150585724ba2SShuhei Matsumoto 
1506915dc5d6SYankun Li 	if (comp_bdev->thread && comp_bdev->thread != spdk_get_thread()) {
1507915dc5d6SYankun Li 		spdk_thread_send_msg(comp_bdev->thread, _vbdev_reduce_load_cb, comp_bdev);
150885724ba2SShuhei Matsumoto 	} else {
1509915dc5d6SYankun Li 		_vbdev_reduce_load_cb(comp_bdev);
151085724ba2SShuhei Matsumoto 	}
151185724ba2SShuhei Matsumoto 
151285724ba2SShuhei Matsumoto }
151385724ba2SShuhei Matsumoto 
151407fe6a43SSeth Howell /* Examine_disk entry point: will do a metadata load to see if this is ours,
151507fe6a43SSeth Howell  * and if so will go ahead and claim it.
151607fe6a43SSeth Howell  */
151707fe6a43SSeth Howell static void
151807fe6a43SSeth Howell vbdev_compress_examine(struct spdk_bdev *bdev)
151907fe6a43SSeth Howell {
1520aec30063SShuhei Matsumoto 	struct spdk_bdev_desc *bdev_desc = NULL;
1521915dc5d6SYankun Li 	struct vbdev_compress *comp_bdev;
152207fe6a43SSeth Howell 	int rc;
152307fe6a43SSeth Howell 
152407fe6a43SSeth Howell 	if (strcmp(bdev->product_name, COMP_BDEV_NAME) == 0) {
152507fe6a43SSeth Howell 		spdk_bdev_module_examine_done(&compress_if);
152607fe6a43SSeth Howell 		return;
152707fe6a43SSeth Howell 	}
152807fe6a43SSeth Howell 
1529aec30063SShuhei Matsumoto 	rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false,
1530aec30063SShuhei Matsumoto 				vbdev_compress_base_bdev_event_cb, NULL, &bdev_desc);
1531aec30063SShuhei Matsumoto 	if (rc) {
1532161f75d5SYankun Li 		SPDK_ERRLOG("could not open bdev %s, error %s\n", spdk_bdev_get_name(bdev),
1533161f75d5SYankun Li 			    spdk_strerror(-rc));
153407fe6a43SSeth Howell 		spdk_bdev_module_examine_done(&compress_if);
153507fe6a43SSeth Howell 		return;
153607fe6a43SSeth Howell 	}
153707fe6a43SSeth Howell 
1538ddd4603cSYankun Li 	comp_bdev = _prepare_for_load_init(bdev_desc, 0, SPDK_ACCEL_COMP_ALGO_DEFLATE, 1);
1539915dc5d6SYankun Li 	if (comp_bdev == NULL) {
1540aec30063SShuhei Matsumoto 		spdk_bdev_close(bdev_desc);
154107fe6a43SSeth Howell 		spdk_bdev_module_examine_done(&compress_if);
154207fe6a43SSeth Howell 		return;
154307fe6a43SSeth Howell 	}
154407fe6a43SSeth Howell 
1545b3be320dSGangCao 	/* Save the thread where the base device is opened */
1546915dc5d6SYankun Li 	comp_bdev->thread = spdk_get_thread();
1547b3be320dSGangCao 
1548915dc5d6SYankun Li 	comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc);
1549915dc5d6SYankun Li 	spdk_reduce_vol_load(&comp_bdev->backing_dev, vbdev_reduce_load_cb, comp_bdev);
155007fe6a43SSeth Howell }
155107fe6a43SSeth Howell 
15522172c432STomasz Zawadzki SPDK_LOG_REGISTER_COMPONENT(vbdev_compress)
1553