1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 2a6dbe372Spaul luse * Copyright (C) 2018 Intel Corporation. 307fe6a43SSeth Howell * All rights reserved. 48d48071aSAlexey Marchuk * Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 507fe6a43SSeth Howell */ 607fe6a43SSeth Howell 707fe6a43SSeth Howell #include "vbdev_compress.h" 807fe6a43SSeth Howell 907fe6a43SSeth Howell #include "spdk/reduce.h" 1007fe6a43SSeth Howell #include "spdk/stdinc.h" 1107fe6a43SSeth Howell #include "spdk/rpc.h" 1207fe6a43SSeth Howell #include "spdk/env.h" 1307fe6a43SSeth Howell #include "spdk/endian.h" 1407fe6a43SSeth Howell #include "spdk/string.h" 1507fe6a43SSeth Howell #include "spdk/thread.h" 1607fe6a43SSeth Howell #include "spdk/util.h" 1707fe6a43SSeth Howell #include "spdk/bdev_module.h" 18ce49d2f9SAlexey Marchuk #include "spdk/likely.h" 194e8e97c8STomasz Zawadzki #include "spdk/log.h" 20bb5083a8Spaul luse #include "spdk/accel.h" 2107fe6a43SSeth Howell 225d2d59beSKonrad Sztyber #include "spdk/accel_module.h" 23ec2e6e2bSpaul luse 2407fe6a43SSeth Howell #define CHUNK_SIZE (1024 * 16) 2507fe6a43SSeth Howell #define COMP_BDEV_NAME "compress" 2607fe6a43SSeth Howell #define BACKING_IO_SZ (4 * 1024) 2707fe6a43SSeth Howell 28d83e87f8SKrzysztof Karas /* This namespace UUID was generated using uuid_generate() method. */ 29d83e87f8SKrzysztof Karas #define BDEV_COMPRESS_NAMESPACE_UUID "c3fad6da-832f-4cc0-9cdc-5c552b225e7b" 30d83e87f8SKrzysztof Karas 31fcf8e454SShuhei Matsumoto struct vbdev_comp_delete_ctx { 32fcf8e454SShuhei Matsumoto spdk_delete_compress_complete cb_fn; 33fcf8e454SShuhei Matsumoto void *cb_arg; 34fcf8e454SShuhei Matsumoto int cb_rc; 35fcf8e454SShuhei Matsumoto struct spdk_thread *orig_thread; 36fcf8e454SShuhei Matsumoto }; 37fcf8e454SShuhei Matsumoto 3807fe6a43SSeth Howell /* List of virtual bdevs and associated info for each. 
*/ 3907fe6a43SSeth Howell struct vbdev_compress { 4007fe6a43SSeth Howell struct spdk_bdev *base_bdev; /* the thing we're attaching to */ 4107fe6a43SSeth Howell struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ 4207fe6a43SSeth Howell struct spdk_io_channel *base_ch; /* IO channel of base device */ 4307fe6a43SSeth Howell struct spdk_bdev comp_bdev; /* the compression virtual bdev */ 4407fe6a43SSeth Howell struct comp_io_channel *comp_ch; /* channel associated with this bdev */ 45bb5083a8Spaul luse struct spdk_io_channel *accel_channel; /* to communicate with the accel framework */ 4607fe6a43SSeth Howell struct spdk_thread *reduce_thread; 4707fe6a43SSeth Howell pthread_mutex_t reduce_lock; 4807fe6a43SSeth Howell uint32_t ch_count; 4907fe6a43SSeth Howell TAILQ_HEAD(, spdk_bdev_io) pending_comp_ios; /* outstanding operations to a comp library */ 5007fe6a43SSeth Howell struct spdk_poller *poller; /* completion poller */ 5107fe6a43SSeth Howell struct spdk_reduce_vol_params params; /* params for the reduce volume */ 5207fe6a43SSeth Howell struct spdk_reduce_backing_dev backing_dev; /* backing device info for the reduce volume */ 5307fe6a43SSeth Howell struct spdk_reduce_vol *vol; /* the reduce volume */ 54fcf8e454SShuhei Matsumoto struct vbdev_comp_delete_ctx *delete_ctx; 5507fe6a43SSeth Howell bool orphaned; /* base bdev claimed but comp_bdev not registered */ 5685724ba2SShuhei Matsumoto int reduce_errno; 5707fe6a43SSeth Howell TAILQ_HEAD(, vbdev_comp_op) queued_comp_ops; 5807fe6a43SSeth Howell TAILQ_ENTRY(vbdev_compress) link; 59b3be320dSGangCao struct spdk_thread *thread; /* thread where base device is opened */ 60ddd4603cSYankun Li enum spdk_accel_comp_algo comp_algo; /* compression algorithm for compress bdev */ 61ddd4603cSYankun Li uint32_t comp_level; /* compression algorithm level */ 62*45379ed8SYankun Li bool init_failed; /* compress bdev initialization failed */ 6307fe6a43SSeth Howell }; 6407fe6a43SSeth Howell static TAILQ_HEAD(, 
vbdev_compress) g_vbdev_comp = TAILQ_HEAD_INITIALIZER(g_vbdev_comp); 6507fe6a43SSeth Howell 6607fe6a43SSeth Howell /* The comp vbdev channel struct. It is allocated and freed on my behalf by the io channel code. 6707fe6a43SSeth Howell */ 6807fe6a43SSeth Howell struct comp_io_channel { 6907fe6a43SSeth Howell struct spdk_io_channel_iter *iter; /* used with for_each_channel in reset */ 7007fe6a43SSeth Howell }; 7107fe6a43SSeth Howell 7207fe6a43SSeth Howell /* Per I/O context for the compression vbdev. */ 7307fe6a43SSeth Howell struct comp_bdev_io { 7407fe6a43SSeth Howell struct comp_io_channel *comp_ch; /* used in completion handling */ 7507fe6a43SSeth Howell struct vbdev_compress *comp_bdev; /* vbdev associated with this IO */ 7607fe6a43SSeth Howell struct spdk_bdev_io_wait_entry bdev_io_wait; /* for bdev_io_wait */ 7707fe6a43SSeth Howell struct spdk_bdev_io *orig_io; /* the original IO */ 7807fe6a43SSeth Howell int status; /* save for completion on orig thread */ 7907fe6a43SSeth Howell }; 8007fe6a43SSeth Howell 8107fe6a43SSeth Howell static void vbdev_compress_examine(struct spdk_bdev *bdev); 82c3ed33f4SShuhei Matsumoto static int vbdev_compress_claim(struct vbdev_compress *comp_bdev); 83ddd4603cSYankun Li struct vbdev_compress *_prepare_for_load_init(struct spdk_bdev_desc *bdev_desc, uint32_t lb_size, 84ddd4603cSYankun Li uint8_t comp_algo, uint32_t comp_level); 8507fe6a43SSeth Howell static void vbdev_compress_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); 8607fe6a43SSeth Howell static void comp_bdev_ch_destroy_cb(void *io_device, void *ctx_buf); 87fcf8e454SShuhei Matsumoto static void vbdev_compress_delete_done(void *cb_arg, int bdeverrno); 884e94e54eSYankun Li static void _comp_reduce_resubmit_backing_io(void *_backing_io); 8907fe6a43SSeth Howell 9007fe6a43SSeth Howell /* for completing rw requests on the orig IO thread. 
*/ 9107fe6a43SSeth Howell static void 92347499e7SSeth Howell _reduce_rw_blocks_cb(void *arg) 9307fe6a43SSeth Howell { 9407fe6a43SSeth Howell struct comp_bdev_io *io_ctx = arg; 9507fe6a43SSeth Howell 96ce49d2f9SAlexey Marchuk if (spdk_likely(io_ctx->status == 0)) { 9707fe6a43SSeth Howell spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS); 98ce49d2f9SAlexey Marchuk } else if (io_ctx->status == -ENOMEM) { 993b207588SYankun Li spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_NOMEM); 10007fe6a43SSeth Howell } else { 1015251fc2dSYankun Li SPDK_ERRLOG("Failed to execute reduce api. %s\n", spdk_strerror(-io_ctx->status)); 10207fe6a43SSeth Howell spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_FAILED); 10307fe6a43SSeth Howell } 10407fe6a43SSeth Howell } 10507fe6a43SSeth Howell 10607fe6a43SSeth Howell /* Completion callback for r/w that were issued via reducelib. */ 10707fe6a43SSeth Howell static void 108347499e7SSeth Howell reduce_rw_blocks_cb(void *arg, int reduce_errno) 10907fe6a43SSeth Howell { 11007fe6a43SSeth Howell struct spdk_bdev_io *bdev_io = arg; 11107fe6a43SSeth Howell struct comp_bdev_io *io_ctx = (struct comp_bdev_io *)bdev_io->driver_ctx; 11207fe6a43SSeth Howell struct spdk_io_channel *ch = spdk_io_channel_from_ctx(io_ctx->comp_ch); 11329252a48SShuhei Matsumoto struct spdk_thread *orig_thread; 11407fe6a43SSeth Howell 11507fe6a43SSeth Howell /* TODO: need to decide which error codes are bdev_io success vs failure; 11607fe6a43SSeth Howell * example examine calls reading metadata */ 11707fe6a43SSeth Howell 11807fe6a43SSeth Howell io_ctx->status = reduce_errno; 11907fe6a43SSeth Howell 12007fe6a43SSeth Howell /* Send this request to the orig IO thread. 
*/ 12129252a48SShuhei Matsumoto orig_thread = spdk_io_channel_get_thread(ch); 1225f270928SJohn Levon 1235f270928SJohn Levon spdk_thread_exec_msg(orig_thread, _reduce_rw_blocks_cb, io_ctx); 12407fe6a43SSeth Howell } 12507fe6a43SSeth Howell 126bfd7fcb8SAlexey Marchuk static int 12707fe6a43SSeth Howell _compress_operation(struct spdk_reduce_backing_dev *backing_dev, struct iovec *src_iovs, 12807fe6a43SSeth Howell int src_iovcnt, struct iovec *dst_iovs, 12907fe6a43SSeth Howell int dst_iovcnt, bool compress, void *cb_arg) 13007fe6a43SSeth Howell { 131bb5083a8Spaul luse struct spdk_reduce_vol_cb_args *reduce_cb_arg = cb_arg; 13207fe6a43SSeth Howell struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(backing_dev, struct vbdev_compress, 13307fe6a43SSeth Howell backing_dev); 134bb5083a8Spaul luse int rc; 13507fe6a43SSeth Howell 136bb5083a8Spaul luse if (compress) { 137bb5083a8Spaul luse assert(dst_iovcnt == 1); 138ddd4603cSYankun Li rc = spdk_accel_submit_compress_ext(comp_bdev->accel_channel, dst_iovs[0].iov_base, 139ddd4603cSYankun Li dst_iovs[0].iov_len, src_iovs, src_iovcnt, 140ddd4603cSYankun Li comp_bdev->comp_algo, comp_bdev->comp_level, 141ddd4603cSYankun Li &reduce_cb_arg->output_size, reduce_cb_arg->cb_fn, 142ddd4603cSYankun Li reduce_cb_arg->cb_arg); 143f530abcaSAlexey Marchuk } else { 144ddd4603cSYankun Li rc = spdk_accel_submit_decompress_ext(comp_bdev->accel_channel, dst_iovs, dst_iovcnt, 145ddd4603cSYankun Li src_iovs, src_iovcnt, comp_bdev->comp_algo, 146ddd4603cSYankun Li &reduce_cb_arg->output_size, reduce_cb_arg->cb_fn, 147ddd4603cSYankun Li reduce_cb_arg->cb_arg); 148f530abcaSAlexey Marchuk } 14907fe6a43SSeth Howell 150bfd7fcb8SAlexey Marchuk return rc; 151bfd7fcb8SAlexey Marchuk } 1527c77c292Spaul luse 15307fe6a43SSeth Howell /* Entry point for reduce lib to issue a compress operation. 
*/ 15407fe6a43SSeth Howell static void 15507fe6a43SSeth Howell _comp_reduce_compress(struct spdk_reduce_backing_dev *dev, 15607fe6a43SSeth Howell struct iovec *src_iovs, int src_iovcnt, 15707fe6a43SSeth Howell struct iovec *dst_iovs, int dst_iovcnt, 15807fe6a43SSeth Howell struct spdk_reduce_vol_cb_args *cb_arg) 15907fe6a43SSeth Howell { 16007fe6a43SSeth Howell int rc; 16107fe6a43SSeth Howell 16207fe6a43SSeth Howell rc = _compress_operation(dev, src_iovs, src_iovcnt, dst_iovs, dst_iovcnt, true, cb_arg); 16307fe6a43SSeth Howell if (rc) { 16407fe6a43SSeth Howell SPDK_ERRLOG("with compress operation code %d (%s)\n", rc, spdk_strerror(-rc)); 16507fe6a43SSeth Howell cb_arg->cb_fn(cb_arg->cb_arg, rc); 16607fe6a43SSeth Howell } 16707fe6a43SSeth Howell } 16807fe6a43SSeth Howell 16907fe6a43SSeth Howell /* Entry point for reduce lib to issue a decompress operation. */ 17007fe6a43SSeth Howell static void 17107fe6a43SSeth Howell _comp_reduce_decompress(struct spdk_reduce_backing_dev *dev, 17207fe6a43SSeth Howell struct iovec *src_iovs, int src_iovcnt, 17307fe6a43SSeth Howell struct iovec *dst_iovs, int dst_iovcnt, 17407fe6a43SSeth Howell struct spdk_reduce_vol_cb_args *cb_arg) 17507fe6a43SSeth Howell { 17607fe6a43SSeth Howell int rc; 17707fe6a43SSeth Howell 17807fe6a43SSeth Howell rc = _compress_operation(dev, src_iovs, src_iovcnt, dst_iovs, dst_iovcnt, false, cb_arg); 17907fe6a43SSeth Howell if (rc) { 18007fe6a43SSeth Howell SPDK_ERRLOG("with decompress operation code %d (%s)\n", rc, spdk_strerror(-rc)); 18107fe6a43SSeth Howell cb_arg->cb_fn(cb_arg->cb_arg, rc); 18207fe6a43SSeth Howell } 18307fe6a43SSeth Howell } 18407fe6a43SSeth Howell 18510cb404aSKonrad Sztyber static void 18610cb404aSKonrad Sztyber _comp_submit_write(void *ctx) 18710cb404aSKonrad Sztyber { 18810cb404aSKonrad Sztyber struct spdk_bdev_io *bdev_io = ctx; 18910cb404aSKonrad Sztyber struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress, 19010cb404aSKonrad Sztyber comp_bdev); 
19110cb404aSKonrad Sztyber 19210cb404aSKonrad Sztyber spdk_reduce_vol_writev(comp_bdev->vol, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 19310cb404aSKonrad Sztyber bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, 19410cb404aSKonrad Sztyber reduce_rw_blocks_cb, bdev_io); 19510cb404aSKonrad Sztyber } 19610cb404aSKonrad Sztyber 19710cb404aSKonrad Sztyber static void 19810cb404aSKonrad Sztyber _comp_submit_read(void *ctx) 19910cb404aSKonrad Sztyber { 20010cb404aSKonrad Sztyber struct spdk_bdev_io *bdev_io = ctx; 20110cb404aSKonrad Sztyber struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress, 20210cb404aSKonrad Sztyber comp_bdev); 20310cb404aSKonrad Sztyber 20410cb404aSKonrad Sztyber spdk_reduce_vol_readv(comp_bdev->vol, bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 20510cb404aSKonrad Sztyber bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks, 20610cb404aSKonrad Sztyber reduce_rw_blocks_cb, bdev_io); 20710cb404aSKonrad Sztyber } 20810cb404aSKonrad Sztyber 20910cb404aSKonrad Sztyber 21007fe6a43SSeth Howell /* Callback for getting a buf from the bdev pool in the event that the caller passed 21107fe6a43SSeth Howell * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module 21207fe6a43SSeth Howell * beneath us before we're done with it. 
21307fe6a43SSeth Howell */ 21407fe6a43SSeth Howell static void 21507fe6a43SSeth Howell comp_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success) 21607fe6a43SSeth Howell { 21707fe6a43SSeth Howell struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress, 21807fe6a43SSeth Howell comp_bdev); 21907fe6a43SSeth Howell 220ce49d2f9SAlexey Marchuk if (spdk_unlikely(!success)) { 221ce49d2f9SAlexey Marchuk SPDK_ERRLOG("Failed to get data buffer\n"); 222ce49d2f9SAlexey Marchuk reduce_rw_blocks_cb(bdev_io, -ENOMEM); 223ce49d2f9SAlexey Marchuk return; 224ce49d2f9SAlexey Marchuk } 225ce49d2f9SAlexey Marchuk 22610cb404aSKonrad Sztyber spdk_thread_exec_msg(comp_bdev->reduce_thread, _comp_submit_read, bdev_io); 227ce49d2f9SAlexey Marchuk } 22807fe6a43SSeth Howell 229412fced1SYalong Wang struct partial_chunk_info { 230412fced1SYalong Wang uint64_t chunk_idx; 231412fced1SYalong Wang uint64_t block_offset; 232412fced1SYalong Wang uint64_t block_length; 233412fced1SYalong Wang }; 234412fced1SYalong Wang 235412fced1SYalong Wang /* 236412fced1SYalong Wang * It's a structure used to hold information needed during the execution of an unmap operation. 
237412fced1SYalong Wang */ 238412fced1SYalong Wang struct compress_unmap_split_ctx { 239412fced1SYalong Wang struct spdk_bdev_io *bdev_io; 240412fced1SYalong Wang int32_t status; 241412fced1SYalong Wang uint32_t logical_blocks_per_chunk; 242412fced1SYalong Wang /* The first chunk that can be fully covered by the unmap bdevio interval */ 243412fced1SYalong Wang uint64_t full_chunk_idx_b; 244412fced1SYalong Wang /* The last chunk that can be fully covered by the unmap bdevio interval */ 245412fced1SYalong Wang uint64_t full_chunk_idx_e; 246412fced1SYalong Wang uint64_t num_full_chunks; 247412fced1SYalong Wang uint64_t num_full_chunks_consumed; 248412fced1SYalong Wang uint32_t num_partial_chunks; 249412fced1SYalong Wang uint32_t num_partial_chunks_consumed; 250412fced1SYalong Wang /* Used to hold the partial chunk information. There will only be less than or equal to two, 251412fced1SYalong Wang because chunks that cannot be fully covered will only appear at the beginning or end or both two. */ 252412fced1SYalong Wang struct partial_chunk_info partial_chunk_info[2]; 253412fced1SYalong Wang }; 254412fced1SYalong Wang 255412fced1SYalong Wang static void _comp_unmap_subcmd_done_cb(void *ctx, int error); 256412fced1SYalong Wang 257412fced1SYalong Wang /* 258412fced1SYalong Wang * This function processes the unmap operation for both full and partial chunks in a 259412fced1SYalong Wang * compressed block device. It iteratively submits unmap requests until all the chunks 260412fced1SYalong Wang * have been unmapped or an error occurs. 
261412fced1SYalong Wang */ 262412fced1SYalong Wang static void 263412fced1SYalong Wang _comp_submit_unmap_split(void *ctx) 264412fced1SYalong Wang { 265412fced1SYalong Wang struct compress_unmap_split_ctx *split_ctx = ctx; 266412fced1SYalong Wang struct spdk_bdev_io *bdev_io = split_ctx->bdev_io; 267412fced1SYalong Wang struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress, 268412fced1SYalong Wang comp_bdev); 269412fced1SYalong Wang struct partial_chunk_info *partial_chunk = NULL; 270412fced1SYalong Wang uint64_t chunk_idx = 0; 271412fced1SYalong Wang uint64_t block_offset = 0; 272412fced1SYalong Wang uint64_t block_length = 0; 273412fced1SYalong Wang 274412fced1SYalong Wang if (split_ctx->status != 0 || 275412fced1SYalong Wang (split_ctx->num_full_chunks_consumed == split_ctx->num_full_chunks && 276412fced1SYalong Wang split_ctx->num_partial_chunks_consumed == split_ctx->num_partial_chunks)) { 277412fced1SYalong Wang reduce_rw_blocks_cb(bdev_io, split_ctx->status); 278412fced1SYalong Wang free(split_ctx); 279412fced1SYalong Wang return; 280412fced1SYalong Wang } 281412fced1SYalong Wang 282412fced1SYalong Wang if (split_ctx->num_full_chunks_consumed < split_ctx->num_full_chunks) { 283412fced1SYalong Wang chunk_idx = split_ctx->full_chunk_idx_b + split_ctx->num_full_chunks_consumed; 284412fced1SYalong Wang block_offset = chunk_idx * split_ctx->logical_blocks_per_chunk; 285412fced1SYalong Wang block_length = split_ctx->logical_blocks_per_chunk; 286412fced1SYalong Wang 287412fced1SYalong Wang split_ctx->num_full_chunks_consumed++; 288412fced1SYalong Wang spdk_reduce_vol_unmap(comp_bdev->vol, 289412fced1SYalong Wang block_offset, block_length, 290412fced1SYalong Wang _comp_unmap_subcmd_done_cb, split_ctx); 291412fced1SYalong Wang } else if (split_ctx->num_partial_chunks_consumed < split_ctx->num_partial_chunks) { 292412fced1SYalong Wang partial_chunk = &split_ctx->partial_chunk_info[split_ctx->num_partial_chunks_consumed]; 
293412fced1SYalong Wang block_offset = partial_chunk->chunk_idx * split_ctx->logical_blocks_per_chunk + 294412fced1SYalong Wang partial_chunk->block_offset; 295412fced1SYalong Wang block_length = partial_chunk->block_length; 296412fced1SYalong Wang 297412fced1SYalong Wang split_ctx->num_partial_chunks_consumed++; 298412fced1SYalong Wang spdk_reduce_vol_unmap(comp_bdev->vol, 299412fced1SYalong Wang block_offset, block_length, 300412fced1SYalong Wang _comp_unmap_subcmd_done_cb, split_ctx); 301412fced1SYalong Wang } else { 302412fced1SYalong Wang assert(false); 303412fced1SYalong Wang } 304412fced1SYalong Wang } 305412fced1SYalong Wang 306412fced1SYalong Wang /* 307412fced1SYalong Wang * When mkfs or fstrim, large unmap requests may be generated. 308412fced1SYalong Wang * Large request will be split into multiple subcmds and processed recursively. 309412fced1SYalong Wang * Run too many subcmds recursively may cause stack overflow or monopolize the thread, 310412fced1SYalong Wang * delaying other tasks. To avoid this, next subcmd need to be processed asynchronously 311412fced1SYalong Wang * by 'spdk_thread_send_msg'. 312412fced1SYalong Wang */ 313412fced1SYalong Wang static void 314412fced1SYalong Wang _comp_unmap_subcmd_done_cb(void *ctx, int error) 315412fced1SYalong Wang { 316412fced1SYalong Wang struct compress_unmap_split_ctx *split_ctx = ctx; 317412fced1SYalong Wang 318412fced1SYalong Wang split_ctx->status = error; 319412fced1SYalong Wang spdk_thread_send_msg(spdk_get_thread(), _comp_submit_unmap_split, split_ctx); 320412fced1SYalong Wang } 321412fced1SYalong Wang 322412fced1SYalong Wang /* 323412fced1SYalong Wang * This function splits the unmap operation into full and partial chunks based on the 324412fced1SYalong Wang * block range specified in the 'spdk_bdev_io' structure. 
It calculates the start and end 325412fced1SYalong Wang * chunks, as well as any partial chunks at the beginning or end of the range, and prepares 326412fced1SYalong Wang * a context (compress_unmap_split_ctx) to handle these chunks. The unmap operation is 327412fced1SYalong Wang * then submitted for processing through '_comp_submit_unmap_split'. 328412fced1SYalong Wang * some cases to handle: 329412fced1SYalong Wang * 1. start and end chunks are different 330412fced1SYalong Wang * 1.1 start and end chunks are full 331412fced1SYalong Wang * 1.2 start and end chunks are partial 332412fced1SYalong Wang * 1.3 start or end chunk is full and the other is partial 333412fced1SYalong Wang * 2. start and end chunks are the same 334412fced1SYalong Wang * 2.1 full 335412fced1SYalong Wang * 2.2 partial 336412fced1SYalong Wang */ 337412fced1SYalong Wang static void 338412fced1SYalong Wang _comp_submit_unmap(void *ctx) 339412fced1SYalong Wang { 340412fced1SYalong Wang struct spdk_bdev_io *bdev_io = ctx; 341412fced1SYalong Wang struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress, 342412fced1SYalong Wang comp_bdev); 343412fced1SYalong Wang const struct spdk_reduce_vol_params *vol_params = spdk_reduce_vol_get_params(comp_bdev->vol); 344412fced1SYalong Wang struct compress_unmap_split_ctx *split_ctx; 345412fced1SYalong Wang struct partial_chunk_info *partial_chunk; 346412fced1SYalong Wang uint32_t logical_blocks_per_chunk; 347412fced1SYalong Wang uint64_t start_chunk, end_chunk, start_offset, end_tail; 348412fced1SYalong Wang 349412fced1SYalong Wang logical_blocks_per_chunk = vol_params->chunk_size / vol_params->logical_block_size; 350412fced1SYalong Wang start_chunk = bdev_io->u.bdev.offset_blocks / logical_blocks_per_chunk; 351412fced1SYalong Wang end_chunk = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) / 352412fced1SYalong Wang logical_blocks_per_chunk; 353412fced1SYalong Wang start_offset = bdev_io->u.bdev.offset_blocks % 
logical_blocks_per_chunk; 354412fced1SYalong Wang end_tail = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks) % 355412fced1SYalong Wang logical_blocks_per_chunk; 356412fced1SYalong Wang 357412fced1SYalong Wang split_ctx = calloc(1, sizeof(struct compress_unmap_split_ctx)); 358412fced1SYalong Wang if (split_ctx == NULL) { 359412fced1SYalong Wang reduce_rw_blocks_cb(bdev_io, -ENOMEM); 360412fced1SYalong Wang return; 361412fced1SYalong Wang } 362412fced1SYalong Wang partial_chunk = split_ctx->partial_chunk_info; 363412fced1SYalong Wang split_ctx->bdev_io = bdev_io; 364412fced1SYalong Wang split_ctx->logical_blocks_per_chunk = logical_blocks_per_chunk; 365412fced1SYalong Wang 366412fced1SYalong Wang if (start_chunk < end_chunk) { 367412fced1SYalong Wang if (start_offset != 0) { 368412fced1SYalong Wang partial_chunk[split_ctx->num_partial_chunks].chunk_idx = start_chunk; 369412fced1SYalong Wang partial_chunk[split_ctx->num_partial_chunks].block_offset = start_offset; 370412fced1SYalong Wang partial_chunk[split_ctx->num_partial_chunks].block_length = logical_blocks_per_chunk 371412fced1SYalong Wang - start_offset; 372412fced1SYalong Wang split_ctx->num_partial_chunks++; 373412fced1SYalong Wang split_ctx->full_chunk_idx_b = start_chunk + 1; 374412fced1SYalong Wang } else { 375412fced1SYalong Wang split_ctx->full_chunk_idx_b = start_chunk; 376412fced1SYalong Wang } 377412fced1SYalong Wang 378412fced1SYalong Wang if (end_tail != 0) { 379412fced1SYalong Wang partial_chunk[split_ctx->num_partial_chunks].chunk_idx = end_chunk; 380412fced1SYalong Wang partial_chunk[split_ctx->num_partial_chunks].block_offset = 0; 381412fced1SYalong Wang partial_chunk[split_ctx->num_partial_chunks].block_length = end_tail; 382412fced1SYalong Wang split_ctx->num_partial_chunks++; 383412fced1SYalong Wang split_ctx->full_chunk_idx_e = end_chunk - 1; 384412fced1SYalong Wang } else { 385412fced1SYalong Wang split_ctx->full_chunk_idx_e = end_chunk; 386412fced1SYalong Wang } 
387412fced1SYalong Wang 388412fced1SYalong Wang split_ctx->num_full_chunks = end_chunk - start_chunk + 1 - split_ctx->num_partial_chunks; 389412fced1SYalong Wang 390412fced1SYalong Wang if (split_ctx->num_full_chunks) { 391412fced1SYalong Wang assert(split_ctx->full_chunk_idx_b != UINT64_MAX && split_ctx->full_chunk_idx_e != UINT64_MAX); 392412fced1SYalong Wang assert(split_ctx->full_chunk_idx_e - split_ctx->full_chunk_idx_b + 1 == split_ctx->num_full_chunks); 393412fced1SYalong Wang } else { 394412fced1SYalong Wang assert(split_ctx->full_chunk_idx_b - split_ctx->full_chunk_idx_e == 1); 395412fced1SYalong Wang } 396412fced1SYalong Wang } else if (start_offset != 0 || end_tail != 0) { 397412fced1SYalong Wang partial_chunk[0].chunk_idx = start_chunk; 398412fced1SYalong Wang partial_chunk[0].block_offset = start_offset; 399412fced1SYalong Wang partial_chunk[0].block_length = 400412fced1SYalong Wang bdev_io->u.bdev.num_blocks; 401412fced1SYalong Wang split_ctx->num_partial_chunks = 1; 402412fced1SYalong Wang } else { 403412fced1SYalong Wang split_ctx->full_chunk_idx_b = start_chunk; 404412fced1SYalong Wang split_ctx->full_chunk_idx_e = end_chunk; 405412fced1SYalong Wang split_ctx->num_full_chunks = 1; 406412fced1SYalong Wang } 407412fced1SYalong Wang assert(split_ctx->num_partial_chunks <= SPDK_COUNTOF(split_ctx->partial_chunk_info)); 408412fced1SYalong Wang 409412fced1SYalong Wang _comp_submit_unmap_split(split_ctx); 410412fced1SYalong Wang } 411412fced1SYalong Wang 41207fe6a43SSeth Howell /* Called when someone above submits IO to this vbdev. 
*/ 41307fe6a43SSeth Howell static void 41407fe6a43SSeth Howell vbdev_compress_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 41507fe6a43SSeth Howell { 41607fe6a43SSeth Howell struct comp_bdev_io *io_ctx = (struct comp_bdev_io *)bdev_io->driver_ctx; 41707fe6a43SSeth Howell struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_compress, 41807fe6a43SSeth Howell comp_bdev); 41907fe6a43SSeth Howell struct comp_io_channel *comp_ch = spdk_io_channel_get_ctx(ch); 42007fe6a43SSeth Howell 42107fe6a43SSeth Howell memset(io_ctx, 0, sizeof(struct comp_bdev_io)); 42207fe6a43SSeth Howell io_ctx->comp_bdev = comp_bdev; 42307fe6a43SSeth Howell io_ctx->comp_ch = comp_ch; 42407fe6a43SSeth Howell io_ctx->orig_io = bdev_io; 42507fe6a43SSeth Howell 42610cb404aSKonrad Sztyber switch (bdev_io->type) { 42710cb404aSKonrad Sztyber case SPDK_BDEV_IO_TYPE_READ: 42810cb404aSKonrad Sztyber spdk_bdev_io_get_buf(bdev_io, comp_read_get_buf_cb, 42910cb404aSKonrad Sztyber bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 43010cb404aSKonrad Sztyber return; 43110cb404aSKonrad Sztyber case SPDK_BDEV_IO_TYPE_WRITE: 43210cb404aSKonrad Sztyber spdk_thread_exec_msg(comp_bdev->reduce_thread, _comp_submit_write, bdev_io); 43310cb404aSKonrad Sztyber return; 434412fced1SYalong Wang case SPDK_BDEV_IO_TYPE_UNMAP: 435412fced1SYalong Wang spdk_thread_exec_msg(comp_bdev->reduce_thread, _comp_submit_unmap, bdev_io); 436412fced1SYalong Wang return; 43710cb404aSKonrad Sztyber /* TODO support RESET in future patch in the series */ 43810cb404aSKonrad Sztyber case SPDK_BDEV_IO_TYPE_RESET: 43910cb404aSKonrad Sztyber case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 44010cb404aSKonrad Sztyber case SPDK_BDEV_IO_TYPE_FLUSH: 44110cb404aSKonrad Sztyber default: 44210cb404aSKonrad Sztyber SPDK_ERRLOG("Unknown I/O type %d\n", bdev_io->type); 44310cb404aSKonrad Sztyber spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_FAILED); 44410cb404aSKonrad Sztyber break; 44507fe6a43SSeth 
Howell } 44607fe6a43SSeth Howell } 44707fe6a43SSeth Howell 44807fe6a43SSeth Howell static bool 44907fe6a43SSeth Howell vbdev_compress_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 45007fe6a43SSeth Howell { 45107fe6a43SSeth Howell struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx; 45207fe6a43SSeth Howell 45307fe6a43SSeth Howell switch (io_type) { 45407fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_READ: 45507fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_WRITE: 45607fe6a43SSeth Howell return spdk_bdev_io_type_supported(comp_bdev->base_bdev, io_type); 45707fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_UNMAP: 458412fced1SYalong Wang return true; 45907fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_RESET: 46007fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_FLUSH: 46107fe6a43SSeth Howell case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 46207fe6a43SSeth Howell default: 46307fe6a43SSeth Howell return false; 46407fe6a43SSeth Howell } 46507fe6a43SSeth Howell } 46607fe6a43SSeth Howell 46707fe6a43SSeth Howell /* Callback for unregistering the IO device. */ 46807fe6a43SSeth Howell static void 46907fe6a43SSeth Howell _device_unregister_cb(void *io_device) 47007fe6a43SSeth Howell { 47107fe6a43SSeth Howell struct vbdev_compress *comp_bdev = io_device; 47207fe6a43SSeth Howell 47307fe6a43SSeth Howell /* Done with this comp_bdev. */ 47407fe6a43SSeth Howell pthread_mutex_destroy(&comp_bdev->reduce_lock); 47507fe6a43SSeth Howell free(comp_bdev->comp_bdev.name); 47607fe6a43SSeth Howell free(comp_bdev); 47707fe6a43SSeth Howell } 47807fe6a43SSeth Howell 47907fe6a43SSeth Howell static void 480b3be320dSGangCao _vbdev_compress_destruct_cb(void *ctx) 481b3be320dSGangCao { 48231d26015SShuhei Matsumoto struct vbdev_compress *comp_bdev = ctx; 483b3be320dSGangCao 48431d26015SShuhei Matsumoto /* Close the underlying bdev on its same opened thread. 
*/ 48531d26015SShuhei Matsumoto spdk_bdev_close(comp_bdev->base_desc); 48631d26015SShuhei Matsumoto comp_bdev->vol = NULL; 487*45379ed8SYankun Li if (comp_bdev->init_failed) { 488*45379ed8SYankun Li free(comp_bdev); 489*45379ed8SYankun Li return; 490*45379ed8SYankun Li } 491*45379ed8SYankun Li 492*45379ed8SYankun Li TAILQ_REMOVE(&g_vbdev_comp, comp_bdev, link); 493*45379ed8SYankun Li spdk_bdev_module_release_bdev(comp_bdev->base_bdev); 494*45379ed8SYankun Li 49531d26015SShuhei Matsumoto if (comp_bdev->orphaned == false) { 49631d26015SShuhei Matsumoto spdk_io_device_unregister(comp_bdev, _device_unregister_cb); 49731d26015SShuhei Matsumoto } else { 49831d26015SShuhei Matsumoto vbdev_compress_delete_done(comp_bdev->delete_ctx, 0); 49931d26015SShuhei Matsumoto _device_unregister_cb(comp_bdev); 50031d26015SShuhei Matsumoto } 501b3be320dSGangCao } 502b3be320dSGangCao 503b3be320dSGangCao static void 50407fe6a43SSeth Howell vbdev_compress_destruct_cb(void *cb_arg, int reduce_errno) 50507fe6a43SSeth Howell { 50607fe6a43SSeth Howell struct vbdev_compress *comp_bdev = (struct vbdev_compress *)cb_arg; 50707fe6a43SSeth Howell 50807fe6a43SSeth Howell if (reduce_errno) { 50907fe6a43SSeth Howell SPDK_ERRLOG("number %d\n", reduce_errno); 51007fe6a43SSeth Howell } else { 511b3be320dSGangCao if (comp_bdev->thread && comp_bdev->thread != spdk_get_thread()) { 51231d26015SShuhei Matsumoto spdk_thread_send_msg(comp_bdev->thread, 51331d26015SShuhei Matsumoto _vbdev_compress_destruct_cb, comp_bdev); 514b3be320dSGangCao } else { 51531d26015SShuhei Matsumoto _vbdev_compress_destruct_cb(comp_bdev); 51607fe6a43SSeth Howell } 51707fe6a43SSeth Howell } 51807fe6a43SSeth Howell } 51907fe6a43SSeth Howell 52007fe6a43SSeth Howell static void 52107fe6a43SSeth Howell _reduce_destroy_cb(void *ctx, int reduce_errno) 52207fe6a43SSeth Howell { 52307fe6a43SSeth Howell struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx; 52407fe6a43SSeth Howell 52507fe6a43SSeth Howell if (reduce_errno) { 
52607fe6a43SSeth Howell SPDK_ERRLOG("number %d\n", reduce_errno); 52707fe6a43SSeth Howell } 52807fe6a43SSeth Howell 52907fe6a43SSeth Howell comp_bdev->vol = NULL; 53007fe6a43SSeth Howell spdk_put_io_channel(comp_bdev->base_ch); 531*45379ed8SYankun Li if (comp_bdev->init_failed || comp_bdev->orphaned) { 532*45379ed8SYankun Li vbdev_compress_destruct_cb((void *)comp_bdev, 0); 533*45379ed8SYankun Li } else { 534fcf8e454SShuhei Matsumoto spdk_bdev_unregister(&comp_bdev->comp_bdev, vbdev_compress_delete_done, 535fcf8e454SShuhei Matsumoto comp_bdev->delete_ctx); 53607fe6a43SSeth Howell } 53707fe6a43SSeth Howell 53807fe6a43SSeth Howell } 53907fe6a43SSeth Howell 5406a98b18fSShuhei Matsumoto static void 5416a98b18fSShuhei Matsumoto _delete_vol_unload_cb(void *ctx) 5426a98b18fSShuhei Matsumoto { 5436a98b18fSShuhei Matsumoto struct vbdev_compress *comp_bdev = ctx; 5446a98b18fSShuhei Matsumoto 5451960ef16SJosh Soref /* FIXME: Assert if these conditions are not satisfied for now. */ 5466a98b18fSShuhei Matsumoto assert(!comp_bdev->reduce_thread || 5476a98b18fSShuhei Matsumoto comp_bdev->reduce_thread == spdk_get_thread()); 5486a98b18fSShuhei Matsumoto 5496a98b18fSShuhei Matsumoto /* reducelib needs a channel to comm with the backing device */ 5506a98b18fSShuhei Matsumoto comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc); 5516a98b18fSShuhei Matsumoto 5526a98b18fSShuhei Matsumoto /* Clean the device before we free our resources. 
*/ 5536a98b18fSShuhei Matsumoto spdk_reduce_vol_destroy(&comp_bdev->backing_dev, _reduce_destroy_cb, comp_bdev); 5546a98b18fSShuhei Matsumoto } 5556a98b18fSShuhei Matsumoto 55607fe6a43SSeth Howell /* Called by reduceLib after performing unload vol actions */ 55707fe6a43SSeth Howell static void 55807fe6a43SSeth Howell delete_vol_unload_cb(void *cb_arg, int reduce_errno) 55907fe6a43SSeth Howell { 56007fe6a43SSeth Howell struct vbdev_compress *comp_bdev = (struct vbdev_compress *)cb_arg; 56107fe6a43SSeth Howell 56207fe6a43SSeth Howell if (reduce_errno) { 56301b28622SYankun Li SPDK_ERRLOG("Failed to unload vol, error %s\n", spdk_strerror(-reduce_errno)); 56401b28622SYankun Li vbdev_compress_delete_done(comp_bdev->delete_ctx, reduce_errno); 5656a98b18fSShuhei Matsumoto return; 5666a98b18fSShuhei Matsumoto } 56707fe6a43SSeth Howell 5686a98b18fSShuhei Matsumoto pthread_mutex_lock(&comp_bdev->reduce_lock); 5696a98b18fSShuhei Matsumoto if (comp_bdev->reduce_thread && comp_bdev->reduce_thread != spdk_get_thread()) { 5706a98b18fSShuhei Matsumoto spdk_thread_send_msg(comp_bdev->reduce_thread, 5716a98b18fSShuhei Matsumoto _delete_vol_unload_cb, comp_bdev); 5726a98b18fSShuhei Matsumoto pthread_mutex_unlock(&comp_bdev->reduce_lock); 5736a98b18fSShuhei Matsumoto } else { 5746a98b18fSShuhei Matsumoto pthread_mutex_unlock(&comp_bdev->reduce_lock); 5756a98b18fSShuhei Matsumoto 5766a98b18fSShuhei Matsumoto _delete_vol_unload_cb(comp_bdev); 57707fe6a43SSeth Howell } 57807fe6a43SSeth Howell } 57907fe6a43SSeth Howell 58007fe6a43SSeth Howell const char * 58107fe6a43SSeth Howell compress_get_name(const struct vbdev_compress *comp_bdev) 58207fe6a43SSeth Howell { 58307fe6a43SSeth Howell return comp_bdev->comp_bdev.name; 58407fe6a43SSeth Howell } 58507fe6a43SSeth Howell 58607fe6a43SSeth Howell struct vbdev_compress * 58707fe6a43SSeth Howell compress_bdev_first(void) 58807fe6a43SSeth Howell { 58907fe6a43SSeth Howell struct vbdev_compress *comp_bdev; 59007fe6a43SSeth Howell 59107fe6a43SSeth 
Howell comp_bdev = TAILQ_FIRST(&g_vbdev_comp); 59207fe6a43SSeth Howell 59307fe6a43SSeth Howell return comp_bdev; 59407fe6a43SSeth Howell } 59507fe6a43SSeth Howell 59607fe6a43SSeth Howell struct vbdev_compress * 59707fe6a43SSeth Howell compress_bdev_next(struct vbdev_compress *prev) 59807fe6a43SSeth Howell { 59907fe6a43SSeth Howell struct vbdev_compress *comp_bdev; 60007fe6a43SSeth Howell 60107fe6a43SSeth Howell comp_bdev = TAILQ_NEXT(prev, link); 60207fe6a43SSeth Howell 60307fe6a43SSeth Howell return comp_bdev; 60407fe6a43SSeth Howell } 60507fe6a43SSeth Howell 60607fe6a43SSeth Howell bool 60707fe6a43SSeth Howell compress_has_orphan(const char *name) 60807fe6a43SSeth Howell { 60907fe6a43SSeth Howell struct vbdev_compress *comp_bdev; 61007fe6a43SSeth Howell 61107fe6a43SSeth Howell TAILQ_FOREACH(comp_bdev, &g_vbdev_comp, link) { 61207fe6a43SSeth Howell if (comp_bdev->orphaned && strcmp(name, comp_bdev->comp_bdev.name) == 0) { 61307fe6a43SSeth Howell return true; 61407fe6a43SSeth Howell } 61507fe6a43SSeth Howell } 61607fe6a43SSeth Howell return false; 61707fe6a43SSeth Howell } 61807fe6a43SSeth Howell 61907fe6a43SSeth Howell /* Called after we've unregistered following a hot remove callback. 62007fe6a43SSeth Howell * Our finish entry point will be called next. 62107fe6a43SSeth Howell */ 62207fe6a43SSeth Howell static int 62307fe6a43SSeth Howell vbdev_compress_destruct(void *ctx) 62407fe6a43SSeth Howell { 62507fe6a43SSeth Howell struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx; 62607fe6a43SSeth Howell 62707fe6a43SSeth Howell if (comp_bdev->vol != NULL) { 62807fe6a43SSeth Howell /* Tell reducelib that we're done with this volume. 
*/ 62907fe6a43SSeth Howell spdk_reduce_vol_unload(comp_bdev->vol, vbdev_compress_destruct_cb, comp_bdev); 63007fe6a43SSeth Howell } else { 63107fe6a43SSeth Howell vbdev_compress_destruct_cb(comp_bdev, 0); 63207fe6a43SSeth Howell } 63307fe6a43SSeth Howell 63407fe6a43SSeth Howell return 0; 63507fe6a43SSeth Howell } 63607fe6a43SSeth Howell 63707fe6a43SSeth Howell /* We supplied this as an entry point for upper layers who want to communicate to this 63807fe6a43SSeth Howell * bdev. This is how they get a channel. 63907fe6a43SSeth Howell */ 64007fe6a43SSeth Howell static struct spdk_io_channel * 64107fe6a43SSeth Howell vbdev_compress_get_io_channel(void *ctx) 64207fe6a43SSeth Howell { 64307fe6a43SSeth Howell struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx; 64407fe6a43SSeth Howell 64507fe6a43SSeth Howell /* The IO channel code will allocate a channel for us which consists of 64607fe6a43SSeth Howell * the SPDK channel structure plus the size of our comp_io_channel struct 64707fe6a43SSeth Howell * that we passed in when we registered our IO device. It will then call 64807fe6a43SSeth Howell * our channel create callback to populate any elements that we need to 64907fe6a43SSeth Howell * update. 
65007fe6a43SSeth Howell */ 65107fe6a43SSeth Howell return spdk_get_io_channel(comp_bdev); 65207fe6a43SSeth Howell } 65307fe6a43SSeth Howell 6542c49e910SMaciej Wawryk /* This is the output for bdev_get_bdevs() for this vbdev */ 65507fe6a43SSeth Howell static int 65607fe6a43SSeth Howell vbdev_compress_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 65707fe6a43SSeth Howell { 65807fe6a43SSeth Howell struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx; 659d70bfa13SYankun Li const struct spdk_reduce_vol_info *vol_info; 66078f92084SYankun Li char *comp_algo = NULL; 66178f92084SYankun Li 66278f92084SYankun Li if (comp_bdev->params.comp_algo == SPDK_ACCEL_COMP_ALGO_LZ4) { 66378f92084SYankun Li comp_algo = "lz4"; 66478f92084SYankun Li } else if (comp_bdev->params.comp_algo == SPDK_ACCEL_COMP_ALGO_DEFLATE) { 66578f92084SYankun Li comp_algo = "deflate"; 66678f92084SYankun Li } else { 66778f92084SYankun Li assert(false); 66878f92084SYankun Li } 66907fe6a43SSeth Howell 67007fe6a43SSeth Howell spdk_json_write_name(w, "compress"); 67107fe6a43SSeth Howell spdk_json_write_object_begin(w); 67207fe6a43SSeth Howell spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&comp_bdev->comp_bdev)); 67307fe6a43SSeth Howell spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(comp_bdev->base_bdev)); 67489648519SYankun Li spdk_json_write_named_string(w, "pm_path", spdk_reduce_vol_get_pm_path(comp_bdev->vol)); 67578f92084SYankun Li spdk_json_write_named_string(w, "comp_algo", comp_algo); 67678f92084SYankun Li spdk_json_write_named_uint32(w, "comp_level", comp_bdev->params.comp_level); 67778f92084SYankun Li spdk_json_write_named_uint32(w, "chunk_size", comp_bdev->params.chunk_size); 67878f92084SYankun Li spdk_json_write_named_uint32(w, "backing_io_unit_size", comp_bdev->params.backing_io_unit_size); 679d70bfa13SYankun Li vol_info = spdk_reduce_vol_get_info(comp_bdev->vol); 680d70bfa13SYankun Li spdk_json_write_named_uint64(w, "allocated_io_units", 
vol_info->allocated_io_units); 68107fe6a43SSeth Howell spdk_json_write_object_end(w); 68207fe6a43SSeth Howell 68307fe6a43SSeth Howell return 0; 68407fe6a43SSeth Howell } 68507fe6a43SSeth Howell 68607fe6a43SSeth Howell static int 68707fe6a43SSeth Howell vbdev_compress_config_json(struct spdk_json_write_ctx *w) 68807fe6a43SSeth Howell { 689adbac36fSYankun Li /* Nothing to dump as compress bdev configuration is saved on physical device. */ 69007fe6a43SSeth Howell return 0; 69107fe6a43SSeth Howell } 69207fe6a43SSeth Howell 693f3cda926SYankun Li struct vbdev_init_reduce_ctx { 694f3cda926SYankun Li struct vbdev_compress *comp_bdev; 695f3cda926SYankun Li int status; 696f3cda926SYankun Li bdev_compress_create_cb cb_fn; 697f3cda926SYankun Li void *cb_ctx; 698f3cda926SYankun Li }; 699f3cda926SYankun Li 700b3be320dSGangCao static void 701*45379ed8SYankun Li _cleanup_vol_unload_cb(void *ctx) 7025734decaSYankun Li { 703*45379ed8SYankun Li struct vbdev_compress *comp_bdev = ctx; 704*45379ed8SYankun Li 705*45379ed8SYankun Li assert(!comp_bdev->reduce_thread || 706*45379ed8SYankun Li comp_bdev->reduce_thread == spdk_get_thread()); 707*45379ed8SYankun Li 708*45379ed8SYankun Li comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc); 709*45379ed8SYankun Li 710*45379ed8SYankun Li spdk_reduce_vol_destroy(&comp_bdev->backing_dev, _reduce_destroy_cb, comp_bdev); 711*45379ed8SYankun Li } 712*45379ed8SYankun Li 713*45379ed8SYankun Li static void 714*45379ed8SYankun Li init_vol_unload_cb(void *ctx, int reduce_errno) 715*45379ed8SYankun Li { 716*45379ed8SYankun Li struct vbdev_compress *comp_bdev = (struct vbdev_compress *)ctx; 717*45379ed8SYankun Li 718*45379ed8SYankun Li if (reduce_errno) { 719*45379ed8SYankun Li SPDK_ERRLOG("Failed to unload vol, error %s\n", spdk_strerror(-reduce_errno)); 720*45379ed8SYankun Li } 721*45379ed8SYankun Li 722*45379ed8SYankun Li pthread_mutex_lock(&comp_bdev->reduce_lock); 723*45379ed8SYankun Li if (comp_bdev->reduce_thread && 
comp_bdev->reduce_thread != spdk_get_thread()) { 724*45379ed8SYankun Li spdk_thread_send_msg(comp_bdev->reduce_thread, 725*45379ed8SYankun Li _cleanup_vol_unload_cb, comp_bdev); 726*45379ed8SYankun Li pthread_mutex_unlock(&comp_bdev->reduce_lock); 727*45379ed8SYankun Li } else { 728*45379ed8SYankun Li pthread_mutex_unlock(&comp_bdev->reduce_lock); 729*45379ed8SYankun Li 730*45379ed8SYankun Li _cleanup_vol_unload_cb(comp_bdev); 731*45379ed8SYankun Li } 7325734decaSYankun Li } 7335734decaSYankun Li 7345734decaSYankun Li static void 735b3be320dSGangCao _vbdev_reduce_init_cb(void *ctx) 736b3be320dSGangCao { 737f3cda926SYankun Li struct vbdev_init_reduce_ctx *init_ctx = ctx; 738f3cda926SYankun Li struct vbdev_compress *comp_bdev = init_ctx->comp_bdev; 739*45379ed8SYankun Li int rc = init_ctx->status; 740c3ed33f4SShuhei Matsumoto 741915dc5d6SYankun Li assert(comp_bdev->base_desc != NULL); 742b3be320dSGangCao 743137684a8SShuhei Matsumoto /* We're done with metadata operations */ 744915dc5d6SYankun Li spdk_put_io_channel(comp_bdev->base_ch); 745137684a8SShuhei Matsumoto 746*45379ed8SYankun Li if (rc != 0) { 747*45379ed8SYankun Li goto err; 748*45379ed8SYankun Li } 749*45379ed8SYankun Li 750*45379ed8SYankun Li assert(comp_bdev->vol != NULL); 751*45379ed8SYankun Li 752915dc5d6SYankun Li rc = vbdev_compress_claim(comp_bdev); 753*45379ed8SYankun Li if (rc != 0) { 754*45379ed8SYankun Li comp_bdev->init_failed = true; 755*45379ed8SYankun Li spdk_reduce_vol_unload(comp_bdev->vol, init_vol_unload_cb, comp_bdev); 756*45379ed8SYankun Li } 757*45379ed8SYankun Li 758f3cda926SYankun Li init_ctx->cb_fn(init_ctx->cb_ctx, rc); 759f3cda926SYankun Li free(init_ctx); 760c3ed33f4SShuhei Matsumoto return; 761b3be320dSGangCao 762*45379ed8SYankun Li err: 763*45379ed8SYankun Li init_ctx->cb_fn(init_ctx->cb_ctx, rc); 764c3ed33f4SShuhei Matsumoto /* Close the underlying bdev on its same opened thread. 
*/ 765915dc5d6SYankun Li spdk_bdev_close(comp_bdev->base_desc); 766915dc5d6SYankun Li free(comp_bdev); 767f3cda926SYankun Li free(init_ctx); 768c3ed33f4SShuhei Matsumoto } 769c3ed33f4SShuhei Matsumoto 77007fe6a43SSeth Howell /* Callback from reduce for when init is complete. We'll pass the vbdev_comp struct 77107fe6a43SSeth Howell * used for initial metadata operations to claim where it will be further filled out 77207fe6a43SSeth Howell * and added to the global list. 77307fe6a43SSeth Howell */ 77407fe6a43SSeth Howell static void 77507fe6a43SSeth Howell vbdev_reduce_init_cb(void *cb_arg, struct spdk_reduce_vol *vol, int reduce_errno) 77607fe6a43SSeth Howell { 777f3cda926SYankun Li struct vbdev_init_reduce_ctx *init_ctx = cb_arg; 778f3cda926SYankun Li struct vbdev_compress *comp_bdev = init_ctx->comp_bdev; 77907fe6a43SSeth Howell 78007fe6a43SSeth Howell if (reduce_errno == 0) { 781915dc5d6SYankun Li comp_bdev->vol = vol; 78207fe6a43SSeth Howell } else { 78327b81860SYankun Li SPDK_ERRLOG("for vol %s, error %s\n", 78427b81860SYankun Li spdk_bdev_get_name(comp_bdev->base_bdev), spdk_strerror(-reduce_errno)); 785137684a8SShuhei Matsumoto } 786137684a8SShuhei Matsumoto 787f3cda926SYankun Li init_ctx->status = reduce_errno; 788f3cda926SYankun Li 789915dc5d6SYankun Li if (comp_bdev->thread && comp_bdev->thread != spdk_get_thread()) { 790f3cda926SYankun Li spdk_thread_send_msg(comp_bdev->thread, _vbdev_reduce_init_cb, init_ctx); 791137684a8SShuhei Matsumoto } else { 792f3cda926SYankun Li _vbdev_reduce_init_cb(init_ctx); 79307fe6a43SSeth Howell } 79407fe6a43SSeth Howell } 79507fe6a43SSeth Howell 79607fe6a43SSeth Howell /* Callback for the function used by reduceLib to perform IO to/from the backing device. We just 79707fe6a43SSeth Howell * call the callback provided by reduceLib when it called the read/write/unmap function and 79807fe6a43SSeth Howell * free the bdev_io. 
79907fe6a43SSeth Howell */ 80007fe6a43SSeth Howell static void 80107fe6a43SSeth Howell comp_reduce_io_cb(struct spdk_bdev_io *bdev_io, bool success, void *arg) 80207fe6a43SSeth Howell { 80307fe6a43SSeth Howell struct spdk_reduce_vol_cb_args *cb_args = arg; 80407fe6a43SSeth Howell int reduce_errno; 80507fe6a43SSeth Howell 80607fe6a43SSeth Howell if (success) { 80707fe6a43SSeth Howell reduce_errno = 0; 80807fe6a43SSeth Howell } else { 80907fe6a43SSeth Howell reduce_errno = -EIO; 81007fe6a43SSeth Howell } 81107fe6a43SSeth Howell spdk_bdev_free_io(bdev_io); 81207fe6a43SSeth Howell cb_args->cb_fn(cb_args->cb_arg, reduce_errno); 81307fe6a43SSeth Howell } 81407fe6a43SSeth Howell 81507fe6a43SSeth Howell static void 8164e94e54eSYankun Li _comp_backing_bdev_queue_io_wait(struct vbdev_compress *comp_bdev, 8174e94e54eSYankun Li struct spdk_reduce_backing_io *backing_io) 8184e94e54eSYankun Li { 8194e94e54eSYankun Li struct spdk_bdev_io_wait_entry *waitq_entry; 8204e94e54eSYankun Li int rc; 8214e94e54eSYankun Li 8224e94e54eSYankun Li waitq_entry = (struct spdk_bdev_io_wait_entry *) &backing_io->user_ctx; 8234e94e54eSYankun Li waitq_entry->bdev = spdk_bdev_desc_get_bdev(comp_bdev->base_desc); 8244e94e54eSYankun Li waitq_entry->cb_fn = _comp_reduce_resubmit_backing_io; 8254e94e54eSYankun Li waitq_entry->cb_arg = backing_io; 8264e94e54eSYankun Li 8274e94e54eSYankun Li rc = spdk_bdev_queue_io_wait(waitq_entry->bdev, comp_bdev->base_ch, waitq_entry); 8284e94e54eSYankun Li if (rc) { 8294e94e54eSYankun Li SPDK_ERRLOG("Queue io failed in _comp_backing_bdev_queue_io_wait, rc=%d.\n", rc); 8304e94e54eSYankun Li assert(false); 8314e94e54eSYankun Li backing_io->backing_cb_args->cb_fn(backing_io->backing_cb_args->cb_arg, rc); 8324e94e54eSYankun Li } 8334e94e54eSYankun Li } 8344e94e54eSYankun Li 8354e94e54eSYankun Li static void 836245271b6SYankun Li _comp_backing_bdev_read(struct spdk_reduce_backing_io *backing_io) 83707fe6a43SSeth Howell { 838245271b6SYankun Li struct spdk_reduce_vol_cb_args 
*backing_cb_args = backing_io->backing_cb_args; 839245271b6SYankun Li struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(backing_io->dev, struct vbdev_compress, 84007fe6a43SSeth Howell backing_dev); 84107fe6a43SSeth Howell int rc; 84207fe6a43SSeth Howell 84307fe6a43SSeth Howell rc = spdk_bdev_readv_blocks(comp_bdev->base_desc, comp_bdev->base_ch, 844245271b6SYankun Li backing_io->iov, backing_io->iovcnt, 845245271b6SYankun Li backing_io->lba, backing_io->lba_count, 84607fe6a43SSeth Howell comp_reduce_io_cb, 847245271b6SYankun Li backing_cb_args); 848245271b6SYankun Li 84907fe6a43SSeth Howell if (rc) { 85007fe6a43SSeth Howell if (rc == -ENOMEM) { 8514e94e54eSYankun Li _comp_backing_bdev_queue_io_wait(comp_bdev, backing_io); 8524e94e54eSYankun Li return; 85307fe6a43SSeth Howell } else { 854245271b6SYankun Li SPDK_ERRLOG("submitting readv request, rc=%d\n", rc); 85507fe6a43SSeth Howell } 856245271b6SYankun Li backing_cb_args->cb_fn(backing_cb_args->cb_arg, rc); 85707fe6a43SSeth Howell } 85807fe6a43SSeth Howell } 85907fe6a43SSeth Howell 86007fe6a43SSeth Howell static void 861245271b6SYankun Li _comp_backing_bdev_write(struct spdk_reduce_backing_io *backing_io) 86207fe6a43SSeth Howell { 863245271b6SYankun Li struct spdk_reduce_vol_cb_args *backing_cb_args = backing_io->backing_cb_args; 864245271b6SYankun Li struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(backing_io->dev, struct vbdev_compress, 86507fe6a43SSeth Howell backing_dev); 86607fe6a43SSeth Howell int rc; 86707fe6a43SSeth Howell 86807fe6a43SSeth Howell rc = spdk_bdev_writev_blocks(comp_bdev->base_desc, comp_bdev->base_ch, 869245271b6SYankun Li backing_io->iov, backing_io->iovcnt, 870245271b6SYankun Li backing_io->lba, backing_io->lba_count, 87107fe6a43SSeth Howell comp_reduce_io_cb, 872245271b6SYankun Li backing_cb_args); 873245271b6SYankun Li 87407fe6a43SSeth Howell if (rc) { 87507fe6a43SSeth Howell if (rc == -ENOMEM) { 8764e94e54eSYankun Li _comp_backing_bdev_queue_io_wait(comp_bdev, backing_io); 
8774e94e54eSYankun Li return; 87807fe6a43SSeth Howell } else { 879245271b6SYankun Li SPDK_ERRLOG("error submitting writev request, rc=%d\n", rc); 88007fe6a43SSeth Howell } 881245271b6SYankun Li backing_cb_args->cb_fn(backing_cb_args->cb_arg, rc); 88207fe6a43SSeth Howell } 88307fe6a43SSeth Howell } 88407fe6a43SSeth Howell 88507fe6a43SSeth Howell static void 886245271b6SYankun Li _comp_backing_bdev_unmap(struct spdk_reduce_backing_io *backing_io) 88707fe6a43SSeth Howell { 888245271b6SYankun Li struct spdk_reduce_vol_cb_args *backing_cb_args = backing_io->backing_cb_args; 889245271b6SYankun Li struct vbdev_compress *comp_bdev = SPDK_CONTAINEROF(backing_io->dev, struct vbdev_compress, 89007fe6a43SSeth Howell backing_dev); 89107fe6a43SSeth Howell int rc; 89207fe6a43SSeth Howell 89307fe6a43SSeth Howell rc = spdk_bdev_unmap_blocks(comp_bdev->base_desc, comp_bdev->base_ch, 894245271b6SYankun Li backing_io->lba, backing_io->lba_count, 89507fe6a43SSeth Howell comp_reduce_io_cb, 896245271b6SYankun Li backing_cb_args); 89707fe6a43SSeth Howell 89807fe6a43SSeth Howell if (rc) { 89907fe6a43SSeth Howell if (rc == -ENOMEM) { 9004e94e54eSYankun Li _comp_backing_bdev_queue_io_wait(comp_bdev, backing_io); 9014e94e54eSYankun Li return; 90207fe6a43SSeth Howell } else { 903245271b6SYankun Li SPDK_ERRLOG("submitting unmap request, rc=%d\n", rc); 90407fe6a43SSeth Howell } 905245271b6SYankun Li backing_cb_args->cb_fn(backing_cb_args->cb_arg, rc); 906245271b6SYankun Li } 907245271b6SYankun Li } 908245271b6SYankun Li 909245271b6SYankun Li /* This is the function provided to the reduceLib for sending reads/writes/unmaps 910245271b6SYankun Li * directly to the backing device. 
911245271b6SYankun Li */ 912245271b6SYankun Li static void 913245271b6SYankun Li _comp_reduce_submit_backing_io(struct spdk_reduce_backing_io *backing_io) 914245271b6SYankun Li { 915245271b6SYankun Li switch (backing_io->backing_io_type) { 916245271b6SYankun Li case SPDK_REDUCE_BACKING_IO_WRITE: 917245271b6SYankun Li _comp_backing_bdev_write(backing_io); 918245271b6SYankun Li break; 919245271b6SYankun Li case SPDK_REDUCE_BACKING_IO_READ: 920245271b6SYankun Li _comp_backing_bdev_read(backing_io); 921245271b6SYankun Li break; 922245271b6SYankun Li case SPDK_REDUCE_BACKING_IO_UNMAP: 923245271b6SYankun Li _comp_backing_bdev_unmap(backing_io); 924245271b6SYankun Li break; 925245271b6SYankun Li default: 926245271b6SYankun Li SPDK_ERRLOG("Unknown I/O type %d\n", backing_io->backing_io_type); 927245271b6SYankun Li backing_io->backing_cb_args->cb_fn(backing_io->backing_cb_args->cb_arg, -EINVAL); 928245271b6SYankun Li break; 92907fe6a43SSeth Howell } 93007fe6a43SSeth Howell } 93107fe6a43SSeth Howell 9324e94e54eSYankun Li static void 9334e94e54eSYankun Li _comp_reduce_resubmit_backing_io(void *_backing_io) 9344e94e54eSYankun Li { 9354e94e54eSYankun Li struct spdk_reduce_backing_io *backing_io = _backing_io; 9364e94e54eSYankun Li 9374e94e54eSYankun Li _comp_reduce_submit_backing_io(backing_io); 9384e94e54eSYankun Li } 9394e94e54eSYankun Li 94007fe6a43SSeth Howell /* Called by reduceLib after performing unload vol actions following base bdev hotremove */ 94107fe6a43SSeth Howell static void 94207fe6a43SSeth Howell bdev_hotremove_vol_unload_cb(void *cb_arg, int reduce_errno) 94307fe6a43SSeth Howell { 94407fe6a43SSeth Howell struct vbdev_compress *comp_bdev = (struct vbdev_compress *)cb_arg; 94507fe6a43SSeth Howell 94607fe6a43SSeth Howell if (reduce_errno) { 94707fe6a43SSeth Howell SPDK_ERRLOG("number %d\n", reduce_errno); 94807fe6a43SSeth Howell } 94907fe6a43SSeth Howell 950f4e401eaSpaul luse comp_bdev->vol = NULL; 95107fe6a43SSeth Howell 
spdk_bdev_unregister(&comp_bdev->comp_bdev, NULL, NULL); 95207fe6a43SSeth Howell } 95307fe6a43SSeth Howell 95407fe6a43SSeth Howell static void 955779a6bdfSShuhei Matsumoto vbdev_compress_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find) 95607fe6a43SSeth Howell { 95707fe6a43SSeth Howell struct vbdev_compress *comp_bdev, *tmp; 95807fe6a43SSeth Howell 95907fe6a43SSeth Howell TAILQ_FOREACH_SAFE(comp_bdev, &g_vbdev_comp, link, tmp) { 96007fe6a43SSeth Howell if (bdev_find == comp_bdev->base_bdev) { 961149f0f7eSpaul luse /* Tell reduceLib that we're done with this volume. */ 96207fe6a43SSeth Howell spdk_reduce_vol_unload(comp_bdev->vol, bdev_hotremove_vol_unload_cb, comp_bdev); 96307fe6a43SSeth Howell } 96407fe6a43SSeth Howell } 96507fe6a43SSeth Howell } 96607fe6a43SSeth Howell 967779a6bdfSShuhei Matsumoto /* Called when the underlying base bdev triggers asynchronous event such as bdev removal. */ 968779a6bdfSShuhei Matsumoto static void 969779a6bdfSShuhei Matsumoto vbdev_compress_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 970779a6bdfSShuhei Matsumoto void *event_ctx) 971779a6bdfSShuhei Matsumoto { 972779a6bdfSShuhei Matsumoto switch (type) { 973779a6bdfSShuhei Matsumoto case SPDK_BDEV_EVENT_REMOVE: 974779a6bdfSShuhei Matsumoto vbdev_compress_base_bdev_hotremove_cb(bdev); 975779a6bdfSShuhei Matsumoto break; 976779a6bdfSShuhei Matsumoto default: 977779a6bdfSShuhei Matsumoto SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 978779a6bdfSShuhei Matsumoto break; 979779a6bdfSShuhei Matsumoto } 980779a6bdfSShuhei Matsumoto } 981779a6bdfSShuhei Matsumoto 98207fe6a43SSeth Howell /* TODO: determine which parms we want user configurable, HC for now 98307fe6a43SSeth Howell * params.vol_size 98407fe6a43SSeth Howell * params.chunk_size 98507fe6a43SSeth Howell * compression PMD, algorithm, window size, comp level, etc. 
98607fe6a43SSeth Howell * DEV_MD_PATH 98707fe6a43SSeth Howell */ 98807fe6a43SSeth Howell 98907fe6a43SSeth Howell /* Common function for init and load to allocate and populate the minimal 99007fe6a43SSeth Howell * information for reducelib to init or load. 99107fe6a43SSeth Howell */ 99207fe6a43SSeth Howell struct vbdev_compress * 993ddd4603cSYankun Li _prepare_for_load_init(struct spdk_bdev_desc *bdev_desc, uint32_t lb_size, uint8_t comp_algo, 994ddd4603cSYankun Li uint32_t comp_level) 99507fe6a43SSeth Howell { 996915dc5d6SYankun Li struct vbdev_compress *comp_bdev; 997aec30063SShuhei Matsumoto struct spdk_bdev *bdev; 99807fe6a43SSeth Howell 999915dc5d6SYankun Li comp_bdev = calloc(1, sizeof(struct vbdev_compress)); 1000915dc5d6SYankun Li if (comp_bdev == NULL) { 1001915dc5d6SYankun Li SPDK_ERRLOG("failed to alloc comp_bdev\n"); 100207fe6a43SSeth Howell return NULL; 100307fe6a43SSeth Howell } 100407fe6a43SSeth Howell 1005245271b6SYankun Li comp_bdev->backing_dev.submit_backing_io = _comp_reduce_submit_backing_io; 1006915dc5d6SYankun Li comp_bdev->backing_dev.compress = _comp_reduce_compress; 1007915dc5d6SYankun Li comp_bdev->backing_dev.decompress = _comp_reduce_decompress; 100807fe6a43SSeth Howell 1009915dc5d6SYankun Li comp_bdev->base_desc = bdev_desc; 1010aec30063SShuhei Matsumoto bdev = spdk_bdev_desc_get_bdev(bdev_desc); 1011915dc5d6SYankun Li comp_bdev->base_bdev = bdev; 1012aec30063SShuhei Matsumoto 1013915dc5d6SYankun Li comp_bdev->backing_dev.blocklen = bdev->blocklen; 1014915dc5d6SYankun Li comp_bdev->backing_dev.blockcnt = bdev->blockcnt; 101507fe6a43SSeth Howell 10164e94e54eSYankun Li comp_bdev->backing_dev.user_ctx_size = sizeof(struct spdk_bdev_io_wait_entry); 10174e94e54eSYankun Li 1018ddd4603cSYankun Li comp_bdev->comp_algo = comp_algo; 1019ddd4603cSYankun Li comp_bdev->comp_level = comp_level; 1020ddd4603cSYankun Li comp_bdev->params.comp_algo = comp_algo; 1021ddd4603cSYankun Li comp_bdev->params.comp_level = comp_level; 1022915dc5d6SYankun Li 
comp_bdev->params.chunk_size = CHUNK_SIZE; 102362b3b171Spaul luse if (lb_size == 0) { 1024915dc5d6SYankun Li comp_bdev->params.logical_block_size = bdev->blocklen; 102562b3b171Spaul luse } else { 1026915dc5d6SYankun Li comp_bdev->params.logical_block_size = lb_size; 102762b3b171Spaul luse } 102862b3b171Spaul luse 1029915dc5d6SYankun Li comp_bdev->params.backing_io_unit_size = BACKING_IO_SZ; 1030915dc5d6SYankun Li return comp_bdev; 103107fe6a43SSeth Howell } 103207fe6a43SSeth Howell 103307fe6a43SSeth Howell /* Call reducelib to initialize a new volume */ 103407fe6a43SSeth Howell static int 1035ddd4603cSYankun Li vbdev_init_reduce(const char *bdev_name, const char *pm_path, uint32_t lb_size, uint8_t comp_algo, 1036ddd4603cSYankun Li uint32_t comp_level, bdev_compress_create_cb cb_fn, void *cb_arg) 103707fe6a43SSeth Howell { 1038aec30063SShuhei Matsumoto struct spdk_bdev_desc *bdev_desc = NULL; 1039f3cda926SYankun Li struct vbdev_init_reduce_ctx *init_ctx; 1040915dc5d6SYankun Li struct vbdev_compress *comp_bdev; 104107fe6a43SSeth Howell int rc; 104207fe6a43SSeth Howell 1043f3cda926SYankun Li init_ctx = calloc(1, sizeof(*init_ctx)); 1044f3cda926SYankun Li if (init_ctx == NULL) { 1045f3cda926SYankun Li SPDK_ERRLOG("failed to alloc init contexts\n"); 1046f3cda926SYankun Li return - ENOMEM; 1047f3cda926SYankun Li } 1048f3cda926SYankun Li 1049f3cda926SYankun Li init_ctx->cb_fn = cb_fn; 1050f3cda926SYankun Li init_ctx->cb_ctx = cb_arg; 1051f3cda926SYankun Li 1052aec30063SShuhei Matsumoto rc = spdk_bdev_open_ext(bdev_name, true, vbdev_compress_base_bdev_event_cb, 1053aec30063SShuhei Matsumoto NULL, &bdev_desc); 1054aec30063SShuhei Matsumoto if (rc) { 1055161f75d5SYankun Li SPDK_ERRLOG("could not open bdev %s, error %s\n", bdev_name, spdk_strerror(-rc)); 1056f3cda926SYankun Li free(init_ctx); 1057aec30063SShuhei Matsumoto return rc; 1058aec30063SShuhei Matsumoto } 1059aec30063SShuhei Matsumoto 1060ddd4603cSYankun Li comp_bdev = _prepare_for_load_init(bdev_desc, lb_size, 
comp_algo, comp_level); 1061915dc5d6SYankun Li if (comp_bdev == NULL) { 1062f3cda926SYankun Li free(init_ctx); 1063aec30063SShuhei Matsumoto spdk_bdev_close(bdev_desc); 106407fe6a43SSeth Howell return -EINVAL; 106507fe6a43SSeth Howell } 106607fe6a43SSeth Howell 1067f3cda926SYankun Li init_ctx->comp_bdev = comp_bdev; 1068f3cda926SYankun Li 1069b3be320dSGangCao /* Save the thread where the base device is opened */ 1070915dc5d6SYankun Li comp_bdev->thread = spdk_get_thread(); 1071b3be320dSGangCao 1072915dc5d6SYankun Li comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc); 107307fe6a43SSeth Howell 1074915dc5d6SYankun Li spdk_reduce_vol_init(&comp_bdev->params, &comp_bdev->backing_dev, 107507fe6a43SSeth Howell pm_path, 107607fe6a43SSeth Howell vbdev_reduce_init_cb, 1077f3cda926SYankun Li init_ctx); 107807fe6a43SSeth Howell return 0; 107907fe6a43SSeth Howell } 108007fe6a43SSeth Howell 108107fe6a43SSeth Howell /* We provide this callback for the SPDK channel code to create a channel using 108207fe6a43SSeth Howell * the channel struct we provided in our module get_io_channel() entry point. Here 108307fe6a43SSeth Howell * we get and save off an underlying base channel of the device below us so that 108407fe6a43SSeth Howell * we can communicate with the base bdev on a per channel basis. If we needed 108507fe6a43SSeth Howell * our own poller for this vbdev, we'd register it here. 108607fe6a43SSeth Howell */ 108707fe6a43SSeth Howell static int 108807fe6a43SSeth Howell comp_bdev_ch_create_cb(void *io_device, void *ctx_buf) 108907fe6a43SSeth Howell { 109007fe6a43SSeth Howell struct vbdev_compress *comp_bdev = io_device; 1091976f8b09Spaul luse 109282b8dd90SShuhei Matsumoto /* Now set the reduce channel if it's not already set. */ 109382b8dd90SShuhei Matsumoto pthread_mutex_lock(&comp_bdev->reduce_lock); 109482b8dd90SShuhei Matsumoto if (comp_bdev->ch_count == 0) { 1095149f0f7eSpaul luse /* We use this queue to track outstanding IO in our layer. 
*/ 109607fe6a43SSeth Howell TAILQ_INIT(&comp_bdev->pending_comp_ios); 109707fe6a43SSeth Howell 109807fe6a43SSeth Howell /* We use this to queue up compression operations as needed. */ 109907fe6a43SSeth Howell TAILQ_INIT(&comp_bdev->queued_comp_ops); 110007fe6a43SSeth Howell 110107fe6a43SSeth Howell comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc); 110207fe6a43SSeth Howell comp_bdev->reduce_thread = spdk_get_thread(); 1103bb5083a8Spaul luse comp_bdev->accel_channel = spdk_accel_get_io_channel(); 110407fe6a43SSeth Howell } 110507fe6a43SSeth Howell comp_bdev->ch_count++; 110607fe6a43SSeth Howell pthread_mutex_unlock(&comp_bdev->reduce_lock); 110707fe6a43SSeth Howell 110807fe6a43SSeth Howell return 0; 110907fe6a43SSeth Howell } 111007fe6a43SSeth Howell 111107fe6a43SSeth Howell static void 111207fe6a43SSeth Howell _channel_cleanup(struct vbdev_compress *comp_bdev) 111307fe6a43SSeth Howell { 111407fe6a43SSeth Howell spdk_put_io_channel(comp_bdev->base_ch); 1115bb5083a8Spaul luse spdk_put_io_channel(comp_bdev->accel_channel); 111607fe6a43SSeth Howell comp_bdev->reduce_thread = NULL; 111707fe6a43SSeth Howell } 111807fe6a43SSeth Howell 111907fe6a43SSeth Howell /* Used to reroute destroy_ch to the correct thread */ 112007fe6a43SSeth Howell static void 112107fe6a43SSeth Howell _comp_bdev_ch_destroy_cb(void *arg) 112207fe6a43SSeth Howell { 112307fe6a43SSeth Howell struct vbdev_compress *comp_bdev = arg; 112407fe6a43SSeth Howell 112507fe6a43SSeth Howell pthread_mutex_lock(&comp_bdev->reduce_lock); 112607fe6a43SSeth Howell _channel_cleanup(comp_bdev); 112707fe6a43SSeth Howell pthread_mutex_unlock(&comp_bdev->reduce_lock); 112807fe6a43SSeth Howell } 112907fe6a43SSeth Howell 113007fe6a43SSeth Howell /* We provide this callback for the SPDK channel code to destroy a channel 113107fe6a43SSeth Howell * created with our create callback. We just need to undo anything we did 113207fe6a43SSeth Howell * when we created. 
If this bdev used its own poller, we'd unregister it here. 113307fe6a43SSeth Howell */ 113407fe6a43SSeth Howell static void 113507fe6a43SSeth Howell comp_bdev_ch_destroy_cb(void *io_device, void *ctx_buf) 113607fe6a43SSeth Howell { 113707fe6a43SSeth Howell struct vbdev_compress *comp_bdev = io_device; 113807fe6a43SSeth Howell 113907fe6a43SSeth Howell pthread_mutex_lock(&comp_bdev->reduce_lock); 114007fe6a43SSeth Howell comp_bdev->ch_count--; 114107fe6a43SSeth Howell if (comp_bdev->ch_count == 0) { 114207fe6a43SSeth Howell /* Send this request to the thread where the channel was created. */ 114307fe6a43SSeth Howell if (comp_bdev->reduce_thread != spdk_get_thread()) { 114407fe6a43SSeth Howell spdk_thread_send_msg(comp_bdev->reduce_thread, 114507fe6a43SSeth Howell _comp_bdev_ch_destroy_cb, comp_bdev); 114607fe6a43SSeth Howell } else { 114707fe6a43SSeth Howell _channel_cleanup(comp_bdev); 114807fe6a43SSeth Howell } 114907fe6a43SSeth Howell } 115007fe6a43SSeth Howell pthread_mutex_unlock(&comp_bdev->reduce_lock); 115107fe6a43SSeth Howell } 115207fe6a43SSeth Howell 1153ddd4603cSYankun Li static int 1154ddd4603cSYankun Li _check_compress_bdev_comp_algo(enum spdk_accel_comp_algo algo, uint32_t comp_level) 1155ddd4603cSYankun Li { 1156ddd4603cSYankun Li uint32_t min_level, max_level; 1157ddd4603cSYankun Li int rc; 1158ddd4603cSYankun Li 1159ddd4603cSYankun Li rc = spdk_accel_get_compress_level_range(algo, &min_level, &max_level); 1160ddd4603cSYankun Li if (rc != 0) { 1161ddd4603cSYankun Li return rc; 1162ddd4603cSYankun Li } 1163ddd4603cSYankun Li 1164ddd4603cSYankun Li /* If both min_level and max_level are 0, the compression level can be ignored. 1165ddd4603cSYankun Li * The back-end implementation hardcodes the compression level. 
1166ddd4603cSYankun Li */ 1167ddd4603cSYankun Li if (min_level == 0 && max_level == 0) { 1168ddd4603cSYankun Li return 0; 1169ddd4603cSYankun Li } 1170ddd4603cSYankun Li 1171ddd4603cSYankun Li if (comp_level > max_level || comp_level < min_level) { 1172ddd4603cSYankun Li return -EINVAL; 1173ddd4603cSYankun Li } 1174ddd4603cSYankun Li 1175ddd4603cSYankun Li return 0; 1176ddd4603cSYankun Li } 1177ddd4603cSYankun Li 117807fe6a43SSeth Howell /* RPC entry point for compression vbdev creation. */ 117907fe6a43SSeth Howell int 1180f3cda926SYankun Li create_compress_bdev(const char *bdev_name, const char *pm_path, uint32_t lb_size, 1181ddd4603cSYankun Li uint8_t comp_algo, uint32_t comp_level, 1182f3cda926SYankun Li bdev_compress_create_cb cb_fn, void *cb_arg) 118307fe6a43SSeth Howell { 118489ee5a13Spaul luse struct vbdev_compress *comp_bdev = NULL; 118538b03952SYankun Li struct stat info; 1186ddd4603cSYankun Li int rc; 118738b03952SYankun Li 118838b03952SYankun Li if (stat(pm_path, &info) != 0) { 118938b03952SYankun Li SPDK_ERRLOG("PM path %s does not exist.\n", pm_path); 119038b03952SYankun Li return -EINVAL; 119138b03952SYankun Li } else if (!S_ISDIR(info.st_mode)) { 119238b03952SYankun Li SPDK_ERRLOG("PM path %s is not a directory.\n", pm_path); 119338b03952SYankun Li return -EINVAL; 119438b03952SYankun Li } 119589ee5a13Spaul luse 119662b3b171Spaul luse if ((lb_size != 0) && (lb_size != LB_SIZE_4K) && (lb_size != LB_SIZE_512B)) { 119762b3b171Spaul luse SPDK_ERRLOG("Logical block size must be 512 or 4096\n"); 119862b3b171Spaul luse return -EINVAL; 119962b3b171Spaul luse } 120062b3b171Spaul luse 1201ddd4603cSYankun Li rc = _check_compress_bdev_comp_algo(comp_algo, comp_level); 1202ddd4603cSYankun Li if (rc != 0) { 1203ddd4603cSYankun Li SPDK_ERRLOG("Compress bdev doesn't support compression algo(%u) or level(%u)\n", 1204ddd4603cSYankun Li comp_algo, comp_level); 1205ddd4603cSYankun Li return rc; 1206ddd4603cSYankun Li } 1207ddd4603cSYankun Li 120889ee5a13Spaul luse 
TAILQ_FOREACH(comp_bdev, &g_vbdev_comp, link) { 120989ee5a13Spaul luse if (strcmp(bdev_name, comp_bdev->base_bdev->name) == 0) { 121089ee5a13Spaul luse SPDK_ERRLOG("Bass bdev %s already being used for a compress bdev\n", bdev_name); 121189ee5a13Spaul luse return -EBUSY; 121289ee5a13Spaul luse } 121389ee5a13Spaul luse } 1214ddd4603cSYankun Li return vbdev_init_reduce(bdev_name, pm_path, lb_size, comp_algo, comp_level, cb_fn, cb_arg); 121507fe6a43SSeth Howell } 121607fe6a43SSeth Howell 121707fe6a43SSeth Howell static int 121807fe6a43SSeth Howell vbdev_compress_init(void) 121907fe6a43SSeth Howell { 122007fe6a43SSeth Howell return 0; 122107fe6a43SSeth Howell } 122207fe6a43SSeth Howell 122307fe6a43SSeth Howell /* Called when the entire module is being torn down. */ 122407fe6a43SSeth Howell static void 122507fe6a43SSeth Howell vbdev_compress_finish(void) 122607fe6a43SSeth Howell { 122707fe6a43SSeth Howell /* TODO: unload vol in a future patch */ 122807fe6a43SSeth Howell } 122907fe6a43SSeth Howell 123007fe6a43SSeth Howell /* During init we'll be asked how much memory we'd like passed to us 123107fe6a43SSeth Howell * in bev_io structures as context. Here's where we specify how 123207fe6a43SSeth Howell * much context we want per IO. 123307fe6a43SSeth Howell */ 123407fe6a43SSeth Howell static int 123507fe6a43SSeth Howell vbdev_compress_get_ctx_size(void) 123607fe6a43SSeth Howell { 123707fe6a43SSeth Howell return sizeof(struct comp_bdev_io); 123807fe6a43SSeth Howell } 123907fe6a43SSeth Howell 124007fe6a43SSeth Howell /* When we register our bdev this is how we specify our entry points. 
*/ 124107fe6a43SSeth Howell static const struct spdk_bdev_fn_table vbdev_compress_fn_table = { 124207fe6a43SSeth Howell .destruct = vbdev_compress_destruct, 124307fe6a43SSeth Howell .submit_request = vbdev_compress_submit_request, 124407fe6a43SSeth Howell .io_type_supported = vbdev_compress_io_type_supported, 124507fe6a43SSeth Howell .get_io_channel = vbdev_compress_get_io_channel, 124607fe6a43SSeth Howell .dump_info_json = vbdev_compress_dump_info_json, 124707fe6a43SSeth Howell .write_config_json = NULL, 124807fe6a43SSeth Howell }; 124907fe6a43SSeth Howell 125007fe6a43SSeth Howell static struct spdk_bdev_module compress_if = { 125107fe6a43SSeth Howell .name = "compress", 125207fe6a43SSeth Howell .module_init = vbdev_compress_init, 125307fe6a43SSeth Howell .get_ctx_size = vbdev_compress_get_ctx_size, 125407fe6a43SSeth Howell .examine_disk = vbdev_compress_examine, 125507fe6a43SSeth Howell .module_fini = vbdev_compress_finish, 125607fe6a43SSeth Howell .config_json = vbdev_compress_config_json 125707fe6a43SSeth Howell }; 125807fe6a43SSeth Howell 125907fe6a43SSeth Howell SPDK_BDEV_MODULE_REGISTER(compress, &compress_if) 126007fe6a43SSeth Howell 126107fe6a43SSeth Howell static int _set_compbdev_name(struct vbdev_compress *comp_bdev) 126207fe6a43SSeth Howell { 126307fe6a43SSeth Howell struct spdk_bdev_alias *aliases; 126407fe6a43SSeth Howell 126507fe6a43SSeth Howell if (!TAILQ_EMPTY(spdk_bdev_get_aliases(comp_bdev->base_bdev))) { 126607fe6a43SSeth Howell aliases = TAILQ_FIRST(spdk_bdev_get_aliases(comp_bdev->base_bdev)); 1267eabe783cSJiewei Ke comp_bdev->comp_bdev.name = spdk_sprintf_alloc("COMP_%s", aliases->alias.name); 126807fe6a43SSeth Howell if (!comp_bdev->comp_bdev.name) { 126907fe6a43SSeth Howell SPDK_ERRLOG("could not allocate comp_bdev name for alias\n"); 127007fe6a43SSeth Howell return -ENOMEM; 127107fe6a43SSeth Howell } 127207fe6a43SSeth Howell } else { 127307fe6a43SSeth Howell comp_bdev->comp_bdev.name = spdk_sprintf_alloc("COMP_%s", 
comp_bdev->base_bdev->name); 127407fe6a43SSeth Howell if (!comp_bdev->comp_bdev.name) { 127507fe6a43SSeth Howell SPDK_ERRLOG("could not allocate comp_bdev name for unique name\n"); 127607fe6a43SSeth Howell return -ENOMEM; 127707fe6a43SSeth Howell } 127807fe6a43SSeth Howell } 127907fe6a43SSeth Howell return 0; 128007fe6a43SSeth Howell } 128107fe6a43SSeth Howell 1282c3ed33f4SShuhei Matsumoto static int 128307fe6a43SSeth Howell vbdev_compress_claim(struct vbdev_compress *comp_bdev) 128407fe6a43SSeth Howell { 1285d83e87f8SKrzysztof Karas struct spdk_uuid ns_uuid; 128607fe6a43SSeth Howell int rc; 128707fe6a43SSeth Howell 128807fe6a43SSeth Howell if (_set_compbdev_name(comp_bdev)) { 1289c3ed33f4SShuhei Matsumoto return -EINVAL; 129007fe6a43SSeth Howell } 129107fe6a43SSeth Howell 129207fe6a43SSeth Howell /* Note: some of the fields below will change in the future - for example, 129307fe6a43SSeth Howell * blockcnt specifically will not match (the compressed volume size will 129407fe6a43SSeth Howell * be slightly less than the base bdev size) 129507fe6a43SSeth Howell */ 129607fe6a43SSeth Howell comp_bdev->comp_bdev.product_name = COMP_BDEV_NAME; 129707fe6a43SSeth Howell comp_bdev->comp_bdev.write_cache = comp_bdev->base_bdev->write_cache; 129807fe6a43SSeth Howell 129907fe6a43SSeth Howell comp_bdev->comp_bdev.optimal_io_boundary = 130007fe6a43SSeth Howell comp_bdev->params.chunk_size / comp_bdev->params.logical_block_size; 130107fe6a43SSeth Howell 130207fe6a43SSeth Howell comp_bdev->comp_bdev.split_on_optimal_io_boundary = true; 130307fe6a43SSeth Howell 13040190e71eSSven Breuner comp_bdev->comp_bdev.blocklen = comp_bdev->params.logical_block_size; 130507fe6a43SSeth Howell comp_bdev->comp_bdev.blockcnt = comp_bdev->params.vol_size / comp_bdev->comp_bdev.blocklen; 130607fe6a43SSeth Howell assert(comp_bdev->comp_bdev.blockcnt > 0); 130707fe6a43SSeth Howell 130807fe6a43SSeth Howell /* This is the context that is passed to us when the bdev 130907fe6a43SSeth Howell * layer calls 
in so we'll save our comp_bdev node here. 131007fe6a43SSeth Howell */ 131107fe6a43SSeth Howell comp_bdev->comp_bdev.ctxt = comp_bdev; 131207fe6a43SSeth Howell comp_bdev->comp_bdev.fn_table = &vbdev_compress_fn_table; 131307fe6a43SSeth Howell comp_bdev->comp_bdev.module = &compress_if; 131407fe6a43SSeth Howell 1315d83e87f8SKrzysztof Karas /* Generate UUID based on namespace UUID + base bdev UUID. */ 1316d83e87f8SKrzysztof Karas spdk_uuid_parse(&ns_uuid, BDEV_COMPRESS_NAMESPACE_UUID); 1317d83e87f8SKrzysztof Karas rc = spdk_uuid_generate_sha1(&comp_bdev->comp_bdev.uuid, &ns_uuid, 1318d83e87f8SKrzysztof Karas (const char *)&comp_bdev->base_bdev->uuid, sizeof(struct spdk_uuid)); 1319d83e87f8SKrzysztof Karas if (rc) { 1320161f75d5SYankun Li SPDK_ERRLOG("Unable to generate new UUID for compress bdev, error %s\n", spdk_strerror(-rc)); 1321d83e87f8SKrzysztof Karas return -EINVAL; 1322d83e87f8SKrzysztof Karas } 1323d83e87f8SKrzysztof Karas 132407fe6a43SSeth Howell pthread_mutex_init(&comp_bdev->reduce_lock, NULL); 132507fe6a43SSeth Howell 1326b3be320dSGangCao /* Save the thread where the base device is opened */ 1327b3be320dSGangCao comp_bdev->thread = spdk_get_thread(); 1328b3be320dSGangCao 132907fe6a43SSeth Howell spdk_io_device_register(comp_bdev, comp_bdev_ch_create_cb, comp_bdev_ch_destroy_cb, 133007fe6a43SSeth Howell sizeof(struct comp_io_channel), 133107fe6a43SSeth Howell comp_bdev->comp_bdev.name); 133207fe6a43SSeth Howell 133307fe6a43SSeth Howell rc = spdk_bdev_module_claim_bdev(comp_bdev->base_bdev, comp_bdev->base_desc, 133407fe6a43SSeth Howell comp_bdev->comp_bdev.module); 133507fe6a43SSeth Howell if (rc) { 1336161f75d5SYankun Li SPDK_ERRLOG("could not claim bdev %s, error %s\n", spdk_bdev_get_name(comp_bdev->base_bdev), 1337161f75d5SYankun Li spdk_strerror(-rc)); 133807fe6a43SSeth Howell goto error_claim; 133907fe6a43SSeth Howell } 134007fe6a43SSeth Howell 134107fe6a43SSeth Howell rc = spdk_bdev_register(&comp_bdev->comp_bdev); 134207fe6a43SSeth Howell if (rc < 
0) { 1343161f75d5SYankun Li SPDK_ERRLOG("trying to register bdev, error %s\n", spdk_strerror(-rc)); 134407fe6a43SSeth Howell goto error_bdev_register; 134507fe6a43SSeth Howell } 134607fe6a43SSeth Howell 13473934784dSShuhei Matsumoto TAILQ_INSERT_TAIL(&g_vbdev_comp, comp_bdev, link); 13483934784dSShuhei Matsumoto 134907fe6a43SSeth Howell SPDK_NOTICELOG("registered io_device and virtual bdev for: %s\n", comp_bdev->comp_bdev.name); 135007fe6a43SSeth Howell 1351c3ed33f4SShuhei Matsumoto return 0; 1352c3ed33f4SShuhei Matsumoto 135307fe6a43SSeth Howell /* Error cleanup paths. */ 135407fe6a43SSeth Howell error_bdev_register: 135507fe6a43SSeth Howell spdk_bdev_module_release_bdev(comp_bdev->base_bdev); 135607fe6a43SSeth Howell error_claim: 135707fe6a43SSeth Howell spdk_io_device_unregister(comp_bdev, NULL); 135807fe6a43SSeth Howell free(comp_bdev->comp_bdev.name); 1359c3ed33f4SShuhei Matsumoto return rc; 136007fe6a43SSeth Howell } 136107fe6a43SSeth Howell 1362fcf8e454SShuhei Matsumoto static void 1363fcf8e454SShuhei Matsumoto _vbdev_compress_delete_done(void *_ctx) 1364fcf8e454SShuhei Matsumoto { 1365fcf8e454SShuhei Matsumoto struct vbdev_comp_delete_ctx *ctx = _ctx; 1366fcf8e454SShuhei Matsumoto 1367fcf8e454SShuhei Matsumoto ctx->cb_fn(ctx->cb_arg, ctx->cb_rc); 1368fcf8e454SShuhei Matsumoto 1369fcf8e454SShuhei Matsumoto free(ctx); 1370fcf8e454SShuhei Matsumoto } 1371fcf8e454SShuhei Matsumoto 1372fcf8e454SShuhei Matsumoto static void 1373fcf8e454SShuhei Matsumoto vbdev_compress_delete_done(void *cb_arg, int bdeverrno) 1374fcf8e454SShuhei Matsumoto { 1375fcf8e454SShuhei Matsumoto struct vbdev_comp_delete_ctx *ctx = cb_arg; 1376fcf8e454SShuhei Matsumoto 1377fcf8e454SShuhei Matsumoto ctx->cb_rc = bdeverrno; 1378fcf8e454SShuhei Matsumoto 1379fcf8e454SShuhei Matsumoto if (ctx->orig_thread != spdk_get_thread()) { 1380fcf8e454SShuhei Matsumoto spdk_thread_send_msg(ctx->orig_thread, _vbdev_compress_delete_done, ctx); 1381fcf8e454SShuhei Matsumoto } else { 1382fcf8e454SShuhei 
Matsumoto _vbdev_compress_delete_done(ctx); 1383fcf8e454SShuhei Matsumoto } 1384fcf8e454SShuhei Matsumoto } 1385fcf8e454SShuhei Matsumoto 138607fe6a43SSeth Howell void 138707fe6a43SSeth Howell bdev_compress_delete(const char *name, spdk_delete_compress_complete cb_fn, void *cb_arg) 138807fe6a43SSeth Howell { 138907fe6a43SSeth Howell struct vbdev_compress *comp_bdev = NULL; 1390fcf8e454SShuhei Matsumoto struct vbdev_comp_delete_ctx *ctx; 139107fe6a43SSeth Howell 139207fe6a43SSeth Howell TAILQ_FOREACH(comp_bdev, &g_vbdev_comp, link) { 139307fe6a43SSeth Howell if (strcmp(name, comp_bdev->comp_bdev.name) == 0) { 139407fe6a43SSeth Howell break; 139507fe6a43SSeth Howell } 139607fe6a43SSeth Howell } 139707fe6a43SSeth Howell 139807fe6a43SSeth Howell if (comp_bdev == NULL) { 139907fe6a43SSeth Howell cb_fn(cb_arg, -ENODEV); 140007fe6a43SSeth Howell return; 140107fe6a43SSeth Howell } 140207fe6a43SSeth Howell 1403fcf8e454SShuhei Matsumoto ctx = calloc(1, sizeof(*ctx)); 1404fcf8e454SShuhei Matsumoto if (ctx == NULL) { 1405fcf8e454SShuhei Matsumoto SPDK_ERRLOG("Failed to allocate delete context\n"); 1406fcf8e454SShuhei Matsumoto cb_fn(cb_arg, -ENOMEM); 1407fcf8e454SShuhei Matsumoto return; 1408fcf8e454SShuhei Matsumoto } 1409fcf8e454SShuhei Matsumoto 141007fe6a43SSeth Howell /* Save these for after the vol is destroyed. */ 1411fcf8e454SShuhei Matsumoto ctx->cb_fn = cb_fn; 1412fcf8e454SShuhei Matsumoto ctx->cb_arg = cb_arg; 1413fcf8e454SShuhei Matsumoto ctx->orig_thread = spdk_get_thread(); 1414fcf8e454SShuhei Matsumoto 1415fcf8e454SShuhei Matsumoto comp_bdev->delete_ctx = ctx; 141607fe6a43SSeth Howell 141707fe6a43SSeth Howell /* Tell reducelib that we're done with this volume. 
*/ 141807fe6a43SSeth Howell if (comp_bdev->orphaned == false) { 141907fe6a43SSeth Howell spdk_reduce_vol_unload(comp_bdev->vol, delete_vol_unload_cb, comp_bdev); 142007fe6a43SSeth Howell } else { 142107fe6a43SSeth Howell delete_vol_unload_cb(comp_bdev, 0); 142207fe6a43SSeth Howell } 142307fe6a43SSeth Howell } 142407fe6a43SSeth Howell 1425b3be320dSGangCao static void 14267e10e593SYankun Li _vbdev_reduce_load_unload_cb(void *ctx, int reduce_errno) 14277e10e593SYankun Li { 14287e10e593SYankun Li } 14297e10e593SYankun Li 14307e10e593SYankun Li static void 1431b3be320dSGangCao _vbdev_reduce_load_cb(void *ctx) 1432b3be320dSGangCao { 1433915dc5d6SYankun Li struct vbdev_compress *comp_bdev = ctx; 143407fe6a43SSeth Howell int rc; 143507fe6a43SSeth Howell 1436915dc5d6SYankun Li assert(comp_bdev->base_desc != NULL); 1437c3ed33f4SShuhei Matsumoto 143807fe6a43SSeth Howell /* Done with metadata operations */ 1439915dc5d6SYankun Li spdk_put_io_channel(comp_bdev->base_ch); 144007fe6a43SSeth Howell 1441915dc5d6SYankun Li if (comp_bdev->reduce_errno == 0) { 1442915dc5d6SYankun Li rc = vbdev_compress_claim(comp_bdev); 1443c3ed33f4SShuhei Matsumoto if (rc != 0) { 14447e10e593SYankun Li spdk_reduce_vol_unload(comp_bdev->vol, _vbdev_reduce_load_unload_cb, NULL); 144507fe6a43SSeth Howell goto err; 144607fe6a43SSeth Howell } 1447915dc5d6SYankun Li } else if (comp_bdev->reduce_errno == -ENOENT) { 1448915dc5d6SYankun Li if (_set_compbdev_name(comp_bdev)) { 144907fe6a43SSeth Howell goto err; 145007fe6a43SSeth Howell } 145107fe6a43SSeth Howell 1452b3be320dSGangCao /* Save the thread where the base device is opened */ 1453915dc5d6SYankun Li comp_bdev->thread = spdk_get_thread(); 1454b3be320dSGangCao 1455915dc5d6SYankun Li comp_bdev->comp_bdev.module = &compress_if; 1456915dc5d6SYankun Li pthread_mutex_init(&comp_bdev->reduce_lock, NULL); 1457915dc5d6SYankun Li rc = spdk_bdev_module_claim_bdev(comp_bdev->base_bdev, comp_bdev->base_desc, 1458915dc5d6SYankun Li comp_bdev->comp_bdev.module); 
145907fe6a43SSeth Howell if (rc) { 1460915dc5d6SYankun Li SPDK_ERRLOG("could not claim bdev %s, error %s\n", spdk_bdev_get_name(comp_bdev->base_bdev), 1461161f75d5SYankun Li spdk_strerror(-rc)); 1462915dc5d6SYankun Li free(comp_bdev->comp_bdev.name); 146307fe6a43SSeth Howell goto err; 146407fe6a43SSeth Howell } 146507fe6a43SSeth Howell 1466915dc5d6SYankun Li comp_bdev->orphaned = true; 1467915dc5d6SYankun Li TAILQ_INSERT_TAIL(&g_vbdev_comp, comp_bdev, link); 1468d5a0220eSShuhei Matsumoto } else { 1469915dc5d6SYankun Li if (comp_bdev->reduce_errno != -EILSEQ) { 1470915dc5d6SYankun Li SPDK_ERRLOG("for vol %s, error %s\n", spdk_bdev_get_name(comp_bdev->base_bdev), 1471915dc5d6SYankun Li spdk_strerror(-comp_bdev->reduce_errno)); 147207fe6a43SSeth Howell } 147399e6fe41SShuhei Matsumoto goto err; 147407fe6a43SSeth Howell } 147507fe6a43SSeth Howell 147607fe6a43SSeth Howell spdk_bdev_module_examine_done(&compress_if); 147799e6fe41SShuhei Matsumoto return; 1478d5a0220eSShuhei Matsumoto 147999e6fe41SShuhei Matsumoto err: 1480c3ed33f4SShuhei Matsumoto /* Close the underlying bdev on its same opened thread. */ 1481915dc5d6SYankun Li spdk_bdev_close(comp_bdev->base_desc); 1482915dc5d6SYankun Li free(comp_bdev); 148399e6fe41SShuhei Matsumoto spdk_bdev_module_examine_done(&compress_if); 148407fe6a43SSeth Howell } 148507fe6a43SSeth Howell 148685724ba2SShuhei Matsumoto /* Callback from reduce for then load is complete. We'll pass the vbdev_comp struct 148785724ba2SShuhei Matsumoto * used for initial metadata operations to claim where it will be further filled out 148885724ba2SShuhei Matsumoto * and added to the global list. 
 */
static void
vbdev_reduce_load_cb(void *cb_arg, struct spdk_reduce_vol *vol, int reduce_errno)
{
	/* cb_arg is the vbdev_compress allocated for this examine attempt. */
	struct vbdev_compress *comp_bdev = cb_arg;

	if (reduce_errno == 0) {
		/* Update information following volume load. */
		comp_bdev->vol = vol;
		memcpy(&comp_bdev->params, spdk_reduce_vol_get_params(vol),
		       sizeof(struct spdk_reduce_vol_params));
		/* Mirror the persisted algorithm/level into the bdev so later
		 * compress operations use the volume's settings.
		 */
		comp_bdev->comp_algo = comp_bdev->params.comp_algo;
		comp_bdev->comp_level = comp_bdev->params.comp_level;
	}

	/* Stash the result; _vbdev_reduce_load_cb inspects it to decide between
	 * claim, orphan, and teardown.
	 */
	comp_bdev->reduce_errno = reduce_errno;

	/* Finish on the thread that opened the base bdev, hopping if this
	 * callback fired elsewhere.
	 */
	if (comp_bdev->thread && comp_bdev->thread != spdk_get_thread()) {
		spdk_thread_send_msg(comp_bdev->thread, _vbdev_reduce_load_cb, comp_bdev);
	} else {
		_vbdev_reduce_load_cb(comp_bdev);
	}

}

/* Examine_disk entry point: will do a metadata load to see if this is ours,
 * and if so will go ahead and claim it.
 */
static void
vbdev_compress_examine(struct spdk_bdev *bdev)
{
	struct spdk_bdev_desc *bdev_desc = NULL;
	struct vbdev_compress *comp_bdev;
	int rc;

	/* Never stack a compress bdev on top of another compress bdev. */
	if (strcmp(bdev->product_name, COMP_BDEV_NAME) == 0) {
		spdk_bdev_module_examine_done(&compress_if);
		return;
	}

	/* Open read-only (write == false) just to probe for reduce metadata. */
	rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false,
				vbdev_compress_base_bdev_event_cb, NULL, &bdev_desc);
	if (rc) {
		SPDK_ERRLOG("could not open bdev %s, error %s\n", spdk_bdev_get_name(bdev),
			    spdk_strerror(-rc));
		spdk_bdev_module_examine_done(&compress_if);
		return;
	}

	/* lb_size 0 / DEFLATE / level 1 are placeholders for the probe; on a
	 * successful load the persisted params replace them (see
	 * vbdev_reduce_load_cb) — NOTE(review): confirm against
	 * _prepare_for_load_init, which is defined outside this chunk.
	 */
	comp_bdev = _prepare_for_load_init(bdev_desc, 0, SPDK_ACCEL_COMP_ALGO_DEFLATE, 1);
	if (comp_bdev == NULL) {
		spdk_bdev_close(bdev_desc);
		spdk_bdev_module_examine_done(&compress_if);
		return;
	}

	/* Save the thread where the base device is opened */
	comp_bdev->thread = spdk_get_thread();

	/* Channel is released in _vbdev_reduce_load_cb once metadata I/O is done. */
	comp_bdev->base_ch = spdk_bdev_get_io_channel(comp_bdev->base_desc);
	spdk_reduce_vol_load(&comp_bdev->backing_dev, vbdev_reduce_load_cb, comp_bdev);
}

SPDK_LOG_REGISTER_COMPONENT(vbdev_compress)