1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 2a6dbe372Spaul luse * Copyright (C) 2018 Intel Corporation. 3c26c4e9fSPiotr Pelplinski * All rights reserved. 4a2360845SAlexey Marchuk * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5c26c4e9fSPiotr Pelplinski */ 6c26c4e9fSPiotr Pelplinski 7c26c4e9fSPiotr Pelplinski #include "spdk/stdinc.h" 8c26c4e9fSPiotr Pelplinski #include "spdk/blob.h" 9c26c4e9fSPiotr Pelplinski #include "spdk/log.h" 1000311abcSDiwakar Sharma #include "spdk/likely.h" 11c26c4e9fSPiotr Pelplinski #include "blobstore.h" 12c26c4e9fSPiotr Pelplinski 13c26c4e9fSPiotr Pelplinski static void 14c26c4e9fSPiotr Pelplinski blob_bs_dev_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, 15c26c4e9fSPiotr Pelplinski uint64_t lba, uint32_t lba_count, 16c26c4e9fSPiotr Pelplinski struct spdk_bs_dev_cb_args *cb_args) 17c26c4e9fSPiotr Pelplinski { 18c26c4e9fSPiotr Pelplinski cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); 19c26c4e9fSPiotr Pelplinski assert(false); 20c26c4e9fSPiotr Pelplinski } 21c26c4e9fSPiotr Pelplinski 22c26c4e9fSPiotr Pelplinski static void 23c26c4e9fSPiotr Pelplinski blob_bs_dev_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, 24c26c4e9fSPiotr Pelplinski struct iovec *iov, int iovcnt, 25c26c4e9fSPiotr Pelplinski uint64_t lba, uint32_t lba_count, 26c26c4e9fSPiotr Pelplinski struct spdk_bs_dev_cb_args *cb_args) 27c26c4e9fSPiotr Pelplinski { 28c26c4e9fSPiotr Pelplinski cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); 29c26c4e9fSPiotr Pelplinski assert(false); 30c26c4e9fSPiotr Pelplinski } 31c26c4e9fSPiotr Pelplinski 32c26c4e9fSPiotr Pelplinski static void 33a2360845SAlexey Marchuk blob_bs_dev_writev_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, 34a2360845SAlexey Marchuk struct iovec *iov, int iovcnt, 35a2360845SAlexey Marchuk uint64_t lba, uint32_t lba_count, 36a2360845SAlexey Marchuk struct spdk_bs_dev_cb_args *cb_args, 37a2360845SAlexey Marchuk struct spdk_blob_ext_io_opts *ext_opts) 38a2360845SAlexey Marchuk { 39a2360845SAlexey Marchuk cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); 40a2360845SAlexey Marchuk assert(false); 41a2360845SAlexey Marchuk } 42a2360845SAlexey Marchuk 43a2360845SAlexey Marchuk static void 44c26c4e9fSPiotr Pelplinski blob_bs_dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, 45f01146aeSJim Harris uint64_t lba, uint64_t lba_count, 46c26c4e9fSPiotr Pelplinski struct spdk_bs_dev_cb_args *cb_args) 47c26c4e9fSPiotr Pelplinski { 48c26c4e9fSPiotr Pelplinski cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); 49c26c4e9fSPiotr Pelplinski assert(false); 50c26c4e9fSPiotr Pelplinski } 51c26c4e9fSPiotr Pelplinski 52c26c4e9fSPiotr Pelplinski static void 53c26c4e9fSPiotr Pelplinski blob_bs_dev_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, 54f01146aeSJim Harris uint64_t lba, uint64_t lba_count, 55c26c4e9fSPiotr Pelplinski struct spdk_bs_dev_cb_args *cb_args) 56c26c4e9fSPiotr Pelplinski { 57c26c4e9fSPiotr Pelplinski cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM); 58c26c4e9fSPiotr Pelplinski assert(false); 59c26c4e9fSPiotr Pelplinski } 60c26c4e9fSPiotr Pelplinski 61c26c4e9fSPiotr Pelplinski static void 62c26c4e9fSPiotr Pelplinski blob_bs_dev_read_cpl(void *cb_arg, int bserrno) 63c26c4e9fSPiotr Pelplinski { 64c26c4e9fSPiotr Pelplinski struct spdk_bs_dev_cb_args *cb_args = (struct spdk_bs_dev_cb_args *)cb_arg; 65c26c4e9fSPiotr Pelplinski 66c26c4e9fSPiotr Pelplinski cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, bserrno); 67c26c4e9fSPiotr Pelplinski } 68c26c4e9fSPiotr Pelplinski 69c26c4e9fSPiotr Pelplinski static inline void 7000311abcSDiwakar Sharma zero_trailing_bytes(struct spdk_blob_bs_dev *b, struct iovec *iov, int iovcnt, 7100311abcSDiwakar Sharma uint64_t lba, uint32_t *lba_count) 7200311abcSDiwakar Sharma { 7300311abcSDiwakar Sharma uint32_t zero_lba_count; 7400311abcSDiwakar Sharma uint64_t zero_bytes, zero_len; 7500311abcSDiwakar Sharma uint64_t payload_bytes; 7600311abcSDiwakar Sharma uint64_t valid_bytes; 7700311abcSDiwakar Sharma void *zero_start; 7800311abcSDiwakar Sharma struct iovec *i; 7900311abcSDiwakar Sharma 8000311abcSDiwakar Sharma if (spdk_likely(lba + *lba_count <= b->bs_dev.blockcnt)) { 8100311abcSDiwakar Sharma return; 8200311abcSDiwakar Sharma } 8300311abcSDiwakar Sharma 8400311abcSDiwakar Sharma /* Figure out how many bytes in the payload will need to be zeroed. */ 8500311abcSDiwakar Sharma zero_lba_count = spdk_min(*lba_count, lba + *lba_count - b->bs_dev.blockcnt); 86b084cba0SMarcin Spiewak zero_bytes = zero_lba_count * (uint64_t)b->bs_dev.blocklen; 8700311abcSDiwakar Sharma 88b084cba0SMarcin Spiewak payload_bytes = *lba_count * (uint64_t)b->bs_dev.blocklen; 8900311abcSDiwakar Sharma valid_bytes = payload_bytes - zero_bytes; 9000311abcSDiwakar Sharma 9100311abcSDiwakar Sharma i = iov; 9200311abcSDiwakar Sharma while (zero_bytes > 0) { 9300311abcSDiwakar Sharma if (i->iov_len > valid_bytes) { 9400311abcSDiwakar Sharma zero_start = i->iov_base + valid_bytes; 9500311abcSDiwakar Sharma zero_len = spdk_min(payload_bytes, i->iov_len - valid_bytes); 9600311abcSDiwakar Sharma memset(zero_start, 0, zero_bytes); 9700311abcSDiwakar Sharma valid_bytes = 0; 9800311abcSDiwakar Sharma zero_bytes -= zero_len; 9900311abcSDiwakar Sharma } 10000311abcSDiwakar Sharma valid_bytes -= spdk_min(valid_bytes, i->iov_len); 10100311abcSDiwakar Sharma payload_bytes -= spdk_min(payload_bytes, i->iov_len); 10200311abcSDiwakar Sharma i++; 10300311abcSDiwakar Sharma } 10400311abcSDiwakar Sharma 10500311abcSDiwakar Sharma *lba_count -= zero_lba_count; 10600311abcSDiwakar Sharma } 10700311abcSDiwakar Sharma 10800311abcSDiwakar Sharma static inline void 109c26c4e9fSPiotr Pelplinski blob_bs_dev_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, 110c26c4e9fSPiotr Pelplinski uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) 111c26c4e9fSPiotr Pelplinski { 112c26c4e9fSPiotr Pelplinski struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; 11300311abcSDiwakar Sharma struct iovec iov; 11400311abcSDiwakar Sharma 11500311abcSDiwakar Sharma iov.iov_base = payload; 11600311abcSDiwakar Sharma iov.iov_len = lba_count * b->bs_dev.blocklen; 11700311abcSDiwakar Sharma /* The backing blob may be smaller than this blob, so zero any trailing bytes. */ 11800311abcSDiwakar Sharma zero_trailing_bytes(b, &iov, 1, lba, &lba_count); 119c26c4e9fSPiotr Pelplinski 120c26c4e9fSPiotr Pelplinski spdk_blob_io_read(b->blob, channel, payload, lba, lba_count, 121c26c4e9fSPiotr Pelplinski blob_bs_dev_read_cpl, cb_args); 122c26c4e9fSPiotr Pelplinski } 123c26c4e9fSPiotr Pelplinski 124c26c4e9fSPiotr Pelplinski static inline void 125c26c4e9fSPiotr Pelplinski blob_bs_dev_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, 126c26c4e9fSPiotr Pelplinski struct iovec *iov, int iovcnt, 127c26c4e9fSPiotr Pelplinski uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) 128c26c4e9fSPiotr Pelplinski { 129c26c4e9fSPiotr Pelplinski struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; 130c26c4e9fSPiotr Pelplinski 13100311abcSDiwakar Sharma /* The backing blob may be smaller than this blob, so zero any trailing bytes. */ 13200311abcSDiwakar Sharma zero_trailing_bytes(b, iov, iovcnt, lba, &lba_count); 13300311abcSDiwakar Sharma 134c26c4e9fSPiotr Pelplinski spdk_blob_io_readv(b->blob, channel, iov, iovcnt, lba, lba_count, 135c26c4e9fSPiotr Pelplinski blob_bs_dev_read_cpl, cb_args); 136c26c4e9fSPiotr Pelplinski } 137c26c4e9fSPiotr Pelplinski 138a2360845SAlexey Marchuk static inline void 139a2360845SAlexey Marchuk blob_bs_dev_readv_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, 140a2360845SAlexey Marchuk struct iovec *iov, int iovcnt, 141a2360845SAlexey Marchuk uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args, 142a2360845SAlexey Marchuk struct spdk_blob_ext_io_opts *ext_opts) 143a2360845SAlexey Marchuk { 144a2360845SAlexey Marchuk struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; 145a2360845SAlexey Marchuk 14600311abcSDiwakar Sharma /* The backing blob may be smaller than this blob, so zero any trailing bytes. */ 14700311abcSDiwakar Sharma zero_trailing_bytes(b, iov, iovcnt, lba, &lba_count); 14800311abcSDiwakar Sharma 149a2360845SAlexey Marchuk spdk_blob_io_readv_ext(b->blob, channel, iov, iovcnt, lba, lba_count, 150a2360845SAlexey Marchuk blob_bs_dev_read_cpl, cb_args, ext_opts); 151a2360845SAlexey Marchuk } 152a2360845SAlexey Marchuk 153c26c4e9fSPiotr Pelplinski static void 154c26c4e9fSPiotr Pelplinski blob_bs_dev_destroy_cpl(void *cb_arg, int bserrno) 155c26c4e9fSPiotr Pelplinski { 156c26c4e9fSPiotr Pelplinski if (bserrno != 0) { 157c26c4e9fSPiotr Pelplinski SPDK_ERRLOG("Error on blob_bs_dev destroy: %d", bserrno); 158c26c4e9fSPiotr Pelplinski } 159c26c4e9fSPiotr Pelplinski 160c26c4e9fSPiotr Pelplinski /* Free blob_bs_dev */ 161c26c4e9fSPiotr Pelplinski free(cb_arg); 162c26c4e9fSPiotr Pelplinski } 163c26c4e9fSPiotr Pelplinski 164c26c4e9fSPiotr Pelplinski static void 165c26c4e9fSPiotr Pelplinski blob_bs_dev_destroy(struct spdk_bs_dev *bs_dev) 166c26c4e9fSPiotr Pelplinski { 167c26c4e9fSPiotr Pelplinski struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)bs_dev; 168c26c4e9fSPiotr Pelplinski 169c26c4e9fSPiotr Pelplinski spdk_blob_close(b->blob, blob_bs_dev_destroy_cpl, b); 170c26c4e9fSPiotr Pelplinski } 171c26c4e9fSPiotr Pelplinski 1722e7a7fe5SEvgeniy Kochetov static bool 1732e7a7fe5SEvgeniy Kochetov blob_bs_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count) 1742e7a7fe5SEvgeniy Kochetov { 1752e7a7fe5SEvgeniy Kochetov struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; 1762e7a7fe5SEvgeniy Kochetov struct spdk_blob *blob = b->blob; 177b357ad48SDiwakar Sharma bool is_valid_range; 1782e7a7fe5SEvgeniy Kochetov 1792e7a7fe5SEvgeniy Kochetov assert(lba == bs_cluster_to_lba(blob->bs, bs_lba_to_cluster(blob->bs, lba))); 1802e7a7fe5SEvgeniy Kochetov assert(lba_count == bs_dev_byte_to_lba(dev, blob->bs->cluster_sz)); 1812e7a7fe5SEvgeniy Kochetov 1822e7a7fe5SEvgeniy Kochetov if (bs_io_unit_is_allocated(blob, lba)) { 1832e7a7fe5SEvgeniy Kochetov return false; 1842e7a7fe5SEvgeniy Kochetov } 1852e7a7fe5SEvgeniy Kochetov 1862e7a7fe5SEvgeniy Kochetov assert(blob->back_bs_dev != NULL); 187b357ad48SDiwakar Sharma is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev, lba, lba_count); 188b357ad48SDiwakar Sharma return is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev, 1892e7a7fe5SEvgeniy Kochetov bs_io_unit_to_back_dev_lba(blob, lba), 1902e7a7fe5SEvgeniy Kochetov bs_io_unit_to_back_dev_lba(blob, lba_count)); 1912e7a7fe5SEvgeniy Kochetov } 192c26c4e9fSPiotr Pelplinski 1939e843fdbSEvgeniy Kochetov static bool 19400311abcSDiwakar Sharma blob_bs_is_range_valid(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count) 19500311abcSDiwakar Sharma { 19600311abcSDiwakar Sharma struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; 19700311abcSDiwakar Sharma struct spdk_blob *blob = b->blob; 198*3299bf6dSJim Harris uint64_t io_units_per_cluster; 19900311abcSDiwakar Sharma 20000311abcSDiwakar Sharma /* The lba here is supposed to be the first lba of cluster. lba_count 20100311abcSDiwakar Sharma * will typically be fixed e.g. 8192 for 4MiB cluster. */ 20200311abcSDiwakar Sharma assert(lba_count == blob->bs->cluster_sz / dev->blocklen); 20300311abcSDiwakar Sharma assert(lba % lba_count == 0); 20400311abcSDiwakar Sharma 205*3299bf6dSJim Harris io_units_per_cluster = blob->bs->io_units_per_cluster; 20600311abcSDiwakar Sharma 20700311abcSDiwakar Sharma /* A blob will either have: 20800311abcSDiwakar Sharma * - no backing bs_bdev (normal thick blob), or 20900311abcSDiwakar Sharma * - zeroes backing bs_bdev (thin provisioned blob), or 21000311abcSDiwakar Sharma * - blob backing bs_bdev (e.g snapshot) 21100311abcSDiwakar Sharma * It may be possible that backing bs_bdev has lesser number of clusters 21200311abcSDiwakar Sharma * than the child lvol blob because lvol blob has been expanded after 213*3299bf6dSJim Harris * taking snapshot. In such a case, page will be outside the cluster io_unit 21400311abcSDiwakar Sharma * range of the backing dev. Always return true for zeroes backing bdev. */ 215*3299bf6dSJim Harris return lba < blob->active.num_clusters * io_units_per_cluster; 21600311abcSDiwakar Sharma } 21700311abcSDiwakar Sharma 21800311abcSDiwakar Sharma static bool 2199e843fdbSEvgeniy Kochetov blob_bs_translate_lba(struct spdk_bs_dev *dev, uint64_t lba, uint64_t *base_lba) 2209e843fdbSEvgeniy Kochetov { 2219e843fdbSEvgeniy Kochetov struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; 2229e843fdbSEvgeniy Kochetov struct spdk_blob *blob = b->blob; 223b357ad48SDiwakar Sharma bool is_valid_range; 2249e843fdbSEvgeniy Kochetov 2259e843fdbSEvgeniy Kochetov assert(base_lba != NULL); 2269e843fdbSEvgeniy Kochetov if (bs_io_unit_is_allocated(blob, lba)) { 2279e843fdbSEvgeniy Kochetov *base_lba = bs_blob_io_unit_to_lba(blob, lba); 2289e843fdbSEvgeniy Kochetov return true; 2299e843fdbSEvgeniy Kochetov } 2309e843fdbSEvgeniy Kochetov 2319e843fdbSEvgeniy Kochetov assert(blob->back_bs_dev != NULL); 232b357ad48SDiwakar Sharma /* Since here we don't get lba_count directly, passing lba_count derived 233b357ad48SDiwakar Sharma * from cluster_sz which typically happens for other calls like is_zeroes 234b357ad48SDiwakar Sharma * in CoW path. */ 235b357ad48SDiwakar Sharma is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev, lba, 236b357ad48SDiwakar Sharma bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz)); 237b357ad48SDiwakar Sharma return is_valid_range && blob->back_bs_dev->translate_lba(blob->back_bs_dev, 2389e843fdbSEvgeniy Kochetov bs_io_unit_to_back_dev_lba(blob, lba), 2399e843fdbSEvgeniy Kochetov base_lba); 2409e843fdbSEvgeniy Kochetov } 2419e843fdbSEvgeniy Kochetov 2428b3dcd61SMike Gerdts static bool 2438b3dcd61SMike Gerdts blob_bs_is_degraded(struct spdk_bs_dev *dev) 2448b3dcd61SMike Gerdts { 2458b3dcd61SMike Gerdts struct spdk_blob_bs_dev *b = (struct spdk_blob_bs_dev *)dev; 2468b3dcd61SMike Gerdts 2478b3dcd61SMike Gerdts return spdk_blob_is_degraded(b->blob); 2488b3dcd61SMike Gerdts } 2498b3dcd61SMike Gerdts 250c26c4e9fSPiotr Pelplinski struct spdk_bs_dev * 251ad7fdd12SSeth Howell bs_create_blob_bs_dev(struct spdk_blob *blob) 252c26c4e9fSPiotr Pelplinski { 253c26c4e9fSPiotr Pelplinski struct spdk_blob_bs_dev *b; 254c26c4e9fSPiotr Pelplinski 255c26c4e9fSPiotr Pelplinski b = calloc(1, sizeof(*b)); 256c26c4e9fSPiotr Pelplinski if (b == NULL) { 257c26c4e9fSPiotr Pelplinski return NULL; 258c26c4e9fSPiotr Pelplinski } 259c26c4e9fSPiotr Pelplinski /* snapshot blob */ 260*3299bf6dSJim Harris b->bs_dev.blockcnt = blob->active.num_clusters * blob->bs->io_units_per_cluster; 2616609b776SPiotr Pelplinski b->bs_dev.blocklen = spdk_bs_get_io_unit_size(blob->bs); 262c26c4e9fSPiotr Pelplinski b->bs_dev.create_channel = NULL; 263c26c4e9fSPiotr Pelplinski b->bs_dev.destroy_channel = NULL; 264c26c4e9fSPiotr Pelplinski b->bs_dev.destroy = blob_bs_dev_destroy; 265c26c4e9fSPiotr Pelplinski b->bs_dev.write = blob_bs_dev_write; 266c26c4e9fSPiotr Pelplinski b->bs_dev.writev = blob_bs_dev_writev; 267a2360845SAlexey Marchuk b->bs_dev.writev_ext = blob_bs_dev_writev_ext; 268c26c4e9fSPiotr Pelplinski b->bs_dev.read = blob_bs_dev_read; 269c26c4e9fSPiotr Pelplinski b->bs_dev.readv = blob_bs_dev_readv; 270a2360845SAlexey Marchuk b->bs_dev.readv_ext = blob_bs_dev_readv_ext; 271c26c4e9fSPiotr Pelplinski b->bs_dev.write_zeroes = blob_bs_dev_write_zeroes; 272c26c4e9fSPiotr Pelplinski b->bs_dev.unmap = blob_bs_dev_unmap; 2732e7a7fe5SEvgeniy Kochetov b->bs_dev.is_zeroes = blob_bs_is_zeroes; 27400311abcSDiwakar Sharma b->bs_dev.is_range_valid = blob_bs_is_range_valid; 2759e843fdbSEvgeniy Kochetov b->bs_dev.translate_lba = blob_bs_translate_lba; 2768b3dcd61SMike Gerdts b->bs_dev.is_degraded = blob_bs_is_degraded; 277c26c4e9fSPiotr Pelplinski b->blob = blob; 278c26c4e9fSPiotr Pelplinski 279c26c4e9fSPiotr Pelplinski return &b->bs_dev; 280c26c4e9fSPiotr Pelplinski } 281