/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2017 Intel Corporation.
 * All rights reserved.
 * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "bdev_malloc.h"
#include "spdk/endian.h"
#include "spdk/env.h"
#include "spdk/accel.h"
#include "spdk/dma.h"
#include "spdk/likely.h"
#include "spdk/string.h"

#include "spdk/log.h"

struct malloc_disk {
	struct spdk_bdev		disk;
	void				*malloc_buf;
	void				*malloc_md_buf;
	TAILQ_ENTRY(malloc_disk)	link;
};

struct malloc_task {
	struct iovec			iov;
	int				num_outstanding;
	enum spdk_bdev_io_status	status;
	TAILQ_ENTRY(malloc_task)	tailq;
};

struct malloc_channel {
	struct spdk_io_channel		*accel_channel;
	struct spdk_poller		*completion_poller;
	TAILQ_HEAD(, malloc_task)	completed_tasks;
};

static int
_malloc_verify_pi(struct spdk_bdev_io *bdev_io, struct iovec *iovs, int iovcnt,
		  void *md_buf)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	assert(bdev_io->u.bdev.memory_domain == NULL);
	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = bdev->dif_pi_format;
	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen,
			       bdev->md_len,
			       bdev->md_interleave,
			       bdev->dif_is_head_of_md,
			       bdev->dif_type,
			       bdev_io->u.bdev.dif_check_flags,
			       bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF,
			       0xFFFF, 0, 0, 0, &dif_opts);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to initialize DIF/DIX context\n");
		return rc;
	}

	if (spdk_bdev_is_md_interleaved(bdev)) {
		rc = spdk_dif_verify(iovs,
				     iovcnt,
				     bdev_io->u.bdev.num_blocks,
				     &dif_ctx,
				     &err_blk);
	} else {
		struct iovec md_iov = {
			.iov_base	= md_buf,
			.iov_len	= bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		if (bdev_io->u.bdev.md_buf == NULL) {
			return 0;
		}

		rc = spdk_dix_verify(iovs,
				     iovcnt,
				     &md_iov,
				     bdev_io->u.bdev.num_blocks,
				     &dif_ctx,
				     &err_blk);
	}

	if (rc != 0) {
		SPDK_ERRLOG("DIF/DIX verify failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", "
			    "err_type %u, expected %lu, actual %lu, err_offset %u\n",
			    bdev_io->u.bdev.offset_blocks,
			    bdev_io->u.bdev.num_blocks,
			    err_blk.err_type,
			    err_blk.expected,
			    err_blk.actual,
			    err_blk.err_offset);
	}

	return rc;
}

static int
malloc_verify_pi_io_buf(struct spdk_bdev_io *bdev_io)
{
	return _malloc_verify_pi(bdev_io,
				 bdev_io->u.bdev.iovs,
				 bdev_io->u.bdev.iovcnt,
				 bdev_io->u.bdev.md_buf);
}

static int
malloc_verify_pi_malloc_buf(struct spdk_bdev_io *bdev_io)
{
	struct iovec iov;
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct malloc_disk *mdisk = bdev->ctxt;
	uint64_t len, offset;

	len = bdev_io->u.bdev.num_blocks * bdev->blocklen;
	offset = bdev_io->u.bdev.offset_blocks * bdev->blocklen;

	iov.iov_base = mdisk->malloc_buf + offset;
	iov.iov_len = len;

	return _malloc_verify_pi(bdev_io, &iov, 1, NULL);
}

static int
malloc_unmap_write_zeroes_generate_pi(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct malloc_disk *mdisk = bdev_io->bdev->ctxt;
	uint32_t block_size = bdev_io->bdev->blocklen;
	uint32_t dif_check_flags;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_ctx_init_ext_opts dif_opts;
	int rc;

	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = bdev->dif_pi_format;
	dif_check_flags = bdev->dif_check_flags |
			  SPDK_DIF_FLAGS_REFTAG_CHECK |
			  SPDK_DIF_FLAGS_APPTAG_CHECK;
	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen,
			       bdev->md_len,
			       bdev->md_interleave,
			       bdev->dif_is_head_of_md,
			       bdev->dif_type,
			       dif_check_flags,
			       SPDK_DIF_REFTAG_IGNORE,
			       0xFFFF, SPDK_DIF_APPTAG_IGNORE,
			       0, 0, &dif_opts);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF/DIX context failed\n");
		return rc;
	}

	if (bdev->md_interleave) {
		struct iovec iov = {
			.iov_base	= mdisk->malloc_buf + bdev_io->u.bdev.offset_blocks * block_size,
			.iov_len	= bdev_io->u.bdev.num_blocks * block_size,
		};

		rc = spdk_dif_generate(&iov, 1, bdev_io->u.bdev.num_blocks, &dif_ctx);
	} else {
		struct iovec iov = {
			.iov_base	= mdisk->malloc_buf + bdev_io->u.bdev.offset_blocks * block_size,
			.iov_len	= bdev_io->u.bdev.num_blocks * block_size,
		};

		struct iovec md_iov = {
			.iov_base	= mdisk->malloc_md_buf + bdev_io->u.bdev.offset_blocks * bdev->md_len,
			.iov_len	= bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		rc = spdk_dix_generate(&iov, 1, &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx);
	}

	if (rc != 0) {
		SPDK_ERRLOG("Formatting by DIF/DIX failed\n");
	}

	return rc;
}

static void
malloc_done(void *ref, int status)
{
	struct malloc_task *task = (struct malloc_task *)ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(task);
	int rc;

	if (status != 0) {
		if (status == -ENOMEM) {
			if (task->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
				task->status = SPDK_BDEV_IO_STATUS_NOMEM;
			}
		} else {
			task->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}

	if (--task->num_outstanding != 0) {
		return;
	}

	if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE &&
	    task->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_READ:
			if (!spdk_bdev_io_hide_metadata(bdev_io)) {
				rc = malloc_verify_pi_io_buf(bdev_io);
			} else {
				rc = 0;
			}
			break;
		case SPDK_BDEV_IO_TYPE_WRITE:
			if (!spdk_bdev_io_hide_metadata(bdev_io)) {
				rc = 0;
			} else {
				rc = malloc_verify_pi_malloc_buf(bdev_io);
			}
			break;
		case SPDK_BDEV_IO_TYPE_UNMAP:
		case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
			rc = malloc_unmap_write_zeroes_generate_pi(bdev_io);
			break;
		default:
			rc = 0;
			break;
		}

		if (rc != 0) {
			task->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}

	assert(!bdev_io->u.bdev.accel_sequence || task->status == SPDK_BDEV_IO_STATUS_NOMEM);
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
}

static void
malloc_complete_task(struct malloc_task *task, struct malloc_channel *mch,
		     enum spdk_bdev_io_status status)
{
	task->status = status;
	TAILQ_INSERT_TAIL(&mch->completed_tasks, task, tailq);
}

static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks);

int malloc_disk_count = 0;

static int bdev_malloc_initialize(void);
static void bdev_malloc_deinitialize(void);

static int
bdev_malloc_get_ctx_size(void)
{
	return sizeof(struct malloc_task);
}

static struct spdk_bdev_module malloc_if = {
	.name = "malloc",
	.module_init = bdev_malloc_initialize,
	.module_fini = bdev_malloc_deinitialize,
	.get_ctx_size = bdev_malloc_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if)

static void
malloc_disk_free(struct malloc_disk *malloc_disk)
{
	if (!malloc_disk) {
		return;
	}

	free(malloc_disk->disk.name);
	spdk_free(malloc_disk->malloc_buf);
	spdk_free(malloc_disk->malloc_md_buf);
	free(malloc_disk);
}

static int
bdev_malloc_destruct(void *ctx)
{
	struct malloc_disk *malloc_disk = ctx;

	TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link);
	malloc_disk_free(malloc_disk);
	return 0;
}

static int
bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes)
{
	int i;

	for (i = 0; i < iovcnt; i++) {
		if (nbytes < iovs[i].iov_len) {
			return 0;
		}

		nbytes -= iovs[i].iov_len;
	}

	return nbytes != 0;
}

static size_t
malloc_get_md_len(struct spdk_bdev_io *bdev_io)
{
	return bdev_io->u.bdev.num_blocks * bdev_io->bdev->md_len;
}

static uint64_t
malloc_get_md_offset(struct spdk_bdev_io *bdev_io)
{
	return bdev_io->u.bdev.offset_blocks * bdev_io->bdev->md_len;
}

static void *
malloc_get_md_buf(struct spdk_bdev_io *bdev_io)
{
	struct malloc_disk *mdisk = SPDK_CONTAINEROF(bdev_io->bdev, struct malloc_disk, disk);

	assert(spdk_bdev_is_md_separate(bdev_io->bdev));

	return (char *)mdisk->malloc_md_buf + malloc_get_md_offset(bdev_io);
}

static void
malloc_sequence_fail(struct malloc_task *task, int status)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(task);

	/* For ENOMEM, the IO will be retried by the bdev layer, so we don't abort the sequence */
	if (status != -ENOMEM) {
		spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
		bdev_io->u.bdev.accel_sequence = NULL;
	}

	malloc_done(task, status);
}

static void
malloc_sequence_done(void *ctx, int status)
{
	struct malloc_task *task = ctx;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(task);

	bdev_io->u.bdev.accel_sequence = NULL;
	/* Prevent bdev layer from retrying the request if the sequence failed with ENOMEM */
	malloc_done(task, status != -ENOMEM ? status : -EFAULT);
}
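
/*
 * Read path: a copy from the malloc backing buffer into the request's iovecs is
 * appended to the I/O's accel sequence, the sequence is reversed so that this copy
 * executes before any operations the caller already appended, and the sequence is
 * then finished. Separate metadata, if present, is copied with a standalone accel
 * copy and tracked through task->num_outstanding.
 */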
static void
bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
		  struct malloc_task *task, struct spdk_bdev_io *bdev_io)
{
	uint64_t len, offset;
	int res = 0;

	len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
	offset = bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen;

	if (bdev_malloc_check_iov_len(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, len)) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
				      SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	task->num_outstanding = 0;
	task->iov.iov_base = mdisk->malloc_buf + offset;
	task->iov.iov_len = len;

	SPDK_DEBUGLOG(bdev_malloc, "read %zu bytes from offset %#" PRIx64 ", iovcnt=%d\n",
		      len, offset, bdev_io->u.bdev.iovcnt);

	task->num_outstanding++;
	res = spdk_accel_append_copy(&bdev_io->u.bdev.accel_sequence, ch,
				     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     bdev_io->u.bdev.memory_domain,
				     bdev_io->u.bdev.memory_domain_ctx,
				     &task->iov, 1, NULL, NULL, NULL, NULL);
	if (spdk_unlikely(res != 0)) {
		malloc_sequence_fail(task, res);
		return;
	}

	spdk_accel_sequence_reverse(bdev_io->u.bdev.accel_sequence);
	spdk_accel_sequence_finish(bdev_io->u.bdev.accel_sequence, malloc_sequence_done, task);

	if (bdev_io->u.bdev.md_buf == NULL) {
		return;
	}

	SPDK_DEBUGLOG(bdev_malloc, "read metadata %zu bytes from offset %#" PRIx64 "\n",
		      malloc_get_md_len(bdev_io), malloc_get_md_offset(bdev_io));

	task->num_outstanding++;
	res = spdk_accel_submit_copy(ch, bdev_io->u.bdev.md_buf, malloc_get_md_buf(bdev_io),
				     malloc_get_md_len(bdev_io), malloc_done, task);
	if (res != 0) {
		malloc_done(task, res);
	}
}
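
/*
 * Write path: mirrors the read path, appending a copy from the request's iovecs
 * into the malloc backing buffer onto the I/O's accel sequence (no reversal, so the
 * caller's operations run first). Separate metadata is copied with a standalone
 * accel copy.
 */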
static void
bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
		   struct malloc_task *task, struct spdk_bdev_io *bdev_io)
{
	uint64_t len, offset;
	int res = 0;

	len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
	offset = bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen;

	if (bdev_malloc_check_iov_len(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, len)) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
				      SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	task->num_outstanding = 0;
	task->iov.iov_base = mdisk->malloc_buf + offset;
	task->iov.iov_len = len;

	SPDK_DEBUGLOG(bdev_malloc, "write %zu bytes to offset %#" PRIx64 ", iovcnt=%d\n",
		      len, offset, bdev_io->u.bdev.iovcnt);

	task->num_outstanding++;
	res = spdk_accel_append_copy(&bdev_io->u.bdev.accel_sequence, ch, &task->iov, 1, NULL, NULL,
				     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     bdev_io->u.bdev.memory_domain,
				     bdev_io->u.bdev.memory_domain_ctx, NULL, NULL);
	if (spdk_unlikely(res != 0)) {
		malloc_sequence_fail(task, res);
		return;
	}

	spdk_accel_sequence_finish(bdev_io->u.bdev.accel_sequence, malloc_sequence_done, task);

	if (bdev_io->u.bdev.md_buf == NULL) {
		return;
	}

	SPDK_DEBUGLOG(bdev_malloc, "write metadata %zu bytes to offset %#" PRIx64 "\n",
		      malloc_get_md_len(bdev_io), malloc_get_md_offset(bdev_io));

	task->num_outstanding++;
	res = spdk_accel_submit_copy(ch, malloc_get_md_buf(bdev_io), bdev_io->u.bdev.md_buf,
				     malloc_get_md_len(bdev_io), malloc_done, task);
	if (res != 0) {
		malloc_done(task, res);
	}
}

static int
bdev_malloc_unmap(struct malloc_disk *mdisk,
		  struct spdk_io_channel *ch,
		  struct malloc_task *task,
		  uint64_t offset,
		  uint64_t byte_count)
{
	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	task->num_outstanding = 1;

	return spdk_accel_submit_fill(ch, mdisk->malloc_buf + offset, 0,
				      byte_count, malloc_done, task);
}

static void
bdev_malloc_copy(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
		 struct malloc_task *task,
		 uint64_t dst_offset, uint64_t src_offset, size_t len)
{
	int64_t res = 0;
	void *dst = mdisk->malloc_buf + dst_offset;
	void *src = mdisk->malloc_buf + src_offset;

	SPDK_DEBUGLOG(bdev_malloc, "Copy %zu bytes from offset %#" PRIx64 " to offset %#" PRIx64 "\n",
		      len, src_offset, dst_offset);

	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	task->num_outstanding = 1;

	res = spdk_accel_submit_copy(ch, dst, src, len, malloc_done, task);
	if (res != 0) {
		malloc_done(task, res);
	}
}
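
/*
 * Dispatches a bdev I/O to the handlers above. Reads with no buffer provided and
 * zero-copy requests are served directly out of the backing buffer, and requests
 * that never touch the accel framework are completed from the per-channel
 * completion poller via malloc_complete_task().
 */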
static int
_bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bdev_io)
{
	struct malloc_task *task = (struct malloc_task *)bdev_io->driver_ctx;
	struct malloc_disk *disk = bdev_io->bdev->ctxt;
	uint32_t block_size = bdev_io->bdev->blocklen;
	int rc;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
			assert(bdev_io->u.bdev.iovcnt == 1);
			assert(bdev_io->u.bdev.memory_domain == NULL);
			bdev_io->u.bdev.iovs[0].iov_base =
				disk->malloc_buf + bdev_io->u.bdev.offset_blocks * block_size;
			bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size;
			if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
				spdk_bdev_io_set_md_buf(bdev_io, malloc_get_md_buf(bdev_io),
							malloc_get_md_len(bdev_io));
			}
			malloc_complete_task(task, mch, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE &&
		    spdk_bdev_io_hide_metadata(bdev_io)) {
			rc = malloc_verify_pi_malloc_buf(bdev_io);
			if (rc != 0) {
				malloc_complete_task(task, mch, SPDK_BDEV_IO_STATUS_FAILED);
				return 0;
			}
		}

		bdev_malloc_readv(disk, mch->accel_channel, task, bdev_io);
		return 0;

	case SPDK_BDEV_IO_TYPE_WRITE:
		if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE &&
		    !spdk_bdev_io_hide_metadata(bdev_io)) {
			rc = malloc_verify_pi_io_buf(bdev_io);
			if (rc != 0) {
				malloc_complete_task(task, mch, SPDK_BDEV_IO_STATUS_FAILED);
				return 0;
			}
		}

		bdev_malloc_writev(disk, mch->accel_channel, task, bdev_io);
		return 0;

	case SPDK_BDEV_IO_TYPE_RESET:
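		/* A purely RAM-backed device has no state to reset; complete the request immediately. */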
		malloc_complete_task(task, mch, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;

	case SPDK_BDEV_IO_TYPE_FLUSH:
		malloc_complete_task(task, mch, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;

	case SPDK_BDEV_IO_TYPE_UNMAP:
		return bdev_malloc_unmap(disk, mch->accel_channel, task,
					 bdev_io->u.bdev.offset_blocks * block_size,
					 bdev_io->u.bdev.num_blocks * block_size);

	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		/* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */
		return bdev_malloc_unmap(disk, mch->accel_channel, task,
					 bdev_io->u.bdev.offset_blocks * block_size,
					 bdev_io->u.bdev.num_blocks * block_size);

	case SPDK_BDEV_IO_TYPE_ZCOPY:
		if (bdev_io->u.bdev.zcopy.start) {
			void *buf;
			size_t len;

			buf = disk->malloc_buf + bdev_io->u.bdev.offset_blocks * block_size;
			len = bdev_io->u.bdev.num_blocks * block_size;
			spdk_bdev_io_set_buf(bdev_io, buf, len);
			if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
				spdk_bdev_io_set_md_buf(bdev_io, malloc_get_md_buf(bdev_io),
							malloc_get_md_len(bdev_io));
			}
		}
		malloc_complete_task(task, mch, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;
	case SPDK_BDEV_IO_TYPE_ABORT:
		malloc_complete_task(task, mch, SPDK_BDEV_IO_STATUS_FAILED);
		return 0;
	case SPDK_BDEV_IO_TYPE_COPY:
		bdev_malloc_copy(disk, mch->accel_channel, task,
				 bdev_io->u.bdev.offset_blocks * block_size,
				 bdev_io->u.bdev.copy.src_offset_blocks * block_size,
				 bdev_io->u.bdev.num_blocks * block_size);
		return 0;

	default:
		return -1;
	}
	return 0;
}

static void
bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct malloc_channel *mch = spdk_io_channel_get_ctx(ch);

	if (_bdev_malloc_submit_request(mch, bdev_io) != 0) {
		malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch,
				     SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static bool
bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_ZCOPY:
	case SPDK_BDEV_IO_TYPE_ABORT:
	case SPDK_BDEV_IO_TYPE_COPY:
		return true;

	default:
		return false;
	}
}

static struct spdk_io_channel *
bdev_malloc_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(&g_malloc_disks);
}

static void
bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_malloc_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_json_write_named_uint32(w, "physical_block_size", bdev->phys_blocklen);
	spdk_json_write_named_uuid(w, "uuid", &bdev->uuid);
	spdk_json_write_named_uint32(w, "optimal_io_boundary", bdev->optimal_io_boundary);
	spdk_json_write_named_uint32(w, "md_size", bdev->md_len);
	spdk_json_write_named_uint32(w, "dif_type", bdev->dif_type);
	spdk_json_write_named_bool(w, "dif_is_head_of_md", bdev->dif_is_head_of_md);
	spdk_json_write_named_uint32(w, "dif_pi_format", bdev->dif_pi_format);

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static int
bdev_malloc_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
{
	struct malloc_disk *malloc_disk = ctx;
	struct spdk_memory_domain *domain;
	int num_domains = 0;

	if (malloc_disk->disk.dif_type != SPDK_DIF_DISABLE) {
		return 0;
	}

	/* Report support for every memory domain */
	for (domain = spdk_memory_domain_get_first(NULL); domain != NULL;
	     domain = spdk_memory_domain_get_next(domain, NULL)) {
		if (domains != NULL && num_domains < array_size) {
			domains[num_domains] = domain;
		}
		num_domains++;
	}

	return num_domains;
}

static bool
bdev_malloc_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
{
	switch (type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		return true;
	default:
		return false;
	}
}

static const struct spdk_bdev_fn_table malloc_fn_table = {
	.destruct = bdev_malloc_destruct,
	.submit_request = bdev_malloc_submit_request,
	.io_type_supported = bdev_malloc_io_type_supported,
	.get_io_channel = bdev_malloc_get_io_channel,
	.write_config_json = bdev_malloc_write_json_config,
	.get_memory_domains = bdev_malloc_get_memory_domains,
	.accel_sequence_supported = bdev_malloc_accel_sequence_supported,
};

static int
malloc_disk_setup_pi(struct malloc_disk *mdisk)
{
	struct spdk_bdev *bdev = &mdisk->disk;
	struct spdk_dif_ctx dif_ctx;
	struct iovec iov, md_iov;
	uint32_t dif_check_flags;
	int rc;
	struct spdk_dif_ctx_init_ext_opts dif_opts;

	dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
	dif_opts.dif_pi_format = bdev->dif_pi_format;
	/* Set APPTAG|REFTAG_IGNORE to PI fields after creation of malloc bdev */
	dif_check_flags = bdev->dif_check_flags | SPDK_DIF_FLAGS_REFTAG_CHECK |
			  SPDK_DIF_FLAGS_APPTAG_CHECK;
	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen,
			       bdev->md_len,
			       bdev->md_interleave,
			       bdev->dif_is_head_of_md,
			       bdev->dif_type,
			       dif_check_flags,
			       SPDK_DIF_REFTAG_IGNORE,
			       0xFFFF, SPDK_DIF_APPTAG_IGNORE,
			       0, 0, &dif_opts);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF/DIX context failed\n");
		return rc;
	}

	iov.iov_base = mdisk->malloc_buf;
	iov.iov_len = bdev->blockcnt * bdev->blocklen;

	if (mdisk->disk.md_interleave) {
		rc = spdk_dif_generate(&iov, 1, bdev->blockcnt, &dif_ctx);
	} else {
		md_iov.iov_base = mdisk->malloc_md_buf;
		md_iov.iov_len = bdev->blockcnt * bdev->md_len;

		rc = spdk_dix_generate(&iov, 1, &md_iov, bdev->blockcnt, &dif_ctx);
	}

	if (rc != 0) {
		SPDK_ERRLOG("Formatting by DIF/DIX failed\n");
	}

	return rc;
}

int
create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
{
	struct malloc_disk *mdisk;
	uint32_t block_size;
	int rc;

	assert(opts != NULL);

	if (opts->num_blocks == 0) {
		SPDK_ERRLOG("Disk num_blocks must be greater than 0");
		return -EINVAL;
	}

	if (opts->block_size % 512) {
		SPDK_ERRLOG("Data block size must be 512 bytes aligned\n");
		return -EINVAL;
	}

	if (opts->physical_block_size % 512) {
		SPDK_ERRLOG("Physical block must be 512 bytes aligned\n");
		return -EINVAL;
	}

	switch (opts->md_size) {
	case 0:
	case 8:
	case 16:
	case 32:
	case 64:
	case 128:
		break;
	default:
		SPDK_ERRLOG("metadata size %u is not supported\n", opts->md_size);
		return -EINVAL;
	}

	if (opts->md_interleave) {
		block_size = opts->block_size + opts->md_size;
	} else {
		block_size = opts->block_size;
	}

	mdisk = calloc(1, sizeof(*mdisk));
	if (!mdisk) {
		SPDK_ERRLOG("mdisk calloc() failed\n");
		return -ENOMEM;
	}

	/*
	 * Allocate the large backend memory buffer from pinned memory.
	 *
	 * TODO: need to pass a hint so we know which socket to allocate
	 * from on multi-socket systems.
	 */
	mdisk->malloc_buf = spdk_zmalloc(opts->num_blocks * block_size, 2 * 1024 * 1024, NULL,
					 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (!mdisk->malloc_buf) {
		SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n");
		malloc_disk_free(mdisk);
		return -ENOMEM;
	}

	if (!opts->md_interleave && opts->md_size != 0) {
		mdisk->malloc_md_buf = spdk_zmalloc(opts->num_blocks * opts->md_size, 2 * 1024 * 1024, NULL,
						    SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!mdisk->malloc_md_buf) {
			SPDK_ERRLOG("malloc_md_buf spdk_zmalloc() failed\n");
			malloc_disk_free(mdisk);
			return -ENOMEM;
		}
	}

	if (opts->name) {
		mdisk->disk.name = strdup(opts->name);
	} else {
		/* Auto-generate a name */
		mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count);
		malloc_disk_count++;
	}
	if (!mdisk->disk.name) {
		malloc_disk_free(mdisk);
		return -ENOMEM;
	}
	mdisk->disk.product_name = "Malloc disk";

	mdisk->disk.write_cache = 1;
	mdisk->disk.blocklen = block_size;
	mdisk->disk.phys_blocklen = opts->physical_block_size;
	mdisk->disk.blockcnt = opts->num_blocks;
	mdisk->disk.md_len = opts->md_size;
	mdisk->disk.md_interleave = opts->md_interleave;
	mdisk->disk.dif_type = opts->dif_type;
	mdisk->disk.dif_is_head_of_md = opts->dif_is_head_of_md;
	/* Current block device layer API does not propagate
	 * any DIF related information from user. So, we can
	 * not generate or verify Application Tag.
	 */
	switch (opts->dif_type) {
	case SPDK_DIF_TYPE1:
	case SPDK_DIF_TYPE2:
		mdisk->disk.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK |
					      SPDK_DIF_FLAGS_REFTAG_CHECK;
		break;
	case SPDK_DIF_TYPE3:
		mdisk->disk.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK;
		break;
	case SPDK_DIF_DISABLE:
		break;
	}
	mdisk->disk.dif_pi_format = opts->dif_pi_format;

	if (opts->dif_type != SPDK_DIF_DISABLE) {
		rc = malloc_disk_setup_pi(mdisk);
		if (rc) {
			SPDK_ERRLOG("Failed to set up protection information.\n");
			malloc_disk_free(mdisk);
			return rc;
		}
	}

	if (opts->optimal_io_boundary) {
		mdisk->disk.optimal_io_boundary = opts->optimal_io_boundary;
		mdisk->disk.split_on_optimal_io_boundary = true;
	}
	if (!spdk_uuid_is_null(&opts->uuid)) {
		spdk_uuid_copy(&mdisk->disk.uuid, &opts->uuid);
	}

	mdisk->disk.max_copy = 0;
	mdisk->disk.ctxt = mdisk;
	mdisk->disk.fn_table = &malloc_fn_table;
	mdisk->disk.module = &malloc_if;

	rc = spdk_bdev_register(&mdisk->disk);
	if (rc) {
		malloc_disk_free(mdisk);
		return rc;
	}

	*bdev = &(mdisk->disk);

	TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link);

	return rc;
}

void
delete_malloc_disk(const char *name, spdk_delete_malloc_complete cb_fn, void *cb_arg)
{
	int rc;

	rc = spdk_bdev_unregister_by_name(name, &malloc_if, cb_fn, cb_arg);
	if (rc != 0) {
		cb_fn(cb_arg, rc);
	}
}

static int
malloc_completion_poller(void *ctx)
{
	struct malloc_channel *ch = ctx;
	struct malloc_task *task;
	TAILQ_HEAD(, malloc_task) completed_tasks;
	uint32_t num_completions = 0;

	TAILQ_INIT(&completed_tasks);
	TAILQ_SWAP(&completed_tasks, &ch->completed_tasks, malloc_task, tailq);

	while (!TAILQ_EMPTY(&completed_tasks)) {
		task = TAILQ_FIRST(&completed_tasks);
		TAILQ_REMOVE(&completed_tasks, task, tailq);
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
		num_completions++;
	}

	return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
malloc_create_channel_cb(void *io_device, void *ctx)
{
	struct malloc_channel *ch = ctx;

	ch->accel_channel = spdk_accel_get_io_channel();
	if (!ch->accel_channel) {
		SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
		return -ENOMEM;
	}

	ch->completion_poller = SPDK_POLLER_REGISTER(malloc_completion_poller, ch, 0);
	if (!ch->completion_poller) {
		SPDK_ERRLOG("Failed to register malloc completion poller\n");
		spdk_put_io_channel(ch->accel_channel);
		return -ENOMEM;
	}

	TAILQ_INIT(&ch->completed_tasks);

	return 0;
}

static void
malloc_destroy_channel_cb(void *io_device, void *ctx)
{
	struct malloc_channel *ch = ctx;

	assert(TAILQ_EMPTY(&ch->completed_tasks));

	spdk_put_io_channel(ch->accel_channel);
	spdk_poller_unregister(&ch->completion_poller);
}

static int
bdev_malloc_initialize(void)
{
	/* This needs to be reset for each reinitialization of submodules.
	 * Otherwise after enough devices or reinitializations the value gets too high.
	 * TODO: Make malloc bdev name mandatory and remove this counter. */
	malloc_disk_count = 0;

	spdk_io_device_register(&g_malloc_disks, malloc_create_channel_cb,
				malloc_destroy_channel_cb, sizeof(struct malloc_channel),
				"bdev_malloc");

	return 0;
}

static void
bdev_malloc_deinitialize(void)
{
	spdk_io_device_unregister(&g_malloc_disks, NULL);
}

SPDK_LOG_REGISTER_COMPONENT(bdev_malloc)
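
/*
 * Illustrative example (not part of the module): a malloc bdev is typically created
 * through the "bdev_malloc_create" RPC, whose parameters correspond to the fields
 * emitted by bdev_malloc_write_json_config() above. The values below are arbitrary.
 *
 *   {
 *     "method": "bdev_malloc_create",
 *     "params": {
 *       "name": "Malloc0",
 *       "num_blocks": 32768,
 *       "block_size": 512
 *     }
 *   }
 */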