/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "bdev_malloc.h"
#include "spdk/bdev.h"
#include "spdk/endian.h"
#include "spdk/env.h"
#include "spdk/accel_engine.h"
#include "spdk/json.h"
#include "spdk/thread.h"
#include "spdk/queue.h"
#include "spdk/string.h"

#include "spdk/bdev_module.h"
#include "spdk/log.h"

struct malloc_disk {
	struct spdk_bdev		disk;
	void				*malloc_buf;
	TAILQ_ENTRY(malloc_disk)	link;
};

struct malloc_task {
	int				num_outstanding;
	enum spdk_bdev_io_status	status;
};
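/*
 * Completion callback shared by all accel operations submitted for a single
 * bdev_io. A readv/writev fans out into one accel copy per iovec, so
 * num_outstanding counts those submissions and the bdev_io is completed only
 * when the last one finishes. A failure on any operation downgrades the final
 * status, with -ENOMEM mapped to SPDK_BDEV_IO_STATUS_NOMEM so the bdev layer
 * can retry the I/O later.
 */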
static void
malloc_done(void *ref, int status)
{
	struct malloc_task *task = (struct malloc_task *)ref;

	if (status != 0) {
		if (status == -ENOMEM) {
			task->status = SPDK_BDEV_IO_STATUS_NOMEM;
		} else {
			task->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}

	if (--task->num_outstanding == 0) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
	}
}

static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks);

int malloc_disk_count = 0;

static int bdev_malloc_initialize(void);

static int
bdev_malloc_get_ctx_size(void)
{
	return sizeof(struct malloc_task);
}

static struct spdk_bdev_module malloc_if = {
	.name = "malloc",
	.module_init = bdev_malloc_initialize,
	.get_ctx_size = bdev_malloc_get_ctx_size,
};

SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if)

static void
malloc_disk_free(struct malloc_disk *malloc_disk)
{
	if (!malloc_disk) {
		return;
	}

	free(malloc_disk->disk.name);
	spdk_free(malloc_disk->malloc_buf);
	free(malloc_disk);
}

static int
bdev_malloc_destruct(void *ctx)
{
	struct malloc_disk *malloc_disk = ctx;

	TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link);
	malloc_disk_free(malloc_disk);
	return 0;
}

static int
bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes)
{
	int i;

	for (i = 0; i < iovcnt; i++) {
		if (nbytes < iovs[i].iov_len) {
			return 0;
		}

		nbytes -= iovs[i].iov_len;
	}

	return nbytes != 0;
}

static void
bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
		  struct malloc_task *task,
		  struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
{
	int64_t res = 0;
	void *src = mdisk->malloc_buf + offset;
	int i;

	if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
				      SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(bdev_malloc, "read %zu bytes from offset %#" PRIx64 "\n",
		      len, offset);

	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	task->num_outstanding = iovcnt;

	for (i = 0; i < iovcnt; i++) {
		res = spdk_accel_submit_copy(ch, iov[i].iov_base,
					     src, iov[i].iov_len, malloc_done, task);

		if (res != 0) {
			malloc_done(task, res);
		}

		src += iov[i].iov_len;
		len -= iov[i].iov_len;
	}
}

static void
bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
		   struct malloc_task *task,
		   struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
{
	int64_t res = 0;
	void *dst = mdisk->malloc_buf + offset;
	int i;

	if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
				      SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	SPDK_DEBUGLOG(bdev_malloc, "wrote %zu bytes to offset %#" PRIx64 "\n",
		      len, offset);

	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	task->num_outstanding = iovcnt;

	for (i = 0; i < iovcnt; i++) {
		res = spdk_accel_submit_copy(ch, dst, iov[i].iov_base,
					     iov[i].iov_len, malloc_done, task);

		if (res != 0) {
			malloc_done(task, res);
		}

		dst += iov[i].iov_len;
	}
}

static int
bdev_malloc_unmap(struct malloc_disk *mdisk,
		  struct spdk_io_channel *ch,
		  struct malloc_task *task,
		  uint64_t offset,
		  uint64_t byte_count)
{
	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	task->num_outstanding = 1;

	return spdk_accel_submit_fill(ch, mdisk->malloc_buf + offset, 0,
				      byte_count, malloc_done, task);
}

static int64_t
bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task,
		  uint64_t offset, uint64_t nbytes)
{
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}

static int
bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task)
{
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}
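/*
 * Dispatch a generic bdev_io to the byte-based helpers above, scaling block
 * offsets and counts by the block size. A READ whose first iovec has no
 * buffer is the bdev layer asking this module to supply one; because the
 * backing store is plain memory, the iovec is pointed directly into
 * malloc_buf and the I/O completes immediately without a copy.
 */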
static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	uint32_t block_size = bdev_io->bdev->blocklen;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
			assert(bdev_io->u.bdev.iovcnt == 1);
			bdev_io->u.bdev.iovs[0].iov_base =
				((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
				bdev_io->u.bdev.offset_blocks * block_size;
			bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size;
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt,
				  ch,
				  (struct malloc_task *)bdev_io->driver_ctx,
				  bdev_io->u.bdev.iovs,
				  bdev_io->u.bdev.iovcnt,
				  bdev_io->u.bdev.num_blocks * block_size,
				  bdev_io->u.bdev.offset_blocks * block_size);
		return 0;

	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt,
				   ch,
				   (struct malloc_task *)bdev_io->driver_ctx,
				   bdev_io->u.bdev.iovs,
				   bdev_io->u.bdev.iovcnt,
				   bdev_io->u.bdev.num_blocks * block_size,
				   bdev_io->u.bdev.offset_blocks * block_size);
		return 0;

	case SPDK_BDEV_IO_TYPE_RESET:
		return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt,
					 (struct malloc_task *)bdev_io->driver_ctx);

	case SPDK_BDEV_IO_TYPE_FLUSH:
		return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt,
					 (struct malloc_task *)bdev_io->driver_ctx,
					 bdev_io->u.bdev.offset_blocks * block_size,
					 bdev_io->u.bdev.num_blocks * block_size);

	case SPDK_BDEV_IO_TYPE_UNMAP:
		return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
					 ch,
					 (struct malloc_task *)bdev_io->driver_ctx,
					 bdev_io->u.bdev.offset_blocks * block_size,
					 bdev_io->u.bdev.num_blocks * block_size);

	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		/* bdev_malloc_unmap submits a fill with a value of 0, which zeroes
		 * out all of the requested bytes, so it can service write-zeroes too. */
		return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
					 ch,
					 (struct malloc_task *)bdev_io->driver_ctx,
					 bdev_io->u.bdev.offset_blocks * block_size,
					 bdev_io->u.bdev.num_blocks * block_size);

	case SPDK_BDEV_IO_TYPE_ZCOPY:
		if (bdev_io->u.bdev.zcopy.start) {
			void *buf;
			size_t len;

			buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
			      bdev_io->u.bdev.offset_blocks * block_size;
			len = bdev_io->u.bdev.num_blocks * block_size;
			spdk_bdev_io_set_buf(bdev_io, buf, len);
		}
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		return 0;

	case SPDK_BDEV_IO_TYPE_ABORT:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return 0;

	default:
		return -1;
	}
	return 0;
}

static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	if (_bdev_malloc_submit_request(ch, bdev_io) != 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static bool
bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_UNMAP:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_ZCOPY:
	case SPDK_BDEV_IO_TYPE_ABORT:
		return true;

	default:
		return false;
	}
}

static struct spdk_io_channel *
bdev_malloc_get_io_channel(void *ctx)
{
	return spdk_accel_engine_get_io_channel();
}
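/*
 * Write the RPC needed to recreate this bdev when the configuration is
 * saved. The values below are illustrative only; the field names match what
 * the function emits:
 *
 *   {
 *     "method": "bdev_malloc_create",
 *     "params": {
 *       "name": "Malloc0",
 *       "num_blocks": 131072,
 *       "block_size": 512,
 *       "uuid": "<uuid of the bdev>",
 *       "optimal_io_boundary": 0
 *     }
 *   }
 */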
static void
bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	char uuid_str[SPDK_UUID_STRING_LEN];

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_malloc_create");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "name", bdev->name);
	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
	spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
	spdk_json_write_named_string(w, "uuid", uuid_str);
	spdk_json_write_named_uint32(w, "optimal_io_boundary", bdev->optimal_io_boundary);

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
}

static const struct spdk_bdev_fn_table malloc_fn_table = {
	.destruct		= bdev_malloc_destruct,
	.submit_request		= bdev_malloc_submit_request,
	.io_type_supported	= bdev_malloc_io_type_supported,
	.get_io_channel		= bdev_malloc_get_io_channel,
	.write_config_json	= bdev_malloc_write_json_config,
};

int
create_malloc_disk(struct spdk_bdev **bdev, const char *name, const struct spdk_uuid *uuid,
		   uint64_t num_blocks, uint32_t block_size, uint32_t optimal_io_boundary)
{
	struct malloc_disk *mdisk;
	int rc;

	if (num_blocks == 0) {
		SPDK_ERRLOG("Disk num_blocks must be greater than 0\n");
		return -EINVAL;
	}

	if (block_size % 512) {
		SPDK_ERRLOG("Block size must be a multiple of 512 bytes\n");
		return -EINVAL;
	}

	mdisk = calloc(1, sizeof(*mdisk));
	if (!mdisk) {
		SPDK_ERRLOG("mdisk calloc() failed\n");
		return -ENOMEM;
	}

	/*
	 * Allocate the large backend memory buffer from pinned memory.
	 *
	 * TODO: need to pass a hint so we know which socket to allocate
	 * from on multi-socket systems.
	 */
	mdisk->malloc_buf = spdk_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL,
					 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (!mdisk->malloc_buf) {
		SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n");
		malloc_disk_free(mdisk);
		return -ENOMEM;
	}

	if (name) {
		mdisk->disk.name = strdup(name);
	} else {
		/* Auto-generate a name */
		mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count);
		malloc_disk_count++;
	}
	if (!mdisk->disk.name) {
		malloc_disk_free(mdisk);
		return -ENOMEM;
	}
	mdisk->disk.product_name = "Malloc disk";

	mdisk->disk.write_cache = 1;
	mdisk->disk.blocklen = block_size;
	mdisk->disk.blockcnt = num_blocks;
	if (optimal_io_boundary) {
		mdisk->disk.optimal_io_boundary = optimal_io_boundary;
		mdisk->disk.split_on_optimal_io_boundary = true;
	}
	if (uuid) {
		mdisk->disk.uuid = *uuid;
	} else {
		spdk_uuid_generate(&mdisk->disk.uuid);
	}

	mdisk->disk.ctxt = mdisk;
	mdisk->disk.fn_table = &malloc_fn_table;
	mdisk->disk.module = &malloc_if;

	rc = spdk_bdev_register(&mdisk->disk);
	if (rc) {
		malloc_disk_free(mdisk);
		return rc;
	}

	*bdev = &(mdisk->disk);

	TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link);

	return rc;
}

void
delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg)
{
	if (!bdev || bdev->module != &malloc_if) {
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
}

static int bdev_malloc_initialize(void)
{
	/* This needs to be reset for each reinitialization of submodules.
	 * Otherwise after enough devices or reinitializations the value gets too high.
	 * TODO: Make malloc bdev name mandatory and remove this counter. */
	malloc_disk_count = 0;
	return 0;
}

SPDK_LOG_REGISTER_COMPONENT(bdev_malloc)
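
/*
 * Usage sketch, not built by default: the BDEV_MALLOC_USAGE_EXAMPLE guard and
 * the function below are hypothetical and only illustrate how a caller (for
 * example an RPC handler) might drive create_malloc_disk()/delete_malloc_disk().
 */
#ifdef BDEV_MALLOC_USAGE_EXAMPLE
static void
example_delete_done(void *cb_arg, int rc)
{
	(void)cb_arg;
	if (rc != 0) {
		SPDK_ERRLOG("failed to delete malloc bdev: %d\n", rc);
	}
}

static int
example_create_and_delete(void)
{
	struct spdk_bdev *bdev = NULL;
	int rc;

	/* 64 MiB bdev with 512-byte blocks; NULL uuid -> auto-generated,
	 * optimal_io_boundary of 0 -> no splitting on an I/O boundary. */
	rc = create_malloc_disk(&bdev, "Malloc0", NULL, (64 * 1024 * 1024) / 512, 512, 0);
	if (rc != 0) {
		return rc;
	}

	/* Unregister the bdev; completion is reported through the callback. */
	delete_malloc_disk(bdev, example_delete_done, NULL);
	return 0;
}
#endif /* BDEV_MALLOC_USAGE_EXAMPLE */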