1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "bdev_malloc.h" 37 #include "spdk/bdev.h" 38 #include "spdk/conf.h" 39 #include "spdk/endian.h" 40 #include "spdk/env.h" 41 #include "spdk/accel_engine.h" 42 #include "spdk/json.h" 43 #include "spdk/thread.h" 44 #include "spdk/queue.h" 45 #include "spdk/string.h" 46 47 #include "spdk/bdev_module.h" 48 #include "spdk_internal/log.h" 49 50 struct malloc_disk { 51 struct spdk_bdev disk; 52 void *malloc_buf; 53 TAILQ_ENTRY(malloc_disk) link; 54 }; 55 56 struct malloc_task { 57 int num_outstanding; 58 enum spdk_bdev_io_status status; 59 }; 60 61 static struct malloc_task * 62 __malloc_task_from_accel_task(struct spdk_accel_task *ct) 63 { 64 return (struct malloc_task *)((uintptr_t)ct - sizeof(struct malloc_task)); 65 } 66 67 static struct spdk_accel_task * 68 __accel_task_from_malloc_task(struct malloc_task *mt) 69 { 70 return (struct spdk_accel_task *)((uintptr_t)mt + sizeof(struct malloc_task)); 71 } 72 73 static void 74 malloc_done(void *ref, int status) 75 { 76 struct malloc_task *task = __malloc_task_from_accel_task(ref); 77 78 if (status != 0) { 79 if (status == -ENOMEM) { 80 task->status = SPDK_BDEV_IO_STATUS_NOMEM; 81 } else { 82 task->status = SPDK_BDEV_IO_STATUS_FAILED; 83 } 84 } 85 86 if (--task->num_outstanding == 0) { 87 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); 88 } 89 } 90 91 static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks); 92 93 int malloc_disk_count = 0; 94 95 static int bdev_malloc_initialize(void); 96 static void bdev_malloc_get_spdk_running_config(FILE *fp); 97 98 static int 99 bdev_malloc_get_ctx_size(void) 100 { 101 return sizeof(struct malloc_task) + spdk_accel_task_size(); 102 } 103 104 static struct spdk_bdev_module malloc_if = { 105 .name = "malloc", 106 .module_init = bdev_malloc_initialize, 107 .config_text = bdev_malloc_get_spdk_running_config, 108 .get_ctx_size = bdev_malloc_get_ctx_size, 109 110 }; 111 112 SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if) 113 114 static void 115 malloc_disk_free(struct malloc_disk *malloc_disk) 116 { 117 if (!malloc_disk) { 118 return; 119 } 120 121 free(malloc_disk->disk.name); 122 spdk_free(malloc_disk->malloc_buf); 123 free(malloc_disk); 124 } 125 126 static int 127 bdev_malloc_destruct(void *ctx) 128 { 129 struct malloc_disk *malloc_disk = ctx; 130 131 TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link); 132 malloc_disk_free(malloc_disk); 133 return 0; 134 } 135 136 static int 137 bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes) 138 { 139 int i; 140 141 for (i = 0; i < iovcnt; i++) { 142 if (nbytes < iovs[i].iov_len) { 143 return 0; 144 } 145 146 nbytes -= iovs[i].iov_len; 147 } 148 149 return nbytes != 0; 150 } 151 152 static void 153 bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 154 struct malloc_task *task, 155 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 156 { 157 int64_t res = 0; 158 void *src = mdisk->malloc_buf + offset; 159 int i; 160 161 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 162 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 163 SPDK_BDEV_IO_STATUS_FAILED); 164 return; 165 } 166 167 SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "read %lu bytes from offset %#lx\n", 168 len, offset); 169 170 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 171 task->num_outstanding = iovcnt; 172 173 for (i = 0; i < iovcnt; i++) { 174 res = spdk_accel_submit_copy(__accel_task_from_malloc_task(task), 175 ch, iov[i].iov_base, 176 src, iov[i].iov_len, malloc_done); 177 178 if (res != 0) { 179 malloc_done(__accel_task_from_malloc_task(task), res); 180 } 181 182 src += iov[i].iov_len; 183 len -= iov[i].iov_len; 184 } 185 } 186 187 static void 188 bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 189 struct malloc_task *task, 190 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 191 { 192 int64_t res = 0; 193 void *dst = mdisk->malloc_buf + offset; 194 int i; 195 196 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 197 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 198 SPDK_BDEV_IO_STATUS_FAILED); 199 return; 200 } 201 202 SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "wrote %lu bytes to offset %#lx\n", 203 len, offset); 204 205 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 206 task->num_outstanding = iovcnt; 207 208 for (i = 0; i < iovcnt; i++) { 209 res = spdk_accel_submit_copy(__accel_task_from_malloc_task(task), 210 ch, dst, iov[i].iov_base, 211 iov[i].iov_len, malloc_done); 212 213 if (res != 0) { 214 malloc_done(__accel_task_from_malloc_task(task), res); 215 } 216 217 dst += iov[i].iov_len; 218 } 219 } 220 221 static int 222 bdev_malloc_unmap(struct malloc_disk *mdisk, 223 struct spdk_io_channel *ch, 224 struct malloc_task *task, 225 uint64_t offset, 226 uint64_t byte_count) 227 { 228 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 229 task->num_outstanding = 1; 230 231 return spdk_accel_submit_fill(__accel_task_from_malloc_task(task), ch, 232 mdisk->malloc_buf + offset, 0, byte_count, malloc_done); 233 } 234 235 static int64_t 236 bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task, 237 uint64_t offset, uint64_t nbytes) 238 { 239 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); 240 241 return 0; 242 } 243 244 static int 245 bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task) 246 { 247 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); 248 249 return 0; 250 } 251 252 static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 253 { 254 uint32_t block_size = bdev_io->bdev->blocklen; 255 256 switch (bdev_io->type) { 257 case SPDK_BDEV_IO_TYPE_READ: 258 if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { 259 assert(bdev_io->u.bdev.iovcnt == 1); 260 bdev_io->u.bdev.iovs[0].iov_base = 261 ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 262 bdev_io->u.bdev.offset_blocks * block_size; 263 bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size; 264 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bdev_io->driver_ctx), 265 SPDK_BDEV_IO_STATUS_SUCCESS); 266 return 0; 267 } 268 269 bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt, 270 ch, 271 (struct malloc_task *)bdev_io->driver_ctx, 272 bdev_io->u.bdev.iovs, 273 bdev_io->u.bdev.iovcnt, 274 bdev_io->u.bdev.num_blocks * block_size, 275 bdev_io->u.bdev.offset_blocks * block_size); 276 return 0; 277 278 case SPDK_BDEV_IO_TYPE_WRITE: 279 bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt, 280 ch, 281 (struct malloc_task *)bdev_io->driver_ctx, 282 bdev_io->u.bdev.iovs, 283 bdev_io->u.bdev.iovcnt, 284 bdev_io->u.bdev.num_blocks * block_size, 285 bdev_io->u.bdev.offset_blocks * block_size); 286 return 0; 287 288 case SPDK_BDEV_IO_TYPE_RESET: 289 return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt, 290 (struct malloc_task *)bdev_io->driver_ctx); 291 292 case SPDK_BDEV_IO_TYPE_FLUSH: 293 return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt, 294 (struct malloc_task *)bdev_io->driver_ctx, 295 bdev_io->u.bdev.offset_blocks * block_size, 296 bdev_io->u.bdev.num_blocks * block_size); 297 298 case SPDK_BDEV_IO_TYPE_UNMAP: 299 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 300 ch, 301 (struct malloc_task *)bdev_io->driver_ctx, 302 bdev_io->u.bdev.offset_blocks * block_size, 303 bdev_io->u.bdev.num_blocks * block_size); 304 305 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 306 /* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */ 307 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 308 ch, 309 (struct malloc_task *)bdev_io->driver_ctx, 310 bdev_io->u.bdev.offset_blocks * block_size, 311 bdev_io->u.bdev.num_blocks * block_size); 312 313 case SPDK_BDEV_IO_TYPE_ZCOPY: 314 if (bdev_io->u.bdev.zcopy.start) { 315 void *buf; 316 size_t len; 317 318 buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 319 bdev_io->u.bdev.offset_blocks * block_size; 320 len = bdev_io->u.bdev.num_blocks * block_size; 321 spdk_bdev_io_set_buf(bdev_io, buf, len); 322 323 } 324 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bdev_io->driver_ctx), 325 SPDK_BDEV_IO_STATUS_SUCCESS); 326 return 0; 327 default: 328 return -1; 329 } 330 return 0; 331 } 332 333 static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 334 { 335 if (_bdev_malloc_submit_request(ch, bdev_io) != 0) { 336 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 337 } 338 } 339 340 static bool 341 bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 342 { 343 switch (io_type) { 344 case SPDK_BDEV_IO_TYPE_READ: 345 case SPDK_BDEV_IO_TYPE_WRITE: 346 case SPDK_BDEV_IO_TYPE_FLUSH: 347 case SPDK_BDEV_IO_TYPE_RESET: 348 case SPDK_BDEV_IO_TYPE_UNMAP: 349 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 350 case SPDK_BDEV_IO_TYPE_ZCOPY: 351 return true; 352 353 default: 354 return false; 355 } 356 } 357 358 static struct spdk_io_channel * 359 bdev_malloc_get_io_channel(void *ctx) 360 { 361 return spdk_accel_engine_get_io_channel(); 362 } 363 364 static void 365 bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 366 { 367 char uuid_str[SPDK_UUID_STRING_LEN]; 368 369 spdk_json_write_object_begin(w); 370 371 spdk_json_write_named_string(w, "method", "bdev_malloc_create"); 372 373 spdk_json_write_named_object_begin(w, "params"); 374 spdk_json_write_named_string(w, "name", bdev->name); 375 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 376 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 377 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 378 spdk_json_write_named_string(w, "uuid", uuid_str); 379 380 spdk_json_write_object_end(w); 381 382 spdk_json_write_object_end(w); 383 } 384 385 static const struct spdk_bdev_fn_table malloc_fn_table = { 386 .destruct = bdev_malloc_destruct, 387 .submit_request = bdev_malloc_submit_request, 388 .io_type_supported = bdev_malloc_io_type_supported, 389 .get_io_channel = bdev_malloc_get_io_channel, 390 .write_config_json = bdev_malloc_write_json_config, 391 }; 392 393 int 394 create_malloc_disk(struct spdk_bdev **bdev, const char *name, const struct spdk_uuid *uuid, 395 uint64_t num_blocks, uint32_t block_size) 396 { 397 struct malloc_disk *mdisk; 398 int rc; 399 400 if (num_blocks == 0) { 401 SPDK_ERRLOG("Disk num_blocks must be greater than 0"); 402 return -EINVAL; 403 } 404 405 mdisk = calloc(1, sizeof(*mdisk)); 406 if (!mdisk) { 407 SPDK_ERRLOG("mdisk calloc() failed\n"); 408 return -ENOMEM; 409 } 410 411 /* 412 * Allocate the large backend memory buffer from pinned memory. 413 * 414 * TODO: need to pass a hint so we know which socket to allocate 415 * from on multi-socket systems. 416 */ 417 mdisk->malloc_buf = spdk_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL, 418 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 419 if (!mdisk->malloc_buf) { 420 SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n"); 421 malloc_disk_free(mdisk); 422 return -ENOMEM; 423 } 424 425 if (name) { 426 mdisk->disk.name = strdup(name); 427 } else { 428 /* Auto-generate a name */ 429 mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count); 430 malloc_disk_count++; 431 } 432 if (!mdisk->disk.name) { 433 malloc_disk_free(mdisk); 434 return -ENOMEM; 435 } 436 mdisk->disk.product_name = "Malloc disk"; 437 438 mdisk->disk.write_cache = 1; 439 mdisk->disk.blocklen = block_size; 440 mdisk->disk.blockcnt = num_blocks; 441 if (uuid) { 442 mdisk->disk.uuid = *uuid; 443 } else { 444 spdk_uuid_generate(&mdisk->disk.uuid); 445 } 446 447 mdisk->disk.ctxt = mdisk; 448 mdisk->disk.fn_table = &malloc_fn_table; 449 mdisk->disk.module = &malloc_if; 450 451 rc = spdk_bdev_register(&mdisk->disk); 452 if (rc) { 453 malloc_disk_free(mdisk); 454 return rc; 455 } 456 457 *bdev = &(mdisk->disk); 458 459 TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link); 460 461 return rc; 462 } 463 464 void 465 delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg) 466 { 467 if (!bdev || bdev->module != &malloc_if) { 468 cb_fn(cb_arg, -ENODEV); 469 return; 470 } 471 472 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 473 } 474 475 static int bdev_malloc_initialize(void) 476 { 477 struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Malloc"); 478 int NumberOfLuns, LunSizeInMB, BlockSize, i, rc = 0; 479 uint64_t size; 480 struct spdk_bdev *bdev; 481 482 if (sp != NULL) { 483 NumberOfLuns = spdk_conf_section_get_intval(sp, "NumberOfLuns"); 484 LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB"); 485 BlockSize = spdk_conf_section_get_intval(sp, "BlockSize"); 486 if ((NumberOfLuns < 1) || (LunSizeInMB < 1)) { 487 SPDK_ERRLOG("Malloc section present, but no devices specified\n"); 488 goto end; 489 } 490 if (BlockSize < 1) { 491 /* Default is 512 bytes */ 492 BlockSize = 512; 493 } 494 size = (uint64_t)LunSizeInMB * 1024 * 1024; 495 for (i = 0; i < NumberOfLuns; i++) { 496 rc = create_malloc_disk(&bdev, NULL, NULL, size / BlockSize, BlockSize); 497 if (rc) { 498 SPDK_ERRLOG("Could not create malloc disk\n"); 499 goto end; 500 } 501 } 502 } 503 504 end: 505 return rc; 506 } 507 508 static void 509 bdev_malloc_get_spdk_running_config(FILE *fp) 510 { 511 int num_malloc_luns = 0; 512 uint64_t malloc_lun_size = 0; 513 struct malloc_disk *mdisk; 514 515 /* count number of malloc LUNs, get LUN size */ 516 TAILQ_FOREACH(mdisk, &g_malloc_disks, link) { 517 if (0 == malloc_lun_size) { 518 /* assume all malloc luns the same size */ 519 malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt; 520 malloc_lun_size /= (1024 * 1024); 521 } 522 num_malloc_luns++; 523 } 524 525 if (num_malloc_luns > 0) { 526 fprintf(fp, 527 "\n" 528 "# Users may change this section to create a different number or size of\n" 529 "# malloc LUNs.\n" 530 "# This will generate %d LUNs with a malloc-allocated backend. Each LUN\n" 531 "# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n" 532 "# Not all LUNs defined here are necessarily used below.\n" 533 "[Malloc]\n" 534 " NumberOfLuns %d\n" 535 " LunSizeInMB %" PRIu64 "\n", 536 num_malloc_luns, malloc_lun_size, 537 num_malloc_luns - 1, num_malloc_luns, 538 malloc_lun_size); 539 } 540 } 541 542 SPDK_LOG_REGISTER_COMPONENT("bdev_malloc", SPDK_LOG_BDEV_MALLOC) 543