1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "bdev_malloc.h" 37 #include "spdk/bdev.h" 38 #include "spdk/conf.h" 39 #include "spdk/endian.h" 40 #include "spdk/env.h" 41 #include "spdk/accel_engine.h" 42 #include "spdk/json.h" 43 #include "spdk/thread.h" 44 #include "spdk/queue.h" 45 #include "spdk/string.h" 46 47 #include "spdk/bdev_module.h" 48 #include "spdk_internal/log.h" 49 50 struct malloc_disk { 51 struct spdk_bdev disk; 52 void *malloc_buf; 53 TAILQ_ENTRY(malloc_disk) link; 54 }; 55 56 struct malloc_task { 57 int num_outstanding; 58 enum spdk_bdev_io_status status; 59 }; 60 61 static struct malloc_task * 62 __malloc_task_from_accel_task(struct spdk_accel_task *ct) 63 { 64 return (struct malloc_task *)((uintptr_t)ct - sizeof(struct malloc_task)); 65 } 66 67 static struct spdk_accel_task * 68 __accel_task_from_malloc_task(struct malloc_task *mt) 69 { 70 return (struct spdk_accel_task *)((uintptr_t)mt + sizeof(struct malloc_task)); 71 } 72 73 static void 74 malloc_done(void *ref, int status) 75 { 76 struct malloc_task *task = __malloc_task_from_accel_task(ref); 77 78 if (status != 0) { 79 if (status == -ENOMEM) { 80 task->status = SPDK_BDEV_IO_STATUS_NOMEM; 81 } else { 82 task->status = SPDK_BDEV_IO_STATUS_FAILED; 83 } 84 } 85 86 if (--task->num_outstanding == 0) { 87 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); 88 } 89 } 90 91 static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks); 92 93 int malloc_disk_count = 0; 94 95 static int bdev_malloc_initialize(void); 96 static void bdev_malloc_get_spdk_running_config(FILE *fp); 97 98 static int 99 bdev_malloc_get_ctx_size(void) 100 { 101 return sizeof(struct malloc_task) + spdk_accel_task_size(); 102 } 103 104 static struct spdk_bdev_module malloc_if = { 105 .name = "malloc", 106 .module_init = bdev_malloc_initialize, 107 .config_text = bdev_malloc_get_spdk_running_config, 108 .get_ctx_size = bdev_malloc_get_ctx_size, 109 110 }; 111 112 SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if) 113 114 static void 115 malloc_disk_free(struct malloc_disk *malloc_disk) 116 { 117 if (!malloc_disk) { 118 return; 119 } 120 121 free(malloc_disk->disk.name); 122 spdk_free(malloc_disk->malloc_buf); 123 free(malloc_disk); 124 } 125 126 static int 127 bdev_malloc_destruct(void *ctx) 128 { 129 struct malloc_disk *malloc_disk = ctx; 130 131 TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link); 132 malloc_disk_free(malloc_disk); 133 return 0; 134 } 135 136 static int 137 bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes) 138 { 139 int i; 140 141 for (i = 0; i < iovcnt; i++) { 142 if (nbytes < iovs[i].iov_len) { 143 return 0; 144 } 145 146 nbytes -= iovs[i].iov_len; 147 } 148 149 return nbytes != 0; 150 } 151 152 static void 153 bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 154 struct malloc_task *task, 155 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 156 { 157 int64_t res = 0; 158 void *src = mdisk->malloc_buf + offset; 159 int i; 160 161 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 162 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 163 SPDK_BDEV_IO_STATUS_FAILED); 164 return; 165 } 166 167 SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "read %lu bytes from offset %#lx\n", 168 len, offset); 169 170 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 171 task->num_outstanding = iovcnt; 172 173 for (i = 0; i < iovcnt; i++) { 174 res = spdk_accel_submit_copy(__accel_task_from_malloc_task(task), 175 ch, iov[i].iov_base, 176 src, iov[i].iov_len, malloc_done); 177 178 if (res != 0) { 179 malloc_done(__accel_task_from_malloc_task(task), res); 180 } 181 182 src += iov[i].iov_len; 183 len -= iov[i].iov_len; 184 } 185 } 186 187 static void 188 bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 189 struct malloc_task *task, 190 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 191 { 192 int64_t res = 0; 193 void *dst = mdisk->malloc_buf + offset; 194 int i; 195 196 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 197 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 198 SPDK_BDEV_IO_STATUS_FAILED); 199 return; 200 } 201 202 SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "wrote %lu bytes to offset %#lx\n", 203 len, offset); 204 205 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 206 task->num_outstanding = iovcnt; 207 208 for (i = 0; i < iovcnt; i++) { 209 res = spdk_accel_submit_copy(__accel_task_from_malloc_task(task), 210 ch, dst, iov[i].iov_base, 211 iov[i].iov_len, malloc_done); 212 213 if (res != 0) { 214 malloc_done(__accel_task_from_malloc_task(task), res); 215 } 216 217 dst += iov[i].iov_len; 218 } 219 } 220 221 static int 222 bdev_malloc_unmap(struct malloc_disk *mdisk, 223 struct spdk_io_channel *ch, 224 struct malloc_task *task, 225 uint64_t offset, 226 uint64_t byte_count) 227 { 228 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 229 task->num_outstanding = 1; 230 231 return spdk_accel_submit_fill(__accel_task_from_malloc_task(task), ch, 232 mdisk->malloc_buf + offset, 0, byte_count, malloc_done); 233 } 234 235 static int64_t 236 bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task, 237 uint64_t offset, uint64_t nbytes) 238 { 239 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); 240 241 return 0; 242 } 243 244 static int 245 bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task) 246 { 247 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); 248 249 return 0; 250 } 251 252 static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 253 { 254 uint32_t block_size = bdev_io->bdev->blocklen; 255 256 switch (bdev_io->type) { 257 case SPDK_BDEV_IO_TYPE_READ: 258 if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { 259 assert(bdev_io->u.bdev.iovcnt == 1); 260 bdev_io->u.bdev.iovs[0].iov_base = 261 ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 262 bdev_io->u.bdev.offset_blocks * block_size; 263 bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size; 264 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 265 return 0; 266 } 267 268 bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt, 269 ch, 270 (struct malloc_task *)bdev_io->driver_ctx, 271 bdev_io->u.bdev.iovs, 272 bdev_io->u.bdev.iovcnt, 273 bdev_io->u.bdev.num_blocks * block_size, 274 bdev_io->u.bdev.offset_blocks * block_size); 275 return 0; 276 277 case SPDK_BDEV_IO_TYPE_WRITE: 278 bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt, 279 ch, 280 (struct malloc_task *)bdev_io->driver_ctx, 281 bdev_io->u.bdev.iovs, 282 bdev_io->u.bdev.iovcnt, 283 bdev_io->u.bdev.num_blocks * block_size, 284 bdev_io->u.bdev.offset_blocks * block_size); 285 return 0; 286 287 case SPDK_BDEV_IO_TYPE_RESET: 288 return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt, 289 (struct malloc_task *)bdev_io->driver_ctx); 290 291 case SPDK_BDEV_IO_TYPE_FLUSH: 292 return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt, 293 (struct malloc_task *)bdev_io->driver_ctx, 294 bdev_io->u.bdev.offset_blocks * block_size, 295 bdev_io->u.bdev.num_blocks * block_size); 296 297 case SPDK_BDEV_IO_TYPE_UNMAP: 298 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 299 ch, 300 (struct malloc_task *)bdev_io->driver_ctx, 301 bdev_io->u.bdev.offset_blocks * block_size, 302 bdev_io->u.bdev.num_blocks * block_size); 303 304 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 305 /* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */ 306 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 307 ch, 308 (struct malloc_task *)bdev_io->driver_ctx, 309 bdev_io->u.bdev.offset_blocks * block_size, 310 bdev_io->u.bdev.num_blocks * block_size); 311 312 case SPDK_BDEV_IO_TYPE_ZCOPY: 313 if (bdev_io->u.bdev.zcopy.start) { 314 void *buf; 315 size_t len; 316 317 buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 318 bdev_io->u.bdev.offset_blocks * block_size; 319 len = bdev_io->u.bdev.num_blocks * block_size; 320 spdk_bdev_io_set_buf(bdev_io, buf, len); 321 322 } 323 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 324 return 0; 325 case SPDK_BDEV_IO_TYPE_ABORT: 326 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 327 return 0; 328 default: 329 return -1; 330 } 331 return 0; 332 } 333 334 static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 335 { 336 if (_bdev_malloc_submit_request(ch, bdev_io) != 0) { 337 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 338 } 339 } 340 341 static bool 342 bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 343 { 344 switch (io_type) { 345 case SPDK_BDEV_IO_TYPE_READ: 346 case SPDK_BDEV_IO_TYPE_WRITE: 347 case SPDK_BDEV_IO_TYPE_FLUSH: 348 case SPDK_BDEV_IO_TYPE_RESET: 349 case SPDK_BDEV_IO_TYPE_UNMAP: 350 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 351 case SPDK_BDEV_IO_TYPE_ZCOPY: 352 case SPDK_BDEV_IO_TYPE_ABORT: 353 return true; 354 355 default: 356 return false; 357 } 358 } 359 360 static struct spdk_io_channel * 361 bdev_malloc_get_io_channel(void *ctx) 362 { 363 return spdk_accel_engine_get_io_channel(); 364 } 365 366 static void 367 bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 368 { 369 char uuid_str[SPDK_UUID_STRING_LEN]; 370 371 spdk_json_write_object_begin(w); 372 373 spdk_json_write_named_string(w, "method", "bdev_malloc_create"); 374 375 spdk_json_write_named_object_begin(w, "params"); 376 spdk_json_write_named_string(w, "name", bdev->name); 377 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 378 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 379 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 380 spdk_json_write_named_string(w, "uuid", uuid_str); 381 382 spdk_json_write_object_end(w); 383 384 spdk_json_write_object_end(w); 385 } 386 387 static const struct spdk_bdev_fn_table malloc_fn_table = { 388 .destruct = bdev_malloc_destruct, 389 .submit_request = bdev_malloc_submit_request, 390 .io_type_supported = bdev_malloc_io_type_supported, 391 .get_io_channel = bdev_malloc_get_io_channel, 392 .write_config_json = bdev_malloc_write_json_config, 393 }; 394 395 int 396 create_malloc_disk(struct spdk_bdev **bdev, const char *name, const struct spdk_uuid *uuid, 397 uint64_t num_blocks, uint32_t block_size) 398 { 399 struct malloc_disk *mdisk; 400 int rc; 401 402 if (num_blocks == 0) { 403 SPDK_ERRLOG("Disk num_blocks must be greater than 0"); 404 return -EINVAL; 405 } 406 407 mdisk = calloc(1, sizeof(*mdisk)); 408 if (!mdisk) { 409 SPDK_ERRLOG("mdisk calloc() failed\n"); 410 return -ENOMEM; 411 } 412 413 /* 414 * Allocate the large backend memory buffer from pinned memory. 415 * 416 * TODO: need to pass a hint so we know which socket to allocate 417 * from on multi-socket systems. 418 */ 419 mdisk->malloc_buf = spdk_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL, 420 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 421 if (!mdisk->malloc_buf) { 422 SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n"); 423 malloc_disk_free(mdisk); 424 return -ENOMEM; 425 } 426 427 if (name) { 428 mdisk->disk.name = strdup(name); 429 } else { 430 /* Auto-generate a name */ 431 mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count); 432 malloc_disk_count++; 433 } 434 if (!mdisk->disk.name) { 435 malloc_disk_free(mdisk); 436 return -ENOMEM; 437 } 438 mdisk->disk.product_name = "Malloc disk"; 439 440 mdisk->disk.write_cache = 1; 441 mdisk->disk.blocklen = block_size; 442 mdisk->disk.blockcnt = num_blocks; 443 if (uuid) { 444 mdisk->disk.uuid = *uuid; 445 } else { 446 spdk_uuid_generate(&mdisk->disk.uuid); 447 } 448 449 mdisk->disk.ctxt = mdisk; 450 mdisk->disk.fn_table = &malloc_fn_table; 451 mdisk->disk.module = &malloc_if; 452 453 rc = spdk_bdev_register(&mdisk->disk); 454 if (rc) { 455 malloc_disk_free(mdisk); 456 return rc; 457 } 458 459 *bdev = &(mdisk->disk); 460 461 TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link); 462 463 return rc; 464 } 465 466 void 467 delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg) 468 { 469 if (!bdev || bdev->module != &malloc_if) { 470 cb_fn(cb_arg, -ENODEV); 471 return; 472 } 473 474 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 475 } 476 477 static int bdev_malloc_initialize(void) 478 { 479 struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Malloc"); 480 int NumberOfLuns, LunSizeInMB, BlockSize, i, rc = 0; 481 uint64_t size; 482 struct spdk_bdev *bdev; 483 484 if (sp != NULL) { 485 NumberOfLuns = spdk_conf_section_get_intval(sp, "NumberOfLuns"); 486 LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB"); 487 BlockSize = spdk_conf_section_get_intval(sp, "BlockSize"); 488 if ((NumberOfLuns < 1) || (LunSizeInMB < 1)) { 489 SPDK_ERRLOG("Malloc section present, but no devices specified\n"); 490 goto end; 491 } 492 if (BlockSize < 1) { 493 /* Default is 512 bytes */ 494 BlockSize = 512; 495 } 496 size = (uint64_t)LunSizeInMB * 1024 * 1024; 497 for (i = 0; i < NumberOfLuns; i++) { 498 rc = create_malloc_disk(&bdev, NULL, NULL, size / BlockSize, BlockSize); 499 if (rc) { 500 SPDK_ERRLOG("Could not create malloc disk\n"); 501 goto end; 502 } 503 } 504 } 505 506 end: 507 return rc; 508 } 509 510 static void 511 bdev_malloc_get_spdk_running_config(FILE *fp) 512 { 513 int num_malloc_luns = 0; 514 uint64_t malloc_lun_size = 0; 515 struct malloc_disk *mdisk; 516 517 /* count number of malloc LUNs, get LUN size */ 518 TAILQ_FOREACH(mdisk, &g_malloc_disks, link) { 519 if (0 == malloc_lun_size) { 520 /* assume all malloc luns the same size */ 521 malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt; 522 malloc_lun_size /= (1024 * 1024); 523 } 524 num_malloc_luns++; 525 } 526 527 if (num_malloc_luns > 0) { 528 fprintf(fp, 529 "\n" 530 "# Users may change this section to create a different number or size of\n" 531 "# malloc LUNs.\n" 532 "# This will generate %d LUNs with a malloc-allocated backend. Each LUN\n" 533 "# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n" 534 "# Not all LUNs defined here are necessarily used below.\n" 535 "[Malloc]\n" 536 " NumberOfLuns %d\n" 537 " LunSizeInMB %" PRIu64 "\n", 538 num_malloc_luns, malloc_lun_size, 539 num_malloc_luns - 1, num_malloc_luns, 540 malloc_lun_size); 541 } 542 } 543 544 SPDK_LOG_REGISTER_COMPONENT("bdev_malloc", SPDK_LOG_BDEV_MALLOC) 545