1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "bdev_malloc.h" 37 #include "spdk/bdev.h" 38 #include "spdk/conf.h" 39 #include "spdk/endian.h" 40 #include "spdk/env.h" 41 #include "spdk/accel_engine.h" 42 #include "spdk/json.h" 43 #include "spdk/thread.h" 44 #include "spdk/queue.h" 45 #include "spdk/string.h" 46 47 #include "spdk/bdev_module.h" 48 #include "spdk_internal/log.h" 49 50 struct malloc_disk { 51 struct spdk_bdev disk; 52 void *malloc_buf; 53 TAILQ_ENTRY(malloc_disk) link; 54 }; 55 56 struct malloc_task { 57 int num_outstanding; 58 enum spdk_bdev_io_status status; 59 }; 60 61 static void 62 malloc_done(void *ref, int status) 63 { 64 struct malloc_task *task = (struct malloc_task *)ref; 65 66 if (status != 0) { 67 if (status == -ENOMEM) { 68 task->status = SPDK_BDEV_IO_STATUS_NOMEM; 69 } else { 70 task->status = SPDK_BDEV_IO_STATUS_FAILED; 71 } 72 } 73 74 if (--task->num_outstanding == 0) { 75 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); 76 } 77 } 78 79 static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks); 80 81 int malloc_disk_count = 0; 82 83 static int bdev_malloc_initialize(void); 84 static void bdev_malloc_get_spdk_running_config(FILE *fp); 85 86 static int 87 bdev_malloc_get_ctx_size(void) 88 { 89 return sizeof(struct malloc_task); 90 } 91 92 static struct spdk_bdev_module malloc_if = { 93 .name = "malloc", 94 .module_init = bdev_malloc_initialize, 95 .config_text = bdev_malloc_get_spdk_running_config, 96 .get_ctx_size = bdev_malloc_get_ctx_size, 97 98 }; 99 100 SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if) 101 102 static void 103 malloc_disk_free(struct malloc_disk *malloc_disk) 104 { 105 if (!malloc_disk) { 106 return; 107 } 108 109 free(malloc_disk->disk.name); 110 spdk_free(malloc_disk->malloc_buf); 111 free(malloc_disk); 112 } 113 114 static int 115 bdev_malloc_destruct(void *ctx) 116 { 117 struct malloc_disk *malloc_disk = ctx; 118 119 TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link); 120 malloc_disk_free(malloc_disk); 121 return 0; 122 } 123 124 static int 125 bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes) 126 { 127 int i; 128 129 for (i = 0; i < iovcnt; i++) { 130 if (nbytes < iovs[i].iov_len) { 131 return 0; 132 } 133 134 nbytes -= iovs[i].iov_len; 135 } 136 137 return nbytes != 0; 138 } 139 140 static void 141 bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 142 struct malloc_task *task, 143 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 144 { 145 int64_t res = 0; 146 void *src = mdisk->malloc_buf + offset; 147 int i; 148 149 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 150 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 151 SPDK_BDEV_IO_STATUS_FAILED); 152 return; 153 } 154 155 SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "read %lu bytes from offset %#lx\n", 156 len, offset); 157 158 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 159 task->num_outstanding = iovcnt; 160 161 for (i = 0; i < iovcnt; i++) { 162 res = spdk_accel_submit_copy(ch, iov[i].iov_base, 163 src, iov[i].iov_len, malloc_done, task); 164 165 if (res != 0) { 166 malloc_done(task, res); 167 } 168 169 src += iov[i].iov_len; 170 len -= iov[i].iov_len; 171 } 172 } 173 174 static void 175 bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 176 struct malloc_task *task, 177 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 178 { 179 int64_t res = 0; 180 void *dst = mdisk->malloc_buf + offset; 181 int i; 182 183 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 184 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 185 SPDK_BDEV_IO_STATUS_FAILED); 186 return; 187 } 188 189 SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "wrote %lu bytes to offset %#lx\n", 190 len, offset); 191 192 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 193 task->num_outstanding = iovcnt; 194 195 for (i = 0; i < iovcnt; i++) { 196 res = spdk_accel_submit_copy(ch, dst, iov[i].iov_base, 197 iov[i].iov_len, malloc_done, task); 198 199 if (res != 0) { 200 malloc_done(task, res); 201 } 202 203 dst += iov[i].iov_len; 204 } 205 } 206 207 static int 208 bdev_malloc_unmap(struct malloc_disk *mdisk, 209 struct spdk_io_channel *ch, 210 struct malloc_task *task, 211 uint64_t offset, 212 uint64_t byte_count) 213 { 214 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 215 task->num_outstanding = 1; 216 217 return spdk_accel_submit_fill(ch, mdisk->malloc_buf + offset, 0, 218 byte_count, malloc_done, task); 219 } 220 221 static int64_t 222 bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task, 223 uint64_t offset, uint64_t nbytes) 224 { 225 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); 226 227 return 0; 228 } 229 230 static int 231 bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task) 232 { 233 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); 234 235 return 0; 236 } 237 238 static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 239 { 240 uint32_t block_size = bdev_io->bdev->blocklen; 241 242 switch (bdev_io->type) { 243 case SPDK_BDEV_IO_TYPE_READ: 244 if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { 245 assert(bdev_io->u.bdev.iovcnt == 1); 246 bdev_io->u.bdev.iovs[0].iov_base = 247 ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 248 bdev_io->u.bdev.offset_blocks * block_size; 249 bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size; 250 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 251 return 0; 252 } 253 254 bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt, 255 ch, 256 (struct malloc_task *)bdev_io->driver_ctx, 257 bdev_io->u.bdev.iovs, 258 bdev_io->u.bdev.iovcnt, 259 bdev_io->u.bdev.num_blocks * block_size, 260 bdev_io->u.bdev.offset_blocks * block_size); 261 return 0; 262 263 case SPDK_BDEV_IO_TYPE_WRITE: 264 bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt, 265 ch, 266 (struct malloc_task *)bdev_io->driver_ctx, 267 bdev_io->u.bdev.iovs, 268 bdev_io->u.bdev.iovcnt, 269 bdev_io->u.bdev.num_blocks * block_size, 270 bdev_io->u.bdev.offset_blocks * block_size); 271 return 0; 272 273 case SPDK_BDEV_IO_TYPE_RESET: 274 return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt, 275 (struct malloc_task *)bdev_io->driver_ctx); 276 277 case SPDK_BDEV_IO_TYPE_FLUSH: 278 return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt, 279 (struct malloc_task *)bdev_io->driver_ctx, 280 bdev_io->u.bdev.offset_blocks * block_size, 281 bdev_io->u.bdev.num_blocks * block_size); 282 283 case SPDK_BDEV_IO_TYPE_UNMAP: 284 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 285 ch, 286 (struct malloc_task *)bdev_io->driver_ctx, 287 bdev_io->u.bdev.offset_blocks * block_size, 288 bdev_io->u.bdev.num_blocks * block_size); 289 290 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 291 /* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */ 292 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 293 ch, 294 (struct malloc_task *)bdev_io->driver_ctx, 295 bdev_io->u.bdev.offset_blocks * block_size, 296 bdev_io->u.bdev.num_blocks * block_size); 297 298 case SPDK_BDEV_IO_TYPE_ZCOPY: 299 if (bdev_io->u.bdev.zcopy.start) { 300 void *buf; 301 size_t len; 302 303 buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 304 bdev_io->u.bdev.offset_blocks * block_size; 305 len = bdev_io->u.bdev.num_blocks * block_size; 306 spdk_bdev_io_set_buf(bdev_io, buf, len); 307 308 } 309 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 310 return 0; 311 case SPDK_BDEV_IO_TYPE_ABORT: 312 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 313 return 0; 314 default: 315 return -1; 316 } 317 return 0; 318 } 319 320 static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 321 { 322 if (_bdev_malloc_submit_request(ch, bdev_io) != 0) { 323 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 324 } 325 } 326 327 static bool 328 bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 329 { 330 switch (io_type) { 331 case SPDK_BDEV_IO_TYPE_READ: 332 case SPDK_BDEV_IO_TYPE_WRITE: 333 case SPDK_BDEV_IO_TYPE_FLUSH: 334 case SPDK_BDEV_IO_TYPE_RESET: 335 case SPDK_BDEV_IO_TYPE_UNMAP: 336 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 337 case SPDK_BDEV_IO_TYPE_ZCOPY: 338 case SPDK_BDEV_IO_TYPE_ABORT: 339 return true; 340 341 default: 342 return false; 343 } 344 } 345 346 static struct spdk_io_channel * 347 bdev_malloc_get_io_channel(void *ctx) 348 { 349 return spdk_accel_engine_get_io_channel(); 350 } 351 352 static void 353 bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 354 { 355 char uuid_str[SPDK_UUID_STRING_LEN]; 356 357 spdk_json_write_object_begin(w); 358 359 spdk_json_write_named_string(w, "method", "bdev_malloc_create"); 360 361 spdk_json_write_named_object_begin(w, "params"); 362 spdk_json_write_named_string(w, "name", bdev->name); 363 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 364 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 365 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 366 spdk_json_write_named_string(w, "uuid", uuid_str); 367 368 spdk_json_write_object_end(w); 369 370 spdk_json_write_object_end(w); 371 } 372 373 static const struct spdk_bdev_fn_table malloc_fn_table = { 374 .destruct = bdev_malloc_destruct, 375 .submit_request = bdev_malloc_submit_request, 376 .io_type_supported = bdev_malloc_io_type_supported, 377 .get_io_channel = bdev_malloc_get_io_channel, 378 .write_config_json = bdev_malloc_write_json_config, 379 }; 380 381 int 382 create_malloc_disk(struct spdk_bdev **bdev, const char *name, const struct spdk_uuid *uuid, 383 uint64_t num_blocks, uint32_t block_size) 384 { 385 struct malloc_disk *mdisk; 386 int rc; 387 388 if (num_blocks == 0) { 389 SPDK_ERRLOG("Disk num_blocks must be greater than 0"); 390 return -EINVAL; 391 } 392 393 mdisk = calloc(1, sizeof(*mdisk)); 394 if (!mdisk) { 395 SPDK_ERRLOG("mdisk calloc() failed\n"); 396 return -ENOMEM; 397 } 398 399 /* 400 * Allocate the large backend memory buffer from pinned memory. 401 * 402 * TODO: need to pass a hint so we know which socket to allocate 403 * from on multi-socket systems. 404 */ 405 mdisk->malloc_buf = spdk_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL, 406 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 407 if (!mdisk->malloc_buf) { 408 SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n"); 409 malloc_disk_free(mdisk); 410 return -ENOMEM; 411 } 412 413 if (name) { 414 mdisk->disk.name = strdup(name); 415 } else { 416 /* Auto-generate a name */ 417 mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count); 418 malloc_disk_count++; 419 } 420 if (!mdisk->disk.name) { 421 malloc_disk_free(mdisk); 422 return -ENOMEM; 423 } 424 mdisk->disk.product_name = "Malloc disk"; 425 426 mdisk->disk.write_cache = 1; 427 mdisk->disk.blocklen = block_size; 428 mdisk->disk.blockcnt = num_blocks; 429 if (uuid) { 430 mdisk->disk.uuid = *uuid; 431 } else { 432 spdk_uuid_generate(&mdisk->disk.uuid); 433 } 434 435 mdisk->disk.ctxt = mdisk; 436 mdisk->disk.fn_table = &malloc_fn_table; 437 mdisk->disk.module = &malloc_if; 438 439 rc = spdk_bdev_register(&mdisk->disk); 440 if (rc) { 441 malloc_disk_free(mdisk); 442 return rc; 443 } 444 445 *bdev = &(mdisk->disk); 446 447 TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link); 448 449 return rc; 450 } 451 452 void 453 delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg) 454 { 455 if (!bdev || bdev->module != &malloc_if) { 456 cb_fn(cb_arg, -ENODEV); 457 return; 458 } 459 460 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 461 } 462 463 static int bdev_malloc_initialize(void) 464 { 465 struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Malloc"); 466 int NumberOfLuns, LunSizeInMB, BlockSize, i, rc = 0; 467 uint64_t size; 468 struct spdk_bdev *bdev; 469 470 malloc_disk_count = 0; 471 472 if (sp != NULL) { 473 NumberOfLuns = spdk_conf_section_get_intval(sp, "NumberOfLuns"); 474 LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB"); 475 BlockSize = spdk_conf_section_get_intval(sp, "BlockSize"); 476 if ((NumberOfLuns < 1) || (LunSizeInMB < 1)) { 477 SPDK_ERRLOG("Malloc section present, but no devices specified\n"); 478 goto end; 479 } 480 if (BlockSize < 1) { 481 /* Default is 512 bytes */ 482 BlockSize = 512; 483 } 484 size = (uint64_t)LunSizeInMB * 1024 * 1024; 485 for (i = 0; i < NumberOfLuns; i++) { 486 rc = create_malloc_disk(&bdev, NULL, NULL, size / BlockSize, BlockSize); 487 if (rc) { 488 SPDK_ERRLOG("Could not create malloc disk\n"); 489 goto end; 490 } 491 } 492 } 493 494 end: 495 return rc; 496 } 497 498 static void 499 bdev_malloc_get_spdk_running_config(FILE *fp) 500 { 501 int num_malloc_luns = 0; 502 uint64_t malloc_lun_size = 0; 503 struct malloc_disk *mdisk; 504 505 /* count number of malloc LUNs, get LUN size */ 506 TAILQ_FOREACH(mdisk, &g_malloc_disks, link) { 507 if (0 == malloc_lun_size) { 508 /* assume all malloc luns the same size */ 509 malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt; 510 malloc_lun_size /= (1024 * 1024); 511 } 512 num_malloc_luns++; 513 } 514 515 if (num_malloc_luns > 0) { 516 fprintf(fp, 517 "\n" 518 "# Users may change this section to create a different number or size of\n" 519 "# malloc LUNs.\n" 520 "# This will generate %d LUNs with a malloc-allocated backend. Each LUN\n" 521 "# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n" 522 "# Not all LUNs defined here are necessarily used below.\n" 523 "[Malloc]\n" 524 " NumberOfLuns %d\n" 525 " LunSizeInMB %" PRIu64 "\n", 526 num_malloc_luns, malloc_lun_size, 527 num_malloc_luns - 1, num_malloc_luns, 528 malloc_lun_size); 529 } 530 } 531 532 SPDK_LOG_REGISTER_COMPONENT("bdev_malloc", SPDK_LOG_BDEV_MALLOC) 533