1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "bdev_malloc.h" 37 #include "spdk/bdev.h" 38 #include "spdk/endian.h" 39 #include "spdk/env.h" 40 #include "spdk/accel_engine.h" 41 #include "spdk/json.h" 42 #include "spdk/thread.h" 43 #include "spdk/queue.h" 44 #include "spdk/string.h" 45 46 #include "spdk/bdev_module.h" 47 #include "spdk/log.h" 48 49 struct malloc_disk { 50 struct spdk_bdev disk; 51 void *malloc_buf; 52 TAILQ_ENTRY(malloc_disk) link; 53 }; 54 55 struct malloc_task { 56 int num_outstanding; 57 enum spdk_bdev_io_status status; 58 }; 59 60 static void 61 malloc_done(void *ref, int status) 62 { 63 struct malloc_task *task = (struct malloc_task *)ref; 64 65 if (status != 0) { 66 if (status == -ENOMEM) { 67 task->status = SPDK_BDEV_IO_STATUS_NOMEM; 68 } else { 69 task->status = SPDK_BDEV_IO_STATUS_FAILED; 70 } 71 } 72 73 if (--task->num_outstanding == 0) { 74 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status); 75 } 76 } 77 78 static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks); 79 80 int malloc_disk_count = 0; 81 82 static int bdev_malloc_initialize(void); 83 84 static int 85 bdev_malloc_get_ctx_size(void) 86 { 87 return sizeof(struct malloc_task); 88 } 89 90 static struct spdk_bdev_module malloc_if = { 91 .name = "malloc", 92 .module_init = bdev_malloc_initialize, 93 .get_ctx_size = bdev_malloc_get_ctx_size, 94 95 }; 96 97 SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if) 98 99 static void 100 malloc_disk_free(struct malloc_disk *malloc_disk) 101 { 102 if (!malloc_disk) { 103 return; 104 } 105 106 free(malloc_disk->disk.name); 107 spdk_free(malloc_disk->malloc_buf); 108 free(malloc_disk); 109 } 110 111 static int 112 bdev_malloc_destruct(void *ctx) 113 { 114 struct malloc_disk *malloc_disk = ctx; 115 116 TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link); 117 malloc_disk_free(malloc_disk); 118 return 0; 119 } 120 121 static int 122 bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes) 123 { 124 int i; 125 126 for (i = 0; i < iovcnt; i++) { 127 if (nbytes < iovs[i].iov_len) { 128 return 0; 129 } 130 131 nbytes -= iovs[i].iov_len; 132 } 133 134 return nbytes != 0; 135 } 136 137 static void 138 bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 139 struct malloc_task *task, 140 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 141 { 142 int64_t res = 0; 143 void *src = mdisk->malloc_buf + offset; 144 int i; 145 146 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 147 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 148 SPDK_BDEV_IO_STATUS_FAILED); 149 return; 150 } 151 152 SPDK_DEBUGLOG(bdev_malloc, "read %zu bytes from offset %#" PRIx64 "\n", 153 len, offset); 154 155 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 156 task->num_outstanding = iovcnt; 157 158 for (i = 0; i < iovcnt; i++) { 159 res = spdk_accel_submit_copy(ch, iov[i].iov_base, 160 src, iov[i].iov_len, malloc_done, task); 161 162 if (res != 0) { 163 malloc_done(task, res); 164 } 165 166 src += iov[i].iov_len; 167 len -= iov[i].iov_len; 168 } 169 } 170 171 static void 172 bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch, 173 struct malloc_task *task, 174 struct iovec *iov, int iovcnt, size_t len, uint64_t offset) 175 { 176 int64_t res = 0; 177 void *dst = mdisk->malloc_buf + offset; 178 int i; 179 180 if (bdev_malloc_check_iov_len(iov, iovcnt, len)) { 181 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), 182 SPDK_BDEV_IO_STATUS_FAILED); 183 return; 184 } 185 186 SPDK_DEBUGLOG(bdev_malloc, "wrote %zu bytes to offset %#" PRIx64 "\n", 187 len, offset); 188 189 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 190 task->num_outstanding = iovcnt; 191 192 for (i = 0; i < iovcnt; i++) { 193 res = spdk_accel_submit_copy(ch, dst, iov[i].iov_base, 194 iov[i].iov_len, malloc_done, task); 195 196 if (res != 0) { 197 malloc_done(task, res); 198 } 199 200 dst += iov[i].iov_len; 201 } 202 } 203 204 static int 205 bdev_malloc_unmap(struct malloc_disk *mdisk, 206 struct spdk_io_channel *ch, 207 struct malloc_task *task, 208 uint64_t offset, 209 uint64_t byte_count) 210 { 211 task->status = SPDK_BDEV_IO_STATUS_SUCCESS; 212 task->num_outstanding = 1; 213 214 return spdk_accel_submit_fill(ch, mdisk->malloc_buf + offset, 0, 215 byte_count, malloc_done, task); 216 } 217 218 static int64_t 219 bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task, 220 uint64_t offset, uint64_t nbytes) 221 { 222 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); 223 224 return 0; 225 } 226 227 static int 228 bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task) 229 { 230 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS); 231 232 return 0; 233 } 234 235 static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 236 { 237 uint32_t block_size = bdev_io->bdev->blocklen; 238 239 switch (bdev_io->type) { 240 case SPDK_BDEV_IO_TYPE_READ: 241 if (bdev_io->u.bdev.iovs[0].iov_base == NULL) { 242 assert(bdev_io->u.bdev.iovcnt == 1); 243 bdev_io->u.bdev.iovs[0].iov_base = 244 ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 245 bdev_io->u.bdev.offset_blocks * block_size; 246 bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size; 247 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 248 return 0; 249 } 250 251 bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt, 252 ch, 253 (struct malloc_task *)bdev_io->driver_ctx, 254 bdev_io->u.bdev.iovs, 255 bdev_io->u.bdev.iovcnt, 256 bdev_io->u.bdev.num_blocks * block_size, 257 bdev_io->u.bdev.offset_blocks * block_size); 258 return 0; 259 260 case SPDK_BDEV_IO_TYPE_WRITE: 261 bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt, 262 ch, 263 (struct malloc_task *)bdev_io->driver_ctx, 264 bdev_io->u.bdev.iovs, 265 bdev_io->u.bdev.iovcnt, 266 bdev_io->u.bdev.num_blocks * block_size, 267 bdev_io->u.bdev.offset_blocks * block_size); 268 return 0; 269 270 case SPDK_BDEV_IO_TYPE_RESET: 271 return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt, 272 (struct malloc_task *)bdev_io->driver_ctx); 273 274 case SPDK_BDEV_IO_TYPE_FLUSH: 275 return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt, 276 (struct malloc_task *)bdev_io->driver_ctx, 277 bdev_io->u.bdev.offset_blocks * block_size, 278 bdev_io->u.bdev.num_blocks * block_size); 279 280 case SPDK_BDEV_IO_TYPE_UNMAP: 281 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 282 ch, 283 (struct malloc_task *)bdev_io->driver_ctx, 284 bdev_io->u.bdev.offset_blocks * block_size, 285 bdev_io->u.bdev.num_blocks * block_size); 286 287 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 288 /* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */ 289 return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, 290 ch, 291 (struct malloc_task *)bdev_io->driver_ctx, 292 bdev_io->u.bdev.offset_blocks * block_size, 293 bdev_io->u.bdev.num_blocks * block_size); 294 295 case SPDK_BDEV_IO_TYPE_ZCOPY: 296 if (bdev_io->u.bdev.zcopy.start) { 297 void *buf; 298 size_t len; 299 300 buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf + 301 bdev_io->u.bdev.offset_blocks * block_size; 302 len = bdev_io->u.bdev.num_blocks * block_size; 303 spdk_bdev_io_set_buf(bdev_io, buf, len); 304 305 } 306 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 307 return 0; 308 case SPDK_BDEV_IO_TYPE_ABORT: 309 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 310 return 0; 311 default: 312 return -1; 313 } 314 return 0; 315 } 316 317 static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 318 { 319 if (_bdev_malloc_submit_request(ch, bdev_io) != 0) { 320 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 321 } 322 } 323 324 static bool 325 bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 326 { 327 switch (io_type) { 328 case SPDK_BDEV_IO_TYPE_READ: 329 case SPDK_BDEV_IO_TYPE_WRITE: 330 case SPDK_BDEV_IO_TYPE_FLUSH: 331 case SPDK_BDEV_IO_TYPE_RESET: 332 case SPDK_BDEV_IO_TYPE_UNMAP: 333 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 334 case SPDK_BDEV_IO_TYPE_ZCOPY: 335 case SPDK_BDEV_IO_TYPE_ABORT: 336 return true; 337 338 default: 339 return false; 340 } 341 } 342 343 static struct spdk_io_channel * 344 bdev_malloc_get_io_channel(void *ctx) 345 { 346 return spdk_accel_engine_get_io_channel(); 347 } 348 349 static void 350 bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 351 { 352 char uuid_str[SPDK_UUID_STRING_LEN]; 353 354 spdk_json_write_object_begin(w); 355 356 spdk_json_write_named_string(w, "method", "bdev_malloc_create"); 357 358 spdk_json_write_named_object_begin(w, "params"); 359 spdk_json_write_named_string(w, "name", bdev->name); 360 spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt); 361 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); 362 spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid); 363 spdk_json_write_named_string(w, "uuid", uuid_str); 364 365 spdk_json_write_object_end(w); 366 367 spdk_json_write_object_end(w); 368 } 369 370 static const struct spdk_bdev_fn_table malloc_fn_table = { 371 .destruct = bdev_malloc_destruct, 372 .submit_request = bdev_malloc_submit_request, 373 .io_type_supported = bdev_malloc_io_type_supported, 374 .get_io_channel = bdev_malloc_get_io_channel, 375 .write_config_json = bdev_malloc_write_json_config, 376 }; 377 378 int 379 create_malloc_disk(struct spdk_bdev **bdev, const char *name, const struct spdk_uuid *uuid, 380 uint64_t num_blocks, uint32_t block_size) 381 { 382 struct malloc_disk *mdisk; 383 int rc; 384 385 if (num_blocks == 0) { 386 SPDK_ERRLOG("Disk num_blocks must be greater than 0"); 387 return -EINVAL; 388 } 389 390 if (block_size % 512) { 391 SPDK_ERRLOG("block size must be 512 bytes aligned\n"); 392 return -EINVAL; 393 } 394 395 mdisk = calloc(1, sizeof(*mdisk)); 396 if (!mdisk) { 397 SPDK_ERRLOG("mdisk calloc() failed\n"); 398 return -ENOMEM; 399 } 400 401 /* 402 * Allocate the large backend memory buffer from pinned memory. 403 * 404 * TODO: need to pass a hint so we know which socket to allocate 405 * from on multi-socket systems. 406 */ 407 mdisk->malloc_buf = spdk_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL, 408 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 409 if (!mdisk->malloc_buf) { 410 SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n"); 411 malloc_disk_free(mdisk); 412 return -ENOMEM; 413 } 414 415 if (name) { 416 mdisk->disk.name = strdup(name); 417 } else { 418 /* Auto-generate a name */ 419 mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count); 420 malloc_disk_count++; 421 } 422 if (!mdisk->disk.name) { 423 malloc_disk_free(mdisk); 424 return -ENOMEM; 425 } 426 mdisk->disk.product_name = "Malloc disk"; 427 428 mdisk->disk.write_cache = 1; 429 mdisk->disk.blocklen = block_size; 430 mdisk->disk.blockcnt = num_blocks; 431 if (uuid) { 432 mdisk->disk.uuid = *uuid; 433 } else { 434 spdk_uuid_generate(&mdisk->disk.uuid); 435 } 436 437 mdisk->disk.ctxt = mdisk; 438 mdisk->disk.fn_table = &malloc_fn_table; 439 mdisk->disk.module = &malloc_if; 440 441 rc = spdk_bdev_register(&mdisk->disk); 442 if (rc) { 443 malloc_disk_free(mdisk); 444 return rc; 445 } 446 447 *bdev = &(mdisk->disk); 448 449 TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link); 450 451 return rc; 452 } 453 454 void 455 delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg) 456 { 457 if (!bdev || bdev->module != &malloc_if) { 458 cb_fn(cb_arg, -ENODEV); 459 return; 460 } 461 462 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 463 } 464 465 static int bdev_malloc_initialize(void) 466 { 467 /* This needs to be reset for each reinitialization of submodules. 468 * Otherwise after enough devices or reinitializations the value gets too high. 469 * TODO: Make malloc bdev name mandatory and remove this counter. */ 470 malloc_disk_count = 0; 471 return 0; 472 } 473 474 SPDK_LOG_REGISTER_COMPONENT(bdev_malloc) 475