xref: /spdk/module/bdev/malloc/bdev_malloc.c (revision 9889ab2dc80e40dae92dcef361d53dcba722043d)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "bdev_malloc.h"
37 #include "spdk/bdev.h"
38 #include "spdk/conf.h"
39 #include "spdk/endian.h"
40 #include "spdk/env.h"
41 #include "spdk/copy_engine.h"
42 #include "spdk/json.h"
43 #include "spdk/thread.h"
44 #include "spdk/queue.h"
45 #include "spdk/string.h"
46 
47 #include "spdk/bdev_module.h"
48 #include "spdk_internal/log.h"
49 
50 struct malloc_disk {
51 	struct spdk_bdev		disk;
52 	void				*malloc_buf;
53 	TAILQ_ENTRY(malloc_disk)	link;
54 };
55 
56 struct malloc_task {
57 	int				num_outstanding;
58 	enum spdk_bdev_io_status	status;
59 };
60 
61 static struct malloc_task *
62 __malloc_task_from_copy_task(struct spdk_copy_task *ct)
63 {
64 	return (struct malloc_task *)((uintptr_t)ct - sizeof(struct malloc_task));
65 }
66 
67 static struct spdk_copy_task *
68 __copy_task_from_malloc_task(struct malloc_task *mt)
69 {
70 	return (struct spdk_copy_task *)((uintptr_t)mt + sizeof(struct malloc_task));
71 }
72 
73 static void
74 malloc_done(void *ref, int status)
75 {
76 	struct malloc_task *task = __malloc_task_from_copy_task(ref);
77 
78 	if (status != 0) {
79 		if (status == -ENOMEM) {
80 			task->status = SPDK_BDEV_IO_STATUS_NOMEM;
81 		} else {
82 			task->status = SPDK_BDEV_IO_STATUS_FAILED;
83 		}
84 	}
85 
86 	if (--task->num_outstanding == 0) {
87 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
88 	}
89 }
90 
/* Every malloc disk that create_malloc_disk() registered and that has not been destructed yet. */
static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks);

/* Numeric suffix used for the next auto-generated "Malloc%d" device name. */
int malloc_disk_count = 0;

/* Forward declarations for the callbacks referenced by the module descriptor. */
static void bdev_malloc_get_spdk_running_config(FILE *fp);
static int bdev_malloc_initialize(void);
97 
98 static int
99 bdev_malloc_get_ctx_size(void)
100 {
101 	return sizeof(struct malloc_task) + spdk_copy_task_size();
102 }
103 
104 static struct spdk_bdev_module malloc_if = {
105 	.name = "malloc",
106 	.module_init = bdev_malloc_initialize,
107 	.config_text = bdev_malloc_get_spdk_running_config,
108 	.get_ctx_size = bdev_malloc_get_ctx_size,
109 
110 };
111 
112 SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if)
113 
114 static void
115 malloc_disk_free(struct malloc_disk *malloc_disk)
116 {
117 	if (!malloc_disk) {
118 		return;
119 	}
120 
121 	free(malloc_disk->disk.name);
122 	spdk_free(malloc_disk->malloc_buf);
123 	free(malloc_disk);
124 }
125 
126 static int
127 bdev_malloc_destruct(void *ctx)
128 {
129 	struct malloc_disk *malloc_disk = ctx;
130 
131 	TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link);
132 	malloc_disk_free(malloc_disk);
133 	return 0;
134 }
135 
/*
 * Check that an iovec array provides room for at least nbytes bytes.
 *
 * Elements are consumed in order.  As soon as one element is longer than the
 * bytes still unaccounted for, the array is known to be large enough and 0 is
 * returned.  If every element has been consumed, the result is 0 when exactly
 * nbytes were covered and 1 when the array was too short.
 */
static int
bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes)
{
	size_t remaining = nbytes;
	int idx = 0;

	while (idx < iovcnt) {
		size_t seg_len = iovs[idx].iov_len;

		if (seg_len > remaining) {
			return 0;
		}

		remaining -= seg_len;
		idx++;
	}

	return remaining == 0 ? 0 : 1;
}
151 
152 static void
153 bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
154 		  struct malloc_task *task,
155 		  struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
156 {
157 	int64_t res = 0;
158 	void *src = mdisk->malloc_buf + offset;
159 	int i;
160 
161 	if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
162 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
163 				      SPDK_BDEV_IO_STATUS_FAILED);
164 		return;
165 	}
166 
167 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "read %lu bytes from offset %#lx\n",
168 		      len, offset);
169 
170 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
171 	task->num_outstanding = iovcnt;
172 
173 	for (i = 0; i < iovcnt; i++) {
174 		res = spdk_copy_submit(__copy_task_from_malloc_task(task),
175 				       ch, iov[i].iov_base,
176 				       src, iov[i].iov_len, malloc_done);
177 
178 		if (res != 0) {
179 			malloc_done(__copy_task_from_malloc_task(task), res);
180 		}
181 
182 		src += iov[i].iov_len;
183 		len -= iov[i].iov_len;
184 	}
185 }
186 
187 static void
188 bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
189 		   struct malloc_task *task,
190 		   struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
191 {
192 	int64_t res = 0;
193 	void *dst = mdisk->malloc_buf + offset;
194 	int i;
195 
196 	if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
197 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
198 				      SPDK_BDEV_IO_STATUS_FAILED);
199 		return;
200 	}
201 
202 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "wrote %lu bytes to offset %#lx\n",
203 		      len, offset);
204 
205 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
206 	task->num_outstanding = iovcnt;
207 
208 	for (i = 0; i < iovcnt; i++) {
209 		res = spdk_copy_submit(__copy_task_from_malloc_task(task),
210 				       ch, dst, iov[i].iov_base,
211 				       iov[i].iov_len, malloc_done);
212 
213 		if (res != 0) {
214 			malloc_done(__copy_task_from_malloc_task(task), res);
215 		}
216 
217 		dst += iov[i].iov_len;
218 	}
219 }
220 
221 static int
222 bdev_malloc_unmap(struct malloc_disk *mdisk,
223 		  struct spdk_io_channel *ch,
224 		  struct malloc_task *task,
225 		  uint64_t offset,
226 		  uint64_t byte_count)
227 {
228 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
229 	task->num_outstanding = 1;
230 
231 	return spdk_copy_submit_fill(__copy_task_from_malloc_task(task), ch,
232 				     mdisk->malloc_buf + offset, 0, byte_count, malloc_done);
233 }
234 
235 static int64_t
236 bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task,
237 		  uint64_t offset, uint64_t nbytes)
238 {
239 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
240 
241 	return 0;
242 }
243 
244 static int
245 bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task)
246 {
247 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
248 
249 	return 0;
250 }
251 
252 static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
253 {
254 	uint32_t block_size = bdev_io->bdev->blocklen;
255 
256 	switch (bdev_io->type) {
257 	case SPDK_BDEV_IO_TYPE_READ:
258 		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
259 			assert(bdev_io->u.bdev.iovcnt == 1);
260 			bdev_io->u.bdev.iovs[0].iov_base =
261 				((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
262 				bdev_io->u.bdev.offset_blocks * block_size;
263 			bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size;
264 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bdev_io->driver_ctx),
265 					      SPDK_BDEV_IO_STATUS_SUCCESS);
266 			return 0;
267 		}
268 
269 		bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt,
270 				  ch,
271 				  (struct malloc_task *)bdev_io->driver_ctx,
272 				  bdev_io->u.bdev.iovs,
273 				  bdev_io->u.bdev.iovcnt,
274 				  bdev_io->u.bdev.num_blocks * block_size,
275 				  bdev_io->u.bdev.offset_blocks * block_size);
276 		return 0;
277 
278 	case SPDK_BDEV_IO_TYPE_WRITE:
279 		bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt,
280 				   ch,
281 				   (struct malloc_task *)bdev_io->driver_ctx,
282 				   bdev_io->u.bdev.iovs,
283 				   bdev_io->u.bdev.iovcnt,
284 				   bdev_io->u.bdev.num_blocks * block_size,
285 				   bdev_io->u.bdev.offset_blocks * block_size);
286 		return 0;
287 
288 	case SPDK_BDEV_IO_TYPE_RESET:
289 		return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt,
290 					 (struct malloc_task *)bdev_io->driver_ctx);
291 
292 	case SPDK_BDEV_IO_TYPE_FLUSH:
293 		return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt,
294 					 (struct malloc_task *)bdev_io->driver_ctx,
295 					 bdev_io->u.bdev.offset_blocks * block_size,
296 					 bdev_io->u.bdev.num_blocks * block_size);
297 
298 	case SPDK_BDEV_IO_TYPE_UNMAP:
299 		return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
300 					 ch,
301 					 (struct malloc_task *)bdev_io->driver_ctx,
302 					 bdev_io->u.bdev.offset_blocks * block_size,
303 					 bdev_io->u.bdev.num_blocks * block_size);
304 
305 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
306 		/* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */
307 		return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
308 					 ch,
309 					 (struct malloc_task *)bdev_io->driver_ctx,
310 					 bdev_io->u.bdev.offset_blocks * block_size,
311 					 bdev_io->u.bdev.num_blocks * block_size);
312 
313 	case SPDK_BDEV_IO_TYPE_ZCOPY:
314 		if (bdev_io->u.bdev.zcopy.start) {
315 			void *buf;
316 			size_t len;
317 
318 			buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
319 			      bdev_io->u.bdev.offset_blocks * block_size;
320 			len = bdev_io->u.bdev.num_blocks * block_size;
321 			spdk_bdev_io_set_buf(bdev_io, buf, len);
322 
323 		}
324 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bdev_io->driver_ctx),
325 				      SPDK_BDEV_IO_STATUS_SUCCESS);
326 		return 0;
327 	default:
328 		return -1;
329 	}
330 	return 0;
331 }
332 
333 static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
334 {
335 	if (_bdev_malloc_submit_request(ch, bdev_io) != 0) {
336 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
337 	}
338 }
339 
340 static bool
341 bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
342 {
343 	switch (io_type) {
344 	case SPDK_BDEV_IO_TYPE_READ:
345 	case SPDK_BDEV_IO_TYPE_WRITE:
346 	case SPDK_BDEV_IO_TYPE_FLUSH:
347 	case SPDK_BDEV_IO_TYPE_RESET:
348 	case SPDK_BDEV_IO_TYPE_UNMAP:
349 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
350 	case SPDK_BDEV_IO_TYPE_ZCOPY:
351 		return true;
352 
353 	default:
354 		return false;
355 	}
356 }
357 
/*
 * fn_table get_io_channel callback.  Malloc disks perform their I/O through
 * the copy engine, so its channel is returned.  ctx is unused.
 */
static struct spdk_io_channel *
bdev_malloc_get_io_channel(void *ctx)
{
	struct spdk_io_channel *copy_ch = spdk_copy_engine_get_io_channel();

	return copy_ch;
}
363 
364 static void
365 bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
366 {
367 	char uuid_str[SPDK_UUID_STRING_LEN];
368 
369 	spdk_json_write_object_begin(w);
370 
371 	spdk_json_write_named_string(w, "method", "bdev_malloc_create");
372 
373 	spdk_json_write_named_object_begin(w, "params");
374 	spdk_json_write_named_string(w, "name", bdev->name);
375 	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
376 	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
377 	spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
378 	spdk_json_write_named_string(w, "uuid", uuid_str);
379 
380 	spdk_json_write_object_end(w);
381 
382 	spdk_json_write_object_end(w);
383 }
384 
385 static const struct spdk_bdev_fn_table malloc_fn_table = {
386 	.destruct		= bdev_malloc_destruct,
387 	.submit_request		= bdev_malloc_submit_request,
388 	.io_type_supported	= bdev_malloc_io_type_supported,
389 	.get_io_channel		= bdev_malloc_get_io_channel,
390 	.write_config_json	= bdev_malloc_write_json_config,
391 };
392 
393 int
394 create_malloc_disk(struct spdk_bdev **bdev, const char *name, const struct spdk_uuid *uuid,
395 		   uint64_t num_blocks, uint32_t block_size)
396 {
397 	struct malloc_disk	*mdisk;
398 	int rc;
399 
400 	if (num_blocks == 0) {
401 		SPDK_ERRLOG("Disk num_blocks must be greater than 0");
402 		return -EINVAL;
403 	}
404 
405 	mdisk = calloc(1, sizeof(*mdisk));
406 	if (!mdisk) {
407 		SPDK_ERRLOG("mdisk calloc() failed\n");
408 		return -ENOMEM;
409 	}
410 
411 	/*
412 	 * Allocate the large backend memory buffer from pinned memory.
413 	 *
414 	 * TODO: need to pass a hint so we know which socket to allocate
415 	 *  from on multi-socket systems.
416 	 */
417 	mdisk->malloc_buf = spdk_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL,
418 					 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
419 	if (!mdisk->malloc_buf) {
420 		SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n");
421 		malloc_disk_free(mdisk);
422 		return -ENOMEM;
423 	}
424 
425 	if (name) {
426 		mdisk->disk.name = strdup(name);
427 	} else {
428 		/* Auto-generate a name */
429 		mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count);
430 		malloc_disk_count++;
431 	}
432 	if (!mdisk->disk.name) {
433 		malloc_disk_free(mdisk);
434 		return -ENOMEM;
435 	}
436 	mdisk->disk.product_name = "Malloc disk";
437 
438 	mdisk->disk.write_cache = 1;
439 	mdisk->disk.blocklen = block_size;
440 	mdisk->disk.blockcnt = num_blocks;
441 	if (uuid) {
442 		mdisk->disk.uuid = *uuid;
443 	} else {
444 		spdk_uuid_generate(&mdisk->disk.uuid);
445 	}
446 
447 	mdisk->disk.ctxt = mdisk;
448 	mdisk->disk.fn_table = &malloc_fn_table;
449 	mdisk->disk.module = &malloc_if;
450 
451 	rc = spdk_bdev_register(&mdisk->disk);
452 	if (rc) {
453 		malloc_disk_free(mdisk);
454 		return rc;
455 	}
456 
457 	*bdev = &(mdisk->disk);
458 
459 	TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link);
460 
461 	return rc;
462 }
463 
464 void
465 delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg)
466 {
467 	if (!bdev || bdev->module != &malloc_if) {
468 		cb_fn(cb_arg, -ENODEV);
469 		return;
470 	}
471 
472 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
473 }
474 
/*
 * Module init callback.  Reads the optional "Malloc" configuration section
 * and creates NumberOfLuns auto-named disks of LunSizeInMB megabytes each,
 * using BlockSize-byte blocks (512 when BlockSize is below 1).
 *
 * Returns 0 when the section is absent, when it specifies no devices (an
 * error is logged), or when all disks were created; otherwise the error from
 * the first create_malloc_disk() call that failed.
 */
static int bdev_malloc_initialize(void)
{
	struct spdk_conf_section *section = spdk_conf_find_section(NULL, "Malloc");
	struct spdk_bdev *bdev;
	uint64_t lun_bytes;
	int lun_count, lun_size_mb, block_size;
	int idx, rc = 0;

	if (section == NULL) {
		return rc;
	}

	lun_count = spdk_conf_section_get_intval(section, "NumberOfLuns");
	lun_size_mb = spdk_conf_section_get_intval(section, "LunSizeInMB");
	block_size = spdk_conf_section_get_intval(section, "BlockSize");

	if (lun_count < 1 || lun_size_mb < 1) {
		SPDK_ERRLOG("Malloc section present, but no devices specified\n");
		return rc;
	}

	if (block_size < 1) {
		/* Default is 512 bytes */
		block_size = 512;
	}

	lun_bytes = (uint64_t)lun_size_mb * 1024 * 1024;

	for (idx = 0; idx < lun_count; idx++) {
		rc = create_malloc_disk(&bdev, NULL, NULL, lun_bytes / block_size, block_size);
		if (rc) {
			SPDK_ERRLOG("Could not create malloc disk\n");
			break;
		}
	}

	return rc;
}
507 
508 static void
509 bdev_malloc_get_spdk_running_config(FILE *fp)
510 {
511 	int num_malloc_luns = 0;
512 	uint64_t malloc_lun_size = 0;
513 	struct malloc_disk *mdisk;
514 
515 	/* count number of malloc LUNs, get LUN size */
516 	TAILQ_FOREACH(mdisk, &g_malloc_disks, link) {
517 		if (0 == malloc_lun_size) {
518 			/* assume all malloc luns the same size */
519 			malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt;
520 			malloc_lun_size /= (1024 * 1024);
521 		}
522 		num_malloc_luns++;
523 	}
524 
525 	if (num_malloc_luns > 0) {
526 		fprintf(fp,
527 			"\n"
528 			"# Users may change this section to create a different number or size of\n"
529 			"# malloc LUNs.\n"
530 			"# This will generate %d LUNs with a malloc-allocated backend. Each LUN\n"
531 			"# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n"
532 			"# Not all LUNs defined here are necessarily used below.\n"
533 			"[Malloc]\n"
534 			"  NumberOfLuns %d\n"
535 			"  LunSizeInMB %" PRIu64 "\n",
536 			num_malloc_luns, malloc_lun_size,
537 			num_malloc_luns - 1, num_malloc_luns,
538 			malloc_lun_size);
539 	}
540 }
541 
542 SPDK_LOG_REGISTER_COMPONENT("bdev_malloc", SPDK_LOG_BDEV_MALLOC)
543