xref: /spdk/module/bdev/malloc/bdev_malloc.c (revision 03e3fc4f5835983a4e6602b4e770922e798ce263)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "bdev_malloc.h"
37 #include "spdk/bdev.h"
38 #include "spdk/conf.h"
39 #include "spdk/endian.h"
40 #include "spdk/env.h"
41 #include "spdk/accel_engine.h"
42 #include "spdk/json.h"
43 #include "spdk/thread.h"
44 #include "spdk/queue.h"
45 #include "spdk/string.h"
46 
47 #include "spdk/bdev_module.h"
48 #include "spdk_internal/log.h"
49 
50 struct malloc_disk {
51 	struct spdk_bdev		disk;
52 	void				*malloc_buf;
53 	TAILQ_ENTRY(malloc_disk)	link;
54 };
55 
56 struct malloc_task {
57 	int				num_outstanding;
58 	enum spdk_bdev_io_status	status;
59 };
60 
61 static struct malloc_task *
62 __malloc_task_from_accel_task(struct spdk_accel_task *ct)
63 {
64 	return (struct malloc_task *)((uintptr_t)ct - sizeof(struct malloc_task));
65 }
66 
67 static struct spdk_accel_task *
68 __accel_task_from_malloc_task(struct malloc_task *mt)
69 {
70 	return (struct spdk_accel_task *)((uintptr_t)mt + sizeof(struct malloc_task));
71 }
72 
73 static void
74 malloc_done(void *ref, int status)
75 {
76 	struct malloc_task *task = __malloc_task_from_accel_task(ref);
77 
78 	if (status != 0) {
79 		if (status == -ENOMEM) {
80 			task->status = SPDK_BDEV_IO_STATUS_NOMEM;
81 		} else {
82 			task->status = SPDK_BDEV_IO_STATUS_FAILED;
83 		}
84 	}
85 
86 	if (--task->num_outstanding == 0) {
87 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
88 	}
89 }
90 
/* Module callbacks defined later in this file. */
static int bdev_malloc_initialize(void);
static void bdev_malloc_get_spdk_running_config(FILE *fp);

/* Every malloc disk that is currently registered. */
static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks);

/* Next index used when auto-generating a "Malloc<N>" name. */
int malloc_disk_count = 0;
97 
98 static int
99 bdev_malloc_get_ctx_size(void)
100 {
101 	return sizeof(struct malloc_task) + spdk_accel_task_size();
102 }
103 
104 static struct spdk_bdev_module malloc_if = {
105 	.name = "malloc",
106 	.module_init = bdev_malloc_initialize,
107 	.config_text = bdev_malloc_get_spdk_running_config,
108 	.get_ctx_size = bdev_malloc_get_ctx_size,
109 
110 };
111 
112 SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if)
113 
114 static void
115 malloc_disk_free(struct malloc_disk *malloc_disk)
116 {
117 	if (!malloc_disk) {
118 		return;
119 	}
120 
121 	free(malloc_disk->disk.name);
122 	spdk_free(malloc_disk->malloc_buf);
123 	free(malloc_disk);
124 }
125 
126 static int
127 bdev_malloc_destruct(void *ctx)
128 {
129 	struct malloc_disk *malloc_disk = ctx;
130 
131 	TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link);
132 	malloc_disk_free(malloc_disk);
133 	return 0;
134 }
135 
/*
 * Check that an iovec array is long enough to hold nbytes.
 *
 * Returns 0 when the vector covers at least nbytes, and 1 when all elements
 * together are shorter than nbytes.
 */
static int
bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes)
{
	size_t remaining = nbytes;
	int idx = 0;

	while (idx < iovcnt) {
		size_t seg_len = iovs[idx].iov_len;

		if (seg_len > remaining) {
			/* This element alone reaches past the end of the request. */
			return 0;
		}

		remaining -= seg_len;
		idx++;
	}

	return remaining == 0 ? 0 : 1;
}
151 
152 static void
153 bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
154 		  struct malloc_task *task,
155 		  struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
156 {
157 	int64_t res = 0;
158 	void *src = mdisk->malloc_buf + offset;
159 	int i;
160 
161 	if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
162 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
163 				      SPDK_BDEV_IO_STATUS_FAILED);
164 		return;
165 	}
166 
167 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "read %lu bytes from offset %#lx\n",
168 		      len, offset);
169 
170 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
171 	task->num_outstanding = iovcnt;
172 
173 	for (i = 0; i < iovcnt; i++) {
174 		res = spdk_accel_submit_copy(__accel_task_from_malloc_task(task),
175 					     ch, iov[i].iov_base,
176 					     src, iov[i].iov_len, malloc_done);
177 
178 		if (res != 0) {
179 			malloc_done(__accel_task_from_malloc_task(task), res);
180 		}
181 
182 		src += iov[i].iov_len;
183 		len -= iov[i].iov_len;
184 	}
185 }
186 
187 static void
188 bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
189 		   struct malloc_task *task,
190 		   struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
191 {
192 	int64_t res = 0;
193 	void *dst = mdisk->malloc_buf + offset;
194 	int i;
195 
196 	if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
197 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
198 				      SPDK_BDEV_IO_STATUS_FAILED);
199 		return;
200 	}
201 
202 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "wrote %lu bytes to offset %#lx\n",
203 		      len, offset);
204 
205 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
206 	task->num_outstanding = iovcnt;
207 
208 	for (i = 0; i < iovcnt; i++) {
209 		res = spdk_accel_submit_copy(__accel_task_from_malloc_task(task),
210 					     ch, dst, iov[i].iov_base,
211 					     iov[i].iov_len, malloc_done);
212 
213 		if (res != 0) {
214 			malloc_done(__accel_task_from_malloc_task(task), res);
215 		}
216 
217 		dst += iov[i].iov_len;
218 	}
219 }
220 
221 static int
222 bdev_malloc_unmap(struct malloc_disk *mdisk,
223 		  struct spdk_io_channel *ch,
224 		  struct malloc_task *task,
225 		  uint64_t offset,
226 		  uint64_t byte_count)
227 {
228 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
229 	task->num_outstanding = 1;
230 
231 	return spdk_accel_submit_fill(__accel_task_from_malloc_task(task), ch,
232 				      mdisk->malloc_buf + offset, 0, byte_count, malloc_done);
233 }
234 
235 static int64_t
236 bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task,
237 		  uint64_t offset, uint64_t nbytes)
238 {
239 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
240 
241 	return 0;
242 }
243 
244 static int
245 bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task)
246 {
247 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
248 
249 	return 0;
250 }
251 
252 static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
253 {
254 	uint32_t block_size = bdev_io->bdev->blocklen;
255 
256 	switch (bdev_io->type) {
257 	case SPDK_BDEV_IO_TYPE_READ:
258 		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
259 			assert(bdev_io->u.bdev.iovcnt == 1);
260 			bdev_io->u.bdev.iovs[0].iov_base =
261 				((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
262 				bdev_io->u.bdev.offset_blocks * block_size;
263 			bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size;
264 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
265 			return 0;
266 		}
267 
268 		bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt,
269 				  ch,
270 				  (struct malloc_task *)bdev_io->driver_ctx,
271 				  bdev_io->u.bdev.iovs,
272 				  bdev_io->u.bdev.iovcnt,
273 				  bdev_io->u.bdev.num_blocks * block_size,
274 				  bdev_io->u.bdev.offset_blocks * block_size);
275 		return 0;
276 
277 	case SPDK_BDEV_IO_TYPE_WRITE:
278 		bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt,
279 				   ch,
280 				   (struct malloc_task *)bdev_io->driver_ctx,
281 				   bdev_io->u.bdev.iovs,
282 				   bdev_io->u.bdev.iovcnt,
283 				   bdev_io->u.bdev.num_blocks * block_size,
284 				   bdev_io->u.bdev.offset_blocks * block_size);
285 		return 0;
286 
287 	case SPDK_BDEV_IO_TYPE_RESET:
288 		return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt,
289 					 (struct malloc_task *)bdev_io->driver_ctx);
290 
291 	case SPDK_BDEV_IO_TYPE_FLUSH:
292 		return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt,
293 					 (struct malloc_task *)bdev_io->driver_ctx,
294 					 bdev_io->u.bdev.offset_blocks * block_size,
295 					 bdev_io->u.bdev.num_blocks * block_size);
296 
297 	case SPDK_BDEV_IO_TYPE_UNMAP:
298 		return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
299 					 ch,
300 					 (struct malloc_task *)bdev_io->driver_ctx,
301 					 bdev_io->u.bdev.offset_blocks * block_size,
302 					 bdev_io->u.bdev.num_blocks * block_size);
303 
304 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
305 		/* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */
306 		return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
307 					 ch,
308 					 (struct malloc_task *)bdev_io->driver_ctx,
309 					 bdev_io->u.bdev.offset_blocks * block_size,
310 					 bdev_io->u.bdev.num_blocks * block_size);
311 
312 	case SPDK_BDEV_IO_TYPE_ZCOPY:
313 		if (bdev_io->u.bdev.zcopy.start) {
314 			void *buf;
315 			size_t len;
316 
317 			buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
318 			      bdev_io->u.bdev.offset_blocks * block_size;
319 			len = bdev_io->u.bdev.num_blocks * block_size;
320 			spdk_bdev_io_set_buf(bdev_io, buf, len);
321 
322 		}
323 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
324 		return 0;
325 	case SPDK_BDEV_IO_TYPE_ABORT:
326 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
327 		return 0;
328 	default:
329 		return -1;
330 	}
331 	return 0;
332 }
333 
334 static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
335 {
336 	if (_bdev_malloc_submit_request(ch, bdev_io) != 0) {
337 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
338 	}
339 }
340 
341 static bool
342 bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
343 {
344 	switch (io_type) {
345 	case SPDK_BDEV_IO_TYPE_READ:
346 	case SPDK_BDEV_IO_TYPE_WRITE:
347 	case SPDK_BDEV_IO_TYPE_FLUSH:
348 	case SPDK_BDEV_IO_TYPE_RESET:
349 	case SPDK_BDEV_IO_TYPE_UNMAP:
350 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
351 	case SPDK_BDEV_IO_TYPE_ZCOPY:
352 	case SPDK_BDEV_IO_TYPE_ABORT:
353 		return true;
354 
355 	default:
356 		return false;
357 	}
358 }
359 
/*
 * bdev get_io_channel callback.  ctx is unused; the channel returned by
 * spdk_accel_engine_get_io_channel() is handed back unchanged.
 */
static struct spdk_io_channel *
bdev_malloc_get_io_channel(void *ctx)
{
	struct spdk_io_channel *accel_ch = spdk_accel_engine_get_io_channel();

	return accel_ch;
}
365 
366 static void
367 bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
368 {
369 	char uuid_str[SPDK_UUID_STRING_LEN];
370 
371 	spdk_json_write_object_begin(w);
372 
373 	spdk_json_write_named_string(w, "method", "bdev_malloc_create");
374 
375 	spdk_json_write_named_object_begin(w, "params");
376 	spdk_json_write_named_string(w, "name", bdev->name);
377 	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
378 	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
379 	spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
380 	spdk_json_write_named_string(w, "uuid", uuid_str);
381 
382 	spdk_json_write_object_end(w);
383 
384 	spdk_json_write_object_end(w);
385 }
386 
387 static const struct spdk_bdev_fn_table malloc_fn_table = {
388 	.destruct		= bdev_malloc_destruct,
389 	.submit_request		= bdev_malloc_submit_request,
390 	.io_type_supported	= bdev_malloc_io_type_supported,
391 	.get_io_channel		= bdev_malloc_get_io_channel,
392 	.write_config_json	= bdev_malloc_write_json_config,
393 };
394 
395 int
396 create_malloc_disk(struct spdk_bdev **bdev, const char *name, const struct spdk_uuid *uuid,
397 		   uint64_t num_blocks, uint32_t block_size)
398 {
399 	struct malloc_disk	*mdisk;
400 	int rc;
401 
402 	if (num_blocks == 0) {
403 		SPDK_ERRLOG("Disk num_blocks must be greater than 0");
404 		return -EINVAL;
405 	}
406 
407 	mdisk = calloc(1, sizeof(*mdisk));
408 	if (!mdisk) {
409 		SPDK_ERRLOG("mdisk calloc() failed\n");
410 		return -ENOMEM;
411 	}
412 
413 	/*
414 	 * Allocate the large backend memory buffer from pinned memory.
415 	 *
416 	 * TODO: need to pass a hint so we know which socket to allocate
417 	 *  from on multi-socket systems.
418 	 */
419 	mdisk->malloc_buf = spdk_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL,
420 					 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
421 	if (!mdisk->malloc_buf) {
422 		SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n");
423 		malloc_disk_free(mdisk);
424 		return -ENOMEM;
425 	}
426 
427 	if (name) {
428 		mdisk->disk.name = strdup(name);
429 	} else {
430 		/* Auto-generate a name */
431 		mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count);
432 		malloc_disk_count++;
433 	}
434 	if (!mdisk->disk.name) {
435 		malloc_disk_free(mdisk);
436 		return -ENOMEM;
437 	}
438 	mdisk->disk.product_name = "Malloc disk";
439 
440 	mdisk->disk.write_cache = 1;
441 	mdisk->disk.blocklen = block_size;
442 	mdisk->disk.blockcnt = num_blocks;
443 	if (uuid) {
444 		mdisk->disk.uuid = *uuid;
445 	} else {
446 		spdk_uuid_generate(&mdisk->disk.uuid);
447 	}
448 
449 	mdisk->disk.ctxt = mdisk;
450 	mdisk->disk.fn_table = &malloc_fn_table;
451 	mdisk->disk.module = &malloc_if;
452 
453 	rc = spdk_bdev_register(&mdisk->disk);
454 	if (rc) {
455 		malloc_disk_free(mdisk);
456 		return rc;
457 	}
458 
459 	*bdev = &(mdisk->disk);
460 
461 	TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link);
462 
463 	return rc;
464 }
465 
466 void
467 delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg)
468 {
469 	if (!bdev || bdev->module != &malloc_if) {
470 		cb_fn(cb_arg, -ENODEV);
471 		return;
472 	}
473 
474 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
475 }
476 
/*
 * Module init callback: create the disks described by the legacy "[Malloc]"
 * configuration section, if one exists.
 *
 * Returns 0 when there is no section, when the section specifies no devices,
 * or when every disk was created; otherwise returns the error from the first
 * create_malloc_disk() call that failed.
 */
static int bdev_malloc_initialize(void)
{
	struct spdk_conf_section *section = spdk_conf_find_section(NULL, "Malloc");
	struct spdk_bdev *bdev;
	uint64_t lun_bytes;
	int lun_count, lun_mb, blk_size, idx;
	int rc = 0;

	if (section == NULL) {
		return rc;
	}

	lun_count = spdk_conf_section_get_intval(section, "NumberOfLuns");
	lun_mb = spdk_conf_section_get_intval(section, "LunSizeInMB");
	blk_size = spdk_conf_section_get_intval(section, "BlockSize");

	if ((lun_count < 1) || (lun_mb < 1)) {
		SPDK_ERRLOG("Malloc section present, but no devices specified\n");
		return rc;
	}

	if (blk_size < 1) {
		/* Default is 512 bytes */
		blk_size = 512;
	}

	lun_bytes = (uint64_t)lun_mb * 1024 * 1024;

	for (idx = 0; idx < lun_count; idx++) {
		rc = create_malloc_disk(&bdev, NULL, NULL, lun_bytes / blk_size, blk_size);
		if (rc) {
			SPDK_ERRLOG("Could not create malloc disk\n");
			break;
		}
	}

	return rc;
}
509 
510 static void
511 bdev_malloc_get_spdk_running_config(FILE *fp)
512 {
513 	int num_malloc_luns = 0;
514 	uint64_t malloc_lun_size = 0;
515 	struct malloc_disk *mdisk;
516 
517 	/* count number of malloc LUNs, get LUN size */
518 	TAILQ_FOREACH(mdisk, &g_malloc_disks, link) {
519 		if (0 == malloc_lun_size) {
520 			/* assume all malloc luns the same size */
521 			malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt;
522 			malloc_lun_size /= (1024 * 1024);
523 		}
524 		num_malloc_luns++;
525 	}
526 
527 	if (num_malloc_luns > 0) {
528 		fprintf(fp,
529 			"\n"
530 			"# Users may change this section to create a different number or size of\n"
531 			"# malloc LUNs.\n"
532 			"# This will generate %d LUNs with a malloc-allocated backend. Each LUN\n"
533 			"# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n"
534 			"# Not all LUNs defined here are necessarily used below.\n"
535 			"[Malloc]\n"
536 			"  NumberOfLuns %d\n"
537 			"  LunSizeInMB %" PRIu64 "\n",
538 			num_malloc_luns, malloc_lun_size,
539 			num_malloc_luns - 1, num_malloc_luns,
540 			malloc_lun_size);
541 	}
542 }
543 
/* Register the "bdev_malloc" log component used by the SPDK_DEBUGLOG() calls in this file. */
SPDK_LOG_REGISTER_COMPONENT("bdev_malloc", SPDK_LOG_BDEV_MALLOC)
545