xref: /spdk/module/bdev/malloc/bdev_malloc.c (revision 0ed85362c8132a2d1927757fbcade66b6660d26a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "bdev_malloc.h"
37 #include "spdk/bdev.h"
38 #include "spdk/conf.h"
39 #include "spdk/endian.h"
40 #include "spdk/env.h"
41 #include "spdk/accel_engine.h"
42 #include "spdk/json.h"
43 #include "spdk/thread.h"
44 #include "spdk/queue.h"
45 #include "spdk/string.h"
46 
47 #include "spdk/bdev_module.h"
48 #include "spdk_internal/log.h"
49 
50 struct malloc_disk {
51 	struct spdk_bdev		disk;
52 	void				*malloc_buf;
53 	TAILQ_ENTRY(malloc_disk)	link;
54 };
55 
56 struct malloc_task {
57 	int				num_outstanding;
58 	enum spdk_bdev_io_status	status;
59 };
60 
61 static void
62 malloc_done(void *ref, int status)
63 {
64 	struct malloc_task *task = (struct malloc_task *)ref;
65 
66 	if (status != 0) {
67 		if (status == -ENOMEM) {
68 			task->status = SPDK_BDEV_IO_STATUS_NOMEM;
69 		} else {
70 			task->status = SPDK_BDEV_IO_STATUS_FAILED;
71 		}
72 	}
73 
74 	if (--task->num_outstanding == 0) {
75 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
76 	}
77 }
78 
/*
 * Every registered malloc disk. Entries are appended in create_malloc_disk()
 * and removed in bdev_malloc_destruct().
 */
static TAILQ_HEAD(, malloc_disk) g_malloc_disks = TAILQ_HEAD_INITIALIZER(g_malloc_disks);

/* Suffix used for the next auto-generated "Malloc%d" disk name. */
int malloc_disk_count = 0;

/* Module callbacks defined further down; needed by the malloc_if initializer. */
static void bdev_malloc_get_spdk_running_config(FILE *fp);
static int bdev_malloc_initialize(void);
85 
86 static int
87 bdev_malloc_get_ctx_size(void)
88 {
89 	return sizeof(struct malloc_task);
90 }
91 
92 static struct spdk_bdev_module malloc_if = {
93 	.name = "malloc",
94 	.module_init = bdev_malloc_initialize,
95 	.config_text = bdev_malloc_get_spdk_running_config,
96 	.get_ctx_size = bdev_malloc_get_ctx_size,
97 
98 };
99 
100 SPDK_BDEV_MODULE_REGISTER(malloc, &malloc_if)
101 
102 static void
103 malloc_disk_free(struct malloc_disk *malloc_disk)
104 {
105 	if (!malloc_disk) {
106 		return;
107 	}
108 
109 	free(malloc_disk->disk.name);
110 	spdk_free(malloc_disk->malloc_buf);
111 	free(malloc_disk);
112 }
113 
114 static int
115 bdev_malloc_destruct(void *ctx)
116 {
117 	struct malloc_disk *malloc_disk = ctx;
118 
119 	TAILQ_REMOVE(&g_malloc_disks, malloc_disk, link);
120 	malloc_disk_free(malloc_disk);
121 	return 0;
122 }
123 
/*
 * Check that an iovec array provides room for at least nbytes bytes.
 *
 * Walks the array, subtracting each element's length from the remaining byte
 * count. As soon as one element is larger than what is still needed, the
 * array is known to be big enough and 0 is returned.
 *
 * Returns 0 when the iovecs cover nbytes, non-zero when bytes are left over
 * after the whole array has been consumed.
 */
static int
bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes)
{
	size_t remaining = nbytes;
	int idx = 0;

	while (idx < iovcnt) {
		size_t seg_len = iovs[idx].iov_len;

		if (seg_len > remaining) {
			return 0;
		}

		remaining -= seg_len;
		idx++;
	}

	return (remaining != 0) ? 1 : 0;
}
139 
140 static void
141 bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
142 		  struct malloc_task *task,
143 		  struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
144 {
145 	int64_t res = 0;
146 	void *src = mdisk->malloc_buf + offset;
147 	int i;
148 
149 	if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
150 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
151 				      SPDK_BDEV_IO_STATUS_FAILED);
152 		return;
153 	}
154 
155 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "read %lu bytes from offset %#lx\n",
156 		      len, offset);
157 
158 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
159 	task->num_outstanding = iovcnt;
160 
161 	for (i = 0; i < iovcnt; i++) {
162 		res = spdk_accel_submit_copy(ch, iov[i].iov_base,
163 					     src, iov[i].iov_len, malloc_done, task);
164 
165 		if (res != 0) {
166 			malloc_done(task, res);
167 		}
168 
169 		src += iov[i].iov_len;
170 		len -= iov[i].iov_len;
171 	}
172 }
173 
174 static void
175 bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
176 		   struct malloc_task *task,
177 		   struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
178 {
179 	int64_t res = 0;
180 	void *dst = mdisk->malloc_buf + offset;
181 	int i;
182 
183 	if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
184 		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task),
185 				      SPDK_BDEV_IO_STATUS_FAILED);
186 		return;
187 	}
188 
189 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_MALLOC, "wrote %lu bytes to offset %#lx\n",
190 		      len, offset);
191 
192 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
193 	task->num_outstanding = iovcnt;
194 
195 	for (i = 0; i < iovcnt; i++) {
196 		res = spdk_accel_submit_copy(ch, dst, iov[i].iov_base,
197 					     iov[i].iov_len, malloc_done, task);
198 
199 		if (res != 0) {
200 			malloc_done(task, res);
201 		}
202 
203 		dst += iov[i].iov_len;
204 	}
205 }
206 
207 static int
208 bdev_malloc_unmap(struct malloc_disk *mdisk,
209 		  struct spdk_io_channel *ch,
210 		  struct malloc_task *task,
211 		  uint64_t offset,
212 		  uint64_t byte_count)
213 {
214 	task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
215 	task->num_outstanding = 1;
216 
217 	return spdk_accel_submit_fill(ch, mdisk->malloc_buf + offset, 0,
218 				      byte_count, malloc_done, task);
219 }
220 
221 static int64_t
222 bdev_malloc_flush(struct malloc_disk *mdisk, struct malloc_task *task,
223 		  uint64_t offset, uint64_t nbytes)
224 {
225 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
226 
227 	return 0;
228 }
229 
230 static int
231 bdev_malloc_reset(struct malloc_disk *mdisk, struct malloc_task *task)
232 {
233 	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
234 
235 	return 0;
236 }
237 
238 static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
239 {
240 	uint32_t block_size = bdev_io->bdev->blocklen;
241 
242 	switch (bdev_io->type) {
243 	case SPDK_BDEV_IO_TYPE_READ:
244 		if (bdev_io->u.bdev.iovs[0].iov_base == NULL) {
245 			assert(bdev_io->u.bdev.iovcnt == 1);
246 			bdev_io->u.bdev.iovs[0].iov_base =
247 				((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
248 				bdev_io->u.bdev.offset_blocks * block_size;
249 			bdev_io->u.bdev.iovs[0].iov_len = bdev_io->u.bdev.num_blocks * block_size;
250 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
251 			return 0;
252 		}
253 
254 		bdev_malloc_readv((struct malloc_disk *)bdev_io->bdev->ctxt,
255 				  ch,
256 				  (struct malloc_task *)bdev_io->driver_ctx,
257 				  bdev_io->u.bdev.iovs,
258 				  bdev_io->u.bdev.iovcnt,
259 				  bdev_io->u.bdev.num_blocks * block_size,
260 				  bdev_io->u.bdev.offset_blocks * block_size);
261 		return 0;
262 
263 	case SPDK_BDEV_IO_TYPE_WRITE:
264 		bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt,
265 				   ch,
266 				   (struct malloc_task *)bdev_io->driver_ctx,
267 				   bdev_io->u.bdev.iovs,
268 				   bdev_io->u.bdev.iovcnt,
269 				   bdev_io->u.bdev.num_blocks * block_size,
270 				   bdev_io->u.bdev.offset_blocks * block_size);
271 		return 0;
272 
273 	case SPDK_BDEV_IO_TYPE_RESET:
274 		return bdev_malloc_reset((struct malloc_disk *)bdev_io->bdev->ctxt,
275 					 (struct malloc_task *)bdev_io->driver_ctx);
276 
277 	case SPDK_BDEV_IO_TYPE_FLUSH:
278 		return bdev_malloc_flush((struct malloc_disk *)bdev_io->bdev->ctxt,
279 					 (struct malloc_task *)bdev_io->driver_ctx,
280 					 bdev_io->u.bdev.offset_blocks * block_size,
281 					 bdev_io->u.bdev.num_blocks * block_size);
282 
283 	case SPDK_BDEV_IO_TYPE_UNMAP:
284 		return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
285 					 ch,
286 					 (struct malloc_task *)bdev_io->driver_ctx,
287 					 bdev_io->u.bdev.offset_blocks * block_size,
288 					 bdev_io->u.bdev.num_blocks * block_size);
289 
290 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
291 		/* bdev_malloc_unmap is implemented with a call to mem_cpy_fill which zeroes out all of the requested bytes. */
292 		return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt,
293 					 ch,
294 					 (struct malloc_task *)bdev_io->driver_ctx,
295 					 bdev_io->u.bdev.offset_blocks * block_size,
296 					 bdev_io->u.bdev.num_blocks * block_size);
297 
298 	case SPDK_BDEV_IO_TYPE_ZCOPY:
299 		if (bdev_io->u.bdev.zcopy.start) {
300 			void *buf;
301 			size_t len;
302 
303 			buf = ((struct malloc_disk *)bdev_io->bdev->ctxt)->malloc_buf +
304 			      bdev_io->u.bdev.offset_blocks * block_size;
305 			len = bdev_io->u.bdev.num_blocks * block_size;
306 			spdk_bdev_io_set_buf(bdev_io, buf, len);
307 
308 		}
309 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
310 		return 0;
311 	case SPDK_BDEV_IO_TYPE_ABORT:
312 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
313 		return 0;
314 	default:
315 		return -1;
316 	}
317 	return 0;
318 }
319 
320 static void bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
321 {
322 	if (_bdev_malloc_submit_request(ch, bdev_io) != 0) {
323 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
324 	}
325 }
326 
327 static bool
328 bdev_malloc_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
329 {
330 	switch (io_type) {
331 	case SPDK_BDEV_IO_TYPE_READ:
332 	case SPDK_BDEV_IO_TYPE_WRITE:
333 	case SPDK_BDEV_IO_TYPE_FLUSH:
334 	case SPDK_BDEV_IO_TYPE_RESET:
335 	case SPDK_BDEV_IO_TYPE_UNMAP:
336 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
337 	case SPDK_BDEV_IO_TYPE_ZCOPY:
338 	case SPDK_BDEV_IO_TYPE_ABORT:
339 		return true;
340 
341 	default:
342 		return false;
343 	}
344 }
345 
/*
 * fn_table get_io_channel hook. Malloc disks do their data movement through
 * the accel engine, so the channel returned is the accel engine's. ctx is not
 * used.
 */
static struct spdk_io_channel *
bdev_malloc_get_io_channel(void *ctx)
{
	struct spdk_io_channel *accel_ch;

	(void)ctx;

	accel_ch = spdk_accel_engine_get_io_channel();

	return accel_ch;
}
351 
352 static void
353 bdev_malloc_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
354 {
355 	char uuid_str[SPDK_UUID_STRING_LEN];
356 
357 	spdk_json_write_object_begin(w);
358 
359 	spdk_json_write_named_string(w, "method", "bdev_malloc_create");
360 
361 	spdk_json_write_named_object_begin(w, "params");
362 	spdk_json_write_named_string(w, "name", bdev->name);
363 	spdk_json_write_named_uint64(w, "num_blocks", bdev->blockcnt);
364 	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
365 	spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
366 	spdk_json_write_named_string(w, "uuid", uuid_str);
367 
368 	spdk_json_write_object_end(w);
369 
370 	spdk_json_write_object_end(w);
371 }
372 
373 static const struct spdk_bdev_fn_table malloc_fn_table = {
374 	.destruct		= bdev_malloc_destruct,
375 	.submit_request		= bdev_malloc_submit_request,
376 	.io_type_supported	= bdev_malloc_io_type_supported,
377 	.get_io_channel		= bdev_malloc_get_io_channel,
378 	.write_config_json	= bdev_malloc_write_json_config,
379 };
380 
381 int
382 create_malloc_disk(struct spdk_bdev **bdev, const char *name, const struct spdk_uuid *uuid,
383 		   uint64_t num_blocks, uint32_t block_size)
384 {
385 	struct malloc_disk	*mdisk;
386 	int rc;
387 
388 	if (num_blocks == 0) {
389 		SPDK_ERRLOG("Disk num_blocks must be greater than 0");
390 		return -EINVAL;
391 	}
392 
393 	mdisk = calloc(1, sizeof(*mdisk));
394 	if (!mdisk) {
395 		SPDK_ERRLOG("mdisk calloc() failed\n");
396 		return -ENOMEM;
397 	}
398 
399 	/*
400 	 * Allocate the large backend memory buffer from pinned memory.
401 	 *
402 	 * TODO: need to pass a hint so we know which socket to allocate
403 	 *  from on multi-socket systems.
404 	 */
405 	mdisk->malloc_buf = spdk_zmalloc(num_blocks * block_size, 2 * 1024 * 1024, NULL,
406 					 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
407 	if (!mdisk->malloc_buf) {
408 		SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n");
409 		malloc_disk_free(mdisk);
410 		return -ENOMEM;
411 	}
412 
413 	if (name) {
414 		mdisk->disk.name = strdup(name);
415 	} else {
416 		/* Auto-generate a name */
417 		mdisk->disk.name = spdk_sprintf_alloc("Malloc%d", malloc_disk_count);
418 		malloc_disk_count++;
419 	}
420 	if (!mdisk->disk.name) {
421 		malloc_disk_free(mdisk);
422 		return -ENOMEM;
423 	}
424 	mdisk->disk.product_name = "Malloc disk";
425 
426 	mdisk->disk.write_cache = 1;
427 	mdisk->disk.blocklen = block_size;
428 	mdisk->disk.blockcnt = num_blocks;
429 	if (uuid) {
430 		mdisk->disk.uuid = *uuid;
431 	} else {
432 		spdk_uuid_generate(&mdisk->disk.uuid);
433 	}
434 
435 	mdisk->disk.ctxt = mdisk;
436 	mdisk->disk.fn_table = &malloc_fn_table;
437 	mdisk->disk.module = &malloc_if;
438 
439 	rc = spdk_bdev_register(&mdisk->disk);
440 	if (rc) {
441 		malloc_disk_free(mdisk);
442 		return rc;
443 	}
444 
445 	*bdev = &(mdisk->disk);
446 
447 	TAILQ_INSERT_TAIL(&g_malloc_disks, mdisk, link);
448 
449 	return rc;
450 }
451 
452 void
453 delete_malloc_disk(struct spdk_bdev *bdev, spdk_delete_malloc_complete cb_fn, void *cb_arg)
454 {
455 	if (!bdev || bdev->module != &malloc_if) {
456 		cb_fn(cb_arg, -ENODEV);
457 		return;
458 	}
459 
460 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
461 }
462 
463 static int bdev_malloc_initialize(void)
464 {
465 	struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Malloc");
466 	int NumberOfLuns, LunSizeInMB, BlockSize, i, rc = 0;
467 	uint64_t size;
468 	struct spdk_bdev *bdev;
469 
470 	malloc_disk_count = 0;
471 
472 	if (sp != NULL) {
473 		NumberOfLuns = spdk_conf_section_get_intval(sp, "NumberOfLuns");
474 		LunSizeInMB = spdk_conf_section_get_intval(sp, "LunSizeInMB");
475 		BlockSize = spdk_conf_section_get_intval(sp, "BlockSize");
476 		if ((NumberOfLuns < 1) || (LunSizeInMB < 1)) {
477 			SPDK_ERRLOG("Malloc section present, but no devices specified\n");
478 			goto end;
479 		}
480 		if (BlockSize < 1) {
481 			/* Default is 512 bytes */
482 			BlockSize = 512;
483 		}
484 		size = (uint64_t)LunSizeInMB * 1024 * 1024;
485 		for (i = 0; i < NumberOfLuns; i++) {
486 			rc = create_malloc_disk(&bdev, NULL, NULL, size / BlockSize, BlockSize);
487 			if (rc) {
488 				SPDK_ERRLOG("Could not create malloc disk\n");
489 				goto end;
490 			}
491 		}
492 	}
493 
494 end:
495 	return rc;
496 }
497 
498 static void
499 bdev_malloc_get_spdk_running_config(FILE *fp)
500 {
501 	int num_malloc_luns = 0;
502 	uint64_t malloc_lun_size = 0;
503 	struct malloc_disk *mdisk;
504 
505 	/* count number of malloc LUNs, get LUN size */
506 	TAILQ_FOREACH(mdisk, &g_malloc_disks, link) {
507 		if (0 == malloc_lun_size) {
508 			/* assume all malloc luns the same size */
509 			malloc_lun_size = mdisk->disk.blocklen * mdisk->disk.blockcnt;
510 			malloc_lun_size /= (1024 * 1024);
511 		}
512 		num_malloc_luns++;
513 	}
514 
515 	if (num_malloc_luns > 0) {
516 		fprintf(fp,
517 			"\n"
518 			"# Users may change this section to create a different number or size of\n"
519 			"# malloc LUNs.\n"
520 			"# This will generate %d LUNs with a malloc-allocated backend. Each LUN\n"
521 			"# will be %" PRIu64 "MB in size and these will be named Malloc0 through Malloc%d.\n"
522 			"# Not all LUNs defined here are necessarily used below.\n"
523 			"[Malloc]\n"
524 			"  NumberOfLuns %d\n"
525 			"  LunSizeInMB %" PRIu64 "\n",
526 			num_malloc_luns, malloc_lun_size,
527 			num_malloc_luns - 1, num_malloc_luns,
528 			malloc_lun_size);
529 	}
530 }
531 
532 SPDK_LOG_REGISTER_COMPONENT("bdev_malloc", SPDK_LOG_BDEV_MALLOC)
533