xref: /spdk/test/dma/test_dma/test_dma.c (revision c6c1234de9e0015e670dd0b51bf6ce39ee0e07bd)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2021, 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/stdinc.h"
6 
7 #include "spdk/dma.h"
8 #include "spdk/bdev.h"
9 #include "spdk/env.h"
10 #include "spdk/event.h"
11 #include "spdk/likely.h"
12 #include "spdk/string.h"
13 #include "spdk/util.h"
14 
15 #include <infiniband/verbs.h>
16 
struct dma_test_task;

/* Per-request context: one outstanding IO and the resources tied to it. */
struct dma_test_req {
	struct iovec iov;			/* data buffer for this request */
	struct spdk_bdev_ext_io_opts io_opts;	/* ext IO opts; points at g_domain and back at this req */
	uint64_t submit_tsc;			/* tick count at submission, used for latency stats */
	struct ibv_mr *mr;			/* RDMA MR, registered lazily on first translation */
	struct dma_test_task *task;		/* owning per-core task */
};
26 
/* Completion counters; all latency fields are in TSC ticks. */
struct dma_test_task_stats {
	uint64_t io_completed;
	uint64_t total_tsc;
	uint64_t min_tsc;
	uint64_t max_tsc;
};
33 
/* Per-core test context. One task runs on its own SPDK thread pinned to one core. */
struct dma_test_task {
	struct spdk_bdev_desc *desc;		/* open bdev descriptor */
	struct spdk_io_channel *channel;	/* IO channel for this thread */
	uint64_t cur_io_offset;			/* next offset (in IO units) for sequential mode */
	uint64_t max_offset_in_ios;		/* bdev size expressed in num_blocks_per_io units */
	uint64_t num_blocks_per_io;
	int rw_percentage;			/* 100 = all reads, 0 = all writes */
	uint32_t seed;				/* rand_r() state, seeded with the core id */
	uint32_t io_inflight;
	struct dma_test_task_stats stats;
	struct dma_test_task_stats last_stats;	/* snapshot used by the periodic stats printer */
	bool is_draining;			/* set to stop resubmitting IO */
	bool is_random;
	struct dma_test_req *reqs;		/* array of g_queue_depth requests */
	struct spdk_thread *thread;
	const char *bdev_name;
	uint64_t num_translations;		/* memory-domain op counters for -x verification */
	uint64_t num_pull_push;
	uint64_t num_mem_zero;
	uint32_t lcore;

	TAILQ_ENTRY(dma_test_task) link;
};
57 
/* Carries a memory-domain data completion callback across a thread hop. */
struct dma_test_data_cpl_ctx {
	spdk_memory_domain_data_cpl_cb data_cpl;
	void *data_cpl_arg;
};
62 
/* Bit flags of memory-domain operations the user expects to see executed (-x option). */
enum dma_test_domain_ops {
	DMA_TEST_DOMAIN_OP_TRANSLATE = 1u << 0,
	DMA_TEST_DOMAIN_OP_PULL_PUSH = 1u << 1,
	DMA_TEST_DOMAIN_OP_MEMZERO = 1u << 2,
};
68 
/* All per-core tasks; manipulated only on the main/app thread. */
TAILQ_HEAD(, dma_test_task) g_tasks = TAILQ_HEAD_INITIALIZER(g_tasks);

/* User's input */
static char *g_bdev_name;
static const char *g_rw_mode_str;
static int g_rw_percentage = -1;		/* -1 means "-M not specified" */
static uint32_t g_queue_depth;
static uint32_t g_io_size;
static uint32_t g_run_time_sec;
static uint32_t g_run_count;			/* seconds elapsed, bumped by the 1s poller */
static uint32_t g_test_ops;			/* bitmask of enum dma_test_domain_ops */
static bool g_is_random;
static bool g_force_memory_domains_support;

static struct spdk_thread *g_main_thread;
static struct spdk_poller *g_runtime_poller;
static struct spdk_memory_domain *g_domain;	/* fake "remote" source domain for this test */
static uint64_t g_num_blocks_per_io;
static uint32_t g_num_construct_tasks;		/* countdown of tasks still initializing */
static uint32_t g_num_complete_tasks;		/* countdown of tasks still draining */
static uint64_t g_start_tsc;
static int g_run_rc;				/* first error observed; 0 means success */

static void destroy_tasks(void);
static int dma_test_submit_io(struct dma_test_req *req);
94 
/* Print per-core and aggregate IOPS/bandwidth/latency once the run is over. */
static void
print_total_stats(void)
{
	struct dma_test_task *task;
	uint64_t tsc_rate = spdk_get_ticks_hz();
	uint64_t test_time_usec = (spdk_get_ticks() - g_start_tsc) * SPDK_SEC_TO_USEC / tsc_rate;
	uint64_t total_tsc = 0, total_io_completed = 0;
	double task_iops, task_bw, task_min_lat, task_avg_lat, task_max_lat;
	double total_iops = 0, total_bw = 0, total_min_lat = (double)UINT64_MAX, total_max_lat = 0,
	       total_avg_lat;

	printf("==========================================================================\n");
	printf("%*s\n", 55, "Latency [us]");
	printf("%*s %10s %10s %10s %10s\n", 19, "IOPS", "MiB/s", "Average", "min", "max");

	TAILQ_FOREACH(task, &g_tasks, link) {
		/* Skip cores that never completed an IO (e.g. failed during init). */
		if (!task->stats.io_completed) {
			continue;
		}
		/* Rates over the whole wall-clock run; latencies converted from ticks to usec. */
		task_iops = (double)task->stats.io_completed * SPDK_SEC_TO_USEC / test_time_usec;
		task_bw = task_iops * g_io_size / (1024 * 1024);
		task_avg_lat = (double)task->stats.total_tsc / task->stats.io_completed * SPDK_SEC_TO_USEC /
			       tsc_rate;
		task_min_lat = (double)task->stats.min_tsc * SPDK_SEC_TO_USEC / tsc_rate;
		task_max_lat = (double)task->stats.max_tsc * SPDK_SEC_TO_USEC / tsc_rate;

		total_iops += task_iops;
		total_bw += task_bw;
		total_io_completed += task->stats.io_completed;
		total_tsc += task->stats.total_tsc;
		if (task_min_lat < total_min_lat) {
			total_min_lat = task_min_lat;
		}
		if (task_max_lat > total_max_lat) {
			total_max_lat = task_max_lat;
		}
		printf("Core %2u: %10.2f %10.2f %10.2f %10.2f %10.2f\n",
		       task->lcore, task_iops, task_bw, task_avg_lat, task_min_lat, task_max_lat);
	}

	if (total_io_completed) {
		total_avg_lat = (double)total_tsc / total_io_completed  * SPDK_SEC_TO_USEC / tsc_rate;
		printf("==========================================================================\n");
		printf("%-*s %10.2f %10.2f %10.2f %10.2f %10.2f\n",
		       8, "Total  :", total_iops, total_bw, total_avg_lat, total_min_lat, total_max_lat);
		printf("\n");
	}
}
143 
/* Print a single-line progress update (IOPS/BW/latency over the last second)
 * and snapshot each task's counters into last_stats for the next interval. */
static void
print_periodic_stats(void)
{
	struct dma_test_task *task;
	uint64_t io_last_sec = 0, tsc_last_sec = 0;
	double lat_last_sec, bw_last_sec;

	TAILQ_FOREACH(task, &g_tasks, link) {
		io_last_sec += task->stats.io_completed - task->last_stats.io_completed;
		tsc_last_sec += task->stats.total_tsc - task->last_stats.total_tsc;
		memcpy(&task->last_stats, &task->stats, sizeof(task->stats));
	}

	printf("Running %3u/%-3u sec", g_run_count, g_run_time_sec);
	if (io_last_sec) {
		lat_last_sec =	(double)tsc_last_sec / io_last_sec * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
		bw_last_sec = (double)io_last_sec * g_io_size / (1024 * 1024);
		printf(" IOPS: %-8"PRIu64" BW: %-6.2f [MiB/s] avg.lat %-5.2f [us]",
		       io_last_sec, bw_last_sec, lat_last_sec);
	}

	/* Carriage return (no newline) so the next update overwrites this line. */
	printf("\r");
	fflush(stdout);
}
168 
/* Runs on the main thread; called once per task when it finishes draining.
 * The last completion prints final stats and stops the app. */
static void
dma_test_task_complete(void *ctx)
{
	assert(g_num_complete_tasks > 0);

	if (--g_num_complete_tasks == 0) {
		spdk_poller_unregister(&g_runtime_poller);
		print_total_stats();
		spdk_app_stop(g_run_rc);
	}
}
180 
/* Called on the task's own thread while draining: once all in-flight IOs have
 * completed, release the channel/descriptor, notify the main thread, and exit
 * this SPDK thread. */
static inline void
dma_test_check_and_signal_task_done(struct dma_test_task *task)
{
	if (task->io_inflight == 0) {
		spdk_put_io_channel(task->channel);
		spdk_bdev_close(task->desc);
		spdk_thread_send_msg(g_main_thread, dma_test_task_complete, task);
		spdk_thread_exit(spdk_get_thread());
	}
}
191 
192 static inline void
193 dma_test_task_update_stats(struct dma_test_task *task, uint64_t submit_tsc)
194 {
195 	uint64_t tsc_diff = spdk_get_ticks() - submit_tsc;
196 
197 	task->stats.io_completed++;
198 	task->stats.total_tsc += tsc_diff;
199 	if (spdk_unlikely(tsc_diff < task->stats.min_tsc)) {
200 		task->stats.min_tsc = tsc_diff;
201 	}
202 	if (spdk_unlikely(tsc_diff > task->stats.max_tsc)) {
203 		task->stats.max_tsc = tsc_diff;
204 	}
205 }
206 
/* bdev IO completion callback: update stats, and either resubmit the same
 * request slot (steady-state queue depth) or finish draining. */
static void
dma_test_bdev_io_completion_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct dma_test_req *req = cb_arg;
	struct dma_test_task *task = req->task;

	assert(task->io_inflight > 0);
	--task->io_inflight;
	dma_test_task_update_stats(task, req->submit_tsc);

	if (!success) {
		/* Report the first error only; start draining this task. */
		if (!g_run_rc) {
			fprintf(stderr, "IO completed with error\n");
			g_run_rc = -1;
		}
		task->is_draining = true;
	}

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(task->is_draining)) {
		dma_test_check_and_signal_task_done(task);
		return;
	}

	/* Keep the pipe full: immediately reuse this request slot. */
	dma_test_submit_io(req);
}
234 
235 static inline uint64_t
236 dma_test_get_offset_in_ios(struct dma_test_task *task)
237 {
238 	uint64_t offset;
239 
240 	if (task->is_random) {
241 		offset = rand_r(&task->seed) % task->max_offset_in_ios;
242 	} else {
243 		offset = task->cur_io_offset++;
244 		if (spdk_unlikely(task->cur_io_offset == task->max_offset_in_ios)) {
245 			task->cur_io_offset = 0;
246 		}
247 	}
248 
249 	return offset;
250 }
251 
252 static inline bool
253 dma_test_task_is_read(struct dma_test_task *task)
254 {
255 	if (task->rw_percentage == 100) {
256 		return true;
257 	}
258 	if (task->rw_percentage != 0 && (rand_r(&task->seed) % 100) <  task->rw_percentage) {
259 		return true;
260 	}
261 	return false;
262 }
263 
/* Thread-hop trampoline: invoke the saved data completion callback with
 * status 0 and release the context. */
static void
dma_test_data_cpl(void *ctx)
{
	struct dma_test_data_cpl_ctx *cpl_ctx = ctx;

	cpl_ctx->data_cpl(cpl_ctx->data_cpl_arg, 0);
	free(cpl_ctx);
}
272 
/* Common body of the pull/push callbacks: copy the data synchronously, then
 * complete asynchronously on the request's owning thread (as the memory
 * domain API expects). Returns 0 or -ENOMEM. */
static int
dma_test_copy_memory(struct dma_test_req *req, struct iovec *dst_iov, uint32_t dst_iovcnt,
		     struct iovec *src_iov, uint32_t src_iovcnt, spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
{
	struct dma_test_data_cpl_ctx *cpl_ctx;

	cpl_ctx = calloc(1, sizeof(*cpl_ctx));
	if (!cpl_ctx) {
		return -ENOMEM;
	}

	cpl_ctx->data_cpl = cpl_cb;
	cpl_ctx->data_cpl_arg = cpl_cb_arg;

	spdk_iovcpy(src_iov, src_iovcnt, dst_iov, dst_iovcnt);
	req->task->num_pull_push++;
	/* Defer the completion to the IO thread; cpl_ctx is freed there. */
	spdk_thread_send_msg(req->task->thread, dma_test_data_cpl, cpl_ctx);

	return 0;
}
293 
/* Memory domain "push" hook: destination domain ctx is the dma_test_req. */
static int
dma_test_push_memory_cb(struct spdk_memory_domain *dst_domain,
			void *dst_domain_ctx,
			struct iovec *dst_iov, uint32_t dst_iovcnt, struct iovec *src_iov, uint32_t src_iovcnt,
			spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
{
	struct dma_test_req *req = dst_domain_ctx;

	return dma_test_copy_memory(req, dst_iov, dst_iovcnt, src_iov, src_iovcnt, cpl_cb, cpl_cb_arg);
}
304 
/* Memory domain "pull" hook: source domain ctx is the dma_test_req. Note the
 * src/dst parameter order differs from the push hook. */
static int
dma_test_pull_memory_cb(struct spdk_memory_domain *src_domain,
			void *src_domain_ctx,
			struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov, uint32_t dst_iovcnt,
			spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
{
	struct dma_test_req *req = src_domain_ctx;

	return dma_test_copy_memory(req, dst_iov, dst_iovcnt, src_iov, src_iovcnt, cpl_cb, cpl_cb_arg);
}
315 
/* Memory domain "memzero" hook: zero the iovecs in place and complete
 * asynchronously on the request's owning thread. Returns 0 or -ENOMEM. */
static int
dma_test_memzero_cb(struct spdk_memory_domain *src_domain, void *src_domain_ctx,
		    struct iovec *iov, uint32_t iovcnt,
		    spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
{
	struct dma_test_req *req = src_domain_ctx;
	struct dma_test_data_cpl_ctx *cpl_ctx;
	uint32_t i;

	cpl_ctx = calloc(1, sizeof(*cpl_ctx));
	if (!cpl_ctx) {
		return -ENOMEM;
	}

	cpl_ctx->data_cpl = cpl_cb;
	cpl_ctx->data_cpl_arg = cpl_cb_arg;

	for (i = 0; i < iovcnt; i++) {
		memset(iov[i].iov_base, 0, iov[i].iov_len);
	}
	req->task->num_mem_zero++;

	/* cpl_ctx is freed by dma_test_data_cpl on the IO thread. */
	spdk_thread_send_msg(req->task->thread, dma_test_data_cpl, cpl_ctx);

	return 0;
}
342 
343 
/* Memory domain translation hook: validate that [addr, addr+len) lies inside
 * the request's buffer, lazily register an RDMA MR on the destination QP's PD,
 * and report the local/remote keys. Returns 0 on success, -1 on failure. */
static int
dma_test_translate_memory_cb(struct spdk_memory_domain *src_domain, void *src_domain_ctx,
			     struct spdk_memory_domain *dst_domain, struct spdk_memory_domain_translation_ctx *dst_domain_ctx,
			     void *addr, size_t len, struct spdk_memory_domain_translation_result *result)
{
	struct dma_test_req *req = src_domain_ctx;
	struct ibv_qp *dst_domain_qp = (struct ibv_qp *)dst_domain_ctx->rdma.ibv_qp;

	/* bdev may translate a sub-range of the buffer (split IO), but it must
	 * stay within the request's own iov. */
	if (spdk_unlikely(addr < req->iov.iov_base ||
			  (uint8_t *)addr + len > (uint8_t *)req->iov.iov_base + req->iov.iov_len)) {
		fprintf(stderr, "incorrect data %p, len %zu\n", addr, len);
		return -1;
	}

	/* Register the whole buffer once; the MR is reused for later IOs and
	 * released in destroy_task(). */
	if (spdk_unlikely(!req->mr)) {
		req->mr = ibv_reg_mr(dst_domain_qp->pd, req->iov.iov_base, req->iov.iov_len,
				     IBV_ACCESS_LOCAL_WRITE |
				     IBV_ACCESS_REMOTE_READ |
				     IBV_ACCESS_REMOTE_WRITE);
		if (!req->mr) {
			fprintf(stderr, "Failed to register memory region, errno %d\n", errno);
			return -1;
		}
	}

	/* Trivial translation: same address, just annotated with RDMA keys. */
	result->iov.iov_base = addr;
	result->iov.iov_len = len;
	result->iov_count = 1;
	result->rdma.lkey = req->mr->lkey;
	result->rdma.rkey = req->mr->rkey;
	result->dst_domain = dst_domain;

	req->task->num_translations++;

	return 0;
}
380 
/* Submit one read or write ext-IO for this request slot. On submission failure
 * the task starts draining (and may signal completion immediately if nothing
 * is in flight). Returns the bdev submit rc (0 on success). */
static int
dma_test_submit_io(struct dma_test_req *req)
{
	struct dma_test_task *task = req->task;
	uint64_t offset_in_ios;
	int rc;
	bool is_read;

	offset_in_ios = dma_test_get_offset_in_ios(task);
	is_read = dma_test_task_is_read(task);
	req->submit_tsc = spdk_get_ticks();
	if (is_read) {
		rc = spdk_bdev_readv_blocks_ext(task->desc, task->channel, &req->iov, 1,
						offset_in_ios * task->num_blocks_per_io, task->num_blocks_per_io,
						dma_test_bdev_io_completion_cb, req, &req->io_opts);
	} else {
		rc = spdk_bdev_writev_blocks_ext(task->desc, task->channel, &req->iov, 1,
						 offset_in_ios * task->num_blocks_per_io, task->num_blocks_per_io,
						 dma_test_bdev_io_completion_cb, req, &req->io_opts);
	}

	if (spdk_unlikely(rc)) {
		if (!g_run_rc) {
			/* log an error only once */
			fprintf(stderr, "Failed to submit %s IO, rc %d, stop sending IO\n", is_read ? "read" : "write", rc);
			g_run_rc = rc;
		}
		task->is_draining = true;
		dma_test_check_and_signal_task_done(task);
		return rc;
	}

	task->io_inflight++;

	return 0;
}
417 
418 static void
419 dma_test_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
420 {
421 	struct dma_test_task *task = event_ctx;
422 
423 	if (type == SPDK_BDEV_EVENT_REMOVE) {
424 		task->is_draining = true;
425 	}
426 }
427 
/* No-op event callback for the short-lived descriptor opened in dma_test_start(). */
static void
dma_test_bdev_dummy_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			     void *event_ctx)
{
}
433 
434 static void
435 dma_test_task_run(void *ctx)
436 {
437 	struct dma_test_task *task = ctx;
438 	uint32_t i;
439 	int rc = 0;
440 
441 	for (i = 0; i < g_queue_depth && rc == 0; i++) {
442 		rc = dma_test_submit_io(&task->reqs[i]);
443 	}
444 }
445 
/* Runs on the task's thread: flag it to stop resubmitting; in-flight IOs will
 * finish and the completion path signals task done. */
static void
dma_test_drain_task(void *ctx)
{
	struct dma_test_task *task = ctx;

	task->is_draining = true;
}
453 
/* App shutdown hook (also called when the run timer expires): stop the timer
 * and ask every task, on its own thread, to drain. */
static void
dma_test_shutdown_cb(void)
{
	struct dma_test_task *task;

	spdk_poller_unregister(&g_runtime_poller);

	TAILQ_FOREACH(task, &g_tasks, link) {
		spdk_thread_send_msg(task->thread, dma_test_drain_task, task);
	}
}
465 
/* 1-second poller: print progress while the test runs (only on a TTY), then
 * trigger shutdown once the configured run time elapses. */
static int
dma_test_run_time_poller(void *ctx)
{
	g_run_count++;

	if (g_run_count < g_run_time_sec) {
		if (isatty(STDOUT_FILENO)) {
			print_periodic_stats();
		}
	} else {
		dma_test_shutdown_cb();
	}

	return SPDK_POLLER_BUSY;
}
481 
/* Runs on the main thread once per task after its per-thread init finishes.
 * When the last task reports in: abort on any init error, otherwise start the
 * run timer and kick every task's IO loop. */
static void
dma_test_construct_task_done(void *ctx)
{
	struct dma_test_task *task;

	assert(g_num_construct_tasks > 0);
	--g_num_construct_tasks;

	if (g_num_construct_tasks != 0) {
		return;
	}

	if (g_run_rc) {
		fprintf(stderr, "Initialization failed with error %d\n", g_run_rc);
		spdk_app_stop(g_run_rc);
		return;
	}

	/* Fires every second; drives progress output and the run-time limit. */
	g_runtime_poller = spdk_poller_register_named(dma_test_run_time_poller, NULL, 1 * 1000 * 1000,
			   "dma_test_run_time_poller");
	if (!g_runtime_poller) {
		fprintf(stderr, "Failed to run timer\n");
		spdk_app_stop(-1);
		return;
	}

	printf("Initialization complete, running %s IO for %u sec on %u cores\n", g_rw_mode_str,
	       g_run_time_sec, spdk_env_get_core_count());
	g_start_tsc = spdk_get_ticks();
	TAILQ_FOREACH(task, &g_tasks, link) {
		spdk_thread_send_msg(task->thread, dma_test_task_run, task);
	}
}
515 
516 static void
517 dma_test_construct_task_on_thread(void *ctx)
518 {
519 	struct dma_test_task *task = ctx;
520 	int rc;
521 
522 	rc = spdk_bdev_open_ext(task->bdev_name, true, dma_test_bdev_event_cb, task, &task->desc);
523 	if (rc) {
524 		fprintf(stderr, "Failed to open bdev %s, rc %d\n", task->bdev_name, rc);
525 		g_run_rc = rc;
526 		spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
527 		return;
528 	}
529 
530 	task->channel = spdk_bdev_get_io_channel(task->desc);
531 	if (!task->channel) {
532 		spdk_bdev_close(task->desc);
533 		task->desc = NULL;
534 		fprintf(stderr, "Failed to open bdev %s, rc %d\n", task->bdev_name, rc);
535 		g_run_rc = rc;
536 		spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
537 		return;
538 	}
539 
540 	task->max_offset_in_ios = spdk_bdev_get_num_blocks(spdk_bdev_desc_get_bdev(
541 					  task->desc)) / task->num_blocks_per_io;
542 
543 	spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, task);
544 }
545 
546 static bool
547 dma_test_check_bdev_supports_rdma_memory_domain(struct spdk_bdev *bdev)
548 {
549 	struct spdk_memory_domain **bdev_domains;
550 	int bdev_domains_count, bdev_domains_count_tmp, i;
551 	bool rdma_domain_supported = false;
552 
553 	bdev_domains_count = spdk_bdev_get_memory_domains(bdev, NULL, 0);
554 
555 	if (bdev_domains_count < 0) {
556 		fprintf(stderr, "Failed to get bdev memory domains count, rc %d\n", bdev_domains_count);
557 		return false;
558 	} else if (bdev_domains_count == 0) {
559 		fprintf(stderr, "bdev %s doesn't support any memory domains\n", spdk_bdev_get_name(bdev));
560 		return false;
561 	}
562 
563 	fprintf(stdout, "bdev %s reports %d memory domains\n", spdk_bdev_get_name(bdev),
564 		bdev_domains_count);
565 
566 	bdev_domains = calloc((size_t)bdev_domains_count, sizeof(*bdev_domains));
567 	if (!bdev_domains) {
568 		fprintf(stderr, "Failed to allocate memory domains\n");
569 		return false;
570 	}
571 
572 	bdev_domains_count_tmp = spdk_bdev_get_memory_domains(bdev, bdev_domains, bdev_domains_count);
573 	if (bdev_domains_count_tmp != bdev_domains_count) {
574 		fprintf(stderr, "Unexpected bdev domains return value %d\n", bdev_domains_count_tmp);
575 		return false;
576 	}
577 
578 	for (i = 0; i < bdev_domains_count; i++) {
579 		if (spdk_memory_domain_get_dma_device_type(bdev_domains[i]) == SPDK_DMA_DEVICE_TYPE_RDMA) {
580 			/* Bdev supports memory domain of RDMA type, we can try to submit IO request to it using
581 			 * bdev ext API */
582 			rdma_domain_supported = true;
583 			break;
584 		}
585 	}
586 
587 	fprintf(stdout, "bdev %s %s RDMA memory domain\n", spdk_bdev_get_name(bdev),
588 		rdma_domain_supported ? "supports" : "doesn't support");
589 	free(bdev_domains);
590 
591 	return rdma_domain_supported;
592 }
593 
594 static int
595 allocate_task(uint32_t core, const char *bdev_name)
596 {
597 	char thread_name[32];
598 	struct spdk_cpuset cpu_set;
599 	uint32_t i;
600 	struct dma_test_task *task;
601 	struct dma_test_req *req;
602 
603 	task = calloc(1, sizeof(*task));
604 	if (!task) {
605 		fprintf(stderr, "Failed to allocate per thread task\n");
606 		return -ENOMEM;
607 	}
608 
609 	TAILQ_INSERT_TAIL(&g_tasks, task, link);
610 
611 	task->reqs = calloc(g_queue_depth, sizeof(*task->reqs));
612 	if (!task->reqs) {
613 		fprintf(stderr, "Failed to allocate requests\n");
614 		return -ENOMEM;
615 	}
616 
617 	for (i = 0; i < g_queue_depth; i++) {
618 		req = &task->reqs[i];
619 		req->task = task;
620 		req->iov.iov_len = g_io_size;
621 		req->iov.iov_base = malloc(req->iov.iov_len);
622 		if (!req->iov.iov_base) {
623 			fprintf(stderr, "Failed to allocate request data buffer\n");
624 			return -ENOMEM;
625 		}
626 		memset(req->iov.iov_base, 0xc, req->iov.iov_len);
627 		req->io_opts.size = sizeof(req->io_opts);
628 		req->io_opts.memory_domain = g_domain;
629 		req->io_opts.memory_domain_ctx = req;
630 	}
631 
632 	snprintf(thread_name, 32, "task_%u", core);
633 	spdk_cpuset_zero(&cpu_set);
634 	spdk_cpuset_set_cpu(&cpu_set, core, true);
635 	task->thread = spdk_thread_create(thread_name, &cpu_set);
636 	if (!task->thread) {
637 		fprintf(stderr, "Failed to create SPDK thread, core %u, cpu_mask %s\n", core,
638 			spdk_cpuset_fmt(&cpu_set));
639 		return -ENOMEM;
640 	}
641 
642 	task->seed = core;
643 	task->lcore = core;
644 	task->bdev_name = bdev_name;
645 	task->is_random = g_is_random;
646 	task->rw_percentage = g_rw_percentage;
647 	task->num_blocks_per_io = g_num_blocks_per_io;
648 	task->stats.min_tsc = UINT64_MAX;
649 
650 	return 0;
651 }
652 
653 static void
654 destroy_task(struct dma_test_task *task)
655 {
656 	struct dma_test_req *req;
657 	uint32_t i;
658 
659 	for (i = 0; i < g_queue_depth; i++) {
660 		req = &task->reqs[i];
661 		if (req->mr) {
662 			ibv_dereg_mr(req->mr);
663 		}
664 		free(req->iov.iov_base);
665 	}
666 	free(task->reqs);
667 	TAILQ_REMOVE(&g_tasks, task, link);
668 	free(task);
669 }
670 
671 static void
672 destroy_tasks(void)
673 {
674 	struct dma_test_task *task, *tmp_task;
675 
676 	TAILQ_FOREACH_SAFE(task, &g_tasks, link, tmp_task) {
677 		destroy_task(task);
678 	}
679 }
680 
/* After the run, check that every memory-domain operation requested via -x was
 * actually executed, and that the operation counts are plausible versus the
 * number of completed IOs. Returns 0 on success, -EINVAL on mismatch. */
static int
verify_tasks(void)
{
	struct dma_test_task *task;
	uint64_t total_requests = 0;
	uint64_t num_translations = 0;
	uint64_t num_pull_push = 0;
	uint64_t num_memzero = 0;
	int rc = 0;

	if (!g_test_ops) {
		/* No specific ops were requested, nothing to check */
		return rc;
	}

	TAILQ_FOREACH(task, &g_tasks, link) {
		total_requests += task->stats.io_completed;
		num_translations += task->num_translations;
		num_pull_push += task->num_pull_push;
		num_memzero += task->num_mem_zero;
	}

	if (g_test_ops & DMA_TEST_DOMAIN_OP_TRANSLATE) {
		if (num_translations == 0) {
			fprintf(stderr, "Requested \"translate\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}
	if (g_test_ops & DMA_TEST_DOMAIN_OP_PULL_PUSH) {
		if (num_pull_push == 0) {
			fprintf(stderr, "Requested \"pull_push\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}
	if (g_test_ops & DMA_TEST_DOMAIN_OP_MEMZERO) {
		if (num_memzero == 0) {
			fprintf(stderr, "Requested \"memzero\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}

	/* bdev request can be split, so the total number of pull_push +translate operations
	 * can be bigger than total_number of requests */
	if (num_translations + num_pull_push + num_memzero < total_requests) {
		fprintf(stderr,
			"Operations number mismatch: translate %"PRIu64", pull_push %"PRIu64", mem_zero %"PRIu64" expected total %"PRIu64"\n",
			num_translations, num_pull_push, num_memzero, total_requests);
		rc = -EINVAL;
	} else {
		fprintf(stdout,
			"Total operations: %"PRIu64", translate %"PRIu64" pull_push %"PRIu64" memzero %"PRIu64"\n",
			total_requests, num_translations, num_pull_push, num_memzero);
	}

	return rc;
}
737 
/* App entry point (runs on the SPDK app thread): validate the bdev, create the
 * test memory domain with its callbacks, allocate one task per core, and fan
 * out per-thread initialization. */
static void
dma_test_start(void *arg)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct dma_test_task *task;
	uint32_t block_size, i;
	int rc;

	/* Temporary descriptor just for validation; each task opens its own. */
	rc = spdk_bdev_open_ext(g_bdev_name, true, dma_test_bdev_dummy_event_cb, NULL, &desc);
	if (rc) {
		fprintf(stderr, "Can't find bdev %s\n", g_bdev_name);
		spdk_app_stop(-ENODEV);
		return;
	}
	bdev = spdk_bdev_desc_get_bdev(desc);
	/* This function checks if bdev supports memory domains. Test is not failed if there are
	 * no memory domains since bdev layer can pull/push data */
	if (!dma_test_check_bdev_supports_rdma_memory_domain(bdev) && g_force_memory_domains_support) {
		fprintf(stderr, "Test aborted due to \"-f\" (force memory domains support) option\n");
		spdk_bdev_close(desc);
		spdk_app_stop(-ENODEV);
		return;
	}

	g_main_thread = spdk_get_thread();

	/* IO size must be a positive multiple of the bdev block size. */
	block_size = spdk_bdev_get_block_size(bdev);
	if (g_io_size < block_size || g_io_size % block_size != 0) {
		fprintf(stderr, "Invalid io_size %u requested, bdev block size %u\n", g_io_size, block_size);
		spdk_bdev_close(desc);
		spdk_app_stop(-EINVAL);
		return;
	}
	g_num_blocks_per_io = g_io_size / block_size;

	/* Create a memory domain to represent the source memory domain.
	 * Since we don't actually have a remote memory domain in this test, this will describe memory
	 * on the local system and the translation to the destination memory domain will be trivial.
	 * But this at least allows us to demonstrate the flow and test the functionality. */
	rc = spdk_memory_domain_create(&g_domain, SPDK_DMA_DEVICE_TYPE_RDMA, NULL, "test_dma");
	if (rc != 0) {
		spdk_bdev_close(desc);
		spdk_app_stop(rc);
		return;
	}
	spdk_memory_domain_set_translation(g_domain, dma_test_translate_memory_cb);
	spdk_memory_domain_set_pull(g_domain, dma_test_pull_memory_cb);
	spdk_memory_domain_set_push(g_domain, dma_test_push_memory_cb);
	spdk_memory_domain_set_memzero(g_domain, dma_test_memzero_cb);

	SPDK_ENV_FOREACH_CORE(i) {
		rc = allocate_task(i, g_bdev_name);
		if (rc) {
			destroy_tasks();
			spdk_bdev_close(desc);
			spdk_app_stop(rc);
			return;
		}
		g_num_construct_tasks++;
		g_num_complete_tasks++;
	}

	TAILQ_FOREACH(task, &g_tasks, link) {
		spdk_thread_send_msg(task->thread, dma_test_construct_task_on_thread, task);
	}

	/* Validation descriptor no longer needed; tasks hold their own. */
	spdk_bdev_close(desc);
}
807 
/* Print command-line help. Keep option descriptions in sync with
 * parse_expected_ops() and verify_args(). */
static void
print_usage(void)
{
	printf(" -b <bdev>         bdev name for test\n");
	printf(" -f                force memory domains support - abort test if bdev doesn't report memory domains\n");
	printf(" -q <val>          io depth\n");
	printf(" -o <val>          io size in bytes\n");
	printf(" -t <val>          run time in seconds\n");
	/* Fix: "memzero" is accepted by parse_expected_ops() but was missing here. */
	printf(" -x <op,op>        Comma separated memory domain operations expected in the test. Values are \"translate\", \"pull_push\" and \"memzero\"\n");
	/* Fix: "rw" is accepted by verify_args() but was missing here. */
	printf(" -w <str>          io pattern (read, write, randread, randwrite, rw, randrw)\n");
	printf(" -M <0-100>        rw percentage (100 for reads, 0 for writes)\n");
}
820 
821 static int
822 parse_expected_ops(const char *_str)
823 {
824 	char *str = strdup(_str);
825 	char *tok;
826 	int rc = 0;
827 
828 	if (!str) {
829 		fprintf(stderr, "Failed to dup args\n");
830 		return -ENOMEM;
831 	}
832 
833 	tok = strtok(str, ",");
834 	while (tok) {
835 		if (strcmp(tok, "translate") == 0) {
836 			g_test_ops |= DMA_TEST_DOMAIN_OP_TRANSLATE;
837 		} else if (strcmp(tok, "pull_push") == 0) {
838 			g_test_ops |= DMA_TEST_DOMAIN_OP_PULL_PUSH;
839 		} else if (strcmp(tok, "memzero") == 0) {
840 			g_test_ops |= DMA_TEST_DOMAIN_OP_MEMZERO;
841 		} else {
842 			fprintf(stderr, "Unknown value %s\n", tok);
843 			rc = -EINVAL;
844 			break;
845 		}
846 		tok = strtok(NULL, ",");
847 	}
848 
849 	free(str);
850 
851 	if (g_test_ops == 0 || rc) {
852 		fprintf(stderr, "-e \"%s\" specified but nothing was parsed\n", _str);
853 		return -EINVAL;
854 	}
855 
856 	return rc;
857 }
858 
/* spdk_app_parse_args() option callback. Numeric options share one strtol
 * pass; string options are stored by reference (argv outlives the app).
 * Returns 0 on success, 1 on invalid input. */
static int
parse_arg(int ch, char *arg)
{
	long tmp;

	switch (ch) {
	case 'q':
	case 'o':
	case 't':
	case 'M':
		tmp = spdk_strtol(arg, 10);
		if (tmp < 0) {
			fprintf(stderr, "Invalid option %c value %s\n", ch, arg);
			return 1;
		}

		switch (ch) {
		case 'q':
			g_queue_depth = (uint32_t) tmp;
			break;
		case 'o':
			g_io_size = (uint32_t) tmp;
			break;
		case 't':
			g_run_time_sec = (uint32_t) tmp;
			break;
		case 'M':
			/* Range (0..100) is validated later in verify_args(). */
			g_rw_percentage = (uint32_t) tmp;
			break;
		}
		break;
	case 'w':
		g_rw_mode_str = arg;
		break;
	case 'b':
		g_bdev_name = arg;
		break;
	case 'f':
		g_force_memory_domains_support = true;
		break;
	case 'x':
		if (parse_expected_ops(arg)) {
			return 1;
		}
		break;
	default:
		fprintf(stderr, "Unknown option %c\n", ch);
		return 1;
	}

	return 0;
}
911 
912 static int
913 verify_args(void)
914 {
915 	const char *rw_mode = g_rw_mode_str;
916 
917 	if (g_queue_depth == 0) {
918 		fprintf(stderr, "queue depth (-q) is not set\n");
919 		return 1;
920 	}
921 	if (g_io_size == 0) {
922 		fprintf(stderr, "io size (-o) is not set\n");
923 		return 1;
924 	}
925 	if (g_run_time_sec == 0) {
926 		fprintf(stderr, "test run time (-t) is not set\n");
927 		return 1;
928 	}
929 	if (!rw_mode) {
930 		fprintf(stderr, "io pattern (-w) is not set\n");
931 		return 1;
932 	}
933 	if (strncmp(rw_mode, "rand", 4) == 0) {
934 		g_is_random = true;
935 		rw_mode = &rw_mode[4];
936 	}
937 	if (strcmp(rw_mode, "read") == 0 || strcmp(rw_mode, "write") == 0) {
938 		if (g_rw_percentage > 0) {
939 			fprintf(stderr, "Ignoring -M option\n");
940 		}
941 		g_rw_percentage = strcmp(rw_mode, "read") == 0 ? 100 : 0;
942 	} else if (strcmp(rw_mode, "rw") == 0) {
943 		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
944 			fprintf(stderr, "Invalid -M value (%d) must be 0..100\n", g_rw_percentage);
945 			return 1;
946 		}
947 	} else {
948 		fprintf(stderr, "io pattern (-w) one of [read, write, randread, randwrite, rw, randrw]\n");
949 		return 1;
950 	}
951 	if (!g_bdev_name) {
952 		fprintf(stderr, "bdev name (-b) is not set\n");
953 		return 1;
954 	}
955 
956 	return 0;
957 }
958 
/* Parse and validate arguments, run the SPDK application, then verify the
 * requested memory-domain operations were exercised and clean up. */
int
main(int argc, char **argv)
{
	struct spdk_app_opts opts = {};
	int rc;

	spdk_app_opts_init(&opts, sizeof(opts));
	opts.name = "test_dma";
	opts.shutdown_cb = dma_test_shutdown_cb;
	/* Standalone test tool: no RPC server needed. */
	opts.rpc_addr = NULL;

	rc = spdk_app_parse_args(argc, argv, &opts, "b:fq:o:t:x:w:M:", NULL, parse_arg, print_usage);
	if (rc != SPDK_APP_PARSE_ARGS_SUCCESS) {
		exit(rc);
	}

	rc = verify_args();
	if (rc) {
		exit(rc);
	}

	/* Blocks until spdk_app_stop(); its argument becomes the return value. */
	rc = spdk_app_start(&opts, dma_test_start, NULL);
	if (rc == 0) {
		rc = verify_tasks();
	}
	destroy_tasks();
	spdk_app_fini();

	return rc;
}
989