xref: /spdk/test/dma/test_dma/test_dma.c (revision c680e3a05b1a903c18bf3f75b732765607126f45)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/stdinc.h"
6 
7 #include "spdk/dma.h"
8 #include "spdk/bdev.h"
9 #include "spdk/env.h"
10 #include "spdk/event.h"
11 #include "spdk/likely.h"
12 #include "spdk/string.h"
13 #include "spdk/util.h"
14 
15 #include <infiniband/verbs.h>
16 
17 struct dma_test_task;
18 
19 struct dma_test_req {
20 	struct iovec iov;
21 	struct spdk_bdev_ext_io_opts io_opts;
22 	uint64_t submit_tsc;
23 	struct ibv_mr *mr;
24 	struct dma_test_task *task;
25 };
26 
/* Latency/throughput counters of a task. All *_tsc members are in tick counter units. */
struct dma_test_task_stats {
	/* Number of completed IOs */
	uint64_t io_completed;
	/* Sum of the latencies of all completed IOs */
	uint64_t total_tsc;
	/* Shortest observed IO latency */
	uint64_t min_tsc;
	/* Longest observed IO latency */
	uint64_t max_tsc;
};
33 
34 struct dma_test_task {
35 	struct spdk_bdev_desc *desc;
36 	struct spdk_io_channel *channel;
37 	uint64_t cur_io_offset;
38 	uint64_t max_offset_in_ios;
39 	uint64_t num_blocks_per_io;
40 	int rw_percentage;
41 	uint32_t seed;
42 	uint32_t io_inflight;
43 	struct dma_test_task_stats stats;
44 	struct dma_test_task_stats last_stats;
45 	bool is_draining;
46 	bool is_random;
47 	struct dma_test_req *reqs;
48 	struct spdk_thread *thread;
49 	const char *bdev_name;
50 	uint64_t num_translations;
51 	uint64_t num_pull_push;
52 	uint64_t num_mem_zero;
53 	uint32_t lcore;
54 
55 	TAILQ_ENTRY(dma_test_task) link;
56 };
57 
58 struct dma_test_data_cpl_ctx {
59 	spdk_memory_domain_data_cpl_cb data_cpl;
60 	void *data_cpl_arg;
61 };
62 
/* Bit flags of the memory domain operations which can be requested with the -x option. */
enum dma_test_domain_ops {
	/* Address translation */
	DMA_TEST_DOMAIN_OP_TRANSLATE = 1u << 0,
	/* Data pull or push */
	DMA_TEST_DOMAIN_OP_PULL_PUSH = 1u << 1,
	/* Memory zeroing */
	DMA_TEST_DOMAIN_OP_MEMZERO = 1u << 2,
};
68 
/* All allocated tasks; new tasks are appended to the tail */
TAILQ_HEAD(, dma_test_task) g_tasks = TAILQ_HEAD_INITIALIZER(g_tasks);

/* Values filled while parsing and verifying the command line */
static char *g_bdev_name;			/* -b: bdev under test */
static const char *g_rw_mode_str;		/* -w: IO pattern exactly as typed by the user */
static int g_rw_percentage = -1;		/* -M: percentage of reads, negative until set */
static uint32_t g_queue_depth;			/* -q: requests per task */
static uint32_t g_io_size;			/* -o: IO size in bytes */
static uint32_t g_run_time_sec;			/* -t: test duration in seconds */
static uint32_t g_run_count;			/* number of run time poller invocations so far */
static uint32_t g_test_ops;			/* -x: mask of enum dma_test_domain_ops */
static bool g_is_random;			/* set when the IO pattern starts with "rand" */
static bool g_force_memory_domains_support;	/* -f */

/* Run time state */
static struct spdk_thread *g_main_thread;	/* thread which runs dma_test_start() */
static struct spdk_poller *g_runtime_poller;	/* poller registered with a 1 second period */
static struct spdk_memory_domain *g_domain;	/* memory domain attached to every request */
static uint64_t g_num_blocks_per_io;		/* g_io_size expressed in bdev blocks */
static uint32_t g_num_construct_tasks;		/* tasks which have not reported construction yet */
static uint32_t g_num_complete_tasks;		/* tasks which have not reported completion yet */
static uint64_t g_start_tsc;			/* tick counter value at the start of the IO phase */
static int g_run_rc;				/* first recorded error, 0 if none */

static void destroy_tasks(void);
static int dma_test_submit_io(struct dma_test_req *req);
94 
95 static void
96 print_total_stats(void)
97 {
98 	struct dma_test_task *task;
99 	uint64_t tsc_rate = spdk_get_ticks_hz();
100 	uint64_t test_time_usec = (spdk_get_ticks() - g_start_tsc) * SPDK_SEC_TO_USEC / tsc_rate;
101 	uint64_t total_tsc = 0, total_io_completed = 0;
102 	double task_iops, task_bw, task_min_lat, task_avg_lat, task_max_lat;
103 	double total_iops = 0, total_bw = 0, total_min_lat = (double)UINT64_MAX, total_max_lat = 0,
104 	       total_avg_lat;
105 
106 	printf("==========================================================================\n");
107 	printf("%*s\n", 55, "Latency [us]");
108 	printf("%*s %10s %10s %10s %10s\n", 19, "IOPS", "MiB/s", "Average", "min", "max");
109 
110 	TAILQ_FOREACH(task, &g_tasks, link) {
111 		if (!task->stats.io_completed) {
112 			continue;
113 		}
114 		task_iops = (double)task->stats.io_completed * SPDK_SEC_TO_USEC / test_time_usec;
115 		task_bw = task_iops * g_io_size / (1024 * 1024);
116 		task_avg_lat = (double)task->stats.total_tsc / task->stats.io_completed * SPDK_SEC_TO_USEC /
117 			       tsc_rate;
118 		task_min_lat = (double)task->stats.min_tsc * SPDK_SEC_TO_USEC / tsc_rate;
119 		task_max_lat = (double)task->stats.max_tsc * SPDK_SEC_TO_USEC / tsc_rate;
120 
121 		total_iops += task_iops;
122 		total_bw += task_bw;
123 		total_io_completed += task->stats.io_completed;
124 		total_tsc += task->stats.total_tsc;
125 		if (task_min_lat < total_min_lat) {
126 			total_min_lat = task_min_lat;
127 		}
128 		if (task_max_lat > total_max_lat) {
129 			total_max_lat = task_max_lat;
130 		}
131 		printf("Core %2u: %10.2f %10.2f %10.2f %10.2f %10.2f\n",
132 		       task->lcore, task_iops, task_bw, task_avg_lat, task_min_lat, task_max_lat);
133 	}
134 
135 	if (total_io_completed) {
136 		total_avg_lat = (double)total_tsc / total_io_completed  * SPDK_SEC_TO_USEC / tsc_rate;
137 		printf("==========================================================================\n");
138 		printf("%-*s %10.2f %10.2f %10.2f %10.2f %10.2f\n",
139 		       8, "Total  :", total_iops, total_bw, total_avg_lat, total_min_lat, total_max_lat);
140 		printf("\n");
141 	}
142 }
143 
144 static void
145 print_periodic_stats(void)
146 {
147 	struct dma_test_task *task;
148 	uint64_t io_last_sec = 0, tsc_last_sec = 0;
149 	double lat_last_sec, bw_last_sec;
150 
151 	TAILQ_FOREACH(task, &g_tasks, link) {
152 		io_last_sec += task->stats.io_completed - task->last_stats.io_completed;
153 		tsc_last_sec += task->stats.total_tsc - task->last_stats.total_tsc;
154 		memcpy(&task->last_stats, &task->stats, sizeof(task->stats));
155 	}
156 
157 	printf("Running %3u/%-3u sec", g_run_count, g_run_time_sec);
158 	if (io_last_sec) {
159 		lat_last_sec =	(double)tsc_last_sec / io_last_sec * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
160 		bw_last_sec = (double)io_last_sec * g_io_size / (1024 * 1024);
161 		printf(" IOPS: %-8"PRIu64" BW: %-6.2f [MiB/s] avg.lat %-5.2f [us]",
162 		       io_last_sec, bw_last_sec, lat_last_sec);
163 	}
164 
165 	printf("\r");
166 	fflush(stdout);
167 }
168 
169 static void
170 dma_test_task_complete(void *ctx)
171 {
172 	assert(g_num_complete_tasks > 0);
173 
174 	if (--g_num_complete_tasks == 0) {
175 		spdk_poller_unregister(&g_runtime_poller);
176 		print_total_stats();
177 		spdk_app_stop(g_run_rc);
178 	}
179 }
180 
181 static inline void
182 dma_test_check_and_signal_task_done(struct dma_test_task *task)
183 {
184 	if (task->io_inflight == 0) {
185 		spdk_put_io_channel(task->channel);
186 		spdk_bdev_close(task->desc);
187 		spdk_thread_send_msg(g_main_thread, dma_test_task_complete, task);
188 		spdk_thread_exit(spdk_get_thread());
189 	}
190 }
191 
192 static inline void
193 dma_test_task_update_stats(struct dma_test_task *task, uint64_t submit_tsc)
194 {
195 	uint64_t tsc_diff = spdk_get_ticks() - submit_tsc;
196 
197 	task->stats.io_completed++;
198 	task->stats.total_tsc += tsc_diff;
199 	if (spdk_unlikely(tsc_diff < task->stats.min_tsc)) {
200 		task->stats.min_tsc = tsc_diff;
201 	}
202 	if (spdk_unlikely(tsc_diff > task->stats.max_tsc)) {
203 		task->stats.max_tsc = tsc_diff;
204 	}
205 }
206 
207 static void
208 dma_test_bdev_io_completion_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
209 {
210 	struct dma_test_req *req = cb_arg;
211 	struct dma_test_task *task = req->task;
212 
213 	assert(task->io_inflight > 0);
214 	--task->io_inflight;
215 	dma_test_task_update_stats(task, req->submit_tsc);
216 
217 	if (!success) {
218 		if (!g_run_rc) {
219 			fprintf(stderr, "IO completed with error\n");
220 			g_run_rc = -1;
221 		}
222 		task->is_draining = true;
223 	}
224 
225 	spdk_bdev_free_io(bdev_io);
226 
227 	if (spdk_unlikely(task->is_draining)) {
228 		dma_test_check_and_signal_task_done(task);
229 		return;
230 	}
231 
232 	dma_test_submit_io(req);
233 }
234 
235 static inline uint64_t
236 dma_test_get_offset_in_ios(struct dma_test_task *task)
237 {
238 	uint64_t offset;
239 
240 	if (task->is_random) {
241 		offset = rand_r(&task->seed) % task->max_offset_in_ios;
242 	} else {
243 		offset = task->cur_io_offset++;
244 		if (spdk_unlikely(task->cur_io_offset == task->max_offset_in_ios)) {
245 			task->cur_io_offset = 0;
246 		}
247 	}
248 
249 	return offset;
250 }
251 
252 static inline bool
253 dma_test_task_is_read(struct dma_test_task *task)
254 {
255 	if (task->rw_percentage == 100) {
256 		return true;
257 	}
258 	if (task->rw_percentage != 0 && (rand_r(&task->seed) % 100) <  task->rw_percentage) {
259 		return true;
260 	}
261 	return false;
262 }
263 
264 static void
265 dma_test_data_cpl(void *ctx)
266 {
267 	struct dma_test_data_cpl_ctx *cpl_ctx = ctx;
268 
269 	cpl_ctx->data_cpl(cpl_ctx->data_cpl_arg, 0);
270 	free(cpl_ctx);
271 }
272 
273 static int
274 dma_test_copy_memory(struct dma_test_req *req, struct iovec *dst_iov, uint32_t dst_iovcnt,
275 		     struct iovec *src_iov, uint32_t src_iovcnt, spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
276 {
277 	struct dma_test_data_cpl_ctx *cpl_ctx;
278 
279 	cpl_ctx = calloc(1, sizeof(*cpl_ctx));
280 	if (!cpl_ctx) {
281 		return -ENOMEM;
282 	}
283 
284 	cpl_ctx->data_cpl = cpl_cb;
285 	cpl_ctx->data_cpl_arg = cpl_cb_arg;
286 
287 	spdk_iovcpy(src_iov, src_iovcnt, dst_iov, dst_iovcnt);
288 	req->task->num_pull_push++;
289 	spdk_thread_send_msg(req->task->thread, dma_test_data_cpl, cpl_ctx);
290 
291 	return 0;
292 }
293 
294 static int
295 dma_test_push_memory_cb(struct spdk_memory_domain *dst_domain,
296 			void *dst_domain_ctx,
297 			struct iovec *dst_iov, uint32_t dst_iovcnt, struct iovec *src_iov, uint32_t src_iovcnt,
298 			spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
299 {
300 	struct dma_test_req *req = dst_domain_ctx;
301 
302 	return dma_test_copy_memory(req, dst_iov, dst_iovcnt, src_iov, src_iovcnt, cpl_cb, cpl_cb_arg);
303 }
304 
305 static int
306 dma_test_pull_memory_cb(struct spdk_memory_domain *src_domain,
307 			void *src_domain_ctx,
308 			struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov, uint32_t dst_iovcnt,
309 			spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
310 {
311 	struct dma_test_req *req = src_domain_ctx;
312 
313 	return dma_test_copy_memory(req, dst_iov, dst_iovcnt, src_iov, src_iovcnt, cpl_cb, cpl_cb_arg);
314 }
315 
316 static int
317 dma_test_memzero_cb(struct spdk_memory_domain *src_domain, void *src_domain_ctx,
318 		    struct iovec *iov, uint32_t iovcnt,
319 		    spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
320 {
321 	struct dma_test_req *req = src_domain_ctx;
322 	struct dma_test_data_cpl_ctx *cpl_ctx;
323 	uint32_t i;
324 
325 	cpl_ctx = calloc(1, sizeof(*cpl_ctx));
326 	if (!cpl_ctx) {
327 		return -ENOMEM;
328 	}
329 
330 	cpl_ctx->data_cpl = cpl_cb;
331 	cpl_ctx->data_cpl_arg = cpl_cb_arg;
332 
333 	for (i = 0; i < iovcnt; i++) {
334 		memset(iov[i].iov_base, 0, iov[i].iov_len);
335 	}
336 	req->task->num_mem_zero++;
337 
338 	spdk_thread_send_msg(req->task->thread, dma_test_data_cpl, cpl_ctx);
339 
340 	return 0;
341 }
342 
343 
344 static int
345 dma_test_translate_memory_cb(struct spdk_memory_domain *src_domain, void *src_domain_ctx,
346 			     struct spdk_memory_domain *dst_domain, struct spdk_memory_domain_translation_ctx *dst_domain_ctx,
347 			     void *addr, size_t len, struct spdk_memory_domain_translation_result *result)
348 {
349 	struct dma_test_req *req = src_domain_ctx;
350 	struct ibv_qp *dst_domain_qp = (struct ibv_qp *)dst_domain_ctx->rdma.ibv_qp;
351 
352 	if (spdk_unlikely(!req->mr)) {
353 		req->mr = ibv_reg_mr(dst_domain_qp->pd, addr, len, IBV_ACCESS_LOCAL_WRITE |
354 				     IBV_ACCESS_REMOTE_READ |
355 				     IBV_ACCESS_REMOTE_WRITE);
356 		if (!req->mr) {
357 			fprintf(stderr, "Failed to register memory region, errno %d\n", errno);
358 			return -1;
359 		}
360 	}
361 
362 	result->iov.iov_base = addr;
363 	result->iov.iov_len = len;
364 	result->iov_count = 1;
365 	result->rdma.lkey = req->mr->lkey;
366 	result->rdma.rkey = req->mr->rkey;
367 	result->dst_domain = dst_domain;
368 
369 	req->task->num_translations++;
370 
371 	return 0;
372 }
373 
374 static int
375 dma_test_submit_io(struct dma_test_req *req)
376 {
377 	struct dma_test_task *task = req->task;
378 	uint64_t offset_in_ios;
379 	int rc;
380 	bool is_read;
381 
382 	offset_in_ios = dma_test_get_offset_in_ios(task);
383 	is_read = dma_test_task_is_read(task);
384 	req->submit_tsc = spdk_get_ticks();
385 	if (is_read) {
386 		rc = spdk_bdev_readv_blocks_ext(task->desc, task->channel, &req->iov, 1,
387 						offset_in_ios * task->num_blocks_per_io, task->num_blocks_per_io,
388 						dma_test_bdev_io_completion_cb, req, &req->io_opts);
389 	} else {
390 		rc = spdk_bdev_writev_blocks_ext(task->desc, task->channel, &req->iov, 1,
391 						 offset_in_ios * task->num_blocks_per_io, task->num_blocks_per_io,
392 						 dma_test_bdev_io_completion_cb, req, &req->io_opts);
393 	}
394 
395 	if (spdk_unlikely(rc)) {
396 		if (!g_run_rc) {
397 			/* log an error only once */
398 			fprintf(stderr, "Failed to submit %s IO, rc %d, stop sending IO\n", is_read ? "read" : "write", rc);
399 			g_run_rc = rc;
400 		}
401 		task->is_draining = true;
402 		dma_test_check_and_signal_task_done(task);
403 		return rc;
404 	}
405 
406 	task->io_inflight++;
407 
408 	return 0;
409 }
410 
411 static void
412 dma_test_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
413 {
414 	struct dma_test_task *task = event_ctx;
415 
416 	if (type == SPDK_BDEV_EVENT_REMOVE) {
417 		task->is_draining = true;
418 	}
419 }
420 
421 static void
422 dma_test_bdev_dummy_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
423 			     void *event_ctx)
424 {
425 }
426 
427 static void
428 dma_test_task_run(void *ctx)
429 {
430 	struct dma_test_task *task = ctx;
431 	uint32_t i;
432 	int rc = 0;
433 
434 	for (i = 0; i < g_queue_depth && rc == 0; i++) {
435 		rc = dma_test_submit_io(&task->reqs[i]);
436 	}
437 }
438 
439 static void
440 dma_test_drain_task(void *ctx)
441 {
442 	struct dma_test_task *task = ctx;
443 
444 	task->is_draining = true;
445 }
446 
447 static void
448 dma_test_shutdown_cb(void)
449 {
450 	struct dma_test_task *task;
451 
452 	spdk_poller_unregister(&g_runtime_poller);
453 
454 	TAILQ_FOREACH(task, &g_tasks, link) {
455 		spdk_thread_send_msg(task->thread, dma_test_drain_task, task);
456 	}
457 }
458 
459 static int
460 dma_test_run_time_poller(void *ctx)
461 {
462 	g_run_count++;
463 
464 	if (g_run_count < g_run_time_sec) {
465 		if (isatty(STDOUT_FILENO)) {
466 			print_periodic_stats();
467 		}
468 	} else {
469 		dma_test_shutdown_cb();
470 	}
471 
472 	return SPDK_POLLER_BUSY;
473 }
474 
475 static void
476 dma_test_construct_task_done(void *ctx)
477 {
478 	struct dma_test_task *task;
479 
480 	assert(g_num_construct_tasks > 0);
481 	--g_num_construct_tasks;
482 
483 	if (g_num_construct_tasks != 0) {
484 		return;
485 	}
486 
487 	if (g_run_rc) {
488 		fprintf(stderr, "Initialization failed with error %d\n", g_run_rc);
489 		spdk_app_stop(g_run_rc);
490 		return;
491 	}
492 
493 	g_runtime_poller = spdk_poller_register_named(dma_test_run_time_poller, NULL, 1 * 1000 * 1000,
494 			   "dma_test_run_time_poller");
495 	if (!g_runtime_poller) {
496 		fprintf(stderr, "Failed to run timer\n");
497 		spdk_app_stop(-1);
498 		return;
499 	}
500 
501 	printf("Initialization complete, running %s IO for %u sec on %u cores\n", g_rw_mode_str,
502 	       g_run_time_sec, spdk_env_get_core_count());
503 	g_start_tsc = spdk_get_ticks();
504 	TAILQ_FOREACH(task, &g_tasks, link) {
505 		spdk_thread_send_msg(task->thread, dma_test_task_run, task);
506 	}
507 }
508 
509 static void
510 dma_test_construct_task_on_thread(void *ctx)
511 {
512 	struct dma_test_task *task = ctx;
513 	int rc;
514 
515 	rc = spdk_bdev_open_ext(task->bdev_name, true, dma_test_bdev_event_cb, task, &task->desc);
516 	if (rc) {
517 		fprintf(stderr, "Failed to open bdev %s, rc %d\n", task->bdev_name, rc);
518 		g_run_rc = rc;
519 		spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
520 		return;
521 	}
522 
523 	task->channel = spdk_bdev_get_io_channel(task->desc);
524 	if (!task->channel) {
525 		spdk_bdev_close(task->desc);
526 		task->desc = NULL;
527 		fprintf(stderr, "Failed to open bdev %s, rc %d\n", task->bdev_name, rc);
528 		g_run_rc = rc;
529 		spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
530 		return;
531 	}
532 
533 	task->max_offset_in_ios = spdk_bdev_get_num_blocks(spdk_bdev_desc_get_bdev(
534 					  task->desc)) / task->num_blocks_per_io;
535 
536 	spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, task);
537 }
538 
539 static bool
540 dma_test_check_bdev_supports_rdma_memory_domain(struct spdk_bdev *bdev)
541 {
542 	struct spdk_memory_domain **bdev_domains;
543 	int bdev_domains_count, bdev_domains_count_tmp, i;
544 	bool rdma_domain_supported = false;
545 
546 	bdev_domains_count = spdk_bdev_get_memory_domains(bdev, NULL, 0);
547 
548 	if (bdev_domains_count < 0) {
549 		fprintf(stderr, "Failed to get bdev memory domains count, rc %d\n", bdev_domains_count);
550 		return false;
551 	} else if (bdev_domains_count == 0) {
552 		fprintf(stderr, "bdev %s doesn't support any memory domains\n", spdk_bdev_get_name(bdev));
553 		return false;
554 	}
555 
556 	fprintf(stdout, "bdev %s reports %d memory domains\n", spdk_bdev_get_name(bdev),
557 		bdev_domains_count);
558 
559 	bdev_domains = calloc((size_t)bdev_domains_count, sizeof(*bdev_domains));
560 	if (!bdev_domains) {
561 		fprintf(stderr, "Failed to allocate memory domains\n");
562 		return false;
563 	}
564 
565 	bdev_domains_count_tmp = spdk_bdev_get_memory_domains(bdev, bdev_domains, bdev_domains_count);
566 	if (bdev_domains_count_tmp != bdev_domains_count) {
567 		fprintf(stderr, "Unexpected bdev domains return value %d\n", bdev_domains_count_tmp);
568 		return false;
569 	}
570 
571 	for (i = 0; i < bdev_domains_count; i++) {
572 		if (spdk_memory_domain_get_dma_device_type(bdev_domains[i]) == SPDK_DMA_DEVICE_TYPE_RDMA) {
573 			/* Bdev supports memory domain of RDMA type, we can try to submit IO request to it using
574 			 * bdev ext API */
575 			rdma_domain_supported = true;
576 			break;
577 		}
578 	}
579 
580 	fprintf(stdout, "bdev %s %s RDMA memory domain\n", spdk_bdev_get_name(bdev),
581 		rdma_domain_supported ? "supports" : "doesn't support");
582 	free(bdev_domains);
583 
584 	return rdma_domain_supported;
585 }
586 
587 static int
588 allocate_task(uint32_t core, const char *bdev_name)
589 {
590 	char thread_name[32];
591 	struct spdk_cpuset cpu_set;
592 	uint32_t i;
593 	struct dma_test_task *task;
594 	struct dma_test_req *req;
595 
596 	task = calloc(1, sizeof(*task));
597 	if (!task) {
598 		fprintf(stderr, "Failed to allocate per thread task\n");
599 		return -ENOMEM;
600 	}
601 
602 	TAILQ_INSERT_TAIL(&g_tasks, task, link);
603 
604 	task->reqs = calloc(g_queue_depth, sizeof(*task->reqs));
605 	if (!task->reqs) {
606 		fprintf(stderr, "Failed to allocate requests\n");
607 		return -ENOMEM;
608 	}
609 
610 	for (i = 0; i < g_queue_depth; i++) {
611 		req = &task->reqs[i];
612 		req->task = task;
613 		req->iov.iov_len = g_io_size;
614 		req->iov.iov_base = malloc(req->iov.iov_len);
615 		if (!req->iov.iov_base) {
616 			fprintf(stderr, "Failed to allocate request data buffer\n");
617 			return -ENOMEM;
618 		}
619 		memset(req->iov.iov_base, 0xc, req->iov.iov_len);
620 		req->io_opts.size = sizeof(req->io_opts);
621 		req->io_opts.memory_domain = g_domain;
622 		req->io_opts.memory_domain_ctx = req;
623 	}
624 
625 	snprintf(thread_name, 32, "task_%u", core);
626 	spdk_cpuset_zero(&cpu_set);
627 	spdk_cpuset_set_cpu(&cpu_set, core, true);
628 	task->thread = spdk_thread_create(thread_name, &cpu_set);
629 	if (!task->thread) {
630 		fprintf(stderr, "Failed to create SPDK thread, core %u, cpu_mask %s\n", core,
631 			spdk_cpuset_fmt(&cpu_set));
632 		return -ENOMEM;
633 	}
634 
635 	task->seed = core;
636 	task->lcore = core;
637 	task->bdev_name = bdev_name;
638 	task->is_random = g_is_random;
639 	task->rw_percentage = g_rw_percentage;
640 	task->num_blocks_per_io = g_num_blocks_per_io;
641 	task->stats.min_tsc = UINT64_MAX;
642 
643 	return 0;
644 }
645 
646 static void
647 destroy_task(struct dma_test_task *task)
648 {
649 	struct dma_test_req *req;
650 	uint32_t i;
651 
652 	for (i = 0; i < g_queue_depth; i++) {
653 		req = &task->reqs[i];
654 		if (req->mr) {
655 			ibv_dereg_mr(req->mr);
656 		}
657 		free(req->iov.iov_base);
658 	}
659 	free(task->reqs);
660 	TAILQ_REMOVE(&g_tasks, task, link);
661 	free(task);
662 }
663 
664 static void
665 destroy_tasks(void)
666 {
667 	struct dma_test_task *task, *tmp_task;
668 
669 	TAILQ_FOREACH_SAFE(task, &g_tasks, link, tmp_task) {
670 		destroy_task(task);
671 	}
672 }
673 
674 static int
675 verify_tasks(void)
676 {
677 	struct dma_test_task *task;
678 	uint64_t total_requests = 0;
679 	uint64_t num_translations = 0;
680 	uint64_t num_pull_push = 0;
681 	uint64_t num_memzero = 0;
682 	int rc = 0;
683 
684 	if (!g_test_ops) {
685 		/* No specific ops were requested, nothing to check */
686 		return rc;
687 	}
688 
689 	TAILQ_FOREACH(task, &g_tasks, link) {
690 		total_requests += task->stats.io_completed;
691 		num_translations += task->num_translations;
692 		num_pull_push += task->num_pull_push;
693 		num_memzero += task->num_mem_zero;
694 	}
695 
696 	if (g_test_ops & DMA_TEST_DOMAIN_OP_TRANSLATE) {
697 		if (num_translations == 0) {
698 			fprintf(stderr, "Requested \"translate\" operation, but it was not executed\n");
699 			rc = -EINVAL;
700 		}
701 	}
702 	if (g_test_ops & DMA_TEST_DOMAIN_OP_PULL_PUSH) {
703 		if (num_pull_push == 0) {
704 			fprintf(stderr, "Requested \"pull_push\" operation, but it was not executed\n");
705 			rc = -EINVAL;
706 		}
707 	}
708 	if (g_test_ops & DMA_TEST_DOMAIN_OP_MEMZERO) {
709 		if (num_memzero == 0) {
710 			fprintf(stderr, "Requested \"memzero\" operation, but it was not executed\n");
711 			rc = -EINVAL;
712 		}
713 	}
714 
715 	/* bdev request can be split, so the total number of pull_push +translate operations
716 	 * can be bigger than total_number of requests */
717 	if (num_translations + num_pull_push + num_memzero < total_requests) {
718 		fprintf(stderr,
719 			"Operations number mismatch: translate %"PRIu64", pull_push %"PRIu64", mem_zero %"PRIu64" expected total %"PRIu64"\n",
720 			num_translations, num_pull_push, num_memzero, total_requests);
721 		rc = -EINVAL;
722 	} else {
723 		fprintf(stdout,
724 			"Total operations: %"PRIu64", translate %"PRIu64" pull_push %"PRIu64" memzero %"PRIu64"\n",
725 			total_requests, num_translations, num_pull_push, num_memzero);
726 	}
727 
728 	return rc;
729 }
730 
731 static void
732 dma_test_start(void *arg)
733 {
734 	struct spdk_bdev_desc *desc;
735 	struct spdk_bdev *bdev;
736 	struct dma_test_task *task;
737 	uint32_t block_size, i;
738 	int rc;
739 
740 	rc = spdk_bdev_open_ext(g_bdev_name, true, dma_test_bdev_dummy_event_cb, NULL, &desc);
741 	if (rc) {
742 		fprintf(stderr, "Can't find bdev %s\n", g_bdev_name);
743 		spdk_app_stop(-ENODEV);
744 		return;
745 	}
746 	bdev = spdk_bdev_desc_get_bdev(desc);
747 	/* This function checks if bdev supports memory domains. Test is not failed if there are
748 	 * no memory domains since bdev layer can pull/push data */
749 	if (!dma_test_check_bdev_supports_rdma_memory_domain(bdev) && g_force_memory_domains_support) {
750 		fprintf(stderr, "Test aborted due to \"-f\" (force memory domains support) option\n");
751 		spdk_bdev_close(desc);
752 		spdk_app_stop(-ENODEV);
753 		return;
754 	}
755 
756 	g_main_thread = spdk_get_thread();
757 
758 	block_size = spdk_bdev_get_block_size(bdev);
759 	if (g_io_size < block_size || g_io_size % block_size != 0) {
760 		fprintf(stderr, "Invalid io_size %u requested, bdev block size %u\n", g_io_size, block_size);
761 		spdk_bdev_close(desc);
762 		spdk_app_stop(-EINVAL);
763 		return;
764 	}
765 	g_num_blocks_per_io = g_io_size / block_size;
766 
767 	/* Create a memory domain to represent the source memory domain.
768 	 * Since we don't actually have a remote memory domain in this test, this will describe memory
769 	 * on the local system and the translation to the destination memory domain will be trivial.
770 	 * But this at least allows us to demonstrate the flow and test the functionality. */
771 	rc = spdk_memory_domain_create(&g_domain, SPDK_DMA_DEVICE_TYPE_RDMA, NULL, "test_dma");
772 	if (rc != 0) {
773 		spdk_bdev_close(desc);
774 		spdk_app_stop(rc);
775 		return;
776 	}
777 	spdk_memory_domain_set_translation(g_domain, dma_test_translate_memory_cb);
778 	spdk_memory_domain_set_pull(g_domain, dma_test_pull_memory_cb);
779 	spdk_memory_domain_set_push(g_domain, dma_test_push_memory_cb);
780 	spdk_memory_domain_set_memzero(g_domain, dma_test_memzero_cb);
781 
782 	SPDK_ENV_FOREACH_CORE(i) {
783 		rc = allocate_task(i, g_bdev_name);
784 		if (rc) {
785 			destroy_tasks();
786 			spdk_bdev_close(desc);
787 			spdk_app_stop(rc);
788 			return;
789 		}
790 		g_num_construct_tasks++;
791 		g_num_complete_tasks++;
792 	}
793 
794 	TAILQ_FOREACH(task, &g_tasks, link) {
795 		spdk_thread_send_msg(task->thread, dma_test_construct_task_on_thread, task);
796 	}
797 
798 	spdk_bdev_close(desc);
799 }
800 
/*
 * Print the description of the application specific command line options.
 * The listed values match what parse_expected_ops() and verify_args() accept.
 */
static void
print_usage(void)
{
	printf(" -b <bdev>         bdev name for test\n");
	printf(" -f                force memory domains support - abort test if bdev doesn't report memory domains\n");
	printf(" -q <val>          io depth\n");
	printf(" -o <val>          io size in bytes\n");
	printf(" -t <val>          run time in seconds\n");
	printf(" -x <op,op>        Comma separated memory domain operations expected in the test. Values are \"translate\", \"pull_push\" and \"memzero\"\n");
	printf(" -w <str>          io pattern (read, write, randread, randwrite, rw, randrw)\n");
	printf(" -M <0-100>        rw percentage (100 for reads, 0 for writes)\n");
}
813 
814 static int
815 parse_expected_ops(const char *_str)
816 {
817 	char *str = strdup(_str);
818 	char *tok;
819 	int rc = 0;
820 
821 	if (!str) {
822 		fprintf(stderr, "Failed to dup args\n");
823 		return -ENOMEM;
824 	}
825 
826 	tok = strtok(str, ",");
827 	while (tok) {
828 		if (strcmp(tok, "translate") == 0) {
829 			g_test_ops |= DMA_TEST_DOMAIN_OP_TRANSLATE;
830 		} else if (strcmp(tok, "pull_push") == 0) {
831 			g_test_ops |= DMA_TEST_DOMAIN_OP_PULL_PUSH;
832 		} else if (strcmp(tok, "memzero") == 0) {
833 			g_test_ops |= DMA_TEST_DOMAIN_OP_MEMZERO;
834 		} else {
835 			fprintf(stderr, "Unknown value %s\n", tok);
836 			rc = -EINVAL;
837 			break;
838 		}
839 		tok = strtok(NULL, ",");
840 	}
841 
842 	free(str);
843 
844 	if (g_test_ops == 0 || rc) {
845 		fprintf(stderr, "-e \"%s\" specified but nothing was parsed\n", _str);
846 		return -EINVAL;
847 	}
848 
849 	return rc;
850 }
851 
852 static int
853 parse_arg(int ch, char *arg)
854 {
855 	long tmp;
856 
857 	switch (ch) {
858 	case 'q':
859 	case 'o':
860 	case 't':
861 	case 'M':
862 		tmp = spdk_strtol(arg, 10);
863 		if (tmp < 0) {
864 			fprintf(stderr, "Invalid option %c value %s\n", ch, arg);
865 			return 1;
866 		}
867 
868 		switch (ch) {
869 		case 'q':
870 			g_queue_depth = (uint32_t) tmp;
871 			break;
872 		case 'o':
873 			g_io_size = (uint32_t) tmp;
874 			break;
875 		case 't':
876 			g_run_time_sec = (uint32_t) tmp;
877 			break;
878 		case 'M':
879 			g_rw_percentage = (uint32_t) tmp;
880 			break;
881 		}
882 		break;
883 	case 'w':
884 		g_rw_mode_str = arg;
885 		break;
886 	case 'b':
887 		g_bdev_name = arg;
888 		break;
889 	case 'f':
890 		g_force_memory_domains_support = true;
891 		break;
892 	case 'x':
893 		if (parse_expected_ops(arg)) {
894 			return 1;
895 		}
896 		break;
897 	default:
898 		fprintf(stderr, "Unknown option %c\n", ch);
899 		return 1;
900 	}
901 
902 	return 0;
903 }
904 
905 static int
906 verify_args(void)
907 {
908 	const char *rw_mode = g_rw_mode_str;
909 
910 	if (g_queue_depth == 0) {
911 		fprintf(stderr, "queue depth (-q) is not set\n");
912 		return 1;
913 	}
914 	if (g_io_size == 0) {
915 		fprintf(stderr, "io size (-o) is not set\n");
916 		return 1;
917 	}
918 	if (g_run_time_sec == 0) {
919 		fprintf(stderr, "test run time (-t) is not set\n");
920 		return 1;
921 	}
922 	if (!rw_mode) {
923 		fprintf(stderr, "io pattern (-w) is not set\n");
924 		return 1;
925 	}
926 	if (strncmp(rw_mode, "rand", 4) == 0) {
927 		g_is_random = true;
928 		rw_mode = &rw_mode[4];
929 	}
930 	if (strcmp(rw_mode, "read") == 0 || strcmp(rw_mode, "write") == 0) {
931 		if (g_rw_percentage > 0) {
932 			fprintf(stderr, "Ignoring -M option\n");
933 		}
934 		g_rw_percentage = strcmp(rw_mode, "read") == 0 ? 100 : 0;
935 	} else if (strcmp(rw_mode, "rw") == 0) {
936 		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
937 			fprintf(stderr, "Invalid -M value (%d) must be 0..100\n", g_rw_percentage);
938 			return 1;
939 		}
940 	} else {
941 		fprintf(stderr, "io pattern (-w) one of [read, write, randread, randwrite, rw, randrw]\n");
942 		return 1;
943 	}
944 	if (!g_bdev_name) {
945 		fprintf(stderr, "bdev name (-b) is not set\n");
946 		return 1;
947 	}
948 
949 	return 0;
950 }
951 
952 int
953 main(int argc, char **argv)
954 {
955 	struct spdk_app_opts opts = {};
956 	int rc;
957 
958 	spdk_app_opts_init(&opts, sizeof(opts));
959 	opts.name = "test_dma";
960 	opts.shutdown_cb = dma_test_shutdown_cb;
961 
962 	rc = spdk_app_parse_args(argc, argv, &opts, "b:fq:o:t:x:w:M:", NULL, parse_arg, print_usage);
963 	if (rc != SPDK_APP_PARSE_ARGS_SUCCESS) {
964 		exit(rc);
965 	}
966 
967 	rc = verify_args();
968 	if (rc) {
969 		exit(rc);
970 	}
971 
972 	rc = spdk_app_start(&opts, dma_test_start, NULL);
973 	if (rc == 0) {
974 		rc = verify_tasks();
975 	}
976 	destroy_tasks();
977 	spdk_app_fini();
978 
979 	return rc;
980 }
981