xref: /spdk/test/dma/test_dma/test_dma.c (revision 8dd1cd2104ea4001e4a0da2a4851ccd62c82f8e8)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  */
4 
5 #include "spdk/stdinc.h"
6 
7 #include "spdk/dma.h"
8 #include "spdk/bdev.h"
9 #include "spdk/env.h"
10 #include "spdk/event.h"
11 #include "spdk/likely.h"
12 #include "spdk/string.h"
13 #include "spdk/util.h"
14 
15 #include <infiniband/verbs.h>
16 
struct dma_test_task;

/* One outstanding IO slot; a task owns g_queue_depth of these. */
struct dma_test_req {
	struct iovec iov;
	struct spdk_bdev_ext_io_opts io_opts;
	/* TSC at submission time, used for per-IO latency accounting */
	uint64_t submit_tsc;
	/* Memory region registered lazily by the translate callback */
	struct ibv_mr *mr;
	struct dma_test_task *task;
};

/* Per-task IO statistics; all latency fields are in TSC ticks. */
struct dma_test_task_stats {
	uint64_t io_completed;
	uint64_t total_tsc;
	uint64_t min_tsc;
	uint64_t max_tsc;
};

/* Per-core test context: one SPDK thread, bdev descriptor, IO channel and
 * request array per lcore. */
struct dma_test_task {
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *channel;
	/* Next sequential offset (in IO-sized units); unused for random IO */
	uint64_t cur_io_offset;
	uint64_t max_offset_in_ios;
	uint64_t num_blocks_per_io;
	int rw_percentage;
	uint32_t seed;
	uint32_t io_inflight;
	struct dma_test_task_stats stats;
	/* Snapshot taken by the previous periodic report */
	struct dma_test_task_stats last_stats;
	bool is_draining;
	bool is_random;
	struct dma_test_req *reqs;
	struct spdk_thread *thread;
	const char *bdev_name;
	/* Memory-domain callback counters, checked by verify_tasks() */
	uint64_t num_translations;
	uint64_t num_pull_push;
	uint64_t num_mem_zero;
	uint32_t lcore;

	TAILQ_ENTRY(dma_test_task) link;
};

/* Carries a memory-domain completion callback across a thread hop. */
struct dma_test_data_cpl_ctx {
	spdk_memory_domain_data_cpl_cb data_cpl;
	void *data_cpl_arg;
};

/* Bitmask of memory-domain operations the user expects to see (-x option). */
enum dma_test_domain_ops {
	DMA_TEST_DOMAIN_OP_TRANSLATE = 1u << 0,
	DMA_TEST_DOMAIN_OP_PULL_PUSH = 1u << 1,
	DMA_TEST_DOMAIN_OP_MEMZERO = 1u << 2,
};
68 
/* All per-core tasks; populated by allocate_task(), drained by destroy_tasks(). */
TAILQ_HEAD(, dma_test_task) g_tasks = TAILQ_HEAD_INITIALIZER(g_tasks);

/* User's input */
static char *g_bdev_name;
static const char *g_rw_mode_str;
static int g_rw_percentage = -1;	/* -1 means "-M not specified" */
static uint32_t g_queue_depth;
static uint32_t g_io_size;
static uint32_t g_run_time_sec;
static uint32_t g_run_count;		/* seconds elapsed, bumped by the runtime poller */
static uint32_t g_test_ops;		/* bitmask of enum dma_test_domain_ops */
static bool g_is_random;
static bool g_force_memory_domains_support;

static struct spdk_thread *g_main_thread;
static struct spdk_poller *g_runtime_poller;
/* Fake "source" memory domain created in dma_test_start() */
static struct spdk_memory_domain *g_domain;
static uint64_t g_num_blocks_per_io;
/* Countdown of tasks still initializing / still running */
static uint32_t g_num_construct_tasks;
static uint32_t g_num_complete_tasks;
static uint64_t g_start_tsc;
static int g_run_rc;			/* first error observed; 0 on success */

static void destroy_tasks(void);
static int dma_test_submit_io(struct dma_test_req *req);
94 
/* Print per-core and aggregated IOPS, bandwidth and latency for the whole run. */
static void
print_total_stats(void)
{
	struct dma_test_task *task;
	uint64_t tsc_rate = spdk_get_ticks_hz();
	uint64_t test_time_usec = (spdk_get_ticks() - g_start_tsc) * SPDK_SEC_TO_USEC / tsc_rate;
	uint64_t total_tsc = 0, total_io_completed = 0;
	double task_iops, task_bw, task_min_lat, task_avg_lat, task_max_lat;
	double total_iops = 0, total_bw = 0, total_min_lat = (double)UINT64_MAX, total_max_lat = 0,
	       total_avg_lat;

	printf("==========================================================================\n");
	printf("%*s\n", 55, "Latency [us]");
	printf("%*s %10s %10s %10s %10s\n", 19, "IOPS", "MiB/s", "Average", "min", "max");

	TAILQ_FOREACH(task, &g_tasks, link) {
		/* Skip idle cores to avoid division by zero below. */
		if (!task->stats.io_completed) {
			continue;
		}
		task_iops = (double)task->stats.io_completed * SPDK_SEC_TO_USEC / test_time_usec;
		task_bw = task_iops * g_io_size / (1024 * 1024);
		/* Convert TSC ticks to microseconds. */
		task_avg_lat = (double)task->stats.total_tsc / task->stats.io_completed * SPDK_SEC_TO_USEC /
			       tsc_rate;
		task_min_lat = (double)task->stats.min_tsc * SPDK_SEC_TO_USEC / tsc_rate;
		task_max_lat = (double)task->stats.max_tsc * SPDK_SEC_TO_USEC / tsc_rate;

		total_iops += task_iops;
		total_bw += task_bw;
		total_io_completed += task->stats.io_completed;
		total_tsc += task->stats.total_tsc;
		if (task_min_lat < total_min_lat) {
			total_min_lat = task_min_lat;
		}
		if (task_max_lat > total_max_lat) {
			total_max_lat = task_max_lat;
		}
		printf("Core %2u: %10.2f %10.2f %10.2f %10.2f %10.2f\n",
		       task->lcore, task_iops, task_bw, task_avg_lat, task_min_lat, task_max_lat);
	}

	if (total_io_completed) {
		total_avg_lat = (double)total_tsc / total_io_completed  * SPDK_SEC_TO_USEC / tsc_rate;
		printf("==========================================================================\n");
		printf("%-*s %10.2f %10.2f %10.2f %10.2f %10.2f\n",
		       8, "Total  :", total_iops, total_bw, total_avg_lat, total_min_lat, total_max_lat);
		printf("\n");
	}
}
143 
/* One-line progress report for the last polling interval; overwrites itself
 * with "\r" so it only makes sense on a TTY (caller checks isatty()). */
static void
print_periodic_stats(void)
{
	struct dma_test_task *task;
	uint64_t io_last_sec = 0, tsc_last_sec = 0;
	double lat_last_sec, bw_last_sec;

	TAILQ_FOREACH(task, &g_tasks, link) {
		/* Delta since the previous report, then refresh the snapshot. */
		io_last_sec += task->stats.io_completed - task->last_stats.io_completed;
		tsc_last_sec += task->stats.total_tsc - task->last_stats.total_tsc;
		memcpy(&task->last_stats, &task->stats, sizeof(task->stats));
	}

	printf("Running %3u/%-3u sec", g_run_count, g_run_time_sec);
	if (io_last_sec) {
		lat_last_sec =	(double)tsc_last_sec / io_last_sec * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
		bw_last_sec = (double)io_last_sec * g_io_size / (1024 * 1024);
		printf(" IOPS: %-8"PRIu64" BW: %-6.2f [MiB/s] avg.lat %-5.2f [us]",
		       io_last_sec, bw_last_sec, lat_last_sec);
	}

	printf("\r");
	fflush(stdout);
}
168 
169 static void
170 dma_test_task_complete(void *ctx)
171 {
172 	assert(g_num_complete_tasks > 0);
173 
174 	if (--g_num_complete_tasks == 0) {
175 		spdk_poller_unregister(&g_runtime_poller);
176 		print_total_stats();
177 		spdk_app_stop(g_run_rc);
178 	}
179 }
180 
/* Once the last in-flight IO of a draining task completes, release the
 * task's channel and descriptor and notify the main thread. */
static inline void
dma_test_check_and_signal_task_done(struct dma_test_task *task)
{
	if (task->io_inflight == 0) {
		spdk_put_io_channel(task->channel);
		spdk_bdev_close(task->desc);
		spdk_thread_send_msg(g_main_thread, dma_test_task_complete, task);
	}
}
190 
191 static inline void
192 dma_test_task_update_stats(struct dma_test_task *task, uint64_t submit_tsc)
193 {
194 	uint64_t tsc_diff = spdk_get_ticks() - submit_tsc;
195 
196 	task->stats.io_completed++;
197 	task->stats.total_tsc += tsc_diff;
198 	if (spdk_unlikely(tsc_diff < task->stats.min_tsc)) {
199 		task->stats.min_tsc = tsc_diff;
200 	}
201 	if (spdk_unlikely(tsc_diff > task->stats.max_tsc)) {
202 		task->stats.max_tsc = tsc_diff;
203 	}
204 }
205 
/* Per-IO completion callback: update stats, then either resubmit the same
 * request slot (keeping queue depth constant) or finish draining. */
static void
dma_test_bdev_io_completion_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct dma_test_req *req = cb_arg;
	struct dma_test_task *task = req->task;

	assert(task->io_inflight > 0);
	--task->io_inflight;
	dma_test_task_update_stats(task, req->submit_tsc);

	if (!success) {
		if (!g_run_rc) {
			/* log only the first failure */
			fprintf(stderr, "IO completed with error\n");
			g_run_rc = -1;
		}
		task->is_draining = true;
	}

	spdk_bdev_free_io(bdev_io);

	if (spdk_unlikely(task->is_draining)) {
		dma_test_check_and_signal_task_done(task);
		return;
	}

	dma_test_submit_io(req);
}
233 
234 static inline uint64_t
235 dma_test_get_offset_in_ios(struct dma_test_task *task)
236 {
237 	uint64_t offset;
238 
239 	if (task->is_random) {
240 		offset = rand_r(&task->seed) % task->max_offset_in_ios;
241 	} else {
242 		offset = task->cur_io_offset++;
243 		if (spdk_unlikely(task->cur_io_offset == task->max_offset_in_ios)) {
244 			task->cur_io_offset = 0;
245 		}
246 	}
247 
248 	return offset;
249 }
250 
251 static inline bool
252 dma_test_task_is_read(struct dma_test_task *task)
253 {
254 	if (task->rw_percentage == 100) {
255 		return true;
256 	}
257 	if (task->rw_percentage != 0 && (rand_r(&task->seed) % 100) <  task->rw_percentage) {
258 		return true;
259 	}
260 	return false;
261 }
262 
263 static void
264 dma_test_data_cpl(void *ctx)
265 {
266 	struct dma_test_data_cpl_ctx *cpl_ctx = ctx;
267 
268 	cpl_ctx->data_cpl(cpl_ctx->data_cpl_arg, 0);
269 	free(cpl_ctx);
270 }
271 
272 static int
273 dma_test_copy_memory(struct dma_test_req *req, struct iovec *dst_iov, uint32_t dst_iovcnt,
274 		     struct iovec *src_iov, uint32_t src_iovcnt, spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
275 {
276 	struct dma_test_data_cpl_ctx *cpl_ctx;
277 
278 	cpl_ctx = calloc(1, sizeof(*cpl_ctx));
279 	if (!cpl_ctx) {
280 		return -ENOMEM;
281 	}
282 
283 	cpl_ctx->data_cpl = cpl_cb;
284 	cpl_ctx->data_cpl_arg = cpl_cb_arg;
285 
286 	spdk_iovcpy(src_iov, src_iovcnt, dst_iov, dst_iovcnt);
287 	req->task->num_pull_push++;
288 	spdk_thread_send_msg(req->task->thread, dma_test_data_cpl, cpl_ctx);
289 
290 	return 0;
291 }
292 
293 static int
294 dma_test_push_memory_cb(struct spdk_memory_domain *dst_domain,
295 			void *dst_domain_ctx,
296 			struct iovec *dst_iov, uint32_t dst_iovcnt, struct iovec *src_iov, uint32_t src_iovcnt,
297 			spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
298 {
299 	struct dma_test_req *req = dst_domain_ctx;
300 
301 	return dma_test_copy_memory(req, dst_iov, dst_iovcnt, src_iov, src_iovcnt, cpl_cb, cpl_cb_arg);
302 }
303 
304 static int
305 dma_test_pull_memory_cb(struct spdk_memory_domain *src_domain,
306 			void *src_domain_ctx,
307 			struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov, uint32_t dst_iovcnt,
308 			spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
309 {
310 	struct dma_test_req *req = src_domain_ctx;
311 
312 	return dma_test_copy_memory(req, dst_iov, dst_iovcnt, src_iov, src_iovcnt, cpl_cb, cpl_cb_arg);
313 }
314 
315 static int
316 dma_test_memzero_cb(struct spdk_memory_domain *src_domain, void *src_domain_ctx,
317 		    struct iovec *iov, uint32_t iovcnt,
318 		    spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
319 {
320 	struct dma_test_req *req = src_domain_ctx;
321 	struct dma_test_data_cpl_ctx *cpl_ctx;
322 	uint32_t i;
323 
324 	cpl_ctx = calloc(1, sizeof(*cpl_ctx));
325 	if (!cpl_ctx) {
326 		return -ENOMEM;
327 	}
328 
329 	cpl_ctx->data_cpl = cpl_cb;
330 	cpl_ctx->data_cpl_arg = cpl_cb_arg;
331 
332 	for (i = 0; i < iovcnt; i++) {
333 		memset(iov[i].iov_base, 0, iov[i].iov_len);
334 	}
335 	req->task->num_mem_zero++;
336 
337 	spdk_thread_send_msg(req->task->thread, dma_test_data_cpl, cpl_ctx);
338 
339 	return 0;
340 }
341 
342 
/* Memory-domain translation callback: resolve a local buffer into an RDMA
 * translation for the destination domain's queue pair. */
static int
dma_test_translate_memory_cb(struct spdk_memory_domain *src_domain, void *src_domain_ctx,
			     struct spdk_memory_domain *dst_domain, struct spdk_memory_domain_translation_ctx *dst_domain_ctx,
			     void *addr, size_t len, struct spdk_memory_domain_translation_result *result)
{
	struct dma_test_req *req = src_domain_ctx;
	struct ibv_qp *dst_domain_qp = (struct ibv_qp *)dst_domain_ctx->rdma.ibv_qp;

	/* Register the buffer with the destination QP's protection domain on first
	 * use; the MR is cached in the request and freed in destroy_task().
	 * NOTE(review): the cached MR is reused for all later translations of this
	 * request - assumes addr/len always lies within the request's fixed
	 * buffer; confirm if requests are ever resized. */
	if (spdk_unlikely(!req->mr)) {
		req->mr = ibv_reg_mr(dst_domain_qp->pd, addr, len, IBV_ACCESS_LOCAL_WRITE |
				     IBV_ACCESS_REMOTE_READ |
				     IBV_ACCESS_REMOTE_WRITE);
		if (!req->mr) {
			fprintf(stderr, "Failed to register memory region, errno %d\n", errno);
			return -1;
		}
	}

	/* Trivial 1:1 translation: same address, keys taken from the MR. */
	result->iov.iov_base = addr;
	result->iov.iov_len = len;
	result->iov_count = 1;
	result->rdma.lkey = req->mr->lkey;
	result->rdma.rkey = req->mr->rkey;
	result->dst_domain = dst_domain;

	req->task->num_translations++;

	return 0;
}
372 
/* Submit one read or write for this request slot via the bdev ext API so that
 * io_opts (memory domain + ctx) travels with the IO. Returns 0 on success; on
 * submission failure records the error, starts draining and returns rc. */
static int
dma_test_submit_io(struct dma_test_req *req)
{
	struct dma_test_task *task = req->task;
	uint64_t offset_in_ios;
	int rc;
	bool is_read;

	offset_in_ios = dma_test_get_offset_in_ios(task);
	is_read = dma_test_task_is_read(task);
	/* Timestamp just before submission for latency accounting. */
	req->submit_tsc = spdk_get_ticks();
	if (is_read) {
		rc = spdk_bdev_readv_blocks_ext(task->desc, task->channel, &req->iov, 1,
						offset_in_ios * task->num_blocks_per_io, task->num_blocks_per_io,
						dma_test_bdev_io_completion_cb, req, &req->io_opts);
	} else {
		rc = spdk_bdev_writev_blocks_ext(task->desc, task->channel, &req->iov, 1,
						 offset_in_ios * task->num_blocks_per_io, task->num_blocks_per_io,
						 dma_test_bdev_io_completion_cb, req, &req->io_opts);
	}

	if (spdk_unlikely(rc)) {
		if (!g_run_rc) {
			/* log an error only once */
			fprintf(stderr, "Failed to submit %s IO, rc %d, stop sending IO\n", is_read ? "read" : "write", rc);
			g_run_rc = rc;
		}
		task->is_draining = true;
		/* May signal completion immediately if nothing is in flight. */
		dma_test_check_and_signal_task_done(task);
		return rc;
	}

	task->io_inflight++;

	return 0;
}
409 
410 static void
411 dma_test_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
412 {
413 	struct dma_test_task *task = event_ctx;
414 
415 	if (type == SPDK_BDEV_EVENT_REMOVE) {
416 		task->is_draining = true;
417 	}
418 }
419 
/* No-op event callback for the temporary descriptor opened in
 * dma_test_start(); events there can be safely ignored. */
static void
dma_test_bdev_dummy_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			     void *event_ctx)
{
}
425 
426 static void
427 dma_test_task_run(void *ctx)
428 {
429 	struct dma_test_task *task = ctx;
430 	uint32_t i;
431 	int rc = 0;
432 
433 	for (i = 0; i < g_queue_depth && rc == 0; i++) {
434 		rc = dma_test_submit_io(&task->reqs[i]);
435 	}
436 }
437 
438 static void
439 dma_test_drain_task(void *ctx)
440 {
441 	struct dma_test_task *task = ctx;
442 
443 	task->is_draining = true;
444 }
445 
446 static void
447 dma_test_shutdown_cb(void)
448 {
449 	struct dma_test_task *task;
450 
451 	spdk_poller_unregister(&g_runtime_poller);
452 
453 	TAILQ_FOREACH(task, &g_tasks, link) {
454 		spdk_thread_send_msg(task->thread, dma_test_drain_task, task);
455 	}
456 }
457 
458 static int
459 dma_test_run_time_poller(void *ctx)
460 {
461 	g_run_count++;
462 
463 	if (g_run_count < g_run_time_sec) {
464 		if (isatty(STDOUT_FILENO)) {
465 			print_periodic_stats();
466 		}
467 	} else {
468 		dma_test_shutdown_cb();
469 	}
470 
471 	return SPDK_POLLER_BUSY;
472 }
473 
/* Main-thread rendezvous, invoked once per task after its per-thread setup.
 * When the last task reports in: abort on any recorded error, otherwise start
 * the 1-second runtime poller and kick off IO on every core. */
static void
dma_test_construct_task_done(void *ctx)
{
	struct dma_test_task *task;

	assert(g_num_construct_tasks > 0);
	--g_num_construct_tasks;

	if (g_num_construct_tasks != 0) {
		return;
	}

	if (g_run_rc) {
		fprintf(stderr, "Initialization failed with error %d\n", g_run_rc);
		spdk_app_stop(g_run_rc);
		return;
	}

	/* Fires every second (period in microseconds). */
	g_runtime_poller = spdk_poller_register_named(dma_test_run_time_poller, NULL, 1 * 1000 * 1000,
			   "dma_test_run_time_poller");
	if (!g_runtime_poller) {
		fprintf(stderr, "Failed to run timer\n");
		spdk_app_stop(-1);
		return;
	}

	printf("Initialization complete, running %s IO for %u sec on %u cores\n", g_rw_mode_str,
	       g_run_time_sec, spdk_env_get_core_count());
	g_start_tsc = spdk_get_ticks();
	TAILQ_FOREACH(task, &g_tasks, link) {
		spdk_thread_send_msg(task->thread, dma_test_task_run, task);
	}
}
507 
508 static void
509 dma_test_construct_task_on_thread(void *ctx)
510 {
511 	struct dma_test_task *task = ctx;
512 	int rc;
513 
514 	rc = spdk_bdev_open_ext(task->bdev_name, true, dma_test_bdev_event_cb, task, &task->desc);
515 	if (rc) {
516 		fprintf(stderr, "Failed to open bdev %s, rc %d\n", task->bdev_name, rc);
517 		g_run_rc = rc;
518 		spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
519 		return;
520 	}
521 
522 	task->channel = spdk_bdev_get_io_channel(task->desc);
523 	if (!task->channel) {
524 		spdk_bdev_close(task->desc);
525 		task->desc = NULL;
526 		fprintf(stderr, "Failed to open bdev %s, rc %d\n", task->bdev_name, rc);
527 		g_run_rc = rc;
528 		spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
529 		return;
530 	}
531 
532 	task->max_offset_in_ios = spdk_bdev_get_num_blocks(spdk_bdev_desc_get_bdev(
533 					  task->desc)) / task->num_blocks_per_io;
534 
535 	spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, task);
536 }
537 
538 static bool
539 dma_test_check_bdev_supports_rdma_memory_domain(struct spdk_bdev *bdev)
540 {
541 	struct spdk_memory_domain **bdev_domains;
542 	int bdev_domains_count, bdev_domains_count_tmp, i;
543 	bool rdma_domain_supported = false;
544 
545 	bdev_domains_count = spdk_bdev_get_memory_domains(bdev, NULL, 0);
546 
547 	if (bdev_domains_count < 0) {
548 		fprintf(stderr, "Failed to get bdev memory domains count, rc %d\n", bdev_domains_count);
549 		return false;
550 	} else if (bdev_domains_count == 0) {
551 		fprintf(stderr, "bdev %s doesn't support any memory domains\n", spdk_bdev_get_name(bdev));
552 		return false;
553 	}
554 
555 	fprintf(stdout, "bdev %s reports %d memory domains\n", spdk_bdev_get_name(bdev),
556 		bdev_domains_count);
557 
558 	bdev_domains = calloc((size_t)bdev_domains_count, sizeof(*bdev_domains));
559 	if (!bdev_domains) {
560 		fprintf(stderr, "Failed to allocate memory domains\n");
561 		return false;
562 	}
563 
564 	bdev_domains_count_tmp = spdk_bdev_get_memory_domains(bdev, bdev_domains, bdev_domains_count);
565 	if (bdev_domains_count_tmp != bdev_domains_count) {
566 		fprintf(stderr, "Unexpected bdev domains return value %d\n", bdev_domains_count_tmp);
567 		return false;
568 	}
569 
570 	for (i = 0; i < bdev_domains_count; i++) {
571 		if (spdk_memory_domain_get_dma_device_type(bdev_domains[i]) == SPDK_DMA_DEVICE_TYPE_RDMA) {
572 			/* Bdev supports memory domain of RDMA type, we can try to submit IO request to it using
573 			 * bdev ext API */
574 			rdma_domain_supported = true;
575 			break;
576 		}
577 	}
578 
579 	fprintf(stdout, "bdev %s %s RDMA memory domain\n", spdk_bdev_get_name(bdev),
580 		rdma_domain_supported ? "supports" : "doesn't support");
581 	free(bdev_domains);
582 
583 	return rdma_domain_supported;
584 }
585 
586 static int
587 allocate_task(uint32_t core, const char *bdev_name)
588 {
589 	char thread_name[32];
590 	struct spdk_cpuset cpu_set;
591 	uint32_t i;
592 	struct dma_test_task *task;
593 	struct dma_test_req *req;
594 
595 	task = calloc(1, sizeof(*task));
596 	if (!task) {
597 		fprintf(stderr, "Failed to allocate per thread task\n");
598 		return -ENOMEM;
599 	}
600 
601 	TAILQ_INSERT_TAIL(&g_tasks, task, link);
602 
603 	task->reqs = calloc(g_queue_depth, sizeof(*task->reqs));
604 	if (!task->reqs) {
605 		fprintf(stderr, "Failed to allocate requests\n");
606 		return -ENOMEM;
607 	}
608 
609 	for (i = 0; i < g_queue_depth; i++) {
610 		req = &task->reqs[i];
611 		req->task = task;
612 		req->iov.iov_len = g_io_size;
613 		req->iov.iov_base = malloc(req->iov.iov_len);
614 		if (!req->iov.iov_base) {
615 			fprintf(stderr, "Failed to allocate request data buffer\n");
616 			return -ENOMEM;
617 		}
618 		memset(req->iov.iov_base, 0xc, req->iov.iov_len);
619 		req->io_opts.size = sizeof(req->io_opts);
620 		req->io_opts.memory_domain = g_domain;
621 		req->io_opts.memory_domain_ctx = req;
622 	}
623 
624 	snprintf(thread_name, 32, "task_%u", core);
625 	spdk_cpuset_zero(&cpu_set);
626 	spdk_cpuset_set_cpu(&cpu_set, core, true);
627 	task->thread = spdk_thread_create(thread_name, &cpu_set);
628 	if (!task->thread) {
629 		fprintf(stderr, "Failed to create SPDK thread, core %u, cpu_mask %s\n", core,
630 			spdk_cpuset_fmt(&cpu_set));
631 		return -ENOMEM;
632 	}
633 
634 	task->seed = core;
635 	task->lcore = core;
636 	task->bdev_name = bdev_name;
637 	task->is_random = g_is_random;
638 	task->rw_percentage = g_rw_percentage;
639 	task->num_blocks_per_io = g_num_blocks_per_io;
640 	task->stats.min_tsc = UINT64_MAX;
641 
642 	return 0;
643 }
644 
645 static void
646 destroy_task(struct dma_test_task *task)
647 {
648 	struct dma_test_req *req;
649 	uint32_t i;
650 
651 	for (i = 0; i < g_queue_depth; i++) {
652 		req = &task->reqs[i];
653 		if (req->mr) {
654 			ibv_dereg_mr(req->mr);
655 		}
656 		free(req->iov.iov_base);
657 	}
658 	free(task->reqs);
659 	TAILQ_REMOVE(&g_tasks, task, link);
660 	free(task);
661 }
662 
663 static void
664 destroy_tasks(void)
665 {
666 	struct dma_test_task *task, *tmp_task;
667 
668 	TAILQ_FOREACH_SAFE(task, &g_tasks, link, tmp_task) {
669 		destroy_task(task);
670 	}
671 }
672 
/* After the run, verify that every memory-domain operation requested via -x
 * was actually exercised, and that the op counters are consistent with the
 * number of completed requests. Returns 0 on success, -EINVAL on mismatch. */
static int
verify_tasks(void)
{
	struct dma_test_task *task;
	uint64_t total_requests = 0;
	uint64_t num_translations = 0;
	uint64_t num_pull_push = 0;
	uint64_t num_memzero = 0;
	int rc = 0;

	if (!g_test_ops) {
		/* No specific ops were requested, nothing to check */
		return rc;
	}

	/* Sum the per-task counters accumulated by the domain callbacks. */
	TAILQ_FOREACH(task, &g_tasks, link) {
		total_requests += task->stats.io_completed;
		num_translations += task->num_translations;
		num_pull_push += task->num_pull_push;
		num_memzero += task->num_mem_zero;
	}

	if (g_test_ops & DMA_TEST_DOMAIN_OP_TRANSLATE) {
		if (num_translations == 0) {
			fprintf(stderr, "Requested \"translate\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}
	if (g_test_ops & DMA_TEST_DOMAIN_OP_PULL_PUSH) {
		if (num_pull_push == 0) {
			fprintf(stderr, "Requested \"pull_push\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}
	if (g_test_ops & DMA_TEST_DOMAIN_OP_MEMZERO) {
		if (num_memzero == 0) {
			fprintf(stderr, "Requested \"memzero\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}

	/* bdev request can be split, so the total number of pull_push +translate operations
	 * can be bigger than total_number of requests */
	if (num_translations + num_pull_push + num_memzero < total_requests) {
		fprintf(stderr,
			"Operations number mismatch: translate %"PRIu64", pull_push %"PRIu64", mem_zero %"PRIu64" expected total %"PRIu64"\n",
			num_translations, num_pull_push, num_memzero, total_requests);
		rc = -EINVAL;
	} else {
		fprintf(stdout,
			"Total operations: %"PRIu64", translate %"PRIu64" pull_push %"PRIu64" memzero %"PRIu64"\n",
			total_requests, num_translations, num_pull_push, num_memzero);
	}

	return rc;
}
729 
/* SPDK app entry point: validate the target bdev, create the test memory
 * domain with its callbacks, allocate one task per core and dispatch their
 * per-thread construction. Errors stop the app with a negative errno. */
static void
dma_test_start(void *arg)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct dma_test_task *task;
	uint32_t block_size, i;
	int rc;

	/* Temporary descriptor, only used for the checks below; each task opens
	 * its own descriptor on its own thread. */
	rc = spdk_bdev_open_ext(g_bdev_name, true, dma_test_bdev_dummy_event_cb, NULL, &desc);
	if (rc) {
		fprintf(stderr, "Can't find bdev %s\n", g_bdev_name);
		spdk_app_stop(-ENODEV);
		return;
	}
	bdev = spdk_bdev_desc_get_bdev(desc);
	/* This function checks if bdev supports memory domains. Test is not failed if there are
	 * no memory domains since bdev layer can pull/push data */
	if (!dma_test_check_bdev_supports_rdma_memory_domain(bdev) && g_force_memory_domains_support) {
		fprintf(stderr, "Test aborted due to \"-f\" (force memory domains support) option\n");
		spdk_bdev_close(desc);
		spdk_app_stop(-ENODEV);
		return;
	}

	g_main_thread = spdk_get_thread();

	/* IO size must be a positive multiple of the bdev block size. */
	block_size = spdk_bdev_get_block_size(bdev);
	if (g_io_size < block_size || g_io_size % block_size != 0) {
		fprintf(stderr, "Invalid io_size %u requested, bdev block size %u\n", g_io_size, block_size);
		spdk_bdev_close(desc);
		spdk_app_stop(-EINVAL);
		return;
	}
	g_num_blocks_per_io = g_io_size / block_size;

	/* Create a memory domain to represent the source memory domain.
	 * Since we don't actually have a remote memory domain in this test, this will describe memory
	 * on the local system and the translation to the destination memory domain will be trivial.
	 * But this at least allows us to demonstrate the flow and test the functionality. */
	rc = spdk_memory_domain_create(&g_domain, SPDK_DMA_DEVICE_TYPE_RDMA, NULL, "test_dma");
	if (rc != 0) {
		spdk_bdev_close(desc);
		spdk_app_stop(rc);
		return;
	}
	spdk_memory_domain_set_translation(g_domain, dma_test_translate_memory_cb);
	spdk_memory_domain_set_pull(g_domain, dma_test_pull_memory_cb);
	spdk_memory_domain_set_push(g_domain, dma_test_push_memory_cb);
	spdk_memory_domain_set_memzero(g_domain, dma_test_memzero_cb);

	SPDK_ENV_FOREACH_CORE(i) {
		rc = allocate_task(i, g_bdev_name);
		if (rc) {
			destroy_tasks();
			spdk_bdev_close(desc);
			spdk_app_stop(rc);
			return;
		}
		g_num_construct_tasks++;
		g_num_complete_tasks++;
	}

	TAILQ_FOREACH(task, &g_tasks, link) {
		spdk_thread_send_msg(task->thread, dma_test_construct_task_on_thread, task);
	}

	/* The temporary descriptor is no longer needed. */
	spdk_bdev_close(desc);
}
799 
/* Print the tool-specific options (appended to SPDK's common usage).
 * Fixes: the -x help omitted the supported "memzero" value and the -w help
 * omitted the accepted "rw" pattern (both accepted by the parsers). */
static void
print_usage(void)
{
	printf(" -b <bdev>         bdev name for test\n");
	printf(" -f                force memory domains support - abort test if bdev doesn't report memory domains\n");
	printf(" -q <val>          io depth\n");
	printf(" -o <val>          io size in bytes\n");
	printf(" -t <val>          run time in seconds\n");
	printf(" -x <op,op>        Comma separated memory domain operations expected in the test. Values are \"translate\", \"pull_push\" and \"memzero\"\n");
	printf(" -w <str>          io pattern (read, write, randread, randwrite, rw, randrw)\n");
	printf(" -M <0-100>        rw percentage (100 for reads, 0 for writes)\n");
}
812 
813 static int
814 parse_expected_ops(const char *_str)
815 {
816 	char *str = strdup(_str);
817 	char *tok;
818 	int rc = 0;
819 
820 	if (!str) {
821 		fprintf(stderr, "Failed to dup args\n");
822 		return -ENOMEM;
823 	}
824 
825 	tok = strtok(str, ",");
826 	while (tok) {
827 		if (strcmp(tok, "translate") == 0) {
828 			g_test_ops |= DMA_TEST_DOMAIN_OP_TRANSLATE;
829 		} else if (strcmp(tok, "pull_push") == 0) {
830 			g_test_ops |= DMA_TEST_DOMAIN_OP_PULL_PUSH;
831 		} else if (strcmp(tok, "memzero") == 0) {
832 			g_test_ops |= DMA_TEST_DOMAIN_OP_MEMZERO;
833 		} else {
834 			fprintf(stderr, "Unknown value %s\n", tok);
835 			rc = -EINVAL;
836 			break;
837 		}
838 		tok = strtok(NULL, ",");
839 	}
840 
841 	free(str);
842 
843 	if (g_test_ops == 0 || rc) {
844 		fprintf(stderr, "-e \"%s\" specified but nothing was parsed\n", _str);
845 		return -EINVAL;
846 	}
847 
848 	return rc;
849 }
850 
851 static int
852 parse_arg(int ch, char *arg)
853 {
854 	long tmp;
855 
856 	switch (ch) {
857 	case 'q':
858 	case 'o':
859 	case 't':
860 	case 'M':
861 		tmp = spdk_strtol(arg, 10);
862 		if (tmp < 0) {
863 			fprintf(stderr, "Invalid option %c value %s\n", ch, arg);
864 			return 1;
865 		}
866 
867 		switch (ch) {
868 		case 'q':
869 			g_queue_depth = (uint32_t) tmp;
870 			break;
871 		case 'o':
872 			g_io_size = (uint32_t) tmp;
873 			break;
874 		case 't':
875 			g_run_time_sec = (uint32_t) tmp;
876 			break;
877 		case 'M':
878 			g_rw_percentage = (uint32_t) tmp;
879 			break;
880 		}
881 		break;
882 	case 'w':
883 		g_rw_mode_str = arg;
884 		break;
885 	case 'b':
886 		g_bdev_name = arg;
887 		break;
888 	case 'f':
889 		g_force_memory_domains_support = true;
890 		break;
891 	case 'x':
892 		if (parse_expected_ops(arg)) {
893 			return 1;
894 		}
895 		break;
896 	default:
897 		fprintf(stderr, "Unknown option %c\n", ch);
898 		return 1;
899 	}
900 
901 	return 0;
902 }
903 
904 static int
905 verify_args(void)
906 {
907 	const char *rw_mode = g_rw_mode_str;
908 
909 	if (g_queue_depth == 0) {
910 		fprintf(stderr, "queue depth (-q) is not set\n");
911 		return 1;
912 	}
913 	if (g_io_size == 0) {
914 		fprintf(stderr, "io size (-o) is not set\n");
915 		return 1;
916 	}
917 	if (g_run_time_sec == 0) {
918 		fprintf(stderr, "test run time (-t) is not set\n");
919 		return 1;
920 	}
921 	if (!rw_mode) {
922 		fprintf(stderr, "io pattern (-w) is not set\n");
923 		return 1;
924 	}
925 	if (strncmp(rw_mode, "rand", 4) == 0) {
926 		g_is_random = true;
927 		rw_mode = &rw_mode[4];
928 	}
929 	if (strcmp(rw_mode, "read") == 0 || strcmp(rw_mode, "write") == 0) {
930 		if (g_rw_percentage > 0) {
931 			fprintf(stderr, "Ignoring -M option\n");
932 		}
933 		g_rw_percentage = strcmp(rw_mode, "read") == 0 ? 100 : 0;
934 	} else if (strcmp(rw_mode, "rw") == 0) {
935 		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
936 			fprintf(stderr, "Invalid -M value (%d) must be 0..100\n", g_rw_percentage);
937 			return 1;
938 		}
939 	} else {
940 		fprintf(stderr, "io pattern (-w) one of [read, write, randread, randwrite, rw, randrw]\n");
941 		return 1;
942 	}
943 	if (!g_bdev_name) {
944 		fprintf(stderr, "bdev name (-b) is not set\n");
945 		return 1;
946 	}
947 
948 	return 0;
949 }
950 
int
main(int argc, char **argv)
{
	struct spdk_app_opts opts = {};
	int rc;

	spdk_app_opts_init(&opts, sizeof(opts));
	opts.name = "test_dma";
	opts.shutdown_cb = dma_test_shutdown_cb;

	/* Common SPDK args plus the test-specific options (see print_usage). */
	rc = spdk_app_parse_args(argc, argv, &opts, "b:fq:o:t:x:w:M:", NULL, parse_arg, print_usage);
	if (rc != SPDK_APP_PARSE_ARGS_SUCCESS) {
		exit(rc);
	}

	rc = verify_args();
	if (rc) {
		exit(rc);
	}

	/* Blocks until spdk_app_stop() is called by the test logic. */
	rc = spdk_app_start(&opts, dma_test_start, NULL);
	if (rc == 0) {
		/* Run finished cleanly: confirm the requested domain ops executed. */
		rc = verify_tasks();
	}
	destroy_tasks();
	spdk_app_fini();

	return rc;
}
980