/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2021, 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/dma.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/md5.h"

#include <infiniband/verbs.h>

struct dma_test_task;

struct dma_test_req {
	struct iovec *iovs;
	struct spdk_bdev_ext_io_opts io_opts;
	uint64_t io_offset;
	uint64_t submit_tsc;
	struct ibv_mr *mr;
	struct dma_test_task *task;
	void *buffer;
	uint32_t idx;
	uint8_t md5_orig[SPDK_MD5DIGEST_LEN];
};

struct dma_test_task_stats {
	uint64_t io_completed;
	uint64_t total_tsc;
	uint64_t min_tsc;
	uint64_t max_tsc;
};

struct dma_test_task {
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *channel;
	uint64_t cur_io_offset;
	uint64_t max_offset_in_ios;
	uint64_t num_blocks_per_io;
	uint64_t num_blocks_per_core;
	int rw_percentage;
	uint32_t seed;
	uint32_t io_inflight;
	struct dma_test_task_stats stats;
	struct dma_test_task_stats last_stats;
	bool is_draining;
	struct dma_test_req *reqs;
	struct spdk_thread *thread;
	const char *bdev_name;
	uint64_t num_translations;
	uint64_t num_pull_push;
	uint64_t num_mem_zero;
	uint32_t lcore;
	uint32_t idx; /* sequential number of this task */

	TAILQ_ENTRY(dma_test_task) link;
};

struct dma_test_data_cpl_ctx {
	spdk_memory_domain_data_cpl_cb data_cpl;
	void *data_cpl_arg;
};

enum dma_test_domain_ops {
	DMA_TEST_DOMAIN_OP_TRANSLATE = 1u << 0,
	DMA_TEST_DOMAIN_OP_PULL_PUSH = 1u << 1,
	DMA_TEST_DOMAIN_OP_MEMZERO = 1u << 2,
};

TAILQ_HEAD(, dma_test_task) g_tasks = TAILQ_HEAD_INITIALIZER(g_tasks);

/* User's input */
static char *g_bdev_name;
static const char *g_rw_mode_str;
static int g_rw_percentage = -1;
static uint32_t g_queue_depth;
static uint32_t g_io_size;
static uint32_t g_run_time_sec;
static uint32_t g_run_count;
static uint32_t g_test_ops;
static uint32_t g_corrupt_mkey_counter;
static uint32_t g_iovcnt = 1;
static bool g_is_random;
static bool g_verify;
static bool g_force_memory_domains_support;

static struct spdk_thread *g_main_thread;
static struct spdk_poller *g_runtime_poller;
static struct spdk_memory_domain *g_domain;
static uint64_t g_num_blocks_per_io;
static uint32_t g_num_construct_tasks;
static uint32_t g_num_complete_tasks;
static uint64_t g_start_tsc;
static int g_run_rc;

static void destroy_tasks(void);
static int dma_test_submit_io(struct dma_test_req *req);

static void
print_total_stats(void)
{
	struct dma_test_task *task;
	uint64_t tsc_rate = spdk_get_ticks_hz();
	uint64_t test_time_usec = (spdk_get_ticks() - g_start_tsc) * SPDK_SEC_TO_USEC / tsc_rate;
	uint64_t total_tsc = 0, total_io_completed = 0;
	double task_iops, task_bw, task_min_lat, task_avg_lat, task_max_lat;
	double total_iops = 0, total_bw = 0, total_min_lat = (double)UINT64_MAX, total_max_lat = 0,
	       total_avg_lat;

	printf("==========================================================================\n");
	printf("%*s\n", 55, "Latency [us]");
	printf("%*s %10s %10s %10s %10s\n", 19, "IOPS", "MiB/s", "Average", "min", "max");

	TAILQ_FOREACH(task, &g_tasks, link) {
		if (!task->stats.io_completed) {
			continue;
		}
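		/* Convert the raw TSC counters gathered per task into IOPS, MiB/s and
		 * microsecond latencies; stats.total_tsc accumulates the latency of every
		 * completed IO, so dividing it by io_completed yields the average. */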
		task_iops = (double)task->stats.io_completed * SPDK_SEC_TO_USEC / test_time_usec;
		task_bw = task_iops * g_io_size / (1024 * 1024);
		task_avg_lat = (double)task->stats.total_tsc / task->stats.io_completed * SPDK_SEC_TO_USEC /
			       tsc_rate;
		task_min_lat = (double)task->stats.min_tsc * SPDK_SEC_TO_USEC / tsc_rate;
		task_max_lat = (double)task->stats.max_tsc * SPDK_SEC_TO_USEC / tsc_rate;

		total_iops += task_iops;
		total_bw += task_bw;
		total_io_completed += task->stats.io_completed;
		total_tsc += task->stats.total_tsc;
		if (task_min_lat < total_min_lat) {
			total_min_lat = task_min_lat;
		}
		if (task_max_lat > total_max_lat) {
			total_max_lat = task_max_lat;
		}
		printf("Core %2u: %10.2f %10.2f %10.2f %10.2f %10.2f\n",
		       task->lcore, task_iops, task_bw, task_avg_lat, task_min_lat, task_max_lat);
	}

	if (total_io_completed) {
		total_avg_lat = (double)total_tsc / total_io_completed * SPDK_SEC_TO_USEC / tsc_rate;
		printf("==========================================================================\n");
		printf("%-*s %10.2f %10.2f %10.2f %10.2f %10.2f\n",
		       8, "Total :", total_iops, total_bw, total_avg_lat, total_min_lat, total_max_lat);
		printf("\n");
	}
}

static void
print_periodic_stats(void)
{
	struct dma_test_task *task;
	uint64_t io_last_sec = 0, tsc_last_sec = 0;
	double lat_last_sec, bw_last_sec;

	TAILQ_FOREACH(task, &g_tasks, link) {
		io_last_sec += task->stats.io_completed - task->last_stats.io_completed;
		tsc_last_sec += task->stats.total_tsc - task->last_stats.total_tsc;
		memcpy(&task->last_stats, &task->stats, sizeof(task->stats));
	}

	printf("Running %3u/%-3u sec", g_run_count, g_run_time_sec);
	if (io_last_sec) {
		lat_last_sec = (double)tsc_last_sec / io_last_sec * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
		bw_last_sec = (double)io_last_sec * g_io_size / (1024 * 1024);
		printf(" IOPS: %-8"PRIu64" BW: %-6.2f [MiB/s] avg.lat %-5.2f [us]",
		       io_last_sec, bw_last_sec, lat_last_sec);
	}

	printf("\r");
	fflush(stdout);
}

static void
dma_test_task_complete(void *ctx)
{
	assert(g_num_complete_tasks > 0);

	if (--g_num_complete_tasks == 0) {
		spdk_poller_unregister(&g_runtime_poller);
		print_total_stats();
		spdk_app_stop(g_run_rc);
	}
}

static inline void
dma_test_check_and_signal_task_done(struct dma_test_task *task)
{
	if (task->io_inflight == 0) {
		spdk_put_io_channel(task->channel);
		spdk_bdev_close(task->desc);
		spdk_thread_send_msg(g_main_thread, dma_test_task_complete, task);
		spdk_thread_exit(spdk_get_thread());
	}
}

static inline void
dma_test_task_update_stats(struct dma_test_task *task, uint64_t submit_tsc)
{
	uint64_t tsc_diff = spdk_get_ticks() - submit_tsc;

	task->stats.io_completed++;
	task->stats.total_tsc += tsc_diff;
	if (spdk_unlikely(tsc_diff < task->stats.min_tsc)) {
		task->stats.min_tsc = tsc_diff;
	}
	if (spdk_unlikely(tsc_diff > task->stats.max_tsc)) {
		task->stats.max_tsc = tsc_diff;
	}
}

static void
dma_test_bdev_io_completion_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct dma_test_req *req = cb_arg;
	struct dma_test_task *task = req->task;

	assert(task->io_inflight > 0);
	--task->io_inflight;
	dma_test_task_update_stats(task, req->submit_tsc);

	if (!success && !g_corrupt_mkey_counter) {
		if (!g_run_rc) {
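			/* Report only the first failure; g_run_rc keeps the original error
			 * code while every task drains its outstanding IO. */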
fprintf(stderr, "IO completed with error\n"); 229 g_run_rc = -1; 230 } 231 task->is_draining = true; 232 } 233 234 spdk_bdev_free_io(bdev_io); 235 236 if (spdk_unlikely(task->is_draining)) { 237 dma_test_check_and_signal_task_done(task); 238 return; 239 } 240 241 dma_test_submit_io(req); 242 } 243 244 static void 245 dma_test_bdev_io_completion_verify_read_done(struct spdk_bdev_io *bdev_io, bool success, 246 void *cb_arg) 247 { 248 uint8_t md5_new[SPDK_MD5DIGEST_LEN]; 249 struct dma_test_req *req = cb_arg; 250 struct dma_test_task *task = req->task; 251 struct spdk_md5ctx md5ctx; 252 253 assert(task->io_inflight > 0); 254 --task->io_inflight; 255 dma_test_task_update_stats(task, req->submit_tsc); 256 257 if (!success && !g_corrupt_mkey_counter) { 258 if (!g_run_rc) { 259 fprintf(stderr, "IO completed with error\n"); 260 g_run_rc = -1; 261 } 262 task->is_draining = true; 263 } 264 265 spdk_bdev_free_io(bdev_io); 266 267 if (spdk_unlikely(task->is_draining)) { 268 dma_test_check_and_signal_task_done(task); 269 return; 270 } 271 272 spdk_md5init(&md5ctx); 273 spdk_md5update(&md5ctx, req->buffer, g_io_size); 274 spdk_md5final(md5_new, &md5ctx); 275 276 if (memcmp(req->md5_orig, md5_new, SPDK_MD5DIGEST_LEN) != 0) { 277 fprintf(stderr, "lcore %u, offset %"PRIu64" md5 mismatch\n", task->lcore, req->io_offset); 278 if (!g_run_rc) { 279 g_run_rc = -1; 280 } 281 task->is_draining = true; 282 dma_test_check_and_signal_task_done(task); 283 return; 284 } 285 286 dma_test_submit_io(req); 287 } 288 289 static void 290 dma_test_bdev_io_completion_verify_write_done(struct spdk_bdev_io *bdev_io, bool success, 291 void *cb_arg) 292 { 293 struct dma_test_req *req = cb_arg; 294 struct dma_test_task *task = req->task; 295 int rc; 296 297 assert(task->io_inflight > 0); 298 --task->io_inflight; 299 dma_test_task_update_stats(task, req->submit_tsc); 300 301 if (!success && !g_corrupt_mkey_counter) { 302 if (!g_run_rc) { 303 fprintf(stderr, "IO completed with error\n"); 304 g_run_rc = -1; 305 } 306 task->is_draining = true; 307 } 308 309 spdk_bdev_free_io(bdev_io); 310 311 if (spdk_unlikely(task->is_draining)) { 312 dma_test_check_and_signal_task_done(task); 313 return; 314 } 315 316 req->submit_tsc = spdk_get_ticks(); 317 rc = spdk_bdev_readv_blocks_ext(task->desc, task->channel, req->iovs, g_iovcnt, 318 req->io_offset, task->num_blocks_per_io, 319 dma_test_bdev_io_completion_verify_read_done, req, &req->io_opts); 320 if (spdk_unlikely(rc)) { 321 if (!g_run_rc) { 322 /* log an error only once */ 323 fprintf(stderr, "Failed to submit read IO, rc %d, stop sending IO\n", rc); 324 g_run_rc = rc; 325 } 326 task->is_draining = true; 327 dma_test_check_and_signal_task_done(task); 328 return; 329 } 330 331 task->io_inflight++; 332 } 333 334 static inline uint64_t 335 dma_test_get_offset_in_ios(struct dma_test_task *task, uint32_t req_offset) 336 { 337 uint64_t offset; 338 339 if (g_is_random) { 340 offset = rand_r(&task->seed) % task->max_offset_in_ios; 341 if (g_verify) { 342 offset += task->num_blocks_per_core * task->idx; 343 offset += task->max_offset_in_ios * req_offset; 344 } 345 } else { 346 offset = task->cur_io_offset++; 347 if (spdk_unlikely(task->cur_io_offset == task->max_offset_in_ios)) { 348 task->cur_io_offset = 0; 349 } 350 } 351 352 return offset; 353 } 354 355 static inline bool 356 dma_test_task_is_read(struct dma_test_task *task) 357 { 358 if (g_verify) { 359 return false; 360 } 361 if (task->rw_percentage == 100) { 362 return true; 363 } 364 if (task->rw_percentage != 0 && (rand_r(&task->seed) % 100) 
< task->rw_percentage) { 365 return true; 366 } 367 return false; 368 } 369 370 static void 371 dma_test_data_cpl(void *ctx) 372 { 373 struct dma_test_data_cpl_ctx *cpl_ctx = ctx; 374 375 cpl_ctx->data_cpl(cpl_ctx->data_cpl_arg, 0); 376 free(cpl_ctx); 377 } 378 379 static int 380 dma_test_copy_memory(struct dma_test_req *req, struct iovec *dst_iov, uint32_t dst_iovcnt, 381 struct iovec *src_iov, uint32_t src_iovcnt, spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg) 382 { 383 struct dma_test_data_cpl_ctx *cpl_ctx; 384 385 cpl_ctx = calloc(1, sizeof(*cpl_ctx)); 386 if (!cpl_ctx) { 387 return -ENOMEM; 388 } 389 390 cpl_ctx->data_cpl = cpl_cb; 391 cpl_ctx->data_cpl_arg = cpl_cb_arg; 392 393 spdk_iovcpy(src_iov, src_iovcnt, dst_iov, dst_iovcnt); 394 req->task->num_pull_push++; 395 spdk_thread_send_msg(req->task->thread, dma_test_data_cpl, cpl_ctx); 396 397 return 0; 398 } 399 400 static int 401 dma_test_push_memory_cb(struct spdk_memory_domain *dst_domain, 402 void *dst_domain_ctx, 403 struct iovec *dst_iov, uint32_t dst_iovcnt, struct iovec *src_iov, uint32_t src_iovcnt, 404 spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg) 405 { 406 struct dma_test_req *req = dst_domain_ctx; 407 408 return dma_test_copy_memory(req, dst_iov, dst_iovcnt, src_iov, src_iovcnt, cpl_cb, cpl_cb_arg); 409 } 410 411 static int 412 dma_test_pull_memory_cb(struct spdk_memory_domain *src_domain, 413 void *src_domain_ctx, 414 struct iovec *src_iov, uint32_t src_iovcnt, struct iovec *dst_iov, uint32_t dst_iovcnt, 415 spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg) 416 { 417 struct dma_test_req *req = src_domain_ctx; 418 419 return dma_test_copy_memory(req, dst_iov, dst_iovcnt, src_iov, src_iovcnt, cpl_cb, cpl_cb_arg); 420 } 421 422 static int 423 dma_test_memzero_cb(struct spdk_memory_domain *src_domain, void *src_domain_ctx, 424 struct iovec *iov, uint32_t iovcnt, 425 spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg) 426 { 427 struct dma_test_req *req = src_domain_ctx; 428 struct dma_test_data_cpl_ctx *cpl_ctx; 429 uint32_t i; 430 431 cpl_ctx = calloc(1, sizeof(*cpl_ctx)); 432 if (!cpl_ctx) { 433 return -ENOMEM; 434 } 435 436 cpl_ctx->data_cpl = cpl_cb; 437 cpl_ctx->data_cpl_arg = cpl_cb_arg; 438 439 for (i = 0; i < iovcnt; i++) { 440 memset(iov[i].iov_base, 0, iov[i].iov_len); 441 } 442 req->task->num_mem_zero++; 443 444 spdk_thread_send_msg(req->task->thread, dma_test_data_cpl, cpl_ctx); 445 446 return 0; 447 } 448 449 450 static int 451 dma_test_translate_memory_cb(struct spdk_memory_domain *src_domain, void *src_domain_ctx, 452 struct spdk_memory_domain *dst_domain, struct spdk_memory_domain_translation_ctx *dst_domain_ctx, 453 void *addr, size_t len, struct spdk_memory_domain_translation_result *result) 454 { 455 struct dma_test_req *req = src_domain_ctx; 456 struct dma_test_task *task = req->task; 457 struct ibv_qp *dst_domain_qp = (struct ibv_qp *)dst_domain_ctx->rdma.ibv_qp; 458 459 if (spdk_unlikely(addr < req->buffer || 460 (uint8_t *)addr + len > (uint8_t *)req->buffer + g_io_size)) { 461 fprintf(stderr, "incorrect data %p, len %zu\n", addr, len); 462 return -1; 463 } 464 465 if (spdk_unlikely(!req->mr)) { 466 req->mr = ibv_reg_mr(dst_domain_qp->pd, req->buffer, g_io_size, 467 IBV_ACCESS_LOCAL_WRITE | 468 IBV_ACCESS_REMOTE_READ | 469 IBV_ACCESS_REMOTE_WRITE); 470 if (!req->mr) { 471 fprintf(stderr, "Failed to register memory region, errno %d\n", errno); 472 return -1; 473 } 474 } 475 476 result->iov.iov_base = addr; 477 result->iov.iov_len = len; 478 result->iov_count = 1; 479 
	result->rdma.lkey = req->mr->lkey;
	result->rdma.rkey = req->mr->rkey;
	result->dst_domain = dst_domain;

	task->num_translations++;

	if (g_corrupt_mkey_counter && task->num_translations >= g_corrupt_mkey_counter &&
	    task->num_translations % g_corrupt_mkey_counter == 0) {
		SPDK_NOTICELOG("Corrupt mkey on core %u\n", task->lcore);
		result->rdma.lkey = 0xffffffff;
		result->rdma.rkey = 0xffffffff;
	}

	return 0;
}

static int
dma_test_submit_io(struct dma_test_req *req)
{
	struct dma_test_task *task = req->task;
	int rc;
	bool is_read;

	req->io_offset = dma_test_get_offset_in_ios(task, req->idx) * task->num_blocks_per_io;
	req->submit_tsc = spdk_get_ticks();
	is_read = dma_test_task_is_read(task);
	if (is_read) {
		rc = spdk_bdev_readv_blocks_ext(task->desc, task->channel, req->iovs, g_iovcnt,
						req->io_offset, task->num_blocks_per_io,
						dma_test_bdev_io_completion_cb, req, &req->io_opts);
	} else {
		rc = spdk_bdev_writev_blocks_ext(task->desc, task->channel, req->iovs, g_iovcnt,
						 req->io_offset, task->num_blocks_per_io,
						 g_verify ? dma_test_bdev_io_completion_verify_write_done
							  : dma_test_bdev_io_completion_cb,
						 req, &req->io_opts);
	}

	if (spdk_unlikely(rc)) {
		if (!g_run_rc) {
			/* log an error only once */
			fprintf(stderr, "Failed to submit %s IO, rc %d, stop sending IO\n",
				is_read ? "read" : "write", rc);
			g_run_rc = rc;
		}
		task->is_draining = true;
		dma_test_check_and_signal_task_done(task);
		return rc;
	}

	task->io_inflight++;

	return 0;
}

static void
dma_test_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct dma_test_task *task = event_ctx;

	if (type == SPDK_BDEV_EVENT_REMOVE) {
		task->is_draining = true;
	}
}

static void
dma_test_bdev_dummy_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
			     void *event_ctx)
{
}

static void
dma_test_task_run(void *ctx)
{
	struct dma_test_task *task = ctx;
	uint32_t i;
	int rc = 0;

	for (i = 0; i < g_queue_depth && rc == 0; i++) {
		rc = dma_test_submit_io(&task->reqs[i]);
	}
}

static void
dma_test_drain_task(void *ctx)
{
	struct dma_test_task *task = ctx;

	task->is_draining = true;
}

static void
dma_test_shutdown_cb(void)
{
	struct dma_test_task *task;

	spdk_poller_unregister(&g_runtime_poller);

	TAILQ_FOREACH(task, &g_tasks, link) {
		spdk_thread_send_msg(task->thread, dma_test_drain_task, task);
	}
}

static int
dma_test_run_time_poller(void *ctx)
{
	g_run_count++;

	if (g_run_count < g_run_time_sec) {
		if (isatty(STDOUT_FILENO)) {
			print_periodic_stats();
		}
	} else {
		dma_test_shutdown_cb();
	}

	return SPDK_POLLER_BUSY;
}

static void
dma_test_construct_task_done(void *ctx)
{
	struct dma_test_task *task;

	assert(g_num_construct_tasks > 0);
	--g_num_construct_tasks;

	if (g_num_construct_tasks != 0) {
		return;
	}

	if (g_run_rc) {
		fprintf(stderr, "Initialization failed with error %d\n", g_run_rc);
		spdk_app_stop(g_run_rc);
		return;
	}

	g_runtime_poller = spdk_poller_register_named(dma_test_run_time_poller, NULL,
			   1 * 1000 * 1000, "dma_test_run_time_poller");
	if (!g_runtime_poller) {
		fprintf(stderr, "Failed to run timer\n");
		spdk_app_stop(-1);
		return;
	}

	printf("Initialization complete, running %s IO for %u sec on %u cores\n", g_rw_mode_str,
	       g_run_time_sec, spdk_env_get_core_count());
	g_start_tsc = spdk_get_ticks();
	TAILQ_FOREACH(task, &g_tasks, link) {
		spdk_thread_send_msg(task->thread, dma_test_task_run, task);
	}
}

static void
dma_test_construct_task_on_thread(void *ctx)
{
	struct dma_test_task *task = ctx;
	int rc;

	rc = spdk_bdev_open_ext(task->bdev_name, true, dma_test_bdev_event_cb, task, &task->desc);
	if (rc) {
		fprintf(stderr, "Failed to open bdev %s, rc %d\n", task->bdev_name, rc);
		g_run_rc = rc;
		spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
		return;
	}

	task->channel = spdk_bdev_get_io_channel(task->desc);
	if (!task->channel) {
		spdk_bdev_close(task->desc);
		task->desc = NULL;
		fprintf(stderr, "Failed to get io channel for bdev %s\n", task->bdev_name);
		g_run_rc = -ENOMEM;
		spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
		return;
	}

	task->max_offset_in_ios = spdk_bdev_get_num_blocks(spdk_bdev_desc_get_bdev(
					  task->desc)) / task->num_blocks_per_io;
	if (g_verify) {
		/* In verify mode each req writes a buffer and then reads its content back. It is
		 * possible that while one req is reading a buffer, another req from another thread
		 * writes new data to the same LBA. To prevent that, split the LBA range among
		 * threads and then split each smaller range among requests. */
		task->num_blocks_per_core = task->max_offset_in_ios / spdk_env_get_core_count();
		task->max_offset_in_ios = task->num_blocks_per_core;
		if (!task->max_offset_in_ios) {
			fprintf(stderr, "Disk is too small to run on %u cores\n", spdk_env_get_core_count());
			g_run_rc = -EINVAL;
			spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
			return;
		}
		task->max_offset_in_ios /= g_queue_depth;
		if (!task->max_offset_in_ios) {
			fprintf(stderr, "Disk is too small to run on %u cores with qdepth %u\n",
				spdk_env_get_core_count(), g_queue_depth);
			g_run_rc = -EINVAL;
			spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, NULL);
			return;
		}
	}

	spdk_thread_send_msg(g_main_thread, dma_test_construct_task_done, task);
}

static bool
dma_test_check_bdev_supports_rdma_memory_domain(struct spdk_bdev *bdev)
{
	struct spdk_memory_domain **bdev_domains;
	int bdev_domains_count, bdev_domains_count_tmp, i;
	bool rdma_domain_supported = false;

	bdev_domains_count = spdk_bdev_get_memory_domains(bdev, NULL, 0);

	if (bdev_domains_count < 0) {
		fprintf(stderr, "Failed to get bdev memory domains count, rc %d\n", bdev_domains_count);
		return false;
	} else if (bdev_domains_count == 0) {
		fprintf(stderr, "bdev %s doesn't support any memory domains\n", spdk_bdev_get_name(bdev));
		return false;
	}

	fprintf(stdout, "bdev %s reports %d memory domains\n", spdk_bdev_get_name(bdev),
		bdev_domains_count);

	bdev_domains = calloc((size_t)bdev_domains_count, sizeof(*bdev_domains));
	if (!bdev_domains) {
		fprintf(stderr, "Failed to allocate memory domains\n");
		return false;
	}

	bdev_domains_count_tmp = spdk_bdev_get_memory_domains(bdev, bdev_domains, bdev_domains_count);
	if (bdev_domains_count_tmp != bdev_domains_count) {
		fprintf(stderr, "Unexpected bdev domains return value %d\n", bdev_domains_count_tmp);
		free(bdev_domains);
		return false;
	}

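	/* Look through the reported domains for one of RDMA type; that is the only
	 * type whose translations this test's memory domain can produce. */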
	for (i = 0; i < bdev_domains_count; i++) {
		if (spdk_memory_domain_get_dma_device_type(bdev_domains[i]) == SPDK_DMA_DEVICE_TYPE_RDMA) {
			/* Bdev supports a memory domain of RDMA type, we can try to submit IO
			 * requests to it using the bdev ext API */
			rdma_domain_supported = true;
			break;
		}
	}

	fprintf(stdout, "bdev %s %s RDMA memory domain\n", spdk_bdev_get_name(bdev),
		rdma_domain_supported ? "supports" : "doesn't support");
	free(bdev_domains);

	return rdma_domain_supported;
}

static int
req_alloc_buffers(struct dma_test_req *req)
{
	struct spdk_md5ctx md5ctx;
	size_t iov_len, remainder;
	uint32_t i;

	iov_len = g_io_size / g_iovcnt;
	remainder = g_io_size - iov_len * g_iovcnt;

	req->buffer = malloc(g_io_size);
	if (!req->buffer) {
		return -ENOMEM;
	}
	memset(req->buffer, (int)req->idx + 1, g_io_size);
	req->iovs = calloc(g_iovcnt, sizeof(struct iovec));
	if (!req->iovs) {
		return -ENOMEM;
	}
	for (i = 0; i < g_iovcnt; i++) {
		req->iovs[i].iov_len = iov_len;
		req->iovs[i].iov_base = (uint8_t *)req->buffer + iov_len * i;
	}
	/* Assign any remainder bytes to the last iov so the iovs cover the whole buffer */
	req->iovs[g_iovcnt - 1].iov_len += remainder;
	if (g_verify) {
		spdk_md5init(&md5ctx);
		spdk_md5update(&md5ctx, req->buffer, g_io_size);
		spdk_md5final(req->md5_orig, &md5ctx);
	}

	return 0;
}

static int
allocate_task(uint32_t core, const char *bdev_name)
{
	char thread_name[32];
	struct spdk_cpuset cpu_set;
	uint32_t i;
	struct dma_test_task *task;
	struct dma_test_req *req;
	int rc;

	task = calloc(1, sizeof(*task));
	if (!task) {
		fprintf(stderr, "Failed to allocate per thread task\n");
		return -ENOMEM;
	}

	TAILQ_INSERT_TAIL(&g_tasks, task, link);

	task->reqs = calloc(g_queue_depth, sizeof(*task->reqs));
	if (!task->reqs) {
		fprintf(stderr, "Failed to allocate requests\n");
		return -ENOMEM;
	}

	task->lcore = core;
	task->seed = core;
	for (i = 0; i < g_queue_depth; i++) {
		req = &task->reqs[i];
		req->task = task;
		req->idx = i;
		rc = req_alloc_buffers(req);
		if (rc) {
			fprintf(stderr, "Failed to allocate request data buffer\n");
			return rc;
		}

		req->io_opts.size = sizeof(req->io_opts);
		req->io_opts.memory_domain = g_domain;
		req->io_opts.memory_domain_ctx = req;
	}

	snprintf(thread_name, sizeof(thread_name), "task_%u", core);
	spdk_cpuset_zero(&cpu_set);
	spdk_cpuset_set_cpu(&cpu_set, core, true);
	task->thread = spdk_thread_create(thread_name, &cpu_set);
	if (!task->thread) {
		fprintf(stderr, "Failed to create SPDK thread, core %u, cpu_mask %s\n", core,
			spdk_cpuset_fmt(&cpu_set));
		return -ENOMEM;
	}
	task->idx = g_num_construct_tasks++;
	task->bdev_name = bdev_name;
	task->rw_percentage = g_rw_percentage;
	task->num_blocks_per_io = g_num_blocks_per_io;
	task->stats.min_tsc = UINT64_MAX;

	return 0;
}

static void
destroy_task(struct dma_test_task *task)
{
	struct dma_test_req *req;
	uint32_t i;

	for (i = 0; i < g_queue_depth; i++) {
		req = &task->reqs[i];
		if (req->mr) {
			ibv_dereg_mr(req->mr);
		}
		free(req->buffer);
		free(req->iovs);
	}
	free(task->reqs);
	TAILQ_REMOVE(&g_tasks, task, link);
	free(task);
}

static void
destroy_tasks(void)
{
	struct dma_test_task *task, *tmp_task;

	TAILQ_FOREACH_SAFE(task, &g_tasks, link, tmp_task) {
		destroy_task(task);
	}
}

static int
verify_tasks(void)
{
	struct dma_test_task *task;
	uint64_t total_requests = 0;
	uint64_t num_translations = 0;
	uint64_t num_pull_push = 0;
	uint64_t num_memzero = 0;
	int rc = 0;

	if (!g_test_ops) {
		/* No specific ops were requested, nothing to check */
		return rc;
	}

	TAILQ_FOREACH(task, &g_tasks, link) {
		total_requests += task->stats.io_completed;
		num_translations += task->num_translations;
		num_pull_push += task->num_pull_push;
		num_memzero += task->num_mem_zero;
	}

	if (g_test_ops & DMA_TEST_DOMAIN_OP_TRANSLATE) {
		if (num_translations == 0) {
			fprintf(stderr, "Requested \"translate\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}
	if (g_test_ops & DMA_TEST_DOMAIN_OP_PULL_PUSH) {
		if (num_pull_push == 0) {
			fprintf(stderr, "Requested \"pull_push\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}
	if (g_test_ops & DMA_TEST_DOMAIN_OP_MEMZERO) {
		if (num_memzero == 0) {
			fprintf(stderr, "Requested \"memzero\" operation, but it was not executed\n");
			rc = -EINVAL;
		}
	}

	/* A bdev request can be split, so the total number of translate + pull_push + memzero
	 * operations can be bigger than the total number of requests */
	if (num_translations + num_pull_push + num_memzero < total_requests) {
		fprintf(stderr,
			"Operations number mismatch: translate %"PRIu64", pull_push %"PRIu64", mem_zero %"PRIu64", expected total %"PRIu64"\n",
			num_translations, num_pull_push, num_memzero, total_requests);
		rc = -EINVAL;
	} else {
		fprintf(stdout,
			"Total operations: %"PRIu64", translate %"PRIu64", pull_push %"PRIu64", memzero %"PRIu64"\n",
			total_requests, num_translations, num_pull_push, num_memzero);
	}

	return rc;
}

static void
dma_test_start(void *arg)
{
	struct spdk_bdev_desc *desc;
	struct spdk_bdev *bdev;
	struct dma_test_task *task;
	uint32_t block_size, i;
	int rc;

	rc = spdk_bdev_open_ext(g_bdev_name, true, dma_test_bdev_dummy_event_cb, NULL, &desc);
	if (rc) {
		fprintf(stderr, "Can't find bdev %s\n", g_bdev_name);
		spdk_app_stop(-ENODEV);
		return;
	}
	bdev = spdk_bdev_desc_get_bdev(desc);
	/* This function checks whether the bdev supports memory domains. The test does not fail
	 * if there are no memory domains since the bdev layer can pull/push data on its own */
	if (!dma_test_check_bdev_supports_rdma_memory_domain(bdev) && g_force_memory_domains_support) {
		fprintf(stderr, "Test aborted due to \"-f\" (force memory domains support) option\n");
		spdk_bdev_close(desc);
		spdk_app_stop(-ENODEV);
		return;
	}

	g_main_thread = spdk_get_thread();

	block_size = spdk_bdev_get_block_size(bdev);
	if (g_io_size < block_size || g_io_size % block_size != 0) {
		fprintf(stderr, "Invalid io_size %u requested, bdev block size %u\n", g_io_size, block_size);
		spdk_bdev_close(desc);
		spdk_app_stop(-EINVAL);
		return;
	}
	g_num_blocks_per_io = g_io_size / block_size;

	/* Create a memory domain to represent the source memory domain.
	 * Since we don't actually have a remote memory domain in this test, this will describe memory
	 * on the local system and the translation to the destination memory domain will be trivial.
	 * But this at least allows us to demonstrate the flow and test the functionality.
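	 * Pull/push and memzero are likewise emulated locally: the callbacks simply
	 * copy (spdk_iovcpy) or memset the request buffers on the owning thread
	 * before signalling completion.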
	 */
	rc = spdk_memory_domain_create(&g_domain, SPDK_DMA_DEVICE_TYPE_RDMA, NULL, "test_dma");
	if (rc != 0) {
		spdk_bdev_close(desc);
		spdk_app_stop(rc);
		return;
	}
	spdk_memory_domain_set_translation(g_domain, dma_test_translate_memory_cb);
	spdk_memory_domain_set_pull(g_domain, dma_test_pull_memory_cb);
	spdk_memory_domain_set_push(g_domain, dma_test_push_memory_cb);
	spdk_memory_domain_set_memzero(g_domain, dma_test_memzero_cb);

	SPDK_ENV_FOREACH_CORE(i) {
		rc = allocate_task(i, g_bdev_name);
		if (rc) {
			destroy_tasks();
			spdk_bdev_close(desc);
			spdk_app_stop(rc);
			return;
		}
		g_num_complete_tasks++;
	}

	TAILQ_FOREACH(task, &g_tasks, link) {
		spdk_thread_send_msg(task->thread, dma_test_construct_task_on_thread, task);
	}

	spdk_bdev_close(desc);
}

static void
print_usage(void)
{
	printf(" -b <bdev>  bdev name for the test\n");
	printf(" -f         force memory domains support - abort the test if the bdev doesn't report memory domains\n");
	printf(" -q <val>   io depth\n");
	printf(" -o <val>   io size in bytes\n");
	printf(" -t <val>   run time in seconds\n");
	printf(" -x <op,op> Comma separated memory domain operations expected in the test. Values are \"translate\", \"pull_push\" and \"memzero\"\n");
	printf(" -w <str>   io pattern (read, write, randread, randwrite, rw, randrw, verify)\n");
	printf(" -M <0-100> rw percentage (100 for reads, 0 for writes)\n");
	printf(" -O <val>   iovs count to be used in IO, default 1\n");
	printf(" -Y <val>   Return an invalid mkey on every <val>th translation\n");
}

static int
parse_expected_ops(const char *_str)
{
	char *str = strdup(_str);
	char *tok;
	char *sp = NULL;
	int rc = 0;

	if (!str) {
		fprintf(stderr, "Failed to dup args\n");
		return -ENOMEM;
	}

	tok = strtok_r(str, ",", &sp);
	while (tok) {
		if (strcmp(tok, "translate") == 0) {
			g_test_ops |= DMA_TEST_DOMAIN_OP_TRANSLATE;
		} else if (strcmp(tok, "pull_push") == 0) {
			g_test_ops |= DMA_TEST_DOMAIN_OP_PULL_PUSH;
		} else if (strcmp(tok, "memzero") == 0) {
			g_test_ops |= DMA_TEST_DOMAIN_OP_MEMZERO;
		} else {
			fprintf(stderr, "Unknown value %s\n", tok);
			rc = -EINVAL;
			break;
		}
		tok = strtok_r(NULL, ",", &sp);
	}

	free(str);

	if (g_test_ops == 0 || rc) {
		fprintf(stderr, "-x \"%s\" specified but nothing was parsed\n", _str);
		return -EINVAL;
	}

	return rc;
}

static int
parse_arg(int ch, char *arg)
{
	long tmp;

	switch (ch) {
	case 'q':
	case 'o':
	case 't':
	case 'M':
	case 'O':
	case 'Y':
		tmp = spdk_strtol(arg, 10);
		if (tmp < 0) {
			fprintf(stderr, "Invalid option %c value %s\n", ch, arg);
			return 1;
		}

		switch (ch) {
		case 'q':
			g_queue_depth = (uint32_t)tmp;
			break;
		case 'o':
			g_io_size = (uint32_t)tmp;
			break;
		case 't':
			g_run_time_sec = (uint32_t)tmp;
			break;
		case 'M':
			g_rw_percentage = (int)tmp;
			break;
		case 'O':
			g_iovcnt = (uint32_t)tmp;
			break;
		case 'Y':
			g_corrupt_mkey_counter = (uint32_t)tmp;
			break;
		}
		break;
	case 'w':
		g_rw_mode_str = arg;
		break;
	case 'b':
		g_bdev_name = arg;
		break;
	case 'f':
		g_force_memory_domains_support = true;
		break;
	case 'x':
		if (parse_expected_ops(arg)) {
			return 1;
		}
		break;
	default:
		fprintf(stderr, "Unknown option %c\n", ch);
		return 1;
	}

	return 0;
}

static int
verify_args(void)
{
	const char *rw_mode = g_rw_mode_str;

	if (g_queue_depth == 0) {
		fprintf(stderr, "queue depth (-q) is not set\n");
		return 1;
	}
	if (g_io_size == 0) {
		fprintf(stderr, "io size (-o) is not set\n");
		return 1;
	}
	if (g_iovcnt == 0) {
		fprintf(stderr, "iov count (-O) is invalid\n");
		return 1;
	}
	if (g_run_time_sec == 0) {
		fprintf(stderr, "test run time (-t) is not set\n");
		return 1;
	}
	if (!rw_mode) {
		fprintf(stderr, "io pattern (-w) is not set\n");
		return 1;
	}
	if (strncmp(rw_mode, "rand", 4) == 0) {
		g_is_random = true;
		rw_mode = &rw_mode[4];
	}
	if (strcmp(rw_mode, "read") == 0 || strcmp(rw_mode, "write") == 0) {
		if (g_rw_percentage > 0) {
			fprintf(stderr, "Ignoring -M option\n");
		}
		g_rw_percentage = strcmp(rw_mode, "read") == 0 ? 100 : 0;
	} else if (strcmp(rw_mode, "rw") == 0) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr, "Invalid -M value (%d), must be 0..100\n", g_rw_percentage);
			return 1;
		}
	} else if (strcmp(rw_mode, "verify") == 0) {
		g_is_random = true;
		g_verify = true;
		if (g_rw_percentage > 0) {
			fprintf(stderr, "Ignoring -M option\n");
		}
	} else {
		fprintf(stderr, "io pattern (-w) must be one of [read, write, randread, randwrite, rw, randrw, verify]\n");
		return 1;
	}
	if (!g_bdev_name) {
		fprintf(stderr, "bdev name (-b) is not set\n");
		return 1;
	}

	return 0;
}

int
main(int argc, char **argv)
{
	struct spdk_app_opts opts = {};
	int rc;

	spdk_app_opts_init(&opts, sizeof(opts));
	opts.name = "test_dma";
	opts.shutdown_cb = dma_test_shutdown_cb;
	opts.rpc_addr = NULL;

	rc = spdk_app_parse_args(argc, argv, &opts, "b:fq:o:t:x:w:M:O:Y:", NULL, parse_arg,
				 print_usage);
	if (rc != SPDK_APP_PARSE_ARGS_SUCCESS) {
		exit(rc);
	}

	rc = verify_args();
	if (rc) {
		exit(rc);
	}

	rc = spdk_app_start(&opts, dma_test_start, NULL);
	if (rc == 0) {
		rc = verify_tasks();
	}
	destroy_tasks();
	spdk_app_fini();

	return rc;
}