/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2016 Intel Corporation.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES.
 * All rights reserved.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"
#include "spdk/accel.h"
#include "spdk/endian.h"
#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/log.h"
#include "spdk/util.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/rpc.h"
#include "spdk/bit_array.h"
#include "spdk/conf.h"
#include "spdk/zipf.h"

#define BDEVPERF_CONFIG_MAX_FILENAME 1024
#define BDEVPERF_CONFIG_UNDEFINED -1
#define BDEVPERF_CONFIG_ERROR -2

struct bdevperf_task {
	struct iovec			iov;
	struct bdevperf_job		*job;
	struct spdk_bdev_io		*bdev_io;
	void				*buf;
	void				*md_buf;
	uint64_t			offset_blocks;
	struct bdevperf_task		*task_to_abort;
	enum spdk_bdev_io_type		io_type;
	TAILQ_ENTRY(bdevperf_task)	link;
	struct spdk_bdev_io_wait_entry	bdev_io_wait;
};

static const char *g_workload_type = NULL;
static int g_io_size = 0;
/* initialize to invalid value so we can detect if user overrides it. */
static int g_rw_percentage = -1;
static bool g_verify = false;
static bool g_reset = false;
static bool g_continue_on_failure = false;
static bool g_abort = false;
static bool g_error_to_exit = false;
static int g_queue_depth = 0;
static uint64_t g_time_in_usec;
static int g_show_performance_real_time = 0;
static uint64_t g_show_performance_period_in_usec = 1000000;
static uint64_t g_show_performance_period_num = 0;
static uint64_t g_show_performance_ema_period = 0;
static int g_run_rc = 0;
static bool g_shutdown = false;
static uint64_t g_start_tsc;
static uint64_t g_shutdown_tsc;
static bool g_zcopy = false;
static struct spdk_thread *g_main_thread;
static int g_time_in_sec = 0;
static bool g_mix_specified = false;
static const char *g_job_bdev_name;
static bool g_wait_for_tests = false;
static struct spdk_jsonrpc_request *g_request = NULL;
static bool g_multithread_mode = false;
static int g_timeout_in_sec;
static struct spdk_conf *g_bdevperf_conf = NULL;
static const char *g_bdevperf_conf_file = NULL;
static double g_zipf_theta;

static struct spdk_cpuset g_all_cpuset;
static struct spdk_poller *g_perf_timer = NULL;

static void bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task);
static void rpc_perform_tests_cb(void);

struct bdevperf_job {
	char				*name;
	struct spdk_bdev		*bdev;
	struct spdk_bdev_desc		*bdev_desc;
	struct spdk_io_channel		*ch;
	TAILQ_ENTRY(bdevperf_job)	link;
	struct spdk_thread		*thread;

	const char			*workload_type;
	int				io_size;
	int				rw_percentage;
	bool				is_random;
	bool				verify;
	bool				reset;
	bool				continue_on_failure;
	bool				unmap;
	bool				write_zeroes;
	bool				flush;
	bool				abort;
	int				queue_depth;
	unsigned int			seed;

	uint64_t			io_completed;
	uint64_t			io_failed;
	uint64_t			io_timeout;
	uint64_t			prev_io_completed;
	double				ema_io_per_second;
	int				current_queue_depth;
	uint64_t			size_in_ios;
	uint64_t			ios_base;
	uint64_t			offset_in_ios;
	uint64_t			io_size_blocks;
	uint64_t			buf_size;
	uint32_t			dif_check_flags;
	bool				is_draining;
	struct spdk_poller		*run_timer;
	struct spdk_poller		*reset_timer;
	struct spdk_bit_array		*outstanding;
	struct spdk_zipf		*zipf;
	TAILQ_HEAD(, bdevperf_task)	task_list;
	uint64_t			run_time_in_usec;
};
struct spdk_bdevperf {
	TAILQ_HEAD(, bdevperf_job)	jobs;
	uint32_t			running_jobs;
};

static struct spdk_bdevperf g_bdevperf = {
	.jobs = TAILQ_HEAD_INITIALIZER(g_bdevperf.jobs),
	.running_jobs = 0,
};

enum job_config_rw {
	JOB_CONFIG_RW_READ = 0,
	JOB_CONFIG_RW_WRITE,
	JOB_CONFIG_RW_RANDREAD,
	JOB_CONFIG_RW_RANDWRITE,
	JOB_CONFIG_RW_RW,
	JOB_CONFIG_RW_RANDRW,
	JOB_CONFIG_RW_VERIFY,
	JOB_CONFIG_RW_RESET,
	JOB_CONFIG_RW_UNMAP,
	JOB_CONFIG_RW_FLUSH,
	JOB_CONFIG_RW_WRITE_ZEROES,
};

/* Storing values from a section of job config file */
struct job_config {
	const char		*name;
	const char		*filename;
	struct spdk_cpuset	cpumask;
	int			bs;
	int			iodepth;
	int			rwmixread;
	int64_t			offset;
	uint64_t		length;
	enum job_config_rw	rw;
	TAILQ_ENTRY(job_config)	link;
};

TAILQ_HEAD(, job_config) job_config_list
	= TAILQ_HEAD_INITIALIZER(job_config_list);

static bool g_performance_dump_active = false;

struct bdevperf_aggregate_stats {
	struct bdevperf_job	*current_job;
	uint64_t		io_time_in_usec;
	uint64_t		ema_period;
	double			total_io_per_second;
	double			total_mb_per_second;
	double			total_failed_per_second;
	double			total_timeout_per_second;
};

static struct bdevperf_aggregate_stats g_stats = {};

/*
 * Cumulative Moving Average (CMA): average of all data up to the current point
 * Exponential Moving Average (EMA): weighted mean of the previous n data points, with more weight given to recent ones
 * Simple Moving Average (SMA): unweighted mean of the previous n data points
 *
 * Bdevperf supports CMA and EMA.
 */
static double
get_cma_io_per_second(struct bdevperf_job *job, uint64_t io_time_in_usec)
{
	return (double)job->io_completed * 1000000 / io_time_in_usec;
}
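/*
 * The EMA with period n uses smoothing factor M = 2 / (n + 1). Each
 * reporting interval, the per-interval IOPS sample is folded in as
 *   EMA' = EMA + (sample - EMA) * M  ==  sample * M + (1 - M) * EMA,
 * which is exactly the update performed below. For example, with -P 9
 * (M = 0.2), a sample of 1000 IOPS against a previous EMA of 500 yields
 * 500 + (1000 - 500) * 0.2 = 600.
 */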
static double
get_ema_io_per_second(struct bdevperf_job *job, uint64_t ema_period)
{
	double io_completed, io_per_second;

	io_completed = job->io_completed;
	io_per_second = (double)(io_completed - job->prev_io_completed) * 1000000
			/ g_show_performance_period_in_usec;
	job->prev_io_completed = io_completed;

	job->ema_io_per_second += (io_per_second - job->ema_io_per_second) * 2
				  / (ema_period + 1);
	return job->ema_io_per_second;
}

static void
performance_dump_job(struct bdevperf_aggregate_stats *stats, struct bdevperf_job *job)
{
	double io_per_second, mb_per_second, failed_per_second, timeout_per_second;
	uint64_t time_in_usec;

	printf("\r Job: %s (Core Mask 0x%s)\n", spdk_thread_get_name(job->thread),
	       spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)));

	if (job->io_failed > 0 && !job->reset && !job->continue_on_failure) {
		printf("\r Job: %s ended in about %.2f seconds with error\n",
		       spdk_thread_get_name(job->thread), (double)job->run_time_in_usec / 1000000);
	}
	if (job->verify) {
		printf("\t Verification LBA range: start 0x%" PRIx64 " length 0x%" PRIx64 "\n",
		       job->ios_base, job->size_in_ios);
	}

	if (g_performance_dump_active == true) {
		/* If the job ended early due to failure, use its actual run time. */
		if (job->io_failed > 0 && !job->continue_on_failure) {
			time_in_usec = job->run_time_in_usec;
		} else {
			time_in_usec = stats->io_time_in_usec;
		}
	} else {
		time_in_usec = job->run_time_in_usec;
	}

	if (stats->ema_period == 0) {
		io_per_second = get_cma_io_per_second(job, time_in_usec);
	} else {
		io_per_second = get_ema_io_per_second(job, stats->ema_period);
	}
	mb_per_second = io_per_second * job->io_size / (1024 * 1024);

	failed_per_second = (double)job->io_failed * 1000000 / time_in_usec;
	timeout_per_second = (double)job->io_timeout * 1000000 / time_in_usec;

	printf("\t %-20s: %10.2f %10.2f %10.2f",
	       job->name, (float)time_in_usec / 1000000, io_per_second, mb_per_second);
	printf(" %10.2f %8.2f\n",
	       failed_per_second, timeout_per_second);

	stats->total_io_per_second += io_per_second;
	stats->total_mb_per_second += mb_per_second;
	stats->total_failed_per_second += failed_per_second;
	stats->total_timeout_per_second += timeout_per_second;
}

static void
generate_data(void *buf, int buf_len, int block_size, void *md_buf, int md_size,
	      int num_blocks)
{
	int offset_blocks = 0, md_offset, data_block_size, inner_offset;

	if (buf_len < num_blocks * block_size) {
		return;
	}

	if (md_buf == NULL) {
		data_block_size = block_size - md_size;
		md_buf = (char *)buf + data_block_size;
		md_offset = block_size;
	} else {
		data_block_size = block_size;
		md_offset = md_size;
	}

	while (offset_blocks < num_blocks) {
		inner_offset = 0;
		while (inner_offset < data_block_size) {
			*(uint32_t *)buf = offset_blocks + inner_offset;
			inner_offset += sizeof(uint32_t);
			buf += sizeof(uint32_t);
		}
		memset(md_buf, offset_blocks, md_size);
		md_buf += md_offset;
		offset_blocks++;
	}
}

static bool
copy_data(void *wr_buf, int wr_buf_len, void *rd_buf, int rd_buf_len, int block_size,
	  void *wr_md_buf, void *rd_md_buf, int md_size, int num_blocks)
{
	if (wr_buf_len < num_blocks * block_size || rd_buf_len < num_blocks * block_size) {
		return false;
	}

	assert((wr_md_buf != NULL) == (rd_md_buf != NULL));

	memcpy(wr_buf, rd_buf, block_size * num_blocks);

	if (wr_md_buf != NULL) {
		memcpy(wr_md_buf, rd_md_buf, md_size * num_blocks);
	}

	return true;
}

static bool
verify_data(void *wr_buf, int wr_buf_len, void *rd_buf, int rd_buf_len, int block_size,
	    void *wr_md_buf, void *rd_md_buf, int md_size, int num_blocks, bool md_check)
{
	int offset_blocks = 0, md_offset, data_block_size;

	if (wr_buf_len < num_blocks * block_size || rd_buf_len < num_blocks * block_size) {
		return false;
	}

	assert((wr_md_buf != NULL) == (rd_md_buf != NULL));

	if (wr_md_buf == NULL) {
		data_block_size = block_size - md_size;
		wr_md_buf = (char *)wr_buf + data_block_size;
		rd_md_buf = (char *)rd_buf + data_block_size;
		md_offset = block_size;
	} else {
		data_block_size = block_size;
		md_offset = md_size;
	}

	while (offset_blocks < num_blocks) {
		if (memcmp(wr_buf, rd_buf, data_block_size) != 0) {
			return false;
		}

		wr_buf += block_size;
		rd_buf += block_size;

		if (md_check) {
			if (memcmp(wr_md_buf, rd_md_buf, md_size) != 0) {
				return false;
			}

			wr_md_buf += md_offset;
			rd_md_buf += md_offset;
		}

		offset_blocks++;
	}

	return true;
}

static void
free_job_config(void)
{
	struct job_config *config, *tmp;

	spdk_conf_free(g_bdevperf_conf);
	g_bdevperf_conf = NULL;

	TAILQ_FOREACH_SAFE(config, &job_config_list, link, tmp) {
		TAILQ_REMOVE(&job_config_list, config, link);
		free(config);
	}
}
static void
bdevperf_job_free(struct bdevperf_job *job)
{
	spdk_bit_array_free(&job->outstanding);
	spdk_zipf_free(&job->zipf);
	free(job->name);
	free(job);
}

static void
job_thread_exit(void *ctx)
{
	spdk_thread_exit(spdk_get_thread());
}

static void
bdevperf_test_done(void *ctx)
{
	struct bdevperf_job *job, *jtmp;
	struct bdevperf_task *task, *ttmp;
	int rc;
	uint64_t time_in_usec;

	if (g_time_in_usec) {
		g_stats.io_time_in_usec = g_time_in_usec;

		if (!g_run_rc && g_performance_dump_active) {
			spdk_thread_send_msg(spdk_get_thread(), bdevperf_test_done, NULL);
			return;
		}
	}

	if (g_show_performance_real_time) {
		spdk_poller_unregister(&g_perf_timer);
	}

	if (g_shutdown) {
		g_shutdown_tsc = spdk_get_ticks() - g_start_tsc;
		time_in_usec = g_shutdown_tsc * 1000000 / spdk_get_ticks_hz();
		g_time_in_usec = (g_time_in_usec > time_in_usec) ? time_in_usec : g_time_in_usec;
		printf("Received shutdown signal, test time was about %.6f seconds\n",
		       (double)g_time_in_usec / 1000000);
	}

	printf("\n\r %-*s: %10s %10s %10s %10s %8s\n",
	       28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s");

	TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
		TAILQ_REMOVE(&g_bdevperf.jobs, job, link);

		performance_dump_job(&g_stats, job);

		spdk_thread_send_msg(job->thread, job_thread_exit, NULL);

		TAILQ_FOREACH_SAFE(task, &job->task_list, link, ttmp) {
			TAILQ_REMOVE(&job->task_list, task, link);
			spdk_free(task->buf);
			spdk_free(task->md_buf);
			free(task);
		}

		bdevperf_job_free(job);
	}

	printf("\r ==================================================================================\n");
	printf("\r %-28s: %10s %10.2f %10.2f",
	       "Total", "", g_stats.total_io_per_second, g_stats.total_mb_per_second);
	printf(" %10.2f %8.2f\n",
	       g_stats.total_failed_per_second, g_stats.total_timeout_per_second);
	fflush(stdout);

	rc = g_run_rc;
	if (g_request && !g_shutdown) {
		rpc_perform_tests_cb();
		if (rc != 0) {
			spdk_app_stop(rc);
		}
	} else {
		spdk_app_stop(rc);
	}
}

static void
bdevperf_job_end(void *ctx)
{
	assert(g_main_thread == spdk_get_thread());

	if (--g_bdevperf.running_jobs == 0) {
		bdevperf_test_done(NULL);
	}
}

static void
bdevperf_end_task(struct bdevperf_task *task)
{
	struct bdevperf_job *job = task->job;
	uint64_t end_tsc = 0;

	TAILQ_INSERT_TAIL(&job->task_list, task, link);
	if (job->is_draining) {
		if (job->current_queue_depth == 0) {
			end_tsc = spdk_get_ticks() - g_start_tsc;
			job->run_time_in_usec = end_tsc * 1000000 / spdk_get_ticks_hz();
			spdk_put_io_channel(job->ch);
			spdk_bdev_close(job->bdev_desc);
			spdk_thread_send_msg(g_main_thread, bdevperf_job_end, NULL);
		}
	}
}

static void
bdevperf_queue_io_wait_with_cb(struct bdevperf_task *task, spdk_bdev_io_wait_cb cb_fn)
{
	struct bdevperf_job *job = task->job;

	task->bdev_io_wait.bdev = job->bdev;
	task->bdev_io_wait.cb_fn = cb_fn;
	task->bdev_io_wait.cb_arg = task;
	spdk_bdev_queue_io_wait(job->bdev, job->ch, &task->bdev_io_wait);
}
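/*
 * Stop issuing new I/O for a job. This runs both as the job's run_timer
 * poller when the configured test time elapses and directly on error
 * paths. It only marks the job as draining; outstanding I/O completes
 * normally and the job is torn down in bdevperf_end_task() once
 * current_queue_depth reaches zero.
 */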
static int
bdevperf_job_drain(void *ctx)
{
	struct bdevperf_job *job = ctx;

	spdk_poller_unregister(&job->run_timer);
	if (job->reset) {
		spdk_poller_unregister(&job->reset_timer);
	}

	job->is_draining = true;

	return -1;
}

static void
bdevperf_abort_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct bdevperf_task *task = cb_arg;
	struct bdevperf_job *job = task->job;

	job->current_queue_depth--;

	if (success) {
		job->io_completed++;
	} else {
		job->io_failed++;
		if (!job->continue_on_failure) {
			bdevperf_job_drain(job);
			g_run_rc = -1;
		}
	}

	spdk_bdev_free_io(bdev_io);
	bdevperf_end_task(task);
}

static int
bdevperf_verify_dif(struct bdevperf_task *task, struct iovec *iovs, int iovcnt)
{
	struct bdevperf_job *job = task->job;
	struct spdk_bdev *bdev = job->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk = {};
	int rc;

	rc = spdk_dif_ctx_init(&dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       job->dif_check_flags,
			       task->offset_blocks, 0, 0, 0, 0);
	if (rc != 0) {
		fprintf(stderr, "Initialization of DIF context failed\n");
		return rc;
	}

	if (spdk_bdev_is_md_interleaved(bdev)) {
		rc = spdk_dif_verify(iovs, iovcnt, job->io_size_blocks, &dif_ctx, &err_blk);
	} else {
		struct iovec md_iov = {
			.iov_base = task->md_buf,
			.iov_len = spdk_bdev_get_md_size(bdev) * job->io_size_blocks,
		};

		rc = spdk_dix_verify(iovs, iovcnt, &md_iov, job->io_size_blocks, &dif_ctx, &err_blk);
	}

	if (rc != 0) {
		fprintf(stderr, "DIF/DIX error detected. type=%d, offset=%" PRIu32 "\n",
			err_blk.err_type, err_blk.err_offset);
	}

	return rc;
}
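/*
 * Common completion callback for data I/O. It accounts for the result,
 * verifies read-back data for verify/reset workloads, runs a DIF/DIX
 * check on reads when protection information is enabled, and then either
 * resubmits the task or, if the job is draining, returns it to the free
 * list via bdevperf_end_task().
 */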
static void
bdevperf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct bdevperf_job *job;
	struct bdevperf_task *task = cb_arg;
	struct iovec *iovs;
	int iovcnt;
	bool md_check;
	uint64_t offset_in_ios;
	int rc;

	job = task->job;
	md_check = spdk_bdev_get_dif_type(job->bdev) == SPDK_DIF_DISABLE;

	if (g_error_to_exit == true) {
		bdevperf_job_drain(job);
	} else if (!success) {
		if (!job->reset && !job->continue_on_failure) {
			bdevperf_job_drain(job);
			g_run_rc = -1;
			g_error_to_exit = true;
			printf("task offset: %" PRIu64 " on job bdev=%s fails\n",
			       task->offset_blocks, job->name);
		}
	} else if (job->verify || job->reset) {
		spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt);
		assert(iovcnt == 1);
		assert(iovs != NULL);
		if (!verify_data(task->buf, job->buf_size, iovs[0].iov_base, iovs[0].iov_len,
				 spdk_bdev_get_block_size(job->bdev),
				 task->md_buf, spdk_bdev_io_get_md_buf(bdev_io),
				 spdk_bdev_get_md_size(job->bdev),
				 job->io_size_blocks, md_check)) {
			printf("Buffer mismatch! Target: %s Disk Offset: %" PRIu64 "\n", job->name, task->offset_blocks);
			printf(" First dword expected 0x%x got 0x%x\n", *(int *)task->buf, *(int *)iovs[0].iov_base);
			bdevperf_job_drain(job);
			g_run_rc = -1;
		}
	} else if (job->dif_check_flags != 0) {
		if (task->io_type == SPDK_BDEV_IO_TYPE_READ && spdk_bdev_get_md_size(job->bdev) != 0) {
			spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt);
			assert(iovcnt == 1);
			assert(iovs != NULL);
			rc = bdevperf_verify_dif(task, iovs, iovcnt);
			if (rc != 0) {
				printf("DIF error detected. task offset: %" PRIu64 " on job bdev=%s\n",
				       task->offset_blocks, job->name);

				success = false;
				if (!job->reset && !job->continue_on_failure) {
					bdevperf_job_drain(job);
					g_run_rc = -1;
					g_error_to_exit = true;
				}
			}
		}
	}

	job->current_queue_depth--;

	if (success) {
		job->io_completed++;
	} else {
		job->io_failed++;
	}

	if (job->verify) {
		assert(task->offset_blocks / job->io_size_blocks >= job->ios_base);
		offset_in_ios = task->offset_blocks / job->io_size_blocks - job->ios_base;

		assert(spdk_bit_array_get(job->outstanding, offset_in_ios) == true);
		spdk_bit_array_clear(job->outstanding, offset_in_ios);
	}

	spdk_bdev_free_io(bdev_io);

	/*
	 * is_draining indicates when time has expired for the test run
	 * and we are just waiting for the previously submitted I/O
	 * to complete. In this case, do not submit a new I/O to replace
	 * the one just completed.
	 */
	if (!job->is_draining) {
		bdevperf_submit_single(job, task);
	} else {
		bdevperf_end_task(task);
	}
}

static void
bdevperf_verify_submit_read(void *cb_arg)
{
	struct bdevperf_job *job;
	struct bdevperf_task *task = cb_arg;
	int rc;

	job = task->job;

	/* Read the data back in */
	rc = spdk_bdev_read_blocks_with_md(job->bdev_desc, job->ch, NULL, NULL,
					   task->offset_blocks, job->io_size_blocks,
					   bdevperf_complete, task);

	if (rc == -ENOMEM) {
		bdevperf_queue_io_wait_with_cb(task, bdevperf_verify_submit_read);
	} else if (rc != 0) {
		printf("Failed to submit read: %d\n", rc);
		bdevperf_job_drain(job);
		g_run_rc = rc;
	}
}

static void
bdevperf_verify_write_complete(struct spdk_bdev_io *bdev_io, bool success,
			       void *cb_arg)
{
	if (success) {
		spdk_bdev_free_io(bdev_io);
		bdevperf_verify_submit_read(cb_arg);
	} else {
		bdevperf_complete(bdev_io, success, cb_arg);
	}
}

static void
bdevperf_zcopy_populate_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	if (!success) {
		bdevperf_complete(bdev_io, success, cb_arg);
		return;
	}

	spdk_bdev_zcopy_end(bdev_io, false, bdevperf_complete, cb_arg);
}

static int
bdevperf_generate_dif(struct bdevperf_task *task)
{
	struct bdevperf_job *job = task->job;
	struct spdk_bdev *bdev = job->bdev;
	struct spdk_dif_ctx dif_ctx;
	int rc;

	rc = spdk_dif_ctx_init(&dif_ctx,
			       spdk_bdev_get_block_size(bdev),
			       spdk_bdev_get_md_size(bdev),
			       spdk_bdev_is_md_interleaved(bdev),
			       spdk_bdev_is_dif_head_of_md(bdev),
			       spdk_bdev_get_dif_type(bdev),
			       job->dif_check_flags,
			       task->offset_blocks, 0, 0, 0, 0);
	if (rc != 0) {
		fprintf(stderr, "Initialization of DIF context failed\n");
		return rc;
	}

	if (spdk_bdev_is_md_interleaved(bdev)) {
		rc = spdk_dif_generate(&task->iov, 1, job->io_size_blocks, &dif_ctx);
	} else {
		struct iovec md_iov = {
			.iov_base = task->md_buf,
			.iov_len = spdk_bdev_get_md_size(bdev) * job->io_size_blocks,
		};

		rc = spdk_dix_generate(&task->iov, 1, &md_iov, job->io_size_blocks, &dif_ctx);
	}

	if (rc != 0) {
		fprintf(stderr, "Generation of DIF/DIX failed\n");
	}

	return rc;
}
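/*
 * Translate a task into the matching bdev I/O call. A return of -ENOMEM
 * from the bdev layer is not fatal: the task is parked on the bdev's
 * io_wait queue and this function is retried once resources free up.
 * Any other submission error drains the job.
 */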
static void
bdevperf_submit_task(void *arg)
{
	struct bdevperf_task *task = arg;
	struct bdevperf_job *job = task->job;
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *ch;
	spdk_bdev_io_completion_cb cb_fn;
	uint64_t offset_in_ios;
	int rc = 0;

	desc = job->bdev_desc;
	ch = job->ch;

	switch (task->io_type) {
	case SPDK_BDEV_IO_TYPE_WRITE:
		if (spdk_bdev_get_md_size(job->bdev) != 0 && job->dif_check_flags != 0) {
			rc = bdevperf_generate_dif(task);
		}
		if (rc == 0) {
			cb_fn = (job->verify || job->reset) ? bdevperf_verify_write_complete : bdevperf_complete;

			if (g_zcopy) {
				spdk_bdev_zcopy_end(task->bdev_io, true, cb_fn, task);
				return;
			} else {
				rc = spdk_bdev_writev_blocks_with_md(desc, ch, &task->iov, 1,
								     task->md_buf,
								     task->offset_blocks,
								     job->io_size_blocks,
								     cb_fn, task);
			}
		}
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		rc = spdk_bdev_flush_blocks(desc, ch, task->offset_blocks,
					    job->io_size_blocks, bdevperf_complete, task);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		rc = spdk_bdev_unmap_blocks(desc, ch, task->offset_blocks,
					    job->io_size_blocks, bdevperf_complete, task);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		rc = spdk_bdev_write_zeroes_blocks(desc, ch, task->offset_blocks,
						   job->io_size_blocks, bdevperf_complete, task);
		break;
	case SPDK_BDEV_IO_TYPE_READ:
		if (g_zcopy) {
			rc = spdk_bdev_zcopy_start(desc, ch, NULL, 0, task->offset_blocks, job->io_size_blocks,
						   true, bdevperf_zcopy_populate_complete, task);
		} else {
			rc = spdk_bdev_read_blocks_with_md(desc, ch, task->buf, task->md_buf,
							   task->offset_blocks,
							   job->io_size_blocks,
							   bdevperf_complete, task);
		}
		break;
	case SPDK_BDEV_IO_TYPE_ABORT:
		rc = spdk_bdev_abort(desc, ch, task->task_to_abort, bdevperf_abort_complete, task);
		break;
	default:
		assert(false);
		rc = -EINVAL;
		break;
	}

	if (rc == -ENOMEM) {
		bdevperf_queue_io_wait_with_cb(task, bdevperf_submit_task);
		return;
	} else if (rc != 0) {
		printf("Failed to submit bdev_io: %d\n", rc);
		if (job->verify) {
			assert(task->offset_blocks / job->io_size_blocks >= job->ios_base);
			offset_in_ios = task->offset_blocks / job->io_size_blocks - job->ios_base;

			assert(spdk_bit_array_get(job->outstanding, offset_in_ios) == true);
			spdk_bit_array_clear(job->outstanding, offset_in_ios);
		}
		bdevperf_job_drain(job);
		g_run_rc = rc;
		return;
	}

	job->current_queue_depth++;
}
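/*
 * Zcopy write path: first ask the bdev for a buffer with
 * spdk_bdev_zcopy_start(populate=false); once the buffer arrives in the
 * callback below, the generated pattern is copied into it (verify/reset
 * only) and the write is committed via spdk_bdev_zcopy_end() in
 * bdevperf_submit_task().
 */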
static void
bdevperf_zcopy_get_buf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct bdevperf_task *task = cb_arg;
	struct bdevperf_job *job = task->job;
	struct iovec *iovs;
	int iovcnt;

	if (!success) {
		bdevperf_job_drain(job);
		g_run_rc = -1;
		return;
	}

	task->bdev_io = bdev_io;
	task->io_type = SPDK_BDEV_IO_TYPE_WRITE;

	if (job->verify || job->reset) {
		/* When job->verify or job->reset is enabled, task->buf is used for
		 * verification of read after write. For write I/O, when zcopy APIs
		 * are used, task->buf cannot be used, and data must be written to
		 * the data buffer allocated underneath bdev layer instead.
		 * Hence we copy task->buf to the allocated data buffer here.
		 */
		spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt);
		assert(iovcnt == 1);
		assert(iovs != NULL);

		copy_data(iovs[0].iov_base, iovs[0].iov_len, task->buf, job->buf_size,
			  spdk_bdev_get_block_size(job->bdev),
			  spdk_bdev_io_get_md_buf(bdev_io), task->md_buf,
			  spdk_bdev_get_md_size(job->bdev), job->io_size_blocks);
	}

	bdevperf_submit_task(task);
}

static void
bdevperf_prep_zcopy_write_task(void *arg)
{
	struct bdevperf_task *task = arg;
	struct bdevperf_job *job = task->job;
	int rc;

	rc = spdk_bdev_zcopy_start(job->bdev_desc, job->ch, NULL, 0,
				   task->offset_blocks, job->io_size_blocks,
				   false, bdevperf_zcopy_get_buf_complete, task);
	if (rc != 0) {
		assert(rc == -ENOMEM);
		bdevperf_queue_io_wait_with_cb(task, bdevperf_prep_zcopy_write_task);
		return;
	}

	job->current_queue_depth++;
}

static struct bdevperf_task *
bdevperf_job_get_task(struct bdevperf_job *job)
{
	struct bdevperf_task *task;

	task = TAILQ_FIRST(&job->task_list);
	if (!task) {
		printf("Task allocation failed\n");
		abort();
	}

	TAILQ_REMOVE(&job->task_list, task, link);
	return task;
}
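/*
 * Pick the next offset and I/O type for a task, then submit it. Offsets
 * come from the zipf generator, rand_r(), or a sequential counter; for
 * verify jobs the outstanding bit array guarantees that no two in-flight
 * I/Os ever target the same offset, so a read-back compare never races
 * with another in-flight write to the same location.
 */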
static void
bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task)
{
	uint64_t offset_in_ios;

	if (job->zipf) {
		offset_in_ios = spdk_zipf_generate(job->zipf);
	} else if (job->is_random) {
		offset_in_ios = rand_r(&job->seed) % job->size_in_ios;
	} else {
		offset_in_ios = job->offset_in_ios++;
		if (job->offset_in_ios == job->size_in_ios) {
			job->offset_in_ios = 0;
		}

		/* Increment offset_in_ios if there's already an outstanding I/O
		 * to that location. We only need this with job->verify as random
		 * offsets are not supported with job->verify at this time.
		 */
		if (job->verify) {
			assert(spdk_bit_array_find_first_clear(job->outstanding, 0) != UINT32_MAX);

			while (spdk_bit_array_get(job->outstanding, offset_in_ios)) {
				offset_in_ios = job->offset_in_ios++;
				if (job->offset_in_ios == job->size_in_ios) {
					job->offset_in_ios = 0;
				}
			}
			spdk_bit_array_set(job->outstanding, offset_in_ios);
		}
	}

	/* For multiple threads on the same job, offset_in_ios is relative
	 * to the LBA range assigned to that job. task->offset_blocks is
	 * absolute (entire bdev LBA range).
	 */
	task->offset_blocks = (offset_in_ios + job->ios_base) * job->io_size_blocks;

	if (job->verify || job->reset) {
		generate_data(task->buf, job->buf_size,
			      spdk_bdev_get_block_size(job->bdev),
			      task->md_buf, spdk_bdev_get_md_size(job->bdev),
			      job->io_size_blocks);
		if (g_zcopy) {
			bdevperf_prep_zcopy_write_task(task);
			return;
		} else {
			task->iov.iov_base = task->buf;
			task->iov.iov_len = job->buf_size;
			task->io_type = SPDK_BDEV_IO_TYPE_WRITE;
		}
	} else if (job->flush) {
		task->io_type = SPDK_BDEV_IO_TYPE_FLUSH;
	} else if (job->unmap) {
		task->io_type = SPDK_BDEV_IO_TYPE_UNMAP;
	} else if (job->write_zeroes) {
		task->io_type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
	} else if ((job->rw_percentage == 100) ||
		   (job->rw_percentage != 0 && ((rand_r(&job->seed) % 100) < job->rw_percentage))) {
		task->io_type = SPDK_BDEV_IO_TYPE_READ;
	} else {
		if (g_zcopy) {
			bdevperf_prep_zcopy_write_task(task);
			return;
		} else {
			task->iov.iov_base = task->buf;
			task->iov.iov_len = job->buf_size;
			task->io_type = SPDK_BDEV_IO_TYPE_WRITE;
		}
	}

	bdevperf_submit_task(task);
}

static int reset_job(void *arg);

static void
reset_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct bdevperf_task *task = cb_arg;
	struct bdevperf_job *job = task->job;

	if (!success) {
		printf("Reset blockdev=%s failed\n", spdk_bdev_get_name(job->bdev));
		bdevperf_job_drain(job);
		g_run_rc = -1;
	}

	TAILQ_INSERT_TAIL(&job->task_list, task, link);
	spdk_bdev_free_io(bdev_io);

	job->reset_timer = SPDK_POLLER_REGISTER(reset_job, job,
						10 * 1000000);
}

static int
reset_job(void *arg)
{
	struct bdevperf_job *job = arg;
	struct bdevperf_task *task;
	int rc;

	spdk_poller_unregister(&job->reset_timer);

	/* Do reset. */
	task = bdevperf_job_get_task(job);
	rc = spdk_bdev_reset(job->bdev_desc, job->ch,
			     reset_cb, task);
	if (rc) {
		printf("Reset failed: %d\n", rc);
		bdevperf_job_drain(job);
		g_run_rc = -1;
	}

	return -1;
}

static void
bdevperf_timeout_cb(void *cb_arg, struct spdk_bdev_io *bdev_io)
{
	struct bdevperf_job *job = cb_arg;
	struct bdevperf_task *task;

	job->io_timeout++;

	if (job->is_draining || !job->abort ||
	    !spdk_bdev_io_type_supported(job->bdev, SPDK_BDEV_IO_TYPE_ABORT)) {
		return;
	}

	task = bdevperf_job_get_task(job);
	if (task == NULL) {
		return;
	}

	task->task_to_abort = spdk_bdev_io_get_cb_arg(bdev_io);
	task->io_type = SPDK_BDEV_IO_TYPE_ABORT;

	bdevperf_submit_task(task);
}
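/*
 * Runs on the job's dedicated thread. It arms the run_timer that will
 * drain the job when the test time expires (plus a 10-second reset timer
 * for reset workloads), then primes the pipeline with queue_depth tasks;
 * from then on each completion submits its replacement.
 */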
static void
bdevperf_job_run(void *ctx)
{
	struct bdevperf_job *job = ctx;
	struct bdevperf_task *task;
	int i;

	/* Submit initial I/O for this job. Each time one
	 * completes, another will be submitted.
	 */

	/* Start a timer to stop this I/O chain when the run is over */
	job->run_timer = SPDK_POLLER_REGISTER(bdevperf_job_drain, job, g_time_in_usec);
	if (job->reset) {
		job->reset_timer = SPDK_POLLER_REGISTER(reset_job, job,
							10 * 1000000);
	}

	spdk_bdev_set_timeout(job->bdev_desc, g_timeout_in_sec, bdevperf_timeout_cb, job);

	for (i = 0; i < job->queue_depth; i++) {
		task = bdevperf_job_get_task(job);
		bdevperf_submit_single(job, task);
	}
}

static void
_performance_dump_done(void *ctx)
{
	struct bdevperf_aggregate_stats *stats = ctx;

	printf("\r ==================================================================================\n");
	printf("\r %-28s: %10s %10.2f %10.2f",
	       "Total", "", stats->total_io_per_second, stats->total_mb_per_second);
	printf(" %10.2f %8.2f\n",
	       stats->total_failed_per_second, stats->total_timeout_per_second);
	fflush(stdout);

	g_performance_dump_active = false;

	free(stats);
}

static void
_performance_dump(void *ctx)
{
	struct bdevperf_aggregate_stats *stats = ctx;

	performance_dump_job(stats, stats->current_job);

	/* This assumes the jobs list is static after start up time.
	 * That's true right now, but if that ever changed this would need a lock. */
	stats->current_job = TAILQ_NEXT(stats->current_job, link);
	if (stats->current_job == NULL) {
		spdk_thread_send_msg(g_main_thread, _performance_dump_done, stats);
	} else {
		spdk_thread_send_msg(stats->current_job->thread, _performance_dump, stats);
	}
}
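/*
 * Periodic poller on the main thread (registered with -S). Rather than
 * taking locks, it walks the job list by message-passing: the stats
 * context hops from one job's thread to the next via _performance_dump()
 * and returns to the main thread in _performance_dump_done(). A dump
 * already in flight is simply skipped.
 */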
static int
performance_statistics_thread(void *arg)
{
	struct bdevperf_aggregate_stats *stats;

	if (g_performance_dump_active) {
		return -1;
	}

	g_performance_dump_active = true;

	stats = calloc(1, sizeof(*stats));
	if (stats == NULL) {
		return -1;
	}

	g_show_performance_period_num++;

	stats->io_time_in_usec = g_show_performance_period_num * g_show_performance_period_in_usec;
	stats->ema_period = g_show_performance_ema_period;

	/* Iterate all of the jobs to gather stats.
	 * These jobs will not get removed here until a final performance dump is run,
	 * so this should be safe without locking.
	 */
	stats->current_job = TAILQ_FIRST(&g_bdevperf.jobs);
	if (stats->current_job == NULL) {
		spdk_thread_send_msg(g_main_thread, _performance_dump_done, stats);
	} else {
		spdk_thread_send_msg(stats->current_job->thread, _performance_dump, stats);
	}

	return -1;
}

static void
bdevperf_test(void)
{
	struct bdevperf_job *job;

	printf("Running I/O for %" PRIu64 " seconds...\n", g_time_in_usec / 1000000);
	fflush(stdout);

	/* Start a timer to dump performance numbers */
	g_start_tsc = spdk_get_ticks();
	if (g_show_performance_real_time && !g_perf_timer) {
		printf("\r %-*s: %10s %10s %10s %10s %8s\n",
		       28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s");

		g_perf_timer = SPDK_POLLER_REGISTER(performance_statistics_thread, NULL,
						    g_show_performance_period_in_usec);
	}

	/* Iterate jobs to start all I/O */
	TAILQ_FOREACH(job, &g_bdevperf.jobs, link) {
		g_bdevperf.running_jobs++;
		spdk_thread_send_msg(job->thread, bdevperf_job_run, job);
	}
}

static void
bdevperf_bdev_removed(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct bdevperf_job *job = event_ctx;

	if (SPDK_BDEV_EVENT_REMOVE == type) {
		bdevperf_job_drain(job);
	}
}

static uint32_t g_construct_job_count = 0;

static void
_bdevperf_construct_job_done(void *ctx)
{
	if (--g_construct_job_count == 0) {

		if (g_run_rc != 0) {
			/* Something failed. */
			bdevperf_test_done(NULL);
			return;
		}

		/* Ready to run the test */
		bdevperf_test();
	} else if (g_run_rc != 0) {
		/* Reset the error since some jobs were constructed successfully. */
		g_run_rc = 0;
		if (g_continue_on_failure == false) {
			g_error_to_exit = true;
		}
	}
}

/* The format checker does not allow inline types,
 * so this typedef is a workaround. */
typedef struct spdk_thread *spdk_thread_t;
static spdk_thread_t
construct_job_thread(struct spdk_cpuset *cpumask, const char *tag)
{
	struct spdk_cpuset tmp;

	/* This function runs on the main thread. */
	assert(g_main_thread == spdk_get_thread());

	/* Handle default mask */
	if (spdk_cpuset_count(cpumask) == 0) {
		cpumask = &g_all_cpuset;
	}

	/* Warn user that mask might need to be changed */
	spdk_cpuset_copy(&tmp, cpumask);
	spdk_cpuset_or(&tmp, &g_all_cpuset);
	if (!spdk_cpuset_equal(&tmp, &g_all_cpuset)) {
		fprintf(stderr, "cpumask for '%s' is too big\n", tag);
	}

	return spdk_thread_create(tag, cpumask);
}

static uint32_t
_get_next_core(void)
{
	static uint32_t current_core = SPDK_ENV_LCORE_ID_ANY;

	if (current_core == SPDK_ENV_LCORE_ID_ANY) {
		current_core = spdk_env_get_first_core();
		return current_core;
	}

	current_core = spdk_env_get_next_core(current_core);
	if (current_core == SPDK_ENV_LCORE_ID_ANY) {
		current_core = spdk_env_get_first_core();
	}

	return current_core;
}

static void
_bdevperf_construct_job(void *ctx)
{
	struct bdevperf_job *job = ctx;
	int rc;

	rc = spdk_bdev_open_ext(spdk_bdev_get_name(job->bdev), true, bdevperf_bdev_removed, job,
				&job->bdev_desc);
	if (rc != 0) {
		SPDK_ERRLOG("Could not open leaf bdev %s, error=%d\n", spdk_bdev_get_name(job->bdev), rc);
		g_run_rc = -EINVAL;
		goto end;
	}

	if (g_zcopy) {
		if (!spdk_bdev_io_type_supported(job->bdev, SPDK_BDEV_IO_TYPE_ZCOPY)) {
			printf("Test requires ZCOPY but bdev module does not support ZCOPY\n");
			g_run_rc = -ENOTSUP;
			goto end;
		}
	}

	job->ch = spdk_bdev_get_io_channel(job->bdev_desc);
	if (!job->ch) {
		SPDK_ERRLOG("Could not get io_channel for device %s\n", spdk_bdev_get_name(job->bdev));
		spdk_bdev_close(job->bdev_desc);
		TAILQ_REMOVE(&g_bdevperf.jobs, job, link);
		g_run_rc = -ENOMEM;
		goto end;
	}

end:
	spdk_thread_send_msg(g_main_thread, _bdevperf_construct_job_done, NULL);
}

static void
job_init_rw(struct bdevperf_job *job, enum job_config_rw rw)
{
	switch (rw) {
	case JOB_CONFIG_RW_READ:
		job->rw_percentage = 100;
		break;
	case JOB_CONFIG_RW_WRITE:
		job->rw_percentage = 0;
		break;
	case JOB_CONFIG_RW_RANDREAD:
		job->is_random = true;
		job->rw_percentage = 100;
		job->seed = rand();
		break;
	case JOB_CONFIG_RW_RANDWRITE:
		job->is_random = true;
		job->rw_percentage = 0;
		job->seed = rand();
		break;
	case JOB_CONFIG_RW_RW:
		job->is_random = false;
		break;
	case JOB_CONFIG_RW_RANDRW:
		job->is_random = true;
		job->seed = rand();
		break;
	case JOB_CONFIG_RW_VERIFY:
		job->verify = true;
		job->rw_percentage = 50;
		break;
	case JOB_CONFIG_RW_RESET:
		job->reset = true;
		job->verify = true;
		job->rw_percentage = 50;
		break;
	case JOB_CONFIG_RW_UNMAP:
		job->unmap = true;
		break;
	case JOB_CONFIG_RW_FLUSH:
		job->flush = true;
		break;
	case JOB_CONFIG_RW_WRITE_ZEROES:
		job->write_zeroes = true;
		break;
	}
}
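/*
 * Create a bdevperf_job for one bdev and preallocate its task pool. The
 * pool holds queue_depth tasks, plus one spare when reset workloads need
 * a task for the periodic reset, plus queue_depth more when -X may need
 * abort tasks alongside the I/O they abort.
 */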
static int
bdevperf_construct_job(struct spdk_bdev *bdev, struct job_config *config,
		       struct spdk_thread *thread)
{
	struct bdevperf_job *job;
	struct bdevperf_task *task;
	int block_size, data_block_size;
	int rc;
	int task_num, n;

	block_size = spdk_bdev_get_block_size(bdev);
	data_block_size = spdk_bdev_get_data_block_size(bdev);

	job = calloc(1, sizeof(struct bdevperf_job));
	if (!job) {
		fprintf(stderr, "Unable to allocate memory for new job.\n");
		return -ENOMEM;
	}

	job->name = strdup(spdk_bdev_get_name(bdev));
	if (!job->name) {
		fprintf(stderr, "Unable to allocate memory for job name.\n");
		bdevperf_job_free(job);
		return -ENOMEM;
	}

	job->workload_type = g_workload_type;
	job->io_size = config->bs;
	job->rw_percentage = config->rwmixread;
	job->continue_on_failure = g_continue_on_failure;
	job->queue_depth = config->iodepth;
	job->bdev = bdev;
	job->io_size_blocks = job->io_size / data_block_size;
	job->buf_size = job->io_size_blocks * block_size;
	job->abort = g_abort;
	job_init_rw(job, config->rw);

	if ((job->io_size % data_block_size) != 0) {
		SPDK_ERRLOG("IO size (%d) is not a multiple of the data block size of bdev %s (%"PRIu32")\n",
			    job->io_size, spdk_bdev_get_name(bdev), data_block_size);
		bdevperf_job_free(job);
		return -ENOTSUP;
	}

	if (job->unmap && !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
		printf("Skipping %s because it does not support unmap\n", spdk_bdev_get_name(bdev));
		bdevperf_job_free(job);
		return -ENOTSUP;
	}

	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) {
		job->dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK;
	}
	if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) {
		job->dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK;
	}

	job->offset_in_ios = 0;

	if (config->length != 0) {
		/* Use subset of disk */
		job->size_in_ios = config->length / job->io_size_blocks;
		job->ios_base = config->offset / job->io_size_blocks;
	} else {
		/* Use whole disk */
		job->size_in_ios = spdk_bdev_get_num_blocks(bdev) / job->io_size_blocks;
		job->ios_base = 0;
	}

	if (job->is_random && g_zipf_theta > 0) {
		job->zipf = spdk_zipf_create(job->size_in_ios, g_zipf_theta, 0);
	}

	if (job->verify) {
		job->outstanding = spdk_bit_array_create(job->size_in_ios);
		if (job->outstanding == NULL) {
			SPDK_ERRLOG("Could not create outstanding array bitmap for bdev %s\n",
				    spdk_bdev_get_name(bdev));
			bdevperf_job_free(job);
			return -ENOMEM;
		}
	}

	TAILQ_INIT(&job->task_list);

	task_num = job->queue_depth;
	if (job->reset) {
		task_num += 1;
	}
	if (job->abort) {
		task_num += job->queue_depth;
	}

	TAILQ_INSERT_TAIL(&g_bdevperf.jobs, job, link);

	for (n = 0; n < task_num; n++) {
		task = calloc(1, sizeof(struct bdevperf_task));
		if (!task) {
			fprintf(stderr, "Failed to allocate memory for task.\n");
			return -ENOMEM;
		}

		task->buf = spdk_zmalloc(job->buf_size, spdk_bdev_get_buf_align(job->bdev), NULL,
					 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!task->buf) {
			fprintf(stderr, "Cannot allocate buf for task=%p\n", task);
			free(task);
			return -ENOMEM;
		}

		if (spdk_bdev_is_md_separate(job->bdev)) {
			task->md_buf = spdk_zmalloc(job->io_size_blocks *
						    spdk_bdev_get_md_size(job->bdev), 0, NULL,
						    SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
			if (!task->md_buf) {
				fprintf(stderr, "Cannot allocate md buf for task=%p\n", task);
				spdk_free(task->buf);
				free(task);
				return -ENOMEM;
			}
		}

		task->job = job;
		TAILQ_INSERT_TAIL(&job->task_list, task, link);
	}

	job->thread = thread;

	g_construct_job_count++;

	rc = spdk_thread_send_msg(thread, _bdevperf_construct_job, job);
	assert(rc == 0);

	return rc;
}
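/*
 * Map an rw= string to a job_config_rw value. When str is NULL the
 * caller-provided default is returned unchanged, which lets job sections
 * fall back to the [global] or CLI setting; an unknown string yields
 * BDEVPERF_CONFIG_ERROR.
 */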
static int
parse_rw(const char *str, enum job_config_rw ret)
{
	if (str == NULL) {
		return ret;
	}

	if (!strcmp(str, "read")) {
		ret = JOB_CONFIG_RW_READ;
	} else if (!strcmp(str, "randread")) {
		ret = JOB_CONFIG_RW_RANDREAD;
	} else if (!strcmp(str, "write")) {
		ret = JOB_CONFIG_RW_WRITE;
	} else if (!strcmp(str, "randwrite")) {
		ret = JOB_CONFIG_RW_RANDWRITE;
	} else if (!strcmp(str, "verify")) {
		ret = JOB_CONFIG_RW_VERIFY;
	} else if (!strcmp(str, "reset")) {
		ret = JOB_CONFIG_RW_RESET;
	} else if (!strcmp(str, "unmap")) {
		ret = JOB_CONFIG_RW_UNMAP;
	} else if (!strcmp(str, "write_zeroes")) {
		ret = JOB_CONFIG_RW_WRITE_ZEROES;
	} else if (!strcmp(str, "flush")) {
		ret = JOB_CONFIG_RW_FLUSH;
	} else if (!strcmp(str, "rw")) {
		ret = JOB_CONFIG_RW_RW;
	} else if (!strcmp(str, "randrw")) {
		ret = JOB_CONFIG_RW_RANDRW;
	} else {
		fprintf(stderr, "rw must be one of\n"
			"(read, write, randread, randwrite, rw, randrw, verify, reset, unmap, flush, write_zeroes)\n");
		ret = BDEVPERF_CONFIG_ERROR;
	}

	return ret;
}

static const char *
config_filename_next(const char *filename, char *out)
{
	int i, k;

	if (filename == NULL) {
		out[0] = '\0';
		return NULL;
	}

	if (filename[0] == ':') {
		filename++;
	}

	/* Stop one short of the buffer size to leave room for the terminating NUL. */
	for (i = 0, k = 0;
	     filename[i] != '\0' &&
	     filename[i] != ':' &&
	     i < BDEVPERF_CONFIG_MAX_FILENAME - 1;
	     i++) {
		if (filename[i] == ' ' || filename[i] == '\t') {
			continue;
		}

		out[k++] = filename[i];
	}
	out[k] = 0;

	return filename + i;
}

static void
bdevperf_construct_jobs(void)
{
	char filename[BDEVPERF_CONFIG_MAX_FILENAME];
	struct spdk_thread *thread;
	struct job_config *config;
	struct spdk_bdev *bdev;
	const char *filenames;
	int rc;

	TAILQ_FOREACH(config, &job_config_list, link) {
		filenames = config->filename;

		thread = construct_job_thread(&config->cpumask, config->name);
		assert(thread);

		while (filenames) {
			filenames = config_filename_next(filenames, filename);
			if (strlen(filename) == 0) {
				break;
			}

			bdev = spdk_bdev_get_by_name(filename);
			if (!bdev) {
				fprintf(stderr, "Unable to find bdev '%s'\n", filename);
				g_run_rc = -EINVAL;
				return;
			}

			rc = bdevperf_construct_job(bdev, config, thread);
			if (rc < 0) {
				g_run_rc = rc;
				return;
			}
		}
	}
}
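/*
 * Build a job_config from the global CLI options for a single bdev,
 * pinning it to the next core in round-robin order. offset/range carve
 * the bdev into per-core slices in multithread mode; a range of 0 means
 * the whole bdev.
 */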
static int
make_cli_job_config(const char *filename, int64_t offset, uint64_t range)
{
	struct job_config *config = calloc(1, sizeof(*config));

	if (config == NULL) {
		fprintf(stderr, "Unable to allocate memory for job config\n");
		return -ENOMEM;
	}

	config->name = filename;
	config->filename = filename;
	spdk_cpuset_zero(&config->cpumask);
	spdk_cpuset_set_cpu(&config->cpumask, _get_next_core(), true);
	config->bs = g_io_size;
	config->iodepth = g_queue_depth;
	config->rwmixread = g_rw_percentage;
	config->offset = offset;
	config->length = range;
	config->rw = parse_rw(g_workload_type, BDEVPERF_CONFIG_ERROR);
	if ((int)config->rw == BDEVPERF_CONFIG_ERROR) {
		free(config);
		return -EINVAL;
	}

	TAILQ_INSERT_TAIL(&job_config_list, config, link);
	return 0;
}

static void
bdevperf_construct_multithread_job_configs(void)
{
	struct spdk_bdev *bdev;
	uint32_t i;
	uint32_t num_cores;
	uint64_t blocks_per_job;
	int64_t offset;

	num_cores = 0;
	SPDK_ENV_FOREACH_CORE(i) {
		num_cores++;
	}

	if (num_cores == 0) {
		g_run_rc = -EINVAL;
		return;
	}

	if (g_job_bdev_name != NULL) {
		bdev = spdk_bdev_get_by_name(g_job_bdev_name);
		if (!bdev) {
			fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name);
			return;
		}

		blocks_per_job = spdk_bdev_get_num_blocks(bdev) / num_cores;
		offset = 0;

		SPDK_ENV_FOREACH_CORE(i) {
			g_run_rc = make_cli_job_config(g_job_bdev_name, offset, blocks_per_job);
			if (g_run_rc) {
				return;
			}

			offset += blocks_per_job;
		}
	} else {
		bdev = spdk_bdev_first_leaf();
		while (bdev != NULL) {
			blocks_per_job = spdk_bdev_get_num_blocks(bdev) / num_cores;
			offset = 0;

			SPDK_ENV_FOREACH_CORE(i) {
				g_run_rc = make_cli_job_config(spdk_bdev_get_name(bdev),
							       offset, blocks_per_job);
				if (g_run_rc) {
					return;
				}

				offset += blocks_per_job;
			}

			bdev = spdk_bdev_next_leaf(bdev);
		}
	}
}

static void
bdevperf_construct_job_configs(void)
{
	struct spdk_bdev *bdev;

	/* There are three different modes for allocating jobs. Standard mode
	 * (the default) creates one spdk_thread per bdev and runs the I/O job there.
	 *
	 * The -C flag places bdevperf into "multithread" mode, meaning it creates
	 * one spdk_thread per bdev PER CORE, and runs a copy of the job on each.
	 * This effectively runs multiple threads per bdev.
	 *
	 * The -j flag implies "FIO" mode, which tries to mimic the semantics of FIO jobs.
	 * In "FIO" mode, threads are spawned per-job instead of per-bdev.
	 * Each FIO job can be individually parameterized by filename, cpu mask, etc.,
	 * unlike the other modes, which support only global options.
	 */

	if (g_bdevperf_conf) {
		goto end;
	} else if (g_multithread_mode) {
		bdevperf_construct_multithread_job_configs();
		goto end;
	}

	if (g_job_bdev_name != NULL) {
		bdev = spdk_bdev_get_by_name(g_job_bdev_name);
		if (bdev) {
			/* Construct the job */
			g_run_rc = make_cli_job_config(g_job_bdev_name, 0, 0);
		} else {
			fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name);
		}
	} else {
		bdev = spdk_bdev_first_leaf();

		while (bdev != NULL) {
			/* Construct the job */
			g_run_rc = make_cli_job_config(spdk_bdev_get_name(bdev), 0, 0);
			if (g_run_rc) {
				break;
			}

			bdev = spdk_bdev_next_leaf(bdev);
		}
	}

end:
	/* Increment the initial construct_jobs count so that it will never reach 0
	 * in the middle of iteration.
	 */
	g_construct_job_count = 1;

	if (g_run_rc == 0) {
		bdevperf_construct_jobs();
	}

	_bdevperf_construct_job_done(NULL);
}
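/*
 * Read a non-negative integer option from a config section. Returns the
 * parsed value, the supplied default when the key is absent (only legal
 * for [global] when the default is BDEVPERF_CONFIG_UNDEFINED), or
 * BDEVPERF_CONFIG_ERROR on a missing required key or negative value.
 */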
static int
parse_uint_option(struct spdk_conf_section *s, const char *name, int def)
{
	const char *job_name;
	int tmp;

	tmp = spdk_conf_section_get_intval(s, name);
	if (tmp == -1) {
		/* Field was not found. Check the default value.
		 * In the [global] section it is ok to have undefined values,
		 * but for other sections it is not ok. */
		if (def == BDEVPERF_CONFIG_UNDEFINED) {
			job_name = spdk_conf_section_get_name(s);
			if (strcmp(job_name, "global") == 0) {
				return def;
			}

			fprintf(stderr,
				"Job '%s' has no '%s' assigned\n",
				job_name, name);
			return BDEVPERF_CONFIG_ERROR;
		}
		return def;
	}

	/* NOTE: get_intval returns nonnegative on success */
	if (tmp < 0) {
		fprintf(stderr, "Job '%s' has bad '%s' value.\n",
			spdk_conf_section_get_name(s), name);
		return BDEVPERF_CONFIG_ERROR;
	}

	return tmp;
}

/* CLI arguments override parameters for global sections */
static void
config_set_cli_args(struct job_config *config)
{
	if (g_job_bdev_name) {
		config->filename = g_job_bdev_name;
	}
	if (g_io_size > 0) {
		config->bs = g_io_size;
	}
	if (g_queue_depth > 0) {
		config->iodepth = g_queue_depth;
	}
	if (g_rw_percentage > 0) {
		config->rwmixread = g_rw_percentage;
	}
	if (g_workload_type) {
		config->rw = parse_rw(g_workload_type, config->rw);
	}
}
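/*
 * Parse the INI-style job file given with -j. Option precedence, highest
 * first: a job section's own values, CLI arguments, the most recent
 * [global] section, then built-in defaults. Encountering a new [global]
 * section resets the running global defaults.
 */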
static int
read_job_config(void)
{
	struct job_config global_default_config;
	struct job_config global_config;
	struct spdk_conf_section *s;
	struct job_config *config;
	const char *cpumask;
	const char *rw;
	bool is_global;
	int n = 0;
	int val;

	if (g_bdevperf_conf_file == NULL) {
		return 0;
	}

	g_bdevperf_conf = spdk_conf_allocate();
	if (g_bdevperf_conf == NULL) {
		fprintf(stderr, "Could not allocate job config structure\n");
		return 1;
	}

	spdk_conf_disable_sections_merge(g_bdevperf_conf);
	if (spdk_conf_read(g_bdevperf_conf, g_bdevperf_conf_file)) {
		fprintf(stderr, "Invalid job config\n");
		return 1;
	}

	/* Initialize global defaults */
	global_default_config.filename = NULL;
	/* A zero mask is the same as g_all_cpuset.
	 * g_all_cpuset is not initialized yet,
	 * so use the zero mask as the default instead. */
	spdk_cpuset_zero(&global_default_config.cpumask);
	global_default_config.bs = BDEVPERF_CONFIG_UNDEFINED;
	global_default_config.iodepth = BDEVPERF_CONFIG_UNDEFINED;
	/* bdevperf has no default for the -M option, but in FIO the default is 50 */
	global_default_config.rwmixread = 50;
	global_default_config.offset = 0;
	/* length 0 means 100% */
	global_default_config.length = 0;
	global_default_config.rw = BDEVPERF_CONFIG_UNDEFINED;
	config_set_cli_args(&global_default_config);

	if ((int)global_default_config.rw == BDEVPERF_CONFIG_ERROR) {
		return 1;
	}

	/* There is only a single instance of global job_config.
	 * We just reset its values when we encounter a new [global] section. */
	global_config = global_default_config;

	for (s = spdk_conf_first_section(g_bdevperf_conf);
	     s != NULL;
	     s = spdk_conf_next_section(s)) {
		config = calloc(1, sizeof(*config));
		if (config == NULL) {
			fprintf(stderr, "Unable to allocate memory for job config\n");
			return 1;
		}

		config->name = spdk_conf_section_get_name(s);
		is_global = strcmp(config->name, "global") == 0;

		if (is_global) {
			global_config = global_default_config;
		}

		config->filename = spdk_conf_section_get_val(s, "filename");
		if (config->filename == NULL) {
			config->filename = global_config.filename;
		}
		if (!is_global) {
			if (config->filename == NULL) {
				fprintf(stderr, "Job '%s' expects 'filename' parameter\n", config->name);
				goto error;
			} else if (strnlen(config->filename, BDEVPERF_CONFIG_MAX_FILENAME)
				   >= BDEVPERF_CONFIG_MAX_FILENAME) {
				fprintf(stderr,
					"filename for '%s' job is too long. Max length is %d\n",
					config->name, BDEVPERF_CONFIG_MAX_FILENAME);
				goto error;
			}
		}

		cpumask = spdk_conf_section_get_val(s, "cpumask");
		if (cpumask == NULL) {
			config->cpumask = global_config.cpumask;
		} else if (spdk_cpuset_parse(&config->cpumask, cpumask)) {
			fprintf(stderr, "Job '%s' has bad 'cpumask' value\n", config->name);
			goto error;
		}

		config->bs = parse_uint_option(s, "bs", global_config.bs);
		if (config->bs == BDEVPERF_CONFIG_ERROR) {
			goto error;
		} else if (config->bs == 0) {
			fprintf(stderr, "'bs' of job '%s' must be greater than 0\n", config->name);
			goto error;
		}

		config->iodepth = parse_uint_option(s, "iodepth", global_config.iodepth);
		if (config->iodepth == BDEVPERF_CONFIG_ERROR) {
			goto error;
		} else if (config->iodepth == 0) {
			fprintf(stderr,
				"'iodepth' of job '%s' must be greater than 0\n",
				config->name);
			goto error;
		}

		config->rwmixread = parse_uint_option(s, "rwmixread", global_config.rwmixread);
		if (config->rwmixread == BDEVPERF_CONFIG_ERROR) {
			goto error;
		} else if (config->rwmixread > 100) {
			fprintf(stderr,
				"'rwmixread' value of '%s' job is not in 0-100 range\n",
				config->name);
			goto error;
		}

		config->offset = parse_uint_option(s, "offset", global_config.offset);
		if (config->offset == BDEVPERF_CONFIG_ERROR) {
			goto error;
		}

		val = parse_uint_option(s, "length", global_config.length);
		if (val == BDEVPERF_CONFIG_ERROR) {
			goto error;
		}
		config->length = val;

		rw = spdk_conf_section_get_val(s, "rw");
		config->rw = parse_rw(rw, global_config.rw);
		if ((int)config->rw == BDEVPERF_CONFIG_ERROR) {
			fprintf(stderr, "Job '%s' has bad 'rw' value\n", config->name);
			goto error;
		} else if (!is_global && (int)config->rw == BDEVPERF_CONFIG_UNDEFINED) {
			fprintf(stderr, "Job '%s' has no 'rw' assigned\n", config->name);
			goto error;
		}

		if (is_global) {
			config_set_cli_args(config);
			global_config = *config;
			free(config);
		} else {
			TAILQ_INSERT_TAIL(&job_config_list, config, link);
			n++;
		}
	}

	printf("Using job config with %d jobs\n", n);
	return 0;
error:
	free(config);
	return 1;
}

static void
bdevperf_run(void *arg1)
{
	uint32_t i;

	g_main_thread = spdk_get_thread();

	spdk_cpuset_zero(&g_all_cpuset);
	SPDK_ENV_FOREACH_CORE(i) {
		spdk_cpuset_set_cpu(&g_all_cpuset, i, true);
	}

	if (g_wait_for_tests) {
		/* Do not perform any tests until RPC is received */
		return;
	}

	bdevperf_construct_job_configs();
}
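/*
 * Completion for the "perform_tests" RPC: respond to the saved request
 * with the run's return code (or an error), then clear g_run_rc and
 * g_stats so a subsequent RPC starts from a clean slate.
 */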
static void
rpc_perform_tests_cb(void)
{
	struct spdk_json_write_ctx *w;
	struct spdk_jsonrpc_request *request = g_request;

	g_request = NULL;

	if (g_run_rc == 0) {
		w = spdk_jsonrpc_begin_result(request);
		spdk_json_write_uint32(w, g_run_rc);
		spdk_jsonrpc_end_result(request, w);
	} else {
		spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
						     "bdevperf failed with error %s", spdk_strerror(-g_run_rc));
	}

	/* Reset g_run_rc to 0 for the next test run. */
	g_run_rc = 0;

	/* Reset g_stats to 0 for the next test run. */
	memset(&g_stats, 0, sizeof(g_stats));
}

static void
rpc_perform_tests(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params)
{
	if (params != NULL) {
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
						 "perform_tests method requires no parameters");
		return;
	}
	if (g_request != NULL) {
		fprintf(stderr, "Another test is already in progress.\n");
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
						 spdk_strerror(-EINPROGRESS));
		return;
	}
	g_request = request;

	/* Only construct job configs at the first test run. */
	if (TAILQ_EMPTY(&job_config_list)) {
		bdevperf_construct_job_configs();
	} else {
		bdevperf_construct_jobs();
	}
}
SPDK_RPC_REGISTER("perform_tests", rpc_perform_tests, SPDK_RPC_RUNTIME)

static void
_bdevperf_job_drain(void *ctx)
{
	bdevperf_job_drain(ctx);
}

static void
spdk_bdevperf_shutdown_cb(void)
{
	struct bdevperf_job *job, *tmp;

	g_shutdown = true;

	if (g_bdevperf.running_jobs == 0) {
		bdevperf_test_done(NULL);
		return;
	}

	/* Iterate jobs to stop all I/O */
	TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, tmp) {
		spdk_thread_send_msg(job->thread, _bdevperf_job_drain, job);
	}
}

static int
bdevperf_parse_arg(int ch, char *arg)
{
	long long tmp;

	if (ch == 'w') {
		g_workload_type = optarg;
	} else if (ch == 'T') {
		g_job_bdev_name = optarg;
	} else if (ch == 'z') {
		g_wait_for_tests = true;
	} else if (ch == 'Z') {
		g_zcopy = true;
	} else if (ch == 'X') {
		g_abort = true;
	} else if (ch == 'C') {
		g_multithread_mode = true;
	} else if (ch == 'f') {
		g_continue_on_failure = true;
	} else if (ch == 'j') {
		g_bdevperf_conf_file = optarg;
	} else if (ch == 'F') {
		char *endptr;

		errno = 0;
		g_zipf_theta = strtod(optarg, &endptr);
		if (errno || optarg == endptr || g_zipf_theta < 0) {
			fprintf(stderr, "Illegal zipf theta value %s\n", optarg);
			return -EINVAL;
		}
	} else {
		tmp = spdk_strtoll(optarg, 10);
		if (tmp < 0) {
			fprintf(stderr, "Parse failed for the option %c.\n", ch);
			return tmp;
		} else if (tmp >= INT_MAX) {
			fprintf(stderr, "Parsed option was too large %c.\n", ch);
			return -ERANGE;
		}

		switch (ch) {
		case 'q':
			g_queue_depth = tmp;
			break;
		case 'o':
			g_io_size = tmp;
			break;
		case 't':
			g_time_in_sec = tmp;
			break;
		case 'k':
			g_timeout_in_sec = tmp;
			break;
		case 'M':
			g_rw_percentage = tmp;
			g_mix_specified = true;
			break;
		case 'P':
			g_show_performance_ema_period = tmp;
			break;
		case 'S':
			g_show_performance_real_time = 1;
			g_show_performance_period_in_usec = tmp * 1000000;
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
static void
bdevperf_usage(void)
{
	printf(" -q <depth>                io depth\n");
	printf(" -o <size>                 io size in bytes\n");
	printf(" -w <type>                 io pattern type, must be one of (read, write, randread, randwrite, rw, randrw, verify, reset, unmap, flush, write_zeroes)\n");
	printf(" -t <time>                 time in seconds\n");
	printf(" -k <timeout>              timeout in seconds to detect starved I/O (default is 0 and disabled)\n");
	printf(" -M <percent>              rwmixread (100 for reads, 0 for writes)\n");
	printf(" -P <num>                  number of moving-average periods\n");
	printf("\t\t(If set to n, show weighted mean of the previous n IO/s in real time)\n");
	printf("\t\t(Formula: M = 2 / (n + 1), EMA[i+1] = IO/s * M + (1 - M) * EMA[i])\n");
	printf("\t\t(only valid with -S)\n");
	printf(" -S <period>               show performance result in real time every <period> seconds\n");
	printf(" -T <bdev>                 bdev to run against. Default: all available bdevs.\n");
	printf(" -f                        continue processing I/O even after failures\n");
	printf(" -F <zipf theta>           use zipf distribution for random I/O\n");
	printf(" -Z                        enable using zcopy bdev API for read or write I/O\n");
	printf(" -z                        start bdevperf, but wait for RPC to start tests\n");
	printf(" -X                        abort timed out I/O\n");
	printf(" -C                        enable every core to send I/Os to each bdev\n");
	printf(" -j <filename>             use job config file\n");
}

static int
verify_test_params(struct spdk_app_opts *opts)
{
	/* When RPC is used for starting tests and
	 * no rpc_addr was configured for the app,
	 * use the default address.
	 */
	if (g_wait_for_tests && opts->rpc_addr == NULL) {
		opts->rpc_addr = SPDK_DEFAULT_RPC_ADDR;
	}

	if (!g_bdevperf_conf_file && g_queue_depth <= 0) {
		goto out;
	}
	if (!g_bdevperf_conf_file && g_io_size <= 0) {
		goto out;
	}
	if (!g_bdevperf_conf_file && !g_workload_type) {
		goto out;
	}
	if (g_time_in_sec <= 0) {
		goto out;
	}
	g_time_in_usec = g_time_in_sec * 1000000LL;

	if (g_timeout_in_sec < 0) {
		goto out;
	}

	if (g_abort && !g_timeout_in_sec) {
		printf("Timeout must be set for the abort option; ignoring -X.\n");
	}

	if (g_show_performance_ema_period > 0 &&
	    g_show_performance_real_time == 0) {
		fprintf(stderr, "-P option must be specified with -S option\n");
		return 1;
	}

	if (g_io_size > SPDK_BDEV_LARGE_BUF_MAX_SIZE) {
		printf("I/O size of %d is greater than zero copy threshold (%d).\n",
		       g_io_size, SPDK_BDEV_LARGE_BUF_MAX_SIZE);
		printf("Zero copy mechanism will not be used.\n");
		g_zcopy = false;
	}

	if (g_bdevperf_conf_file) {
		/* workload_type verification happens during config file parsing */
		return 0;
	}

	if (!strcmp(g_workload_type, "verify") ||
	    !strcmp(g_workload_type, "reset")) {
		g_rw_percentage = 50;
		if (g_io_size > SPDK_BDEV_LARGE_BUF_MAX_SIZE) {
			fprintf(stderr, "Unable to exceed max I/O size of %d for verify (%d provided).\n",
				SPDK_BDEV_LARGE_BUF_MAX_SIZE, g_io_size);
			return 1;
		}
		g_verify = true;
		if (!strcmp(g_workload_type, "reset")) {
			g_reset = true;
		}
	}

	if (!strcmp(g_workload_type, "read") ||
	    !strcmp(g_workload_type, "randread") ||
	    !strcmp(g_workload_type, "write") ||
	    !strcmp(g_workload_type, "randwrite") ||
	    !strcmp(g_workload_type, "verify") ||
	    !strcmp(g_workload_type, "reset") ||
	    !strcmp(g_workload_type, "unmap") ||
	    !strcmp(g_workload_type, "write_zeroes") ||
	    !strcmp(g_workload_type, "flush")) {
		if (g_mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use -M option"
				" only when using rw or randrw.\n");
		}
	}

	if (!strcmp(g_workload_type, "rw") ||
	    !strcmp(g_workload_type, "randrw")) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr,
				"-M must be set to a value from 0 to 100 "
				"for rw or randrw.\n");
			return 1;
		}
	}

	return 0;
out:
	spdk_app_usage();
	bdevperf_usage();
	return 1;
}
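/*
 * Illustrative -z workflow: bdevperf starts, listens on the default RPC
 * socket, and waits until a perform_tests RPC arrives. The helper script
 * name and path below are assumptions based on the SPDK source layout:
 *
 *   ./bdevperf -z -q 64 -o 4096 -w randread -t 30 &
 *   ./examples/bdev/bdevperf/bdevperf.py perform_tests
 *
 * perform_tests takes no parameters; rpc_perform_tests() above rejects any
 * params with SPDK_JSONRPC_ERROR_INVALID_PARAMS and rejects a concurrent
 * run with -EINPROGRESS.
 */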
int
main(int argc, char **argv)
{
	struct spdk_app_opts opts = {};
	int rc;

	/* Use the runtime PID to set the random seed */
	srand(getpid());

	spdk_app_opts_init(&opts, sizeof(opts));
	opts.name = "bdevperf";
	opts.rpc_addr = NULL;
	opts.shutdown_cb = spdk_bdevperf_shutdown_cb;

	if ((rc = spdk_app_parse_args(argc, argv, &opts, "Zzfq:o:t:w:k:CF:M:P:S:T:Xj:", NULL,
				      bdevperf_parse_arg, bdevperf_usage)) !=
	    SPDK_APP_PARSE_ARGS_SUCCESS) {
		return rc;
	}

	if (read_job_config()) {
		free_job_config();
		return 1;
	}

	if (verify_test_params(&opts) != 0) {
		free_job_config();
		exit(1);
	}

	rc = spdk_app_start(&opts, bdevperf_run, NULL);

	spdk_app_fini();
	free_job_config();
	return rc;
}
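/*
 * Illustrative job config file for -j, based on the keys carried by
 * struct job_config (filename, cpumask, bs, iodepth, rwmixread, offset,
 * length, rw). The bdev name "Malloc0" and the file name "job.cfg" are
 * assumptions for the example:
 *
 *   [global]
 *   bs=4096
 *   iodepth=32
 *   rw=randrw
 *   rwmixread=70
 *
 *   [job0]
 *   filename=Malloc0
 *
 * Run as: ./bdevperf -j job.cfg -t 30
 */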