1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2016 Intel Corporation. 3 * Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. 4 * All rights reserved. 5 */ 6 7 #include "spdk/stdinc.h" 8 9 #include "spdk/bdev.h" 10 #include "spdk/accel.h" 11 #include "spdk/endian.h" 12 #include "spdk/env.h" 13 #include "spdk/event.h" 14 #include "spdk/log.h" 15 #include "spdk/util.h" 16 #include "spdk/thread.h" 17 #include "spdk/string.h" 18 #include "spdk/rpc.h" 19 #include "spdk/bit_array.h" 20 #include "spdk/conf.h" 21 #include "spdk/zipf.h" 22 #include "spdk/histogram_data.h" 23 24 #define BDEVPERF_CONFIG_MAX_FILENAME 1024 25 #define BDEVPERF_CONFIG_UNDEFINED -1 26 #define BDEVPERF_CONFIG_ERROR -2 27 #define PATTERN_TYPES_STR "(read, write, randread, randwrite, rw, randrw, verify, reset, unmap, flush, write_zeroes)" 28 29 struct bdevperf_task { 30 struct iovec iov; 31 struct bdevperf_job *job; 32 struct spdk_bdev_io *bdev_io; 33 void *buf; 34 void *verify_buf; 35 void *md_buf; 36 uint64_t offset_blocks; 37 struct bdevperf_task *task_to_abort; 38 enum spdk_bdev_io_type io_type; 39 TAILQ_ENTRY(bdevperf_task) link; 40 struct spdk_bdev_io_wait_entry bdev_io_wait; 41 }; 42 43 static char *g_workload_type = NULL; 44 static int g_io_size = 0; 45 /* initialize to invalid value so we can detect if user overrides it. */ 46 static int g_rw_percentage = -1; 47 static bool g_verify = false; 48 static bool g_reset = false; 49 static bool g_continue_on_failure = false; 50 static bool g_abort = false; 51 static bool g_error_to_exit = false; 52 static int g_queue_depth = 0; 53 static uint64_t g_time_in_usec; 54 static int g_show_performance_real_time = 0; 55 static uint64_t g_show_performance_period_in_usec = SPDK_SEC_TO_USEC; 56 static uint64_t g_show_performance_period_num = 0; 57 static uint64_t g_show_performance_ema_period = 0; 58 static int g_run_rc = 0; 59 static bool g_shutdown = false; 60 static uint64_t g_start_tsc; 61 static uint64_t g_shutdown_tsc; 62 static bool g_zcopy = false; 63 static struct spdk_thread *g_main_thread; 64 static int g_time_in_sec = 0; 65 static bool g_mix_specified = false; 66 static const char *g_job_bdev_name; 67 static bool g_wait_for_tests = false; 68 static struct spdk_jsonrpc_request *g_request = NULL; 69 static bool g_multithread_mode = false; 70 static int g_timeout_in_sec; 71 static struct spdk_conf *g_bdevperf_conf = NULL; 72 static const char *g_bdevperf_conf_file = NULL; 73 static double g_zipf_theta; 74 static bool g_random_map = false; 75 static bool g_unique_writes = false; 76 77 static struct spdk_cpuset g_all_cpuset; 78 static struct spdk_poller *g_perf_timer = NULL; 79 80 static void bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task); 81 static void rpc_perform_tests_cb(void); 82 static int bdevperf_parse_arg(int ch, char *arg); 83 static int verify_test_params(void); 84 static void bdevperf_usage(void); 85 86 static uint32_t g_bdev_count = 0; 87 static uint32_t g_latency_display_level; 88 89 static bool g_one_thread_per_lcore = false; 90 91 static const double g_latency_cutoffs[] = { 92 0.01, 93 0.10, 94 0.25, 95 0.50, 96 0.75, 97 0.90, 98 0.95, 99 0.98, 100 0.99, 101 0.995, 102 0.999, 103 0.9999, 104 0.99999, 105 0.999999, 106 0.9999999, 107 -1, 108 }; 109 110 static const char *g_rpc_log_file_name = NULL; 111 static FILE *g_rpc_log_file = NULL; 112 113 struct latency_info { 114 uint64_t min; 115 uint64_t max; 116 uint64_t total; 117 }; 118 119 120 enum job_config_rw { 121 JOB_CONFIG_RW_READ = 0, 122 JOB_CONFIG_RW_WRITE, 123 JOB_CONFIG_RW_RANDREAD, 124 JOB_CONFIG_RW_RANDWRITE, 125 JOB_CONFIG_RW_RW, 126 JOB_CONFIG_RW_RANDRW, 127 JOB_CONFIG_RW_VERIFY, 128 JOB_CONFIG_RW_RESET, 129 JOB_CONFIG_RW_UNMAP, 130 JOB_CONFIG_RW_FLUSH, 131 JOB_CONFIG_RW_WRITE_ZEROES, 132 }; 133 134 struct bdevperf_job { 135 char *name; 136 struct spdk_bdev *bdev; 137 struct spdk_bdev_desc *bdev_desc; 138 struct spdk_io_channel *ch; 139 TAILQ_ENTRY(bdevperf_job) link; 140 struct spdk_thread *thread; 141 142 enum job_config_rw workload_type; 143 int io_size; 144 int rw_percentage; 145 bool is_random; 146 bool verify; 147 bool reset; 148 bool continue_on_failure; 149 bool unmap; 150 bool write_zeroes; 151 bool flush; 152 bool abort; 153 int queue_depth; 154 unsigned int seed; 155 156 uint64_t io_completed; 157 uint64_t io_failed; 158 uint64_t io_timeout; 159 uint64_t prev_io_completed; 160 double ema_io_per_second; 161 int current_queue_depth; 162 uint64_t size_in_ios; 163 uint64_t ios_base; 164 uint64_t offset_in_ios; 165 uint64_t io_size_blocks; 166 uint64_t buf_size; 167 uint32_t dif_check_flags; 168 bool is_draining; 169 struct spdk_poller *run_timer; 170 struct spdk_poller *reset_timer; 171 struct spdk_bit_array *outstanding; 172 struct spdk_zipf *zipf; 173 TAILQ_HEAD(, bdevperf_task) task_list; 174 uint64_t run_time_in_usec; 175 176 /* keep channel's histogram data before being destroyed */ 177 struct spdk_histogram_data *histogram; 178 struct spdk_bit_array *random_map; 179 180 /* counter used for generating unique write data (-U option) */ 181 uint32_t write_io_count; 182 }; 183 184 struct spdk_bdevperf { 185 TAILQ_HEAD(, bdevperf_job) jobs; 186 uint32_t running_jobs; 187 }; 188 189 static struct spdk_bdevperf g_bdevperf = { 190 .jobs = TAILQ_HEAD_INITIALIZER(g_bdevperf.jobs), 191 .running_jobs = 0, 192 }; 193 194 /* Storing values from a section of job config file */ 195 struct job_config { 196 const char *name; 197 const char *filename; 198 struct spdk_cpuset cpumask; 199 int bs; 200 int iodepth; 201 int rwmixread; 202 uint32_t lcore; 203 int64_t offset; 204 uint64_t length; 205 enum job_config_rw rw; 206 TAILQ_ENTRY(job_config) link; 207 }; 208 209 TAILQ_HEAD(, job_config) job_config_list 210 = TAILQ_HEAD_INITIALIZER(job_config_list); 211 212 static bool g_performance_dump_active = false; 213 214 struct bdevperf_aggregate_stats { 215 struct bdevperf_job *current_job; 216 uint64_t io_time_in_usec; 217 uint64_t ema_period; 218 double total_io_per_second; 219 double total_mb_per_second; 220 double total_failed_per_second; 221 double total_timeout_per_second; 222 double min_latency; 223 double max_latency; 224 uint64_t total_io_completed; 225 uint64_t total_tsc; 226 }; 227 228 static struct bdevperf_aggregate_stats g_stats = {.min_latency = (double)UINT64_MAX}; 229 230 struct lcore_thread { 231 struct spdk_thread *thread; 232 uint32_t lcore; 233 TAILQ_ENTRY(lcore_thread) link; 234 }; 235 236 TAILQ_HEAD(, lcore_thread) g_lcore_thread_list 237 = TAILQ_HEAD_INITIALIZER(g_lcore_thread_list); 238 239 240 static char * 241 parse_workload_type(enum job_config_rw ret) 242 { 243 switch (ret) { 244 case JOB_CONFIG_RW_READ: 245 return "read"; 246 case JOB_CONFIG_RW_RANDREAD: 247 return "randread"; 248 case JOB_CONFIG_RW_WRITE: 249 return "write"; 250 case JOB_CONFIG_RW_RANDWRITE: 251 return "randwrite"; 252 case JOB_CONFIG_RW_VERIFY: 253 return "verify"; 254 case JOB_CONFIG_RW_RESET: 255 return "reset"; 256 case JOB_CONFIG_RW_UNMAP: 257 return "unmap"; 258 case JOB_CONFIG_RW_WRITE_ZEROES: 259 return "write_zeroes"; 260 case JOB_CONFIG_RW_FLUSH: 261 return "flush"; 262 case JOB_CONFIG_RW_RW: 263 return "rw"; 264 case JOB_CONFIG_RW_RANDRW: 265 return "randrw"; 266 default: 267 fprintf(stderr, "wrong workload_type code\n"); 268 } 269 270 return NULL; 271 } 272 273 /* 274 * Cumulative Moving Average (CMA): average of all data up to current 275 * Exponential Moving Average (EMA): weighted mean of the previous n data and more weight is given to recent 276 * Simple Moving Average (SMA): unweighted mean of the previous n data 277 * 278 * Bdevperf supports CMA and EMA. 279 */ 280 static double 281 get_cma_io_per_second(struct bdevperf_job *job, uint64_t io_time_in_usec) 282 { 283 return (double)job->io_completed * SPDK_SEC_TO_USEC / io_time_in_usec; 284 } 285 286 static double 287 get_ema_io_per_second(struct bdevperf_job *job, uint64_t ema_period) 288 { 289 double io_completed, io_per_second; 290 291 io_completed = job->io_completed; 292 io_per_second = (double)(io_completed - job->prev_io_completed) * SPDK_SEC_TO_USEC 293 / g_show_performance_period_in_usec; 294 job->prev_io_completed = io_completed; 295 296 job->ema_io_per_second += (io_per_second - job->ema_io_per_second) * 2 297 / (ema_period + 1); 298 return job->ema_io_per_second; 299 } 300 301 static void 302 get_avg_latency(void *ctx, uint64_t start, uint64_t end, uint64_t count, 303 uint64_t total, uint64_t so_far) 304 { 305 struct latency_info *latency_info = ctx; 306 307 if (count == 0) { 308 return; 309 } 310 311 latency_info->total += (start + end) / 2 * count; 312 313 if (so_far == count) { 314 latency_info->min = start; 315 } 316 317 if (so_far == total) { 318 latency_info->max = end; 319 } 320 } 321 322 static void 323 performance_dump_job(struct bdevperf_aggregate_stats *stats, struct bdevperf_job *job) 324 { 325 double io_per_second, mb_per_second, failed_per_second, timeout_per_second; 326 double average_latency = 0.0, min_latency, max_latency; 327 uint64_t time_in_usec; 328 uint64_t tsc_rate; 329 uint64_t total_io; 330 struct latency_info latency_info = {}; 331 332 if (job->workload_type == JOB_CONFIG_RW_RW || job->workload_type == JOB_CONFIG_RW_RANDRW) { 333 printf("\r Job: %s (Core Mask 0x%s, workload: %s, percentage: %d, depth: %d, IO size: %d)\n", 334 job->name, spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)), 335 parse_workload_type(job->workload_type), job->rw_percentage, 336 job->queue_depth, job->io_size); 337 } else { 338 printf("\r Job: %s (Core Mask 0x%s, workload: %s, depth: %d, IO size: %d)\n", 339 job->name, spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)), 340 parse_workload_type(job->workload_type), job->queue_depth, job->io_size); 341 } 342 343 344 if (job->io_failed > 0 && !job->reset && !job->continue_on_failure) { 345 printf("\r Job: %s ended in about %.2f seconds with error\n", 346 job->name, (double)job->run_time_in_usec / SPDK_SEC_TO_USEC); 347 } 348 if (job->verify) { 349 printf("\t Verification LBA range: start 0x%" PRIx64 " length 0x%" PRIx64 "\n", 350 job->ios_base, job->size_in_ios); 351 } 352 353 if (g_performance_dump_active == true) { 354 /* Use job's actual run time as Job has ended */ 355 if (job->io_failed > 0 && !job->continue_on_failure) { 356 time_in_usec = job->run_time_in_usec; 357 } else { 358 time_in_usec = stats->io_time_in_usec; 359 } 360 } else { 361 time_in_usec = job->run_time_in_usec; 362 } 363 364 if (stats->ema_period == 0) { 365 io_per_second = get_cma_io_per_second(job, time_in_usec); 366 } else { 367 io_per_second = get_ema_io_per_second(job, stats->ema_period); 368 } 369 370 tsc_rate = spdk_get_ticks_hz(); 371 mb_per_second = io_per_second * job->io_size / (1024 * 1024); 372 373 spdk_histogram_data_iterate(job->histogram, get_avg_latency, &latency_info); 374 375 total_io = job->io_completed + job->io_failed; 376 if (total_io != 0) { 377 average_latency = (double)latency_info.total / total_io * SPDK_SEC_TO_USEC / tsc_rate; 378 } 379 min_latency = (double)latency_info.min * SPDK_SEC_TO_USEC / tsc_rate; 380 max_latency = (double)latency_info.max * SPDK_SEC_TO_USEC / tsc_rate; 381 382 failed_per_second = (double)job->io_failed * SPDK_SEC_TO_USEC / time_in_usec; 383 timeout_per_second = (double)job->io_timeout * SPDK_SEC_TO_USEC / time_in_usec; 384 385 printf("\t %-20s: %10.2f %10.2f %10.2f", 386 job->name, (float)time_in_usec / SPDK_SEC_TO_USEC, io_per_second, mb_per_second); 387 printf(" %10.2f %8.2f", 388 failed_per_second, timeout_per_second); 389 printf(" %10.2f %10.2f %10.2f\n", 390 average_latency, min_latency, max_latency); 391 392 stats->total_io_per_second += io_per_second; 393 stats->total_mb_per_second += mb_per_second; 394 stats->total_failed_per_second += failed_per_second; 395 stats->total_timeout_per_second += timeout_per_second; 396 stats->total_io_completed += job->io_completed + job->io_failed; 397 stats->total_tsc += latency_info.total; 398 if (min_latency < stats->min_latency) { 399 stats->min_latency = min_latency; 400 } 401 if (max_latency > stats->max_latency) { 402 stats->max_latency = max_latency; 403 } 404 } 405 406 static void 407 generate_data(struct bdevperf_job *job, void *buf, void *md_buf, bool unique) 408 { 409 int offset_blocks = 0, md_offset, data_block_size, inner_offset; 410 int buf_len = job->buf_size; 411 int block_size = spdk_bdev_get_block_size(job->bdev); 412 int md_size = spdk_bdev_get_md_size(job->bdev); 413 int num_blocks = job->io_size_blocks; 414 415 if (buf_len < num_blocks * block_size) { 416 return; 417 } 418 419 if (md_buf == NULL) { 420 data_block_size = block_size - md_size; 421 md_buf = (char *)buf + data_block_size; 422 md_offset = block_size; 423 } else { 424 data_block_size = block_size; 425 md_offset = md_size; 426 } 427 428 if (unique) { 429 uint64_t io_count = job->write_io_count++; 430 unsigned int i; 431 432 assert(md_size == 0 || md_size >= (int)sizeof(uint64_t)); 433 434 while (offset_blocks < num_blocks) { 435 inner_offset = 0; 436 while (inner_offset < data_block_size) { 437 *(uint64_t *)buf = (io_count << 32) | (offset_blocks + inner_offset); 438 inner_offset += sizeof(uint64_t); 439 buf += sizeof(uint64_t); 440 } 441 for (i = 0; i < md_size / sizeof(uint64_t); i++) { 442 ((uint64_t *)md_buf)[i] = (io_count << 32) | offset_blocks; 443 } 444 md_buf += md_offset; 445 offset_blocks++; 446 } 447 return; 448 } 449 450 while (offset_blocks < num_blocks) { 451 inner_offset = 0; 452 while (inner_offset < data_block_size) { 453 *(uint32_t *)buf = offset_blocks + inner_offset; 454 inner_offset += sizeof(uint32_t); 455 buf += sizeof(uint32_t); 456 } 457 memset(md_buf, offset_blocks, md_size); 458 md_buf += md_offset; 459 offset_blocks++; 460 } 461 } 462 463 static bool 464 copy_data(void *wr_buf, int wr_buf_len, void *rd_buf, int rd_buf_len, int block_size, 465 void *wr_md_buf, void *rd_md_buf, int md_size, int num_blocks) 466 { 467 if (wr_buf_len < num_blocks * block_size || rd_buf_len < num_blocks * block_size) { 468 return false; 469 } 470 471 assert((wr_md_buf != NULL) == (rd_md_buf != NULL)); 472 473 memcpy(wr_buf, rd_buf, block_size * num_blocks); 474 475 if (wr_md_buf != NULL) { 476 memcpy(wr_md_buf, rd_md_buf, md_size * num_blocks); 477 } 478 479 return true; 480 } 481 482 static bool 483 verify_data(void *wr_buf, int wr_buf_len, void *rd_buf, int rd_buf_len, int block_size, 484 void *wr_md_buf, void *rd_md_buf, int md_size, int num_blocks, bool md_check) 485 { 486 int offset_blocks = 0, md_offset, data_block_size; 487 488 if (wr_buf_len < num_blocks * block_size || rd_buf_len < num_blocks * block_size) { 489 return false; 490 } 491 492 assert((wr_md_buf != NULL) == (rd_md_buf != NULL)); 493 494 if (wr_md_buf == NULL) { 495 data_block_size = block_size - md_size; 496 wr_md_buf = (char *)wr_buf + data_block_size; 497 rd_md_buf = (char *)rd_buf + data_block_size; 498 md_offset = block_size; 499 } else { 500 data_block_size = block_size; 501 md_offset = md_size; 502 } 503 504 while (offset_blocks < num_blocks) { 505 if (memcmp(wr_buf, rd_buf, data_block_size) != 0) { 506 printf("data_block_size %d, num_blocks %d, offset %d\n", data_block_size, num_blocks, 507 offset_blocks); 508 spdk_log_dump(stdout, "rd_buf", rd_buf, data_block_size); 509 spdk_log_dump(stdout, "wr_buf", wr_buf, data_block_size); 510 return false; 511 } 512 513 wr_buf += block_size; 514 rd_buf += block_size; 515 516 if (md_check) { 517 if (memcmp(wr_md_buf, rd_md_buf, md_size) != 0) { 518 printf("md_size %d, num_blocks %d, offset %d\n", md_size, num_blocks, offset_blocks); 519 spdk_log_dump(stdout, "rd_md_buf", rd_md_buf, md_size); 520 spdk_log_dump(stdout, "wr_md_buf", wr_md_buf, md_size); 521 return false; 522 } 523 524 wr_md_buf += md_offset; 525 rd_md_buf += md_offset; 526 } 527 528 offset_blocks++; 529 } 530 531 return true; 532 } 533 534 static void 535 free_job_config(void) 536 { 537 struct job_config *config, *tmp; 538 539 spdk_conf_free(g_bdevperf_conf); 540 g_bdevperf_conf = NULL; 541 542 TAILQ_FOREACH_SAFE(config, &job_config_list, link, tmp) { 543 TAILQ_REMOVE(&job_config_list, config, link); 544 free(config); 545 } 546 } 547 548 static void 549 bdevperf_job_free(struct bdevperf_job *job) 550 { 551 spdk_histogram_data_free(job->histogram); 552 spdk_bit_array_free(&job->outstanding); 553 spdk_bit_array_free(&job->random_map); 554 spdk_zipf_free(&job->zipf); 555 free(job->name); 556 free(job); 557 } 558 559 static void 560 job_thread_exit(void *ctx) 561 { 562 spdk_thread_exit(spdk_get_thread()); 563 } 564 565 static void 566 check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count, 567 uint64_t total, uint64_t so_far) 568 { 569 double so_far_pct; 570 double **cutoff = ctx; 571 uint64_t tsc_rate; 572 573 if (count == 0) { 574 return; 575 } 576 577 tsc_rate = spdk_get_ticks_hz(); 578 so_far_pct = (double)so_far / total; 579 while (so_far_pct >= **cutoff && **cutoff > 0) { 580 printf("%9.5f%% : %9.3fus\n", **cutoff * 100, (double)end * SPDK_SEC_TO_USEC / tsc_rate); 581 (*cutoff)++; 582 } 583 } 584 585 static void 586 print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count, 587 uint64_t total, uint64_t so_far) 588 { 589 double so_far_pct; 590 uint64_t tsc_rate; 591 592 if (count == 0) { 593 return; 594 } 595 596 tsc_rate = spdk_get_ticks_hz(); 597 so_far_pct = (double)so_far * 100 / total; 598 printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n", 599 (double)start * SPDK_SEC_TO_USEC / tsc_rate, 600 (double)end * SPDK_SEC_TO_USEC / tsc_rate, 601 so_far_pct, count); 602 } 603 604 static void 605 bdevperf_test_done(void *ctx) 606 { 607 struct bdevperf_job *job, *jtmp; 608 struct bdevperf_task *task, *ttmp; 609 struct lcore_thread *lthread, *lttmp; 610 double average_latency = 0.0; 611 uint64_t time_in_usec; 612 int rc; 613 614 if (g_time_in_usec) { 615 g_stats.io_time_in_usec = g_time_in_usec; 616 617 if (!g_run_rc && g_performance_dump_active) { 618 spdk_thread_send_msg(spdk_get_thread(), bdevperf_test_done, NULL); 619 return; 620 } 621 } 622 623 if (g_show_performance_real_time) { 624 spdk_poller_unregister(&g_perf_timer); 625 } 626 627 if (g_shutdown) { 628 g_shutdown_tsc = spdk_get_ticks() - g_start_tsc; 629 time_in_usec = g_shutdown_tsc * SPDK_SEC_TO_USEC / spdk_get_ticks_hz(); 630 g_time_in_usec = (g_time_in_usec > time_in_usec) ? time_in_usec : g_time_in_usec; 631 printf("Received shutdown signal, test time was about %.6f seconds\n", 632 (double)g_time_in_usec / SPDK_SEC_TO_USEC); 633 } 634 635 printf("\n%*s\n", 107, "Latency(us)"); 636 printf("\r %-*s: %10s %10s %10s %10s %8s %10s %10s %10s\n", 637 28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s", "Average", "min", "max"); 638 639 TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) { 640 performance_dump_job(&g_stats, job); 641 } 642 643 printf("\r ==================================================================================" 644 "=================================\n"); 645 printf("\r %-28s: %10s %10.2f %10.2f", 646 "Total", "", g_stats.total_io_per_second, g_stats.total_mb_per_second); 647 printf(" %10.2f %8.2f", 648 g_stats.total_failed_per_second, g_stats.total_timeout_per_second); 649 650 if (g_stats.total_io_completed != 0) { 651 average_latency = ((double)g_stats.total_tsc / g_stats.total_io_completed) * SPDK_SEC_TO_USEC / 652 spdk_get_ticks_hz(); 653 } 654 printf(" %10.2f %10.2f %10.2f\n", average_latency, g_stats.min_latency, g_stats.max_latency); 655 656 if (g_latency_display_level == 0 || g_stats.total_io_completed == 0) { 657 goto clean; 658 } 659 660 printf("\n Latency summary\n"); 661 TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) { 662 printf("\r =============================================\n"); 663 printf("\r Job: %s (Core Mask 0x%s)\n", job->name, 664 spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread))); 665 666 const double *cutoff = g_latency_cutoffs; 667 668 spdk_histogram_data_iterate(job->histogram, check_cutoff, &cutoff); 669 670 printf("\n"); 671 } 672 673 if (g_latency_display_level == 1) { 674 goto clean; 675 } 676 677 printf("\r Latency histogram\n"); 678 TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) { 679 printf("\r =============================================\n"); 680 printf("\r Job: %s (Core Mask 0x%s)\n", job->name, 681 spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread))); 682 683 spdk_histogram_data_iterate(job->histogram, print_bucket, NULL); 684 printf("\n"); 685 } 686 687 clean: 688 fflush(stdout); 689 690 TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) { 691 TAILQ_REMOVE(&g_bdevperf.jobs, job, link); 692 693 if (!g_one_thread_per_lcore) { 694 spdk_thread_send_msg(job->thread, job_thread_exit, NULL); 695 } 696 697 TAILQ_FOREACH_SAFE(task, &job->task_list, link, ttmp) { 698 TAILQ_REMOVE(&job->task_list, task, link); 699 spdk_free(task->buf); 700 spdk_free(task->verify_buf); 701 spdk_free(task->md_buf); 702 free(task); 703 } 704 705 bdevperf_job_free(job); 706 } 707 708 if (g_one_thread_per_lcore) { 709 TAILQ_FOREACH_SAFE(lthread, &g_lcore_thread_list, link, lttmp) { 710 TAILQ_REMOVE(&g_lcore_thread_list, lthread, link); 711 spdk_thread_send_msg(lthread->thread, job_thread_exit, NULL); 712 free(lthread); 713 } 714 } 715 716 if (g_bdevperf_conf == NULL) { 717 free_job_config(); 718 } 719 720 rc = g_run_rc; 721 if (g_request && !g_shutdown) { 722 rpc_perform_tests_cb(); 723 if (rc != 0) { 724 spdk_app_stop(rc); 725 } 726 } else { 727 spdk_app_stop(rc); 728 } 729 } 730 731 static void 732 bdevperf_job_end(void *ctx) 733 { 734 assert(g_main_thread == spdk_get_thread()); 735 736 if (--g_bdevperf.running_jobs == 0) { 737 bdevperf_test_done(NULL); 738 } 739 } 740 741 static void 742 bdevperf_channel_get_histogram_cb(void *cb_arg, int status, struct spdk_histogram_data *histogram) 743 { 744 struct spdk_histogram_data *job_hist = cb_arg; 745 746 if (status == 0) { 747 spdk_histogram_data_merge(job_hist, histogram); 748 } 749 } 750 751 static void 752 bdevperf_job_empty(struct bdevperf_job *job) 753 { 754 uint64_t end_tsc = 0; 755 756 end_tsc = spdk_get_ticks() - g_start_tsc; 757 job->run_time_in_usec = end_tsc * SPDK_SEC_TO_USEC / spdk_get_ticks_hz(); 758 /* keep histogram info before channel is destroyed */ 759 spdk_bdev_channel_get_histogram(job->ch, bdevperf_channel_get_histogram_cb, 760 job->histogram); 761 spdk_put_io_channel(job->ch); 762 spdk_bdev_close(job->bdev_desc); 763 spdk_thread_send_msg(g_main_thread, bdevperf_job_end, NULL); 764 } 765 766 static void 767 bdevperf_end_task(struct bdevperf_task *task) 768 { 769 struct bdevperf_job *job = task->job; 770 771 TAILQ_INSERT_TAIL(&job->task_list, task, link); 772 if (job->is_draining) { 773 if (job->current_queue_depth == 0) { 774 bdevperf_job_empty(job); 775 } 776 } 777 } 778 779 static void 780 bdevperf_queue_io_wait_with_cb(struct bdevperf_task *task, spdk_bdev_io_wait_cb cb_fn) 781 { 782 struct bdevperf_job *job = task->job; 783 784 task->bdev_io_wait.bdev = job->bdev; 785 task->bdev_io_wait.cb_fn = cb_fn; 786 task->bdev_io_wait.cb_arg = task; 787 spdk_bdev_queue_io_wait(job->bdev, job->ch, &task->bdev_io_wait); 788 } 789 790 static int 791 bdevperf_job_drain(void *ctx) 792 { 793 struct bdevperf_job *job = ctx; 794 795 spdk_poller_unregister(&job->run_timer); 796 if (job->reset) { 797 spdk_poller_unregister(&job->reset_timer); 798 } 799 800 job->is_draining = true; 801 802 return -1; 803 } 804 805 static int 806 bdevperf_job_drain_timer(void *ctx) 807 { 808 struct bdevperf_job *job = ctx; 809 810 bdevperf_job_drain(ctx); 811 if (job->current_queue_depth == 0) { 812 bdevperf_job_empty(job); 813 } 814 815 return SPDK_POLLER_BUSY; 816 } 817 818 static void 819 bdevperf_abort_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 820 { 821 struct bdevperf_task *task = cb_arg; 822 struct bdevperf_job *job = task->job; 823 824 job->current_queue_depth--; 825 826 if (success) { 827 job->io_completed++; 828 } else { 829 job->io_failed++; 830 if (!job->continue_on_failure) { 831 bdevperf_job_drain(job); 832 g_run_rc = -1; 833 } 834 } 835 836 spdk_bdev_free_io(bdev_io); 837 bdevperf_end_task(task); 838 } 839 840 static int 841 bdevperf_verify_dif(struct bdevperf_task *task) 842 { 843 struct bdevperf_job *job = task->job; 844 struct spdk_bdev *bdev = job->bdev; 845 struct spdk_dif_ctx dif_ctx; 846 struct spdk_dif_error err_blk = {}; 847 int rc; 848 struct spdk_dif_ctx_init_ext_opts dif_opts; 849 850 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 851 dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16; 852 rc = spdk_dif_ctx_init(&dif_ctx, 853 spdk_bdev_get_block_size(bdev), 854 spdk_bdev_get_md_size(bdev), 855 spdk_bdev_is_md_interleaved(bdev), 856 spdk_bdev_is_dif_head_of_md(bdev), 857 spdk_bdev_get_dif_type(bdev), 858 job->dif_check_flags, 859 task->offset_blocks, 0, 0, 0, 0, &dif_opts); 860 if (rc != 0) { 861 fprintf(stderr, "Initialization of DIF context failed\n"); 862 return rc; 863 } 864 865 if (spdk_bdev_is_md_interleaved(bdev)) { 866 rc = spdk_dif_verify(&task->iov, 1, job->io_size_blocks, &dif_ctx, &err_blk); 867 } else { 868 struct iovec md_iov = { 869 .iov_base = task->md_buf, 870 .iov_len = spdk_bdev_get_md_size(bdev) * job->io_size_blocks, 871 }; 872 873 rc = spdk_dix_verify(&task->iov, 1, &md_iov, job->io_size_blocks, &dif_ctx, &err_blk); 874 } 875 876 if (rc != 0) { 877 fprintf(stderr, "DIF/DIX error detected. type=%d, offset=%" PRIu32 "\n", 878 err_blk.err_type, err_blk.err_offset); 879 } 880 881 return rc; 882 } 883 884 static void 885 bdevperf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 886 { 887 struct bdevperf_job *job; 888 struct bdevperf_task *task = cb_arg; 889 bool md_check; 890 uint64_t offset_in_ios; 891 int rc; 892 893 job = task->job; 894 md_check = spdk_bdev_get_dif_type(job->bdev) == SPDK_DIF_DISABLE; 895 896 if (g_error_to_exit == true) { 897 bdevperf_job_drain(job); 898 } else if (!success) { 899 if (!job->reset && !job->continue_on_failure) { 900 bdevperf_job_drain(job); 901 g_run_rc = -1; 902 g_error_to_exit = true; 903 printf("task offset: %" PRIu64 " on job bdev=%s fails\n", 904 task->offset_blocks, job->name); 905 } 906 } else if (job->verify || job->reset) { 907 if (!verify_data(task->buf, job->buf_size, 908 task->iov.iov_base, job->buf_size, 909 spdk_bdev_get_block_size(job->bdev), 910 task->md_buf, spdk_bdev_io_get_md_buf(bdev_io), 911 spdk_bdev_get_md_size(job->bdev), 912 job->io_size_blocks, md_check)) { 913 printf("Buffer mismatch! Target: %s Disk Offset: %" PRIu64 "\n", job->name, task->offset_blocks); 914 bdevperf_job_drain(job); 915 g_run_rc = -1; 916 } 917 } else if (job->dif_check_flags != 0) { 918 if (task->io_type == SPDK_BDEV_IO_TYPE_READ && spdk_bdev_get_md_size(job->bdev) != 0) { 919 rc = bdevperf_verify_dif(task); 920 if (rc != 0) { 921 printf("DIF error detected. task offset: %" PRIu64 " on job bdev=%s\n", 922 task->offset_blocks, job->name); 923 924 success = false; 925 if (!job->reset && !job->continue_on_failure) { 926 bdevperf_job_drain(job); 927 g_run_rc = -1; 928 g_error_to_exit = true; 929 } 930 } 931 } 932 } 933 934 job->current_queue_depth--; 935 936 if (success) { 937 job->io_completed++; 938 } else { 939 job->io_failed++; 940 } 941 942 if (job->verify) { 943 assert(task->offset_blocks / job->io_size_blocks >= job->ios_base); 944 offset_in_ios = task->offset_blocks / job->io_size_blocks - job->ios_base; 945 946 assert(spdk_bit_array_get(job->outstanding, offset_in_ios) == true); 947 spdk_bit_array_clear(job->outstanding, offset_in_ios); 948 } 949 950 spdk_bdev_free_io(bdev_io); 951 952 /* 953 * is_draining indicates when time has expired for the test run 954 * and we are just waiting for the previously submitted I/O 955 * to complete. In this case, do not submit a new I/O to replace 956 * the one just completed. 957 */ 958 if (!job->is_draining) { 959 bdevperf_submit_single(job, task); 960 } else { 961 bdevperf_end_task(task); 962 } 963 } 964 965 static inline void 966 bdevperf_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, void *md_buf, 967 uint32_t dif_check_flags_exclude_mask) 968 { 969 memset(opts, 0, sizeof(*opts)); 970 opts->size = sizeof(*opts); 971 opts->metadata = md_buf; 972 opts->dif_check_flags_exclude_mask = dif_check_flags_exclude_mask; 973 } 974 975 static void 976 bdevperf_verify_submit_read(void *cb_arg) 977 { 978 struct bdevperf_job *job; 979 struct bdevperf_task *task = cb_arg; 980 struct spdk_bdev_ext_io_opts opts; 981 int rc; 982 983 job = task->job; 984 985 task->iov.iov_base = task->verify_buf; 986 task->iov.iov_len = job->buf_size; 987 bdevperf_init_ext_io_opts(&opts, NULL, ~job->dif_check_flags); 988 989 /* Read the data back in */ 990 rc = spdk_bdev_readv_blocks_ext(job->bdev_desc, job->ch, &task->iov, 1, 991 task->offset_blocks, job->io_size_blocks, 992 bdevperf_complete, task, &opts); 993 994 if (rc == -ENOMEM) { 995 bdevperf_queue_io_wait_with_cb(task, bdevperf_verify_submit_read); 996 } else if (rc != 0) { 997 printf("Failed to submit read: %d\n", rc); 998 bdevperf_job_drain(job); 999 g_run_rc = rc; 1000 } 1001 } 1002 1003 static void 1004 bdevperf_verify_write_complete(struct spdk_bdev_io *bdev_io, bool success, 1005 void *cb_arg) 1006 { 1007 if (success) { 1008 spdk_bdev_free_io(bdev_io); 1009 bdevperf_verify_submit_read(cb_arg); 1010 } else { 1011 bdevperf_complete(bdev_io, success, cb_arg); 1012 } 1013 } 1014 1015 static void 1016 bdevperf_zcopy_populate_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 1017 { 1018 if (!success) { 1019 bdevperf_complete(bdev_io, success, cb_arg); 1020 return; 1021 } 1022 1023 spdk_bdev_zcopy_end(bdev_io, false, bdevperf_complete, cb_arg); 1024 } 1025 1026 static int 1027 bdevperf_generate_dif(struct bdevperf_task *task) 1028 { 1029 struct bdevperf_job *job = task->job; 1030 struct spdk_bdev *bdev = job->bdev; 1031 struct spdk_dif_ctx dif_ctx; 1032 int rc; 1033 struct spdk_dif_ctx_init_ext_opts dif_opts; 1034 1035 dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format); 1036 dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16; 1037 rc = spdk_dif_ctx_init(&dif_ctx, 1038 spdk_bdev_get_block_size(bdev), 1039 spdk_bdev_get_md_size(bdev), 1040 spdk_bdev_is_md_interleaved(bdev), 1041 spdk_bdev_is_dif_head_of_md(bdev), 1042 spdk_bdev_get_dif_type(bdev), 1043 job->dif_check_flags, 1044 task->offset_blocks, 0, 0, 0, 0, &dif_opts); 1045 if (rc != 0) { 1046 fprintf(stderr, "Initialization of DIF context failed\n"); 1047 return rc; 1048 } 1049 1050 if (spdk_bdev_is_md_interleaved(bdev)) { 1051 rc = spdk_dif_generate(&task->iov, 1, job->io_size_blocks, &dif_ctx); 1052 } else { 1053 struct iovec md_iov = { 1054 .iov_base = task->md_buf, 1055 .iov_len = spdk_bdev_get_md_size(bdev) * job->io_size_blocks, 1056 }; 1057 1058 rc = spdk_dix_generate(&task->iov, 1, &md_iov, job->io_size_blocks, &dif_ctx); 1059 } 1060 1061 if (rc != 0) { 1062 fprintf(stderr, "Generation of DIF/DIX failed\n"); 1063 } 1064 1065 return rc; 1066 } 1067 1068 static void 1069 bdevperf_submit_task(void *arg) 1070 { 1071 struct bdevperf_task *task = arg; 1072 struct bdevperf_job *job = task->job; 1073 struct spdk_bdev_desc *desc; 1074 struct spdk_io_channel *ch; 1075 spdk_bdev_io_completion_cb cb_fn; 1076 uint64_t offset_in_ios; 1077 struct spdk_bdev_ext_io_opts opts; 1078 int rc = 0; 1079 1080 desc = job->bdev_desc; 1081 ch = job->ch; 1082 1083 switch (task->io_type) { 1084 case SPDK_BDEV_IO_TYPE_WRITE: 1085 if (spdk_bdev_get_md_size(job->bdev) != 0 && job->dif_check_flags != 0) { 1086 rc = bdevperf_generate_dif(task); 1087 } 1088 if (rc == 0) { 1089 cb_fn = (job->verify || job->reset) ? bdevperf_verify_write_complete : bdevperf_complete; 1090 1091 if (g_zcopy) { 1092 spdk_bdev_zcopy_end(task->bdev_io, true, cb_fn, task); 1093 return; 1094 } else { 1095 bdevperf_init_ext_io_opts(&opts, task->md_buf, ~job->dif_check_flags); 1096 rc = spdk_bdev_writev_blocks_ext(desc, ch, &task->iov, 1, 1097 task->offset_blocks, 1098 job->io_size_blocks, 1099 cb_fn, task, &opts); 1100 } 1101 } 1102 break; 1103 case SPDK_BDEV_IO_TYPE_FLUSH: 1104 rc = spdk_bdev_flush_blocks(desc, ch, task->offset_blocks, 1105 job->io_size_blocks, bdevperf_complete, task); 1106 break; 1107 case SPDK_BDEV_IO_TYPE_UNMAP: 1108 rc = spdk_bdev_unmap_blocks(desc, ch, task->offset_blocks, 1109 job->io_size_blocks, bdevperf_complete, task); 1110 break; 1111 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 1112 rc = spdk_bdev_write_zeroes_blocks(desc, ch, task->offset_blocks, 1113 job->io_size_blocks, bdevperf_complete, task); 1114 break; 1115 case SPDK_BDEV_IO_TYPE_READ: 1116 if (g_zcopy) { 1117 rc = spdk_bdev_zcopy_start(desc, ch, NULL, 0, task->offset_blocks, job->io_size_blocks, 1118 true, bdevperf_zcopy_populate_complete, task); 1119 } else { 1120 bdevperf_init_ext_io_opts(&opts, task->md_buf, ~job->dif_check_flags); 1121 rc = spdk_bdev_readv_blocks_ext(desc, ch, &task->iov, 1, 1122 task->offset_blocks, 1123 job->io_size_blocks, 1124 bdevperf_complete, task, &opts); 1125 } 1126 break; 1127 case SPDK_BDEV_IO_TYPE_ABORT: 1128 rc = spdk_bdev_abort(desc, ch, task->task_to_abort, bdevperf_abort_complete, task); 1129 break; 1130 default: 1131 assert(false); 1132 rc = -EINVAL; 1133 break; 1134 } 1135 1136 if (rc == -ENOMEM) { 1137 bdevperf_queue_io_wait_with_cb(task, bdevperf_submit_task); 1138 return; 1139 } else if (rc != 0) { 1140 printf("Failed to submit bdev_io: %d\n", rc); 1141 if (job->verify) { 1142 assert(task->offset_blocks / job->io_size_blocks >= job->ios_base); 1143 offset_in_ios = task->offset_blocks / job->io_size_blocks - job->ios_base; 1144 1145 assert(spdk_bit_array_get(job->outstanding, offset_in_ios) == true); 1146 spdk_bit_array_clear(job->outstanding, offset_in_ios); 1147 } 1148 bdevperf_job_drain(job); 1149 g_run_rc = rc; 1150 return; 1151 } 1152 1153 job->current_queue_depth++; 1154 } 1155 1156 static void 1157 bdevperf_zcopy_get_buf_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 1158 { 1159 struct bdevperf_task *task = cb_arg; 1160 struct bdevperf_job *job = task->job; 1161 struct iovec *iovs; 1162 int iovcnt; 1163 1164 if (!success) { 1165 bdevperf_job_drain(job); 1166 g_run_rc = -1; 1167 return; 1168 } 1169 1170 task->bdev_io = bdev_io; 1171 task->io_type = SPDK_BDEV_IO_TYPE_WRITE; 1172 1173 if (job->verify || job->reset) { 1174 /* When job->verify or job->reset is enabled, task->buf is used for 1175 * verification of read after write. For write I/O, when zcopy APIs 1176 * are used, task->buf cannot be used, and data must be written to 1177 * the data buffer allocated underneath bdev layer instead. 1178 * Hence we copy task->buf to the allocated data buffer here. 1179 */ 1180 spdk_bdev_io_get_iovec(bdev_io, &iovs, &iovcnt); 1181 assert(iovcnt == 1); 1182 assert(iovs != NULL); 1183 1184 copy_data(iovs[0].iov_base, iovs[0].iov_len, task->buf, job->buf_size, 1185 spdk_bdev_get_block_size(job->bdev), 1186 spdk_bdev_io_get_md_buf(bdev_io), task->md_buf, 1187 spdk_bdev_get_md_size(job->bdev), job->io_size_blocks); 1188 } 1189 1190 bdevperf_submit_task(task); 1191 } 1192 1193 static void 1194 bdevperf_prep_zcopy_write_task(void *arg) 1195 { 1196 struct bdevperf_task *task = arg; 1197 struct bdevperf_job *job = task->job; 1198 int rc; 1199 1200 rc = spdk_bdev_zcopy_start(job->bdev_desc, job->ch, NULL, 0, 1201 task->offset_blocks, job->io_size_blocks, 1202 false, bdevperf_zcopy_get_buf_complete, task); 1203 if (rc != 0) { 1204 assert(rc == -ENOMEM); 1205 bdevperf_queue_io_wait_with_cb(task, bdevperf_prep_zcopy_write_task); 1206 return; 1207 } 1208 1209 job->current_queue_depth++; 1210 } 1211 1212 static struct bdevperf_task * 1213 bdevperf_job_get_task(struct bdevperf_job *job) 1214 { 1215 struct bdevperf_task *task; 1216 1217 task = TAILQ_FIRST(&job->task_list); 1218 if (!task) { 1219 printf("Task allocation failed\n"); 1220 abort(); 1221 } 1222 1223 TAILQ_REMOVE(&job->task_list, task, link); 1224 return task; 1225 } 1226 1227 static void 1228 bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task) 1229 { 1230 uint64_t offset_in_ios; 1231 uint64_t rand_value; 1232 uint32_t first_clear; 1233 1234 if (job->zipf) { 1235 offset_in_ios = spdk_zipf_generate(job->zipf); 1236 } else if (job->is_random) { 1237 /* RAND_MAX is only INT32_MAX, so use 2 calls to rand_r to 1238 * get a large enough value to ensure we are issuing I/O 1239 * uniformly across the whole bdev. 1240 */ 1241 rand_value = (uint64_t)rand_r(&job->seed) * RAND_MAX + rand_r(&job->seed); 1242 offset_in_ios = rand_value % job->size_in_ios; 1243 1244 if (g_random_map) { 1245 /* Make sure, that the offset does not exceed the maximum size 1246 * of the bit array (verified during job creation) 1247 */ 1248 assert(offset_in_ios < UINT32_MAX); 1249 1250 first_clear = spdk_bit_array_find_first_clear(job->random_map, (uint32_t)offset_in_ios); 1251 1252 if (first_clear == UINT32_MAX) { 1253 first_clear = spdk_bit_array_find_first_clear(job->random_map, 0); 1254 1255 if (first_clear == UINT32_MAX) { 1256 /* If there are no more clear bits in the array, we start over 1257 * and select the previously selected random value. 1258 */ 1259 spdk_bit_array_clear_mask(job->random_map); 1260 first_clear = (uint32_t)offset_in_ios; 1261 } 1262 } 1263 1264 spdk_bit_array_set(job->random_map, first_clear); 1265 1266 offset_in_ios = first_clear; 1267 } 1268 } else { 1269 offset_in_ios = job->offset_in_ios++; 1270 if (job->offset_in_ios == job->size_in_ios) { 1271 job->offset_in_ios = 0; 1272 } 1273 1274 /* Increment of offset_in_ios if there's already an outstanding IO 1275 * to that location. We only need this with job->verify as random 1276 * offsets are not supported with job->verify at this time. 1277 */ 1278 if (job->verify) { 1279 assert(spdk_bit_array_find_first_clear(job->outstanding, 0) != UINT32_MAX); 1280 1281 while (spdk_bit_array_get(job->outstanding, offset_in_ios)) { 1282 offset_in_ios = job->offset_in_ios++; 1283 if (job->offset_in_ios == job->size_in_ios) { 1284 job->offset_in_ios = 0; 1285 } 1286 } 1287 spdk_bit_array_set(job->outstanding, offset_in_ios); 1288 } 1289 } 1290 1291 /* For multi-thread to same job, offset_in_ios is relative 1292 * to the LBA range assigned for that job. job->offset_blocks 1293 * is absolute (entire bdev LBA range). 1294 */ 1295 task->offset_blocks = (offset_in_ios + job->ios_base) * job->io_size_blocks; 1296 1297 if (job->flush) { 1298 task->io_type = SPDK_BDEV_IO_TYPE_FLUSH; 1299 } else if (job->unmap) { 1300 task->io_type = SPDK_BDEV_IO_TYPE_UNMAP; 1301 } else if (job->write_zeroes) { 1302 task->io_type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES; 1303 } else if ((job->rw_percentage == 100) || 1304 (job->rw_percentage != 0 && ((rand_r(&job->seed) % 100) < job->rw_percentage))) { 1305 assert(!job->verify); 1306 task->io_type = SPDK_BDEV_IO_TYPE_READ; 1307 if (!g_zcopy) { 1308 task->iov.iov_base = task->buf; 1309 task->iov.iov_len = job->buf_size; 1310 } 1311 } else { 1312 if (job->verify || job->reset || g_unique_writes) { 1313 generate_data(job, task->buf, task->md_buf, g_unique_writes); 1314 } 1315 if (g_zcopy) { 1316 bdevperf_prep_zcopy_write_task(task); 1317 return; 1318 } else { 1319 task->iov.iov_base = task->buf; 1320 task->iov.iov_len = job->buf_size; 1321 task->io_type = SPDK_BDEV_IO_TYPE_WRITE; 1322 } 1323 } 1324 1325 bdevperf_submit_task(task); 1326 } 1327 1328 static int reset_job(void *arg); 1329 1330 static void 1331 reset_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 1332 { 1333 struct bdevperf_task *task = cb_arg; 1334 struct bdevperf_job *job = task->job; 1335 1336 if (!success) { 1337 printf("Reset blockdev=%s failed\n", spdk_bdev_get_name(job->bdev)); 1338 bdevperf_job_drain(job); 1339 g_run_rc = -1; 1340 } 1341 1342 TAILQ_INSERT_TAIL(&job->task_list, task, link); 1343 spdk_bdev_free_io(bdev_io); 1344 1345 job->reset_timer = SPDK_POLLER_REGISTER(reset_job, job, 1346 10 * SPDK_SEC_TO_USEC); 1347 } 1348 1349 static int 1350 reset_job(void *arg) 1351 { 1352 struct bdevperf_job *job = arg; 1353 struct bdevperf_task *task; 1354 int rc; 1355 1356 spdk_poller_unregister(&job->reset_timer); 1357 1358 /* Do reset. */ 1359 task = bdevperf_job_get_task(job); 1360 rc = spdk_bdev_reset(job->bdev_desc, job->ch, 1361 reset_cb, task); 1362 if (rc) { 1363 printf("Reset failed: %d\n", rc); 1364 bdevperf_job_drain(job); 1365 g_run_rc = -1; 1366 } 1367 1368 return -1; 1369 } 1370 1371 static void 1372 bdevperf_timeout_cb(void *cb_arg, struct spdk_bdev_io *bdev_io) 1373 { 1374 struct bdevperf_job *job = cb_arg; 1375 struct bdevperf_task *task; 1376 1377 job->io_timeout++; 1378 1379 if (job->is_draining || !job->abort || 1380 !spdk_bdev_io_type_supported(job->bdev, SPDK_BDEV_IO_TYPE_ABORT)) { 1381 return; 1382 } 1383 1384 task = bdevperf_job_get_task(job); 1385 if (task == NULL) { 1386 return; 1387 } 1388 1389 task->task_to_abort = spdk_bdev_io_get_cb_arg(bdev_io); 1390 task->io_type = SPDK_BDEV_IO_TYPE_ABORT; 1391 1392 bdevperf_submit_task(task); 1393 } 1394 1395 static void 1396 bdevperf_job_run(void *ctx) 1397 { 1398 struct bdevperf_job *job = ctx; 1399 struct bdevperf_task *task; 1400 int i; 1401 1402 /* Submit initial I/O for this job. Each time one 1403 * completes, another will be submitted. */ 1404 1405 /* Start a timer to stop this I/O chain when the run is over */ 1406 job->run_timer = SPDK_POLLER_REGISTER(bdevperf_job_drain_timer, job, g_time_in_usec); 1407 if (job->reset) { 1408 job->reset_timer = SPDK_POLLER_REGISTER(reset_job, job, 1409 10 * SPDK_SEC_TO_USEC); 1410 } 1411 1412 spdk_bdev_set_timeout(job->bdev_desc, g_timeout_in_sec, bdevperf_timeout_cb, job); 1413 1414 for (i = 0; i < job->queue_depth; i++) { 1415 task = bdevperf_job_get_task(job); 1416 bdevperf_submit_single(job, task); 1417 } 1418 } 1419 1420 static void 1421 _performance_dump_done(void *ctx) 1422 { 1423 struct bdevperf_aggregate_stats *stats = ctx; 1424 double average_latency; 1425 1426 printf("\r ==================================================================================" 1427 "=================================\n"); 1428 printf("\r %-28s: %10s %10.2f %10.2f", 1429 "Total", "", stats->total_io_per_second, stats->total_mb_per_second); 1430 printf(" %10.2f %8.2f", 1431 stats->total_failed_per_second, stats->total_timeout_per_second); 1432 1433 average_latency = ((double)stats->total_tsc / stats->total_io_completed) * SPDK_SEC_TO_USEC / 1434 spdk_get_ticks_hz(); 1435 printf(" %10.2f %10.2f %10.2f\n", average_latency, stats->min_latency, stats->max_latency); 1436 printf("\n"); 1437 1438 fflush(stdout); 1439 1440 g_performance_dump_active = false; 1441 1442 free(stats); 1443 } 1444 1445 static void 1446 _performance_dump(void *ctx) 1447 { 1448 struct bdevperf_aggregate_stats *stats = ctx; 1449 1450 performance_dump_job(stats, stats->current_job); 1451 1452 /* This assumes the jobs list is static after start up time. 1453 * That's true right now, but if that ever changed this would need a lock. */ 1454 stats->current_job = TAILQ_NEXT(stats->current_job, link); 1455 if (stats->current_job == NULL) { 1456 spdk_thread_send_msg(g_main_thread, _performance_dump_done, stats); 1457 } else { 1458 spdk_thread_send_msg(stats->current_job->thread, _performance_dump, stats); 1459 } 1460 } 1461 1462 static int 1463 performance_statistics_thread(void *arg) 1464 { 1465 struct bdevperf_aggregate_stats *stats; 1466 1467 if (g_performance_dump_active) { 1468 return -1; 1469 } 1470 1471 g_performance_dump_active = true; 1472 1473 stats = calloc(1, sizeof(*stats)); 1474 if (stats == NULL) { 1475 return -1; 1476 } 1477 1478 stats->min_latency = (double)UINT64_MAX; 1479 1480 g_show_performance_period_num++; 1481 1482 stats->io_time_in_usec = g_show_performance_period_num * g_show_performance_period_in_usec; 1483 stats->ema_period = g_show_performance_ema_period; 1484 1485 /* Iterate all of the jobs to gather stats 1486 * These jobs will not get removed here until a final performance dump is run, 1487 * so this should be safe without locking. 1488 */ 1489 stats->current_job = TAILQ_FIRST(&g_bdevperf.jobs); 1490 if (stats->current_job == NULL) { 1491 spdk_thread_send_msg(g_main_thread, _performance_dump_done, stats); 1492 } else { 1493 spdk_thread_send_msg(stats->current_job->thread, _performance_dump, stats); 1494 } 1495 1496 return -1; 1497 } 1498 1499 static void 1500 bdevperf_test(void) 1501 { 1502 struct bdevperf_job *job; 1503 1504 printf("Running I/O for %" PRIu64 " seconds...\n", g_time_in_usec / (uint64_t)SPDK_SEC_TO_USEC); 1505 fflush(stdout); 1506 1507 /* Start a timer to dump performance numbers */ 1508 g_start_tsc = spdk_get_ticks(); 1509 if (g_show_performance_real_time && !g_perf_timer) { 1510 printf("%*s\n", 107, "Latency(us)"); 1511 printf("\r %-*s: %10s %10s %10s %10s %8s %10s %10s %10s\n", 1512 28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s", "Average", "min", "max"); 1513 1514 g_perf_timer = SPDK_POLLER_REGISTER(performance_statistics_thread, NULL, 1515 g_show_performance_period_in_usec); 1516 } 1517 1518 /* Iterate jobs to start all I/O */ 1519 TAILQ_FOREACH(job, &g_bdevperf.jobs, link) { 1520 g_bdevperf.running_jobs++; 1521 spdk_thread_send_msg(job->thread, bdevperf_job_run, job); 1522 } 1523 } 1524 1525 static void 1526 bdevperf_bdev_removed(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) 1527 { 1528 struct bdevperf_job *job = event_ctx; 1529 1530 if (SPDK_BDEV_EVENT_REMOVE == type) { 1531 bdevperf_job_drain(job); 1532 } 1533 } 1534 1535 static void 1536 bdevperf_histogram_status_cb(void *cb_arg, int status) 1537 { 1538 if (status != 0) { 1539 g_run_rc = status; 1540 if (g_continue_on_failure == false) { 1541 g_error_to_exit = true; 1542 } 1543 } 1544 1545 if (--g_bdev_count == 0) { 1546 if (g_run_rc == 0) { 1547 /* Ready to run the test */ 1548 bdevperf_test(); 1549 } else { 1550 bdevperf_test_done(NULL); 1551 } 1552 } 1553 } 1554 1555 static uint32_t g_construct_job_count = 0; 1556 1557 static int 1558 _bdevperf_enable_histogram(void *ctx, struct spdk_bdev *bdev) 1559 { 1560 bool *enable = ctx; 1561 1562 g_bdev_count++; 1563 1564 spdk_bdev_histogram_enable(bdev, bdevperf_histogram_status_cb, NULL, *enable); 1565 1566 return 0; 1567 } 1568 1569 static void 1570 bdevperf_enable_histogram(bool enable) 1571 { 1572 struct spdk_bdev *bdev; 1573 int rc; 1574 1575 /* increment initial g_bdev_count so that it will never reach 0 in the middle of iteration */ 1576 g_bdev_count = 1; 1577 1578 if (g_job_bdev_name != NULL) { 1579 bdev = spdk_bdev_get_by_name(g_job_bdev_name); 1580 if (bdev) { 1581 rc = _bdevperf_enable_histogram(&enable, bdev); 1582 } else { 1583 fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name); 1584 rc = -1; 1585 } 1586 } else { 1587 rc = spdk_for_each_bdev_leaf(&enable, _bdevperf_enable_histogram); 1588 } 1589 1590 bdevperf_histogram_status_cb(NULL, rc); 1591 } 1592 1593 static void 1594 _bdevperf_construct_job_done(void *ctx) 1595 { 1596 if (--g_construct_job_count == 0) { 1597 if (g_run_rc != 0) { 1598 /* Something failed. */ 1599 bdevperf_test_done(NULL); 1600 return; 1601 } 1602 1603 /* always enable histogram. */ 1604 bdevperf_enable_histogram(true); 1605 } else if (g_run_rc != 0) { 1606 /* Reset error as some jobs constructed right */ 1607 g_run_rc = 0; 1608 if (g_continue_on_failure == false) { 1609 g_error_to_exit = true; 1610 } 1611 } 1612 } 1613 1614 /* Checkformat will not allow to use inlined type, 1615 this is a workaround */ 1616 typedef struct spdk_thread *spdk_thread_t; 1617 1618 static spdk_thread_t 1619 construct_job_thread(struct spdk_cpuset *cpumask, const char *tag) 1620 { 1621 struct spdk_cpuset tmp; 1622 1623 /* This function runs on the main thread. */ 1624 assert(g_main_thread == spdk_get_thread()); 1625 1626 /* Handle default mask */ 1627 if (spdk_cpuset_count(cpumask) == 0) { 1628 cpumask = &g_all_cpuset; 1629 } 1630 1631 /* Warn user that mask might need to be changed */ 1632 spdk_cpuset_copy(&tmp, cpumask); 1633 spdk_cpuset_or(&tmp, &g_all_cpuset); 1634 if (!spdk_cpuset_equal(&tmp, &g_all_cpuset)) { 1635 fprintf(stderr, "cpumask for '%s' is too big\n", tag); 1636 } 1637 1638 return spdk_thread_create(tag, cpumask); 1639 } 1640 1641 static uint32_t 1642 _get_next_core(void) 1643 { 1644 static uint32_t current_core = SPDK_ENV_LCORE_ID_ANY; 1645 1646 if (current_core == SPDK_ENV_LCORE_ID_ANY) { 1647 current_core = spdk_env_get_first_core(); 1648 return current_core; 1649 } 1650 1651 current_core = spdk_env_get_next_core(current_core); 1652 if (current_core == SPDK_ENV_LCORE_ID_ANY) { 1653 current_core = spdk_env_get_first_core(); 1654 } 1655 1656 return current_core; 1657 } 1658 1659 static void 1660 _bdevperf_construct_job(void *ctx) 1661 { 1662 struct bdevperf_job *job = ctx; 1663 int rc; 1664 1665 rc = spdk_bdev_open_ext(spdk_bdev_get_name(job->bdev), true, bdevperf_bdev_removed, job, 1666 &job->bdev_desc); 1667 if (rc != 0) { 1668 SPDK_ERRLOG("Could not open leaf bdev %s, error=%d\n", spdk_bdev_get_name(job->bdev), rc); 1669 g_run_rc = -EINVAL; 1670 goto end; 1671 } 1672 1673 if (g_zcopy) { 1674 if (!spdk_bdev_io_type_supported(job->bdev, SPDK_BDEV_IO_TYPE_ZCOPY)) { 1675 printf("Test requires ZCOPY but bdev module does not support ZCOPY\n"); 1676 g_run_rc = -ENOTSUP; 1677 goto end; 1678 } 1679 } 1680 1681 job->ch = spdk_bdev_get_io_channel(job->bdev_desc); 1682 if (!job->ch) { 1683 SPDK_ERRLOG("Could not get io_channel for device %s, error=%d\n", spdk_bdev_get_name(job->bdev), 1684 rc); 1685 spdk_bdev_close(job->bdev_desc); 1686 TAILQ_REMOVE(&g_bdevperf.jobs, job, link); 1687 g_run_rc = -ENOMEM; 1688 goto end; 1689 } 1690 1691 end: 1692 spdk_thread_send_msg(g_main_thread, _bdevperf_construct_job_done, NULL); 1693 } 1694 1695 static void 1696 job_init_rw(struct bdevperf_job *job, enum job_config_rw rw) 1697 { 1698 switch (rw) { 1699 case JOB_CONFIG_RW_READ: 1700 job->rw_percentage = 100; 1701 break; 1702 case JOB_CONFIG_RW_WRITE: 1703 job->rw_percentage = 0; 1704 break; 1705 case JOB_CONFIG_RW_RANDREAD: 1706 job->is_random = true; 1707 job->rw_percentage = 100; 1708 job->seed = rand(); 1709 break; 1710 case JOB_CONFIG_RW_RANDWRITE: 1711 job->is_random = true; 1712 job->rw_percentage = 0; 1713 job->seed = rand(); 1714 break; 1715 case JOB_CONFIG_RW_RW: 1716 job->is_random = false; 1717 break; 1718 case JOB_CONFIG_RW_RANDRW: 1719 job->is_random = true; 1720 job->seed = rand(); 1721 break; 1722 case JOB_CONFIG_RW_RESET: 1723 /* Reset shares the flow with verify. */ 1724 job->reset = true; 1725 /* fallthrough */ 1726 case JOB_CONFIG_RW_VERIFY: 1727 job->verify = true; 1728 /* For verify flow read is done on write completion 1729 * callback only, rw_percentage shall not be used. */ 1730 job->rw_percentage = 0; 1731 break; 1732 case JOB_CONFIG_RW_UNMAP: 1733 job->unmap = true; 1734 break; 1735 case JOB_CONFIG_RW_FLUSH: 1736 job->flush = true; 1737 break; 1738 case JOB_CONFIG_RW_WRITE_ZEROES: 1739 job->write_zeroes = true; 1740 break; 1741 } 1742 } 1743 1744 static int 1745 bdevperf_construct_job(struct spdk_bdev *bdev, struct job_config *config, 1746 struct spdk_thread *thread) 1747 { 1748 struct bdevperf_job *job; 1749 struct bdevperf_task *task; 1750 int block_size, data_block_size; 1751 int rc; 1752 int task_num, n; 1753 1754 block_size = spdk_bdev_get_block_size(bdev); 1755 data_block_size = spdk_bdev_get_data_block_size(bdev); 1756 1757 job = calloc(1, sizeof(struct bdevperf_job)); 1758 if (!job) { 1759 fprintf(stderr, "Unable to allocate memory for new job.\n"); 1760 return -ENOMEM; 1761 } 1762 1763 job->name = strdup(spdk_bdev_get_name(bdev)); 1764 if (!job->name) { 1765 fprintf(stderr, "Unable to allocate memory for job name.\n"); 1766 bdevperf_job_free(job); 1767 return -ENOMEM; 1768 } 1769 1770 job->workload_type = config->rw; 1771 job->io_size = config->bs; 1772 job->rw_percentage = config->rwmixread; 1773 job->continue_on_failure = g_continue_on_failure; 1774 job->queue_depth = config->iodepth; 1775 job->bdev = bdev; 1776 job->io_size_blocks = job->io_size / data_block_size; 1777 job->buf_size = job->io_size_blocks * block_size; 1778 job->abort = g_abort; 1779 job_init_rw(job, config->rw); 1780 1781 if ((job->io_size % data_block_size) != 0) { 1782 SPDK_ERRLOG("IO size (%d) is not multiples of data block size of bdev %s (%"PRIu32")\n", 1783 job->io_size, spdk_bdev_get_name(bdev), data_block_size); 1784 bdevperf_job_free(job); 1785 return -ENOTSUP; 1786 } 1787 1788 if (job->unmap && !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { 1789 printf("Skipping %s because it does not support unmap\n", spdk_bdev_get_name(bdev)); 1790 bdevperf_job_free(job); 1791 return -ENOTSUP; 1792 } 1793 1794 if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_REFTAG)) { 1795 job->dif_check_flags |= SPDK_DIF_FLAGS_REFTAG_CHECK; 1796 } 1797 if (spdk_bdev_is_dif_check_enabled(bdev, SPDK_DIF_CHECK_TYPE_GUARD)) { 1798 job->dif_check_flags |= SPDK_DIF_FLAGS_GUARD_CHECK; 1799 } 1800 1801 job->offset_in_ios = 0; 1802 1803 if (config->length != 0) { 1804 /* Use subset of disk */ 1805 job->size_in_ios = config->length / job->io_size_blocks; 1806 job->ios_base = config->offset / job->io_size_blocks; 1807 } else { 1808 /* Use whole disk */ 1809 job->size_in_ios = spdk_bdev_get_num_blocks(bdev) / job->io_size_blocks; 1810 job->ios_base = 0; 1811 } 1812 1813 if (job->is_random && g_zipf_theta > 0) { 1814 job->zipf = spdk_zipf_create(job->size_in_ios, g_zipf_theta, 0); 1815 } 1816 1817 if (job->verify) { 1818 if (job->size_in_ios >= UINT32_MAX) { 1819 SPDK_ERRLOG("Due to constraints of verify operation, the job storage capacity is too large\n"); 1820 bdevperf_job_free(job); 1821 return -ENOMEM; 1822 } 1823 job->outstanding = spdk_bit_array_create(job->size_in_ios); 1824 if (job->outstanding == NULL) { 1825 SPDK_ERRLOG("Could not create outstanding array bitmap for bdev %s\n", 1826 spdk_bdev_get_name(bdev)); 1827 bdevperf_job_free(job); 1828 return -ENOMEM; 1829 } 1830 if (job->queue_depth > (int)job->size_in_ios) { 1831 SPDK_WARNLOG("Due to constraints of verify job, queue depth (-q, %d) can't exceed the number of IO " 1832 "requests which can be submitted to the bdev %s simultaneously (%"PRIu64"). " 1833 "Queue depth is limited to %"PRIu64"\n", 1834 job->queue_depth, job->name, job->size_in_ios, job->size_in_ios); 1835 job->queue_depth = (int)job->size_in_ios; 1836 } 1837 } 1838 1839 job->histogram = spdk_histogram_data_alloc(); 1840 if (job->histogram == NULL) { 1841 fprintf(stderr, "Failed to allocate histogram\n"); 1842 bdevperf_job_free(job); 1843 return -ENOMEM; 1844 } 1845 1846 TAILQ_INIT(&job->task_list); 1847 1848 if (g_random_map) { 1849 if (job->size_in_ios >= UINT32_MAX) { 1850 SPDK_ERRLOG("Due to constraints of the random map, the job storage capacity is too large\n"); 1851 bdevperf_job_free(job); 1852 return -ENOMEM; 1853 } 1854 job->random_map = spdk_bit_array_create(job->size_in_ios); 1855 if (job->random_map == NULL) { 1856 SPDK_ERRLOG("Could not create random_map array bitmap for bdev %s\n", 1857 spdk_bdev_get_name(bdev)); 1858 bdevperf_job_free(job); 1859 return -ENOMEM; 1860 } 1861 } 1862 1863 task_num = job->queue_depth; 1864 if (job->reset) { 1865 task_num += 1; 1866 } 1867 if (job->abort) { 1868 task_num += job->queue_depth; 1869 } 1870 1871 TAILQ_INSERT_TAIL(&g_bdevperf.jobs, job, link); 1872 1873 for (n = 0; n < task_num; n++) { 1874 task = calloc(1, sizeof(struct bdevperf_task)); 1875 if (!task) { 1876 fprintf(stderr, "Failed to allocate task from memory\n"); 1877 spdk_zipf_free(&job->zipf); 1878 return -ENOMEM; 1879 } 1880 1881 task->buf = spdk_zmalloc(job->buf_size, spdk_bdev_get_buf_align(job->bdev), NULL, 1882 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 1883 if (!task->buf) { 1884 fprintf(stderr, "Cannot allocate buf for task=%p\n", task); 1885 spdk_zipf_free(&job->zipf); 1886 free(task); 1887 return -ENOMEM; 1888 } 1889 1890 if (job->verify && job->buf_size > SPDK_BDEV_LARGE_BUF_MAX_SIZE) { 1891 task->verify_buf = spdk_zmalloc(job->buf_size, spdk_bdev_get_buf_align(job->bdev), NULL, 1892 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 1893 if (!task->verify_buf) { 1894 fprintf(stderr, "Cannot allocate buf_verify for task=%p\n", task); 1895 spdk_free(task->buf); 1896 spdk_zipf_free(&job->zipf); 1897 free(task); 1898 return -ENOMEM; 1899 } 1900 1901 } 1902 1903 if (spdk_bdev_is_md_separate(job->bdev)) { 1904 task->md_buf = spdk_zmalloc(job->io_size_blocks * 1905 spdk_bdev_get_md_size(job->bdev), 0, NULL, 1906 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 1907 if (!task->md_buf) { 1908 fprintf(stderr, "Cannot allocate md buf for task=%p\n", task); 1909 spdk_zipf_free(&job->zipf); 1910 spdk_free(task->verify_buf); 1911 spdk_free(task->buf); 1912 free(task); 1913 return -ENOMEM; 1914 } 1915 } 1916 1917 task->job = job; 1918 TAILQ_INSERT_TAIL(&job->task_list, task, link); 1919 } 1920 1921 job->thread = thread; 1922 1923 g_construct_job_count++; 1924 1925 rc = spdk_thread_send_msg(thread, _bdevperf_construct_job, job); 1926 assert(rc == 0); 1927 1928 return rc; 1929 } 1930 1931 static int 1932 parse_rw(const char *str, enum job_config_rw ret) 1933 { 1934 if (str == NULL) { 1935 return ret; 1936 } 1937 1938 if (!strcmp(str, "read")) { 1939 ret = JOB_CONFIG_RW_READ; 1940 } else if (!strcmp(str, "randread")) { 1941 ret = JOB_CONFIG_RW_RANDREAD; 1942 } else if (!strcmp(str, "write")) { 1943 ret = JOB_CONFIG_RW_WRITE; 1944 } else if (!strcmp(str, "randwrite")) { 1945 ret = JOB_CONFIG_RW_RANDWRITE; 1946 } else if (!strcmp(str, "verify")) { 1947 ret = JOB_CONFIG_RW_VERIFY; 1948 } else if (!strcmp(str, "reset")) { 1949 ret = JOB_CONFIG_RW_RESET; 1950 } else if (!strcmp(str, "unmap")) { 1951 ret = JOB_CONFIG_RW_UNMAP; 1952 } else if (!strcmp(str, "write_zeroes")) { 1953 ret = JOB_CONFIG_RW_WRITE_ZEROES; 1954 } else if (!strcmp(str, "flush")) { 1955 ret = JOB_CONFIG_RW_FLUSH; 1956 } else if (!strcmp(str, "rw")) { 1957 ret = JOB_CONFIG_RW_RW; 1958 } else if (!strcmp(str, "randrw")) { 1959 ret = JOB_CONFIG_RW_RANDRW; 1960 } else { 1961 fprintf(stderr, "rw must be one of\n" 1962 PATTERN_TYPES_STR "\n"); 1963 ret = BDEVPERF_CONFIG_ERROR; 1964 } 1965 1966 return ret; 1967 } 1968 1969 static const char * 1970 config_filename_next(const char *filename, char *out) 1971 { 1972 int i, k; 1973 1974 if (filename == NULL) { 1975 out[0] = '\0'; 1976 return NULL; 1977 } 1978 1979 if (filename[0] == ':') { 1980 filename++; 1981 } 1982 1983 for (i = 0, k = 0; 1984 filename[i] != '\0' && 1985 filename[i] != ':' && 1986 i < BDEVPERF_CONFIG_MAX_FILENAME && 1987 k < (BDEVPERF_CONFIG_MAX_FILENAME - 1); 1988 i++) { 1989 if (filename[i] == ' ' || filename[i] == '\t') { 1990 continue; 1991 } 1992 1993 out[k++] = filename[i]; 1994 } 1995 out[k] = 0; 1996 1997 return filename + i; 1998 } 1999 2000 static struct spdk_thread * 2001 get_lcore_thread(uint32_t lcore) 2002 { 2003 struct lcore_thread *lthread; 2004 2005 TAILQ_FOREACH(lthread, &g_lcore_thread_list, link) { 2006 if (lthread->lcore == lcore) { 2007 return lthread->thread; 2008 } 2009 } 2010 2011 return NULL; 2012 } 2013 2014 static void 2015 create_lcore_thread(uint32_t lcore) 2016 { 2017 struct lcore_thread *lthread; 2018 struct spdk_cpuset cpumask = {}; 2019 char name[32]; 2020 2021 lthread = calloc(1, sizeof(*lthread)); 2022 assert(lthread != NULL); 2023 2024 lthread->lcore = lcore; 2025 2026 snprintf(name, sizeof(name), "lcore_%u", lcore); 2027 spdk_cpuset_set_cpu(&cpumask, lcore, true); 2028 2029 lthread->thread = spdk_thread_create(name, &cpumask); 2030 assert(lthread->thread != NULL); 2031 2032 TAILQ_INSERT_TAIL(&g_lcore_thread_list, lthread, link); 2033 } 2034 2035 static void 2036 bdevperf_construct_jobs(void) 2037 { 2038 char filename[BDEVPERF_CONFIG_MAX_FILENAME]; 2039 struct spdk_thread *thread; 2040 struct job_config *config; 2041 struct spdk_bdev *bdev; 2042 const char *filenames; 2043 uint32_t i; 2044 int rc; 2045 2046 if (g_one_thread_per_lcore) { 2047 SPDK_ENV_FOREACH_CORE(i) { 2048 create_lcore_thread(i); 2049 } 2050 } 2051 2052 TAILQ_FOREACH(config, &job_config_list, link) { 2053 filenames = config->filename; 2054 2055 if (!g_one_thread_per_lcore) { 2056 thread = construct_job_thread(&config->cpumask, config->name); 2057 } else { 2058 thread = get_lcore_thread(config->lcore); 2059 } 2060 assert(thread); 2061 2062 while (filenames) { 2063 filenames = config_filename_next(filenames, filename); 2064 if (strlen(filename) == 0) { 2065 break; 2066 } 2067 2068 bdev = spdk_bdev_get_by_name(filename); 2069 if (!bdev) { 2070 fprintf(stderr, "Unable to find bdev '%s'\n", filename); 2071 g_run_rc = -EINVAL; 2072 return; 2073 } 2074 2075 rc = bdevperf_construct_job(bdev, config, thread); 2076 if (rc < 0) { 2077 g_run_rc = rc; 2078 return; 2079 } 2080 } 2081 } 2082 } 2083 2084 static int 2085 make_cli_job_config(const char *filename, int64_t offset, uint64_t range) 2086 { 2087 struct job_config *config = calloc(1, sizeof(*config)); 2088 2089 if (config == NULL) { 2090 fprintf(stderr, "Unable to allocate memory for job config\n"); 2091 return -ENOMEM; 2092 } 2093 2094 config->name = filename; 2095 config->filename = filename; 2096 config->lcore = _get_next_core(); 2097 spdk_cpuset_zero(&config->cpumask); 2098 spdk_cpuset_set_cpu(&config->cpumask, config->lcore, true); 2099 config->bs = g_io_size; 2100 config->iodepth = g_queue_depth; 2101 config->rwmixread = g_rw_percentage; 2102 config->offset = offset; 2103 config->length = range; 2104 config->rw = parse_rw(g_workload_type, BDEVPERF_CONFIG_ERROR); 2105 if ((int)config->rw == BDEVPERF_CONFIG_ERROR) { 2106 free(config); 2107 return -EINVAL; 2108 } 2109 2110 TAILQ_INSERT_TAIL(&job_config_list, config, link); 2111 return 0; 2112 } 2113 2114 static int 2115 bdevperf_construct_multithread_job_config(void *ctx, struct spdk_bdev *bdev) 2116 { 2117 uint32_t *num_cores = ctx; 2118 uint32_t i; 2119 uint64_t blocks_per_job; 2120 int64_t offset; 2121 int rc; 2122 2123 blocks_per_job = spdk_bdev_get_num_blocks(bdev) / *num_cores; 2124 offset = 0; 2125 2126 SPDK_ENV_FOREACH_CORE(i) { 2127 rc = make_cli_job_config(spdk_bdev_get_name(bdev), offset, blocks_per_job); 2128 if (rc) { 2129 return rc; 2130 } 2131 2132 offset += blocks_per_job; 2133 } 2134 2135 return 0; 2136 } 2137 2138 static void 2139 bdevperf_construct_multithread_job_configs(void) 2140 { 2141 struct spdk_bdev *bdev; 2142 uint32_t i; 2143 uint32_t num_cores; 2144 2145 num_cores = 0; 2146 SPDK_ENV_FOREACH_CORE(i) { 2147 num_cores++; 2148 } 2149 2150 if (num_cores == 0) { 2151 g_run_rc = -EINVAL; 2152 return; 2153 } 2154 2155 if (g_job_bdev_name != NULL) { 2156 bdev = spdk_bdev_get_by_name(g_job_bdev_name); 2157 if (!bdev) { 2158 fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name); 2159 return; 2160 } 2161 g_run_rc = bdevperf_construct_multithread_job_config(&num_cores, bdev); 2162 } else { 2163 g_run_rc = spdk_for_each_bdev_leaf(&num_cores, bdevperf_construct_multithread_job_config); 2164 } 2165 2166 } 2167 2168 static int 2169 bdevperf_construct_job_config(void *ctx, struct spdk_bdev *bdev) 2170 { 2171 /* Construct the job */ 2172 return make_cli_job_config(spdk_bdev_get_name(bdev), 0, 0); 2173 } 2174 2175 static void 2176 bdevperf_construct_job_configs(void) 2177 { 2178 struct spdk_bdev *bdev; 2179 2180 /* There are three different modes for allocating jobs. Standard mode 2181 * (the default) creates one spdk_thread per bdev and runs the I/O job there. 2182 * 2183 * The -C flag places bdevperf into "multithread" mode, meaning it creates 2184 * one spdk_thread per bdev PER CORE, and runs a copy of the job on each. 2185 * This runs multiple threads per bdev, effectively. 2186 * 2187 * The -j flag implies "FIO" mode which tries to mimic semantic of FIO jobs. 2188 * In "FIO" mode, threads are spawned per-job instead of per-bdev. 2189 * Each FIO job can be individually parameterized by filename, cpu mask, etc, 2190 * which is different from other modes in that they only support global options. 2191 * 2192 * Both for standard mode and "multithread" mode, if the -E flag is specified, 2193 * it creates one spdk_thread PER CORE. On each core, one spdk_thread is shared by 2194 * multiple jobs. 2195 */ 2196 2197 if (g_bdevperf_conf) { 2198 goto end; 2199 } 2200 2201 if (g_multithread_mode) { 2202 bdevperf_construct_multithread_job_configs(); 2203 } else if (g_job_bdev_name != NULL) { 2204 bdev = spdk_bdev_get_by_name(g_job_bdev_name); 2205 if (bdev) { 2206 /* Construct the job */ 2207 g_run_rc = make_cli_job_config(g_job_bdev_name, 0, 0); 2208 } else { 2209 fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name); 2210 } 2211 } else { 2212 g_run_rc = spdk_for_each_bdev_leaf(NULL, bdevperf_construct_job_config); 2213 } 2214 2215 end: 2216 /* Increment initial construct_jobs count so that it will never reach 0 in the middle 2217 * of iteration. 2218 */ 2219 g_construct_job_count = 1; 2220 2221 if (g_run_rc == 0) { 2222 bdevperf_construct_jobs(); 2223 } 2224 2225 _bdevperf_construct_job_done(NULL); 2226 } 2227 2228 static int 2229 parse_uint_option(struct spdk_conf_section *s, const char *name, int def) 2230 { 2231 const char *job_name; 2232 int tmp; 2233 2234 tmp = spdk_conf_section_get_intval(s, name); 2235 if (tmp == -1) { 2236 /* Field was not found. Check default value 2237 * In [global] section it is ok to have undefined values 2238 * but for other sections it is not ok */ 2239 if (def == BDEVPERF_CONFIG_UNDEFINED) { 2240 job_name = spdk_conf_section_get_name(s); 2241 if (strcmp(job_name, "global") == 0) { 2242 return def; 2243 } 2244 2245 fprintf(stderr, 2246 "Job '%s' has no '%s' assigned\n", 2247 job_name, name); 2248 return BDEVPERF_CONFIG_ERROR; 2249 } 2250 return def; 2251 } 2252 2253 /* NOTE: get_intval returns nonnegative on success */ 2254 if (tmp < 0) { 2255 fprintf(stderr, "Job '%s' has bad '%s' value.\n", 2256 spdk_conf_section_get_name(s), name); 2257 return BDEVPERF_CONFIG_ERROR; 2258 } 2259 2260 return tmp; 2261 } 2262 2263 /* CLI arguments override parameters for global sections */ 2264 static void 2265 config_set_cli_args(struct job_config *config) 2266 { 2267 if (g_job_bdev_name) { 2268 config->filename = g_job_bdev_name; 2269 } 2270 if (g_io_size > 0) { 2271 config->bs = g_io_size; 2272 } 2273 if (g_queue_depth > 0) { 2274 config->iodepth = g_queue_depth; 2275 } 2276 if (g_rw_percentage > 0) { 2277 config->rwmixread = g_rw_percentage; 2278 } 2279 if (g_workload_type) { 2280 config->rw = parse_rw(g_workload_type, config->rw); 2281 } 2282 } 2283 2284 static int 2285 read_job_config(void) 2286 { 2287 struct job_config global_default_config; 2288 struct job_config global_config; 2289 struct spdk_conf_section *s; 2290 struct job_config *config = NULL; 2291 const char *cpumask; 2292 const char *rw; 2293 bool is_global; 2294 int n = 0; 2295 int val; 2296 2297 if (g_bdevperf_conf_file == NULL) { 2298 return 0; 2299 } 2300 2301 g_bdevperf_conf = spdk_conf_allocate(); 2302 if (g_bdevperf_conf == NULL) { 2303 fprintf(stderr, "Could not allocate job config structure\n"); 2304 return 1; 2305 } 2306 2307 spdk_conf_disable_sections_merge(g_bdevperf_conf); 2308 if (spdk_conf_read(g_bdevperf_conf, g_bdevperf_conf_file)) { 2309 fprintf(stderr, "Invalid job config"); 2310 return 1; 2311 } 2312 2313 /* Initialize global defaults */ 2314 global_default_config.filename = NULL; 2315 /* Zero mask is the same as g_all_cpuset 2316 * The g_all_cpuset is not initialized yet, 2317 * so use zero mask as the default instead */ 2318 spdk_cpuset_zero(&global_default_config.cpumask); 2319 global_default_config.bs = BDEVPERF_CONFIG_UNDEFINED; 2320 global_default_config.iodepth = BDEVPERF_CONFIG_UNDEFINED; 2321 /* bdevperf has no default for -M option but in FIO the default is 50 */ 2322 global_default_config.rwmixread = 50; 2323 global_default_config.offset = 0; 2324 /* length 0 means 100% */ 2325 global_default_config.length = 0; 2326 global_default_config.rw = BDEVPERF_CONFIG_UNDEFINED; 2327 config_set_cli_args(&global_default_config); 2328 2329 if ((int)global_default_config.rw == BDEVPERF_CONFIG_ERROR) { 2330 return 1; 2331 } 2332 2333 /* There is only a single instance of global job_config 2334 * We just reset its value when we encounter new [global] section */ 2335 global_config = global_default_config; 2336 2337 for (s = spdk_conf_first_section(g_bdevperf_conf); 2338 s != NULL; 2339 s = spdk_conf_next_section(s)) { 2340 config = calloc(1, sizeof(*config)); 2341 if (config == NULL) { 2342 fprintf(stderr, "Unable to allocate memory for job config\n"); 2343 return 1; 2344 } 2345 2346 config->name = spdk_conf_section_get_name(s); 2347 is_global = strcmp(config->name, "global") == 0; 2348 2349 if (is_global) { 2350 global_config = global_default_config; 2351 } 2352 2353 config->filename = spdk_conf_section_get_val(s, "filename"); 2354 if (config->filename == NULL) { 2355 config->filename = global_config.filename; 2356 } 2357 if (!is_global) { 2358 if (config->filename == NULL) { 2359 fprintf(stderr, "Job '%s' expects 'filename' parameter\n", config->name); 2360 goto error; 2361 } else if (strnlen(config->filename, BDEVPERF_CONFIG_MAX_FILENAME) 2362 >= BDEVPERF_CONFIG_MAX_FILENAME) { 2363 fprintf(stderr, 2364 "filename for '%s' job is too long. Max length is %d\n", 2365 config->name, BDEVPERF_CONFIG_MAX_FILENAME); 2366 goto error; 2367 } 2368 } 2369 2370 cpumask = spdk_conf_section_get_val(s, "cpumask"); 2371 if (cpumask == NULL) { 2372 config->cpumask = global_config.cpumask; 2373 } else if (spdk_cpuset_parse(&config->cpumask, cpumask)) { 2374 fprintf(stderr, "Job '%s' has bad 'cpumask' value\n", config->name); 2375 goto error; 2376 } 2377 2378 config->bs = parse_uint_option(s, "bs", global_config.bs); 2379 if (config->bs == BDEVPERF_CONFIG_ERROR) { 2380 goto error; 2381 } else if (config->bs == 0) { 2382 fprintf(stderr, "'bs' of job '%s' must be greater than 0\n", config->name); 2383 goto error; 2384 } 2385 2386 config->iodepth = parse_uint_option(s, "iodepth", global_config.iodepth); 2387 if (config->iodepth == BDEVPERF_CONFIG_ERROR) { 2388 goto error; 2389 } else if (config->iodepth == 0) { 2390 fprintf(stderr, 2391 "'iodepth' of job '%s' must be greater than 0\n", 2392 config->name); 2393 goto error; 2394 } 2395 2396 config->rwmixread = parse_uint_option(s, "rwmixread", global_config.rwmixread); 2397 if (config->rwmixread == BDEVPERF_CONFIG_ERROR) { 2398 goto error; 2399 } else if (config->rwmixread > 100) { 2400 fprintf(stderr, 2401 "'rwmixread' value of '%s' job is not in 0-100 range\n", 2402 config->name); 2403 goto error; 2404 } 2405 2406 config->offset = parse_uint_option(s, "offset", global_config.offset); 2407 if (config->offset == BDEVPERF_CONFIG_ERROR) { 2408 goto error; 2409 } 2410 2411 val = parse_uint_option(s, "length", global_config.length); 2412 if (val == BDEVPERF_CONFIG_ERROR) { 2413 goto error; 2414 } 2415 config->length = val; 2416 2417 rw = spdk_conf_section_get_val(s, "rw"); 2418 config->rw = parse_rw(rw, global_config.rw); 2419 if ((int)config->rw == BDEVPERF_CONFIG_ERROR) { 2420 fprintf(stderr, "Job '%s' has bad 'rw' value\n", config->name); 2421 goto error; 2422 } else if (!is_global && (int)config->rw == BDEVPERF_CONFIG_UNDEFINED) { 2423 fprintf(stderr, "Job '%s' has no 'rw' assigned\n", config->name); 2424 goto error; 2425 } 2426 2427 if (is_global) { 2428 config_set_cli_args(config); 2429 global_config = *config; 2430 free(config); 2431 config = NULL; 2432 } else { 2433 TAILQ_INSERT_TAIL(&job_config_list, config, link); 2434 n++; 2435 } 2436 } 2437 2438 if (g_rpc_log_file_name != NULL) { 2439 g_rpc_log_file = fopen(g_rpc_log_file_name, "a"); 2440 if (g_rpc_log_file == NULL) { 2441 fprintf(stderr, "Failed to open %s\n", g_rpc_log_file_name); 2442 goto error; 2443 } 2444 } 2445 2446 printf("Using job config with %d jobs\n", n); 2447 return 0; 2448 error: 2449 free(config); 2450 return 1; 2451 } 2452 2453 static void 2454 bdevperf_run(void *arg1) 2455 { 2456 uint32_t i; 2457 2458 g_main_thread = spdk_get_thread(); 2459 2460 spdk_cpuset_zero(&g_all_cpuset); 2461 SPDK_ENV_FOREACH_CORE(i) { 2462 spdk_cpuset_set_cpu(&g_all_cpuset, i, true); 2463 } 2464 2465 if (g_wait_for_tests) { 2466 /* Do not perform any tests until RPC is received */ 2467 return; 2468 } 2469 2470 bdevperf_construct_job_configs(); 2471 } 2472 2473 static void 2474 rpc_perform_tests_reset(void) 2475 { 2476 /* Reset g_run_rc to 0 for the next test run. */ 2477 g_run_rc = 0; 2478 2479 /* Reset g_stats to 0 for the next test run. */ 2480 memset(&g_stats, 0, sizeof(g_stats)); 2481 2482 /* Reset g_show_performance_period_num to 0 for the next test run. */ 2483 g_show_performance_period_num = 0; 2484 } 2485 2486 static void 2487 rpc_perform_tests_cb(void) 2488 { 2489 struct spdk_json_write_ctx *w; 2490 struct spdk_jsonrpc_request *request = g_request; 2491 2492 g_request = NULL; 2493 2494 if (g_run_rc == 0) { 2495 w = spdk_jsonrpc_begin_result(request); 2496 spdk_json_write_uint32(w, g_run_rc); 2497 spdk_jsonrpc_end_result(request, w); 2498 } else { 2499 spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, 2500 "bdevperf failed with error %s", spdk_strerror(-g_run_rc)); 2501 } 2502 2503 rpc_perform_tests_reset(); 2504 } 2505 2506 struct rpc_bdevperf_params { 2507 int time_in_sec; 2508 char *workload_type; 2509 int queue_depth; 2510 char *io_size; 2511 int rw_percentage; 2512 }; 2513 2514 static const struct spdk_json_object_decoder rpc_bdevperf_params_decoders[] = { 2515 {"time_in_sec", offsetof(struct rpc_bdevperf_params, time_in_sec), spdk_json_decode_int32, true}, 2516 {"workload_type", offsetof(struct rpc_bdevperf_params, workload_type), spdk_json_decode_string, true}, 2517 {"queue_depth", offsetof(struct rpc_bdevperf_params, queue_depth), spdk_json_decode_int32, true}, 2518 {"io_size", offsetof(struct rpc_bdevperf_params, io_size), spdk_json_decode_string, true}, 2519 {"rw_percentage", offsetof(struct rpc_bdevperf_params, rw_percentage), spdk_json_decode_int32, true}, 2520 }; 2521 2522 static void 2523 rpc_apply_bdevperf_params(struct rpc_bdevperf_params *params) 2524 { 2525 if (params->workload_type) { 2526 /* we need to clear previously settled parameter to avoid memory leak */ 2527 free(g_workload_type); 2528 g_workload_type = strdup(params->workload_type); 2529 } 2530 if (params->queue_depth) { 2531 g_queue_depth = params->queue_depth; 2532 } 2533 if (params->io_size) { 2534 bdevperf_parse_arg('o', params->io_size); 2535 } 2536 if (params->time_in_sec) { 2537 g_time_in_sec = params->time_in_sec; 2538 } 2539 if (params->rw_percentage) { 2540 g_rw_percentage = params->rw_percentage; 2541 g_mix_specified = true; 2542 } else { 2543 g_mix_specified = false; 2544 } 2545 } 2546 2547 static void 2548 rpc_perform_tests(struct spdk_jsonrpc_request *request, const struct spdk_json_val *params) 2549 { 2550 struct rpc_bdevperf_params req = {}, backup = {}; 2551 int rc; 2552 2553 if (g_request != NULL) { 2554 fprintf(stderr, "Another test is already in progress.\n"); 2555 spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, 2556 spdk_strerror(-EINPROGRESS)); 2557 return; 2558 } 2559 2560 if (params) { 2561 if (spdk_json_decode_object_relaxed(params, rpc_bdevperf_params_decoders, 2562 SPDK_COUNTOF(rpc_bdevperf_params_decoders), 2563 &req)) { 2564 spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_PARSE_ERROR, 2565 "spdk_json_decode_object failed"); 2566 return; 2567 } 2568 2569 if (g_workload_type) { 2570 backup.workload_type = strdup(g_workload_type); 2571 } 2572 backup.queue_depth = g_queue_depth; 2573 if (asprintf(&backup.io_size, "%d", g_io_size) < 0) { 2574 fprintf(stderr, "Couldn't allocate memory for queue depth"); 2575 goto rpc_error; 2576 } 2577 backup.time_in_sec = g_time_in_sec; 2578 backup.rw_percentage = g_rw_percentage; 2579 2580 rpc_apply_bdevperf_params(&req); 2581 2582 free(req.workload_type); 2583 free(req.io_size); 2584 } 2585 2586 rc = verify_test_params(); 2587 2588 if (rc) { 2589 spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_PARSE_ERROR, 2590 "Invalid parameters provided"); 2591 /* restore old params on error */ 2592 rpc_apply_bdevperf_params(&backup); 2593 goto rpc_error; 2594 } 2595 2596 g_request = request; 2597 2598 /* Only construct job configs at the first test run. */ 2599 if (TAILQ_EMPTY(&job_config_list)) { 2600 bdevperf_construct_job_configs(); 2601 } else { 2602 bdevperf_construct_jobs(); 2603 } 2604 2605 rpc_error: 2606 free(backup.io_size); 2607 free(backup.workload_type); 2608 } 2609 SPDK_RPC_REGISTER("perform_tests", rpc_perform_tests, SPDK_RPC_RUNTIME) 2610 2611 static void 2612 _bdevperf_job_drain(void *ctx) 2613 { 2614 bdevperf_job_drain(ctx); 2615 } 2616 2617 static void 2618 spdk_bdevperf_shutdown_cb(void) 2619 { 2620 g_shutdown = true; 2621 struct bdevperf_job *job, *tmp; 2622 2623 if (g_bdevperf.running_jobs == 0) { 2624 bdevperf_test_done(NULL); 2625 return; 2626 } 2627 2628 /* Iterate jobs to stop all I/O */ 2629 TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, tmp) { 2630 spdk_thread_send_msg(job->thread, _bdevperf_job_drain, job); 2631 } 2632 } 2633 2634 static int 2635 bdevperf_parse_arg(int ch, char *arg) 2636 { 2637 long long tmp; 2638 2639 if (ch == 'w') { 2640 g_workload_type = strdup(arg); 2641 } else if (ch == 'T') { 2642 g_job_bdev_name = arg; 2643 } else if (ch == 'z') { 2644 g_wait_for_tests = true; 2645 } else if (ch == 'Z') { 2646 g_zcopy = true; 2647 } else if (ch == 'X') { 2648 g_abort = true; 2649 } else if (ch == 'C') { 2650 g_multithread_mode = true; 2651 } else if (ch == 'f') { 2652 g_continue_on_failure = true; 2653 } else if (ch == 'j') { 2654 g_bdevperf_conf_file = arg; 2655 } else if (ch == 'F') { 2656 char *endptr; 2657 2658 errno = 0; 2659 g_zipf_theta = strtod(arg, &endptr); 2660 if (errno || arg == endptr || g_zipf_theta < 0) { 2661 fprintf(stderr, "Illegal zipf theta value %s\n", arg); 2662 return -EINVAL; 2663 } 2664 } else if (ch == 'l') { 2665 g_latency_display_level++; 2666 } else if (ch == 'D') { 2667 g_random_map = true; 2668 } else if (ch == 'E') { 2669 g_one_thread_per_lcore = true; 2670 } else if (ch == 'J') { 2671 g_rpc_log_file_name = arg; 2672 } else if (ch == 'o') { 2673 uint64_t size; 2674 2675 if (spdk_parse_capacity(arg, &size, NULL) != 0) { 2676 fprintf(stderr, "Invalid IO size: %s\n", arg); 2677 return -EINVAL; 2678 } 2679 g_io_size = (int)size; 2680 } else if (ch == 'U') { 2681 g_unique_writes = true; 2682 } else { 2683 tmp = spdk_strtoll(arg, 10); 2684 if (tmp < 0) { 2685 fprintf(stderr, "Parse failed for the option %c.\n", ch); 2686 return tmp; 2687 } else if (tmp >= INT_MAX) { 2688 fprintf(stderr, "Parsed option was too large %c.\n", ch); 2689 return -ERANGE; 2690 } 2691 2692 switch (ch) { 2693 case 'q': 2694 g_queue_depth = tmp; 2695 break; 2696 case 't': 2697 g_time_in_sec = tmp; 2698 break; 2699 case 'k': 2700 g_timeout_in_sec = tmp; 2701 break; 2702 case 'M': 2703 g_rw_percentage = tmp; 2704 g_mix_specified = true; 2705 break; 2706 case 'P': 2707 g_show_performance_ema_period = tmp; 2708 break; 2709 case 'S': 2710 g_show_performance_real_time = 1; 2711 g_show_performance_period_in_usec = tmp * SPDK_SEC_TO_USEC; 2712 break; 2713 default: 2714 return -EINVAL; 2715 } 2716 } 2717 return 0; 2718 } 2719 2720 static void 2721 bdevperf_usage(void) 2722 { 2723 printf(" -q <depth> io depth\n"); 2724 printf(" -o <size> io size in bytes\n"); 2725 printf(" -w <type> io pattern type, must be one of " PATTERN_TYPES_STR "\n"); 2726 printf(" -t <time> time in seconds\n"); 2727 printf(" -k <timeout> timeout in seconds to detect starved I/O (default is 0 and disabled)\n"); 2728 printf(" -M <percent> rwmixread (100 for reads, 0 for writes)\n"); 2729 printf(" -P <num> number of moving average period\n"); 2730 printf("\t\t(If set to n, show weighted mean of the previous n IO/s in real time)\n"); 2731 printf("\t\t(Formula: M = 2 / (n + 1), EMA[i+1] = IO/s * M + (1 - M) * EMA[i])\n"); 2732 printf("\t\t(only valid with -S)\n"); 2733 printf(" -S <period> show performance result in real time every <period> seconds\n"); 2734 printf(" -T <bdev> bdev to run against. Default: all available bdevs.\n"); 2735 printf(" -f continue processing I/O even after failures\n"); 2736 printf(" -F <zipf theta> use zipf distribution for random I/O\n"); 2737 printf(" -Z enable using zcopy bdev API for read or write I/O\n"); 2738 printf(" -z start bdevperf, but wait for perform_tests RPC to start tests\n"); 2739 printf(" (See examples/bdev/bdevperf/bdevperf.py)\n"); 2740 printf(" -X abort timed out I/O\n"); 2741 printf(" -C enable every core to send I/Os to each bdev\n"); 2742 printf(" -j <filename> use job config file\n"); 2743 printf(" -l display latency histogram, default: disable. -l display summary, -ll display details\n"); 2744 printf(" -D use a random map for picking offsets not previously read or written (for all jobs)\n"); 2745 printf(" -E share per lcore thread among jobs. Available only if -j is not used.\n"); 2746 printf(" -J File name to open with append mode and log JSON RPC calls.\n"); 2747 printf(" -U generate unique data for each write I/O, has no effect on non-write I/O\n"); 2748 } 2749 2750 static void 2751 bdevperf_fini(void) 2752 { 2753 free_job_config(); 2754 free(g_workload_type); 2755 2756 if (g_rpc_log_file != NULL) { 2757 fclose(g_rpc_log_file); 2758 g_rpc_log_file = NULL; 2759 } 2760 } 2761 2762 static int 2763 verify_test_params(void) 2764 { 2765 if (!g_bdevperf_conf_file && g_queue_depth <= 0) { 2766 goto out; 2767 } 2768 if (!g_bdevperf_conf_file && g_io_size <= 0) { 2769 goto out; 2770 } 2771 if (!g_bdevperf_conf_file && !g_workload_type) { 2772 goto out; 2773 } 2774 if (g_bdevperf_conf_file && g_one_thread_per_lcore) { 2775 printf("If bdevperf's config file is used, per lcore thread cannot be used\n"); 2776 goto out; 2777 } 2778 if (g_time_in_sec <= 0) { 2779 goto out; 2780 } 2781 g_time_in_usec = g_time_in_sec * SPDK_SEC_TO_USEC; 2782 2783 if (g_timeout_in_sec < 0) { 2784 goto out; 2785 } 2786 2787 if (g_abort && !g_timeout_in_sec) { 2788 printf("Timeout must be set for abort option, Ignoring g_abort\n"); 2789 } 2790 2791 if (g_show_performance_ema_period > 0 && 2792 g_show_performance_real_time == 0) { 2793 fprintf(stderr, "-P option must be specified with -S option\n"); 2794 return 1; 2795 } 2796 2797 if (g_io_size > SPDK_BDEV_LARGE_BUF_MAX_SIZE) { 2798 printf("I/O size of %d is greater than zero copy threshold (%d).\n", 2799 g_io_size, SPDK_BDEV_LARGE_BUF_MAX_SIZE); 2800 printf("Zero copy mechanism will not be used.\n"); 2801 g_zcopy = false; 2802 } 2803 2804 if (g_bdevperf_conf_file) { 2805 /* workload_type verification happens during config file parsing */ 2806 return 0; 2807 } 2808 2809 if (!strcmp(g_workload_type, "verify") || 2810 !strcmp(g_workload_type, "reset")) { 2811 g_rw_percentage = 50; 2812 g_verify = true; 2813 if (!strcmp(g_workload_type, "reset")) { 2814 g_reset = true; 2815 } 2816 } 2817 2818 if (!strcmp(g_workload_type, "read") || 2819 !strcmp(g_workload_type, "randread") || 2820 !strcmp(g_workload_type, "write") || 2821 !strcmp(g_workload_type, "randwrite") || 2822 !strcmp(g_workload_type, "verify") || 2823 !strcmp(g_workload_type, "reset") || 2824 !strcmp(g_workload_type, "unmap") || 2825 !strcmp(g_workload_type, "write_zeroes") || 2826 !strcmp(g_workload_type, "flush")) { 2827 if (g_mix_specified) { 2828 fprintf(stderr, "Ignoring -M option... Please use -M option" 2829 " only when using rw or randrw.\n"); 2830 } 2831 } 2832 2833 if (!strcmp(g_workload_type, "rw") || 2834 !strcmp(g_workload_type, "randrw")) { 2835 if (g_rw_percentage < 0 || g_rw_percentage > 100) { 2836 fprintf(stderr, 2837 "-M must be specified to value from 0 to 100 " 2838 "for rw or randrw.\n"); 2839 return 1; 2840 } 2841 } 2842 2843 if (strcmp(g_workload_type, "randread") && 2844 strcmp(g_workload_type, "randwrite") && 2845 strcmp(g_workload_type, "randrw")) { 2846 if (g_random_map) { 2847 fprintf(stderr, "Ignoring -D option... Please use -D option" 2848 " only when using randread, randwrite or randrw.\n"); 2849 return 1; 2850 } 2851 } 2852 2853 return 0; 2854 out: 2855 return 1; 2856 } 2857 2858 int 2859 main(int argc, char **argv) 2860 { 2861 struct spdk_app_opts opts = {}; 2862 int rc; 2863 2864 /* Use the runtime PID to set the random seed */ 2865 srand(getpid()); 2866 2867 spdk_app_opts_init(&opts, sizeof(opts)); 2868 opts.name = "bdevperf"; 2869 opts.rpc_addr = NULL; 2870 opts.shutdown_cb = spdk_bdevperf_shutdown_cb; 2871 2872 if ((rc = spdk_app_parse_args(argc, argv, &opts, "Zzfq:o:t:w:k:CEF:J:M:P:S:T:Xlj:DU", NULL, 2873 bdevperf_parse_arg, bdevperf_usage)) != 2874 SPDK_APP_PARSE_ARGS_SUCCESS) { 2875 return rc; 2876 } 2877 2878 /* Set the default address if no rpc_addr was provided in args 2879 * and RPC is used for starting tests */ 2880 if (g_wait_for_tests && opts.rpc_addr == NULL) { 2881 opts.rpc_addr = SPDK_DEFAULT_RPC_ADDR; 2882 } 2883 2884 if (read_job_config()) { 2885 bdevperf_fini(); 2886 return 1; 2887 } 2888 2889 if (g_rpc_log_file != NULL) { 2890 opts.rpc_log_file = g_rpc_log_file; 2891 } 2892 2893 if (verify_test_params() != 0 && !g_wait_for_tests) { 2894 spdk_app_usage(); 2895 bdevperf_usage(); 2896 bdevperf_fini(); 2897 exit(1); 2898 } 2899 2900 rc = spdk_app_start(&opts, bdevperf_run, NULL); 2901 2902 spdk_app_fini(); 2903 bdevperf_fini(); 2904 return rc; 2905 } 2906