/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdlib.h>

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_compressdev.h>

#include "comp_perf_test_cyclecount.h"

struct cperf_cyclecount_ctx {
	struct cperf_verify_ctx ver;

	uint32_t ops_enq_retries;
	uint32_t ops_deq_retries;

	uint64_t duration_op;
	uint64_t duration_enq;
	uint64_t duration_deq;
};

void
cperf_cyclecount_test_destructor(void *arg)
{
	struct cperf_cyclecount_ctx *ctx = arg;

	if (arg) {
		comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
		rte_free(arg);
	}
}

void *
cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_cyclecount_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* ver. part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_cyclecount_test_destructor(ctx);
	return NULL;
}

static int
cperf_cyclecount_op_setup(struct rte_comp_op **ops,
		struct cperf_cyclecount_ctx *ctx,
		struct rte_mbuf **input_bufs,
		struct rte_mbuf **output_bufs,
		void *priv_xform,
		uint32_t out_seg_sz)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;

	uint32_t i, iter, num_iter;
	int res = 0;
	uint16_t ops_needed;

	num_iter = test_data->num_iter;

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_enq_ops = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			ops_needed = num_ops;

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)ops,
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Cyclecount: could not allocate enough operations\n");
				/*
				 * Nothing to clean up here: a failed
				 * get_bulk acquires no ops, every earlier
				 * burst was already returned to the pool,
				 * and the ops array itself is owned (and
				 * freed) by the caller.
				 */
				return -1;
			}

			for (i = 0; i < ops_needed; i++) {
				/* Calculate next buffer to attach to operation */
				uint32_t buf_id = total_enq_ops + i;
				uint16_t op_id = i;

				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			/* E N Q U E U I N G */
			/* assuming that all ops are enqueued */
			/* instead of the real enqueue operation */
			num_enq = num_ops;

			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			/* D E Q U E U I N G */
			/* assuming that all ops are dequeued */
			/* instead of the real dequeue operation */
			num_deq = num_ops;

			rte_mempool_put_bulk(mem->op_pool,
					(void **)ops, num_deq);
		}
	}
	return res;
}
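/*
 * Illustrative only, not part of the original test: every "Cycles"
 * column reported by the runner is a plain average, i.e. an accumulated
 * TSC delta divided by the number of operations it covered (one op per
 * buffer per iteration). A minimal sketch of that conversion; the
 * helper name is hypothetical.
 */
static inline uint32_t
cyclecount_avg_per_op(uint64_t tsc_duration, uint32_t total_bufs,
		uint32_t num_iter)
{
	/* one op is prepared/enqueued/dequeued per buffer per iteration */
	return (uint32_t)(tsc_duration / ((uint64_t)total_bufs * num_iter));
}
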
static int
main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int ret, res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	uint64_t tsc_start, tsc_end, tsc_duration;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1, "Unknown burst size\n");
		return -1;
	}
	ctx->duration_enq = 0;
	ctx->duration_deq = 0;
	ctx->ops_enq_retries = 0;
	ctx->ops_deq_retries = 0;

	/* one array for both enqueue and dequeue */
	ops = rte_zmalloc_socket(NULL,
		(test_data->burst_sz + mem->total_bufs) *
		sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[test_data->burst_sz];

	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = test_data->test_algo,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		if (test_data->test_algo == RTE_COMP_ALGO_DEFLATE)
			xform.compress.deflate.huffman = test_data->huffman_enc;
		else if (test_data->test_algo == RTE_COMP_ALGO_LZ4)
			xform.compress.lz4.flags = test_data->lz4_flags;
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = test_data->test_algo,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		if (test_data->test_algo == RTE_COMP_ALGO_LZ4)
			xform.decompress.lz4.flags = test_data->lz4_flags;
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	tsc_start = rte_rdtsc_precise();
	ret = cperf_cyclecount_op_setup(ops,
			ctx,
			input_bufs,
			output_bufs,
			priv_xform,
			out_seg_sz);

	tsc_end = rte_rdtsc_precise();

	/* ret value check postponed a bit to cancel extra 'if' bias */
	if (ret < 0) {
		RTE_LOG(ERR, USER1, "Setup function failed\n");
		res = -1;
		goto end;
	}

	tsc_duration = tsc_end - tsc_start;
	ctx->duration_op = tsc_duration;
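	/*
	 * duration_op now holds the pure software cost of building and
	 * releasing every operation across all iterations, with no device
	 * interaction at all; the runner later reports it as the
	 * "setup/op" column.
	 */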
	num_iter = test_data->num_iter;
	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
					ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)&ops[ops_unused],
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;

				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_enq = rte_compressdev_enqueue_burst(dev_id,
					mem->qp_id, ops,
					num_ops);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_enq += tsc_duration;

			if (num_enq < num_ops)
				ctx->ops_enq_retries++;

			if (test_data->cyclecount_delay)
				rte_delay_us_block(test_data->cyclecount_delay);

			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					allocated);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;

			if (num_deq < allocated)
				ctx->ops_deq_retries++;

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
							RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
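		/*
		 * Every buffer has been enqueued at this point, but up to
		 * 'allocated' responses may still sit in the device queue;
		 * keep polling below until all of them are drained and
		 * returned to the mempool.
		 */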
		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					test_data->burst_sz);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;
			ctx->ops_deq_retries++;

			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}
			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
							RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}
	allocated = 0;

end:
	if (allocated)
		rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
			"lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}
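
/*
 * Runner: prints the device/queue-pair banner once, executes the silent
 * verification pass first, then runs each requested direction twice
 * (the first pass only warms the cache and its results are discarded)
 * and finally prints one result row per lcore.
 */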
int
cperf_cyclecount_test_runner(void *test_ctx)
{
	struct cperf_cyclecount_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static uint16_t display_once;
	static rte_spinlock_t print_spinlock;
	int i;

	uint32_t ops_enq_retries_comp;
	uint32_t ops_deq_retries_comp;

	uint32_t ops_enq_retries_decomp;
	uint32_t ops_deq_retries_decomp;

	uint32_t duration_setup_per_op;

	uint32_t duration_enq_per_op_comp;
	uint32_t duration_deq_per_op_comp;

	uint32_t duration_enq_per_op_decomp;
	uint32_t duration_deq_per_op_decomp;

	ctx->ver.mem.lcore_id = lcore;

	uint16_t exp = 0;
	/*
	 * Print information about the current compression thread
	 */
	if (rte_atomic_compare_exchange_strong_explicit(&ctx->ver.mem.print_info_once, &exp,
			1, rte_memory_order_relaxed, rte_memory_order_relaxed))
		printf(" lcore: %u,"
			" driver name: %s,"
			" device name: %s,"
			" device id: %u,"
			" socket id: %u,"
			" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * The verification part has to run first
	 */
	if (cperf_verify_test_runner(&ctx->ver))
		return EXIT_FAILURE;

	if (test_data->test_op & COMPRESS) {
		/*
		 * Run the test twice, discarding the first performance
		 * results, before the cache is warmed up
		 */
		for (i = 0; i < 2; i++) {
			if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
				return EXIT_FAILURE;
		}

		ops_enq_retries_comp = ctx->ops_enq_retries;
		ops_deq_retries_comp = ctx->ops_deq_retries;

		duration_enq_per_op_comp = ctx->duration_enq /
				(ctx->ver.mem.total_bufs * test_data->num_iter);
		duration_deq_per_op_comp = ctx->duration_deq /
				(ctx->ver.mem.total_bufs * test_data->num_iter);
	} else {
		ops_enq_retries_comp = 0;
		ops_deq_retries_comp = 0;

		duration_enq_per_op_comp = 0;
		duration_deq_per_op_comp = 0;
	}

	if (test_data->test_op & DECOMPRESS) {
		/*
		 * Run the test twice, discarding the first performance
		 * results, before the cache is warmed up
		 */
		for (i = 0; i < 2; i++) {
			if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
				return EXIT_FAILURE;
		}

		ops_enq_retries_decomp = ctx->ops_enq_retries;
		ops_deq_retries_decomp = ctx->ops_deq_retries;

		duration_enq_per_op_decomp = ctx->duration_enq /
				(ctx->ver.mem.total_bufs * test_data->num_iter);
		duration_deq_per_op_decomp = ctx->duration_deq /
				(ctx->ver.mem.total_bufs * test_data->num_iter);
	} else {
		ops_enq_retries_decomp = 0;
		ops_deq_retries_decomp = 0;

		duration_enq_per_op_decomp = 0;
		duration_deq_per_op_decomp = 0;
	}

	duration_setup_per_op = ctx->duration_op /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* R E P O R T processing */
	rte_spinlock_lock(&print_spinlock);

	if (display_once == 0) {
		display_once = 1;

		printf("\nLegend for the table\n"
			" - Retries section: number of retries for the following operations:\n"
			" [C-e] - compression enqueue\n"
			" [C-d] - compression dequeue\n"
			" [D-e] - decompression enqueue\n"
			" [D-d] - decompression dequeue\n"
			" - Cycles section: number of cycles per 'op' for the following operations:\n"
			" setup/op - memory allocation, op configuration and memory deallocation\n"
			" [C-e] - compression enqueue\n"
			" [C-d] - compression dequeue\n"
			" [D-e] - decompression enqueue\n"
			" [D-d] - decompression dequeue\n\n");

		printf("\n%12s%6s%12s%17s",
			"lcore id", "Level", "Comp size", "Comp ratio [%]");

		printf(" |%10s %6s %8s %6s %8s",
			" Retries:",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");

		printf(" |%9s %9s %9s %9s %9s %9s\n",
			" Cycles:",
			"setup/op",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");
	}

	printf("%12u"
		"%6u"
		"%12zu"
		"%17.2f",
		ctx->ver.mem.lcore_id,
		test_data->level,
		ctx->ver.comp_data_sz,
		ctx->ver.ratio);

	printf(" |%10s %6u %8u %6u %8u",
		" ",
		ops_enq_retries_comp,
		ops_deq_retries_comp,
		ops_enq_retries_decomp,
		ops_deq_retries_decomp);

	printf(" |%9s %9u %9u %9u %9u %9u\n",
		" ",
		duration_setup_per_op,
		duration_enq_per_op_comp,
		duration_deq_per_op_comp,
		duration_enq_per_op_decomp,
		duration_deq_per_op_decomp);

	rte_spinlock_unlock(&print_spinlock);

	return EXIT_SUCCESS;
}
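
/*
 * Illustrative usage sketch, not part of this file's job: in the real
 * harness these three entry points are wired into a constructor/runner/
 * destructor table (see comp_perf.c in this app) and the runner is
 * launched per lcore. The guard macro and function name below are
 * hypothetical; the sketch only shows the expected call order and
 * ownership (the context returned by the constructor must be released
 * with the destructor).
 */
#ifdef CPERF_CYCLECOUNT_USAGE_EXAMPLE
static int
cyclecount_example_run(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *opts)
{
	void *ctx = cperf_cyclecount_test_constructor(dev_id, qp_id, opts);
	int ret;

	if (ctx == NULL)
		return EXIT_FAILURE;

	ret = cperf_cyclecount_test_runner(ctx);
	cperf_cyclecount_test_destructor(ctx);

	return ret;
}
#endif /* CPERF_CYCLECOUNT_USAGE_EXAMPLE */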