/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_compressdev.h>

#include "comp_perf_test_cyclecount.h"

struct cperf_cyclecount_ctx {
	struct cperf_verify_ctx ver;

	uint32_t ops_enq_retries;
	uint32_t ops_deq_retries;

	uint64_t duration_op;
	uint64_t duration_enq;
	uint64_t duration_deq;
};

void
cperf_cyclecount_test_destructor(void *arg)
{
	struct cperf_cyclecount_ctx *ctx = arg;

	if (arg) {
		comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
		rte_free(arg);
	}
}

void *
cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_cyclecount_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* ver. part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_cyclecount_test_destructor(ctx);
	return NULL;
}

static int
cperf_cyclecount_op_setup(struct rte_comp_op **ops,
		struct cperf_cyclecount_ctx *ctx,
		struct rte_mbuf **input_bufs,
		struct rte_mbuf **output_bufs,
		void *priv_xform,
		uint32_t out_seg_sz)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;

	uint32_t i, iter, num_iter;
	int res = 0;
	uint16_t ops_needed;

	num_iter = test_data->num_iter;

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			ops_needed = num_ops;

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)ops,
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Cyclecount: could not allocate enough operations\n");
				res = -1;
				goto end;
			}

			for (i = 0; i < ops_needed; i++) {

				/* Calculate next buffer to attach to operation */
				uint32_t buf_id = total_enq_ops + i;
				uint16_t op_id = i;

				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			/* E N Q U E U I N G */
			/*
			 * Assume that all ops are enqueued, instead of doing
			 * the real enqueue operation.
			 */
			num_enq = num_ops;

			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			/* D E Q U E U I N G */
			/*
			 * Assume that all ops are dequeued, instead of doing
			 * the real dequeue operation.
			 */
			num_deq = num_ops;

			total_deq_ops += num_deq;
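			/*
			 * No real enqueue/dequeue happens in this function;
			 * the prepared ops are returned to the pool right
			 * away, so the caller's timing captures only the
			 * op allocation and setup cost.
			 */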
			rte_mempool_put_bulk(mem->op_pool,
					(void **)ops, num_deq);
		}
	}
	return res;
end:
	/*
	 * The only way to reach this label is a failed
	 * rte_mempool_get_bulk(), so there are no allocated ops to put back
	 * and the ops array itself is owned (and freed) by the caller.
	 */
	return res;
}

static int
main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int ret, res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	uint64_t tsc_start, tsc_end, tsc_duration;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1, "Unknown burst size\n");
		return -1;
	}
	ctx->duration_enq = 0;
	ctx->duration_deq = 0;
	ctx->ops_enq_retries = 0;
	ctx->ops_deq_retries = 0;

	/* one array for both enqueue and dequeue */
	ops = rte_zmalloc_socket(NULL,
		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[mem->total_bufs];

	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.deflate.huffman = test_data->huffman_enc,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	tsc_start = rte_rdtsc_precise();
	ret = cperf_cyclecount_op_setup(ops,
			ctx,
			input_bufs,
			output_bufs,
			priv_xform,
			out_seg_sz);

	tsc_end = rte_rdtsc_precise();

	/* ret value check postponed a bit to cancel extra 'if' bias */
	if (ret < 0) {
		RTE_LOG(ERR, USER1, "Setup function failed\n");
		res = -1;
		goto end;
	}

	tsc_duration = tsc_end - tsc_start;
	ctx->duration_op = tsc_duration;

	num_iter = test_data->num_iter;
	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
					ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

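			/*
			 * Take only ops_needed fresh operations from the pool
			 * and place them at &ops[ops_unused], so that the
			 * still-pending ops just moved to the front of the
			 * array are not overwritten.
			 */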
			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)&ops[ops_unused],
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_enq = rte_compressdev_enqueue_burst(dev_id,
					mem->qp_id, ops,
					num_ops);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_enq += tsc_duration;

			if (num_enq < num_ops)
				ctx->ops_enq_retries++;

			if (test_data->cyclecount_delay)
				rte_delay_us_block(test_data->cyclecount_delay);

			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					allocated);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;

			if (num_deq < allocated)
				ctx->ops_deq_retries++;

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
							data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}

		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					test_data->burst_sz);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;
			ctx->ops_deq_retries++;

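			/*
			 * An empty dequeue burst is not an error by itself;
			 * only give up if the device reports dequeue errors.
			 */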
			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}
			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
							data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}
	allocated = 0;

end:
	if (allocated)
		rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
			"lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}

int
cperf_cyclecount_test_runner(void *test_ctx)
{
	struct cperf_cyclecount_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
	static rte_spinlock_t print_spinlock;
	int i;

	uint32_t ops_enq_retries_comp;
	uint32_t ops_deq_retries_comp;

	uint32_t ops_enq_retries_decomp;
	uint32_t ops_deq_retries_decomp;

	uint32_t duration_setup_per_op;

	uint32_t duration_enq_per_op_comp;
	uint32_t duration_deq_per_op_comp;

	uint32_t duration_enq_per_op_decomp;
	uint32_t duration_deq_per_op_decomp;

	ctx->ver.mem.lcore_id = lcore;

	/*
	 * Print information about the current compression thread
	 */
	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
		printf(" lcore: %u,"
			" driver name: %s,"
			" device name: %s,"
			" device id: %u,"
			" socket id: %u,"
			" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * The verification part has to run first
	 */
	if (cperf_verify_test_runner(&ctx->ver))
		return EXIT_FAILURE;

	/*
	 * Run each test twice and discard the first, cache-cold run,
	 * so that the reported results are taken with a warm cache.
	 */

	/* C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_comp = ctx->ops_enq_retries;
	ops_deq_retries_comp = ctx->ops_deq_retries;

	duration_enq_per_op_comp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_comp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* D E C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
			return EXIT_FAILURE;
	}

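	/*
	 * The counters in ctx now hold the values from the second
	 * (cache-warm) decompression run; convert the accumulated cycle
	 * counts into per-operation averages below.
	 */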
	ops_enq_retries_decomp = ctx->ops_enq_retries;
	ops_deq_retries_decomp = ctx->ops_deq_retries;

	duration_enq_per_op_decomp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_decomp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	duration_setup_per_op = ctx->duration_op /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* R E P O R T processing */
	if (rte_atomic16_test_and_set(&display_once)) {

		rte_spinlock_lock(&print_spinlock);

		printf("\nLegend for the table\n"
			" - Retries section: number of retries for the following operations:\n"
			"   [C-e] - compression enqueue\n"
			"   [C-d] - compression dequeue\n"
			"   [D-e] - decompression enqueue\n"
			"   [D-d] - decompression dequeue\n"
			" - Cycles section: number of cycles per 'op' for the following operations:\n"
			"   setup/op - memory allocation, op configuration and memory deallocation\n"
			"   [C-e] - compression enqueue\n"
			"   [C-d] - compression dequeue\n"
			"   [D-e] - decompression enqueue\n"
			"   [D-d] - decompression dequeue\n\n");

		printf("\n%12s%6s%12s%17s",
			"lcore id", "Level", "Comp size", "Comp ratio [%]");

		printf(" |%10s %6s %8s %6s %8s",
			" Retries:",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");

		printf(" |%9s %9s %9s %9s %9s %9s\n",
			" Cycles:",
			"setup/op",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");

		rte_spinlock_unlock(&print_spinlock);
	}

	rte_spinlock_lock(&print_spinlock);

	printf("%12u"
		"%6u"
		"%12zu"
		"%17.2f",
		ctx->ver.mem.lcore_id,
		test_data->level,
		ctx->ver.comp_data_sz,
		ctx->ver.ratio);

	printf(" |%10s %6u %8u %6u %8u",
		" ",
		ops_enq_retries_comp,
		ops_deq_retries_comp,
		ops_enq_retries_decomp,
		ops_deq_retries_decomp);

	printf(" |%9s %9u %9u %9u %9u %9u\n",
		" ",
		duration_setup_per_op,
		duration_enq_per_op_comp,
		duration_deq_per_op_comp,
		duration_enq_per_op_decomp,
		duration_deq_per_op_decomp);

	rte_spinlock_unlock(&print_spinlock);

	return EXIT_SUCCESS;
}
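
/*
 * Illustrative usage sketch (an assumption about the caller, not code from
 * this file): the three entry points above are expected to be driven by the
 * test framework, one context per device/queue-pair worker, roughly like
 * this. The actual wiring (test registration table, per-lcore launch) lives
 * in the framework's main code.
 *
 *	void *ctx = cperf_cyclecount_test_constructor(dev_id, qp_id, options);
 *
 *	if (ctx != NULL) {
 *		int ret = cperf_cyclecount_test_runner(ctx);
 *
 *		cperf_cyclecount_test_destructor(ctx);
 *		if (ret != EXIT_SUCCESS)
 *			... report the failure to the framework ...
 *	}
 */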