/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_compressdev.h>

#include "comp_perf_test_cyclecount.h"

struct cperf_cyclecount_ctx {
	struct cperf_verify_ctx ver;

	uint32_t ops_enq_retries;
	uint32_t ops_deq_retries;

	uint64_t duration_op;
	uint64_t duration_enq;
	uint64_t duration_deq;
};

void
cperf_cyclecount_test_destructor(void *arg)
{
	struct cperf_cyclecount_ctx *ctx = arg;

	if (arg) {
		comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
		rte_free(arg);
	}
}

void *
cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_cyclecount_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* ver. part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_cyclecount_test_destructor(ctx);
	return NULL;
}

static int
cperf_cyclecount_op_setup(struct rte_comp_op **ops,
		struct cperf_cyclecount_ctx *ctx,
		struct rte_mbuf **input_bufs,
		struct rte_mbuf **output_bufs,
		void *priv_xform,
		uint32_t out_seg_sz)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;

	uint32_t i, iter, num_iter;
	int res = 0;
	uint16_t ops_needed;

	num_iter = test_data->num_iter;

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_enq_ops = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			ops_needed = num_ops;

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)ops,
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Cyclecount: could not allocate enough operations\n");
				res = -1;
				goto end;
			}

			for (i = 0; i < ops_needed; i++) {

				/* Calculate next buffer to attach to operation */
				uint32_t buf_id = total_enq_ops + i;
				uint16_t op_id = i;

				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			/*
			 * E N Q U E U I N G
			 * assume that all ops are enqueued,
			 * instead of doing the real enqueue operation
			 */
			num_enq = num_ops;

			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			/*
			 * D E Q U E U I N G
			 * assume that all ops are dequeued,
			 * instead of doing the real dequeue operation
			 */
			num_deq = num_ops;

			rte_mempool_put_bulk(mem->op_pool,
					(void **)ops, num_deq);
		}
	}
	return res;
end:
	/*
	 * The only failure path is a failed rte_mempool_get_bulk(), which
	 * leaves no operations outstanding; the ops array itself is
	 * allocated and freed by the caller.
	 */
	return res;
}

static int
main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int ret, res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	uint64_t tsc_start, tsc_end, tsc_duration;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1, "Unknown burst size\n");
		return -1;
	}
	ctx->duration_enq = 0;
	ctx->duration_deq = 0;
	ctx->ops_enq_retries = 0;
	ctx->ops_deq_retries = 0;

	/* one array for both enqueue and dequeue */
	ops = rte_zmalloc_socket(NULL,
		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[mem->total_bufs];

	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.deflate.huffman = test_data->huffman_enc,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	tsc_start = rte_rdtsc_precise();
	ret = cperf_cyclecount_op_setup(ops,
			ctx,
			input_bufs,
			output_bufs,
			priv_xform,
			out_seg_sz);

	tsc_end = rte_rdtsc_precise();

	/* ret value check postponed a bit to cancel extra 'if' bias */
	if (ret < 0) {
		RTE_LOG(ERR, USER1, "Setup function failed\n");
		res = -1;
		goto end;
	}

	tsc_duration = tsc_end - tsc_start;
	ctx->duration_op = tsc_duration;

	num_iter = test_data->num_iter;
	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
					ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops,
					&ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations after the leftovers */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)&ops[ops_unused],
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_enq = rte_compressdev_enqueue_burst(dev_id,
					mem->qp_id, ops,
					num_ops);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_enq += tsc_duration;

			if (num_enq < num_ops)
				ctx->ops_enq_retries++;

			if (test_data->cyclecount_delay)
				rte_delay_us_block(test_data->cyclecount_delay);

			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					allocated);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;

			if (num_deq < allocated)
				ctx->ops_deq_retries++;

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
							RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}

		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					test_data->burst_sz);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;
			ctx->ops_deq_retries++;

			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}
			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
							RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}
	allocated = 0;

end:
	if (allocated)
		rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
			"lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}

int
cperf_cyclecount_test_runner(void *test_ctx)
{
	struct cperf_cyclecount_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static uint16_t display_once;
	static rte_spinlock_t print_spinlock;
	int i;

	uint32_t ops_enq_retries_comp;
	uint32_t ops_deq_retries_comp;

	uint32_t ops_enq_retries_decomp;
	uint32_t ops_deq_retries_decomp;

	uint32_t duration_setup_per_op;

	uint32_t duration_enq_per_op_comp;
	uint32_t duration_deq_per_op_comp;

	uint32_t duration_enq_per_op_decomp;
	uint32_t duration_deq_per_op_decomp;

	ctx->ver.mem.lcore_id = lcore;

	uint16_t exp = 0;
	/*
	 * Print information about the current compression thread
	 */
	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
			1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		printf(" lcore: %u,"
				" driver name: %s,"
				" device name: %s,"
				" device id: %u,"
				" socket id: %u,"
				" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * The verification part needs to run first
	 */
	if (cperf_verify_test_runner(&ctx->ver))
		return EXIT_FAILURE;

	/*
	 * Run the tests twice, discarding the first performance
	 * results, before the cache is warmed up
	 */

	/* C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_comp = ctx->ops_enq_retries;
	ops_deq_retries_comp = ctx->ops_deq_retries;

	duration_enq_per_op_comp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_comp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* D E C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_decomp = ctx->ops_enq_retries;
	ops_deq_retries_decomp = ctx->ops_deq_retries;

	duration_enq_per_op_decomp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_decomp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	duration_setup_per_op = ctx->duration_op /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* R E P O R T processing */
	rte_spinlock_lock(&print_spinlock);

	if (display_once == 0) {
		display_once = 1;

		printf("\nLegend for the table\n"
			"  - Retries section: number of retries for the following operations:\n"
			"    [C-e] - compression enqueue\n"
			"    [C-d] - compression dequeue\n"
			"    [D-e] - decompression enqueue\n"
			"    [D-d] - decompression dequeue\n"
			"  - Cycles section: number of cycles per 'op' for the following operations:\n"
			"    setup/op - memory allocation, op configuration and memory deallocation\n"
			"    [C-e] - compression enqueue\n"
			"    [C-d] - compression dequeue\n"
			"    [D-e] - decompression enqueue\n"
			"    [D-d] - decompression dequeue\n\n");

		printf("\n%12s%6s%12s%17s",
			"lcore id", "Level", "Comp size", "Comp ratio [%]");

		printf(" |%10s %6s %8s %6s %8s",
			" Retries:",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");

		printf(" |%9s %9s %9s %9s %9s %9s\n",
			" Cycles:",
			"setup/op",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");
	}

	printf("%12u"
		"%6u"
		"%12zu"
		"%17.2f",
		ctx->ver.mem.lcore_id,
		test_data->level,
		ctx->ver.comp_data_sz,
		ctx->ver.ratio);

	printf(" |%10s %6u %8u %6u %8u",
		" ",
		ops_enq_retries_comp,
		ops_deq_retries_comp,
		ops_enq_retries_decomp,
		ops_deq_retries_decomp);

	printf(" |%9s %9u %9u %9u %9u %9u\n",
		" ",
		duration_setup_per_op,
		duration_enq_per_op_comp,
		duration_deq_per_op_comp,
		duration_enq_per_op_decomp,
		duration_deq_per_op_decomp);

	rte_spinlock_unlock(&print_spinlock);

	return EXIT_SUCCESS;
}