/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdlib.h>

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include "rte_spinlock.h"
#include <rte_compressdev.h>

#include "comp_perf_test_cyclecount.h"

struct cperf_cyclecount_ctx {
	struct cperf_verify_ctx ver;

	uint32_t ops_enq_retries;
	uint32_t ops_deq_retries;

	uint64_t duration_op;
	uint64_t duration_enq;
	uint64_t duration_deq;
};

void
cperf_cyclecount_test_destructor(void *arg)
{
	struct cperf_cyclecount_ctx *ctx = arg;

	if (arg) {
		comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
		rte_free(arg);
	}
}

void *
cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_cyclecount_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* ver. part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_cyclecount_test_destructor(ctx);
	return NULL;
}
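
/*
 * Replay the operation-preparation steps of main_loop() (mempool get,
 * output mbuf reset, op field setup) without doing any real enqueue or
 * dequeue. The caller times this function to obtain the pure op setup
 * cost, reported as "setup/op" in the result table.
 */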
static int
cperf_cyclecount_op_setup(struct rte_comp_op **ops,
		struct cperf_cyclecount_ctx *ctx,
		struct rte_mbuf **input_bufs,
		struct rte_mbuf **output_bufs,
		void *priv_xform,
		uint32_t out_seg_sz)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;

	uint32_t i, iter, num_iter;
	int res = 0;
	uint16_t ops_needed;

	num_iter = test_data->num_iter;

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_enq_ops = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			ops_needed = num_ops;

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)ops,
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Cyclecount: could not allocate enough operations\n");
				res = -1;
				goto end;
			}

			for (i = 0; i < ops_needed; i++) {

				/* Calculate next buffer to attach to operation */
				uint32_t buf_id = total_enq_ops + i;
				uint16_t op_id = i;

				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			/* E N Q U E U I N G */
			/* assuming that all ops are enqueued */
			/* instead of the real enqueue operation */
			num_enq = num_ops;

			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			/* D E Q U E U I N G */
			/* assuming that all ops dequeued */
			/* instead of the real dequeue operation */
			num_deq = num_ops;

			rte_mempool_put_bulk(mem->op_pool,
					(void **)ops, num_deq);
		}
	}
	return res;
end:
	/*
	 * Nothing is outstanding here: the failed rte_mempool_get_bulk()
	 * allocated no operations and every previous burst has already been
	 * returned to the pool. The ops array itself is owned and freed by
	 * the caller, so freeing it here would lead to a double free.
	 */
	return res;
}
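
/*
 * Run num_iter iterations of the enqueue/dequeue loop for the given xform
 * type, accumulating the TSC cycles spent inside
 * rte_compressdev_enqueue_burst() and rte_compressdev_dequeue_burst()
 * (ctx->duration_enq / ctx->duration_deq) and counting enqueue/dequeue
 * retries. The op setup cost is measured separately via
 * cperf_cyclecount_op_setup() and stored in ctx->duration_op.
 */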
static int
main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int ret, res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	uint64_t tsc_start, tsc_end, tsc_duration;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1, "Unknown burst size\n");
		return -1;
	}
	ctx->duration_enq = 0;
	ctx->duration_deq = 0;
	ctx->ops_enq_retries = 0;
	ctx->ops_deq_retries = 0;

	/* one array for both enqueue and dequeue */
	ops = rte_zmalloc_socket(NULL,
		(test_data->burst_sz + mem->total_bufs) *
		sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[test_data->burst_sz];

	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.deflate.huffman = test_data->huffman_enc,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	tsc_start = rte_rdtsc_precise();
	ret = cperf_cyclecount_op_setup(ops,
			ctx,
			input_bufs,
			output_bufs,
			priv_xform,
			out_seg_sz);

	tsc_end = rte_rdtsc_precise();

	/* ret value check postponed a bit to cancel extra 'if' bias */
	if (ret < 0) {
		RTE_LOG(ERR, USER1, "Setup function failed\n");
		res = -1;
		goto end;
	}

	tsc_duration = tsc_end - tsc_start;
	ctx->duration_op = tsc_duration;

	num_iter = test_data->num_iter;
	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov = ops_unused *
						sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
						mem->op_pool,
						(void **)&ops[ops_unused],
						ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_enq = rte_compressdev_enqueue_burst(dev_id,
					mem->qp_id, ops,
					num_ops);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_enq += tsc_duration;

			if (num_enq < num_ops)
				ctx->ops_enq_retries++;

			if (test_data->cyclecount_delay)
				rte_delay_us_block(test_data->cyclecount_delay);

			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					allocated);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;

			if (num_deq < allocated)
				ctx->ops_deq_retries++;

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
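
		/*
		 * Ops accepted by the enqueue side may still be in flight at
		 * this point ('allocated' > 0), so keep polling until every
		 * op of this iteration has been dequeued.
		 */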
		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					test_data->burst_sz);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;
			ctx->ops_deq_retries++;

			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}
			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}
	allocated = 0;

end:
	if (allocated)
		rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
			"lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}

int
cperf_cyclecount_test_runner(void *test_ctx)
{
	struct cperf_cyclecount_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static uint16_t display_once;
	static rte_spinlock_t print_spinlock;
	int i;

	uint32_t ops_enq_retries_comp;
	uint32_t ops_deq_retries_comp;

	uint32_t ops_enq_retries_decomp;
	uint32_t ops_deq_retries_decomp;

	uint32_t duration_setup_per_op;

	uint32_t duration_enq_per_op_comp;
	uint32_t duration_deq_per_op_comp;

	uint32_t duration_enq_per_op_decomp;
	uint32_t duration_deq_per_op_decomp;

	ctx->ver.mem.lcore_id = lcore;

	uint16_t exp = 0;
	/*
	 * Print information about the current compression thread
	 */
	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
				1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		printf("    lcore: %u,"
			" driver name: %s,"
			" device name: %s,"
			" device id: %u,"
			" socket id: %u,"
			" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * First the verification part is needed
	 */
	if (cperf_verify_test_runner(&ctx->ver))
		return EXIT_FAILURE;

	/*
	 * Run each test twice and discard the first run's results,
	 * which are taken before the cache is warmed up
	 */

	/* C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_comp = ctx->ops_enq_retries;
	ops_deq_retries_comp = ctx->ops_deq_retries;
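
	/*
	 * All per-op cycle figures below are plain averages: the TSC cycles
	 * accumulated by main_loop() (or by the setup phase) divided by the
	 * total number of operations processed, i.e. total_bufs * num_iter.
	 */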
	duration_enq_per_op_comp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_comp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* D E C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_decomp = ctx->ops_enq_retries;
	ops_deq_retries_decomp = ctx->ops_deq_retries;

	duration_enq_per_op_decomp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_decomp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	duration_setup_per_op = ctx->duration_op /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* R E P O R T processing */
	rte_spinlock_lock(&print_spinlock);

	if (display_once == 0) {
		display_once = 1;

		printf("\nLegend for the table\n"
			"  - Retries section: number of retries for the following operations:\n"
			"    [C-e] - compression enqueue\n"
			"    [C-d] - compression dequeue\n"
			"    [D-e] - decompression enqueue\n"
			"    [D-d] - decompression dequeue\n"
			"  - Cycles section: number of cycles per 'op' for the following operations:\n"
			"    setup/op - memory allocation, op configuration and memory deallocation\n"
			"    [C-e] - compression enqueue\n"
			"    [C-d] - compression dequeue\n"
			"    [D-e] - decompression enqueue\n"
			"    [D-d] - decompression dequeue\n\n");

		printf("\n%12s%6s%12s%17s",
			"lcore id", "Level", "Comp size", "Comp ratio [%]");

		printf(" |%10s %6s %8s %6s %8s",
			" Retries:",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");

		printf(" |%9s %9s %9s %9s %9s %9s\n",
			" Cycles:",
			"setup/op",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");
	}

	printf("%12u"
		"%6u"
		"%12zu"
		"%17.2f",
		ctx->ver.mem.lcore_id,
		test_data->level,
		ctx->ver.comp_data_sz,
		ctx->ver.ratio);

	printf(" |%10s %6u %8u %6u %8u",
		" ",
		ops_enq_retries_comp,
		ops_deq_retries_comp,
		ops_enq_retries_decomp,
		ops_deq_retries_decomp);

	printf(" |%9s %9u %9u %9u %9u %9u\n",
		" ",
		duration_setup_per_op,
		duration_enq_per_op_comp,
		duration_deq_per_op_comp,
		duration_enq_per_op_decomp,
		duration_deq_per_op_decomp);

	rte_spinlock_unlock(&print_spinlock);

	return EXIT_SUCCESS;
}