/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_compressdev.h>

#include "comp_perf_test_cyclecount.h"

struct cperf_cyclecount_ctx {
	struct cperf_verify_ctx ver;

	uint32_t ops_enq_retries;
	uint32_t ops_deq_retries;

	uint64_t duration_op;
	uint64_t duration_enq;
	uint64_t duration_deq;
};

void
cperf_cyclecount_test_destructor(void *arg)
{
	struct cperf_cyclecount_ctx *ctx = arg;

	if (arg) {
		comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
		rte_free(arg);
	}
}

void *
cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_cyclecount_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* verification part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_cyclecount_test_destructor(ctx);
	return NULL;
}

/*
 * Mirrors the datapath loop of main_loop() but skips the real
 * enqueue/dequeue calls, so the cycles spent here can be reported as the
 * per-op setup cost (op allocation, mbuf reset and op configuration).
 */
static int
cperf_cyclecount_op_setup(struct rte_comp_op **ops,
		struct cperf_cyclecount_ctx *ctx,
		struct rte_mbuf **input_bufs,
		struct rte_mbuf **output_bufs,
		void *priv_xform,
		uint32_t out_seg_sz)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;

	uint32_t i, iter, num_iter;
	int res = 0;
	uint16_t ops_needed;

	num_iter = test_data->num_iter;

	for (iter = 0; iter < num_iter; iter++) {
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_enq_ops = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			ops_needed = num_ops;

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
					mem->op_pool,
					(void **)ops,
					ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Cyclecount: could not allocate enough operations\n");
				res = -1;
				goto end;
			}

			for (i = 0; i < ops_needed; i++) {

				/* Calculate next buffer to attach to operation */
				uint32_t buf_id = total_enq_ops + i;
				uint16_t op_id = i;

				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			/* E N Q U E U I N G */
			/* Assume all ops were enqueued, without doing the real enqueue */
			num_enq = num_ops;

			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

			/* D E Q U E U I N G */
			/* Assume all ops were dequeued, without doing the real dequeue */
			num_deq = num_ops;

			rte_mempool_put_bulk(mem->op_pool,
					(void **)ops, num_deq);
		}
	}
	return res;
end:
	/*
	 * rte_mempool_get_bulk() is all-or-nothing, so no ops are held when
	 * it fails, and the ops array itself is owned (and freed) by the
	 * caller: there is nothing to release on this path.
	 */
	return res;
}

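/*
 * Timed datapath pass for one direction (compress or decompress): it first
 * measures the cost of op setup alone via cperf_cyclecount_op_setup(), then
 * runs the real enqueue/dequeue loop, accumulating enqueue and dequeue cycle
 * counts and retry counters in the context.
 */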
static int
main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int ret, res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	uint64_t tsc_start, tsc_end, tsc_duration;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1, "Unknown burst size\n");
		return -1;
	}
	ctx->duration_enq = 0;
	ctx->duration_deq = 0;
	ctx->ops_enq_retries = 0;
	ctx->ops_deq_retries = 0;

	/* one array for both enqueue and dequeue */
	ops = rte_zmalloc_socket(NULL,
		(test_data->burst_sz + mem->total_bufs) *
		sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[test_data->burst_sz];

	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.deflate.huffman = test_data->huffman_enc,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = RTE_COMP_ALGO_DEFLATE,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	tsc_start = rte_rdtsc_precise();
	ret = cperf_cyclecount_op_setup(ops,
			ctx,
			input_bufs,
			output_bufs,
			priv_xform,
			out_seg_sz);

	tsc_end = rte_rdtsc_precise();

	/*
	 * The check of ret is postponed until after the second timestamp so
	 * that the extra branch does not bias the measurement.
	 */
	if (ret < 0) {
		RTE_LOG(ERR, USER1, "Setup function failed\n");
		res = -1;
		goto end;
	}

	tsc_duration = tsc_end - tsc_start;
	ctx->duration_op = tsc_duration;

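	/*
	 * Main measurement loop: only the enqueue_burst and dequeue_burst
	 * calls below are wrapped in rte_rdtsc_precise() timestamps, so op
	 * allocation and mbuf preparation do not contribute to the reported
	 * enqueue/dequeue cycle counts.
	 */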
	num_iter = test_data->num_iter;
	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
					ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations */
			if (ops_needed && rte_mempool_get_bulk(
					mem->op_pool,
					(void **)&ops[ops_unused],
					ops_needed) != 0) {
				RTE_LOG(ERR, USER1,
					"Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_enq = rte_compressdev_enqueue_burst(dev_id,
					mem->qp_id, ops,
					num_ops);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_enq += tsc_duration;

			if (num_enq < num_ops)
				ctx->ops_enq_retries++;

			if (test_data->cyclecount_delay)
				rte_delay_us_block(test_data->cyclecount_delay);

			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;

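			/*
			 * Dequeue right after each enqueue burst, requesting
			 * up to all ops currently allocated; the dequeue cost
			 * is accumulated separately from the enqueue cost.
			 */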
			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					allocated);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;

			if (num_deq < allocated)
				ctx->ops_deq_retries++;

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
							RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}

		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			tsc_start = rte_rdtsc_precise();
			num_deq = rte_compressdev_dequeue_burst(dev_id,
					mem->qp_id,
					deq_ops,
					test_data->burst_sz);
			tsc_end = rte_rdtsc_precise();
			tsc_duration = tsc_end - tsc_start;
			ctx->duration_deq += tsc_duration;
			ctx->ops_deq_retries++;

			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}
			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
							RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					(void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}
	allocated = 0;

end:
	if (allocated)
		rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
			"lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}

int
cperf_cyclecount_test_runner(void *test_ctx)
{
	struct cperf_cyclecount_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static uint16_t display_once;
	static rte_spinlock_t print_spinlock;
	int i;

	uint32_t ops_enq_retries_comp;
	uint32_t ops_deq_retries_comp;

	uint32_t ops_enq_retries_decomp;
	uint32_t ops_deq_retries_decomp;

	uint32_t duration_setup_per_op;

	uint32_t duration_enq_per_op_comp;
	uint32_t duration_deq_per_op_comp;

	uint32_t duration_enq_per_op_decomp;
	uint32_t duration_deq_per_op_decomp;

	ctx->ver.mem.lcore_id = lcore;

	uint16_t exp = 0;
	/*
	 * Print information about the current compression thread
	 */
	if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp,
			1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		printf(" lcore: %u,"
			" driver name: %s,"
			" device name: %s,"
			" device id: %u,"
			" socket id: %u,"
			" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * Run the verification part first
	 */
	if (cperf_verify_test_runner(&ctx->ver))
		return EXIT_FAILURE;

	/*
	 * Run the test twice and discard the results of the first run,
	 * which is executed before the cache is warmed up
	 */

	/* C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_comp = ctx->ops_enq_retries;
	ops_deq_retries_comp = ctx->ops_deq_retries;

	duration_enq_per_op_comp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_comp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
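
	/*
	 * The per-op figures are plain averages. For example (hypothetical
	 * numbers): with total_bufs = 64, num_iter = 10000 and an accumulated
	 * duration_enq of 3,200,000,000 cycles, the reported enqueue cost
	 * would be 3.2e9 / (64 * 10000) = 5000 cycles per op.
	 */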

	/* D E C O M P R E S S */
	for (i = 0; i < 2; i++) {
		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
			return EXIT_FAILURE;
	}

	ops_enq_retries_decomp = ctx->ops_enq_retries;
	ops_deq_retries_decomp = ctx->ops_deq_retries;

	duration_enq_per_op_decomp = ctx->duration_enq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);
	duration_deq_per_op_decomp = ctx->duration_deq /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	duration_setup_per_op = ctx->duration_op /
			(ctx->ver.mem.total_bufs * test_data->num_iter);

	/* R E P O R T processing */
	rte_spinlock_lock(&print_spinlock);

	if (display_once == 0) {
		display_once = 1;

		printf("\nLegend for the table\n"
			"  - Retries section: number of retries for the following operations:\n"
			"    [C-e] - compression enqueue\n"
			"    [C-d] - compression dequeue\n"
			"    [D-e] - decompression enqueue\n"
			"    [D-d] - decompression dequeue\n"
			"  - Cycles section: number of cycles per 'op' for the following operations:\n"
			"    setup/op - memory allocation, op configuration and memory deallocation\n"
			"    [C-e] - compression enqueue\n"
			"    [C-d] - compression dequeue\n"
			"    [D-e] - decompression enqueue\n"
			"    [D-d] - decompression dequeue\n\n");

		printf("\n%12s%6s%12s%17s",
			"lcore id", "Level", "Comp size", "Comp ratio [%]");

		printf(" |%10s %6s %8s %6s %8s",
			" Retries:",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");

		printf(" |%9s %9s %9s %9s %9s %9s\n",
			" Cycles:",
			"setup/op",
			"[C-e]", "[C-d]",
			"[D-e]", "[D-d]");
	}

	printf("%12u"
		"%6u"
		"%12zu"
		"%17.2f",
		ctx->ver.mem.lcore_id,
		test_data->level,
		ctx->ver.comp_data_sz,
		ctx->ver.ratio);

	printf(" |%10s %6u %8u %6u %8u",
		" ",
		ops_enq_retries_comp,
		ops_deq_retries_comp,
		ops_enq_retries_decomp,
		ops_deq_retries_decomp);

	printf(" |%9s %9u %9u %9u %9u %9u\n",
		" ",
		duration_setup_per_op,
		duration_enq_per_op_comp,
		duration_deq_per_op_comp,
		duration_enq_per_op_decomp,
		duration_deq_per_op_decomp);

	rte_spinlock_unlock(&print_spinlock);

	return EXIT_SUCCESS;
}