/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <stdlib.h>

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_compressdev.h>

#include "comp_perf_test_throughput.h"

void
cperf_throughput_test_destructor(void *arg)
{
	if (arg) {
		comp_perf_free_memory(
			((struct cperf_benchmark_ctx *)arg)->ver.options,
			&((struct cperf_benchmark_ctx *)arg)->ver.mem);
		rte_free(arg);
	}
}

void *
cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
		struct comp_test_data *options)
{
	struct cperf_benchmark_ctx *ctx = NULL;

	ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0);

	if (ctx == NULL)
		return NULL;

	ctx->ver.mem.dev_id = dev_id;
	ctx->ver.mem.qp_id = qp_id;
	ctx->ver.options = options;
	ctx->ver.silent = 1; /* ver. part will be silent */

	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
		return ctx;

	cperf_throughput_test_destructor(ctx);
	return NULL;
}

static int
main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
{
	struct comp_test_data *test_data = ctx->ver.options;
	struct cperf_mem_resources *mem = &ctx->ver.mem;
	uint8_t dev_id = mem->dev_id;
	uint32_t i, iter, num_iter;
	struct rte_comp_op **ops, **deq_ops;
	void *priv_xform = NULL;
	struct rte_comp_xform xform;
	struct rte_mbuf **input_bufs, **output_bufs;
	int res = 0;
	int allocated = 0;
	uint32_t out_seg_sz;

	if (test_data == NULL || !test_data->burst_sz) {
		RTE_LOG(ERR, USER1,
			"Unknown burst size\n");
		return -1;
	}

	ops = rte_zmalloc_socket(NULL,
		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
		0, rte_socket_id());

	if (ops == NULL) {
		RTE_LOG(ERR, USER1,
			"Can't allocate memory for ops structures\n");
		return -1;
	}

	deq_ops = &ops[mem->total_bufs];

	if (type == RTE_COMP_COMPRESS) {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_COMPRESS,
			.compress = {
				.algo = test_data->test_algo,
				.level = test_data->level,
				.window_size = test_data->window_sz,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		if (test_data->test_algo == RTE_COMP_ALGO_DEFLATE)
			xform.compress.deflate.huffman = test_data->huffman_enc;
		else if (test_data->test_algo == RTE_COMP_ALGO_LZ4)
			xform.compress.lz4.flags = test_data->lz4_flags;
		input_bufs = mem->decomp_bufs;
		output_bufs = mem->comp_bufs;
		out_seg_sz = test_data->out_seg_sz;
	} else {
		xform = (struct rte_comp_xform) {
			.type = RTE_COMP_DECOMPRESS,
			.decompress = {
				.algo = test_data->test_algo,
				.chksum = RTE_COMP_CHECKSUM_NONE,
				.window_size = test_data->window_sz,
				.hash_algo = RTE_COMP_HASH_ALGO_NONE
			}
		};
		if (test_data->test_algo == RTE_COMP_ALGO_LZ4)
			xform.decompress.lz4.flags = test_data->lz4_flags;
		input_bufs = mem->comp_bufs;
		output_bufs = mem->decomp_bufs;
		out_seg_sz = test_data->seg_sz;
	}

	/* Create private xform */
	if (rte_compressdev_private_xform_create(dev_id, &xform,
			&priv_xform) < 0) {
		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
		res = -1;
		goto end;
	}

	uint64_t tsc_start, tsc_end, tsc_duration;

	num_iter = test_data->num_iter;
	tsc_start = tsc_end = tsc_duration = 0;
	tsc_start = rte_rdtsc_precise();
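
	/*
	 * Main measurement loop: every iteration pushes all test buffers
	 * through the device once. Each pass enqueues up to burst_sz ops,
	 * dequeues whatever has completed in the same pass, and re-submits
	 * any ops the PMD did not accept.
	 */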
	for (iter = 0; iter < num_iter; iter++) {
		uint32_t total_ops = mem->total_bufs;
		uint32_t remaining_ops = mem->total_bufs;
		uint32_t total_deq_ops = 0;
		uint32_t total_enq_ops = 0;
		uint16_t ops_unused = 0;
		uint16_t num_enq = 0;
		uint16_t num_deq = 0;

		while (remaining_ops > 0) {
			uint16_t num_ops = RTE_MIN(remaining_ops,
					test_data->burst_sz);
			uint16_t ops_needed = num_ops - ops_unused;

			/*
			 * Move the unused operations from the previous
			 * enqueue_burst call to the front, to maintain order
			 */
			if ((ops_unused > 0) && (num_enq > 0)) {
				size_t nb_b_to_mov =
					ops_unused * sizeof(struct rte_comp_op *);

				memmove(ops, &ops[num_enq], nb_b_to_mov);
			}

			/* Allocate compression operations */
			if (ops_needed && !rte_comp_op_bulk_alloc(
						mem->op_pool,
						&ops[ops_unused],
						ops_needed)) {
				RTE_LOG(ERR, USER1,
					"Could not allocate enough operations\n");
				res = -1;
				goto end;
			}
			allocated += ops_needed;

			for (i = 0; i < ops_needed; i++) {
				/*
				 * Calculate next buffer to attach to operation
				 */
				uint32_t buf_id = total_enq_ops + i +
						ops_unused;
				uint16_t op_id = ops_unused + i;
				/* Reset all data in output buffers */
				struct rte_mbuf *m = output_bufs[buf_id];

				m->pkt_len = out_seg_sz * m->nb_segs;
				while (m) {
					m->data_len = m->buf_len - m->data_off;
					m = m->next;
				}
				ops[op_id]->m_src = input_bufs[buf_id];
				ops[op_id]->m_dst = output_bufs[buf_id];
				ops[op_id]->src.offset = 0;
				ops[op_id]->src.length =
					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
				ops[op_id]->dst.offset = 0;
				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
				ops[op_id]->input_chksum = buf_id;
				ops[op_id]->private_xform = priv_xform;
			}

			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			num_enq = rte_compressdev_enqueue_burst(dev_id,
								mem->qp_id, ops,
								num_ops);
			if (num_enq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.enqueue_err_count) {
					res = -1;
					goto end;
				}
			}

			ops_unused = num_ops - num_enq;
			remaining_ops -= num_enq;
			total_enq_ops += num_enq;
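
			/*
			 * Dequeue in the same pass as the enqueue so
			 * completed ops are drained while new ones are
			 * submitted. On the last iteration, trim each
			 * destination mbuf chain to op->produced bytes,
			 * segment by segment, so the chain describes exactly
			 * the data the PMD wrote.
			 */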
			num_deq = rte_compressdev_dequeue_burst(dev_id,
							   mem->qp_id,
							   deq_ops,
							   test_data->burst_sz);
			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						res = -1;
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					     (void **)deq_ops, num_deq);
			allocated -= num_deq;
		}

		/* Dequeue the last operations */
		while (total_deq_ops < total_ops) {
			if (unlikely(test_data->perf_comp_force_stop))
				goto end;

			num_deq = rte_compressdev_dequeue_burst(dev_id,
							   mem->qp_id,
							   deq_ops,
							   test_data->burst_sz);
			if (num_deq == 0) {
				struct rte_compressdev_stats stats;

				rte_compressdev_stats_get(dev_id, &stats);
				if (stats.dequeue_err_count) {
					res = -1;
					goto end;
				}
			}

			total_deq_ops += num_deq;

			if (iter == num_iter - 1) {
				for (i = 0; i < num_deq; i++) {
					struct rte_comp_op *op = deq_ops[i];

					if (op->status !=
						RTE_COMP_OP_STATUS_SUCCESS) {
						RTE_LOG(ERR, USER1,
							"Some operations were not successful\n");
						res = -1;
						goto end;
					}

					struct rte_mbuf *m = op->m_dst;

					m->pkt_len = op->produced;
					uint32_t remaining_data = op->produced;
					uint16_t data_to_append;

					while (remaining_data > 0) {
						data_to_append =
							RTE_MIN(remaining_data,
								out_seg_sz);
						m->data_len = data_to_append;
						remaining_data -=
								data_to_append;
						m = m->next;
					}
				}
			}
			rte_mempool_put_bulk(mem->op_pool,
					     (void **)deq_ops, num_deq);
			allocated -= num_deq;
		}
	}

	tsc_end = rte_rdtsc_precise();
	tsc_duration = tsc_end - tsc_start;

	if (type == RTE_COMP_COMPRESS)
		ctx->comp_tsc_duration[test_data->level] =
				tsc_duration / num_iter;
	else
		ctx->decomp_tsc_duration[test_data->level] =
				tsc_duration / num_iter;

end:
	rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
	rte_compressdev_private_xform_free(dev_id, priv_xform);
	rte_free(ops);

	if (test_data->perf_comp_force_stop) {
		RTE_LOG(ERR, USER1,
			"lcore: %d Perf. test has been aborted by user\n",
			mem->lcore_id);
		res = -1;
	}
	return res;
}

int
cperf_throughput_test_runner(void *test_ctx)
{
	struct cperf_benchmark_ctx *ctx = test_ctx;
	struct comp_test_data *test_data = ctx->ver.options;
	uint32_t lcore = rte_lcore_id();
	static RTE_ATOMIC(uint16_t) display_once;
	int i, ret = EXIT_SUCCESS;

	ctx->ver.mem.lcore_id = lcore;

	uint16_t exp = 0;
	/*
	 * Print information about the current compression thread
	 */
	if (rte_atomic_compare_exchange_strong_explicit(&ctx->ver.mem.print_info_once, &exp,
			1, rte_memory_order_relaxed, rte_memory_order_relaxed))
		printf(" lcore: %u,"
				" driver name: %s,"
				" device name: %s,"
				" device id: %u,"
				" socket id: %u,"
				" queue pair id: %u\n",
			lcore,
			ctx->ver.options->driver_name,
			rte_compressdev_name_get(ctx->ver.mem.dev_id),
			ctx->ver.mem.dev_id,
			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
			ctx->ver.mem.qp_id);

	/*
	 * Run the verification part first; it checks correctness and
	 * provides the compressed size and ratio reported below
	 */
	if (cperf_verify_test_runner(&ctx->ver)) {
		ret = EXIT_FAILURE;
		goto end;
	}
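
	/*
	 * Throughput is derived from the per-iteration cycle count averaged
	 * in main_loop(): cycles/byte = tsc_duration / input_data_sz and
	 * Gbps = tsc_hz / (cycles/byte) * 8 / 10^9. Each direction runs
	 * twice; the second, warm-cache run overwrites the first, so only
	 * its timing is reported.
	 */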
	if (test_data->test_op & COMPRESS) {
		/*
		 * Run the test twice, discarding the first run's results,
		 * which are measured before the cache is warmed up
		 */
		for (i = 0; i < 2; i++) {
			if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) {
				ret = EXIT_FAILURE;
				goto end;
			}
		}

		ctx->comp_tsc_byte =
			(double)(ctx->comp_tsc_duration[test_data->level]) /
					test_data->input_data_sz;
		ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 /
				1000000000;
	} else {
		ctx->comp_tsc_byte = 0;
		ctx->comp_gbps = 0;
	}

	if (test_data->test_op & DECOMPRESS) {
		/*
		 * Run the test twice, discarding the first run's results,
		 * which are measured before the cache is warmed up
		 */
		for (i = 0; i < 2; i++) {
			if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) {
				ret = EXIT_FAILURE;
				goto end;
			}
		}

		ctx->decomp_tsc_byte =
			(double)(ctx->decomp_tsc_duration[test_data->level]) /
					test_data->input_data_sz;
		ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 /
				1000000000;
	} else {
		ctx->decomp_tsc_byte = 0;
		ctx->decomp_gbps = 0;
	}

	exp = 0;
	if (rte_atomic_compare_exchange_strong_explicit(&display_once, &exp, 1,
			rte_memory_order_relaxed, rte_memory_order_relaxed)) {
		printf("\n%12s%6s%12s%17s%15s%16s\n",
			"lcore id", "Level", "Comp size", "Comp ratio [%]",
			"Comp [Gbps]", "Decomp [Gbps]");
	}

	printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n",
		ctx->ver.mem.lcore_id,
		test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio,
		ctx->comp_gbps,
		ctx->decomp_gbps);

end:
	return ret;
}