1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2018 Intel Corporation 3 */ 4 5 #include <stdlib.h> 6 7 #include <rte_malloc.h> 8 #include <rte_eal.h> 9 #include <rte_log.h> 10 #include <rte_cycles.h> 11 #include <rte_compressdev.h> 12 13 #include "comp_perf_test_throughput.h" 14 15 void 16 cperf_throughput_test_destructor(void *arg) 17 { 18 if (arg) { 19 comp_perf_free_memory( 20 ((struct cperf_benchmark_ctx *)arg)->ver.options, 21 &((struct cperf_benchmark_ctx *)arg)->ver.mem); 22 rte_free(arg); 23 } 24 } 25 26 void * 27 cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, 28 struct comp_test_data *options) 29 { 30 struct cperf_benchmark_ctx *ctx = NULL; 31 32 ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0); 33 34 if (ctx == NULL) 35 return NULL; 36 37 ctx->ver.mem.dev_id = dev_id; 38 ctx->ver.mem.qp_id = qp_id; 39 ctx->ver.options = options; 40 ctx->ver.silent = 1; /* ver. part will be silent */ 41 42 if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem) 43 && !prepare_bufs(ctx->ver.options, &ctx->ver.mem)) 44 return ctx; 45 46 cperf_throughput_test_destructor(ctx); 47 return NULL; 48 } 49 50 static int 51 main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type) 52 { 53 struct comp_test_data *test_data = ctx->ver.options; 54 struct cperf_mem_resources *mem = &ctx->ver.mem; 55 uint8_t dev_id = mem->dev_id; 56 uint32_t i, iter, num_iter; 57 struct rte_comp_op **ops, **deq_ops; 58 void *priv_xform = NULL; 59 struct rte_comp_xform xform; 60 struct rte_mbuf **input_bufs, **output_bufs; 61 int res = 0; 62 int allocated = 0; 63 uint32_t out_seg_sz; 64 65 if (test_data == NULL || !test_data->burst_sz) { 66 RTE_LOG(ERR, USER1, 67 "Unknown burst size\n"); 68 return -1; 69 } 70 71 ops = rte_zmalloc_socket(NULL, 72 2 * mem->total_bufs * sizeof(struct rte_comp_op *), 73 0, rte_socket_id()); 74 75 if (ops == NULL) { 76 RTE_LOG(ERR, USER1, 77 "Can't allocate memory for ops structures\n"); 78 return -1; 79 } 80 81 deq_ops = &ops[mem->total_bufs]; 82 83 if (type == RTE_COMP_COMPRESS) { 84 xform = (struct rte_comp_xform) { 85 .type = RTE_COMP_COMPRESS, 86 .compress = { 87 .algo = RTE_COMP_ALGO_DEFLATE, 88 .deflate.huffman = test_data->huffman_enc, 89 .level = test_data->level, 90 .window_size = test_data->window_sz, 91 .chksum = RTE_COMP_CHECKSUM_NONE, 92 .hash_algo = RTE_COMP_HASH_ALGO_NONE 93 } 94 }; 95 input_bufs = mem->decomp_bufs; 96 output_bufs = mem->comp_bufs; 97 out_seg_sz = test_data->out_seg_sz; 98 } else { 99 xform = (struct rte_comp_xform) { 100 .type = RTE_COMP_DECOMPRESS, 101 .decompress = { 102 .algo = RTE_COMP_ALGO_DEFLATE, 103 .chksum = RTE_COMP_CHECKSUM_NONE, 104 .window_size = test_data->window_sz, 105 .hash_algo = RTE_COMP_HASH_ALGO_NONE 106 } 107 }; 108 input_bufs = mem->comp_bufs; 109 output_bufs = mem->decomp_bufs; 110 out_seg_sz = test_data->seg_sz; 111 } 112 113 /* Create private xform */ 114 if (rte_compressdev_private_xform_create(dev_id, &xform, 115 &priv_xform) < 0) { 116 RTE_LOG(ERR, USER1, "Private xform could not be created\n"); 117 res = -1; 118 goto end; 119 } 120 121 uint64_t tsc_start, tsc_end, tsc_duration; 122 123 num_iter = test_data->num_iter; 124 tsc_start = tsc_end = tsc_duration = 0; 125 tsc_start = rte_rdtsc_precise(); 126 127 for (iter = 0; iter < num_iter; iter++) { 128 uint32_t total_ops = mem->total_bufs; 129 uint32_t remaining_ops = mem->total_bufs; 130 uint32_t total_deq_ops = 0; 131 uint32_t total_enq_ops = 0; 132 uint16_t ops_unused = 0; 133 uint16_t num_enq = 0; 134 uint16_t num_deq = 0; 135 136 while (remaining_ops > 0) { 137 uint16_t num_ops = RTE_MIN(remaining_ops, 138 test_data->burst_sz); 139 uint16_t ops_needed = num_ops - ops_unused; 140 141 /* 142 * Move the unused operations from the previous 143 * enqueue_burst call to the front, to maintain order 144 */ 145 if ((ops_unused > 0) && (num_enq > 0)) { 146 size_t nb_b_to_mov = 147 ops_unused * sizeof(struct rte_comp_op *); 148 149 memmove(ops, &ops[num_enq], nb_b_to_mov); 150 } 151 152 /* Allocate compression operations */ 153 if (ops_needed && !rte_comp_op_bulk_alloc( 154 mem->op_pool, 155 &ops[ops_unused], 156 ops_needed)) { 157 RTE_LOG(ERR, USER1, 158 "Could not allocate enough operations\n"); 159 res = -1; 160 goto end; 161 } 162 allocated += ops_needed; 163 164 for (i = 0; i < ops_needed; i++) { 165 /* 166 * Calculate next buffer to attach to operation 167 */ 168 uint32_t buf_id = total_enq_ops + i + 169 ops_unused; 170 uint16_t op_id = ops_unused + i; 171 /* Reset all data in output buffers */ 172 struct rte_mbuf *m = output_bufs[buf_id]; 173 174 m->pkt_len = out_seg_sz * m->nb_segs; 175 while (m) { 176 m->data_len = m->buf_len - m->data_off; 177 m = m->next; 178 } 179 ops[op_id]->m_src = input_bufs[buf_id]; 180 ops[op_id]->m_dst = output_bufs[buf_id]; 181 ops[op_id]->src.offset = 0; 182 ops[op_id]->src.length = 183 rte_pktmbuf_pkt_len(input_bufs[buf_id]); 184 ops[op_id]->dst.offset = 0; 185 ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; 186 ops[op_id]->input_chksum = buf_id; 187 ops[op_id]->private_xform = priv_xform; 188 } 189 190 if (unlikely(test_data->perf_comp_force_stop)) 191 goto end; 192 193 num_enq = rte_compressdev_enqueue_burst(dev_id, 194 mem->qp_id, ops, 195 num_ops); 196 if (num_enq == 0) { 197 struct rte_compressdev_stats stats; 198 199 rte_compressdev_stats_get(dev_id, &stats); 200 if (stats.enqueue_err_count) { 201 res = -1; 202 goto end; 203 } 204 } 205 206 ops_unused = num_ops - num_enq; 207 remaining_ops -= num_enq; 208 total_enq_ops += num_enq; 209 210 num_deq = rte_compressdev_dequeue_burst(dev_id, 211 mem->qp_id, 212 deq_ops, 213 test_data->burst_sz); 214 total_deq_ops += num_deq; 215 216 if (iter == num_iter - 1) { 217 for (i = 0; i < num_deq; i++) { 218 struct rte_comp_op *op = deq_ops[i]; 219 220 if (op->status != 221 RTE_COMP_OP_STATUS_SUCCESS) { 222 RTE_LOG(ERR, USER1, 223 "Some operations were not successful\n"); 224 goto end; 225 } 226 227 struct rte_mbuf *m = op->m_dst; 228 229 m->pkt_len = op->produced; 230 uint32_t remaining_data = op->produced; 231 uint16_t data_to_append; 232 233 while (remaining_data > 0) { 234 data_to_append = 235 RTE_MIN(remaining_data, 236 out_seg_sz); 237 m->data_len = data_to_append; 238 remaining_data -= 239 data_to_append; 240 m = m->next; 241 } 242 } 243 } 244 rte_mempool_put_bulk(mem->op_pool, 245 (void **)deq_ops, num_deq); 246 allocated -= num_deq; 247 } 248 249 /* Dequeue the last operations */ 250 while (total_deq_ops < total_ops) { 251 if (unlikely(test_data->perf_comp_force_stop)) 252 goto end; 253 254 num_deq = rte_compressdev_dequeue_burst(dev_id, 255 mem->qp_id, 256 deq_ops, 257 test_data->burst_sz); 258 if (num_deq == 0) { 259 struct rte_compressdev_stats stats; 260 261 rte_compressdev_stats_get(dev_id, &stats); 262 if (stats.dequeue_err_count) { 263 res = -1; 264 goto end; 265 } 266 } 267 268 total_deq_ops += num_deq; 269 270 if (iter == num_iter - 1) { 271 for (i = 0; i < num_deq; i++) { 272 struct rte_comp_op *op = deq_ops[i]; 273 274 if (op->status != 275 RTE_COMP_OP_STATUS_SUCCESS) { 276 RTE_LOG(ERR, USER1, 277 "Some operations were not successful\n"); 278 goto end; 279 } 280 281 struct rte_mbuf *m = op->m_dst; 282 283 m->pkt_len = op->produced; 284 uint32_t remaining_data = op->produced; 285 uint16_t data_to_append; 286 287 while (remaining_data > 0) { 288 data_to_append = 289 RTE_MIN(remaining_data, 290 out_seg_sz); 291 m->data_len = data_to_append; 292 remaining_data -= 293 data_to_append; 294 m = m->next; 295 } 296 } 297 } 298 rte_mempool_put_bulk(mem->op_pool, 299 (void **)deq_ops, num_deq); 300 allocated -= num_deq; 301 } 302 } 303 304 tsc_end = rte_rdtsc_precise(); 305 tsc_duration = tsc_end - tsc_start; 306 307 if (type == RTE_COMP_COMPRESS) 308 ctx->comp_tsc_duration[test_data->level] = 309 tsc_duration / num_iter; 310 else 311 ctx->decomp_tsc_duration[test_data->level] = 312 tsc_duration / num_iter; 313 314 end: 315 rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); 316 rte_compressdev_private_xform_free(dev_id, priv_xform); 317 rte_free(ops); 318 319 if (test_data->perf_comp_force_stop) { 320 RTE_LOG(ERR, USER1, 321 "lcore: %d Perf. test has been aborted by user\n", 322 mem->lcore_id); 323 res = -1; 324 } 325 return res; 326 } 327 328 int 329 cperf_throughput_test_runner(void *test_ctx) 330 { 331 struct cperf_benchmark_ctx *ctx = test_ctx; 332 struct comp_test_data *test_data = ctx->ver.options; 333 uint32_t lcore = rte_lcore_id(); 334 static uint16_t display_once; 335 int i, ret = EXIT_SUCCESS; 336 337 ctx->ver.mem.lcore_id = lcore; 338 339 uint16_t exp = 0; 340 /* 341 * printing information about current compression thread 342 */ 343 if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp, 344 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) 345 printf(" lcore: %u," 346 " driver name: %s," 347 " device name: %s," 348 " device id: %u," 349 " socket id: %u," 350 " queue pair id: %u\n", 351 lcore, 352 ctx->ver.options->driver_name, 353 rte_compressdev_name_get(ctx->ver.mem.dev_id), 354 ctx->ver.mem.dev_id, 355 rte_compressdev_socket_id(ctx->ver.mem.dev_id), 356 ctx->ver.mem.qp_id); 357 358 /* 359 * First the verification part is needed 360 */ 361 if (cperf_verify_test_runner(&ctx->ver)) { 362 ret = EXIT_FAILURE; 363 goto end; 364 } 365 366 /* 367 * Run the tests twice, discarding the first performance 368 * results, before the cache is warmed up 369 */ 370 for (i = 0; i < 2; i++) { 371 if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) { 372 ret = EXIT_FAILURE; 373 goto end; 374 } 375 } 376 377 for (i = 0; i < 2; i++) { 378 if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) { 379 ret = EXIT_FAILURE; 380 goto end; 381 } 382 } 383 384 ctx->comp_tsc_byte = 385 (double)(ctx->comp_tsc_duration[test_data->level]) / 386 test_data->input_data_sz; 387 388 ctx->decomp_tsc_byte = 389 (double)(ctx->decomp_tsc_duration[test_data->level]) / 390 test_data->input_data_sz; 391 392 ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 / 393 1000000000; 394 395 ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 / 396 1000000000; 397 398 exp = 0; 399 if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0, 400 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 401 printf("\n%12s%6s%12s%17s%15s%16s\n", 402 "lcore id", "Level", "Comp size", "Comp ratio [%]", 403 "Comp [Gbps]", "Decomp [Gbps]"); 404 } 405 406 printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n", 407 ctx->ver.mem.lcore_id, 408 test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio, 409 ctx->comp_gbps, 410 ctx->decomp_gbps); 411 412 end: 413 return ret; 414 } 415