1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2018 Intel Corporation 3 */ 4 5 #include <rte_malloc.h> 6 #include <rte_eal.h> 7 #include <rte_log.h> 8 #include <rte_cycles.h> 9 #include <rte_compressdev.h> 10 11 #include "comp_perf_test_throughput.h" 12 13 void 14 cperf_throughput_test_destructor(void *arg) 15 { 16 if (arg) { 17 comp_perf_free_memory( 18 ((struct cperf_benchmark_ctx *)arg)->ver.options, 19 &((struct cperf_benchmark_ctx *)arg)->ver.mem); 20 rte_free(arg); 21 } 22 } 23 24 void * 25 cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id, 26 struct comp_test_data *options) 27 { 28 struct cperf_benchmark_ctx *ctx = NULL; 29 30 ctx = rte_malloc(NULL, sizeof(struct cperf_benchmark_ctx), 0); 31 32 if (ctx == NULL) 33 return NULL; 34 35 ctx->ver.mem.dev_id = dev_id; 36 ctx->ver.mem.qp_id = qp_id; 37 ctx->ver.options = options; 38 ctx->ver.silent = 1; /* ver. part will be silent */ 39 40 if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem) 41 && !prepare_bufs(ctx->ver.options, &ctx->ver.mem)) 42 return ctx; 43 44 cperf_throughput_test_destructor(ctx); 45 return NULL; 46 } 47 48 static int 49 main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type) 50 { 51 struct comp_test_data *test_data = ctx->ver.options; 52 struct cperf_mem_resources *mem = &ctx->ver.mem; 53 uint8_t dev_id = mem->dev_id; 54 uint32_t i, iter, num_iter; 55 struct rte_comp_op **ops, **deq_ops; 56 void *priv_xform = NULL; 57 struct rte_comp_xform xform; 58 struct rte_mbuf **input_bufs, **output_bufs; 59 int res = 0; 60 int allocated = 0; 61 uint32_t out_seg_sz; 62 63 if (test_data == NULL || !test_data->burst_sz) { 64 RTE_LOG(ERR, USER1, 65 "Unknown burst size\n"); 66 return -1; 67 } 68 69 ops = rte_zmalloc_socket(NULL, 70 2 * mem->total_bufs * sizeof(struct rte_comp_op *), 71 0, rte_socket_id()); 72 73 if (ops == NULL) { 74 RTE_LOG(ERR, USER1, 75 "Can't allocate memory for ops strucures\n"); 76 return -1; 77 } 78 79 deq_ops = &ops[mem->total_bufs]; 80 81 if (type == RTE_COMP_COMPRESS) { 82 xform = (struct rte_comp_xform) { 83 .type = RTE_COMP_COMPRESS, 84 .compress = { 85 .algo = RTE_COMP_ALGO_DEFLATE, 86 .deflate.huffman = test_data->huffman_enc, 87 .level = test_data->level, 88 .window_size = test_data->window_sz, 89 .chksum = RTE_COMP_CHECKSUM_NONE, 90 .hash_algo = RTE_COMP_HASH_ALGO_NONE 91 } 92 }; 93 input_bufs = mem->decomp_bufs; 94 output_bufs = mem->comp_bufs; 95 out_seg_sz = test_data->out_seg_sz; 96 } else { 97 xform = (struct rte_comp_xform) { 98 .type = RTE_COMP_DECOMPRESS, 99 .decompress = { 100 .algo = RTE_COMP_ALGO_DEFLATE, 101 .chksum = RTE_COMP_CHECKSUM_NONE, 102 .window_size = test_data->window_sz, 103 .hash_algo = RTE_COMP_HASH_ALGO_NONE 104 } 105 }; 106 input_bufs = mem->comp_bufs; 107 output_bufs = mem->decomp_bufs; 108 out_seg_sz = test_data->seg_sz; 109 } 110 111 /* Create private xform */ 112 if (rte_compressdev_private_xform_create(dev_id, &xform, 113 &priv_xform) < 0) { 114 RTE_LOG(ERR, USER1, "Private xform could not be created\n"); 115 res = -1; 116 goto end; 117 } 118 119 uint64_t tsc_start, tsc_end, tsc_duration; 120 121 num_iter = test_data->num_iter; 122 tsc_start = tsc_end = tsc_duration = 0; 123 tsc_start = rte_rdtsc_precise(); 124 125 for (iter = 0; iter < num_iter; iter++) { 126 uint32_t total_ops = mem->total_bufs; 127 uint32_t remaining_ops = mem->total_bufs; 128 uint32_t total_deq_ops = 0; 129 uint32_t total_enq_ops = 0; 130 uint16_t ops_unused = 0; 131 uint16_t num_enq = 0; 132 uint16_t num_deq = 0; 133 134 while (remaining_ops > 0) { 135 uint16_t num_ops = RTE_MIN(remaining_ops, 136 test_data->burst_sz); 137 uint16_t ops_needed = num_ops - ops_unused; 138 139 /* 140 * Move the unused operations from the previous 141 * enqueue_burst call to the front, to maintain order 142 */ 143 if ((ops_unused > 0) && (num_enq > 0)) { 144 size_t nb_b_to_mov = 145 ops_unused * sizeof(struct rte_comp_op *); 146 147 memmove(ops, &ops[num_enq], nb_b_to_mov); 148 } 149 150 /* Allocate compression operations */ 151 if (ops_needed && !rte_comp_op_bulk_alloc( 152 mem->op_pool, 153 &ops[ops_unused], 154 ops_needed)) { 155 RTE_LOG(ERR, USER1, 156 "Could not allocate enough operations\n"); 157 res = -1; 158 goto end; 159 } 160 allocated += ops_needed; 161 162 for (i = 0; i < ops_needed; i++) { 163 /* 164 * Calculate next buffer to attach to operation 165 */ 166 uint32_t buf_id = total_enq_ops + i + 167 ops_unused; 168 uint16_t op_id = ops_unused + i; 169 /* Reset all data in output buffers */ 170 struct rte_mbuf *m = output_bufs[buf_id]; 171 172 m->pkt_len = out_seg_sz * m->nb_segs; 173 while (m) { 174 m->data_len = m->buf_len - m->data_off; 175 m = m->next; 176 } 177 ops[op_id]->m_src = input_bufs[buf_id]; 178 ops[op_id]->m_dst = output_bufs[buf_id]; 179 ops[op_id]->src.offset = 0; 180 ops[op_id]->src.length = 181 rte_pktmbuf_pkt_len(input_bufs[buf_id]); 182 ops[op_id]->dst.offset = 0; 183 ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL; 184 ops[op_id]->input_chksum = buf_id; 185 ops[op_id]->private_xform = priv_xform; 186 } 187 188 if (unlikely(test_data->perf_comp_force_stop)) 189 goto end; 190 191 num_enq = rte_compressdev_enqueue_burst(dev_id, 192 mem->qp_id, ops, 193 num_ops); 194 if (num_enq == 0) { 195 struct rte_compressdev_stats stats; 196 197 rte_compressdev_stats_get(dev_id, &stats); 198 if (stats.enqueue_err_count) { 199 res = -1; 200 goto end; 201 } 202 } 203 204 ops_unused = num_ops - num_enq; 205 remaining_ops -= num_enq; 206 total_enq_ops += num_enq; 207 208 num_deq = rte_compressdev_dequeue_burst(dev_id, 209 mem->qp_id, 210 deq_ops, 211 test_data->burst_sz); 212 total_deq_ops += num_deq; 213 214 if (iter == num_iter - 1) { 215 for (i = 0; i < num_deq; i++) { 216 struct rte_comp_op *op = deq_ops[i]; 217 218 if (op->status != 219 RTE_COMP_OP_STATUS_SUCCESS) { 220 RTE_LOG(ERR, USER1, 221 "Some operations were not successful\n"); 222 goto end; 223 } 224 225 struct rte_mbuf *m = op->m_dst; 226 227 m->pkt_len = op->produced; 228 uint32_t remaining_data = op->produced; 229 uint16_t data_to_append; 230 231 while (remaining_data > 0) { 232 data_to_append = 233 RTE_MIN(remaining_data, 234 out_seg_sz); 235 m->data_len = data_to_append; 236 remaining_data -= 237 data_to_append; 238 m = m->next; 239 } 240 } 241 } 242 rte_mempool_put_bulk(mem->op_pool, 243 (void **)deq_ops, num_deq); 244 allocated -= num_deq; 245 } 246 247 /* Dequeue the last operations */ 248 while (total_deq_ops < total_ops) { 249 if (unlikely(test_data->perf_comp_force_stop)) 250 goto end; 251 252 num_deq = rte_compressdev_dequeue_burst(dev_id, 253 mem->qp_id, 254 deq_ops, 255 test_data->burst_sz); 256 if (num_deq == 0) { 257 struct rte_compressdev_stats stats; 258 259 rte_compressdev_stats_get(dev_id, &stats); 260 if (stats.dequeue_err_count) { 261 res = -1; 262 goto end; 263 } 264 } 265 266 total_deq_ops += num_deq; 267 268 if (iter == num_iter - 1) { 269 for (i = 0; i < num_deq; i++) { 270 struct rte_comp_op *op = deq_ops[i]; 271 272 if (op->status != 273 RTE_COMP_OP_STATUS_SUCCESS) { 274 RTE_LOG(ERR, USER1, 275 "Some operations were not successful\n"); 276 goto end; 277 } 278 279 struct rte_mbuf *m = op->m_dst; 280 281 m->pkt_len = op->produced; 282 uint32_t remaining_data = op->produced; 283 uint16_t data_to_append; 284 285 while (remaining_data > 0) { 286 data_to_append = 287 RTE_MIN(remaining_data, 288 out_seg_sz); 289 m->data_len = data_to_append; 290 remaining_data -= 291 data_to_append; 292 m = m->next; 293 } 294 } 295 } 296 rte_mempool_put_bulk(mem->op_pool, 297 (void **)deq_ops, num_deq); 298 allocated -= num_deq; 299 } 300 } 301 302 tsc_end = rte_rdtsc_precise(); 303 tsc_duration = tsc_end - tsc_start; 304 305 if (type == RTE_COMP_COMPRESS) 306 ctx->comp_tsc_duration[test_data->level] = 307 tsc_duration / num_iter; 308 else 309 ctx->decomp_tsc_duration[test_data->level] = 310 tsc_duration / num_iter; 311 312 end: 313 rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated); 314 rte_compressdev_private_xform_free(dev_id, priv_xform); 315 rte_free(ops); 316 317 if (test_data->perf_comp_force_stop) { 318 RTE_LOG(ERR, USER1, 319 "lcore: %d Perf. test has been aborted by user\n", 320 mem->lcore_id); 321 res = -1; 322 } 323 return res; 324 } 325 326 int 327 cperf_throughput_test_runner(void *test_ctx) 328 { 329 struct cperf_benchmark_ctx *ctx = test_ctx; 330 struct comp_test_data *test_data = ctx->ver.options; 331 uint32_t lcore = rte_lcore_id(); 332 static uint16_t display_once; 333 int i, ret = EXIT_SUCCESS; 334 335 ctx->ver.mem.lcore_id = lcore; 336 337 uint16_t exp = 0; 338 /* 339 * printing information about current compression thread 340 */ 341 if (__atomic_compare_exchange_n(&ctx->ver.mem.print_info_once, &exp, 342 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) 343 printf(" lcore: %u," 344 " driver name: %s," 345 " device name: %s," 346 " device id: %u," 347 " socket id: %u," 348 " queue pair id: %u\n", 349 lcore, 350 ctx->ver.options->driver_name, 351 rte_compressdev_name_get(ctx->ver.mem.dev_id), 352 ctx->ver.mem.dev_id, 353 rte_compressdev_socket_id(ctx->ver.mem.dev_id), 354 ctx->ver.mem.qp_id); 355 356 /* 357 * First the verification part is needed 358 */ 359 if (cperf_verify_test_runner(&ctx->ver)) { 360 ret = EXIT_FAILURE; 361 goto end; 362 } 363 364 /* 365 * Run the tests twice, discarding the first performance 366 * results, before the cache is warmed up 367 */ 368 for (i = 0; i < 2; i++) { 369 if (main_loop(ctx, RTE_COMP_COMPRESS) < 0) { 370 ret = EXIT_FAILURE; 371 goto end; 372 } 373 } 374 375 for (i = 0; i < 2; i++) { 376 if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0) { 377 ret = EXIT_FAILURE; 378 goto end; 379 } 380 } 381 382 ctx->comp_tsc_byte = 383 (double)(ctx->comp_tsc_duration[test_data->level]) / 384 test_data->input_data_sz; 385 386 ctx->decomp_tsc_byte = 387 (double)(ctx->decomp_tsc_duration[test_data->level]) / 388 test_data->input_data_sz; 389 390 ctx->comp_gbps = rte_get_tsc_hz() / ctx->comp_tsc_byte * 8 / 391 1000000000; 392 393 ctx->decomp_gbps = rte_get_tsc_hz() / ctx->decomp_tsc_byte * 8 / 394 1000000000; 395 396 exp = 0; 397 if (__atomic_compare_exchange_n(&display_once, &exp, 1, 0, 398 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 399 printf("\n%12s%6s%12s%17s%15s%16s\n", 400 "lcore id", "Level", "Comp size", "Comp ratio [%]", 401 "Comp [Gbps]", "Decomp [Gbps]"); 402 } 403 404 printf("%12u%6u%12zu%17.2f%15.2f%16.2f\n", 405 ctx->ver.mem.lcore_id, 406 test_data->level, ctx->ver.comp_data_sz, ctx->ver.ratio, 407 ctx->comp_gbps, 408 ctx->decomp_gbps); 409 410 end: 411 return ret; 412 } 413