/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdio.h>
#include <inttypes.h>
#include <math.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_dev.h>
#include <rte_launch.h>
#include <rte_bbdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_hexdump.h>
#include <rte_interrupts.h>

#include "main.h"
#include "test_bbdev_vector.h"

#define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))

#define MAX_QUEUES RTE_MAX_LCORE
#define TEST_REPETITIONS 1000

#ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
#include <fpga_lte_fec.h>
#define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
#define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
#define VF_UL_4G_QUEUE_VALUE 4
#define VF_DL_4G_QUEUE_VALUE 4
#define UL_4G_BANDWIDTH 3
#define DL_4G_BANDWIDTH 3
#define UL_4G_LOAD_BALANCE 128
#define DL_4G_LOAD_BALANCE 128
#define FLR_4G_TIMEOUT 610
#endif

#define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */

#define SYNC_WAIT 0
#define SYNC_START 1
#define INVALID_OPAQUE -1

#define INVALID_QUEUE_ID -1
/* Increment for next code block in external HARQ memory */
#define HARQ_INCR 32768
/* Headroom for filler LLRs insertion in HARQ buffer */
#define FILLER_HEADROOM 1024
/* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
#define N_ZC_1 66 /* N = 66 Zc for BG 1 */
#define N_ZC_2 50 /* N = 50 Zc for BG 2 */
#define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
#define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
#define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
#define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
#define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
#define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */

static struct test_bbdev_vector test_vector;

/* Switch between PMD and Interrupt for throughput TC */
static bool intr_enabled;

/* LLR arithmetic representation for numerical conversion */
static int ldpc_llr_decimals;
static int ldpc_llr_size;
/* Keep track of the LDPC decoder device capability flag */
static uint32_t ldpc_cap_flags;

/* Represents tested active devices */
static struct active_device {
	const char *driver_name;
	uint8_t dev_id;
	uint16_t supported_ops;
	uint16_t queue_ids[MAX_QUEUES];
	uint16_t nb_queues;
	struct rte_mempool *ops_mempool;
	struct rte_mempool *in_mbuf_pool;
	struct rte_mempool *hard_out_mbuf_pool;
	struct rte_mempool *soft_out_mbuf_pool;
	struct rte_mempool *harq_in_mbuf_pool;
	struct rte_mempool *harq_out_mbuf_pool;
} active_devs[RTE_BBDEV_MAX_DEVS];

static uint8_t nb_active_devs;

/* Data buffers used by BBDEV ops */
struct test_buffers {
	struct rte_bbdev_op_data *inputs;
	struct rte_bbdev_op_data *hard_outputs;
	struct rte_bbdev_op_data *soft_outputs;
	struct rte_bbdev_op_data *harq_inputs;
	struct rte_bbdev_op_data *harq_outputs;
};

/* Operation parameters specific for given test case */
struct test_op_params {
	struct rte_mempool *mp;
	struct rte_bbdev_dec_op *ref_dec_op;
	struct rte_bbdev_enc_op *ref_enc_op;
	uint16_t burst_sz;
	uint16_t num_to_process;
	uint16_t num_lcores;
	int vector_mask;
	rte_atomic16_t sync;
	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
};

/* Contains per lcore params */
struct thread_params {
	uint8_t dev_id;
	uint16_t queue_id;
	uint32_t lcore_id;
	uint64_t start_time;
	double ops_per_sec;
	double mbps;
	uint8_t iter_count;
	double iter_average;
	double bler;
	rte_atomic16_t nb_dequeued;
	rte_atomic16_t processing_status;
	rte_atomic16_t burst_sz;
	struct test_op_params *op_params;
	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
};

#ifdef RTE_BBDEV_OFFLOAD_COST
/* Stores time statistics */
struct test_time_stats {
	/* Stores software enqueue total working time */
	uint64_t enq_sw_total_time;
	/* Stores minimum value of software enqueue working time */
	uint64_t enq_sw_min_time;
	/* Stores maximum value of software enqueue working time */
	uint64_t enq_sw_max_time;
	/* Stores accelerator enqueue total working time */
	uint64_t enq_acc_total_time;
	/* Stores minimum value of accelerator enqueue working time */
	uint64_t enq_acc_min_time;
	/* Stores maximum value of accelerator enqueue working time */
	uint64_t enq_acc_max_time;
	/* Stores dequeue total working time */
	uint64_t deq_total_time;
	/* Stores minimum value of dequeue working time */
	uint64_t deq_min_time;
	/* Stores maximum value of dequeue working time */
	uint64_t deq_max_time;
};
#endif

typedef int (test_case_function)(struct active_device *ad,
		struct test_op_params *op_params);

static inline void
mbuf_reset(struct rte_mbuf *m)
{
	m->pkt_len = 0;

	do {
		m->data_len = 0;
		m = m->next;
	} while (m != NULL);
}
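/*
 * Added note: mbuf_reset() only clears the packet length and the per-segment
 * data lengths of an already chained mbuf; it does not free or unlink the
 * segments, presumably so that output mbufs can be reused across
 * TEST_REPETITIONS without going back to the mempool. It assumes m != NULL.
 */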

/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}

static inline void
set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	ad->supported_ops |= (1 << op_type);
}

static inline bool
is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	return ad->supported_ops & (1 << op_type);
}

static inline bool
flags_match(uint32_t flags_req, uint32_t flags_present)
{
	return (flags_req & flags_present) == flags_req;
}

static void
clear_soft_out_cap(uint32_t *op_flags)
{
	*op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT;
	*op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT;
	*op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT;
}

static int
check_dev_cap(const struct rte_bbdev_info *dev_info)
{
	unsigned int i;
	unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs,
			nb_harq_inputs, nb_harq_outputs;
	const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities;

	nb_inputs = test_vector.entries[DATA_INPUT].nb_segments;
	nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments;
	nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments;
	nb_harq_inputs = test_vector.entries[DATA_HARQ_INPUT].nb_segments;
	nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments;

	for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) {
		if (op_cap->type != test_vector.op_type)
			continue;

		if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) {
			const struct rte_bbdev_op_cap_turbo_dec *cap =
					&op_cap->cap.turbo_dec;
			/* Ignore lack of soft output capability, just skip
			 * checking if soft output is valid.
			 */
			if ((test_vector.turbo_dec.op_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
					!(cap->capability_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				printf(
					"INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n",
					dev_info->dev_name);
				clear_soft_out_cap(
					&test_vector.turbo_dec.op_flags);
			}

			if (!flags_match(test_vector.turbo_dec.op_flags,
					cap->capability_flags))
				return TEST_FAILED;
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_soft_outputs > cap->num_buffers_soft_out &&
					(test_vector.turbo_dec.op_flags &
					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
				printf(
					"Too many soft outputs defined: %u, max: %u\n",
					nb_soft_outputs,
					cap->num_buffers_soft_out);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_TURBO_DEC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) {
			const struct rte_bbdev_op_cap_turbo_enc *cap =
					&op_cap->cap.turbo_enc;

			if (!flags_match(test_vector.turbo_enc.op_flags,
					cap->capability_flags))
				return TEST_FAILED;
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_dst) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs, cap->num_buffers_dst);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_TURBO_ENC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) {
			const struct rte_bbdev_op_cap_ldpc_enc *cap =
					&op_cap->cap.ldpc_enc;

			if (!flags_match(test_vector.ldpc_enc.op_flags,
					cap->capability_flags)) {
				printf("Flag Mismatch\n");
				return TEST_FAILED;
			}
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_dst) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs, cap->num_buffers_dst);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_LDPC_ENC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}

			return TEST_SUCCESS;
		} else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) {
			const struct rte_bbdev_op_cap_ldpc_dec *cap =
					&op_cap->cap.ldpc_dec;

			if (!flags_match(test_vector.ldpc_dec.op_flags,
					cap->capability_flags)) {
				printf("Flag Mismatch\n");
				return TEST_FAILED;
			}
			if (nb_inputs > cap->num_buffers_src) {
				printf("Too many inputs defined: %u, max: %u\n",
					nb_inputs, cap->num_buffers_src);
				return TEST_FAILED;
			}
			if (nb_hard_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many hard outputs defined: %u, max: %u\n",
					nb_hard_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (nb_harq_inputs > cap->num_buffers_hard_out) {
				printf(
					"Too many HARQ inputs defined: %u, max: %u\n",
					nb_harq_inputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (nb_harq_outputs > cap->num_buffers_hard_out) {
				printf(
					"Too many HARQ outputs defined: %u, max: %u\n",
					nb_harq_outputs,
					cap->num_buffers_hard_out);
				return TEST_FAILED;
			}
			if (intr_enabled && !(cap->capability_flags &
					RTE_BBDEV_LDPC_DEC_INTERRUPTS)) {
				printf(
					"Dequeue interrupts are not supported!\n");
				return TEST_FAILED;
			}
			if (intr_enabled && (test_vector.ldpc_dec.op_flags &
					(RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
					RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK
					))) {
				printf("Skip loop-back with interrupt\n");
				return TEST_FAILED;
			}
			return TEST_SUCCESS;
		}
	}

	if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE))
		return TEST_SUCCESS; /* Special case for NULL device */

	return TEST_FAILED;
}

/* calculates optimal mempool size not smaller than the val */
static unsigned int
optimal_mempool_size(unsigned int val)
{
	return rte_align32pow2(val + 1) - 1;
}
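/*
 * Added note: rte_mempool documentation recommends pool sizes of the form
 * 2^q - 1 for best memory usage, hence the rounding above, e.g.
 * optimal_mempool_size(1000) == 1023.
 */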

/* allocates mbuf mempool for inputs and outputs */
static struct rte_mempool *
create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
		int socket_id, unsigned int mbuf_pool_size,
		const char *op_type_str)
{
	unsigned int i;
	uint32_t max_seg_sz = 0;
	char pool_name[RTE_MEMPOOL_NAMESIZE];

	/* find max input segment size */
	for (i = 0; i < entries->nb_segments; ++i)
		if (entries->segments[i].length > max_seg_sz)
			max_seg_sz = entries->segments[i].length;

	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
			dev_id);
	return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0,
			RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM
					+ FILLER_HEADROOM,
			(unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id);
}

static int
create_mempools(struct active_device *ad, int socket_id,
		enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
{
	struct rte_mempool *mp;
	unsigned int ops_pool_size, mbuf_pool_size = 0;
	char pool_name[RTE_MEMPOOL_NAMESIZE];
	const char *op_type_str;
	enum rte_bbdev_op_type op_type = org_op_type;

	struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
	struct op_data_entries *hard_out =
			&test_vector.entries[DATA_HARD_OUTPUT];
	struct op_data_entries *soft_out =
			&test_vector.entries[DATA_SOFT_OUTPUT];
	struct op_data_entries *harq_in =
			&test_vector.entries[DATA_HARQ_INPUT];
	struct op_data_entries *harq_out =
			&test_vector.entries[DATA_HARQ_OUTPUT];

	/* allocate ops mempool */
	ops_pool_size = optimal_mempool_size(RTE_MAX(
			/* Ops used plus 1 reference op */
			RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1),
			/* Minimal cache size plus 1 reference op */
			(unsigned int)(1.5 * rte_lcore_count() *
					OPS_CACHE_SIZE + 1)),
			OPS_POOL_SIZE_MIN));

	if (org_op_type == RTE_BBDEV_OP_NONE)
		op_type = RTE_BBDEV_OP_TURBO_ENC;

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str,
			ad->dev_id);
	mp = rte_bbdev_op_pool_create(pool_name, op_type,
			ops_pool_size, OPS_CACHE_SIZE, socket_id);
	TEST_ASSERT_NOT_NULL(mp,
			"ERROR Failed to create %u items ops pool for dev %u on socket %u.",
			ops_pool_size,
			ad->dev_id,
			socket_id);
	ad->ops_mempool = mp;

	/* Do not create inputs and outputs mbufs for BaseBand Null Device */
	if (org_op_type == RTE_BBDEV_OP_NONE)
		return TEST_SUCCESS;

	/* Inputs */
	if (in->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				in->nb_segments);
		mp = create_mbuf_pool(in, ad->dev_id, socket_id,
				mbuf_pool_size, "in");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->in_mbuf_pool = mp;
	}

	/* Hard outputs */
	if (hard_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				hard_out->nb_segments);
		mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id,
				mbuf_pool_size,
				"hard_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->hard_out_mbuf_pool = mp;
	}

	/* Soft outputs */
	if (soft_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				soft_out->nb_segments);
		mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id,
				mbuf_pool_size,
				"soft_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items soft output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->soft_out_mbuf_pool = mp;
	}

	/* HARQ inputs */
	if (harq_in->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				harq_in->nb_segments);
		mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id,
				mbuf_pool_size,
				"harq_in");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items harq input pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->harq_in_mbuf_pool = mp;
	}

	/* HARQ outputs */
	if (harq_out->nb_segments > 0) {
		mbuf_pool_size = optimal_mempool_size(ops_pool_size *
				harq_out->nb_segments);
		mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id,
				mbuf_pool_size,
				"harq_out");
		TEST_ASSERT_NOT_NULL(mp,
				"ERROR Failed to create %u items harq output pktmbuf pool for dev %u on socket %u.",
				mbuf_pool_size,
				ad->dev_id,
				socket_id);
		ad->harq_out_mbuf_pool = mp;
	}

	return TEST_SUCCESS;
}
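/*
 * Added note: the ops pool is sized for the worst of (a) every queue holding
 * num_ops operations plus the reference op and (b) 1.5x the aggregate
 * per-lcore mempool cache, so that ops parked in lcore caches cannot starve
 * the pool, with OPS_POOL_SIZE_MIN as a floor.
 */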

static int
add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;
	unsigned int queue_id;
	struct rte_bbdev_queue_conf qconf;
	struct active_device *ad = &active_devs[nb_active_devs];
	unsigned int nb_queues;
	enum rte_bbdev_op_type op_type = vector->op_type;

	/* Configure fpga lte fec with PF & VF values
	 * if '-i' flag is set and using fpga device
	 */
#ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC
	if ((get_init_device() == true) &&
			(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
		struct fpga_lte_fec_conf conf;
		unsigned int i;

		printf("Configure FPGA LTE FEC Driver %s with default values\n",
				info->drv.driver_name);

		/* clear default configuration before initialization */
		memset(&conf, 0, sizeof(struct fpga_lte_fec_conf));

		/* Set PF mode :
		 * true if PF is used for data plane
		 * false for VFs
		 */
		conf.pf_mode_en = true;

		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
			/* Number of UL queues per VF (fpga supports 8 VFs) */
			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
			/* Number of DL queues per VF (fpga supports 8 VFs) */
			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
		}

		/* UL bandwidth. Needed for schedule algorithm */
		conf.ul_bandwidth = UL_4G_BANDWIDTH;
		/* DL bandwidth */
		conf.dl_bandwidth = DL_4G_BANDWIDTH;

		/* UL & DL load Balance Factor */
		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
		conf.dl_load_balance = DL_4G_LOAD_BALANCE;

		/* FLR timeout value */
		conf.flr_time_out = FLR_4G_TIMEOUT;

		/* setup FPGA PF with configuration information */
		ret = fpga_lte_fec_configure(info->dev_name, &conf);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to configure 4G FPGA PF for bbdev %s",
				info->dev_name);
	}
#endif
	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);

	/* setup device */
	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
	if (ret < 0) {
		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
				dev_id, nb_queues, info->socket_id, ret);
		return TEST_FAILED;
	}

	/* configure interrupts if needed */
	if (intr_enabled) {
		ret = rte_bbdev_intr_enable(dev_id);
		if (ret < 0) {
			printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id,
					ret);
			return TEST_FAILED;
		}
	}

	/* setup device queues */
	qconf.socket = info->socket_id;
	qconf.queue_size = info->drv.default_queue_conf.queue_size;
	qconf.priority = 0;
	qconf.deferred_start = 0;
	qconf.op_type = op_type;

	for (queue_id = 0; queue_id < nb_queues; ++queue_id) {
		ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf);
		if (ret != 0) {
			printf(
					"Allocated all queues (id=%u) at prio%u on dev%u\n",
					queue_id, qconf.priority, dev_id);
			qconf.priority++;
			ret = rte_bbdev_queue_configure(ad->dev_id, queue_id,
					&qconf);
		}
		if (ret != 0) {
			printf("All queues on dev %u allocated: %u\n",
					dev_id, queue_id);
			break;
		}
		ad->queue_ids[queue_id] = queue_id;
	}
	TEST_ASSERT(queue_id != 0,
			"ERROR Failed to configure any queues on dev %u",
			dev_id);
	ad->nb_queues = queue_id;

	set_avail_op(ad, op_type);

	return TEST_SUCCESS;
}
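/*
 * Added note: the queue setup loop above first requests priority 0 for every
 * queue id; when rte_bbdev_queue_configure() fails it bumps the priority and
 * retries the same queue id once, so devices exposing several priority
 * levels still get as many queues configured as possible before giving up.
 */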

static int
add_active_device(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;

	active_devs[nb_active_devs].driver_name = info->drv.driver_name;
	active_devs[nb_active_devs].dev_id = dev_id;

	ret = add_bbdev_dev(dev_id, info, vector);
	if (ret == TEST_SUCCESS)
		++nb_active_devs;
	return ret;
}

static uint8_t
populate_active_devices(void)
{
	int ret;
	uint8_t dev_id;
	uint8_t nb_devs_added = 0;
	struct rte_bbdev_info info;

	RTE_BBDEV_FOREACH(dev_id) {
		rte_bbdev_info_get(dev_id, &info);

		if (check_dev_cap(&info)) {
			printf(
				"Device %d (%s) does not support specified capabilities\n",
					dev_id, info.dev_name);
			continue;
		}

		ret = add_active_device(dev_id, &info, &test_vector);
		if (ret != 0) {
			printf("Adding active bbdev %s skipped\n",
					info.dev_name);
			continue;
		}
		nb_devs_added++;
	}

	return nb_devs_added;
}

static int
read_test_vector(void)
{
	int ret;

	memset(&test_vector, 0, sizeof(test_vector));
	printf("Test vector file = %s\n", get_vector_filename());
	ret = test_bbdev_vector_read(get_vector_filename(), &test_vector);
	TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n",
			get_vector_filename());

	return TEST_SUCCESS;
}

static int
testsuite_setup(void)
{
	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

	if (populate_active_devices() == 0) {
		printf("No suitable devices found!\n");
		return TEST_SKIPPED;
	}

	return TEST_SUCCESS;
}

static int
interrupt_testsuite_setup(void)
{
	TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n");

	/* Enable interrupts */
	intr_enabled = true;

	/* Special case for NULL device (RTE_BBDEV_OP_NONE) */
	if (populate_active_devices() == 0 ||
			test_vector.op_type == RTE_BBDEV_OP_NONE) {
		intr_enabled = false;
		printf("No suitable devices found!\n");
		return TEST_SKIPPED;
	}

	return TEST_SUCCESS;
}

static void
testsuite_teardown(void)
{
	uint8_t dev_id;

	/* Unconfigure devices */
	RTE_BBDEV_FOREACH(dev_id)
		rte_bbdev_close(dev_id);

	/* Clear active devices structs. */
	memset(active_devs, 0, sizeof(active_devs));
	nb_active_devs = 0;

	/* Disable interrupts */
	intr_enabled = false;
}

static int
ut_setup(void)
{
	uint8_t i, dev_id;

	for (i = 0; i < nb_active_devs; i++) {
		dev_id = active_devs[i].dev_id;
		/* reset bbdev stats */
		TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id),
				"Failed to reset stats of bbdev %u", dev_id);
		/* start the device */
		TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id),
				"Failed to start bbdev %u", dev_id);
	}

	return TEST_SUCCESS;
}

static void
ut_teardown(void)
{
	uint8_t i, dev_id;
	struct rte_bbdev_stats stats;

	for (i = 0; i < nb_active_devs; i++) {
		dev_id = active_devs[i].dev_id;
		/* read stats and print */
		rte_bbdev_stats_get(dev_id, &stats);
		/* Stop the device */
		rte_bbdev_stop(dev_id);
	}
}

static int
init_op_data_objs(struct rte_bbdev_op_data *bufs,
		struct op_data_entries *ref_entries,
		struct rte_mempool *mbuf_pool, const uint16_t n,
		enum op_data_type op_type, uint16_t min_alignment)
{
	int ret;
	unsigned int i, j;
	bool large_input = false;

	for (i = 0; i < n; ++i) {
		char *data;
		struct op_data_buf *seg = &ref_entries->segments[0];
		struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool);
		TEST_ASSERT_NOT_NULL(m_head,
				"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
				op_type, n * ref_entries->nb_segments,
				mbuf_pool->size);

		if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) {
			/*
			 * Special case when DPDK mbuf cannot handle
			 * the required input size
			 */
			printf("Warning: Larger input size than DPDK mbuf %d\n",
					seg->length);
			large_input = true;
		}
		bufs[i].data = m_head;
		bufs[i].offset = 0;
		bufs[i].length = 0;

		if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) {
			if ((op_type == DATA_INPUT) && large_input) {
				/* Allocate a fake overused mbuf */
				data = rte_malloc(NULL, seg->length, 0);
				memcpy(data, seg->addr, seg->length);
				m_head->buf_addr = data;
				m_head->buf_iova = rte_malloc_virt2iova(data);
				m_head->data_off = 0;
				m_head->data_len = seg->length;
			} else {
				data = rte_pktmbuf_append(m_head, seg->length);
				TEST_ASSERT_NOT_NULL(data,
					"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
					seg->length, op_type);

				TEST_ASSERT(data == RTE_PTR_ALIGN(
						data, min_alignment),
					"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
					data, min_alignment);
				rte_memcpy(data, seg->addr, seg->length);
			}

			bufs[i].length += seg->length;

			for (j = 1; j < ref_entries->nb_segments; ++j) {
				struct rte_mbuf *m_tail =
						rte_pktmbuf_alloc(mbuf_pool);
				TEST_ASSERT_NOT_NULL(m_tail,
						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
						op_type,
						n * ref_entries->nb_segments,
						mbuf_pool->size);
				seg += 1;

				data = rte_pktmbuf_append(m_tail, seg->length);
				TEST_ASSERT_NOT_NULL(data,
						"Couldn't append %u bytes to mbuf from %d data type mbuf pool",
						seg->length, op_type);

				TEST_ASSERT(data == RTE_PTR_ALIGN(data,
						min_alignment),
						"Data addr in mbuf (%p) is not aligned to device min alignment (%u)",
						data, min_alignment);
				rte_memcpy(data, seg->addr, seg->length);
				bufs[i].length += seg->length;

				ret = rte_pktmbuf_chain(m_head, m_tail);
				TEST_ASSERT_SUCCESS(ret,
						"Couldn't chain mbufs from %d data type mbuf pool",
						op_type);
			}
		} else {

			/* allocate chained-mbuf for output buffer */
			for (j = 1; j < ref_entries->nb_segments; ++j) {
				struct rte_mbuf *m_tail =
						rte_pktmbuf_alloc(mbuf_pool);
				TEST_ASSERT_NOT_NULL(m_tail,
						"Not enough mbufs in %d data type mbuf pool (needed %u, available %u)",
						op_type,
						n * ref_entries->nb_segments,
						mbuf_pool->size);

				ret = rte_pktmbuf_chain(m_head, m_tail);
				TEST_ASSERT_SUCCESS(ret,
						"Couldn't chain mbufs from %d data type mbuf pool",
						op_type);
			}
		}
	}

	return 0;
}
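/*
 * Added note: an rte_mbuf data_len field is only 16 bits wide, so an input
 * segment larger than RTE_BBDEV_LDPC_E_MAX_MBUF cannot be carried by a
 * regular pktmbuf. The "fake overused mbuf" path above points the mbuf at an
 * external rte_malloc() buffer instead; this is test-only and that buffer is
 * managed outside the mbuf pool.
 */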

static int
allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len,
		const int socket)
{
	int i;

	*buffers = rte_zmalloc_socket(NULL, len, 0, socket);
	if (*buffers == NULL) {
		printf("WARNING: Failed to allocate op_data on socket %d\n",
				socket);
		/* try to allocate memory on other detected sockets */
		for (i = 0; i < socket; i++) {
			*buffers = rte_zmalloc_socket(NULL, len, 0, i);
			if (*buffers != NULL)
				break;
		}
	}

	return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS;
}

static void
limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, const int8_t max_llr_modulus)
{
	uint16_t i, byte_idx;

	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		while (m != NULL) {
			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
					input_ops[i].offset);
			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
					++byte_idx)
				llr[byte_idx] = round((double)max_llr_modulus *
						llr[byte_idx] / INT8_MAX);

			m = m->next;
		}
	}
}

/*
 * We may have to insert filler bits
 * when they are required by the HARQ assumption
 */
static void
ldpc_add_filler(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, struct test_op_params *op_params)
{
	struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec;

	if (input_ops == NULL)
		return;
	/* No need to add filler if not required by device */
	if (!(ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS))
		return;
	/* No need to add filler for loopback operation */
	if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
		return;

	uint16_t i, j, parity_offset;
	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
				input_ops[i].offset);
		parity_offset = (dec.basegraph == 1 ? 20 : 8)
				* dec.z_c - dec.n_filler;
		uint16_t new_hin_size = input_ops[i].length + dec.n_filler;
		m->data_len = new_hin_size;
		input_ops[i].length = new_hin_size;
		for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler;
				j--)
			llr[j] = llr[j - dec.n_filler];
		uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
		for (j = 0; j < dec.n_filler; j++)
			llr[parity_offset + j] = llr_max_pre_scaling;
	}
}

static void
ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops,
		const uint16_t n, const int8_t llr_size,
		const int8_t llr_decimals)
{
	if (input_ops == NULL)
		return;

	uint16_t i, byte_idx;

	int16_t llr_max, llr_min, llr_tmp;
	llr_max = (1 << (llr_size - 1)) - 1;
	llr_min = -llr_max;
	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = input_ops[i].data;
		while (m != NULL) {
			int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *,
					input_ops[i].offset);
			for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m);
					++byte_idx) {

				llr_tmp = llr[byte_idx];
				if (llr_decimals == 4)
					llr_tmp *= 8;
				else if (llr_decimals == 2)
					llr_tmp *= 2;
				else if (llr_decimals == 0)
					llr_tmp /= 2;
				llr_tmp = RTE_MIN(llr_max,
						RTE_MAX(llr_min, llr_tmp));
				llr[byte_idx] = (int8_t) llr_tmp;
			}

			m = m->next;
		}
	}
}
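/*
 * Added note: the LDPC decoder LLRs in the test vectors use a fixed-point
 * representation; ldpc_input_llr_scaling() rescales them to the llr_size /
 * llr_decimals advertised in the device capabilities and saturates to the
 * representable range, so one vector can be run against devices with
 * different LLR precisions.
 */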


static int
fill_queue_buffers(struct test_op_params *op_params,
		struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp,
		struct rte_mempool *soft_out_mp,
		struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp,
		uint16_t queue_id,
		const struct rte_bbdev_op_cap *capabilities,
		uint16_t min_alignment, const int socket_id)
{
	int ret;
	enum op_data_type type;
	const uint16_t n = op_params->num_to_process;

	struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = {
		in_mp,
		soft_out_mp,
		hard_out_mp,
		harq_in_mp,
		harq_out_mp,
	};

	struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = {
		&op_params->q_bufs[socket_id][queue_id].inputs,
		&op_params->q_bufs[socket_id][queue_id].soft_outputs,
		&op_params->q_bufs[socket_id][queue_id].hard_outputs,
		&op_params->q_bufs[socket_id][queue_id].harq_inputs,
		&op_params->q_bufs[socket_id][queue_id].harq_outputs,
	};

	for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) {
		struct op_data_entries *ref_entries =
				&test_vector.entries[type];
		if (ref_entries->nb_segments == 0)
			continue;

		ret = allocate_buffers_on_socket(queue_ops[type],
				n * sizeof(struct rte_bbdev_op_data),
				socket_id);
		TEST_ASSERT_SUCCESS(ret,
				"Couldn't allocate memory for rte_bbdev_op_data structs");

		ret = init_op_data_objs(*queue_ops[type], ref_entries,
				mbuf_pools[type], n, type, min_alignment);
		TEST_ASSERT_SUCCESS(ret,
				"Couldn't init rte_bbdev_op_data structs");
	}

	if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
		limit_input_llr_val_range(*queue_ops[DATA_INPUT], n,
			capabilities->cap.turbo_dec.max_llr_modulus);

	if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) {
		bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK;
		bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_LLR_COMPRESSION;
		bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags &
				RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
		ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals;
		ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size;
		ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags;
		if (!loopback && !llr_comp)
			ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n,
					ldpc_llr_size, ldpc_llr_decimals);
		if (!loopback && !harq_comp)
			ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n,
					ldpc_llr_size, ldpc_llr_decimals);
		if (!loopback)
			ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n,
					op_params);
	}

	return 0;
}
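/*
 * Added note: mbuf_pools[] and queue_ops[] above are indexed by
 * enum op_data_type, which is assumed to order its values as DATA_INPUT,
 * DATA_SOFT_OUTPUT, DATA_HARD_OUTPUT, DATA_HARQ_INPUT, DATA_HARQ_OUTPUT;
 * that is why the soft output pool appears before the hard output pool here.
 */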

static void
free_buffers(struct active_device *ad, struct test_op_params *op_params)
{
	unsigned int i, j;

	rte_mempool_free(ad->ops_mempool);
	rte_mempool_free(ad->in_mbuf_pool);
	rte_mempool_free(ad->hard_out_mbuf_pool);
	rte_mempool_free(ad->soft_out_mbuf_pool);
	rte_mempool_free(ad->harq_in_mbuf_pool);
	rte_mempool_free(ad->harq_out_mbuf_pool);

	for (i = 0; i < rte_lcore_count(); ++i) {
		for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) {
			rte_free(op_params->q_bufs[j][i].inputs);
			rte_free(op_params->q_bufs[j][i].hard_outputs);
			rte_free(op_params->q_bufs[j][i].soft_outputs);
			rte_free(op_params->q_bufs[j][i].harq_inputs);
			rte_free(op_params->q_bufs[j][i].harq_outputs);
		}
	}
}

static void
copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *hard_outputs,
		struct rte_bbdev_op_data *soft_outputs,
		struct rte_bbdev_dec_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec;

	for (i = 0; i < n; ++i) {
		if (turbo_dec->code_block_mode == 0) {
			ops[i]->turbo_dec.tb_params.ea =
					turbo_dec->tb_params.ea;
			ops[i]->turbo_dec.tb_params.eb =
					turbo_dec->tb_params.eb;
			ops[i]->turbo_dec.tb_params.k_pos =
					turbo_dec->tb_params.k_pos;
			ops[i]->turbo_dec.tb_params.k_neg =
					turbo_dec->tb_params.k_neg;
			ops[i]->turbo_dec.tb_params.c =
					turbo_dec->tb_params.c;
			ops[i]->turbo_dec.tb_params.c_neg =
					turbo_dec->tb_params.c_neg;
			ops[i]->turbo_dec.tb_params.cab =
					turbo_dec->tb_params.cab;
			ops[i]->turbo_dec.tb_params.r =
					turbo_dec->tb_params.r;
		} else {
			ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e;
			ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k;
		}

		ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale;
		ops[i]->turbo_dec.iter_max = turbo_dec->iter_max;
		ops[i]->turbo_dec.iter_min = turbo_dec->iter_min;
		ops[i]->turbo_dec.op_flags = turbo_dec->op_flags;
		ops[i]->turbo_dec.rv_index = turbo_dec->rv_index;
		ops[i]->turbo_dec.num_maps = turbo_dec->num_maps;
		ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode;

		ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i];
		ops[i]->turbo_dec.input = inputs[start_idx + i];
		if (soft_outputs != NULL)
			ops[i]->turbo_dec.soft_output =
					soft_outputs[start_idx + i];
	}
}

static void
copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *outputs,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_turbo_enc *turbo_enc = &ref_op->turbo_enc;
	for (i = 0; i < n; ++i) {
		if (turbo_enc->code_block_mode == 0) {
			ops[i]->turbo_enc.tb_params.ea =
					turbo_enc->tb_params.ea;
			ops[i]->turbo_enc.tb_params.eb =
					turbo_enc->tb_params.eb;
			ops[i]->turbo_enc.tb_params.k_pos =
					turbo_enc->tb_params.k_pos;
			ops[i]->turbo_enc.tb_params.k_neg =
					turbo_enc->tb_params.k_neg;
			ops[i]->turbo_enc.tb_params.c =
					turbo_enc->tb_params.c;
			ops[i]->turbo_enc.tb_params.c_neg =
					turbo_enc->tb_params.c_neg;
			ops[i]->turbo_enc.tb_params.cab =
					turbo_enc->tb_params.cab;
			ops[i]->turbo_enc.tb_params.ncb_pos =
					turbo_enc->tb_params.ncb_pos;
			ops[i]->turbo_enc.tb_params.ncb_neg =
					turbo_enc->tb_params.ncb_neg;
			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
		} else {
			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
			ops[i]->turbo_enc.cb_params.ncb =
					turbo_enc->cb_params.ncb;
		}
		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;

		ops[i]->turbo_enc.output = outputs[start_idx + i];
		ops[i]->turbo_enc.input = inputs[start_idx + i];
	}
}


/* Returns a random number drawn from a normal distribution
 * with mean of 0 and variance of 1
 * Marsaglia algorithm
 */
static double
randn(int n)
{
	double S, Z, U1, U2, u, v, fac;

	do {
		U1 = (double)rand() / RAND_MAX;
		U2 = (double)rand() / RAND_MAX;
		u = 2. * U1 - 1.;
		v = 2. * U2 - 1.;
		S = u * u + v * v;
	} while (S >= 1 || S == 0);
	fac = sqrt(-2. * log(S) / S);
	Z = (n % 2) ? u * fac : v * fac;
	return Z;
}

static inline double
maxstar(double A, double B)
{
	if (fabs(A - B) > 5)
		return RTE_MAX(A, B);
	else
		return RTE_MAX(A, B) + log1p(exp(-fabs(A - B)));
}
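/*
 * Added note: maxstar() is the usual max* (Jacobian logarithm) operator,
 * max*(a, b) = ln(e^a + e^b) = max(a, b) + ln(1 + e^-|a - b|); the correction
 * term is skipped when |a - b| > 5 since it is then negligible at this LLR
 * resolution.
 */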

/*
 * Generate Qm LLRS for Qm==8
 * Modulation, AWGN and LLR estimation from max log development
 */
static void
gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
{
	int qm = 8;
	int qam = 256;
	int m, k;
	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
	/* 5.1.4 of TS38.211 */
	const double symbols_I[256] = {
			5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5,
			5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11,
			11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13,
			15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15,
			15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3,
			1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1,
			1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13,
			15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9,
			13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5,
			-5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5,
			-7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3,
			-1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13,
			-13, -15, -15, -13, -13, -15, -15, -11, -11, -9,
			-9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
			-13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3,
			-3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5,
			-7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11,
			-9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13,
			-13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9,
			-13, -13, -15, -15, -13, -13, -15, -15};
	const double symbols_Q[256] = {
			5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13,
			15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
			11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13,
			15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5,
			-7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13,
			-15, -13, -15, -11, -9, -11, -9, -13, -15, -13,
			-15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5,
			-7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5,
			7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11,
			9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15,
			13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1,
			3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9,
			13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1,
			-5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9,
			-13, -15, -13, -15, -11, -9, -11, -9, -13, -15,
			-13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7,
			-5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15,
			-13, -15, -11, -9, -11, -9, -13, -15, -13, -15};
	/* Average constellation point energy */
	N0 *= 170.0;
	for (k = 0; k < qm; k++)
		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
	/* 5.1.4 of TS38.211 */
	I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) *
			(4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6]))));
	Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) *
			(4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7]))));
	/* AWGN channel */
	I += sqrt(N0 / 2) * randn(0);
	Q += sqrt(N0 / 2) * randn(1);
	/*
	 * Calculate the log of the probability that each of
	 * the constellation points was transmitted
	 */
	for (m = 0; m < qam; m++)
		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
				+ pow(Q - symbols_Q[m], 2.0)) / N0;
	/* Calculate an LLR for each of the qm bits in the set */
	for (k = 0; k < qm; k++) {
		p0 = -999999;
		p1 = -999999;
		/* For each constellation point */
		for (m = 0; m < qam; m++) {
			if ((m >> (qm - k - 1)) & 1)
				p1 = maxstar(p1, log_syml_prob[m]);
			else
				p0 = maxstar(p0, log_syml_prob[m]);
		}
		/* Calculate the LLR */
		llr_ = p0 - p1;
		llr_ *= (1 << ldpc_llr_decimals);
		llr_ = round(llr_);
		if (llr_ > llr_max)
			llr_ = llr_max;
		if (llr_ < -llr_max)
			llr_ = -llr_max;
		llrs[qm * i + k] = (int8_t) llr_;
	}
}


/*
 * Generate Qm LLRS for Qm==6
 * Modulation, AWGN and LLR estimation from max log development
 */
static void
gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
{
	int qm = 6;
	int qam = 64;
	int m, k;
	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
	/* 5.1.4 of TS38.211 */
	const double symbols_I[64] = {
			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
			3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7,
			-3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7,
			-5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1,
			-5, -5, -7, -7, -5, -5, -7, -7};
	const double symbols_Q[64] = {
			3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7,
			-3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1,
			-5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1,
			5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7,
			-3, -1, -3, -1, -5, -7, -5, -7};
	/* Average constellation point energy */
	N0 *= 42.0;
	for (k = 0; k < qm; k++)
		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
	/* 5.1.4 of TS38.211 */
	I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4])));
	Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5])));
	/* AWGN channel */
	I += sqrt(N0 / 2) * randn(0);
	Q += sqrt(N0 / 2) * randn(1);
	/*
	 * Calculate the log of the probability that each of
	 * the constellation points was transmitted
	 */
	for (m = 0; m < qam; m++)
		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
				+ pow(Q - symbols_Q[m], 2.0)) / N0;
	/* Calculate an LLR for each of the k_64QAM bits in the set */
	for (k = 0; k < qm; k++) {
		p0 = -999999;
		p1 = -999999;
		/* For each constellation point */
		for (m = 0; m < qam; m++) {
			if ((m >> (qm - k - 1)) & 1)
				p1 = maxstar(p1, log_syml_prob[m]);
			else
				p0 = maxstar(p0, log_syml_prob[m]);
		}
		/* Calculate the LLR */
		llr_ = p0 - p1;
		llr_ *= (1 << ldpc_llr_decimals);
		llr_ = round(llr_);
		if (llr_ > llr_max)
			llr_ = llr_max;
		if (llr_ < -llr_max)
			llr_ = -llr_max;
		llrs[qm * i + k] = (int8_t) llr_;
	}
}

/*
 * Generate Qm LLRS for Qm==4
 * Modulation, AWGN and LLR estimation from max log development
 */
static void
gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max)
{
	int qm = 4;
	int qam = 16;
	int m, k;
	double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam];
	/* 5.1.4 of TS38.211 */
	const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3,
			-1, -1, -3, -3, -1, -1, -3, -3};
	const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3,
			1, 3, 1, 3, -1, -3, -1, -3};
	/* Average constellation point energy */
	N0 *= 10.0;
	for (k = 0; k < qm; k++)
		b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0;
	/* 5.1.4 of TS38.211 */
	I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2]));
	Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3]));
	/* AWGN channel */
	I += sqrt(N0 / 2) * randn(0);
	Q += sqrt(N0 / 2) * randn(1);
	/*
	 * Calculate the log of the probability that each of
	 * the constellation points was transmitted
	 */
	for (m = 0; m < qam; m++)
		log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0)
				+ pow(Q - symbols_Q[m], 2.0)) / N0;
	/* Calculate an LLR for each of the qm bits in the set */
	for (k = 0; k < qm; k++) {
		p0 = -999999;
		p1 = -999999;
		/* For each constellation point */
		for (m = 0; m < qam; m++) {
			if ((m >> (qm - k - 1)) & 1)
				p1 = maxstar(p1, log_syml_prob[m]);
			else
				p0 = maxstar(p0, log_syml_prob[m]);
		}
		/* Calculate the LLR */
		llr_ = p0 - p1;
		llr_ *= (1 << ldpc_llr_decimals);
		llr_ = round(llr_);
		if (llr_ > llr_max)
			llr_ = llr_max;
		if (llr_ < -llr_max)
			llr_ = -llr_max;
		llrs[qm * i + k] = (int8_t) llr_;
	}
}

static void
gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max)
{
	double b, b1, n;
	double coeff = 2.0 * sqrt(N0);

	/* Ignore in vectors rare quasi null LLRs not to be saturated */
	if (llrs[j] < 8 && llrs[j] > -8)
		return;

	/* Note don't change sign here */
	n = randn(j % 2);
	b1 = ((llrs[j] > 0 ? 2.0 : -2.0)
			+ coeff * n) / N0;
	b = b1 * (1 << ldpc_llr_decimals);
	b = round(b);
	if (b > llr_max)
		b = llr_max;
	if (b < -llr_max)
		b = -llr_max;
	llrs[j] = (int8_t) b;
}

/* Generate LLR for a given SNR */
static void
generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_dec_op *ref_op)
{
	struct rte_mbuf *m;
	uint16_t qm;
	uint32_t i, j, e, range;
	double N0, llr_max;

	e = ref_op->ldpc_dec.cb_params.e;
	qm = ref_op->ldpc_dec.q_m;
	llr_max = (1 << (ldpc_llr_size - 1)) - 1;
	range = e / qm;
	N0 = 1.0 / pow(10.0, get_snr() / 10.0);

	for (i = 0; i < n; ++i) {
		m = inputs[i].data;
		int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0);
		if (qm == 8) {
			for (j = 0; j < range; ++j)
				gen_qm8_llr(llrs, j, N0, llr_max);
		} else if (qm == 6) {
			for (j = 0; j < range; ++j)
				gen_qm6_llr(llrs, j, N0, llr_max);
		} else if (qm == 4) {
			for (j = 0; j < range; ++j)
				gen_qm4_llr(llrs, j, N0, llr_max);
		} else {
			for (j = 0; j < e; ++j)
				gen_qm2_llr(llrs, j, N0, llr_max);
		}
	}
}
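/*
 * Added note: generate_llr_input() regenerates the vector LLRs for a target
 * SNR: the sign of each reference LLR is taken as the transmitted bit, the
 * bits are re-modulated, passed through an AWGN channel with
 * N0 = 10^(-SNR/10) (SNR from get_snr(), in dB) and turned back into max-log
 * LLRs. Inside each gen_qmX_llr() helper N0 is further scaled by the average
 * constellation energy (170, 42 or 10) so the SNR is defined per symbol.
 */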

static void
copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *hard_outputs,
		struct rte_bbdev_op_data *soft_outputs,
		struct rte_bbdev_op_data *harq_inputs,
		struct rte_bbdev_op_data *harq_outputs,
		struct rte_bbdev_dec_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec;

	for (i = 0; i < n; ++i) {
		if (ldpc_dec->code_block_mode == 0) {
			ops[i]->ldpc_dec.tb_params.ea =
					ldpc_dec->tb_params.ea;
			ops[i]->ldpc_dec.tb_params.eb =
					ldpc_dec->tb_params.eb;
			ops[i]->ldpc_dec.tb_params.c =
					ldpc_dec->tb_params.c;
			ops[i]->ldpc_dec.tb_params.cab =
					ldpc_dec->tb_params.cab;
			ops[i]->ldpc_dec.tb_params.r =
					ldpc_dec->tb_params.r;
		} else {
			ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e;
		}

		ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph;
		ops[i]->ldpc_dec.z_c = ldpc_dec->z_c;
		ops[i]->ldpc_dec.q_m = ldpc_dec->q_m;
		ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler;
		ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb;
		ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max;
		ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index;
		ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags;
		ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode;

		if (hard_outputs != NULL)
			ops[i]->ldpc_dec.hard_output =
					hard_outputs[start_idx + i];
		if (inputs != NULL)
			ops[i]->ldpc_dec.input =
					inputs[start_idx + i];
		if (soft_outputs != NULL)
			ops[i]->ldpc_dec.soft_output =
					soft_outputs[start_idx + i];
		if (harq_inputs != NULL)
			ops[i]->ldpc_dec.harq_combined_input =
					harq_inputs[start_idx + i];
		if (harq_outputs != NULL)
			ops[i]->ldpc_dec.harq_combined_output =
					harq_outputs[start_idx + i];
	}
}


static void
copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n,
		unsigned int start_idx,
		struct rte_bbdev_op_data *inputs,
		struct rte_bbdev_op_data *outputs,
		struct rte_bbdev_enc_op *ref_op)
{
	unsigned int i;
	struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc;
	for (i = 0; i < n; ++i) {
		if (ldpc_enc->code_block_mode == 0) {
			ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea;
			ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb;
			ops[i]->ldpc_enc.tb_params.cab =
					ldpc_enc->tb_params.cab;
			ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c;
			ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r;
		} else {
			ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e;
		}
		ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph;
		ops[i]->ldpc_enc.z_c = ldpc_enc->z_c;
		ops[i]->ldpc_enc.q_m = ldpc_enc->q_m;
		ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler;
		ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb;
		ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index;
		ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags;
		ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode;
		ops[i]->ldpc_enc.output = outputs[start_idx + i];
		ops[i]->ldpc_enc.input = inputs[start_idx + i];
	}
}

static int
check_dec_status_and_ordering(struct rte_bbdev_dec_op *op,
		unsigned int order_idx, const int expected_status)
{
	int status = op->status;
	/* ignore parity mismatch false alarms for long iterations */
	if (get_iter_max() >= 10) {
		if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
				(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
			printf("WARNING: Ignore Syndrome Check mismatch\n");
			status -= (1 << RTE_BBDEV_SYNDROME_ERROR);
		}
		if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) &&
				!(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) {
			printf("WARNING: Ignore Syndrome Check mismatch\n");
			status += (1 << RTE_BBDEV_SYNDROME_ERROR);
		}
	}

	TEST_ASSERT(status == expected_status,
			"op_status (%d) != expected_status (%d)",
			op->status, expected_status);

	TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
			"Ordering error, expected %p, got %p",
			(void *)(uintptr_t)order_idx, op->opaque_data);

	return TEST_SUCCESS;
}

static int
check_enc_status_and_ordering(struct rte_bbdev_enc_op *op,
		unsigned int order_idx, const int expected_status)
{
	TEST_ASSERT(op->status == expected_status,
			"op_status (%d) != expected_status (%d)",
			op->status, expected_status);

	if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE)
		TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data,
				"Ordering error, expected %p, got %p",
				(void *)(uintptr_t)order_idx, op->opaque_data);

	return TEST_SUCCESS;
}

static inline int
validate_op_chain(struct rte_bbdev_op_data *op,
		struct op_data_entries *orig_op)
{
	uint8_t i;
	struct rte_mbuf *m = op->data;
	uint8_t nb_dst_segments = orig_op->nb_segments;
	uint32_t total_data_size = 0;

	TEST_ASSERT(nb_dst_segments == m->nb_segs,
			"Number of segments differ in original (%u) and filled (%u) op",
			nb_dst_segments, m->nb_segs);

	/* Validate each mbuf segment length */
	for (i = 0; i < nb_dst_segments; ++i) {
		/* Apply offset to the first mbuf segment */
		uint16_t offset = (i == 0) ? op->offset : 0;
		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
		total_data_size += orig_op->segments[i].length;

		TEST_ASSERT(orig_op->segments[i].length == data_len,
				"Length of segment differ in original (%u) and filled (%u) op",
				orig_op->segments[i].length, data_len);
		TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr,
				rte_pktmbuf_mtod_offset(m, uint32_t *, offset),
				data_len,
				"Output buffers (CB=%u) are not equal", i);
		m = m->next;
	}

	/* Validate total mbuf pkt length */
	uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset;
	TEST_ASSERT(total_data_size == pkt_len,
			"Length of data differ in original (%u) and filled (%u) op",
			total_data_size, pkt_len);

	return TEST_SUCCESS;
}

/*
 * Compute K0 for a given configuration for HARQ output length computation
 * As per definition in 3GPP 38.212 Table 5.4.2.1-2
 */
static inline uint16_t
get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
{
	if (rv_index == 0)
		return 0;
	uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c;
	if (n_cb == n) {
		if (rv_index == 1)
			return (bg == 1 ? K0_1_1 : K0_1_2) * z_c;
		else if (rv_index == 2)
			return (bg == 1 ? K0_2_1 : K0_2_2) * z_c;
		else
			return (bg == 1 ? K0_3_1 : K0_3_2) * z_c;
	}
	/* LBRM case - includes a division by N */
	if (rv_index == 1)
		return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb)
				/ n) * z_c;
	else if (rv_index == 2)
		return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb)
				/ n) * z_c;
	else
		return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb)
				/ n) * z_c;
}

/* HARQ output length including the Filler bits */
static inline uint16_t
compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld)
{
	uint16_t k0 = 0;
	uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index;
	k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv);
	/* Compute RM out size and number of rows */
	uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
			* ops_ld->z_c - ops_ld->n_filler;
	uint16_t deRmOutSize = RTE_MIN(
			k0 + ops_ld->cb_params.e +
			((k0 > parity_offset) ?
					0 : ops_ld->n_filler),
			ops_ld->n_cb);
	uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1)
			/ ops_ld->z_c);
	uint16_t harq_output_len = numRows * ops_ld->z_c;
	return harq_output_len;
}
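/*
 * Added note (worked example, for illustration only): with basegraph 1,
 * z_c = 128, a full circular buffer n_cb = 66 * 128 = 8448, rv_index = 2,
 * n_filler = 0 and cb_params.e = 4000, get_k0() returns 33 * 128 = 4224 and
 * compute_harq_len() yields deRmOutSize = min(4224 + 4000, 8448) = 8224,
 * i.e. 65 rows of z_c and a HARQ length of 65 * 128 = 8320.
 */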

static inline int
validate_op_harq_chain(struct rte_bbdev_op_data *op,
		struct op_data_entries *orig_op,
		struct rte_bbdev_op_ldpc_dec *ops_ld)
{
	uint8_t i;
	uint32_t j, jj, k;
	struct rte_mbuf *m = op->data;
	uint8_t nb_dst_segments = orig_op->nb_segments;
	uint32_t total_data_size = 0;
	int8_t *harq_orig, *harq_out, abs_harq_origin;
	uint32_t byte_error = 0, cum_error = 0, error;
	int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1;
	int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1;
	uint16_t parity_offset;

	TEST_ASSERT(nb_dst_segments == m->nb_segs,
			"Number of segments differ in original (%u) and filled (%u) op",
			nb_dst_segments, m->nb_segs);

	/* Validate each mbuf segment length */
	for (i = 0; i < nb_dst_segments; ++i) {
		/* Apply offset to the first mbuf segment */
		uint16_t offset = (i == 0) ? op->offset : 0;
		uint16_t data_len = rte_pktmbuf_data_len(m) - offset;
		total_data_size += orig_op->segments[i].length;

		TEST_ASSERT(orig_op->segments[i].length <
				(uint32_t)(data_len + 64),
				"Length of segment differ in original (%u) and filled (%u) op",
				orig_op->segments[i].length, data_len);
		harq_orig = (int8_t *) orig_op->segments[i].addr;
		harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset);

		if (!(ldpc_cap_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS
				) || (ops_ld->op_flags &
				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
			data_len -= ops_ld->z_c;
			parity_offset = data_len;
		} else {
			/* Compute RM out size and number of rows */
			parity_offset = (ops_ld->basegraph == 1 ? 20 : 8)
					* ops_ld->z_c - ops_ld->n_filler;
			uint16_t deRmOutSize = compute_harq_len(ops_ld) -
					ops_ld->n_filler;
			if (data_len > deRmOutSize)
				data_len = deRmOutSize;
			if (data_len > orig_op->segments[i].length)
				data_len = orig_op->segments[i].length;
		}
		/*
		 * HARQ output can have minor differences
		 * due to integer representation and related scaling
		 */
		for (j = 0, jj = 0; j < data_len; j++, jj++) {
			if (j == parity_offset) {
				/* Special Handling of the filler bits */
				for (k = 0; k < ops_ld->n_filler; k++) {
					if (harq_out[jj] !=
							llr_max_pre_scaling) {
						printf("HARQ Filler issue %d: %d %d\n",
							jj, harq_out[jj],
							llr_max_pre_scaling);
						byte_error++;
					}
					jj++;
				}
			}
			if (!(ops_ld->op_flags &
					RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) {
				if (ldpc_llr_decimals > 1)
					harq_out[jj] = (harq_out[jj] + 1)
						>> (ldpc_llr_decimals - 1);
				/* Saturated to S7 */
				if (harq_orig[j] > llr_max)
					harq_orig[j] = llr_max;
				if (harq_orig[j] < -llr_max)
					harq_orig[j] = -llr_max;
			}
			if (harq_orig[j] != harq_out[jj]) {
				error = (harq_orig[j] > harq_out[jj]) ?
						harq_orig[j] - harq_out[jj] :
						harq_out[jj] - harq_orig[j];
1833 harq_orig[j] : 1834 -harq_orig[j]; 1835 /* Residual quantization error */ 1836 if ((error > 8 && (abs_harq_origin < 1837 (llr_max - 16))) || 1838 (error > 16)) { 1839 printf("HARQ mismatch %d: exp %d act %d => %d\n", 1840 j, harq_orig[j], 1841 harq_out[jj], error); 1842 byte_error++; 1843 cum_error += error; 1844 } 1845 } 1846 } 1847 m = m->next; 1848 } 1849 1850 if (byte_error) 1851 TEST_ASSERT(byte_error <= 1, 1852 "HARQ output mismatch (%d) %d", 1853 byte_error, cum_error); 1854 1855 /* Validate total mbuf pkt length */ 1856 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 1857 TEST_ASSERT(total_data_size < pkt_len + 64, 1858 "Length of data differ in original (%u) and filled (%u) op", 1859 total_data_size, pkt_len); 1860 1861 return TEST_SUCCESS; 1862 } 1863 1864 static int 1865 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 1866 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 1867 { 1868 unsigned int i; 1869 int ret; 1870 struct op_data_entries *hard_data_orig = 1871 &test_vector.entries[DATA_HARD_OUTPUT]; 1872 struct op_data_entries *soft_data_orig = 1873 &test_vector.entries[DATA_SOFT_OUTPUT]; 1874 struct rte_bbdev_op_turbo_dec *ops_td; 1875 struct rte_bbdev_op_data *hard_output; 1876 struct rte_bbdev_op_data *soft_output; 1877 struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec; 1878 1879 for (i = 0; i < n; ++i) { 1880 ops_td = &ops[i]->turbo_dec; 1881 hard_output = &ops_td->hard_output; 1882 soft_output = &ops_td->soft_output; 1883 1884 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 1885 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 1886 "Returned iter_count (%d) > expected iter_count (%d)", 1887 ops_td->iter_count, ref_td->iter_count); 1888 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 1889 TEST_ASSERT_SUCCESS(ret, 1890 "Checking status and ordering for decoder failed"); 1891 1892 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 1893 hard_data_orig), 1894 "Hard output buffers (CB=%u) are not equal", 1895 i); 1896 1897 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT) 1898 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 1899 soft_data_orig), 1900 "Soft output buffers (CB=%u) are not equal", 1901 i); 1902 } 1903 1904 return TEST_SUCCESS; 1905 } 1906 1907 /* Check Number of code blocks errors */ 1908 static int 1909 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n) 1910 { 1911 unsigned int i; 1912 struct op_data_entries *hard_data_orig = 1913 &test_vector.entries[DATA_HARD_OUTPUT]; 1914 struct rte_bbdev_op_ldpc_dec *ops_td; 1915 struct rte_bbdev_op_data *hard_output; 1916 int errors = 0; 1917 struct rte_mbuf *m; 1918 1919 for (i = 0; i < n; ++i) { 1920 ops_td = &ops[i]->ldpc_dec; 1921 hard_output = &ops_td->hard_output; 1922 m = hard_output->data; 1923 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0), 1924 hard_data_orig->segments[0].addr, 1925 hard_data_orig->segments[0].length)) 1926 errors++; 1927 } 1928 return errors; 1929 } 1930 1931 static int 1932 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 1933 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 1934 { 1935 unsigned int i; 1936 int ret; 1937 struct op_data_entries *hard_data_orig = 1938 &test_vector.entries[DATA_HARD_OUTPUT]; 1939 struct op_data_entries *soft_data_orig = 1940 &test_vector.entries[DATA_SOFT_OUTPUT]; 1941 struct op_data_entries *harq_data_orig = 1942 &test_vector.entries[DATA_HARQ_OUTPUT]; 1943 struct rte_bbdev_op_ldpc_dec *ops_td; 1944 struct 
rte_bbdev_op_data *hard_output; 1945 struct rte_bbdev_op_data *harq_output; 1946 struct rte_bbdev_op_data *soft_output; 1947 struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec; 1948 1949 for (i = 0; i < n; ++i) { 1950 ops_td = &ops[i]->ldpc_dec; 1951 hard_output = &ops_td->hard_output; 1952 harq_output = &ops_td->harq_combined_output; 1953 soft_output = &ops_td->soft_output; 1954 1955 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 1956 TEST_ASSERT_SUCCESS(ret, 1957 "Checking status and ordering for decoder failed"); 1958 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 1959 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 1960 "Returned iter_count (%d) > expected iter_count (%d)", 1961 ops_td->iter_count, ref_td->iter_count); 1962 /* 1963 * We can ignore output data when the decoding failed to 1964 * converge or for loop-back cases 1965 */ 1966 if (!check_bit(ops[i]->ldpc_dec.op_flags, 1967 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 1968 ) && ( 1969 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR 1970 )) == 0) 1971 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 1972 hard_data_orig), 1973 "Hard output buffers (CB=%u) are not equal", 1974 i); 1975 1976 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE) 1977 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 1978 soft_data_orig), 1979 "Soft output buffers (CB=%u) are not equal", 1980 i); 1981 if (ref_op->ldpc_dec.op_flags & 1982 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) { 1983 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 1984 harq_data_orig, ops_td), 1985 "HARQ output buffers (CB=%u) are not equal", 1986 i); 1987 } 1988 if (ref_op->ldpc_dec.op_flags & 1989 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 1990 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 1991 harq_data_orig, ops_td), 1992 "HARQ output buffers (CB=%u) are not equal", 1993 i); 1994 1995 } 1996 1997 return TEST_SUCCESS; 1998 } 1999 2000 2001 static int 2002 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2003 struct rte_bbdev_enc_op *ref_op) 2004 { 2005 unsigned int i; 2006 int ret; 2007 struct op_data_entries *hard_data_orig = 2008 &test_vector.entries[DATA_HARD_OUTPUT]; 2009 2010 for (i = 0; i < n; ++i) { 2011 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2012 TEST_ASSERT_SUCCESS(ret, 2013 "Checking status and ordering for encoder failed"); 2014 TEST_ASSERT_SUCCESS(validate_op_chain( 2015 &ops[i]->turbo_enc.output, 2016 hard_data_orig), 2017 "Output buffers (CB=%u) are not equal", 2018 i); 2019 } 2020 2021 return TEST_SUCCESS; 2022 } 2023 2024 static int 2025 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2026 struct rte_bbdev_enc_op *ref_op) 2027 { 2028 unsigned int i; 2029 int ret; 2030 struct op_data_entries *hard_data_orig = 2031 &test_vector.entries[DATA_HARD_OUTPUT]; 2032 2033 for (i = 0; i < n; ++i) { 2034 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2035 TEST_ASSERT_SUCCESS(ret, 2036 "Checking status and ordering for encoder failed"); 2037 TEST_ASSERT_SUCCESS(validate_op_chain( 2038 &ops[i]->ldpc_enc.output, 2039 hard_data_orig), 2040 "Output buffers (CB=%u) are not equal", 2041 i); 2042 } 2043 2044 return TEST_SUCCESS; 2045 } 2046 2047 static void 2048 create_reference_dec_op(struct rte_bbdev_dec_op *op) 2049 { 2050 unsigned int i; 2051 struct op_data_entries *entry; 2052 2053 op->turbo_dec = test_vector.turbo_dec; 2054 entry = &test_vector.entries[DATA_INPUT]; 2055 for (i = 0; i < entry->nb_segments; ++i) 2056 
op->turbo_dec.input.length += 2057 entry->segments[i].length; 2058 } 2059 2060 static void 2061 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op) 2062 { 2063 unsigned int i; 2064 struct op_data_entries *entry; 2065 2066 op->ldpc_dec = test_vector.ldpc_dec; 2067 entry = &test_vector.entries[DATA_INPUT]; 2068 for (i = 0; i < entry->nb_segments; ++i) 2069 op->ldpc_dec.input.length += 2070 entry->segments[i].length; 2071 if (test_vector.ldpc_dec.op_flags & 2072 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) { 2073 entry = &test_vector.entries[DATA_HARQ_INPUT]; 2074 for (i = 0; i < entry->nb_segments; ++i) 2075 op->ldpc_dec.harq_combined_input.length += 2076 entry->segments[i].length; 2077 } 2078 } 2079 2080 2081 static void 2082 create_reference_enc_op(struct rte_bbdev_enc_op *op) 2083 { 2084 unsigned int i; 2085 struct op_data_entries *entry; 2086 2087 op->turbo_enc = test_vector.turbo_enc; 2088 entry = &test_vector.entries[DATA_INPUT]; 2089 for (i = 0; i < entry->nb_segments; ++i) 2090 op->turbo_enc.input.length += 2091 entry->segments[i].length; 2092 } 2093 2094 static void 2095 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op) 2096 { 2097 unsigned int i; 2098 struct op_data_entries *entry; 2099 2100 op->ldpc_enc = test_vector.ldpc_enc; 2101 entry = &test_vector.entries[DATA_INPUT]; 2102 for (i = 0; i < entry->nb_segments; ++i) 2103 op->ldpc_enc.input.length += 2104 entry->segments[i].length; 2105 } 2106 2107 static uint32_t 2108 calc_dec_TB_size(struct rte_bbdev_dec_op *op) 2109 { 2110 uint8_t i; 2111 uint32_t c, r, tb_size = 0; 2112 2113 if (op->turbo_dec.code_block_mode) { 2114 tb_size = op->turbo_dec.tb_params.k_neg; 2115 } else { 2116 c = op->turbo_dec.tb_params.c; 2117 r = op->turbo_dec.tb_params.r; 2118 for (i = 0; i < c-r; i++) 2119 tb_size += (r < op->turbo_dec.tb_params.c_neg) ? 2120 op->turbo_dec.tb_params.k_neg : 2121 op->turbo_dec.tb_params.k_pos; 2122 } 2123 return tb_size; 2124 } 2125 2126 static uint32_t 2127 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op) 2128 { 2129 uint8_t i; 2130 uint32_t c, r, tb_size = 0; 2131 uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 2132 2133 if (op->ldpc_dec.code_block_mode) { 2134 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; 2135 } else { 2136 c = op->ldpc_dec.tb_params.c; 2137 r = op->ldpc_dec.tb_params.r; 2138 for (i = 0; i < c-r; i++) 2139 tb_size += sys_cols * op->ldpc_dec.z_c 2140 - op->ldpc_dec.n_filler; 2141 } 2142 return tb_size; 2143 } 2144 2145 static uint32_t 2146 calc_enc_TB_size(struct rte_bbdev_enc_op *op) 2147 { 2148 uint8_t i; 2149 uint32_t c, r, tb_size = 0; 2150 2151 if (op->turbo_enc.code_block_mode) { 2152 tb_size = op->turbo_enc.tb_params.k_neg; 2153 } else { 2154 c = op->turbo_enc.tb_params.c; 2155 r = op->turbo_enc.tb_params.r; 2156 for (i = 0; i < c-r; i++) 2157 tb_size += (r < op->turbo_enc.tb_params.c_neg) ? 2158 op->turbo_enc.tb_params.k_neg : 2159 op->turbo_enc.tb_params.k_pos; 2160 } 2161 return tb_size; 2162 } 2163 2164 static uint32_t 2165 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op) 2166 { 2167 uint8_t i; 2168 uint32_t c, r, tb_size = 0; 2169 uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 
22 : 10;
2170
2171 if (op->ldpc_enc.code_block_mode) {
2172 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2173 } else {
2174 c = op->ldpc_enc.tb_params.c;
2175 r = op->ldpc_enc.tb_params.r;
2176 for (i = 0; i < c-r; i++)
2177 tb_size += sys_cols * op->ldpc_enc.z_c
2178 - op->ldpc_enc.n_filler;
2179 }
2180 return tb_size;
2181 }
2182
2183
2184 static int
2185 init_test_op_params(struct test_op_params *op_params,
2186 enum rte_bbdev_op_type op_type, const int expected_status,
2187 const int vector_mask, struct rte_mempool *ops_mp,
2188 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2189 {
2190 int ret = 0;
2191 if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2192 op_type == RTE_BBDEV_OP_LDPC_DEC)
2193 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2194 &op_params->ref_dec_op, 1);
2195 else
2196 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2197 &op_params->ref_enc_op, 1);
2198
2199 TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2200
2201 op_params->mp = ops_mp;
2202 op_params->burst_sz = burst_sz;
2203 op_params->num_to_process = num_to_process;
2204 op_params->num_lcores = num_lcores;
2205 op_params->vector_mask = vector_mask;
2206 if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2207 op_type == RTE_BBDEV_OP_LDPC_DEC)
2208 op_params->ref_dec_op->status = expected_status;
2209 else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2210 || op_type == RTE_BBDEV_OP_LDPC_ENC)
2211 op_params->ref_enc_op->status = expected_status;
2212 return 0;
2213 }
2214
2215 static int
2216 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2217 struct test_op_params *op_params)
2218 {
2219 int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2220 unsigned int i;
2221 struct active_device *ad;
2222 unsigned int burst_sz = get_burst_sz();
2223 enum rte_bbdev_op_type op_type = test_vector.op_type;
2224 const struct rte_bbdev_op_cap *capabilities = NULL;
2225
2226 ad = &active_devs[dev_id];
2227
2228 /* Check if device supports op_type */
2229 if (!is_avail_op(ad, test_vector.op_type))
2230 return TEST_SUCCESS;
2231
2232 struct rte_bbdev_info info;
2233 rte_bbdev_info_get(ad->dev_id, &info);
2234 socket_id = GET_SOCKET(info.socket_id);
2235
2236 f_ret = create_mempools(ad, socket_id, op_type,
2237 get_num_ops());
2238 if (f_ret != TEST_SUCCESS) {
2239 printf("Couldn't create mempools");
2240 goto fail;
2241 }
2242 if (op_type == RTE_BBDEV_OP_NONE)
2243 op_type = RTE_BBDEV_OP_TURBO_ENC;
2244
2245 f_ret = init_test_op_params(op_params, test_vector.op_type,
2246 test_vector.expected_status,
2247 test_vector.mask,
2248 ad->ops_mempool,
2249 burst_sz,
2250 get_num_ops(),
2251 get_num_lcores());
2252 if (f_ret != TEST_SUCCESS) {
2253 printf("Couldn't init test op params");
2254 goto fail;
2255 }
2256
2257
2258 /* Find capabilities */
2259 const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2260 for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2261 if (cap->type == test_vector.op_type) {
2262 capabilities = cap;
2263 break;
2264 }
2265 cap++;
2266 }
2267 TEST_ASSERT_NOT_NULL(capabilities,
2268 "Couldn't find capabilities");
2269
2270 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2271 create_reference_dec_op(op_params->ref_dec_op);
2272 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2273 create_reference_enc_op(op_params->ref_enc_op);
2274 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2275 create_reference_ldpc_enc_op(op_params->ref_enc_op);
2276 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2277
create_reference_ldpc_dec_op(op_params->ref_dec_op); 2278 2279 for (i = 0; i < ad->nb_queues; ++i) { 2280 f_ret = fill_queue_buffers(op_params, 2281 ad->in_mbuf_pool, 2282 ad->hard_out_mbuf_pool, 2283 ad->soft_out_mbuf_pool, 2284 ad->harq_in_mbuf_pool, 2285 ad->harq_out_mbuf_pool, 2286 ad->queue_ids[i], 2287 capabilities, 2288 info.drv.min_alignment, 2289 socket_id); 2290 if (f_ret != TEST_SUCCESS) { 2291 printf("Couldn't init queue buffers"); 2292 goto fail; 2293 } 2294 } 2295 2296 /* Run test case function */ 2297 t_ret = test_case_func(ad, op_params); 2298 2299 /* Free active device resources and return */ 2300 free_buffers(ad, op_params); 2301 return t_ret; 2302 2303 fail: 2304 free_buffers(ad, op_params); 2305 return TEST_FAILED; 2306 } 2307 2308 /* Run given test function per active device per supported op type 2309 * per burst size. 2310 */ 2311 static int 2312 run_test_case(test_case_function *test_case_func) 2313 { 2314 int ret = 0; 2315 uint8_t dev; 2316 2317 /* Alloc op_params */ 2318 struct test_op_params *op_params = rte_zmalloc(NULL, 2319 sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE); 2320 TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params", 2321 RTE_ALIGN(sizeof(struct test_op_params), 2322 RTE_CACHE_LINE_SIZE)); 2323 2324 /* For each device run test case function */ 2325 for (dev = 0; dev < nb_active_devs; ++dev) 2326 ret |= run_test_case_on_device(test_case_func, dev, op_params); 2327 2328 rte_free(op_params); 2329 2330 return ret; 2331 } 2332 2333 2334 /* Push back the HARQ output from DDR to host */ 2335 static void 2336 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2337 struct rte_bbdev_dec_op **ops, 2338 const uint16_t n) 2339 { 2340 uint16_t j; 2341 int save_status, ret; 2342 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; 2343 struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2344 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2345 bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 2346 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2347 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2348 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2349 for (j = 0; j < n; ++j) { 2350 if ((loopback && mem_out) || hc_out) { 2351 save_status = ops[j]->status; 2352 ops[j]->ldpc_dec.op_flags = 2353 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2354 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2355 if (h_comp) 2356 ops[j]->ldpc_dec.op_flags += 2357 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2358 ops[j]->ldpc_dec.harq_combined_input.offset = 2359 harq_offset; 2360 ops[j]->ldpc_dec.harq_combined_output.offset = 0; 2361 harq_offset += HARQ_INCR; 2362 if (!loopback) 2363 ops[j]->ldpc_dec.harq_combined_input.length = 2364 ops[j]->ldpc_dec.harq_combined_output.length; 2365 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2366 &ops[j], 1); 2367 ret = 0; 2368 while (ret == 0) 2369 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2370 dev_id, queue_id, 2371 &ops_deq[j], 1); 2372 ops[j]->ldpc_dec.op_flags = flags; 2373 ops[j]->status = save_status; 2374 } 2375 } 2376 } 2377 2378 /* 2379 * Push back the HARQ output from HW DDR to Host 2380 * Preload HARQ memory input and adjust HARQ offset 2381 */ 2382 static void 2383 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2384 struct rte_bbdev_dec_op **ops, const uint16_t n, 2385 bool preload) 2386 { 2387 uint16_t j; 2388 int ret; 2389 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; 2390 struct rte_bbdev_op_data save_hc_in, save_hc_out; 2391 
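/*
 * When the vector expects the HARQ combined input to reside in device DDR,
 * each op is first pushed once through the device in loopback +
 * internal-memory-output mode so that the host-provided HARQ data is copied
 * into external DDR at a queue-specific offset (queue_id * HARQ_INCR * 1024,
 * stepped by HARQ_INCR per code block). The original op flags and HARQ
 * buffers are then restored, and the real operations simply reference those
 * DDR offsets.
 */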
struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2392 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2393 bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2394 bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 2395 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2396 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2397 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2398 for (j = 0; j < n; ++j) { 2399 if ((mem_in || hc_in) && preload) { 2400 save_hc_in = ops[j]->ldpc_dec.harq_combined_input; 2401 save_hc_out = ops[j]->ldpc_dec.harq_combined_output; 2402 ops[j]->ldpc_dec.op_flags = 2403 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2404 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2405 if (h_comp) 2406 ops[j]->ldpc_dec.op_flags += 2407 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2408 ops[j]->ldpc_dec.harq_combined_output.offset = 2409 harq_offset; 2410 ops[j]->ldpc_dec.harq_combined_input.offset = 0; 2411 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2412 &ops[j], 1); 2413 ret = 0; 2414 while (ret == 0) 2415 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2416 dev_id, queue_id, &ops_deq[j], 1); 2417 ops[j]->ldpc_dec.op_flags = flags; 2418 ops[j]->ldpc_dec.harq_combined_input = save_hc_in; 2419 ops[j]->ldpc_dec.harq_combined_output = save_hc_out; 2420 } 2421 /* Adjust HARQ offset when we reach external DDR */ 2422 if (mem_in || hc_in) 2423 ops[j]->ldpc_dec.harq_combined_input.offset 2424 = harq_offset; 2425 if (mem_out || hc_out) 2426 ops[j]->ldpc_dec.harq_combined_output.offset 2427 = harq_offset; 2428 harq_offset += HARQ_INCR; 2429 } 2430 } 2431 2432 static void 2433 dequeue_event_callback(uint16_t dev_id, 2434 enum rte_bbdev_event_type event, void *cb_arg, 2435 void *ret_param) 2436 { 2437 int ret; 2438 uint16_t i; 2439 uint64_t total_time; 2440 uint16_t deq, burst_sz, num_ops; 2441 uint16_t queue_id = *(uint16_t *) ret_param; 2442 struct rte_bbdev_info info; 2443 double tb_len_bits; 2444 struct thread_params *tp = cb_arg; 2445 2446 /* Find matching thread params using queue_id */ 2447 for (i = 0; i < MAX_QUEUES; ++i, ++tp) 2448 if (tp->queue_id == queue_id) 2449 break; 2450 2451 if (i == MAX_QUEUES) { 2452 printf("%s: Queue_id from interrupt details was not found!\n", 2453 __func__); 2454 return; 2455 } 2456 2457 if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { 2458 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2459 printf( 2460 "Dequeue interrupt handler called for incorrect event!\n"); 2461 return; 2462 } 2463 2464 burst_sz = rte_atomic16_read(&tp->burst_sz); 2465 num_ops = tp->op_params->num_to_process; 2466 2467 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 2468 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 2469 &tp->dec_ops[ 2470 rte_atomic16_read(&tp->nb_dequeued)], 2471 burst_sz); 2472 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2473 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 2474 &tp->dec_ops[ 2475 rte_atomic16_read(&tp->nb_dequeued)], 2476 burst_sz); 2477 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2478 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 2479 &tp->enc_ops[ 2480 rte_atomic16_read(&tp->nb_dequeued)], 2481 burst_sz); 2482 else /*RTE_BBDEV_OP_TURBO_ENC*/ 2483 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 2484 &tp->enc_ops[ 2485 rte_atomic16_read(&tp->nb_dequeued)], 2486 burst_sz); 2487 2488 if (deq < burst_sz) { 2489 printf( 2490 "After receiving the interrupt all operations should be dequeued. 
Expected: %u, got: %u\n", 2491 burst_sz, deq); 2492 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2493 return; 2494 } 2495 2496 if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) { 2497 rte_atomic16_add(&tp->nb_dequeued, deq); 2498 return; 2499 } 2500 2501 total_time = rte_rdtsc_precise() - tp->start_time; 2502 2503 rte_bbdev_info_get(dev_id, &info); 2504 2505 ret = TEST_SUCCESS; 2506 2507 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2508 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2509 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op, 2510 tp->op_params->vector_mask); 2511 /* get the max of iter_count for all dequeued ops */ 2512 for (i = 0; i < num_ops; ++i) 2513 tp->iter_count = RTE_MAX( 2514 tp->dec_ops[i]->turbo_dec.iter_count, 2515 tp->iter_count); 2516 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2517 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) { 2518 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2519 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op); 2520 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2521 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) { 2522 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2523 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op); 2524 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2525 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 2526 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2527 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op, 2528 tp->op_params->vector_mask); 2529 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2530 } 2531 2532 if (ret) { 2533 printf("Buffers validation failed\n"); 2534 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2535 } 2536 2537 switch (test_vector.op_type) { 2538 case RTE_BBDEV_OP_TURBO_DEC: 2539 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op); 2540 break; 2541 case RTE_BBDEV_OP_TURBO_ENC: 2542 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op); 2543 break; 2544 case RTE_BBDEV_OP_LDPC_DEC: 2545 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op); 2546 break; 2547 case RTE_BBDEV_OP_LDPC_ENC: 2548 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op); 2549 break; 2550 case RTE_BBDEV_OP_NONE: 2551 tb_len_bits = 0.0; 2552 break; 2553 default: 2554 printf("Unknown op type: %d\n", test_vector.op_type); 2555 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2556 return; 2557 } 2558 2559 tp->ops_per_sec += ((double)num_ops) / 2560 ((double)total_time / (double)rte_get_tsc_hz()); 2561 tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) / 2562 ((double)total_time / (double)rte_get_tsc_hz()); 2563 2564 rte_atomic16_add(&tp->nb_dequeued, deq); 2565 } 2566 2567 static int 2568 throughput_intr_lcore_ldpc_dec(void *arg) 2569 { 2570 struct thread_params *tp = arg; 2571 unsigned int enqueued; 2572 const uint16_t queue_id = tp->queue_id; 2573 const uint16_t burst_sz = tp->op_params->burst_sz; 2574 const uint16_t num_to_process = tp->op_params->num_to_process; 2575 struct rte_bbdev_dec_op *ops[num_to_process]; 2576 struct test_buffers *bufs = NULL; 2577 struct rte_bbdev_info info; 2578 int ret, i, j; 2579 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2580 uint16_t num_to_enq, enq; 2581 2582 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 2583 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 2584 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 2585 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 2586 2587 TEST_ASSERT_SUCCESS((burst_sz > 
MAX_BURST), 2588 "BURST_SIZE should be <= %u", MAX_BURST); 2589 2590 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2591 "Failed to enable interrupts for dev: %u, queue_id: %u", 2592 tp->dev_id, queue_id); 2593 2594 rte_bbdev_info_get(tp->dev_id, &info); 2595 2596 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2597 "NUM_OPS cannot exceed %u for this device", 2598 info.drv.queue_size_lim); 2599 2600 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2601 2602 rte_atomic16_clear(&tp->processing_status); 2603 rte_atomic16_clear(&tp->nb_dequeued); 2604 2605 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2606 rte_pause(); 2607 2608 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2609 num_to_process); 2610 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2611 num_to_process); 2612 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2613 copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs, 2614 bufs->hard_outputs, bufs->soft_outputs, 2615 bufs->harq_inputs, bufs->harq_outputs, ref_op); 2616 2617 /* Set counter to validate the ordering */ 2618 for (j = 0; j < num_to_process; ++j) 2619 ops[j]->opaque_data = (void *)(uintptr_t)j; 2620 2621 for (j = 0; j < TEST_REPETITIONS; ++j) { 2622 for (i = 0; i < num_to_process; ++i) { 2623 if (!loopback) 2624 rte_pktmbuf_reset( 2625 ops[i]->ldpc_dec.hard_output.data); 2626 if (hc_out || loopback) 2627 mbuf_reset( 2628 ops[i]->ldpc_dec.harq_combined_output.data); 2629 } 2630 2631 tp->start_time = rte_rdtsc_precise(); 2632 for (enqueued = 0; enqueued < num_to_process;) { 2633 num_to_enq = burst_sz; 2634 2635 if (unlikely(num_to_process - enqueued < num_to_enq)) 2636 num_to_enq = num_to_process - enqueued; 2637 2638 enq = 0; 2639 do { 2640 enq += rte_bbdev_enqueue_ldpc_dec_ops( 2641 tp->dev_id, 2642 queue_id, &ops[enqueued], 2643 num_to_enq); 2644 } while (unlikely(num_to_enq != enq)); 2645 enqueued += enq; 2646 2647 /* Write to thread burst_sz current number of enqueued 2648 * descriptors. It ensures that proper number of 2649 * descriptors will be dequeued in callback 2650 * function - needed for last batch in case where 2651 * the number of operations is not a multiple of 2652 * burst size. 
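* For example, with num_to_process = 100 and burst_sz = 32, the last
* batch enqueues only 4 ops, so the callback dequeues 4 rather than a
* full burst of 32.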
2653 */ 2654 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2655 2656 /* Wait until processing of previous batch is 2657 * completed 2658 */ 2659 while (rte_atomic16_read(&tp->nb_dequeued) != 2660 (int16_t) enqueued) 2661 rte_pause(); 2662 } 2663 if (j != TEST_REPETITIONS - 1) 2664 rte_atomic16_clear(&tp->nb_dequeued); 2665 } 2666 2667 return TEST_SUCCESS; 2668 } 2669 2670 static int 2671 throughput_intr_lcore_dec(void *arg) 2672 { 2673 struct thread_params *tp = arg; 2674 unsigned int enqueued; 2675 const uint16_t queue_id = tp->queue_id; 2676 const uint16_t burst_sz = tp->op_params->burst_sz; 2677 const uint16_t num_to_process = tp->op_params->num_to_process; 2678 struct rte_bbdev_dec_op *ops[num_to_process]; 2679 struct test_buffers *bufs = NULL; 2680 struct rte_bbdev_info info; 2681 int ret, i, j; 2682 uint16_t num_to_enq, enq; 2683 2684 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2685 "BURST_SIZE should be <= %u", MAX_BURST); 2686 2687 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2688 "Failed to enable interrupts for dev: %u, queue_id: %u", 2689 tp->dev_id, queue_id); 2690 2691 rte_bbdev_info_get(tp->dev_id, &info); 2692 2693 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2694 "NUM_OPS cannot exceed %u for this device", 2695 info.drv.queue_size_lim); 2696 2697 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2698 2699 rte_atomic16_clear(&tp->processing_status); 2700 rte_atomic16_clear(&tp->nb_dequeued); 2701 2702 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2703 rte_pause(); 2704 2705 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2706 num_to_process); 2707 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2708 num_to_process); 2709 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2710 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs, 2711 bufs->hard_outputs, bufs->soft_outputs, 2712 tp->op_params->ref_dec_op); 2713 2714 /* Set counter to validate the ordering */ 2715 for (j = 0; j < num_to_process; ++j) 2716 ops[j]->opaque_data = (void *)(uintptr_t)j; 2717 2718 for (j = 0; j < TEST_REPETITIONS; ++j) { 2719 for (i = 0; i < num_to_process; ++i) 2720 rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data); 2721 2722 tp->start_time = rte_rdtsc_precise(); 2723 for (enqueued = 0; enqueued < num_to_process;) { 2724 num_to_enq = burst_sz; 2725 2726 if (unlikely(num_to_process - enqueued < num_to_enq)) 2727 num_to_enq = num_to_process - enqueued; 2728 2729 enq = 0; 2730 do { 2731 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 2732 queue_id, &ops[enqueued], 2733 num_to_enq); 2734 } while (unlikely(num_to_enq != enq)); 2735 enqueued += enq; 2736 2737 /* Write to thread burst_sz current number of enqueued 2738 * descriptors. It ensures that proper number of 2739 * descriptors will be dequeued in callback 2740 * function - needed for last batch in case where 2741 * the number of operations is not a multiple of 2742 * burst size. 
2743 */ 2744 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2745 2746 /* Wait until processing of previous batch is 2747 * completed 2748 */ 2749 while (rte_atomic16_read(&tp->nb_dequeued) != 2750 (int16_t) enqueued) 2751 rte_pause(); 2752 } 2753 if (j != TEST_REPETITIONS - 1) 2754 rte_atomic16_clear(&tp->nb_dequeued); 2755 } 2756 2757 return TEST_SUCCESS; 2758 } 2759 2760 static int 2761 throughput_intr_lcore_enc(void *arg) 2762 { 2763 struct thread_params *tp = arg; 2764 unsigned int enqueued; 2765 const uint16_t queue_id = tp->queue_id; 2766 const uint16_t burst_sz = tp->op_params->burst_sz; 2767 const uint16_t num_to_process = tp->op_params->num_to_process; 2768 struct rte_bbdev_enc_op *ops[num_to_process]; 2769 struct test_buffers *bufs = NULL; 2770 struct rte_bbdev_info info; 2771 int ret, i, j; 2772 uint16_t num_to_enq, enq; 2773 2774 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2775 "BURST_SIZE should be <= %u", MAX_BURST); 2776 2777 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2778 "Failed to enable interrupts for dev: %u, queue_id: %u", 2779 tp->dev_id, queue_id); 2780 2781 rte_bbdev_info_get(tp->dev_id, &info); 2782 2783 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2784 "NUM_OPS cannot exceed %u for this device", 2785 info.drv.queue_size_lim); 2786 2787 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2788 2789 rte_atomic16_clear(&tp->processing_status); 2790 rte_atomic16_clear(&tp->nb_dequeued); 2791 2792 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2793 rte_pause(); 2794 2795 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 2796 num_to_process); 2797 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2798 num_to_process); 2799 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2800 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs, 2801 bufs->hard_outputs, tp->op_params->ref_enc_op); 2802 2803 /* Set counter to validate the ordering */ 2804 for (j = 0; j < num_to_process; ++j) 2805 ops[j]->opaque_data = (void *)(uintptr_t)j; 2806 2807 for (j = 0; j < TEST_REPETITIONS; ++j) { 2808 for (i = 0; i < num_to_process; ++i) 2809 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 2810 2811 tp->start_time = rte_rdtsc_precise(); 2812 for (enqueued = 0; enqueued < num_to_process;) { 2813 num_to_enq = burst_sz; 2814 2815 if (unlikely(num_to_process - enqueued < num_to_enq)) 2816 num_to_enq = num_to_process - enqueued; 2817 2818 enq = 0; 2819 do { 2820 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 2821 queue_id, &ops[enqueued], 2822 num_to_enq); 2823 } while (unlikely(enq != num_to_enq)); 2824 enqueued += enq; 2825 2826 /* Write to thread burst_sz current number of enqueued 2827 * descriptors. It ensures that proper number of 2828 * descriptors will be dequeued in callback 2829 * function - needed for last batch in case where 2830 * the number of operations is not a multiple of 2831 * burst size. 
2832 */ 2833 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2834 2835 /* Wait until processing of previous batch is 2836 * completed 2837 */ 2838 while (rte_atomic16_read(&tp->nb_dequeued) != 2839 (int16_t) enqueued) 2840 rte_pause(); 2841 } 2842 if (j != TEST_REPETITIONS - 1) 2843 rte_atomic16_clear(&tp->nb_dequeued); 2844 } 2845 2846 return TEST_SUCCESS; 2847 } 2848 2849 2850 static int 2851 throughput_intr_lcore_ldpc_enc(void *arg) 2852 { 2853 struct thread_params *tp = arg; 2854 unsigned int enqueued; 2855 const uint16_t queue_id = tp->queue_id; 2856 const uint16_t burst_sz = tp->op_params->burst_sz; 2857 const uint16_t num_to_process = tp->op_params->num_to_process; 2858 struct rte_bbdev_enc_op *ops[num_to_process]; 2859 struct test_buffers *bufs = NULL; 2860 struct rte_bbdev_info info; 2861 int ret, i, j; 2862 uint16_t num_to_enq, enq; 2863 2864 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2865 "BURST_SIZE should be <= %u", MAX_BURST); 2866 2867 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2868 "Failed to enable interrupts for dev: %u, queue_id: %u", 2869 tp->dev_id, queue_id); 2870 2871 rte_bbdev_info_get(tp->dev_id, &info); 2872 2873 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2874 "NUM_OPS cannot exceed %u for this device", 2875 info.drv.queue_size_lim); 2876 2877 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2878 2879 rte_atomic16_clear(&tp->processing_status); 2880 rte_atomic16_clear(&tp->nb_dequeued); 2881 2882 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2883 rte_pause(); 2884 2885 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 2886 num_to_process); 2887 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2888 num_to_process); 2889 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2890 copy_reference_ldpc_enc_op(ops, num_to_process, 0, 2891 bufs->inputs, bufs->hard_outputs, 2892 tp->op_params->ref_enc_op); 2893 2894 /* Set counter to validate the ordering */ 2895 for (j = 0; j < num_to_process; ++j) 2896 ops[j]->opaque_data = (void *)(uintptr_t)j; 2897 2898 for (j = 0; j < TEST_REPETITIONS; ++j) { 2899 for (i = 0; i < num_to_process; ++i) 2900 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 2901 2902 tp->start_time = rte_rdtsc_precise(); 2903 for (enqueued = 0; enqueued < num_to_process;) { 2904 num_to_enq = burst_sz; 2905 2906 if (unlikely(num_to_process - enqueued < num_to_enq)) 2907 num_to_enq = num_to_process - enqueued; 2908 2909 enq = 0; 2910 do { 2911 enq += rte_bbdev_enqueue_ldpc_enc_ops( 2912 tp->dev_id, 2913 queue_id, &ops[enqueued], 2914 num_to_enq); 2915 } while (unlikely(enq != num_to_enq)); 2916 enqueued += enq; 2917 2918 /* Write to thread burst_sz current number of enqueued 2919 * descriptors. It ensures that proper number of 2920 * descriptors will be dequeued in callback 2921 * function - needed for last batch in case where 2922 * the number of operations is not a multiple of 2923 * burst size. 
2924 */ 2925 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2926 2927 /* Wait until processing of previous batch is 2928 * completed 2929 */ 2930 while (rte_atomic16_read(&tp->nb_dequeued) != 2931 (int16_t) enqueued) 2932 rte_pause(); 2933 } 2934 if (j != TEST_REPETITIONS - 1) 2935 rte_atomic16_clear(&tp->nb_dequeued); 2936 } 2937 2938 return TEST_SUCCESS; 2939 } 2940 2941 static int 2942 throughput_pmd_lcore_dec(void *arg) 2943 { 2944 struct thread_params *tp = arg; 2945 uint16_t enq, deq; 2946 uint64_t total_time = 0, start_time; 2947 const uint16_t queue_id = tp->queue_id; 2948 const uint16_t burst_sz = tp->op_params->burst_sz; 2949 const uint16_t num_ops = tp->op_params->num_to_process; 2950 struct rte_bbdev_dec_op *ops_enq[num_ops]; 2951 struct rte_bbdev_dec_op *ops_deq[num_ops]; 2952 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2953 struct test_buffers *bufs = NULL; 2954 int i, j, ret; 2955 struct rte_bbdev_info info; 2956 uint16_t num_to_enq; 2957 2958 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2959 "BURST_SIZE should be <= %u", MAX_BURST); 2960 2961 rte_bbdev_info_get(tp->dev_id, &info); 2962 2963 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 2964 "NUM_OPS cannot exceed %u for this device", 2965 info.drv.queue_size_lim); 2966 2967 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2968 2969 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2970 rte_pause(); 2971 2972 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 2973 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 2974 2975 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2976 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs, 2977 bufs->hard_outputs, bufs->soft_outputs, ref_op); 2978 2979 /* Set counter to validate the ordering */ 2980 for (j = 0; j < num_ops; ++j) 2981 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 2982 2983 for (i = 0; i < TEST_REPETITIONS; ++i) { 2984 2985 for (j = 0; j < num_ops; ++j) 2986 mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data); 2987 2988 start_time = rte_rdtsc_precise(); 2989 2990 for (enq = 0, deq = 0; enq < num_ops;) { 2991 num_to_enq = burst_sz; 2992 2993 if (unlikely(num_ops - enq < num_to_enq)) 2994 num_to_enq = num_ops - enq; 2995 2996 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 2997 queue_id, &ops_enq[enq], num_to_enq); 2998 2999 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3000 queue_id, &ops_deq[deq], enq - deq); 3001 } 3002 3003 /* dequeue the remaining */ 3004 while (deq < enq) { 3005 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3006 queue_id, &ops_deq[deq], enq - deq); 3007 } 3008 3009 total_time += rte_rdtsc_precise() - start_time; 3010 } 3011 3012 tp->iter_count = 0; 3013 /* get the max of iter_count for all dequeued ops */ 3014 for (i = 0; i < num_ops; ++i) { 3015 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, 3016 tp->iter_count); 3017 } 3018 3019 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3020 ret = validate_dec_op(ops_deq, num_ops, ref_op, 3021 tp->op_params->vector_mask); 3022 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3023 } 3024 3025 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3026 3027 double tb_len_bits = calc_dec_TB_size(ref_op); 3028 3029 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3030 ((double)total_time / (double)rte_get_tsc_hz()); 3031 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3032 1000000.0) / ((double)total_time / 3033 (double)rte_get_tsc_hz()); 3034 3035 return TEST_SUCCESS; 3036 } 3037 3038 static int 
3039 bler_pmd_lcore_ldpc_dec(void *arg) 3040 { 3041 struct thread_params *tp = arg; 3042 uint16_t enq, deq; 3043 uint64_t total_time = 0, start_time; 3044 const uint16_t queue_id = tp->queue_id; 3045 const uint16_t burst_sz = tp->op_params->burst_sz; 3046 const uint16_t num_ops = tp->op_params->num_to_process; 3047 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3048 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3049 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3050 struct test_buffers *bufs = NULL; 3051 int i, j, ret; 3052 float parity_bler = 0; 3053 struct rte_bbdev_info info; 3054 uint16_t num_to_enq; 3055 bool extDdr = check_bit(ldpc_cap_flags, 3056 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3057 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3058 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3059 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3060 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3061 3062 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3063 "BURST_SIZE should be <= %u", MAX_BURST); 3064 3065 rte_bbdev_info_get(tp->dev_id, &info); 3066 3067 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3068 "NUM_OPS cannot exceed %u for this device", 3069 info.drv.queue_size_lim); 3070 3071 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3072 3073 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3074 rte_pause(); 3075 3076 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3077 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3078 3079 /* For BLER tests we need to enable early termination */ 3080 if (!check_bit(ref_op->ldpc_dec.op_flags, 3081 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3082 ref_op->ldpc_dec.op_flags += 3083 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3084 ref_op->ldpc_dec.iter_max = get_iter_max(); 3085 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3086 3087 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3088 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3089 bufs->hard_outputs, bufs->soft_outputs, 3090 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3091 generate_llr_input(num_ops, bufs->inputs, ref_op); 3092 3093 /* Set counter to validate the ordering */ 3094 for (j = 0; j < num_ops; ++j) 3095 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3096 3097 for (i = 0; i < 1; ++i) { /* Could add more iterations */ 3098 for (j = 0; j < num_ops; ++j) { 3099 if (!loopback) 3100 mbuf_reset( 3101 ops_enq[j]->ldpc_dec.hard_output.data); 3102 if (hc_out || loopback) 3103 mbuf_reset( 3104 ops_enq[j]->ldpc_dec.harq_combined_output.data); 3105 } 3106 if (extDdr) { 3107 bool preload = i == (TEST_REPETITIONS - 1); 3108 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3109 num_ops, preload); 3110 } 3111 start_time = rte_rdtsc_precise(); 3112 3113 for (enq = 0, deq = 0; enq < num_ops;) { 3114 num_to_enq = burst_sz; 3115 3116 if (unlikely(num_ops - enq < num_to_enq)) 3117 num_to_enq = num_ops - enq; 3118 3119 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3120 queue_id, &ops_enq[enq], num_to_enq); 3121 3122 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3123 queue_id, &ops_deq[deq], enq - deq); 3124 } 3125 3126 /* dequeue the remaining */ 3127 while (deq < enq) { 3128 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3129 queue_id, &ops_deq[deq], enq - deq); 3130 } 3131 3132 total_time += rte_rdtsc_precise() - start_time; 3133 } 3134 3135 tp->iter_count = 0; 3136 tp->iter_average = 0; 3137 /* get the max of iter_count for all dequeued ops */ 3138 for (i = 0; i < num_ops; ++i) 
{ 3139 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3140 tp->iter_count); 3141 tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count; 3142 if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR)) 3143 parity_bler += 1.0; 3144 } 3145 3146 parity_bler /= num_ops; /* This one is based on SYND */ 3147 tp->iter_average /= num_ops; 3148 tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops; 3149 3150 if (test_vector.op_type != RTE_BBDEV_OP_NONE 3151 && tp->bler == 0 3152 && parity_bler == 0 3153 && !hc_out) { 3154 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3155 tp->op_params->vector_mask); 3156 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3157 } 3158 3159 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3160 3161 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3162 tp->ops_per_sec = ((double)num_ops * 1) / 3163 ((double)total_time / (double)rte_get_tsc_hz()); 3164 tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) / 3165 1000000.0) / ((double)total_time / 3166 (double)rte_get_tsc_hz()); 3167 3168 return TEST_SUCCESS; 3169 } 3170 3171 static int 3172 throughput_pmd_lcore_ldpc_dec(void *arg) 3173 { 3174 struct thread_params *tp = arg; 3175 uint16_t enq, deq; 3176 uint64_t total_time = 0, start_time; 3177 const uint16_t queue_id = tp->queue_id; 3178 const uint16_t burst_sz = tp->op_params->burst_sz; 3179 const uint16_t num_ops = tp->op_params->num_to_process; 3180 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3181 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3182 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3183 struct test_buffers *bufs = NULL; 3184 int i, j, ret; 3185 struct rte_bbdev_info info; 3186 uint16_t num_to_enq; 3187 bool extDdr = check_bit(ldpc_cap_flags, 3188 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3189 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3190 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3191 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3192 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3193 3194 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3195 "BURST_SIZE should be <= %u", MAX_BURST); 3196 3197 rte_bbdev_info_get(tp->dev_id, &info); 3198 3199 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3200 "NUM_OPS cannot exceed %u for this device", 3201 info.drv.queue_size_lim); 3202 3203 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3204 3205 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3206 rte_pause(); 3207 3208 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3209 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3210 3211 /* For throughput tests we need to disable early termination */ 3212 if (check_bit(ref_op->ldpc_dec.op_flags, 3213 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3214 ref_op->ldpc_dec.op_flags -= 3215 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3216 ref_op->ldpc_dec.iter_max = get_iter_max(); 3217 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3218 3219 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3220 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3221 bufs->hard_outputs, bufs->soft_outputs, 3222 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3223 3224 /* Set counter to validate the ordering */ 3225 for (j = 0; j < num_ops; ++j) 3226 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3227 3228 for (i = 0; i < TEST_REPETITIONS; ++i) { 3229 for (j = 0; j < num_ops; ++j) { 3230 if (!loopback) 3231 mbuf_reset( 3232 ops_enq[j]->ldpc_dec.hard_output.data); 3233 if (hc_out || loopback) 3234 
mbuf_reset( 3235 ops_enq[j]->ldpc_dec.harq_combined_output.data); 3236 } 3237 if (extDdr) { 3238 bool preload = i == (TEST_REPETITIONS - 1); 3239 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3240 num_ops, preload); 3241 } 3242 start_time = rte_rdtsc_precise(); 3243 3244 for (enq = 0, deq = 0; enq < num_ops;) { 3245 num_to_enq = burst_sz; 3246 3247 if (unlikely(num_ops - enq < num_to_enq)) 3248 num_to_enq = num_ops - enq; 3249 3250 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3251 queue_id, &ops_enq[enq], num_to_enq); 3252 3253 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3254 queue_id, &ops_deq[deq], enq - deq); 3255 } 3256 3257 /* dequeue the remaining */ 3258 while (deq < enq) { 3259 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3260 queue_id, &ops_deq[deq], enq - deq); 3261 } 3262 3263 total_time += rte_rdtsc_precise() - start_time; 3264 } 3265 3266 tp->iter_count = 0; 3267 /* get the max of iter_count for all dequeued ops */ 3268 for (i = 0; i < num_ops; ++i) { 3269 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3270 tp->iter_count); 3271 } 3272 if (extDdr) { 3273 /* Read loopback is not thread safe */ 3274 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops); 3275 } 3276 3277 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3278 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3279 tp->op_params->vector_mask); 3280 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3281 } 3282 3283 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3284 3285 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3286 3287 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3288 ((double)total_time / (double)rte_get_tsc_hz()); 3289 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3290 1000000.0) / ((double)total_time / 3291 (double)rte_get_tsc_hz()); 3292 3293 return TEST_SUCCESS; 3294 } 3295 3296 static int 3297 throughput_pmd_lcore_enc(void *arg) 3298 { 3299 struct thread_params *tp = arg; 3300 uint16_t enq, deq; 3301 uint64_t total_time = 0, start_time; 3302 const uint16_t queue_id = tp->queue_id; 3303 const uint16_t burst_sz = tp->op_params->burst_sz; 3304 const uint16_t num_ops = tp->op_params->num_to_process; 3305 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3306 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3307 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3308 struct test_buffers *bufs = NULL; 3309 int i, j, ret; 3310 struct rte_bbdev_info info; 3311 uint16_t num_to_enq; 3312 3313 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3314 "BURST_SIZE should be <= %u", MAX_BURST); 3315 3316 rte_bbdev_info_get(tp->dev_id, &info); 3317 3318 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3319 "NUM_OPS cannot exceed %u for this device", 3320 info.drv.queue_size_lim); 3321 3322 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3323 3324 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3325 rte_pause(); 3326 3327 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3328 num_ops); 3329 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3330 num_ops); 3331 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3332 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3333 bufs->hard_outputs, ref_op); 3334 3335 /* Set counter to validate the ordering */ 3336 for (j = 0; j < num_ops; ++j) 3337 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3338 3339 for (i = 0; i < TEST_REPETITIONS; ++i) { 3340 3341 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3342 for (j = 0; j < num_ops; ++j) 3343 
mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3344 3345 start_time = rte_rdtsc_precise(); 3346 3347 for (enq = 0, deq = 0; enq < num_ops;) { 3348 num_to_enq = burst_sz; 3349 3350 if (unlikely(num_ops - enq < num_to_enq)) 3351 num_to_enq = num_ops - enq; 3352 3353 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 3354 queue_id, &ops_enq[enq], num_to_enq); 3355 3356 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3357 queue_id, &ops_deq[deq], enq - deq); 3358 } 3359 3360 /* dequeue the remaining */ 3361 while (deq < enq) { 3362 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3363 queue_id, &ops_deq[deq], enq - deq); 3364 } 3365 3366 total_time += rte_rdtsc_precise() - start_time; 3367 } 3368 3369 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3370 ret = validate_enc_op(ops_deq, num_ops, ref_op); 3371 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3372 } 3373 3374 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 3375 3376 double tb_len_bits = calc_enc_TB_size(ref_op); 3377 3378 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3379 ((double)total_time / (double)rte_get_tsc_hz()); 3380 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 3381 / 1000000.0) / ((double)total_time / 3382 (double)rte_get_tsc_hz()); 3383 3384 return TEST_SUCCESS; 3385 } 3386 3387 static int 3388 throughput_pmd_lcore_ldpc_enc(void *arg) 3389 { 3390 struct thread_params *tp = arg; 3391 uint16_t enq, deq; 3392 uint64_t total_time = 0, start_time; 3393 const uint16_t queue_id = tp->queue_id; 3394 const uint16_t burst_sz = tp->op_params->burst_sz; 3395 const uint16_t num_ops = tp->op_params->num_to_process; 3396 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3397 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3398 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3399 struct test_buffers *bufs = NULL; 3400 int i, j, ret; 3401 struct rte_bbdev_info info; 3402 uint16_t num_to_enq; 3403 3404 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3405 "BURST_SIZE should be <= %u", MAX_BURST); 3406 3407 rte_bbdev_info_get(tp->dev_id, &info); 3408 3409 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3410 "NUM_OPS cannot exceed %u for this device", 3411 info.drv.queue_size_lim); 3412 3413 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3414 3415 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3416 rte_pause(); 3417 3418 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3419 num_ops); 3420 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3421 num_ops); 3422 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3423 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3424 bufs->hard_outputs, ref_op); 3425 3426 /* Set counter to validate the ordering */ 3427 for (j = 0; j < num_ops; ++j) 3428 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3429 3430 for (i = 0; i < TEST_REPETITIONS; ++i) { 3431 3432 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3433 for (j = 0; j < num_ops; ++j) 3434 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3435 3436 start_time = rte_rdtsc_precise(); 3437 3438 for (enq = 0, deq = 0; enq < num_ops;) { 3439 num_to_enq = burst_sz; 3440 3441 if (unlikely(num_ops - enq < num_to_enq)) 3442 num_to_enq = num_ops - enq; 3443 3444 enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, 3445 queue_id, &ops_enq[enq], num_to_enq); 3446 3447 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 3448 queue_id, &ops_deq[deq], enq - deq); 3449 } 3450 3451 /* dequeue the remaining */ 3452 while (deq < enq) { 3453 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 3454 
queue_id, &ops_deq[deq], enq - deq); 3455 } 3456 3457 total_time += rte_rdtsc_precise() - start_time; 3458 } 3459 3460 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3461 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op); 3462 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3463 } 3464 3465 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 3466 3467 double tb_len_bits = calc_ldpc_enc_TB_size(ref_op); 3468 3469 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3470 ((double)total_time / (double)rte_get_tsc_hz()); 3471 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 3472 / 1000000.0) / ((double)total_time / 3473 (double)rte_get_tsc_hz()); 3474 3475 return TEST_SUCCESS; 3476 } 3477 3478 static void 3479 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores) 3480 { 3481 unsigned int iter = 0; 3482 double total_mops = 0, total_mbps = 0; 3483 3484 for (iter = 0; iter < used_cores; iter++) { 3485 printf( 3486 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n", 3487 t_params[iter].lcore_id, t_params[iter].ops_per_sec, 3488 t_params[iter].mbps); 3489 total_mops += t_params[iter].ops_per_sec; 3490 total_mbps += t_params[iter].mbps; 3491 } 3492 printf( 3493 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n", 3494 used_cores, total_mops, total_mbps); 3495 } 3496 3497 /* Aggregate the performance results over the number of cores used */ 3498 static void 3499 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores) 3500 { 3501 unsigned int core_idx = 0; 3502 double total_mops = 0, total_mbps = 0; 3503 uint8_t iter_count = 0; 3504 3505 for (core_idx = 0; core_idx < used_cores; core_idx++) { 3506 printf( 3507 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n", 3508 t_params[core_idx].lcore_id, 3509 t_params[core_idx].ops_per_sec, 3510 t_params[core_idx].mbps, 3511 t_params[core_idx].iter_count); 3512 total_mops += t_params[core_idx].ops_per_sec; 3513 total_mbps += t_params[core_idx].mbps; 3514 iter_count = RTE_MAX(iter_count, 3515 t_params[core_idx].iter_count); 3516 } 3517 printf( 3518 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n", 3519 used_cores, total_mops, total_mbps, iter_count); 3520 } 3521 3522 /* Aggregate the performance results over the number of cores used */ 3523 static void 3524 print_dec_bler(struct thread_params *t_params, unsigned int used_cores) 3525 { 3526 unsigned int core_idx = 0; 3527 double total_mbps = 0, total_bler = 0, total_iter = 0; 3528 double snr = get_snr(); 3529 3530 for (core_idx = 0; core_idx < used_cores; core_idx++) { 3531 printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n", 3532 t_params[core_idx].lcore_id, 3533 t_params[core_idx].bler * 100, 3534 t_params[core_idx].iter_average, 3535 t_params[core_idx].mbps, 3536 get_vector_filename()); 3537 total_mbps += t_params[core_idx].mbps; 3538 total_bler += t_params[core_idx].bler; 3539 total_iter += t_params[core_idx].iter_average; 3540 } 3541 total_bler /= used_cores; 3542 total_iter /= used_cores; 3543 3544 printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n", 3545 snr, total_bler * 100, total_iter, get_iter_max(), 3546 total_mbps, get_vector_filename()); 3547 } 3548 3549 /* 3550 * Test function that determines BLER wireless performance 3551 */ 3552 static int 3553 bler_test(struct active_device *ad, 3554 struct test_op_params *op_params) 3555 { 3556 int ret; 3557 unsigned int lcore_id, used_cores = 0; 3558 struct thread_params *t_params; 3559 struct 
rte_bbdev_info info; 3560 lcore_function_t *bler_function; 3561 uint16_t num_lcores; 3562 const char *op_type_str; 3563 3564 rte_bbdev_info_get(ad->dev_id, &info); 3565 3566 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 3567 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 3568 test_vector.op_type); 3569 3570 printf("+ ------------------------------------------------------- +\n"); 3571 printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 3572 info.dev_name, ad->nb_queues, op_params->burst_sz, 3573 op_params->num_to_process, op_params->num_lcores, 3574 op_type_str, 3575 intr_enabled ? "Interrupt mode" : "PMD mode", 3576 (double)rte_get_tsc_hz() / 1000000000.0); 3577 3578 /* Set number of lcores */ 3579 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 3580 ? ad->nb_queues 3581 : op_params->num_lcores; 3582 3583 /* Allocate memory for thread parameters structure */ 3584 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 3585 RTE_CACHE_LINE_SIZE); 3586 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 3587 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 3588 RTE_CACHE_LINE_SIZE)); 3589 3590 if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3591 bler_function = bler_pmd_lcore_ldpc_dec; 3592 else 3593 return TEST_SKIPPED; 3594 3595 rte_atomic16_set(&op_params->sync, SYNC_WAIT); 3596 3597 /* Master core is set at first entry */ 3598 t_params[0].dev_id = ad->dev_id; 3599 t_params[0].lcore_id = rte_lcore_id(); 3600 t_params[0].op_params = op_params; 3601 t_params[0].queue_id = ad->queue_ids[used_cores++]; 3602 t_params[0].iter_count = 0; 3603 3604 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 3605 if (used_cores >= num_lcores) 3606 break; 3607 3608 t_params[used_cores].dev_id = ad->dev_id; 3609 t_params[used_cores].lcore_id = lcore_id; 3610 t_params[used_cores].op_params = op_params; 3611 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 3612 t_params[used_cores].iter_count = 0; 3613 3614 rte_eal_remote_launch(bler_function, 3615 &t_params[used_cores++], lcore_id); 3616 } 3617 3618 rte_atomic16_set(&op_params->sync, SYNC_START); 3619 ret = bler_function(&t_params[0]); 3620 3621 /* Master core is always used */ 3622 for (used_cores = 1; used_cores < num_lcores; used_cores++) 3623 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 3624 3625 print_dec_bler(t_params, num_lcores); 3626 3627 /* Return if test failed */ 3628 if (ret) { 3629 rte_free(t_params); 3630 return ret; 3631 } 3632 3633 /* Function to print something here*/ 3634 rte_free(t_params); 3635 return ret; 3636 } 3637 3638 /* 3639 * Test function that determines how long an enqueue + dequeue of a burst 3640 * takes on available lcores. 
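* Each lcore repeats its enqueue/dequeue loop TEST_REPETITIONS times and
* records per-core ops/s and Mbps; the results are then aggregated over the
* cores used (see print_enc_throughput()/print_dec_throughput() above).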
3641 */ 3642 static int 3643 throughput_test(struct active_device *ad, 3644 struct test_op_params *op_params) 3645 { 3646 int ret; 3647 unsigned int lcore_id, used_cores = 0; 3648 struct thread_params *t_params, *tp; 3649 struct rte_bbdev_info info; 3650 lcore_function_t *throughput_function; 3651 uint16_t num_lcores; 3652 const char *op_type_str; 3653 3654 rte_bbdev_info_get(ad->dev_id, &info); 3655 3656 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 3657 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 3658 test_vector.op_type); 3659 3660 printf("+ ------------------------------------------------------- +\n"); 3661 printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 3662 info.dev_name, ad->nb_queues, op_params->burst_sz, 3663 op_params->num_to_process, op_params->num_lcores, 3664 op_type_str, 3665 intr_enabled ? "Interrupt mode" : "PMD mode", 3666 (double)rte_get_tsc_hz() / 1000000000.0); 3667 3668 /* Set number of lcores */ 3669 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 3670 ? ad->nb_queues 3671 : op_params->num_lcores; 3672 3673 /* Allocate memory for thread parameters structure */ 3674 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 3675 RTE_CACHE_LINE_SIZE); 3676 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 3677 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 3678 RTE_CACHE_LINE_SIZE)); 3679 3680 if (intr_enabled) { 3681 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 3682 throughput_function = throughput_intr_lcore_dec; 3683 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3684 throughput_function = throughput_intr_lcore_ldpc_dec; 3685 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 3686 throughput_function = throughput_intr_lcore_enc; 3687 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3688 throughput_function = throughput_intr_lcore_ldpc_enc; 3689 else 3690 throughput_function = throughput_intr_lcore_enc; 3691 3692 /* Dequeue interrupt callback registration */ 3693 ret = rte_bbdev_callback_register(ad->dev_id, 3694 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback, 3695 t_params); 3696 if (ret < 0) { 3697 rte_free(t_params); 3698 return ret; 3699 } 3700 } else { 3701 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 3702 throughput_function = throughput_pmd_lcore_dec; 3703 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3704 throughput_function = throughput_pmd_lcore_ldpc_dec; 3705 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 3706 throughput_function = throughput_pmd_lcore_enc; 3707 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3708 throughput_function = throughput_pmd_lcore_ldpc_enc; 3709 else 3710 throughput_function = throughput_pmd_lcore_enc; 3711 } 3712 3713 rte_atomic16_set(&op_params->sync, SYNC_WAIT); 3714 3715 /* Master core is set at first entry */ 3716 t_params[0].dev_id = ad->dev_id; 3717 t_params[0].lcore_id = rte_lcore_id(); 3718 t_params[0].op_params = op_params; 3719 t_params[0].queue_id = ad->queue_ids[used_cores++]; 3720 t_params[0].iter_count = 0; 3721 3722 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 3723 if (used_cores >= num_lcores) 3724 break; 3725 3726 t_params[used_cores].dev_id = ad->dev_id; 3727 t_params[used_cores].lcore_id = lcore_id; 3728 t_params[used_cores].op_params = op_params; 3729 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 3730 t_params[used_cores].iter_count = 0; 3731 3732 rte_eal_remote_launch(throughput_function, 3733 
			&t_params[used_cores++], lcore_id);
	}

	rte_atomic16_set(&op_params->sync, SYNC_START);
	ret = throughput_function(&t_params[0]);

	/* Master core is always used */
	for (used_cores = 1; used_cores < num_lcores; used_cores++)
		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

	/* Return if test failed */
	if (ret) {
		rte_free(t_params);
		return ret;
	}

	/* Print throughput if interrupts are disabled and test passed */
	if (!intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else
			print_enc_throughput(t_params, num_lcores);
		rte_free(t_params);
		return ret;
	}

	/* In the interrupt TC we need to wait for the interrupt callback to
	 * dequeue all pending operations. Skip waiting for queues which
	 * reported an error via the processing_status variable.
	 * Wait for master lcore operations first.
	 */
	tp = &t_params[0];
	while ((rte_atomic16_read(&tp->nb_dequeued) <
			op_params->num_to_process) &&
			(rte_atomic16_read(&tp->processing_status) !=
			TEST_FAILED))
		rte_pause();

	tp->ops_per_sec /= TEST_REPETITIONS;
	tp->mbps /= TEST_REPETITIONS;
	ret |= (int)rte_atomic16_read(&tp->processing_status);

	/* Wait for slave lcores operations */
	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
		tp = &t_params[used_cores];

		while ((rte_atomic16_read(&tp->nb_dequeued) <
				op_params->num_to_process) &&
				(rte_atomic16_read(&tp->processing_status) !=
				TEST_FAILED))
			rte_pause();

		tp->ops_per_sec /= TEST_REPETITIONS;
		tp->mbps /= TEST_REPETITIONS;
		ret |= (int)rte_atomic16_read(&tp->processing_status);
	}

	/* Print throughput if test passed */
	if (!ret) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			print_enc_throughput(t_params, num_lcores);
	}

	rte_free(t_params);
	return ret;
}

static int
latency_test_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
		int vector_mask, uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

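		/*
		 * Latency measurement below: a TSC timestamp is taken right
		 * before the enqueue, and the elapsed time is captured when
		 * the first dequeue call returns at least one operation.
		 * Min/max/total are accumulated across all bursts.
		 */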
start_time = rte_rdtsc_precise(); 3840 3841 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq], 3842 burst_sz); 3843 TEST_ASSERT(enq == burst_sz, 3844 "Error enqueueing burst, expected %u, got %u", 3845 burst_sz, enq); 3846 3847 /* Dequeue */ 3848 do { 3849 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 3850 &ops_deq[deq], burst_sz - deq); 3851 if (likely(first_time && (deq > 0))) { 3852 last_time = rte_rdtsc_precise() - start_time; 3853 first_time = false; 3854 } 3855 } while (unlikely(burst_sz != deq)); 3856 3857 *max_time = RTE_MAX(*max_time, last_time); 3858 *min_time = RTE_MIN(*min_time, last_time); 3859 *total_time += last_time; 3860 3861 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3862 ret = validate_dec_op(ops_deq, burst_sz, ref_op, 3863 vector_mask); 3864 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3865 } 3866 3867 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 3868 dequeued += deq; 3869 } 3870 3871 return i; 3872 } 3873 3874 static int 3875 latency_test_ldpc_dec(struct rte_mempool *mempool, 3876 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 3877 int vector_mask, uint16_t dev_id, uint16_t queue_id, 3878 const uint16_t num_to_process, uint16_t burst_sz, 3879 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 3880 { 3881 int ret = TEST_SUCCESS; 3882 uint16_t i, j, dequeued; 3883 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3884 uint64_t start_time = 0, last_time = 0; 3885 bool extDdr = ldpc_cap_flags & 3886 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 3887 3888 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3889 uint16_t enq = 0, deq = 0; 3890 bool first_time = true; 3891 last_time = 0; 3892 3893 if (unlikely(num_to_process - dequeued < burst_sz)) 3894 burst_sz = num_to_process - dequeued; 3895 3896 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 3897 TEST_ASSERT_SUCCESS(ret, 3898 "rte_bbdev_dec_op_alloc_bulk() failed"); 3899 3900 /* For latency tests we need to disable early termination */ 3901 if (check_bit(ref_op->ldpc_dec.op_flags, 3902 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3903 ref_op->ldpc_dec.op_flags -= 3904 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3905 ref_op->ldpc_dec.iter_max = get_iter_max(); 3906 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3907 3908 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3909 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 3910 bufs->inputs, 3911 bufs->hard_outputs, 3912 bufs->soft_outputs, 3913 bufs->harq_inputs, 3914 bufs->harq_outputs, 3915 ref_op); 3916 3917 if (extDdr) 3918 preload_harq_ddr(dev_id, queue_id, ops_enq, 3919 burst_sz, true); 3920 3921 /* Set counter to validate the ordering */ 3922 for (j = 0; j < burst_sz; ++j) 3923 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3924 3925 start_time = rte_rdtsc_precise(); 3926 3927 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 3928 &ops_enq[enq], burst_sz); 3929 TEST_ASSERT(enq == burst_sz, 3930 "Error enqueueing burst, expected %u, got %u", 3931 burst_sz, enq); 3932 3933 /* Dequeue */ 3934 do { 3935 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 3936 &ops_deq[deq], burst_sz - deq); 3937 if (likely(first_time && (deq > 0))) { 3938 last_time = rte_rdtsc_precise() - start_time; 3939 first_time = false; 3940 } 3941 } while (unlikely(burst_sz != deq)); 3942 3943 *max_time = RTE_MAX(*max_time, last_time); 3944 *min_time = RTE_MIN(*min_time, last_time); 3945 *total_time += last_time; 3946 3947 if (extDdr) 3948 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 
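		/*
		 * With external HARQ memory exposed by the decoder
		 * (RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE set in
		 * ldpc_cap_flags), HARQ data is preloaded into device DDR
		 * before the timed enqueue and read back here after the
		 * timing window, so the DDR transfers do not count towards
		 * the measured latency. Early termination is also disabled
		 * above (ITERATION_STOP flag cleared, iter_count pinned to
		 * iter_max) so every operation runs a fixed iteration count.
		 */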
3949 3950 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3951 ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, 3952 vector_mask); 3953 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3954 } 3955 3956 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 3957 dequeued += deq; 3958 } 3959 return i; 3960 } 3961 3962 static int 3963 latency_test_enc(struct rte_mempool *mempool, 3964 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 3965 uint16_t dev_id, uint16_t queue_id, 3966 const uint16_t num_to_process, uint16_t burst_sz, 3967 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 3968 { 3969 int ret = TEST_SUCCESS; 3970 uint16_t i, j, dequeued; 3971 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3972 uint64_t start_time = 0, last_time = 0; 3973 3974 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3975 uint16_t enq = 0, deq = 0; 3976 bool first_time = true; 3977 last_time = 0; 3978 3979 if (unlikely(num_to_process - dequeued < burst_sz)) 3980 burst_sz = num_to_process - dequeued; 3981 3982 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 3983 TEST_ASSERT_SUCCESS(ret, 3984 "rte_bbdev_enc_op_alloc_bulk() failed"); 3985 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3986 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 3987 bufs->inputs, 3988 bufs->hard_outputs, 3989 ref_op); 3990 3991 /* Set counter to validate the ordering */ 3992 for (j = 0; j < burst_sz; ++j) 3993 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3994 3995 start_time = rte_rdtsc_precise(); 3996 3997 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq], 3998 burst_sz); 3999 TEST_ASSERT(enq == burst_sz, 4000 "Error enqueueing burst, expected %u, got %u", 4001 burst_sz, enq); 4002 4003 /* Dequeue */ 4004 do { 4005 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4006 &ops_deq[deq], burst_sz - deq); 4007 if (likely(first_time && (deq > 0))) { 4008 last_time += rte_rdtsc_precise() - start_time; 4009 first_time = false; 4010 } 4011 } while (unlikely(burst_sz != deq)); 4012 4013 *max_time = RTE_MAX(*max_time, last_time); 4014 *min_time = RTE_MIN(*min_time, last_time); 4015 *total_time += last_time; 4016 4017 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4018 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4019 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4020 } 4021 4022 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4023 dequeued += deq; 4024 } 4025 4026 return i; 4027 } 4028 4029 static int 4030 latency_test_ldpc_enc(struct rte_mempool *mempool, 4031 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4032 uint16_t dev_id, uint16_t queue_id, 4033 const uint16_t num_to_process, uint16_t burst_sz, 4034 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4035 { 4036 int ret = TEST_SUCCESS; 4037 uint16_t i, j, dequeued; 4038 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4039 uint64_t start_time = 0, last_time = 0; 4040 4041 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4042 uint16_t enq = 0, deq = 0; 4043 bool first_time = true; 4044 last_time = 0; 4045 4046 if (unlikely(num_to_process - dequeued < burst_sz)) 4047 burst_sz = num_to_process - dequeued; 4048 4049 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4050 TEST_ASSERT_SUCCESS(ret, 4051 "rte_bbdev_enc_op_alloc_bulk() failed"); 4052 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4053 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 4054 bufs->inputs, 4055 bufs->hard_outputs, 4056 ref_op); 4057 4058 /* Set counter to validate the 
ordering */ 4059 for (j = 0; j < burst_sz; ++j) 4060 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4061 4062 start_time = rte_rdtsc_precise(); 4063 4064 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 4065 &ops_enq[enq], burst_sz); 4066 TEST_ASSERT(enq == burst_sz, 4067 "Error enqueueing burst, expected %u, got %u", 4068 burst_sz, enq); 4069 4070 /* Dequeue */ 4071 do { 4072 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4073 &ops_deq[deq], burst_sz - deq); 4074 if (likely(first_time && (deq > 0))) { 4075 last_time += rte_rdtsc_precise() - start_time; 4076 first_time = false; 4077 } 4078 } while (unlikely(burst_sz != deq)); 4079 4080 *max_time = RTE_MAX(*max_time, last_time); 4081 *min_time = RTE_MIN(*min_time, last_time); 4082 *total_time += last_time; 4083 4084 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4085 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4086 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4087 } 4088 4089 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4090 dequeued += deq; 4091 } 4092 4093 return i; 4094 } 4095 4096 static int 4097 latency_test(struct active_device *ad, 4098 struct test_op_params *op_params) 4099 { 4100 int iter; 4101 uint16_t burst_sz = op_params->burst_sz; 4102 const uint16_t num_to_process = op_params->num_to_process; 4103 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4104 const uint16_t queue_id = ad->queue_ids[0]; 4105 struct test_buffers *bufs = NULL; 4106 struct rte_bbdev_info info; 4107 uint64_t total_time, min_time, max_time; 4108 const char *op_type_str; 4109 4110 total_time = max_time = 0; 4111 min_time = UINT64_MAX; 4112 4113 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4114 "BURST_SIZE should be <= %u", MAX_BURST); 4115 4116 rte_bbdev_info_get(ad->dev_id, &info); 4117 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4118 4119 op_type_str = rte_bbdev_op_type_str(op_type); 4120 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4121 4122 printf("+ ------------------------------------------------------- +\n"); 4123 printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 4124 info.dev_name, burst_sz, num_to_process, op_type_str); 4125 4126 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 4127 iter = latency_test_dec(op_params->mp, bufs, 4128 op_params->ref_dec_op, op_params->vector_mask, 4129 ad->dev_id, queue_id, num_to_process, 4130 burst_sz, &total_time, &min_time, &max_time); 4131 else if (op_type == RTE_BBDEV_OP_TURBO_ENC) 4132 iter = latency_test_enc(op_params->mp, bufs, 4133 op_params->ref_enc_op, ad->dev_id, queue_id, 4134 num_to_process, burst_sz, &total_time, 4135 &min_time, &max_time); 4136 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4137 iter = latency_test_ldpc_enc(op_params->mp, bufs, 4138 op_params->ref_enc_op, ad->dev_id, queue_id, 4139 num_to_process, burst_sz, &total_time, 4140 &min_time, &max_time); 4141 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4142 iter = latency_test_ldpc_dec(op_params->mp, bufs, 4143 op_params->ref_dec_op, op_params->vector_mask, 4144 ad->dev_id, queue_id, num_to_process, 4145 burst_sz, &total_time, &min_time, &max_time); 4146 else 4147 iter = latency_test_enc(op_params->mp, bufs, 4148 op_params->ref_enc_op, 4149 ad->dev_id, queue_id, 4150 num_to_process, burst_sz, &total_time, 4151 &min_time, &max_time); 4152 4153 if (iter <= 0) 4154 return TEST_FAILED; 4155 4156 printf("Operation latency:\n" 4157 "\tavg: %lg cycles, %lg us\n" 4158 "\tmin: %lg cycles, %lg us\n" 4159 "\tmax: %lg cycles, %lg us\n", 4160 
(double)total_time / (double)iter, 4161 (double)(total_time * 1000000) / (double)iter / 4162 (double)rte_get_tsc_hz(), (double)min_time, 4163 (double)(min_time * 1000000) / (double)rte_get_tsc_hz(), 4164 (double)max_time, (double)(max_time * 1000000) / 4165 (double)rte_get_tsc_hz()); 4166 4167 return TEST_SUCCESS; 4168 } 4169 4170 #ifdef RTE_BBDEV_OFFLOAD_COST 4171 static int 4172 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id, 4173 struct rte_bbdev_stats *stats) 4174 { 4175 struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; 4176 struct rte_bbdev_stats *q_stats; 4177 4178 if (queue_id >= dev->data->num_queues) 4179 return -1; 4180 4181 q_stats = &dev->data->queues[queue_id].queue_stats; 4182 4183 stats->enqueued_count = q_stats->enqueued_count; 4184 stats->dequeued_count = q_stats->dequeued_count; 4185 stats->enqueue_err_count = q_stats->enqueue_err_count; 4186 stats->dequeue_err_count = q_stats->dequeue_err_count; 4187 stats->acc_offload_cycles = q_stats->acc_offload_cycles; 4188 4189 return 0; 4190 } 4191 4192 static int 4193 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs, 4194 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 4195 uint16_t queue_id, const uint16_t num_to_process, 4196 uint16_t burst_sz, struct test_time_stats *time_st) 4197 { 4198 int i, dequeued, ret; 4199 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4200 uint64_t enq_start_time, deq_start_time; 4201 uint64_t enq_sw_last_time, deq_last_time; 4202 struct rte_bbdev_stats stats; 4203 4204 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4205 uint16_t enq = 0, deq = 0; 4206 4207 if (unlikely(num_to_process - dequeued < burst_sz)) 4208 burst_sz = num_to_process - dequeued; 4209 4210 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4211 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4212 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 4213 bufs->inputs, 4214 bufs->hard_outputs, 4215 bufs->soft_outputs, 4216 ref_op); 4217 4218 /* Start time meas for enqueue function offload latency */ 4219 enq_start_time = rte_rdtsc_precise(); 4220 do { 4221 enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id, 4222 &ops_enq[enq], burst_sz - enq); 4223 } while (unlikely(burst_sz != enq)); 4224 4225 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4226 TEST_ASSERT_SUCCESS(ret, 4227 "Failed to get stats for queue (%u) of device (%u)", 4228 queue_id, dev_id); 4229 4230 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time - 4231 stats.acc_offload_cycles; 4232 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4233 enq_sw_last_time); 4234 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4235 enq_sw_last_time); 4236 time_st->enq_sw_total_time += enq_sw_last_time; 4237 4238 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4239 stats.acc_offload_cycles); 4240 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4241 stats.acc_offload_cycles); 4242 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4243 4244 /* give time for device to process ops */ 4245 rte_delay_us(200); 4246 4247 /* Start time meas for dequeue function offload latency */ 4248 deq_start_time = rte_rdtsc_precise(); 4249 /* Dequeue one operation */ 4250 do { 4251 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4252 &ops_deq[deq], 1); 4253 } while (unlikely(deq != 1)); 4254 4255 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4256 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4257 deq_last_time); 4258 time_st->deq_min_time = 
RTE_MIN(time_st->deq_min_time, 4259 deq_last_time); 4260 time_st->deq_total_time += deq_last_time; 4261 4262 /* Dequeue remaining operations if needed*/ 4263 while (burst_sz != deq) 4264 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4265 &ops_deq[deq], burst_sz - deq); 4266 4267 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4268 dequeued += deq; 4269 } 4270 4271 return i; 4272 } 4273 4274 static int 4275 offload_latency_test_ldpc_dec(struct rte_mempool *mempool, 4276 struct test_buffers *bufs, 4277 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 4278 uint16_t queue_id, const uint16_t num_to_process, 4279 uint16_t burst_sz, struct test_time_stats *time_st) 4280 { 4281 int i, dequeued, ret; 4282 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4283 uint64_t enq_start_time, deq_start_time; 4284 uint64_t enq_sw_last_time, deq_last_time; 4285 struct rte_bbdev_stats stats; 4286 bool extDdr = ldpc_cap_flags & 4287 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 4288 4289 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4290 uint16_t enq = 0, deq = 0; 4291 4292 if (unlikely(num_to_process - dequeued < burst_sz)) 4293 burst_sz = num_to_process - dequeued; 4294 4295 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4296 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4297 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 4298 bufs->inputs, 4299 bufs->hard_outputs, 4300 bufs->soft_outputs, 4301 bufs->harq_inputs, 4302 bufs->harq_outputs, 4303 ref_op); 4304 4305 if (extDdr) 4306 preload_harq_ddr(dev_id, queue_id, ops_enq, 4307 burst_sz, true); 4308 4309 /* Start time meas for enqueue function offload latency */ 4310 enq_start_time = rte_rdtsc_precise(); 4311 do { 4312 enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 4313 &ops_enq[enq], burst_sz - enq); 4314 } while (unlikely(burst_sz != enq)); 4315 4316 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4317 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4318 TEST_ASSERT_SUCCESS(ret, 4319 "Failed to get stats for queue (%u) of device (%u)", 4320 queue_id, dev_id); 4321 4322 enq_sw_last_time -= stats.acc_offload_cycles; 4323 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4324 enq_sw_last_time); 4325 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4326 enq_sw_last_time); 4327 time_st->enq_sw_total_time += enq_sw_last_time; 4328 4329 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4330 stats.acc_offload_cycles); 4331 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4332 stats.acc_offload_cycles); 4333 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4334 4335 /* give time for device to process ops */ 4336 rte_delay_us(200); 4337 4338 /* Start time meas for dequeue function offload latency */ 4339 deq_start_time = rte_rdtsc_precise(); 4340 /* Dequeue one operation */ 4341 do { 4342 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4343 &ops_deq[deq], 1); 4344 } while (unlikely(deq != 1)); 4345 4346 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4347 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4348 deq_last_time); 4349 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4350 deq_last_time); 4351 time_st->deq_total_time += deq_last_time; 4352 4353 /* Dequeue remaining operations if needed*/ 4354 while (burst_sz != deq) 4355 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4356 &ops_deq[deq], burst_sz - deq); 4357 4358 if (extDdr) { 4359 /* Read loopback is not thread safe */ 4360 
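			/*
			 * HARQ data is read back from external DDR after the
			 * timed dequeue, so this transfer is not part of the
			 * measured offload cost.
			 */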
retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 4361 } 4362 4363 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4364 dequeued += deq; 4365 } 4366 4367 return i; 4368 } 4369 4370 static int 4371 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs, 4372 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 4373 uint16_t queue_id, const uint16_t num_to_process, 4374 uint16_t burst_sz, struct test_time_stats *time_st) 4375 { 4376 int i, dequeued, ret; 4377 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4378 uint64_t enq_start_time, deq_start_time; 4379 uint64_t enq_sw_last_time, deq_last_time; 4380 struct rte_bbdev_stats stats; 4381 4382 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4383 uint16_t enq = 0, deq = 0; 4384 4385 if (unlikely(num_to_process - dequeued < burst_sz)) 4386 burst_sz = num_to_process - dequeued; 4387 4388 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4389 TEST_ASSERT_SUCCESS(ret, 4390 "rte_bbdev_enc_op_alloc_bulk() failed"); 4391 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4392 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 4393 bufs->inputs, 4394 bufs->hard_outputs, 4395 ref_op); 4396 4397 /* Start time meas for enqueue function offload latency */ 4398 enq_start_time = rte_rdtsc_precise(); 4399 do { 4400 enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id, 4401 &ops_enq[enq], burst_sz - enq); 4402 } while (unlikely(burst_sz != enq)); 4403 4404 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4405 4406 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4407 TEST_ASSERT_SUCCESS(ret, 4408 "Failed to get stats for queue (%u) of device (%u)", 4409 queue_id, dev_id); 4410 enq_sw_last_time -= stats.acc_offload_cycles; 4411 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4412 enq_sw_last_time); 4413 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4414 enq_sw_last_time); 4415 time_st->enq_sw_total_time += enq_sw_last_time; 4416 4417 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4418 stats.acc_offload_cycles); 4419 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4420 stats.acc_offload_cycles); 4421 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4422 4423 /* give time for device to process ops */ 4424 rte_delay_us(200); 4425 4426 /* Start time meas for dequeue function offload latency */ 4427 deq_start_time = rte_rdtsc_precise(); 4428 /* Dequeue one operation */ 4429 do { 4430 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4431 &ops_deq[deq], 1); 4432 } while (unlikely(deq != 1)); 4433 4434 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4435 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4436 deq_last_time); 4437 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4438 deq_last_time); 4439 time_st->deq_total_time += deq_last_time; 4440 4441 while (burst_sz != deq) 4442 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4443 &ops_deq[deq], burst_sz - deq); 4444 4445 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4446 dequeued += deq; 4447 } 4448 4449 return i; 4450 } 4451 4452 static int 4453 offload_latency_test_ldpc_enc(struct rte_mempool *mempool, 4454 struct test_buffers *bufs, 4455 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 4456 uint16_t queue_id, const uint16_t num_to_process, 4457 uint16_t burst_sz, struct test_time_stats *time_st) 4458 { 4459 int i, dequeued, ret; 4460 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4461 uint64_t enq_start_time, deq_start_time; 4462 uint64_t 
enq_sw_last_time, deq_last_time; 4463 struct rte_bbdev_stats stats; 4464 4465 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4466 uint16_t enq = 0, deq = 0; 4467 4468 if (unlikely(num_to_process - dequeued < burst_sz)) 4469 burst_sz = num_to_process - dequeued; 4470 4471 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4472 TEST_ASSERT_SUCCESS(ret, 4473 "rte_bbdev_enc_op_alloc_bulk() failed"); 4474 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4475 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 4476 bufs->inputs, 4477 bufs->hard_outputs, 4478 ref_op); 4479 4480 /* Start time meas for enqueue function offload latency */ 4481 enq_start_time = rte_rdtsc_precise(); 4482 do { 4483 enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 4484 &ops_enq[enq], burst_sz - enq); 4485 } while (unlikely(burst_sz != enq)); 4486 4487 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4488 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4489 TEST_ASSERT_SUCCESS(ret, 4490 "Failed to get stats for queue (%u) of device (%u)", 4491 queue_id, dev_id); 4492 4493 enq_sw_last_time -= stats.acc_offload_cycles; 4494 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4495 enq_sw_last_time); 4496 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4497 enq_sw_last_time); 4498 time_st->enq_sw_total_time += enq_sw_last_time; 4499 4500 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4501 stats.acc_offload_cycles); 4502 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4503 stats.acc_offload_cycles); 4504 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4505 4506 /* give time for device to process ops */ 4507 rte_delay_us(200); 4508 4509 /* Start time meas for dequeue function offload latency */ 4510 deq_start_time = rte_rdtsc_precise(); 4511 /* Dequeue one operation */ 4512 do { 4513 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4514 &ops_deq[deq], 1); 4515 } while (unlikely(deq != 1)); 4516 4517 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4518 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4519 deq_last_time); 4520 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4521 deq_last_time); 4522 time_st->deq_total_time += deq_last_time; 4523 4524 while (burst_sz != deq) 4525 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4526 &ops_deq[deq], burst_sz - deq); 4527 4528 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4529 dequeued += deq; 4530 } 4531 4532 return i; 4533 } 4534 #endif 4535 4536 static int 4537 offload_cost_test(struct active_device *ad, 4538 struct test_op_params *op_params) 4539 { 4540 #ifndef RTE_BBDEV_OFFLOAD_COST 4541 RTE_SET_USED(ad); 4542 RTE_SET_USED(op_params); 4543 printf("Offload latency test is disabled.\n"); 4544 printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n"); 4545 return TEST_SKIPPED; 4546 #else 4547 int iter; 4548 uint16_t burst_sz = op_params->burst_sz; 4549 const uint16_t num_to_process = op_params->num_to_process; 4550 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4551 const uint16_t queue_id = ad->queue_ids[0]; 4552 struct test_buffers *bufs = NULL; 4553 struct rte_bbdev_info info; 4554 const char *op_type_str; 4555 struct test_time_stats time_st; 4556 4557 memset(&time_st, 0, sizeof(struct test_time_stats)); 4558 time_st.enq_sw_min_time = UINT64_MAX; 4559 time_st.enq_acc_min_time = UINT64_MAX; 4560 time_st.deq_min_time = UINT64_MAX; 4561 4562 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4563 "BURST_SIZE should be <= %u", 
MAX_BURST); 4564 4565 rte_bbdev_info_get(ad->dev_id, &info); 4566 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4567 4568 op_type_str = rte_bbdev_op_type_str(op_type); 4569 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4570 4571 printf("+ ------------------------------------------------------- +\n"); 4572 printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 4573 info.dev_name, burst_sz, num_to_process, op_type_str); 4574 4575 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 4576 iter = offload_latency_test_dec(op_params->mp, bufs, 4577 op_params->ref_dec_op, ad->dev_id, queue_id, 4578 num_to_process, burst_sz, &time_st); 4579 else if (op_type == RTE_BBDEV_OP_TURBO_ENC) 4580 iter = offload_latency_test_enc(op_params->mp, bufs, 4581 op_params->ref_enc_op, ad->dev_id, queue_id, 4582 num_to_process, burst_sz, &time_st); 4583 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4584 iter = offload_latency_test_ldpc_enc(op_params->mp, bufs, 4585 op_params->ref_enc_op, ad->dev_id, queue_id, 4586 num_to_process, burst_sz, &time_st); 4587 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4588 iter = offload_latency_test_ldpc_dec(op_params->mp, bufs, 4589 op_params->ref_dec_op, ad->dev_id, queue_id, 4590 num_to_process, burst_sz, &time_st); 4591 else 4592 iter = offload_latency_test_enc(op_params->mp, bufs, 4593 op_params->ref_enc_op, ad->dev_id, queue_id, 4594 num_to_process, burst_sz, &time_st); 4595 4596 if (iter <= 0) 4597 return TEST_FAILED; 4598 4599 printf("Enqueue driver offload cost latency:\n" 4600 "\tavg: %lg cycles, %lg us\n" 4601 "\tmin: %lg cycles, %lg us\n" 4602 "\tmax: %lg cycles, %lg us\n" 4603 "Enqueue accelerator offload cost latency:\n" 4604 "\tavg: %lg cycles, %lg us\n" 4605 "\tmin: %lg cycles, %lg us\n" 4606 "\tmax: %lg cycles, %lg us\n", 4607 (double)time_st.enq_sw_total_time / (double)iter, 4608 (double)(time_st.enq_sw_total_time * 1000000) / 4609 (double)iter / (double)rte_get_tsc_hz(), 4610 (double)time_st.enq_sw_min_time, 4611 (double)(time_st.enq_sw_min_time * 1000000) / 4612 rte_get_tsc_hz(), (double)time_st.enq_sw_max_time, 4613 (double)(time_st.enq_sw_max_time * 1000000) / 4614 rte_get_tsc_hz(), (double)time_st.enq_acc_total_time / 4615 (double)iter, 4616 (double)(time_st.enq_acc_total_time * 1000000) / 4617 (double)iter / (double)rte_get_tsc_hz(), 4618 (double)time_st.enq_acc_min_time, 4619 (double)(time_st.enq_acc_min_time * 1000000) / 4620 rte_get_tsc_hz(), (double)time_st.enq_acc_max_time, 4621 (double)(time_st.enq_acc_max_time * 1000000) / 4622 rte_get_tsc_hz()); 4623 4624 printf("Dequeue offload cost latency - one op:\n" 4625 "\tavg: %lg cycles, %lg us\n" 4626 "\tmin: %lg cycles, %lg us\n" 4627 "\tmax: %lg cycles, %lg us\n", 4628 (double)time_st.deq_total_time / (double)iter, 4629 (double)(time_st.deq_total_time * 1000000) / 4630 (double)iter / (double)rte_get_tsc_hz(), 4631 (double)time_st.deq_min_time, 4632 (double)(time_st.deq_min_time * 1000000) / 4633 rte_get_tsc_hz(), (double)time_st.deq_max_time, 4634 (double)(time_st.deq_max_time * 1000000) / 4635 rte_get_tsc_hz()); 4636 4637 return TEST_SUCCESS; 4638 #endif 4639 } 4640 4641 #ifdef RTE_BBDEV_OFFLOAD_COST 4642 static int 4643 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id, 4644 const uint16_t num_to_process, uint16_t burst_sz, 4645 uint64_t *deq_total_time, uint64_t *deq_min_time, 4646 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type) 4647 { 4648 int i, deq_total; 4649 struct rte_bbdev_dec_op *ops[MAX_BURST]; 4650 
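	/*
	 * The timestamps declared below bracket each dequeue call; since no
	 * operations were enqueued, this isolates the pure driver polling
	 * overhead on an empty queue.
	 */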
uint64_t deq_start_time, deq_last_time; 4651 4652 /* Test deq offload latency from an empty queue */ 4653 4654 for (i = 0, deq_total = 0; deq_total < num_to_process; 4655 ++i, deq_total += burst_sz) { 4656 deq_start_time = rte_rdtsc_precise(); 4657 4658 if (unlikely(num_to_process - deq_total < burst_sz)) 4659 burst_sz = num_to_process - deq_total; 4660 if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4661 rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops, 4662 burst_sz); 4663 else 4664 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, 4665 burst_sz); 4666 4667 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4668 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 4669 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 4670 *deq_total_time += deq_last_time; 4671 } 4672 4673 return i; 4674 } 4675 4676 static int 4677 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id, 4678 const uint16_t num_to_process, uint16_t burst_sz, 4679 uint64_t *deq_total_time, uint64_t *deq_min_time, 4680 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type) 4681 { 4682 int i, deq_total; 4683 struct rte_bbdev_enc_op *ops[MAX_BURST]; 4684 uint64_t deq_start_time, deq_last_time; 4685 4686 /* Test deq offload latency from an empty queue */ 4687 for (i = 0, deq_total = 0; deq_total < num_to_process; 4688 ++i, deq_total += burst_sz) { 4689 deq_start_time = rte_rdtsc_precise(); 4690 4691 if (unlikely(num_to_process - deq_total < burst_sz)) 4692 burst_sz = num_to_process - deq_total; 4693 if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4694 rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops, 4695 burst_sz); 4696 else 4697 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, 4698 burst_sz); 4699 4700 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4701 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 4702 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 4703 *deq_total_time += deq_last_time; 4704 } 4705 4706 return i; 4707 } 4708 4709 #endif 4710 4711 static int 4712 offload_latency_empty_q_test(struct active_device *ad, 4713 struct test_op_params *op_params) 4714 { 4715 #ifndef RTE_BBDEV_OFFLOAD_COST 4716 RTE_SET_USED(ad); 4717 RTE_SET_USED(op_params); 4718 printf("Offload latency empty dequeue test is disabled.\n"); 4719 printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n"); 4720 return TEST_SKIPPED; 4721 #else 4722 int iter; 4723 uint64_t deq_total_time, deq_min_time, deq_max_time; 4724 uint16_t burst_sz = op_params->burst_sz; 4725 const uint16_t num_to_process = op_params->num_to_process; 4726 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4727 const uint16_t queue_id = ad->queue_ids[0]; 4728 struct rte_bbdev_info info; 4729 const char *op_type_str; 4730 4731 deq_total_time = deq_max_time = 0; 4732 deq_min_time = UINT64_MAX; 4733 4734 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4735 "BURST_SIZE should be <= %u", MAX_BURST); 4736 4737 rte_bbdev_info_get(ad->dev_id, &info); 4738 4739 op_type_str = rte_bbdev_op_type_str(op_type); 4740 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4741 4742 printf("+ ------------------------------------------------------- +\n"); 4743 printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 4744 info.dev_name, burst_sz, num_to_process, op_type_str); 4745 4746 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 4747 op_type == RTE_BBDEV_OP_LDPC_DEC) 4748 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id, 4749 num_to_process, burst_sz, &deq_total_time, 4750 
&deq_min_time, &deq_max_time, op_type); 4751 else 4752 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id, 4753 num_to_process, burst_sz, &deq_total_time, 4754 &deq_min_time, &deq_max_time, op_type); 4755 4756 if (iter <= 0) 4757 return TEST_FAILED; 4758 4759 printf("Empty dequeue offload:\n" 4760 "\tavg: %lg cycles, %lg us\n" 4761 "\tmin: %lg cycles, %lg us\n" 4762 "\tmax: %lg cycles, %lg us\n", 4763 (double)deq_total_time / (double)iter, 4764 (double)(deq_total_time * 1000000) / (double)iter / 4765 (double)rte_get_tsc_hz(), (double)deq_min_time, 4766 (double)(deq_min_time * 1000000) / rte_get_tsc_hz(), 4767 (double)deq_max_time, (double)(deq_max_time * 1000000) / 4768 rte_get_tsc_hz()); 4769 4770 return TEST_SUCCESS; 4771 #endif 4772 } 4773 4774 static int 4775 bler_tc(void) 4776 { 4777 return run_test_case(bler_test); 4778 } 4779 4780 static int 4781 throughput_tc(void) 4782 { 4783 return run_test_case(throughput_test); 4784 } 4785 4786 static int 4787 offload_cost_tc(void) 4788 { 4789 return run_test_case(offload_cost_test); 4790 } 4791 4792 static int 4793 offload_latency_empty_q_tc(void) 4794 { 4795 return run_test_case(offload_latency_empty_q_test); 4796 } 4797 4798 static int 4799 latency_tc(void) 4800 { 4801 return run_test_case(latency_test); 4802 } 4803 4804 static int 4805 interrupt_tc(void) 4806 { 4807 return run_test_case(throughput_test); 4808 } 4809 4810 static struct unit_test_suite bbdev_bler_testsuite = { 4811 .suite_name = "BBdev BLER Tests", 4812 .setup = testsuite_setup, 4813 .teardown = testsuite_teardown, 4814 .unit_test_cases = { 4815 TEST_CASE_ST(ut_setup, ut_teardown, bler_tc), 4816 TEST_CASES_END() /**< NULL terminate unit test array */ 4817 } 4818 }; 4819 4820 static struct unit_test_suite bbdev_throughput_testsuite = { 4821 .suite_name = "BBdev Throughput Tests", 4822 .setup = testsuite_setup, 4823 .teardown = testsuite_teardown, 4824 .unit_test_cases = { 4825 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc), 4826 TEST_CASES_END() /**< NULL terminate unit test array */ 4827 } 4828 }; 4829 4830 static struct unit_test_suite bbdev_validation_testsuite = { 4831 .suite_name = "BBdev Validation Tests", 4832 .setup = testsuite_setup, 4833 .teardown = testsuite_teardown, 4834 .unit_test_cases = { 4835 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc), 4836 TEST_CASES_END() /**< NULL terminate unit test array */ 4837 } 4838 }; 4839 4840 static struct unit_test_suite bbdev_latency_testsuite = { 4841 .suite_name = "BBdev Latency Tests", 4842 .setup = testsuite_setup, 4843 .teardown = testsuite_teardown, 4844 .unit_test_cases = { 4845 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc), 4846 TEST_CASES_END() /**< NULL terminate unit test array */ 4847 } 4848 }; 4849 4850 static struct unit_test_suite bbdev_offload_cost_testsuite = { 4851 .suite_name = "BBdev Offload Cost Tests", 4852 .setup = testsuite_setup, 4853 .teardown = testsuite_teardown, 4854 .unit_test_cases = { 4855 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc), 4856 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc), 4857 TEST_CASES_END() /**< NULL terminate unit test array */ 4858 } 4859 }; 4860 4861 static struct unit_test_suite bbdev_interrupt_testsuite = { 4862 .suite_name = "BBdev Interrupt Tests", 4863 .setup = interrupt_testsuite_setup, 4864 .teardown = testsuite_teardown, 4865 .unit_test_cases = { 4866 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc), 4867 TEST_CASES_END() /**< NULL terminate unit test array */ 4868 } 4869 }; 4870 4871 REGISTER_TEST_COMMAND(bler, 
bbdev_bler_testsuite);
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
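
/*
 * Illustrative usage (binary name, vector path and option spelling are
 * assumptions, not taken from this file): the commands registered above are
 * driven by the bbdev test application, typically through its test-bbdev.py
 * wrapper. A run of the latency suite might look like:
 *
 *   ./dpdk-test-bbdev <EAL options> -- -c latency -v <test vector file> \
 *           -n 64 -b 16
 *
 * where -c selects one of the registered commands (bler, throughput,
 * validation, latency, offload, interrupt), -v points at a test vector,
 * -n sets the number of operations and -b the burst size.
 */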