1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <inttypes.h> 7 #include <math.h> 8 9 #include <rte_eal.h> 10 #include <rte_common.h> 11 #include <rte_dev.h> 12 #include <rte_launch.h> 13 #include <rte_bbdev.h> 14 #include <rte_cycles.h> 15 #include <rte_lcore.h> 16 #include <rte_malloc.h> 17 #include <rte_random.h> 18 #include <rte_hexdump.h> 19 #include <rte_interrupts.h> 20 21 #include "main.h" 22 #include "test_bbdev_vector.h" 23 24 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id)) 25 26 #define MAX_QUEUES RTE_MAX_LCORE 27 #define TEST_REPETITIONS 100 28 #define WAIT_OFFLOAD_US 1000 29 30 #ifdef RTE_BASEBAND_FPGA_LTE_FEC 31 #include <fpga_lte_fec.h> 32 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf") 33 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf") 34 #define VF_UL_4G_QUEUE_VALUE 4 35 #define VF_DL_4G_QUEUE_VALUE 4 36 #define UL_4G_BANDWIDTH 3 37 #define DL_4G_BANDWIDTH 3 38 #define UL_4G_LOAD_BALANCE 128 39 #define DL_4G_LOAD_BALANCE 128 40 #define FLR_4G_TIMEOUT 610 41 #endif 42 43 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC 44 #include <rte_pmd_fpga_5gnr_fec.h> 45 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf") 46 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf") 47 #define VF_UL_5G_QUEUE_VALUE 4 48 #define VF_DL_5G_QUEUE_VALUE 4 49 #define UL_5G_BANDWIDTH 3 50 #define DL_5G_BANDWIDTH 3 51 #define UL_5G_LOAD_BALANCE 128 52 #define DL_5G_LOAD_BALANCE 128 53 #define FLR_5G_TIMEOUT 610 54 #endif 55 56 #ifdef RTE_BASEBAND_ACC100 57 #include <rte_acc100_cfg.h> 58 #define ACC100PF_DRIVER_NAME ("intel_acc100_pf") 59 #define ACC100VF_DRIVER_NAME ("intel_acc100_vf") 60 #define ACC100_QMGR_NUM_AQS 16 61 #define ACC100_QMGR_NUM_QGS 2 62 #define ACC100_QMGR_AQ_DEPTH 5 63 #define ACC100_QMGR_INVALID_IDX -1 64 #define ACC100_QMGR_RR 1 65 #define ACC100_QOS_GBR 0 66 #endif 67 68 #define OPS_CACHE_SIZE 256U 69 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */ 70 71 #define SYNC_WAIT 0 72 #define SYNC_START 1 73 #define INVALID_OPAQUE -1 74 75 #define INVALID_QUEUE_ID -1 76 /* Increment for next code block in external HARQ memory */ 77 #define HARQ_INCR 32768 78 /* Headroom for filler LLRs insertion in HARQ buffer */ 79 #define FILLER_HEADROOM 1024 80 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */ 81 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */ 82 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */ 83 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */ 84 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */ 85 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */ 86 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */ 87 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */ 88 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */ 89 90 static struct test_bbdev_vector test_vector; 91 92 /* Switch between PMD and Interrupt for throughput TC */ 93 static bool intr_enabled; 94 95 /* LLR arithmetic representation for numerical conversion */ 96 static int ldpc_llr_decimals; 97 static int ldpc_llr_size; 98 /* Keep track of the LDPC decoder device capability flag */ 99 static uint32_t ldpc_cap_flags; 100 101 /* Represents tested active devices */ 102 static struct active_device { 103 const char *driver_name; 104 uint8_t dev_id; 105 uint16_t supported_ops; 106 uint16_t queue_ids[MAX_QUEUES]; 107 uint16_t nb_queues; 108 struct rte_mempool *ops_mempool; 109 struct rte_mempool *in_mbuf_pool; 110 
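	/* The mbuf pools in this struct (in_mbuf_pool above and the output/HARQ
	 * pools below) mirror the test vector data entry types; they are
	 * created on demand in create_mempools() and released in free_buffers().
	 */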
struct rte_mempool *hard_out_mbuf_pool; 111 struct rte_mempool *soft_out_mbuf_pool; 112 struct rte_mempool *harq_in_mbuf_pool; 113 struct rte_mempool *harq_out_mbuf_pool; 114 } active_devs[RTE_BBDEV_MAX_DEVS]; 115 116 static uint8_t nb_active_devs; 117 118 /* Data buffers used by BBDEV ops */ 119 struct test_buffers { 120 struct rte_bbdev_op_data *inputs; 121 struct rte_bbdev_op_data *hard_outputs; 122 struct rte_bbdev_op_data *soft_outputs; 123 struct rte_bbdev_op_data *harq_inputs; 124 struct rte_bbdev_op_data *harq_outputs; 125 }; 126 127 /* Operation parameters specific for given test case */ 128 struct test_op_params { 129 struct rte_mempool *mp; 130 struct rte_bbdev_dec_op *ref_dec_op; 131 struct rte_bbdev_enc_op *ref_enc_op; 132 uint16_t burst_sz; 133 uint16_t num_to_process; 134 uint16_t num_lcores; 135 int vector_mask; 136 rte_atomic16_t sync; 137 struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES]; 138 }; 139 140 /* Contains per lcore params */ 141 struct thread_params { 142 uint8_t dev_id; 143 uint16_t queue_id; 144 uint32_t lcore_id; 145 uint64_t start_time; 146 double ops_per_sec; 147 double mbps; 148 uint8_t iter_count; 149 double iter_average; 150 double bler; 151 rte_atomic16_t nb_dequeued; 152 rte_atomic16_t processing_status; 153 rte_atomic16_t burst_sz; 154 struct test_op_params *op_params; 155 struct rte_bbdev_dec_op *dec_ops[MAX_BURST]; 156 struct rte_bbdev_enc_op *enc_ops[MAX_BURST]; 157 }; 158 159 #ifdef RTE_BBDEV_OFFLOAD_COST 160 /* Stores time statistics */ 161 struct test_time_stats { 162 /* Stores software enqueue total working time */ 163 uint64_t enq_sw_total_time; 164 /* Stores minimum value of software enqueue working time */ 165 uint64_t enq_sw_min_time; 166 /* Stores maximum value of software enqueue working time */ 167 uint64_t enq_sw_max_time; 168 /* Stores turbo enqueue total working time */ 169 uint64_t enq_acc_total_time; 170 /* Stores minimum value of accelerator enqueue working time */ 171 uint64_t enq_acc_min_time; 172 /* Stores maximum value of accelerator enqueue working time */ 173 uint64_t enq_acc_max_time; 174 /* Stores dequeue total working time */ 175 uint64_t deq_total_time; 176 /* Stores minimum value of dequeue working time */ 177 uint64_t deq_min_time; 178 /* Stores maximum value of dequeue working time */ 179 uint64_t deq_max_time; 180 }; 181 #endif 182 183 typedef int (test_case_function)(struct active_device *ad, 184 struct test_op_params *op_params); 185 186 static inline void 187 mbuf_reset(struct rte_mbuf *m) 188 { 189 m->pkt_len = 0; 190 191 do { 192 m->data_len = 0; 193 m = m->next; 194 } while (m != NULL); 195 } 196 197 /* Read flag value 0/1 from bitmap */ 198 static inline bool 199 check_bit(uint32_t bitmap, uint32_t bitmask) 200 { 201 return bitmap & bitmask; 202 } 203 204 static inline void 205 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type) 206 { 207 ad->supported_ops |= (1 << op_type); 208 } 209 210 static inline bool 211 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type) 212 { 213 return ad->supported_ops & (1 << op_type); 214 } 215 216 static inline bool 217 flags_match(uint32_t flags_req, uint32_t flags_present) 218 { 219 return (flags_req & flags_present) == flags_req; 220 } 221 222 static void 223 clear_soft_out_cap(uint32_t *op_flags) 224 { 225 *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT; 226 *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT; 227 *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT; 228 } 229 230 static int 231 check_dev_cap(const struct rte_bbdev_info 
*dev_info) 232 { 233 unsigned int i; 234 unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs, 235 nb_harq_inputs, nb_harq_outputs; 236 const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities; 237 238 nb_inputs = test_vector.entries[DATA_INPUT].nb_segments; 239 nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments; 240 nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments; 241 nb_harq_inputs = test_vector.entries[DATA_HARQ_INPUT].nb_segments; 242 nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments; 243 244 for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) { 245 if (op_cap->type != test_vector.op_type) 246 continue; 247 248 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) { 249 const struct rte_bbdev_op_cap_turbo_dec *cap = 250 &op_cap->cap.turbo_dec; 251 /* Ignore lack of soft output capability, just skip 252 * checking if soft output is valid. 253 */ 254 if ((test_vector.turbo_dec.op_flags & 255 RTE_BBDEV_TURBO_SOFT_OUTPUT) && 256 !(cap->capability_flags & 257 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 258 printf( 259 "INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n", 260 dev_info->dev_name); 261 clear_soft_out_cap( 262 &test_vector.turbo_dec.op_flags); 263 } 264 265 if (!flags_match(test_vector.turbo_dec.op_flags, 266 cap->capability_flags)) 267 return TEST_FAILED; 268 if (nb_inputs > cap->num_buffers_src) { 269 printf("Too many inputs defined: %u, max: %u\n", 270 nb_inputs, cap->num_buffers_src); 271 return TEST_FAILED; 272 } 273 if (nb_soft_outputs > cap->num_buffers_soft_out && 274 (test_vector.turbo_dec.op_flags & 275 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 276 printf( 277 "Too many soft outputs defined: %u, max: %u\n", 278 nb_soft_outputs, 279 cap->num_buffers_soft_out); 280 return TEST_FAILED; 281 } 282 if (nb_hard_outputs > cap->num_buffers_hard_out) { 283 printf( 284 "Too many hard outputs defined: %u, max: %u\n", 285 nb_hard_outputs, 286 cap->num_buffers_hard_out); 287 return TEST_FAILED; 288 } 289 if (intr_enabled && !(cap->capability_flags & 290 RTE_BBDEV_TURBO_DEC_INTERRUPTS)) { 291 printf( 292 "Dequeue interrupts are not supported!\n"); 293 return TEST_FAILED; 294 } 295 296 return TEST_SUCCESS; 297 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) { 298 const struct rte_bbdev_op_cap_turbo_enc *cap = 299 &op_cap->cap.turbo_enc; 300 301 if (!flags_match(test_vector.turbo_enc.op_flags, 302 cap->capability_flags)) 303 return TEST_FAILED; 304 if (nb_inputs > cap->num_buffers_src) { 305 printf("Too many inputs defined: %u, max: %u\n", 306 nb_inputs, cap->num_buffers_src); 307 return TEST_FAILED; 308 } 309 if (nb_hard_outputs > cap->num_buffers_dst) { 310 printf( 311 "Too many hard outputs defined: %u, max: %u\n", 312 nb_hard_outputs, cap->num_buffers_dst); 313 return TEST_FAILED; 314 } 315 if (intr_enabled && !(cap->capability_flags & 316 RTE_BBDEV_TURBO_ENC_INTERRUPTS)) { 317 printf( 318 "Dequeue interrupts are not supported!\n"); 319 return TEST_FAILED; 320 } 321 322 return TEST_SUCCESS; 323 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) { 324 const struct rte_bbdev_op_cap_ldpc_enc *cap = 325 &op_cap->cap.ldpc_enc; 326 327 if (!flags_match(test_vector.ldpc_enc.op_flags, 328 cap->capability_flags)){ 329 printf("Flag Mismatch\n"); 330 return TEST_FAILED; 331 } 332 if (nb_inputs > cap->num_buffers_src) { 333 printf("Too many inputs defined: %u, max: %u\n", 334 nb_inputs, cap->num_buffers_src); 335 return TEST_FAILED; 336 } 337 if (nb_hard_outputs > cap->num_buffers_dst) { 338 printf( 339 "Too 
many hard outputs defined: %u, max: %u\n", 340 nb_hard_outputs, cap->num_buffers_dst); 341 return TEST_FAILED; 342 } 343 if (intr_enabled && !(cap->capability_flags & 344 RTE_BBDEV_LDPC_ENC_INTERRUPTS)) { 345 printf( 346 "Dequeue interrupts are not supported!\n"); 347 return TEST_FAILED; 348 } 349 350 return TEST_SUCCESS; 351 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) { 352 const struct rte_bbdev_op_cap_ldpc_dec *cap = 353 &op_cap->cap.ldpc_dec; 354 355 if (!flags_match(test_vector.ldpc_dec.op_flags, 356 cap->capability_flags)){ 357 printf("Flag Mismatch\n"); 358 return TEST_FAILED; 359 } 360 if (nb_inputs > cap->num_buffers_src) { 361 printf("Too many inputs defined: %u, max: %u\n", 362 nb_inputs, cap->num_buffers_src); 363 return TEST_FAILED; 364 } 365 if (nb_hard_outputs > cap->num_buffers_hard_out) { 366 printf( 367 "Too many hard outputs defined: %u, max: %u\n", 368 nb_hard_outputs, 369 cap->num_buffers_hard_out); 370 return TEST_FAILED; 371 } 372 if (nb_harq_inputs > cap->num_buffers_hard_out) { 373 printf( 374 "Too many HARQ inputs defined: %u, max: %u\n", 375 nb_harq_inputs, 376 cap->num_buffers_hard_out); 377 return TEST_FAILED; 378 } 379 if (nb_harq_outputs > cap->num_buffers_hard_out) { 380 printf( 381 "Too many HARQ outputs defined: %u, max: %u\n", 382 nb_harq_outputs, 383 cap->num_buffers_hard_out); 384 return TEST_FAILED; 385 } 386 if (intr_enabled && !(cap->capability_flags & 387 RTE_BBDEV_LDPC_DEC_INTERRUPTS)) { 388 printf( 389 "Dequeue interrupts are not supported!\n"); 390 return TEST_FAILED; 391 } 392 if (intr_enabled && (test_vector.ldpc_dec.op_flags & 393 (RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 394 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 395 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 396 ))) { 397 printf("Skip loop-back with interrupt\n"); 398 return TEST_FAILED; 399 } 400 return TEST_SUCCESS; 401 } 402 } 403 404 if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE)) 405 return TEST_SUCCESS; /* Special case for NULL device */ 406 407 return TEST_FAILED; 408 } 409 410 /* calculates optimal mempool size not smaller than the val */ 411 static unsigned int 412 optimal_mempool_size(unsigned int val) 413 { 414 return rte_align32pow2(val + 1) - 1; 415 } 416 417 /* allocates mbuf mempool for inputs and outputs */ 418 static struct rte_mempool * 419 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id, 420 int socket_id, unsigned int mbuf_pool_size, 421 const char *op_type_str) 422 { 423 unsigned int i; 424 uint32_t max_seg_sz = 0; 425 char pool_name[RTE_MEMPOOL_NAMESIZE]; 426 427 /* find max input segment size */ 428 for (i = 0; i < entries->nb_segments; ++i) 429 if (entries->segments[i].length > max_seg_sz) 430 max_seg_sz = entries->segments[i].length; 431 432 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 433 dev_id); 434 return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0, 435 RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM 436 + FILLER_HEADROOM, 437 (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id); 438 } 439 440 static int 441 create_mempools(struct active_device *ad, int socket_id, 442 enum rte_bbdev_op_type org_op_type, uint16_t num_ops) 443 { 444 struct rte_mempool *mp; 445 unsigned int ops_pool_size, mbuf_pool_size = 0; 446 char pool_name[RTE_MEMPOOL_NAMESIZE]; 447 const char *op_type_str; 448 enum rte_bbdev_op_type op_type = org_op_type; 449 450 struct op_data_entries *in = &test_vector.entries[DATA_INPUT]; 451 struct op_data_entries *hard_out = 452 &test_vector.entries[DATA_HARD_OUTPUT]; 453 struct op_data_entries 
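	/*
	 * Sizing note (illustrative numbers only): the ops mempool created
	 * below holds at least OPS_POOL_SIZE_MIN (511) elements, or enough for
	 * nb_queues * num_ops ops plus one reference op, or 1.5x the per-lcore
	 * cache (OPS_CACHE_SIZE) across all lcores, whichever is largest;
	 * optimal_mempool_size() then rounds this up to a power of two minus
	 * one, e.g. a hypothetical 4 queues x 512 ops -> 2049 -> 4095.
	 */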
*soft_out = 454 &test_vector.entries[DATA_SOFT_OUTPUT]; 455 struct op_data_entries *harq_in = 456 &test_vector.entries[DATA_HARQ_INPUT]; 457 struct op_data_entries *harq_out = 458 &test_vector.entries[DATA_HARQ_OUTPUT]; 459 460 /* allocate ops mempool */ 461 ops_pool_size = optimal_mempool_size(RTE_MAX( 462 /* Ops used plus 1 reference op */ 463 RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1), 464 /* Minimal cache size plus 1 reference op */ 465 (unsigned int)(1.5 * rte_lcore_count() * 466 OPS_CACHE_SIZE + 1)), 467 OPS_POOL_SIZE_MIN)); 468 469 if (org_op_type == RTE_BBDEV_OP_NONE) 470 op_type = RTE_BBDEV_OP_TURBO_ENC; 471 472 op_type_str = rte_bbdev_op_type_str(op_type); 473 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 474 475 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 476 ad->dev_id); 477 mp = rte_bbdev_op_pool_create(pool_name, op_type, 478 ops_pool_size, OPS_CACHE_SIZE, socket_id); 479 TEST_ASSERT_NOT_NULL(mp, 480 "ERROR Failed to create %u items ops pool for dev %u on socket %u.", 481 ops_pool_size, 482 ad->dev_id, 483 socket_id); 484 ad->ops_mempool = mp; 485 486 /* Do not create inputs and outputs mbufs for BaseBand Null Device */ 487 if (org_op_type == RTE_BBDEV_OP_NONE) 488 return TEST_SUCCESS; 489 490 /* Inputs */ 491 if (in->nb_segments > 0) { 492 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 493 in->nb_segments); 494 mp = create_mbuf_pool(in, ad->dev_id, socket_id, 495 mbuf_pool_size, "in"); 496 TEST_ASSERT_NOT_NULL(mp, 497 "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.", 498 mbuf_pool_size, 499 ad->dev_id, 500 socket_id); 501 ad->in_mbuf_pool = mp; 502 } 503 504 /* Hard outputs */ 505 if (hard_out->nb_segments > 0) { 506 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 507 hard_out->nb_segments); 508 mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, 509 mbuf_pool_size, 510 "hard_out"); 511 TEST_ASSERT_NOT_NULL(mp, 512 "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.", 513 mbuf_pool_size, 514 ad->dev_id, 515 socket_id); 516 ad->hard_out_mbuf_pool = mp; 517 } 518 519 /* Soft outputs */ 520 if (soft_out->nb_segments > 0) { 521 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 522 soft_out->nb_segments); 523 mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, 524 mbuf_pool_size, 525 "soft_out"); 526 TEST_ASSERT_NOT_NULL(mp, 527 "ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.", 528 mbuf_pool_size, 529 ad->dev_id, 530 socket_id); 531 ad->soft_out_mbuf_pool = mp; 532 } 533 534 /* HARQ inputs */ 535 if (harq_in->nb_segments > 0) { 536 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 537 harq_in->nb_segments); 538 mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id, 539 mbuf_pool_size, 540 "harq_in"); 541 TEST_ASSERT_NOT_NULL(mp, 542 "ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.", 543 mbuf_pool_size, 544 ad->dev_id, 545 socket_id); 546 ad->harq_in_mbuf_pool = mp; 547 } 548 549 /* HARQ outputs */ 550 if (harq_out->nb_segments > 0) { 551 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 552 harq_out->nb_segments); 553 mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id, 554 mbuf_pool_size, 555 "harq_out"); 556 TEST_ASSERT_NOT_NULL(mp, 557 "ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.", 558 mbuf_pool_size, 559 ad->dev_id, 560 socket_id); 561 ad->harq_out_mbuf_pool = mp; 562 } 563 564 return TEST_SUCCESS; 565 } 566 567 static int 568 
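/* Apply a default PF configuration for known accelerators (FPGA LTE/5GNR FEC,
 * ACC100) when the '-i' option is given, then allocate up to one queue per
 * lcore (capped at MAX_QUEUES and the driver maximum) and enable interrupts
 * when the interrupt test suite requested them.
 */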
add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info,
		struct test_bbdev_vector *vector)
{
	int ret;
	unsigned int queue_id;
	struct rte_bbdev_queue_conf qconf;
	struct active_device *ad = &active_devs[nb_active_devs];
	unsigned int nb_queues;
	enum rte_bbdev_op_type op_type = vector->op_type;

	/* Configure FPGA LTE FEC with PF & VF values
	 * if the '-i' flag is set and an FPGA device is used.
	 */
#ifdef RTE_BASEBAND_FPGA_LTE_FEC
	if ((get_init_device() == true) &&
		(!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) {
		struct rte_fpga_lte_fec_conf conf;
		unsigned int i;

		printf("Configure FPGA LTE FEC Driver %s with default values\n",
				info->drv.driver_name);

		/* clear default configuration before initialization */
		memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf));

		/* Set PF mode:
		 * true if PF is used for data plane,
		 * false for VFs
		 */
		conf.pf_mode_en = true;

		for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) {
			/* Number of UL queues per VF (FPGA supports 8 VFs) */
			conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE;
			/* Number of DL queues per VF (FPGA supports 8 VFs) */
			conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE;
		}

		/* UL bandwidth. Needed for the scheduling algorithm */
		conf.ul_bandwidth = UL_4G_BANDWIDTH;
		/* DL bandwidth */
		conf.dl_bandwidth = DL_4G_BANDWIDTH;

		/* UL & DL load balance factor */
		conf.ul_load_balance = UL_4G_LOAD_BALANCE;
		conf.dl_load_balance = DL_4G_LOAD_BALANCE;

		/* FLR timeout value */
		conf.flr_time_out = FLR_4G_TIMEOUT;

		/* setup FPGA PF with configuration information */
		ret = rte_fpga_lte_fec_configure(info->dev_name, &conf);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to configure 4G FPGA PF for bbdev %s",
				info->dev_name);
	}
#endif
#ifdef RTE_BASEBAND_FPGA_5GNR_FEC
	if ((get_init_device() == true) &&
		(!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) {
		struct rte_fpga_5gnr_fec_conf conf;
		unsigned int i;

		printf("Configure FPGA 5GNR FEC Driver %s with default values\n",
				info->drv.driver_name);

		/* clear default configuration before initialization */
		memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf));

		/* Set PF mode:
		 * true if PF is used for data plane,
		 * false for VFs
		 */
		conf.pf_mode_en = true;

		for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) {
			/* Number of UL queues per VF (FPGA supports 8 VFs) */
			conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE;
			/* Number of DL queues per VF (FPGA supports 8 VFs) */
			conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE;
		}

		/* UL bandwidth. Needed for the scheduling algorithm */
		conf.ul_bandwidth = UL_5G_BANDWIDTH;
		/* DL bandwidth */
		conf.dl_bandwidth = DL_5G_BANDWIDTH;

		/* UL & DL load balance factor */
		conf.ul_load_balance = UL_5G_LOAD_BALANCE;
		conf.dl_load_balance = DL_5G_LOAD_BALANCE;

		/* FLR timeout value */
		conf.flr_time_out = FLR_5G_TIMEOUT;

		/* setup FPGA PF with configuration information */
		ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to configure 5G FPGA PF for bbdev %s",
				info->dev_name);
	}
#endif
#ifdef RTE_BASEBAND_ACC100
	if ((get_init_device() == true) &&
			(!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) {
		struct rte_acc100_conf conf;
		unsigned int i;

		printf("Configure ACC100 FEC Driver %s with default values\n",
				info->drv.driver_name);

		/* clear default configuration before initialization */
		memset(&conf, 0, sizeof(struct rte_acc100_conf));

		/* Always set in PF mode for built-in configuration */
		conf.pf_mode_en = true;
		for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) {
			conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
			conf.arb_dl_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
			conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR;
			conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR;
			conf.arb_ul_4g[i].gbr_threshold2 = ACC100_QOS_GBR;
			conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR;
			conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
			conf.arb_dl_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
			conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR;
			conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR;
			conf.arb_ul_5g[i].gbr_threshold2 = ACC100_QOS_GBR;
			conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR;
		}

		conf.input_pos_llr_1_bit = true;
		conf.output_pos_llr_1_bit = true;
		conf.num_vf_bundles = 1; /* Number of VF bundles to set up */

		conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
		conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
		conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
		conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
		conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS;
		conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
		conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
		conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
		conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
		conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
		conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
		conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;
		conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS;
		conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX;
		conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS;
		conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH;

		/* setup PF with configuration information */
		ret = rte_acc100_configure(info->dev_name, &conf);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to configure ACC100 PF for bbdev %s",
				info->dev_name);
	}
#endif
	/* Refresh device info now that the PF has been configured */
	rte_bbdev_info_get(dev_id, info);
	nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues);
	nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES);

	/* setup device */
	ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id);
	if (ret < 0) {
		printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n",
				dev_id, nb_queues, info->socket_id, ret);
return TEST_FAILED; 737 } 738 739 /* configure interrupts if needed */ 740 if (intr_enabled) { 741 ret = rte_bbdev_intr_enable(dev_id); 742 if (ret < 0) { 743 printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id, 744 ret); 745 return TEST_FAILED; 746 } 747 } 748 749 /* setup device queues */ 750 qconf.socket = info->socket_id; 751 qconf.queue_size = info->drv.default_queue_conf.queue_size; 752 qconf.priority = 0; 753 qconf.deferred_start = 0; 754 qconf.op_type = op_type; 755 756 for (queue_id = 0; queue_id < nb_queues; ++queue_id) { 757 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf); 758 if (ret != 0) { 759 printf( 760 "Allocated all queues (id=%u) at prio%u on dev%u\n", 761 queue_id, qconf.priority, dev_id); 762 qconf.priority++; 763 ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, 764 &qconf); 765 } 766 if (ret != 0) { 767 printf("All queues on dev %u allocated: %u\n", 768 dev_id, queue_id); 769 break; 770 } 771 ad->queue_ids[queue_id] = queue_id; 772 } 773 TEST_ASSERT(queue_id != 0, 774 "ERROR Failed to configure any queues on dev %u", 775 dev_id); 776 ad->nb_queues = queue_id; 777 778 set_avail_op(ad, op_type); 779 780 return TEST_SUCCESS; 781 } 782 783 static int 784 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info, 785 struct test_bbdev_vector *vector) 786 { 787 int ret; 788 789 active_devs[nb_active_devs].driver_name = info->drv.driver_name; 790 active_devs[nb_active_devs].dev_id = dev_id; 791 792 ret = add_bbdev_dev(dev_id, info, vector); 793 if (ret == TEST_SUCCESS) 794 ++nb_active_devs; 795 return ret; 796 } 797 798 static uint8_t 799 populate_active_devices(void) 800 { 801 int ret; 802 uint8_t dev_id; 803 uint8_t nb_devs_added = 0; 804 struct rte_bbdev_info info; 805 806 RTE_BBDEV_FOREACH(dev_id) { 807 rte_bbdev_info_get(dev_id, &info); 808 809 if (check_dev_cap(&info)) { 810 printf( 811 "Device %d (%s) does not support specified capabilities\n", 812 dev_id, info.dev_name); 813 continue; 814 } 815 816 ret = add_active_device(dev_id, &info, &test_vector); 817 if (ret != 0) { 818 printf("Adding active bbdev %s skipped\n", 819 info.dev_name); 820 continue; 821 } 822 nb_devs_added++; 823 } 824 825 return nb_devs_added; 826 } 827 828 static int 829 read_test_vector(void) 830 { 831 int ret; 832 833 memset(&test_vector, 0, sizeof(test_vector)); 834 printf("Test vector file = %s\n", get_vector_filename()); 835 ret = test_bbdev_vector_read(get_vector_filename(), &test_vector); 836 TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n", 837 get_vector_filename()); 838 839 return TEST_SUCCESS; 840 } 841 842 static int 843 testsuite_setup(void) 844 { 845 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 846 847 if (populate_active_devices() == 0) { 848 printf("No suitable devices found!\n"); 849 return TEST_SKIPPED; 850 } 851 852 return TEST_SUCCESS; 853 } 854 855 static int 856 interrupt_testsuite_setup(void) 857 { 858 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 859 860 /* Enable interrupts */ 861 intr_enabled = true; 862 863 /* Special case for NULL device (RTE_BBDEV_OP_NONE) */ 864 if (populate_active_devices() == 0 || 865 test_vector.op_type == RTE_BBDEV_OP_NONE) { 866 intr_enabled = false; 867 printf("No suitable devices found!\n"); 868 return TEST_SKIPPED; 869 } 870 871 return TEST_SUCCESS; 872 } 873 874 static void 875 testsuite_teardown(void) 876 { 877 uint8_t dev_id; 878 879 /* Unconfigure devices */ 880 RTE_BBDEV_FOREACH(dev_id) 881 rte_bbdev_close(dev_id); 882 883 /* Clear active devices structs. 
*/ 884 memset(active_devs, 0, sizeof(active_devs)); 885 nb_active_devs = 0; 886 887 /* Disable interrupts */ 888 intr_enabled = false; 889 } 890 891 static int 892 ut_setup(void) 893 { 894 uint8_t i, dev_id; 895 896 for (i = 0; i < nb_active_devs; i++) { 897 dev_id = active_devs[i].dev_id; 898 /* reset bbdev stats */ 899 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id), 900 "Failed to reset stats of bbdev %u", dev_id); 901 /* start the device */ 902 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id), 903 "Failed to start bbdev %u", dev_id); 904 } 905 906 return TEST_SUCCESS; 907 } 908 909 static void 910 ut_teardown(void) 911 { 912 uint8_t i, dev_id; 913 struct rte_bbdev_stats stats; 914 915 for (i = 0; i < nb_active_devs; i++) { 916 dev_id = active_devs[i].dev_id; 917 /* read stats and print */ 918 rte_bbdev_stats_get(dev_id, &stats); 919 /* Stop the device */ 920 rte_bbdev_stop(dev_id); 921 } 922 } 923 924 static int 925 init_op_data_objs(struct rte_bbdev_op_data *bufs, 926 struct op_data_entries *ref_entries, 927 struct rte_mempool *mbuf_pool, const uint16_t n, 928 enum op_data_type op_type, uint16_t min_alignment) 929 { 930 int ret; 931 unsigned int i, j; 932 bool large_input = false; 933 934 for (i = 0; i < n; ++i) { 935 char *data; 936 struct op_data_buf *seg = &ref_entries->segments[0]; 937 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool); 938 TEST_ASSERT_NOT_NULL(m_head, 939 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 940 op_type, n * ref_entries->nb_segments, 941 mbuf_pool->size); 942 943 if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) { 944 /* 945 * Special case when DPDK mbuf cannot handle 946 * the required input size 947 */ 948 printf("Warning: Larger input size than DPDK mbuf %d\n", 949 seg->length); 950 large_input = true; 951 } 952 bufs[i].data = m_head; 953 bufs[i].offset = 0; 954 bufs[i].length = 0; 955 956 if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) { 957 if ((op_type == DATA_INPUT) && large_input) { 958 /* Allocate a fake overused mbuf */ 959 data = rte_malloc(NULL, seg->length, 0); 960 TEST_ASSERT_NOT_NULL(data, 961 "rte malloc failed with %u bytes", 962 seg->length); 963 memcpy(data, seg->addr, seg->length); 964 m_head->buf_addr = data; 965 m_head->buf_iova = rte_malloc_virt2iova(data); 966 m_head->data_off = 0; 967 m_head->data_len = seg->length; 968 } else { 969 data = rte_pktmbuf_append(m_head, seg->length); 970 TEST_ASSERT_NOT_NULL(data, 971 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 972 seg->length, op_type); 973 974 TEST_ASSERT(data == RTE_PTR_ALIGN( 975 data, min_alignment), 976 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 977 data, min_alignment); 978 rte_memcpy(data, seg->addr, seg->length); 979 } 980 981 bufs[i].length += seg->length; 982 983 for (j = 1; j < ref_entries->nb_segments; ++j) { 984 struct rte_mbuf *m_tail = 985 rte_pktmbuf_alloc(mbuf_pool); 986 TEST_ASSERT_NOT_NULL(m_tail, 987 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 988 op_type, 989 n * ref_entries->nb_segments, 990 mbuf_pool->size); 991 seg += 1; 992 993 data = rte_pktmbuf_append(m_tail, seg->length); 994 TEST_ASSERT_NOT_NULL(data, 995 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 996 seg->length, op_type); 997 998 TEST_ASSERT(data == RTE_PTR_ALIGN(data, 999 min_alignment), 1000 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 1001 data, min_alignment); 1002 rte_memcpy(data, seg->addr, seg->length); 1003 bufs[i].length += seg->length; 
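				/* Chain each additional segment onto m_head so
				 * that a single rte_bbdev_op_data (one mbuf
				 * chain) describes the whole multi-segment
				 * input.
				 */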
1004 1005 ret = rte_pktmbuf_chain(m_head, m_tail); 1006 TEST_ASSERT_SUCCESS(ret, 1007 "Couldn't chain mbufs from %d data type mbuf pool", 1008 op_type); 1009 } 1010 } else { 1011 1012 /* allocate chained-mbuf for output buffer */ 1013 for (j = 1; j < ref_entries->nb_segments; ++j) { 1014 struct rte_mbuf *m_tail = 1015 rte_pktmbuf_alloc(mbuf_pool); 1016 TEST_ASSERT_NOT_NULL(m_tail, 1017 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1018 op_type, 1019 n * ref_entries->nb_segments, 1020 mbuf_pool->size); 1021 1022 ret = rte_pktmbuf_chain(m_head, m_tail); 1023 TEST_ASSERT_SUCCESS(ret, 1024 "Couldn't chain mbufs from %d data type mbuf pool", 1025 op_type); 1026 } 1027 } 1028 } 1029 1030 return 0; 1031 } 1032 1033 static int 1034 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len, 1035 const int socket) 1036 { 1037 int i; 1038 1039 *buffers = rte_zmalloc_socket(NULL, len, 0, socket); 1040 if (*buffers == NULL) { 1041 printf("WARNING: Failed to allocate op_data on socket %d\n", 1042 socket); 1043 /* try to allocate memory on other detected sockets */ 1044 for (i = 0; i < socket; i++) { 1045 *buffers = rte_zmalloc_socket(NULL, len, 0, i); 1046 if (*buffers != NULL) 1047 break; 1048 } 1049 } 1050 1051 return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS; 1052 } 1053 1054 static void 1055 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops, 1056 const uint16_t n, const int8_t max_llr_modulus) 1057 { 1058 uint16_t i, byte_idx; 1059 1060 for (i = 0; i < n; ++i) { 1061 struct rte_mbuf *m = input_ops[i].data; 1062 while (m != NULL) { 1063 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1064 input_ops[i].offset); 1065 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 1066 ++byte_idx) 1067 llr[byte_idx] = round((double)max_llr_modulus * 1068 llr[byte_idx] / INT8_MAX); 1069 1070 m = m->next; 1071 } 1072 } 1073 } 1074 1075 /* 1076 * We may have to insert filler bits 1077 * when they are required by the HARQ assumption 1078 */ 1079 static void 1080 ldpc_add_filler(struct rte_bbdev_op_data *input_ops, 1081 const uint16_t n, struct test_op_params *op_params) 1082 { 1083 struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec; 1084 1085 if (input_ops == NULL) 1086 return; 1087 /* No need to add filler if not required by device */ 1088 if (!(ldpc_cap_flags & 1089 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS)) 1090 return; 1091 /* No need to add filler for loopback operation */ 1092 if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 1093 return; 1094 1095 uint16_t i, j, parity_offset; 1096 for (i = 0; i < n; ++i) { 1097 struct rte_mbuf *m = input_ops[i].data; 1098 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1099 input_ops[i].offset); 1100 parity_offset = (dec.basegraph == 1 ? 
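		/* 20 * Zc (BG1) or 8 * Zc (BG2) is the systematic part of the
		 * circular buffer once the first 2 * Zc punctured columns are
		 * removed (K = 22 * Zc or 10 * Zc per 3GPP 38.212); subtracting
		 * n_filler gives the position where filler LLRs are re-inserted.
		 */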
20 : 8) 1101 * dec.z_c - dec.n_filler; 1102 uint16_t new_hin_size = input_ops[i].length + dec.n_filler; 1103 m->data_len = new_hin_size; 1104 input_ops[i].length = new_hin_size; 1105 for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler; 1106 j--) 1107 llr[j] = llr[j - dec.n_filler]; 1108 uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 1109 for (j = 0; j < dec.n_filler; j++) 1110 llr[parity_offset + j] = llr_max_pre_scaling; 1111 } 1112 } 1113 1114 static void 1115 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops, 1116 const uint16_t n, const int8_t llr_size, 1117 const int8_t llr_decimals) 1118 { 1119 if (input_ops == NULL) 1120 return; 1121 1122 uint16_t i, byte_idx; 1123 1124 int16_t llr_max, llr_min, llr_tmp; 1125 llr_max = (1 << (llr_size - 1)) - 1; 1126 llr_min = -llr_max; 1127 for (i = 0; i < n; ++i) { 1128 struct rte_mbuf *m = input_ops[i].data; 1129 while (m != NULL) { 1130 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1131 input_ops[i].offset); 1132 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 1133 ++byte_idx) { 1134 1135 llr_tmp = llr[byte_idx]; 1136 if (llr_decimals == 4) 1137 llr_tmp *= 8; 1138 else if (llr_decimals == 2) 1139 llr_tmp *= 2; 1140 else if (llr_decimals == 0) 1141 llr_tmp /= 2; 1142 llr_tmp = RTE_MIN(llr_max, 1143 RTE_MAX(llr_min, llr_tmp)); 1144 llr[byte_idx] = (int8_t) llr_tmp; 1145 } 1146 1147 m = m->next; 1148 } 1149 } 1150 } 1151 1152 1153 1154 static int 1155 fill_queue_buffers(struct test_op_params *op_params, 1156 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp, 1157 struct rte_mempool *soft_out_mp, 1158 struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp, 1159 uint16_t queue_id, 1160 const struct rte_bbdev_op_cap *capabilities, 1161 uint16_t min_alignment, const int socket_id) 1162 { 1163 int ret; 1164 enum op_data_type type; 1165 const uint16_t n = op_params->num_to_process; 1166 1167 struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = { 1168 in_mp, 1169 soft_out_mp, 1170 hard_out_mp, 1171 harq_in_mp, 1172 harq_out_mp, 1173 }; 1174 1175 struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = { 1176 &op_params->q_bufs[socket_id][queue_id].inputs, 1177 &op_params->q_bufs[socket_id][queue_id].soft_outputs, 1178 &op_params->q_bufs[socket_id][queue_id].hard_outputs, 1179 &op_params->q_bufs[socket_id][queue_id].harq_inputs, 1180 &op_params->q_bufs[socket_id][queue_id].harq_outputs, 1181 }; 1182 1183 for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) { 1184 struct op_data_entries *ref_entries = 1185 &test_vector.entries[type]; 1186 if (ref_entries->nb_segments == 0) 1187 continue; 1188 1189 ret = allocate_buffers_on_socket(queue_ops[type], 1190 n * sizeof(struct rte_bbdev_op_data), 1191 socket_id); 1192 TEST_ASSERT_SUCCESS(ret, 1193 "Couldn't allocate memory for rte_bbdev_op_data structs"); 1194 1195 ret = init_op_data_objs(*queue_ops[type], ref_entries, 1196 mbuf_pools[type], n, type, min_alignment); 1197 TEST_ASSERT_SUCCESS(ret, 1198 "Couldn't init rte_bbdev_op_data structs"); 1199 } 1200 1201 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 1202 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n, 1203 capabilities->cap.turbo_dec.max_llr_modulus); 1204 1205 if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 1206 bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags & 1207 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 1208 bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 1209 RTE_BBDEV_LDPC_LLR_COMPRESSION; 1210 bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 
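		/* The capability fields read just below give the device's
		 * fixed-point LLR format: llr_size total bits with llr_decimals
		 * fractional bits. ldpc_input_llr_scaling() rescales the vector
		 * LLRs (which appear to assume a single fractional bit) to that
		 * format and saturates them to +/-(2^(llr_size - 1) - 1).
		 */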
1211 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 1212 ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals; 1213 ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size; 1214 ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags; 1215 if (!loopback && !llr_comp) 1216 ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n, 1217 ldpc_llr_size, ldpc_llr_decimals); 1218 if (!loopback && !harq_comp) 1219 ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n, 1220 ldpc_llr_size, ldpc_llr_decimals); 1221 if (!loopback) 1222 ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n, 1223 op_params); 1224 } 1225 1226 return 0; 1227 } 1228 1229 static void 1230 free_buffers(struct active_device *ad, struct test_op_params *op_params) 1231 { 1232 unsigned int i, j; 1233 1234 rte_mempool_free(ad->ops_mempool); 1235 rte_mempool_free(ad->in_mbuf_pool); 1236 rte_mempool_free(ad->hard_out_mbuf_pool); 1237 rte_mempool_free(ad->soft_out_mbuf_pool); 1238 rte_mempool_free(ad->harq_in_mbuf_pool); 1239 rte_mempool_free(ad->harq_out_mbuf_pool); 1240 1241 for (i = 0; i < rte_lcore_count(); ++i) { 1242 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) { 1243 rte_free(op_params->q_bufs[j][i].inputs); 1244 rte_free(op_params->q_bufs[j][i].hard_outputs); 1245 rte_free(op_params->q_bufs[j][i].soft_outputs); 1246 rte_free(op_params->q_bufs[j][i].harq_inputs); 1247 rte_free(op_params->q_bufs[j][i].harq_outputs); 1248 } 1249 } 1250 } 1251 1252 static void 1253 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1254 unsigned int start_idx, 1255 struct rte_bbdev_op_data *inputs, 1256 struct rte_bbdev_op_data *hard_outputs, 1257 struct rte_bbdev_op_data *soft_outputs, 1258 struct rte_bbdev_dec_op *ref_op) 1259 { 1260 unsigned int i; 1261 struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec; 1262 1263 for (i = 0; i < n; ++i) { 1264 if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1265 ops[i]->turbo_dec.tb_params.ea = 1266 turbo_dec->tb_params.ea; 1267 ops[i]->turbo_dec.tb_params.eb = 1268 turbo_dec->tb_params.eb; 1269 ops[i]->turbo_dec.tb_params.k_pos = 1270 turbo_dec->tb_params.k_pos; 1271 ops[i]->turbo_dec.tb_params.k_neg = 1272 turbo_dec->tb_params.k_neg; 1273 ops[i]->turbo_dec.tb_params.c = 1274 turbo_dec->tb_params.c; 1275 ops[i]->turbo_dec.tb_params.c_neg = 1276 turbo_dec->tb_params.c_neg; 1277 ops[i]->turbo_dec.tb_params.cab = 1278 turbo_dec->tb_params.cab; 1279 ops[i]->turbo_dec.tb_params.r = 1280 turbo_dec->tb_params.r; 1281 } else { 1282 ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e; 1283 ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k; 1284 } 1285 1286 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale; 1287 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max; 1288 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min; 1289 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags; 1290 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index; 1291 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps; 1292 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode; 1293 1294 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i]; 1295 ops[i]->turbo_dec.input = inputs[start_idx + i]; 1296 if (soft_outputs != NULL) 1297 ops[i]->turbo_dec.soft_output = 1298 soft_outputs[start_idx + i]; 1299 } 1300 } 1301 1302 static void 1303 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1304 unsigned int start_idx, 1305 struct rte_bbdev_op_data *inputs, 1306 struct rte_bbdev_op_data *outputs, 1307 struct rte_bbdev_enc_op *ref_op) 1308 { 1309 unsigned int i; 1310 struct rte_bbdev_op_turbo_enc 
*turbo_enc = &ref_op->turbo_enc;
	for (i = 0; i < n; ++i) {
		if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
			ops[i]->turbo_enc.tb_params.ea =
					turbo_enc->tb_params.ea;
			ops[i]->turbo_enc.tb_params.eb =
					turbo_enc->tb_params.eb;
			ops[i]->turbo_enc.tb_params.k_pos =
					turbo_enc->tb_params.k_pos;
			ops[i]->turbo_enc.tb_params.k_neg =
					turbo_enc->tb_params.k_neg;
			ops[i]->turbo_enc.tb_params.c =
					turbo_enc->tb_params.c;
			ops[i]->turbo_enc.tb_params.c_neg =
					turbo_enc->tb_params.c_neg;
			ops[i]->turbo_enc.tb_params.cab =
					turbo_enc->tb_params.cab;
			ops[i]->turbo_enc.tb_params.ncb_pos =
					turbo_enc->tb_params.ncb_pos;
			ops[i]->turbo_enc.tb_params.ncb_neg =
					turbo_enc->tb_params.ncb_neg;
			ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r;
		} else {
			ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e;
			ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k;
			ops[i]->turbo_enc.cb_params.ncb =
					turbo_enc->cb_params.ncb;
		}
		ops[i]->turbo_enc.rv_index = turbo_enc->rv_index;
		ops[i]->turbo_enc.op_flags = turbo_enc->op_flags;
		ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode;

		ops[i]->turbo_enc.output = outputs[start_idx + i];
		ops[i]->turbo_enc.input = inputs[start_idx + i];
	}
}


/* Returns a random number drawn from a normal distribution
 * with mean 0 and variance 1 (Marsaglia polar method).
 */
static double
randn(int n)
{
	double S, Z, U1, U2, u, v, fac;

	do {
		U1 = (double)rand() / RAND_MAX;
		U2 = (double)rand() / RAND_MAX;
		u = 2. * U1 - 1.;
		v = 2. * U2 - 1.;
		S = u * u + v * v;
	} while (S >= 1 || S == 0);
	fac = sqrt(-2. * log(S) / S);
	Z = (n % 2) ?
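	/* Marsaglia polar method: (u, v) is uniform over the unit disc, so
	 * both u * fac and v * fac are independent N(0, 1) samples; the
	 * parity of n merely selects which of the two is returned.
	 */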
u * fac : v * fac; 1366 return Z; 1367 } 1368 1369 static inline double 1370 maxstar(double A, double B) 1371 { 1372 if (fabs(A - B) > 5) 1373 return RTE_MAX(A, B); 1374 else 1375 return RTE_MAX(A, B) + log1p(exp(-fabs(A - B))); 1376 } 1377 1378 /* 1379 * Generate Qm LLRS for Qm==8 1380 * Modulation, AWGN and LLR estimation from max log development 1381 */ 1382 static void 1383 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1384 { 1385 int qm = 8; 1386 int qam = 256; 1387 int m, k; 1388 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1389 /* 5.1.4 of TS38.211 */ 1390 const double symbols_I[256] = { 1391 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5, 1392 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11, 1393 11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13, 1394 15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 1395 15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1396 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1397 1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 1398 15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 1399 13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5, 1400 -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, 1401 -7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, 1402 -1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13, 1403 -13, -15, -15, -13, -13, -15, -15, -11, -11, -9, 1404 -9, -11, -11, -9, -9, -13, -13, -15, -15, -13, 1405 -13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3, 1406 -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5, 1407 -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11, 1408 -9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13, 1409 -13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9, 1410 -13, -13, -15, -15, -13, -13, -15, -15}; 1411 const double symbols_Q[256] = { 1412 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11, 1413 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13, 1414 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 1415 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 1416 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5, 1417 -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, 1418 -15, -13, -15, -11, -9, -11, -9, -13, -15, -13, 1419 -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5, 1420 -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15, 1421 -13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5, 1422 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11, 1423 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 1424 13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 1425 3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 1426 13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, 1427 -5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, 1428 -13, -15, -13, -15, -11, -9, -11, -9, -13, -15, 1429 -13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, 1430 -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15, 1431 -13, -15, -11, -9, -11, -9, -13, -15, -13, -15}; 1432 /* Average constellation point energy */ 1433 N0 *= 170.0; 1434 for (k = 0; k < qm; k++) 1435 b[k] = llrs[qm * i + k] < 0 ? 
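	/* The sign of each reference LLR is used as the transmitted bit: the
	 * 8-bit label is mapped to a 256QAM symbol (TS 38.211 section 5.1.4),
	 * AWGN at the requested SNR is added, and max-log LLRs are recomputed
	 * below. N0 was scaled by 170 above, the average symbol energy of the
	 * +/-1..+/-15 constellation.
	 */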
1.0 : 0.0; 1436 /* 5.1.4 of TS38.211 */ 1437 I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) * 1438 (4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6])))); 1439 Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) * 1440 (4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7])))); 1441 /* AWGN channel */ 1442 I += sqrt(N0 / 2) * randn(0); 1443 Q += sqrt(N0 / 2) * randn(1); 1444 /* 1445 * Calculate the log of the probability that each of 1446 * the constellation points was transmitted 1447 */ 1448 for (m = 0; m < qam; m++) 1449 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1450 + pow(Q - symbols_Q[m], 2.0)) / N0; 1451 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1452 for (k = 0; k < qm; k++) { 1453 p0 = -999999; 1454 p1 = -999999; 1455 /* For each constellation point */ 1456 for (m = 0; m < qam; m++) { 1457 if ((m >> (qm - k - 1)) & 1) 1458 p1 = maxstar(p1, log_syml_prob[m]); 1459 else 1460 p0 = maxstar(p0, log_syml_prob[m]); 1461 } 1462 /* Calculate the LLR */ 1463 llr_ = p0 - p1; 1464 llr_ *= (1 << ldpc_llr_decimals); 1465 llr_ = round(llr_); 1466 if (llr_ > llr_max) 1467 llr_ = llr_max; 1468 if (llr_ < -llr_max) 1469 llr_ = -llr_max; 1470 llrs[qm * i + k] = (int8_t) llr_; 1471 } 1472 } 1473 1474 1475 /* 1476 * Generate Qm LLRS for Qm==6 1477 * Modulation, AWGN and LLR estimation from max log development 1478 */ 1479 static void 1480 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1481 { 1482 int qm = 6; 1483 int qam = 64; 1484 int m, k; 1485 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1486 /* 5.1.4 of TS38.211 */ 1487 const double symbols_I[64] = { 1488 3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 1489 3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 1490 -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, 1491 -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, 1492 -5, -5, -7, -7, -5, -5, -7, -7}; 1493 const double symbols_Q[64] = { 1494 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 1495 -3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1, 1496 -5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 1497 5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7, 1498 -3, -1, -3, -1, -5, -7, -5, -7}; 1499 /* Average constellation point energy */ 1500 N0 *= 42.0; 1501 for (k = 0; k < qm; k++) 1502 b[k] = llrs[qm * i + k] < 0 ? 
1.0 : 0.0; 1503 /* 5.1.4 of TS38.211 */ 1504 I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4]))); 1505 Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5]))); 1506 /* AWGN channel */ 1507 I += sqrt(N0 / 2) * randn(0); 1508 Q += sqrt(N0 / 2) * randn(1); 1509 /* 1510 * Calculate the log of the probability that each of 1511 * the constellation points was transmitted 1512 */ 1513 for (m = 0; m < qam; m++) 1514 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1515 + pow(Q - symbols_Q[m], 2.0)) / N0; 1516 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1517 for (k = 0; k < qm; k++) { 1518 p0 = -999999; 1519 p1 = -999999; 1520 /* For each constellation point */ 1521 for (m = 0; m < qam; m++) { 1522 if ((m >> (qm - k - 1)) & 1) 1523 p1 = maxstar(p1, log_syml_prob[m]); 1524 else 1525 p0 = maxstar(p0, log_syml_prob[m]); 1526 } 1527 /* Calculate the LLR */ 1528 llr_ = p0 - p1; 1529 llr_ *= (1 << ldpc_llr_decimals); 1530 llr_ = round(llr_); 1531 if (llr_ > llr_max) 1532 llr_ = llr_max; 1533 if (llr_ < -llr_max) 1534 llr_ = -llr_max; 1535 llrs[qm * i + k] = (int8_t) llr_; 1536 } 1537 } 1538 1539 /* 1540 * Generate Qm LLRS for Qm==4 1541 * Modulation, AWGN and LLR estimation from max log development 1542 */ 1543 static void 1544 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1545 { 1546 int qm = 4; 1547 int qam = 16; 1548 int m, k; 1549 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1550 /* 5.1.4 of TS38.211 */ 1551 const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3, 1552 -1, -1, -3, -3, -1, -1, -3, -3}; 1553 const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3, 1554 1, 3, 1, 3, -1, -3, -1, -3}; 1555 /* Average constellation point energy */ 1556 N0 *= 10.0; 1557 for (k = 0; k < qm; k++) 1558 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0; 1559 /* 5.1.4 of TS38.211 */ 1560 I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2])); 1561 Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3])); 1562 /* AWGN channel */ 1563 I += sqrt(N0 / 2) * randn(0); 1564 Q += sqrt(N0 / 2) * randn(1); 1565 /* 1566 * Calculate the log of the probability that each of 1567 * the constellation points was transmitted 1568 */ 1569 for (m = 0; m < qam; m++) 1570 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1571 + pow(Q - symbols_Q[m], 2.0)) / N0; 1572 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1573 for (k = 0; k < qm; k++) { 1574 p0 = -999999; 1575 p1 = -999999; 1576 /* For each constellation point */ 1577 for (m = 0; m < qam; m++) { 1578 if ((m >> (qm - k - 1)) & 1) 1579 p1 = maxstar(p1, log_syml_prob[m]); 1580 else 1581 p0 = maxstar(p0, log_syml_prob[m]); 1582 } 1583 /* Calculate the LLR */ 1584 llr_ = p0 - p1; 1585 llr_ *= (1 << ldpc_llr_decimals); 1586 llr_ = round(llr_); 1587 if (llr_ > llr_max) 1588 llr_ = llr_max; 1589 if (llr_ < -llr_max) 1590 llr_ = -llr_max; 1591 llrs[qm * i + k] = (int8_t) llr_; 1592 } 1593 } 1594 1595 static void 1596 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max) 1597 { 1598 double b, b1, n; 1599 double coeff = 2.0 * sqrt(N0); 1600 1601 /* Ignore in vectors rare quasi null LLRs not to be saturated */ 1602 if (llrs[j] < 8 && llrs[j] > -8) 1603 return; 1604 1605 /* Note don't change sign here */ 1606 n = randn(j % 2); 1607 b1 = ((llrs[j] > 0 ? 
2.0 : -2.0) 1608 + coeff * n) / N0; 1609 b = b1 * (1 << ldpc_llr_decimals); 1610 b = round(b); 1611 if (b > llr_max) 1612 b = llr_max; 1613 if (b < -llr_max) 1614 b = -llr_max; 1615 llrs[j] = (int8_t) b; 1616 } 1617 1618 /* Generate LLR for a given SNR */ 1619 static void 1620 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs, 1621 struct rte_bbdev_dec_op *ref_op) 1622 { 1623 struct rte_mbuf *m; 1624 uint16_t qm; 1625 uint32_t i, j, e, range; 1626 double N0, llr_max; 1627 1628 e = ref_op->ldpc_dec.cb_params.e; 1629 qm = ref_op->ldpc_dec.q_m; 1630 llr_max = (1 << (ldpc_llr_size - 1)) - 1; 1631 range = e / qm; 1632 N0 = 1.0 / pow(10.0, get_snr() / 10.0); 1633 1634 for (i = 0; i < n; ++i) { 1635 m = inputs[i].data; 1636 int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0); 1637 if (qm == 8) { 1638 for (j = 0; j < range; ++j) 1639 gen_qm8_llr(llrs, j, N0, llr_max); 1640 } else if (qm == 6) { 1641 for (j = 0; j < range; ++j) 1642 gen_qm6_llr(llrs, j, N0, llr_max); 1643 } else if (qm == 4) { 1644 for (j = 0; j < range; ++j) 1645 gen_qm4_llr(llrs, j, N0, llr_max); 1646 } else { 1647 for (j = 0; j < e; ++j) 1648 gen_qm2_llr(llrs, j, N0, llr_max); 1649 } 1650 } 1651 } 1652 1653 static void 1654 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1655 unsigned int start_idx, 1656 struct rte_bbdev_op_data *inputs, 1657 struct rte_bbdev_op_data *hard_outputs, 1658 struct rte_bbdev_op_data *soft_outputs, 1659 struct rte_bbdev_op_data *harq_inputs, 1660 struct rte_bbdev_op_data *harq_outputs, 1661 struct rte_bbdev_dec_op *ref_op) 1662 { 1663 unsigned int i; 1664 struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec; 1665 1666 for (i = 0; i < n; ++i) { 1667 if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1668 ops[i]->ldpc_dec.tb_params.ea = 1669 ldpc_dec->tb_params.ea; 1670 ops[i]->ldpc_dec.tb_params.eb = 1671 ldpc_dec->tb_params.eb; 1672 ops[i]->ldpc_dec.tb_params.c = 1673 ldpc_dec->tb_params.c; 1674 ops[i]->ldpc_dec.tb_params.cab = 1675 ldpc_dec->tb_params.cab; 1676 ops[i]->ldpc_dec.tb_params.r = 1677 ldpc_dec->tb_params.r; 1678 } else { 1679 ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e; 1680 } 1681 1682 ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph; 1683 ops[i]->ldpc_dec.z_c = ldpc_dec->z_c; 1684 ops[i]->ldpc_dec.q_m = ldpc_dec->q_m; 1685 ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler; 1686 ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb; 1687 ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max; 1688 ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index; 1689 ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags; 1690 ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode; 1691 1692 if (hard_outputs != NULL) 1693 ops[i]->ldpc_dec.hard_output = 1694 hard_outputs[start_idx + i]; 1695 if (inputs != NULL) 1696 ops[i]->ldpc_dec.input = 1697 inputs[start_idx + i]; 1698 if (soft_outputs != NULL) 1699 ops[i]->ldpc_dec.soft_output = 1700 soft_outputs[start_idx + i]; 1701 if (harq_inputs != NULL) 1702 ops[i]->ldpc_dec.harq_combined_input = 1703 harq_inputs[start_idx + i]; 1704 if (harq_outputs != NULL) 1705 ops[i]->ldpc_dec.harq_combined_output = 1706 harq_outputs[start_idx + i]; 1707 } 1708 } 1709 1710 1711 static void 1712 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1713 unsigned int start_idx, 1714 struct rte_bbdev_op_data *inputs, 1715 struct rte_bbdev_op_data *outputs, 1716 struct rte_bbdev_enc_op *ref_op) 1717 { 1718 unsigned int i; 1719 struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc; 1720 for (i = 0; i < n; ++i) { 
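		/* Each op receives an identical copy of the reference LDPC
		 * encode parameters; only the input/output data pointers
		 * differ per op, indexed from start_idx.
		 */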
1721 if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1722 ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea; 1723 ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb; 1724 ops[i]->ldpc_enc.tb_params.cab = 1725 ldpc_enc->tb_params.cab; 1726 ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c; 1727 ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r; 1728 } else { 1729 ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e; 1730 } 1731 ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph; 1732 ops[i]->ldpc_enc.z_c = ldpc_enc->z_c; 1733 ops[i]->ldpc_enc.q_m = ldpc_enc->q_m; 1734 ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler; 1735 ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb; 1736 ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index; 1737 ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags; 1738 ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode; 1739 ops[i]->ldpc_enc.output = outputs[start_idx + i]; 1740 ops[i]->ldpc_enc.input = inputs[start_idx + i]; 1741 } 1742 } 1743 1744 static int 1745 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op, 1746 unsigned int order_idx, const int expected_status) 1747 { 1748 int status = op->status; 1749 /* ignore parity mismatch false alarms for long iterations */ 1750 if (get_iter_max() >= 10) { 1751 if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1752 (status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1753 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1754 status -= (1 << RTE_BBDEV_SYNDROME_ERROR); 1755 } 1756 if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1757 !(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1758 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1759 status += (1 << RTE_BBDEV_SYNDROME_ERROR); 1760 } 1761 } 1762 1763 TEST_ASSERT(status == expected_status, 1764 "op_status (%d) != expected_status (%d)", 1765 op->status, expected_status); 1766 1767 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1768 "Ordering error, expected %p, got %p", 1769 (void *)(uintptr_t)order_idx, op->opaque_data); 1770 1771 return TEST_SUCCESS; 1772 } 1773 1774 static int 1775 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op, 1776 unsigned int order_idx, const int expected_status) 1777 { 1778 TEST_ASSERT(op->status == expected_status, 1779 "op_status (%d) != expected_status (%d)", 1780 op->status, expected_status); 1781 1782 if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE) 1783 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1784 "Ordering error, expected %p, got %p", 1785 (void *)(uintptr_t)order_idx, op->opaque_data); 1786 1787 return TEST_SUCCESS; 1788 } 1789 1790 static inline int 1791 validate_op_chain(struct rte_bbdev_op_data *op, 1792 struct op_data_entries *orig_op) 1793 { 1794 uint8_t i; 1795 struct rte_mbuf *m = op->data; 1796 uint8_t nb_dst_segments = orig_op->nb_segments; 1797 uint32_t total_data_size = 0; 1798 1799 TEST_ASSERT(nb_dst_segments == m->nb_segs, 1800 "Number of segments differ in original (%u) and filled (%u) op", 1801 nb_dst_segments, m->nb_segs); 1802 1803 /* Validate each mbuf segment length */ 1804 for (i = 0; i < nb_dst_segments; ++i) { 1805 /* Apply offset to the first mbuf segment */ 1806 uint16_t offset = (i == 0) ? 
op->offset : 0; 1807 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1808 total_data_size += orig_op->segments[i].length; 1809 1810 TEST_ASSERT(orig_op->segments[i].length == data_len, 1811 "Length of segment differ in original (%u) and filled (%u) op", 1812 orig_op->segments[i].length, data_len); 1813 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr, 1814 rte_pktmbuf_mtod_offset(m, uint32_t *, offset), 1815 data_len, 1816 "Output buffers (CB=%u) are not equal", i); 1817 m = m->next; 1818 } 1819 1820 /* Validate total mbuf pkt length */ 1821 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 1822 TEST_ASSERT(total_data_size == pkt_len, 1823 "Length of data differ in original (%u) and filled (%u) op", 1824 total_data_size, pkt_len); 1825 1826 return TEST_SUCCESS; 1827 } 1828 1829 /* 1830 * Compute K0 for a given configuration for HARQ output length computation 1831 * As per definition in 3GPP 38.212 Table 5.4.2.1-2 1832 */ 1833 static inline uint16_t 1834 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index) 1835 { 1836 if (rv_index == 0) 1837 return 0; 1838 uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c; 1839 if (n_cb == n) { 1840 if (rv_index == 1) 1841 return (bg == 1 ? K0_1_1 : K0_1_2) * z_c; 1842 else if (rv_index == 2) 1843 return (bg == 1 ? K0_2_1 : K0_2_2) * z_c; 1844 else 1845 return (bg == 1 ? K0_3_1 : K0_3_2) * z_c; 1846 } 1847 /* LBRM case - includes a division by N */ 1848 if (rv_index == 1) 1849 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb) 1850 / n) * z_c; 1851 else if (rv_index == 2) 1852 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb) 1853 / n) * z_c; 1854 else 1855 return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb) 1856 / n) * z_c; 1857 } 1858 1859 /* HARQ output length including the Filler bits */ 1860 static inline uint16_t 1861 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld) 1862 { 1863 uint16_t k0 = 0; 1864 uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index; 1865 k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv); 1866 /* Compute RM out size and number of rows */ 1867 uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 1868 * ops_ld->z_c - ops_ld->n_filler; 1869 uint16_t deRmOutSize = RTE_MIN( 1870 k0 + ops_ld->cb_params.e + 1871 ((k0 > parity_offset) ? 1872 0 : ops_ld->n_filler), 1873 ops_ld->n_cb); 1874 uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1) 1875 / ops_ld->z_c); 1876 uint16_t harq_output_len = numRows * ops_ld->z_c; 1877 return harq_output_len; 1878 } 1879 1880 static inline int 1881 validate_op_harq_chain(struct rte_bbdev_op_data *op, 1882 struct op_data_entries *orig_op, 1883 struct rte_bbdev_op_ldpc_dec *ops_ld) 1884 { 1885 uint8_t i; 1886 uint32_t j, jj, k; 1887 struct rte_mbuf *m = op->data; 1888 uint8_t nb_dst_segments = orig_op->nb_segments; 1889 uint32_t total_data_size = 0; 1890 int8_t *harq_orig, *harq_out, abs_harq_origin; 1891 uint32_t byte_error = 0, cum_error = 0, error; 1892 int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1; 1893 int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 1894 uint16_t parity_offset; 1895 1896 TEST_ASSERT(nb_dst_segments == m->nb_segs, 1897 "Number of segments differ in original (%u) and filled (%u) op", 1898 nb_dst_segments, m->nb_segs); 1899 1900 /* Validate each mbuf segment length */ 1901 for (i = 0; i < nb_dst_segments; ++i) { 1902 /* Apply offset to the first mbuf segment */ 1903 uint16_t offset = (i == 0) ? 
op->offset : 0; 1904 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1905 total_data_size += orig_op->segments[i].length; 1906 1907 TEST_ASSERT(orig_op->segments[i].length < 1908 (uint32_t)(data_len + 64), 1909 "Length of segment differ in original (%u) and filled (%u) op", 1910 orig_op->segments[i].length, data_len); 1911 harq_orig = (int8_t *) orig_op->segments[i].addr; 1912 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset); 1913 1914 if (!(ldpc_cap_flags & 1915 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS 1916 ) || (ops_ld->op_flags & 1917 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 1918 data_len -= ops_ld->z_c; 1919 parity_offset = data_len; 1920 } else { 1921 /* Compute RM out size and number of rows */ 1922 parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 1923 * ops_ld->z_c - ops_ld->n_filler; 1924 uint16_t deRmOutSize = compute_harq_len(ops_ld) - 1925 ops_ld->n_filler; 1926 if (data_len > deRmOutSize) 1927 data_len = deRmOutSize; 1928 if (data_len > orig_op->segments[i].length) 1929 data_len = orig_op->segments[i].length; 1930 } 1931 /* 1932 * HARQ output can have minor differences 1933 * due to integer representation and related scaling 1934 */ 1935 for (j = 0, jj = 0; j < data_len; j++, jj++) { 1936 if (j == parity_offset) { 1937 /* Special Handling of the filler bits */ 1938 for (k = 0; k < ops_ld->n_filler; k++) { 1939 if (harq_out[jj] != 1940 llr_max_pre_scaling) { 1941 printf("HARQ Filler issue %d: %d %d\n", 1942 jj, harq_out[jj], 1943 llr_max); 1944 byte_error++; 1945 } 1946 jj++; 1947 } 1948 } 1949 if (!(ops_ld->op_flags & 1950 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 1951 if (ldpc_llr_decimals > 1) 1952 harq_out[jj] = (harq_out[jj] + 1) 1953 >> (ldpc_llr_decimals - 1); 1954 /* Saturated to S7 */ 1955 if (harq_orig[j] > llr_max) 1956 harq_orig[j] = llr_max; 1957 if (harq_orig[j] < -llr_max) 1958 harq_orig[j] = -llr_max; 1959 } 1960 if (harq_orig[j] != harq_out[jj]) { 1961 error = (harq_orig[j] > harq_out[jj]) ? 1962 harq_orig[j] - harq_out[jj] : 1963 harq_out[jj] - harq_orig[j]; 1964 abs_harq_origin = harq_orig[j] > 0 ? 
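/*
 * The check below is a tolerance heuristic: differences of up to 8 LLR
 * steps are tolerated in general, and up to 16 steps when the original
 * magnitude is close to saturation (within 16 of llr_max), where
 * saturation and the right shift by (ldpc_llr_decimals - 1) lose
 * precision.
 */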
1965 harq_orig[j] : 1966 -harq_orig[j]; 1967 /* Residual quantization error */ 1968 if ((error > 8 && (abs_harq_origin < 1969 (llr_max - 16))) || 1970 (error > 16)) { 1971 printf("HARQ mismatch %d: exp %d act %d => %d\n", 1972 j, harq_orig[j], 1973 harq_out[jj], error); 1974 byte_error++; 1975 cum_error += error; 1976 } 1977 } 1978 } 1979 m = m->next; 1980 } 1981 1982 if (byte_error) 1983 TEST_ASSERT(byte_error <= 1, 1984 "HARQ output mismatch (%d) %d", 1985 byte_error, cum_error); 1986 1987 /* Validate total mbuf pkt length */ 1988 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 1989 TEST_ASSERT(total_data_size < pkt_len + 64, 1990 "Length of data differ in original (%u) and filled (%u) op", 1991 total_data_size, pkt_len); 1992 1993 return TEST_SUCCESS; 1994 } 1995 1996 static int 1997 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 1998 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 1999 { 2000 unsigned int i; 2001 int ret; 2002 struct op_data_entries *hard_data_orig = 2003 &test_vector.entries[DATA_HARD_OUTPUT]; 2004 struct op_data_entries *soft_data_orig = 2005 &test_vector.entries[DATA_SOFT_OUTPUT]; 2006 struct rte_bbdev_op_turbo_dec *ops_td; 2007 struct rte_bbdev_op_data *hard_output; 2008 struct rte_bbdev_op_data *soft_output; 2009 struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec; 2010 2011 for (i = 0; i < n; ++i) { 2012 ops_td = &ops[i]->turbo_dec; 2013 hard_output = &ops_td->hard_output; 2014 soft_output = &ops_td->soft_output; 2015 2016 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2017 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2018 "Returned iter_count (%d) > expected iter_count (%d)", 2019 ops_td->iter_count, ref_td->iter_count); 2020 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2021 TEST_ASSERT_SUCCESS(ret, 2022 "Checking status and ordering for decoder failed"); 2023 2024 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2025 hard_data_orig), 2026 "Hard output buffers (CB=%u) are not equal", 2027 i); 2028 2029 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT) 2030 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 2031 soft_data_orig), 2032 "Soft output buffers (CB=%u) are not equal", 2033 i); 2034 } 2035 2036 return TEST_SUCCESS; 2037 } 2038 2039 /* Check Number of code blocks errors */ 2040 static int 2041 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n) 2042 { 2043 unsigned int i; 2044 struct op_data_entries *hard_data_orig = 2045 &test_vector.entries[DATA_HARD_OUTPUT]; 2046 struct rte_bbdev_op_ldpc_dec *ops_td; 2047 struct rte_bbdev_op_data *hard_output; 2048 int errors = 0; 2049 struct rte_mbuf *m; 2050 2051 for (i = 0; i < n; ++i) { 2052 ops_td = &ops[i]->ldpc_dec; 2053 hard_output = &ops_td->hard_output; 2054 m = hard_output->data; 2055 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0), 2056 hard_data_orig->segments[0].addr, 2057 hard_data_orig->segments[0].length)) 2058 errors++; 2059 } 2060 return errors; 2061 } 2062 2063 static int 2064 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 2065 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 2066 { 2067 unsigned int i; 2068 int ret; 2069 struct op_data_entries *hard_data_orig = 2070 &test_vector.entries[DATA_HARD_OUTPUT]; 2071 struct op_data_entries *soft_data_orig = 2072 &test_vector.entries[DATA_SOFT_OUTPUT]; 2073 struct op_data_entries *harq_data_orig = 2074 &test_vector.entries[DATA_HARQ_OUTPUT]; 2075 struct rte_bbdev_op_ldpc_dec *ops_td; 2076 struct 
rte_bbdev_op_data *hard_output; 2077 struct rte_bbdev_op_data *harq_output; 2078 struct rte_bbdev_op_data *soft_output; 2079 struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec; 2080 2081 for (i = 0; i < n; ++i) { 2082 ops_td = &ops[i]->ldpc_dec; 2083 hard_output = &ops_td->hard_output; 2084 harq_output = &ops_td->harq_combined_output; 2085 soft_output = &ops_td->soft_output; 2086 2087 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2088 TEST_ASSERT_SUCCESS(ret, 2089 "Checking status and ordering for decoder failed"); 2090 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2091 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2092 "Returned iter_count (%d) > expected iter_count (%d)", 2093 ops_td->iter_count, ref_td->iter_count); 2094 /* 2095 * We can ignore output data when the decoding failed to 2096 * converge or for loop-back cases 2097 */ 2098 if (!check_bit(ops[i]->ldpc_dec.op_flags, 2099 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 2100 ) && ( 2101 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR 2102 )) == 0) 2103 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2104 hard_data_orig), 2105 "Hard output buffers (CB=%u) are not equal", 2106 i); 2107 2108 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE) 2109 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 2110 soft_data_orig), 2111 "Soft output buffers (CB=%u) are not equal", 2112 i); 2113 if (ref_op->ldpc_dec.op_flags & 2114 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) { 2115 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2116 harq_data_orig, ops_td), 2117 "HARQ output buffers (CB=%u) are not equal", 2118 i); 2119 } 2120 if (ref_op->ldpc_dec.op_flags & 2121 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 2122 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2123 harq_data_orig, ops_td), 2124 "HARQ output buffers (CB=%u) are not equal", 2125 i); 2126 2127 } 2128 2129 return TEST_SUCCESS; 2130 } 2131 2132 2133 static int 2134 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2135 struct rte_bbdev_enc_op *ref_op) 2136 { 2137 unsigned int i; 2138 int ret; 2139 struct op_data_entries *hard_data_orig = 2140 &test_vector.entries[DATA_HARD_OUTPUT]; 2141 2142 for (i = 0; i < n; ++i) { 2143 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2144 TEST_ASSERT_SUCCESS(ret, 2145 "Checking status and ordering for encoder failed"); 2146 TEST_ASSERT_SUCCESS(validate_op_chain( 2147 &ops[i]->turbo_enc.output, 2148 hard_data_orig), 2149 "Output buffers (CB=%u) are not equal", 2150 i); 2151 } 2152 2153 return TEST_SUCCESS; 2154 } 2155 2156 static int 2157 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2158 struct rte_bbdev_enc_op *ref_op) 2159 { 2160 unsigned int i; 2161 int ret; 2162 struct op_data_entries *hard_data_orig = 2163 &test_vector.entries[DATA_HARD_OUTPUT]; 2164 2165 for (i = 0; i < n; ++i) { 2166 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2167 TEST_ASSERT_SUCCESS(ret, 2168 "Checking status and ordering for encoder failed"); 2169 TEST_ASSERT_SUCCESS(validate_op_chain( 2170 &ops[i]->ldpc_enc.output, 2171 hard_data_orig), 2172 "Output buffers (CB=%u) are not equal", 2173 i); 2174 } 2175 2176 return TEST_SUCCESS; 2177 } 2178 2179 static void 2180 create_reference_dec_op(struct rte_bbdev_dec_op *op) 2181 { 2182 unsigned int i; 2183 struct op_data_entries *entry; 2184 2185 op->turbo_dec = test_vector.turbo_dec; 2186 entry = &test_vector.entries[DATA_INPUT]; 2187 for (i = 0; i < entry->nb_segments; ++i) 2188 
op->turbo_dec.input.length += 2189 entry->segments[i].length; 2190 } 2191 2192 static void 2193 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op) 2194 { 2195 unsigned int i; 2196 struct op_data_entries *entry; 2197 2198 op->ldpc_dec = test_vector.ldpc_dec; 2199 entry = &test_vector.entries[DATA_INPUT]; 2200 for (i = 0; i < entry->nb_segments; ++i) 2201 op->ldpc_dec.input.length += 2202 entry->segments[i].length; 2203 if (test_vector.ldpc_dec.op_flags & 2204 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) { 2205 entry = &test_vector.entries[DATA_HARQ_INPUT]; 2206 for (i = 0; i < entry->nb_segments; ++i) 2207 op->ldpc_dec.harq_combined_input.length += 2208 entry->segments[i].length; 2209 } 2210 } 2211 2212 2213 static void 2214 create_reference_enc_op(struct rte_bbdev_enc_op *op) 2215 { 2216 unsigned int i; 2217 struct op_data_entries *entry; 2218 2219 op->turbo_enc = test_vector.turbo_enc; 2220 entry = &test_vector.entries[DATA_INPUT]; 2221 for (i = 0; i < entry->nb_segments; ++i) 2222 op->turbo_enc.input.length += 2223 entry->segments[i].length; 2224 } 2225 2226 static void 2227 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op) 2228 { 2229 unsigned int i; 2230 struct op_data_entries *entry; 2231 2232 op->ldpc_enc = test_vector.ldpc_enc; 2233 entry = &test_vector.entries[DATA_INPUT]; 2234 for (i = 0; i < entry->nb_segments; ++i) 2235 op->ldpc_enc.input.length += 2236 entry->segments[i].length; 2237 } 2238 2239 static uint32_t 2240 calc_dec_TB_size(struct rte_bbdev_dec_op *op) 2241 { 2242 uint8_t i; 2243 uint32_t c, r, tb_size = 0; 2244 2245 if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2246 tb_size = op->turbo_dec.tb_params.k_neg; 2247 } else { 2248 c = op->turbo_dec.tb_params.c; 2249 r = op->turbo_dec.tb_params.r; 2250 for (i = 0; i < c-r; i++) 2251 tb_size += (r < op->turbo_dec.tb_params.c_neg) ? 2252 op->turbo_dec.tb_params.k_neg : 2253 op->turbo_dec.tb_params.k_pos; 2254 } 2255 return tb_size; 2256 } 2257 2258 static uint32_t 2259 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op) 2260 { 2261 uint8_t i; 2262 uint32_t c, r, tb_size = 0; 2263 uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 2264 2265 if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2266 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; 2267 } else { 2268 c = op->ldpc_dec.tb_params.c; 2269 r = op->ldpc_dec.tb_params.r; 2270 for (i = 0; i < c-r; i++) 2271 tb_size += sys_cols * op->ldpc_dec.z_c 2272 - op->ldpc_dec.n_filler; 2273 } 2274 return tb_size; 2275 } 2276 2277 static uint32_t 2278 calc_enc_TB_size(struct rte_bbdev_enc_op *op) 2279 { 2280 uint8_t i; 2281 uint32_t c, r, tb_size = 0; 2282 2283 if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2284 tb_size = op->turbo_enc.tb_params.k_neg; 2285 } else { 2286 c = op->turbo_enc.tb_params.c; 2287 r = op->turbo_enc.tb_params.r; 2288 for (i = 0; i < c-r; i++) 2289 tb_size += (r < op->turbo_enc.tb_params.c_neg) ? 2290 op->turbo_enc.tb_params.k_neg : 2291 op->turbo_enc.tb_params.k_pos; 2292 } 2293 return tb_size; 2294 } 2295 2296 static uint32_t 2297 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op) 2298 { 2299 uint8_t i; 2300 uint32_t c, r, tb_size = 0; 2301 uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 
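/*
 * 22 systematic columns for basegraph 1, 10 for basegraph 2 per
 * 3GPP 38.212; e.g. BG1 with z_c = 384 and no filler bits gives
 * 22 * 384 = 8448 information bits per code block.
 */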
22 : 10; 2302
2303 if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) {
2304 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler;
2305 } else {
2306 c = op->ldpc_enc.tb_params.c;
2307 r = op->ldpc_enc.tb_params.r;
2308 for (i = 0; i < c-r; i++)
2309 tb_size += sys_cols * op->ldpc_enc.z_c
2310 - op->ldpc_enc.n_filler;
2311 }
2312 return tb_size;
2313 }
2314
2315
2316 static int
2317 init_test_op_params(struct test_op_params *op_params,
2318 enum rte_bbdev_op_type op_type, const int expected_status,
2319 const int vector_mask, struct rte_mempool *ops_mp,
2320 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores)
2321 {
2322 int ret = 0;
2323 if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2324 op_type == RTE_BBDEV_OP_LDPC_DEC)
2325 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp,
2326 &op_params->ref_dec_op, 1);
2327 else
2328 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp,
2329 &op_params->ref_enc_op, 1);
2330
2331 TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed");
2332
2333 op_params->mp = ops_mp;
2334 op_params->burst_sz = burst_sz;
2335 op_params->num_to_process = num_to_process;
2336 op_params->num_lcores = num_lcores;
2337 op_params->vector_mask = vector_mask;
2338 if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
2339 op_type == RTE_BBDEV_OP_LDPC_DEC)
2340 op_params->ref_dec_op->status = expected_status;
2341 else if (op_type == RTE_BBDEV_OP_TURBO_ENC
2342 || op_type == RTE_BBDEV_OP_LDPC_ENC)
2343 op_params->ref_enc_op->status = expected_status;
2344 return 0;
2345 }
2346
2347 static int
2348 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
2349 struct test_op_params *op_params)
2350 {
2351 int t_ret, f_ret, socket_id = SOCKET_ID_ANY;
2352 unsigned int i;
2353 struct active_device *ad;
2354 unsigned int burst_sz = get_burst_sz();
2355 enum rte_bbdev_op_type op_type = test_vector.op_type;
2356 const struct rte_bbdev_op_cap *capabilities = NULL;
2357
2358 ad = &active_devs[dev_id];
2359
2360 /* Check if device supports op_type */
2361 if (!is_avail_op(ad, test_vector.op_type))
2362 return TEST_SUCCESS;
2363
2364 struct rte_bbdev_info info;
2365 rte_bbdev_info_get(ad->dev_id, &info);
2366 socket_id = GET_SOCKET(info.socket_id);
2367
2368 f_ret = create_mempools(ad, socket_id, op_type,
2369 get_num_ops());
2370 if (f_ret != TEST_SUCCESS) {
2371 printf("Couldn't create mempools");
2372 goto fail;
2373 }
2374 if (op_type == RTE_BBDEV_OP_NONE)
2375 op_type = RTE_BBDEV_OP_TURBO_ENC;
2376
2377 f_ret = init_test_op_params(op_params, test_vector.op_type,
2378 test_vector.expected_status,
2379 test_vector.mask,
2380 ad->ops_mempool,
2381 burst_sz,
2382 get_num_ops(),
2383 get_num_lcores());
2384 if (f_ret != TEST_SUCCESS) {
2385 printf("Couldn't init test op params");
2386 goto fail;
2387 }
2388
2389
2390 /* Find capabilities */
2391 const struct rte_bbdev_op_cap *cap = info.drv.capabilities;
2392 for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) {
2393 if (cap->type == test_vector.op_type) {
2394 capabilities = cap;
2395 break;
2396 }
2397 cap++;
2398 }
2399 TEST_ASSERT_NOT_NULL(capabilities,
2400 "Couldn't find capabilities");
2401
2402 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) {
2403 create_reference_dec_op(op_params->ref_dec_op);
2404 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
2405 create_reference_enc_op(op_params->ref_enc_op);
2406 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
2407 create_reference_ldpc_enc_op(op_params->ref_enc_op);
2408 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
2409
create_reference_ldpc_dec_op(op_params->ref_dec_op); 2410 2411 for (i = 0; i < ad->nb_queues; ++i) { 2412 f_ret = fill_queue_buffers(op_params, 2413 ad->in_mbuf_pool, 2414 ad->hard_out_mbuf_pool, 2415 ad->soft_out_mbuf_pool, 2416 ad->harq_in_mbuf_pool, 2417 ad->harq_out_mbuf_pool, 2418 ad->queue_ids[i], 2419 capabilities, 2420 info.drv.min_alignment, 2421 socket_id); 2422 if (f_ret != TEST_SUCCESS) { 2423 printf("Couldn't init queue buffers"); 2424 goto fail; 2425 } 2426 } 2427 2428 /* Run test case function */ 2429 t_ret = test_case_func(ad, op_params); 2430 2431 /* Free active device resources and return */ 2432 free_buffers(ad, op_params); 2433 return t_ret; 2434 2435 fail: 2436 free_buffers(ad, op_params); 2437 return TEST_FAILED; 2438 } 2439 2440 /* Run given test function per active device per supported op type 2441 * per burst size. 2442 */ 2443 static int 2444 run_test_case(test_case_function *test_case_func) 2445 { 2446 int ret = 0; 2447 uint8_t dev; 2448 2449 /* Alloc op_params */ 2450 struct test_op_params *op_params = rte_zmalloc(NULL, 2451 sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE); 2452 TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params", 2453 RTE_ALIGN(sizeof(struct test_op_params), 2454 RTE_CACHE_LINE_SIZE)); 2455 2456 /* For each device run test case function */ 2457 for (dev = 0; dev < nb_active_devs; ++dev) 2458 ret |= run_test_case_on_device(test_case_func, dev, op_params); 2459 2460 rte_free(op_params); 2461 2462 return ret; 2463 } 2464 2465 2466 /* Push back the HARQ output from DDR to host */ 2467 static void 2468 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2469 struct rte_bbdev_dec_op **ops, 2470 const uint16_t n) 2471 { 2472 uint16_t j; 2473 int save_status, ret; 2474 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2475 struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2476 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2477 bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 2478 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2479 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2480 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2481 for (j = 0; j < n; ++j) { 2482 if ((loopback && mem_out) || hc_out) { 2483 save_status = ops[j]->status; 2484 ops[j]->ldpc_dec.op_flags = 2485 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2486 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2487 if (h_comp) 2488 ops[j]->ldpc_dec.op_flags += 2489 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2490 ops[j]->ldpc_dec.harq_combined_input.offset = 2491 harq_offset; 2492 ops[j]->ldpc_dec.harq_combined_output.offset = 0; 2493 harq_offset += HARQ_INCR; 2494 if (!loopback) 2495 ops[j]->ldpc_dec.harq_combined_input.length = 2496 ops[j]->ldpc_dec.harq_combined_output.length; 2497 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2498 &ops[j], 1); 2499 ret = 0; 2500 while (ret == 0) 2501 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2502 dev_id, queue_id, 2503 &ops_deq[j], 1); 2504 ops[j]->ldpc_dec.op_flags = flags; 2505 ops[j]->status = save_status; 2506 } 2507 } 2508 } 2509 2510 /* 2511 * Push back the HARQ output from HW DDR to Host 2512 * Preload HARQ memory input and adjust HARQ offset 2513 */ 2514 static void 2515 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2516 struct rte_bbdev_dec_op **ops, const uint16_t n, 2517 bool preload) 2518 { 2519 uint16_t j; 2520 int deq; 2521 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2522 struct rte_bbdev_op_data save_hc_in[MAX_OPS], 
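/*
 * The preload below temporarily switches the ops to HARQ loopback with
 * internal-memory output enabled so that enqueuing them makes the
 * device copy the host HARQ input into its external DDR at
 * queue-specific offsets (queue_id * HARQ_INCR * MAX_OPS, stepped by
 * HARQ_INCR per op); the original flags and HARQ buffers are restored
 * once all ops are dequeued.
 */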
save_hc_out[MAX_OPS]; 2523 struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; 2524 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2525 bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2526 bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 2527 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2528 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2529 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2530 if ((mem_in || hc_in) && preload) { 2531 for (j = 0; j < n; ++j) { 2532 save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input; 2533 save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output; 2534 ops[j]->ldpc_dec.op_flags = 2535 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2536 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2537 if (h_comp) 2538 ops[j]->ldpc_dec.op_flags += 2539 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2540 ops[j]->ldpc_dec.harq_combined_output.offset = 2541 harq_offset; 2542 ops[j]->ldpc_dec.harq_combined_input.offset = 0; 2543 harq_offset += HARQ_INCR; 2544 } 2545 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n); 2546 deq = 0; 2547 while (deq != n) 2548 deq += rte_bbdev_dequeue_ldpc_dec_ops( 2549 dev_id, queue_id, &ops_deq[deq], 2550 n - deq); 2551 /* Restore the operations */ 2552 for (j = 0; j < n; ++j) { 2553 ops[j]->ldpc_dec.op_flags = flags; 2554 ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j]; 2555 ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j]; 2556 } 2557 } 2558 harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2559 for (j = 0; j < n; ++j) { 2560 /* Adjust HARQ offset when we reach external DDR */ 2561 if (mem_in || hc_in) 2562 ops[j]->ldpc_dec.harq_combined_input.offset 2563 = harq_offset; 2564 if (mem_out || hc_out) 2565 ops[j]->ldpc_dec.harq_combined_output.offset 2566 = harq_offset; 2567 harq_offset += HARQ_INCR; 2568 } 2569 } 2570 2571 static void 2572 dequeue_event_callback(uint16_t dev_id, 2573 enum rte_bbdev_event_type event, void *cb_arg, 2574 void *ret_param) 2575 { 2576 int ret; 2577 uint16_t i; 2578 uint64_t total_time; 2579 uint16_t deq, burst_sz, num_ops; 2580 uint16_t queue_id = *(uint16_t *) ret_param; 2581 struct rte_bbdev_info info; 2582 double tb_len_bits; 2583 struct thread_params *tp = cb_arg; 2584 2585 /* Find matching thread params using queue_id */ 2586 for (i = 0; i < MAX_QUEUES; ++i, ++tp) 2587 if (tp->queue_id == queue_id) 2588 break; 2589 2590 if (i == MAX_QUEUES) { 2591 printf("%s: Queue_id from interrupt details was not found!\n", 2592 __func__); 2593 return; 2594 } 2595 2596 if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { 2597 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2598 printf( 2599 "Dequeue interrupt handler called for incorrect event!\n"); 2600 return; 2601 } 2602 2603 burst_sz = rte_atomic16_read(&tp->burst_sz); 2604 num_ops = tp->op_params->num_to_process; 2605 2606 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 2607 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 2608 &tp->dec_ops[ 2609 rte_atomic16_read(&tp->nb_dequeued)], 2610 burst_sz); 2611 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2612 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 2613 &tp->dec_ops[ 2614 rte_atomic16_read(&tp->nb_dequeued)], 2615 burst_sz); 2616 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2617 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 2618 &tp->enc_ops[ 2619 rte_atomic16_read(&tp->nb_dequeued)], 2620 burst_sz); 2621 else /*RTE_BBDEV_OP_TURBO_ENC*/ 2622 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 
2623 &tp->enc_ops[ 2624 rte_atomic16_read(&tp->nb_dequeued)], 2625 burst_sz); 2626 2627 if (deq < burst_sz) { 2628 printf( 2629 "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n", 2630 burst_sz, deq); 2631 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2632 return; 2633 } 2634 2635 if (rte_atomic16_read(&tp->nb_dequeued) + deq < num_ops) { 2636 rte_atomic16_add(&tp->nb_dequeued, deq); 2637 return; 2638 } 2639 2640 total_time = rte_rdtsc_precise() - tp->start_time; 2641 2642 rte_bbdev_info_get(dev_id, &info); 2643 2644 ret = TEST_SUCCESS; 2645 2646 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2647 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2648 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op, 2649 tp->op_params->vector_mask); 2650 /* get the max of iter_count for all dequeued ops */ 2651 for (i = 0; i < num_ops; ++i) 2652 tp->iter_count = RTE_MAX( 2653 tp->dec_ops[i]->turbo_dec.iter_count, 2654 tp->iter_count); 2655 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2656 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) { 2657 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2658 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op); 2659 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2660 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) { 2661 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2662 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op); 2663 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2664 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 2665 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2666 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op, 2667 tp->op_params->vector_mask); 2668 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2669 } 2670 2671 if (ret) { 2672 printf("Buffers validation failed\n"); 2673 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2674 } 2675 2676 switch (test_vector.op_type) { 2677 case RTE_BBDEV_OP_TURBO_DEC: 2678 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op); 2679 break; 2680 case RTE_BBDEV_OP_TURBO_ENC: 2681 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op); 2682 break; 2683 case RTE_BBDEV_OP_LDPC_DEC: 2684 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op); 2685 break; 2686 case RTE_BBDEV_OP_LDPC_ENC: 2687 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op); 2688 break; 2689 case RTE_BBDEV_OP_NONE: 2690 tb_len_bits = 0.0; 2691 break; 2692 default: 2693 printf("Unknown op type: %d\n", test_vector.op_type); 2694 rte_atomic16_set(&tp->processing_status, TEST_FAILED); 2695 return; 2696 } 2697 2698 tp->ops_per_sec += ((double)num_ops) / 2699 ((double)total_time / (double)rte_get_tsc_hz()); 2700 tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) / 2701 ((double)total_time / (double)rte_get_tsc_hz()); 2702 2703 rte_atomic16_add(&tp->nb_dequeued, deq); 2704 } 2705 2706 static int 2707 throughput_intr_lcore_ldpc_dec(void *arg) 2708 { 2709 struct thread_params *tp = arg; 2710 unsigned int enqueued; 2711 const uint16_t queue_id = tp->queue_id; 2712 const uint16_t burst_sz = tp->op_params->burst_sz; 2713 const uint16_t num_to_process = tp->op_params->num_to_process; 2714 struct rte_bbdev_dec_op *ops[num_to_process]; 2715 struct test_buffers *bufs = NULL; 2716 struct rte_bbdev_info info; 2717 int ret, i, j; 2718 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2719 uint16_t num_to_enq, enq; 2720 2721 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 
2722 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 2723 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 2724 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 2725 2726 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2727 "BURST_SIZE should be <= %u", MAX_BURST); 2728 2729 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2730 "Failed to enable interrupts for dev: %u, queue_id: %u", 2731 tp->dev_id, queue_id); 2732 2733 rte_bbdev_info_get(tp->dev_id, &info); 2734 2735 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2736 "NUM_OPS cannot exceed %u for this device", 2737 info.drv.queue_size_lim); 2738 2739 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2740 2741 rte_atomic16_clear(&tp->processing_status); 2742 rte_atomic16_clear(&tp->nb_dequeued); 2743 2744 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2745 rte_pause(); 2746 2747 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2748 num_to_process); 2749 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2750 num_to_process); 2751 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2752 copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs, 2753 bufs->hard_outputs, bufs->soft_outputs, 2754 bufs->harq_inputs, bufs->harq_outputs, ref_op); 2755 2756 /* Set counter to validate the ordering */ 2757 for (j = 0; j < num_to_process; ++j) 2758 ops[j]->opaque_data = (void *)(uintptr_t)j; 2759 2760 for (j = 0; j < TEST_REPETITIONS; ++j) { 2761 for (i = 0; i < num_to_process; ++i) { 2762 if (!loopback) 2763 rte_pktmbuf_reset( 2764 ops[i]->ldpc_dec.hard_output.data); 2765 if (hc_out || loopback) 2766 mbuf_reset( 2767 ops[i]->ldpc_dec.harq_combined_output.data); 2768 } 2769 2770 tp->start_time = rte_rdtsc_precise(); 2771 for (enqueued = 0; enqueued < num_to_process;) { 2772 num_to_enq = burst_sz; 2773 2774 if (unlikely(num_to_process - enqueued < num_to_enq)) 2775 num_to_enq = num_to_process - enqueued; 2776 2777 enq = 0; 2778 do { 2779 enq += rte_bbdev_enqueue_ldpc_dec_ops( 2780 tp->dev_id, 2781 queue_id, &ops[enqueued], 2782 num_to_enq); 2783 } while (unlikely(num_to_enq != enq)); 2784 enqueued += enq; 2785 2786 /* Write to thread burst_sz current number of enqueued 2787 * descriptors. It ensures that proper number of 2788 * descriptors will be dequeued in callback 2789 * function - needed for last batch in case where 2790 * the number of operations is not a multiple of 2791 * burst size. 
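 * For example, with num_to_process = 100 and burst_sz = 32 the final
 * batch enqueues only 4 descriptors, so the callback must dequeue 4
 * rather than a full burst of 32.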
2792 */ 2793 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2794 2795 /* Wait until processing of previous batch is 2796 * completed 2797 */ 2798 while (rte_atomic16_read(&tp->nb_dequeued) != 2799 (int16_t) enqueued) 2800 rte_pause(); 2801 } 2802 if (j != TEST_REPETITIONS - 1) 2803 rte_atomic16_clear(&tp->nb_dequeued); 2804 } 2805 2806 return TEST_SUCCESS; 2807 } 2808 2809 static int 2810 throughput_intr_lcore_dec(void *arg) 2811 { 2812 struct thread_params *tp = arg; 2813 unsigned int enqueued; 2814 const uint16_t queue_id = tp->queue_id; 2815 const uint16_t burst_sz = tp->op_params->burst_sz; 2816 const uint16_t num_to_process = tp->op_params->num_to_process; 2817 struct rte_bbdev_dec_op *ops[num_to_process]; 2818 struct test_buffers *bufs = NULL; 2819 struct rte_bbdev_info info; 2820 int ret, i, j; 2821 uint16_t num_to_enq, enq; 2822 2823 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2824 "BURST_SIZE should be <= %u", MAX_BURST); 2825 2826 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2827 "Failed to enable interrupts for dev: %u, queue_id: %u", 2828 tp->dev_id, queue_id); 2829 2830 rte_bbdev_info_get(tp->dev_id, &info); 2831 2832 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2833 "NUM_OPS cannot exceed %u for this device", 2834 info.drv.queue_size_lim); 2835 2836 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2837 2838 rte_atomic16_clear(&tp->processing_status); 2839 rte_atomic16_clear(&tp->nb_dequeued); 2840 2841 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2842 rte_pause(); 2843 2844 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2845 num_to_process); 2846 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2847 num_to_process); 2848 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2849 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs, 2850 bufs->hard_outputs, bufs->soft_outputs, 2851 tp->op_params->ref_dec_op); 2852 2853 /* Set counter to validate the ordering */ 2854 for (j = 0; j < num_to_process; ++j) 2855 ops[j]->opaque_data = (void *)(uintptr_t)j; 2856 2857 for (j = 0; j < TEST_REPETITIONS; ++j) { 2858 for (i = 0; i < num_to_process; ++i) 2859 rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data); 2860 2861 tp->start_time = rte_rdtsc_precise(); 2862 for (enqueued = 0; enqueued < num_to_process;) { 2863 num_to_enq = burst_sz; 2864 2865 if (unlikely(num_to_process - enqueued < num_to_enq)) 2866 num_to_enq = num_to_process - enqueued; 2867 2868 enq = 0; 2869 do { 2870 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 2871 queue_id, &ops[enqueued], 2872 num_to_enq); 2873 } while (unlikely(num_to_enq != enq)); 2874 enqueued += enq; 2875 2876 /* Write to thread burst_sz current number of enqueued 2877 * descriptors. It ensures that proper number of 2878 * descriptors will be dequeued in callback 2879 * function - needed for last batch in case where 2880 * the number of operations is not a multiple of 2881 * burst size. 
2882 */ 2883 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2884 2885 /* Wait until processing of previous batch is 2886 * completed 2887 */ 2888 while (rte_atomic16_read(&tp->nb_dequeued) != 2889 (int16_t) enqueued) 2890 rte_pause(); 2891 } 2892 if (j != TEST_REPETITIONS - 1) 2893 rte_atomic16_clear(&tp->nb_dequeued); 2894 } 2895 2896 return TEST_SUCCESS; 2897 } 2898 2899 static int 2900 throughput_intr_lcore_enc(void *arg) 2901 { 2902 struct thread_params *tp = arg; 2903 unsigned int enqueued; 2904 const uint16_t queue_id = tp->queue_id; 2905 const uint16_t burst_sz = tp->op_params->burst_sz; 2906 const uint16_t num_to_process = tp->op_params->num_to_process; 2907 struct rte_bbdev_enc_op *ops[num_to_process]; 2908 struct test_buffers *bufs = NULL; 2909 struct rte_bbdev_info info; 2910 int ret, i, j; 2911 uint16_t num_to_enq, enq; 2912 2913 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2914 "BURST_SIZE should be <= %u", MAX_BURST); 2915 2916 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2917 "Failed to enable interrupts for dev: %u, queue_id: %u", 2918 tp->dev_id, queue_id); 2919 2920 rte_bbdev_info_get(tp->dev_id, &info); 2921 2922 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2923 "NUM_OPS cannot exceed %u for this device", 2924 info.drv.queue_size_lim); 2925 2926 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2927 2928 rte_atomic16_clear(&tp->processing_status); 2929 rte_atomic16_clear(&tp->nb_dequeued); 2930 2931 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 2932 rte_pause(); 2933 2934 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 2935 num_to_process); 2936 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2937 num_to_process); 2938 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2939 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs, 2940 bufs->hard_outputs, tp->op_params->ref_enc_op); 2941 2942 /* Set counter to validate the ordering */ 2943 for (j = 0; j < num_to_process; ++j) 2944 ops[j]->opaque_data = (void *)(uintptr_t)j; 2945 2946 for (j = 0; j < TEST_REPETITIONS; ++j) { 2947 for (i = 0; i < num_to_process; ++i) 2948 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 2949 2950 tp->start_time = rte_rdtsc_precise(); 2951 for (enqueued = 0; enqueued < num_to_process;) { 2952 num_to_enq = burst_sz; 2953 2954 if (unlikely(num_to_process - enqueued < num_to_enq)) 2955 num_to_enq = num_to_process - enqueued; 2956 2957 enq = 0; 2958 do { 2959 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 2960 queue_id, &ops[enqueued], 2961 num_to_enq); 2962 } while (unlikely(enq != num_to_enq)); 2963 enqueued += enq; 2964 2965 /* Write to thread burst_sz current number of enqueued 2966 * descriptors. It ensures that proper number of 2967 * descriptors will be dequeued in callback 2968 * function - needed for last batch in case where 2969 * the number of operations is not a multiple of 2970 * burst size. 
2971 */ 2972 rte_atomic16_set(&tp->burst_sz, num_to_enq); 2973 2974 /* Wait until processing of previous batch is 2975 * completed 2976 */ 2977 while (rte_atomic16_read(&tp->nb_dequeued) != 2978 (int16_t) enqueued) 2979 rte_pause(); 2980 } 2981 if (j != TEST_REPETITIONS - 1) 2982 rte_atomic16_clear(&tp->nb_dequeued); 2983 } 2984 2985 return TEST_SUCCESS; 2986 } 2987 2988 2989 static int 2990 throughput_intr_lcore_ldpc_enc(void *arg) 2991 { 2992 struct thread_params *tp = arg; 2993 unsigned int enqueued; 2994 const uint16_t queue_id = tp->queue_id; 2995 const uint16_t burst_sz = tp->op_params->burst_sz; 2996 const uint16_t num_to_process = tp->op_params->num_to_process; 2997 struct rte_bbdev_enc_op *ops[num_to_process]; 2998 struct test_buffers *bufs = NULL; 2999 struct rte_bbdev_info info; 3000 int ret, i, j; 3001 uint16_t num_to_enq, enq; 3002 3003 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3004 "BURST_SIZE should be <= %u", MAX_BURST); 3005 3006 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3007 "Failed to enable interrupts for dev: %u, queue_id: %u", 3008 tp->dev_id, queue_id); 3009 3010 rte_bbdev_info_get(tp->dev_id, &info); 3011 3012 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3013 "NUM_OPS cannot exceed %u for this device", 3014 info.drv.queue_size_lim); 3015 3016 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3017 3018 rte_atomic16_clear(&tp->processing_status); 3019 rte_atomic16_clear(&tp->nb_dequeued); 3020 3021 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3022 rte_pause(); 3023 3024 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 3025 num_to_process); 3026 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3027 num_to_process); 3028 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3029 copy_reference_ldpc_enc_op(ops, num_to_process, 0, 3030 bufs->inputs, bufs->hard_outputs, 3031 tp->op_params->ref_enc_op); 3032 3033 /* Set counter to validate the ordering */ 3034 for (j = 0; j < num_to_process; ++j) 3035 ops[j]->opaque_data = (void *)(uintptr_t)j; 3036 3037 for (j = 0; j < TEST_REPETITIONS; ++j) { 3038 for (i = 0; i < num_to_process; ++i) 3039 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 3040 3041 tp->start_time = rte_rdtsc_precise(); 3042 for (enqueued = 0; enqueued < num_to_process;) { 3043 num_to_enq = burst_sz; 3044 3045 if (unlikely(num_to_process - enqueued < num_to_enq)) 3046 num_to_enq = num_to_process - enqueued; 3047 3048 enq = 0; 3049 do { 3050 enq += rte_bbdev_enqueue_ldpc_enc_ops( 3051 tp->dev_id, 3052 queue_id, &ops[enqueued], 3053 num_to_enq); 3054 } while (unlikely(enq != num_to_enq)); 3055 enqueued += enq; 3056 3057 /* Write to thread burst_sz current number of enqueued 3058 * descriptors. It ensures that proper number of 3059 * descriptors will be dequeued in callback 3060 * function - needed for last batch in case where 3061 * the number of operations is not a multiple of 3062 * burst size. 
3063 */ 3064 rte_atomic16_set(&tp->burst_sz, num_to_enq); 3065 3066 /* Wait until processing of previous batch is 3067 * completed 3068 */ 3069 while (rte_atomic16_read(&tp->nb_dequeued) != 3070 (int16_t) enqueued) 3071 rte_pause(); 3072 } 3073 if (j != TEST_REPETITIONS - 1) 3074 rte_atomic16_clear(&tp->nb_dequeued); 3075 } 3076 3077 return TEST_SUCCESS; 3078 } 3079 3080 static int 3081 throughput_pmd_lcore_dec(void *arg) 3082 { 3083 struct thread_params *tp = arg; 3084 uint16_t enq, deq; 3085 uint64_t total_time = 0, start_time; 3086 const uint16_t queue_id = tp->queue_id; 3087 const uint16_t burst_sz = tp->op_params->burst_sz; 3088 const uint16_t num_ops = tp->op_params->num_to_process; 3089 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3090 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3091 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3092 struct test_buffers *bufs = NULL; 3093 int i, j, ret; 3094 struct rte_bbdev_info info; 3095 uint16_t num_to_enq; 3096 3097 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3098 "BURST_SIZE should be <= %u", MAX_BURST); 3099 3100 rte_bbdev_info_get(tp->dev_id, &info); 3101 3102 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3103 "NUM_OPS cannot exceed %u for this device", 3104 info.drv.queue_size_lim); 3105 3106 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3107 3108 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3109 rte_pause(); 3110 3111 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3112 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3113 3114 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3115 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3116 bufs->hard_outputs, bufs->soft_outputs, ref_op); 3117 3118 /* Set counter to validate the ordering */ 3119 for (j = 0; j < num_ops; ++j) 3120 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3121 3122 for (i = 0; i < TEST_REPETITIONS; ++i) { 3123 3124 for (j = 0; j < num_ops; ++j) 3125 mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data); 3126 3127 start_time = rte_rdtsc_precise(); 3128 3129 for (enq = 0, deq = 0; enq < num_ops;) { 3130 num_to_enq = burst_sz; 3131 3132 if (unlikely(num_ops - enq < num_to_enq)) 3133 num_to_enq = num_ops - enq; 3134 3135 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 3136 queue_id, &ops_enq[enq], num_to_enq); 3137 3138 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3139 queue_id, &ops_deq[deq], enq - deq); 3140 } 3141 3142 /* dequeue the remaining */ 3143 while (deq < enq) { 3144 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3145 queue_id, &ops_deq[deq], enq - deq); 3146 } 3147 3148 total_time += rte_rdtsc_precise() - start_time; 3149 } 3150 3151 tp->iter_count = 0; 3152 /* get the max of iter_count for all dequeued ops */ 3153 for (i = 0; i < num_ops; ++i) { 3154 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, 3155 tp->iter_count); 3156 } 3157 3158 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3159 ret = validate_dec_op(ops_deq, num_ops, ref_op, 3160 tp->op_params->vector_mask); 3161 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3162 } 3163 3164 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3165 3166 double tb_len_bits = calc_dec_TB_size(ref_op); 3167 3168 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3169 ((double)total_time / (double)rte_get_tsc_hz()); 3170 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3171 1000000.0) / ((double)total_time / 3172 (double)rte_get_tsc_hz()); 3173 3174 return TEST_SUCCESS; 3175 } 3176 3177 static int 
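/*
 * Per-lcore BLER worker: forces early termination (iteration stop),
 * overwrites the input LLRs via generate_llr_input() at the configured
 * SNR, runs a single enqueue/dequeue pass, then derives BLER as the
 * fraction of code blocks whose hard output differs from the reference
 * (validate_ldpc_bler), alongside a parity-based BLER from the syndrome
 * status bit and the average iteration count.
 */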
3178 bler_pmd_lcore_ldpc_dec(void *arg) 3179 { 3180 struct thread_params *tp = arg; 3181 uint16_t enq, deq; 3182 uint64_t total_time = 0, start_time; 3183 const uint16_t queue_id = tp->queue_id; 3184 const uint16_t burst_sz = tp->op_params->burst_sz; 3185 const uint16_t num_ops = tp->op_params->num_to_process; 3186 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3187 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3188 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3189 struct test_buffers *bufs = NULL; 3190 int i, j, ret; 3191 float parity_bler = 0; 3192 struct rte_bbdev_info info; 3193 uint16_t num_to_enq; 3194 bool extDdr = check_bit(ldpc_cap_flags, 3195 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3196 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3197 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3198 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3199 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3200 3201 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3202 "BURST_SIZE should be <= %u", MAX_BURST); 3203 3204 rte_bbdev_info_get(tp->dev_id, &info); 3205 3206 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3207 "NUM_OPS cannot exceed %u for this device", 3208 info.drv.queue_size_lim); 3209 3210 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3211 3212 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3213 rte_pause(); 3214 3215 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3216 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3217 3218 /* For BLER tests we need to enable early termination */ 3219 if (!check_bit(ref_op->ldpc_dec.op_flags, 3220 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3221 ref_op->ldpc_dec.op_flags += 3222 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3223 ref_op->ldpc_dec.iter_max = get_iter_max(); 3224 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3225 3226 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3227 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3228 bufs->hard_outputs, bufs->soft_outputs, 3229 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3230 generate_llr_input(num_ops, bufs->inputs, ref_op); 3231 3232 /* Set counter to validate the ordering */ 3233 for (j = 0; j < num_ops; ++j) 3234 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3235 3236 for (i = 0; i < 1; ++i) { /* Could add more iterations */ 3237 for (j = 0; j < num_ops; ++j) { 3238 if (!loopback) 3239 mbuf_reset( 3240 ops_enq[j]->ldpc_dec.hard_output.data); 3241 if (hc_out || loopback) 3242 mbuf_reset( 3243 ops_enq[j]->ldpc_dec.harq_combined_output.data); 3244 } 3245 if (extDdr) 3246 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3247 num_ops, true); 3248 start_time = rte_rdtsc_precise(); 3249 3250 for (enq = 0, deq = 0; enq < num_ops;) { 3251 num_to_enq = burst_sz; 3252 3253 if (unlikely(num_ops - enq < num_to_enq)) 3254 num_to_enq = num_ops - enq; 3255 3256 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3257 queue_id, &ops_enq[enq], num_to_enq); 3258 3259 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3260 queue_id, &ops_deq[deq], enq - deq); 3261 } 3262 3263 /* dequeue the remaining */ 3264 while (deq < enq) { 3265 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3266 queue_id, &ops_deq[deq], enq - deq); 3267 } 3268 3269 total_time += rte_rdtsc_precise() - start_time; 3270 } 3271 3272 tp->iter_count = 0; 3273 tp->iter_average = 0; 3274 /* get the max of iter_count for all dequeued ops */ 3275 for (i = 0; i < num_ops; ++i) { 3276 tp->iter_count = 
RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3277 tp->iter_count); 3278 tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count; 3279 if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR)) 3280 parity_bler += 1.0; 3281 } 3282 3283 parity_bler /= num_ops; /* This one is based on SYND */ 3284 tp->iter_average /= num_ops; 3285 tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops; 3286 3287 if (test_vector.op_type != RTE_BBDEV_OP_NONE 3288 && tp->bler == 0 3289 && parity_bler == 0 3290 && !hc_out) { 3291 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3292 tp->op_params->vector_mask); 3293 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3294 } 3295 3296 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3297 3298 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3299 tp->ops_per_sec = ((double)num_ops * 1) / 3300 ((double)total_time / (double)rte_get_tsc_hz()); 3301 tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) / 3302 1000000.0) / ((double)total_time / 3303 (double)rte_get_tsc_hz()); 3304 3305 return TEST_SUCCESS; 3306 } 3307 3308 static int 3309 throughput_pmd_lcore_ldpc_dec(void *arg) 3310 { 3311 struct thread_params *tp = arg; 3312 uint16_t enq, deq; 3313 uint64_t total_time = 0, start_time; 3314 const uint16_t queue_id = tp->queue_id; 3315 const uint16_t burst_sz = tp->op_params->burst_sz; 3316 const uint16_t num_ops = tp->op_params->num_to_process; 3317 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3318 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3319 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3320 struct test_buffers *bufs = NULL; 3321 int i, j, ret; 3322 struct rte_bbdev_info info; 3323 uint16_t num_to_enq; 3324 bool extDdr = check_bit(ldpc_cap_flags, 3325 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3326 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3327 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3328 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3329 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3330 3331 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3332 "BURST_SIZE should be <= %u", MAX_BURST); 3333 3334 rte_bbdev_info_get(tp->dev_id, &info); 3335 3336 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3337 "NUM_OPS cannot exceed %u for this device", 3338 info.drv.queue_size_lim); 3339 3340 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3341 3342 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3343 rte_pause(); 3344 3345 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3346 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3347 3348 /* For throughput tests we need to disable early termination */ 3349 if (check_bit(ref_op->ldpc_dec.op_flags, 3350 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3351 ref_op->ldpc_dec.op_flags -= 3352 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3353 ref_op->ldpc_dec.iter_max = get_iter_max(); 3354 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3355 3356 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3357 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3358 bufs->hard_outputs, bufs->soft_outputs, 3359 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3360 3361 /* Set counter to validate the ordering */ 3362 for (j = 0; j < num_ops; ++j) 3363 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3364 3365 for (i = 0; i < TEST_REPETITIONS; ++i) { 3366 for (j = 0; j < num_ops; ++j) { 3367 if (!loopback) 3368 mbuf_reset( 3369 ops_enq[j]->ldpc_dec.hard_output.data); 3370 if (hc_out || loopback) 3371 mbuf_reset( 3372 
ops_enq[j]->ldpc_dec.harq_combined_output.data); 3373 } 3374 if (extDdr) 3375 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3376 num_ops, true); 3377 start_time = rte_rdtsc_precise(); 3378 3379 for (enq = 0, deq = 0; enq < num_ops;) { 3380 num_to_enq = burst_sz; 3381 3382 if (unlikely(num_ops - enq < num_to_enq)) 3383 num_to_enq = num_ops - enq; 3384 3385 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3386 queue_id, &ops_enq[enq], num_to_enq); 3387 3388 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3389 queue_id, &ops_deq[deq], enq - deq); 3390 } 3391 3392 /* dequeue the remaining */ 3393 while (deq < enq) { 3394 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3395 queue_id, &ops_deq[deq], enq - deq); 3396 } 3397 3398 total_time += rte_rdtsc_precise() - start_time; 3399 } 3400 3401 tp->iter_count = 0; 3402 /* get the max of iter_count for all dequeued ops */ 3403 for (i = 0; i < num_ops; ++i) { 3404 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3405 tp->iter_count); 3406 } 3407 if (extDdr) { 3408 /* Read loopback is not thread safe */ 3409 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops); 3410 } 3411 3412 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3413 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3414 tp->op_params->vector_mask); 3415 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3416 } 3417 3418 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3419 3420 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3421 3422 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3423 ((double)total_time / (double)rte_get_tsc_hz()); 3424 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3425 1000000.0) / ((double)total_time / 3426 (double)rte_get_tsc_hz()); 3427 3428 return TEST_SUCCESS; 3429 } 3430 3431 static int 3432 throughput_pmd_lcore_enc(void *arg) 3433 { 3434 struct thread_params *tp = arg; 3435 uint16_t enq, deq; 3436 uint64_t total_time = 0, start_time; 3437 const uint16_t queue_id = tp->queue_id; 3438 const uint16_t burst_sz = tp->op_params->burst_sz; 3439 const uint16_t num_ops = tp->op_params->num_to_process; 3440 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3441 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3442 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3443 struct test_buffers *bufs = NULL; 3444 int i, j, ret; 3445 struct rte_bbdev_info info; 3446 uint16_t num_to_enq; 3447 3448 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3449 "BURST_SIZE should be <= %u", MAX_BURST); 3450 3451 rte_bbdev_info_get(tp->dev_id, &info); 3452 3453 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3454 "NUM_OPS cannot exceed %u for this device", 3455 info.drv.queue_size_lim); 3456 3457 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3458 3459 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3460 rte_pause(); 3461 3462 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3463 num_ops); 3464 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3465 num_ops); 3466 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3467 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3468 bufs->hard_outputs, ref_op); 3469 3470 /* Set counter to validate the ordering */ 3471 for (j = 0; j < num_ops; ++j) 3472 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3473 3474 for (i = 0; i < TEST_REPETITIONS; ++i) { 3475 3476 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3477 for (j = 0; j < num_ops; ++j) 3478 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3479 3480 start_time = rte_rdtsc_precise(); 3481 3482 
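/*
 * Enqueue in bursts of at most burst_sz while opportunistically
 * draining whatever has already completed; remaining ops are dequeued
 * after the enqueue loop so that enq == deq == num_ops before the
 * elapsed TSC cycles are accumulated.
 */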
for (enq = 0, deq = 0; enq < num_ops;) { 3483 num_to_enq = burst_sz; 3484 3485 if (unlikely(num_ops - enq < num_to_enq)) 3486 num_to_enq = num_ops - enq; 3487 3488 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 3489 queue_id, &ops_enq[enq], num_to_enq); 3490 3491 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3492 queue_id, &ops_deq[deq], enq - deq); 3493 } 3494 3495 /* dequeue the remaining */ 3496 while (deq < enq) { 3497 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3498 queue_id, &ops_deq[deq], enq - deq); 3499 } 3500 3501 total_time += rte_rdtsc_precise() - start_time; 3502 } 3503 3504 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3505 ret = validate_enc_op(ops_deq, num_ops, ref_op); 3506 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3507 } 3508 3509 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 3510 3511 double tb_len_bits = calc_enc_TB_size(ref_op); 3512 3513 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3514 ((double)total_time / (double)rte_get_tsc_hz()); 3515 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 3516 / 1000000.0) / ((double)total_time / 3517 (double)rte_get_tsc_hz()); 3518 3519 return TEST_SUCCESS; 3520 } 3521 3522 static int 3523 throughput_pmd_lcore_ldpc_enc(void *arg) 3524 { 3525 struct thread_params *tp = arg; 3526 uint16_t enq, deq; 3527 uint64_t total_time = 0, start_time; 3528 const uint16_t queue_id = tp->queue_id; 3529 const uint16_t burst_sz = tp->op_params->burst_sz; 3530 const uint16_t num_ops = tp->op_params->num_to_process; 3531 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3532 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3533 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3534 struct test_buffers *bufs = NULL; 3535 int i, j, ret; 3536 struct rte_bbdev_info info; 3537 uint16_t num_to_enq; 3538 3539 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3540 "BURST_SIZE should be <= %u", MAX_BURST); 3541 3542 rte_bbdev_info_get(tp->dev_id, &info); 3543 3544 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3545 "NUM_OPS cannot exceed %u for this device", 3546 info.drv.queue_size_lim); 3547 3548 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3549 3550 while (rte_atomic16_read(&tp->op_params->sync) == SYNC_WAIT) 3551 rte_pause(); 3552 3553 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3554 num_ops); 3555 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3556 num_ops); 3557 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3558 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3559 bufs->hard_outputs, ref_op); 3560 3561 /* Set counter to validate the ordering */ 3562 for (j = 0; j < num_ops; ++j) 3563 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3564 3565 for (i = 0; i < TEST_REPETITIONS; ++i) { 3566 3567 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3568 for (j = 0; j < num_ops; ++j) 3569 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3570 3571 start_time = rte_rdtsc_precise(); 3572 3573 for (enq = 0, deq = 0; enq < num_ops;) { 3574 num_to_enq = burst_sz; 3575 3576 if (unlikely(num_ops - enq < num_to_enq)) 3577 num_to_enq = num_ops - enq; 3578 3579 enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, 3580 queue_id, &ops_enq[enq], num_to_enq); 3581 3582 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 3583 queue_id, &ops_deq[deq], enq - deq); 3584 } 3585 3586 /* dequeue the remaining */ 3587 while (deq < enq) { 3588 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 3589 queue_id, &ops_deq[deq], enq - deq); 3590 } 3591 3592 total_time += rte_rdtsc_precise() - start_time; 
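/*
 * After all repetitions: ops/s = (num_ops * TEST_REPETITIONS) /
 * (total_time / tsc_hz) and Mbps = ops/s * tb_len_bits / 1e6, i.e.
 * throughput is measured over the enqueue/dequeue loops only.
 */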
	}

	if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
		ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op);
		TEST_ASSERT_SUCCESS(ret, "Validation failed!");
	}

	rte_bbdev_enc_op_free_bulk(ops_enq, num_ops);

	double tb_len_bits = calc_ldpc_enc_TB_size(ref_op);

	tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) /
			((double)total_time / (double)rte_get_tsc_hz());
	tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits))
			/ 1000000.0) / ((double)total_time /
			(double)rte_get_tsc_hz());

	return TEST_SUCCESS;
}

/* Aggregate the performance results over the number of cores used */
static void
print_enc_throughput(struct thread_params *t_params, unsigned int used_cores)
{
	unsigned int iter = 0;
	double total_mops = 0, total_mbps = 0;

	for (iter = 0; iter < used_cores; iter++) {
		printf(
			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n",
			t_params[iter].lcore_id, t_params[iter].ops_per_sec,
			t_params[iter].mbps);
		total_mops += t_params[iter].ops_per_sec;
		total_mbps += t_params[iter].mbps;
	}
	printf(
		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n",
		used_cores, total_mops, total_mbps);
}

/* Aggregate the performance results over the number of cores used */
static void
print_dec_throughput(struct thread_params *t_params, unsigned int used_cores)
{
	unsigned int core_idx = 0;
	double total_mops = 0, total_mbps = 0;
	uint8_t iter_count = 0;

	for (core_idx = 0; core_idx < used_cores; core_idx++) {
		printf(
			"Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n",
			t_params[core_idx].lcore_id,
			t_params[core_idx].ops_per_sec,
			t_params[core_idx].mbps,
			t_params[core_idx].iter_count);
		total_mops += t_params[core_idx].ops_per_sec;
		total_mbps += t_params[core_idx].mbps;
		iter_count = RTE_MAX(iter_count,
				t_params[core_idx].iter_count);
	}
	printf(
		"\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n",
		used_cores, total_mops, total_mbps, iter_count);
}

/* Aggregate the performance results over the number of cores used */
static void
print_dec_bler(struct thread_params *t_params, unsigned int used_cores)
{
	unsigned int core_idx = 0;
	double total_mbps = 0, total_bler = 0, total_iter = 0;
	double snr = get_snr();

	for (core_idx = 0; core_idx < used_cores; core_idx++) {
		printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n",
				t_params[core_idx].lcore_id,
				t_params[core_idx].bler * 100,
				t_params[core_idx].iter_average,
				t_params[core_idx].mbps,
				get_vector_filename());
		total_mbps += t_params[core_idx].mbps;
		total_bler += t_params[core_idx].bler;
		total_iter += t_params[core_idx].iter_average;
	}
	total_bler /= used_cores;
	total_iter /= used_cores;

	printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n",
			snr, total_bler * 100, total_iter, get_iter_max(),
			total_mbps, get_vector_filename());
}

/*
 * Test function that determines BLER wireless performance
 */
static int
bler_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int ret;
	unsigned int lcore_id, used_cores = 0;
	struct thread_params *t_params;
	struct rte_bbdev_info info;
	lcore_function_t *bler_function;
	uint16_t num_lcores;
	const char *op_type_str;

	rte_bbdev_info_get(ad->dev_id, &info);

	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
			test_vector.op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
			info.dev_name, ad->nb_queues, op_params->burst_sz,
			op_params->num_to_process, op_params->num_lcores,
			op_type_str,
			intr_enabled ? "Interrupt mode" : "PMD mode",
			(double)rte_get_tsc_hz() / 1000000000.0);

	/* Set number of lcores */
	num_lcores = (ad->nb_queues < (op_params->num_lcores))
			? ad->nb_queues
			: op_params->num_lcores;

	/* Allocate memory for thread parameters structure */
	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
			RTE_CACHE_LINE_SIZE);
	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
				RTE_CACHE_LINE_SIZE));

	if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) &&
			!check_bit(test_vector.ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)
			&& !check_bit(test_vector.ldpc_dec.op_flags,
			RTE_BBDEV_LDPC_LLR_COMPRESSION)) {
		bler_function = bler_pmd_lcore_ldpc_dec;
	} else {
		/* Free the thread params before skipping unsupported cases */
		rte_free(t_params);
		return TEST_SKIPPED;
	}

	rte_atomic16_set(&op_params->sync, SYNC_WAIT);

	/* Main core is set at first entry */
	t_params[0].dev_id = ad->dev_id;
	t_params[0].lcore_id = rte_lcore_id();
	t_params[0].op_params = op_params;
	t_params[0].queue_id = ad->queue_ids[used_cores++];
	t_params[0].iter_count = 0;

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (used_cores >= num_lcores)
			break;

		t_params[used_cores].dev_id = ad->dev_id;
		t_params[used_cores].lcore_id = lcore_id;
		t_params[used_cores].op_params = op_params;
		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
		t_params[used_cores].iter_count = 0;

		rte_eal_remote_launch(bler_function,
				&t_params[used_cores++], lcore_id);
	}

	rte_atomic16_set(&op_params->sync, SYNC_START);
	ret = bler_function(&t_params[0]);

	/* Main core is always used */
	for (used_cores = 1; used_cores < num_lcores; used_cores++)
		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

	print_dec_bler(t_params, num_lcores);

	/* Return if test failed */
	if (ret) {
		rte_free(t_params);
		return ret;
	}

	rte_free(t_params);
	return ret;
}

/*
 * Test function that determines how long an enqueue + dequeue of a burst
 * takes on available lcores.
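 * Worker lcores are launched with the op-type specific PMD or interrupt
 * handler (one queue per lcore), the main lcore services the first queue,
 * and per-core results are aggregated by the print helpers above.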
 */
static int
throughput_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int ret;
	unsigned int lcore_id, used_cores = 0;
	struct thread_params *t_params, *tp;
	struct rte_bbdev_info info;
	lcore_function_t *throughput_function;
	uint16_t num_lcores;
	const char *op_type_str;

	rte_bbdev_info_get(ad->dev_id, &info);

	op_type_str = rte_bbdev_op_type_str(test_vector.op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u",
			test_vector.op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n",
			info.dev_name, ad->nb_queues, op_params->burst_sz,
			op_params->num_to_process, op_params->num_lcores,
			op_type_str,
			intr_enabled ? "Interrupt mode" : "PMD mode",
			(double)rte_get_tsc_hz() / 1000000000.0);

	/* Set number of lcores */
	num_lcores = (ad->nb_queues < (op_params->num_lcores))
			? ad->nb_queues
			: op_params->num_lcores;

	/* Allocate memory for thread parameters structure */
	t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params),
			RTE_CACHE_LINE_SIZE);
	TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params",
			RTE_ALIGN(sizeof(struct thread_params) * num_lcores,
				RTE_CACHE_LINE_SIZE));

	if (intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
			throughput_function = throughput_intr_lcore_dec;
		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			throughput_function = throughput_intr_lcore_ldpc_dec;
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
			throughput_function = throughput_intr_lcore_enc;
		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			throughput_function = throughput_intr_lcore_ldpc_enc;
		else
			throughput_function = throughput_intr_lcore_enc;

		/* Dequeue interrupt callback registration */
		ret = rte_bbdev_callback_register(ad->dev_id,
				RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback,
				t_params);
		if (ret < 0) {
			rte_free(t_params);
			return ret;
		}
	} else {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC)
			throughput_function = throughput_pmd_lcore_dec;
		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			throughput_function = throughput_pmd_lcore_ldpc_dec;
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC)
			throughput_function = throughput_pmd_lcore_enc;
		else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			throughput_function = throughput_pmd_lcore_ldpc_enc;
		else
			throughput_function = throughput_pmd_lcore_enc;
	}

	rte_atomic16_set(&op_params->sync, SYNC_WAIT);

	/* Main core is set at first entry */
	t_params[0].dev_id = ad->dev_id;
	t_params[0].lcore_id = rte_lcore_id();
	t_params[0].op_params = op_params;
	t_params[0].queue_id = ad->queue_ids[used_cores++];
	t_params[0].iter_count = 0;

	RTE_LCORE_FOREACH_WORKER(lcore_id) {
		if (used_cores >= num_lcores)
			break;

		t_params[used_cores].dev_id = ad->dev_id;
		t_params[used_cores].lcore_id = lcore_id;
		t_params[used_cores].op_params = op_params;
		t_params[used_cores].queue_id = ad->queue_ids[used_cores];
		t_params[used_cores].iter_count = 0;

		rte_eal_remote_launch(throughput_function,
				&t_params[used_cores++], lcore_id);
	}

	rte_atomic16_set(&op_params->sync, SYNC_START);
	ret = throughput_function(&t_params[0]);

	/* Main core is always used */
	for (used_cores = 1; used_cores < num_lcores; used_cores++)
		ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id);

	/* Return if test failed */
	if (ret) {
		rte_free(t_params);
		return ret;
	}

	/* Print throughput if interrupts are disabled and test passed */
	if (!intr_enabled) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else
			print_enc_throughput(t_params, num_lcores);
		rte_free(t_params);
		return ret;
	}

	/* In interrupt TC we need to wait for the interrupt callback to dequeue
	 * all pending operations. Skip waiting for queues which reported an
	 * error using processing_status variable.
	 * Wait for main lcore operations.
	 */
	tp = &t_params[0];
	while ((rte_atomic16_read(&tp->nb_dequeued) <
			op_params->num_to_process) &&
			(rte_atomic16_read(&tp->processing_status) !=
			TEST_FAILED))
		rte_pause();

	tp->ops_per_sec /= TEST_REPETITIONS;
	tp->mbps /= TEST_REPETITIONS;
	ret |= (int)rte_atomic16_read(&tp->processing_status);

	/* Wait for worker lcores operations */
	for (used_cores = 1; used_cores < num_lcores; used_cores++) {
		tp = &t_params[used_cores];

		while ((rte_atomic16_read(&tp->nb_dequeued) <
				op_params->num_to_process) &&
				(rte_atomic16_read(&tp->processing_status) !=
				TEST_FAILED))
			rte_pause();

		tp->ops_per_sec /= TEST_REPETITIONS;
		tp->mbps /= TEST_REPETITIONS;
		ret |= (int)rte_atomic16_read(&tp->processing_status);
	}

	/* Print throughput if test passed */
	if (!ret) {
		if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC)
			print_dec_throughput(t_params, num_lcores);
		else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC ||
				test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC)
			print_enc_throughput(t_params, num_lcores);
	}

	rte_free(t_params);
	return ret;
}

static int
latency_test_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
		int vector_mask, uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

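		/*
		 * Latency is measured from just before the enqueue call until
		 * the first operation of the burst is dequeued below.
		 */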
		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq],
				burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time = rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));

		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_dec_op(ops_deq, burst_sz, ref_op,
					vector_mask);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

/* Test case for latency/validation for LDPC Decoder */
static int
latency_test_ldpc_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op,
		int vector_mask, uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time,
		bool disable_et)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;
	bool extDdr = ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_dec_op_alloc_bulk() failed");

		/* For latency tests we need to disable early termination */
		if (disable_et && check_bit(ref_op->ldpc_dec.op_flags,
				RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE))
			ref_op->ldpc_dec.op_flags -=
					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE;
		ref_op->ldpc_dec.iter_max = get_iter_max();
		ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					bufs->harq_inputs,
					bufs->harq_outputs,
					ref_op);

		if (extDdr)
			preload_harq_ddr(dev_id, queue_id, ops_enq,
					burst_sz, true);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
				&ops_enq[enq], burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time = rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));

		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (extDdr)
			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op,
					vector_mask);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}
	return i;
}

static int
latency_test_enc(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
		uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq],
				burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time += rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));

		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

static int
latency_test_ldpc_enc(struct rte_mempool *mempool,
		struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op,
		uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *total_time, uint64_t *min_time, uint64_t *max_time)
{
	int ret = TEST_SUCCESS;
	uint16_t i, j, dequeued;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t start_time = 0, last_time = 0;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;
		bool first_time = true;
		last_time = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Set counter to validate the ordering */
		for (j = 0; j < burst_sz; ++j)
			ops_enq[j]->opaque_data = (void *)(uintptr_t)j;

		start_time = rte_rdtsc_precise();

		enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
				&ops_enq[enq], burst_sz);
		TEST_ASSERT(enq == burst_sz,
				"Error enqueueing burst, expected %u, got %u",
				burst_sz, enq);

		/* Dequeue */
		do {
			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);
			if (likely(first_time && (deq > 0))) {
				last_time += rte_rdtsc_precise() - start_time;
				first_time = false;
			}
		} while (unlikely(burst_sz != deq));

		*max_time = RTE_MAX(*max_time, last_time);
		*min_time = RTE_MIN(*min_time, last_time);
		*total_time += last_time;

		if (test_vector.op_type != RTE_BBDEV_OP_NONE) {
			ret = validate_enc_op(ops_deq, burst_sz, ref_op);
			TEST_ASSERT_SUCCESS(ret, "Validation failed!");
		}

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

/* Common function for running validation and latency test cases */
static int
validation_latency_test(struct active_device *ad,
		struct test_op_params *op_params, bool latency_flag)
{
	int iter;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	uint64_t total_time, min_time, max_time;
	const char *op_type_str;

	total_time = max_time = 0;
	min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf("+ ------------------------------------------------------- +\n");
	if (latency_flag)
		printf("== test: latency\ndev:");
	else
		printf("== test: validation\ndev:");
	printf("%s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, op_params->vector_mask,
				ad->dev_id, queue_id, num_to_process,
				burst_sz, &total_time, &min_time, &max_time);
	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
		iter = latency_test_ldpc_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &total_time,
				&min_time, &max_time);
	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = latency_test_ldpc_dec(op_params->mp, bufs,
				op_params->ref_dec_op, op_params->vector_mask,
				ad->dev_id, queue_id, num_to_process,
				burst_sz, &total_time, &min_time, &max_time,
				latency_flag);
	else /* RTE_BBDEV_OP_TURBO_ENC */
		iter = latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op,
				ad->dev_id, queue_id,
				num_to_process, burst_sz, &total_time,
				&min_time, &max_time);

	if (iter <= 0)
		return TEST_FAILED;

	printf("Operation latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)total_time / (double)iter,
			(double)(total_time * 1000000) / (double)iter /
			(double)rte_get_tsc_hz(), (double)min_time,
			(double)(min_time * 1000000) / (double)rte_get_tsc_hz(),
			(double)max_time, (double)(max_time * 1000000) /
			(double)rte_get_tsc_hz());

	return TEST_SUCCESS;
}

static int
latency_test(struct active_device *ad, struct test_op_params *op_params)
{
	return validation_latency_test(ad, op_params, true);
}

static int
validation_test(struct active_device *ad, struct test_op_params *op_params)
{
	return validation_latency_test(ad, op_params, false);
}

#ifdef RTE_BBDEV_OFFLOAD_COST
static int
get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
		struct rte_bbdev_stats *stats)
{
	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
	struct rte_bbdev_stats *q_stats;

	if (queue_id >= dev->data->num_queues)
		return -1;

	q_stats = &dev->data->queues[queue_id].queue_stats;

	stats->enqueued_count = q_stats->enqueued_count;
	stats->dequeued_count = q_stats->dequeued_count;
	stats->enqueue_err_count = q_stats->enqueue_err_count;
	stats->dequeue_err_count = q_stats->dequeue_err_count;
	stats->acc_offload_cycles = q_stats->acc_offload_cycles;

	return 0;
}

static int
offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Start time measurement for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
				stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
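		/*
		 * The fixed wait keeps device processing time out of the
		 * dequeue measurement below, which is only meant to capture
		 * the cost of the dequeue call itself.
		 */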
		rte_delay_us(WAIT_OFFLOAD_US);

		/* Start time measurement for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], enq);
		} while (unlikely(deq == 0));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed */
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

static int
offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs,
		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;
	bool extDdr = ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					bufs->harq_inputs,
					bufs->harq_outputs,
					ref_op);

		if (extDdr)
			preload_harq_ddr(dev_id, queue_id, ops_enq,
					burst_sz, true);

		/* Start time measurement for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(WAIT_OFFLOAD_US);

		/* Start time measurement for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], enq);
		} while (unlikely(deq == 0));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed */
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		if (extDdr) {
			/* Read loopback is not thread safe */
			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
		}

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

static int
offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Start time measurement for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);
		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(WAIT_OFFLOAD_US);

		/* Start time measurement for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], enq);
		} while (unlikely(deq == 0));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

static int
offload_latency_test_ldpc_enc(struct rte_mempool *mempool,
		struct test_buffers *bufs,
		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Start time measurement for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* give time for device to process ops */
		rte_delay_us(WAIT_OFFLOAD_US);

		/* Start time measurement for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
					&ops_deq[deq], enq);
		} while (unlikely(deq == 0));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}
#endif

static int
offload_cost_test(struct active_device *ad,
		struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
	RTE_SET_USED(ad);
	RTE_SET_USED(op_params);
	printf("Offload latency test is disabled.\n");
	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
	return TEST_SKIPPED;
#else
	int iter;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	const char *op_type_str;
	struct test_time_stats time_st;

	memset(&time_st, 0, sizeof(struct test_time_stats));
	time_st.enq_sw_min_time = UINT64_MAX;
	time_st.enq_acc_min_time = UINT64_MAX;
	time_st.deq_min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = offload_latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);

	if (iter <= 0)
		return TEST_FAILED;

	printf("Enqueue driver offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n"
			"Enqueue accelerator offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.enq_sw_total_time / (double)iter,
			(double)(time_st.enq_sw_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_sw_min_time,
			(double)(time_st.enq_sw_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
			(double)(time_st.enq_sw_max_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
			(double)iter,
			(double)(time_st.enq_acc_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_acc_min_time,
			(double)(time_st.enq_acc_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
			(double)(time_st.enq_acc_max_time * 1000000) /
			rte_get_tsc_hz());

	printf("Dequeue offload cost latency - one op:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.deq_total_time / (double)iter,
			(double)(time_st.deq_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.deq_min_time,
			(double)(time_st.deq_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.deq_max_time,
			(double)(time_st.deq_max_time * 1000000) /
			rte_get_tsc_hz());

	struct rte_bbdev_stats stats = {0};
	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
				"Mismatch in enqueue count %10"PRIu64" %d",
				stats.enqueued_count, num_to_process);
		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
				"Mismatch in dequeue count %10"PRIu64" %d",
				stats.dequeued_count, num_to_process);
	}
	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
			"Enqueue count Error %10"PRIu64"",
			stats.enqueue_err_count);
	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
			"Dequeue count Error %10"PRIu64"",
			stats.dequeue_err_count);

	return TEST_SUCCESS;
#endif
}

#ifdef RTE_BBDEV_OFFLOAD_COST
static int
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
	int i, deq_total;
	struct rte_bbdev_dec_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */

	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
					burst_sz);
		else
			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
					burst_sz);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;
	}

	return i;
}

static int
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
	int i, deq_total;
	struct rte_bbdev_enc_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */
	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
					burst_sz);
		else
			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
					burst_sz);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;
	}

	return i;
}

#endif

static int
offload_latency_empty_q_test(struct active_device *ad,
		struct test_op_params *op_params)
{
#ifndef RTE_BBDEV_OFFLOAD_COST
	RTE_SET_USED(ad);
	RTE_SET_USED(op_params);
	printf("Offload latency empty dequeue test is disabled.\n");
	printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n");
test on.\n"); 4891 return TEST_SKIPPED; 4892 #else 4893 int iter; 4894 uint64_t deq_total_time, deq_min_time, deq_max_time; 4895 uint16_t burst_sz = op_params->burst_sz; 4896 const uint16_t num_to_process = op_params->num_to_process; 4897 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4898 const uint16_t queue_id = ad->queue_ids[0]; 4899 struct rte_bbdev_info info; 4900 const char *op_type_str; 4901 4902 deq_total_time = deq_max_time = 0; 4903 deq_min_time = UINT64_MAX; 4904 4905 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4906 "BURST_SIZE should be <= %u", MAX_BURST); 4907 4908 rte_bbdev_info_get(ad->dev_id, &info); 4909 4910 op_type_str = rte_bbdev_op_type_str(op_type); 4911 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4912 4913 printf("+ ------------------------------------------------------- +\n"); 4914 printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 4915 info.dev_name, burst_sz, num_to_process, op_type_str); 4916 4917 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 4918 op_type == RTE_BBDEV_OP_LDPC_DEC) 4919 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id, 4920 num_to_process, burst_sz, &deq_total_time, 4921 &deq_min_time, &deq_max_time, op_type); 4922 else 4923 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id, 4924 num_to_process, burst_sz, &deq_total_time, 4925 &deq_min_time, &deq_max_time, op_type); 4926 4927 if (iter <= 0) 4928 return TEST_FAILED; 4929 4930 printf("Empty dequeue offload:\n" 4931 "\tavg: %lg cycles, %lg us\n" 4932 "\tmin: %lg cycles, %lg us\n" 4933 "\tmax: %lg cycles, %lg us\n", 4934 (double)deq_total_time / (double)iter, 4935 (double)(deq_total_time * 1000000) / (double)iter / 4936 (double)rte_get_tsc_hz(), (double)deq_min_time, 4937 (double)(deq_min_time * 1000000) / rte_get_tsc_hz(), 4938 (double)deq_max_time, (double)(deq_max_time * 1000000) / 4939 rte_get_tsc_hz()); 4940 4941 return TEST_SUCCESS; 4942 #endif 4943 } 4944 4945 static int 4946 bler_tc(void) 4947 { 4948 return run_test_case(bler_test); 4949 } 4950 4951 static int 4952 throughput_tc(void) 4953 { 4954 return run_test_case(throughput_test); 4955 } 4956 4957 static int 4958 offload_cost_tc(void) 4959 { 4960 return run_test_case(offload_cost_test); 4961 } 4962 4963 static int 4964 offload_latency_empty_q_tc(void) 4965 { 4966 return run_test_case(offload_latency_empty_q_test); 4967 } 4968 4969 static int 4970 latency_tc(void) 4971 { 4972 return run_test_case(latency_test); 4973 } 4974 4975 static int 4976 validation_tc(void) 4977 { 4978 return run_test_case(validation_test); 4979 } 4980 4981 static int 4982 interrupt_tc(void) 4983 { 4984 return run_test_case(throughput_test); 4985 } 4986 4987 static struct unit_test_suite bbdev_bler_testsuite = { 4988 .suite_name = "BBdev BLER Tests", 4989 .setup = testsuite_setup, 4990 .teardown = testsuite_teardown, 4991 .unit_test_cases = { 4992 TEST_CASE_ST(ut_setup, ut_teardown, bler_tc), 4993 TEST_CASES_END() /**< NULL terminate unit test array */ 4994 } 4995 }; 4996 4997 static struct unit_test_suite bbdev_throughput_testsuite = { 4998 .suite_name = "BBdev Throughput Tests", 4999 .setup = testsuite_setup, 5000 .teardown = testsuite_teardown, 5001 .unit_test_cases = { 5002 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc), 5003 TEST_CASES_END() /**< NULL terminate unit test array */ 5004 } 5005 }; 5006 5007 static struct unit_test_suite bbdev_validation_testsuite = { 5008 .suite_name = "BBdev Validation Tests", 5009 .setup = testsuite_setup, 5010 .teardown = 
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_latency_testsuite = {
	.suite_name = "BBdev Latency Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_offload_cost_testsuite = {
	.suite_name = "BBdev Offload Cost Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_interrupt_testsuite = {
	.suite_name = "BBdev Interrupt Tests",
	.setup = interrupt_testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);
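
/*
 * The commands registered above ("bler", "throughput", "validation",
 * "latency", "offload" and "interrupt") each select one of the test suites
 * defined in this file; command-line parsing and dispatch to these suites is
 * assumed to live in the bbdev test application's main.c, not here.
 */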