1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <inttypes.h> 8 #include <math.h> 9 10 #include <rte_eal.h> 11 #include <rte_common.h> 12 #include <rte_dev.h> 13 #include <rte_launch.h> 14 #include <rte_bbdev.h> 15 #include <rte_cycles.h> 16 #include <rte_lcore.h> 17 #include <rte_malloc.h> 18 #include <rte_random.h> 19 #include <rte_hexdump.h> 20 #include <rte_interrupts.h> 21 22 #include "main.h" 23 #include "test_bbdev_vector.h" 24 25 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id)) 26 27 #define MAX_QUEUES RTE_MAX_LCORE 28 #define TEST_REPETITIONS 100 29 #define WAIT_OFFLOAD_US 1000 30 31 #ifdef RTE_BASEBAND_FPGA_LTE_FEC 32 #include <fpga_lte_fec.h> 33 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf") 34 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf") 35 #define VF_UL_4G_QUEUE_VALUE 4 36 #define VF_DL_4G_QUEUE_VALUE 4 37 #define UL_4G_BANDWIDTH 3 38 #define DL_4G_BANDWIDTH 3 39 #define UL_4G_LOAD_BALANCE 128 40 #define DL_4G_LOAD_BALANCE 128 41 #define FLR_4G_TIMEOUT 610 42 #endif 43 44 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC 45 #include <rte_pmd_fpga_5gnr_fec.h> 46 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf") 47 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf") 48 #define VF_UL_5G_QUEUE_VALUE 4 49 #define VF_DL_5G_QUEUE_VALUE 4 50 #define UL_5G_BANDWIDTH 3 51 #define DL_5G_BANDWIDTH 3 52 #define UL_5G_LOAD_BALANCE 128 53 #define DL_5G_LOAD_BALANCE 128 54 #endif 55 56 #ifdef RTE_BASEBAND_ACC100 57 #include <rte_acc100_cfg.h> 58 #define ACC100PF_DRIVER_NAME ("intel_acc100_pf") 59 #define ACC100VF_DRIVER_NAME ("intel_acc100_vf") 60 #define ACC100_QMGR_NUM_AQS 16 61 #define ACC100_QMGR_NUM_QGS 2 62 #define ACC100_QMGR_AQ_DEPTH 5 63 #define ACC100_QMGR_INVALID_IDX -1 64 #define ACC100_QMGR_RR 1 65 #define ACC100_QOS_GBR 0 66 #endif 67 68 #define OPS_CACHE_SIZE 256U 69 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */ 70 71 #define SYNC_WAIT 0 72 #define SYNC_START 1 73 #define INVALID_OPAQUE -1 74 75 #define INVALID_QUEUE_ID -1 76 /* Increment for next code block in external HARQ memory */ 77 #define HARQ_INCR 32768 78 /* Headroom for filler LLRs insertion in HARQ buffer */ 79 #define FILLER_HEADROOM 1024 80 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */ 81 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */ 82 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */ 83 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */ 84 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */ 85 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */ 86 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */ 87 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */ 88 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */ 89 90 static struct test_bbdev_vector test_vector; 91 92 /* Switch between PMD and Interrupt for throughput TC */ 93 static bool intr_enabled; 94 95 /* LLR arithmetic representation for numerical conversion */ 96 static int ldpc_llr_decimals; 97 static int ldpc_llr_size; 98 /* Keep track of the LDPC decoder device capability flag */ 99 static uint32_t ldpc_cap_flags; 100 101 /* Represents tested active devices */ 102 static struct active_device { 103 const char *driver_name; 104 uint8_t dev_id; 105 uint16_t supported_ops; 106 uint16_t queue_ids[MAX_QUEUES]; 107 uint16_t nb_queues; 108 struct rte_mempool *ops_mempool; 109 struct rte_mempool *in_mbuf_pool; 110 struct 
rte_mempool *hard_out_mbuf_pool; 111 struct rte_mempool *soft_out_mbuf_pool; 112 struct rte_mempool *harq_in_mbuf_pool; 113 struct rte_mempool *harq_out_mbuf_pool; 114 } active_devs[RTE_BBDEV_MAX_DEVS]; 115 116 static uint8_t nb_active_devs; 117 118 /* Data buffers used by BBDEV ops */ 119 struct test_buffers { 120 struct rte_bbdev_op_data *inputs; 121 struct rte_bbdev_op_data *hard_outputs; 122 struct rte_bbdev_op_data *soft_outputs; 123 struct rte_bbdev_op_data *harq_inputs; 124 struct rte_bbdev_op_data *harq_outputs; 125 }; 126 127 /* Operation parameters specific for given test case */ 128 struct test_op_params { 129 struct rte_mempool *mp; 130 struct rte_bbdev_dec_op *ref_dec_op; 131 struct rte_bbdev_enc_op *ref_enc_op; 132 uint16_t burst_sz; 133 uint16_t num_to_process; 134 uint16_t num_lcores; 135 int vector_mask; 136 uint16_t sync; 137 struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES]; 138 }; 139 140 /* Contains per lcore params */ 141 struct thread_params { 142 uint8_t dev_id; 143 uint16_t queue_id; 144 uint32_t lcore_id; 145 uint64_t start_time; 146 double ops_per_sec; 147 double mbps; 148 uint8_t iter_count; 149 double iter_average; 150 double bler; 151 uint16_t nb_dequeued; 152 int16_t processing_status; 153 uint16_t burst_sz; 154 struct test_op_params *op_params; 155 struct rte_bbdev_dec_op *dec_ops[MAX_BURST]; 156 struct rte_bbdev_enc_op *enc_ops[MAX_BURST]; 157 }; 158 159 #ifdef RTE_BBDEV_OFFLOAD_COST 160 /* Stores time statistics */ 161 struct test_time_stats { 162 /* Stores software enqueue total working time */ 163 uint64_t enq_sw_total_time; 164 /* Stores minimum value of software enqueue working time */ 165 uint64_t enq_sw_min_time; 166 /* Stores maximum value of software enqueue working time */ 167 uint64_t enq_sw_max_time; 168 /* Stores turbo enqueue total working time */ 169 uint64_t enq_acc_total_time; 170 /* Stores minimum value of accelerator enqueue working time */ 171 uint64_t enq_acc_min_time; 172 /* Stores maximum value of accelerator enqueue working time */ 173 uint64_t enq_acc_max_time; 174 /* Stores dequeue total working time */ 175 uint64_t deq_total_time; 176 /* Stores minimum value of dequeue working time */ 177 uint64_t deq_min_time; 178 /* Stores maximum value of dequeue working time */ 179 uint64_t deq_max_time; 180 }; 181 #endif 182 183 typedef int (test_case_function)(struct active_device *ad, 184 struct test_op_params *op_params); 185 186 static inline void 187 mbuf_reset(struct rte_mbuf *m) 188 { 189 m->pkt_len = 0; 190 191 do { 192 m->data_len = 0; 193 m = m->next; 194 } while (m != NULL); 195 } 196 197 /* Read flag value 0/1 from bitmap */ 198 static inline bool 199 check_bit(uint32_t bitmap, uint32_t bitmask) 200 { 201 return bitmap & bitmask; 202 } 203 204 static inline void 205 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type) 206 { 207 ad->supported_ops |= (1 << op_type); 208 } 209 210 static inline bool 211 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type) 212 { 213 return ad->supported_ops & (1 << op_type); 214 } 215 216 static inline bool 217 flags_match(uint32_t flags_req, uint32_t flags_present) 218 { 219 return (flags_req & flags_present) == flags_req; 220 } 221 222 static void 223 clear_soft_out_cap(uint32_t *op_flags) 224 { 225 *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT; 226 *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT; 227 *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT; 228 } 229 230 /* This API is to convert all the test vector op data entries 231 * to big endian 
format. It is used when the device supports 232 * the input in the big endian format. 233 */ 234 static inline void 235 convert_op_data_to_be(void) 236 { 237 struct op_data_entries *op; 238 enum op_data_type type; 239 uint8_t nb_segs, *rem_data, temp; 240 uint32_t *data, len; 241 int complete, rem, i, j; 242 243 for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) { 244 nb_segs = test_vector.entries[type].nb_segments; 245 op = &test_vector.entries[type]; 246 247 /* Invert byte endianness for all the segments */ 248 for (i = 0; i < nb_segs; ++i) { 249 len = op->segments[i].length; 250 data = op->segments[i].addr; 251 252 /* Swap complete u32 bytes */ 253 complete = len / 4; 254 for (j = 0; j < complete; j++) 255 data[j] = rte_bswap32(data[j]); 256 257 /* Swap any remaining bytes */ 258 rem = len % 4; 259 rem_data = (uint8_t *)&data[j]; 260 for (j = 0; j < rem/2; j++) { 261 temp = rem_data[j]; 262 rem_data[j] = rem_data[rem - j - 1]; 263 rem_data[rem - j - 1] = temp; 264 } 265 } 266 } 267 } 268 269 static int 270 check_dev_cap(const struct rte_bbdev_info *dev_info) 271 { 272 unsigned int i; 273 unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs, 274 nb_harq_inputs, nb_harq_outputs; 275 const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities; 276 uint8_t dev_data_endianness = dev_info->drv.data_endianness; 277 278 nb_inputs = test_vector.entries[DATA_INPUT].nb_segments; 279 nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments; 280 nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments; 281 nb_harq_inputs = test_vector.entries[DATA_HARQ_INPUT].nb_segments; 282 nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments; 283 284 for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) { 285 if (op_cap->type != test_vector.op_type) 286 continue; 287 288 if (dev_data_endianness == RTE_BIG_ENDIAN) 289 convert_op_data_to_be(); 290 291 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) { 292 const struct rte_bbdev_op_cap_turbo_dec *cap = 293 &op_cap->cap.turbo_dec; 294 /* Ignore lack of soft output capability, just skip 295 * checking if soft output is valid. 
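* When that happens, clear_soft_out_cap() below strips the soft-output
* flags from the test vector, so the remaining capability check and the
* test itself can still run without soft output.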
296 */ 297 if ((test_vector.turbo_dec.op_flags & 298 RTE_BBDEV_TURBO_SOFT_OUTPUT) && 299 !(cap->capability_flags & 300 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 301 printf( 302 "INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n", 303 dev_info->dev_name); 304 clear_soft_out_cap( 305 &test_vector.turbo_dec.op_flags); 306 } 307 308 if (!flags_match(test_vector.turbo_dec.op_flags, 309 cap->capability_flags)) 310 return TEST_FAILED; 311 if (nb_inputs > cap->num_buffers_src) { 312 printf("Too many inputs defined: %u, max: %u\n", 313 nb_inputs, cap->num_buffers_src); 314 return TEST_FAILED; 315 } 316 if (nb_soft_outputs > cap->num_buffers_soft_out && 317 (test_vector.turbo_dec.op_flags & 318 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 319 printf( 320 "Too many soft outputs defined: %u, max: %u\n", 321 nb_soft_outputs, 322 cap->num_buffers_soft_out); 323 return TEST_FAILED; 324 } 325 if (nb_hard_outputs > cap->num_buffers_hard_out) { 326 printf( 327 "Too many hard outputs defined: %u, max: %u\n", 328 nb_hard_outputs, 329 cap->num_buffers_hard_out); 330 return TEST_FAILED; 331 } 332 if (intr_enabled && !(cap->capability_flags & 333 RTE_BBDEV_TURBO_DEC_INTERRUPTS)) { 334 printf( 335 "Dequeue interrupts are not supported!\n"); 336 return TEST_FAILED; 337 } 338 339 return TEST_SUCCESS; 340 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) { 341 const struct rte_bbdev_op_cap_turbo_enc *cap = 342 &op_cap->cap.turbo_enc; 343 344 if (!flags_match(test_vector.turbo_enc.op_flags, 345 cap->capability_flags)) 346 return TEST_FAILED; 347 if (nb_inputs > cap->num_buffers_src) { 348 printf("Too many inputs defined: %u, max: %u\n", 349 nb_inputs, cap->num_buffers_src); 350 return TEST_FAILED; 351 } 352 if (nb_hard_outputs > cap->num_buffers_dst) { 353 printf( 354 "Too many hard outputs defined: %u, max: %u\n", 355 nb_hard_outputs, cap->num_buffers_dst); 356 return TEST_FAILED; 357 } 358 if (intr_enabled && !(cap->capability_flags & 359 RTE_BBDEV_TURBO_ENC_INTERRUPTS)) { 360 printf( 361 "Dequeue interrupts are not supported!\n"); 362 return TEST_FAILED; 363 } 364 365 return TEST_SUCCESS; 366 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) { 367 const struct rte_bbdev_op_cap_ldpc_enc *cap = 368 &op_cap->cap.ldpc_enc; 369 370 if (!flags_match(test_vector.ldpc_enc.op_flags, 371 cap->capability_flags)){ 372 printf("Flag Mismatch\n"); 373 return TEST_FAILED; 374 } 375 if (nb_inputs > cap->num_buffers_src) { 376 printf("Too many inputs defined: %u, max: %u\n", 377 nb_inputs, cap->num_buffers_src); 378 return TEST_FAILED; 379 } 380 if (nb_hard_outputs > cap->num_buffers_dst) { 381 printf( 382 "Too many hard outputs defined: %u, max: %u\n", 383 nb_hard_outputs, cap->num_buffers_dst); 384 return TEST_FAILED; 385 } 386 if (intr_enabled && !(cap->capability_flags & 387 RTE_BBDEV_LDPC_ENC_INTERRUPTS)) { 388 printf( 389 "Dequeue interrupts are not supported!\n"); 390 return TEST_FAILED; 391 } 392 393 return TEST_SUCCESS; 394 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) { 395 const struct rte_bbdev_op_cap_ldpc_dec *cap = 396 &op_cap->cap.ldpc_dec; 397 398 if (!flags_match(test_vector.ldpc_dec.op_flags, 399 cap->capability_flags)){ 400 printf("Flag Mismatch\n"); 401 return TEST_FAILED; 402 } 403 if (nb_inputs > cap->num_buffers_src) { 404 printf("Too many inputs defined: %u, max: %u\n", 405 nb_inputs, cap->num_buffers_src); 406 return TEST_FAILED; 407 } 408 if (nb_hard_outputs > cap->num_buffers_hard_out) { 409 printf( 410 "Too many hard outputs defined: %u, max: %u\n", 411 nb_hard_outputs, 412 
cap->num_buffers_hard_out); 413 return TEST_FAILED; 414 } 415 if (nb_harq_inputs > cap->num_buffers_hard_out) { 416 printf( 417 "Too many HARQ inputs defined: %u, max: %u\n", 418 nb_harq_inputs, 419 cap->num_buffers_hard_out); 420 return TEST_FAILED; 421 } 422 if (nb_harq_outputs > cap->num_buffers_hard_out) { 423 printf( 424 "Too many HARQ outputs defined: %u, max: %u\n", 425 nb_harq_outputs, 426 cap->num_buffers_hard_out); 427 return TEST_FAILED; 428 } 429 if (intr_enabled && !(cap->capability_flags & 430 RTE_BBDEV_LDPC_DEC_INTERRUPTS)) { 431 printf( 432 "Dequeue interrupts are not supported!\n"); 433 return TEST_FAILED; 434 } 435 if (intr_enabled && (test_vector.ldpc_dec.op_flags & 436 (RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 437 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 438 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 439 ))) { 440 printf("Skip loop-back with interrupt\n"); 441 return TEST_FAILED; 442 } 443 return TEST_SUCCESS; 444 } 445 } 446 447 if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE)) 448 return TEST_SUCCESS; /* Special case for NULL device */ 449 450 return TEST_FAILED; 451 } 452 453 /* calculates optimal mempool size not smaller than the val */ 454 static unsigned int 455 optimal_mempool_size(unsigned int val) 456 { 457 return rte_align32pow2(val + 1) - 1; 458 } 459 460 /* allocates mbuf mempool for inputs and outputs */ 461 static struct rte_mempool * 462 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id, 463 int socket_id, unsigned int mbuf_pool_size, 464 const char *op_type_str) 465 { 466 unsigned int i; 467 uint32_t max_seg_sz = 0; 468 char pool_name[RTE_MEMPOOL_NAMESIZE]; 469 470 /* find max input segment size */ 471 for (i = 0; i < entries->nb_segments; ++i) 472 if (entries->segments[i].length > max_seg_sz) 473 max_seg_sz = entries->segments[i].length; 474 475 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 476 dev_id); 477 return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0, 478 RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM 479 + FILLER_HEADROOM, 480 (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id); 481 } 482 483 static int 484 create_mempools(struct active_device *ad, int socket_id, 485 enum rte_bbdev_op_type org_op_type, uint16_t num_ops) 486 { 487 struct rte_mempool *mp; 488 unsigned int ops_pool_size, mbuf_pool_size = 0; 489 char pool_name[RTE_MEMPOOL_NAMESIZE]; 490 const char *op_type_str; 491 enum rte_bbdev_op_type op_type = org_op_type; 492 493 struct op_data_entries *in = &test_vector.entries[DATA_INPUT]; 494 struct op_data_entries *hard_out = 495 &test_vector.entries[DATA_HARD_OUTPUT]; 496 struct op_data_entries *soft_out = 497 &test_vector.entries[DATA_SOFT_OUTPUT]; 498 struct op_data_entries *harq_in = 499 &test_vector.entries[DATA_HARQ_INPUT]; 500 struct op_data_entries *harq_out = 501 &test_vector.entries[DATA_HARQ_OUTPUT]; 502 503 /* allocate ops mempool */ 504 ops_pool_size = optimal_mempool_size(RTE_MAX( 505 /* Ops used plus 1 reference op */ 506 RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1), 507 /* Minimal cache size plus 1 reference op */ 508 (unsigned int)(1.5 * rte_lcore_count() * 509 OPS_CACHE_SIZE + 1)), 510 OPS_POOL_SIZE_MIN)); 511 512 if (org_op_type == RTE_BBDEV_OP_NONE) 513 op_type = RTE_BBDEV_OP_TURBO_ENC; 514 515 op_type_str = rte_bbdev_op_type_str(op_type); 516 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 517 518 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 519 ad->dev_id); 520 mp = rte_bbdev_op_pool_create(pool_name, op_type, 521 
ops_pool_size, OPS_CACHE_SIZE, socket_id); 522 TEST_ASSERT_NOT_NULL(mp, 523 "ERROR Failed to create %u items ops pool for dev %u on socket %u.", 524 ops_pool_size, 525 ad->dev_id, 526 socket_id); 527 ad->ops_mempool = mp; 528 529 /* Do not create inputs and outputs mbufs for BaseBand Null Device */ 530 if (org_op_type == RTE_BBDEV_OP_NONE) 531 return TEST_SUCCESS; 532 533 /* Inputs */ 534 if (in->nb_segments > 0) { 535 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 536 in->nb_segments); 537 mp = create_mbuf_pool(in, ad->dev_id, socket_id, 538 mbuf_pool_size, "in"); 539 TEST_ASSERT_NOT_NULL(mp, 540 "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.", 541 mbuf_pool_size, 542 ad->dev_id, 543 socket_id); 544 ad->in_mbuf_pool = mp; 545 } 546 547 /* Hard outputs */ 548 if (hard_out->nb_segments > 0) { 549 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 550 hard_out->nb_segments); 551 mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, 552 mbuf_pool_size, 553 "hard_out"); 554 TEST_ASSERT_NOT_NULL(mp, 555 "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.", 556 mbuf_pool_size, 557 ad->dev_id, 558 socket_id); 559 ad->hard_out_mbuf_pool = mp; 560 } 561 562 /* Soft outputs */ 563 if (soft_out->nb_segments > 0) { 564 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 565 soft_out->nb_segments); 566 mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, 567 mbuf_pool_size, 568 "soft_out"); 569 TEST_ASSERT_NOT_NULL(mp, 570 "ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.", 571 mbuf_pool_size, 572 ad->dev_id, 573 socket_id); 574 ad->soft_out_mbuf_pool = mp; 575 } 576 577 /* HARQ inputs */ 578 if (harq_in->nb_segments > 0) { 579 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 580 harq_in->nb_segments); 581 mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id, 582 mbuf_pool_size, 583 "harq_in"); 584 TEST_ASSERT_NOT_NULL(mp, 585 "ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.", 586 mbuf_pool_size, 587 ad->dev_id, 588 socket_id); 589 ad->harq_in_mbuf_pool = mp; 590 } 591 592 /* HARQ outputs */ 593 if (harq_out->nb_segments > 0) { 594 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 595 harq_out->nb_segments); 596 mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id, 597 mbuf_pool_size, 598 "harq_out"); 599 TEST_ASSERT_NOT_NULL(mp, 600 "ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.", 601 mbuf_pool_size, 602 ad->dev_id, 603 socket_id); 604 ad->harq_out_mbuf_pool = mp; 605 } 606 607 return TEST_SUCCESS; 608 } 609 610 static int 611 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info, 612 struct test_bbdev_vector *vector) 613 { 614 int ret; 615 unsigned int queue_id; 616 struct rte_bbdev_queue_conf qconf; 617 struct active_device *ad = &active_devs[nb_active_devs]; 618 unsigned int nb_queues; 619 enum rte_bbdev_op_type op_type = vector->op_type; 620 621 /* Configure fpga lte fec with PF & VF values 622 * if '-i' flag is set and using fpga device 623 */ 624 #ifdef RTE_BASEBAND_FPGA_LTE_FEC 625 if ((get_init_device() == true) && 626 (!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) { 627 struct rte_fpga_lte_fec_conf conf; 628 unsigned int i; 629 630 printf("Configure FPGA LTE FEC Driver %s with default values\n", 631 info->drv.driver_name); 632 633 /* clear default configuration before initialization */ 634 memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf)); 635 636 /* Set PF mode : 637 * true if PF is used for data 
plane 638 * false for VFs 639 */ 640 conf.pf_mode_en = true; 641 642 for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) { 643 /* Number of UL queues per VF (fpga supports 8 VFs) */ 644 conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE; 645 /* Number of DL queues per VF (fpga supports 8 VFs) */ 646 conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE; 647 } 648 649 /* UL bandwidth. Needed for schedule algorithm */ 650 conf.ul_bandwidth = UL_4G_BANDWIDTH; 651 /* DL bandwidth */ 652 conf.dl_bandwidth = DL_4G_BANDWIDTH; 653 654 /* UL & DL load Balance Factor to 64 */ 655 conf.ul_load_balance = UL_4G_LOAD_BALANCE; 656 conf.dl_load_balance = DL_4G_LOAD_BALANCE; 657 658 /**< FLR timeout value */ 659 conf.flr_time_out = FLR_4G_TIMEOUT; 660 661 /* setup FPGA PF with configuration information */ 662 ret = rte_fpga_lte_fec_configure(info->dev_name, &conf); 663 TEST_ASSERT_SUCCESS(ret, 664 "Failed to configure 4G FPGA PF for bbdev %s", 665 info->dev_name); 666 } 667 #endif 668 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC 669 if ((get_init_device() == true) && 670 (!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) { 671 struct rte_fpga_5gnr_fec_conf conf; 672 unsigned int i; 673 674 printf("Configure FPGA 5GNR FEC Driver %s with default values\n", 675 info->drv.driver_name); 676 677 /* clear default configuration before initialization */ 678 memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf)); 679 680 /* Set PF mode : 681 * true if PF is used for data plane 682 * false for VFs 683 */ 684 conf.pf_mode_en = true; 685 686 for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) { 687 /* Number of UL queues per VF (fpga supports 8 VFs) */ 688 conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE; 689 /* Number of DL queues per VF (fpga supports 8 VFs) */ 690 conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE; 691 } 692 693 /* UL bandwidth. 
Needed for schedule algorithm */ 694 conf.ul_bandwidth = UL_5G_BANDWIDTH; 695 /* DL bandwidth */ 696 conf.dl_bandwidth = DL_5G_BANDWIDTH; 697 698 /* UL & DL load Balance Factor to 64 */ 699 conf.ul_load_balance = UL_5G_LOAD_BALANCE; 700 conf.dl_load_balance = DL_5G_LOAD_BALANCE; 701 702 /* setup FPGA PF with configuration information */ 703 ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf); 704 TEST_ASSERT_SUCCESS(ret, 705 "Failed to configure 5G FPGA PF for bbdev %s", 706 info->dev_name); 707 } 708 #endif 709 #ifdef RTE_BASEBAND_ACC100 710 if ((get_init_device() == true) && 711 (!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) { 712 struct rte_acc100_conf conf; 713 unsigned int i; 714 715 printf("Configure ACC100/ACC101 FEC Driver %s with default values\n", 716 info->drv.driver_name); 717 718 /* clear default configuration before initialization */ 719 memset(&conf, 0, sizeof(struct rte_acc100_conf)); 720 721 /* Always set in PF mode for built-in configuration */ 722 conf.pf_mode_en = true; 723 for (i = 0; i < RTE_ACC100_NUM_VFS; ++i) { 724 conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 725 conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 726 conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR; 727 conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 728 conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 729 conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR; 730 conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 731 conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 732 conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR; 733 conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 734 conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 735 conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR; 736 } 737 738 conf.input_pos_llr_1_bit = true; 739 conf.output_pos_llr_1_bit = true; 740 conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */ 741 742 conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS; 743 conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 744 conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 745 conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 746 conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS; 747 conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 748 conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 749 conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 750 conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS; 751 conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 752 conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 753 conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 754 conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS; 755 conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 756 conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 757 conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 758 759 /* setup PF with configuration information */ 760 ret = rte_acc10x_configure(info->dev_name, &conf); 761 TEST_ASSERT_SUCCESS(ret, 762 "Failed to configure ACC100 PF for bbdev %s", 763 info->dev_name); 764 } 765 #endif 766 /* Let's refresh this now this is configured */ 767 rte_bbdev_info_get(dev_id, info); 768 nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues); 769 nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES); 770 771 /* setup device */ 772 ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id); 773 if (ret < 0) { 774 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n", 775 dev_id, nb_queues, info->socket_id, ret); 776 return TEST_FAILED; 777 } 778 779 /* configure interrupts if needed 
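* (intr_enabled is only set for the interrupt test suites, in
* interrupt_testsuite_setup() further below)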
*/ 780 if (intr_enabled) { 781 ret = rte_bbdev_intr_enable(dev_id); 782 if (ret < 0) { 783 printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id, 784 ret); 785 return TEST_FAILED; 786 } 787 } 788 789 /* setup device queues */ 790 qconf.socket = info->socket_id; 791 qconf.queue_size = info->drv.default_queue_conf.queue_size; 792 qconf.priority = 0; 793 qconf.deferred_start = 0; 794 qconf.op_type = op_type; 795 796 for (queue_id = 0; queue_id < nb_queues; ++queue_id) { 797 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf); 798 if (ret != 0) { 799 printf( 800 "Allocated all queues (id=%u) at prio%u on dev%u\n", 801 queue_id, qconf.priority, dev_id); 802 qconf.priority++; 803 ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, 804 &qconf); 805 } 806 if (ret != 0) { 807 printf("All queues on dev %u allocated: %u\n", 808 dev_id, queue_id); 809 break; 810 } 811 ad->queue_ids[queue_id] = queue_id; 812 } 813 TEST_ASSERT(queue_id != 0, 814 "ERROR Failed to configure any queues on dev %u", 815 dev_id); 816 ad->nb_queues = queue_id; 817 818 set_avail_op(ad, op_type); 819 820 return TEST_SUCCESS; 821 } 822 823 static int 824 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info, 825 struct test_bbdev_vector *vector) 826 { 827 int ret; 828 829 active_devs[nb_active_devs].driver_name = info->drv.driver_name; 830 active_devs[nb_active_devs].dev_id = dev_id; 831 832 ret = add_bbdev_dev(dev_id, info, vector); 833 if (ret == TEST_SUCCESS) 834 ++nb_active_devs; 835 return ret; 836 } 837 838 static uint8_t 839 populate_active_devices(void) 840 { 841 int ret; 842 uint8_t dev_id; 843 uint8_t nb_devs_added = 0; 844 struct rte_bbdev_info info; 845 846 RTE_BBDEV_FOREACH(dev_id) { 847 rte_bbdev_info_get(dev_id, &info); 848 849 if (check_dev_cap(&info)) { 850 printf( 851 "Device %d (%s) does not support specified capabilities\n", 852 dev_id, info.dev_name); 853 continue; 854 } 855 856 ret = add_active_device(dev_id, &info, &test_vector); 857 if (ret != 0) { 858 printf("Adding active bbdev %s skipped\n", 859 info.dev_name); 860 continue; 861 } 862 nb_devs_added++; 863 } 864 865 return nb_devs_added; 866 } 867 868 static int 869 read_test_vector(void) 870 { 871 int ret; 872 873 memset(&test_vector, 0, sizeof(test_vector)); 874 printf("Test vector file = %s\n", get_vector_filename()); 875 ret = test_bbdev_vector_read(get_vector_filename(), &test_vector); 876 TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n", 877 get_vector_filename()); 878 879 return TEST_SUCCESS; 880 } 881 882 static int 883 testsuite_setup(void) 884 { 885 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 886 887 if (populate_active_devices() == 0) { 888 printf("No suitable devices found!\n"); 889 return TEST_SKIPPED; 890 } 891 892 return TEST_SUCCESS; 893 } 894 895 static int 896 interrupt_testsuite_setup(void) 897 { 898 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 899 900 /* Enable interrupts */ 901 intr_enabled = true; 902 903 /* Special case for NULL device (RTE_BBDEV_OP_NONE) */ 904 if (populate_active_devices() == 0 || 905 test_vector.op_type == RTE_BBDEV_OP_NONE) { 906 intr_enabled = false; 907 printf("No suitable devices found!\n"); 908 return TEST_SKIPPED; 909 } 910 911 return TEST_SUCCESS; 912 } 913 914 static void 915 testsuite_teardown(void) 916 { 917 uint8_t dev_id; 918 919 /* Unconfigure devices */ 920 RTE_BBDEV_FOREACH(dev_id) 921 rte_bbdev_close(dev_id); 922 923 /* Clear active devices structs. 
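* Both active_devs[] and nb_active_devs are reset so that a subsequent
* testsuite_setup() / populate_active_devices() starts from a clean state.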
*/ 924 memset(active_devs, 0, sizeof(active_devs)); 925 nb_active_devs = 0; 926 927 /* Disable interrupts */ 928 intr_enabled = false; 929 } 930 931 static int 932 ut_setup(void) 933 { 934 uint8_t i, dev_id; 935 936 for (i = 0; i < nb_active_devs; i++) { 937 dev_id = active_devs[i].dev_id; 938 /* reset bbdev stats */ 939 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id), 940 "Failed to reset stats of bbdev %u", dev_id); 941 /* start the device */ 942 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id), 943 "Failed to start bbdev %u", dev_id); 944 } 945 946 return TEST_SUCCESS; 947 } 948 949 static void 950 ut_teardown(void) 951 { 952 uint8_t i, dev_id; 953 struct rte_bbdev_stats stats; 954 955 for (i = 0; i < nb_active_devs; i++) { 956 dev_id = active_devs[i].dev_id; 957 /* read stats and print */ 958 rte_bbdev_stats_get(dev_id, &stats); 959 /* Stop the device */ 960 rte_bbdev_stop(dev_id); 961 } 962 } 963 964 static int 965 init_op_data_objs(struct rte_bbdev_op_data *bufs, 966 struct op_data_entries *ref_entries, 967 struct rte_mempool *mbuf_pool, const uint16_t n, 968 enum op_data_type op_type, uint16_t min_alignment) 969 { 970 int ret; 971 unsigned int i, j; 972 bool large_input = false; 973 974 for (i = 0; i < n; ++i) { 975 char *data; 976 struct op_data_buf *seg = &ref_entries->segments[0]; 977 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool); 978 TEST_ASSERT_NOT_NULL(m_head, 979 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 980 op_type, n * ref_entries->nb_segments, 981 mbuf_pool->size); 982 983 if (seg->length > RTE_BBDEV_LDPC_E_MAX_MBUF) { 984 /* 985 * Special case when DPDK mbuf cannot handle 986 * the required input size 987 */ 988 printf("Warning: Larger input size than DPDK mbuf %d\n", 989 seg->length); 990 large_input = true; 991 } 992 bufs[i].data = m_head; 993 bufs[i].offset = 0; 994 bufs[i].length = 0; 995 996 if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) { 997 if ((op_type == DATA_INPUT) && large_input) { 998 /* Allocate a fake overused mbuf */ 999 data = rte_malloc(NULL, seg->length, 0); 1000 TEST_ASSERT_NOT_NULL(data, 1001 "rte malloc failed with %u bytes", 1002 seg->length); 1003 memcpy(data, seg->addr, seg->length); 1004 m_head->buf_addr = data; 1005 m_head->buf_iova = rte_malloc_virt2iova(data); 1006 m_head->data_off = 0; 1007 m_head->data_len = seg->length; 1008 } else { 1009 data = rte_pktmbuf_append(m_head, seg->length); 1010 TEST_ASSERT_NOT_NULL(data, 1011 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 1012 seg->length, op_type); 1013 1014 TEST_ASSERT(data == RTE_PTR_ALIGN( 1015 data, min_alignment), 1016 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 1017 data, min_alignment); 1018 rte_memcpy(data, seg->addr, seg->length); 1019 } 1020 1021 bufs[i].length += seg->length; 1022 1023 for (j = 1; j < ref_entries->nb_segments; ++j) { 1024 struct rte_mbuf *m_tail = 1025 rte_pktmbuf_alloc(mbuf_pool); 1026 TEST_ASSERT_NOT_NULL(m_tail, 1027 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1028 op_type, 1029 n * ref_entries->nb_segments, 1030 mbuf_pool->size); 1031 seg += 1; 1032 1033 data = rte_pktmbuf_append(m_tail, seg->length); 1034 TEST_ASSERT_NOT_NULL(data, 1035 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 1036 seg->length, op_type); 1037 1038 TEST_ASSERT(data == RTE_PTR_ALIGN(data, 1039 min_alignment), 1040 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 1041 data, min_alignment); 1042 rte_memcpy(data, seg->addr, seg->length); 
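/* Keep the rte_bbdev_op_data length in sync with the chained mbuf data:
 * every appended tail segment adds its reference length here, so (as a
 * purely illustrative example) three reference segments of 1024, 512 and
 * 256 bytes would leave bufs[i].length at 1792 and m_head->nb_segs at 3
 * once the chaining below has run.
 */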
1043 bufs[i].length += seg->length; 1044 1045 ret = rte_pktmbuf_chain(m_head, m_tail); 1046 TEST_ASSERT_SUCCESS(ret, 1047 "Couldn't chain mbufs from %d data type mbuf pool", 1048 op_type); 1049 } 1050 } else { 1051 1052 /* allocate chained-mbuf for output buffer */ 1053 for (j = 1; j < ref_entries->nb_segments; ++j) { 1054 struct rte_mbuf *m_tail = 1055 rte_pktmbuf_alloc(mbuf_pool); 1056 TEST_ASSERT_NOT_NULL(m_tail, 1057 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1058 op_type, 1059 n * ref_entries->nb_segments, 1060 mbuf_pool->size); 1061 1062 ret = rte_pktmbuf_chain(m_head, m_tail); 1063 TEST_ASSERT_SUCCESS(ret, 1064 "Couldn't chain mbufs from %d data type mbuf pool", 1065 op_type); 1066 } 1067 } 1068 } 1069 1070 return 0; 1071 } 1072 1073 static int 1074 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len, 1075 const int socket) 1076 { 1077 int i; 1078 1079 *buffers = rte_zmalloc_socket(NULL, len, 0, socket); 1080 if (*buffers == NULL) { 1081 printf("WARNING: Failed to allocate op_data on socket %d\n", 1082 socket); 1083 /* try to allocate memory on other detected sockets */ 1084 for (i = 0; i < socket; i++) { 1085 *buffers = rte_zmalloc_socket(NULL, len, 0, i); 1086 if (*buffers != NULL) 1087 break; 1088 } 1089 } 1090 1091 return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS; 1092 } 1093 1094 static void 1095 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops, 1096 const uint16_t n, const int8_t max_llr_modulus) 1097 { 1098 uint16_t i, byte_idx; 1099 1100 for (i = 0; i < n; ++i) { 1101 struct rte_mbuf *m = input_ops[i].data; 1102 while (m != NULL) { 1103 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1104 input_ops[i].offset); 1105 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 1106 ++byte_idx) 1107 llr[byte_idx] = round((double)max_llr_modulus * 1108 llr[byte_idx] / INT8_MAX); 1109 1110 m = m->next; 1111 } 1112 } 1113 } 1114 1115 /* 1116 * We may have to insert filler bits 1117 * when they are required by the HARQ assumption 1118 */ 1119 static void 1120 ldpc_add_filler(struct rte_bbdev_op_data *input_ops, 1121 const uint16_t n, struct test_op_params *op_params) 1122 { 1123 struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec; 1124 1125 if (input_ops == NULL) 1126 return; 1127 /* No need to add filler if not required by device */ 1128 if (!(ldpc_cap_flags & 1129 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS)) 1130 return; 1131 /* No need to add filler for loopback operation */ 1132 if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 1133 return; 1134 1135 uint16_t i, j, parity_offset; 1136 for (i = 0; i < n; ++i) { 1137 struct rte_mbuf *m = input_ops[i].data; 1138 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1139 input_ops[i].offset); 1140 parity_offset = (dec.basegraph == 1 ? 
20 : 8) 1141 * dec.z_c - dec.n_filler; 1142 uint16_t new_hin_size = input_ops[i].length + dec.n_filler; 1143 m->data_len = new_hin_size; 1144 input_ops[i].length = new_hin_size; 1145 for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler; 1146 j--) 1147 llr[j] = llr[j - dec.n_filler]; 1148 uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 1149 for (j = 0; j < dec.n_filler; j++) 1150 llr[parity_offset + j] = llr_max_pre_scaling; 1151 } 1152 } 1153 1154 static void 1155 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops, 1156 const uint16_t n, const int8_t llr_size, 1157 const int8_t llr_decimals) 1158 { 1159 if (input_ops == NULL) 1160 return; 1161 1162 uint16_t i, byte_idx; 1163 1164 int16_t llr_max, llr_min, llr_tmp; 1165 llr_max = (1 << (llr_size - 1)) - 1; 1166 llr_min = -llr_max; 1167 for (i = 0; i < n; ++i) { 1168 struct rte_mbuf *m = input_ops[i].data; 1169 while (m != NULL) { 1170 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1171 input_ops[i].offset); 1172 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 1173 ++byte_idx) { 1174 1175 llr_tmp = llr[byte_idx]; 1176 if (llr_decimals == 4) 1177 llr_tmp *= 8; 1178 else if (llr_decimals == 2) 1179 llr_tmp *= 2; 1180 else if (llr_decimals == 0) 1181 llr_tmp /= 2; 1182 llr_tmp = RTE_MIN(llr_max, 1183 RTE_MAX(llr_min, llr_tmp)); 1184 llr[byte_idx] = (int8_t) llr_tmp; 1185 } 1186 1187 m = m->next; 1188 } 1189 } 1190 } 1191 1192 1193 1194 static int 1195 fill_queue_buffers(struct test_op_params *op_params, 1196 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp, 1197 struct rte_mempool *soft_out_mp, 1198 struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp, 1199 uint16_t queue_id, 1200 const struct rte_bbdev_op_cap *capabilities, 1201 uint16_t min_alignment, const int socket_id) 1202 { 1203 int ret; 1204 enum op_data_type type; 1205 const uint16_t n = op_params->num_to_process; 1206 1207 struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = { 1208 in_mp, 1209 soft_out_mp, 1210 hard_out_mp, 1211 harq_in_mp, 1212 harq_out_mp, 1213 }; 1214 1215 struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = { 1216 &op_params->q_bufs[socket_id][queue_id].inputs, 1217 &op_params->q_bufs[socket_id][queue_id].soft_outputs, 1218 &op_params->q_bufs[socket_id][queue_id].hard_outputs, 1219 &op_params->q_bufs[socket_id][queue_id].harq_inputs, 1220 &op_params->q_bufs[socket_id][queue_id].harq_outputs, 1221 }; 1222 1223 for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) { 1224 struct op_data_entries *ref_entries = 1225 &test_vector.entries[type]; 1226 if (ref_entries->nb_segments == 0) 1227 continue; 1228 1229 ret = allocate_buffers_on_socket(queue_ops[type], 1230 n * sizeof(struct rte_bbdev_op_data), 1231 socket_id); 1232 TEST_ASSERT_SUCCESS(ret, 1233 "Couldn't allocate memory for rte_bbdev_op_data structs"); 1234 1235 ret = init_op_data_objs(*queue_ops[type], ref_entries, 1236 mbuf_pools[type], n, type, min_alignment); 1237 TEST_ASSERT_SUCCESS(ret, 1238 "Couldn't init rte_bbdev_op_data structs"); 1239 } 1240 1241 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 1242 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n, 1243 capabilities->cap.turbo_dec.max_llr_modulus); 1244 1245 if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 1246 bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags & 1247 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 1248 bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 1249 RTE_BBDEV_LDPC_LLR_COMPRESSION; 1250 bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 
1251 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 1252 ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals; 1253 ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size; 1254 ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags; 1255 if (!loopback && !llr_comp) 1256 ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n, 1257 ldpc_llr_size, ldpc_llr_decimals); 1258 if (!loopback && !harq_comp) 1259 ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n, 1260 ldpc_llr_size, ldpc_llr_decimals); 1261 if (!loopback) 1262 ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n, 1263 op_params); 1264 } 1265 1266 return 0; 1267 } 1268 1269 static void 1270 free_buffers(struct active_device *ad, struct test_op_params *op_params) 1271 { 1272 unsigned int i, j; 1273 1274 rte_mempool_free(ad->ops_mempool); 1275 rte_mempool_free(ad->in_mbuf_pool); 1276 rte_mempool_free(ad->hard_out_mbuf_pool); 1277 rte_mempool_free(ad->soft_out_mbuf_pool); 1278 rte_mempool_free(ad->harq_in_mbuf_pool); 1279 rte_mempool_free(ad->harq_out_mbuf_pool); 1280 1281 for (i = 0; i < rte_lcore_count(); ++i) { 1282 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) { 1283 rte_free(op_params->q_bufs[j][i].inputs); 1284 rte_free(op_params->q_bufs[j][i].hard_outputs); 1285 rte_free(op_params->q_bufs[j][i].soft_outputs); 1286 rte_free(op_params->q_bufs[j][i].harq_inputs); 1287 rte_free(op_params->q_bufs[j][i].harq_outputs); 1288 } 1289 } 1290 } 1291 1292 static void 1293 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1294 unsigned int start_idx, 1295 struct rte_bbdev_op_data *inputs, 1296 struct rte_bbdev_op_data *hard_outputs, 1297 struct rte_bbdev_op_data *soft_outputs, 1298 struct rte_bbdev_dec_op *ref_op) 1299 { 1300 unsigned int i; 1301 struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec; 1302 1303 for (i = 0; i < n; ++i) { 1304 if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1305 ops[i]->turbo_dec.tb_params.ea = 1306 turbo_dec->tb_params.ea; 1307 ops[i]->turbo_dec.tb_params.eb = 1308 turbo_dec->tb_params.eb; 1309 ops[i]->turbo_dec.tb_params.k_pos = 1310 turbo_dec->tb_params.k_pos; 1311 ops[i]->turbo_dec.tb_params.k_neg = 1312 turbo_dec->tb_params.k_neg; 1313 ops[i]->turbo_dec.tb_params.c = 1314 turbo_dec->tb_params.c; 1315 ops[i]->turbo_dec.tb_params.c_neg = 1316 turbo_dec->tb_params.c_neg; 1317 ops[i]->turbo_dec.tb_params.cab = 1318 turbo_dec->tb_params.cab; 1319 ops[i]->turbo_dec.tb_params.r = 1320 turbo_dec->tb_params.r; 1321 } else { 1322 ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e; 1323 ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k; 1324 } 1325 1326 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale; 1327 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max; 1328 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min; 1329 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags; 1330 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index; 1331 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps; 1332 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode; 1333 1334 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i]; 1335 ops[i]->turbo_dec.input = inputs[start_idx + i]; 1336 if (soft_outputs != NULL) 1337 ops[i]->turbo_dec.soft_output = 1338 soft_outputs[start_idx + i]; 1339 } 1340 } 1341 1342 static void 1343 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1344 unsigned int start_idx, 1345 struct rte_bbdev_op_data *inputs, 1346 struct rte_bbdev_op_data *outputs, 1347 struct rte_bbdev_enc_op *ref_op) 1348 { 1349 unsigned int i; 1350 struct rte_bbdev_op_turbo_enc 
*turbo_enc = &ref_op->turbo_enc; 1351 for (i = 0; i < n; ++i) { 1352 if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1353 ops[i]->turbo_enc.tb_params.ea = 1354 turbo_enc->tb_params.ea; 1355 ops[i]->turbo_enc.tb_params.eb = 1356 turbo_enc->tb_params.eb; 1357 ops[i]->turbo_enc.tb_params.k_pos = 1358 turbo_enc->tb_params.k_pos; 1359 ops[i]->turbo_enc.tb_params.k_neg = 1360 turbo_enc->tb_params.k_neg; 1361 ops[i]->turbo_enc.tb_params.c = 1362 turbo_enc->tb_params.c; 1363 ops[i]->turbo_enc.tb_params.c_neg = 1364 turbo_enc->tb_params.c_neg; 1365 ops[i]->turbo_enc.tb_params.cab = 1366 turbo_enc->tb_params.cab; 1367 ops[i]->turbo_enc.tb_params.ncb_pos = 1368 turbo_enc->tb_params.ncb_pos; 1369 ops[i]->turbo_enc.tb_params.ncb_neg = 1370 turbo_enc->tb_params.ncb_neg; 1371 ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r; 1372 } else { 1373 ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e; 1374 ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k; 1375 ops[i]->turbo_enc.cb_params.ncb = 1376 turbo_enc->cb_params.ncb; 1377 } 1378 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index; 1379 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags; 1380 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode; 1381 1382 ops[i]->turbo_enc.output = outputs[start_idx + i]; 1383 ops[i]->turbo_enc.input = inputs[start_idx + i]; 1384 } 1385 } 1386 1387 1388 /* Returns a random number drawn from a normal distribution 1389 * with mean of 0 and variance of 1 1390 * Marsaglia algorithm 1391 */ 1392 static double 1393 randn(int n) 1394 { 1395 double S, Z, U1, U2, u, v, fac; 1396 1397 do { 1398 U1 = (double)rand() / RAND_MAX; 1399 U2 = (double)rand() / RAND_MAX; 1400 u = 2. * U1 - 1.; 1401 v = 2. * U2 - 1.; 1402 S = u * u + v * v; 1403 } while (S >= 1 || S == 0); 1404 fac = sqrt(-2. * log(S) / S); 1405 Z = (n % 2) ? 
u * fac : v * fac; 1406 return Z; 1407 } 1408 1409 static inline double 1410 maxstar(double A, double B) 1411 { 1412 if (fabs(A - B) > 5) 1413 return RTE_MAX(A, B); 1414 else 1415 return RTE_MAX(A, B) + log1p(exp(-fabs(A - B))); 1416 } 1417 1418 /* 1419 * Generate Qm LLRS for Qm==8 1420 * Modulation, AWGN and LLR estimation from max log development 1421 */ 1422 static void 1423 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1424 { 1425 int qm = 8; 1426 int qam = 256; 1427 int m, k; 1428 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1429 /* 5.1.4 of TS38.211 */ 1430 const double symbols_I[256] = { 1431 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5, 1432 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11, 1433 11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13, 1434 15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 1435 15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1436 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1437 1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 1438 15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 1439 13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5, 1440 -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, 1441 -7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, 1442 -1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13, 1443 -13, -15, -15, -13, -13, -15, -15, -11, -11, -9, 1444 -9, -11, -11, -9, -9, -13, -13, -15, -15, -13, 1445 -13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3, 1446 -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5, 1447 -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11, 1448 -9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13, 1449 -13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9, 1450 -13, -13, -15, -15, -13, -13, -15, -15}; 1451 const double symbols_Q[256] = { 1452 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11, 1453 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13, 1454 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 1455 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 1456 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5, 1457 -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, 1458 -15, -13, -15, -11, -9, -11, -9, -13, -15, -13, 1459 -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5, 1460 -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15, 1461 -13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5, 1462 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11, 1463 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 1464 13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 1465 3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 1466 13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, 1467 -5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, 1468 -13, -15, -13, -15, -11, -9, -11, -9, -13, -15, 1469 -13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, 1470 -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15, 1471 -13, -15, -11, -9, -11, -9, -13, -15, -13, -15}; 1472 /* Average constellation point energy */ 1473 N0 *= 170.0; 1474 for (k = 0; k < qm; k++) 1475 b[k] = llrs[qm * i + k] < 0 ? 
1.0 : 0.0; 1476 /* 5.1.4 of TS38.211 */ 1477 I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) * 1478 (4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6])))); 1479 Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) * 1480 (4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7])))); 1481 /* AWGN channel */ 1482 I += sqrt(N0 / 2) * randn(0); 1483 Q += sqrt(N0 / 2) * randn(1); 1484 /* 1485 * Calculate the log of the probability that each of 1486 * the constellation points was transmitted 1487 */ 1488 for (m = 0; m < qam; m++) 1489 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1490 + pow(Q - symbols_Q[m], 2.0)) / N0; 1491 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1492 for (k = 0; k < qm; k++) { 1493 p0 = -999999; 1494 p1 = -999999; 1495 /* For each constellation point */ 1496 for (m = 0; m < qam; m++) { 1497 if ((m >> (qm - k - 1)) & 1) 1498 p1 = maxstar(p1, log_syml_prob[m]); 1499 else 1500 p0 = maxstar(p0, log_syml_prob[m]); 1501 } 1502 /* Calculate the LLR */ 1503 llr_ = p0 - p1; 1504 llr_ *= (1 << ldpc_llr_decimals); 1505 llr_ = round(llr_); 1506 if (llr_ > llr_max) 1507 llr_ = llr_max; 1508 if (llr_ < -llr_max) 1509 llr_ = -llr_max; 1510 llrs[qm * i + k] = (int8_t) llr_; 1511 } 1512 } 1513 1514 1515 /* 1516 * Generate Qm LLRS for Qm==6 1517 * Modulation, AWGN and LLR estimation from max log development 1518 */ 1519 static void 1520 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1521 { 1522 int qm = 6; 1523 int qam = 64; 1524 int m, k; 1525 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1526 /* 5.1.4 of TS38.211 */ 1527 const double symbols_I[64] = { 1528 3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 1529 3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 1530 -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, 1531 -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, 1532 -5, -5, -7, -7, -5, -5, -7, -7}; 1533 const double symbols_Q[64] = { 1534 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 1535 -3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1, 1536 -5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 1537 5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7, 1538 -3, -1, -3, -1, -5, -7, -5, -7}; 1539 /* Average constellation point energy */ 1540 N0 *= 42.0; 1541 for (k = 0; k < qm; k++) 1542 b[k] = llrs[qm * i + k] < 0 ? 
1.0 : 0.0; 1543 /* 5.1.4 of TS38.211 */ 1544 I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4]))); 1545 Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5]))); 1546 /* AWGN channel */ 1547 I += sqrt(N0 / 2) * randn(0); 1548 Q += sqrt(N0 / 2) * randn(1); 1549 /* 1550 * Calculate the log of the probability that each of 1551 * the constellation points was transmitted 1552 */ 1553 for (m = 0; m < qam; m++) 1554 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1555 + pow(Q - symbols_Q[m], 2.0)) / N0; 1556 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1557 for (k = 0; k < qm; k++) { 1558 p0 = -999999; 1559 p1 = -999999; 1560 /* For each constellation point */ 1561 for (m = 0; m < qam; m++) { 1562 if ((m >> (qm - k - 1)) & 1) 1563 p1 = maxstar(p1, log_syml_prob[m]); 1564 else 1565 p0 = maxstar(p0, log_syml_prob[m]); 1566 } 1567 /* Calculate the LLR */ 1568 llr_ = p0 - p1; 1569 llr_ *= (1 << ldpc_llr_decimals); 1570 llr_ = round(llr_); 1571 if (llr_ > llr_max) 1572 llr_ = llr_max; 1573 if (llr_ < -llr_max) 1574 llr_ = -llr_max; 1575 llrs[qm * i + k] = (int8_t) llr_; 1576 } 1577 } 1578 1579 /* 1580 * Generate Qm LLRS for Qm==4 1581 * Modulation, AWGN and LLR estimation from max log development 1582 */ 1583 static void 1584 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1585 { 1586 int qm = 4; 1587 int qam = 16; 1588 int m, k; 1589 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1590 /* 5.1.4 of TS38.211 */ 1591 const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3, 1592 -1, -1, -3, -3, -1, -1, -3, -3}; 1593 const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3, 1594 1, 3, 1, 3, -1, -3, -1, -3}; 1595 /* Average constellation point energy */ 1596 N0 *= 10.0; 1597 for (k = 0; k < qm; k++) 1598 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0; 1599 /* 5.1.4 of TS38.211 */ 1600 I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2])); 1601 Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3])); 1602 /* AWGN channel */ 1603 I += sqrt(N0 / 2) * randn(0); 1604 Q += sqrt(N0 / 2) * randn(1); 1605 /* 1606 * Calculate the log of the probability that each of 1607 * the constellation points was transmitted 1608 */ 1609 for (m = 0; m < qam; m++) 1610 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1611 + pow(Q - symbols_Q[m], 2.0)) / N0; 1612 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1613 for (k = 0; k < qm; k++) { 1614 p0 = -999999; 1615 p1 = -999999; 1616 /* For each constellation point */ 1617 for (m = 0; m < qam; m++) { 1618 if ((m >> (qm - k - 1)) & 1) 1619 p1 = maxstar(p1, log_syml_prob[m]); 1620 else 1621 p0 = maxstar(p0, log_syml_prob[m]); 1622 } 1623 /* Calculate the LLR */ 1624 llr_ = p0 - p1; 1625 llr_ *= (1 << ldpc_llr_decimals); 1626 llr_ = round(llr_); 1627 if (llr_ > llr_max) 1628 llr_ = llr_max; 1629 if (llr_ < -llr_max) 1630 llr_ = -llr_max; 1631 llrs[qm * i + k] = (int8_t) llr_; 1632 } 1633 } 1634 1635 static void 1636 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max) 1637 { 1638 double b, b1, n; 1639 double coeff = 2.0 * sqrt(N0); 1640 1641 /* Ignore in vectors rare quasi null LLRs not to be saturated */ 1642 if (llrs[j] < 8 && llrs[j] > -8) 1643 return; 1644 1645 /* Note don't change sign here */ 1646 n = randn(j % 2); 1647 b1 = ((llrs[j] > 0 ? 
2.0 : -2.0) 1648 + coeff * n) / N0; 1649 b = b1 * (1 << ldpc_llr_decimals); 1650 b = round(b); 1651 if (b > llr_max) 1652 b = llr_max; 1653 if (b < -llr_max) 1654 b = -llr_max; 1655 llrs[j] = (int8_t) b; 1656 } 1657 1658 /* Generate LLR for a given SNR */ 1659 static void 1660 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs, 1661 struct rte_bbdev_dec_op *ref_op) 1662 { 1663 struct rte_mbuf *m; 1664 uint16_t qm; 1665 uint32_t i, j, e, range; 1666 double N0, llr_max; 1667 1668 e = ref_op->ldpc_dec.cb_params.e; 1669 qm = ref_op->ldpc_dec.q_m; 1670 llr_max = (1 << (ldpc_llr_size - 1)) - 1; 1671 range = e / qm; 1672 N0 = 1.0 / pow(10.0, get_snr() / 10.0); 1673 1674 for (i = 0; i < n; ++i) { 1675 m = inputs[i].data; 1676 int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0); 1677 if (qm == 8) { 1678 for (j = 0; j < range; ++j) 1679 gen_qm8_llr(llrs, j, N0, llr_max); 1680 } else if (qm == 6) { 1681 for (j = 0; j < range; ++j) 1682 gen_qm6_llr(llrs, j, N0, llr_max); 1683 } else if (qm == 4) { 1684 for (j = 0; j < range; ++j) 1685 gen_qm4_llr(llrs, j, N0, llr_max); 1686 } else { 1687 for (j = 0; j < e; ++j) 1688 gen_qm2_llr(llrs, j, N0, llr_max); 1689 } 1690 } 1691 } 1692 1693 static void 1694 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1695 unsigned int start_idx, 1696 struct rte_bbdev_op_data *inputs, 1697 struct rte_bbdev_op_data *hard_outputs, 1698 struct rte_bbdev_op_data *soft_outputs, 1699 struct rte_bbdev_op_data *harq_inputs, 1700 struct rte_bbdev_op_data *harq_outputs, 1701 struct rte_bbdev_dec_op *ref_op) 1702 { 1703 unsigned int i; 1704 struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec; 1705 1706 for (i = 0; i < n; ++i) { 1707 if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1708 ops[i]->ldpc_dec.tb_params.ea = 1709 ldpc_dec->tb_params.ea; 1710 ops[i]->ldpc_dec.tb_params.eb = 1711 ldpc_dec->tb_params.eb; 1712 ops[i]->ldpc_dec.tb_params.c = 1713 ldpc_dec->tb_params.c; 1714 ops[i]->ldpc_dec.tb_params.cab = 1715 ldpc_dec->tb_params.cab; 1716 ops[i]->ldpc_dec.tb_params.r = 1717 ldpc_dec->tb_params.r; 1718 } else { 1719 ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e; 1720 } 1721 1722 ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph; 1723 ops[i]->ldpc_dec.z_c = ldpc_dec->z_c; 1724 ops[i]->ldpc_dec.q_m = ldpc_dec->q_m; 1725 ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler; 1726 ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb; 1727 ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max; 1728 ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index; 1729 ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags; 1730 ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode; 1731 1732 if (hard_outputs != NULL) 1733 ops[i]->ldpc_dec.hard_output = 1734 hard_outputs[start_idx + i]; 1735 if (inputs != NULL) 1736 ops[i]->ldpc_dec.input = 1737 inputs[start_idx + i]; 1738 if (soft_outputs != NULL) 1739 ops[i]->ldpc_dec.soft_output = 1740 soft_outputs[start_idx + i]; 1741 if (harq_inputs != NULL) 1742 ops[i]->ldpc_dec.harq_combined_input = 1743 harq_inputs[start_idx + i]; 1744 if (harq_outputs != NULL) 1745 ops[i]->ldpc_dec.harq_combined_output = 1746 harq_outputs[start_idx + i]; 1747 } 1748 } 1749 1750 1751 static void 1752 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1753 unsigned int start_idx, 1754 struct rte_bbdev_op_data *inputs, 1755 struct rte_bbdev_op_data *outputs, 1756 struct rte_bbdev_enc_op *ref_op) 1757 { 1758 unsigned int i; 1759 struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc; 1760 for (i = 0; i < n; ++i) { 
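/* Per-op copy of the reference LDPC encode parameters: transport block
 * mode needs the full tb_params set, while code block mode only carries
 * cb_params.e; each op then gets its own mbuf-backed input/output
 * op_data taken from the preallocated per-queue buffers.
 */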
1761 if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1762 ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea; 1763 ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb; 1764 ops[i]->ldpc_enc.tb_params.cab = 1765 ldpc_enc->tb_params.cab; 1766 ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c; 1767 ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r; 1768 } else { 1769 ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e; 1770 } 1771 ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph; 1772 ops[i]->ldpc_enc.z_c = ldpc_enc->z_c; 1773 ops[i]->ldpc_enc.q_m = ldpc_enc->q_m; 1774 ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler; 1775 ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb; 1776 ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index; 1777 ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags; 1778 ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode; 1779 ops[i]->ldpc_enc.output = outputs[start_idx + i]; 1780 ops[i]->ldpc_enc.input = inputs[start_idx + i]; 1781 } 1782 } 1783 1784 static int 1785 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op, 1786 unsigned int order_idx, const int expected_status) 1787 { 1788 int status = op->status; 1789 /* ignore parity mismatch false alarms for long iterations */ 1790 if (get_iter_max() >= 10) { 1791 if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1792 (status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1793 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1794 status -= (1 << RTE_BBDEV_SYNDROME_ERROR); 1795 } 1796 if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1797 !(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1798 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1799 status += (1 << RTE_BBDEV_SYNDROME_ERROR); 1800 } 1801 } 1802 1803 TEST_ASSERT(status == expected_status, 1804 "op_status (%d) != expected_status (%d)", 1805 op->status, expected_status); 1806 1807 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1808 "Ordering error, expected %p, got %p", 1809 (void *)(uintptr_t)order_idx, op->opaque_data); 1810 1811 return TEST_SUCCESS; 1812 } 1813 1814 static int 1815 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op, 1816 unsigned int order_idx, const int expected_status) 1817 { 1818 TEST_ASSERT(op->status == expected_status, 1819 "op_status (%d) != expected_status (%d)", 1820 op->status, expected_status); 1821 1822 if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE) 1823 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1824 "Ordering error, expected %p, got %p", 1825 (void *)(uintptr_t)order_idx, op->opaque_data); 1826 1827 return TEST_SUCCESS; 1828 } 1829 1830 static inline int 1831 validate_op_chain(struct rte_bbdev_op_data *op, 1832 struct op_data_entries *orig_op) 1833 { 1834 uint8_t i; 1835 struct rte_mbuf *m = op->data; 1836 uint8_t nb_dst_segments = orig_op->nb_segments; 1837 uint32_t total_data_size = 0; 1838 1839 TEST_ASSERT(nb_dst_segments == m->nb_segs, 1840 "Number of segments differ in original (%u) and filled (%u) op", 1841 nb_dst_segments, m->nb_segs); 1842 1843 /* Validate each mbuf segment length */ 1844 for (i = 0; i < nb_dst_segments; ++i) { 1845 /* Apply offset to the first mbuf segment */ 1846 uint16_t offset = (i == 0) ? 
op->offset : 0; 1847 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1848 total_data_size += orig_op->segments[i].length; 1849 1850 TEST_ASSERT(orig_op->segments[i].length == data_len, 1851 "Length of segment differ in original (%u) and filled (%u) op", 1852 orig_op->segments[i].length, data_len); 1853 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr, 1854 rte_pktmbuf_mtod_offset(m, uint32_t *, offset), 1855 data_len, 1856 "Output buffers (CB=%u) are not equal", i); 1857 m = m->next; 1858 } 1859 1860 /* Validate total mbuf pkt length */ 1861 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 1862 TEST_ASSERT(total_data_size == pkt_len, 1863 "Length of data differ in original (%u) and filled (%u) op", 1864 total_data_size, pkt_len); 1865 1866 return TEST_SUCCESS; 1867 } 1868 1869 /* 1870 * Compute K0 for a given configuration for HARQ output length computation 1871 * As per definition in 3GPP 38.212 Table 5.4.2.1-2 1872 */ 1873 static inline uint16_t 1874 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index) 1875 { 1876 if (rv_index == 0) 1877 return 0; 1878 uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c; 1879 if (n_cb == n) { 1880 if (rv_index == 1) 1881 return (bg == 1 ? K0_1_1 : K0_1_2) * z_c; 1882 else if (rv_index == 2) 1883 return (bg == 1 ? K0_2_1 : K0_2_2) * z_c; 1884 else 1885 return (bg == 1 ? K0_3_1 : K0_3_2) * z_c; 1886 } 1887 /* LBRM case - includes a division by N */ 1888 if (rv_index == 1) 1889 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb) 1890 / n) * z_c; 1891 else if (rv_index == 2) 1892 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb) 1893 / n) * z_c; 1894 else 1895 return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb) 1896 / n) * z_c; 1897 } 1898 1899 /* HARQ output length including the Filler bits */ 1900 static inline uint16_t 1901 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld) 1902 { 1903 uint16_t k0 = 0; 1904 uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index; 1905 k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv); 1906 /* Compute RM out size and number of rows */ 1907 uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 1908 * ops_ld->z_c - ops_ld->n_filler; 1909 uint16_t deRmOutSize = RTE_MIN( 1910 k0 + ops_ld->cb_params.e + 1911 ((k0 > parity_offset) ? 1912 0 : ops_ld->n_filler), 1913 ops_ld->n_cb); 1914 uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1) 1915 / ops_ld->z_c); 1916 uint16_t harq_output_len = numRows * ops_ld->z_c; 1917 return harq_output_len; 1918 } 1919 1920 static inline int 1921 validate_op_harq_chain(struct rte_bbdev_op_data *op, 1922 struct op_data_entries *orig_op, 1923 struct rte_bbdev_op_ldpc_dec *ops_ld) 1924 { 1925 uint8_t i; 1926 uint32_t j, jj, k; 1927 struct rte_mbuf *m = op->data; 1928 uint8_t nb_dst_segments = orig_op->nb_segments; 1929 uint32_t total_data_size = 0; 1930 int8_t *harq_orig, *harq_out, abs_harq_origin; 1931 uint32_t byte_error = 0, cum_error = 0, error; 1932 int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1; 1933 int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 1934 uint16_t parity_offset; 1935 1936 TEST_ASSERT(nb_dst_segments == m->nb_segs, 1937 "Number of segments differ in original (%u) and filled (%u) op", 1938 nb_dst_segments, m->nb_segs); 1939 1940 /* Validate each mbuf segment length */ 1941 for (i = 0; i < nb_dst_segments; ++i) { 1942 /* Apply offset to the first mbuf segment */ 1943 uint16_t offset = (i == 0) ? 
op->offset : 0; 1944 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1945 total_data_size += orig_op->segments[i].length; 1946 1947 TEST_ASSERT(orig_op->segments[i].length < 1948 (uint32_t)(data_len + 64), 1949 "Length of segment differ in original (%u) and filled (%u) op", 1950 orig_op->segments[i].length, data_len); 1951 harq_orig = (int8_t *) orig_op->segments[i].addr; 1952 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset); 1953 1954 if (!(ldpc_cap_flags & 1955 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS 1956 ) || (ops_ld->op_flags & 1957 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 1958 data_len -= ops_ld->z_c; 1959 parity_offset = data_len; 1960 } else { 1961 /* Compute RM out size and number of rows */ 1962 parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 1963 * ops_ld->z_c - ops_ld->n_filler; 1964 uint16_t deRmOutSize = compute_harq_len(ops_ld) - 1965 ops_ld->n_filler; 1966 if (data_len > deRmOutSize) 1967 data_len = deRmOutSize; 1968 if (data_len > orig_op->segments[i].length) 1969 data_len = orig_op->segments[i].length; 1970 } 1971 /* 1972 * HARQ output can have minor differences 1973 * due to integer representation and related scaling 1974 */ 1975 for (j = 0, jj = 0; j < data_len; j++, jj++) { 1976 if (j == parity_offset) { 1977 /* Special Handling of the filler bits */ 1978 for (k = 0; k < ops_ld->n_filler; k++) { 1979 if (harq_out[jj] != 1980 llr_max_pre_scaling) { 1981 printf("HARQ Filler issue %d: %d %d\n", 1982 jj, harq_out[jj], 1983 llr_max); 1984 byte_error++; 1985 } 1986 jj++; 1987 } 1988 } 1989 if (!(ops_ld->op_flags & 1990 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 1991 if (ldpc_llr_decimals > 1) 1992 harq_out[jj] = (harq_out[jj] + 1) 1993 >> (ldpc_llr_decimals - 1); 1994 /* Saturated to S7 */ 1995 if (harq_orig[j] > llr_max) 1996 harq_orig[j] = llr_max; 1997 if (harq_orig[j] < -llr_max) 1998 harq_orig[j] = -llr_max; 1999 } 2000 if (harq_orig[j] != harq_out[jj]) { 2001 error = (harq_orig[j] > harq_out[jj]) ? 2002 harq_orig[j] - harq_out[jj] : 2003 harq_out[jj] - harq_orig[j]; 2004 abs_harq_origin = harq_orig[j] > 0 ? 
2005 harq_orig[j] : 2006 -harq_orig[j]; 2007 /* Residual quantization error */ 2008 if ((error > 8 && (abs_harq_origin < 2009 (llr_max - 16))) || 2010 (error > 16)) { 2011 printf("HARQ mismatch %d: exp %d act %d => %d\n", 2012 j, harq_orig[j], 2013 harq_out[jj], error); 2014 byte_error++; 2015 cum_error += error; 2016 } 2017 } 2018 } 2019 m = m->next; 2020 } 2021 2022 if (byte_error) 2023 TEST_ASSERT(byte_error <= 1, 2024 "HARQ output mismatch (%d) %d", 2025 byte_error, cum_error); 2026 2027 /* Validate total mbuf pkt length */ 2028 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 2029 TEST_ASSERT(total_data_size < pkt_len + 64, 2030 "Length of data differ in original (%u) and filled (%u) op", 2031 total_data_size, pkt_len); 2032 2033 return TEST_SUCCESS; 2034 } 2035 2036 static int 2037 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 2038 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 2039 { 2040 unsigned int i; 2041 int ret; 2042 struct op_data_entries *hard_data_orig = 2043 &test_vector.entries[DATA_HARD_OUTPUT]; 2044 struct op_data_entries *soft_data_orig = 2045 &test_vector.entries[DATA_SOFT_OUTPUT]; 2046 struct rte_bbdev_op_turbo_dec *ops_td; 2047 struct rte_bbdev_op_data *hard_output; 2048 struct rte_bbdev_op_data *soft_output; 2049 struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec; 2050 2051 for (i = 0; i < n; ++i) { 2052 ops_td = &ops[i]->turbo_dec; 2053 hard_output = &ops_td->hard_output; 2054 soft_output = &ops_td->soft_output; 2055 2056 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2057 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2058 "Returned iter_count (%d) > expected iter_count (%d)", 2059 ops_td->iter_count, ref_td->iter_count); 2060 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2061 TEST_ASSERT_SUCCESS(ret, 2062 "Checking status and ordering for decoder failed"); 2063 2064 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2065 hard_data_orig), 2066 "Hard output buffers (CB=%u) are not equal", 2067 i); 2068 2069 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT) 2070 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 2071 soft_data_orig), 2072 "Soft output buffers (CB=%u) are not equal", 2073 i); 2074 } 2075 2076 return TEST_SUCCESS; 2077 } 2078 2079 /* Check Number of code blocks errors */ 2080 static int 2081 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n) 2082 { 2083 unsigned int i; 2084 struct op_data_entries *hard_data_orig = 2085 &test_vector.entries[DATA_HARD_OUTPUT]; 2086 struct rte_bbdev_op_ldpc_dec *ops_td; 2087 struct rte_bbdev_op_data *hard_output; 2088 int errors = 0; 2089 struct rte_mbuf *m; 2090 2091 for (i = 0; i < n; ++i) { 2092 ops_td = &ops[i]->ldpc_dec; 2093 hard_output = &ops_td->hard_output; 2094 m = hard_output->data; 2095 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0), 2096 hard_data_orig->segments[0].addr, 2097 hard_data_orig->segments[0].length)) 2098 errors++; 2099 } 2100 return errors; 2101 } 2102 2103 static int 2104 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 2105 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 2106 { 2107 unsigned int i; 2108 int ret; 2109 struct op_data_entries *hard_data_orig = 2110 &test_vector.entries[DATA_HARD_OUTPUT]; 2111 struct op_data_entries *soft_data_orig = 2112 &test_vector.entries[DATA_SOFT_OUTPUT]; 2113 struct op_data_entries *harq_data_orig = 2114 &test_vector.entries[DATA_HARQ_OUTPUT]; 2115 struct rte_bbdev_op_ldpc_dec *ops_td; 2116 struct 
rte_bbdev_op_data *hard_output; 2117 struct rte_bbdev_op_data *harq_output; 2118 struct rte_bbdev_op_data *soft_output; 2119 struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec; 2120 2121 for (i = 0; i < n; ++i) { 2122 ops_td = &ops[i]->ldpc_dec; 2123 hard_output = &ops_td->hard_output; 2124 harq_output = &ops_td->harq_combined_output; 2125 soft_output = &ops_td->soft_output; 2126 2127 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2128 TEST_ASSERT_SUCCESS(ret, 2129 "Checking status and ordering for decoder failed"); 2130 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2131 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2132 "Returned iter_count (%d) > expected iter_count (%d)", 2133 ops_td->iter_count, ref_td->iter_count); 2134 /* 2135 * We can ignore output data when the decoding failed to 2136 * converge or for loop-back cases 2137 */ 2138 if (!check_bit(ops[i]->ldpc_dec.op_flags, 2139 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 2140 ) && ( 2141 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR 2142 )) == 0) 2143 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2144 hard_data_orig), 2145 "Hard output buffers (CB=%u) are not equal", 2146 i); 2147 2148 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE) 2149 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 2150 soft_data_orig), 2151 "Soft output buffers (CB=%u) are not equal", 2152 i); 2153 if (ref_op->ldpc_dec.op_flags & 2154 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) { 2155 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2156 harq_data_orig, ops_td), 2157 "HARQ output buffers (CB=%u) are not equal", 2158 i); 2159 } 2160 if (ref_op->ldpc_dec.op_flags & 2161 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 2162 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2163 harq_data_orig, ops_td), 2164 "HARQ output buffers (CB=%u) are not equal", 2165 i); 2166 2167 } 2168 2169 return TEST_SUCCESS; 2170 } 2171 2172 2173 static int 2174 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2175 struct rte_bbdev_enc_op *ref_op) 2176 { 2177 unsigned int i; 2178 int ret; 2179 struct op_data_entries *hard_data_orig = 2180 &test_vector.entries[DATA_HARD_OUTPUT]; 2181 2182 for (i = 0; i < n; ++i) { 2183 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2184 TEST_ASSERT_SUCCESS(ret, 2185 "Checking status and ordering for encoder failed"); 2186 TEST_ASSERT_SUCCESS(validate_op_chain( 2187 &ops[i]->turbo_enc.output, 2188 hard_data_orig), 2189 "Output buffers (CB=%u) are not equal", 2190 i); 2191 } 2192 2193 return TEST_SUCCESS; 2194 } 2195 2196 static int 2197 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2198 struct rte_bbdev_enc_op *ref_op) 2199 { 2200 unsigned int i; 2201 int ret; 2202 struct op_data_entries *hard_data_orig = 2203 &test_vector.entries[DATA_HARD_OUTPUT]; 2204 2205 for (i = 0; i < n; ++i) { 2206 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2207 TEST_ASSERT_SUCCESS(ret, 2208 "Checking status and ordering for encoder failed"); 2209 TEST_ASSERT_SUCCESS(validate_op_chain( 2210 &ops[i]->ldpc_enc.output, 2211 hard_data_orig), 2212 "Output buffers (CB=%u) are not equal", 2213 i); 2214 } 2215 2216 return TEST_SUCCESS; 2217 } 2218 2219 static void 2220 create_reference_dec_op(struct rte_bbdev_dec_op *op) 2221 { 2222 unsigned int i; 2223 struct op_data_entries *entry; 2224 2225 op->turbo_dec = test_vector.turbo_dec; 2226 entry = &test_vector.entries[DATA_INPUT]; 2227 for (i = 0; i < entry->nb_segments; ++i) 2228 
op->turbo_dec.input.length += 2229 entry->segments[i].length; 2230 } 2231 2232 static void 2233 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op) 2234 { 2235 unsigned int i; 2236 struct op_data_entries *entry; 2237 2238 op->ldpc_dec = test_vector.ldpc_dec; 2239 entry = &test_vector.entries[DATA_INPUT]; 2240 for (i = 0; i < entry->nb_segments; ++i) 2241 op->ldpc_dec.input.length += 2242 entry->segments[i].length; 2243 if (test_vector.ldpc_dec.op_flags & 2244 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) { 2245 entry = &test_vector.entries[DATA_HARQ_INPUT]; 2246 for (i = 0; i < entry->nb_segments; ++i) 2247 op->ldpc_dec.harq_combined_input.length += 2248 entry->segments[i].length; 2249 } 2250 } 2251 2252 2253 static void 2254 create_reference_enc_op(struct rte_bbdev_enc_op *op) 2255 { 2256 unsigned int i; 2257 struct op_data_entries *entry; 2258 2259 op->turbo_enc = test_vector.turbo_enc; 2260 entry = &test_vector.entries[DATA_INPUT]; 2261 for (i = 0; i < entry->nb_segments; ++i) 2262 op->turbo_enc.input.length += 2263 entry->segments[i].length; 2264 } 2265 2266 static void 2267 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op) 2268 { 2269 unsigned int i; 2270 struct op_data_entries *entry; 2271 2272 op->ldpc_enc = test_vector.ldpc_enc; 2273 entry = &test_vector.entries[DATA_INPUT]; 2274 for (i = 0; i < entry->nb_segments; ++i) 2275 op->ldpc_enc.input.length += 2276 entry->segments[i].length; 2277 } 2278 2279 static uint32_t 2280 calc_dec_TB_size(struct rte_bbdev_dec_op *op) 2281 { 2282 uint8_t i; 2283 uint32_t c, r, tb_size = 0; 2284 2285 if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2286 tb_size = op->turbo_dec.tb_params.k_neg; 2287 } else { 2288 c = op->turbo_dec.tb_params.c; 2289 r = op->turbo_dec.tb_params.r; 2290 for (i = 0; i < c-r; i++) 2291 tb_size += (r < op->turbo_dec.tb_params.c_neg) ? 2292 op->turbo_dec.tb_params.k_neg : 2293 op->turbo_dec.tb_params.k_pos; 2294 } 2295 return tb_size; 2296 } 2297 2298 static uint32_t 2299 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op) 2300 { 2301 uint8_t i; 2302 uint32_t c, r, tb_size = 0; 2303 uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 2304 2305 if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2306 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; 2307 } else { 2308 c = op->ldpc_dec.tb_params.c; 2309 r = op->ldpc_dec.tb_params.r; 2310 for (i = 0; i < c-r; i++) 2311 tb_size += sys_cols * op->ldpc_dec.z_c 2312 - op->ldpc_dec.n_filler; 2313 } 2314 return tb_size; 2315 } 2316 2317 static uint32_t 2318 calc_enc_TB_size(struct rte_bbdev_enc_op *op) 2319 { 2320 uint8_t i; 2321 uint32_t c, r, tb_size = 0; 2322 2323 if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2324 tb_size = op->turbo_enc.tb_params.k_neg; 2325 } else { 2326 c = op->turbo_enc.tb_params.c; 2327 r = op->turbo_enc.tb_params.r; 2328 for (i = 0; i < c-r; i++) 2329 tb_size += (r < op->turbo_enc.tb_params.c_neg) ? 2330 op->turbo_enc.tb_params.k_neg : 2331 op->turbo_enc.tb_params.k_pos; 2332 } 2333 return tb_size; 2334 } 2335 2336 static uint32_t 2337 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op) 2338 { 2339 uint8_t i; 2340 uint32_t c, r, tb_size = 0; 2341 uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 
22 : 10; 2342 2343 if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2344 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler; 2345 } else { 2346 c = op->ldpc_enc.tb_params.c; 2347 r = op->ldpc_enc.tb_params.r; 2348 for (i = 0; i < c-r; i++) 2349 tb_size += sys_cols * op->ldpc_enc.z_c 2350 - op->ldpc_enc.n_filler; 2351 } 2352 return tb_size; 2353 } 2354 2355 2356 static int 2357 init_test_op_params(struct test_op_params *op_params, 2358 enum rte_bbdev_op_type op_type, const int expected_status, 2359 const int vector_mask, struct rte_mempool *ops_mp, 2360 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores) 2361 { 2362 int ret = 0; 2363 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 2364 op_type == RTE_BBDEV_OP_LDPC_DEC) 2365 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, 2366 &op_params->ref_dec_op, 1); 2367 else 2368 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, 2369 &op_params->ref_enc_op, 1); 2370 2371 TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed"); 2372 2373 op_params->mp = ops_mp; 2374 op_params->burst_sz = burst_sz; 2375 op_params->num_to_process = num_to_process; 2376 op_params->num_lcores = num_lcores; 2377 op_params->vector_mask = vector_mask; 2378 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 2379 op_type == RTE_BBDEV_OP_LDPC_DEC) 2380 op_params->ref_dec_op->status = expected_status; 2381 else if (op_type == RTE_BBDEV_OP_TURBO_ENC 2382 || op_type == RTE_BBDEV_OP_LDPC_ENC) 2383 op_params->ref_enc_op->status = expected_status; 2384 return 0; 2385 } 2386 2387 static int 2388 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id, 2389 struct test_op_params *op_params) 2390 { 2391 int t_ret, f_ret, socket_id = SOCKET_ID_ANY; 2392 unsigned int i; 2393 struct active_device *ad; 2394 unsigned int burst_sz = get_burst_sz(); 2395 enum rte_bbdev_op_type op_type = test_vector.op_type; 2396 const struct rte_bbdev_op_cap *capabilities = NULL; 2397 2398 ad = &active_devs[dev_id]; 2399 2400 /* Check if device supports op_type */ 2401 if (!is_avail_op(ad, test_vector.op_type)) 2402 return TEST_SUCCESS; 2403 2404 struct rte_bbdev_info info; 2405 rte_bbdev_info_get(ad->dev_id, &info); 2406 socket_id = GET_SOCKET(info.socket_id); 2407 2408 f_ret = create_mempools(ad, socket_id, op_type, 2409 get_num_ops()); 2410 if (f_ret != TEST_SUCCESS) { 2411 printf("Couldn't create mempools"); 2412 goto fail; 2413 } 2414 if (op_type == RTE_BBDEV_OP_NONE) 2415 op_type = RTE_BBDEV_OP_TURBO_ENC; 2416 2417 f_ret = init_test_op_params(op_params, test_vector.op_type, 2418 test_vector.expected_status, 2419 test_vector.mask, 2420 ad->ops_mempool, 2421 burst_sz, 2422 get_num_ops(), 2423 get_num_lcores()); 2424 if (f_ret != TEST_SUCCESS) { 2425 printf("Couldn't init test op params"); 2426 goto fail; 2427 } 2428 2429 2430 /* Find capabilities */ 2431 const struct rte_bbdev_op_cap *cap = info.drv.capabilities; 2432 for (i = 0; i < RTE_BBDEV_OP_TYPE_COUNT; i++) { 2433 if (cap->type == test_vector.op_type) { 2434 capabilities = cap; 2435 break; 2436 } 2437 cap++; 2438 } 2439 TEST_ASSERT_NOT_NULL(capabilities, 2440 "Couldn't find capabilities"); 2441 2442 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2443 create_reference_dec_op(op_params->ref_dec_op); 2444 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 2445 create_reference_enc_op(op_params->ref_enc_op); 2446 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2447 create_reference_ldpc_enc_op(op_params->ref_enc_op); 2448 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2449
create_reference_ldpc_dec_op(op_params->ref_dec_op); 2450 2451 for (i = 0; i < ad->nb_queues; ++i) { 2452 f_ret = fill_queue_buffers(op_params, 2453 ad->in_mbuf_pool, 2454 ad->hard_out_mbuf_pool, 2455 ad->soft_out_mbuf_pool, 2456 ad->harq_in_mbuf_pool, 2457 ad->harq_out_mbuf_pool, 2458 ad->queue_ids[i], 2459 capabilities, 2460 info.drv.min_alignment, 2461 socket_id); 2462 if (f_ret != TEST_SUCCESS) { 2463 printf("Couldn't init queue buffers"); 2464 goto fail; 2465 } 2466 } 2467 2468 /* Run test case function */ 2469 t_ret = test_case_func(ad, op_params); 2470 2471 /* Free active device resources and return */ 2472 free_buffers(ad, op_params); 2473 return t_ret; 2474 2475 fail: 2476 free_buffers(ad, op_params); 2477 return TEST_FAILED; 2478 } 2479 2480 /* Run given test function per active device per supported op type 2481 * per burst size. 2482 */ 2483 static int 2484 run_test_case(test_case_function *test_case_func) 2485 { 2486 int ret = 0; 2487 uint8_t dev; 2488 2489 /* Alloc op_params */ 2490 struct test_op_params *op_params = rte_zmalloc(NULL, 2491 sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE); 2492 TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params", 2493 RTE_ALIGN(sizeof(struct test_op_params), 2494 RTE_CACHE_LINE_SIZE)); 2495 2496 /* For each device run test case function */ 2497 for (dev = 0; dev < nb_active_devs; ++dev) 2498 ret |= run_test_case_on_device(test_case_func, dev, op_params); 2499 2500 rte_free(op_params); 2501 2502 return ret; 2503 } 2504 2505 2506 /* Push back the HARQ output from DDR to host */ 2507 static void 2508 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2509 struct rte_bbdev_dec_op **ops, 2510 const uint16_t n) 2511 { 2512 uint16_t j; 2513 int save_status, ret; 2514 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2515 struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2516 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2517 bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 2518 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2519 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2520 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2521 for (j = 0; j < n; ++j) { 2522 if ((loopback && mem_out) || hc_out) { 2523 save_status = ops[j]->status; 2524 ops[j]->ldpc_dec.op_flags = 2525 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2526 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2527 if (h_comp) 2528 ops[j]->ldpc_dec.op_flags += 2529 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2530 ops[j]->ldpc_dec.harq_combined_input.offset = 2531 harq_offset; 2532 ops[j]->ldpc_dec.harq_combined_output.offset = 0; 2533 harq_offset += HARQ_INCR; 2534 if (!loopback) 2535 ops[j]->ldpc_dec.harq_combined_input.length = 2536 ops[j]->ldpc_dec.harq_combined_output.length; 2537 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2538 &ops[j], 1); 2539 ret = 0; 2540 while (ret == 0) 2541 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2542 dev_id, queue_id, 2543 &ops_deq[j], 1); 2544 ops[j]->ldpc_dec.op_flags = flags; 2545 ops[j]->status = save_status; 2546 } 2547 } 2548 } 2549 2550 /* 2551 * Push back the HARQ output from HW DDR to Host 2552 * Preload HARQ memory input and adjust HARQ offset 2553 */ 2554 static void 2555 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2556 struct rte_bbdev_dec_op **ops, const uint16_t n, 2557 bool preload) 2558 { 2559 uint16_t j; 2560 int deq; 2561 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2562 struct rte_bbdev_op_data save_hc_in[MAX_OPS], 
save_hc_out[MAX_OPS]; 2563 struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; 2564 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2565 bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2566 bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 2567 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2568 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2569 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2570 if ((mem_in || hc_in) && preload) { 2571 for (j = 0; j < n; ++j) { 2572 save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input; 2573 save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output; 2574 ops[j]->ldpc_dec.op_flags = 2575 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2576 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2577 if (h_comp) 2578 ops[j]->ldpc_dec.op_flags += 2579 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2580 ops[j]->ldpc_dec.harq_combined_output.offset = 2581 harq_offset; 2582 ops[j]->ldpc_dec.harq_combined_input.offset = 0; 2583 harq_offset += HARQ_INCR; 2584 } 2585 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n); 2586 deq = 0; 2587 while (deq != n) 2588 deq += rte_bbdev_dequeue_ldpc_dec_ops( 2589 dev_id, queue_id, &ops_deq[deq], 2590 n - deq); 2591 /* Restore the operations */ 2592 for (j = 0; j < n; ++j) { 2593 ops[j]->ldpc_dec.op_flags = flags; 2594 ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j]; 2595 ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j]; 2596 } 2597 } 2598 harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2599 for (j = 0; j < n; ++j) { 2600 /* Adjust HARQ offset when we reach external DDR */ 2601 if (mem_in || hc_in) 2602 ops[j]->ldpc_dec.harq_combined_input.offset 2603 = harq_offset; 2604 if (mem_out || hc_out) 2605 ops[j]->ldpc_dec.harq_combined_output.offset 2606 = harq_offset; 2607 harq_offset += HARQ_INCR; 2608 } 2609 } 2610 2611 static void 2612 dequeue_event_callback(uint16_t dev_id, 2613 enum rte_bbdev_event_type event, void *cb_arg, 2614 void *ret_param) 2615 { 2616 int ret; 2617 uint16_t i; 2618 uint64_t total_time; 2619 uint16_t deq, burst_sz, num_ops; 2620 uint16_t queue_id = *(uint16_t *) ret_param; 2621 struct rte_bbdev_info info; 2622 double tb_len_bits; 2623 struct thread_params *tp = cb_arg; 2624 2625 /* Find matching thread params using queue_id */ 2626 for (i = 0; i < MAX_QUEUES; ++i, ++tp) 2627 if (tp->queue_id == queue_id) 2628 break; 2629 2630 if (i == MAX_QUEUES) { 2631 printf("%s: Queue_id from interrupt details was not found!\n", 2632 __func__); 2633 return; 2634 } 2635 2636 if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { 2637 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 2638 printf( 2639 "Dequeue interrupt handler called for incorrect event!\n"); 2640 return; 2641 } 2642 2643 burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED); 2644 num_ops = tp->op_params->num_to_process; 2645 2646 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 2647 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 2648 &tp->dec_ops[ 2649 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2650 burst_sz); 2651 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2652 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 2653 &tp->dec_ops[ 2654 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2655 burst_sz); 2656 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2657 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 2658 &tp->enc_ops[ 2659 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2660 burst_sz); 2661 else 
/*RTE_BBDEV_OP_TURBO_ENC*/ 2662 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 2663 &tp->enc_ops[ 2664 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2665 burst_sz); 2666 2667 if (deq < burst_sz) { 2668 printf( 2669 "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n", 2670 burst_sz, deq); 2671 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 2672 return; 2673 } 2674 2675 if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) { 2676 __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); 2677 return; 2678 } 2679 2680 total_time = rte_rdtsc_precise() - tp->start_time; 2681 2682 rte_bbdev_info_get(dev_id, &info); 2683 2684 ret = TEST_SUCCESS; 2685 2686 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2687 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2688 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op, 2689 tp->op_params->vector_mask); 2690 /* get the max of iter_count for all dequeued ops */ 2691 for (i = 0; i < num_ops; ++i) 2692 tp->iter_count = RTE_MAX( 2693 tp->dec_ops[i]->turbo_dec.iter_count, 2694 tp->iter_count); 2695 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2696 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) { 2697 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2698 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op); 2699 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2700 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) { 2701 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2702 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op); 2703 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 2704 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 2705 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2706 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op, 2707 tp->op_params->vector_mask); 2708 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2709 } 2710 2711 if (ret) { 2712 printf("Buffers validation failed\n"); 2713 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 2714 } 2715 2716 switch (test_vector.op_type) { 2717 case RTE_BBDEV_OP_TURBO_DEC: 2718 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op); 2719 break; 2720 case RTE_BBDEV_OP_TURBO_ENC: 2721 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op); 2722 break; 2723 case RTE_BBDEV_OP_LDPC_DEC: 2724 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op); 2725 break; 2726 case RTE_BBDEV_OP_LDPC_ENC: 2727 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op); 2728 break; 2729 case RTE_BBDEV_OP_NONE: 2730 tb_len_bits = 0.0; 2731 break; 2732 default: 2733 printf("Unknown op type: %d\n", test_vector.op_type); 2734 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 2735 return; 2736 } 2737 2738 tp->ops_per_sec += ((double)num_ops) / 2739 ((double)total_time / (double)rte_get_tsc_hz()); 2740 tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) / 2741 ((double)total_time / (double)rte_get_tsc_hz()); 2742 2743 __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); 2744 } 2745 2746 static int 2747 throughput_intr_lcore_ldpc_dec(void *arg) 2748 { 2749 struct thread_params *tp = arg; 2750 unsigned int enqueued; 2751 const uint16_t queue_id = tp->queue_id; 2752 const uint16_t burst_sz = tp->op_params->burst_sz; 2753 const uint16_t num_to_process = tp->op_params->num_to_process; 2754 struct rte_bbdev_dec_op *ops[num_to_process]; 2755 struct test_buffers *bufs = NULL; 2756 
struct rte_bbdev_info info; 2757 int ret, i, j; 2758 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2759 uint16_t num_to_enq, enq; 2760 2761 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 2762 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 2763 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 2764 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 2765 2766 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2767 "BURST_SIZE should be <= %u", MAX_BURST); 2768 2769 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2770 "Failed to enable interrupts for dev: %u, queue_id: %u", 2771 tp->dev_id, queue_id); 2772 2773 rte_bbdev_info_get(tp->dev_id, &info); 2774 2775 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2776 "NUM_OPS cannot exceed %u for this device", 2777 info.drv.queue_size_lim); 2778 2779 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2780 2781 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 2782 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 2783 2784 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 2785 2786 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2787 num_to_process); 2788 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2789 num_to_process); 2790 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2791 copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs, 2792 bufs->hard_outputs, bufs->soft_outputs, 2793 bufs->harq_inputs, bufs->harq_outputs, ref_op); 2794 2795 /* Set counter to validate the ordering */ 2796 for (j = 0; j < num_to_process; ++j) 2797 ops[j]->opaque_data = (void *)(uintptr_t)j; 2798 2799 for (j = 0; j < TEST_REPETITIONS; ++j) { 2800 for (i = 0; i < num_to_process; ++i) { 2801 if (!loopback) 2802 rte_pktmbuf_reset( 2803 ops[i]->ldpc_dec.hard_output.data); 2804 if (hc_out || loopback) 2805 mbuf_reset( 2806 ops[i]->ldpc_dec.harq_combined_output.data); 2807 } 2808 2809 tp->start_time = rte_rdtsc_precise(); 2810 for (enqueued = 0; enqueued < num_to_process;) { 2811 num_to_enq = burst_sz; 2812 2813 if (unlikely(num_to_process - enqueued < num_to_enq)) 2814 num_to_enq = num_to_process - enqueued; 2815 2816 enq = 0; 2817 do { 2818 enq += rte_bbdev_enqueue_ldpc_dec_ops( 2819 tp->dev_id, 2820 queue_id, &ops[enqueued], 2821 num_to_enq); 2822 } while (unlikely(num_to_enq != enq)); 2823 enqueued += enq; 2824 2825 /* Write to thread burst_sz current number of enqueued 2826 * descriptors. It ensures that proper number of 2827 * descriptors will be dequeued in callback 2828 * function - needed for last batch in case where 2829 * the number of operations is not a multiple of 2830 * burst size. 
2831 */ 2832 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 2833 2834 /* Wait until processing of previous batch is 2835 * completed 2836 */ 2837 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 2838 } 2839 if (j != TEST_REPETITIONS - 1) 2840 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 2841 } 2842 2843 return TEST_SUCCESS; 2844 } 2845 2846 static int 2847 throughput_intr_lcore_dec(void *arg) 2848 { 2849 struct thread_params *tp = arg; 2850 unsigned int enqueued; 2851 const uint16_t queue_id = tp->queue_id; 2852 const uint16_t burst_sz = tp->op_params->burst_sz; 2853 const uint16_t num_to_process = tp->op_params->num_to_process; 2854 struct rte_bbdev_dec_op *ops[num_to_process]; 2855 struct test_buffers *bufs = NULL; 2856 struct rte_bbdev_info info; 2857 int ret, i, j; 2858 uint16_t num_to_enq, enq; 2859 2860 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2861 "BURST_SIZE should be <= %u", MAX_BURST); 2862 2863 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2864 "Failed to enable interrupts for dev: %u, queue_id: %u", 2865 tp->dev_id, queue_id); 2866 2867 rte_bbdev_info_get(tp->dev_id, &info); 2868 2869 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2870 "NUM_OPS cannot exceed %u for this device", 2871 info.drv.queue_size_lim); 2872 2873 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2874 2875 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 2876 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 2877 2878 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 2879 2880 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 2881 num_to_process); 2882 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2883 num_to_process); 2884 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2885 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs, 2886 bufs->hard_outputs, bufs->soft_outputs, 2887 tp->op_params->ref_dec_op); 2888 2889 /* Set counter to validate the ordering */ 2890 for (j = 0; j < num_to_process; ++j) 2891 ops[j]->opaque_data = (void *)(uintptr_t)j; 2892 2893 for (j = 0; j < TEST_REPETITIONS; ++j) { 2894 for (i = 0; i < num_to_process; ++i) 2895 rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data); 2896 2897 tp->start_time = rte_rdtsc_precise(); 2898 for (enqueued = 0; enqueued < num_to_process;) { 2899 num_to_enq = burst_sz; 2900 2901 if (unlikely(num_to_process - enqueued < num_to_enq)) 2902 num_to_enq = num_to_process - enqueued; 2903 2904 enq = 0; 2905 do { 2906 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 2907 queue_id, &ops[enqueued], 2908 num_to_enq); 2909 } while (unlikely(num_to_enq != enq)); 2910 enqueued += enq; 2911 2912 /* Write to thread burst_sz current number of enqueued 2913 * descriptors. It ensures that proper number of 2914 * descriptors will be dequeued in callback 2915 * function - needed for last batch in case where 2916 * the number of operations is not a multiple of 2917 * burst size. 
2918 */ 2919 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 2920 2921 /* Wait until processing of previous batch is 2922 * completed 2923 */ 2924 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 2925 } 2926 if (j != TEST_REPETITIONS - 1) 2927 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 2928 } 2929 2930 return TEST_SUCCESS; 2931 } 2932 2933 static int 2934 throughput_intr_lcore_enc(void *arg) 2935 { 2936 struct thread_params *tp = arg; 2937 unsigned int enqueued; 2938 const uint16_t queue_id = tp->queue_id; 2939 const uint16_t burst_sz = tp->op_params->burst_sz; 2940 const uint16_t num_to_process = tp->op_params->num_to_process; 2941 struct rte_bbdev_enc_op *ops[num_to_process]; 2942 struct test_buffers *bufs = NULL; 2943 struct rte_bbdev_info info; 2944 int ret, i, j; 2945 uint16_t num_to_enq, enq; 2946 2947 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 2948 "BURST_SIZE should be <= %u", MAX_BURST); 2949 2950 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 2951 "Failed to enable interrupts for dev: %u, queue_id: %u", 2952 tp->dev_id, queue_id); 2953 2954 rte_bbdev_info_get(tp->dev_id, &info); 2955 2956 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 2957 "NUM_OPS cannot exceed %u for this device", 2958 info.drv.queue_size_lim); 2959 2960 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 2961 2962 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 2963 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 2964 2965 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 2966 2967 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 2968 num_to_process); 2969 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 2970 num_to_process); 2971 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 2972 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs, 2973 bufs->hard_outputs, tp->op_params->ref_enc_op); 2974 2975 /* Set counter to validate the ordering */ 2976 for (j = 0; j < num_to_process; ++j) 2977 ops[j]->opaque_data = (void *)(uintptr_t)j; 2978 2979 for (j = 0; j < TEST_REPETITIONS; ++j) { 2980 for (i = 0; i < num_to_process; ++i) 2981 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 2982 2983 tp->start_time = rte_rdtsc_precise(); 2984 for (enqueued = 0; enqueued < num_to_process;) { 2985 num_to_enq = burst_sz; 2986 2987 if (unlikely(num_to_process - enqueued < num_to_enq)) 2988 num_to_enq = num_to_process - enqueued; 2989 2990 enq = 0; 2991 do { 2992 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 2993 queue_id, &ops[enqueued], 2994 num_to_enq); 2995 } while (unlikely(enq != num_to_enq)); 2996 enqueued += enq; 2997 2998 /* Write to thread burst_sz current number of enqueued 2999 * descriptors. It ensures that proper number of 3000 * descriptors will be dequeued in callback 3001 * function - needed for last batch in case where 3002 * the number of operations is not a multiple of 3003 * burst size. 
3004 */ 3005 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3006 3007 /* Wait until processing of previous batch is 3008 * completed 3009 */ 3010 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3011 } 3012 if (j != TEST_REPETITIONS - 1) 3013 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3014 } 3015 3016 return TEST_SUCCESS; 3017 } 3018 3019 3020 static int 3021 throughput_intr_lcore_ldpc_enc(void *arg) 3022 { 3023 struct thread_params *tp = arg; 3024 unsigned int enqueued; 3025 const uint16_t queue_id = tp->queue_id; 3026 const uint16_t burst_sz = tp->op_params->burst_sz; 3027 const uint16_t num_to_process = tp->op_params->num_to_process; 3028 struct rte_bbdev_enc_op *ops[num_to_process]; 3029 struct test_buffers *bufs = NULL; 3030 struct rte_bbdev_info info; 3031 int ret, i, j; 3032 uint16_t num_to_enq, enq; 3033 3034 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3035 "BURST_SIZE should be <= %u", MAX_BURST); 3036 3037 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3038 "Failed to enable interrupts for dev: %u, queue_id: %u", 3039 tp->dev_id, queue_id); 3040 3041 rte_bbdev_info_get(tp->dev_id, &info); 3042 3043 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3044 "NUM_OPS cannot exceed %u for this device", 3045 info.drv.queue_size_lim); 3046 3047 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3048 3049 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3050 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3051 3052 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3053 3054 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 3055 num_to_process); 3056 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3057 num_to_process); 3058 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3059 copy_reference_ldpc_enc_op(ops, num_to_process, 0, 3060 bufs->inputs, bufs->hard_outputs, 3061 tp->op_params->ref_enc_op); 3062 3063 /* Set counter to validate the ordering */ 3064 for (j = 0; j < num_to_process; ++j) 3065 ops[j]->opaque_data = (void *)(uintptr_t)j; 3066 3067 for (j = 0; j < TEST_REPETITIONS; ++j) { 3068 for (i = 0; i < num_to_process; ++i) 3069 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 3070 3071 tp->start_time = rte_rdtsc_precise(); 3072 for (enqueued = 0; enqueued < num_to_process;) { 3073 num_to_enq = burst_sz; 3074 3075 if (unlikely(num_to_process - enqueued < num_to_enq)) 3076 num_to_enq = num_to_process - enqueued; 3077 3078 enq = 0; 3079 do { 3080 enq += rte_bbdev_enqueue_ldpc_enc_ops( 3081 tp->dev_id, 3082 queue_id, &ops[enqueued], 3083 num_to_enq); 3084 } while (unlikely(enq != num_to_enq)); 3085 enqueued += enq; 3086 3087 /* Write to thread burst_sz current number of enqueued 3088 * descriptors. It ensures that proper number of 3089 * descriptors will be dequeued in callback 3090 * function - needed for last batch in case where 3091 * the number of operations is not a multiple of 3092 * burst size. 
3093 */ 3094 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3095 3096 /* Wait until processing of previous batch is 3097 * completed 3098 */ 3099 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3100 } 3101 if (j != TEST_REPETITIONS - 1) 3102 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3103 } 3104 3105 return TEST_SUCCESS; 3106 } 3107 3108 static int 3109 throughput_pmd_lcore_dec(void *arg) 3110 { 3111 struct thread_params *tp = arg; 3112 uint16_t enq, deq; 3113 uint64_t total_time = 0, start_time; 3114 const uint16_t queue_id = tp->queue_id; 3115 const uint16_t burst_sz = tp->op_params->burst_sz; 3116 const uint16_t num_ops = tp->op_params->num_to_process; 3117 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3118 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3119 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3120 struct test_buffers *bufs = NULL; 3121 int i, j, ret; 3122 struct rte_bbdev_info info; 3123 uint16_t num_to_enq; 3124 3125 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3126 "BURST_SIZE should be <= %u", MAX_BURST); 3127 3128 rte_bbdev_info_get(tp->dev_id, &info); 3129 3130 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3131 "NUM_OPS cannot exceed %u for this device", 3132 info.drv.queue_size_lim); 3133 3134 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3135 3136 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3137 3138 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3139 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3140 3141 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3142 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3143 bufs->hard_outputs, bufs->soft_outputs, ref_op); 3144 3145 /* Set counter to validate the ordering */ 3146 for (j = 0; j < num_ops; ++j) 3147 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3148 3149 for (i = 0; i < TEST_REPETITIONS; ++i) { 3150 3151 for (j = 0; j < num_ops; ++j) 3152 mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data); 3153 3154 start_time = rte_rdtsc_precise(); 3155 3156 for (enq = 0, deq = 0; enq < num_ops;) { 3157 num_to_enq = burst_sz; 3158 3159 if (unlikely(num_ops - enq < num_to_enq)) 3160 num_to_enq = num_ops - enq; 3161 3162 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 3163 queue_id, &ops_enq[enq], num_to_enq); 3164 3165 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3166 queue_id, &ops_deq[deq], enq - deq); 3167 } 3168 3169 /* dequeue the remaining */ 3170 while (deq < enq) { 3171 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3172 queue_id, &ops_deq[deq], enq - deq); 3173 } 3174 3175 total_time += rte_rdtsc_precise() - start_time; 3176 } 3177 3178 tp->iter_count = 0; 3179 /* get the max of iter_count for all dequeued ops */ 3180 for (i = 0; i < num_ops; ++i) { 3181 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, 3182 tp->iter_count); 3183 } 3184 3185 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3186 ret = validate_dec_op(ops_deq, num_ops, ref_op, 3187 tp->op_params->vector_mask); 3188 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3189 } 3190 3191 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3192 3193 double tb_len_bits = calc_dec_TB_size(ref_op); 3194 3195 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3196 ((double)total_time / (double)rte_get_tsc_hz()); 3197 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3198 1000000.0) / ((double)total_time / 3199 (double)rte_get_tsc_hz()); 3200 3201 return TEST_SUCCESS; 3202 } 
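/*
 * Worked example of the throughput accounting above (hypothetical numbers,
 * for illustration only): with num_ops = 512, TEST_REPETITIONS = 100,
 * tb_len_bits = 8448 and a measured total_time equal to 0.1 s worth of
 * TSC cycles, the reported per-core rates are:
 *   ops_per_sec = (512 * 100) / 0.1              = 512000 ops/s
 *   mbps        = (512 * 100 * 8448) / 1e6 / 0.1 = 4325.376 Mbps
 * The LDPC decoder and encoder throughput variants below compute their
 * rates the same way.
 */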
3203 3204 static int 3205 bler_pmd_lcore_ldpc_dec(void *arg) 3206 { 3207 struct thread_params *tp = arg; 3208 uint16_t enq, deq; 3209 uint64_t total_time = 0, start_time; 3210 const uint16_t queue_id = tp->queue_id; 3211 const uint16_t burst_sz = tp->op_params->burst_sz; 3212 const uint16_t num_ops = tp->op_params->num_to_process; 3213 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3214 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3215 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3216 struct test_buffers *bufs = NULL; 3217 int i, j, ret; 3218 float parity_bler = 0; 3219 struct rte_bbdev_info info; 3220 uint16_t num_to_enq; 3221 bool extDdr = check_bit(ldpc_cap_flags, 3222 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3223 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3224 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3225 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3226 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3227 3228 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3229 "BURST_SIZE should be <= %u", MAX_BURST); 3230 3231 rte_bbdev_info_get(tp->dev_id, &info); 3232 3233 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3234 "NUM_OPS cannot exceed %u for this device", 3235 info.drv.queue_size_lim); 3236 3237 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3238 3239 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3240 3241 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3242 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3243 3244 /* For BLER tests we need to enable early termination */ 3245 if (!check_bit(ref_op->ldpc_dec.op_flags, 3246 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3247 ref_op->ldpc_dec.op_flags += 3248 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3249 ref_op->ldpc_dec.iter_max = get_iter_max(); 3250 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3251 3252 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3253 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3254 bufs->hard_outputs, bufs->soft_outputs, 3255 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3256 generate_llr_input(num_ops, bufs->inputs, ref_op); 3257 3258 /* Set counter to validate the ordering */ 3259 for (j = 0; j < num_ops; ++j) 3260 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3261 3262 for (i = 0; i < 1; ++i) { /* Could add more iterations */ 3263 for (j = 0; j < num_ops; ++j) { 3264 if (!loopback) 3265 mbuf_reset( 3266 ops_enq[j]->ldpc_dec.hard_output.data); 3267 if (hc_out || loopback) 3268 mbuf_reset( 3269 ops_enq[j]->ldpc_dec.harq_combined_output.data); 3270 } 3271 if (extDdr) 3272 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3273 num_ops, true); 3274 start_time = rte_rdtsc_precise(); 3275 3276 for (enq = 0, deq = 0; enq < num_ops;) { 3277 num_to_enq = burst_sz; 3278 3279 if (unlikely(num_ops - enq < num_to_enq)) 3280 num_to_enq = num_ops - enq; 3281 3282 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3283 queue_id, &ops_enq[enq], num_to_enq); 3284 3285 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3286 queue_id, &ops_deq[deq], enq - deq); 3287 } 3288 3289 /* dequeue the remaining */ 3290 while (deq < enq) { 3291 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3292 queue_id, &ops_deq[deq], enq - deq); 3293 } 3294 3295 total_time += rte_rdtsc_precise() - start_time; 3296 } 3297 3298 tp->iter_count = 0; 3299 tp->iter_average = 0; 3300 /* get the max of iter_count for all dequeued ops */ 3301 for (i = 0; i < num_ops; ++i) { 3302 tp->iter_count = 
RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3303 tp->iter_count); 3304 tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count; 3305 if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR)) 3306 parity_bler += 1.0; 3307 } 3308 3309 parity_bler /= num_ops; /* This one is based on SYND */ 3310 tp->iter_average /= num_ops; 3311 tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops; 3312 3313 if (test_vector.op_type != RTE_BBDEV_OP_NONE 3314 && tp->bler == 0 3315 && parity_bler == 0 3316 && !hc_out) { 3317 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3318 tp->op_params->vector_mask); 3319 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3320 } 3321 3322 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3323 3324 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3325 tp->ops_per_sec = ((double)num_ops * 1) / 3326 ((double)total_time / (double)rte_get_tsc_hz()); 3327 tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) / 3328 1000000.0) / ((double)total_time / 3329 (double)rte_get_tsc_hz()); 3330 3331 return TEST_SUCCESS; 3332 } 3333 3334 static int 3335 throughput_pmd_lcore_ldpc_dec(void *arg) 3336 { 3337 struct thread_params *tp = arg; 3338 uint16_t enq, deq; 3339 uint64_t total_time = 0, start_time; 3340 const uint16_t queue_id = tp->queue_id; 3341 const uint16_t burst_sz = tp->op_params->burst_sz; 3342 const uint16_t num_ops = tp->op_params->num_to_process; 3343 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3344 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3345 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3346 struct test_buffers *bufs = NULL; 3347 int i, j, ret; 3348 struct rte_bbdev_info info; 3349 uint16_t num_to_enq; 3350 bool extDdr = check_bit(ldpc_cap_flags, 3351 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3352 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3353 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3354 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3355 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3356 3357 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3358 "BURST_SIZE should be <= %u", MAX_BURST); 3359 3360 rte_bbdev_info_get(tp->dev_id, &info); 3361 3362 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3363 "NUM_OPS cannot exceed %u for this device", 3364 info.drv.queue_size_lim); 3365 3366 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3367 3368 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3369 3370 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3371 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3372 3373 /* For throughput tests we need to disable early termination */ 3374 if (check_bit(ref_op->ldpc_dec.op_flags, 3375 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3376 ref_op->ldpc_dec.op_flags -= 3377 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3378 ref_op->ldpc_dec.iter_max = get_iter_max(); 3379 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3380 3381 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3382 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3383 bufs->hard_outputs, bufs->soft_outputs, 3384 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3385 3386 /* Set counter to validate the ordering */ 3387 for (j = 0; j < num_ops; ++j) 3388 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3389 3390 for (i = 0; i < TEST_REPETITIONS; ++i) { 3391 for (j = 0; j < num_ops; ++j) { 3392 if (!loopback) 3393 mbuf_reset( 3394 ops_enq[j]->ldpc_dec.hard_output.data); 3395 if (hc_out || loopback) 3396 mbuf_reset( 3397 
ops_enq[j]->ldpc_dec.harq_combined_output.data); 3398 } 3399 if (extDdr) 3400 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3401 num_ops, true); 3402 start_time = rte_rdtsc_precise(); 3403 3404 for (enq = 0, deq = 0; enq < num_ops;) { 3405 num_to_enq = burst_sz; 3406 3407 if (unlikely(num_ops - enq < num_to_enq)) 3408 num_to_enq = num_ops - enq; 3409 3410 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3411 queue_id, &ops_enq[enq], num_to_enq); 3412 3413 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3414 queue_id, &ops_deq[deq], enq - deq); 3415 } 3416 3417 /* dequeue the remaining */ 3418 while (deq < enq) { 3419 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3420 queue_id, &ops_deq[deq], enq - deq); 3421 } 3422 3423 total_time += rte_rdtsc_precise() - start_time; 3424 } 3425 3426 tp->iter_count = 0; 3427 /* get the max of iter_count for all dequeued ops */ 3428 for (i = 0; i < num_ops; ++i) { 3429 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3430 tp->iter_count); 3431 } 3432 if (extDdr) { 3433 /* Read loopback is not thread safe */ 3434 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops); 3435 } 3436 3437 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3438 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3439 tp->op_params->vector_mask); 3440 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3441 } 3442 3443 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3444 3445 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3446 3447 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3448 ((double)total_time / (double)rte_get_tsc_hz()); 3449 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3450 1000000.0) / ((double)total_time / 3451 (double)rte_get_tsc_hz()); 3452 3453 return TEST_SUCCESS; 3454 } 3455 3456 static int 3457 throughput_pmd_lcore_enc(void *arg) 3458 { 3459 struct thread_params *tp = arg; 3460 uint16_t enq, deq; 3461 uint64_t total_time = 0, start_time; 3462 const uint16_t queue_id = tp->queue_id; 3463 const uint16_t burst_sz = tp->op_params->burst_sz; 3464 const uint16_t num_ops = tp->op_params->num_to_process; 3465 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3466 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3467 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3468 struct test_buffers *bufs = NULL; 3469 int i, j, ret; 3470 struct rte_bbdev_info info; 3471 uint16_t num_to_enq; 3472 3473 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3474 "BURST_SIZE should be <= %u", MAX_BURST); 3475 3476 rte_bbdev_info_get(tp->dev_id, &info); 3477 3478 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3479 "NUM_OPS cannot exceed %u for this device", 3480 info.drv.queue_size_lim); 3481 3482 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3483 3484 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3485 3486 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3487 num_ops); 3488 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3489 num_ops); 3490 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3491 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3492 bufs->hard_outputs, ref_op); 3493 3494 /* Set counter to validate the ordering */ 3495 for (j = 0; j < num_ops; ++j) 3496 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3497 3498 for (i = 0; i < TEST_REPETITIONS; ++i) { 3499 3500 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3501 for (j = 0; j < num_ops; ++j) 3502 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3503 3504 start_time = rte_rdtsc_precise(); 3505 3506 
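/*
 * Descriptive note (added): enqueue in bursts of at most burst_sz while
 * opportunistically dequeuing whatever the device has already completed;
 * operations still in flight are drained by the loop that follows.
 */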
for (enq = 0, deq = 0; enq < num_ops;) { 3507 num_to_enq = burst_sz; 3508 3509 if (unlikely(num_ops - enq < num_to_enq)) 3510 num_to_enq = num_ops - enq; 3511 3512 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 3513 queue_id, &ops_enq[enq], num_to_enq); 3514 3515 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3516 queue_id, &ops_deq[deq], enq - deq); 3517 } 3518 3519 /* dequeue the remaining */ 3520 while (deq < enq) { 3521 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3522 queue_id, &ops_deq[deq], enq - deq); 3523 } 3524 3525 total_time += rte_rdtsc_precise() - start_time; 3526 } 3527 3528 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3529 ret = validate_enc_op(ops_deq, num_ops, ref_op); 3530 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3531 } 3532 3533 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 3534 3535 double tb_len_bits = calc_enc_TB_size(ref_op); 3536 3537 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3538 ((double)total_time / (double)rte_get_tsc_hz()); 3539 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 3540 / 1000000.0) / ((double)total_time / 3541 (double)rte_get_tsc_hz()); 3542 3543 return TEST_SUCCESS; 3544 } 3545 3546 static int 3547 throughput_pmd_lcore_ldpc_enc(void *arg) 3548 { 3549 struct thread_params *tp = arg; 3550 uint16_t enq, deq; 3551 uint64_t total_time = 0, start_time; 3552 const uint16_t queue_id = tp->queue_id; 3553 const uint16_t burst_sz = tp->op_params->burst_sz; 3554 const uint16_t num_ops = tp->op_params->num_to_process; 3555 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3556 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3557 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3558 struct test_buffers *bufs = NULL; 3559 int i, j, ret; 3560 struct rte_bbdev_info info; 3561 uint16_t num_to_enq; 3562 3563 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3564 "BURST_SIZE should be <= %u", MAX_BURST); 3565 3566 rte_bbdev_info_get(tp->dev_id, &info); 3567 3568 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3569 "NUM_OPS cannot exceed %u for this device", 3570 info.drv.queue_size_lim); 3571 3572 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3573 3574 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3575 3576 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3577 num_ops); 3578 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3579 num_ops); 3580 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3581 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3582 bufs->hard_outputs, ref_op); 3583 3584 /* Set counter to validate the ordering */ 3585 for (j = 0; j < num_ops; ++j) 3586 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3587 3588 for (i = 0; i < TEST_REPETITIONS; ++i) { 3589 3590 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3591 for (j = 0; j < num_ops; ++j) 3592 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3593 3594 start_time = rte_rdtsc_precise(); 3595 3596 for (enq = 0, deq = 0; enq < num_ops;) { 3597 num_to_enq = burst_sz; 3598 3599 if (unlikely(num_ops - enq < num_to_enq)) 3600 num_to_enq = num_ops - enq; 3601 3602 enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, 3603 queue_id, &ops_enq[enq], num_to_enq); 3604 3605 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 3606 queue_id, &ops_deq[deq], enq - deq); 3607 } 3608 3609 /* dequeue the remaining */ 3610 while (deq < enq) { 3611 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 3612 queue_id, &ops_deq[deq], enq - deq); 3613 } 3614 3615 total_time += rte_rdtsc_precise() - start_time; 3616 
} 3617 3618 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3619 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op); 3620 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3621 } 3622 3623 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 3624 3625 double tb_len_bits = calc_ldpc_enc_TB_size(ref_op); 3626 3627 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3628 ((double)total_time / (double)rte_get_tsc_hz()); 3629 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 3630 / 1000000.0) / ((double)total_time / 3631 (double)rte_get_tsc_hz()); 3632 3633 return TEST_SUCCESS; 3634 } 3635 3636 static void 3637 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores) 3638 { 3639 unsigned int iter = 0; 3640 double total_mops = 0, total_mbps = 0; 3641 3642 for (iter = 0; iter < used_cores; iter++) { 3643 printf( 3644 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n", 3645 t_params[iter].lcore_id, t_params[iter].ops_per_sec, 3646 t_params[iter].mbps); 3647 total_mops += t_params[iter].ops_per_sec; 3648 total_mbps += t_params[iter].mbps; 3649 } 3650 printf( 3651 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n", 3652 used_cores, total_mops, total_mbps); 3653 } 3654 3655 /* Aggregate the performance results over the number of cores used */ 3656 static void 3657 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores) 3658 { 3659 unsigned int core_idx = 0; 3660 double total_mops = 0, total_mbps = 0; 3661 uint8_t iter_count = 0; 3662 3663 for (core_idx = 0; core_idx < used_cores; core_idx++) { 3664 printf( 3665 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n", 3666 t_params[core_idx].lcore_id, 3667 t_params[core_idx].ops_per_sec, 3668 t_params[core_idx].mbps, 3669 t_params[core_idx].iter_count); 3670 total_mops += t_params[core_idx].ops_per_sec; 3671 total_mbps += t_params[core_idx].mbps; 3672 iter_count = RTE_MAX(iter_count, 3673 t_params[core_idx].iter_count); 3674 } 3675 printf( 3676 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n", 3677 used_cores, total_mops, total_mbps, iter_count); 3678 } 3679 3680 /* Aggregate the performance results over the number of cores used */ 3681 static void 3682 print_dec_bler(struct thread_params *t_params, unsigned int used_cores) 3683 { 3684 unsigned int core_idx = 0; 3685 double total_mbps = 0, total_bler = 0, total_iter = 0; 3686 double snr = get_snr(); 3687 3688 for (core_idx = 0; core_idx < used_cores; core_idx++) { 3689 printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n", 3690 t_params[core_idx].lcore_id, 3691 t_params[core_idx].bler * 100, 3692 t_params[core_idx].iter_average, 3693 t_params[core_idx].mbps, 3694 get_vector_filename()); 3695 total_mbps += t_params[core_idx].mbps; 3696 total_bler += t_params[core_idx].bler; 3697 total_iter += t_params[core_idx].iter_average; 3698 } 3699 total_bler /= used_cores; 3700 total_iter /= used_cores; 3701 3702 printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n", 3703 snr, total_bler * 100, total_iter, get_iter_max(), 3704 total_mbps, get_vector_filename()); 3705 } 3706 3707 /* 3708 * Test function that determines BLER wireless performance 3709 */ 3710 static int 3711 bler_test(struct active_device *ad, 3712 struct test_op_params *op_params) 3713 { 3714 int ret; 3715 unsigned int lcore_id, used_cores = 0; 3716 struct thread_params *t_params; 3717 struct rte_bbdev_info info; 3718 lcore_function_t *bler_function; 3719 uint16_t num_lcores; 3720 const char *op_type_str; 
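/* Note: the BLER path below only covers the LDPC decoder without HARQ internal loopback or LLR compression; any other configuration is skipped. */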
3721 3722 rte_bbdev_info_get(ad->dev_id, &info); 3723 3724 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 3725 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 3726 test_vector.op_type); 3727 3728 printf("+ ------------------------------------------------------- +\n"); 3729 printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 3730 info.dev_name, ad->nb_queues, op_params->burst_sz, 3731 op_params->num_to_process, op_params->num_lcores, 3732 op_type_str, 3733 intr_enabled ? "Interrupt mode" : "PMD mode", 3734 (double)rte_get_tsc_hz() / 1000000000.0); 3735 3736 /* Set number of lcores */ 3737 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 3738 ? ad->nb_queues 3739 : op_params->num_lcores; 3740 3741 /* Allocate memory for thread parameters structure */ 3742 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 3743 RTE_CACHE_LINE_SIZE); 3744 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 3745 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 3746 RTE_CACHE_LINE_SIZE)); 3747 3748 if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) && 3749 !check_bit(test_vector.ldpc_dec.op_flags, 3750 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 3751 && !check_bit(test_vector.ldpc_dec.op_flags, 3752 RTE_BBDEV_LDPC_LLR_COMPRESSION)) 3753 bler_function = bler_pmd_lcore_ldpc_dec; 3754 else { 3755 /* Avoid leaking t_params when the op type is not supported by the BLER test */ rte_free(t_params); return TEST_SKIPPED; 3756 } 3757 __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED); 3758 3759 /* Main core is set at first entry */ 3760 t_params[0].dev_id = ad->dev_id; 3761 t_params[0].lcore_id = rte_lcore_id(); 3762 t_params[0].op_params = op_params; 3763 t_params[0].queue_id = ad->queue_ids[used_cores++]; 3764 t_params[0].iter_count = 0; 3765 3766 RTE_LCORE_FOREACH_WORKER(lcore_id) { 3767 if (used_cores >= num_lcores) 3768 break; 3769 3770 t_params[used_cores].dev_id = ad->dev_id; 3771 t_params[used_cores].lcore_id = lcore_id; 3772 t_params[used_cores].op_params = op_params; 3773 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 3774 t_params[used_cores].iter_count = 0; 3775 3776 rte_eal_remote_launch(bler_function, 3777 &t_params[used_cores++], lcore_id); 3778 } 3779 3780 __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3781 ret = bler_function(&t_params[0]); 3782 3783 /* Main core is always used */ 3784 for (used_cores = 1; used_cores < num_lcores; used_cores++) 3785 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 3786 3787 print_dec_bler(t_params, num_lcores); 3788 3789 /* Return if test failed */ 3790 if (ret) { 3791 rte_free(t_params); 3792 return ret; 3793 } 3794 3795 /* Test passed: per-core and aggregate BLER were already printed above */ 3796 rte_free(t_params); 3797 return ret; 3798 } 3799 3800 /* 3801 * Test function that determines how long an enqueue + dequeue of a burst 3802 * takes on available lcores.
3803 */ 3804 static int 3805 throughput_test(struct active_device *ad, 3806 struct test_op_params *op_params) 3807 { 3808 int ret; 3809 unsigned int lcore_id, used_cores = 0; 3810 struct thread_params *t_params, *tp; 3811 struct rte_bbdev_info info; 3812 lcore_function_t *throughput_function; 3813 uint16_t num_lcores; 3814 const char *op_type_str; 3815 3816 rte_bbdev_info_get(ad->dev_id, &info); 3817 3818 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 3819 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 3820 test_vector.op_type); 3821 3822 printf("+ ------------------------------------------------------- +\n"); 3823 printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 3824 info.dev_name, ad->nb_queues, op_params->burst_sz, 3825 op_params->num_to_process, op_params->num_lcores, 3826 op_type_str, 3827 intr_enabled ? "Interrupt mode" : "PMD mode", 3828 (double)rte_get_tsc_hz() / 1000000000.0); 3829 3830 /* Set number of lcores */ 3831 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 3832 ? ad->nb_queues 3833 : op_params->num_lcores; 3834 3835 /* Allocate memory for thread parameters structure */ 3836 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 3837 RTE_CACHE_LINE_SIZE); 3838 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 3839 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 3840 RTE_CACHE_LINE_SIZE)); 3841 3842 if (intr_enabled) { 3843 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 3844 throughput_function = throughput_intr_lcore_dec; 3845 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3846 throughput_function = throughput_intr_lcore_ldpc_dec; 3847 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 3848 throughput_function = throughput_intr_lcore_enc; 3849 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3850 throughput_function = throughput_intr_lcore_ldpc_enc; 3851 else 3852 throughput_function = throughput_intr_lcore_enc; 3853 3854 /* Dequeue interrupt callback registration */ 3855 ret = rte_bbdev_callback_register(ad->dev_id, 3856 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback, 3857 t_params); 3858 if (ret < 0) { 3859 rte_free(t_params); 3860 return ret; 3861 } 3862 } else { 3863 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 3864 throughput_function = throughput_pmd_lcore_dec; 3865 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3866 throughput_function = throughput_pmd_lcore_ldpc_dec; 3867 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 3868 throughput_function = throughput_pmd_lcore_enc; 3869 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3870 throughput_function = throughput_pmd_lcore_ldpc_enc; 3871 else 3872 throughput_function = throughput_pmd_lcore_enc; 3873 } 3874 3875 __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED); 3876 3877 /* Main core is set at first entry */ 3878 t_params[0].dev_id = ad->dev_id; 3879 t_params[0].lcore_id = rte_lcore_id(); 3880 t_params[0].op_params = op_params; 3881 t_params[0].queue_id = ad->queue_ids[used_cores++]; 3882 t_params[0].iter_count = 0; 3883 3884 RTE_LCORE_FOREACH_WORKER(lcore_id) { 3885 if (used_cores >= num_lcores) 3886 break; 3887 3888 t_params[used_cores].dev_id = ad->dev_id; 3889 t_params[used_cores].lcore_id = lcore_id; 3890 t_params[used_cores].op_params = op_params; 3891 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 3892 t_params[used_cores].iter_count = 0; 3893 3894 
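/* Launch the selected throughput worker on this lcore; the main lcore runs the same function inline once SYNC_START is signalled below. */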
rte_eal_remote_launch(throughput_function, 3895 &t_params[used_cores++], lcore_id); 3896 } 3897 3898 __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3899 ret = throughput_function(&t_params[0]); 3900 3901 /* Main core is always used */ 3902 for (used_cores = 1; used_cores < num_lcores; used_cores++) 3903 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 3904 3905 /* Return if test failed */ 3906 if (ret) { 3907 rte_free(t_params); 3908 return ret; 3909 } 3910 3911 /* Print throughput if interrupts are disabled and test passed */ 3912 if (!intr_enabled) { 3913 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC || 3914 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3915 print_dec_throughput(t_params, num_lcores); 3916 else 3917 print_enc_throughput(t_params, num_lcores); 3918 rte_free(t_params); 3919 return ret; 3920 } 3921 3922 /* In interrupt TC we need to wait for the interrupt callback to dequeue 3923 * all pending operations. Skip waiting for queues that reported an 3924 * error via the processing_status variable. 3925 * Wait for the main lcore operations first. 3926 */ 3927 tp = &t_params[0]; 3928 while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < 3929 op_params->num_to_process) && 3930 (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) != 3931 TEST_FAILED)) 3932 rte_pause(); 3933 3934 tp->ops_per_sec /= TEST_REPETITIONS; 3935 tp->mbps /= TEST_REPETITIONS; 3936 ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED); 3937 3938 /* Wait for worker lcores operations */ 3939 for (used_cores = 1; used_cores < num_lcores; used_cores++) { 3940 tp = &t_params[used_cores]; 3941 3942 while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < 3943 op_params->num_to_process) && 3944 (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) != 3945 TEST_FAILED)) 3946 rte_pause(); 3947 3948 tp->ops_per_sec /= TEST_REPETITIONS; 3949 tp->mbps /= TEST_REPETITIONS; 3950 ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED); 3951 } 3952 3953 /* Print throughput if test passed */ 3954 if (!ret) { 3955 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC || 3956 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3957 print_dec_throughput(t_params, num_lcores); 3958 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC || 3959 test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3960 print_enc_throughput(t_params, num_lcores); 3961 } 3962 3963 rte_free(t_params); 3964 return ret; 3965 } 3966 3967 static int 3968 latency_test_dec(struct rte_mempool *mempool, 3969 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 3970 int vector_mask, uint16_t dev_id, uint16_t queue_id, 3971 const uint16_t num_to_process, uint16_t burst_sz, 3972 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 3973 { 3974 int ret = TEST_SUCCESS; 3975 uint16_t i, j, dequeued; 3976 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 3977 uint64_t start_time = 0, last_time = 0; 3978 3979 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 3980 uint16_t enq = 0, deq = 0; 3981 bool first_time = true; 3982 last_time = 0; 3983 3984 if (unlikely(num_to_process - dequeued < burst_sz)) 3985 burst_sz = num_to_process - dequeued; 3986 3987 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 3988 TEST_ASSERT_SUCCESS(ret, 3989 "rte_bbdev_dec_op_alloc_bulk() failed"); 3990 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3991 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 3992 bufs->inputs, 3993 bufs->hard_outputs, 3994 bufs->soft_outputs, 3995
ref_op); 3996 3997 /* Set counter to validate the ordering */ 3998 for (j = 0; j < burst_sz; ++j) 3999 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4000 4001 start_time = rte_rdtsc_precise(); 4002 4003 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq], 4004 burst_sz); 4005 TEST_ASSERT(enq == burst_sz, 4006 "Error enqueueing burst, expected %u, got %u", 4007 burst_sz, enq); 4008 4009 /* Dequeue */ 4010 do { 4011 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4012 &ops_deq[deq], burst_sz - deq); 4013 if (likely(first_time && (deq > 0))) { 4014 last_time = rte_rdtsc_precise() - start_time; 4015 first_time = false; 4016 } 4017 } while (unlikely(burst_sz != deq)); 4018 4019 *max_time = RTE_MAX(*max_time, last_time); 4020 *min_time = RTE_MIN(*min_time, last_time); 4021 *total_time += last_time; 4022 4023 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4024 ret = validate_dec_op(ops_deq, burst_sz, ref_op, 4025 vector_mask); 4026 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4027 } 4028 4029 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4030 dequeued += deq; 4031 } 4032 4033 return i; 4034 } 4035 4036 /* Test case for latency/validation for LDPC Decoder */ 4037 static int 4038 latency_test_ldpc_dec(struct rte_mempool *mempool, 4039 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 4040 int vector_mask, uint16_t dev_id, uint16_t queue_id, 4041 const uint16_t num_to_process, uint16_t burst_sz, 4042 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, 4043 bool disable_et) 4044 { 4045 int ret = TEST_SUCCESS; 4046 uint16_t i, j, dequeued; 4047 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4048 uint64_t start_time = 0, last_time = 0; 4049 bool extDdr = ldpc_cap_flags & 4050 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 4051 4052 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4053 uint16_t enq = 0, deq = 0; 4054 bool first_time = true; 4055 last_time = 0; 4056 4057 if (unlikely(num_to_process - dequeued < burst_sz)) 4058 burst_sz = num_to_process - dequeued; 4059 4060 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4061 TEST_ASSERT_SUCCESS(ret, 4062 "rte_bbdev_dec_op_alloc_bulk() failed"); 4063 4064 /* For latency tests we need to disable early termination */ 4065 if (disable_et && check_bit(ref_op->ldpc_dec.op_flags, 4066 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 4067 ref_op->ldpc_dec.op_flags -= 4068 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 4069 ref_op->ldpc_dec.iter_max = get_iter_max(); 4070 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 4071 4072 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4073 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 4074 bufs->inputs, 4075 bufs->hard_outputs, 4076 bufs->soft_outputs, 4077 bufs->harq_inputs, 4078 bufs->harq_outputs, 4079 ref_op); 4080 4081 if (extDdr) 4082 preload_harq_ddr(dev_id, queue_id, ops_enq, 4083 burst_sz, true); 4084 4085 /* Set counter to validate the ordering */ 4086 for (j = 0; j < burst_sz; ++j) 4087 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4088 4089 start_time = rte_rdtsc_precise(); 4090 4091 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 4092 &ops_enq[enq], burst_sz); 4093 TEST_ASSERT(enq == burst_sz, 4094 "Error enqueueing burst, expected %u, got %u", 4095 burst_sz, enq); 4096 4097 /* Dequeue */ 4098 do { 4099 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4100 &ops_deq[deq], burst_sz - deq); 4101 if (likely(first_time && (deq > 0))) { 4102 last_time = rte_rdtsc_precise() - start_time; 4103 first_time = false; 4104 } 
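/* Latency is measured up to the first successful dequeue of the burst; the remaining dequeues only complete the burst and are not re-timed. */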
4105 } while (unlikely(burst_sz != deq)); 4106 4107 *max_time = RTE_MAX(*max_time, last_time); 4108 *min_time = RTE_MIN(*min_time, last_time); 4109 *total_time += last_time; 4110 4111 if (extDdr) 4112 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 4113 4114 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4115 ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, 4116 vector_mask); 4117 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4118 } 4119 4120 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4121 dequeued += deq; 4122 } 4123 return i; 4124 } 4125 4126 static int 4127 latency_test_enc(struct rte_mempool *mempool, 4128 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4129 uint16_t dev_id, uint16_t queue_id, 4130 const uint16_t num_to_process, uint16_t burst_sz, 4131 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4132 { 4133 int ret = TEST_SUCCESS; 4134 uint16_t i, j, dequeued; 4135 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4136 uint64_t start_time = 0, last_time = 0; 4137 4138 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4139 uint16_t enq = 0, deq = 0; 4140 bool first_time = true; 4141 last_time = 0; 4142 4143 if (unlikely(num_to_process - dequeued < burst_sz)) 4144 burst_sz = num_to_process - dequeued; 4145 4146 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4147 TEST_ASSERT_SUCCESS(ret, 4148 "rte_bbdev_enc_op_alloc_bulk() failed"); 4149 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4150 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 4151 bufs->inputs, 4152 bufs->hard_outputs, 4153 ref_op); 4154 4155 /* Set counter to validate the ordering */ 4156 for (j = 0; j < burst_sz; ++j) 4157 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4158 4159 start_time = rte_rdtsc_precise(); 4160 4161 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq], 4162 burst_sz); 4163 TEST_ASSERT(enq == burst_sz, 4164 "Error enqueueing burst, expected %u, got %u", 4165 burst_sz, enq); 4166 4167 /* Dequeue */ 4168 do { 4169 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4170 &ops_deq[deq], burst_sz - deq); 4171 if (likely(first_time && (deq > 0))) { 4172 last_time += rte_rdtsc_precise() - start_time; 4173 first_time = false; 4174 } 4175 } while (unlikely(burst_sz != deq)); 4176 4177 *max_time = RTE_MAX(*max_time, last_time); 4178 *min_time = RTE_MIN(*min_time, last_time); 4179 *total_time += last_time; 4180 4181 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4182 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4183 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4184 } 4185 4186 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4187 dequeued += deq; 4188 } 4189 4190 return i; 4191 } 4192 4193 static int 4194 latency_test_ldpc_enc(struct rte_mempool *mempool, 4195 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4196 uint16_t dev_id, uint16_t queue_id, 4197 const uint16_t num_to_process, uint16_t burst_sz, 4198 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4199 { 4200 int ret = TEST_SUCCESS; 4201 uint16_t i, j, dequeued; 4202 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4203 uint64_t start_time = 0, last_time = 0; 4204 4205 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4206 uint16_t enq = 0, deq = 0; 4207 bool first_time = true; 4208 last_time = 0; 4209 4210 if (unlikely(num_to_process - dequeued < burst_sz)) 4211 burst_sz = num_to_process - dequeued; 4212 4213 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4214 TEST_ASSERT_SUCCESS(ret, 4215 
"rte_bbdev_enc_op_alloc_bulk() failed"); 4216 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4217 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 4218 bufs->inputs, 4219 bufs->hard_outputs, 4220 ref_op); 4221 4222 /* Set counter to validate the ordering */ 4223 for (j = 0; j < burst_sz; ++j) 4224 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4225 4226 start_time = rte_rdtsc_precise(); 4227 4228 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 4229 &ops_enq[enq], burst_sz); 4230 TEST_ASSERT(enq == burst_sz, 4231 "Error enqueueing burst, expected %u, got %u", 4232 burst_sz, enq); 4233 4234 /* Dequeue */ 4235 do { 4236 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4237 &ops_deq[deq], burst_sz - deq); 4238 if (likely(first_time && (deq > 0))) { 4239 last_time += rte_rdtsc_precise() - start_time; 4240 first_time = false; 4241 } 4242 } while (unlikely(burst_sz != deq)); 4243 4244 *max_time = RTE_MAX(*max_time, last_time); 4245 *min_time = RTE_MIN(*min_time, last_time); 4246 *total_time += last_time; 4247 4248 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4249 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4250 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4251 } 4252 4253 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4254 dequeued += deq; 4255 } 4256 4257 return i; 4258 } 4259 4260 /* Common function for running validation and latency test cases */ 4261 static int 4262 validation_latency_test(struct active_device *ad, 4263 struct test_op_params *op_params, bool latency_flag) 4264 { 4265 int iter; 4266 uint16_t burst_sz = op_params->burst_sz; 4267 const uint16_t num_to_process = op_params->num_to_process; 4268 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4269 const uint16_t queue_id = ad->queue_ids[0]; 4270 struct test_buffers *bufs = NULL; 4271 struct rte_bbdev_info info; 4272 uint64_t total_time, min_time, max_time; 4273 const char *op_type_str; 4274 4275 total_time = max_time = 0; 4276 min_time = UINT64_MAX; 4277 4278 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4279 "BURST_SIZE should be <= %u", MAX_BURST); 4280 4281 rte_bbdev_info_get(ad->dev_id, &info); 4282 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4283 4284 op_type_str = rte_bbdev_op_type_str(op_type); 4285 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4286 4287 printf("+ ------------------------------------------------------- +\n"); 4288 if (latency_flag) 4289 printf("== test: latency\ndev:"); 4290 else 4291 printf("== test: validation\ndev:"); 4292 printf("%s, burst size: %u, num ops: %u, op type: %s\n", 4293 info.dev_name, burst_sz, num_to_process, op_type_str); 4294 4295 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 4296 iter = latency_test_dec(op_params->mp, bufs, 4297 op_params->ref_dec_op, op_params->vector_mask, 4298 ad->dev_id, queue_id, num_to_process, 4299 burst_sz, &total_time, &min_time, &max_time); 4300 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4301 iter = latency_test_ldpc_enc(op_params->mp, bufs, 4302 op_params->ref_enc_op, ad->dev_id, queue_id, 4303 num_to_process, burst_sz, &total_time, 4304 &min_time, &max_time); 4305 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4306 iter = latency_test_ldpc_dec(op_params->mp, bufs, 4307 op_params->ref_dec_op, op_params->vector_mask, 4308 ad->dev_id, queue_id, num_to_process, 4309 burst_sz, &total_time, &min_time, &max_time, 4310 latency_flag); 4311 else /* RTE_BBDEV_OP_TURBO_ENC */ 4312 iter = latency_test_enc(op_params->mp, bufs, 4313 op_params->ref_enc_op, 4314 ad->dev_id, queue_id, 4315 num_to_process, 
burst_sz, &total_time, 4316 &min_time, &max_time); 4317 4318 if (iter <= 0) 4319 return TEST_FAILED; 4320 4321 printf("Operation latency:\n" 4322 "\tavg: %lg cycles, %lg us\n" 4323 "\tmin: %lg cycles, %lg us\n" 4324 "\tmax: %lg cycles, %lg us\n", 4325 (double)total_time / (double)iter, 4326 (double)(total_time * 1000000) / (double)iter / 4327 (double)rte_get_tsc_hz(), (double)min_time, 4328 (double)(min_time * 1000000) / (double)rte_get_tsc_hz(), 4329 (double)max_time, (double)(max_time * 1000000) / 4330 (double)rte_get_tsc_hz()); 4331 4332 return TEST_SUCCESS; 4333 } 4334 4335 static int 4336 latency_test(struct active_device *ad, struct test_op_params *op_params) 4337 { 4338 return validation_latency_test(ad, op_params, true); 4339 } 4340 4341 static int 4342 validation_test(struct active_device *ad, struct test_op_params *op_params) 4343 { 4344 return validation_latency_test(ad, op_params, false); 4345 } 4346 4347 #ifdef RTE_BBDEV_OFFLOAD_COST 4348 static int 4349 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id, 4350 struct rte_bbdev_stats *stats) 4351 { 4352 struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; 4353 struct rte_bbdev_stats *q_stats; 4354 4355 if (queue_id >= dev->data->num_queues) 4356 return -1; 4357 4358 q_stats = &dev->data->queues[queue_id].queue_stats; 4359 4360 stats->enqueued_count = q_stats->enqueued_count; 4361 stats->dequeued_count = q_stats->dequeued_count; 4362 stats->enqueue_err_count = q_stats->enqueue_err_count; 4363 stats->dequeue_err_count = q_stats->dequeue_err_count; 4364 stats->acc_offload_cycles = q_stats->acc_offload_cycles; 4365 4366 return 0; 4367 } 4368 4369 static int 4370 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs, 4371 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 4372 uint16_t queue_id, const uint16_t num_to_process, 4373 uint16_t burst_sz, struct test_time_stats *time_st) 4374 { 4375 int i, dequeued, ret; 4376 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4377 uint64_t enq_start_time, deq_start_time; 4378 uint64_t enq_sw_last_time, deq_last_time; 4379 struct rte_bbdev_stats stats; 4380 4381 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4382 uint16_t enq = 0, deq = 0; 4383 4384 if (unlikely(num_to_process - dequeued < burst_sz)) 4385 burst_sz = num_to_process - dequeued; 4386 4387 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4388 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4389 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 4390 bufs->inputs, 4391 bufs->hard_outputs, 4392 bufs->soft_outputs, 4393 ref_op); 4394 4395 /* Start time meas for enqueue function offload latency */ 4396 enq_start_time = rte_rdtsc_precise(); 4397 do { 4398 enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id, 4399 &ops_enq[enq], burst_sz - enq); 4400 } while (unlikely(burst_sz != enq)); 4401 4402 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4403 TEST_ASSERT_SUCCESS(ret, 4404 "Failed to get stats for queue (%u) of device (%u)", 4405 queue_id, dev_id); 4406 4407 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time - 4408 stats.acc_offload_cycles; 4409 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4410 enq_sw_last_time); 4411 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4412 enq_sw_last_time); 4413 time_st->enq_sw_total_time += enq_sw_last_time; 4414 4415 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4416 stats.acc_offload_cycles); 4417 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4418 
stats.acc_offload_cycles); 4419 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4420 4421 /* give time for device to process ops */ 4422 rte_delay_us(WAIT_OFFLOAD_US); 4423 4424 /* Start time meas for dequeue function offload latency */ 4425 deq_start_time = rte_rdtsc_precise(); 4426 /* Dequeue one operation */ 4427 do { 4428 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4429 &ops_deq[deq], enq); 4430 } while (unlikely(deq == 0)); 4431 4432 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4433 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4434 deq_last_time); 4435 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4436 deq_last_time); 4437 time_st->deq_total_time += deq_last_time; 4438 4439 /* Dequeue remaining operations if needed*/ 4440 while (burst_sz != deq) 4441 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4442 &ops_deq[deq], burst_sz - deq); 4443 4444 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4445 dequeued += deq; 4446 } 4447 4448 return i; 4449 } 4450 4451 static int 4452 offload_latency_test_ldpc_dec(struct rte_mempool *mempool, 4453 struct test_buffers *bufs, 4454 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 4455 uint16_t queue_id, const uint16_t num_to_process, 4456 uint16_t burst_sz, struct test_time_stats *time_st) 4457 { 4458 int i, dequeued, ret; 4459 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4460 uint64_t enq_start_time, deq_start_time; 4461 uint64_t enq_sw_last_time, deq_last_time; 4462 struct rte_bbdev_stats stats; 4463 bool extDdr = ldpc_cap_flags & 4464 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 4465 4466 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4467 uint16_t enq = 0, deq = 0; 4468 4469 if (unlikely(num_to_process - dequeued < burst_sz)) 4470 burst_sz = num_to_process - dequeued; 4471 4472 rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4473 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4474 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 4475 bufs->inputs, 4476 bufs->hard_outputs, 4477 bufs->soft_outputs, 4478 bufs->harq_inputs, 4479 bufs->harq_outputs, 4480 ref_op); 4481 4482 if (extDdr) 4483 preload_harq_ddr(dev_id, queue_id, ops_enq, 4484 burst_sz, true); 4485 4486 /* Start time meas for enqueue function offload latency */ 4487 enq_start_time = rte_rdtsc_precise(); 4488 do { 4489 enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 4490 &ops_enq[enq], burst_sz - enq); 4491 } while (unlikely(burst_sz != enq)); 4492 4493 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4494 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4495 TEST_ASSERT_SUCCESS(ret, 4496 "Failed to get stats for queue (%u) of device (%u)", 4497 queue_id, dev_id); 4498 4499 enq_sw_last_time -= stats.acc_offload_cycles; 4500 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4501 enq_sw_last_time); 4502 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4503 enq_sw_last_time); 4504 time_st->enq_sw_total_time += enq_sw_last_time; 4505 4506 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4507 stats.acc_offload_cycles); 4508 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4509 stats.acc_offload_cycles); 4510 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4511 4512 /* give time for device to process ops */ 4513 rte_delay_us(WAIT_OFFLOAD_US); 4514 4515 /* Start time meas for dequeue function offload latency */ 4516 deq_start_time = rte_rdtsc_precise(); 4517 /* Dequeue one operation */ 4518 do { 4519 deq += 
rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4520 &ops_deq[deq], enq); 4521 } while (unlikely(deq == 0)); 4522 4523 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4524 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4525 deq_last_time); 4526 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4527 deq_last_time); 4528 time_st->deq_total_time += deq_last_time; 4529 4530 /* Dequeue remaining operations if needed*/ 4531 while (burst_sz != deq) 4532 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4533 &ops_deq[deq], burst_sz - deq); 4534 4535 if (extDdr) { 4536 /* Read loopback is not thread safe */ 4537 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 4538 } 4539 4540 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4541 dequeued += deq; 4542 } 4543 4544 return i; 4545 } 4546 4547 static int 4548 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs, 4549 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 4550 uint16_t queue_id, const uint16_t num_to_process, 4551 uint16_t burst_sz, struct test_time_stats *time_st) 4552 { 4553 int i, dequeued, ret; 4554 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4555 uint64_t enq_start_time, deq_start_time; 4556 uint64_t enq_sw_last_time, deq_last_time; 4557 struct rte_bbdev_stats stats; 4558 4559 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4560 uint16_t enq = 0, deq = 0; 4561 4562 if (unlikely(num_to_process - dequeued < burst_sz)) 4563 burst_sz = num_to_process - dequeued; 4564 4565 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4566 TEST_ASSERT_SUCCESS(ret, 4567 "rte_bbdev_enc_op_alloc_bulk() failed"); 4568 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4569 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 4570 bufs->inputs, 4571 bufs->hard_outputs, 4572 ref_op); 4573 4574 /* Start time meas for enqueue function offload latency */ 4575 enq_start_time = rte_rdtsc_precise(); 4576 do { 4577 enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id, 4578 &ops_enq[enq], burst_sz - enq); 4579 } while (unlikely(burst_sz != enq)); 4580 4581 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4582 4583 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4584 TEST_ASSERT_SUCCESS(ret, 4585 "Failed to get stats for queue (%u) of device (%u)", 4586 queue_id, dev_id); 4587 enq_sw_last_time -= stats.acc_offload_cycles; 4588 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4589 enq_sw_last_time); 4590 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4591 enq_sw_last_time); 4592 time_st->enq_sw_total_time += enq_sw_last_time; 4593 4594 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4595 stats.acc_offload_cycles); 4596 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4597 stats.acc_offload_cycles); 4598 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4599 4600 /* give time for device to process ops */ 4601 rte_delay_us(WAIT_OFFLOAD_US); 4602 4603 /* Start time meas for dequeue function offload latency */ 4604 deq_start_time = rte_rdtsc_precise(); 4605 /* Dequeue one operation */ 4606 do { 4607 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4608 &ops_deq[deq], enq); 4609 } while (unlikely(deq == 0)); 4610 4611 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4612 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4613 deq_last_time); 4614 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4615 deq_last_time); 4616 time_st->deq_total_time += deq_last_time; 4617 4618 while (burst_sz != deq) 
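/* Dequeue whatever is still outstanding for this burst before freeing the ops */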
4619 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4620 &ops_deq[deq], burst_sz - deq); 4621 4622 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4623 dequeued += deq; 4624 } 4625 4626 return i; 4627 } 4628 4629 static int 4630 offload_latency_test_ldpc_enc(struct rte_mempool *mempool, 4631 struct test_buffers *bufs, 4632 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 4633 uint16_t queue_id, const uint16_t num_to_process, 4634 uint16_t burst_sz, struct test_time_stats *time_st) 4635 { 4636 int i, dequeued, ret; 4637 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4638 uint64_t enq_start_time, deq_start_time; 4639 uint64_t enq_sw_last_time, deq_last_time; 4640 struct rte_bbdev_stats stats; 4641 4642 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4643 uint16_t enq = 0, deq = 0; 4644 4645 if (unlikely(num_to_process - dequeued < burst_sz)) 4646 burst_sz = num_to_process - dequeued; 4647 4648 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4649 TEST_ASSERT_SUCCESS(ret, 4650 "rte_bbdev_enc_op_alloc_bulk() failed"); 4651 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4652 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 4653 bufs->inputs, 4654 bufs->hard_outputs, 4655 ref_op); 4656 4657 /* Start time meas for enqueue function offload latency */ 4658 enq_start_time = rte_rdtsc_precise(); 4659 do { 4660 enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 4661 &ops_enq[enq], burst_sz - enq); 4662 } while (unlikely(burst_sz != enq)); 4663 4664 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 4665 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4666 TEST_ASSERT_SUCCESS(ret, 4667 "Failed to get stats for queue (%u) of device (%u)", 4668 queue_id, dev_id); 4669 4670 enq_sw_last_time -= stats.acc_offload_cycles; 4671 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4672 enq_sw_last_time); 4673 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4674 enq_sw_last_time); 4675 time_st->enq_sw_total_time += enq_sw_last_time; 4676 4677 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4678 stats.acc_offload_cycles); 4679 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4680 stats.acc_offload_cycles); 4681 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4682 4683 /* give time for device to process ops */ 4684 rte_delay_us(WAIT_OFFLOAD_US); 4685 4686 /* Start time meas for dequeue function offload latency */ 4687 deq_start_time = rte_rdtsc_precise(); 4688 /* Dequeue one operation */ 4689 do { 4690 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4691 &ops_deq[deq], enq); 4692 } while (unlikely(deq == 0)); 4693 4694 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4695 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 4696 deq_last_time); 4697 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 4698 deq_last_time); 4699 time_st->deq_total_time += deq_last_time; 4700 4701 while (burst_sz != deq) 4702 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4703 &ops_deq[deq], burst_sz - deq); 4704 4705 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4706 dequeued += deq; 4707 } 4708 4709 return i; 4710 } 4711 #endif 4712 4713 static int 4714 offload_cost_test(struct active_device *ad, 4715 struct test_op_params *op_params) 4716 { 4717 #ifndef RTE_BBDEV_OFFLOAD_COST 4718 RTE_SET_USED(ad); 4719 RTE_SET_USED(op_params); 4720 printf("Offload latency test is disabled.\n"); 4721 printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n"); 4722 return TEST_SKIPPED; 4723 
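/* With RTE_BBDEV_OFFLOAD_COST enabled, the branch below splits the enqueue time into driver (software) cycles and accelerator cycles, using the acc_offload_cycles counter from the per-queue stats. */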
#else 4724 int iter; 4725 uint16_t burst_sz = op_params->burst_sz; 4726 const uint16_t num_to_process = op_params->num_to_process; 4727 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4728 const uint16_t queue_id = ad->queue_ids[0]; 4729 struct test_buffers *bufs = NULL; 4730 struct rte_bbdev_info info; 4731 const char *op_type_str; 4732 struct test_time_stats time_st; 4733 4734 memset(&time_st, 0, sizeof(struct test_time_stats)); 4735 time_st.enq_sw_min_time = UINT64_MAX; 4736 time_st.enq_acc_min_time = UINT64_MAX; 4737 time_st.deq_min_time = UINT64_MAX; 4738 4739 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4740 "BURST_SIZE should be <= %u", MAX_BURST); 4741 4742 rte_bbdev_info_get(ad->dev_id, &info); 4743 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4744 4745 op_type_str = rte_bbdev_op_type_str(op_type); 4746 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4747 4748 printf("+ ------------------------------------------------------- +\n"); 4749 printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 4750 info.dev_name, burst_sz, num_to_process, op_type_str); 4751 4752 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 4753 iter = offload_latency_test_dec(op_params->mp, bufs, 4754 op_params->ref_dec_op, ad->dev_id, queue_id, 4755 num_to_process, burst_sz, &time_st); 4756 else if (op_type == RTE_BBDEV_OP_TURBO_ENC) 4757 iter = offload_latency_test_enc(op_params->mp, bufs, 4758 op_params->ref_enc_op, ad->dev_id, queue_id, 4759 num_to_process, burst_sz, &time_st); 4760 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4761 iter = offload_latency_test_ldpc_enc(op_params->mp, bufs, 4762 op_params->ref_enc_op, ad->dev_id, queue_id, 4763 num_to_process, burst_sz, &time_st); 4764 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4765 iter = offload_latency_test_ldpc_dec(op_params->mp, bufs, 4766 op_params->ref_dec_op, ad->dev_id, queue_id, 4767 num_to_process, burst_sz, &time_st); 4768 else 4769 iter = offload_latency_test_enc(op_params->mp, bufs, 4770 op_params->ref_enc_op, ad->dev_id, queue_id, 4771 num_to_process, burst_sz, &time_st); 4772 4773 if (iter <= 0) 4774 return TEST_FAILED; 4775 4776 printf("Enqueue driver offload cost latency:\n" 4777 "\tavg: %lg cycles, %lg us\n" 4778 "\tmin: %lg cycles, %lg us\n" 4779 "\tmax: %lg cycles, %lg us\n" 4780 "Enqueue accelerator offload cost latency:\n" 4781 "\tavg: %lg cycles, %lg us\n" 4782 "\tmin: %lg cycles, %lg us\n" 4783 "\tmax: %lg cycles, %lg us\n", 4784 (double)time_st.enq_sw_total_time / (double)iter, 4785 (double)(time_st.enq_sw_total_time * 1000000) / 4786 (double)iter / (double)rte_get_tsc_hz(), 4787 (double)time_st.enq_sw_min_time, 4788 (double)(time_st.enq_sw_min_time * 1000000) / 4789 rte_get_tsc_hz(), (double)time_st.enq_sw_max_time, 4790 (double)(time_st.enq_sw_max_time * 1000000) / 4791 rte_get_tsc_hz(), (double)time_st.enq_acc_total_time / 4792 (double)iter, 4793 (double)(time_st.enq_acc_total_time * 1000000) / 4794 (double)iter / (double)rte_get_tsc_hz(), 4795 (double)time_st.enq_acc_min_time, 4796 (double)(time_st.enq_acc_min_time * 1000000) / 4797 rte_get_tsc_hz(), (double)time_st.enq_acc_max_time, 4798 (double)(time_st.enq_acc_max_time * 1000000) / 4799 rte_get_tsc_hz()); 4800 4801 printf("Dequeue offload cost latency - one op:\n" 4802 "\tavg: %lg cycles, %lg us\n" 4803 "\tmin: %lg cycles, %lg us\n" 4804 "\tmax: %lg cycles, %lg us\n", 4805 (double)time_st.deq_total_time / (double)iter, 4806 (double)(time_st.deq_total_time * 1000000) / 4807 (double)iter / 
(double)rte_get_tsc_hz(), 4808 (double)time_st.deq_min_time, 4809 (double)(time_st.deq_min_time * 1000000) / 4810 rte_get_tsc_hz(), (double)time_st.deq_max_time, 4811 (double)(time_st.deq_max_time * 1000000) / 4812 rte_get_tsc_hz()); 4813 4814 struct rte_bbdev_stats stats = {0}; 4815 get_bbdev_queue_stats(ad->dev_id, queue_id, &stats); 4816 if (op_type != RTE_BBDEV_OP_LDPC_DEC) { 4817 TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process, 4818 "Mismatch in enqueue count %10"PRIu64" %d", 4819 stats.enqueued_count, num_to_process); 4820 TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process, 4821 "Mismatch in dequeue count %10"PRIu64" %d", 4822 stats.dequeued_count, num_to_process); 4823 } 4824 TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0, 4825 "Enqueue error count %10"PRIu64"", 4826 stats.enqueue_err_count); 4827 TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0, 4828 "Dequeue error count %10"PRIu64"", 4829 stats.dequeue_err_count); 4830 4831 return TEST_SUCCESS; 4832 #endif 4833 } 4834 4835 #ifdef RTE_BBDEV_OFFLOAD_COST 4836 static int 4837 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id, 4838 const uint16_t num_to_process, uint16_t burst_sz, 4839 uint64_t *deq_total_time, uint64_t *deq_min_time, 4840 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type) 4841 { 4842 int i, deq_total; 4843 struct rte_bbdev_dec_op *ops[MAX_BURST]; 4844 uint64_t deq_start_time, deq_last_time; 4845 4846 /* Test deq offload latency from an empty queue */ 4847 4848 for (i = 0, deq_total = 0; deq_total < num_to_process; 4849 ++i, deq_total += burst_sz) { 4850 deq_start_time = rte_rdtsc_precise(); 4851 4852 if (unlikely(num_to_process - deq_total < burst_sz)) 4853 burst_sz = num_to_process - deq_total; 4854 if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4855 rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops, 4856 burst_sz); 4857 else 4858 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, 4859 burst_sz); 4860 4861 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4862 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 4863 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 4864 *deq_total_time += deq_last_time; 4865 } 4866 4867 return i; 4868 } 4869 4870 static int 4871 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id, 4872 const uint16_t num_to_process, uint16_t burst_sz, 4873 uint64_t *deq_total_time, uint64_t *deq_min_time, 4874 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type) 4875 { 4876 int i, deq_total; 4877 struct rte_bbdev_enc_op *ops[MAX_BURST]; 4878 uint64_t deq_start_time, deq_last_time; 4879 4880 /* Test deq offload latency from an empty queue */ 4881 for (i = 0, deq_total = 0; deq_total < num_to_process; 4882 ++i, deq_total += burst_sz) { 4883 deq_start_time = rte_rdtsc_precise(); 4884 4885 if (unlikely(num_to_process - deq_total < burst_sz)) 4886 burst_sz = num_to_process - deq_total; 4887 if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4888 rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops, 4889 burst_sz); 4890 else 4891 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, 4892 burst_sz); 4893 4894 deq_last_time = rte_rdtsc_precise() - deq_start_time; 4895 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 4896 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 4897 *deq_total_time += deq_last_time; 4898 } 4899 4900 return i; 4901 } 4902 4903 #endif 4904 4905 static int 4906 offload_latency_empty_q_test(struct active_device *ad, 4907 struct test_op_params *op_params) 4908 { 4909 #ifndef RTE_BBDEV_OFFLOAD_COST 4910
RTE_SET_USED(ad); 4911 RTE_SET_USED(op_params); 4912 printf("Offload latency empty dequeue test is disabled.\n"); 4913 printf("Set RTE_BBDEV_OFFLOAD_COST to 'y' to turn the test on.\n"); 4914 return TEST_SKIPPED; 4915 #else 4916 int iter; 4917 uint64_t deq_total_time, deq_min_time, deq_max_time; 4918 uint16_t burst_sz = op_params->burst_sz; 4919 const uint16_t num_to_process = op_params->num_to_process; 4920 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4921 const uint16_t queue_id = ad->queue_ids[0]; 4922 struct rte_bbdev_info info; 4923 const char *op_type_str; 4924 4925 deq_total_time = deq_max_time = 0; 4926 deq_min_time = UINT64_MAX; 4927 4928 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4929 "BURST_SIZE should be <= %u", MAX_BURST); 4930 4931 rte_bbdev_info_get(ad->dev_id, &info); 4932 4933 op_type_str = rte_bbdev_op_type_str(op_type); 4934 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4935 4936 printf("+ ------------------------------------------------------- +\n"); 4937 printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 4938 info.dev_name, burst_sz, num_to_process, op_type_str); 4939 4940 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 4941 op_type == RTE_BBDEV_OP_LDPC_DEC) 4942 iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id, 4943 num_to_process, burst_sz, &deq_total_time, 4944 &deq_min_time, &deq_max_time, op_type); 4945 else 4946 iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id, 4947 num_to_process, burst_sz, &deq_total_time, 4948 &deq_min_time, &deq_max_time, op_type); 4949 4950 if (iter <= 0) 4951 return TEST_FAILED; 4952 4953 printf("Empty dequeue offload:\n" 4954 "\tavg: %lg cycles, %lg us\n" 4955 "\tmin: %lg cycles, %lg us\n" 4956 "\tmax: %lg cycles, %lg us\n", 4957 (double)deq_total_time / (double)iter, 4958 (double)(deq_total_time * 1000000) / (double)iter / 4959 (double)rte_get_tsc_hz(), (double)deq_min_time, 4960 (double)(deq_min_time * 1000000) / rte_get_tsc_hz(), 4961 (double)deq_max_time, (double)(deq_max_time * 1000000) / 4962 rte_get_tsc_hz()); 4963 4964 return TEST_SUCCESS; 4965 #endif 4966 } 4967 4968 static int 4969 bler_tc(void) 4970 { 4971 return run_test_case(bler_test); 4972 } 4973 4974 static int 4975 throughput_tc(void) 4976 { 4977 return run_test_case(throughput_test); 4978 } 4979 4980 static int 4981 offload_cost_tc(void) 4982 { 4983 return run_test_case(offload_cost_test); 4984 } 4985 4986 static int 4987 offload_latency_empty_q_tc(void) 4988 { 4989 return run_test_case(offload_latency_empty_q_test); 4990 } 4991 4992 static int 4993 latency_tc(void) 4994 { 4995 return run_test_case(latency_test); 4996 } 4997 4998 static int 4999 validation_tc(void) 5000 { 5001 return run_test_case(validation_test); 5002 } 5003 5004 static int 5005 interrupt_tc(void) 5006 { 5007 return run_test_case(throughput_test); 5008 } 5009 5010 static struct unit_test_suite bbdev_bler_testsuite = { 5011 .suite_name = "BBdev BLER Tests", 5012 .setup = testsuite_setup, 5013 .teardown = testsuite_teardown, 5014 .unit_test_cases = { 5015 TEST_CASE_ST(ut_setup, ut_teardown, bler_tc), 5016 TEST_CASES_END() /**< NULL terminate unit test array */ 5017 } 5018 }; 5019 5020 static struct unit_test_suite bbdev_throughput_testsuite = { 5021 .suite_name = "BBdev Throughput Tests", 5022 .setup = testsuite_setup, 5023 .teardown = testsuite_teardown, 5024 .unit_test_cases = { 5025 TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc), 5026 TEST_CASES_END() /**< NULL terminate unit test array */ 5027 
} 5028 }; 5029 5030 static struct unit_test_suite bbdev_validation_testsuite = { 5031 .suite_name = "BBdev Validation Tests", 5032 .setup = testsuite_setup, 5033 .teardown = testsuite_teardown, 5034 .unit_test_cases = { 5035 TEST_CASE_ST(ut_setup, ut_teardown, validation_tc), 5036 TEST_CASES_END() /**< NULL terminate unit test array */ 5037 } 5038 }; 5039 5040 static struct unit_test_suite bbdev_latency_testsuite = { 5041 .suite_name = "BBdev Latency Tests", 5042 .setup = testsuite_setup, 5043 .teardown = testsuite_teardown, 5044 .unit_test_cases = { 5045 TEST_CASE_ST(ut_setup, ut_teardown, latency_tc), 5046 TEST_CASES_END() /**< NULL terminate unit test array */ 5047 } 5048 }; 5049 5050 static struct unit_test_suite bbdev_offload_cost_testsuite = { 5051 .suite_name = "BBdev Offload Cost Tests", 5052 .setup = testsuite_setup, 5053 .teardown = testsuite_teardown, 5054 .unit_test_cases = { 5055 TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc), 5056 TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc), 5057 TEST_CASES_END() /**< NULL terminate unit test array */ 5058 } 5059 }; 5060 5061 static struct unit_test_suite bbdev_interrupt_testsuite = { 5062 .suite_name = "BBdev Interrupt Tests", 5063 .setup = interrupt_testsuite_setup, 5064 .teardown = testsuite_teardown, 5065 .unit_test_cases = { 5066 TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc), 5067 TEST_CASES_END() /**< NULL terminate unit test array */ 5068 } 5069 }; 5070 5071 REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite); 5072 REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite); 5073 REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite); 5074 REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite); 5075 REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite); 5076 REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite); 5077
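/*
 * Usage sketch (assumption: standard dpdk-test-bbdev command-line options,
 * which may differ between DPDK releases - check the bbdev test application
 * documentation for the exact option names). The test names map to the
 * REGISTER_TEST_COMMAND() entries above, e.g.:
 *
 *   ./dpdk-test-bbdev -l 0-3 -- -c throughput -v <test_vector.data> -b 32 -n 64
 *   ./dpdk-test-bbdev -l 0-3 -- -c latency -v <test_vector.data>
 *
 * where the value passed to -c is one of: bler, throughput, validation,
 * latency, offload, interrupt.
 */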