1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <inttypes.h> 8 #include <math.h> 9 10 #include <rte_eal.h> 11 #include <rte_common.h> 12 #include <rte_dev.h> 13 #include <rte_launch.h> 14 #include <rte_bbdev.h> 15 #include <rte_cycles.h> 16 #include <rte_lcore.h> 17 #include <rte_malloc.h> 18 #include <rte_random.h> 19 #include <rte_hexdump.h> 20 #include <rte_interrupts.h> 21 22 #include "main.h" 23 #include "test_bbdev_vector.h" 24 25 #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id)) 26 27 #define MAX_QUEUES RTE_MAX_LCORE 28 #define TEST_REPETITIONS 100 29 #define TIME_OUT_POLL 1e8 30 #define WAIT_OFFLOAD_US 1000 31 32 #ifdef RTE_BASEBAND_FPGA_LTE_FEC 33 #include <fpga_lte_fec.h> 34 #define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf") 35 #define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf") 36 #define VF_UL_4G_QUEUE_VALUE 4 37 #define VF_DL_4G_QUEUE_VALUE 4 38 #define UL_4G_BANDWIDTH 3 39 #define DL_4G_BANDWIDTH 3 40 #define UL_4G_LOAD_BALANCE 128 41 #define DL_4G_LOAD_BALANCE 128 42 #define FLR_4G_TIMEOUT 610 43 #endif 44 45 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC 46 #include <rte_pmd_fpga_5gnr_fec.h> 47 #define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf") 48 #define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf") 49 #define VF_UL_5G_QUEUE_VALUE 4 50 #define VF_DL_5G_QUEUE_VALUE 4 51 #define UL_5G_BANDWIDTH 3 52 #define DL_5G_BANDWIDTH 3 53 #define UL_5G_LOAD_BALANCE 128 54 #define DL_5G_LOAD_BALANCE 128 55 #endif 56 57 #ifdef RTE_BASEBAND_ACC 58 #include <rte_acc_cfg.h> 59 #define ACC100PF_DRIVER_NAME ("intel_acc100_pf") 60 #define ACC100VF_DRIVER_NAME ("intel_acc100_vf") 61 #define ACC100_QMGR_NUM_AQS 16 62 #define ACC100_QMGR_NUM_QGS 2 63 #define ACC100_QMGR_AQ_DEPTH 5 64 #define ACC100_QMGR_INVALID_IDX -1 65 #define ACC100_QMGR_RR 1 66 #define ACC100_QOS_GBR 0 67 #define ACC200PF_DRIVER_NAME ("intel_acc200_pf") 68 #define ACC200VF_DRIVER_NAME ("intel_acc200_vf") 69 #define ACC200_QMGR_NUM_AQS 16 70 #define ACC200_QMGR_NUM_QGS 2 71 #define ACC200_QMGR_AQ_DEPTH 5 72 #define ACC200_QMGR_INVALID_IDX -1 73 #define ACC200_QMGR_RR 1 74 #define ACC200_QOS_GBR 0 75 #endif 76 77 #define OPS_CACHE_SIZE 256U 78 #define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */ 79 80 #define SYNC_WAIT 0 81 #define SYNC_START 1 82 83 #define INVALID_QUEUE_ID -1 84 /* Increment for next code block in external HARQ memory */ 85 #define HARQ_INCR 32768 86 /* Headroom for filler LLRs insertion in HARQ buffer */ 87 #define FILLER_HEADROOM 2048 88 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */ 89 #define N_ZC_1 66 /* N = 66 Zc for BG 1 */ 90 #define N_ZC_2 50 /* N = 50 Zc for BG 2 */ 91 #define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */ 92 #define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */ 93 #define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */ 94 #define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */ 95 #define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */ 96 #define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */ 97 98 #define HARQ_MEM_TOLERANCE 256 99 static struct test_bbdev_vector test_vector; 100 101 /* Switch between PMD and Interrupt for throughput TC */ 102 static bool intr_enabled; 103 104 /* LLR arithmetic representation for numerical conversion */ 105 static int ldpc_llr_decimals; 106 static int ldpc_llr_size; 107 /* Keep track of the LDPC decoder device capability flag */ 108 
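/*
 * Illustrative sketch only: how the N_ZC_* and K0_* constants above map to
 * the rate-matching starting position k0 of 3GPP TS 38.212 Table 5.4.2.1-2.
 * The helper name and prototype below are hypothetical and the function is
 * not used by the test; it only documents the arithmetic. For example, for
 * BG1 with Zc = 384, Ncb = 66 * 384 = 25344 and rv_index = 2:
 * k0 = 384 * ((33 * 25344) / (66 * 384)) = 384 * 33 = 12672.
 */
static inline uint16_t
k0_from_table_sketch(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
{
	if (rv_index == 0)
		return 0;
	if (bg == 1) {
		if (rv_index == 1)
			return z_c * ((K0_1_1 * n_cb) / (N_ZC_1 * z_c));
		if (rv_index == 2)
			return z_c * ((K0_2_1 * n_cb) / (N_ZC_1 * z_c));
		return z_c * ((K0_3_1 * n_cb) / (N_ZC_1 * z_c));
	}
	if (rv_index == 1)
		return z_c * ((K0_1_2 * n_cb) / (N_ZC_2 * z_c));
	if (rv_index == 2)
		return z_c * ((K0_2_2 * n_cb) / (N_ZC_2 * z_c));
	return z_c * ((K0_3_2 * n_cb) / (N_ZC_2 * z_c));
}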
static uint32_t ldpc_cap_flags; 109 110 /* Represents tested active devices */ 111 static struct active_device { 112 const char *driver_name; 113 uint8_t dev_id; 114 uint16_t supported_ops; 115 uint16_t queue_ids[MAX_QUEUES]; 116 uint16_t nb_queues; 117 struct rte_mempool *ops_mempool; 118 struct rte_mempool *in_mbuf_pool; 119 struct rte_mempool *hard_out_mbuf_pool; 120 struct rte_mempool *soft_out_mbuf_pool; 121 struct rte_mempool *harq_in_mbuf_pool; 122 struct rte_mempool *harq_out_mbuf_pool; 123 } active_devs[RTE_BBDEV_MAX_DEVS]; 124 125 static uint8_t nb_active_devs; 126 127 /* Data buffers used by BBDEV ops */ 128 struct test_buffers { 129 struct rte_bbdev_op_data *inputs; 130 struct rte_bbdev_op_data *hard_outputs; 131 struct rte_bbdev_op_data *soft_outputs; 132 struct rte_bbdev_op_data *harq_inputs; 133 struct rte_bbdev_op_data *harq_outputs; 134 }; 135 136 /* Operation parameters specific for given test case */ 137 struct test_op_params { 138 struct rte_mempool *mp; 139 struct rte_bbdev_dec_op *ref_dec_op; 140 struct rte_bbdev_enc_op *ref_enc_op; 141 struct rte_bbdev_fft_op *ref_fft_op; 142 uint16_t burst_sz; 143 uint16_t num_to_process; 144 uint16_t num_lcores; 145 int vector_mask; 146 uint16_t sync; 147 struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES]; 148 }; 149 150 /* Contains per lcore params */ 151 struct thread_params { 152 uint8_t dev_id; 153 uint16_t queue_id; 154 uint32_t lcore_id; 155 uint64_t start_time; 156 double ops_per_sec; 157 double mbps; 158 uint8_t iter_count; 159 double iter_average; 160 double bler; 161 uint16_t nb_dequeued; 162 int16_t processing_status; 163 uint16_t burst_sz; 164 struct test_op_params *op_params; 165 struct rte_bbdev_dec_op *dec_ops[MAX_BURST]; 166 struct rte_bbdev_enc_op *enc_ops[MAX_BURST]; 167 struct rte_bbdev_fft_op *fft_ops[MAX_BURST]; 168 }; 169 170 /* Stores time statistics */ 171 struct test_time_stats { 172 /* Stores software enqueue total working time */ 173 uint64_t enq_sw_total_time; 174 /* Stores minimum value of software enqueue working time */ 175 uint64_t enq_sw_min_time; 176 /* Stores maximum value of software enqueue working time */ 177 uint64_t enq_sw_max_time; 178 /* Stores turbo enqueue total working time */ 179 uint64_t enq_acc_total_time; 180 /* Stores minimum value of accelerator enqueue working time */ 181 uint64_t enq_acc_min_time; 182 /* Stores maximum value of accelerator enqueue working time */ 183 uint64_t enq_acc_max_time; 184 /* Stores dequeue total working time */ 185 uint64_t deq_total_time; 186 /* Stores minimum value of dequeue working time */ 187 uint64_t deq_min_time; 188 /* Stores maximum value of dequeue working time */ 189 uint64_t deq_max_time; 190 }; 191 192 typedef int (test_case_function)(struct active_device *ad, 193 struct test_op_params *op_params); 194 195 /* Get device status before timeout exit */ 196 static inline void 197 timeout_exit(uint8_t dev_id) 198 { 199 struct rte_bbdev_info info; 200 rte_bbdev_info_get(dev_id, &info); 201 printf("Device Status %s\n", rte_bbdev_device_status_str(info.drv.device_status)); 202 } 203 204 static inline void 205 mbuf_reset(struct rte_mbuf *m) 206 { 207 m->pkt_len = 0; 208 209 do { 210 m->data_len = 0; 211 m = m->next; 212 } while (m != NULL); 213 } 214 215 /* Read flag value 0/1 from bitmap */ 216 static inline bool 217 check_bit(uint32_t bitmap, uint32_t bitmask) 218 { 219 return bitmap & bitmask; 220 } 221 222 static inline void 223 set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type) 224 { 225 ad->supported_ops |= (1 << 
op_type); 226 } 227 228 static inline bool 229 is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type) 230 { 231 return ad->supported_ops & (1 << op_type); 232 } 233 234 static inline bool 235 flags_match(uint32_t flags_req, uint32_t flags_present) 236 { 237 return (flags_req & flags_present) == flags_req; 238 } 239 240 static void 241 clear_soft_out_cap(uint32_t *op_flags) 242 { 243 *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT; 244 *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT; 245 *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT; 246 } 247 248 /* This API is to convert all the test vector op data entries 249 * to big endian format. It is used when the device supports 250 * the input in the big endian format. 251 */ 252 static inline void 253 convert_op_data_to_be(void) 254 { 255 struct op_data_entries *op; 256 enum op_data_type type; 257 uint8_t nb_segs, *rem_data, temp; 258 uint32_t *data, len; 259 int complete, rem, i, j; 260 261 for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) { 262 nb_segs = test_vector.entries[type].nb_segments; 263 op = &test_vector.entries[type]; 264 265 /* Invert byte endianness for all the segments */ 266 for (i = 0; i < nb_segs; ++i) { 267 len = op->segments[i].length; 268 data = op->segments[i].addr; 269 270 /* Swap complete u32 bytes */ 271 complete = len / 4; 272 for (j = 0; j < complete; j++) 273 data[j] = rte_bswap32(data[j]); 274 275 /* Swap any remaining bytes */ 276 rem = len % 4; 277 rem_data = (uint8_t *)&data[j]; 278 for (j = 0; j < rem/2; j++) { 279 temp = rem_data[j]; 280 rem_data[j] = rem_data[rem - j - 1]; 281 rem_data[rem - j - 1] = temp; 282 } 283 } 284 } 285 } 286 287 static int 288 check_dev_cap(const struct rte_bbdev_info *dev_info) 289 { 290 unsigned int i; 291 unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs, 292 nb_harq_inputs, nb_harq_outputs; 293 const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities; 294 uint8_t dev_data_endianness = dev_info->drv.data_endianness; 295 296 nb_inputs = test_vector.entries[DATA_INPUT].nb_segments; 297 nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments; 298 nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments; 299 nb_harq_inputs = test_vector.entries[DATA_HARQ_INPUT].nb_segments; 300 nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments; 301 302 for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) { 303 if (op_cap->type != test_vector.op_type) 304 continue; 305 306 if (dev_data_endianness == RTE_BIG_ENDIAN) 307 convert_op_data_to_be(); 308 309 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) { 310 const struct rte_bbdev_op_cap_turbo_dec *cap = 311 &op_cap->cap.turbo_dec; 312 /* Ignore lack of soft output capability, just skip 313 * checking if soft output is valid. 
314 */ 315 if ((test_vector.turbo_dec.op_flags & 316 RTE_BBDEV_TURBO_SOFT_OUTPUT) && 317 !(cap->capability_flags & 318 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 319 printf( 320 "INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n", 321 dev_info->dev_name); 322 clear_soft_out_cap( 323 &test_vector.turbo_dec.op_flags); 324 } 325 326 if (!flags_match(test_vector.turbo_dec.op_flags, 327 cap->capability_flags)) 328 return TEST_FAILED; 329 if (nb_inputs > cap->num_buffers_src) { 330 printf("Too many inputs defined: %u, max: %u\n", 331 nb_inputs, cap->num_buffers_src); 332 return TEST_FAILED; 333 } 334 if (nb_soft_outputs > cap->num_buffers_soft_out && 335 (test_vector.turbo_dec.op_flags & 336 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 337 printf( 338 "Too many soft outputs defined: %u, max: %u\n", 339 nb_soft_outputs, 340 cap->num_buffers_soft_out); 341 return TEST_FAILED; 342 } 343 if (nb_hard_outputs > cap->num_buffers_hard_out) { 344 printf( 345 "Too many hard outputs defined: %u, max: %u\n", 346 nb_hard_outputs, 347 cap->num_buffers_hard_out); 348 return TEST_FAILED; 349 } 350 if (intr_enabled && !(cap->capability_flags & 351 RTE_BBDEV_TURBO_DEC_INTERRUPTS)) { 352 printf( 353 "Dequeue interrupts are not supported!\n"); 354 return TEST_FAILED; 355 } 356 357 return TEST_SUCCESS; 358 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) { 359 const struct rte_bbdev_op_cap_turbo_enc *cap = 360 &op_cap->cap.turbo_enc; 361 362 if (!flags_match(test_vector.turbo_enc.op_flags, 363 cap->capability_flags)) 364 return TEST_FAILED; 365 if (nb_inputs > cap->num_buffers_src) { 366 printf("Too many inputs defined: %u, max: %u\n", 367 nb_inputs, cap->num_buffers_src); 368 return TEST_FAILED; 369 } 370 if (nb_hard_outputs > cap->num_buffers_dst) { 371 printf( 372 "Too many hard outputs defined: %u, max: %u\n", 373 nb_hard_outputs, cap->num_buffers_dst); 374 return TEST_FAILED; 375 } 376 if (intr_enabled && !(cap->capability_flags & 377 RTE_BBDEV_TURBO_ENC_INTERRUPTS)) { 378 printf( 379 "Dequeue interrupts are not supported!\n"); 380 return TEST_FAILED; 381 } 382 383 return TEST_SUCCESS; 384 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) { 385 const struct rte_bbdev_op_cap_ldpc_enc *cap = 386 &op_cap->cap.ldpc_enc; 387 388 if (!flags_match(test_vector.ldpc_enc.op_flags, 389 cap->capability_flags)){ 390 printf("Flag Mismatch\n"); 391 return TEST_FAILED; 392 } 393 if (nb_inputs > cap->num_buffers_src) { 394 printf("Too many inputs defined: %u, max: %u\n", 395 nb_inputs, cap->num_buffers_src); 396 return TEST_FAILED; 397 } 398 if (nb_hard_outputs > cap->num_buffers_dst) { 399 printf( 400 "Too many hard outputs defined: %u, max: %u\n", 401 nb_hard_outputs, cap->num_buffers_dst); 402 return TEST_FAILED; 403 } 404 if (intr_enabled && !(cap->capability_flags & 405 RTE_BBDEV_LDPC_ENC_INTERRUPTS)) { 406 printf( 407 "Dequeue interrupts are not supported!\n"); 408 return TEST_FAILED; 409 } 410 411 return TEST_SUCCESS; 412 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) { 413 const struct rte_bbdev_op_cap_ldpc_dec *cap = 414 &op_cap->cap.ldpc_dec; 415 416 if (!flags_match(test_vector.ldpc_dec.op_flags, 417 cap->capability_flags)){ 418 printf("Flag Mismatch\n"); 419 return TEST_FAILED; 420 } 421 if (nb_inputs > cap->num_buffers_src) { 422 printf("Too many inputs defined: %u, max: %u\n", 423 nb_inputs, cap->num_buffers_src); 424 return TEST_FAILED; 425 } 426 if (nb_hard_outputs > cap->num_buffers_hard_out) { 427 printf( 428 "Too many hard outputs defined: %u, max: %u\n", 429 nb_hard_outputs, 430 
cap->num_buffers_hard_out); 431 return TEST_FAILED; 432 } 433 if (nb_harq_inputs > cap->num_buffers_hard_out) { 434 printf( 435 "Too many HARQ inputs defined: %u, max: %u\n", 436 nb_harq_inputs, 437 cap->num_buffers_hard_out); 438 return TEST_FAILED; 439 } 440 if (nb_harq_outputs > cap->num_buffers_hard_out) { 441 printf( 442 "Too many HARQ outputs defined: %u, max: %u\n", 443 nb_harq_outputs, 444 cap->num_buffers_hard_out); 445 return TEST_FAILED; 446 } 447 if (intr_enabled && !(cap->capability_flags & 448 RTE_BBDEV_LDPC_DEC_INTERRUPTS)) { 449 printf( 450 "Dequeue interrupts are not supported!\n"); 451 return TEST_FAILED; 452 } 453 if (intr_enabled && (test_vector.ldpc_dec.op_flags & 454 (RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 455 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 456 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 457 ))) { 458 printf("Skip loop-back with interrupt\n"); 459 return TEST_FAILED; 460 } 461 return TEST_SUCCESS; 462 } else if (op_cap->type == RTE_BBDEV_OP_FFT) { 463 const struct rte_bbdev_op_cap_fft *cap = &op_cap->cap.fft; 464 465 if (!flags_match(test_vector.fft.op_flags, cap->capability_flags)) { 466 printf("Flag Mismatch\n"); 467 return TEST_FAILED; 468 } 469 if (nb_inputs > cap->num_buffers_src) { 470 printf("Too many inputs defined: %u, max: %u\n", 471 nb_inputs, cap->num_buffers_src); 472 return TEST_FAILED; 473 } 474 return TEST_SUCCESS; 475 } 476 } 477 478 if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE)) 479 return TEST_SUCCESS; /* Special case for NULL device */ 480 481 return TEST_FAILED; 482 } 483 484 /* calculates optimal mempool size not smaller than the val */ 485 static unsigned int 486 optimal_mempool_size(unsigned int val) 487 { 488 return rte_align32pow2(val + 1) - 1; 489 } 490 491 /* allocates mbuf mempool for inputs and outputs */ 492 static struct rte_mempool * 493 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id, 494 int socket_id, unsigned int mbuf_pool_size, 495 const char *op_type_str) 496 { 497 unsigned int i; 498 uint32_t max_seg_sz = 0; 499 char pool_name[RTE_MEMPOOL_NAMESIZE]; 500 501 /* find max input segment size */ 502 for (i = 0; i < entries->nb_segments; ++i) 503 if (entries->segments[i].length > max_seg_sz) 504 max_seg_sz = entries->segments[i].length; 505 506 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 507 dev_id); 508 return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0, 509 RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM 510 + FILLER_HEADROOM, 511 (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id); 512 } 513 514 static int 515 create_mempools(struct active_device *ad, int socket_id, 516 enum rte_bbdev_op_type org_op_type, uint16_t num_ops) 517 { 518 struct rte_mempool *mp; 519 unsigned int ops_pool_size, mbuf_pool_size = 0; 520 char pool_name[RTE_MEMPOOL_NAMESIZE]; 521 const char *op_type_str; 522 enum rte_bbdev_op_type op_type = org_op_type; 523 524 struct op_data_entries *in = &test_vector.entries[DATA_INPUT]; 525 struct op_data_entries *hard_out = 526 &test_vector.entries[DATA_HARD_OUTPUT]; 527 struct op_data_entries *soft_out = 528 &test_vector.entries[DATA_SOFT_OUTPUT]; 529 struct op_data_entries *harq_in = 530 &test_vector.entries[DATA_HARQ_INPUT]; 531 struct op_data_entries *harq_out = 532 &test_vector.entries[DATA_HARQ_OUTPUT]; 533 534 /* allocate ops mempool */ 535 ops_pool_size = optimal_mempool_size(RTE_MAX( 536 /* Ops used plus 1 reference op */ 537 RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1), 538 /* Minimal cache size plus 1 reference op */ 539 (unsigned 
int)(1.5 * rte_lcore_count() * 540 OPS_CACHE_SIZE + 1)), 541 OPS_POOL_SIZE_MIN)); 542 543 if (org_op_type == RTE_BBDEV_OP_NONE) 544 op_type = RTE_BBDEV_OP_TURBO_ENC; 545 546 op_type_str = rte_bbdev_op_type_str(op_type); 547 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 548 549 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 550 ad->dev_id); 551 mp = rte_bbdev_op_pool_create(pool_name, op_type, 552 ops_pool_size, OPS_CACHE_SIZE, socket_id); 553 TEST_ASSERT_NOT_NULL(mp, 554 "ERROR Failed to create %u items ops pool for dev %u on socket %u.", 555 ops_pool_size, 556 ad->dev_id, 557 socket_id); 558 ad->ops_mempool = mp; 559 560 /* Do not create inputs and outputs mbufs for BaseBand Null Device */ 561 if (org_op_type == RTE_BBDEV_OP_NONE) 562 return TEST_SUCCESS; 563 564 /* Inputs */ 565 if (in->nb_segments > 0) { 566 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 567 in->nb_segments); 568 mp = create_mbuf_pool(in, ad->dev_id, socket_id, 569 mbuf_pool_size, "in"); 570 TEST_ASSERT_NOT_NULL(mp, 571 "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.", 572 mbuf_pool_size, 573 ad->dev_id, 574 socket_id); 575 ad->in_mbuf_pool = mp; 576 } 577 578 /* Hard outputs */ 579 if (hard_out->nb_segments > 0) { 580 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 581 hard_out->nb_segments); 582 mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, 583 mbuf_pool_size, 584 "hard_out"); 585 TEST_ASSERT_NOT_NULL(mp, 586 "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.", 587 mbuf_pool_size, 588 ad->dev_id, 589 socket_id); 590 ad->hard_out_mbuf_pool = mp; 591 } 592 593 /* Soft outputs */ 594 if (soft_out->nb_segments > 0) { 595 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 596 soft_out->nb_segments); 597 mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, 598 mbuf_pool_size, 599 "soft_out"); 600 TEST_ASSERT_NOT_NULL(mp, 601 "ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.", 602 mbuf_pool_size, 603 ad->dev_id, 604 socket_id); 605 ad->soft_out_mbuf_pool = mp; 606 } 607 608 /* HARQ inputs */ 609 if (harq_in->nb_segments > 0) { 610 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 611 harq_in->nb_segments); 612 mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id, 613 mbuf_pool_size, 614 "harq_in"); 615 TEST_ASSERT_NOT_NULL(mp, 616 "ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.", 617 mbuf_pool_size, 618 ad->dev_id, 619 socket_id); 620 ad->harq_in_mbuf_pool = mp; 621 } 622 623 /* HARQ outputs */ 624 if (harq_out->nb_segments > 0) { 625 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 626 harq_out->nb_segments); 627 mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id, 628 mbuf_pool_size, 629 "harq_out"); 630 TEST_ASSERT_NOT_NULL(mp, 631 "ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.", 632 mbuf_pool_size, 633 ad->dev_id, 634 socket_id); 635 ad->harq_out_mbuf_pool = mp; 636 } 637 638 return TEST_SUCCESS; 639 } 640 641 static int 642 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info, 643 struct test_bbdev_vector *vector) 644 { 645 int ret; 646 unsigned int queue_id; 647 struct rte_bbdev_queue_conf qconf; 648 struct active_device *ad = &active_devs[nb_active_devs]; 649 unsigned int nb_queues; 650 enum rte_bbdev_op_type op_type = vector->op_type; 651 652 /* Configure fpga lte fec with PF & VF values 653 * if '-i' flag is set and using fpga device 654 */ 655 #ifdef RTE_BASEBAND_FPGA_LTE_FEC 656 if 
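	/*
	 * Sizing note for create_mempools() above, with illustrative numbers
	 * only: optimal_mempool_size() rounds up to the next (2^n - 1), the
	 * shape the mempool library documents as the most memory efficient,
	 * and the 1.5 * rte_lcore_count() * OPS_CACHE_SIZE term presumably
	 * keeps enough ops available once every per-lcore cache is full.
	 * Example with 4 queues, num_ops = 511 and 4 lcores: the inner RTE_MAX
	 * compares 4 * 511 + 1 = 2045 with 1.5 * 4 * 256 + 1 = 1537, the outer
	 * one compares that with OPS_POOL_SIZE_MIN = 511, giving 2045, and
	 * optimal_mempool_size(2045) = rte_align32pow2(2046) - 1 = 2047.
	 */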
((get_init_device() == true) && 657 (!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) { 658 struct rte_fpga_lte_fec_conf conf; 659 unsigned int i; 660 661 printf("Configure FPGA LTE FEC Driver %s with default values\n", 662 info->drv.driver_name); 663 664 /* clear default configuration before initialization */ 665 memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf)); 666 667 /* Set PF mode : 668 * true if PF is used for data plane 669 * false for VFs 670 */ 671 conf.pf_mode_en = true; 672 673 for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) { 674 /* Number of UL queues per VF (fpga supports 8 VFs) */ 675 conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE; 676 /* Number of DL queues per VF (fpga supports 8 VFs) */ 677 conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE; 678 } 679 680 /* UL bandwidth. Needed for schedule algorithm */ 681 conf.ul_bandwidth = UL_4G_BANDWIDTH; 682 /* DL bandwidth */ 683 conf.dl_bandwidth = DL_4G_BANDWIDTH; 684 685 /* UL & DL load Balance Factor to 64 */ 686 conf.ul_load_balance = UL_4G_LOAD_BALANCE; 687 conf.dl_load_balance = DL_4G_LOAD_BALANCE; 688 689 /**< FLR timeout value */ 690 conf.flr_time_out = FLR_4G_TIMEOUT; 691 692 /* setup FPGA PF with configuration information */ 693 ret = rte_fpga_lte_fec_configure(info->dev_name, &conf); 694 TEST_ASSERT_SUCCESS(ret, 695 "Failed to configure 4G FPGA PF for bbdev %s", 696 info->dev_name); 697 } 698 #endif 699 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC 700 if ((get_init_device() == true) && 701 (!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) { 702 struct rte_fpga_5gnr_fec_conf conf; 703 unsigned int i; 704 705 printf("Configure FPGA 5GNR FEC Driver %s with default values\n", 706 info->drv.driver_name); 707 708 /* clear default configuration before initialization */ 709 memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf)); 710 711 /* Set PF mode : 712 * true if PF is used for data plane 713 * false for VFs 714 */ 715 conf.pf_mode_en = true; 716 717 for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) { 718 /* Number of UL queues per VF (fpga supports 8 VFs) */ 719 conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE; 720 /* Number of DL queues per VF (fpga supports 8 VFs) */ 721 conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE; 722 } 723 724 /* UL bandwidth. 
Needed for schedule algorithm */ 725 conf.ul_bandwidth = UL_5G_BANDWIDTH; 726 /* DL bandwidth */ 727 conf.dl_bandwidth = DL_5G_BANDWIDTH; 728 729 /* UL & DL load Balance Factor to 64 */ 730 conf.ul_load_balance = UL_5G_LOAD_BALANCE; 731 conf.dl_load_balance = DL_5G_LOAD_BALANCE; 732 733 /* setup FPGA PF with configuration information */ 734 ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf); 735 TEST_ASSERT_SUCCESS(ret, 736 "Failed to configure 5G FPGA PF for bbdev %s", 737 info->dev_name); 738 } 739 #endif 740 #ifdef RTE_BASEBAND_ACC 741 if ((get_init_device() == true) && 742 (!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) { 743 struct rte_acc_conf conf; 744 unsigned int i; 745 746 printf("Configure ACC100/ACC101 FEC Driver %s with default values\n", 747 info->drv.driver_name); 748 749 /* clear default configuration before initialization */ 750 memset(&conf, 0, sizeof(struct rte_acc_conf)); 751 752 /* Always set in PF mode for built-in configuration */ 753 conf.pf_mode_en = true; 754 for (i = 0; i < RTE_ACC_NUM_VFS; ++i) { 755 conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 756 conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 757 conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR; 758 conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 759 conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 760 conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR; 761 conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 762 conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 763 conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR; 764 conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 765 conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 766 conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR; 767 } 768 769 conf.input_pos_llr_1_bit = true; 770 conf.output_pos_llr_1_bit = true; 771 conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */ 772 773 conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS; 774 conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 775 conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 776 conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 777 conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS; 778 conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 779 conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 780 conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 781 conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS; 782 conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 783 conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 784 conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 785 conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS; 786 conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 787 conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 788 conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 789 790 /* setup PF with configuration information */ 791 ret = rte_acc_configure(info->dev_name, &conf); 792 TEST_ASSERT_SUCCESS(ret, 793 "Failed to configure ACC100 PF for bbdev %s", 794 info->dev_name); 795 } 796 if ((get_init_device() == true) && 797 (!strcmp(info->drv.driver_name, ACC200PF_DRIVER_NAME))) { 798 struct rte_acc_conf conf; 799 unsigned int i; 800 801 printf("Configure ACC200 FEC Driver %s with default values\n", 802 info->drv.driver_name); 803 804 /* clear default configuration before initialization */ 805 memset(&conf, 0, sizeof(struct rte_acc_conf)); 806 807 /* Always set in PF mode for built-in configuration */ 808 conf.pf_mode_en = true; 809 for (i = 0; i < RTE_ACC_NUM_VFS; ++i) { 810 conf.arb_dl_4g[i].gbr_threshold1 = 
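	/*
	 * Illustrative note on the queue manager defaults used in the ACC100
	 * and ACC200 blocks: each configured operation type gets
	 * ACC*_QMGR_NUM_QGS = 2 queue groups of ACC*_QMGR_NUM_AQS = 16 atomic
	 * queues, i.e. 32 hardware queues, each 2^ACC*_QMGR_AQ_DEPTH = 32
	 * operations deep. first_qgroup_index is left at the invalid index
	 * (-1), which presumably lets the PMD pick the first free queue group
	 * on its own.
	 */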
ACC200_QOS_GBR; 811 conf.arb_dl_4g[i].gbr_threshold1 = ACC200_QOS_GBR; 812 conf.arb_dl_4g[i].round_robin_weight = ACC200_QMGR_RR; 813 conf.arb_ul_4g[i].gbr_threshold1 = ACC200_QOS_GBR; 814 conf.arb_ul_4g[i].gbr_threshold1 = ACC200_QOS_GBR; 815 conf.arb_ul_4g[i].round_robin_weight = ACC200_QMGR_RR; 816 conf.arb_dl_5g[i].gbr_threshold1 = ACC200_QOS_GBR; 817 conf.arb_dl_5g[i].gbr_threshold1 = ACC200_QOS_GBR; 818 conf.arb_dl_5g[i].round_robin_weight = ACC200_QMGR_RR; 819 conf.arb_ul_5g[i].gbr_threshold1 = ACC200_QOS_GBR; 820 conf.arb_ul_5g[i].gbr_threshold1 = ACC200_QOS_GBR; 821 conf.arb_ul_5g[i].round_robin_weight = ACC200_QMGR_RR; 822 conf.arb_fft[i].gbr_threshold1 = ACC200_QOS_GBR; 823 conf.arb_fft[i].gbr_threshold1 = ACC200_QOS_GBR; 824 conf.arb_fft[i].round_robin_weight = ACC200_QMGR_RR; 825 } 826 827 conf.input_pos_llr_1_bit = true; 828 conf.output_pos_llr_1_bit = true; 829 conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */ 830 831 conf.q_ul_4g.num_qgroups = ACC200_QMGR_NUM_QGS; 832 conf.q_ul_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 833 conf.q_ul_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 834 conf.q_ul_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 835 conf.q_dl_4g.num_qgroups = ACC200_QMGR_NUM_QGS; 836 conf.q_dl_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 837 conf.q_dl_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 838 conf.q_dl_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 839 conf.q_ul_5g.num_qgroups = ACC200_QMGR_NUM_QGS; 840 conf.q_ul_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 841 conf.q_ul_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 842 conf.q_ul_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 843 conf.q_dl_5g.num_qgroups = ACC200_QMGR_NUM_QGS; 844 conf.q_dl_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 845 conf.q_dl_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 846 conf.q_dl_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 847 conf.q_fft.num_qgroups = ACC200_QMGR_NUM_QGS; 848 conf.q_fft.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 849 conf.q_fft.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 850 conf.q_fft.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 851 852 /* setup PF with configuration information */ 853 ret = rte_acc_configure(info->dev_name, &conf); 854 TEST_ASSERT_SUCCESS(ret, 855 "Failed to configure ACC200 PF for bbdev %s", 856 info->dev_name); 857 } 858 #endif 859 /* Let's refresh this now this is configured */ 860 rte_bbdev_info_get(dev_id, info); 861 if (info->drv.device_status == RTE_BBDEV_DEV_FATAL_ERR) 862 printf("Device Status %s\n", rte_bbdev_device_status_str(info->drv.device_status)); 863 nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues); 864 nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES); 865 866 /* setup device */ 867 ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id); 868 if (ret < 0) { 869 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n", 870 dev_id, nb_queues, info->socket_id, ret); 871 return TEST_FAILED; 872 } 873 874 /* configure interrupts if needed */ 875 if (intr_enabled) { 876 ret = rte_bbdev_intr_enable(dev_id); 877 if (ret < 0) { 878 printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id, 879 ret); 880 return TEST_FAILED; 881 } 882 } 883 884 /* setup device queues */ 885 qconf.socket = info->socket_id; 886 qconf.queue_size = info->drv.default_queue_conf.queue_size; 887 qconf.priority = 0; 888 qconf.deferred_start = 0; 889 qconf.op_type = op_type; 890 891 for (queue_id = 0; queue_id < nb_queues; ++queue_id) { 892 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf); 893 if (ret != 0) { 894 printf( 895 
"Allocated all queues (id=%u) at prio%u on dev%u\n", 896 queue_id, qconf.priority, dev_id); 897 qconf.priority++; 898 ret = rte_bbdev_queue_configure(ad->dev_id, queue_id, &qconf); 899 } 900 if (ret != 0) { 901 printf("All queues on dev %u allocated: %u\n", dev_id, queue_id); 902 break; 903 } 904 ret = rte_bbdev_queue_start(ad->dev_id, queue_id); 905 if (ret != 0) { 906 printf("Failed to start queue on dev %u q_id: %u\n", dev_id, queue_id); 907 break; 908 } 909 ad->queue_ids[queue_id] = queue_id; 910 } 911 TEST_ASSERT(queue_id != 0, 912 "ERROR Failed to configure any queues on dev %u\n" 913 "\tthe device may not support the related operation capability\n" 914 "\tor the device may not have been configured yet", dev_id); 915 ad->nb_queues = queue_id; 916 917 set_avail_op(ad, op_type); 918 919 return TEST_SUCCESS; 920 } 921 922 static int 923 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info, 924 struct test_bbdev_vector *vector) 925 { 926 int ret; 927 928 active_devs[nb_active_devs].driver_name = info->drv.driver_name; 929 active_devs[nb_active_devs].dev_id = dev_id; 930 931 ret = add_bbdev_dev(dev_id, info, vector); 932 if (ret == TEST_SUCCESS) 933 ++nb_active_devs; 934 return ret; 935 } 936 937 static uint8_t 938 populate_active_devices(void) 939 { 940 int ret; 941 uint8_t dev_id; 942 uint8_t nb_devs_added = 0; 943 struct rte_bbdev_info info; 944 945 RTE_BBDEV_FOREACH(dev_id) { 946 rte_bbdev_info_get(dev_id, &info); 947 948 if (check_dev_cap(&info)) { 949 printf( 950 "Device %d (%s) does not support specified capabilities\n", 951 dev_id, info.dev_name); 952 continue; 953 } 954 955 ret = add_active_device(dev_id, &info, &test_vector); 956 if (ret != 0) { 957 printf("Adding active bbdev %s skipped\n", 958 info.dev_name); 959 continue; 960 } 961 nb_devs_added++; 962 } 963 964 return nb_devs_added; 965 } 966 967 static int 968 read_test_vector(void) 969 { 970 int ret; 971 972 memset(&test_vector, 0, sizeof(test_vector)); 973 printf("Test vector file = %s\n", get_vector_filename()); 974 ret = test_bbdev_vector_read(get_vector_filename(), &test_vector); 975 TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n", 976 get_vector_filename()); 977 978 return TEST_SUCCESS; 979 } 980 981 static int 982 testsuite_setup(void) 983 { 984 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 985 986 if (populate_active_devices() == 0) { 987 printf("No suitable devices found!\n"); 988 return TEST_SKIPPED; 989 } 990 991 return TEST_SUCCESS; 992 } 993 994 static int 995 interrupt_testsuite_setup(void) 996 { 997 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 998 999 /* Enable interrupts */ 1000 intr_enabled = true; 1001 1002 /* Special case for NULL device (RTE_BBDEV_OP_NONE) */ 1003 if (populate_active_devices() == 0 || 1004 test_vector.op_type == RTE_BBDEV_OP_NONE) { 1005 intr_enabled = false; 1006 printf("No suitable devices found!\n"); 1007 return TEST_SKIPPED; 1008 } 1009 1010 return TEST_SUCCESS; 1011 } 1012 1013 static void 1014 testsuite_teardown(void) 1015 { 1016 uint8_t dev_id; 1017 1018 /* Unconfigure devices */ 1019 RTE_BBDEV_FOREACH(dev_id) 1020 rte_bbdev_close(dev_id); 1021 1022 /* Clear active devices structs. 
*/ 1023 memset(active_devs, 0, sizeof(active_devs)); 1024 nb_active_devs = 0; 1025 1026 /* Disable interrupts */ 1027 intr_enabled = false; 1028 } 1029 1030 static int 1031 ut_setup(void) 1032 { 1033 uint8_t i, dev_id; 1034 1035 for (i = 0; i < nb_active_devs; i++) { 1036 dev_id = active_devs[i].dev_id; 1037 /* reset bbdev stats */ 1038 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id), 1039 "Failed to reset stats of bbdev %u", dev_id); 1040 /* start the device */ 1041 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id), 1042 "Failed to start bbdev %u", dev_id); 1043 } 1044 1045 return TEST_SUCCESS; 1046 } 1047 1048 static void 1049 ut_teardown(void) 1050 { 1051 uint8_t i, dev_id; 1052 struct rte_bbdev_stats stats; 1053 1054 for (i = 0; i < nb_active_devs; i++) { 1055 dev_id = active_devs[i].dev_id; 1056 /* read stats and print */ 1057 rte_bbdev_stats_get(dev_id, &stats); 1058 /* Stop the device */ 1059 rte_bbdev_stop(dev_id); 1060 } 1061 } 1062 1063 static int 1064 init_op_data_objs(struct rte_bbdev_op_data *bufs, 1065 struct op_data_entries *ref_entries, 1066 struct rte_mempool *mbuf_pool, const uint16_t n, 1067 enum op_data_type op_type, uint16_t min_alignment) 1068 { 1069 int ret; 1070 unsigned int i, j; 1071 bool large_input = false; 1072 1073 for (i = 0; i < n; ++i) { 1074 char *data; 1075 struct op_data_buf *seg = &ref_entries->segments[0]; 1076 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool); 1077 TEST_ASSERT_NOT_NULL(m_head, 1078 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1079 op_type, n * ref_entries->nb_segments, 1080 mbuf_pool->size); 1081 1082 if ((seg->length + RTE_PKTMBUF_HEADROOM) > RTE_BBDEV_LDPC_E_MAX_MBUF) { 1083 /* 1084 * Special case when DPDK mbuf cannot handle 1085 * the required input size 1086 */ 1087 large_input = true; 1088 } 1089 bufs[i].data = m_head; 1090 bufs[i].offset = 0; 1091 bufs[i].length = 0; 1092 1093 if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) { 1094 if (large_input) { 1095 /* Allocate a fake overused mbuf */ 1096 data = rte_malloc(NULL, seg->length, 0); 1097 TEST_ASSERT_NOT_NULL(data, 1098 "rte malloc failed with %u bytes", 1099 seg->length); 1100 memcpy(data, seg->addr, seg->length); 1101 m_head->buf_addr = data; 1102 rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data)); 1103 m_head->data_off = 0; 1104 m_head->data_len = seg->length; 1105 } else { 1106 data = rte_pktmbuf_append(m_head, seg->length); 1107 TEST_ASSERT_NOT_NULL(data, 1108 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 1109 seg->length, op_type); 1110 1111 TEST_ASSERT(data == RTE_PTR_ALIGN( 1112 data, min_alignment), 1113 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 1114 data, min_alignment); 1115 rte_memcpy(data, seg->addr, seg->length); 1116 } 1117 1118 bufs[i].length += seg->length; 1119 1120 for (j = 1; j < ref_entries->nb_segments; ++j) { 1121 struct rte_mbuf *m_tail = 1122 rte_pktmbuf_alloc(mbuf_pool); 1123 TEST_ASSERT_NOT_NULL(m_tail, 1124 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1125 op_type, 1126 n * ref_entries->nb_segments, 1127 mbuf_pool->size); 1128 seg += 1; 1129 1130 data = rte_pktmbuf_append(m_tail, seg->length); 1131 TEST_ASSERT_NOT_NULL(data, 1132 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 1133 seg->length, op_type); 1134 1135 TEST_ASSERT(data == RTE_PTR_ALIGN(data, 1136 min_alignment), 1137 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 1138 data, min_alignment); 1139 rte_memcpy(data, seg->addr, 
seg->length); 1140 bufs[i].length += seg->length; 1141 1142 ret = rte_pktmbuf_chain(m_head, m_tail); 1143 TEST_ASSERT_SUCCESS(ret, 1144 "Couldn't chain mbufs from %d data type mbuf pool", 1145 op_type); 1146 } 1147 } else { 1148 if (((op_type == DATA_HARD_OUTPUT) || (op_type == DATA_SOFT_OUTPUT)) 1149 && ((seg->length + RTE_PKTMBUF_HEADROOM) 1150 > RTE_BBDEV_LDPC_E_MAX_MBUF)) { 1151 /* Allocate a fake overused mbuf + margin */ 1152 data = rte_malloc(NULL, seg->length + 1024, 0); 1153 TEST_ASSERT_NOT_NULL(data, 1154 "rte malloc failed with %u bytes", 1155 seg->length + 1024); 1156 m_head->buf_addr = data; 1157 rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data)); 1158 m_head->data_off = 0; 1159 m_head->data_len = seg->length; 1160 } else { 1161 /* allocate chained-mbuf for output buffer */ 1162 for (j = 1; j < ref_entries->nb_segments; ++j) { 1163 struct rte_mbuf *m_tail = 1164 rte_pktmbuf_alloc(mbuf_pool); 1165 TEST_ASSERT_NOT_NULL(m_tail, 1166 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1167 op_type, 1168 n * ref_entries->nb_segments, 1169 mbuf_pool->size); 1170 1171 ret = rte_pktmbuf_chain(m_head, m_tail); 1172 TEST_ASSERT_SUCCESS(ret, 1173 "Couldn't chain mbufs from %d data type mbuf pool", 1174 op_type); 1175 } 1176 } 1177 bufs[i].length += seg->length; 1178 } 1179 } 1180 1181 return 0; 1182 } 1183 1184 static int 1185 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len, 1186 const int socket) 1187 { 1188 int i; 1189 1190 *buffers = rte_zmalloc_socket(NULL, len, 0, socket); 1191 if (*buffers == NULL) { 1192 printf("WARNING: Failed to allocate op_data on socket %d\n", 1193 socket); 1194 /* try to allocate memory on other detected sockets */ 1195 for (i = 0; i < socket; i++) { 1196 *buffers = rte_zmalloc_socket(NULL, len, 0, i); 1197 if (*buffers != NULL) 1198 break; 1199 } 1200 } 1201 1202 return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS; 1203 } 1204 1205 static void 1206 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops, 1207 const uint16_t n, const int8_t max_llr_modulus) 1208 { 1209 uint16_t i, byte_idx; 1210 1211 for (i = 0; i < n; ++i) { 1212 struct rte_mbuf *m = input_ops[i].data; 1213 while (m != NULL) { 1214 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1215 input_ops[i].offset); 1216 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 1217 ++byte_idx) 1218 llr[byte_idx] = round((double)max_llr_modulus * 1219 llr[byte_idx] / INT8_MAX); 1220 1221 m = m->next; 1222 } 1223 } 1224 } 1225 1226 /* 1227 * We may have to insert filler bits 1228 * when they are required by the HARQ assumption 1229 */ 1230 static void 1231 ldpc_add_filler(struct rte_bbdev_op_data *input_ops, 1232 const uint16_t n, struct test_op_params *op_params) 1233 { 1234 struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec; 1235 1236 if (input_ops == NULL) 1237 return; 1238 /* No need to add filler if not required by device */ 1239 if (!(ldpc_cap_flags & 1240 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS)) 1241 return; 1242 /* No need to add filler for loopback operation */ 1243 if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 1244 return; 1245 1246 uint16_t i, j, parity_offset; 1247 for (i = 0; i < n; ++i) { 1248 struct rte_mbuf *m = input_ops[i].data; 1249 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1250 input_ops[i].offset); 1251 parity_offset = (dec.basegraph == 1 ? 
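	/*
	 * Illustrative note on parity_offset, computed here: it is the size of
	 * the systematic part of the HARQ circular buffer, (22 - 2) * Zc for
	 * BG1 or (10 - 2) * Zc for BG2 assuming the usual layout that omits
	 * the first 2 * Zc punctured columns, minus the filler bits. Example
	 * with BG1, Zc = 384 and n_filler = 144: parity_offset =
	 * 20 * 384 - 144 = 7536, so the loop below shifts the parity LLRs up
	 * by 144 positions and writes maximum-magnitude LLRs for the known
	 * filler bits into positions [7536, 7680).
	 */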
20 : 8) 1252 * dec.z_c - dec.n_filler; 1253 uint16_t new_hin_size = input_ops[i].length + dec.n_filler; 1254 m->data_len = new_hin_size; 1255 input_ops[i].length = new_hin_size; 1256 for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler; 1257 j--) 1258 llr[j] = llr[j - dec.n_filler]; 1259 uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 1260 for (j = 0; j < dec.n_filler; j++) 1261 llr[parity_offset + j] = llr_max_pre_scaling; 1262 } 1263 } 1264 1265 static void 1266 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops, 1267 const uint16_t n, const int8_t llr_size, 1268 const int8_t llr_decimals) 1269 { 1270 if (input_ops == NULL) 1271 return; 1272 1273 uint16_t i, byte_idx; 1274 1275 int16_t llr_max, llr_min, llr_tmp; 1276 llr_max = (1 << (llr_size - 1)) - 1; 1277 llr_min = -llr_max; 1278 for (i = 0; i < n; ++i) { 1279 struct rte_mbuf *m = input_ops[i].data; 1280 while (m != NULL) { 1281 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1282 input_ops[i].offset); 1283 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 1284 ++byte_idx) { 1285 1286 llr_tmp = llr[byte_idx]; 1287 if (llr_decimals == 4) 1288 llr_tmp *= 8; 1289 else if (llr_decimals == 2) 1290 llr_tmp *= 2; 1291 else if (llr_decimals == 0) 1292 llr_tmp /= 2; 1293 llr_tmp = RTE_MIN(llr_max, 1294 RTE_MAX(llr_min, llr_tmp)); 1295 llr[byte_idx] = (int8_t) llr_tmp; 1296 } 1297 1298 m = m->next; 1299 } 1300 } 1301 } 1302 1303 1304 1305 static int 1306 fill_queue_buffers(struct test_op_params *op_params, 1307 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp, 1308 struct rte_mempool *soft_out_mp, 1309 struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp, 1310 uint16_t queue_id, 1311 const struct rte_bbdev_op_cap *capabilities, 1312 uint16_t min_alignment, const int socket_id) 1313 { 1314 int ret; 1315 enum op_data_type type; 1316 const uint16_t n = op_params->num_to_process; 1317 1318 struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = { 1319 in_mp, 1320 soft_out_mp, 1321 hard_out_mp, 1322 harq_in_mp, 1323 harq_out_mp, 1324 }; 1325 1326 struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = { 1327 &op_params->q_bufs[socket_id][queue_id].inputs, 1328 &op_params->q_bufs[socket_id][queue_id].soft_outputs, 1329 &op_params->q_bufs[socket_id][queue_id].hard_outputs, 1330 &op_params->q_bufs[socket_id][queue_id].harq_inputs, 1331 &op_params->q_bufs[socket_id][queue_id].harq_outputs, 1332 }; 1333 1334 for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) { 1335 struct op_data_entries *ref_entries = 1336 &test_vector.entries[type]; 1337 if (ref_entries->nb_segments == 0) 1338 continue; 1339 1340 ret = allocate_buffers_on_socket(queue_ops[type], 1341 n * sizeof(struct rte_bbdev_op_data), 1342 socket_id); 1343 TEST_ASSERT_SUCCESS(ret, 1344 "Couldn't allocate memory for rte_bbdev_op_data structs"); 1345 1346 ret = init_op_data_objs(*queue_ops[type], ref_entries, 1347 mbuf_pools[type], n, type, min_alignment); 1348 TEST_ASSERT_SUCCESS(ret, 1349 "Couldn't init rte_bbdev_op_data structs"); 1350 } 1351 1352 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 1353 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n, 1354 capabilities->cap.turbo_dec.max_llr_modulus); 1355 1356 if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 1357 bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags & 1358 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 1359 bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 1360 RTE_BBDEV_LDPC_LLR_COMPRESSION; 1361 bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 
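	/*
	 * Illustrative note on the LLR scaling applied just below: the test
	 * vectors appear to carry LLRs with one fractional bit, and
	 * ldpc_input_llr_scaling() converts them to the fixed-point format
	 * advertised by the device. Worked example with llr_size = 8 and
	 * llr_decimals = 4: a vector value of 13 (i.e. 6.5) is multiplied by
	 * 8 to 104, which still fits within +/-127; values that would exceed
	 * 2^(llr_size - 1) - 1 are saturated instead.
	 */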
1362 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 1363 1364 ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals; 1365 ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size; 1366 ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags; 1367 if (!loopback && !llr_comp) 1368 ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n, 1369 ldpc_llr_size, ldpc_llr_decimals); 1370 if (!loopback && !harq_comp) 1371 ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n, 1372 ldpc_llr_size, ldpc_llr_decimals); 1373 if (!loopback) 1374 ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n, 1375 op_params); 1376 } 1377 1378 return 0; 1379 } 1380 1381 static void 1382 free_buffers(struct active_device *ad, struct test_op_params *op_params) 1383 { 1384 unsigned int i, j; 1385 1386 rte_mempool_free(ad->ops_mempool); 1387 rte_mempool_free(ad->in_mbuf_pool); 1388 rte_mempool_free(ad->hard_out_mbuf_pool); 1389 rte_mempool_free(ad->soft_out_mbuf_pool); 1390 rte_mempool_free(ad->harq_in_mbuf_pool); 1391 rte_mempool_free(ad->harq_out_mbuf_pool); 1392 1393 for (i = 0; i < rte_lcore_count(); ++i) { 1394 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) { 1395 rte_free(op_params->q_bufs[j][i].inputs); 1396 rte_free(op_params->q_bufs[j][i].hard_outputs); 1397 rte_free(op_params->q_bufs[j][i].soft_outputs); 1398 rte_free(op_params->q_bufs[j][i].harq_inputs); 1399 rte_free(op_params->q_bufs[j][i].harq_outputs); 1400 } 1401 } 1402 } 1403 1404 static void 1405 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1406 unsigned int start_idx, 1407 struct rte_bbdev_op_data *inputs, 1408 struct rte_bbdev_op_data *hard_outputs, 1409 struct rte_bbdev_op_data *soft_outputs, 1410 struct rte_bbdev_dec_op *ref_op) 1411 { 1412 unsigned int i; 1413 struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec; 1414 1415 for (i = 0; i < n; ++i) { 1416 if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1417 ops[i]->turbo_dec.tb_params.ea = 1418 turbo_dec->tb_params.ea; 1419 ops[i]->turbo_dec.tb_params.eb = 1420 turbo_dec->tb_params.eb; 1421 ops[i]->turbo_dec.tb_params.k_pos = 1422 turbo_dec->tb_params.k_pos; 1423 ops[i]->turbo_dec.tb_params.k_neg = 1424 turbo_dec->tb_params.k_neg; 1425 ops[i]->turbo_dec.tb_params.c = 1426 turbo_dec->tb_params.c; 1427 ops[i]->turbo_dec.tb_params.c_neg = 1428 turbo_dec->tb_params.c_neg; 1429 ops[i]->turbo_dec.tb_params.cab = 1430 turbo_dec->tb_params.cab; 1431 ops[i]->turbo_dec.tb_params.r = 1432 turbo_dec->tb_params.r; 1433 } else { 1434 ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e; 1435 ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k; 1436 } 1437 1438 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale; 1439 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max; 1440 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min; 1441 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags; 1442 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index; 1443 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps; 1444 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode; 1445 1446 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i]; 1447 ops[i]->turbo_dec.input = inputs[start_idx + i]; 1448 if (soft_outputs != NULL) 1449 ops[i]->turbo_dec.soft_output = 1450 soft_outputs[start_idx + i]; 1451 } 1452 } 1453 1454 static void 1455 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1456 unsigned int start_idx, 1457 struct rte_bbdev_op_data *inputs, 1458 struct rte_bbdev_op_data *outputs, 1459 struct rte_bbdev_enc_op *ref_op) 1460 { 1461 unsigned int i; 1462 struct rte_bbdev_op_turbo_enc 
*turbo_enc = &ref_op->turbo_enc; 1463 for (i = 0; i < n; ++i) { 1464 if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1465 ops[i]->turbo_enc.tb_params.ea = 1466 turbo_enc->tb_params.ea; 1467 ops[i]->turbo_enc.tb_params.eb = 1468 turbo_enc->tb_params.eb; 1469 ops[i]->turbo_enc.tb_params.k_pos = 1470 turbo_enc->tb_params.k_pos; 1471 ops[i]->turbo_enc.tb_params.k_neg = 1472 turbo_enc->tb_params.k_neg; 1473 ops[i]->turbo_enc.tb_params.c = 1474 turbo_enc->tb_params.c; 1475 ops[i]->turbo_enc.tb_params.c_neg = 1476 turbo_enc->tb_params.c_neg; 1477 ops[i]->turbo_enc.tb_params.cab = 1478 turbo_enc->tb_params.cab; 1479 ops[i]->turbo_enc.tb_params.ncb_pos = 1480 turbo_enc->tb_params.ncb_pos; 1481 ops[i]->turbo_enc.tb_params.ncb_neg = 1482 turbo_enc->tb_params.ncb_neg; 1483 ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r; 1484 } else { 1485 ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e; 1486 ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k; 1487 ops[i]->turbo_enc.cb_params.ncb = 1488 turbo_enc->cb_params.ncb; 1489 } 1490 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index; 1491 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags; 1492 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode; 1493 1494 ops[i]->turbo_enc.output = outputs[start_idx + i]; 1495 ops[i]->turbo_enc.input = inputs[start_idx + i]; 1496 } 1497 } 1498 1499 1500 /* Returns a random number drawn from a normal distribution 1501 * with mean of 0 and variance of 1 1502 * Marsaglia algorithm 1503 */ 1504 static double 1505 randn(int n) 1506 { 1507 double S, Z, U1, U2, u, v, fac; 1508 1509 do { 1510 U1 = (double)rand() / RAND_MAX; 1511 U2 = (double)rand() / RAND_MAX; 1512 u = 2. * U1 - 1.; 1513 v = 2. * U2 - 1.; 1514 S = u * u + v * v; 1515 } while (S >= 1 || S == 0); 1516 fac = sqrt(-2. * log(S) / S); 1517 Z = (n % 2) ? 
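	/*
	 * Illustrative note: this is the Marsaglia polar method. (u, v) is
	 * drawn uniformly on [-1, 1] x [-1, 1] and rejected unless it lies
	 * inside the unit circle (acceptance probability pi/4, about 78.5%),
	 * after which u * fac and v * fac are two independent N(0, 1) samples
	 * with fac = sqrt(-2 * ln(S) / S). The parity of the argument selects
	 * which of the pair is returned; every call draws a fresh pair, so
	 * the unused sample of each pair is discarded.
	 */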
u * fac : v * fac; 1518 return Z; 1519 } 1520 1521 static inline double 1522 maxstar(double A, double B) 1523 { 1524 if (fabs(A - B) > 5) 1525 return RTE_MAX(A, B); 1526 else 1527 return RTE_MAX(A, B) + log1p(exp(-fabs(A - B))); 1528 } 1529 1530 /* 1531 * Generate Qm LLRS for Qm==8 1532 * Modulation, AWGN and LLR estimation from max log development 1533 */ 1534 static void 1535 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1536 { 1537 int qm = 8; 1538 int qam = 256; 1539 int m, k; 1540 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1541 /* 5.1.4 of TS38.211 */ 1542 const double symbols_I[256] = { 1543 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5, 1544 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11, 1545 11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13, 1546 15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 1547 15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1548 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1549 1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 1550 15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 1551 13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5, 1552 -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, 1553 -7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, 1554 -1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13, 1555 -13, -15, -15, -13, -13, -15, -15, -11, -11, -9, 1556 -9, -11, -11, -9, -9, -13, -13, -15, -15, -13, 1557 -13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3, 1558 -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5, 1559 -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11, 1560 -9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13, 1561 -13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9, 1562 -13, -13, -15, -15, -13, -13, -15, -15}; 1563 const double symbols_Q[256] = { 1564 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11, 1565 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13, 1566 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 1567 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 1568 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5, 1569 -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, 1570 -15, -13, -15, -11, -9, -11, -9, -13, -15, -13, 1571 -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5, 1572 -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15, 1573 -13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5, 1574 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11, 1575 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 1576 13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 1577 3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 1578 13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, 1579 -5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, 1580 -13, -15, -13, -15, -11, -9, -11, -9, -13, -15, 1581 -13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, 1582 -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15, 1583 -13, -15, -11, -9, -11, -9, -13, -15, -13, -15}; 1584 /* Average constellation point energy */ 1585 N0 *= 170.0; 1586 for (k = 0; k < qm; k++) 1587 b[k] = llrs[qm * i + k] < 0 ? 
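	/*
	 * Illustrative note on maxstar() above: it is the Jacobian logarithm,
	 * max*(A, B) = ln(e^A + e^B) = max(A, B) + ln(1 + e^-|A - B|), used
	 * below to accumulate the per-bit symbol probabilities in the log
	 * domain. For example maxstar(0, 0) = ln(2) ~ 0.693 and
	 * maxstar(3, 0) ~ 3.049. When |A - B| > 5 the correction term is
	 * smaller than ln(1 + e^-5) ~ 0.0067, so the plain maximum is used.
	 */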
1.0 : 0.0; 1588 /* 5.1.4 of TS38.211 */ 1589 I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) * 1590 (4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6])))); 1591 Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) * 1592 (4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7])))); 1593 /* AWGN channel */ 1594 I += sqrt(N0 / 2) * randn(0); 1595 Q += sqrt(N0 / 2) * randn(1); 1596 /* 1597 * Calculate the log of the probability that each of 1598 * the constellation points was transmitted 1599 */ 1600 for (m = 0; m < qam; m++) 1601 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1602 + pow(Q - symbols_Q[m], 2.0)) / N0; 1603 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1604 for (k = 0; k < qm; k++) { 1605 p0 = -999999; 1606 p1 = -999999; 1607 /* For each constellation point */ 1608 for (m = 0; m < qam; m++) { 1609 if ((m >> (qm - k - 1)) & 1) 1610 p1 = maxstar(p1, log_syml_prob[m]); 1611 else 1612 p0 = maxstar(p0, log_syml_prob[m]); 1613 } 1614 /* Calculate the LLR */ 1615 llr_ = p0 - p1; 1616 llr_ *= (1 << ldpc_llr_decimals); 1617 llr_ = round(llr_); 1618 if (llr_ > llr_max) 1619 llr_ = llr_max; 1620 if (llr_ < -llr_max) 1621 llr_ = -llr_max; 1622 llrs[qm * i + k] = (int8_t) llr_; 1623 } 1624 } 1625 1626 1627 /* 1628 * Generate Qm LLRS for Qm==6 1629 * Modulation, AWGN and LLR estimation from max log development 1630 */ 1631 static void 1632 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1633 { 1634 int qm = 6; 1635 int qam = 64; 1636 int m, k; 1637 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1638 /* 5.1.4 of TS38.211 */ 1639 const double symbols_I[64] = { 1640 3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 1641 3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 1642 -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, 1643 -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, 1644 -5, -5, -7, -7, -5, -5, -7, -7}; 1645 const double symbols_Q[64] = { 1646 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 1647 -3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1, 1648 -5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 1649 5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7, 1650 -3, -1, -3, -1, -5, -7, -5, -7}; 1651 /* Average constellation point energy */ 1652 N0 *= 42.0; 1653 for (k = 0; k < qm; k++) 1654 b[k] = llrs[qm * i + k] < 0 ? 
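	/*
	 * Illustrative arithmetic behind the "average constellation point
	 * energy" factors: with per-dimension amplitudes {1, 3, ..., 15},
	 * 256QAM has (1 + 9 + 25 + 49 + 81 + 121 + 169 + 225) / 8 = 85 per
	 * dimension, i.e. 170 per symbol; 64QAM gives (1 + 9 + 25 + 49) / 4 =
	 * 21 per dimension (42 per symbol) and 16QAM gives (1 + 9) / 2 = 5
	 * (10 per symbol). Scaling N0 by these factors presumably expresses
	 * the requested SNR relative to unit average symbol energy.
	 */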
1.0 : 0.0; 1655 /* 5.1.4 of TS38.211 */ 1656 I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4]))); 1657 Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5]))); 1658 /* AWGN channel */ 1659 I += sqrt(N0 / 2) * randn(0); 1660 Q += sqrt(N0 / 2) * randn(1); 1661 /* 1662 * Calculate the log of the probability that each of 1663 * the constellation points was transmitted 1664 */ 1665 for (m = 0; m < qam; m++) 1666 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1667 + pow(Q - symbols_Q[m], 2.0)) / N0; 1668 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1669 for (k = 0; k < qm; k++) { 1670 p0 = -999999; 1671 p1 = -999999; 1672 /* For each constellation point */ 1673 for (m = 0; m < qam; m++) { 1674 if ((m >> (qm - k - 1)) & 1) 1675 p1 = maxstar(p1, log_syml_prob[m]); 1676 else 1677 p0 = maxstar(p0, log_syml_prob[m]); 1678 } 1679 /* Calculate the LLR */ 1680 llr_ = p0 - p1; 1681 llr_ *= (1 << ldpc_llr_decimals); 1682 llr_ = round(llr_); 1683 if (llr_ > llr_max) 1684 llr_ = llr_max; 1685 if (llr_ < -llr_max) 1686 llr_ = -llr_max; 1687 llrs[qm * i + k] = (int8_t) llr_; 1688 } 1689 } 1690 1691 /* 1692 * Generate Qm LLRS for Qm==4 1693 * Modulation, AWGN and LLR estimation from max log development 1694 */ 1695 static void 1696 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1697 { 1698 int qm = 4; 1699 int qam = 16; 1700 int m, k; 1701 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1702 /* 5.1.4 of TS38.211 */ 1703 const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3, 1704 -1, -1, -3, -3, -1, -1, -3, -3}; 1705 const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3, 1706 1, 3, 1, 3, -1, -3, -1, -3}; 1707 /* Average constellation point energy */ 1708 N0 *= 10.0; 1709 for (k = 0; k < qm; k++) 1710 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0; 1711 /* 5.1.4 of TS38.211 */ 1712 I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2])); 1713 Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3])); 1714 /* AWGN channel */ 1715 I += sqrt(N0 / 2) * randn(0); 1716 Q += sqrt(N0 / 2) * randn(1); 1717 /* 1718 * Calculate the log of the probability that each of 1719 * the constellation points was transmitted 1720 */ 1721 for (m = 0; m < qam; m++) 1722 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1723 + pow(Q - symbols_Q[m], 2.0)) / N0; 1724 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1725 for (k = 0; k < qm; k++) { 1726 p0 = -999999; 1727 p1 = -999999; 1728 /* For each constellation point */ 1729 for (m = 0; m < qam; m++) { 1730 if ((m >> (qm - k - 1)) & 1) 1731 p1 = maxstar(p1, log_syml_prob[m]); 1732 else 1733 p0 = maxstar(p0, log_syml_prob[m]); 1734 } 1735 /* Calculate the LLR */ 1736 llr_ = p0 - p1; 1737 llr_ *= (1 << ldpc_llr_decimals); 1738 llr_ = round(llr_); 1739 if (llr_ > llr_max) 1740 llr_ = llr_max; 1741 if (llr_ < -llr_max) 1742 llr_ = -llr_max; 1743 llrs[qm * i + k] = (int8_t) llr_; 1744 } 1745 } 1746 1747 static void 1748 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max) 1749 { 1750 double b, b1, n; 1751 double coeff = 2.0 * sqrt(N0); 1752 1753 /* Ignore in vectors rare quasi null LLRs not to be saturated */ 1754 if (llrs[j] < 8 && llrs[j] > -8) 1755 return; 1756 1757 /* Note don't change sign here */ 1758 n = randn(j % 2); 1759 b1 = ((llrs[j] > 0 ? 
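	/*
	 * Worked example for this QPSK branch (numbers only, assuming the
	 * caller derives N0 from get_snr() as 1 / 10^(SNR/10)): at 6 dB,
	 * N0 ~ 0.251 and coeff = 2 * sqrt(N0) ~ 1.00. For a positive vector
	 * LLR and a noise sample n = 0.5, b1 = (2.0 + 1.00 * 0.5) / 0.251 ~
	 * 9.96; with ldpc_llr_decimals = 2 this becomes 9.96 * 4 ~ 40 after
	 * rounding, then is saturated to +/- llr_max. LLRs of magnitude below
	 * 8 were already returned unchanged by the guard above.
	 */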
2.0 : -2.0) 1760 + coeff * n) / N0; 1761 b = b1 * (1 << ldpc_llr_decimals); 1762 b = round(b); 1763 if (b > llr_max) 1764 b = llr_max; 1765 if (b < -llr_max) 1766 b = -llr_max; 1767 llrs[j] = (int8_t) b; 1768 } 1769 1770 /* Simple LLR generation assuming AWGN and QPSK */ 1771 static void 1772 gen_turbo_llr(int8_t *llrs, uint32_t j, double N0, double llr_max) 1773 { 1774 double b, b1, n; 1775 double coeff = 2.0 * sqrt(N0); 1776 1777 /* Ignore in vectors null LLRs not to be saturated */ 1778 if (llrs[j] == 0) 1779 return; 1780 1781 /* Note don't change sign here */ 1782 n = randn(j % 2); 1783 b1 = ((llrs[j] > 0 ? 2.0 : -2.0) 1784 + coeff * n) / N0; 1785 b = b1 * (1 << 4); 1786 b = round(b); 1787 if (b > llr_max) 1788 b = llr_max; 1789 if (b < -llr_max) 1790 b = -llr_max; 1791 llrs[j] = (int8_t) b; 1792 } 1793 1794 /* Generate LLR for a given SNR */ 1795 static void 1796 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs, 1797 struct rte_bbdev_dec_op *ref_op) 1798 { 1799 struct rte_mbuf *m; 1800 uint16_t qm; 1801 uint32_t i, j, e, range; 1802 double N0, llr_max; 1803 1804 e = ref_op->ldpc_dec.cb_params.e; 1805 qm = ref_op->ldpc_dec.q_m; 1806 llr_max = (1 << (ldpc_llr_size - 1)) - 1; 1807 range = e / qm; 1808 N0 = 1.0 / pow(10.0, get_snr() / 10.0); 1809 1810 for (i = 0; i < n; ++i) { 1811 m = inputs[i].data; 1812 int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0); 1813 if (qm == 8) { 1814 for (j = 0; j < range; ++j) 1815 gen_qm8_llr(llrs, j, N0, llr_max); 1816 } else if (qm == 6) { 1817 for (j = 0; j < range; ++j) 1818 gen_qm6_llr(llrs, j, N0, llr_max); 1819 } else if (qm == 4) { 1820 for (j = 0; j < range; ++j) 1821 gen_qm4_llr(llrs, j, N0, llr_max); 1822 } else { 1823 for (j = 0; j < e; ++j) 1824 gen_qm2_llr(llrs, j, N0, llr_max); 1825 } 1826 } 1827 } 1828 1829 /* Generate LLR for turbo decoder for a given SNR */ 1830 static void 1831 generate_turbo_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs, 1832 struct rte_bbdev_dec_op *ref_op) 1833 { 1834 struct rte_mbuf *m; 1835 uint32_t i, j, range; 1836 double N0, llr_max; 1837 1838 llr_max = 127; 1839 range = ref_op->turbo_dec.input.length; 1840 N0 = 1.0 / pow(10.0, get_snr() / 10.0); 1841 1842 for (i = 0; i < n; ++i) { 1843 m = inputs[i].data; 1844 int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0); 1845 for (j = 0; j < range; ++j) 1846 gen_turbo_llr(llrs, j, N0, llr_max); 1847 } 1848 } 1849 1850 static void 1851 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1852 unsigned int start_idx, 1853 struct rte_bbdev_op_data *inputs, 1854 struct rte_bbdev_op_data *hard_outputs, 1855 struct rte_bbdev_op_data *soft_outputs, 1856 struct rte_bbdev_op_data *harq_inputs, 1857 struct rte_bbdev_op_data *harq_outputs, 1858 struct rte_bbdev_dec_op *ref_op) 1859 { 1860 unsigned int i; 1861 struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec; 1862 1863 for (i = 0; i < n; ++i) { 1864 if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1865 ops[i]->ldpc_dec.tb_params.ea = 1866 ldpc_dec->tb_params.ea; 1867 ops[i]->ldpc_dec.tb_params.eb = 1868 ldpc_dec->tb_params.eb; 1869 ops[i]->ldpc_dec.tb_params.c = 1870 ldpc_dec->tb_params.c; 1871 ops[i]->ldpc_dec.tb_params.cab = 1872 ldpc_dec->tb_params.cab; 1873 ops[i]->ldpc_dec.tb_params.r = 1874 ldpc_dec->tb_params.r; 1875 } else { 1876 ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e; 1877 } 1878 1879 ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph; 1880 ops[i]->ldpc_dec.z_c = ldpc_dec->z_c; 1881 ops[i]->ldpc_dec.q_m = ldpc_dec->q_m; 1882 
ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler; 1883 ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb; 1884 ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max; 1885 ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index; 1886 ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags; 1887 ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode; 1888 1889 if (hard_outputs != NULL) 1890 ops[i]->ldpc_dec.hard_output = 1891 hard_outputs[start_idx + i]; 1892 if (inputs != NULL) 1893 ops[i]->ldpc_dec.input = 1894 inputs[start_idx + i]; 1895 if (soft_outputs != NULL) 1896 ops[i]->ldpc_dec.soft_output = 1897 soft_outputs[start_idx + i]; 1898 if (harq_inputs != NULL) 1899 ops[i]->ldpc_dec.harq_combined_input = 1900 harq_inputs[start_idx + i]; 1901 if (harq_outputs != NULL) 1902 ops[i]->ldpc_dec.harq_combined_output = 1903 harq_outputs[start_idx + i]; 1904 } 1905 } 1906 1907 1908 static void 1909 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1910 unsigned int start_idx, 1911 struct rte_bbdev_op_data *inputs, 1912 struct rte_bbdev_op_data *outputs, 1913 struct rte_bbdev_enc_op *ref_op) 1914 { 1915 unsigned int i; 1916 struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc; 1917 for (i = 0; i < n; ++i) { 1918 if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1919 ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea; 1920 ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb; 1921 ops[i]->ldpc_enc.tb_params.cab = 1922 ldpc_enc->tb_params.cab; 1923 ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c; 1924 ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r; 1925 } else { 1926 ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e; 1927 } 1928 ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph; 1929 ops[i]->ldpc_enc.z_c = ldpc_enc->z_c; 1930 ops[i]->ldpc_enc.q_m = ldpc_enc->q_m; 1931 ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler; 1932 ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb; 1933 ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index; 1934 ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags; 1935 ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode; 1936 ops[i]->ldpc_enc.output = outputs[start_idx + i]; 1937 ops[i]->ldpc_enc.input = inputs[start_idx + i]; 1938 } 1939 } 1940 1941 static void 1942 copy_reference_fft_op(struct rte_bbdev_fft_op **ops, unsigned int n, 1943 unsigned int start_idx, struct rte_bbdev_op_data *inputs, 1944 struct rte_bbdev_op_data *outputs, struct rte_bbdev_op_data *pwrouts, 1945 struct rte_bbdev_fft_op *ref_op) 1946 { 1947 unsigned int i, j; 1948 struct rte_bbdev_op_fft *fft = &ref_op->fft; 1949 for (i = 0; i < n; i++) { 1950 ops[i]->fft.input_sequence_size = fft->input_sequence_size; 1951 ops[i]->fft.input_leading_padding = fft->input_leading_padding; 1952 ops[i]->fft.output_sequence_size = fft->output_sequence_size; 1953 ops[i]->fft.output_leading_depadding = 1954 fft->output_leading_depadding; 1955 for (j = 0; j < RTE_BBDEV_MAX_CS_2; j++) 1956 ops[i]->fft.window_index[j] = fft->window_index[j]; 1957 ops[i]->fft.cs_bitmap = fft->cs_bitmap; 1958 ops[i]->fft.num_antennas_log2 = fft->num_antennas_log2; 1959 ops[i]->fft.idft_log2 = fft->idft_log2; 1960 ops[i]->fft.dft_log2 = fft->dft_log2; 1961 ops[i]->fft.cs_time_adjustment = fft->cs_time_adjustment; 1962 ops[i]->fft.idft_shift = fft->idft_shift; 1963 ops[i]->fft.dft_shift = fft->dft_shift; 1964 ops[i]->fft.ncs_reciprocal = fft->ncs_reciprocal; 1965 ops[i]->fft.power_shift = fft->power_shift; 1966 ops[i]->fft.fp16_exp_adjust = fft->fp16_exp_adjust; 1967 ops[i]->fft.base_output = outputs[start_idx + i]; 1968 
ops[i]->fft.base_input = inputs[start_idx + i]; 1969 if (pwrouts != NULL) 1970 ops[i]->fft.power_meas_output = pwrouts[start_idx + i]; 1971 ops[i]->fft.op_flags = fft->op_flags; 1972 } 1973 } 1974 1975 static int 1976 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op, 1977 unsigned int order_idx, const int expected_status) 1978 { 1979 int status = op->status; 1980 /* ignore parity mismatch false alarms for long iterations */ 1981 if (get_iter_max() >= 10) { 1982 if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1983 (status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1984 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1985 status -= (1 << RTE_BBDEV_SYNDROME_ERROR); 1986 } 1987 if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1988 !(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1989 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1990 status += (1 << RTE_BBDEV_SYNDROME_ERROR); 1991 } 1992 } 1993 1994 TEST_ASSERT(status == expected_status, 1995 "op_status (%d) != expected_status (%d)", 1996 op->status, expected_status); 1997 1998 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1999 "Ordering error, expected %p, got %p", 2000 (void *)(uintptr_t)order_idx, op->opaque_data); 2001 2002 return TEST_SUCCESS; 2003 } 2004 2005 static int 2006 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op, 2007 unsigned int order_idx, const int expected_status) 2008 { 2009 TEST_ASSERT(op->status == expected_status, 2010 "op_status (%d) != expected_status (%d)", 2011 op->status, expected_status); 2012 2013 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 2014 "Ordering error, expected %p, got %p", 2015 (void *)(uintptr_t)order_idx, op->opaque_data); 2016 2017 return TEST_SUCCESS; 2018 } 2019 2020 static int 2021 check_fft_status_and_ordering(struct rte_bbdev_fft_op *op, 2022 unsigned int order_idx, const int expected_status) 2023 { 2024 TEST_ASSERT(op->status == expected_status, 2025 "op_status (%d) != expected_status (%d)", 2026 op->status, expected_status); 2027 2028 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 2029 "Ordering error, expected %p, got %p", 2030 (void *)(uintptr_t)order_idx, op->opaque_data); 2031 2032 return TEST_SUCCESS; 2033 } 2034 2035 static inline int 2036 validate_op_chain(struct rte_bbdev_op_data *op, 2037 struct op_data_entries *orig_op) 2038 { 2039 uint8_t i; 2040 struct rte_mbuf *m = op->data; 2041 uint8_t nb_dst_segments = orig_op->nb_segments; 2042 uint32_t total_data_size = 0; 2043 bool ignore_mbuf = false; /* ignore mbuf limitations */ 2044 2045 TEST_ASSERT(nb_dst_segments == m->nb_segs, 2046 "Number of segments differ in original (%u) and filled (%u) op", 2047 nb_dst_segments, m->nb_segs); 2048 2049 /* Validate each mbuf segment length */ 2050 for (i = 0; i < nb_dst_segments; ++i) { 2051 /* Apply offset to the first mbuf segment */ 2052 uint16_t offset = (i == 0) ? 
op->offset : 0; 2053 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 2054 total_data_size += orig_op->segments[i].length; 2055 2056 if (orig_op->segments[i].length > RTE_BBDEV_LDPC_E_MAX_MBUF) 2057 ignore_mbuf = true; 2058 if (!ignore_mbuf) 2059 TEST_ASSERT(orig_op->segments[i].length == data_len, 2060 "Length of segment differ in original (%u) and filled (%u) op", 2061 orig_op->segments[i].length, data_len); 2062 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr, 2063 rte_pktmbuf_mtod_offset(m, uint32_t *, offset), 2064 orig_op->segments[i].length, 2065 "Output buffers (CB=%u) are not equal", i); 2066 m = m->next; 2067 } 2068 2069 /* Validate total mbuf pkt length */ 2070 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 2071 if (!ignore_mbuf) 2072 TEST_ASSERT(total_data_size == pkt_len, 2073 "Length of data differ in original (%u) and filled (%u) op", 2074 total_data_size, pkt_len); 2075 2076 return TEST_SUCCESS; 2077 } 2078 2079 /* 2080 * Compute K0 for a given configuration for HARQ output length computation 2081 * As per definition in 3GPP 38.212 Table 5.4.2.1-2 2082 */ 2083 static inline uint16_t 2084 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index) 2085 { 2086 if (rv_index == 0) 2087 return 0; 2088 uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c; 2089 if (n_cb == n) { 2090 if (rv_index == 1) 2091 return (bg == 1 ? K0_1_1 : K0_1_2) * z_c; 2092 else if (rv_index == 2) 2093 return (bg == 1 ? K0_2_1 : K0_2_2) * z_c; 2094 else 2095 return (bg == 1 ? K0_3_1 : K0_3_2) * z_c; 2096 } 2097 /* LBRM case - includes a division by N */ 2098 if (rv_index == 1) 2099 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb) 2100 / n) * z_c; 2101 else if (rv_index == 2) 2102 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb) 2103 / n) * z_c; 2104 else 2105 return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb) 2106 / n) * z_c; 2107 } 2108 2109 /* HARQ output length including the Filler bits */ 2110 static inline uint16_t 2111 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld) 2112 { 2113 uint16_t k0 = 0; 2114 uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index; 2115 k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv); 2116 /* Compute RM out size and number of rows */ 2117 uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 2118 * ops_ld->z_c - ops_ld->n_filler; 2119 uint16_t deRmOutSize = RTE_MIN( 2120 k0 + ops_ld->cb_params.e + 2121 ((k0 > parity_offset) ? 
2122 0 : ops_ld->n_filler), 2123 ops_ld->n_cb); 2124 uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1) 2125 / ops_ld->z_c); 2126 uint16_t harq_output_len = numRows * ops_ld->z_c; 2127 return harq_output_len; 2128 } 2129 2130 static inline int 2131 validate_op_harq_chain(struct rte_bbdev_op_data *op, 2132 struct op_data_entries *orig_op, 2133 struct rte_bbdev_op_ldpc_dec *ops_ld) 2134 { 2135 uint8_t i; 2136 uint32_t j, jj, k; 2137 struct rte_mbuf *m = op->data; 2138 uint8_t nb_dst_segments = orig_op->nb_segments; 2139 uint32_t total_data_size = 0; 2140 int8_t *harq_orig, *harq_out, abs_harq_origin; 2141 uint32_t byte_error = 0, cum_error = 0, error; 2142 int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1; 2143 int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 2144 uint16_t parity_offset; 2145 2146 TEST_ASSERT(nb_dst_segments == m->nb_segs, 2147 "Number of segments differ in original (%u) and filled (%u) op", 2148 nb_dst_segments, m->nb_segs); 2149 2150 /* Validate each mbuf segment length */ 2151 for (i = 0; i < nb_dst_segments; ++i) { 2152 /* Apply offset to the first mbuf segment */ 2153 uint16_t offset = (i == 0) ? op->offset : 0; 2154 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 2155 total_data_size += orig_op->segments[i].length; 2156 2157 TEST_ASSERT(orig_op->segments[i].length < (uint32_t)(data_len + HARQ_MEM_TOLERANCE), 2158 "Length of segment differ in original (%u) and filled (%u) op", 2159 orig_op->segments[i].length, data_len); 2160 harq_orig = (int8_t *) orig_op->segments[i].addr; 2161 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset); 2162 2163 /* Cannot compare HARQ output data for such cases */ 2164 if ((ldpc_llr_decimals > 1) && ((ops_ld->op_flags & RTE_BBDEV_LDPC_LLR_COMPRESSION) 2165 || (ops_ld->op_flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION))) 2166 break; 2167 2168 if (!(ldpc_cap_flags & 2169 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS 2170 ) || (ops_ld->op_flags & 2171 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 2172 data_len -= ops_ld->z_c; 2173 parity_offset = data_len; 2174 } else { 2175 /* Compute RM out size and number of rows */ 2176 parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 2177 * ops_ld->z_c - ops_ld->n_filler; 2178 uint16_t deRmOutSize = compute_harq_len(ops_ld) - 2179 ops_ld->n_filler; 2180 if (data_len > deRmOutSize) 2181 data_len = deRmOutSize; 2182 } 2183 if (data_len > orig_op->segments[i].length) 2184 data_len = orig_op->segments[i].length; 2185 /* 2186 * HARQ output can have minor differences 2187 * due to integer representation and related scaling 2188 */ 2189 for (j = 0, jj = 0; j < data_len; j++, jj++) { 2190 if (j == parity_offset) { 2191 /* Special Handling of the filler bits */ 2192 for (k = 0; k < ops_ld->n_filler; k++) { 2193 if (harq_out[jj] != 2194 llr_max_pre_scaling) { 2195 printf("HARQ Filler issue %d: %d %d\n", 2196 jj, harq_out[jj], 2197 llr_max); 2198 byte_error++; 2199 } 2200 jj++; 2201 } 2202 } 2203 if (!(ops_ld->op_flags & 2204 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 2205 if (ldpc_llr_decimals > 1) 2206 harq_out[jj] = (harq_out[jj] + 1) 2207 >> (ldpc_llr_decimals - 1); 2208 /* Saturated to S7 */ 2209 if (harq_orig[j] > llr_max) 2210 harq_orig[j] = llr_max; 2211 if (harq_orig[j] < -llr_max) 2212 harq_orig[j] = -llr_max; 2213 } 2214 if (harq_orig[j] != harq_out[jj]) { 2215 error = (harq_orig[j] > harq_out[jj]) ? 2216 harq_orig[j] - harq_out[jj] : 2217 harq_out[jj] - harq_orig[j]; 2218 abs_harq_origin = harq_orig[j] > 0 ? 
2219 harq_orig[j] : 2220 -harq_orig[j]; 2221 /* Residual quantization error */ 2222 if ((error > 8 && (abs_harq_origin < 2223 (llr_max - 16))) || 2224 (error > 16)) { 2225 printf("HARQ mismatch %d: exp %d act %d => %d\n", 2226 j, harq_orig[j], 2227 harq_out[jj], error); 2228 byte_error++; 2229 cum_error += error; 2230 } 2231 } 2232 } 2233 m = m->next; 2234 } 2235 2236 if (byte_error) 2237 TEST_ASSERT(byte_error <= 1, 2238 "HARQ output mismatch (%d) %d", 2239 byte_error, cum_error); 2240 2241 /* Validate total mbuf pkt length */ 2242 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 2243 TEST_ASSERT(total_data_size < pkt_len + HARQ_MEM_TOLERANCE, 2244 "Length of data differ in original (%u) and filled (%u) op", 2245 total_data_size, pkt_len); 2246 2247 return TEST_SUCCESS; 2248 } 2249 2250 2251 static inline int 2252 validate_op_so_chain(struct rte_bbdev_op_data *op, 2253 struct op_data_entries *orig_op) 2254 { 2255 struct rte_mbuf *m = op->data; 2256 uint8_t i, nb_dst_segments = orig_op->nb_segments; 2257 uint32_t j, jj; 2258 int8_t *so_orig, *so_out; 2259 uint32_t byte_error = 0, error, margin_error = 0; 2260 2261 TEST_ASSERT(nb_dst_segments == m->nb_segs, 2262 "Number of segments differ in original (%u) and filled (%u) op", 2263 nb_dst_segments, m->nb_segs); 2264 2265 /* Validate each mbuf segment length. */ 2266 for (i = 0; i < nb_dst_segments; ++i) { 2267 /* Apply offset to the first mbuf segment. */ 2268 uint16_t offset = (i == 0) ? op->offset : 0; 2269 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 2270 2271 TEST_ASSERT(orig_op->segments[i].length == data_len, 2272 "Length of segment differ in original (%u) and filled (%u) op", 2273 orig_op->segments[i].length, data_len); 2274 so_orig = (int8_t *) orig_op->segments[i].addr; 2275 so_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset); 2276 margin_error += data_len / 8; /* Allow for few % errors. */ 2277 2278 /* SO output can have minor differences due to algorithm variations. */ 2279 for (j = 0, jj = 0; j < data_len; j++, jj++) { 2280 if (so_orig[j] != so_out[jj]) { 2281 error = (so_orig[j] > so_out[jj]) ? so_orig[j] - so_out[jj] : 2282 so_out[jj] - so_orig[j]; 2283 /* Residual quantization error. 
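			 * Note: both the reference and the device soft outputs are
			 * quantized LLRs, so small per-byte differences are expected.
			 * Only deviations above the fixed threshold below are counted,
			 * and the test fails only when that count exceeds the
			 * per-segment margin accumulated above.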
*/ 2284 if (error > 32) { 2285 printf("Warning: Soft mismatch %d: exp %d act %d => %d\n", 2286 j, so_orig[j], so_out[jj], error); 2287 byte_error++; 2288 } 2289 } 2290 } 2291 m = m->next; 2292 } 2293 2294 if (byte_error > margin_error) 2295 TEST_ASSERT(byte_error <= 1, "Soft output mismatch (%d) %d", 2296 byte_error, margin_error); 2297 2298 return TEST_SUCCESS; 2299 } 2300 2301 static int 2302 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 2303 struct rte_bbdev_dec_op *ref_op) 2304 { 2305 unsigned int i; 2306 int ret; 2307 struct op_data_entries *hard_data_orig = 2308 &test_vector.entries[DATA_HARD_OUTPUT]; 2309 struct op_data_entries *soft_data_orig = 2310 &test_vector.entries[DATA_SOFT_OUTPUT]; 2311 struct rte_bbdev_op_turbo_dec *ops_td; 2312 struct rte_bbdev_op_data *hard_output; 2313 struct rte_bbdev_op_data *soft_output; 2314 2315 for (i = 0; i < n; ++i) { 2316 ops_td = &ops[i]->turbo_dec; 2317 hard_output = &ops_td->hard_output; 2318 soft_output = &ops_td->soft_output; 2319 2320 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2321 TEST_ASSERT_SUCCESS(ret, 2322 "Checking status and ordering for decoder failed"); 2323 2324 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2325 hard_data_orig), 2326 "Hard output buffers (CB=%u) are not equal", 2327 i); 2328 2329 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT) 2330 TEST_ASSERT_SUCCESS(validate_op_so_chain(soft_output, 2331 soft_data_orig), 2332 "Soft output buffers (CB=%u) are not equal", 2333 i); 2334 } 2335 2336 return TEST_SUCCESS; 2337 } 2338 2339 /* Check Number of code blocks errors */ 2340 static int 2341 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n) 2342 { 2343 unsigned int i; 2344 struct op_data_entries *hard_data_orig = 2345 &test_vector.entries[DATA_HARD_OUTPUT]; 2346 struct rte_bbdev_op_ldpc_dec *ops_td; 2347 struct rte_bbdev_op_data *hard_output; 2348 int errors = 0; 2349 struct rte_mbuf *m; 2350 2351 for (i = 0; i < n; ++i) { 2352 ops_td = &ops[i]->ldpc_dec; 2353 hard_output = &ops_td->hard_output; 2354 m = hard_output->data; 2355 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0), 2356 hard_data_orig->segments[0].addr, 2357 hard_data_orig->segments[0].length)) 2358 errors++; 2359 } 2360 return errors; 2361 } 2362 2363 /* Check Number of code blocks errors */ 2364 static int 2365 validate_turbo_bler(struct rte_bbdev_dec_op **ops, const uint16_t n) 2366 { 2367 unsigned int i; 2368 struct op_data_entries *hard_data_orig = &test_vector.entries[DATA_HARD_OUTPUT]; 2369 struct rte_bbdev_op_turbo_dec *ops_td; 2370 struct rte_bbdev_op_data *hard_output; 2371 int errors = 0; 2372 struct rte_mbuf *m; 2373 2374 for (i = 0; i < n; ++i) { 2375 ops_td = &ops[i]->turbo_dec; 2376 hard_output = &ops_td->hard_output; 2377 m = hard_output->data; 2378 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0), 2379 hard_data_orig->segments[0].addr, 2380 hard_data_orig->segments[0].length)) 2381 errors++; 2382 } 2383 return errors; 2384 } 2385 2386 2387 static int 2388 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 2389 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 2390 { 2391 unsigned int i; 2392 int ret; 2393 struct op_data_entries *hard_data_orig = 2394 &test_vector.entries[DATA_HARD_OUTPUT]; 2395 struct op_data_entries *soft_data_orig = 2396 &test_vector.entries[DATA_SOFT_OUTPUT]; 2397 struct op_data_entries *harq_data_orig = 2398 &test_vector.entries[DATA_HARQ_OUTPUT]; 2399 struct rte_bbdev_op_ldpc_dec *ops_td; 2400 struct rte_bbdev_op_data 
*hard_output; 2401 struct rte_bbdev_op_data *harq_output; 2402 struct rte_bbdev_op_data *soft_output; 2403 struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec; 2404 2405 for (i = 0; i < n; ++i) { 2406 ops_td = &ops[i]->ldpc_dec; 2407 hard_output = &ops_td->hard_output; 2408 harq_output = &ops_td->harq_combined_output; 2409 soft_output = &ops_td->soft_output; 2410 2411 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2412 TEST_ASSERT_SUCCESS(ret, 2413 "Checking status and ordering for decoder failed"); 2414 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2415 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2416 "Returned iter_count (%d) > expected iter_count (%d)", 2417 ops_td->iter_count, ref_td->iter_count); 2418 /* 2419 * We can ignore output data when the decoding failed to 2420 * converge or for loop-back cases 2421 */ 2422 if (!check_bit(ops[i]->ldpc_dec.op_flags, 2423 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 2424 ) && ( 2425 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR 2426 )) == 0) 2427 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2428 hard_data_orig), 2429 "Hard output buffers (CB=%u) are not equal", 2430 i); 2431 2432 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE) 2433 TEST_ASSERT_SUCCESS(validate_op_so_chain(soft_output, 2434 soft_data_orig), 2435 "Soft output buffers (CB=%u) are not equal", 2436 i); 2437 if (ref_op->ldpc_dec.op_flags & 2438 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) { 2439 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2440 harq_data_orig, ops_td), 2441 "HARQ output buffers (CB=%u) are not equal", 2442 i); 2443 } 2444 if (ref_op->ldpc_dec.op_flags & 2445 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 2446 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2447 harq_data_orig, ops_td), 2448 "HARQ output buffers (CB=%u) are not equal", 2449 i); 2450 2451 } 2452 2453 return TEST_SUCCESS; 2454 } 2455 2456 2457 static int 2458 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2459 struct rte_bbdev_enc_op *ref_op) 2460 { 2461 unsigned int i; 2462 int ret; 2463 struct op_data_entries *hard_data_orig = 2464 &test_vector.entries[DATA_HARD_OUTPUT]; 2465 2466 for (i = 0; i < n; ++i) { 2467 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2468 TEST_ASSERT_SUCCESS(ret, 2469 "Checking status and ordering for encoder failed"); 2470 TEST_ASSERT_SUCCESS(validate_op_chain( 2471 &ops[i]->turbo_enc.output, 2472 hard_data_orig), 2473 "Output buffers (CB=%u) are not equal", 2474 i); 2475 } 2476 2477 return TEST_SUCCESS; 2478 } 2479 2480 static int 2481 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2482 struct rte_bbdev_enc_op *ref_op) 2483 { 2484 unsigned int i; 2485 int ret; 2486 struct op_data_entries *hard_data_orig = 2487 &test_vector.entries[DATA_HARD_OUTPUT]; 2488 2489 for (i = 0; i < n; ++i) { 2490 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2491 TEST_ASSERT_SUCCESS(ret, 2492 "Checking status and ordering for encoder failed"); 2493 TEST_ASSERT_SUCCESS(validate_op_chain( 2494 &ops[i]->ldpc_enc.output, 2495 hard_data_orig), 2496 "Output buffers (CB=%u) are not equal", 2497 i); 2498 } 2499 2500 return TEST_SUCCESS; 2501 } 2502 2503 static inline int 2504 validate_op_fft_chain(struct rte_bbdev_op_data *op, struct op_data_entries *orig_op) 2505 { 2506 struct rte_mbuf *m = op->data; 2507 uint8_t i, nb_dst_segments = orig_op->nb_segments; 2508 int16_t delt, abs_delt, thres_hold = 3; 2509 uint32_t j, data_len_iq, error_num; 2510 int16_t 
	*ref_out, *op_out;

	TEST_ASSERT(nb_dst_segments == m->nb_segs,
			"Number of segments differ in original (%u) and filled (%u) op fft",
			nb_dst_segments, m->nb_segs);

	/* Due to mbuf size limitations, FFT validation uses op->length rather than real mbuf lengths. */
	for (i = 0; i < nb_dst_segments; ++i) {
		uint16_t offset = (i == 0) ? op->offset : 0;
		uint32_t data_len = op->length;

		TEST_ASSERT(orig_op->segments[i].length == data_len,
				"Length of segment differ in original (%u) and filled (%u) op fft",
				orig_op->segments[i].length, data_len);
		/* Divide by 2 to get the number of 16-bit samples. */
		data_len_iq = data_len >> 1;
		ref_out = (int16_t *)(orig_op->segments[i].addr);
		op_out = rte_pktmbuf_mtod_offset(m, int16_t *, offset);
		error_num = 0;
		for (j = 0; j < data_len_iq; j++) {
			delt = ref_out[j] - op_out[j];
			abs_delt = delt > 0 ? delt : -delt;
			error_num += (abs_delt > thres_hold ? 1 : 0);
		}
		if (error_num > 0) {
			rte_memdump(stdout, "Buffer A", ref_out, data_len);
			rte_memdump(stdout, "Buffer B", op_out, data_len);
			TEST_ASSERT(error_num == 0,
					"FFT output does not match, total (%u) errors (%u)",
					data_len_iq, error_num);
		}

		m = m->next;
	}

	return TEST_SUCCESS;
}

static int
validate_fft_op(struct rte_bbdev_fft_op **ops, const uint16_t n,
		struct rte_bbdev_fft_op *ref_op)
{
	unsigned int i;
	int ret;
	struct op_data_entries *fft_data_orig = &test_vector.entries[DATA_HARD_OUTPUT];
	struct op_data_entries *fft_pwr_orig = &test_vector.entries[DATA_SOFT_OUTPUT];

	for (i = 0; i < n; ++i) {
		ret = check_fft_status_and_ordering(ops[i], i, ref_op->status);
		TEST_ASSERT_SUCCESS(ret, "Checking status and ordering for FFT failed");
		TEST_ASSERT_SUCCESS(validate_op_fft_chain(
				&ops[i]->fft.base_output, fft_data_orig),
				"FFT output buffers (op=%u) do not match", i);
		if (check_bit(ops[i]->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS))
			TEST_ASSERT_SUCCESS(validate_op_fft_chain(
					&ops[i]->fft.power_meas_output, fft_pwr_orig),
					"FFT power output buffers (op=%u) do not match", i);
	}

	return TEST_SUCCESS;
}

static void
create_reference_dec_op(struct rte_bbdev_dec_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;

	op->turbo_dec = test_vector.turbo_dec;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->turbo_dec.input.length +=
				entry->segments[i].length;
}

static void
create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;

	op->ldpc_dec = test_vector.ldpc_dec;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->ldpc_dec.input.length +=
				entry->segments[i].length;
	if (test_vector.ldpc_dec.op_flags &
			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) {
		entry = &test_vector.entries[DATA_HARQ_INPUT];
		for (i = 0; i < entry->nb_segments; ++i)
			op->ldpc_dec.harq_combined_input.length +=
					entry->segments[i].length;
	}
}

static void
create_reference_fft_op(struct rte_bbdev_fft_op *op)
{
	unsigned int i;
	struct op_data_entries *entry;
	op->fft = test_vector.fft;
	entry = &test_vector.entries[DATA_INPUT];
	for (i = 0; i < entry->nb_segments; ++i)
		op->fft.base_input.length +=
entry->segments[i].length; 2614 } 2615 2616 static void 2617 create_reference_enc_op(struct rte_bbdev_enc_op *op) 2618 { 2619 unsigned int i; 2620 struct op_data_entries *entry; 2621 2622 op->turbo_enc = test_vector.turbo_enc; 2623 entry = &test_vector.entries[DATA_INPUT]; 2624 for (i = 0; i < entry->nb_segments; ++i) 2625 op->turbo_enc.input.length += 2626 entry->segments[i].length; 2627 } 2628 2629 static void 2630 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op) 2631 { 2632 unsigned int i; 2633 struct op_data_entries *entry; 2634 2635 op->ldpc_enc = test_vector.ldpc_enc; 2636 entry = &test_vector.entries[DATA_INPUT]; 2637 for (i = 0; i < entry->nb_segments; ++i) 2638 op->ldpc_enc.input.length += 2639 entry->segments[i].length; 2640 } 2641 2642 static uint32_t 2643 calc_dec_TB_size(struct rte_bbdev_dec_op *op) 2644 { 2645 uint8_t i; 2646 uint32_t c, r, tb_size = 0; 2647 2648 if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2649 tb_size = op->turbo_dec.tb_params.k_neg; 2650 } else { 2651 c = op->turbo_dec.tb_params.c; 2652 r = op->turbo_dec.tb_params.r; 2653 for (i = 0; i < c-r; i++) 2654 tb_size += (r < op->turbo_dec.tb_params.c_neg) ? 2655 op->turbo_dec.tb_params.k_neg : 2656 op->turbo_dec.tb_params.k_pos; 2657 } 2658 return tb_size; 2659 } 2660 2661 static uint32_t 2662 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op) 2663 { 2664 uint8_t num_cbs = 0; 2665 uint32_t tb_size = 0; 2666 uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 2667 2668 if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) 2669 num_cbs = 1; 2670 else 2671 num_cbs = op->ldpc_dec.tb_params.c - op->ldpc_dec.tb_params.r; 2672 2673 tb_size = (sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler) * num_cbs; 2674 return tb_size; 2675 } 2676 2677 static uint32_t 2678 calc_enc_TB_size(struct rte_bbdev_enc_op *op) 2679 { 2680 uint8_t i; 2681 uint32_t c, r, tb_size = 0; 2682 2683 if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2684 tb_size = op->turbo_enc.tb_params.k_neg; 2685 } else { 2686 c = op->turbo_enc.tb_params.c; 2687 r = op->turbo_enc.tb_params.r; 2688 for (i = 0; i < c-r; i++) 2689 tb_size += (r < op->turbo_enc.tb_params.c_neg) ? 2690 op->turbo_enc.tb_params.k_neg : 2691 op->turbo_enc.tb_params.k_pos; 2692 } 2693 return tb_size; 2694 } 2695 2696 static uint32_t 2697 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op) 2698 { 2699 uint8_t num_cbs = 0; 2700 uint32_t tb_size = 0; 2701 uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 
22 : 10; 2702 2703 if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) 2704 num_cbs = 1; 2705 else 2706 num_cbs = op->ldpc_enc.tb_params.c - op->ldpc_enc.tb_params.r; 2707 2708 tb_size = (sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler) * num_cbs; 2709 return tb_size; 2710 } 2711 2712 static uint32_t 2713 calc_fft_size(struct rte_bbdev_fft_op *op) 2714 { 2715 uint32_t output_size; 2716 int num_cs = 0, i; 2717 for (i = 0; i < 12; i++) 2718 if (check_bit(op->fft.cs_bitmap, 1 << i)) 2719 num_cs++; 2720 output_size = (num_cs * op->fft.output_sequence_size * 4) << op->fft.num_antennas_log2; 2721 return output_size; 2722 } 2723 2724 static int 2725 init_test_op_params(struct test_op_params *op_params, 2726 enum rte_bbdev_op_type op_type, const int expected_status, 2727 const int vector_mask, struct rte_mempool *ops_mp, 2728 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores) 2729 { 2730 int ret = 0; 2731 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 2732 op_type == RTE_BBDEV_OP_LDPC_DEC) 2733 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, 2734 &op_params->ref_dec_op, 1); 2735 else if (op_type == RTE_BBDEV_OP_FFT) 2736 ret = rte_bbdev_fft_op_alloc_bulk(ops_mp, 2737 &op_params->ref_fft_op, 1); 2738 else 2739 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, 2740 &op_params->ref_enc_op, 1); 2741 2742 TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed"); 2743 2744 op_params->mp = ops_mp; 2745 op_params->burst_sz = burst_sz; 2746 op_params->num_to_process = num_to_process; 2747 op_params->num_lcores = num_lcores; 2748 op_params->vector_mask = vector_mask; 2749 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 2750 op_type == RTE_BBDEV_OP_LDPC_DEC) 2751 op_params->ref_dec_op->status = expected_status; 2752 else if (op_type == RTE_BBDEV_OP_TURBO_ENC 2753 || op_type == RTE_BBDEV_OP_LDPC_ENC) 2754 op_params->ref_enc_op->status = expected_status; 2755 else if (op_type == RTE_BBDEV_OP_FFT) 2756 op_params->ref_fft_op->status = expected_status; 2757 return 0; 2758 } 2759 2760 static int 2761 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id, 2762 struct test_op_params *op_params) 2763 { 2764 int t_ret, f_ret, socket_id = SOCKET_ID_ANY; 2765 unsigned int i; 2766 struct active_device *ad; 2767 unsigned int burst_sz = get_burst_sz(); 2768 enum rte_bbdev_op_type op_type = test_vector.op_type; 2769 const struct rte_bbdev_op_cap *capabilities = NULL; 2770 2771 ad = &active_devs[dev_id]; 2772 2773 /* Check if device supports op_type */ 2774 if (!is_avail_op(ad, test_vector.op_type)) 2775 return TEST_SUCCESS; 2776 2777 struct rte_bbdev_info info; 2778 rte_bbdev_info_get(ad->dev_id, &info); 2779 socket_id = GET_SOCKET(info.socket_id); 2780 2781 f_ret = create_mempools(ad, socket_id, op_type, 2782 get_num_ops()); 2783 if (f_ret != TEST_SUCCESS) { 2784 printf("Couldn't create mempools"); 2785 goto fail; 2786 } 2787 if (op_type == RTE_BBDEV_OP_NONE) 2788 op_type = RTE_BBDEV_OP_TURBO_ENC; 2789 2790 f_ret = init_test_op_params(op_params, test_vector.op_type, 2791 test_vector.expected_status, 2792 test_vector.mask, 2793 ad->ops_mempool, 2794 burst_sz, 2795 get_num_ops(), 2796 get_num_lcores()); 2797 if (f_ret != TEST_SUCCESS) { 2798 printf("Couldn't init test op params"); 2799 goto fail; 2800 } 2801 2802 2803 /* Find capabilities */ 2804 const struct rte_bbdev_op_cap *cap = info.drv.capabilities; 2805 do { 2806 if (cap->type == test_vector.op_type) { 2807 capabilities = cap; 2808 break; 2809 } 2810 cap++; 2811 } while (cap->type != RTE_BBDEV_OP_NONE); 2812 TEST_ASSERT_NOT_NULL(capabilities, 
2813 "Couldn't find capabilities"); 2814 2815 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2816 create_reference_dec_op(op_params->ref_dec_op); 2817 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 2818 create_reference_enc_op(op_params->ref_enc_op); 2819 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2820 create_reference_ldpc_enc_op(op_params->ref_enc_op); 2821 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2822 create_reference_ldpc_dec_op(op_params->ref_dec_op); 2823 else if (test_vector.op_type == RTE_BBDEV_OP_FFT) 2824 create_reference_fft_op(op_params->ref_fft_op); 2825 2826 for (i = 0; i < ad->nb_queues; ++i) { 2827 f_ret = fill_queue_buffers(op_params, 2828 ad->in_mbuf_pool, 2829 ad->hard_out_mbuf_pool, 2830 ad->soft_out_mbuf_pool, 2831 ad->harq_in_mbuf_pool, 2832 ad->harq_out_mbuf_pool, 2833 ad->queue_ids[i], 2834 capabilities, 2835 info.drv.min_alignment, 2836 socket_id); 2837 if (f_ret != TEST_SUCCESS) { 2838 printf("Couldn't init queue buffers"); 2839 goto fail; 2840 } 2841 } 2842 2843 /* Run test case function */ 2844 t_ret = test_case_func(ad, op_params); 2845 2846 /* Free active device resources and return */ 2847 free_buffers(ad, op_params); 2848 return t_ret; 2849 2850 fail: 2851 free_buffers(ad, op_params); 2852 return TEST_FAILED; 2853 } 2854 2855 /* Run given test function per active device per supported op type 2856 * per burst size. 2857 */ 2858 static int 2859 run_test_case(test_case_function *test_case_func) 2860 { 2861 int ret = 0; 2862 uint8_t dev; 2863 2864 /* Alloc op_params */ 2865 struct test_op_params *op_params = rte_zmalloc(NULL, 2866 sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE); 2867 TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params", 2868 RTE_ALIGN(sizeof(struct test_op_params), 2869 RTE_CACHE_LINE_SIZE)); 2870 2871 /* For each device run test case function */ 2872 for (dev = 0; dev < nb_active_devs; ++dev) 2873 ret |= run_test_case_on_device(test_case_func, dev, op_params); 2874 2875 rte_free(op_params); 2876 2877 return ret; 2878 } 2879 2880 2881 /* Push back the HARQ output from DDR to host */ 2882 static void 2883 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2884 struct rte_bbdev_dec_op **ops, 2885 const uint16_t n) 2886 { 2887 uint16_t j; 2888 int save_status, ret; 2889 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2890 struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2891 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2892 bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 2893 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2894 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2895 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2896 for (j = 0; j < n; ++j) { 2897 if ((loopback && mem_out) || hc_out) { 2898 save_status = ops[j]->status; 2899 ops[j]->ldpc_dec.op_flags = 2900 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2901 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2902 if (h_comp) 2903 ops[j]->ldpc_dec.op_flags += 2904 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2905 ops[j]->ldpc_dec.harq_combined_input.offset = 2906 harq_offset; 2907 ops[j]->ldpc_dec.harq_combined_output.offset = 0; 2908 harq_offset += HARQ_INCR; 2909 if (!loopback) 2910 ops[j]->ldpc_dec.harq_combined_input.length = 2911 ops[j]->ldpc_dec.harq_combined_output.length; 2912 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2913 &ops[j], 1); 2914 ret = 0; 2915 while (ret == 0) 2916 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2917 dev_id, queue_id, 
2918 &ops_deq[j], 1); 2919 ops[j]->ldpc_dec.op_flags = flags; 2920 ops[j]->status = save_status; 2921 } 2922 } 2923 } 2924 2925 /* 2926 * Push back the HARQ output from HW DDR to Host 2927 * Preload HARQ memory input and adjust HARQ offset 2928 */ 2929 static void 2930 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2931 struct rte_bbdev_dec_op **ops, const uint16_t n, 2932 bool preload) 2933 { 2934 uint16_t j; 2935 int deq; 2936 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2937 struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS]; 2938 struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; 2939 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2940 bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2941 bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 2942 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2943 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2944 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2945 if ((mem_in || hc_in) && preload) { 2946 for (j = 0; j < n; ++j) { 2947 save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input; 2948 save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output; 2949 ops[j]->ldpc_dec.op_flags = 2950 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2951 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2952 if (h_comp) 2953 ops[j]->ldpc_dec.op_flags += 2954 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2955 ops[j]->ldpc_dec.harq_combined_output.offset = 2956 harq_offset; 2957 ops[j]->ldpc_dec.harq_combined_input.offset = 0; 2958 harq_offset += HARQ_INCR; 2959 } 2960 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n); 2961 deq = 0; 2962 while (deq != n) 2963 deq += rte_bbdev_dequeue_ldpc_dec_ops( 2964 dev_id, queue_id, &ops_deq[deq], 2965 n - deq); 2966 /* Restore the operations */ 2967 for (j = 0; j < n; ++j) { 2968 ops[j]->ldpc_dec.op_flags = flags; 2969 ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j]; 2970 ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j]; 2971 } 2972 } 2973 harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2974 for (j = 0; j < n; ++j) { 2975 /* Adjust HARQ offset when we reach external DDR */ 2976 if (mem_in || hc_in) 2977 ops[j]->ldpc_dec.harq_combined_input.offset 2978 = harq_offset; 2979 if (mem_out || hc_out) 2980 ops[j]->ldpc_dec.harq_combined_output.offset 2981 = harq_offset; 2982 harq_offset += HARQ_INCR; 2983 } 2984 } 2985 2986 static void 2987 dequeue_event_callback(uint16_t dev_id, 2988 enum rte_bbdev_event_type event, void *cb_arg, 2989 void *ret_param) 2990 { 2991 int ret; 2992 uint16_t i; 2993 uint64_t total_time; 2994 uint16_t deq, burst_sz, num_ops; 2995 uint16_t queue_id = *(uint16_t *) ret_param; 2996 struct rte_bbdev_info info; 2997 double tb_len_bits; 2998 struct thread_params *tp = cb_arg; 2999 3000 /* Find matching thread params using queue_id */ 3001 for (i = 0; i < MAX_QUEUES; ++i, ++tp) 3002 if (tp->queue_id == queue_id) 3003 break; 3004 3005 if (i == MAX_QUEUES) { 3006 printf("%s: Queue_id from interrupt details was not found!\n", 3007 __func__); 3008 return; 3009 } 3010 3011 if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { 3012 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 3013 printf( 3014 "Dequeue interrupt handler called for incorrect event!\n"); 3015 return; 3016 } 3017 3018 burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED); 3019 num_ops = tp->op_params->num_to_process; 3020 3021 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 3022 deq = 
rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 3023 &tp->dec_ops[ 3024 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 3025 burst_sz); 3026 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 3027 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 3028 &tp->dec_ops[ 3029 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 3030 burst_sz); 3031 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 3032 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 3033 &tp->enc_ops[ 3034 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 3035 burst_sz); 3036 else if (test_vector.op_type == RTE_BBDEV_OP_FFT) 3037 deq = rte_bbdev_dequeue_fft_ops(dev_id, queue_id, 3038 &tp->fft_ops[ 3039 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 3040 burst_sz); 3041 else /*RTE_BBDEV_OP_TURBO_ENC*/ 3042 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 3043 &tp->enc_ops[ 3044 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 3045 burst_sz); 3046 3047 if (deq < burst_sz) { 3048 printf( 3049 "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n", 3050 burst_sz, deq); 3051 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 3052 return; 3053 } 3054 3055 if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) { 3056 __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); 3057 return; 3058 } 3059 3060 total_time = rte_rdtsc_precise() - tp->start_time; 3061 3062 rte_bbdev_info_get(dev_id, &info); 3063 3064 ret = TEST_SUCCESS; 3065 3066 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 3067 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3068 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op); 3069 /* get the max of iter_count for all dequeued ops */ 3070 for (i = 0; i < num_ops; ++i) 3071 tp->iter_count = RTE_MAX( 3072 tp->dec_ops[i]->turbo_dec.iter_count, 3073 tp->iter_count); 3074 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 3075 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) { 3076 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3077 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op); 3078 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 3079 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) { 3080 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3081 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op); 3082 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 3083 } else if (test_vector.op_type == RTE_BBDEV_OP_FFT) { 3084 struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op; 3085 ret = validate_fft_op(tp->fft_ops, num_ops, ref_op); 3086 rte_bbdev_fft_op_free_bulk(tp->fft_ops, deq); 3087 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 3088 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3089 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op, 3090 tp->op_params->vector_mask); 3091 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 3092 } 3093 3094 if (ret) { 3095 printf("Buffers validation failed\n"); 3096 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 3097 } 3098 3099 switch (test_vector.op_type) { 3100 case RTE_BBDEV_OP_TURBO_DEC: 3101 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op); 3102 break; 3103 case RTE_BBDEV_OP_TURBO_ENC: 3104 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op); 3105 break; 3106 case RTE_BBDEV_OP_LDPC_DEC: 3107 tb_len_bits = calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op); 3108 break; 3109 case RTE_BBDEV_OP_FFT: 3110 tb_len_bits = 
calc_fft_size(tp->op_params->ref_fft_op); 3111 break; 3112 case RTE_BBDEV_OP_LDPC_ENC: 3113 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op); 3114 break; 3115 case RTE_BBDEV_OP_NONE: 3116 tb_len_bits = 0.0; 3117 break; 3118 default: 3119 printf("Unknown op type: %d\n", test_vector.op_type); 3120 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 3121 return; 3122 } 3123 3124 tp->ops_per_sec += ((double)num_ops) / 3125 ((double)total_time / (double)rte_get_tsc_hz()); 3126 tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) / 3127 ((double)total_time / (double)rte_get_tsc_hz()); 3128 3129 __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); 3130 } 3131 3132 static int 3133 throughput_intr_lcore_ldpc_dec(void *arg) 3134 { 3135 struct thread_params *tp = arg; 3136 unsigned int enqueued; 3137 const uint16_t queue_id = tp->queue_id; 3138 const uint16_t burst_sz = tp->op_params->burst_sz; 3139 const uint16_t num_to_process = tp->op_params->num_to_process; 3140 struct rte_bbdev_dec_op *ops[num_to_process]; 3141 struct test_buffers *bufs = NULL; 3142 struct rte_bbdev_info info; 3143 int ret, i, j; 3144 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3145 uint16_t num_to_enq, enq; 3146 3147 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3148 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3149 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3150 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3151 3152 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3153 "BURST_SIZE should be <= %u", MAX_BURST); 3154 3155 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3156 "Failed to enable interrupts for dev: %u, queue_id: %u", 3157 tp->dev_id, queue_id); 3158 3159 rte_bbdev_info_get(tp->dev_id, &info); 3160 3161 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3162 "NUM_OPS cannot exceed %u for this device", 3163 info.drv.queue_size_lim); 3164 3165 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3166 3167 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3168 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3169 3170 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3171 3172 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 3173 num_to_process); 3174 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3175 num_to_process); 3176 ref_op->ldpc_dec.iter_max = get_iter_max(); 3177 3178 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3179 copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs, 3180 bufs->hard_outputs, bufs->soft_outputs, 3181 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3182 3183 /* Set counter to validate the ordering */ 3184 for (j = 0; j < num_to_process; ++j) 3185 ops[j]->opaque_data = (void *)(uintptr_t)j; 3186 3187 for (j = 0; j < TEST_REPETITIONS; ++j) { 3188 for (i = 0; i < num_to_process; ++i) { 3189 if (!loopback) 3190 mbuf_reset(ops[i]->ldpc_dec.hard_output.data); 3191 if (hc_out || loopback) 3192 mbuf_reset(ops[i]->ldpc_dec.harq_combined_output.data); 3193 if (ops[i]->ldpc_dec.soft_output.data != NULL) 3194 mbuf_reset(ops[i]->ldpc_dec.soft_output.data); 3195 } 3196 3197 tp->start_time = rte_rdtsc_precise(); 3198 for (enqueued = 0; enqueued < num_to_process;) { 3199 num_to_enq = burst_sz; 3200 3201 if (unlikely(num_to_process - enqueued < num_to_enq)) 3202 num_to_enq = num_to_process - enqueued; 3203 3204 enq = 0; 3205 do { 3206 enq += rte_bbdev_enqueue_ldpc_dec_ops( 3207 tp->dev_id, 3208 queue_id, 
&ops[enqueued], 3209 num_to_enq); 3210 } while (unlikely(num_to_enq != enq)); 3211 enqueued += enq; 3212 3213 /* Write to thread burst_sz current number of enqueued 3214 * descriptors. It ensures that proper number of 3215 * descriptors will be dequeued in callback 3216 * function - needed for last batch in case where 3217 * the number of operations is not a multiple of 3218 * burst size. 3219 */ 3220 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3221 3222 /* Wait until processing of previous batch is 3223 * completed 3224 */ 3225 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3226 } 3227 if (j != TEST_REPETITIONS - 1) 3228 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3229 } 3230 3231 return TEST_SUCCESS; 3232 } 3233 3234 static int 3235 throughput_intr_lcore_dec(void *arg) 3236 { 3237 struct thread_params *tp = arg; 3238 unsigned int enqueued; 3239 const uint16_t queue_id = tp->queue_id; 3240 const uint16_t burst_sz = tp->op_params->burst_sz; 3241 const uint16_t num_to_process = tp->op_params->num_to_process; 3242 struct rte_bbdev_dec_op *ops[num_to_process]; 3243 struct test_buffers *bufs = NULL; 3244 struct rte_bbdev_info info; 3245 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3246 int ret, i, j; 3247 uint16_t num_to_enq, enq; 3248 3249 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3250 "BURST_SIZE should be <= %u", MAX_BURST); 3251 3252 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3253 "Failed to enable interrupts for dev: %u, queue_id: %u", 3254 tp->dev_id, queue_id); 3255 3256 rte_bbdev_info_get(tp->dev_id, &info); 3257 3258 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3259 "NUM_OPS cannot exceed %u for this device", 3260 info.drv.queue_size_lim); 3261 3262 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3263 3264 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3265 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3266 3267 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3268 3269 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 3270 num_to_process); 3271 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process); 3272 ref_op->turbo_dec.iter_max = get_iter_max(); 3273 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3274 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs, 3275 bufs->hard_outputs, bufs->soft_outputs, 3276 tp->op_params->ref_dec_op); 3277 3278 /* Set counter to validate the ordering. */ 3279 for (j = 0; j < num_to_process; ++j) 3280 ops[j]->opaque_data = (void *)(uintptr_t)j; 3281 3282 for (j = 0; j < TEST_REPETITIONS; ++j) { 3283 for (i = 0; i < num_to_process; ++i) { 3284 mbuf_reset(ops[i]->turbo_dec.hard_output.data); 3285 if (ops[i]->turbo_dec.soft_output.data != NULL) 3286 mbuf_reset(ops[i]->turbo_dec.soft_output.data); 3287 } 3288 3289 tp->start_time = rte_rdtsc_precise(); 3290 for (enqueued = 0; enqueued < num_to_process;) { 3291 num_to_enq = burst_sz; 3292 3293 if (unlikely(num_to_process - enqueued < num_to_enq)) 3294 num_to_enq = num_to_process - enqueued; 3295 3296 enq = 0; 3297 do { 3298 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 3299 queue_id, &ops[enqueued], 3300 num_to_enq); 3301 } while (unlikely(num_to_enq != enq)); 3302 enqueued += enq; 3303 3304 /* Write to thread burst_sz current number of enqueued 3305 * descriptors. 
It ensures that proper number of 3306 * descriptors will be dequeued in callback 3307 * function - needed for last batch in case where 3308 * the number of operations is not a multiple of 3309 * burst size. 3310 */ 3311 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3312 3313 /* Wait until processing of previous batch is 3314 * completed 3315 */ 3316 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3317 } 3318 if (j != TEST_REPETITIONS - 1) 3319 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3320 } 3321 3322 return TEST_SUCCESS; 3323 } 3324 3325 static int 3326 throughput_intr_lcore_enc(void *arg) 3327 { 3328 struct thread_params *tp = arg; 3329 unsigned int enqueued; 3330 const uint16_t queue_id = tp->queue_id; 3331 const uint16_t burst_sz = tp->op_params->burst_sz; 3332 const uint16_t num_to_process = tp->op_params->num_to_process; 3333 struct rte_bbdev_enc_op *ops[num_to_process]; 3334 struct test_buffers *bufs = NULL; 3335 struct rte_bbdev_info info; 3336 int ret, i, j; 3337 uint16_t num_to_enq, enq; 3338 3339 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3340 "BURST_SIZE should be <= %u", MAX_BURST); 3341 3342 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3343 "Failed to enable interrupts for dev: %u, queue_id: %u", 3344 tp->dev_id, queue_id); 3345 3346 rte_bbdev_info_get(tp->dev_id, &info); 3347 3348 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3349 "NUM_OPS cannot exceed %u for this device", 3350 info.drv.queue_size_lim); 3351 3352 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3353 3354 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3355 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3356 3357 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3358 3359 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 3360 num_to_process); 3361 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3362 num_to_process); 3363 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3364 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs, 3365 bufs->hard_outputs, tp->op_params->ref_enc_op); 3366 3367 /* Set counter to validate the ordering */ 3368 for (j = 0; j < num_to_process; ++j) 3369 ops[j]->opaque_data = (void *)(uintptr_t)j; 3370 3371 for (j = 0; j < TEST_REPETITIONS; ++j) { 3372 for (i = 0; i < num_to_process; ++i) 3373 mbuf_reset(ops[i]->turbo_enc.output.data); 3374 3375 tp->start_time = rte_rdtsc_precise(); 3376 for (enqueued = 0; enqueued < num_to_process;) { 3377 num_to_enq = burst_sz; 3378 3379 if (unlikely(num_to_process - enqueued < num_to_enq)) 3380 num_to_enq = num_to_process - enqueued; 3381 3382 enq = 0; 3383 do { 3384 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 3385 queue_id, &ops[enqueued], 3386 num_to_enq); 3387 } while (unlikely(enq != num_to_enq)); 3388 enqueued += enq; 3389 3390 /* Write to thread burst_sz current number of enqueued 3391 * descriptors. It ensures that proper number of 3392 * descriptors will be dequeued in callback 3393 * function - needed for last batch in case where 3394 * the number of operations is not a multiple of 3395 * burst size. 
3396 */ 3397 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3398 3399 /* Wait until processing of previous batch is 3400 * completed 3401 */ 3402 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3403 } 3404 if (j != TEST_REPETITIONS - 1) 3405 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3406 } 3407 3408 return TEST_SUCCESS; 3409 } 3410 3411 3412 static int 3413 throughput_intr_lcore_ldpc_enc(void *arg) 3414 { 3415 struct thread_params *tp = arg; 3416 unsigned int enqueued; 3417 const uint16_t queue_id = tp->queue_id; 3418 const uint16_t burst_sz = tp->op_params->burst_sz; 3419 const uint16_t num_to_process = tp->op_params->num_to_process; 3420 struct rte_bbdev_enc_op *ops[num_to_process]; 3421 struct test_buffers *bufs = NULL; 3422 struct rte_bbdev_info info; 3423 int ret, i, j; 3424 uint16_t num_to_enq, enq; 3425 3426 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3427 "BURST_SIZE should be <= %u", MAX_BURST); 3428 3429 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3430 "Failed to enable interrupts for dev: %u, queue_id: %u", 3431 tp->dev_id, queue_id); 3432 3433 rte_bbdev_info_get(tp->dev_id, &info); 3434 3435 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3436 "NUM_OPS cannot exceed %u for this device", 3437 info.drv.queue_size_lim); 3438 3439 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3440 3441 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3442 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3443 3444 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3445 3446 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 3447 num_to_process); 3448 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3449 num_to_process); 3450 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3451 copy_reference_ldpc_enc_op(ops, num_to_process, 0, 3452 bufs->inputs, bufs->hard_outputs, 3453 tp->op_params->ref_enc_op); 3454 3455 /* Set counter to validate the ordering */ 3456 for (j = 0; j < num_to_process; ++j) 3457 ops[j]->opaque_data = (void *)(uintptr_t)j; 3458 3459 for (j = 0; j < TEST_REPETITIONS; ++j) { 3460 for (i = 0; i < num_to_process; ++i) 3461 mbuf_reset(ops[i]->turbo_enc.output.data); 3462 3463 tp->start_time = rte_rdtsc_precise(); 3464 for (enqueued = 0; enqueued < num_to_process;) { 3465 num_to_enq = burst_sz; 3466 3467 if (unlikely(num_to_process - enqueued < num_to_enq)) 3468 num_to_enq = num_to_process - enqueued; 3469 3470 enq = 0; 3471 do { 3472 enq += rte_bbdev_enqueue_ldpc_enc_ops( 3473 tp->dev_id, 3474 queue_id, &ops[enqueued], 3475 num_to_enq); 3476 } while (unlikely(enq != num_to_enq)); 3477 enqueued += enq; 3478 3479 /* Write to thread burst_sz current number of enqueued 3480 * descriptors. It ensures that proper number of 3481 * descriptors will be dequeued in callback 3482 * function - needed for last batch in case where 3483 * the number of operations is not a multiple of 3484 * burst size. 
3485 */ 3486 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3487 3488 /* Wait until processing of previous batch is 3489 * completed 3490 */ 3491 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3492 } 3493 if (j != TEST_REPETITIONS - 1) 3494 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3495 } 3496 3497 return TEST_SUCCESS; 3498 } 3499 3500 3501 static int 3502 throughput_intr_lcore_fft(void *arg) 3503 { 3504 struct thread_params *tp = arg; 3505 unsigned int enqueued; 3506 const uint16_t queue_id = tp->queue_id; 3507 const uint16_t burst_sz = tp->op_params->burst_sz; 3508 const uint16_t num_to_process = tp->op_params->num_to_process; 3509 struct rte_bbdev_fft_op *ops[num_to_process]; 3510 struct test_buffers *bufs = NULL; 3511 struct rte_bbdev_info info; 3512 int ret, i, j; 3513 uint16_t num_to_enq, enq; 3514 3515 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3516 "BURST_SIZE should be <= %u", MAX_BURST); 3517 3518 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3519 "Failed to enable interrupts for dev: %u, queue_id: %u", 3520 tp->dev_id, queue_id); 3521 3522 rte_bbdev_info_get(tp->dev_id, &info); 3523 3524 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3525 "NUM_OPS cannot exceed %u for this device", 3526 info.drv.queue_size_lim); 3527 3528 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3529 3530 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3531 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3532 3533 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3534 3535 ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops, 3536 num_to_process); 3537 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3538 num_to_process); 3539 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3540 copy_reference_fft_op(ops, num_to_process, 0, bufs->inputs, 3541 bufs->hard_outputs, bufs->soft_outputs, tp->op_params->ref_fft_op); 3542 3543 /* Set counter to validate the ordering */ 3544 for (j = 0; j < num_to_process; ++j) 3545 ops[j]->opaque_data = (void *)(uintptr_t)j; 3546 3547 for (j = 0; j < TEST_REPETITIONS; ++j) { 3548 for (i = 0; i < num_to_process; ++i) 3549 mbuf_reset(ops[i]->fft.base_output.data); 3550 3551 tp->start_time = rte_rdtsc_precise(); 3552 for (enqueued = 0; enqueued < num_to_process;) { 3553 num_to_enq = burst_sz; 3554 3555 if (unlikely(num_to_process - enqueued < num_to_enq)) 3556 num_to_enq = num_to_process - enqueued; 3557 3558 enq = 0; 3559 do { 3560 enq += rte_bbdev_enqueue_fft_ops(tp->dev_id, 3561 queue_id, &ops[enqueued], 3562 num_to_enq); 3563 } while (unlikely(enq != num_to_enq)); 3564 enqueued += enq; 3565 3566 /* Write to thread burst_sz current number of enqueued 3567 * descriptors. It ensures that proper number of 3568 * descriptors will be dequeued in callback 3569 * function - needed for last batch in case where 3570 * the number of operations is not a multiple of 3571 * burst size. 
3572 */ 3573 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3574 3575 /* Wait until processing of previous batch is 3576 * completed 3577 */ 3578 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3579 } 3580 if (j != TEST_REPETITIONS - 1) 3581 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3582 } 3583 3584 return TEST_SUCCESS; 3585 } 3586 3587 static int 3588 throughput_pmd_lcore_dec(void *arg) 3589 { 3590 struct thread_params *tp = arg; 3591 uint16_t enq, deq; 3592 uint64_t total_time = 0, start_time; 3593 const uint16_t queue_id = tp->queue_id; 3594 const uint16_t burst_sz = tp->op_params->burst_sz; 3595 const uint16_t num_ops = tp->op_params->num_to_process; 3596 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3597 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3598 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3599 struct test_buffers *bufs = NULL; 3600 int i, j, ret; 3601 struct rte_bbdev_info info; 3602 uint16_t num_to_enq; 3603 bool so_enable; 3604 3605 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3606 "BURST_SIZE should be <= %u", MAX_BURST); 3607 3608 rte_bbdev_info_get(tp->dev_id, &info); 3609 3610 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3611 "NUM_OPS cannot exceed %u for this device", 3612 info.drv.queue_size_lim); 3613 3614 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3615 3616 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3617 3618 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3619 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3620 ref_op->turbo_dec.iter_max = get_iter_max(); 3621 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3622 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3623 bufs->hard_outputs, bufs->soft_outputs, ref_op); 3624 3625 so_enable = check_bit(ops_enq[0]->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT); 3626 3627 /* Set counter to validate the ordering */ 3628 for (j = 0; j < num_ops; ++j) 3629 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3630 3631 for (i = 0; i < TEST_REPETITIONS; ++i) { 3632 uint32_t time_out = 0; 3633 for (j = 0; j < num_ops; ++j) 3634 mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data); 3635 if (so_enable) 3636 for (j = 0; j < num_ops; ++j) 3637 mbuf_reset(ops_enq[j]->turbo_dec.soft_output.data); 3638 3639 start_time = rte_rdtsc_precise(); 3640 3641 for (enq = 0, deq = 0; enq < num_ops;) { 3642 num_to_enq = burst_sz; 3643 3644 if (unlikely(num_ops - enq < num_to_enq)) 3645 num_to_enq = num_ops - enq; 3646 3647 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 3648 queue_id, &ops_enq[enq], num_to_enq); 3649 3650 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3651 queue_id, &ops_deq[deq], enq - deq); 3652 time_out++; 3653 if (time_out >= TIME_OUT_POLL) { 3654 timeout_exit(tp->dev_id); 3655 TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!"); 3656 } 3657 } 3658 3659 /* dequeue the remaining */ 3660 time_out = 0; 3661 while (deq < enq) { 3662 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3663 queue_id, &ops_deq[deq], enq - deq); 3664 time_out++; 3665 if (time_out >= TIME_OUT_POLL) { 3666 timeout_exit(tp->dev_id); 3667 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 3668 } 3669 } 3670 3671 total_time += rte_rdtsc_precise() - start_time; 3672 } 3673 3674 tp->iter_count = 0; 3675 /* get the max of iter_count for all dequeued ops */ 3676 for (i = 0; i < num_ops; ++i) { 3677 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, 3678 tp->iter_count); 3679 } 3680 
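	/* Validate the dequeued ops against the reference vector, then derive the
	 * per-core throughput reported below: ops_per_sec is
	 * (num_ops * TEST_REPETITIONS) divided by the elapsed time in seconds
	 * (total_time / rte_get_tsc_hz()), and mbps is
	 * ops_per_sec * tb_len_bits / 1e6, with tb_len_bits taken from
	 * calc_dec_TB_size(ref_op).
	 */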
3681 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3682 ret = validate_dec_op(ops_deq, num_ops, ref_op); 3683 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3684 } 3685 3686 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3687 3688 double tb_len_bits = calc_dec_TB_size(ref_op); 3689 3690 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3691 ((double)total_time / (double)rte_get_tsc_hz()); 3692 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3693 1000000.0) / ((double)total_time / 3694 (double)rte_get_tsc_hz()); 3695 3696 return TEST_SUCCESS; 3697 } 3698 3699 static int 3700 bler_pmd_lcore_ldpc_dec(void *arg) 3701 { 3702 struct thread_params *tp = arg; 3703 uint16_t enq, deq; 3704 uint64_t total_time = 0, start_time; 3705 const uint16_t queue_id = tp->queue_id; 3706 const uint16_t burst_sz = tp->op_params->burst_sz; 3707 const uint16_t num_ops = tp->op_params->num_to_process; 3708 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3709 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3710 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3711 struct test_buffers *bufs = NULL; 3712 int i, j, ret; 3713 float parity_bler = 0; 3714 struct rte_bbdev_info info; 3715 uint16_t num_to_enq; 3716 bool extDdr = check_bit(ldpc_cap_flags, 3717 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3718 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3719 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3720 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3721 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3722 3723 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3724 "BURST_SIZE should be <= %u", MAX_BURST); 3725 TEST_ASSERT_SUCCESS((num_ops == 0), "NUM_OPS must be greater than 0"); 3726 3727 rte_bbdev_info_get(tp->dev_id, &info); 3728 3729 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3730 "NUM_OPS cannot exceed %u for this device", 3731 info.drv.queue_size_lim); 3732 3733 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3734 3735 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3736 3737 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3738 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3739 3740 /* For BLER tests we need to enable early termination */ 3741 if (!check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3742 ref_op->ldpc_dec.op_flags += RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3743 3744 ref_op->ldpc_dec.iter_max = get_iter_max(); 3745 3746 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3747 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3748 bufs->hard_outputs, bufs->soft_outputs, 3749 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3750 generate_llr_input(num_ops, bufs->inputs, ref_op); 3751 3752 /* Set counter to validate the ordering */ 3753 for (j = 0; j < num_ops; ++j) 3754 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3755 3756 for (i = 0; i < 1; ++i) { /* Could add more iterations */ 3757 uint32_t time_out = 0; 3758 for (j = 0; j < num_ops; ++j) { 3759 if (!loopback) 3760 mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data); 3761 if (hc_out || loopback) 3762 mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data); 3763 if (ops_enq[j]->ldpc_dec.soft_output.data != NULL) 3764 mbuf_reset(ops_enq[j]->ldpc_dec.soft_output.data); 3765 } 3766 if (extDdr) 3767 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3768 num_ops, true); 3769 start_time = rte_rdtsc_precise(); 3770 3771 for (enq = 0, deq = 0; enq < num_ops;) { 3772 num_to_enq = burst_sz; 
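			/* Clamp the last burst so the enqueue loop never runs past num_ops. */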
3773 3774 if (unlikely(num_ops - enq < num_to_enq)) 3775 num_to_enq = num_ops - enq; 3776 3777 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3778 queue_id, &ops_enq[enq], num_to_enq); 3779 3780 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3781 queue_id, &ops_deq[deq], enq - deq); 3782 time_out++; 3783 if (time_out >= TIME_OUT_POLL) { 3784 timeout_exit(tp->dev_id); 3785 TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!"); 3786 } 3787 } 3788 3789 /* dequeue the remaining */ 3790 time_out = 0; 3791 while (deq < enq) { 3792 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3793 queue_id, &ops_deq[deq], enq - deq); 3794 time_out++; 3795 if (time_out >= TIME_OUT_POLL) { 3796 timeout_exit(tp->dev_id); 3797 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 3798 } 3799 } 3800 3801 total_time += rte_rdtsc_precise() - start_time; 3802 } 3803 3804 tp->iter_count = 0; 3805 tp->iter_average = 0; 3806 /* get the max of iter_count for all dequeued ops */ 3807 for (i = 0; i < num_ops; ++i) { 3808 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3809 tp->iter_count); 3810 tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count; 3811 if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR)) 3812 parity_bler += 1.0; 3813 } 3814 3815 parity_bler /= num_ops; /* This one is based on SYND */ 3816 tp->iter_average /= num_ops; 3817 tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops; 3818 3819 if (test_vector.op_type != RTE_BBDEV_OP_NONE 3820 && tp->bler == 0 3821 && parity_bler == 0 3822 && !hc_out) { 3823 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3824 tp->op_params->vector_mask); 3825 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3826 } 3827 3828 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3829 3830 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3831 tp->ops_per_sec = ((double)num_ops * 1) / 3832 ((double)total_time / (double)rte_get_tsc_hz()); 3833 tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) / 3834 1000000.0) / ((double)total_time / 3835 (double)rte_get_tsc_hz()); 3836 3837 return TEST_SUCCESS; 3838 } 3839 3840 3841 static int 3842 bler_pmd_lcore_turbo_dec(void *arg) 3843 { 3844 struct thread_params *tp = arg; 3845 uint16_t enq, deq; 3846 uint64_t total_time = 0, start_time; 3847 const uint16_t queue_id = tp->queue_id; 3848 const uint16_t burst_sz = tp->op_params->burst_sz; 3849 const uint16_t num_ops = tp->op_params->num_to_process; 3850 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3851 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3852 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3853 struct test_buffers *bufs = NULL; 3854 int i, j, ret; 3855 struct rte_bbdev_info info; 3856 uint16_t num_to_enq; 3857 3858 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3859 "BURST_SIZE should be <= %u", MAX_BURST); 3860 TEST_ASSERT_SUCCESS((num_ops == 0), "NUM_OPS must be greater than 0"); 3861 3862 rte_bbdev_info_get(tp->dev_id, &info); 3863 3864 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3865 "NUM_OPS cannot exceed %u for this device", 3866 info.drv.queue_size_lim); 3867 3868 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3869 3870 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3871 3872 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3873 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3874 3875 /* For BLER tests we need to enable early termination */ 3876 if (!check_bit(ref_op->turbo_dec.op_flags, RTE_BBDEV_TURBO_EARLY_TERMINATION)) 3877 
ref_op->turbo_dec.op_flags += RTE_BBDEV_TURBO_EARLY_TERMINATION; 3878 3879 ref_op->turbo_dec.iter_max = get_iter_max(); 3880 3881 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3882 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3883 bufs->hard_outputs, bufs->soft_outputs, 3884 ref_op); 3885 generate_turbo_llr_input(num_ops, bufs->inputs, ref_op); 3886 3887 /* Set counter to validate the ordering */ 3888 for (j = 0; j < num_ops; ++j) 3889 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3890 3891 for (i = 0; i < 1; ++i) { /* Could add more iterations */ 3892 uint32_t time_out = 0; 3893 for (j = 0; j < num_ops; ++j) { 3894 mbuf_reset( 3895 ops_enq[j]->turbo_dec.hard_output.data); 3896 } 3897 3898 start_time = rte_rdtsc_precise(); 3899 3900 for (enq = 0, deq = 0; enq < num_ops;) { 3901 num_to_enq = burst_sz; 3902 3903 if (unlikely(num_ops - enq < num_to_enq)) 3904 num_to_enq = num_ops - enq; 3905 3906 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 3907 queue_id, &ops_enq[enq], num_to_enq); 3908 3909 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3910 queue_id, &ops_deq[deq], enq - deq); 3911 time_out++; 3912 if (time_out >= TIME_OUT_POLL) { 3913 timeout_exit(tp->dev_id); 3914 TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!"); 3915 } 3916 } 3917 3918 /* dequeue the remaining */ 3919 time_out = 0; 3920 while (deq < enq) { 3921 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3922 queue_id, &ops_deq[deq], enq - deq); 3923 time_out++; 3924 if (time_out >= TIME_OUT_POLL) { 3925 timeout_exit(tp->dev_id); 3926 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 3927 } 3928 } 3929 3930 total_time += rte_rdtsc_precise() - start_time; 3931 } 3932 3933 tp->iter_count = 0; 3934 tp->iter_average = 0; 3935 /* get the max of iter_count for all dequeued ops */ 3936 for (i = 0; i < num_ops; ++i) { 3937 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, 3938 tp->iter_count); 3939 tp->iter_average += (double) ops_enq[i]->turbo_dec.iter_count; 3940 } 3941 3942 tp->iter_average /= num_ops; 3943 tp->bler = (double) validate_turbo_bler(ops_deq, num_ops) / num_ops; 3944 3945 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3946 3947 double tb_len_bits = calc_dec_TB_size(ref_op); 3948 tp->ops_per_sec = ((double)num_ops * 1) / 3949 ((double)total_time / (double)rte_get_tsc_hz()); 3950 tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) / 3951 1000000.0) / ((double)total_time / 3952 (double)rte_get_tsc_hz()); 3953 printf("TBS %.0f Time %.0f\n", tb_len_bits, 1000000.0 * 3954 ((double)total_time / (double)rte_get_tsc_hz())); 3955 3956 return TEST_SUCCESS; 3957 } 3958 3959 static int 3960 throughput_pmd_lcore_ldpc_dec(void *arg) 3961 { 3962 struct thread_params *tp = arg; 3963 uint16_t enq, deq; 3964 uint64_t total_time = 0, start_time; 3965 const uint16_t queue_id = tp->queue_id; 3966 const uint16_t burst_sz = tp->op_params->burst_sz; 3967 const uint16_t num_ops = tp->op_params->num_to_process; 3968 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3969 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3970 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3971 struct test_buffers *bufs = NULL; 3972 int i, j, ret; 3973 struct rte_bbdev_info info; 3974 uint16_t num_to_enq; 3975 bool extDdr = check_bit(ldpc_cap_flags, 3976 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3977 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3978 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3979 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3980 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3981 3982 TEST_ASSERT_SUCCESS((burst_sz 
> MAX_BURST), 3983 "BURST_SIZE should be <= %u", MAX_BURST); 3984 3985 rte_bbdev_info_get(tp->dev_id, &info); 3986 3987 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3988 "NUM_OPS cannot exceed %u for this device", 3989 info.drv.queue_size_lim); 3990 3991 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3992 3993 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3994 3995 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3996 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3997 3998 /* For throughput tests we need to disable early termination */ 3999 if (check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 4000 ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 4001 4002 ref_op->ldpc_dec.iter_max = get_iter_max(); 4003 /* Since ET is disabled, the expected iter_count is iter_max */ 4004 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 4005 4006 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4007 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 4008 bufs->hard_outputs, bufs->soft_outputs, 4009 bufs->harq_inputs, bufs->harq_outputs, ref_op); 4010 4011 /* Set counter to validate the ordering */ 4012 for (j = 0; j < num_ops; ++j) 4013 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4014 4015 for (i = 0; i < TEST_REPETITIONS; ++i) { 4016 uint32_t time_out = 0; 4017 for (j = 0; j < num_ops; ++j) { 4018 if (!loopback) 4019 mbuf_reset(ops_enq[j]->ldpc_dec.hard_output.data); 4020 if (hc_out || loopback) 4021 mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data); 4022 if (ops_enq[j]->ldpc_dec.soft_output.data != NULL) 4023 mbuf_reset(ops_enq[j]->ldpc_dec.soft_output.data); 4024 } 4025 if (extDdr) 4026 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 4027 num_ops, true); 4028 start_time = rte_rdtsc_precise(); 4029 4030 for (enq = 0, deq = 0; enq < num_ops;) { 4031 num_to_enq = burst_sz; 4032 4033 if (unlikely(num_ops - enq < num_to_enq)) 4034 num_to_enq = num_ops - enq; 4035 4036 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 4037 queue_id, &ops_enq[enq], num_to_enq); 4038 4039 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 4040 queue_id, &ops_deq[deq], enq - deq); 4041 time_out++; 4042 if (time_out >= TIME_OUT_POLL) { 4043 timeout_exit(tp->dev_id); 4044 TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!"); 4045 } 4046 } 4047 4048 /* dequeue the remaining */ 4049 time_out = 0; 4050 while (deq < enq) { 4051 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 4052 queue_id, &ops_deq[deq], enq - deq); 4053 time_out++; 4054 if (time_out >= TIME_OUT_POLL) { 4055 timeout_exit(tp->dev_id); 4056 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 4057 } 4058 } 4059 4060 total_time += rte_rdtsc_precise() - start_time; 4061 } 4062 4063 tp->iter_count = 0; 4064 /* get the max of iter_count for all dequeued ops */ 4065 for (i = 0; i < num_ops; ++i) { 4066 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 4067 tp->iter_count); 4068 } 4069 if (extDdr) { 4070 /* Read loopback is not thread safe */ 4071 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops); 4072 } 4073 4074 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4075 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 4076 tp->op_params->vector_mask); 4077 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4078 } 4079 4080 ret = rte_bbdev_queue_stop(tp->dev_id, queue_id); 4081 if (ret != 0) 4082 printf("Failed to stop queue on dev %u q_id: %u\n", tp->dev_id, queue_id); 4083 
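	/* Free the ops and compute per-core throughput as in the turbo decoder
	 * path: ops/s over TEST_REPETITIONS runs of num_ops operations, and Mbps
	 * scaled by the LDPC transport block size from calc_ldpc_dec_TB_size(ref_op).
	 */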
rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 4084 4085 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 4086 4087 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 4088 ((double)total_time / (double)rte_get_tsc_hz()); 4089 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 4090 1000000.0) / ((double)total_time / 4091 (double)rte_get_tsc_hz()); 4092 4093 return TEST_SUCCESS; 4094 } 4095 4096 static int 4097 throughput_pmd_lcore_enc(void *arg) 4098 { 4099 struct thread_params *tp = arg; 4100 uint16_t enq, deq; 4101 uint64_t total_time = 0, start_time; 4102 const uint16_t queue_id = tp->queue_id; 4103 const uint16_t burst_sz = tp->op_params->burst_sz; 4104 const uint16_t num_ops = tp->op_params->num_to_process; 4105 struct rte_bbdev_enc_op *ops_enq[num_ops]; 4106 struct rte_bbdev_enc_op *ops_deq[num_ops]; 4107 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 4108 struct test_buffers *bufs = NULL; 4109 int i, j, ret; 4110 struct rte_bbdev_info info; 4111 uint16_t num_to_enq; 4112 4113 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4114 "BURST_SIZE should be <= %u", MAX_BURST); 4115 4116 rte_bbdev_info_get(tp->dev_id, &info); 4117 4118 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 4119 "NUM_OPS cannot exceed %u for this device", 4120 info.drv.queue_size_lim); 4121 4122 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4123 4124 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 4125 4126 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 4127 num_ops); 4128 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 4129 num_ops); 4130 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4131 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs, 4132 bufs->hard_outputs, ref_op); 4133 4134 /* Set counter to validate the ordering */ 4135 for (j = 0; j < num_ops; ++j) 4136 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4137 4138 for (i = 0; i < TEST_REPETITIONS; ++i) { 4139 uint32_t time_out = 0; 4140 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4141 for (j = 0; j < num_ops; ++j) 4142 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 4143 4144 start_time = rte_rdtsc_precise(); 4145 4146 for (enq = 0, deq = 0; enq < num_ops;) { 4147 num_to_enq = burst_sz; 4148 4149 if (unlikely(num_ops - enq < num_to_enq)) 4150 num_to_enq = num_ops - enq; 4151 4152 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 4153 queue_id, &ops_enq[enq], num_to_enq); 4154 4155 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 4156 queue_id, &ops_deq[deq], enq - deq); 4157 time_out++; 4158 if (time_out >= TIME_OUT_POLL) { 4159 timeout_exit(tp->dev_id); 4160 TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!"); 4161 } 4162 } 4163 4164 /* dequeue the remaining */ 4165 time_out = 0; 4166 while (deq < enq) { 4167 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 4168 queue_id, &ops_deq[deq], enq - deq); 4169 time_out++; 4170 if (time_out >= TIME_OUT_POLL) { 4171 timeout_exit(tp->dev_id); 4172 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 4173 } 4174 } 4175 4176 total_time += rte_rdtsc_precise() - start_time; 4177 } 4178 4179 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4180 ret = validate_enc_op(ops_deq, num_ops, ref_op); 4181 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4182 } 4183 4184 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 4185 4186 double tb_len_bits = calc_enc_TB_size(ref_op); 4187 4188 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 4189 ((double)total_time / (double)rte_get_tsc_hz()); 4190 tp->mbps = 
(((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 4191 / 1000000.0) / ((double)total_time / 4192 (double)rte_get_tsc_hz()); 4193 4194 return TEST_SUCCESS; 4195 } 4196 4197 static int 4198 throughput_pmd_lcore_ldpc_enc(void *arg) 4199 { 4200 struct thread_params *tp = arg; 4201 uint16_t enq, deq; 4202 uint64_t total_time = 0, start_time; 4203 const uint16_t queue_id = tp->queue_id; 4204 const uint16_t burst_sz = tp->op_params->burst_sz; 4205 const uint16_t num_ops = tp->op_params->num_to_process; 4206 struct rte_bbdev_enc_op *ops_enq[num_ops]; 4207 struct rte_bbdev_enc_op *ops_deq[num_ops]; 4208 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 4209 struct test_buffers *bufs = NULL; 4210 int i, j, ret; 4211 struct rte_bbdev_info info; 4212 uint16_t num_to_enq; 4213 4214 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4215 "BURST_SIZE should be <= %u", MAX_BURST); 4216 4217 rte_bbdev_info_get(tp->dev_id, &info); 4218 4219 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 4220 "NUM_OPS cannot exceed %u for this device", 4221 info.drv.queue_size_lim); 4222 4223 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4224 4225 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 4226 4227 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 4228 num_ops); 4229 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 4230 num_ops); 4231 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4232 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, 4233 bufs->hard_outputs, ref_op); 4234 4235 /* Set counter to validate the ordering */ 4236 for (j = 0; j < num_ops; ++j) 4237 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4238 4239 for (i = 0; i < TEST_REPETITIONS; ++i) { 4240 uint32_t time_out = 0; 4241 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4242 for (j = 0; j < num_ops; ++j) 4243 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 4244 4245 start_time = rte_rdtsc_precise(); 4246 4247 for (enq = 0, deq = 0; enq < num_ops;) { 4248 num_to_enq = burst_sz; 4249 4250 if (unlikely(num_ops - enq < num_to_enq)) 4251 num_to_enq = num_ops - enq; 4252 4253 enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, 4254 queue_id, &ops_enq[enq], num_to_enq); 4255 4256 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 4257 queue_id, &ops_deq[deq], enq - deq); 4258 time_out++; 4259 if (time_out >= TIME_OUT_POLL) { 4260 timeout_exit(tp->dev_id); 4261 TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!"); 4262 } 4263 } 4264 4265 /* dequeue the remaining */ 4266 time_out = 0; 4267 while (deq < enq) { 4268 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 4269 queue_id, &ops_deq[deq], enq - deq); 4270 time_out++; 4271 if (time_out >= TIME_OUT_POLL) { 4272 timeout_exit(tp->dev_id); 4273 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 4274 } 4275 } 4276 4277 total_time += rte_rdtsc_precise() - start_time; 4278 } 4279 4280 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4281 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op); 4282 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4283 } 4284 4285 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 4286 4287 double tb_len_bits = calc_ldpc_enc_TB_size(ref_op); 4288 4289 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 4290 ((double)total_time / (double)rte_get_tsc_hz()); 4291 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 4292 / 1000000.0) / ((double)total_time / 4293 (double)rte_get_tsc_hz()); 4294 4295 return TEST_SUCCESS; 4296 } 4297 4298 static int 4299 throughput_pmd_lcore_fft(void 
*arg) 4300 { 4301 struct thread_params *tp = arg; 4302 uint16_t enq, deq; 4303 uint64_t total_time = 0, start_time; 4304 const uint16_t queue_id = tp->queue_id; 4305 const uint16_t burst_sz = tp->op_params->burst_sz; 4306 const uint16_t num_ops = tp->op_params->num_to_process; 4307 struct rte_bbdev_fft_op *ops_enq[num_ops]; 4308 struct rte_bbdev_fft_op *ops_deq[num_ops]; 4309 struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op; 4310 struct test_buffers *bufs = NULL; 4311 int i, j, ret; 4312 struct rte_bbdev_info info; 4313 uint16_t num_to_enq; 4314 4315 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4316 "BURST_SIZE should be <= %u", MAX_BURST); 4317 4318 rte_bbdev_info_get(tp->dev_id, &info); 4319 4320 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 4321 "NUM_OPS cannot exceed %u for this device", 4322 info.drv.queue_size_lim); 4323 4324 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4325 4326 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 4327 4328 ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 4329 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 4330 4331 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4332 copy_reference_fft_op(ops_enq, num_ops, 0, bufs->inputs, 4333 bufs->hard_outputs, bufs->soft_outputs, ref_op); 4334 4335 /* Set counter to validate the ordering */ 4336 for (j = 0; j < num_ops; ++j) 4337 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4338 4339 for (i = 0; i < TEST_REPETITIONS; ++i) { 4340 uint32_t time_out = 0; 4341 for (j = 0; j < num_ops; ++j) 4342 mbuf_reset(ops_enq[j]->fft.base_output.data); 4343 4344 start_time = rte_rdtsc_precise(); 4345 4346 for (enq = 0, deq = 0; enq < num_ops;) { 4347 num_to_enq = burst_sz; 4348 4349 if (unlikely(num_ops - enq < num_to_enq)) 4350 num_to_enq = num_ops - enq; 4351 4352 enq += rte_bbdev_enqueue_fft_ops(tp->dev_id, 4353 queue_id, &ops_enq[enq], num_to_enq); 4354 4355 deq += rte_bbdev_dequeue_fft_ops(tp->dev_id, 4356 queue_id, &ops_deq[deq], enq - deq); 4357 time_out++; 4358 if (time_out >= TIME_OUT_POLL) { 4359 timeout_exit(tp->dev_id); 4360 TEST_ASSERT_SUCCESS(TEST_FAILED, "Enqueue timeout!"); 4361 } 4362 } 4363 4364 /* dequeue the remaining */ 4365 time_out = 0; 4366 while (deq < enq) { 4367 deq += rte_bbdev_dequeue_fft_ops(tp->dev_id, 4368 queue_id, &ops_deq[deq], enq - deq); 4369 time_out++; 4370 if (time_out >= TIME_OUT_POLL) { 4371 timeout_exit(tp->dev_id); 4372 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 4373 } 4374 } 4375 4376 total_time += rte_rdtsc_precise() - start_time; 4377 } 4378 4379 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4380 ret = validate_fft_op(ops_deq, num_ops, ref_op); 4381 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4382 } 4383 4384 rte_bbdev_fft_op_free_bulk(ops_enq, num_ops); 4385 4386 double tb_len_bits = calc_fft_size(ref_op); 4387 4388 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 4389 ((double)total_time / (double)rte_get_tsc_hz()); 4390 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 4391 1000000.0) / ((double)total_time / 4392 (double)rte_get_tsc_hz()); 4393 4394 return TEST_SUCCESS; 4395 } 4396 4397 static void 4398 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores) 4399 { 4400 unsigned int iter = 0; 4401 double total_mops = 0, total_mbps = 0; 4402 4403 for (iter = 0; iter < used_cores; iter++) { 4404 printf( 4405 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n", 4406 t_params[iter].lcore_id, 
t_params[iter].ops_per_sec, 4407 t_params[iter].mbps); 4408 total_mops += t_params[iter].ops_per_sec; 4409 total_mbps += t_params[iter].mbps; 4410 } 4411 printf( 4412 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n", 4413 used_cores, total_mops, total_mbps); 4414 } 4415 4416 /* Aggregate the performance results over the number of cores used */ 4417 static void 4418 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores) 4419 { 4420 unsigned int core_idx = 0; 4421 double total_mops = 0, total_mbps = 0; 4422 uint8_t iter_count = 0; 4423 4424 for (core_idx = 0; core_idx < used_cores; core_idx++) { 4425 printf( 4426 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n", 4427 t_params[core_idx].lcore_id, 4428 t_params[core_idx].ops_per_sec, 4429 t_params[core_idx].mbps, 4430 t_params[core_idx].iter_count); 4431 total_mops += t_params[core_idx].ops_per_sec; 4432 total_mbps += t_params[core_idx].mbps; 4433 iter_count = RTE_MAX(iter_count, 4434 t_params[core_idx].iter_count); 4435 } 4436 printf( 4437 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n", 4438 used_cores, total_mops, total_mbps, iter_count); 4439 } 4440 4441 /* Aggregate the performance results over the number of cores used */ 4442 static void 4443 print_dec_bler(struct thread_params *t_params, unsigned int used_cores) 4444 { 4445 unsigned int core_idx = 0; 4446 double total_mbps = 0, total_bler = 0, total_iter = 0; 4447 double snr = get_snr(); 4448 4449 for (core_idx = 0; core_idx < used_cores; core_idx++) { 4450 printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n", 4451 t_params[core_idx].lcore_id, 4452 t_params[core_idx].bler * 100, 4453 t_params[core_idx].iter_average, 4454 t_params[core_idx].mbps, 4455 get_vector_filename()); 4456 total_mbps += t_params[core_idx].mbps; 4457 total_bler += t_params[core_idx].bler; 4458 total_iter += t_params[core_idx].iter_average; 4459 } 4460 total_bler /= used_cores; 4461 total_iter /= used_cores; 4462 4463 printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.3f Mbps %s\n", 4464 snr, total_bler * 100, total_iter, get_iter_max(), 4465 total_mbps, get_vector_filename()); 4466 } 4467 4468 /* 4469 * Test function that determines BLER wireless performance 4470 */ 4471 static int 4472 bler_test(struct active_device *ad, 4473 struct test_op_params *op_params) 4474 { 4475 int ret; 4476 unsigned int lcore_id, used_cores = 0; 4477 struct thread_params *t_params; 4478 struct rte_bbdev_info info; 4479 lcore_function_t *bler_function; 4480 uint16_t num_lcores; 4481 const char *op_type_str; 4482 4483 rte_bbdev_info_get(ad->dev_id, &info); 4484 4485 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 4486 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 4487 test_vector.op_type); 4488 4489 printf("+ ------------------------------------------------------- +\n"); 4490 printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 4491 info.dev_name, ad->nb_queues, op_params->burst_sz, 4492 op_params->num_to_process, op_params->num_lcores, 4493 op_type_str, 4494 intr_enabled ? "Interrupt mode" : "PMD mode", 4495 (double)rte_get_tsc_hz() / 1000000000.0); 4496 4497 /* Set number of lcores */ 4498 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 4499 ? 
ad->nb_queues 4500 : op_params->num_lcores; 4501 4502 /* Allocate memory for thread parameters structure */ 4503 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 4504 RTE_CACHE_LINE_SIZE); 4505 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 4506 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 4507 RTE_CACHE_LINE_SIZE)); 4508 4509 if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) && 4510 !check_bit(test_vector.ldpc_dec.op_flags, 4511 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 4512 && !check_bit(test_vector.ldpc_dec.op_flags, 4513 RTE_BBDEV_LDPC_LLR_COMPRESSION)) 4514 bler_function = bler_pmd_lcore_ldpc_dec; 4515 else if ((test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) && 4516 !check_bit(test_vector.turbo_dec.op_flags, 4517 RTE_BBDEV_TURBO_SOFT_OUTPUT)) 4518 bler_function = bler_pmd_lcore_turbo_dec; 4519 else 4520 return TEST_SKIPPED; 4521 4522 __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED); 4523 4524 /* Main core is set at first entry */ 4525 t_params[0].dev_id = ad->dev_id; 4526 t_params[0].lcore_id = rte_lcore_id(); 4527 t_params[0].op_params = op_params; 4528 t_params[0].queue_id = ad->queue_ids[used_cores++]; 4529 t_params[0].iter_count = 0; 4530 4531 RTE_LCORE_FOREACH_WORKER(lcore_id) { 4532 if (used_cores >= num_lcores) 4533 break; 4534 4535 t_params[used_cores].dev_id = ad->dev_id; 4536 t_params[used_cores].lcore_id = lcore_id; 4537 t_params[used_cores].op_params = op_params; 4538 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 4539 t_params[used_cores].iter_count = 0; 4540 4541 rte_eal_remote_launch(bler_function, 4542 &t_params[used_cores++], lcore_id); 4543 } 4544 4545 __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED); 4546 ret = bler_function(&t_params[0]); 4547 4548 /* Main core is always used */ 4549 for (used_cores = 1; used_cores < num_lcores; used_cores++) 4550 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 4551 4552 print_dec_bler(t_params, num_lcores); 4553 4554 /* Return if test failed */ 4555 if (ret) { 4556 rte_free(t_params); 4557 return ret; 4558 } 4559 4560 /* Function to print something here*/ 4561 rte_free(t_params); 4562 return ret; 4563 } 4564 4565 /* 4566 * Test function that determines how long an enqueue + dequeue of a burst 4567 * takes on available lcores. 4568 */ 4569 static int 4570 throughput_test(struct active_device *ad, 4571 struct test_op_params *op_params) 4572 { 4573 int ret; 4574 unsigned int lcore_id, used_cores = 0; 4575 struct thread_params *t_params, *tp; 4576 struct rte_bbdev_info info; 4577 lcore_function_t *throughput_function; 4578 uint16_t num_lcores; 4579 const char *op_type_str; 4580 4581 rte_bbdev_info_get(ad->dev_id, &info); 4582 4583 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 4584 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 4585 test_vector.op_type); 4586 4587 printf("+ ------------------------------------------------------- +\n"); 4588 printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 4589 info.dev_name, ad->nb_queues, op_params->burst_sz, 4590 op_params->num_to_process, op_params->num_lcores, 4591 op_type_str, 4592 intr_enabled ? "Interrupt mode" : "PMD mode", 4593 (double)rte_get_tsc_hz() / 1000000000.0); 4594 4595 /* Set number of lcores */ 4596 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 4597 ? 
ad->nb_queues 4598 : op_params->num_lcores; 4599 4600 /* Allocate memory for thread parameters structure */ 4601 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 4602 RTE_CACHE_LINE_SIZE); 4603 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 4604 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 4605 RTE_CACHE_LINE_SIZE)); 4606 4607 if (intr_enabled) { 4608 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 4609 throughput_function = throughput_intr_lcore_dec; 4610 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 4611 throughput_function = throughput_intr_lcore_ldpc_dec; 4612 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 4613 throughput_function = throughput_intr_lcore_enc; 4614 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 4615 throughput_function = throughput_intr_lcore_ldpc_enc; 4616 else if (test_vector.op_type == RTE_BBDEV_OP_FFT) 4617 throughput_function = throughput_intr_lcore_fft; 4618 else 4619 throughput_function = throughput_intr_lcore_enc; 4620 4621 /* Dequeue interrupt callback registration */ 4622 ret = rte_bbdev_callback_register(ad->dev_id, 4623 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback, 4624 t_params); 4625 if (ret < 0) { 4626 rte_free(t_params); 4627 return ret; 4628 } 4629 } else { 4630 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 4631 throughput_function = throughput_pmd_lcore_dec; 4632 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 4633 throughput_function = throughput_pmd_lcore_ldpc_dec; 4634 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 4635 throughput_function = throughput_pmd_lcore_enc; 4636 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 4637 throughput_function = throughput_pmd_lcore_ldpc_enc; 4638 else if (test_vector.op_type == RTE_BBDEV_OP_FFT) 4639 throughput_function = throughput_pmd_lcore_fft; 4640 else 4641 throughput_function = throughput_pmd_lcore_enc; 4642 } 4643 4644 __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED); 4645 4646 /* Main core is set at first entry */ 4647 t_params[0].dev_id = ad->dev_id; 4648 t_params[0].lcore_id = rte_lcore_id(); 4649 t_params[0].op_params = op_params; 4650 t_params[0].queue_id = ad->queue_ids[used_cores++]; 4651 t_params[0].iter_count = 0; 4652 4653 RTE_LCORE_FOREACH_WORKER(lcore_id) { 4654 if (used_cores >= num_lcores) 4655 break; 4656 4657 t_params[used_cores].dev_id = ad->dev_id; 4658 t_params[used_cores].lcore_id = lcore_id; 4659 t_params[used_cores].op_params = op_params; 4660 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 4661 t_params[used_cores].iter_count = 0; 4662 4663 rte_eal_remote_launch(throughput_function, 4664 &t_params[used_cores++], lcore_id); 4665 } 4666 4667 __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED); 4668 ret = throughput_function(&t_params[0]); 4669 4670 /* Main core is always used */ 4671 for (used_cores = 1; used_cores < num_lcores; used_cores++) 4672 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 4673 4674 /* Return if test failed */ 4675 if (ret) { 4676 rte_free(t_params); 4677 return ret; 4678 } 4679 4680 /* Print throughput if interrupts are disabled and test passed */ 4681 if (!intr_enabled) { 4682 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC || 4683 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 4684 print_dec_throughput(t_params, num_lcores); 4685 else 4686 print_enc_throughput(t_params, num_lcores); 4687 rte_free(t_params); 4688 return ret; 4689 } 4690 4691 /* In interrupt TC we need to wait for the interrupt callback to 
dequeue 4692 * all pending operations. Skip waiting for queues which reported an 4693 * error using the processing_status variable. 4694 * Wait for main lcore operations. 4695 */ 4696 tp = &t_params[0]; 4697 while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < 4698 op_params->num_to_process) && 4699 (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) != 4700 TEST_FAILED)) 4701 rte_pause(); 4702 4703 tp->ops_per_sec /= TEST_REPETITIONS; 4704 tp->mbps /= TEST_REPETITIONS; 4705 ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED); 4706 4707 /* Wait for worker lcores operations */ 4708 for (used_cores = 1; used_cores < num_lcores; used_cores++) { 4709 tp = &t_params[used_cores]; 4710 4711 while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < 4712 op_params->num_to_process) && 4713 (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) != 4714 TEST_FAILED)) 4715 rte_pause(); 4716 4717 tp->ops_per_sec /= TEST_REPETITIONS; 4718 tp->mbps /= TEST_REPETITIONS; 4719 ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED); 4720 } 4721 4722 /* Print throughput if test passed */ 4723 if (!ret) { 4724 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC || 4725 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 4726 print_dec_throughput(t_params, num_lcores); 4727 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC || 4728 test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 4729 print_enc_throughput(t_params, num_lcores); 4730 } 4731 4732 rte_free(t_params); 4733 return ret; 4734 } 4735 4736 static int 4737 latency_test_dec(struct rte_mempool *mempool, 4738 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 4739 uint16_t dev_id, uint16_t queue_id, 4740 const uint16_t num_to_process, uint16_t burst_sz, 4741 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, bool disable_et) 4742 { 4743 int ret = TEST_SUCCESS; 4744 uint16_t i, j, dequeued; 4745 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4746 uint64_t start_time = 0, last_time = 0; 4747 4748 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4749 uint16_t enq = 0, deq = 0; 4750 uint32_t time_out = 0; 4751 bool first_time = true; 4752 last_time = 0; 4753 4754 if (unlikely(num_to_process - dequeued < burst_sz)) 4755 burst_sz = num_to_process - dequeued; 4756 4757 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4758 TEST_ASSERT_SUCCESS(ret, "rte_bbdev_dec_op_alloc_bulk() failed"); 4759 4760 ref_op->turbo_dec.iter_max = get_iter_max(); 4761 /* For validation tests we want to enable early termination */ 4762 if (!disable_et && !check_bit(ref_op->turbo_dec.op_flags, 4763 RTE_BBDEV_TURBO_EARLY_TERMINATION)) 4764 ref_op->turbo_dec.op_flags |= RTE_BBDEV_TURBO_EARLY_TERMINATION; 4765 4766 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4767 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 4768 bufs->inputs, 4769 bufs->hard_outputs, 4770 bufs->soft_outputs, 4771 ref_op); 4772 4773 /* Set counter to validate the ordering */ 4774 for (j = 0; j < burst_sz; ++j) 4775 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4776 4777 start_time = rte_rdtsc_precise(); 4778 4779 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq], 4780 burst_sz); 4781 TEST_ASSERT(enq == burst_sz, 4782 "Error enqueueing burst, expected %u, got %u", 4783 burst_sz, enq); 4784 4785 /* Dequeue */ 4786 do { 4787 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4788 &ops_deq[deq], burst_sz - deq); 4789 if (likely(first_time && (deq > 0))) { 4790 last_time = rte_rdtsc_precise() -
start_time; 4791 first_time = false; 4792 } 4793 time_out++; 4794 if (time_out >= TIME_OUT_POLL) { 4795 timeout_exit(dev_id); 4796 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 4797 } 4798 } while (unlikely(burst_sz != deq)); 4799 4800 *max_time = RTE_MAX(*max_time, last_time); 4801 *min_time = RTE_MIN(*min_time, last_time); 4802 *total_time += last_time; 4803 4804 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4805 ret = validate_dec_op(ops_deq, burst_sz, ref_op); 4806 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4807 } 4808 4809 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4810 dequeued += deq; 4811 } 4812 4813 return i; 4814 } 4815 4816 /* Test case for latency/validation for LDPC Decoder */ 4817 static int 4818 latency_test_ldpc_dec(struct rte_mempool *mempool, 4819 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 4820 int vector_mask, uint16_t dev_id, uint16_t queue_id, 4821 const uint16_t num_to_process, uint16_t burst_sz, 4822 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, 4823 bool disable_et) 4824 { 4825 int ret = TEST_SUCCESS; 4826 uint16_t i, j, dequeued; 4827 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4828 uint64_t start_time = 0, last_time = 0; 4829 bool extDdr = ldpc_cap_flags & 4830 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 4831 4832 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4833 uint16_t enq = 0, deq = 0; 4834 uint32_t time_out = 0; 4835 bool first_time = true; 4836 last_time = 0; 4837 4838 if (unlikely(num_to_process - dequeued < burst_sz)) 4839 burst_sz = num_to_process - dequeued; 4840 4841 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4842 TEST_ASSERT_SUCCESS(ret, 4843 "rte_bbdev_dec_op_alloc_bulk() failed"); 4844 4845 /* For latency tests we need to disable early termination */ 4846 if (disable_et && check_bit(ref_op->ldpc_dec.op_flags, 4847 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 4848 ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 4849 4850 ref_op->ldpc_dec.iter_max = get_iter_max(); 4851 /* When ET is disabled, the expected iter_count is iter_max */ 4852 if (disable_et) 4853 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 4854 4855 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4856 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 4857 bufs->inputs, 4858 bufs->hard_outputs, 4859 bufs->soft_outputs, 4860 bufs->harq_inputs, 4861 bufs->harq_outputs, 4862 ref_op); 4863 4864 if (extDdr) 4865 preload_harq_ddr(dev_id, queue_id, ops_enq, 4866 burst_sz, true); 4867 4868 /* Set counter to validate the ordering */ 4869 for (j = 0; j < burst_sz; ++j) 4870 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4871 4872 start_time = rte_rdtsc_precise(); 4873 4874 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 4875 &ops_enq[enq], burst_sz); 4876 TEST_ASSERT(enq == burst_sz, 4877 "Error enqueueing burst, expected %u, got %u", 4878 burst_sz, enq); 4879 4880 /* Dequeue */ 4881 do { 4882 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4883 &ops_deq[deq], burst_sz - deq); 4884 if (likely(first_time && (deq > 0))) { 4885 last_time = rte_rdtsc_precise() - start_time; 4886 first_time = false; 4887 } 4888 time_out++; 4889 if (time_out >= TIME_OUT_POLL) { 4890 timeout_exit(dev_id); 4891 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 4892 } 4893 } while (unlikely(burst_sz != deq)); 4894 4895 *max_time = RTE_MAX(*max_time, last_time); 4896 *min_time = RTE_MIN(*min_time, last_time); 4897 *total_time += last_time; 4898 4899 if (extDdr) 4900 
retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 4901 4902 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4903 ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, vector_mask); 4904 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4905 } 4906 4907 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4908 dequeued += deq; 4909 } 4910 return i; 4911 } 4912 4913 static int 4914 latency_test_enc(struct rte_mempool *mempool, 4915 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4916 uint16_t dev_id, uint16_t queue_id, 4917 const uint16_t num_to_process, uint16_t burst_sz, 4918 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4919 { 4920 int ret = TEST_SUCCESS; 4921 uint16_t i, j, dequeued; 4922 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4923 uint64_t start_time = 0, last_time = 0; 4924 4925 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4926 uint16_t enq = 0, deq = 0; 4927 uint32_t time_out = 0; 4928 bool first_time = true; 4929 last_time = 0; 4930 4931 if (unlikely(num_to_process - dequeued < burst_sz)) 4932 burst_sz = num_to_process - dequeued; 4933 4934 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4935 TEST_ASSERT_SUCCESS(ret, 4936 "rte_bbdev_enc_op_alloc_bulk() failed"); 4937 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4938 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 4939 bufs->inputs, 4940 bufs->hard_outputs, 4941 ref_op); 4942 4943 /* Set counter to validate the ordering */ 4944 for (j = 0; j < burst_sz; ++j) 4945 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4946 4947 start_time = rte_rdtsc_precise(); 4948 4949 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq], 4950 burst_sz); 4951 TEST_ASSERT(enq == burst_sz, 4952 "Error enqueueing burst, expected %u, got %u", 4953 burst_sz, enq); 4954 4955 /* Dequeue */ 4956 do { 4957 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4958 &ops_deq[deq], burst_sz - deq); 4959 if (likely(first_time && (deq > 0))) { 4960 last_time += rte_rdtsc_precise() - start_time; 4961 first_time = false; 4962 } 4963 time_out++; 4964 if (time_out >= TIME_OUT_POLL) { 4965 timeout_exit(dev_id); 4966 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 4967 } 4968 } while (unlikely(burst_sz != deq)); 4969 4970 *max_time = RTE_MAX(*max_time, last_time); 4971 *min_time = RTE_MIN(*min_time, last_time); 4972 *total_time += last_time; 4973 4974 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4975 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4976 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4977 } 4978 4979 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4980 dequeued += deq; 4981 } 4982 4983 return i; 4984 } 4985 4986 static int 4987 latency_test_ldpc_enc(struct rte_mempool *mempool, 4988 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4989 uint16_t dev_id, uint16_t queue_id, 4990 const uint16_t num_to_process, uint16_t burst_sz, 4991 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4992 { 4993 int ret = TEST_SUCCESS; 4994 uint16_t i, j, dequeued; 4995 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4996 uint64_t start_time = 0, last_time = 0; 4997 4998 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4999 uint16_t enq = 0, deq = 0; 5000 uint32_t time_out = 0; 5001 bool first_time = true; 5002 last_time = 0; 5003 5004 if (unlikely(num_to_process - dequeued < burst_sz)) 5005 burst_sz = num_to_process - dequeued; 5006 5007 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 5008 TEST_ASSERT_SUCCESS(ret, 5009 
"rte_bbdev_enc_op_alloc_bulk() failed"); 5010 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 5011 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 5012 bufs->inputs, 5013 bufs->hard_outputs, 5014 ref_op); 5015 5016 /* Set counter to validate the ordering */ 5017 for (j = 0; j < burst_sz; ++j) 5018 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 5019 5020 start_time = rte_rdtsc_precise(); 5021 5022 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 5023 &ops_enq[enq], burst_sz); 5024 TEST_ASSERT(enq == burst_sz, 5025 "Error enqueueing burst, expected %u, got %u", 5026 burst_sz, enq); 5027 5028 /* Dequeue */ 5029 do { 5030 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 5031 &ops_deq[deq], burst_sz - deq); 5032 if (likely(first_time && (deq > 0))) { 5033 last_time += rte_rdtsc_precise() - start_time; 5034 first_time = false; 5035 } 5036 time_out++; 5037 if (time_out >= TIME_OUT_POLL) { 5038 timeout_exit(dev_id); 5039 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 5040 } 5041 } while (unlikely(burst_sz != deq)); 5042 5043 *max_time = RTE_MAX(*max_time, last_time); 5044 *min_time = RTE_MIN(*min_time, last_time); 5045 *total_time += last_time; 5046 5047 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 5048 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 5049 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 5050 } 5051 5052 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 5053 dequeued += deq; 5054 } 5055 5056 return i; 5057 } 5058 5059 5060 static int 5061 latency_test_fft(struct rte_mempool *mempool, 5062 struct test_buffers *bufs, struct rte_bbdev_fft_op *ref_op, 5063 uint16_t dev_id, uint16_t queue_id, 5064 const uint16_t num_to_process, uint16_t burst_sz, 5065 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 5066 { 5067 int ret = TEST_SUCCESS; 5068 uint16_t i, j, dequeued; 5069 struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 5070 uint64_t start_time = 0, last_time = 0; 5071 5072 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 5073 uint16_t enq = 0, deq = 0; 5074 uint32_t time_out = 0; 5075 bool first_time = true; 5076 last_time = 0; 5077 5078 if (unlikely(num_to_process - dequeued < burst_sz)) 5079 burst_sz = num_to_process - dequeued; 5080 5081 ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz); 5082 TEST_ASSERT_SUCCESS(ret, 5083 "rte_bbdev_fft_op_alloc_bulk() failed"); 5084 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 5085 copy_reference_fft_op(ops_enq, burst_sz, dequeued, 5086 bufs->inputs, 5087 bufs->hard_outputs, bufs->soft_outputs, 5088 ref_op); 5089 5090 /* Set counter to validate the ordering */ 5091 for (j = 0; j < burst_sz; ++j) 5092 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 5093 5094 start_time = rte_rdtsc_precise(); 5095 5096 enq = rte_bbdev_enqueue_fft_ops(dev_id, queue_id, 5097 &ops_enq[enq], burst_sz); 5098 TEST_ASSERT(enq == burst_sz, 5099 "Error enqueueing burst, expected %u, got %u", 5100 burst_sz, enq); 5101 5102 /* Dequeue */ 5103 do { 5104 deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id, 5105 &ops_deq[deq], burst_sz - deq); 5106 if (likely(first_time && (deq > 0))) { 5107 last_time += rte_rdtsc_precise() - start_time; 5108 first_time = false; 5109 } 5110 time_out++; 5111 if (time_out >= TIME_OUT_POLL) { 5112 timeout_exit(dev_id); 5113 TEST_ASSERT_SUCCESS(TEST_FAILED, "Dequeue timeout!"); 5114 } 5115 } while (unlikely(burst_sz != deq)); 5116 5117 *max_time = RTE_MAX(*max_time, last_time); 5118 *min_time = RTE_MIN(*min_time, last_time); 5119 *total_time += last_time; 5120 5121 if 
(test_vector.op_type != RTE_BBDEV_OP_NONE) { 5122 ret = validate_fft_op(ops_deq, burst_sz, ref_op); 5123 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 5124 } 5125 5126 rte_bbdev_fft_op_free_bulk(ops_enq, deq); 5127 dequeued += deq; 5128 } 5129 5130 return i; 5131 } 5132 5133 /* Common function for running validation and latency test cases */ 5134 static int 5135 validation_latency_test(struct active_device *ad, 5136 struct test_op_params *op_params, bool latency_flag) 5137 { 5138 int iter; 5139 uint16_t burst_sz = op_params->burst_sz; 5140 const uint16_t num_to_process = op_params->num_to_process; 5141 const enum rte_bbdev_op_type op_type = test_vector.op_type; 5142 const uint16_t queue_id = ad->queue_ids[0]; 5143 struct test_buffers *bufs = NULL; 5144 struct rte_bbdev_info info; 5145 uint64_t total_time, min_time, max_time; 5146 const char *op_type_str; 5147 5148 total_time = max_time = 0; 5149 min_time = UINT64_MAX; 5150 5151 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 5152 "BURST_SIZE should be <= %u", MAX_BURST); 5153 5154 rte_bbdev_info_get(ad->dev_id, &info); 5155 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 5156 5157 op_type_str = rte_bbdev_op_type_str(op_type); 5158 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 5159 5160 printf("+ ------------------------------------------------------- +\n"); 5161 if (latency_flag) 5162 printf("== test: latency\ndev:"); 5163 else 5164 printf("== test: validation\ndev:"); 5165 printf("%s, burst size: %u, num ops: %u, op type: %s\n", 5166 info.dev_name, burst_sz, num_to_process, op_type_str); 5167 5168 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 5169 iter = latency_test_dec(op_params->mp, bufs, 5170 op_params->ref_dec_op, ad->dev_id, queue_id, 5171 num_to_process, burst_sz, &total_time, 5172 &min_time, &max_time, latency_flag); 5173 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 5174 iter = latency_test_ldpc_enc(op_params->mp, bufs, 5175 op_params->ref_enc_op, ad->dev_id, queue_id, 5176 num_to_process, burst_sz, &total_time, 5177 &min_time, &max_time); 5178 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 5179 iter = latency_test_ldpc_dec(op_params->mp, bufs, 5180 op_params->ref_dec_op, op_params->vector_mask, 5181 ad->dev_id, queue_id, num_to_process, 5182 burst_sz, &total_time, &min_time, &max_time, 5183 latency_flag); 5184 else if (op_type == RTE_BBDEV_OP_FFT) 5185 iter = latency_test_fft(op_params->mp, bufs, 5186 op_params->ref_fft_op, 5187 ad->dev_id, queue_id, 5188 num_to_process, burst_sz, &total_time, 5189 &min_time, &max_time); 5190 else /* RTE_BBDEV_OP_TURBO_ENC */ 5191 iter = latency_test_enc(op_params->mp, bufs, 5192 op_params->ref_enc_op, 5193 ad->dev_id, queue_id, 5194 num_to_process, burst_sz, &total_time, 5195 &min_time, &max_time); 5196 5197 if (iter <= 0) 5198 return TEST_FAILED; 5199 5200 printf("Operation latency:\n" 5201 "\tavg: %lg cycles, %lg us\n" 5202 "\tmin: %lg cycles, %lg us\n" 5203 "\tmax: %lg cycles, %lg us\n", 5204 (double)total_time / (double)iter, 5205 (double)(total_time * 1000000) / (double)iter / 5206 (double)rte_get_tsc_hz(), (double)min_time, 5207 (double)(min_time * 1000000) / (double)rte_get_tsc_hz(), 5208 (double)max_time, (double)(max_time * 1000000) / 5209 (double)rte_get_tsc_hz()); 5210 5211 return TEST_SUCCESS; 5212 } 5213 5214 static int 5215 latency_test(struct active_device *ad, struct test_op_params *op_params) 5216 { 5217 return validation_latency_test(ad, op_params, true); 5218 } 5219 5220 static int 5221 validation_test(struct active_device *ad, struct test_op_params 
static int
get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
		struct rte_bbdev_stats *stats)
{
	struct rte_bbdev *dev = &rte_bbdev_devices[dev_id];
	struct rte_bbdev_stats *q_stats;

	if (queue_id >= dev->data->num_queues)
		return -1;

	q_stats = &dev->data->queues[queue_id].queue_stats;

	stats->enqueued_count = q_stats->enqueued_count;
	stats->dequeued_count = q_stats->dequeued_count;
	stats->enqueue_err_count = q_stats->enqueue_err_count;
	stats->dequeue_err_count = q_stats->dequeue_err_count;
	stats->enqueue_warn_count = q_stats->enqueue_warn_count;
	stats->dequeue_warn_count = q_stats->dequeue_warn_count;
	stats->acc_offload_cycles = q_stats->acc_offload_cycles;

	return 0;
}

static int
offload_latency_test_fft(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_fft_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_fft_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs, bufs->soft_outputs,
					ref_op);

		/* Start time measurement for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_fft_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
				stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* Give the device time to process the ops */
		rte_delay_us(WAIT_OFFLOAD_US);

		/* Start time measurement for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
					&ops_deq[deq], enq);
		} while (unlikely(deq == 0));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed */
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_fft_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

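/*
 * Offload cost measurement for Turbo decode: the software enqueue cost is the
 * wall-clock enqueue time minus the accelerator cycles reported in
 * acc_offload_cycles, and the dequeue cost is measured on the first
 * successful dequeue after a fixed settling delay.
 */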
static int
offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
		ref_op->turbo_dec.iter_max = get_iter_max();
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					ref_op);

		/* Start time measurement for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time -
				stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* Give the device time to process the ops */
		rte_delay_us(WAIT_OFFLOAD_US);

		/* Start time measurement for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], enq);
		} while (unlikely(deq == 0));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed */
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

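/*
 * Offload cost measurement for LDPC decode. When the device exposes external
 * HARQ memory (RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE), the HARQ
 * buffers are preloaded before the timed enqueue and read back after the
 * timed dequeue, so that HARQ DDR traffic stays outside the measured window.
 */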
static int
offload_latency_test_ldpc_dec(struct rte_mempool *mempool,
		struct test_buffers *bufs,
		struct rte_bbdev_dec_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;
	bool extDdr = ldpc_cap_flags &
			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz);
		ref_op->ldpc_dec.iter_max = get_iter_max();
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					bufs->soft_outputs,
					bufs->harq_inputs,
					bufs->harq_outputs,
					ref_op);

		if (extDdr)
			preload_harq_ddr(dev_id, queue_id, ops_enq,
					burst_sz, true);

		/* Start time measurement for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;
		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);

		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* Give the device time to process the ops */
		rte_delay_us(WAIT_OFFLOAD_US);

		/* Start time measurement for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], enq);
		} while (unlikely(deq == 0));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		/* Dequeue remaining operations if needed */
		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		if (extDdr) {
			/* Read loopback is not thread safe */
			retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz);
		}

		rte_bbdev_dec_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

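/*
 * Offload cost measurement for Turbo encode: same enqueue/dequeue split as
 * the decoder variants above, without any HARQ handling.
 */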
static int
offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
		struct rte_bbdev_enc_op *ref_op, uint16_t dev_id,
		uint16_t queue_id, const uint16_t num_to_process,
		uint16_t burst_sz, struct test_time_stats *time_st)
{
	int i, dequeued, ret;
	struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST];
	uint64_t enq_start_time, deq_start_time;
	uint64_t enq_sw_last_time, deq_last_time;
	struct rte_bbdev_stats stats;

	for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) {
		uint16_t enq = 0, deq = 0;

		if (unlikely(num_to_process - dequeued < burst_sz))
			burst_sz = num_to_process - dequeued;

		ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
		TEST_ASSERT_SUCCESS(ret,
				"rte_bbdev_enc_op_alloc_bulk() failed");
		if (test_vector.op_type != RTE_BBDEV_OP_NONE)
			copy_reference_enc_op(ops_enq, burst_sz, dequeued,
					bufs->inputs,
					bufs->hard_outputs,
					ref_op);

		/* Start time measurement for enqueue function offload latency */
		enq_start_time = rte_rdtsc_precise();
		do {
			enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id,
					&ops_enq[enq], burst_sz - enq);
		} while (unlikely(burst_sz != enq));

		enq_sw_last_time = rte_rdtsc_precise() - enq_start_time;

		ret = get_bbdev_queue_stats(dev_id, queue_id, &stats);
		TEST_ASSERT_SUCCESS(ret,
				"Failed to get stats for queue (%u) of device (%u)",
				queue_id, dev_id);
		enq_sw_last_time -= stats.acc_offload_cycles;
		time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time,
				enq_sw_last_time);
		time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time,
				enq_sw_last_time);
		time_st->enq_sw_total_time += enq_sw_last_time;

		time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time,
				stats.acc_offload_cycles);
		time_st->enq_acc_total_time += stats.acc_offload_cycles;

		/* Give the device time to process the ops */
		rte_delay_us(WAIT_OFFLOAD_US);

		/* Start time measurement for dequeue function offload latency */
		deq_start_time = rte_rdtsc_precise();
		/* Dequeue one operation */
		do {
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], enq);
		} while (unlikely(deq == 0));

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		time_st->deq_max_time = RTE_MAX(time_st->deq_max_time,
				deq_last_time);
		time_st->deq_min_time = RTE_MIN(time_st->deq_min_time,
				deq_last_time);
		time_st->deq_total_time += deq_last_time;

		while (burst_sz != deq)
			deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id,
					&ops_deq[deq], burst_sz - deq);

		rte_bbdev_enc_op_free_bulk(ops_enq, deq);
		dequeued += deq;
	}

	return i;
}

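/*
 * Offload cost measurement for LDPC encode, following the same pattern as the
 * Turbo encode variant above.
 */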
failed"); 5617 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 5618 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 5619 bufs->inputs, 5620 bufs->hard_outputs, 5621 ref_op); 5622 5623 /* Start time meas for enqueue function offload latency */ 5624 enq_start_time = rte_rdtsc_precise(); 5625 do { 5626 enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 5627 &ops_enq[enq], burst_sz - enq); 5628 } while (unlikely(burst_sz != enq)); 5629 5630 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 5631 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 5632 TEST_ASSERT_SUCCESS(ret, 5633 "Failed to get stats for queue (%u) of device (%u)", 5634 queue_id, dev_id); 5635 5636 enq_sw_last_time -= stats.acc_offload_cycles; 5637 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 5638 enq_sw_last_time); 5639 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 5640 enq_sw_last_time); 5641 time_st->enq_sw_total_time += enq_sw_last_time; 5642 5643 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 5644 stats.acc_offload_cycles); 5645 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 5646 stats.acc_offload_cycles); 5647 time_st->enq_acc_total_time += stats.acc_offload_cycles; 5648 5649 /* give time for device to process ops */ 5650 rte_delay_us(WAIT_OFFLOAD_US); 5651 5652 /* Start time meas for dequeue function offload latency */ 5653 deq_start_time = rte_rdtsc_precise(); 5654 /* Dequeue one operation */ 5655 do { 5656 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 5657 &ops_deq[deq], enq); 5658 } while (unlikely(deq == 0)); 5659 5660 deq_last_time = rte_rdtsc_precise() - deq_start_time; 5661 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 5662 deq_last_time); 5663 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 5664 deq_last_time); 5665 time_st->deq_total_time += deq_last_time; 5666 5667 while (burst_sz != deq) 5668 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 5669 &ops_deq[deq], burst_sz - deq); 5670 5671 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 5672 dequeued += deq; 5673 } 5674 5675 return i; 5676 } 5677 5678 static int 5679 offload_cost_test(struct active_device *ad, 5680 struct test_op_params *op_params) 5681 { 5682 int iter, ret; 5683 uint16_t burst_sz = op_params->burst_sz; 5684 const uint16_t num_to_process = op_params->num_to_process; 5685 const enum rte_bbdev_op_type op_type = test_vector.op_type; 5686 const uint16_t queue_id = ad->queue_ids[0]; 5687 struct test_buffers *bufs = NULL; 5688 struct rte_bbdev_info info; 5689 const char *op_type_str; 5690 struct test_time_stats time_st; 5691 5692 memset(&time_st, 0, sizeof(struct test_time_stats)); 5693 time_st.enq_sw_min_time = UINT64_MAX; 5694 time_st.enq_acc_min_time = UINT64_MAX; 5695 time_st.deq_min_time = UINT64_MAX; 5696 5697 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 5698 "BURST_SIZE should be <= %u", MAX_BURST); 5699 5700 rte_bbdev_info_get(ad->dev_id, &info); 5701 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 5702 5703 op_type_str = rte_bbdev_op_type_str(op_type); 5704 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 5705 5706 printf("+ ------------------------------------------------------- +\n"); 5707 printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 5708 info.dev_name, burst_sz, num_to_process, op_type_str); 5709 5710 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 5711 iter = offload_latency_test_dec(op_params->mp, bufs, 5712 op_params->ref_dec_op, ad->dev_id, queue_id, 5713 
static int
offload_cost_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int iter, ret;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	const char *op_type_str;
	struct test_time_stats time_st;

	memset(&time_st, 0, sizeof(struct test_time_stats));
	time_st.enq_sw_min_time = UINT64_MAX;
	time_st.enq_acc_min_time = UINT64_MAX;
	time_st.deq_min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = offload_latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_FFT)
		iter = offload_latency_test_fft(op_params->mp, bufs,
				op_params->ref_fft_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);

	if (iter <= 0)
		return TEST_FAILED;

	printf("Enqueue driver offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n"
			"Enqueue accelerator offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.enq_sw_total_time / (double)iter,
			(double)(time_st.enq_sw_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_sw_min_time,
			(double)(time_st.enq_sw_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
			(double)(time_st.enq_sw_max_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
			(double)iter,
			(double)(time_st.enq_acc_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_acc_min_time,
			(double)(time_st.enq_acc_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
			(double)(time_st.enq_acc_max_time * 1000000) /
			rte_get_tsc_hz());

	printf("Dequeue offload cost latency - one op:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.deq_total_time / (double)iter,
			(double)(time_st.deq_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.deq_min_time,
			(double)(time_st.deq_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.deq_max_time,
			(double)(time_st.deq_max_time * 1000000) /
			rte_get_tsc_hz());

	struct rte_bbdev_stats stats = {0};
	ret = get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
	TEST_ASSERT_SUCCESS(ret,
			"Failed to get stats for queue (%u) of device (%u)",
			queue_id, ad->dev_id);
	if (stats.enqueue_warn_count > 0)
		printf("Warning reported on the queue: %10"PRIu64"\n",
				stats.enqueue_warn_count);
	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
				"Mismatch in enqueue count %10"PRIu64" %d",
				stats.enqueued_count, num_to_process);
		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
				"Mismatch in dequeue count %10"PRIu64" %d",
				stats.dequeued_count, num_to_process);
	}
	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
			"Enqueue count Error %10"PRIu64"",
			stats.enqueue_err_count);
	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
			"Dequeue count Error %10"PRIu64"",
			stats.dequeue_err_count);

	return TEST_SUCCESS;
}

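/*
 * Empty-queue dequeue cost: measure how long the dequeue burst APIs take when
 * there is nothing to return, which approximates the polling overhead an
 * application pays on an idle queue.
 */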
"Dequeue count Error (%10"PRIu64"", 5797 stats.dequeue_err_count); 5798 5799 return TEST_SUCCESS; 5800 } 5801 5802 static int 5803 offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id, 5804 const uint16_t num_to_process, uint16_t burst_sz, 5805 uint64_t *deq_total_time, uint64_t *deq_min_time, 5806 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type) 5807 { 5808 int i, deq_total; 5809 struct rte_bbdev_dec_op *ops[MAX_BURST]; 5810 uint64_t deq_start_time, deq_last_time; 5811 5812 /* Test deq offload latency from an empty queue */ 5813 5814 for (i = 0, deq_total = 0; deq_total < num_to_process; 5815 ++i, deq_total += burst_sz) { 5816 deq_start_time = rte_rdtsc_precise(); 5817 5818 if (unlikely(num_to_process - deq_total < burst_sz)) 5819 burst_sz = num_to_process - deq_total; 5820 if (op_type == RTE_BBDEV_OP_LDPC_DEC) 5821 rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops, 5822 burst_sz); 5823 else 5824 rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, 5825 burst_sz); 5826 5827 deq_last_time = rte_rdtsc_precise() - deq_start_time; 5828 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 5829 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 5830 *deq_total_time += deq_last_time; 5831 } 5832 5833 return i; 5834 } 5835 5836 static int 5837 offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id, 5838 const uint16_t num_to_process, uint16_t burst_sz, 5839 uint64_t *deq_total_time, uint64_t *deq_min_time, 5840 uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type) 5841 { 5842 int i, deq_total; 5843 struct rte_bbdev_enc_op *ops[MAX_BURST]; 5844 uint64_t deq_start_time, deq_last_time; 5845 5846 /* Test deq offload latency from an empty queue */ 5847 for (i = 0, deq_total = 0; deq_total < num_to_process; 5848 ++i, deq_total += burst_sz) { 5849 deq_start_time = rte_rdtsc_precise(); 5850 5851 if (unlikely(num_to_process - deq_total < burst_sz)) 5852 burst_sz = num_to_process - deq_total; 5853 if (op_type == RTE_BBDEV_OP_LDPC_ENC) 5854 rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops, 5855 burst_sz); 5856 else 5857 rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops, 5858 burst_sz); 5859 5860 deq_last_time = rte_rdtsc_precise() - deq_start_time; 5861 *deq_max_time = RTE_MAX(*deq_max_time, deq_last_time); 5862 *deq_min_time = RTE_MIN(*deq_min_time, deq_last_time); 5863 *deq_total_time += deq_last_time; 5864 } 5865 5866 return i; 5867 } 5868 5869 static int 5870 offload_latency_empty_q_test(struct active_device *ad, 5871 struct test_op_params *op_params) 5872 { 5873 int iter; 5874 uint64_t deq_total_time, deq_min_time, deq_max_time; 5875 uint16_t burst_sz = op_params->burst_sz; 5876 const uint16_t num_to_process = op_params->num_to_process; 5877 const enum rte_bbdev_op_type op_type = test_vector.op_type; 5878 const uint16_t queue_id = ad->queue_ids[0]; 5879 struct rte_bbdev_info info; 5880 const char *op_type_str; 5881 5882 deq_total_time = deq_max_time = 0; 5883 deq_min_time = UINT64_MAX; 5884 5885 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 5886 "BURST_SIZE should be <= %u", MAX_BURST); 5887 5888 rte_bbdev_info_get(ad->dev_id, &info); 5889 5890 op_type_str = rte_bbdev_op_type_str(op_type); 5891 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 5892 5893 printf("+ ------------------------------------------------------- +\n"); 5894 printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", 5895 info.dev_name, burst_sz, num_to_process, op_type_str); 5896 5897 if (op_type == 
static int
bler_tc(void)
{
	return run_test_case(bler_test);
}

static int
throughput_tc(void)
{
	return run_test_case(throughput_test);
}

static int
offload_cost_tc(void)
{
	return run_test_case(offload_cost_test);
}

static int
offload_latency_empty_q_tc(void)
{
	return run_test_case(offload_latency_empty_q_test);
}

static int
latency_tc(void)
{
	return run_test_case(latency_test);
}

static int
validation_tc(void)
{
	return run_test_case(validation_test);
}

static int
interrupt_tc(void)
{
	return run_test_case(throughput_test);
}

static struct unit_test_suite bbdev_bler_testsuite = {
	.suite_name = "BBdev BLER Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_throughput_testsuite = {
	.suite_name = "BBdev Throughput Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_validation_testsuite = {
	.suite_name = "BBdev Validation Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_latency_testsuite = {
	.suite_name = "BBdev Latency Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_offload_cost_testsuite = {
	.suite_name = "BBdev Offload Cost Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_interrupt_testsuite = {
	.suite_name = "BBdev Interrupt Tests",
	.setup = interrupt_testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);