/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <math.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_dev.h>
#include <rte_launch.h>
#include <rte_bbdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_hexdump.h>
#include <rte_interrupts.h>

#include "main.h"
#include "test_bbdev_vector.h"

#define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id))

#define MAX_QUEUES RTE_MAX_LCORE
#define TEST_REPETITIONS 100
#define WAIT_OFFLOAD_US 1000

#ifdef RTE_BASEBAND_FPGA_LTE_FEC
#include <fpga_lte_fec.h>
#define FPGA_LTE_PF_DRIVER_NAME ("intel_fpga_lte_fec_pf")
#define FPGA_LTE_VF_DRIVER_NAME ("intel_fpga_lte_fec_vf")
#define VF_UL_4G_QUEUE_VALUE 4
#define VF_DL_4G_QUEUE_VALUE 4
#define UL_4G_BANDWIDTH 3
#define DL_4G_BANDWIDTH 3
#define UL_4G_LOAD_BALANCE 128
#define DL_4G_LOAD_BALANCE 128
#define FLR_4G_TIMEOUT 610
#endif

#ifdef RTE_BASEBAND_FPGA_5GNR_FEC
#include <rte_pmd_fpga_5gnr_fec.h>
#define FPGA_5GNR_PF_DRIVER_NAME ("intel_fpga_5gnr_fec_pf")
#define FPGA_5GNR_VF_DRIVER_NAME ("intel_fpga_5gnr_fec_vf")
#define VF_UL_5G_QUEUE_VALUE 4
#define VF_DL_5G_QUEUE_VALUE 4
#define UL_5G_BANDWIDTH 3
#define DL_5G_BANDWIDTH 3
#define UL_5G_LOAD_BALANCE 128
#define DL_5G_LOAD_BALANCE 128
#endif

#ifdef RTE_BASEBAND_ACC
#include <rte_acc_cfg.h>
#define ACC100PF_DRIVER_NAME ("intel_acc100_pf")
#define ACC100VF_DRIVER_NAME ("intel_acc100_vf")
#define ACC100_QMGR_NUM_AQS 16
#define ACC100_QMGR_NUM_QGS 2
#define ACC100_QMGR_AQ_DEPTH 5
#define ACC100_QMGR_INVALID_IDX -1
#define ACC100_QMGR_RR 1
#define ACC100_QOS_GBR 0
#define ACC200PF_DRIVER_NAME ("intel_acc200_pf")
#define ACC200VF_DRIVER_NAME ("intel_acc200_vf")
#define ACC200_QMGR_NUM_AQS 16
#define ACC200_QMGR_NUM_QGS 2
#define ACC200_QMGR_AQ_DEPTH 5
#define ACC200_QMGR_INVALID_IDX -1
#define ACC200_QMGR_RR 1
#define ACC200_QOS_GBR 0
#endif

#define OPS_CACHE_SIZE 256U
#define OPS_POOL_SIZE_MIN 511U /* 0.5K per queue */

#define SYNC_WAIT 0
#define SYNC_START 1
#define INVALID_OPAQUE -1

#define INVALID_QUEUE_ID -1
/* Increment for next code block in external HARQ memory */
#define HARQ_INCR 32768
/* Headroom for filler LLRs insertion in HARQ buffer */
#define FILLER_HEADROOM 1024
/* Constants for K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
#define N_ZC_1 66 /* N = 66 Zc for BG 1 */
#define N_ZC_2 50 /* N = 50 Zc for BG 2 */
#define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
#define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
#define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
#define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
#define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
#define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */

static struct test_bbdev_vector test_vector;

/* Switch between PMD and Interrupt for throughput TC */
static bool intr_enabled;

/* LLR arithmetic representation for numerical conversion */
static int ldpc_llr_decimals;
static int ldpc_llr_size;
/* Keep track of the LDPC decoder device capability flag */
static uint32_t ldpc_cap_flags;

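/*
 * Illustrative note (not part of the original source): the K0_x_y values
 * above are the numerators of the k0 starting-position fractions from
 * 3GPP TS 38.212 Table 5.4.2.1-2, consumed later by get_k0(). For a
 * full-size circular buffer on BG1 (n_cb == N = 66 * Zc), rv_index = 2
 * gives k0 = floor(33 * n_cb / (66 * Zc)) * Zc = 33 * Zc, hence K0_2_1 = 33.
 * Likewise for BG2 (N = 50 * Zc), rv_index = 3 gives k0 = 43 * Zc (K0_3_2).
 */
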
/* Represents tested active devices */
static struct active_device {
	const char *driver_name;
	uint8_t dev_id;
	uint16_t supported_ops;
	uint16_t queue_ids[MAX_QUEUES];
	uint16_t nb_queues;
	struct rte_mempool *ops_mempool;
	struct rte_mempool *in_mbuf_pool;
	struct rte_mempool *hard_out_mbuf_pool;
	struct rte_mempool *soft_out_mbuf_pool;
	struct rte_mempool *harq_in_mbuf_pool;
	struct rte_mempool *harq_out_mbuf_pool;
} active_devs[RTE_BBDEV_MAX_DEVS];

static uint8_t nb_active_devs;

/* Data buffers used by BBDEV ops */
struct test_buffers {
	struct rte_bbdev_op_data *inputs;
	struct rte_bbdev_op_data *hard_outputs;
	struct rte_bbdev_op_data *soft_outputs;
	struct rte_bbdev_op_data *harq_inputs;
	struct rte_bbdev_op_data *harq_outputs;
};

/* Operation parameters specific for given test case */
struct test_op_params {
	struct rte_mempool *mp;
	struct rte_bbdev_dec_op *ref_dec_op;
	struct rte_bbdev_enc_op *ref_enc_op;
	struct rte_bbdev_fft_op *ref_fft_op;
	uint16_t burst_sz;
	uint16_t num_to_process;
	uint16_t num_lcores;
	int vector_mask;
	uint16_t sync;
	struct test_buffers q_bufs[RTE_MAX_NUMA_NODES][MAX_QUEUES];
};

/* Contains per lcore params */
struct thread_params {
	uint8_t dev_id;
	uint16_t queue_id;
	uint32_t lcore_id;
	uint64_t start_time;
	double ops_per_sec;
	double mbps;
	uint8_t iter_count;
	double iter_average;
	double bler;
	uint16_t nb_dequeued;
	int16_t processing_status;
	uint16_t burst_sz;
	struct test_op_params *op_params;
	struct rte_bbdev_dec_op *dec_ops[MAX_BURST];
	struct rte_bbdev_enc_op *enc_ops[MAX_BURST];
	struct rte_bbdev_fft_op *fft_ops[MAX_BURST];
};

/* Stores time statistics */
struct test_time_stats {
	/* Stores software enqueue total working time */
	uint64_t enq_sw_total_time;
	/* Stores minimum value of software enqueue working time */
	uint64_t enq_sw_min_time;
	/* Stores maximum value of software enqueue working time */
	uint64_t enq_sw_max_time;
	/* Stores turbo enqueue total working time */
	uint64_t enq_acc_total_time;
	/* Stores minimum value of accelerator enqueue working time */
	uint64_t enq_acc_min_time;
	/* Stores maximum value of accelerator enqueue working time */
	uint64_t enq_acc_max_time;
	/* Stores dequeue total working time */
	uint64_t deq_total_time;
	/* Stores minimum value of dequeue working time */
	uint64_t deq_min_time;
	/* Stores maximum value of dequeue working time */
	uint64_t deq_max_time;
};

typedef int (test_case_function)(struct active_device *ad,
		struct test_op_params *op_params);

static inline void
mbuf_reset(struct rte_mbuf *m)
{
	m->pkt_len = 0;

	do {
		m->data_len = 0;
		m = m->next;
	} while (m != NULL);
}

/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}

static inline void
set_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	ad->supported_ops |= (1 << op_type);
}

static inline bool
is_avail_op(struct active_device *ad, enum rte_bbdev_op_type op_type)
{
	return ad->supported_ops & (1 << op_type);
}

static inline bool
flags_match(uint32_t flags_req, uint32_t flags_present)
{
	return (flags_req & flags_present) == flags_req;
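	/*
	 * Illustrative note (not part of the original source): a match requires
	 * every requested flag to be present. For example, with placeholder
	 * flags, flags_req = (A | B) matches flags_present = (A | B | C), but
	 * not flags_present = (A | C), because B is missing.
	 */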
228 } 229 230 static void 231 clear_soft_out_cap(uint32_t *op_flags) 232 { 233 *op_flags &= ~RTE_BBDEV_TURBO_SOFT_OUTPUT; 234 *op_flags &= ~RTE_BBDEV_TURBO_POS_LLR_1_BIT_SOFT_OUT; 235 *op_flags &= ~RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT; 236 } 237 238 /* This API is to convert all the test vector op data entries 239 * to big endian format. It is used when the device supports 240 * the input in the big endian format. 241 */ 242 static inline void 243 convert_op_data_to_be(void) 244 { 245 struct op_data_entries *op; 246 enum op_data_type type; 247 uint8_t nb_segs, *rem_data, temp; 248 uint32_t *data, len; 249 int complete, rem, i, j; 250 251 for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) { 252 nb_segs = test_vector.entries[type].nb_segments; 253 op = &test_vector.entries[type]; 254 255 /* Invert byte endianness for all the segments */ 256 for (i = 0; i < nb_segs; ++i) { 257 len = op->segments[i].length; 258 data = op->segments[i].addr; 259 260 /* Swap complete u32 bytes */ 261 complete = len / 4; 262 for (j = 0; j < complete; j++) 263 data[j] = rte_bswap32(data[j]); 264 265 /* Swap any remaining bytes */ 266 rem = len % 4; 267 rem_data = (uint8_t *)&data[j]; 268 for (j = 0; j < rem/2; j++) { 269 temp = rem_data[j]; 270 rem_data[j] = rem_data[rem - j - 1]; 271 rem_data[rem - j - 1] = temp; 272 } 273 } 274 } 275 } 276 277 static int 278 check_dev_cap(const struct rte_bbdev_info *dev_info) 279 { 280 unsigned int i; 281 unsigned int nb_inputs, nb_soft_outputs, nb_hard_outputs, 282 nb_harq_inputs, nb_harq_outputs; 283 const struct rte_bbdev_op_cap *op_cap = dev_info->drv.capabilities; 284 uint8_t dev_data_endianness = dev_info->drv.data_endianness; 285 286 nb_inputs = test_vector.entries[DATA_INPUT].nb_segments; 287 nb_soft_outputs = test_vector.entries[DATA_SOFT_OUTPUT].nb_segments; 288 nb_hard_outputs = test_vector.entries[DATA_HARD_OUTPUT].nb_segments; 289 nb_harq_inputs = test_vector.entries[DATA_HARQ_INPUT].nb_segments; 290 nb_harq_outputs = test_vector.entries[DATA_HARQ_OUTPUT].nb_segments; 291 292 for (i = 0; op_cap->type != RTE_BBDEV_OP_NONE; ++i, ++op_cap) { 293 if (op_cap->type != test_vector.op_type) 294 continue; 295 296 if (dev_data_endianness == RTE_BIG_ENDIAN) 297 convert_op_data_to_be(); 298 299 if (op_cap->type == RTE_BBDEV_OP_TURBO_DEC) { 300 const struct rte_bbdev_op_cap_turbo_dec *cap = 301 &op_cap->cap.turbo_dec; 302 /* Ignore lack of soft output capability, just skip 303 * checking if soft output is valid. 
304 */ 305 if ((test_vector.turbo_dec.op_flags & 306 RTE_BBDEV_TURBO_SOFT_OUTPUT) && 307 !(cap->capability_flags & 308 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 309 printf( 310 "INFO: Device \"%s\" does not support soft output - soft output flags will be ignored.\n", 311 dev_info->dev_name); 312 clear_soft_out_cap( 313 &test_vector.turbo_dec.op_flags); 314 } 315 316 if (!flags_match(test_vector.turbo_dec.op_flags, 317 cap->capability_flags)) 318 return TEST_FAILED; 319 if (nb_inputs > cap->num_buffers_src) { 320 printf("Too many inputs defined: %u, max: %u\n", 321 nb_inputs, cap->num_buffers_src); 322 return TEST_FAILED; 323 } 324 if (nb_soft_outputs > cap->num_buffers_soft_out && 325 (test_vector.turbo_dec.op_flags & 326 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 327 printf( 328 "Too many soft outputs defined: %u, max: %u\n", 329 nb_soft_outputs, 330 cap->num_buffers_soft_out); 331 return TEST_FAILED; 332 } 333 if (nb_hard_outputs > cap->num_buffers_hard_out) { 334 printf( 335 "Too many hard outputs defined: %u, max: %u\n", 336 nb_hard_outputs, 337 cap->num_buffers_hard_out); 338 return TEST_FAILED; 339 } 340 if (intr_enabled && !(cap->capability_flags & 341 RTE_BBDEV_TURBO_DEC_INTERRUPTS)) { 342 printf( 343 "Dequeue interrupts are not supported!\n"); 344 return TEST_FAILED; 345 } 346 347 return TEST_SUCCESS; 348 } else if (op_cap->type == RTE_BBDEV_OP_TURBO_ENC) { 349 const struct rte_bbdev_op_cap_turbo_enc *cap = 350 &op_cap->cap.turbo_enc; 351 352 if (!flags_match(test_vector.turbo_enc.op_flags, 353 cap->capability_flags)) 354 return TEST_FAILED; 355 if (nb_inputs > cap->num_buffers_src) { 356 printf("Too many inputs defined: %u, max: %u\n", 357 nb_inputs, cap->num_buffers_src); 358 return TEST_FAILED; 359 } 360 if (nb_hard_outputs > cap->num_buffers_dst) { 361 printf( 362 "Too many hard outputs defined: %u, max: %u\n", 363 nb_hard_outputs, cap->num_buffers_dst); 364 return TEST_FAILED; 365 } 366 if (intr_enabled && !(cap->capability_flags & 367 RTE_BBDEV_TURBO_ENC_INTERRUPTS)) { 368 printf( 369 "Dequeue interrupts are not supported!\n"); 370 return TEST_FAILED; 371 } 372 373 return TEST_SUCCESS; 374 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_ENC) { 375 const struct rte_bbdev_op_cap_ldpc_enc *cap = 376 &op_cap->cap.ldpc_enc; 377 378 if (!flags_match(test_vector.ldpc_enc.op_flags, 379 cap->capability_flags)){ 380 printf("Flag Mismatch\n"); 381 return TEST_FAILED; 382 } 383 if (nb_inputs > cap->num_buffers_src) { 384 printf("Too many inputs defined: %u, max: %u\n", 385 nb_inputs, cap->num_buffers_src); 386 return TEST_FAILED; 387 } 388 if (nb_hard_outputs > cap->num_buffers_dst) { 389 printf( 390 "Too many hard outputs defined: %u, max: %u\n", 391 nb_hard_outputs, cap->num_buffers_dst); 392 return TEST_FAILED; 393 } 394 if (intr_enabled && !(cap->capability_flags & 395 RTE_BBDEV_LDPC_ENC_INTERRUPTS)) { 396 printf( 397 "Dequeue interrupts are not supported!\n"); 398 return TEST_FAILED; 399 } 400 401 return TEST_SUCCESS; 402 } else if (op_cap->type == RTE_BBDEV_OP_LDPC_DEC) { 403 const struct rte_bbdev_op_cap_ldpc_dec *cap = 404 &op_cap->cap.ldpc_dec; 405 406 if (!flags_match(test_vector.ldpc_dec.op_flags, 407 cap->capability_flags)){ 408 printf("Flag Mismatch\n"); 409 return TEST_FAILED; 410 } 411 if (nb_inputs > cap->num_buffers_src) { 412 printf("Too many inputs defined: %u, max: %u\n", 413 nb_inputs, cap->num_buffers_src); 414 return TEST_FAILED; 415 } 416 if (nb_hard_outputs > cap->num_buffers_hard_out) { 417 printf( 418 "Too many hard outputs defined: %u, max: %u\n", 419 nb_hard_outputs, 420 
cap->num_buffers_hard_out); 421 return TEST_FAILED; 422 } 423 if (nb_harq_inputs > cap->num_buffers_hard_out) { 424 printf( 425 "Too many HARQ inputs defined: %u, max: %u\n", 426 nb_harq_inputs, 427 cap->num_buffers_hard_out); 428 return TEST_FAILED; 429 } 430 if (nb_harq_outputs > cap->num_buffers_hard_out) { 431 printf( 432 "Too many HARQ outputs defined: %u, max: %u\n", 433 nb_harq_outputs, 434 cap->num_buffers_hard_out); 435 return TEST_FAILED; 436 } 437 if (intr_enabled && !(cap->capability_flags & 438 RTE_BBDEV_LDPC_DEC_INTERRUPTS)) { 439 printf( 440 "Dequeue interrupts are not supported!\n"); 441 return TEST_FAILED; 442 } 443 if (intr_enabled && (test_vector.ldpc_dec.op_flags & 444 (RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 445 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 446 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 447 ))) { 448 printf("Skip loop-back with interrupt\n"); 449 return TEST_FAILED; 450 } 451 return TEST_SUCCESS; 452 } else if (op_cap->type == RTE_BBDEV_OP_FFT) { 453 const struct rte_bbdev_op_cap_fft *cap = &op_cap->cap.fft; 454 455 if (!flags_match(test_vector.fft.op_flags, cap->capability_flags)) { 456 printf("Flag Mismatch\n"); 457 return TEST_FAILED; 458 } 459 if (nb_inputs > cap->num_buffers_src) { 460 printf("Too many inputs defined: %u, max: %u\n", 461 nb_inputs, cap->num_buffers_src); 462 return TEST_FAILED; 463 } 464 return TEST_SUCCESS; 465 } 466 } 467 468 if ((i == 0) && (test_vector.op_type == RTE_BBDEV_OP_NONE)) 469 return TEST_SUCCESS; /* Special case for NULL device */ 470 471 return TEST_FAILED; 472 } 473 474 /* calculates optimal mempool size not smaller than the val */ 475 static unsigned int 476 optimal_mempool_size(unsigned int val) 477 { 478 return rte_align32pow2(val + 1) - 1; 479 } 480 481 /* allocates mbuf mempool for inputs and outputs */ 482 static struct rte_mempool * 483 create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id, 484 int socket_id, unsigned int mbuf_pool_size, 485 const char *op_type_str) 486 { 487 unsigned int i; 488 uint32_t max_seg_sz = 0; 489 char pool_name[RTE_MEMPOOL_NAMESIZE]; 490 491 /* find max input segment size */ 492 for (i = 0; i < entries->nb_segments; ++i) 493 if (entries->segments[i].length > max_seg_sz) 494 max_seg_sz = entries->segments[i].length; 495 496 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 497 dev_id); 498 return rte_pktmbuf_pool_create(pool_name, mbuf_pool_size, 0, 0, 499 RTE_MAX(max_seg_sz + RTE_PKTMBUF_HEADROOM 500 + FILLER_HEADROOM, 501 (unsigned int)RTE_MBUF_DEFAULT_BUF_SIZE), socket_id); 502 } 503 504 static int 505 create_mempools(struct active_device *ad, int socket_id, 506 enum rte_bbdev_op_type org_op_type, uint16_t num_ops) 507 { 508 struct rte_mempool *mp; 509 unsigned int ops_pool_size, mbuf_pool_size = 0; 510 char pool_name[RTE_MEMPOOL_NAMESIZE]; 511 const char *op_type_str; 512 enum rte_bbdev_op_type op_type = org_op_type; 513 514 struct op_data_entries *in = &test_vector.entries[DATA_INPUT]; 515 struct op_data_entries *hard_out = 516 &test_vector.entries[DATA_HARD_OUTPUT]; 517 struct op_data_entries *soft_out = 518 &test_vector.entries[DATA_SOFT_OUTPUT]; 519 struct op_data_entries *harq_in = 520 &test_vector.entries[DATA_HARQ_INPUT]; 521 struct op_data_entries *harq_out = 522 &test_vector.entries[DATA_HARQ_OUTPUT]; 523 524 /* allocate ops mempool */ 525 ops_pool_size = optimal_mempool_size(RTE_MAX( 526 /* Ops used plus 1 reference op */ 527 RTE_MAX((unsigned int)(ad->nb_queues * num_ops + 1), 528 /* Minimal cache size plus 1 reference op */ 529 (unsigned 
int)(1.5 * rte_lcore_count() * 530 OPS_CACHE_SIZE + 1)), 531 OPS_POOL_SIZE_MIN)); 532 533 if (org_op_type == RTE_BBDEV_OP_NONE) 534 op_type = RTE_BBDEV_OP_TURBO_ENC; 535 536 op_type_str = rte_bbdev_op_type_str(op_type); 537 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 538 539 snprintf(pool_name, sizeof(pool_name), "%s_pool_%u", op_type_str, 540 ad->dev_id); 541 mp = rte_bbdev_op_pool_create(pool_name, op_type, 542 ops_pool_size, OPS_CACHE_SIZE, socket_id); 543 TEST_ASSERT_NOT_NULL(mp, 544 "ERROR Failed to create %u items ops pool for dev %u on socket %u.", 545 ops_pool_size, 546 ad->dev_id, 547 socket_id); 548 ad->ops_mempool = mp; 549 550 /* Do not create inputs and outputs mbufs for BaseBand Null Device */ 551 if (org_op_type == RTE_BBDEV_OP_NONE) 552 return TEST_SUCCESS; 553 554 /* Inputs */ 555 if (in->nb_segments > 0) { 556 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 557 in->nb_segments); 558 mp = create_mbuf_pool(in, ad->dev_id, socket_id, 559 mbuf_pool_size, "in"); 560 TEST_ASSERT_NOT_NULL(mp, 561 "ERROR Failed to create %u items input pktmbuf pool for dev %u on socket %u.", 562 mbuf_pool_size, 563 ad->dev_id, 564 socket_id); 565 ad->in_mbuf_pool = mp; 566 } 567 568 /* Hard outputs */ 569 if (hard_out->nb_segments > 0) { 570 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 571 hard_out->nb_segments); 572 mp = create_mbuf_pool(hard_out, ad->dev_id, socket_id, 573 mbuf_pool_size, 574 "hard_out"); 575 TEST_ASSERT_NOT_NULL(mp, 576 "ERROR Failed to create %u items hard output pktmbuf pool for dev %u on socket %u.", 577 mbuf_pool_size, 578 ad->dev_id, 579 socket_id); 580 ad->hard_out_mbuf_pool = mp; 581 } 582 583 /* Soft outputs */ 584 if (soft_out->nb_segments > 0) { 585 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 586 soft_out->nb_segments); 587 mp = create_mbuf_pool(soft_out, ad->dev_id, socket_id, 588 mbuf_pool_size, 589 "soft_out"); 590 TEST_ASSERT_NOT_NULL(mp, 591 "ERROR Failed to create %uB soft output pktmbuf pool for dev %u on socket %u.", 592 mbuf_pool_size, 593 ad->dev_id, 594 socket_id); 595 ad->soft_out_mbuf_pool = mp; 596 } 597 598 /* HARQ inputs */ 599 if (harq_in->nb_segments > 0) { 600 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 601 harq_in->nb_segments); 602 mp = create_mbuf_pool(harq_in, ad->dev_id, socket_id, 603 mbuf_pool_size, 604 "harq_in"); 605 TEST_ASSERT_NOT_NULL(mp, 606 "ERROR Failed to create %uB harq input pktmbuf pool for dev %u on socket %u.", 607 mbuf_pool_size, 608 ad->dev_id, 609 socket_id); 610 ad->harq_in_mbuf_pool = mp; 611 } 612 613 /* HARQ outputs */ 614 if (harq_out->nb_segments > 0) { 615 mbuf_pool_size = optimal_mempool_size(ops_pool_size * 616 harq_out->nb_segments); 617 mp = create_mbuf_pool(harq_out, ad->dev_id, socket_id, 618 mbuf_pool_size, 619 "harq_out"); 620 TEST_ASSERT_NOT_NULL(mp, 621 "ERROR Failed to create %uB harq output pktmbuf pool for dev %u on socket %u.", 622 mbuf_pool_size, 623 ad->dev_id, 624 socket_id); 625 ad->harq_out_mbuf_pool = mp; 626 } 627 628 return TEST_SUCCESS; 629 } 630 631 static int 632 add_bbdev_dev(uint8_t dev_id, struct rte_bbdev_info *info, 633 struct test_bbdev_vector *vector) 634 { 635 int ret; 636 unsigned int queue_id; 637 struct rte_bbdev_queue_conf qconf; 638 struct active_device *ad = &active_devs[nb_active_devs]; 639 unsigned int nb_queues; 640 enum rte_bbdev_op_type op_type = vector->op_type; 641 642 /* Configure fpga lte fec with PF & VF values 643 * if '-i' flag is set and using fpga device 644 */ 645 #ifdef RTE_BASEBAND_FPGA_LTE_FEC 646 if 
((get_init_device() == true) && 647 (!strcmp(info->drv.driver_name, FPGA_LTE_PF_DRIVER_NAME))) { 648 struct rte_fpga_lte_fec_conf conf; 649 unsigned int i; 650 651 printf("Configure FPGA LTE FEC Driver %s with default values\n", 652 info->drv.driver_name); 653 654 /* clear default configuration before initialization */ 655 memset(&conf, 0, sizeof(struct rte_fpga_lte_fec_conf)); 656 657 /* Set PF mode : 658 * true if PF is used for data plane 659 * false for VFs 660 */ 661 conf.pf_mode_en = true; 662 663 for (i = 0; i < FPGA_LTE_FEC_NUM_VFS; ++i) { 664 /* Number of UL queues per VF (fpga supports 8 VFs) */ 665 conf.vf_ul_queues_number[i] = VF_UL_4G_QUEUE_VALUE; 666 /* Number of DL queues per VF (fpga supports 8 VFs) */ 667 conf.vf_dl_queues_number[i] = VF_DL_4G_QUEUE_VALUE; 668 } 669 670 /* UL bandwidth. Needed for schedule algorithm */ 671 conf.ul_bandwidth = UL_4G_BANDWIDTH; 672 /* DL bandwidth */ 673 conf.dl_bandwidth = DL_4G_BANDWIDTH; 674 675 /* UL & DL load Balance Factor to 64 */ 676 conf.ul_load_balance = UL_4G_LOAD_BALANCE; 677 conf.dl_load_balance = DL_4G_LOAD_BALANCE; 678 679 /**< FLR timeout value */ 680 conf.flr_time_out = FLR_4G_TIMEOUT; 681 682 /* setup FPGA PF with configuration information */ 683 ret = rte_fpga_lte_fec_configure(info->dev_name, &conf); 684 TEST_ASSERT_SUCCESS(ret, 685 "Failed to configure 4G FPGA PF for bbdev %s", 686 info->dev_name); 687 } 688 #endif 689 #ifdef RTE_BASEBAND_FPGA_5GNR_FEC 690 if ((get_init_device() == true) && 691 (!strcmp(info->drv.driver_name, FPGA_5GNR_PF_DRIVER_NAME))) { 692 struct rte_fpga_5gnr_fec_conf conf; 693 unsigned int i; 694 695 printf("Configure FPGA 5GNR FEC Driver %s with default values\n", 696 info->drv.driver_name); 697 698 /* clear default configuration before initialization */ 699 memset(&conf, 0, sizeof(struct rte_fpga_5gnr_fec_conf)); 700 701 /* Set PF mode : 702 * true if PF is used for data plane 703 * false for VFs 704 */ 705 conf.pf_mode_en = true; 706 707 for (i = 0; i < FPGA_5GNR_FEC_NUM_VFS; ++i) { 708 /* Number of UL queues per VF (fpga supports 8 VFs) */ 709 conf.vf_ul_queues_number[i] = VF_UL_5G_QUEUE_VALUE; 710 /* Number of DL queues per VF (fpga supports 8 VFs) */ 711 conf.vf_dl_queues_number[i] = VF_DL_5G_QUEUE_VALUE; 712 } 713 714 /* UL bandwidth. 
Needed for schedule algorithm */ 715 conf.ul_bandwidth = UL_5G_BANDWIDTH; 716 /* DL bandwidth */ 717 conf.dl_bandwidth = DL_5G_BANDWIDTH; 718 719 /* UL & DL load Balance Factor to 64 */ 720 conf.ul_load_balance = UL_5G_LOAD_BALANCE; 721 conf.dl_load_balance = DL_5G_LOAD_BALANCE; 722 723 /* setup FPGA PF with configuration information */ 724 ret = rte_fpga_5gnr_fec_configure(info->dev_name, &conf); 725 TEST_ASSERT_SUCCESS(ret, 726 "Failed to configure 5G FPGA PF for bbdev %s", 727 info->dev_name); 728 } 729 #endif 730 #ifdef RTE_BASEBAND_ACC 731 if ((get_init_device() == true) && 732 (!strcmp(info->drv.driver_name, ACC100PF_DRIVER_NAME))) { 733 struct rte_acc_conf conf; 734 unsigned int i; 735 736 printf("Configure ACC100/ACC101 FEC Driver %s with default values\n", 737 info->drv.driver_name); 738 739 /* clear default configuration before initialization */ 740 memset(&conf, 0, sizeof(struct rte_acc_conf)); 741 742 /* Always set in PF mode for built-in configuration */ 743 conf.pf_mode_en = true; 744 for (i = 0; i < RTE_ACC_NUM_VFS; ++i) { 745 conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 746 conf.arb_dl_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 747 conf.arb_dl_4g[i].round_robin_weight = ACC100_QMGR_RR; 748 conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 749 conf.arb_ul_4g[i].gbr_threshold1 = ACC100_QOS_GBR; 750 conf.arb_ul_4g[i].round_robin_weight = ACC100_QMGR_RR; 751 conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 752 conf.arb_dl_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 753 conf.arb_dl_5g[i].round_robin_weight = ACC100_QMGR_RR; 754 conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 755 conf.arb_ul_5g[i].gbr_threshold1 = ACC100_QOS_GBR; 756 conf.arb_ul_5g[i].round_robin_weight = ACC100_QMGR_RR; 757 } 758 759 conf.input_pos_llr_1_bit = true; 760 conf.output_pos_llr_1_bit = true; 761 conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */ 762 763 conf.q_ul_4g.num_qgroups = ACC100_QMGR_NUM_QGS; 764 conf.q_ul_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 765 conf.q_ul_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 766 conf.q_ul_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 767 conf.q_dl_4g.num_qgroups = ACC100_QMGR_NUM_QGS; 768 conf.q_dl_4g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 769 conf.q_dl_4g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 770 conf.q_dl_4g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 771 conf.q_ul_5g.num_qgroups = ACC100_QMGR_NUM_QGS; 772 conf.q_ul_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 773 conf.q_ul_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 774 conf.q_ul_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 775 conf.q_dl_5g.num_qgroups = ACC100_QMGR_NUM_QGS; 776 conf.q_dl_5g.first_qgroup_index = ACC100_QMGR_INVALID_IDX; 777 conf.q_dl_5g.num_aqs_per_groups = ACC100_QMGR_NUM_AQS; 778 conf.q_dl_5g.aq_depth_log2 = ACC100_QMGR_AQ_DEPTH; 779 780 /* setup PF with configuration information */ 781 ret = rte_acc_configure(info->dev_name, &conf); 782 TEST_ASSERT_SUCCESS(ret, 783 "Failed to configure ACC100 PF for bbdev %s", 784 info->dev_name); 785 } 786 if ((get_init_device() == true) && 787 (!strcmp(info->drv.driver_name, ACC200PF_DRIVER_NAME))) { 788 struct rte_acc_conf conf; 789 unsigned int i; 790 791 printf("Configure ACC200 FEC Driver %s with default values\n", 792 info->drv.driver_name); 793 794 /* clear default configuration before initialization */ 795 memset(&conf, 0, sizeof(struct rte_acc_conf)); 796 797 /* Always set in PF mode for built-in configuration */ 798 conf.pf_mode_en = true; 799 for (i = 0; i < RTE_ACC_NUM_VFS; ++i) { 800 conf.arb_dl_4g[i].gbr_threshold1 = 
ACC200_QOS_GBR; 801 conf.arb_dl_4g[i].gbr_threshold1 = ACC200_QOS_GBR; 802 conf.arb_dl_4g[i].round_robin_weight = ACC200_QMGR_RR; 803 conf.arb_ul_4g[i].gbr_threshold1 = ACC200_QOS_GBR; 804 conf.arb_ul_4g[i].gbr_threshold1 = ACC200_QOS_GBR; 805 conf.arb_ul_4g[i].round_robin_weight = ACC200_QMGR_RR; 806 conf.arb_dl_5g[i].gbr_threshold1 = ACC200_QOS_GBR; 807 conf.arb_dl_5g[i].gbr_threshold1 = ACC200_QOS_GBR; 808 conf.arb_dl_5g[i].round_robin_weight = ACC200_QMGR_RR; 809 conf.arb_ul_5g[i].gbr_threshold1 = ACC200_QOS_GBR; 810 conf.arb_ul_5g[i].gbr_threshold1 = ACC200_QOS_GBR; 811 conf.arb_ul_5g[i].round_robin_weight = ACC200_QMGR_RR; 812 conf.arb_fft[i].gbr_threshold1 = ACC200_QOS_GBR; 813 conf.arb_fft[i].gbr_threshold1 = ACC200_QOS_GBR; 814 conf.arb_fft[i].round_robin_weight = ACC200_QMGR_RR; 815 } 816 817 conf.input_pos_llr_1_bit = true; 818 conf.output_pos_llr_1_bit = true; 819 conf.num_vf_bundles = 1; /**< Number of VF bundles to setup */ 820 821 conf.q_ul_4g.num_qgroups = ACC200_QMGR_NUM_QGS; 822 conf.q_ul_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 823 conf.q_ul_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 824 conf.q_ul_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 825 conf.q_dl_4g.num_qgroups = ACC200_QMGR_NUM_QGS; 826 conf.q_dl_4g.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 827 conf.q_dl_4g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 828 conf.q_dl_4g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 829 conf.q_ul_5g.num_qgroups = ACC200_QMGR_NUM_QGS; 830 conf.q_ul_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 831 conf.q_ul_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 832 conf.q_ul_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 833 conf.q_dl_5g.num_qgroups = ACC200_QMGR_NUM_QGS; 834 conf.q_dl_5g.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 835 conf.q_dl_5g.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 836 conf.q_dl_5g.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 837 conf.q_fft.num_qgroups = ACC200_QMGR_NUM_QGS; 838 conf.q_fft.first_qgroup_index = ACC200_QMGR_INVALID_IDX; 839 conf.q_fft.num_aqs_per_groups = ACC200_QMGR_NUM_AQS; 840 conf.q_fft.aq_depth_log2 = ACC200_QMGR_AQ_DEPTH; 841 842 /* setup PF with configuration information */ 843 ret = rte_acc_configure(info->dev_name, &conf); 844 TEST_ASSERT_SUCCESS(ret, 845 "Failed to configure ACC200 PF for bbdev %s", 846 info->dev_name); 847 } 848 #endif 849 /* Let's refresh this now this is configured */ 850 rte_bbdev_info_get(dev_id, info); 851 nb_queues = RTE_MIN(rte_lcore_count(), info->drv.max_num_queues); 852 nb_queues = RTE_MIN(nb_queues, (unsigned int) MAX_QUEUES); 853 854 /* setup device */ 855 ret = rte_bbdev_setup_queues(dev_id, nb_queues, info->socket_id); 856 if (ret < 0) { 857 printf("rte_bbdev_setup_queues(%u, %u, %d) ret %i\n", 858 dev_id, nb_queues, info->socket_id, ret); 859 return TEST_FAILED; 860 } 861 862 /* configure interrupts if needed */ 863 if (intr_enabled) { 864 ret = rte_bbdev_intr_enable(dev_id); 865 if (ret < 0) { 866 printf("rte_bbdev_intr_enable(%u) ret %i\n", dev_id, 867 ret); 868 return TEST_FAILED; 869 } 870 } 871 872 /* setup device queues */ 873 qconf.socket = info->socket_id; 874 qconf.queue_size = info->drv.default_queue_conf.queue_size; 875 qconf.priority = 0; 876 qconf.deferred_start = 0; 877 qconf.op_type = op_type; 878 879 for (queue_id = 0; queue_id < nb_queues; ++queue_id) { 880 ret = rte_bbdev_queue_configure(dev_id, queue_id, &qconf); 881 if (ret != 0) { 882 printf( 883 "Allocated all queues (id=%u) at prio%u on dev%u\n", 884 queue_id, qconf.priority, dev_id); 885 qconf.priority++; 886 ret = 
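			/* Illustrative note (not part of the original source):
			 * the first attempt at the current priority failed, so the
			 * same queue id is retried once at the next priority level.
			 */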
rte_bbdev_queue_configure(ad->dev_id, queue_id, 887 &qconf); 888 } 889 if (ret != 0) { 890 printf("All queues on dev %u allocated: %u\n", 891 dev_id, queue_id); 892 break; 893 } 894 ad->queue_ids[queue_id] = queue_id; 895 } 896 TEST_ASSERT(queue_id != 0, 897 "ERROR Failed to configure any queues on dev %u", 898 dev_id); 899 ad->nb_queues = queue_id; 900 901 set_avail_op(ad, op_type); 902 903 return TEST_SUCCESS; 904 } 905 906 static int 907 add_active_device(uint8_t dev_id, struct rte_bbdev_info *info, 908 struct test_bbdev_vector *vector) 909 { 910 int ret; 911 912 active_devs[nb_active_devs].driver_name = info->drv.driver_name; 913 active_devs[nb_active_devs].dev_id = dev_id; 914 915 ret = add_bbdev_dev(dev_id, info, vector); 916 if (ret == TEST_SUCCESS) 917 ++nb_active_devs; 918 return ret; 919 } 920 921 static uint8_t 922 populate_active_devices(void) 923 { 924 int ret; 925 uint8_t dev_id; 926 uint8_t nb_devs_added = 0; 927 struct rte_bbdev_info info; 928 929 RTE_BBDEV_FOREACH(dev_id) { 930 rte_bbdev_info_get(dev_id, &info); 931 932 if (check_dev_cap(&info)) { 933 printf( 934 "Device %d (%s) does not support specified capabilities\n", 935 dev_id, info.dev_name); 936 continue; 937 } 938 939 ret = add_active_device(dev_id, &info, &test_vector); 940 if (ret != 0) { 941 printf("Adding active bbdev %s skipped\n", 942 info.dev_name); 943 continue; 944 } 945 nb_devs_added++; 946 } 947 948 return nb_devs_added; 949 } 950 951 static int 952 read_test_vector(void) 953 { 954 int ret; 955 956 memset(&test_vector, 0, sizeof(test_vector)); 957 printf("Test vector file = %s\n", get_vector_filename()); 958 ret = test_bbdev_vector_read(get_vector_filename(), &test_vector); 959 TEST_ASSERT_SUCCESS(ret, "Failed to parse file %s\n", 960 get_vector_filename()); 961 962 return TEST_SUCCESS; 963 } 964 965 static int 966 testsuite_setup(void) 967 { 968 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 969 970 if (populate_active_devices() == 0) { 971 printf("No suitable devices found!\n"); 972 return TEST_SKIPPED; 973 } 974 975 return TEST_SUCCESS; 976 } 977 978 static int 979 interrupt_testsuite_setup(void) 980 { 981 TEST_ASSERT_SUCCESS(read_test_vector(), "Test suite setup failed\n"); 982 983 /* Enable interrupts */ 984 intr_enabled = true; 985 986 /* Special case for NULL device (RTE_BBDEV_OP_NONE) */ 987 if (populate_active_devices() == 0 || 988 test_vector.op_type == RTE_BBDEV_OP_NONE) { 989 intr_enabled = false; 990 printf("No suitable devices found!\n"); 991 return TEST_SKIPPED; 992 } 993 994 return TEST_SUCCESS; 995 } 996 997 static void 998 testsuite_teardown(void) 999 { 1000 uint8_t dev_id; 1001 1002 /* Unconfigure devices */ 1003 RTE_BBDEV_FOREACH(dev_id) 1004 rte_bbdev_close(dev_id); 1005 1006 /* Clear active devices structs. 
*/ 1007 memset(active_devs, 0, sizeof(active_devs)); 1008 nb_active_devs = 0; 1009 1010 /* Disable interrupts */ 1011 intr_enabled = false; 1012 } 1013 1014 static int 1015 ut_setup(void) 1016 { 1017 uint8_t i, dev_id; 1018 1019 for (i = 0; i < nb_active_devs; i++) { 1020 dev_id = active_devs[i].dev_id; 1021 /* reset bbdev stats */ 1022 TEST_ASSERT_SUCCESS(rte_bbdev_stats_reset(dev_id), 1023 "Failed to reset stats of bbdev %u", dev_id); 1024 /* start the device */ 1025 TEST_ASSERT_SUCCESS(rte_bbdev_start(dev_id), 1026 "Failed to start bbdev %u", dev_id); 1027 } 1028 1029 return TEST_SUCCESS; 1030 } 1031 1032 static void 1033 ut_teardown(void) 1034 { 1035 uint8_t i, dev_id; 1036 struct rte_bbdev_stats stats; 1037 1038 for (i = 0; i < nb_active_devs; i++) { 1039 dev_id = active_devs[i].dev_id; 1040 /* read stats and print */ 1041 rte_bbdev_stats_get(dev_id, &stats); 1042 /* Stop the device */ 1043 rte_bbdev_stop(dev_id); 1044 } 1045 } 1046 1047 static int 1048 init_op_data_objs(struct rte_bbdev_op_data *bufs, 1049 struct op_data_entries *ref_entries, 1050 struct rte_mempool *mbuf_pool, const uint16_t n, 1051 enum op_data_type op_type, uint16_t min_alignment) 1052 { 1053 int ret; 1054 unsigned int i, j; 1055 bool large_input = false; 1056 1057 for (i = 0; i < n; ++i) { 1058 char *data; 1059 struct op_data_buf *seg = &ref_entries->segments[0]; 1060 struct rte_mbuf *m_head = rte_pktmbuf_alloc(mbuf_pool); 1061 TEST_ASSERT_NOT_NULL(m_head, 1062 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1063 op_type, n * ref_entries->nb_segments, 1064 mbuf_pool->size); 1065 1066 if ((seg->length + RTE_PKTMBUF_HEADROOM) > RTE_BBDEV_LDPC_E_MAX_MBUF) { 1067 /* 1068 * Special case when DPDK mbuf cannot handle 1069 * the required input size 1070 */ 1071 printf("Warning: Larger input size than DPDK mbuf %d\n", 1072 seg->length); 1073 large_input = true; 1074 } 1075 bufs[i].data = m_head; 1076 bufs[i].offset = 0; 1077 bufs[i].length = 0; 1078 1079 if ((op_type == DATA_INPUT) || (op_type == DATA_HARQ_INPUT)) { 1080 if (large_input) { 1081 /* Allocate a fake overused mbuf */ 1082 data = rte_malloc(NULL, seg->length, 0); 1083 TEST_ASSERT_NOT_NULL(data, 1084 "rte malloc failed with %u bytes", 1085 seg->length); 1086 memcpy(data, seg->addr, seg->length); 1087 m_head->buf_addr = data; 1088 rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data)); 1089 m_head->data_off = 0; 1090 m_head->data_len = seg->length; 1091 } else { 1092 data = rte_pktmbuf_append(m_head, seg->length); 1093 TEST_ASSERT_NOT_NULL(data, 1094 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 1095 seg->length, op_type); 1096 1097 TEST_ASSERT(data == RTE_PTR_ALIGN( 1098 data, min_alignment), 1099 "Data addr in mbuf (%p) is not aligned to device min alignment (%u)", 1100 data, min_alignment); 1101 rte_memcpy(data, seg->addr, seg->length); 1102 } 1103 1104 bufs[i].length += seg->length; 1105 1106 for (j = 1; j < ref_entries->nb_segments; ++j) { 1107 struct rte_mbuf *m_tail = 1108 rte_pktmbuf_alloc(mbuf_pool); 1109 TEST_ASSERT_NOT_NULL(m_tail, 1110 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1111 op_type, 1112 n * ref_entries->nb_segments, 1113 mbuf_pool->size); 1114 seg += 1; 1115 1116 data = rte_pktmbuf_append(m_tail, seg->length); 1117 TEST_ASSERT_NOT_NULL(data, 1118 "Couldn't append %u bytes to mbuf from %d data type mbuf pool", 1119 seg->length, op_type); 1120 1121 TEST_ASSERT(data == RTE_PTR_ALIGN(data, 1122 min_alignment), 1123 "Data addr in mbuf (%p) is not aligned to device min 
alignment (%u)", 1124 data, min_alignment); 1125 rte_memcpy(data, seg->addr, seg->length); 1126 bufs[i].length += seg->length; 1127 1128 ret = rte_pktmbuf_chain(m_head, m_tail); 1129 TEST_ASSERT_SUCCESS(ret, 1130 "Couldn't chain mbufs from %d data type mbuf pool", 1131 op_type); 1132 } 1133 } else { 1134 if (((op_type == DATA_HARD_OUTPUT) || (op_type == DATA_SOFT_OUTPUT)) 1135 && ((seg->length + RTE_PKTMBUF_HEADROOM) 1136 > RTE_BBDEV_LDPC_E_MAX_MBUF)) { 1137 /* Allocate a fake overused mbuf + margin */ 1138 data = rte_malloc(NULL, seg->length + 1024, 0); 1139 TEST_ASSERT_NOT_NULL(data, 1140 "rte malloc failed with %u bytes", 1141 seg->length + 1024); 1142 m_head->buf_addr = data; 1143 rte_mbuf_iova_set(m_head, rte_malloc_virt2iova(data)); 1144 m_head->data_off = 0; 1145 m_head->data_len = seg->length; 1146 } else { 1147 /* allocate chained-mbuf for output buffer */ 1148 for (j = 1; j < ref_entries->nb_segments; ++j) { 1149 struct rte_mbuf *m_tail = 1150 rte_pktmbuf_alloc(mbuf_pool); 1151 TEST_ASSERT_NOT_NULL(m_tail, 1152 "Not enough mbufs in %d data type mbuf pool (needed %u, available %u)", 1153 op_type, 1154 n * ref_entries->nb_segments, 1155 mbuf_pool->size); 1156 1157 ret = rte_pktmbuf_chain(m_head, m_tail); 1158 TEST_ASSERT_SUCCESS(ret, 1159 "Couldn't chain mbufs from %d data type mbuf pool", 1160 op_type); 1161 } 1162 } 1163 bufs[i].length += seg->length; 1164 } 1165 } 1166 1167 return 0; 1168 } 1169 1170 static int 1171 allocate_buffers_on_socket(struct rte_bbdev_op_data **buffers, const int len, 1172 const int socket) 1173 { 1174 int i; 1175 1176 *buffers = rte_zmalloc_socket(NULL, len, 0, socket); 1177 if (*buffers == NULL) { 1178 printf("WARNING: Failed to allocate op_data on socket %d\n", 1179 socket); 1180 /* try to allocate memory on other detected sockets */ 1181 for (i = 0; i < socket; i++) { 1182 *buffers = rte_zmalloc_socket(NULL, len, 0, i); 1183 if (*buffers != NULL) 1184 break; 1185 } 1186 } 1187 1188 return (*buffers == NULL) ? TEST_FAILED : TEST_SUCCESS; 1189 } 1190 1191 static void 1192 limit_input_llr_val_range(struct rte_bbdev_op_data *input_ops, 1193 const uint16_t n, const int8_t max_llr_modulus) 1194 { 1195 uint16_t i, byte_idx; 1196 1197 for (i = 0; i < n; ++i) { 1198 struct rte_mbuf *m = input_ops[i].data; 1199 while (m != NULL) { 1200 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1201 input_ops[i].offset); 1202 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 1203 ++byte_idx) 1204 llr[byte_idx] = round((double)max_llr_modulus * 1205 llr[byte_idx] / INT8_MAX); 1206 1207 m = m->next; 1208 } 1209 } 1210 } 1211 1212 /* 1213 * We may have to insert filler bits 1214 * when they are required by the HARQ assumption 1215 */ 1216 static void 1217 ldpc_add_filler(struct rte_bbdev_op_data *input_ops, 1218 const uint16_t n, struct test_op_params *op_params) 1219 { 1220 struct rte_bbdev_op_ldpc_dec dec = op_params->ref_dec_op->ldpc_dec; 1221 1222 if (input_ops == NULL) 1223 return; 1224 /* No need to add filler if not required by device */ 1225 if (!(ldpc_cap_flags & 1226 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS)) 1227 return; 1228 /* No need to add filler for loopback operation */ 1229 if (dec.op_flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 1230 return; 1231 1232 uint16_t i, j, parity_offset; 1233 for (i = 0; i < n; ++i) { 1234 struct rte_mbuf *m = input_ops[i].data; 1235 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1236 input_ops[i].offset); 1237 parity_offset = (dec.basegraph == 1 ? 
20 : 8) 1238 * dec.z_c - dec.n_filler; 1239 uint16_t new_hin_size = input_ops[i].length + dec.n_filler; 1240 m->data_len = new_hin_size; 1241 input_ops[i].length = new_hin_size; 1242 for (j = new_hin_size - 1; j >= parity_offset + dec.n_filler; 1243 j--) 1244 llr[j] = llr[j - dec.n_filler]; 1245 uint16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 1246 for (j = 0; j < dec.n_filler; j++) 1247 llr[parity_offset + j] = llr_max_pre_scaling; 1248 } 1249 } 1250 1251 static void 1252 ldpc_input_llr_scaling(struct rte_bbdev_op_data *input_ops, 1253 const uint16_t n, const int8_t llr_size, 1254 const int8_t llr_decimals) 1255 { 1256 if (input_ops == NULL) 1257 return; 1258 1259 uint16_t i, byte_idx; 1260 1261 int16_t llr_max, llr_min, llr_tmp; 1262 llr_max = (1 << (llr_size - 1)) - 1; 1263 llr_min = -llr_max; 1264 for (i = 0; i < n; ++i) { 1265 struct rte_mbuf *m = input_ops[i].data; 1266 while (m != NULL) { 1267 int8_t *llr = rte_pktmbuf_mtod_offset(m, int8_t *, 1268 input_ops[i].offset); 1269 for (byte_idx = 0; byte_idx < rte_pktmbuf_data_len(m); 1270 ++byte_idx) { 1271 1272 llr_tmp = llr[byte_idx]; 1273 if (llr_decimals == 4) 1274 llr_tmp *= 8; 1275 else if (llr_decimals == 2) 1276 llr_tmp *= 2; 1277 else if (llr_decimals == 0) 1278 llr_tmp /= 2; 1279 llr_tmp = RTE_MIN(llr_max, 1280 RTE_MAX(llr_min, llr_tmp)); 1281 llr[byte_idx] = (int8_t) llr_tmp; 1282 } 1283 1284 m = m->next; 1285 } 1286 } 1287 } 1288 1289 1290 1291 static int 1292 fill_queue_buffers(struct test_op_params *op_params, 1293 struct rte_mempool *in_mp, struct rte_mempool *hard_out_mp, 1294 struct rte_mempool *soft_out_mp, 1295 struct rte_mempool *harq_in_mp, struct rte_mempool *harq_out_mp, 1296 uint16_t queue_id, 1297 const struct rte_bbdev_op_cap *capabilities, 1298 uint16_t min_alignment, const int socket_id) 1299 { 1300 int ret; 1301 enum op_data_type type; 1302 const uint16_t n = op_params->num_to_process; 1303 1304 struct rte_mempool *mbuf_pools[DATA_NUM_TYPES] = { 1305 in_mp, 1306 soft_out_mp, 1307 hard_out_mp, 1308 harq_in_mp, 1309 harq_out_mp, 1310 }; 1311 1312 struct rte_bbdev_op_data **queue_ops[DATA_NUM_TYPES] = { 1313 &op_params->q_bufs[socket_id][queue_id].inputs, 1314 &op_params->q_bufs[socket_id][queue_id].soft_outputs, 1315 &op_params->q_bufs[socket_id][queue_id].hard_outputs, 1316 &op_params->q_bufs[socket_id][queue_id].harq_inputs, 1317 &op_params->q_bufs[socket_id][queue_id].harq_outputs, 1318 }; 1319 1320 for (type = DATA_INPUT; type < DATA_NUM_TYPES; ++type) { 1321 struct op_data_entries *ref_entries = 1322 &test_vector.entries[type]; 1323 if (ref_entries->nb_segments == 0) 1324 continue; 1325 1326 ret = allocate_buffers_on_socket(queue_ops[type], 1327 n * sizeof(struct rte_bbdev_op_data), 1328 socket_id); 1329 TEST_ASSERT_SUCCESS(ret, 1330 "Couldn't allocate memory for rte_bbdev_op_data structs"); 1331 1332 ret = init_op_data_objs(*queue_ops[type], ref_entries, 1333 mbuf_pools[type], n, type, min_alignment); 1334 TEST_ASSERT_SUCCESS(ret, 1335 "Couldn't init rte_bbdev_op_data structs"); 1336 } 1337 1338 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 1339 limit_input_llr_val_range(*queue_ops[DATA_INPUT], n, 1340 capabilities->cap.turbo_dec.max_llr_modulus); 1341 1342 if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 1343 bool loopback = op_params->ref_dec_op->ldpc_dec.op_flags & 1344 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 1345 bool llr_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 1346 RTE_BBDEV_LDPC_LLR_COMPRESSION; 1347 bool harq_comp = op_params->ref_dec_op->ldpc_dec.op_flags & 
1348 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 1349 ldpc_llr_decimals = capabilities->cap.ldpc_dec.llr_decimals; 1350 ldpc_llr_size = capabilities->cap.ldpc_dec.llr_size; 1351 ldpc_cap_flags = capabilities->cap.ldpc_dec.capability_flags; 1352 if (!loopback && !llr_comp) 1353 ldpc_input_llr_scaling(*queue_ops[DATA_INPUT], n, 1354 ldpc_llr_size, ldpc_llr_decimals); 1355 if (!loopback && !harq_comp) 1356 ldpc_input_llr_scaling(*queue_ops[DATA_HARQ_INPUT], n, 1357 ldpc_llr_size, ldpc_llr_decimals); 1358 if (!loopback) 1359 ldpc_add_filler(*queue_ops[DATA_HARQ_INPUT], n, 1360 op_params); 1361 } 1362 1363 return 0; 1364 } 1365 1366 static void 1367 free_buffers(struct active_device *ad, struct test_op_params *op_params) 1368 { 1369 unsigned int i, j; 1370 1371 rte_mempool_free(ad->ops_mempool); 1372 rte_mempool_free(ad->in_mbuf_pool); 1373 rte_mempool_free(ad->hard_out_mbuf_pool); 1374 rte_mempool_free(ad->soft_out_mbuf_pool); 1375 rte_mempool_free(ad->harq_in_mbuf_pool); 1376 rte_mempool_free(ad->harq_out_mbuf_pool); 1377 1378 for (i = 0; i < rte_lcore_count(); ++i) { 1379 for (j = 0; j < RTE_MAX_NUMA_NODES; ++j) { 1380 rte_free(op_params->q_bufs[j][i].inputs); 1381 rte_free(op_params->q_bufs[j][i].hard_outputs); 1382 rte_free(op_params->q_bufs[j][i].soft_outputs); 1383 rte_free(op_params->q_bufs[j][i].harq_inputs); 1384 rte_free(op_params->q_bufs[j][i].harq_outputs); 1385 } 1386 } 1387 } 1388 1389 static void 1390 copy_reference_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1391 unsigned int start_idx, 1392 struct rte_bbdev_op_data *inputs, 1393 struct rte_bbdev_op_data *hard_outputs, 1394 struct rte_bbdev_op_data *soft_outputs, 1395 struct rte_bbdev_dec_op *ref_op) 1396 { 1397 unsigned int i; 1398 struct rte_bbdev_op_turbo_dec *turbo_dec = &ref_op->turbo_dec; 1399 1400 for (i = 0; i < n; ++i) { 1401 if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1402 ops[i]->turbo_dec.tb_params.ea = 1403 turbo_dec->tb_params.ea; 1404 ops[i]->turbo_dec.tb_params.eb = 1405 turbo_dec->tb_params.eb; 1406 ops[i]->turbo_dec.tb_params.k_pos = 1407 turbo_dec->tb_params.k_pos; 1408 ops[i]->turbo_dec.tb_params.k_neg = 1409 turbo_dec->tb_params.k_neg; 1410 ops[i]->turbo_dec.tb_params.c = 1411 turbo_dec->tb_params.c; 1412 ops[i]->turbo_dec.tb_params.c_neg = 1413 turbo_dec->tb_params.c_neg; 1414 ops[i]->turbo_dec.tb_params.cab = 1415 turbo_dec->tb_params.cab; 1416 ops[i]->turbo_dec.tb_params.r = 1417 turbo_dec->tb_params.r; 1418 } else { 1419 ops[i]->turbo_dec.cb_params.e = turbo_dec->cb_params.e; 1420 ops[i]->turbo_dec.cb_params.k = turbo_dec->cb_params.k; 1421 } 1422 1423 ops[i]->turbo_dec.ext_scale = turbo_dec->ext_scale; 1424 ops[i]->turbo_dec.iter_max = turbo_dec->iter_max; 1425 ops[i]->turbo_dec.iter_min = turbo_dec->iter_min; 1426 ops[i]->turbo_dec.op_flags = turbo_dec->op_flags; 1427 ops[i]->turbo_dec.rv_index = turbo_dec->rv_index; 1428 ops[i]->turbo_dec.num_maps = turbo_dec->num_maps; 1429 ops[i]->turbo_dec.code_block_mode = turbo_dec->code_block_mode; 1430 1431 ops[i]->turbo_dec.hard_output = hard_outputs[start_idx + i]; 1432 ops[i]->turbo_dec.input = inputs[start_idx + i]; 1433 if (soft_outputs != NULL) 1434 ops[i]->turbo_dec.soft_output = 1435 soft_outputs[start_idx + i]; 1436 } 1437 } 1438 1439 static void 1440 copy_reference_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1441 unsigned int start_idx, 1442 struct rte_bbdev_op_data *inputs, 1443 struct rte_bbdev_op_data *outputs, 1444 struct rte_bbdev_enc_op *ref_op) 1445 { 1446 unsigned int i; 1447 struct rte_bbdev_op_turbo_enc 
*turbo_enc = &ref_op->turbo_enc; 1448 for (i = 0; i < n; ++i) { 1449 if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1450 ops[i]->turbo_enc.tb_params.ea = 1451 turbo_enc->tb_params.ea; 1452 ops[i]->turbo_enc.tb_params.eb = 1453 turbo_enc->tb_params.eb; 1454 ops[i]->turbo_enc.tb_params.k_pos = 1455 turbo_enc->tb_params.k_pos; 1456 ops[i]->turbo_enc.tb_params.k_neg = 1457 turbo_enc->tb_params.k_neg; 1458 ops[i]->turbo_enc.tb_params.c = 1459 turbo_enc->tb_params.c; 1460 ops[i]->turbo_enc.tb_params.c_neg = 1461 turbo_enc->tb_params.c_neg; 1462 ops[i]->turbo_enc.tb_params.cab = 1463 turbo_enc->tb_params.cab; 1464 ops[i]->turbo_enc.tb_params.ncb_pos = 1465 turbo_enc->tb_params.ncb_pos; 1466 ops[i]->turbo_enc.tb_params.ncb_neg = 1467 turbo_enc->tb_params.ncb_neg; 1468 ops[i]->turbo_enc.tb_params.r = turbo_enc->tb_params.r; 1469 } else { 1470 ops[i]->turbo_enc.cb_params.e = turbo_enc->cb_params.e; 1471 ops[i]->turbo_enc.cb_params.k = turbo_enc->cb_params.k; 1472 ops[i]->turbo_enc.cb_params.ncb = 1473 turbo_enc->cb_params.ncb; 1474 } 1475 ops[i]->turbo_enc.rv_index = turbo_enc->rv_index; 1476 ops[i]->turbo_enc.op_flags = turbo_enc->op_flags; 1477 ops[i]->turbo_enc.code_block_mode = turbo_enc->code_block_mode; 1478 1479 ops[i]->turbo_enc.output = outputs[start_idx + i]; 1480 ops[i]->turbo_enc.input = inputs[start_idx + i]; 1481 } 1482 } 1483 1484 1485 /* Returns a random number drawn from a normal distribution 1486 * with mean of 0 and variance of 1 1487 * Marsaglia algorithm 1488 */ 1489 static double 1490 randn(int n) 1491 { 1492 double S, Z, U1, U2, u, v, fac; 1493 1494 do { 1495 U1 = (double)rand() / RAND_MAX; 1496 U2 = (double)rand() / RAND_MAX; 1497 u = 2. * U1 - 1.; 1498 v = 2. * U2 - 1.; 1499 S = u * u + v * v; 1500 } while (S >= 1 || S == 0); 1501 fac = sqrt(-2. * log(S) / S); 1502 Z = (n % 2) ? 
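	/* Illustrative note (not part of the original source): the polar method
	 * produces two independent N(0,1) samples, u * fac and v * fac, from one
	 * accepted (u, v) pair; the parity of n only selects which one is returned.
	 */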
u * fac : v * fac; 1503 return Z; 1504 } 1505 1506 static inline double 1507 maxstar(double A, double B) 1508 { 1509 if (fabs(A - B) > 5) 1510 return RTE_MAX(A, B); 1511 else 1512 return RTE_MAX(A, B) + log1p(exp(-fabs(A - B))); 1513 } 1514 1515 /* 1516 * Generate Qm LLRS for Qm==8 1517 * Modulation, AWGN and LLR estimation from max log development 1518 */ 1519 static void 1520 gen_qm8_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1521 { 1522 int qm = 8; 1523 int qam = 256; 1524 int m, k; 1525 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1526 /* 5.1.4 of TS38.211 */ 1527 const double symbols_I[256] = { 1528 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 5, 1529 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1, 3, 3, 1, 1, 11, 1530 11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 15, 13, 13, 1531 15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 15, 1532 15, 13, 13, 15, 15, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1533 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 3, 3, 1, 1534 1, 3, 3, 1, 1, 11, 11, 9, 9, 11, 11, 9, 9, 13, 13, 1535 15, 15, 13, 13, 15, 15, 11, 11, 9, 9, 11, 11, 9, 9, 1536 13, 13, 15, 15, 13, 13, 15, 15, -5, -5, -7, -7, -5, 1537 -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, 1538 -7, -7, -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, 1539 -1, -1, -11, -11, -9, -9, -11, -11, -9, -9, -13, 1540 -13, -15, -15, -13, -13, -15, -15, -11, -11, -9, 1541 -9, -11, -11, -9, -9, -13, -13, -15, -15, -13, 1542 -13, -15, -15, -5, -5, -7, -7, -5, -5, -7, -7, -3, 1543 -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, -5, -5, 1544 -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, -11, -11, 1545 -9, -9, -11, -11, -9, -9, -13, -13, -15, -15, -13, 1546 -13, -15, -15, -11, -11, -9, -9, -11, -11, -9, -9, 1547 -13, -13, -15, -15, -13, -13, -15, -15}; 1548 const double symbols_Q[256] = { 1549 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11, 1550 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 13, 1551 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 1552 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 1553 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, -5, 1554 -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, 1555 -15, -13, -15, -11, -9, -11, -9, -13, -15, -13, 1556 -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, -5, 1557 -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15, 1558 -13, -15, -11, -9, -11, -9, -13, -15, -13, -15, 5, 1559 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 11, 1560 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 13, 15, 1561 13, 15, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 1562 3, 1, 11, 9, 11, 9, 13, 15, 13, 15, 11, 9, 11, 9, 1563 13, 15, 13, 15, -5, -7, -5, -7, -3, -1, -3, -1, 1564 -5, -7, -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, 1565 -13, -15, -13, -15, -11, -9, -11, -9, -13, -15, 1566 -13, -15, -5, -7, -5, -7, -3, -1, -3, -1, -5, -7, 1567 -5, -7, -3, -1, -3, -1, -11, -9, -11, -9, -13, -15, 1568 -13, -15, -11, -9, -11, -9, -13, -15, -13, -15}; 1569 /* Average constellation point energy */ 1570 N0 *= 170.0; 1571 for (k = 0; k < qm; k++) 1572 b[k] = llrs[qm * i + k] < 0 ? 
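		/* Illustrative note (not part of the original source): the sign of
		 * the incoming LLR is used as the transmitted bit estimate for
		 * re-modulation: a negative LLR maps to bit 1, otherwise bit 0.
		 */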
1.0 : 0.0; 1573 /* 5.1.4 of TS38.211 */ 1574 I = (1 - 2 * b[0]) * (8 - (1 - 2 * b[2]) * 1575 (4 - (1 - 2 * b[4]) * (2 - (1 - 2 * b[6])))); 1576 Q = (1 - 2 * b[1]) * (8 - (1 - 2 * b[3]) * 1577 (4 - (1 - 2 * b[5]) * (2 - (1 - 2 * b[7])))); 1578 /* AWGN channel */ 1579 I += sqrt(N0 / 2) * randn(0); 1580 Q += sqrt(N0 / 2) * randn(1); 1581 /* 1582 * Calculate the log of the probability that each of 1583 * the constellation points was transmitted 1584 */ 1585 for (m = 0; m < qam; m++) 1586 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1587 + pow(Q - symbols_Q[m], 2.0)) / N0; 1588 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1589 for (k = 0; k < qm; k++) { 1590 p0 = -999999; 1591 p1 = -999999; 1592 /* For each constellation point */ 1593 for (m = 0; m < qam; m++) { 1594 if ((m >> (qm - k - 1)) & 1) 1595 p1 = maxstar(p1, log_syml_prob[m]); 1596 else 1597 p0 = maxstar(p0, log_syml_prob[m]); 1598 } 1599 /* Calculate the LLR */ 1600 llr_ = p0 - p1; 1601 llr_ *= (1 << ldpc_llr_decimals); 1602 llr_ = round(llr_); 1603 if (llr_ > llr_max) 1604 llr_ = llr_max; 1605 if (llr_ < -llr_max) 1606 llr_ = -llr_max; 1607 llrs[qm * i + k] = (int8_t) llr_; 1608 } 1609 } 1610 1611 1612 /* 1613 * Generate Qm LLRS for Qm==6 1614 * Modulation, AWGN and LLR estimation from max log development 1615 */ 1616 static void 1617 gen_qm6_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1618 { 1619 int qm = 6; 1620 int qam = 64; 1621 int m, k; 1622 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1623 /* 5.1.4 of TS38.211 */ 1624 const double symbols_I[64] = { 1625 3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 1626 3, 3, 1, 1, 3, 3, 1, 1, 5, 5, 7, 7, 5, 5, 7, 7, 1627 -3, -3, -1, -1, -3, -3, -1, -1, -5, -5, -7, -7, 1628 -5, -5, -7, -7, -3, -3, -1, -1, -3, -3, -1, -1, 1629 -5, -5, -7, -7, -5, -5, -7, -7}; 1630 const double symbols_Q[64] = { 1631 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 5, 7, 5, 7, 1632 -3, -1, -3, -1, -5, -7, -5, -7, -3, -1, -3, -1, 1633 -5, -7, -5, -7, 3, 1, 3, 1, 5, 7, 5, 7, 3, 1, 3, 1, 1634 5, 7, 5, 7, -3, -1, -3, -1, -5, -7, -5, -7, 1635 -3, -1, -3, -1, -5, -7, -5, -7}; 1636 /* Average constellation point energy */ 1637 N0 *= 42.0; 1638 for (k = 0; k < qm; k++) 1639 b[k] = llrs[qm * i + k] < 0 ? 
1.0 : 0.0; 1640 /* 5.1.4 of TS38.211 */ 1641 I = (1 - 2 * b[0])*(4 - (1 - 2 * b[2]) * (2 - (1 - 2 * b[4]))); 1642 Q = (1 - 2 * b[1])*(4 - (1 - 2 * b[3]) * (2 - (1 - 2 * b[5]))); 1643 /* AWGN channel */ 1644 I += sqrt(N0 / 2) * randn(0); 1645 Q += sqrt(N0 / 2) * randn(1); 1646 /* 1647 * Calculate the log of the probability that each of 1648 * the constellation points was transmitted 1649 */ 1650 for (m = 0; m < qam; m++) 1651 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1652 + pow(Q - symbols_Q[m], 2.0)) / N0; 1653 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1654 for (k = 0; k < qm; k++) { 1655 p0 = -999999; 1656 p1 = -999999; 1657 /* For each constellation point */ 1658 for (m = 0; m < qam; m++) { 1659 if ((m >> (qm - k - 1)) & 1) 1660 p1 = maxstar(p1, log_syml_prob[m]); 1661 else 1662 p0 = maxstar(p0, log_syml_prob[m]); 1663 } 1664 /* Calculate the LLR */ 1665 llr_ = p0 - p1; 1666 llr_ *= (1 << ldpc_llr_decimals); 1667 llr_ = round(llr_); 1668 if (llr_ > llr_max) 1669 llr_ = llr_max; 1670 if (llr_ < -llr_max) 1671 llr_ = -llr_max; 1672 llrs[qm * i + k] = (int8_t) llr_; 1673 } 1674 } 1675 1676 /* 1677 * Generate Qm LLRS for Qm==4 1678 * Modulation, AWGN and LLR estimation from max log development 1679 */ 1680 static void 1681 gen_qm4_llr(int8_t *llrs, uint32_t i, double N0, double llr_max) 1682 { 1683 int qm = 4; 1684 int qam = 16; 1685 int m, k; 1686 double I, Q, p0, p1, llr_, b[qm], log_syml_prob[qam]; 1687 /* 5.1.4 of TS38.211 */ 1688 const double symbols_I[16] = {1, 1, 3, 3, 1, 1, 3, 3, 1689 -1, -1, -3, -3, -1, -1, -3, -3}; 1690 const double symbols_Q[16] = {1, 3, 1, 3, -1, -3, -1, -3, 1691 1, 3, 1, 3, -1, -3, -1, -3}; 1692 /* Average constellation point energy */ 1693 N0 *= 10.0; 1694 for (k = 0; k < qm; k++) 1695 b[k] = llrs[qm * i + k] < 0 ? 1.0 : 0.0; 1696 /* 5.1.4 of TS38.211 */ 1697 I = (1 - 2 * b[0]) * (2 - (1 - 2 * b[2])); 1698 Q = (1 - 2 * b[1]) * (2 - (1 - 2 * b[3])); 1699 /* AWGN channel */ 1700 I += sqrt(N0 / 2) * randn(0); 1701 Q += sqrt(N0 / 2) * randn(1); 1702 /* 1703 * Calculate the log of the probability that each of 1704 * the constellation points was transmitted 1705 */ 1706 for (m = 0; m < qam; m++) 1707 log_syml_prob[m] = -(pow(I - symbols_I[m], 2.0) 1708 + pow(Q - symbols_Q[m], 2.0)) / N0; 1709 /* Calculate an LLR for each of the k_64QAM bits in the set */ 1710 for (k = 0; k < qm; k++) { 1711 p0 = -999999; 1712 p1 = -999999; 1713 /* For each constellation point */ 1714 for (m = 0; m < qam; m++) { 1715 if ((m >> (qm - k - 1)) & 1) 1716 p1 = maxstar(p1, log_syml_prob[m]); 1717 else 1718 p0 = maxstar(p0, log_syml_prob[m]); 1719 } 1720 /* Calculate the LLR */ 1721 llr_ = p0 - p1; 1722 llr_ *= (1 << ldpc_llr_decimals); 1723 llr_ = round(llr_); 1724 if (llr_ > llr_max) 1725 llr_ = llr_max; 1726 if (llr_ < -llr_max) 1727 llr_ = -llr_max; 1728 llrs[qm * i + k] = (int8_t) llr_; 1729 } 1730 } 1731 1732 static void 1733 gen_qm2_llr(int8_t *llrs, uint32_t j, double N0, double llr_max) 1734 { 1735 double b, b1, n; 1736 double coeff = 2.0 * sqrt(N0); 1737 1738 /* Ignore in vectors rare quasi null LLRs not to be saturated */ 1739 if (llrs[j] < 8 && llrs[j] > -8) 1740 return; 1741 1742 /* Note don't change sign here */ 1743 n = randn(j % 2); 1744 b1 = ((llrs[j] > 0 ? 
2.0 : -2.0) 1745 + coeff * n) / N0; 1746 b = b1 * (1 << ldpc_llr_decimals); 1747 b = round(b); 1748 if (b > llr_max) 1749 b = llr_max; 1750 if (b < -llr_max) 1751 b = -llr_max; 1752 llrs[j] = (int8_t) b; 1753 } 1754 1755 /* Generate LLR for a given SNR */ 1756 static void 1757 generate_llr_input(uint16_t n, struct rte_bbdev_op_data *inputs, 1758 struct rte_bbdev_dec_op *ref_op) 1759 { 1760 struct rte_mbuf *m; 1761 uint16_t qm; 1762 uint32_t i, j, e, range; 1763 double N0, llr_max; 1764 1765 e = ref_op->ldpc_dec.cb_params.e; 1766 qm = ref_op->ldpc_dec.q_m; 1767 llr_max = (1 << (ldpc_llr_size - 1)) - 1; 1768 range = e / qm; 1769 N0 = 1.0 / pow(10.0, get_snr() / 10.0); 1770 1771 for (i = 0; i < n; ++i) { 1772 m = inputs[i].data; 1773 int8_t *llrs = rte_pktmbuf_mtod_offset(m, int8_t *, 0); 1774 if (qm == 8) { 1775 for (j = 0; j < range; ++j) 1776 gen_qm8_llr(llrs, j, N0, llr_max); 1777 } else if (qm == 6) { 1778 for (j = 0; j < range; ++j) 1779 gen_qm6_llr(llrs, j, N0, llr_max); 1780 } else if (qm == 4) { 1781 for (j = 0; j < range; ++j) 1782 gen_qm4_llr(llrs, j, N0, llr_max); 1783 } else { 1784 for (j = 0; j < e; ++j) 1785 gen_qm2_llr(llrs, j, N0, llr_max); 1786 } 1787 } 1788 } 1789 1790 static void 1791 copy_reference_ldpc_dec_op(struct rte_bbdev_dec_op **ops, unsigned int n, 1792 unsigned int start_idx, 1793 struct rte_bbdev_op_data *inputs, 1794 struct rte_bbdev_op_data *hard_outputs, 1795 struct rte_bbdev_op_data *soft_outputs, 1796 struct rte_bbdev_op_data *harq_inputs, 1797 struct rte_bbdev_op_data *harq_outputs, 1798 struct rte_bbdev_dec_op *ref_op) 1799 { 1800 unsigned int i; 1801 struct rte_bbdev_op_ldpc_dec *ldpc_dec = &ref_op->ldpc_dec; 1802 1803 for (i = 0; i < n; ++i) { 1804 if (ldpc_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1805 ops[i]->ldpc_dec.tb_params.ea = 1806 ldpc_dec->tb_params.ea; 1807 ops[i]->ldpc_dec.tb_params.eb = 1808 ldpc_dec->tb_params.eb; 1809 ops[i]->ldpc_dec.tb_params.c = 1810 ldpc_dec->tb_params.c; 1811 ops[i]->ldpc_dec.tb_params.cab = 1812 ldpc_dec->tb_params.cab; 1813 ops[i]->ldpc_dec.tb_params.r = 1814 ldpc_dec->tb_params.r; 1815 } else { 1816 ops[i]->ldpc_dec.cb_params.e = ldpc_dec->cb_params.e; 1817 } 1818 1819 ops[i]->ldpc_dec.basegraph = ldpc_dec->basegraph; 1820 ops[i]->ldpc_dec.z_c = ldpc_dec->z_c; 1821 ops[i]->ldpc_dec.q_m = ldpc_dec->q_m; 1822 ops[i]->ldpc_dec.n_filler = ldpc_dec->n_filler; 1823 ops[i]->ldpc_dec.n_cb = ldpc_dec->n_cb; 1824 ops[i]->ldpc_dec.iter_max = ldpc_dec->iter_max; 1825 ops[i]->ldpc_dec.rv_index = ldpc_dec->rv_index; 1826 ops[i]->ldpc_dec.op_flags = ldpc_dec->op_flags; 1827 ops[i]->ldpc_dec.code_block_mode = ldpc_dec->code_block_mode; 1828 1829 if (hard_outputs != NULL) 1830 ops[i]->ldpc_dec.hard_output = 1831 hard_outputs[start_idx + i]; 1832 if (inputs != NULL) 1833 ops[i]->ldpc_dec.input = 1834 inputs[start_idx + i]; 1835 if (soft_outputs != NULL) 1836 ops[i]->ldpc_dec.soft_output = 1837 soft_outputs[start_idx + i]; 1838 if (harq_inputs != NULL) 1839 ops[i]->ldpc_dec.harq_combined_input = 1840 harq_inputs[start_idx + i]; 1841 if (harq_outputs != NULL) 1842 ops[i]->ldpc_dec.harq_combined_output = 1843 harq_outputs[start_idx + i]; 1844 } 1845 } 1846 1847 1848 static void 1849 copy_reference_ldpc_enc_op(struct rte_bbdev_enc_op **ops, unsigned int n, 1850 unsigned int start_idx, 1851 struct rte_bbdev_op_data *inputs, 1852 struct rte_bbdev_op_data *outputs, 1853 struct rte_bbdev_enc_op *ref_op) 1854 { 1855 unsigned int i; 1856 struct rte_bbdev_op_ldpc_enc *ldpc_enc = &ref_op->ldpc_enc; 1857 for (i = 0; i < n; ++i) { 
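		/* Illustrative note (not part of the original source): every op in
		 * the burst is filled from the same reference descriptor; only the
		 * input/output rte_bbdev_op_data attached below differ per op,
		 * indexed by start_idx + i.
		 */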
1858 if (ldpc_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1859 ops[i]->ldpc_enc.tb_params.ea = ldpc_enc->tb_params.ea; 1860 ops[i]->ldpc_enc.tb_params.eb = ldpc_enc->tb_params.eb; 1861 ops[i]->ldpc_enc.tb_params.cab = 1862 ldpc_enc->tb_params.cab; 1863 ops[i]->ldpc_enc.tb_params.c = ldpc_enc->tb_params.c; 1864 ops[i]->ldpc_enc.tb_params.r = ldpc_enc->tb_params.r; 1865 } else { 1866 ops[i]->ldpc_enc.cb_params.e = ldpc_enc->cb_params.e; 1867 } 1868 ops[i]->ldpc_enc.basegraph = ldpc_enc->basegraph; 1869 ops[i]->ldpc_enc.z_c = ldpc_enc->z_c; 1870 ops[i]->ldpc_enc.q_m = ldpc_enc->q_m; 1871 ops[i]->ldpc_enc.n_filler = ldpc_enc->n_filler; 1872 ops[i]->ldpc_enc.n_cb = ldpc_enc->n_cb; 1873 ops[i]->ldpc_enc.rv_index = ldpc_enc->rv_index; 1874 ops[i]->ldpc_enc.op_flags = ldpc_enc->op_flags; 1875 ops[i]->ldpc_enc.code_block_mode = ldpc_enc->code_block_mode; 1876 ops[i]->ldpc_enc.output = outputs[start_idx + i]; 1877 ops[i]->ldpc_enc.input = inputs[start_idx + i]; 1878 } 1879 } 1880 1881 static void 1882 copy_reference_fft_op(struct rte_bbdev_fft_op **ops, unsigned int n, 1883 unsigned int start_idx, struct rte_bbdev_op_data *inputs, 1884 struct rte_bbdev_op_data *outputs, struct rte_bbdev_op_data *pwrouts, 1885 struct rte_bbdev_fft_op *ref_op) 1886 { 1887 unsigned int i, j; 1888 struct rte_bbdev_op_fft *fft = &ref_op->fft; 1889 for (i = 0; i < n; i++) { 1890 ops[i]->fft.input_sequence_size = fft->input_sequence_size; 1891 ops[i]->fft.input_leading_padding = fft->input_leading_padding; 1892 ops[i]->fft.output_sequence_size = fft->output_sequence_size; 1893 ops[i]->fft.output_leading_depadding = 1894 fft->output_leading_depadding; 1895 for (j = 0; j < RTE_BBDEV_MAX_CS_2; j++) 1896 ops[i]->fft.window_index[j] = fft->window_index[j]; 1897 ops[i]->fft.cs_bitmap = fft->cs_bitmap; 1898 ops[i]->fft.num_antennas_log2 = fft->num_antennas_log2; 1899 ops[i]->fft.idft_log2 = fft->idft_log2; 1900 ops[i]->fft.dft_log2 = fft->dft_log2; 1901 ops[i]->fft.cs_time_adjustment = fft->cs_time_adjustment; 1902 ops[i]->fft.idft_shift = fft->idft_shift; 1903 ops[i]->fft.dft_shift = fft->dft_shift; 1904 ops[i]->fft.ncs_reciprocal = fft->ncs_reciprocal; 1905 ops[i]->fft.power_shift = fft->power_shift; 1906 ops[i]->fft.fp16_exp_adjust = fft->fp16_exp_adjust; 1907 ops[i]->fft.base_output = outputs[start_idx + i]; 1908 ops[i]->fft.base_input = inputs[start_idx + i]; 1909 if (pwrouts != NULL) 1910 ops[i]->fft.power_meas_output = pwrouts[start_idx + i]; 1911 ops[i]->fft.op_flags = fft->op_flags; 1912 } 1913 } 1914 1915 static int 1916 check_dec_status_and_ordering(struct rte_bbdev_dec_op *op, 1917 unsigned int order_idx, const int expected_status) 1918 { 1919 int status = op->status; 1920 /* ignore parity mismatch false alarms for long iterations */ 1921 if (get_iter_max() >= 10) { 1922 if (!(expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1923 (status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1924 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1925 status -= (1 << RTE_BBDEV_SYNDROME_ERROR); 1926 } 1927 if ((expected_status & (1 << RTE_BBDEV_SYNDROME_ERROR)) && 1928 !(status & (1 << RTE_BBDEV_SYNDROME_ERROR))) { 1929 printf("WARNING: Ignore Syndrome Check mismatch\n"); 1930 status += (1 << RTE_BBDEV_SYNDROME_ERROR); 1931 } 1932 } 1933 1934 TEST_ASSERT(status == expected_status, 1935 "op_status (%d) != expected_status (%d)", 1936 op->status, expected_status); 1937 1938 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1939 "Ordering error, expected %p, got %p", 1940 (void *)(uintptr_t)order_idx, 
op->opaque_data); 1941 1942 return TEST_SUCCESS; 1943 } 1944 1945 static int 1946 check_enc_status_and_ordering(struct rte_bbdev_enc_op *op, 1947 unsigned int order_idx, const int expected_status) 1948 { 1949 TEST_ASSERT(op->status == expected_status, 1950 "op_status (%d) != expected_status (%d)", 1951 op->status, expected_status); 1952 1953 if (op->opaque_data != (void *)(uintptr_t)INVALID_OPAQUE) 1954 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1955 "Ordering error, expected %p, got %p", 1956 (void *)(uintptr_t)order_idx, op->opaque_data); 1957 1958 return TEST_SUCCESS; 1959 } 1960 1961 static int 1962 check_fft_status_and_ordering(struct rte_bbdev_fft_op *op, 1963 unsigned int order_idx, const int expected_status) 1964 { 1965 TEST_ASSERT(op->status == expected_status, 1966 "op_status (%d) != expected_status (%d)", 1967 op->status, expected_status); 1968 1969 TEST_ASSERT((void *)(uintptr_t)order_idx == op->opaque_data, 1970 "Ordering error, expected %p, got %p", 1971 (void *)(uintptr_t)order_idx, op->opaque_data); 1972 1973 return TEST_SUCCESS; 1974 } 1975 1976 static inline int 1977 validate_op_chain(struct rte_bbdev_op_data *op, 1978 struct op_data_entries *orig_op) 1979 { 1980 uint8_t i; 1981 struct rte_mbuf *m = op->data; 1982 uint8_t nb_dst_segments = orig_op->nb_segments; 1983 uint32_t total_data_size = 0; 1984 1985 TEST_ASSERT(nb_dst_segments == m->nb_segs, 1986 "Number of segments differ in original (%u) and filled (%u) op", 1987 nb_dst_segments, m->nb_segs); 1988 1989 /* Validate each mbuf segment length */ 1990 for (i = 0; i < nb_dst_segments; ++i) { 1991 /* Apply offset to the first mbuf segment */ 1992 uint16_t offset = (i == 0) ? op->offset : 0; 1993 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 1994 total_data_size += orig_op->segments[i].length; 1995 1996 TEST_ASSERT(orig_op->segments[i].length == data_len, 1997 "Length of segment differ in original (%u) and filled (%u) op", 1998 orig_op->segments[i].length, data_len); 1999 TEST_ASSERT_BUFFERS_ARE_EQUAL(orig_op->segments[i].addr, 2000 rte_pktmbuf_mtod_offset(m, uint32_t *, offset), 2001 data_len, 2002 "Output buffers (CB=%u) are not equal", i); 2003 m = m->next; 2004 } 2005 2006 /* Validate total mbuf pkt length */ 2007 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 2008 TEST_ASSERT(total_data_size == pkt_len, 2009 "Length of data differ in original (%u) and filled (%u) op", 2010 total_data_size, pkt_len); 2011 2012 return TEST_SUCCESS; 2013 } 2014 2015 /* 2016 * Compute K0 for a given configuration for HARQ output length computation 2017 * As per definition in 3GPP 38.212 Table 5.4.2.1-2 2018 */ 2019 static inline uint16_t 2020 get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index) 2021 { 2022 if (rv_index == 0) 2023 return 0; 2024 uint16_t n = (bg == 1 ? N_ZC_1 : N_ZC_2) * z_c; 2025 if (n_cb == n) { 2026 if (rv_index == 1) 2027 return (bg == 1 ? K0_1_1 : K0_1_2) * z_c; 2028 else if (rv_index == 2) 2029 return (bg == 1 ? K0_2_1 : K0_2_2) * z_c; 2030 else 2031 return (bg == 1 ? K0_3_1 : K0_3_2) * z_c; 2032 } 2033 /* LBRM case - includes a division by N */ 2034 if (rv_index == 1) 2035 return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb) 2036 / n) * z_c; 2037 else if (rv_index == 2) 2038 return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb) 2039 / n) * z_c; 2040 else 2041 return (((bg == 1 ? 
K0_3_1 : K0_3_2) * n_cb) 2042 / n) * z_c; 2043 } 2044 2045 /* HARQ output length including the Filler bits */ 2046 static inline uint16_t 2047 compute_harq_len(struct rte_bbdev_op_ldpc_dec *ops_ld) 2048 { 2049 uint16_t k0 = 0; 2050 uint8_t max_rv = (ops_ld->rv_index == 1) ? 3 : ops_ld->rv_index; 2051 k0 = get_k0(ops_ld->n_cb, ops_ld->z_c, ops_ld->basegraph, max_rv); 2052 /* Compute RM out size and number of rows */ 2053 uint16_t parity_offset = (ops_ld->basegraph == 1 ? 20 : 8) 2054 * ops_ld->z_c - ops_ld->n_filler; 2055 uint16_t deRmOutSize = RTE_MIN( 2056 k0 + ops_ld->cb_params.e + 2057 ((k0 > parity_offset) ? 2058 0 : ops_ld->n_filler), 2059 ops_ld->n_cb); 2060 uint16_t numRows = ((deRmOutSize + ops_ld->z_c - 1) 2061 / ops_ld->z_c); 2062 uint16_t harq_output_len = numRows * ops_ld->z_c; 2063 return harq_output_len; 2064 } 2065 2066 static inline int 2067 validate_op_harq_chain(struct rte_bbdev_op_data *op, 2068 struct op_data_entries *orig_op, 2069 struct rte_bbdev_op_ldpc_dec *ops_ld) 2070 { 2071 uint8_t i; 2072 uint32_t j, jj, k; 2073 struct rte_mbuf *m = op->data; 2074 uint8_t nb_dst_segments = orig_op->nb_segments; 2075 uint32_t total_data_size = 0; 2076 int8_t *harq_orig, *harq_out, abs_harq_origin; 2077 uint32_t byte_error = 0, cum_error = 0, error; 2078 int16_t llr_max = (1 << (ldpc_llr_size - ldpc_llr_decimals)) - 1; 2079 int16_t llr_max_pre_scaling = (1 << (ldpc_llr_size - 1)) - 1; 2080 uint16_t parity_offset; 2081 2082 TEST_ASSERT(nb_dst_segments == m->nb_segs, 2083 "Number of segments differ in original (%u) and filled (%u) op", 2084 nb_dst_segments, m->nb_segs); 2085 2086 /* Validate each mbuf segment length */ 2087 for (i = 0; i < nb_dst_segments; ++i) { 2088 /* Apply offset to the first mbuf segment */ 2089 uint16_t offset = (i == 0) ? op->offset : 0; 2090 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 2091 total_data_size += orig_op->segments[i].length; 2092 2093 TEST_ASSERT(orig_op->segments[i].length < 2094 (uint32_t)(data_len + 64), 2095 "Length of segment differ in original (%u) and filled (%u) op", 2096 orig_op->segments[i].length, data_len); 2097 harq_orig = (int8_t *) orig_op->segments[i].addr; 2098 harq_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset); 2099 2100 if (!(ldpc_cap_flags & 2101 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS 2102 ) || (ops_ld->op_flags & 2103 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 2104 data_len -= ops_ld->z_c; 2105 parity_offset = data_len; 2106 } else { 2107 /* Compute RM out size and number of rows */ 2108 parity_offset = (ops_ld->basegraph == 1 ? 
20 : 8) 2109 * ops_ld->z_c - ops_ld->n_filler; 2110 uint16_t deRmOutSize = compute_harq_len(ops_ld) - 2111 ops_ld->n_filler; 2112 if (data_len > deRmOutSize) 2113 data_len = deRmOutSize; 2114 if (data_len > orig_op->segments[i].length) 2115 data_len = orig_op->segments[i].length; 2116 } 2117 /* 2118 * HARQ output can have minor differences 2119 * due to integer representation and related scaling 2120 */ 2121 for (j = 0, jj = 0; j < data_len; j++, jj++) { 2122 if (j == parity_offset) { 2123 /* Special Handling of the filler bits */ 2124 for (k = 0; k < ops_ld->n_filler; k++) { 2125 if (harq_out[jj] != 2126 llr_max_pre_scaling) { 2127 printf("HARQ Filler issue %d: %d %d\n", 2128 jj, harq_out[jj], 2129 llr_max); 2130 byte_error++; 2131 } 2132 jj++; 2133 } 2134 } 2135 if (!(ops_ld->op_flags & 2136 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { 2137 if (ldpc_llr_decimals > 1) 2138 harq_out[jj] = (harq_out[jj] + 1) 2139 >> (ldpc_llr_decimals - 1); 2140 /* Saturated to S7 */ 2141 if (harq_orig[j] > llr_max) 2142 harq_orig[j] = llr_max; 2143 if (harq_orig[j] < -llr_max) 2144 harq_orig[j] = -llr_max; 2145 } 2146 if (harq_orig[j] != harq_out[jj]) { 2147 error = (harq_orig[j] > harq_out[jj]) ? 2148 harq_orig[j] - harq_out[jj] : 2149 harq_out[jj] - harq_orig[j]; 2150 abs_harq_origin = harq_orig[j] > 0 ? 2151 harq_orig[j] : 2152 -harq_orig[j]; 2153 /* Residual quantization error */ 2154 if ((error > 8 && (abs_harq_origin < 2155 (llr_max - 16))) || 2156 (error > 16)) { 2157 printf("HARQ mismatch %d: exp %d act %d => %d\n", 2158 j, harq_orig[j], 2159 harq_out[jj], error); 2160 byte_error++; 2161 cum_error += error; 2162 } 2163 } 2164 } 2165 m = m->next; 2166 } 2167 2168 if (byte_error) 2169 TEST_ASSERT(byte_error <= 1, 2170 "HARQ output mismatch (%d) %d", 2171 byte_error, cum_error); 2172 2173 /* Validate total mbuf pkt length */ 2174 uint32_t pkt_len = rte_pktmbuf_pkt_len(op->data) - op->offset; 2175 TEST_ASSERT(total_data_size < pkt_len + 64, 2176 "Length of data differ in original (%u) and filled (%u) op", 2177 total_data_size, pkt_len); 2178 2179 return TEST_SUCCESS; 2180 } 2181 2182 2183 static inline int 2184 validate_op_so_chain(struct rte_bbdev_op_data *op, 2185 struct op_data_entries *orig_op) 2186 { 2187 struct rte_mbuf *m = op->data; 2188 uint8_t i, nb_dst_segments = orig_op->nb_segments; 2189 uint32_t j, jj; 2190 int8_t *so_orig, *so_out; 2191 uint32_t byte_error = 0, error, margin_error = 0; 2192 2193 TEST_ASSERT(nb_dst_segments == m->nb_segs, 2194 "Number of segments differ in original (%u) and filled (%u) op", 2195 nb_dst_segments, m->nb_segs); 2196 2197 /* Validate each mbuf segment length. */ 2198 for (i = 0; i < nb_dst_segments; ++i) { 2199 /* Apply offset to the first mbuf segment. */ 2200 uint16_t offset = (i == 0) ? op->offset : 0; 2201 uint16_t data_len = rte_pktmbuf_data_len(m) - offset; 2202 2203 TEST_ASSERT(orig_op->segments[i].length == data_len, 2204 "Length of segment differ in original (%u) and filled (%u) op", 2205 orig_op->segments[i].length, data_len); 2206 so_orig = (int8_t *) orig_op->segments[i].addr; 2207 so_out = rte_pktmbuf_mtod_offset(m, int8_t *, offset); 2208 margin_error += data_len / 8; /* Allow for few % errors. */ 2209 2210 /* SO output can have minor differences due to algorithm variations. */ 2211 for (j = 0, jj = 0; j < data_len; j++, jj++) { 2212 if (so_orig[j] != so_out[jj]) { 2213 error = (so_orig[j] > so_out[jj]) ? so_orig[j] - so_out[jj] : 2214 so_out[jj] - so_orig[j]; 2215 /* Residual quantization error. 
*/ 2216 if (error > 32) { 2217 printf("Warning: Soft mismatch %d: exp %d act %d => %d\n", 2218 j, so_orig[j], so_out[jj], error); 2219 byte_error++; 2220 } 2221 } 2222 } 2223 m = m->next; 2224 } 2225 2226 if (byte_error > margin_error) 2227 TEST_ASSERT(byte_error <= 1, "Soft output mismatch (%d) %d", 2228 byte_error, margin_error); 2229 2230 return TEST_SUCCESS; 2231 } 2232 2233 static int 2234 validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 2235 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 2236 { 2237 unsigned int i; 2238 int ret; 2239 struct op_data_entries *hard_data_orig = 2240 &test_vector.entries[DATA_HARD_OUTPUT]; 2241 struct op_data_entries *soft_data_orig = 2242 &test_vector.entries[DATA_SOFT_OUTPUT]; 2243 struct rte_bbdev_op_turbo_dec *ops_td; 2244 struct rte_bbdev_op_data *hard_output; 2245 struct rte_bbdev_op_data *soft_output; 2246 struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec; 2247 2248 for (i = 0; i < n; ++i) { 2249 ops_td = &ops[i]->turbo_dec; 2250 hard_output = &ops_td->hard_output; 2251 soft_output = &ops_td->soft_output; 2252 2253 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2254 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2255 "Returned iter_count (%d) > expected iter_count (%d)", 2256 ops_td->iter_count, ref_td->iter_count); 2257 ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); 2258 TEST_ASSERT_SUCCESS(ret, 2259 "Checking status and ordering for decoder failed"); 2260 2261 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2262 hard_data_orig), 2263 "Hard output buffers (CB=%u) are not equal", 2264 i); 2265 2266 if (ref_op->turbo_dec.op_flags & RTE_BBDEV_TURBO_SOFT_OUTPUT) 2267 TEST_ASSERT_SUCCESS(validate_op_so_chain(soft_output, 2268 soft_data_orig), 2269 "Soft output buffers (CB=%u) are not equal", 2270 i); 2271 } 2272 2273 return TEST_SUCCESS; 2274 } 2275 2276 /* Check Number of code blocks errors */ 2277 static int 2278 validate_ldpc_bler(struct rte_bbdev_dec_op **ops, const uint16_t n) 2279 { 2280 unsigned int i; 2281 struct op_data_entries *hard_data_orig = 2282 &test_vector.entries[DATA_HARD_OUTPUT]; 2283 struct rte_bbdev_op_ldpc_dec *ops_td; 2284 struct rte_bbdev_op_data *hard_output; 2285 int errors = 0; 2286 struct rte_mbuf *m; 2287 2288 for (i = 0; i < n; ++i) { 2289 ops_td = &ops[i]->ldpc_dec; 2290 hard_output = &ops_td->hard_output; 2291 m = hard_output->data; 2292 if (memcmp(rte_pktmbuf_mtod_offset(m, uint32_t *, 0), 2293 hard_data_orig->segments[0].addr, 2294 hard_data_orig->segments[0].length)) 2295 errors++; 2296 } 2297 return errors; 2298 } 2299 2300 static int 2301 validate_ldpc_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, 2302 struct rte_bbdev_dec_op *ref_op, const int vector_mask) 2303 { 2304 unsigned int i; 2305 int ret; 2306 struct op_data_entries *hard_data_orig = 2307 &test_vector.entries[DATA_HARD_OUTPUT]; 2308 struct op_data_entries *soft_data_orig = 2309 &test_vector.entries[DATA_SOFT_OUTPUT]; 2310 struct op_data_entries *harq_data_orig = 2311 &test_vector.entries[DATA_HARQ_OUTPUT]; 2312 struct rte_bbdev_op_ldpc_dec *ops_td; 2313 struct rte_bbdev_op_data *hard_output; 2314 struct rte_bbdev_op_data *harq_output; 2315 struct rte_bbdev_op_data *soft_output; 2316 struct rte_bbdev_op_ldpc_dec *ref_td = &ref_op->ldpc_dec; 2317 2318 for (i = 0; i < n; ++i) { 2319 ops_td = &ops[i]->ldpc_dec; 2320 hard_output = &ops_td->hard_output; 2321 harq_output = &ops_td->harq_combined_output; 2322 soft_output = &ops_td->soft_output; 2323 2324 ret = 
check_dec_status_and_ordering(ops[i], i, ref_op->status); 2325 TEST_ASSERT_SUCCESS(ret, 2326 "Checking status and ordering for decoder failed"); 2327 if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) 2328 TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, 2329 "Returned iter_count (%d) > expected iter_count (%d)", 2330 ops_td->iter_count, ref_td->iter_count); 2331 /* 2332 * We can ignore output data when the decoding failed to 2333 * converge or for loop-back cases 2334 */ 2335 if (!check_bit(ops[i]->ldpc_dec.op_flags, 2336 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK 2337 ) && ( 2338 ops[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR 2339 )) == 0) 2340 TEST_ASSERT_SUCCESS(validate_op_chain(hard_output, 2341 hard_data_orig), 2342 "Hard output buffers (CB=%u) are not equal", 2343 i); 2344 2345 if (ref_op->ldpc_dec.op_flags & RTE_BBDEV_LDPC_SOFT_OUT_ENABLE) 2346 TEST_ASSERT_SUCCESS(validate_op_chain(soft_output, 2347 soft_data_orig), 2348 "Soft output buffers (CB=%u) are not equal", 2349 i); 2350 if (ref_op->ldpc_dec.op_flags & 2351 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE) { 2352 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2353 harq_data_orig, ops_td), 2354 "HARQ output buffers (CB=%u) are not equal", 2355 i); 2356 } 2357 if (ref_op->ldpc_dec.op_flags & 2358 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 2359 TEST_ASSERT_SUCCESS(validate_op_harq_chain(harq_output, 2360 harq_data_orig, ops_td), 2361 "HARQ output buffers (CB=%u) are not equal", 2362 i); 2363 2364 } 2365 2366 return TEST_SUCCESS; 2367 } 2368 2369 2370 static int 2371 validate_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2372 struct rte_bbdev_enc_op *ref_op) 2373 { 2374 unsigned int i; 2375 int ret; 2376 struct op_data_entries *hard_data_orig = 2377 &test_vector.entries[DATA_HARD_OUTPUT]; 2378 2379 for (i = 0; i < n; ++i) { 2380 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2381 TEST_ASSERT_SUCCESS(ret, 2382 "Checking status and ordering for encoder failed"); 2383 TEST_ASSERT_SUCCESS(validate_op_chain( 2384 &ops[i]->turbo_enc.output, 2385 hard_data_orig), 2386 "Output buffers (CB=%u) are not equal", 2387 i); 2388 } 2389 2390 return TEST_SUCCESS; 2391 } 2392 2393 static int 2394 validate_ldpc_enc_op(struct rte_bbdev_enc_op **ops, const uint16_t n, 2395 struct rte_bbdev_enc_op *ref_op) 2396 { 2397 unsigned int i; 2398 int ret; 2399 struct op_data_entries *hard_data_orig = 2400 &test_vector.entries[DATA_HARD_OUTPUT]; 2401 2402 for (i = 0; i < n; ++i) { 2403 ret = check_enc_status_and_ordering(ops[i], i, ref_op->status); 2404 TEST_ASSERT_SUCCESS(ret, 2405 "Checking status and ordering for encoder failed"); 2406 TEST_ASSERT_SUCCESS(validate_op_chain( 2407 &ops[i]->ldpc_enc.output, 2408 hard_data_orig), 2409 "Output buffers (CB=%u) are not equal", 2410 i); 2411 } 2412 2413 return TEST_SUCCESS; 2414 } 2415 2416 2417 static inline int 2418 validate_op_fft_chain(struct rte_bbdev_op_data *op, struct op_data_entries *orig_op) 2419 { 2420 struct rte_mbuf *m = op->data; 2421 uint8_t i, nb_dst_segments = orig_op->nb_segments; 2422 int16_t delt, abs_delt, thres_hold = 3; 2423 uint32_t j, data_len_iq, error_num; 2424 int16_t *ref_out, *op_out; 2425 2426 TEST_ASSERT(nb_dst_segments == m->nb_segs, 2427 "Number of segments differ in original (%u) and filled (%u) op fft", 2428 nb_dst_segments, m->nb_segs); 2429 2430 /* Due to size limitation of mbuf, FFT doesn't use real mbuf. */ 2431 for (i = 0; i < nb_dst_segments; ++i) { 2432 uint16_t offset = (i == 0) ? 
op->offset : 0; 2433 uint32_t data_len = op->length; 2434 2435 TEST_ASSERT(orig_op->segments[i].length == data_len, 2436 "Length of segment differ in original (%u) and filled (%u) op fft", 2437 orig_op->segments[i].length, data_len); 2438 /* Divided by 2 to get the number of 16bits data. */ 2439 data_len_iq = data_len >> 1; 2440 ref_out = (int16_t *)(orig_op->segments[i].addr); 2441 op_out = rte_pktmbuf_mtod_offset(m, int16_t *, offset); 2442 error_num = 0; 2443 for (j = 0; j < data_len_iq; j++) { 2444 delt = ref_out[j] - op_out[j]; 2445 abs_delt = delt > 0 ? delt : -delt; 2446 error_num += (abs_delt > thres_hold ? 1 : 0); 2447 } 2448 if (error_num > 0) { 2449 rte_memdump(stdout, "Buffer A", ref_out, data_len); 2450 rte_memdump(stdout, "Buffer B", op_out, data_len); 2451 TEST_ASSERT(error_num == 0, 2452 "FFT Output are not matched total (%u) errors (%u)", 2453 data_len_iq, error_num); 2454 } 2455 2456 m = m->next; 2457 } 2458 2459 return TEST_SUCCESS; 2460 } 2461 2462 static int 2463 validate_fft_op(struct rte_bbdev_fft_op **ops, const uint16_t n, 2464 struct rte_bbdev_fft_op *ref_op) 2465 { 2466 unsigned int i; 2467 int ret; 2468 struct op_data_entries *fft_data_orig = &test_vector.entries[DATA_HARD_OUTPUT]; 2469 struct op_data_entries *fft_pwr_orig = &test_vector.entries[DATA_SOFT_OUTPUT]; 2470 2471 for (i = 0; i < n; ++i) { 2472 ret = check_fft_status_and_ordering(ops[i], i, ref_op->status); 2473 TEST_ASSERT_SUCCESS(ret, "Checking status and ordering for FFT failed"); 2474 TEST_ASSERT_SUCCESS(validate_op_fft_chain( 2475 &ops[i]->fft.base_output, fft_data_orig), 2476 "FFT Output buffers (op=%u) are not matched", i); 2477 if (check_bit(ops[i]->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS)) 2478 TEST_ASSERT_SUCCESS(validate_op_fft_chain( 2479 &ops[i]->fft.power_meas_output, fft_pwr_orig), 2480 "FFT Power Output buffers (op=%u) are not matched", i); 2481 } 2482 2483 return TEST_SUCCESS; 2484 } 2485 2486 static void 2487 create_reference_dec_op(struct rte_bbdev_dec_op *op) 2488 { 2489 unsigned int i; 2490 struct op_data_entries *entry; 2491 2492 op->turbo_dec = test_vector.turbo_dec; 2493 entry = &test_vector.entries[DATA_INPUT]; 2494 for (i = 0; i < entry->nb_segments; ++i) 2495 op->turbo_dec.input.length += 2496 entry->segments[i].length; 2497 } 2498 2499 static void 2500 create_reference_ldpc_dec_op(struct rte_bbdev_dec_op *op) 2501 { 2502 unsigned int i; 2503 struct op_data_entries *entry; 2504 2505 op->ldpc_dec = test_vector.ldpc_dec; 2506 entry = &test_vector.entries[DATA_INPUT]; 2507 for (i = 0; i < entry->nb_segments; ++i) 2508 op->ldpc_dec.input.length += 2509 entry->segments[i].length; 2510 if (test_vector.ldpc_dec.op_flags & 2511 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) { 2512 entry = &test_vector.entries[DATA_HARQ_INPUT]; 2513 for (i = 0; i < entry->nb_segments; ++i) 2514 op->ldpc_dec.harq_combined_input.length += 2515 entry->segments[i].length; 2516 } 2517 } 2518 2519 static void 2520 create_reference_fft_op(struct rte_bbdev_fft_op *op) 2521 { 2522 unsigned int i; 2523 struct op_data_entries *entry; 2524 op->fft = test_vector.fft; 2525 entry = &test_vector.entries[DATA_INPUT]; 2526 for (i = 0; i < entry->nb_segments; ++i) 2527 op->fft.base_input.length += entry->segments[i].length; 2528 } 2529 2530 static void 2531 create_reference_enc_op(struct rte_bbdev_enc_op *op) 2532 { 2533 unsigned int i; 2534 struct op_data_entries *entry; 2535 2536 op->turbo_enc = test_vector.turbo_enc; 2537 entry = &test_vector.entries[DATA_INPUT]; 2538 for (i = 0; i < entry->nb_segments; ++i) 2539 
op->turbo_enc.input.length += 2540 entry->segments[i].length; 2541 } 2542 2543 static void 2544 create_reference_ldpc_enc_op(struct rte_bbdev_enc_op *op) 2545 { 2546 unsigned int i; 2547 struct op_data_entries *entry; 2548 2549 op->ldpc_enc = test_vector.ldpc_enc; 2550 entry = &test_vector.entries[DATA_INPUT]; 2551 for (i = 0; i < entry->nb_segments; ++i) 2552 op->ldpc_enc.input.length += 2553 entry->segments[i].length; 2554 } 2555 2556 static uint32_t 2557 calc_dec_TB_size(struct rte_bbdev_dec_op *op) 2558 { 2559 uint8_t i; 2560 uint32_t c, r, tb_size = 0; 2561 2562 if (op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2563 tb_size = op->turbo_dec.tb_params.k_neg; 2564 } else { 2565 c = op->turbo_dec.tb_params.c; 2566 r = op->turbo_dec.tb_params.r; 2567 for (i = 0; i < c-r; i++) 2568 tb_size += (r < op->turbo_dec.tb_params.c_neg) ? 2569 op->turbo_dec.tb_params.k_neg : 2570 op->turbo_dec.tb_params.k_pos; 2571 } 2572 return tb_size; 2573 } 2574 2575 static uint32_t 2576 calc_ldpc_dec_TB_size(struct rte_bbdev_dec_op *op) 2577 { 2578 uint8_t i; 2579 uint32_t c, r, tb_size = 0; 2580 uint16_t sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 2581 2582 if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2583 tb_size = sys_cols * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; 2584 } else { 2585 c = op->ldpc_dec.tb_params.c; 2586 r = op->ldpc_dec.tb_params.r; 2587 for (i = 0; i < c-r; i++) 2588 tb_size += sys_cols * op->ldpc_dec.z_c 2589 - op->ldpc_dec.n_filler; 2590 } 2591 return tb_size; 2592 } 2593 2594 static uint32_t 2595 calc_enc_TB_size(struct rte_bbdev_enc_op *op) 2596 { 2597 uint8_t i; 2598 uint32_t c, r, tb_size = 0; 2599 2600 if (op->turbo_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2601 tb_size = op->turbo_enc.tb_params.k_neg; 2602 } else { 2603 c = op->turbo_enc.tb_params.c; 2604 r = op->turbo_enc.tb_params.r; 2605 for (i = 0; i < c-r; i++) 2606 tb_size += (r < op->turbo_enc.tb_params.c_neg) ? 2607 op->turbo_enc.tb_params.k_neg : 2608 op->turbo_enc.tb_params.k_pos; 2609 } 2610 return tb_size; 2611 } 2612 2613 static uint32_t 2614 calc_ldpc_enc_TB_size(struct rte_bbdev_enc_op *op) 2615 { 2616 uint8_t i; 2617 uint32_t c, r, tb_size = 0; 2618 uint16_t sys_cols = (op->ldpc_enc.basegraph == 1) ? 
22 : 10; 2619 2620 if (op->ldpc_enc.code_block_mode == RTE_BBDEV_CODE_BLOCK) { 2621 tb_size = sys_cols * op->ldpc_enc.z_c - op->ldpc_enc.n_filler; 2622 } else { 2623 c = op->ldpc_enc.tb_params.c; 2624 r = op->ldpc_enc.tb_params.r; 2625 for (i = 0; i < c-r; i++) 2626 tb_size += sys_cols * op->ldpc_enc.z_c 2627 - op->ldpc_enc.n_filler; 2628 } 2629 return tb_size; 2630 } 2631 2632 static uint32_t 2633 calc_fft_size(struct rte_bbdev_fft_op *op) 2634 { 2635 uint32_t output_size; 2636 int num_cs = 0, i; 2637 for (i = 0; i < 12; i++) 2638 if (check_bit(op->fft.cs_bitmap, 1 << i)) 2639 num_cs++; 2640 output_size = (num_cs * op->fft.output_sequence_size * 4) << op->fft.num_antennas_log2; 2641 return output_size; 2642 } 2643 2644 static int 2645 init_test_op_params(struct test_op_params *op_params, 2646 enum rte_bbdev_op_type op_type, const int expected_status, 2647 const int vector_mask, struct rte_mempool *ops_mp, 2648 uint16_t burst_sz, uint16_t num_to_process, uint16_t num_lcores) 2649 { 2650 int ret = 0; 2651 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 2652 op_type == RTE_BBDEV_OP_LDPC_DEC) 2653 ret = rte_bbdev_dec_op_alloc_bulk(ops_mp, 2654 &op_params->ref_dec_op, 1); 2655 else if (op_type == RTE_BBDEV_OP_FFT) 2656 ret = rte_bbdev_fft_op_alloc_bulk(ops_mp, 2657 &op_params->ref_fft_op, 1); 2658 else 2659 ret = rte_bbdev_enc_op_alloc_bulk(ops_mp, 2660 &op_params->ref_enc_op, 1); 2661 2662 TEST_ASSERT_SUCCESS(ret, "rte_bbdev_op_alloc_bulk() failed"); 2663 2664 op_params->mp = ops_mp; 2665 op_params->burst_sz = burst_sz; 2666 op_params->num_to_process = num_to_process; 2667 op_params->num_lcores = num_lcores; 2668 op_params->vector_mask = vector_mask; 2669 if (op_type == RTE_BBDEV_OP_TURBO_DEC || 2670 op_type == RTE_BBDEV_OP_LDPC_DEC) 2671 op_params->ref_dec_op->status = expected_status; 2672 else if (op_type == RTE_BBDEV_OP_TURBO_ENC 2673 || op_type == RTE_BBDEV_OP_LDPC_ENC) 2674 op_params->ref_enc_op->status = expected_status; 2675 else if (op_type == RTE_BBDEV_OP_FFT) 2676 op_params->ref_fft_op->status = expected_status; 2677 return 0; 2678 } 2679 2680 static int 2681 run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id, 2682 struct test_op_params *op_params) 2683 { 2684 int t_ret, f_ret, socket_id = SOCKET_ID_ANY; 2685 unsigned int i; 2686 struct active_device *ad; 2687 unsigned int burst_sz = get_burst_sz(); 2688 enum rte_bbdev_op_type op_type = test_vector.op_type; 2689 const struct rte_bbdev_op_cap *capabilities = NULL; 2690 2691 ad = &active_devs[dev_id]; 2692 2693 /* Check if device supports op_type */ 2694 if (!is_avail_op(ad, test_vector.op_type)) 2695 return TEST_SUCCESS; 2696 2697 struct rte_bbdev_info info; 2698 rte_bbdev_info_get(ad->dev_id, &info); 2699 socket_id = GET_SOCKET(info.socket_id); 2700 2701 f_ret = create_mempools(ad, socket_id, op_type, 2702 get_num_ops()); 2703 if (f_ret != TEST_SUCCESS) { 2704 printf("Couldn't create mempools"); 2705 goto fail; 2706 } 2707 if (op_type == RTE_BBDEV_OP_NONE) 2708 op_type = RTE_BBDEV_OP_TURBO_ENC; 2709 2710 f_ret = init_test_op_params(op_params, test_vector.op_type, 2711 test_vector.expected_status, 2712 test_vector.mask, 2713 ad->ops_mempool, 2714 burst_sz, 2715 get_num_ops(), 2716 get_num_lcores()); 2717 if (f_ret != TEST_SUCCESS) { 2718 printf("Couldn't init test op params"); 2719 goto fail; 2720 } 2721 2722 2723 /* Find capabilities */ 2724 const struct rte_bbdev_op_cap *cap = info.drv.capabilities; 2725 do { 2726 if (cap->type == test_vector.op_type) { 2727 capabilities = cap; 2728 break; 2729 } 2730 cap++;
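/*
 * Descriptive note (added): the driver's capability list is terminated by an
 * entry of type RTE_BBDEV_OP_NONE, so the walk ends either on a match for the
 * vector's op type or at the end of the list.
 */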
2731 } while (cap->type != RTE_BBDEV_OP_NONE); 2732 TEST_ASSERT_NOT_NULL(capabilities, 2733 "Couldn't find capabilities"); 2734 2735 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2736 create_reference_dec_op(op_params->ref_dec_op); 2737 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 2738 create_reference_enc_op(op_params->ref_enc_op); 2739 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2740 create_reference_ldpc_enc_op(op_params->ref_enc_op); 2741 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2742 create_reference_ldpc_dec_op(op_params->ref_dec_op); 2743 else if (test_vector.op_type == RTE_BBDEV_OP_FFT) 2744 create_reference_fft_op(op_params->ref_fft_op); 2745 2746 for (i = 0; i < ad->nb_queues; ++i) { 2747 f_ret = fill_queue_buffers(op_params, 2748 ad->in_mbuf_pool, 2749 ad->hard_out_mbuf_pool, 2750 ad->soft_out_mbuf_pool, 2751 ad->harq_in_mbuf_pool, 2752 ad->harq_out_mbuf_pool, 2753 ad->queue_ids[i], 2754 capabilities, 2755 info.drv.min_alignment, 2756 socket_id); 2757 if (f_ret != TEST_SUCCESS) { 2758 printf("Couldn't init queue buffers"); 2759 goto fail; 2760 } 2761 } 2762 2763 /* Run test case function */ 2764 t_ret = test_case_func(ad, op_params); 2765 2766 /* Free active device resources and return */ 2767 free_buffers(ad, op_params); 2768 return t_ret; 2769 2770 fail: 2771 free_buffers(ad, op_params); 2772 return TEST_FAILED; 2773 } 2774 2775 /* Run given test function per active device per supported op type 2776 * per burst size. 2777 */ 2778 static int 2779 run_test_case(test_case_function *test_case_func) 2780 { 2781 int ret = 0; 2782 uint8_t dev; 2783 2784 /* Alloc op_params */ 2785 struct test_op_params *op_params = rte_zmalloc(NULL, 2786 sizeof(struct test_op_params), RTE_CACHE_LINE_SIZE); 2787 TEST_ASSERT_NOT_NULL(op_params, "Failed to alloc %zuB for op_params", 2788 RTE_ALIGN(sizeof(struct test_op_params), 2789 RTE_CACHE_LINE_SIZE)); 2790 2791 /* For each device run test case function */ 2792 for (dev = 0; dev < nb_active_devs; ++dev) 2793 ret |= run_test_case_on_device(test_case_func, dev, op_params); 2794 2795 rte_free(op_params); 2796 2797 return ret; 2798 } 2799 2800 2801 /* Push back the HARQ output from DDR to host */ 2802 static void 2803 retrieve_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2804 struct rte_bbdev_dec_op **ops, 2805 const uint16_t n) 2806 { 2807 uint16_t j; 2808 int save_status, ret; 2809 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2810 struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; 2811 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2812 bool loopback = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK; 2813 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2814 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2815 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2816 for (j = 0; j < n; ++j) { 2817 if ((loopback && mem_out) || hc_out) { 2818 save_status = ops[j]->status; 2819 ops[j]->ldpc_dec.op_flags = 2820 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2821 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2822 if (h_comp) 2823 ops[j]->ldpc_dec.op_flags += 2824 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2825 ops[j]->ldpc_dec.harq_combined_input.offset = 2826 harq_offset; 2827 ops[j]->ldpc_dec.harq_combined_output.offset = 0; 2828 harq_offset += HARQ_INCR; 2829 if (!loopback) 2830 ops[j]->ldpc_dec.harq_combined_input.length = 2831 ops[j]->ldpc_dec.harq_combined_output.length; 2832 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 2833 &ops[j], 1); 2834 ret = 0; 
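/*
 * Descriptive note (added): busy-poll until the single loopback op enqueued
 * above is returned, which completes the push-back of this code block's HARQ
 * data from device DDR to host memory.
 */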
2835 while (ret == 0) 2836 ret = rte_bbdev_dequeue_ldpc_dec_ops( 2837 dev_id, queue_id, 2838 &ops_deq[j], 1); 2839 ops[j]->ldpc_dec.op_flags = flags; 2840 ops[j]->status = save_status; 2841 } 2842 } 2843 } 2844 2845 /* 2846 * Push back the HARQ output from HW DDR to Host 2847 * Preload HARQ memory input and adjust HARQ offset 2848 */ 2849 static void 2850 preload_harq_ddr(uint16_t dev_id, uint16_t queue_id, 2851 struct rte_bbdev_dec_op **ops, const uint16_t n, 2852 bool preload) 2853 { 2854 uint16_t j; 2855 int deq; 2856 uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2857 struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS]; 2858 struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; 2859 uint32_t flags = ops[0]->ldpc_dec.op_flags; 2860 bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; 2861 bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 2862 bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2863 bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; 2864 bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2865 if ((mem_in || hc_in) && preload) { 2866 for (j = 0; j < n; ++j) { 2867 save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input; 2868 save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output; 2869 ops[j]->ldpc_dec.op_flags = 2870 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + 2871 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 2872 if (h_comp) 2873 ops[j]->ldpc_dec.op_flags += 2874 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; 2875 ops[j]->ldpc_dec.harq_combined_output.offset = 2876 harq_offset; 2877 ops[j]->ldpc_dec.harq_combined_input.offset = 0; 2878 harq_offset += HARQ_INCR; 2879 } 2880 rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n); 2881 deq = 0; 2882 while (deq != n) 2883 deq += rte_bbdev_dequeue_ldpc_dec_ops( 2884 dev_id, queue_id, &ops_deq[deq], 2885 n - deq); 2886 /* Restore the operations */ 2887 for (j = 0; j < n; ++j) { 2888 ops[j]->ldpc_dec.op_flags = flags; 2889 ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j]; 2890 ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j]; 2891 } 2892 } 2893 harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; 2894 for (j = 0; j < n; ++j) { 2895 /* Adjust HARQ offset when we reach external DDR */ 2896 if (mem_in || hc_in) 2897 ops[j]->ldpc_dec.harq_combined_input.offset 2898 = harq_offset; 2899 if (mem_out || hc_out) 2900 ops[j]->ldpc_dec.harq_combined_output.offset 2901 = harq_offset; 2902 harq_offset += HARQ_INCR; 2903 } 2904 } 2905 2906 static void 2907 dequeue_event_callback(uint16_t dev_id, 2908 enum rte_bbdev_event_type event, void *cb_arg, 2909 void *ret_param) 2910 { 2911 int ret; 2912 uint16_t i; 2913 uint64_t total_time; 2914 uint16_t deq, burst_sz, num_ops; 2915 uint16_t queue_id = *(uint16_t *) ret_param; 2916 struct rte_bbdev_info info; 2917 double tb_len_bits; 2918 struct thread_params *tp = cb_arg; 2919 2920 /* Find matching thread params using queue_id */ 2921 for (i = 0; i < MAX_QUEUES; ++i, ++tp) 2922 if (tp->queue_id == queue_id) 2923 break; 2924 2925 if (i == MAX_QUEUES) { 2926 printf("%s: Queue_id from interrupt details was not found!\n", 2927 __func__); 2928 return; 2929 } 2930 2931 if (unlikely(event != RTE_BBDEV_EVENT_DEQUEUE)) { 2932 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 2933 printf( 2934 "Dequeue interrupt handler called for incorrect event!\n"); 2935 return; 2936 } 2937 2938 burst_sz = __atomic_load_n(&tp->burst_sz, __ATOMIC_RELAXED); 2939 num_ops = tp->op_params->num_to_process; 2940 
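/*
 * Descriptive note (added): dequeue according to the operation type under
 * test; burst_sz is the number of descriptors the enqueuing lcore reported
 * for this batch via tp->burst_sz, so exactly that many ops are expected here.
 */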
2941 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 2942 deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 2943 &tp->dec_ops[ 2944 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2945 burst_sz); 2946 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 2947 deq = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 2948 &tp->dec_ops[ 2949 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2950 burst_sz); 2951 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 2952 deq = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 2953 &tp->enc_ops[ 2954 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2955 burst_sz); 2956 else if (test_vector.op_type == RTE_BBDEV_OP_FFT) 2957 deq = rte_bbdev_dequeue_fft_ops(dev_id, queue_id, 2958 &tp->fft_ops[ 2959 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2960 burst_sz); 2961 else /*RTE_BBDEV_OP_TURBO_ENC*/ 2962 deq = rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 2963 &tp->enc_ops[ 2964 __atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED)], 2965 burst_sz); 2966 2967 if (deq < burst_sz) { 2968 printf( 2969 "After receiving the interrupt all operations should be dequeued. Expected: %u, got: %u\n", 2970 burst_sz, deq); 2971 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 2972 return; 2973 } 2974 2975 if (__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) + deq < num_ops) { 2976 __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); 2977 return; 2978 } 2979 2980 total_time = rte_rdtsc_precise() - tp->start_time; 2981 2982 rte_bbdev_info_get(dev_id, &info); 2983 2984 ret = TEST_SUCCESS; 2985 2986 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) { 2987 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 2988 ret = validate_dec_op(tp->dec_ops, num_ops, ref_op, 2989 tp->op_params->vector_mask); 2990 /* get the max of iter_count for all dequeued ops */ 2991 for (i = 0; i < num_ops; ++i) 2992 tp->iter_count = RTE_MAX( 2993 tp->dec_ops[i]->turbo_dec.iter_count, 2994 tp->iter_count); 2995 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 2996 } else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) { 2997 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 2998 ret = validate_enc_op(tp->enc_ops, num_ops, ref_op); 2999 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 3000 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) { 3001 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3002 ret = validate_ldpc_enc_op(tp->enc_ops, num_ops, ref_op); 3003 rte_bbdev_enc_op_free_bulk(tp->enc_ops, deq); 3004 } else if (test_vector.op_type == RTE_BBDEV_OP_FFT) { 3005 struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op; 3006 ret = validate_fft_op(tp->fft_ops, num_ops, ref_op); 3007 rte_bbdev_fft_op_free_bulk(tp->fft_ops, deq); 3008 } else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) { 3009 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3010 ret = validate_ldpc_dec_op(tp->dec_ops, num_ops, ref_op, 3011 tp->op_params->vector_mask); 3012 rte_bbdev_dec_op_free_bulk(tp->dec_ops, deq); 3013 } 3014 3015 if (ret) { 3016 printf("Buffers validation failed\n"); 3017 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 3018 } 3019 3020 switch (test_vector.op_type) { 3021 case RTE_BBDEV_OP_TURBO_DEC: 3022 tb_len_bits = calc_dec_TB_size(tp->op_params->ref_dec_op); 3023 break; 3024 case RTE_BBDEV_OP_TURBO_ENC: 3025 tb_len_bits = calc_enc_TB_size(tp->op_params->ref_enc_op); 3026 break; 3027 case RTE_BBDEV_OP_LDPC_DEC: 3028 tb_len_bits = 
calc_ldpc_dec_TB_size(tp->op_params->ref_dec_op); 3029 break; 3030 case RTE_BBDEV_OP_FFT: 3031 tb_len_bits = calc_fft_size(tp->op_params->ref_fft_op); 3032 break; 3033 case RTE_BBDEV_OP_LDPC_ENC: 3034 tb_len_bits = calc_ldpc_enc_TB_size(tp->op_params->ref_enc_op); 3035 break; 3036 case RTE_BBDEV_OP_NONE: 3037 tb_len_bits = 0.0; 3038 break; 3039 default: 3040 printf("Unknown op type: %d\n", test_vector.op_type); 3041 __atomic_store_n(&tp->processing_status, TEST_FAILED, __ATOMIC_RELAXED); 3042 return; 3043 } 3044 3045 tp->ops_per_sec += ((double)num_ops) / 3046 ((double)total_time / (double)rte_get_tsc_hz()); 3047 tp->mbps += (((double)(num_ops * tb_len_bits)) / 1000000.0) / 3048 ((double)total_time / (double)rte_get_tsc_hz()); 3049 3050 __atomic_fetch_add(&tp->nb_dequeued, deq, __ATOMIC_RELAXED); 3051 } 3052 3053 static int 3054 throughput_intr_lcore_ldpc_dec(void *arg) 3055 { 3056 struct thread_params *tp = arg; 3057 unsigned int enqueued; 3058 const uint16_t queue_id = tp->queue_id; 3059 const uint16_t burst_sz = tp->op_params->burst_sz; 3060 const uint16_t num_to_process = tp->op_params->num_to_process; 3061 struct rte_bbdev_dec_op *ops[num_to_process]; 3062 struct test_buffers *bufs = NULL; 3063 struct rte_bbdev_info info; 3064 int ret, i, j; 3065 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3066 uint16_t num_to_enq, enq; 3067 3068 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3069 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3070 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3071 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3072 3073 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3074 "BURST_SIZE should be <= %u", MAX_BURST); 3075 3076 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3077 "Failed to enable interrupts for dev: %u, queue_id: %u", 3078 tp->dev_id, queue_id); 3079 3080 rte_bbdev_info_get(tp->dev_id, &info); 3081 3082 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3083 "NUM_OPS cannot exceed %u for this device", 3084 info.drv.queue_size_lim); 3085 3086 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3087 3088 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3089 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3090 3091 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3092 3093 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 3094 num_to_process); 3095 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3096 num_to_process); 3097 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3098 copy_reference_ldpc_dec_op(ops, num_to_process, 0, bufs->inputs, 3099 bufs->hard_outputs, bufs->soft_outputs, 3100 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3101 3102 /* Set counter to validate the ordering */ 3103 for (j = 0; j < num_to_process; ++j) 3104 ops[j]->opaque_data = (void *)(uintptr_t)j; 3105 3106 for (j = 0; j < TEST_REPETITIONS; ++j) { 3107 for (i = 0; i < num_to_process; ++i) { 3108 if (!loopback) 3109 rte_pktmbuf_reset( 3110 ops[i]->ldpc_dec.hard_output.data); 3111 if (hc_out || loopback) 3112 mbuf_reset( 3113 ops[i]->ldpc_dec.harq_combined_output.data); 3114 } 3115 3116 tp->start_time = rte_rdtsc_precise(); 3117 for (enqueued = 0; enqueued < num_to_process;) { 3118 num_to_enq = burst_sz; 3119 3120 if (unlikely(num_to_process - enqueued < num_to_enq)) 3121 num_to_enq = num_to_process - enqueued; 3122 3123 enq = 0; 3124 do { 3125 enq += rte_bbdev_enqueue_ldpc_dec_ops( 3126 tp->dev_id, 3127 queue_id, &ops[enqueued], 3128 num_to_enq); 
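/*
 * Descriptive note (added): the enqueue may accept fewer ops than requested
 * (e.g. when the hardware queue is momentarily full), so keep retrying until
 * the whole burst has been taken.
 */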
3129 } while (unlikely(num_to_enq != enq)); 3130 enqueued += enq; 3131 3132 /* Write to thread burst_sz current number of enqueued 3133 * descriptors. It ensures that proper number of 3134 * descriptors will be dequeued in callback 3135 * function - needed for last batch in case where 3136 * the number of operations is not a multiple of 3137 * burst size. 3138 */ 3139 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3140 3141 /* Wait until processing of previous batch is 3142 * completed 3143 */ 3144 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3145 } 3146 if (j != TEST_REPETITIONS - 1) 3147 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3148 } 3149 3150 return TEST_SUCCESS; 3151 } 3152 3153 static int 3154 throughput_intr_lcore_dec(void *arg) 3155 { 3156 struct thread_params *tp = arg; 3157 unsigned int enqueued; 3158 const uint16_t queue_id = tp->queue_id; 3159 const uint16_t burst_sz = tp->op_params->burst_sz; 3160 const uint16_t num_to_process = tp->op_params->num_to_process; 3161 struct rte_bbdev_dec_op *ops[num_to_process]; 3162 struct test_buffers *bufs = NULL; 3163 struct rte_bbdev_info info; 3164 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3165 int ret, i, j; 3166 uint16_t num_to_enq, enq; 3167 3168 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3169 "BURST_SIZE should be <= %u", MAX_BURST); 3170 3171 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3172 "Failed to enable interrupts for dev: %u, queue_id: %u", 3173 tp->dev_id, queue_id); 3174 3175 rte_bbdev_info_get(tp->dev_id, &info); 3176 3177 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3178 "NUM_OPS cannot exceed %u for this device", 3179 info.drv.queue_size_lim); 3180 3181 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3182 3183 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3184 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3185 3186 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3187 3188 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops, 3189 num_to_process); 3190 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_to_process); 3191 ref_op->turbo_dec.iter_max = get_iter_max(); 3192 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3193 copy_reference_dec_op(ops, num_to_process, 0, bufs->inputs, 3194 bufs->hard_outputs, bufs->soft_outputs, 3195 tp->op_params->ref_dec_op); 3196 3197 /* Set counter to validate the ordering. */ 3198 for (j = 0; j < num_to_process; ++j) 3199 ops[j]->opaque_data = (void *)(uintptr_t)j; 3200 3201 for (j = 0; j < TEST_REPETITIONS; ++j) { 3202 for (i = 0; i < num_to_process; ++i) { 3203 rte_pktmbuf_reset(ops[i]->turbo_dec.hard_output.data); 3204 if (ops[i]->turbo_dec.soft_output.data != NULL) 3205 rte_pktmbuf_reset(ops[i]->turbo_dec.soft_output.data); 3206 } 3207 3208 3209 tp->start_time = rte_rdtsc_precise(); 3210 for (enqueued = 0; enqueued < num_to_process;) { 3211 num_to_enq = burst_sz; 3212 3213 if (unlikely(num_to_process - enqueued < num_to_enq)) 3214 num_to_enq = num_to_process - enqueued; 3215 3216 enq = 0; 3217 do { 3218 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 3219 queue_id, &ops[enqueued], 3220 num_to_enq); 3221 } while (unlikely(num_to_enq != enq)); 3222 enqueued += enq; 3223 3224 /* Write to thread burst_sz current number of enqueued 3225 * descriptors. 
It ensures that proper number of 3226 * descriptors will be dequeued in callback 3227 * function - needed for last batch in case where 3228 * the number of operations is not a multiple of 3229 * burst size. 3230 */ 3231 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3232 3233 /* Wait until processing of previous batch is 3234 * completed 3235 */ 3236 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3237 } 3238 if (j != TEST_REPETITIONS - 1) 3239 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3240 } 3241 3242 return TEST_SUCCESS; 3243 } 3244 3245 static int 3246 throughput_intr_lcore_enc(void *arg) 3247 { 3248 struct thread_params *tp = arg; 3249 unsigned int enqueued; 3250 const uint16_t queue_id = tp->queue_id; 3251 const uint16_t burst_sz = tp->op_params->burst_sz; 3252 const uint16_t num_to_process = tp->op_params->num_to_process; 3253 struct rte_bbdev_enc_op *ops[num_to_process]; 3254 struct test_buffers *bufs = NULL; 3255 struct rte_bbdev_info info; 3256 int ret, i, j; 3257 uint16_t num_to_enq, enq; 3258 3259 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3260 "BURST_SIZE should be <= %u", MAX_BURST); 3261 3262 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3263 "Failed to enable interrupts for dev: %u, queue_id: %u", 3264 tp->dev_id, queue_id); 3265 3266 rte_bbdev_info_get(tp->dev_id, &info); 3267 3268 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3269 "NUM_OPS cannot exceed %u for this device", 3270 info.drv.queue_size_lim); 3271 3272 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3273 3274 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3275 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3276 3277 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3278 3279 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 3280 num_to_process); 3281 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3282 num_to_process); 3283 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3284 copy_reference_enc_op(ops, num_to_process, 0, bufs->inputs, 3285 bufs->hard_outputs, tp->op_params->ref_enc_op); 3286 3287 /* Set counter to validate the ordering */ 3288 for (j = 0; j < num_to_process; ++j) 3289 ops[j]->opaque_data = (void *)(uintptr_t)j; 3290 3291 for (j = 0; j < TEST_REPETITIONS; ++j) { 3292 for (i = 0; i < num_to_process; ++i) 3293 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 3294 3295 tp->start_time = rte_rdtsc_precise(); 3296 for (enqueued = 0; enqueued < num_to_process;) { 3297 num_to_enq = burst_sz; 3298 3299 if (unlikely(num_to_process - enqueued < num_to_enq)) 3300 num_to_enq = num_to_process - enqueued; 3301 3302 enq = 0; 3303 do { 3304 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 3305 queue_id, &ops[enqueued], 3306 num_to_enq); 3307 } while (unlikely(enq != num_to_enq)); 3308 enqueued += enq; 3309 3310 /* Write to thread burst_sz current number of enqueued 3311 * descriptors. It ensures that proper number of 3312 * descriptors will be dequeued in callback 3313 * function - needed for last batch in case where 3314 * the number of operations is not a multiple of 3315 * burst size. 
3316 */ 3317 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3318 3319 /* Wait until processing of previous batch is 3320 * completed 3321 */ 3322 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3323 } 3324 if (j != TEST_REPETITIONS - 1) 3325 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3326 } 3327 3328 return TEST_SUCCESS; 3329 } 3330 3331 3332 static int 3333 throughput_intr_lcore_ldpc_enc(void *arg) 3334 { 3335 struct thread_params *tp = arg; 3336 unsigned int enqueued; 3337 const uint16_t queue_id = tp->queue_id; 3338 const uint16_t burst_sz = tp->op_params->burst_sz; 3339 const uint16_t num_to_process = tp->op_params->num_to_process; 3340 struct rte_bbdev_enc_op *ops[num_to_process]; 3341 struct test_buffers *bufs = NULL; 3342 struct rte_bbdev_info info; 3343 int ret, i, j; 3344 uint16_t num_to_enq, enq; 3345 3346 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3347 "BURST_SIZE should be <= %u", MAX_BURST); 3348 3349 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3350 "Failed to enable interrupts for dev: %u, queue_id: %u", 3351 tp->dev_id, queue_id); 3352 3353 rte_bbdev_info_get(tp->dev_id, &info); 3354 3355 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3356 "NUM_OPS cannot exceed %u for this device", 3357 info.drv.queue_size_lim); 3358 3359 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3360 3361 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3362 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3363 3364 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3365 3366 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops, 3367 num_to_process); 3368 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3369 num_to_process); 3370 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3371 copy_reference_ldpc_enc_op(ops, num_to_process, 0, 3372 bufs->inputs, bufs->hard_outputs, 3373 tp->op_params->ref_enc_op); 3374 3375 /* Set counter to validate the ordering */ 3376 for (j = 0; j < num_to_process; ++j) 3377 ops[j]->opaque_data = (void *)(uintptr_t)j; 3378 3379 for (j = 0; j < TEST_REPETITIONS; ++j) { 3380 for (i = 0; i < num_to_process; ++i) 3381 rte_pktmbuf_reset(ops[i]->turbo_enc.output.data); 3382 3383 tp->start_time = rte_rdtsc_precise(); 3384 for (enqueued = 0; enqueued < num_to_process;) { 3385 num_to_enq = burst_sz; 3386 3387 if (unlikely(num_to_process - enqueued < num_to_enq)) 3388 num_to_enq = num_to_process - enqueued; 3389 3390 enq = 0; 3391 do { 3392 enq += rte_bbdev_enqueue_ldpc_enc_ops( 3393 tp->dev_id, 3394 queue_id, &ops[enqueued], 3395 num_to_enq); 3396 } while (unlikely(enq != num_to_enq)); 3397 enqueued += enq; 3398 3399 /* Write to thread burst_sz current number of enqueued 3400 * descriptors. It ensures that proper number of 3401 * descriptors will be dequeued in callback 3402 * function - needed for last batch in case where 3403 * the number of operations is not a multiple of 3404 * burst size. 
3405 */ 3406 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3407 3408 /* Wait until processing of previous batch is 3409 * completed 3410 */ 3411 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3412 } 3413 if (j != TEST_REPETITIONS - 1) 3414 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3415 } 3416 3417 return TEST_SUCCESS; 3418 } 3419 3420 3421 static int 3422 throughput_intr_lcore_fft(void *arg) 3423 { 3424 struct thread_params *tp = arg; 3425 unsigned int enqueued; 3426 const uint16_t queue_id = tp->queue_id; 3427 const uint16_t burst_sz = tp->op_params->burst_sz; 3428 const uint16_t num_to_process = tp->op_params->num_to_process; 3429 struct rte_bbdev_fft_op *ops[num_to_process]; 3430 struct test_buffers *bufs = NULL; 3431 struct rte_bbdev_info info; 3432 int ret, i, j; 3433 uint16_t num_to_enq, enq; 3434 3435 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3436 "BURST_SIZE should be <= %u", MAX_BURST); 3437 3438 TEST_ASSERT_SUCCESS(rte_bbdev_queue_intr_enable(tp->dev_id, queue_id), 3439 "Failed to enable interrupts for dev: %u, queue_id: %u", 3440 tp->dev_id, queue_id); 3441 3442 rte_bbdev_info_get(tp->dev_id, &info); 3443 3444 TEST_ASSERT_SUCCESS((num_to_process > info.drv.queue_size_lim), 3445 "NUM_OPS cannot exceed %u for this device", 3446 info.drv.queue_size_lim); 3447 3448 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3449 3450 __atomic_store_n(&tp->processing_status, 0, __ATOMIC_RELAXED); 3451 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3452 3453 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3454 3455 ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops, 3456 num_to_process); 3457 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3458 num_to_process); 3459 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3460 copy_reference_fft_op(ops, num_to_process, 0, bufs->inputs, 3461 bufs->hard_outputs, bufs->soft_outputs, tp->op_params->ref_fft_op); 3462 3463 /* Set counter to validate the ordering */ 3464 for (j = 0; j < num_to_process; ++j) 3465 ops[j]->opaque_data = (void *)(uintptr_t)j; 3466 3467 for (j = 0; j < TEST_REPETITIONS; ++j) { 3468 for (i = 0; i < num_to_process; ++i) 3469 rte_pktmbuf_reset(ops[i]->fft.base_output.data); 3470 3471 tp->start_time = rte_rdtsc_precise(); 3472 for (enqueued = 0; enqueued < num_to_process;) { 3473 num_to_enq = burst_sz; 3474 3475 if (unlikely(num_to_process - enqueued < num_to_enq)) 3476 num_to_enq = num_to_process - enqueued; 3477 3478 enq = 0; 3479 do { 3480 enq += rte_bbdev_enqueue_fft_ops(tp->dev_id, 3481 queue_id, &ops[enqueued], 3482 num_to_enq); 3483 } while (unlikely(enq != num_to_enq)); 3484 enqueued += enq; 3485 3486 /* Write to thread burst_sz current number of enqueued 3487 * descriptors. It ensures that proper number of 3488 * descriptors will be dequeued in callback 3489 * function - needed for last batch in case where 3490 * the number of operations is not a multiple of 3491 * burst size. 
3492 */ 3493 __atomic_store_n(&tp->burst_sz, num_to_enq, __ATOMIC_RELAXED); 3494 3495 /* Wait until processing of previous batch is 3496 * completed 3497 */ 3498 rte_wait_until_equal_16(&tp->nb_dequeued, enqueued, __ATOMIC_RELAXED); 3499 } 3500 if (j != TEST_REPETITIONS - 1) 3501 __atomic_store_n(&tp->nb_dequeued, 0, __ATOMIC_RELAXED); 3502 } 3503 3504 return TEST_SUCCESS; 3505 } 3506 3507 static int 3508 throughput_pmd_lcore_dec(void *arg) 3509 { 3510 struct thread_params *tp = arg; 3511 uint16_t enq, deq; 3512 uint64_t total_time = 0, start_time; 3513 const uint16_t queue_id = tp->queue_id; 3514 const uint16_t burst_sz = tp->op_params->burst_sz; 3515 const uint16_t num_ops = tp->op_params->num_to_process; 3516 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3517 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3518 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3519 struct test_buffers *bufs = NULL; 3520 int i, j, ret; 3521 struct rte_bbdev_info info; 3522 uint16_t num_to_enq; 3523 bool so_enable; 3524 3525 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3526 "BURST_SIZE should be <= %u", MAX_BURST); 3527 3528 rte_bbdev_info_get(tp->dev_id, &info); 3529 3530 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3531 "NUM_OPS cannot exceed %u for this device", 3532 info.drv.queue_size_lim); 3533 3534 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3535 3536 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3537 3538 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3539 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3540 ref_op->turbo_dec.iter_max = get_iter_max(); 3541 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3542 copy_reference_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3543 bufs->hard_outputs, bufs->soft_outputs, ref_op); 3544 3545 so_enable = check_bit(ops_enq[0]->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT); 3546 3547 /* Set counter to validate the ordering */ 3548 for (j = 0; j < num_ops; ++j) 3549 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3550 3551 for (i = 0; i < TEST_REPETITIONS; ++i) { 3552 3553 for (j = 0; j < num_ops; ++j) 3554 mbuf_reset(ops_enq[j]->turbo_dec.hard_output.data); 3555 if (so_enable) 3556 for (j = 0; j < num_ops; ++j) 3557 mbuf_reset(ops_enq[j]->turbo_dec.soft_output.data); 3558 3559 start_time = rte_rdtsc_precise(); 3560 3561 for (enq = 0, deq = 0; enq < num_ops;) { 3562 num_to_enq = burst_sz; 3563 3564 if (unlikely(num_ops - enq < num_to_enq)) 3565 num_to_enq = num_ops - enq; 3566 3567 enq += rte_bbdev_enqueue_dec_ops(tp->dev_id, 3568 queue_id, &ops_enq[enq], num_to_enq); 3569 3570 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3571 queue_id, &ops_deq[deq], enq - deq); 3572 } 3573 3574 /* dequeue the remaining */ 3575 while (deq < enq) { 3576 deq += rte_bbdev_dequeue_dec_ops(tp->dev_id, 3577 queue_id, &ops_deq[deq], enq - deq); 3578 } 3579 3580 total_time += rte_rdtsc_precise() - start_time; 3581 } 3582 3583 tp->iter_count = 0; 3584 /* get the max of iter_count for all dequeued ops */ 3585 for (i = 0; i < num_ops; ++i) { 3586 tp->iter_count = RTE_MAX(ops_enq[i]->turbo_dec.iter_count, 3587 tp->iter_count); 3588 } 3589 3590 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3591 ret = validate_dec_op(ops_deq, num_ops, ref_op, 3592 tp->op_params->vector_mask); 3593 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3594 } 3595 3596 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3597 3598 double tb_len_bits = calc_dec_TB_size(ref_op); 3599 3600 tp->ops_per_sec = 
((double)num_ops * TEST_REPETITIONS) / 3601 ((double)total_time / (double)rte_get_tsc_hz()); 3602 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3603 1000000.0) / ((double)total_time / 3604 (double)rte_get_tsc_hz()); 3605 3606 return TEST_SUCCESS; 3607 } 3608 3609 static int 3610 bler_pmd_lcore_ldpc_dec(void *arg) 3611 { 3612 struct thread_params *tp = arg; 3613 uint16_t enq, deq; 3614 uint64_t total_time = 0, start_time; 3615 const uint16_t queue_id = tp->queue_id; 3616 const uint16_t burst_sz = tp->op_params->burst_sz; 3617 const uint16_t num_ops = tp->op_params->num_to_process; 3618 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3619 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3620 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3621 struct test_buffers *bufs = NULL; 3622 int i, j, ret; 3623 float parity_bler = 0; 3624 struct rte_bbdev_info info; 3625 uint16_t num_to_enq; 3626 bool extDdr = check_bit(ldpc_cap_flags, 3627 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3628 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3629 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3630 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3631 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3632 3633 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3634 "BURST_SIZE should be <= %u", MAX_BURST); 3635 3636 rte_bbdev_info_get(tp->dev_id, &info); 3637 3638 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3639 "NUM_OPS cannot exceed %u for this device", 3640 info.drv.queue_size_lim); 3641 3642 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3643 3644 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3645 3646 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3647 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3648 3649 /* For BLER tests we need to enable early termination */ 3650 if (!check_bit(ref_op->ldpc_dec.op_flags, 3651 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3652 ref_op->ldpc_dec.op_flags += 3653 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3654 ref_op->ldpc_dec.iter_max = get_iter_max(); 3655 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3656 3657 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3658 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3659 bufs->hard_outputs, bufs->soft_outputs, 3660 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3661 generate_llr_input(num_ops, bufs->inputs, ref_op); 3662 3663 /* Set counter to validate the ordering */ 3664 for (j = 0; j < num_ops; ++j) 3665 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3666 3667 for (i = 0; i < 1; ++i) { /* Could add more iterations */ 3668 for (j = 0; j < num_ops; ++j) { 3669 if (!loopback) 3670 mbuf_reset( 3671 ops_enq[j]->ldpc_dec.hard_output.data); 3672 if (hc_out || loopback) 3673 mbuf_reset(ops_enq[j]->ldpc_dec.harq_combined_output.data); 3674 } 3675 if (extDdr) 3676 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3677 num_ops, true); 3678 start_time = rte_rdtsc_precise(); 3679 3680 for (enq = 0, deq = 0; enq < num_ops;) { 3681 num_to_enq = burst_sz; 3682 3683 if (unlikely(num_ops - enq < num_to_enq)) 3684 num_to_enq = num_ops - enq; 3685 3686 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3687 queue_id, &ops_enq[enq], num_to_enq); 3688 3689 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3690 queue_id, &ops_deq[deq], enq - deq); 3691 } 3692 3693 /* dequeue the remaining */ 3694 while (deq < enq) { 3695 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3696 queue_id, &ops_deq[deq], enq - 
deq); 3697 } 3698 3699 total_time += rte_rdtsc_precise() - start_time; 3700 } 3701 3702 tp->iter_count = 0; 3703 tp->iter_average = 0; 3704 /* get the max of iter_count for all dequeued ops */ 3705 for (i = 0; i < num_ops; ++i) { 3706 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3707 tp->iter_count); 3708 tp->iter_average += (double) ops_enq[i]->ldpc_dec.iter_count; 3709 if (ops_enq[i]->status & (1 << RTE_BBDEV_SYNDROME_ERROR)) 3710 parity_bler += 1.0; 3711 } 3712 3713 parity_bler /= num_ops; /* This one is based on SYND */ 3714 tp->iter_average /= num_ops; 3715 tp->bler = (double) validate_ldpc_bler(ops_deq, num_ops) / num_ops; 3716 3717 if (test_vector.op_type != RTE_BBDEV_OP_NONE 3718 && tp->bler == 0 3719 && parity_bler == 0 3720 && !hc_out) { 3721 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3722 tp->op_params->vector_mask); 3723 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3724 } 3725 3726 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3727 3728 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3729 tp->ops_per_sec = ((double)num_ops * 1) / 3730 ((double)total_time / (double)rte_get_tsc_hz()); 3731 tp->mbps = (((double)(num_ops * 1 * tb_len_bits)) / 3732 1000000.0) / ((double)total_time / 3733 (double)rte_get_tsc_hz()); 3734 3735 return TEST_SUCCESS; 3736 } 3737 3738 static int 3739 throughput_pmd_lcore_ldpc_dec(void *arg) 3740 { 3741 struct thread_params *tp = arg; 3742 uint16_t enq, deq; 3743 uint64_t total_time = 0, start_time; 3744 const uint16_t queue_id = tp->queue_id; 3745 const uint16_t burst_sz = tp->op_params->burst_sz; 3746 const uint16_t num_ops = tp->op_params->num_to_process; 3747 struct rte_bbdev_dec_op *ops_enq[num_ops]; 3748 struct rte_bbdev_dec_op *ops_deq[num_ops]; 3749 struct rte_bbdev_dec_op *ref_op = tp->op_params->ref_dec_op; 3750 struct test_buffers *bufs = NULL; 3751 int i, j, ret; 3752 struct rte_bbdev_info info; 3753 uint16_t num_to_enq; 3754 bool extDdr = check_bit(ldpc_cap_flags, 3755 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE); 3756 bool loopback = check_bit(ref_op->ldpc_dec.op_flags, 3757 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK); 3758 bool hc_out = check_bit(ref_op->ldpc_dec.op_flags, 3759 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 3760 3761 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3762 "BURST_SIZE should be <= %u", MAX_BURST); 3763 3764 rte_bbdev_info_get(tp->dev_id, &info); 3765 3766 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3767 "NUM_OPS cannot exceed %u for this device", 3768 info.drv.queue_size_lim); 3769 3770 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3771 3772 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3773 3774 ret = rte_bbdev_dec_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 3775 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 3776 3777 /* For throughput tests we need to disable early termination */ 3778 if (check_bit(ref_op->ldpc_dec.op_flags, 3779 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 3780 ref_op->ldpc_dec.op_flags -= 3781 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 3782 ref_op->ldpc_dec.iter_max = get_iter_max(); 3783 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 3784 3785 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3786 copy_reference_ldpc_dec_op(ops_enq, num_ops, 0, bufs->inputs, 3787 bufs->hard_outputs, bufs->soft_outputs, 3788 bufs->harq_inputs, bufs->harq_outputs, ref_op); 3789 3790 /* Set counter to validate the ordering */ 3791 for (j = 0; j < num_ops; ++j) 3792 ops_enq[j]->opaque_data = (void 
*)(uintptr_t)j; 3793 3794 for (i = 0; i < TEST_REPETITIONS; ++i) { 3795 for (j = 0; j < num_ops; ++j) { 3796 if (!loopback) 3797 mbuf_reset( 3798 ops_enq[j]->ldpc_dec.hard_output.data); 3799 if (hc_out || loopback) 3800 mbuf_reset( 3801 ops_enq[j]->ldpc_dec.harq_combined_output.data); 3802 } 3803 if (extDdr) 3804 preload_harq_ddr(tp->dev_id, queue_id, ops_enq, 3805 num_ops, true); 3806 start_time = rte_rdtsc_precise(); 3807 3808 for (enq = 0, deq = 0; enq < num_ops;) { 3809 num_to_enq = burst_sz; 3810 3811 if (unlikely(num_ops - enq < num_to_enq)) 3812 num_to_enq = num_ops - enq; 3813 3814 enq += rte_bbdev_enqueue_ldpc_dec_ops(tp->dev_id, 3815 queue_id, &ops_enq[enq], num_to_enq); 3816 3817 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3818 queue_id, &ops_deq[deq], enq - deq); 3819 } 3820 3821 /* dequeue the remaining */ 3822 while (deq < enq) { 3823 deq += rte_bbdev_dequeue_ldpc_dec_ops(tp->dev_id, 3824 queue_id, &ops_deq[deq], enq - deq); 3825 } 3826 3827 total_time += rte_rdtsc_precise() - start_time; 3828 } 3829 3830 tp->iter_count = 0; 3831 /* get the max of iter_count for all dequeued ops */ 3832 for (i = 0; i < num_ops; ++i) { 3833 tp->iter_count = RTE_MAX(ops_enq[i]->ldpc_dec.iter_count, 3834 tp->iter_count); 3835 } 3836 if (extDdr) { 3837 /* Read loopback is not thread safe */ 3838 retrieve_harq_ddr(tp->dev_id, queue_id, ops_enq, num_ops); 3839 } 3840 3841 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3842 ret = validate_ldpc_dec_op(ops_deq, num_ops, ref_op, 3843 tp->op_params->vector_mask); 3844 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3845 } 3846 3847 rte_bbdev_dec_op_free_bulk(ops_enq, num_ops); 3848 3849 double tb_len_bits = calc_ldpc_dec_TB_size(ref_op); 3850 3851 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3852 ((double)total_time / (double)rte_get_tsc_hz()); 3853 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 3854 1000000.0) / ((double)total_time / 3855 (double)rte_get_tsc_hz()); 3856 3857 return TEST_SUCCESS; 3858 } 3859 3860 static int 3861 throughput_pmd_lcore_enc(void *arg) 3862 { 3863 struct thread_params *tp = arg; 3864 uint16_t enq, deq; 3865 uint64_t total_time = 0, start_time; 3866 const uint16_t queue_id = tp->queue_id; 3867 const uint16_t burst_sz = tp->op_params->burst_sz; 3868 const uint16_t num_ops = tp->op_params->num_to_process; 3869 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3870 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3871 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3872 struct test_buffers *bufs = NULL; 3873 int i, j, ret; 3874 struct rte_bbdev_info info; 3875 uint16_t num_to_enq; 3876 3877 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3878 "BURST_SIZE should be <= %u", MAX_BURST); 3879 3880 rte_bbdev_info_get(tp->dev_id, &info); 3881 3882 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3883 "NUM_OPS cannot exceed %u for this device", 3884 info.drv.queue_size_lim); 3885 3886 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3887 3888 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3889 3890 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3891 num_ops); 3892 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3893 num_ops); 3894 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3895 copy_reference_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3896 bufs->hard_outputs, ref_op); 3897 3898 /* Set counter to validate the ordering */ 3899 for (j = 0; j < num_ops; ++j) 3900 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3901 3902 
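/*
 * Illustrative throughput accounting (added note, not part of the original
 * test flow): the repetition loop below accumulates the TSC cycles spent on
 * the enqueue/dequeue sweeps, and the formulas at the end of this function
 * turn that into rates. With purely hypothetical figures - num_ops = 512,
 * 100 repetitions, an 8448-bit transport block and a total_time worth
 * 0.05 s - the reported values would be:
 *     ops_per_sec = (512 * 100) / 0.05              =~ 1,024,000 ops/s
 *     mbps        = (512 * 100 * 8448) / 1e6 / 0.05 =~ 8650.75 Mbps
 */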
for (i = 0; i < TEST_REPETITIONS; ++i) { 3903 3904 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3905 for (j = 0; j < num_ops; ++j) 3906 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3907 3908 start_time = rte_rdtsc_precise(); 3909 3910 for (enq = 0, deq = 0; enq < num_ops;) { 3911 num_to_enq = burst_sz; 3912 3913 if (unlikely(num_ops - enq < num_to_enq)) 3914 num_to_enq = num_ops - enq; 3915 3916 enq += rte_bbdev_enqueue_enc_ops(tp->dev_id, 3917 queue_id, &ops_enq[enq], num_to_enq); 3918 3919 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3920 queue_id, &ops_deq[deq], enq - deq); 3921 } 3922 3923 /* dequeue the remaining */ 3924 while (deq < enq) { 3925 deq += rte_bbdev_dequeue_enc_ops(tp->dev_id, 3926 queue_id, &ops_deq[deq], enq - deq); 3927 } 3928 3929 total_time += rte_rdtsc_precise() - start_time; 3930 } 3931 3932 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 3933 ret = validate_enc_op(ops_deq, num_ops, ref_op); 3934 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 3935 } 3936 3937 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 3938 3939 double tb_len_bits = calc_enc_TB_size(ref_op); 3940 3941 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 3942 ((double)total_time / (double)rte_get_tsc_hz()); 3943 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 3944 / 1000000.0) / ((double)total_time / 3945 (double)rte_get_tsc_hz()); 3946 3947 return TEST_SUCCESS; 3948 } 3949 3950 static int 3951 throughput_pmd_lcore_ldpc_enc(void *arg) 3952 { 3953 struct thread_params *tp = arg; 3954 uint16_t enq, deq; 3955 uint64_t total_time = 0, start_time; 3956 const uint16_t queue_id = tp->queue_id; 3957 const uint16_t burst_sz = tp->op_params->burst_sz; 3958 const uint16_t num_ops = tp->op_params->num_to_process; 3959 struct rte_bbdev_enc_op *ops_enq[num_ops]; 3960 struct rte_bbdev_enc_op *ops_deq[num_ops]; 3961 struct rte_bbdev_enc_op *ref_op = tp->op_params->ref_enc_op; 3962 struct test_buffers *bufs = NULL; 3963 int i, j, ret; 3964 struct rte_bbdev_info info; 3965 uint16_t num_to_enq; 3966 3967 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 3968 "BURST_SIZE should be <= %u", MAX_BURST); 3969 3970 rte_bbdev_info_get(tp->dev_id, &info); 3971 3972 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 3973 "NUM_OPS cannot exceed %u for this device", 3974 info.drv.queue_size_lim); 3975 3976 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 3977 3978 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 3979 3980 ret = rte_bbdev_enc_op_alloc_bulk(tp->op_params->mp, ops_enq, 3981 num_ops); 3982 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", 3983 num_ops); 3984 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3985 copy_reference_ldpc_enc_op(ops_enq, num_ops, 0, bufs->inputs, 3986 bufs->hard_outputs, ref_op); 3987 3988 /* Set counter to validate the ordering */ 3989 for (j = 0; j < num_ops; ++j) 3990 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 3991 3992 for (i = 0; i < TEST_REPETITIONS; ++i) { 3993 3994 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 3995 for (j = 0; j < num_ops; ++j) 3996 mbuf_reset(ops_enq[j]->turbo_enc.output.data); 3997 3998 start_time = rte_rdtsc_precise(); 3999 4000 for (enq = 0, deq = 0; enq < num_ops;) { 4001 num_to_enq = burst_sz; 4002 4003 if (unlikely(num_ops - enq < num_to_enq)) 4004 num_to_enq = num_ops - enq; 4005 4006 enq += rte_bbdev_enqueue_ldpc_enc_ops(tp->dev_id, 4007 queue_id, &ops_enq[enq], num_to_enq); 4008 4009 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 4010 queue_id, &ops_deq[deq], enq - deq); 
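/*
 * Note: dequeue is attempted in the same pass as enqueue so completed
 * operations are drained while new ones are still being submitted; whatever
 * is still in flight once everything has been enqueued is collected by the
 * "dequeue the remaining" loop that follows.
 */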
4011 } 4012 4013 /* dequeue the remaining */ 4014 while (deq < enq) { 4015 deq += rte_bbdev_dequeue_ldpc_enc_ops(tp->dev_id, 4016 queue_id, &ops_deq[deq], enq - deq); 4017 } 4018 4019 total_time += rte_rdtsc_precise() - start_time; 4020 } 4021 4022 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4023 ret = validate_ldpc_enc_op(ops_deq, num_ops, ref_op); 4024 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4025 } 4026 4027 rte_bbdev_enc_op_free_bulk(ops_enq, num_ops); 4028 4029 double tb_len_bits = calc_ldpc_enc_TB_size(ref_op); 4030 4031 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 4032 ((double)total_time / (double)rte_get_tsc_hz()); 4033 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) 4034 / 1000000.0) / ((double)total_time / 4035 (double)rte_get_tsc_hz()); 4036 4037 return TEST_SUCCESS; 4038 } 4039 4040 static int 4041 throughput_pmd_lcore_fft(void *arg) 4042 { 4043 struct thread_params *tp = arg; 4044 uint16_t enq, deq; 4045 uint64_t total_time = 0, start_time; 4046 const uint16_t queue_id = tp->queue_id; 4047 const uint16_t burst_sz = tp->op_params->burst_sz; 4048 const uint16_t num_ops = tp->op_params->num_to_process; 4049 struct rte_bbdev_fft_op *ops_enq[num_ops]; 4050 struct rte_bbdev_fft_op *ops_deq[num_ops]; 4051 struct rte_bbdev_fft_op *ref_op = tp->op_params->ref_fft_op; 4052 struct test_buffers *bufs = NULL; 4053 int i, j, ret; 4054 struct rte_bbdev_info info; 4055 uint16_t num_to_enq; 4056 4057 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4058 "BURST_SIZE should be <= %u", MAX_BURST); 4059 4060 rte_bbdev_info_get(tp->dev_id, &info); 4061 4062 TEST_ASSERT_SUCCESS((num_ops > info.drv.queue_size_lim), 4063 "NUM_OPS cannot exceed %u for this device", 4064 info.drv.queue_size_lim); 4065 4066 bufs = &tp->op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4067 4068 rte_wait_until_equal_16(&tp->op_params->sync, SYNC_START, __ATOMIC_RELAXED); 4069 4070 ret = rte_bbdev_fft_op_alloc_bulk(tp->op_params->mp, ops_enq, num_ops); 4071 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", num_ops); 4072 4073 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4074 copy_reference_fft_op(ops_enq, num_ops, 0, bufs->inputs, 4075 bufs->hard_outputs, bufs->soft_outputs, ref_op); 4076 4077 /* Set counter to validate the ordering */ 4078 for (j = 0; j < num_ops; ++j) 4079 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4080 4081 for (i = 0; i < TEST_REPETITIONS; ++i) { 4082 4083 for (j = 0; j < num_ops; ++j) 4084 mbuf_reset(ops_enq[j]->fft.base_output.data); 4085 4086 start_time = rte_rdtsc_precise(); 4087 4088 for (enq = 0, deq = 0; enq < num_ops;) { 4089 num_to_enq = burst_sz; 4090 4091 if (unlikely(num_ops - enq < num_to_enq)) 4092 num_to_enq = num_ops - enq; 4093 4094 enq += rte_bbdev_enqueue_fft_ops(tp->dev_id, 4095 queue_id, &ops_enq[enq], num_to_enq); 4096 4097 deq += rte_bbdev_dequeue_fft_ops(tp->dev_id, 4098 queue_id, &ops_deq[deq], enq - deq); 4099 } 4100 4101 /* dequeue the remaining */ 4102 while (deq < enq) { 4103 deq += rte_bbdev_dequeue_fft_ops(tp->dev_id, 4104 queue_id, &ops_deq[deq], enq - deq); 4105 } 4106 4107 total_time += rte_rdtsc_precise() - start_time; 4108 } 4109 4110 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4111 ret = validate_fft_op(ops_deq, num_ops, ref_op); 4112 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4113 } 4114 4115 rte_bbdev_fft_op_free_bulk(ops_enq, num_ops); 4116 4117 double tb_len_bits = calc_fft_size(ref_op); 4118 4119 tp->ops_per_sec = ((double)num_ops * TEST_REPETITIONS) / 4120 ((double)total_time / 
(double)rte_get_tsc_hz()); 4121 tp->mbps = (((double)(num_ops * TEST_REPETITIONS * tb_len_bits)) / 4122 1000000.0) / ((double)total_time / 4123 (double)rte_get_tsc_hz()); 4124 4125 return TEST_SUCCESS; 4126 } 4127 4128 static void 4129 print_enc_throughput(struct thread_params *t_params, unsigned int used_cores) 4130 { 4131 unsigned int iter = 0; 4132 double total_mops = 0, total_mbps = 0; 4133 4134 for (iter = 0; iter < used_cores; iter++) { 4135 printf( 4136 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps\n", 4137 t_params[iter].lcore_id, t_params[iter].ops_per_sec, 4138 t_params[iter].mbps); 4139 total_mops += t_params[iter].ops_per_sec; 4140 total_mbps += t_params[iter].mbps; 4141 } 4142 printf( 4143 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps\n", 4144 used_cores, total_mops, total_mbps); 4145 } 4146 4147 /* Aggregate the performance results over the number of cores used */ 4148 static void 4149 print_dec_throughput(struct thread_params *t_params, unsigned int used_cores) 4150 { 4151 unsigned int core_idx = 0; 4152 double total_mops = 0, total_mbps = 0; 4153 uint8_t iter_count = 0; 4154 4155 for (core_idx = 0; core_idx < used_cores; core_idx++) { 4156 printf( 4157 "Throughput for core (%u): %.8lg Ops/s, %.8lg Mbps @ max %u iterations\n", 4158 t_params[core_idx].lcore_id, 4159 t_params[core_idx].ops_per_sec, 4160 t_params[core_idx].mbps, 4161 t_params[core_idx].iter_count); 4162 total_mops += t_params[core_idx].ops_per_sec; 4163 total_mbps += t_params[core_idx].mbps; 4164 iter_count = RTE_MAX(iter_count, 4165 t_params[core_idx].iter_count); 4166 } 4167 printf( 4168 "\nTotal throughput for %u cores: %.8lg MOPS, %.8lg Mbps @ max %u iterations\n", 4169 used_cores, total_mops, total_mbps, iter_count); 4170 } 4171 4172 /* Aggregate the performance results over the number of cores used */ 4173 static void 4174 print_dec_bler(struct thread_params *t_params, unsigned int used_cores) 4175 { 4176 unsigned int core_idx = 0; 4177 double total_mbps = 0, total_bler = 0, total_iter = 0; 4178 double snr = get_snr(); 4179 4180 for (core_idx = 0; core_idx < used_cores; core_idx++) { 4181 printf("Core%u BLER %.1f %% - Iters %.1f - Tp %.1f Mbps %s\n", 4182 t_params[core_idx].lcore_id, 4183 t_params[core_idx].bler * 100, 4184 t_params[core_idx].iter_average, 4185 t_params[core_idx].mbps, 4186 get_vector_filename()); 4187 total_mbps += t_params[core_idx].mbps; 4188 total_bler += t_params[core_idx].bler; 4189 total_iter += t_params[core_idx].iter_average; 4190 } 4191 total_bler /= used_cores; 4192 total_iter /= used_cores; 4193 4194 printf("SNR %.2f BLER %.1f %% - Iterations %.1f %d - Tp %.1f Mbps %s\n", 4195 snr, total_bler * 100, total_iter, get_iter_max(), 4196 total_mbps, get_vector_filename()); 4197 } 4198 4199 /* 4200 * Test function that determines BLER wireless performance 4201 */ 4202 static int 4203 bler_test(struct active_device *ad, 4204 struct test_op_params *op_params) 4205 { 4206 int ret; 4207 unsigned int lcore_id, used_cores = 0; 4208 struct thread_params *t_params; 4209 struct rte_bbdev_info info; 4210 lcore_function_t *bler_function; 4211 uint16_t num_lcores; 4212 const char *op_type_str; 4213 4214 rte_bbdev_info_get(ad->dev_id, &info); 4215 4216 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 4217 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 4218 test_vector.op_type); 4219 4220 printf("+ ------------------------------------------------------- +\n"); 4221 printf("== test: bler\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op 
type: %s, itr mode: %s, GHz: %lg\n", 4222 info.dev_name, ad->nb_queues, op_params->burst_sz, 4223 op_params->num_to_process, op_params->num_lcores, 4224 op_type_str, 4225 intr_enabled ? "Interrupt mode" : "PMD mode", 4226 (double)rte_get_tsc_hz() / 1000000000.0); 4227 4228 /* Set number of lcores */ 4229 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 4230 ? ad->nb_queues 4231 : op_params->num_lcores; 4232 4233 /* Allocate memory for thread parameters structure */ 4234 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 4235 RTE_CACHE_LINE_SIZE); 4236 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 4237 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 4238 RTE_CACHE_LINE_SIZE)); 4239 4240 if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) && 4241 !check_bit(test_vector.ldpc_dec.op_flags, 4242 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) 4243 && !check_bit(test_vector.ldpc_dec.op_flags, 4244 RTE_BBDEV_LDPC_LLR_COMPRESSION)) 4245 bler_function = bler_pmd_lcore_ldpc_dec; 4246 else 4247 return TEST_SKIPPED; 4248 4249 __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED); 4250 4251 /* Main core is set at first entry */ 4252 t_params[0].dev_id = ad->dev_id; 4253 t_params[0].lcore_id = rte_lcore_id(); 4254 t_params[0].op_params = op_params; 4255 t_params[0].queue_id = ad->queue_ids[used_cores++]; 4256 t_params[0].iter_count = 0; 4257 4258 RTE_LCORE_FOREACH_WORKER(lcore_id) { 4259 if (used_cores >= num_lcores) 4260 break; 4261 4262 t_params[used_cores].dev_id = ad->dev_id; 4263 t_params[used_cores].lcore_id = lcore_id; 4264 t_params[used_cores].op_params = op_params; 4265 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 4266 t_params[used_cores].iter_count = 0; 4267 4268 rte_eal_remote_launch(bler_function, 4269 &t_params[used_cores++], lcore_id); 4270 } 4271 4272 __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED); 4273 ret = bler_function(&t_params[0]); 4274 4275 /* Main core is always used */ 4276 for (used_cores = 1; used_cores < num_lcores; used_cores++) 4277 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 4278 4279 print_dec_bler(t_params, num_lcores); 4280 4281 /* Return if test failed */ 4282 if (ret) { 4283 rte_free(t_params); 4284 return ret; 4285 } 4286 4287 /* Function to print something here*/ 4288 rte_free(t_params); 4289 return ret; 4290 } 4291 4292 /* 4293 * Test function that determines how long an enqueue + dequeue of a burst 4294 * takes on available lcores. 4295 */ 4296 static int 4297 throughput_test(struct active_device *ad, 4298 struct test_op_params *op_params) 4299 { 4300 int ret; 4301 unsigned int lcore_id, used_cores = 0; 4302 struct thread_params *t_params, *tp; 4303 struct rte_bbdev_info info; 4304 lcore_function_t *throughput_function; 4305 uint16_t num_lcores; 4306 const char *op_type_str; 4307 4308 rte_bbdev_info_get(ad->dev_id, &info); 4309 4310 op_type_str = rte_bbdev_op_type_str(test_vector.op_type); 4311 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", 4312 test_vector.op_type); 4313 4314 printf("+ ------------------------------------------------------- +\n"); 4315 printf("== test: throughput\ndev: %s, nb_queues: %u, burst size: %u, num ops: %u, num_lcores: %u, op type: %s, itr mode: %s, GHz: %lg\n", 4316 info.dev_name, ad->nb_queues, op_params->burst_sz, 4317 op_params->num_to_process, op_params->num_lcores, 4318 op_type_str, 4319 intr_enabled ? 
"Interrupt mode" : "PMD mode", 4320 (double)rte_get_tsc_hz() / 1000000000.0); 4321 4322 /* Set number of lcores */ 4323 num_lcores = (ad->nb_queues < (op_params->num_lcores)) 4324 ? ad->nb_queues 4325 : op_params->num_lcores; 4326 4327 /* Allocate memory for thread parameters structure */ 4328 t_params = rte_zmalloc(NULL, num_lcores * sizeof(struct thread_params), 4329 RTE_CACHE_LINE_SIZE); 4330 TEST_ASSERT_NOT_NULL(t_params, "Failed to alloc %zuB for t_params", 4331 RTE_ALIGN(sizeof(struct thread_params) * num_lcores, 4332 RTE_CACHE_LINE_SIZE)); 4333 4334 if (intr_enabled) { 4335 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 4336 throughput_function = throughput_intr_lcore_dec; 4337 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 4338 throughput_function = throughput_intr_lcore_ldpc_dec; 4339 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 4340 throughput_function = throughput_intr_lcore_enc; 4341 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 4342 throughput_function = throughput_intr_lcore_ldpc_enc; 4343 else if (test_vector.op_type == RTE_BBDEV_OP_FFT) 4344 throughput_function = throughput_intr_lcore_fft; 4345 else 4346 throughput_function = throughput_intr_lcore_enc; 4347 4348 /* Dequeue interrupt callback registration */ 4349 ret = rte_bbdev_callback_register(ad->dev_id, 4350 RTE_BBDEV_EVENT_DEQUEUE, dequeue_event_callback, 4351 t_params); 4352 if (ret < 0) { 4353 rte_free(t_params); 4354 return ret; 4355 } 4356 } else { 4357 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC) 4358 throughput_function = throughput_pmd_lcore_dec; 4359 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 4360 throughput_function = throughput_pmd_lcore_ldpc_dec; 4361 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC) 4362 throughput_function = throughput_pmd_lcore_enc; 4363 else if (test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 4364 throughput_function = throughput_pmd_lcore_ldpc_enc; 4365 else if (test_vector.op_type == RTE_BBDEV_OP_FFT) 4366 throughput_function = throughput_pmd_lcore_fft; 4367 else 4368 throughput_function = throughput_pmd_lcore_enc; 4369 } 4370 4371 __atomic_store_n(&op_params->sync, SYNC_WAIT, __ATOMIC_RELAXED); 4372 4373 /* Main core is set at first entry */ 4374 t_params[0].dev_id = ad->dev_id; 4375 t_params[0].lcore_id = rte_lcore_id(); 4376 t_params[0].op_params = op_params; 4377 t_params[0].queue_id = ad->queue_ids[used_cores++]; 4378 t_params[0].iter_count = 0; 4379 4380 RTE_LCORE_FOREACH_WORKER(lcore_id) { 4381 if (used_cores >= num_lcores) 4382 break; 4383 4384 t_params[used_cores].dev_id = ad->dev_id; 4385 t_params[used_cores].lcore_id = lcore_id; 4386 t_params[used_cores].op_params = op_params; 4387 t_params[used_cores].queue_id = ad->queue_ids[used_cores]; 4388 t_params[used_cores].iter_count = 0; 4389 4390 rte_eal_remote_launch(throughput_function, 4391 &t_params[used_cores++], lcore_id); 4392 } 4393 4394 __atomic_store_n(&op_params->sync, SYNC_START, __ATOMIC_RELAXED); 4395 ret = throughput_function(&t_params[0]); 4396 4397 /* Main core is always used */ 4398 for (used_cores = 1; used_cores < num_lcores; used_cores++) 4399 ret |= rte_eal_wait_lcore(t_params[used_cores].lcore_id); 4400 4401 /* Return if test failed */ 4402 if (ret) { 4403 rte_free(t_params); 4404 return ret; 4405 } 4406 4407 /* Print throughput if interrupts are disabled and test passed */ 4408 if (!intr_enabled) { 4409 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC || 4410 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 4411 print_dec_throughput(t_params, num_lcores); 
4412 else 4413 print_enc_throughput(t_params, num_lcores); 4414 rte_free(t_params); 4415 return ret; 4416 } 4417 4418 /* In the interrupt TC we need to wait for the interrupt callback to dequeue 4419 * all pending operations. Skip waiting for queues which reported an 4420 * error using processing_status variable. 4421 * Wait for main lcore operations. 4422 */ 4423 tp = &t_params[0]; 4424 while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < 4425 op_params->num_to_process) && 4426 (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) != 4427 TEST_FAILED)) 4428 rte_pause(); 4429 4430 tp->ops_per_sec /= TEST_REPETITIONS; 4431 tp->mbps /= TEST_REPETITIONS; 4432 ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED); 4433 4434 /* Wait for worker lcore operations */ 4435 for (used_cores = 1; used_cores < num_lcores; used_cores++) { 4436 tp = &t_params[used_cores]; 4437 4438 while ((__atomic_load_n(&tp->nb_dequeued, __ATOMIC_RELAXED) < 4439 op_params->num_to_process) && 4440 (__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED) != 4441 TEST_FAILED)) 4442 rte_pause(); 4443 4444 tp->ops_per_sec /= TEST_REPETITIONS; 4445 tp->mbps /= TEST_REPETITIONS; 4446 ret |= (int)__atomic_load_n(&tp->processing_status, __ATOMIC_RELAXED); 4447 } 4448 4449 /* Print throughput if test passed */ 4450 if (!ret) { 4451 if (test_vector.op_type == RTE_BBDEV_OP_TURBO_DEC || 4452 test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) 4453 print_dec_throughput(t_params, num_lcores); 4454 else if (test_vector.op_type == RTE_BBDEV_OP_TURBO_ENC || 4455 test_vector.op_type == RTE_BBDEV_OP_LDPC_ENC) 4456 print_enc_throughput(t_params, num_lcores); 4457 } 4458 4459 rte_free(t_params); 4460 return ret; 4461 } 4462 4463 static int 4464 latency_test_dec(struct rte_mempool *mempool, 4465 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 4466 int vector_mask, uint16_t dev_id, uint16_t queue_id, 4467 const uint16_t num_to_process, uint16_t burst_sz, 4468 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4469 { 4470 int ret = TEST_SUCCESS; 4471 uint16_t i, j, dequeued; 4472 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4473 uint64_t start_time = 0, last_time = 0; 4474 4475 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4476 uint16_t enq = 0, deq = 0; 4477 bool first_time = true; 4478 last_time = 0; 4479 4480 if (unlikely(num_to_process - dequeued < burst_sz)) 4481 burst_sz = num_to_process - dequeued; 4482 4483 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4484 TEST_ASSERT_SUCCESS(ret, 4485 "rte_bbdev_dec_op_alloc_bulk() failed"); 4486 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4487 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 4488 bufs->inputs, 4489 bufs->hard_outputs, 4490 bufs->soft_outputs, 4491 ref_op); 4492 4493 /* Set counter to validate the ordering */ 4494 for (j = 0; j < burst_sz; ++j) 4495 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4496 4497 start_time = rte_rdtsc_precise(); 4498 4499 enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, &ops_enq[enq], 4500 burst_sz); 4501 TEST_ASSERT(enq == burst_sz, 4502 "Error enqueueing burst, expected %u, got %u", 4503 burst_sz, enq); 4504 4505 /* Dequeue */ 4506 do { 4507 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 4508 &ops_deq[deq], burst_sz - deq); 4509 if (likely(first_time && (deq > 0))) { 4510 last_time = rte_rdtsc_precise() - start_time; 4511 first_time = false; 4512 } 4513 } while (unlikely(burst_sz != deq)); 4514 4515 *max_time = RTE_MAX(*max_time, last_time); 4516
*min_time = RTE_MIN(*min_time, last_time); 4517 *total_time += last_time; 4518 4519 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4520 ret = validate_dec_op(ops_deq, burst_sz, ref_op, 4521 vector_mask); 4522 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4523 } 4524 4525 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4526 dequeued += deq; 4527 } 4528 4529 return i; 4530 } 4531 4532 /* Test case for latency/validation for LDPC Decoder */ 4533 static int 4534 latency_test_ldpc_dec(struct rte_mempool *mempool, 4535 struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, 4536 int vector_mask, uint16_t dev_id, uint16_t queue_id, 4537 const uint16_t num_to_process, uint16_t burst_sz, 4538 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, 4539 bool disable_et) 4540 { 4541 int ret = TEST_SUCCESS; 4542 uint16_t i, j, dequeued; 4543 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4544 uint64_t start_time = 0, last_time = 0; 4545 bool extDdr = ldpc_cap_flags & 4546 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 4547 4548 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4549 uint16_t enq = 0, deq = 0; 4550 bool first_time = true; 4551 last_time = 0; 4552 4553 if (unlikely(num_to_process - dequeued < burst_sz)) 4554 burst_sz = num_to_process - dequeued; 4555 4556 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 4557 TEST_ASSERT_SUCCESS(ret, 4558 "rte_bbdev_dec_op_alloc_bulk() failed"); 4559 4560 /* For latency tests we need to disable early termination */ 4561 if (disable_et && check_bit(ref_op->ldpc_dec.op_flags, 4562 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) 4563 ref_op->ldpc_dec.op_flags -= 4564 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; 4565 ref_op->ldpc_dec.iter_max = get_iter_max(); 4566 ref_op->ldpc_dec.iter_count = ref_op->ldpc_dec.iter_max; 4567 4568 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4569 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 4570 bufs->inputs, 4571 bufs->hard_outputs, 4572 bufs->soft_outputs, 4573 bufs->harq_inputs, 4574 bufs->harq_outputs, 4575 ref_op); 4576 4577 if (extDdr) 4578 preload_harq_ddr(dev_id, queue_id, ops_enq, 4579 burst_sz, true); 4580 4581 /* Set counter to validate the ordering */ 4582 for (j = 0; j < burst_sz; ++j) 4583 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4584 4585 start_time = rte_rdtsc_precise(); 4586 4587 enq = rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 4588 &ops_enq[enq], burst_sz); 4589 TEST_ASSERT(enq == burst_sz, 4590 "Error enqueueing burst, expected %u, got %u", 4591 burst_sz, enq); 4592 4593 /* Dequeue */ 4594 do { 4595 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 4596 &ops_deq[deq], burst_sz - deq); 4597 if (likely(first_time && (deq > 0))) { 4598 last_time = rte_rdtsc_precise() - start_time; 4599 first_time = false; 4600 } 4601 } while (unlikely(burst_sz != deq)); 4602 4603 *max_time = RTE_MAX(*max_time, last_time); 4604 *min_time = RTE_MIN(*min_time, last_time); 4605 *total_time += last_time; 4606 4607 if (extDdr) 4608 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 4609 4610 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4611 ret = validate_ldpc_dec_op(ops_deq, burst_sz, ref_op, 4612 vector_mask); 4613 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4614 } 4615 4616 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 4617 dequeued += deq; 4618 } 4619 return i; 4620 } 4621 4622 static int 4623 latency_test_enc(struct rte_mempool *mempool, 4624 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4625 uint16_t dev_id, uint16_t queue_id, 4626 const 
uint16_t num_to_process, uint16_t burst_sz, 4627 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4628 { 4629 int ret = TEST_SUCCESS; 4630 uint16_t i, j, dequeued; 4631 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4632 uint64_t start_time = 0, last_time = 0; 4633 4634 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4635 uint16_t enq = 0, deq = 0; 4636 bool first_time = true; 4637 last_time = 0; 4638 4639 if (unlikely(num_to_process - dequeued < burst_sz)) 4640 burst_sz = num_to_process - dequeued; 4641 4642 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4643 TEST_ASSERT_SUCCESS(ret, 4644 "rte_bbdev_enc_op_alloc_bulk() failed"); 4645 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4646 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 4647 bufs->inputs, 4648 bufs->hard_outputs, 4649 ref_op); 4650 4651 /* Set counter to validate the ordering */ 4652 for (j = 0; j < burst_sz; ++j) 4653 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4654 4655 start_time = rte_rdtsc_precise(); 4656 4657 enq = rte_bbdev_enqueue_enc_ops(dev_id, queue_id, &ops_enq[enq], 4658 burst_sz); 4659 TEST_ASSERT(enq == burst_sz, 4660 "Error enqueueing burst, expected %u, got %u", 4661 burst_sz, enq); 4662 4663 /* Dequeue */ 4664 do { 4665 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 4666 &ops_deq[deq], burst_sz - deq); 4667 if (likely(first_time && (deq > 0))) { 4668 last_time += rte_rdtsc_precise() - start_time; 4669 first_time = false; 4670 } 4671 } while (unlikely(burst_sz != deq)); 4672 4673 *max_time = RTE_MAX(*max_time, last_time); 4674 *min_time = RTE_MIN(*min_time, last_time); 4675 *total_time += last_time; 4676 4677 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4678 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4679 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4680 } 4681 4682 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4683 dequeued += deq; 4684 } 4685 4686 return i; 4687 } 4688 4689 static int 4690 latency_test_ldpc_enc(struct rte_mempool *mempool, 4691 struct test_buffers *bufs, struct rte_bbdev_enc_op *ref_op, 4692 uint16_t dev_id, uint16_t queue_id, 4693 const uint16_t num_to_process, uint16_t burst_sz, 4694 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4695 { 4696 int ret = TEST_SUCCESS; 4697 uint16_t i, j, dequeued; 4698 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4699 uint64_t start_time = 0, last_time = 0; 4700 4701 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4702 uint16_t enq = 0, deq = 0; 4703 bool first_time = true; 4704 last_time = 0; 4705 4706 if (unlikely(num_to_process - dequeued < burst_sz)) 4707 burst_sz = num_to_process - dequeued; 4708 4709 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 4710 TEST_ASSERT_SUCCESS(ret, 4711 "rte_bbdev_enc_op_alloc_bulk() failed"); 4712 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4713 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 4714 bufs->inputs, 4715 bufs->hard_outputs, 4716 ref_op); 4717 4718 /* Set counter to validate the ordering */ 4719 for (j = 0; j < burst_sz; ++j) 4720 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4721 4722 start_time = rte_rdtsc_precise(); 4723 4724 enq = rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 4725 &ops_enq[enq], burst_sz); 4726 TEST_ASSERT(enq == burst_sz, 4727 "Error enqueueing burst, expected %u, got %u", 4728 burst_sz, enq); 4729 4730 /* Dequeue */ 4731 do { 4732 deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, 4733 &ops_deq[deq], burst_sz - deq); 4734 if 
(likely(first_time && (deq > 0))) { 4735 last_time += rte_rdtsc_precise() - start_time; 4736 first_time = false; 4737 } 4738 } while (unlikely(burst_sz != deq)); 4739 4740 *max_time = RTE_MAX(*max_time, last_time); 4741 *min_time = RTE_MIN(*min_time, last_time); 4742 *total_time += last_time; 4743 4744 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4745 ret = validate_enc_op(ops_deq, burst_sz, ref_op); 4746 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4747 } 4748 4749 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 4750 dequeued += deq; 4751 } 4752 4753 return i; 4754 } 4755 4756 4757 static int 4758 latency_test_fft(struct rte_mempool *mempool, 4759 struct test_buffers *bufs, struct rte_bbdev_fft_op *ref_op, 4760 uint16_t dev_id, uint16_t queue_id, 4761 const uint16_t num_to_process, uint16_t burst_sz, 4762 uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) 4763 { 4764 int ret = TEST_SUCCESS; 4765 uint16_t i, j, dequeued; 4766 struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4767 uint64_t start_time = 0, last_time = 0; 4768 4769 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4770 uint16_t enq = 0, deq = 0; 4771 bool first_time = true; 4772 last_time = 0; 4773 4774 if (unlikely(num_to_process - dequeued < burst_sz)) 4775 burst_sz = num_to_process - dequeued; 4776 4777 ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz); 4778 TEST_ASSERT_SUCCESS(ret, 4779 "rte_bbdev_fft_op_alloc_bulk() failed"); 4780 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4781 copy_reference_fft_op(ops_enq, burst_sz, dequeued, 4782 bufs->inputs, 4783 bufs->hard_outputs, bufs->soft_outputs, 4784 ref_op); 4785 4786 /* Set counter to validate the ordering */ 4787 for (j = 0; j < burst_sz; ++j) 4788 ops_enq[j]->opaque_data = (void *)(uintptr_t)j; 4789 4790 start_time = rte_rdtsc_precise(); 4791 4792 enq = rte_bbdev_enqueue_fft_ops(dev_id, queue_id, 4793 &ops_enq[enq], burst_sz); 4794 TEST_ASSERT(enq == burst_sz, 4795 "Error enqueueing burst, expected %u, got %u", 4796 burst_sz, enq); 4797 4798 /* Dequeue */ 4799 do { 4800 deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id, 4801 &ops_deq[deq], burst_sz - deq); 4802 if (likely(first_time && (deq > 0))) { 4803 last_time += rte_rdtsc_precise() - start_time; 4804 first_time = false; 4805 } 4806 } while (unlikely(burst_sz != deq)); 4807 4808 *max_time = RTE_MAX(*max_time, last_time); 4809 *min_time = RTE_MIN(*min_time, last_time); 4810 *total_time += last_time; 4811 4812 if (test_vector.op_type != RTE_BBDEV_OP_NONE) { 4813 ret = validate_fft_op(ops_deq, burst_sz, ref_op); 4814 TEST_ASSERT_SUCCESS(ret, "Validation failed!"); 4815 } 4816 4817 rte_bbdev_fft_op_free_bulk(ops_enq, deq); 4818 dequeued += deq; 4819 } 4820 4821 return i; 4822 } 4823 4824 /* Common function for running validation and latency test cases */ 4825 static int 4826 validation_latency_test(struct active_device *ad, 4827 struct test_op_params *op_params, bool latency_flag) 4828 { 4829 int iter; 4830 uint16_t burst_sz = op_params->burst_sz; 4831 const uint16_t num_to_process = op_params->num_to_process; 4832 const enum rte_bbdev_op_type op_type = test_vector.op_type; 4833 const uint16_t queue_id = ad->queue_ids[0]; 4834 struct test_buffers *bufs = NULL; 4835 struct rte_bbdev_info info; 4836 uint64_t total_time, min_time, max_time; 4837 const char *op_type_str; 4838 4839 total_time = max_time = 0; 4840 min_time = UINT64_MAX; 4841 4842 TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), 4843 "BURST_SIZE should be <= %u", MAX_BURST); 4844 4845 rte_bbdev_info_get(ad->dev_id, 
&info); 4846 bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; 4847 4848 op_type_str = rte_bbdev_op_type_str(op_type); 4849 TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); 4850 4851 printf("+ ------------------------------------------------------- +\n"); 4852 if (latency_flag) 4853 printf("== test: latency\ndev:"); 4854 else 4855 printf("== test: validation\ndev:"); 4856 printf("%s, burst size: %u, num ops: %u, op type: %s\n", 4857 info.dev_name, burst_sz, num_to_process, op_type_str); 4858 4859 if (op_type == RTE_BBDEV_OP_TURBO_DEC) 4860 iter = latency_test_dec(op_params->mp, bufs, 4861 op_params->ref_dec_op, op_params->vector_mask, 4862 ad->dev_id, queue_id, num_to_process, 4863 burst_sz, &total_time, &min_time, &max_time); 4864 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) 4865 iter = latency_test_ldpc_enc(op_params->mp, bufs, 4866 op_params->ref_enc_op, ad->dev_id, queue_id, 4867 num_to_process, burst_sz, &total_time, 4868 &min_time, &max_time); 4869 else if (op_type == RTE_BBDEV_OP_LDPC_DEC) 4870 iter = latency_test_ldpc_dec(op_params->mp, bufs, 4871 op_params->ref_dec_op, op_params->vector_mask, 4872 ad->dev_id, queue_id, num_to_process, 4873 burst_sz, &total_time, &min_time, &max_time, 4874 latency_flag); 4875 else if (op_type == RTE_BBDEV_OP_FFT) 4876 iter = latency_test_fft(op_params->mp, bufs, 4877 op_params->ref_fft_op, 4878 ad->dev_id, queue_id, 4879 num_to_process, burst_sz, &total_time, 4880 &min_time, &max_time); 4881 else /* RTE_BBDEV_OP_TURBO_ENC */ 4882 iter = latency_test_enc(op_params->mp, bufs, 4883 op_params->ref_enc_op, 4884 ad->dev_id, queue_id, 4885 num_to_process, burst_sz, &total_time, 4886 &min_time, &max_time); 4887 4888 if (iter <= 0) 4889 return TEST_FAILED; 4890 4891 printf("Operation latency:\n" 4892 "\tavg: %lg cycles, %lg us\n" 4893 "\tmin: %lg cycles, %lg us\n" 4894 "\tmax: %lg cycles, %lg us\n", 4895 (double)total_time / (double)iter, 4896 (double)(total_time * 1000000) / (double)iter / 4897 (double)rte_get_tsc_hz(), (double)min_time, 4898 (double)(min_time * 1000000) / (double)rte_get_tsc_hz(), 4899 (double)max_time, (double)(max_time * 1000000) / 4900 (double)rte_get_tsc_hz()); 4901 4902 return TEST_SUCCESS; 4903 } 4904 4905 static int 4906 latency_test(struct active_device *ad, struct test_op_params *op_params) 4907 { 4908 return validation_latency_test(ad, op_params, true); 4909 } 4910 4911 static int 4912 validation_test(struct active_device *ad, struct test_op_params *op_params) 4913 { 4914 return validation_latency_test(ad, op_params, false); 4915 } 4916 4917 static int 4918 get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id, 4919 struct rte_bbdev_stats *stats) 4920 { 4921 struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; 4922 struct rte_bbdev_stats *q_stats; 4923 4924 if (queue_id >= dev->data->num_queues) 4925 return -1; 4926 4927 q_stats = &dev->data->queues[queue_id].queue_stats; 4928 4929 stats->enqueued_count = q_stats->enqueued_count; 4930 stats->dequeued_count = q_stats->dequeued_count; 4931 stats->enqueue_err_count = q_stats->enqueue_err_count; 4932 stats->dequeue_err_count = q_stats->dequeue_err_count; 4933 stats->enqueue_warn_count = q_stats->enqueue_warn_count; 4934 stats->dequeue_warn_count = q_stats->dequeue_warn_count; 4935 stats->acc_offload_cycles = q_stats->acc_offload_cycles; 4936 4937 return 0; 4938 } 4939 4940 static int 4941 offload_latency_test_fft(struct rte_mempool *mempool, struct test_buffers *bufs, 4942 struct rte_bbdev_fft_op *ref_op, uint16_t dev_id, 4943 uint16_t queue_id, 
const uint16_t num_to_process, 4944 uint16_t burst_sz, struct test_time_stats *time_st) 4945 { 4946 int i, dequeued, ret; 4947 struct rte_bbdev_fft_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 4948 uint64_t enq_start_time, deq_start_time; 4949 uint64_t enq_sw_last_time, deq_last_time; 4950 struct rte_bbdev_stats stats; 4951 4952 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 4953 uint16_t enq = 0, deq = 0; 4954 4955 if (unlikely(num_to_process - dequeued < burst_sz)) 4956 burst_sz = num_to_process - dequeued; 4957 4958 ret = rte_bbdev_fft_op_alloc_bulk(mempool, ops_enq, burst_sz); 4959 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz); 4960 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 4961 copy_reference_fft_op(ops_enq, burst_sz, dequeued, 4962 bufs->inputs, 4963 bufs->hard_outputs, bufs->soft_outputs, 4964 ref_op); 4965 4966 /* Start time meas for enqueue function offload latency */ 4967 enq_start_time = rte_rdtsc_precise(); 4968 do { 4969 enq += rte_bbdev_enqueue_fft_ops(dev_id, queue_id, 4970 &ops_enq[enq], burst_sz - enq); 4971 } while (unlikely(burst_sz != enq)); 4972 4973 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 4974 TEST_ASSERT_SUCCESS(ret, 4975 "Failed to get stats for queue (%u) of device (%u)", 4976 queue_id, dev_id); 4977 4978 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time - 4979 stats.acc_offload_cycles; 4980 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 4981 enq_sw_last_time); 4982 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 4983 enq_sw_last_time); 4984 time_st->enq_sw_total_time += enq_sw_last_time; 4985 4986 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 4987 stats.acc_offload_cycles); 4988 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 4989 stats.acc_offload_cycles); 4990 time_st->enq_acc_total_time += stats.acc_offload_cycles; 4991 4992 /* give time for device to process ops */ 4993 rte_delay_us(WAIT_OFFLOAD_US); 4994 4995 /* Start time meas for dequeue function offload latency */ 4996 deq_start_time = rte_rdtsc_precise(); 4997 /* Dequeue one operation */ 4998 do { 4999 deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id, 5000 &ops_deq[deq], enq); 5001 } while (unlikely(deq == 0)); 5002 5003 deq_last_time = rte_rdtsc_precise() - deq_start_time; 5004 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 5005 deq_last_time); 5006 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 5007 deq_last_time); 5008 time_st->deq_total_time += deq_last_time; 5009 5010 /* Dequeue remaining operations if needed*/ 5011 while (burst_sz != deq) 5012 deq += rte_bbdev_dequeue_fft_ops(dev_id, queue_id, 5013 &ops_deq[deq], burst_sz - deq); 5014 5015 rte_bbdev_fft_op_free_bulk(ops_enq, deq); 5016 dequeued += deq; 5017 } 5018 5019 return i; 5020 } 5021 5022 static int 5023 offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs, 5024 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 5025 uint16_t queue_id, const uint16_t num_to_process, 5026 uint16_t burst_sz, struct test_time_stats *time_st) 5027 { 5028 int i, dequeued, ret; 5029 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 5030 uint64_t enq_start_time, deq_start_time; 5031 uint64_t enq_sw_last_time, deq_last_time; 5032 struct rte_bbdev_stats stats; 5033 5034 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 5035 uint16_t enq = 0, deq = 0; 5036 5037 if (unlikely(num_to_process - dequeued < burst_sz)) 5038 burst_sz = num_to_process - dequeued; 5039 5040 ret = 
rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 5041 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz); 5042 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 5043 copy_reference_dec_op(ops_enq, burst_sz, dequeued, 5044 bufs->inputs, 5045 bufs->hard_outputs, 5046 bufs->soft_outputs, 5047 ref_op); 5048 5049 /* Start time meas for enqueue function offload latency */ 5050 enq_start_time = rte_rdtsc_precise(); 5051 do { 5052 enq += rte_bbdev_enqueue_dec_ops(dev_id, queue_id, 5053 &ops_enq[enq], burst_sz - enq); 5054 } while (unlikely(burst_sz != enq)); 5055 5056 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 5057 TEST_ASSERT_SUCCESS(ret, 5058 "Failed to get stats for queue (%u) of device (%u)", 5059 queue_id, dev_id); 5060 5061 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time - 5062 stats.acc_offload_cycles; 5063 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 5064 enq_sw_last_time); 5065 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 5066 enq_sw_last_time); 5067 time_st->enq_sw_total_time += enq_sw_last_time; 5068 5069 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 5070 stats.acc_offload_cycles); 5071 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 5072 stats.acc_offload_cycles); 5073 time_st->enq_acc_total_time += stats.acc_offload_cycles; 5074 5075 /* give time for device to process ops */ 5076 rte_delay_us(WAIT_OFFLOAD_US); 5077 5078 /* Start time meas for dequeue function offload latency */ 5079 deq_start_time = rte_rdtsc_precise(); 5080 /* Dequeue one operation */ 5081 do { 5082 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 5083 &ops_deq[deq], enq); 5084 } while (unlikely(deq == 0)); 5085 5086 deq_last_time = rte_rdtsc_precise() - deq_start_time; 5087 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 5088 deq_last_time); 5089 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 5090 deq_last_time); 5091 time_st->deq_total_time += deq_last_time; 5092 5093 /* Dequeue remaining operations if needed*/ 5094 while (burst_sz != deq) 5095 deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, 5096 &ops_deq[deq], burst_sz - deq); 5097 5098 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 5099 dequeued += deq; 5100 } 5101 5102 return i; 5103 } 5104 5105 static int 5106 offload_latency_test_ldpc_dec(struct rte_mempool *mempool, 5107 struct test_buffers *bufs, 5108 struct rte_bbdev_dec_op *ref_op, uint16_t dev_id, 5109 uint16_t queue_id, const uint16_t num_to_process, 5110 uint16_t burst_sz, struct test_time_stats *time_st) 5111 { 5112 int i, dequeued, ret; 5113 struct rte_bbdev_dec_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 5114 uint64_t enq_start_time, deq_start_time; 5115 uint64_t enq_sw_last_time, deq_last_time; 5116 struct rte_bbdev_stats stats; 5117 bool extDdr = ldpc_cap_flags & 5118 RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; 5119 5120 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 5121 uint16_t enq = 0, deq = 0; 5122 5123 if (unlikely(num_to_process - dequeued < burst_sz)) 5124 burst_sz = num_to_process - dequeued; 5125 5126 ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz); 5127 TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops", burst_sz); 5128 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 5129 copy_reference_ldpc_dec_op(ops_enq, burst_sz, dequeued, 5130 bufs->inputs, 5131 bufs->hard_outputs, 5132 bufs->soft_outputs, 5133 bufs->harq_inputs, 5134 bufs->harq_outputs, 5135 ref_op); 5136 5137 if (extDdr) 5138 preload_harq_ddr(dev_id, queue_id, ops_enq, 5139 
burst_sz, true); 5140 5141 /* Start time meas for enqueue function offload latency */ 5142 enq_start_time = rte_rdtsc_precise(); 5143 do { 5144 enq += rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, 5145 &ops_enq[enq], burst_sz - enq); 5146 } while (unlikely(burst_sz != enq)); 5147 5148 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 5149 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 5150 TEST_ASSERT_SUCCESS(ret, 5151 "Failed to get stats for queue (%u) of device (%u)", 5152 queue_id, dev_id); 5153 5154 enq_sw_last_time -= stats.acc_offload_cycles; 5155 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 5156 enq_sw_last_time); 5157 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 5158 enq_sw_last_time); 5159 time_st->enq_sw_total_time += enq_sw_last_time; 5160 5161 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 5162 stats.acc_offload_cycles); 5163 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 5164 stats.acc_offload_cycles); 5165 time_st->enq_acc_total_time += stats.acc_offload_cycles; 5166 5167 /* give time for device to process ops */ 5168 rte_delay_us(WAIT_OFFLOAD_US); 5169 5170 /* Start time meas for dequeue function offload latency */ 5171 deq_start_time = rte_rdtsc_precise(); 5172 /* Dequeue one operation */ 5173 do { 5174 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 5175 &ops_deq[deq], enq); 5176 } while (unlikely(deq == 0)); 5177 5178 deq_last_time = rte_rdtsc_precise() - deq_start_time; 5179 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 5180 deq_last_time); 5181 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 5182 deq_last_time); 5183 time_st->deq_total_time += deq_last_time; 5184 5185 /* Dequeue remaining operations if needed*/ 5186 while (burst_sz != deq) 5187 deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, 5188 &ops_deq[deq], burst_sz - deq); 5189 5190 if (extDdr) { 5191 /* Read loopback is not thread safe */ 5192 retrieve_harq_ddr(dev_id, queue_id, ops_enq, burst_sz); 5193 } 5194 5195 rte_bbdev_dec_op_free_bulk(ops_enq, deq); 5196 dequeued += deq; 5197 } 5198 5199 return i; 5200 } 5201 5202 static int 5203 offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs, 5204 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 5205 uint16_t queue_id, const uint16_t num_to_process, 5206 uint16_t burst_sz, struct test_time_stats *time_st) 5207 { 5208 int i, dequeued, ret; 5209 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 5210 uint64_t enq_start_time, deq_start_time; 5211 uint64_t enq_sw_last_time, deq_last_time; 5212 struct rte_bbdev_stats stats; 5213 5214 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 5215 uint16_t enq = 0, deq = 0; 5216 5217 if (unlikely(num_to_process - dequeued < burst_sz)) 5218 burst_sz = num_to_process - dequeued; 5219 5220 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 5221 TEST_ASSERT_SUCCESS(ret, 5222 "rte_bbdev_enc_op_alloc_bulk() failed"); 5223 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 5224 copy_reference_enc_op(ops_enq, burst_sz, dequeued, 5225 bufs->inputs, 5226 bufs->hard_outputs, 5227 ref_op); 5228 5229 /* Start time meas for enqueue function offload latency */ 5230 enq_start_time = rte_rdtsc_precise(); 5231 do { 5232 enq += rte_bbdev_enqueue_enc_ops(dev_id, queue_id, 5233 &ops_enq[enq], burst_sz - enq); 5234 } while (unlikely(burst_sz != enq)); 5235 5236 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 5237 5238 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 
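/*
 * Note on the cost split performed below: the wall-clock enqueue time
 * measured above is reduced by stats.acc_offload_cycles, the cycle count
 * the PMD attributes to the accelerator itself, so enq_sw_last_time ends up
 * holding only the software/driver part of the enqueue cost, while the
 * accelerator part is tracked separately from the same statistic.
 */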
5239 TEST_ASSERT_SUCCESS(ret, 5240 "Failed to get stats for queue (%u) of device (%u)", 5241 queue_id, dev_id); 5242 enq_sw_last_time -= stats.acc_offload_cycles; 5243 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 5244 enq_sw_last_time); 5245 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 5246 enq_sw_last_time); 5247 time_st->enq_sw_total_time += enq_sw_last_time; 5248 5249 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 5250 stats.acc_offload_cycles); 5251 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 5252 stats.acc_offload_cycles); 5253 time_st->enq_acc_total_time += stats.acc_offload_cycles; 5254 5255 /* give time for device to process ops */ 5256 rte_delay_us(WAIT_OFFLOAD_US); 5257 5258 /* Start time meas for dequeue function offload latency */ 5259 deq_start_time = rte_rdtsc_precise(); 5260 /* Dequeue one operation */ 5261 do { 5262 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 5263 &ops_deq[deq], enq); 5264 } while (unlikely(deq == 0)); 5265 5266 deq_last_time = rte_rdtsc_precise() - deq_start_time; 5267 time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, 5268 deq_last_time); 5269 time_st->deq_min_time = RTE_MIN(time_st->deq_min_time, 5270 deq_last_time); 5271 time_st->deq_total_time += deq_last_time; 5272 5273 while (burst_sz != deq) 5274 deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, 5275 &ops_deq[deq], burst_sz - deq); 5276 5277 rte_bbdev_enc_op_free_bulk(ops_enq, deq); 5278 dequeued += deq; 5279 } 5280 5281 return i; 5282 } 5283 5284 static int 5285 offload_latency_test_ldpc_enc(struct rte_mempool *mempool, 5286 struct test_buffers *bufs, 5287 struct rte_bbdev_enc_op *ref_op, uint16_t dev_id, 5288 uint16_t queue_id, const uint16_t num_to_process, 5289 uint16_t burst_sz, struct test_time_stats *time_st) 5290 { 5291 int i, dequeued, ret; 5292 struct rte_bbdev_enc_op *ops_enq[MAX_BURST], *ops_deq[MAX_BURST]; 5293 uint64_t enq_start_time, deq_start_time; 5294 uint64_t enq_sw_last_time, deq_last_time; 5295 struct rte_bbdev_stats stats; 5296 5297 for (i = 0, dequeued = 0; dequeued < num_to_process; ++i) { 5298 uint16_t enq = 0, deq = 0; 5299 5300 if (unlikely(num_to_process - dequeued < burst_sz)) 5301 burst_sz = num_to_process - dequeued; 5302 5303 ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz); 5304 TEST_ASSERT_SUCCESS(ret, 5305 "rte_bbdev_enc_op_alloc_bulk() failed"); 5306 if (test_vector.op_type != RTE_BBDEV_OP_NONE) 5307 copy_reference_ldpc_enc_op(ops_enq, burst_sz, dequeued, 5308 bufs->inputs, 5309 bufs->hard_outputs, 5310 ref_op); 5311 5312 /* Start time meas for enqueue function offload latency */ 5313 enq_start_time = rte_rdtsc_precise(); 5314 do { 5315 enq += rte_bbdev_enqueue_ldpc_enc_ops(dev_id, queue_id, 5316 &ops_enq[enq], burst_sz - enq); 5317 } while (unlikely(burst_sz != enq)); 5318 5319 enq_sw_last_time = rte_rdtsc_precise() - enq_start_time; 5320 ret = get_bbdev_queue_stats(dev_id, queue_id, &stats); 5321 TEST_ASSERT_SUCCESS(ret, 5322 "Failed to get stats for queue (%u) of device (%u)", 5323 queue_id, dev_id); 5324 5325 enq_sw_last_time -= stats.acc_offload_cycles; 5326 time_st->enq_sw_max_time = RTE_MAX(time_st->enq_sw_max_time, 5327 enq_sw_last_time); 5328 time_st->enq_sw_min_time = RTE_MIN(time_st->enq_sw_min_time, 5329 enq_sw_last_time); 5330 time_st->enq_sw_total_time += enq_sw_last_time; 5331 5332 time_st->enq_acc_max_time = RTE_MAX(time_st->enq_acc_max_time, 5333 stats.acc_offload_cycles); 5334 time_st->enq_acc_min_time = RTE_MIN(time_st->enq_acc_min_time, 5335 
static int
offload_cost_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int iter;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct test_buffers *bufs = NULL;
	struct rte_bbdev_info info;
	const char *op_type_str;
	struct test_time_stats time_st;

	memset(&time_st, 0, sizeof(struct test_time_stats));
	time_st.enq_sw_min_time = UINT64_MAX;
	time_st.enq_acc_min_time = UINT64_MAX;
	time_st.deq_min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);
	bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id];

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: offload latency test\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC)
		iter = offload_latency_test_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_TURBO_ENC)
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_ENC)
		iter = offload_latency_test_ldpc_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = offload_latency_test_ldpc_dec(op_params->mp, bufs,
				op_params->ref_dec_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else if (op_type == RTE_BBDEV_OP_FFT)
		iter = offload_latency_test_fft(op_params->mp, bufs,
				op_params->ref_fft_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);
	else
		iter = offload_latency_test_enc(op_params->mp, bufs,
				op_params->ref_enc_op, ad->dev_id, queue_id,
				num_to_process, burst_sz, &time_st);

	if (iter <= 0)
		return TEST_FAILED;

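	/*
	 * Report averages over the number of bursts (iter). Cycle counts are
	 * converted to microseconds as:
	 * time_us = cycles * 1000000 / rte_get_tsc_hz().
	 */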
	printf("Enqueue driver offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n"
			"Enqueue accelerator offload cost latency:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.enq_sw_total_time / (double)iter,
			(double)(time_st.enq_sw_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_sw_min_time,
			(double)(time_st.enq_sw_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_sw_max_time,
			(double)(time_st.enq_sw_max_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_total_time /
			(double)iter,
			(double)(time_st.enq_acc_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.enq_acc_min_time,
			(double)(time_st.enq_acc_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.enq_acc_max_time,
			(double)(time_st.enq_acc_max_time * 1000000) /
			rte_get_tsc_hz());

	printf("Dequeue offload cost latency - one op:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)time_st.deq_total_time / (double)iter,
			(double)(time_st.deq_total_time * 1000000) /
			(double)iter / (double)rte_get_tsc_hz(),
			(double)time_st.deq_min_time,
			(double)(time_st.deq_min_time * 1000000) /
			rte_get_tsc_hz(), (double)time_st.deq_max_time,
			(double)(time_st.deq_max_time * 1000000) /
			rte_get_tsc_hz());

	struct rte_bbdev_stats stats = {0};
	get_bbdev_queue_stats(ad->dev_id, queue_id, &stats);
	if (op_type != RTE_BBDEV_OP_LDPC_DEC) {
		TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process,
				"Mismatch in enqueue count %10"PRIu64" %d",
				stats.enqueued_count, num_to_process);
		TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process,
				"Mismatch in dequeue count %10"PRIu64" %d",
				stats.dequeued_count, num_to_process);
	}
	TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0,
			"Enqueue error count %10"PRIu64"",
			stats.enqueue_err_count);
	TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0,
			"Dequeue error count %10"PRIu64"",
			stats.dequeue_err_count);

	return TEST_SUCCESS;
}

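/*
 * The tests below measure the cost of a dequeue call that returns no
 * operations, i.e. the overhead of polling an empty queue.
 */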
static int
offload_latency_empty_q_test_dec(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
	int i, deq_total;
	struct rte_bbdev_dec_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */

	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		if (op_type == RTE_BBDEV_OP_LDPC_DEC)
			rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, ops,
					burst_sz);
		else
			rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops,
					burst_sz);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;
	}

	return i;
}

static int
offload_latency_empty_q_test_enc(uint16_t dev_id, uint16_t queue_id,
		const uint16_t num_to_process, uint16_t burst_sz,
		uint64_t *deq_total_time, uint64_t *deq_min_time,
		uint64_t *deq_max_time, const enum rte_bbdev_op_type op_type)
{
	int i, deq_total;
	struct rte_bbdev_enc_op *ops[MAX_BURST];
	uint64_t deq_start_time, deq_last_time;

	/* Test deq offload latency from an empty queue */
	for (i = 0, deq_total = 0; deq_total < num_to_process;
			++i, deq_total += burst_sz) {
		deq_start_time = rte_rdtsc_precise();

		if (unlikely(num_to_process - deq_total < burst_sz))
			burst_sz = num_to_process - deq_total;
		if (op_type == RTE_BBDEV_OP_LDPC_ENC)
			rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, ops,
					burst_sz);
		else
			rte_bbdev_dequeue_enc_ops(dev_id, queue_id, ops,
					burst_sz);

		deq_last_time = rte_rdtsc_precise() - deq_start_time;
		*deq_max_time = RTE_MAX(*deq_max_time, deq_last_time);
		*deq_min_time = RTE_MIN(*deq_min_time, deq_last_time);
		*deq_total_time += deq_last_time;
	}

	return i;
}

static int
offload_latency_empty_q_test(struct active_device *ad,
		struct test_op_params *op_params)
{
	int iter;
	uint64_t deq_total_time, deq_min_time, deq_max_time;
	uint16_t burst_sz = op_params->burst_sz;
	const uint16_t num_to_process = op_params->num_to_process;
	const enum rte_bbdev_op_type op_type = test_vector.op_type;
	const uint16_t queue_id = ad->queue_ids[0];
	struct rte_bbdev_info info;
	const char *op_type_str;

	deq_total_time = deq_max_time = 0;
	deq_min_time = UINT64_MAX;

	TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST),
			"BURST_SIZE should be <= %u", MAX_BURST);

	rte_bbdev_info_get(ad->dev_id, &info);

	op_type_str = rte_bbdev_op_type_str(op_type);
	TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);

	printf("+ ------------------------------------------------------- +\n");
	printf("== test: offload latency empty dequeue\ndev: %s, burst size: %u, num ops: %u, op type: %s\n",
			info.dev_name, burst_sz, num_to_process, op_type_str);

	if (op_type == RTE_BBDEV_OP_TURBO_DEC ||
			op_type == RTE_BBDEV_OP_LDPC_DEC)
		iter = offload_latency_empty_q_test_dec(ad->dev_id, queue_id,
				num_to_process, burst_sz, &deq_total_time,
				&deq_min_time, &deq_max_time, op_type);
	else
		iter = offload_latency_empty_q_test_enc(ad->dev_id, queue_id,
				num_to_process, burst_sz, &deq_total_time,
				&deq_min_time, &deq_max_time, op_type);

	if (iter <= 0)
		return TEST_FAILED;

	printf("Empty dequeue offload:\n"
			"\tavg: %lg cycles, %lg us\n"
			"\tmin: %lg cycles, %lg us\n"
			"\tmax: %lg cycles, %lg us\n",
			(double)deq_total_time / (double)iter,
			(double)(deq_total_time * 1000000) / (double)iter /
			(double)rte_get_tsc_hz(), (double)deq_min_time,
			(double)(deq_min_time * 1000000) / rte_get_tsc_hz(),
			(double)deq_max_time, (double)(deq_max_time * 1000000) /
			rte_get_tsc_hz());

	return TEST_SUCCESS;
}

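/*
 * Unit test wrappers and test suites: each *_tc() adapts a test function to
 * the run_test_case() signature used by the framework. Note that interrupt_tc
 * reuses throughput_test; the interrupt suite's setup is expected to switch
 * the throughput path to interrupt mode via the intr_enabled flag.
 */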
static int
bler_tc(void)
{
	return run_test_case(bler_test);
}

static int
throughput_tc(void)
{
	return run_test_case(throughput_test);
}

static int
offload_cost_tc(void)
{
	return run_test_case(offload_cost_test);
}

static int
offload_latency_empty_q_tc(void)
{
	return run_test_case(offload_latency_empty_q_test);
}

static int
latency_tc(void)
{
	return run_test_case(latency_test);
}

static int
validation_tc(void)
{
	return run_test_case(validation_test);
}

static int
interrupt_tc(void)
{
	return run_test_case(throughput_test);
}

static struct unit_test_suite bbdev_bler_testsuite = {
	.suite_name = "BBdev BLER Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, bler_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_throughput_testsuite = {
	.suite_name = "BBdev Throughput Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, throughput_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_validation_testsuite = {
	.suite_name = "BBdev Validation Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, validation_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_latency_testsuite = {
	.suite_name = "BBdev Latency Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, latency_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_offload_cost_testsuite = {
	.suite_name = "BBdev Offload Cost Tests",
	.setup = testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, offload_cost_tc),
		TEST_CASE_ST(ut_setup, ut_teardown, offload_latency_empty_q_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

static struct unit_test_suite bbdev_interrupt_testsuite = {
	.suite_name = "BBdev Interrupt Tests",
	.setup = interrupt_testsuite_setup,
	.teardown = testsuite_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(ut_setup, ut_teardown, interrupt_tc),
		TEST_CASES_END() /**< NULL terminate unit test array */
	}
};

REGISTER_TEST_COMMAND(bler, bbdev_bler_testsuite);
REGISTER_TEST_COMMAND(throughput, bbdev_throughput_testsuite);
REGISTER_TEST_COMMAND(validation, bbdev_validation_testsuite);
REGISTER_TEST_COMMAND(latency, bbdev_latency_testsuite);
REGISTER_TEST_COMMAND(offload, bbdev_offload_cost_testsuite);
REGISTER_TEST_COMMAND(interrupt, bbdev_interrupt_testsuite);