/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <string.h>

#include <rte_common.h>
#include <rte_bus_vdev.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_kvargs.h>
#include <rte_cycles.h>
#include <rte_errno.h>

#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>

#include <rte_hexdump.h>
#include <rte_log.h>

#ifdef RTE_BBDEV_SDK_AVX2
#include <ipp.h>
#include <ipps.h>
#include <phy_turbo.h>
#include <phy_crc.h>
#include <phy_rate_match.h>
#endif
#ifdef RTE_BBDEV_SDK_AVX512
#include <bit_reverse.h>
#include <phy_ldpc_encoder_5gnr.h>
#include <phy_ldpc_decoder_5gnr.h>
#include <phy_LDPC_ratematch_5gnr.h>
#include <phy_rate_dematching_5gnr.h>
#endif

#define DRIVER_NAME baseband_turbo_sw

RTE_LOG_REGISTER_DEFAULT(bbdev_turbo_sw_logtype, NOTICE);

/* Helper macro for logging */
#define rte_bbdev_log(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \
		##__VA_ARGS__)

#define rte_bbdev_log_debug(fmt, ...) \
	rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
		##__VA_ARGS__)

#define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48)
#define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6)
#define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48)

/* private data structure */
struct bbdev_private {
	unsigned int max_nb_queues;  /**< Max number of queues */
};

/* Initialisation params structure that can be used by Turbo SW driver */
struct turbo_sw_params {
	int socket_id;  /**< Turbo SW device socket */
	uint16_t queues_num;  /**< Turbo SW device queues number */
};

/* Acceptable params for Turbo SW devices */
#define TURBO_SW_MAX_NB_QUEUES_ARG "max_nb_queues"
#define TURBO_SW_SOCKET_ID_ARG "socket_id"

static const char * const turbo_sw_valid_params[] = {
	TURBO_SW_MAX_NB_QUEUES_ARG,
	TURBO_SW_SOCKET_ID_ARG
};
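
/* Both arguments are optional; defaults come from turbo_sw_bbdev_probe().
 * Illustrative EAL usage with hypothetical values (the argument keys match
 * the RTE_PMD_REGISTER_PARAM_STRING() entry at the end of this file):
 *
 *   --vdev="baseband_turbo_sw,max_nb_queues=8,socket_id=0"
 */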

/* queue */
struct turbo_sw_queue {
	/* Ring for processed (encoded/decoded) operations which are ready to
	 * be dequeued.
	 */
	struct rte_ring *processed_pkts;
	/* Stores input for turbo encoder (used when CRC attachment is
	 * performed)
	 */
	uint8_t *enc_in;
	/* Stores output from turbo encoder */
	uint8_t *enc_out;
	/* Alpha gamma buf for bblib_turbo_decoder() function */
	int8_t *ag;
	/* Temp buf for bblib_turbo_decoder() function */
	uint16_t *code_block;
	/* Input buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_input;
	/* Output buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_output;
	/* Output buf for bblib_turbodec_adapter_lte() function */
	uint8_t *adapter_output;
	/* Operation type of this queue */
	enum rte_bbdev_op_type type;
} __rte_cache_aligned;


#ifdef RTE_BBDEV_SDK_AVX2
static inline char *
mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
{
	if (unlikely(len > rte_pktmbuf_tailroom(m)))
		return NULL;

	char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
	m->data_len = (uint16_t)(m->data_len + len);
	m_head->pkt_len = (m_head->pkt_len + len);
	return tail;
}

/* Calculate index based on Table 5.1.3-3 from 3GPP TS 36.212 */
static inline int32_t
compute_idx(uint16_t k)
{
	int32_t result = 0;

	if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE)
		return -1;

	if (k > 2048) {
		if ((k - 2048) % 64 != 0)
			return -1;

		result = 124 + (k - 2048) / 64;
	} else if (k <= 512) {
		if ((k - 40) % 8 != 0)
			return -1;

		result = (k - 40) / 8 + 1;
	} else if (k <= 1024) {
		if ((k - 512) % 16 != 0)
			return -1;

		result = 60 + (k - 512) / 16;
	} else { /* 1024 < k <= 2048 */
		if ((k - 1024) % 32 != 0)
			return -1;

		result = 92 + (k - 1024) / 32;
	}

	return result;
}
#endif
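
/* Spot checks of the mapping above: k = 40 -> 1, k = 512 -> 60,
 * k = 1024 -> 92, k = 2048 -> 124, k = 6144 -> 188; any k not present in
 * the table yields -1.
 */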

/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}

/* Get device info */
static void
info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
{
	struct bbdev_private *internals = dev->data->dev_private;

	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
#ifdef RTE_BBDEV_SDK_AVX2
		{
			.type = RTE_BBDEV_OP_TURBO_DEC,
			.cap.turbo_dec = {
				.capability_flags =
					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
					RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
					RTE_BBDEV_TURBO_CRC_TYPE_24B |
					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
					RTE_BBDEV_TURBO_EARLY_TERMINATION,
				.max_llr_modulus = 16,
				.num_buffers_src =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_hard_out =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_soft_out = 0,
			}
		},
		{
			.type = RTE_BBDEV_OP_TURBO_ENC,
			.cap.turbo_enc = {
				.capability_flags =
						RTE_BBDEV_TURBO_CRC_24B_ATTACH |
						RTE_BBDEV_TURBO_CRC_24A_ATTACH |
						RTE_BBDEV_TURBO_RATE_MATCH |
						RTE_BBDEV_TURBO_RV_INDEX_BYPASS,
				.num_buffers_src =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_dst =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
			}
		},
#endif
#ifdef RTE_BBDEV_SDK_AVX512
		{
			.type = RTE_BBDEV_OP_LDPC_ENC,
			.cap.ldpc_enc = {
				.capability_flags =
						RTE_BBDEV_LDPC_RATE_MATCH |
						RTE_BBDEV_LDPC_CRC_16_ATTACH |
						RTE_BBDEV_LDPC_CRC_24A_ATTACH |
						RTE_BBDEV_LDPC_CRC_24B_ATTACH,
				.num_buffers_src =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_dst =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
			}
		},
		{
			.type = RTE_BBDEV_OP_LDPC_DEC,
			.cap.ldpc_dec = {
				.capability_flags =
					RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
					RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE,
				.llr_size = 8,
				.llr_decimals = 4,
				.num_buffers_src =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_hard_out =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_soft_out = 0,
			}
		},
#endif
		RTE_BBDEV_END_OF_CAPABILITIES_LIST()
	};

	static struct rte_bbdev_queue_conf default_queue_conf = {
		.queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
	};
#ifdef RTE_BBDEV_SDK_AVX2
	static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
	dev_info->cpu_flag_reqs = &cpu_flag;
#else
	dev_info->cpu_flag_reqs = NULL;
#endif
	default_queue_conf.socket = dev->data->socket_id;

	dev_info->driver_name = RTE_STR(DRIVER_NAME);
	dev_info->max_num_queues = internals->max_nb_queues;
	dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT;
	dev_info->hardware_accelerated = false;
	dev_info->max_dl_queue_priority = 0;
	dev_info->max_ul_queue_priority = 0;
	dev_info->default_queue_conf = default_queue_conf;
	dev_info->capabilities = bbdev_capabilities;
	dev_info->min_alignment = 64;
	dev_info->harq_buffer_size = 0;
	dev_info->data_endianness = RTE_LITTLE_ENDIAN;

	rte_bbdev_log_debug("got device info from %u", dev->data->dev_id);
}
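
/* Illustrative only: applications read this through the public API, e.g.
 *
 *   struct rte_bbdev_info info;
 *   rte_bbdev_info_get(dev_id, &info);
 *   // info.drv now mirrors the fields filled in above
 */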

/* Release queue */
static int
q_release(struct rte_bbdev *dev, uint16_t q_id)
{
	struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private;

	if (q != NULL) {
		rte_ring_free(q->processed_pkts);
		rte_free(q->enc_out);
		rte_free(q->enc_in);
		rte_free(q->ag);
		rte_free(q->code_block);
		rte_free(q->deint_input);
		rte_free(q->deint_output);
		rte_free(q->adapter_output);
		rte_free(q);
		dev->data->queues[q_id].queue_private = NULL;
	}

	rte_bbdev_log_debug("released device queue %u:%u",
			dev->data->dev_id, q_id);
	return 0;
}

/* Setup a queue */
static int
q_setup(struct rte_bbdev *dev, uint16_t q_id,
		const struct rte_bbdev_queue_conf *queue_conf)
{
	int ret;
	struct turbo_sw_queue *q;
	char name[RTE_RING_NAMESIZE];

	/* Allocate the queue data structure. */
	q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q == NULL) {
		rte_bbdev_log(ERR, "Failed to allocate queue memory");
		return -ENOMEM;
	}

	/* Allocate memory for encoder output. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->enc_out = rte_zmalloc_socket(name,
			((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) *
			sizeof(*q->enc_out) * 3,
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->enc_out == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for rate matching output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id,
			q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->enc_in = rte_zmalloc_socket(name,
			(RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->enc_in == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Alpha Gamma temp buffer. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->ag = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->ag == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for code block temp buffer. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->code_block = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->code_block == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Deinterleaver input. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_de_i%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->deint_input = rte_zmalloc_socket(name,
			DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->deint_input == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Deinterleaver output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_de_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->deint_output = rte_zmalloc_socket(NULL,
			DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->deint_output == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Adapter output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_ada_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->adapter_output = rte_zmalloc_socket(NULL,
			ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->adapter_output == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Create ring for packets awaiting to be dequeued. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->processed_pkts = rte_ring_create(name, queue_conf->queue_size,
			queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
	if (q->processed_pkts == NULL) {
		rte_bbdev_log(ERR, "Failed to create ring for %s", name);
		ret = -rte_errno;
		goto free_q;
	}

	q->type = queue_conf->op_type;

	dev->data->queues[q_id].queue_private = q;
	rte_bbdev_log_debug("setup device queue %s", name);
	return 0;

free_q:
	rte_ring_free(q->processed_pkts);
	rte_free(q->enc_out);
	rte_free(q->enc_in);
	rte_free(q->ag);
	rte_free(q->code_block);
	rte_free(q->deint_input);
	rte_free(q->deint_output);
	rte_free(q->adapter_output);
	rte_free(q);
	return ret;
}

static const struct rte_bbdev_ops pmd_ops = {
	.info_get = info_get,
	.queue_setup = q_setup,
	.queue_release = q_release
};
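
/* Illustrative only (error handling omitted): an application reaches these
 * ops through the public queue API, e.g.
 *
 *   struct rte_bbdev_queue_conf qconf = info.drv.default_queue_conf;
 *   qconf.op_type = RTE_BBDEV_OP_TURBO_ENC;
 *   rte_bbdev_setup_queues(dev_id, 1, rte_socket_id());
 *   rte_bbdev_queue_configure(dev_id, 0, &qconf);
 */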

#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
/* Checks if the encoder input buffer is correct.
 * Returns 0 if it's valid, -1 otherwise.
 */
static inline int
is_enc_input_valid(const uint16_t k, const int32_t k_idx,
		const uint16_t in_length)
{
	if (k_idx < 0) {
		rte_bbdev_log(ERR, "K Index is invalid");
		return -1;
	}

	if (in_length - (k >> 3) < 0) {
		rte_bbdev_log(ERR,
				"Mismatch between input length (%u bytes) and K (%u bits)",
				in_length, k);
		return -1;
	}

	if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
		rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
				k, RTE_BBDEV_TURBO_MAX_CB_SIZE);
		return -1;
	}

	return 0;
}

/* Checks if the decoder input buffer is correct.
 * Returns 0 if it's valid, -1 otherwise.
 */
static inline int
is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
{
	if (k_idx < 0) {
		rte_bbdev_log(ERR, "K index is invalid");
		return -1;
	}

	if (in_length < kw) {
		rte_bbdev_log(ERR,
				"Mismatch between input length (%u) and kw (%u)",
				in_length, kw);
		return -1;
	}

	if (kw > RTE_BBDEV_TURBO_MAX_KW) {
		rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
				kw, RTE_BBDEV_TURBO_MAX_KW);
		return -1;
	}

	return 0;
}
#endif
#endif

static inline void
process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t in_length, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	int ret;
#else
	RTE_SET_USED(in_length);
#endif
	int16_t k_idx;
	uint16_t m;
	uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
	uint64_t first_3_bytes = 0;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	struct bblib_turbo_encoder_request turbo_req;
	struct bblib_turbo_encoder_response turbo_resp;
	struct bblib_rate_match_dl_request rm_req;
	struct bblib_rate_match_dl_response rm_resp;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);
	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* CRC24A (for TB) */
	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
			(enc->code_block_mode == RTE_BBDEV_CODE_BLOCK)) {
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
#endif

		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is room for the CRC bits; if not, use the
		 * temporary buffer.
		 */
		if (mbuf_append(m_in, m_in, 3) == NULL) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else {
			/* Store the first 3 bytes of the next CB as they will
			 * be overwritten by the CRC bytes. If this is the last
			 * CB then there is no point in storing the next 3
			 * bytes and this if..else branch will be omitted.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24A generation */
		bblib_lte_crc24a_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
		/* CRC24B */
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
#endif

		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is room for the CRC bits if this is the last
		 * CB in the TB. If not, use the temporary buffer.
		 */
		if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else if (c - r > 1) {
			/* Store the first 3 bytes of the next CB as they will
			 * be overwritten by the CRC bytes. If this is the last
			 * CB then there is no point in storing the next 3
			 * bytes and this if..else branch will be omitted.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24B generation */
		bblib_lte_crc24b_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	}
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	else {
		ret = is_enc_input_valid(k, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
	}
#endif

	/* Turbo encoder */

	/* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
	 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up.
	 * So dst_data's length should be 3*(k/8) + 3 bytes.
	 * In Rate-matching bypass case outputs pointers passed to encoder
	 * (out0, out1 and out2) can directly point to addresses of output from
	 * turbo_enc entity.
	 */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		out0 = q->enc_out;
		out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
		out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
	} else {
		out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
				(k >> 3) * 3 + 2);
		if (out0 == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		enc->output.length += (k >> 3) * 3 + 2;
		/* rte_bbdev_op_data.offset can be different than the
		 * offset of the appended bytes
		 */
		out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
		out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + (k >> 3) + 1);
		out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + 2 * ((k >> 3) + 1));
	}

	turbo_req.case_id = k_idx;
	turbo_req.input_win = in;
	turbo_req.length = k >> 3;
	turbo_resp.output_win_0 = out0;
	turbo_resp.output_win_1 = out1;
	turbo_resp.output_win_2 = out2;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo encoding */
	if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "Turbo Encoder failed");
		return;
	}
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	/* Restore the first 3 bytes of the next CB if they were overwritten
	 * by the CRC
	 */
	if (first_3_bytes != 0)
		*((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;

	/* Rate-matching */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		uint8_t mask_id;
		/* Integer round up division by 8 */
		uint16_t out_len = (e + 7) >> 3;
		/* The mask array is indexed using E%8. E is an even number so
		 * there are only 4 possible values.
		 */
		const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};

		/* get output data starting address */
		rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
		if (rm_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		/* rte_bbdev_op_data.offset can be different than the offset
		 * of the appended bytes
		 */
		rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

		/* index of current code block */
		rm_req.r = r;
		/* total number of code blocks */
		rm_req.C = c;
		/* For DL - 1, UL - 0 */
		rm_req.direction = 1;
		/* According to 3GPP TS 36.212 section 5.1.4.1.2, Nsoft, KMIMO
		 * and MDL_HARQ are used for Ncb calculation. As Ncb is already
		 * known we can adjust those parameters
		 */
		rm_req.Nsoft = ncb * rm_req.C;
		rm_req.KMIMO = 1;
		rm_req.MDL_HARQ = 1;
		/* According to 3GPP TS 36.212 section 5.1.4.1.2, Nl, Qm and G
		 * are used for E calculation. As E is already known we can
		 * adjust those parameters
		 */
		rm_req.NL = e;
		rm_req.Qm = 1;
		rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C;
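
		/* Worked check of those substitutions (assuming the SDK
		 * follows TS 36.212 5.1.4.1.2): NIR = Nsoft / (KMIMO *
		 * MDL_HARQ) = ncb * C, so the per-CB soft limit NIR / C
		 * collapses to the known ncb; and with NL = e, Qm = 1 and
		 * G = e * C, we get G' = G / (NL * Qm) = C, gamma = G' mod
		 * C = 0, hence E = NL * Qm * floor(G' / C) = e for every CB.
		 */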

		rm_req.rvidx = enc->rv_index;
		rm_req.Kidx = k_idx - 1;
		rm_req.nLen = k + 4;
		rm_req.tin0 = out0;
		rm_req.tin1 = out1;
		rm_req.tin2 = out2;
		rm_resp.output = rm_out;
		rm_resp.OutputLen = out_len;
		if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS)
			rm_req.bypass_rvidx = 1;
		else
			rm_req.bypass_rvidx = 0;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Rate-Matching */
		if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
			op->status |= 1 << RTE_BBDEV_DRV_ERROR;
			rte_bbdev_log(ERR, "Rate matching failed");
			return;
		}
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

		/* SW fills an entire last byte even if E%8 != 0. Clear the
		 * superfluous data bits for consistency with HW device.
		 */
		mask_id = (e & 7) >> 1;
		rm_out[out_len - 1] &= mask_out[mask_id];
		enc->output.length += rm_resp.OutputLen;
	} else {
		/* Rate matching is bypassed */

		/* Completing last byte of out0 (where 4 tail bits are stored)
		 * by moving first 4 bits from out1
		 */
		tmp_out = (uint8_t *) --out1;
		*tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4);
		tmp_out++;
		/* Shifting out1 data by 4 bits to the left */
		for (m = 0; m < k >> 3; ++m) {
			uint8_t *first = tmp_out;
			uint8_t second = *(tmp_out + 1);
			*first = (*first << 4) | ((second & 0xF0) >> 4);
			tmp_out++;
		}
		/* Shifting out2 data by 8 bits to the left */
		for (m = 0; m < (k >> 3) + 1; ++m) {
			*tmp_out = *(tmp_out + 1);
			tmp_out++;
		}
		*tmp_out = 0;
	}
795 */ 796 mask_id = (e & 7) >> 1; 797 rm_out[out_len - 1] &= mask_out[mask_id]; 798 enc->output.length += rm_resp.OutputLen; 799 } else { 800 /* Rate matching is bypassed */ 801 802 /* Completing last byte of out0 (where 4 tail bits are stored) 803 * by moving first 4 bits from out1 804 */ 805 tmp_out = (uint8_t *) --out1; 806 *tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4); 807 tmp_out++; 808 /* Shifting out1 data by 4 bits to the left */ 809 for (m = 0; m < k >> 3; ++m) { 810 uint8_t *first = tmp_out; 811 uint8_t second = *(tmp_out + 1); 812 *first = (*first << 4) | ((second & 0xF0) >> 4); 813 tmp_out++; 814 } 815 /* Shifting out2 data by 8 bits to the left */ 816 for (m = 0; m < (k >> 3) + 1; ++m) { 817 *tmp_out = *(tmp_out + 1); 818 tmp_out++; 819 } 820 *tmp_out = 0; 821 } 822 #else 823 RTE_SET_USED(q); 824 RTE_SET_USED(op); 825 RTE_SET_USED(r); 826 RTE_SET_USED(c); 827 RTE_SET_USED(k); 828 RTE_SET_USED(ncb); 829 RTE_SET_USED(e); 830 RTE_SET_USED(m_in); 831 RTE_SET_USED(m_out_head); 832 RTE_SET_USED(m_out); 833 RTE_SET_USED(in_offset); 834 RTE_SET_USED(out_offset); 835 RTE_SET_USED(in_length); 836 RTE_SET_USED(q_stats); 837 #endif 838 } 839 840 841 static inline void 842 process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 843 uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, 844 struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, 845 uint16_t seg_total_left, struct rte_bbdev_stats *q_stats) 846 { 847 #ifdef RTE_BBDEV_SDK_AVX512 848 RTE_SET_USED(seg_total_left); 849 uint8_t *in, *rm_out; 850 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 851 struct bblib_ldpc_encoder_5gnr_request ldpc_req; 852 struct bblib_ldpc_encoder_5gnr_response ldpc_resp; 853 struct bblib_LDPC_ratematch_5gnr_request rm_req; 854 struct bblib_LDPC_ratematch_5gnr_response rm_resp; 855 struct bblib_crc_request crc_req; 856 struct bblib_crc_response crc_resp; 857 uint16_t msgLen, puntBits, parity_offset, out_len; 858 uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; 859 uint16_t in_length_in_bits = K - enc->n_filler; 860 uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3; 861 862 #ifdef RTE_BBDEV_OFFLOAD_COST 863 uint64_t start_time = rte_rdtsc_precise(); 864 #else 865 RTE_SET_USED(q_stats); 866 #endif 867 868 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 869 870 /* Masking the Filler bits explicitly */ 871 memset(q->enc_in + (in_length_in_bytes - 3), 0, 872 ((K + 7) >> 3) - (in_length_in_bytes - 3)); 873 /* CRC Generation */ 874 if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) { 875 rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); 876 crc_req.data = in; 877 crc_req.len = in_length_in_bits - 24; 878 crc_resp.data = q->enc_in; 879 bblib_lte_crc24a_gen(&crc_req, &crc_resp); 880 } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) { 881 rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); 882 crc_req.data = in; 883 crc_req.len = in_length_in_bits - 24; 884 crc_resp.data = q->enc_in; 885 bblib_lte_crc24b_gen(&crc_req, &crc_resp); 886 } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_16_ATTACH) { 887 rte_memcpy(q->enc_in, in, in_length_in_bytes - 2); 888 crc_req.data = in; 889 crc_req.len = in_length_in_bits - 16; 890 crc_resp.data = q->enc_in; 891 bblib_lte_crc16_gen(&crc_req, &crc_resp); 892 } else 893 rte_memcpy(q->enc_in, in, in_length_in_bytes); 894 895 /* LDPC Encoding */ 896 ldpc_req.Zc = enc->z_c; 897 ldpc_req.baseGraph = enc->basegraph; 898 /* Number of rows set to maximum */ 899 ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 
46 : 42; 900 ldpc_req.numberCodeblocks = 1; 901 ldpc_req.input[0] = (int8_t *) q->enc_in; 902 ldpc_resp.output[0] = (int8_t *) q->enc_out; 903 904 bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3); 905 906 if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) { 907 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 908 rte_bbdev_log(ERR, "LDPC Encoder failed"); 909 return; 910 } 911 912 /* 913 * Systematic + Parity : Recreating stream with filler bits, ideally 914 * the bit select could handle this in the RM SDK 915 */ 916 msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc; 917 puntBits = 2 * ldpc_req.Zc; 918 parity_offset = msgLen - puntBits; 919 ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8), 920 puntBits%8, q->adapter_output, 0, parity_offset); 921 ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8), 922 parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc); 923 924 out_len = (e + 7) >> 3; 925 /* get output data starting address */ 926 rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len); 927 if (rm_out == NULL) { 928 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 929 rte_bbdev_log(ERR, 930 "Too little space in output mbuf"); 931 return; 932 } 933 /* 934 * rte_bbdev_op_data.offset can be different than the offset 935 * of the appended bytes 936 */ 937 rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 938 939 /* Rate-Matching */ 940 rm_req.E = e; 941 rm_req.Ncb = enc->n_cb; 942 rm_req.Qm = enc->q_m; 943 rm_req.Zc = enc->z_c; 944 rm_req.baseGraph = enc->basegraph; 945 rm_req.input = q->adapter_output; 946 rm_req.nLen = enc->n_filler; 947 rm_req.nullIndex = parity_offset - enc->n_filler; 948 rm_req.rvidx = enc->rv_index; 949 rm_resp.output = q->deint_output; 950 951 if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) { 952 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 953 rte_bbdev_log(ERR, "Rate matching failed"); 954 return; 955 } 956 957 /* RM SDK may provide non zero bits on last byte */ 958 if ((e % 8) != 0) 959 q->deint_output[out_len-1] &= (1 << (e % 8)) - 1; 960 961 bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3); 962 963 rte_memcpy(rm_out, q->deint_output, out_len); 964 enc->output.length += out_len; 965 966 #ifdef RTE_BBDEV_OFFLOAD_COST 967 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 968 #endif 969 #else 970 RTE_SET_USED(q); 971 RTE_SET_USED(op); 972 RTE_SET_USED(e); 973 RTE_SET_USED(m_in); 974 RTE_SET_USED(m_out_head); 975 RTE_SET_USED(m_out); 976 RTE_SET_USED(in_offset); 977 RTE_SET_USED(out_offset); 978 RTE_SET_USED(seg_total_left); 979 RTE_SET_USED(q_stats); 980 #endif 981 } 982 983 static inline void 984 enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 985 struct rte_bbdev_stats *queue_stats) 986 { 987 uint8_t c, r, crc24_bits = 0; 988 uint16_t k, ncb; 989 uint32_t e; 990 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc; 991 uint16_t in_offset = enc->input.offset; 992 uint16_t out_offset = enc->output.offset; 993 struct rte_mbuf *m_in = enc->input.data; 994 struct rte_mbuf *m_out = enc->output.data; 995 struct rte_mbuf *m_out_head = enc->output.data; 996 uint32_t in_length, mbuf_total_left = enc->input.length; 997 uint16_t seg_total_left; 998 999 /* Clear op status */ 1000 op->status = 0; 1001 1002 if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { 1003 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", 1004 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); 1005 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1006 return; 1007 } 1008 1009 if (m_in == NULL || m_out 
== NULL) { 1010 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1011 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1012 return; 1013 } 1014 1015 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || 1016 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) 1017 crc24_bits = 24; 1018 1019 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1020 c = enc->tb_params.c; 1021 r = enc->tb_params.r; 1022 } else {/* For Code Block mode */ 1023 c = 1; 1024 r = 0; 1025 } 1026 1027 while (mbuf_total_left > 0 && r < c) { 1028 1029 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1030 1031 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1032 k = (r < enc->tb_params.c_neg) ? 1033 enc->tb_params.k_neg : enc->tb_params.k_pos; 1034 ncb = (r < enc->tb_params.c_neg) ? 1035 enc->tb_params.ncb_neg : enc->tb_params.ncb_pos; 1036 e = (r < enc->tb_params.cab) ? 1037 enc->tb_params.ea : enc->tb_params.eb; 1038 } else { 1039 k = enc->cb_params.k; 1040 ncb = enc->cb_params.ncb; 1041 e = enc->cb_params.e; 1042 } 1043 1044 process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head, 1045 m_out, in_offset, out_offset, seg_total_left, 1046 queue_stats); 1047 /* Update total_left */ 1048 in_length = ((k - crc24_bits) >> 3); 1049 mbuf_total_left -= in_length; 1050 /* Update offsets for next CBs (if exist) */ 1051 in_offset += (k - crc24_bits) >> 3; 1052 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) 1053 out_offset += e >> 3; 1054 else 1055 out_offset += (k >> 3) * 3 + 2; 1056 1057 /* Update offsets */ 1058 if (seg_total_left == in_length) { 1059 /* Go to the next mbuf */ 1060 m_in = m_in->next; 1061 m_out = m_out->next; 1062 in_offset = 0; 1063 out_offset = 0; 1064 } 1065 r++; 1066 } 1067 1068 /* check if all input data was processed */ 1069 if (mbuf_total_left != 0) { 1070 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1071 rte_bbdev_log(ERR, 1072 "Mismatch between mbuf length and included CBs sizes"); 1073 } 1074 } 1075 1076 1077 static inline void 1078 enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 1079 struct rte_bbdev_stats *queue_stats) 1080 { 1081 uint8_t c, r, crc24_bits = 0; 1082 uint32_t e; 1083 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 1084 uint16_t in_offset = enc->input.offset; 1085 uint16_t out_offset = enc->output.offset; 1086 struct rte_mbuf *m_in = enc->input.data; 1087 struct rte_mbuf *m_out = enc->output.data; 1088 struct rte_mbuf *m_out_head = enc->output.data; 1089 uint32_t in_length, mbuf_total_left = enc->input.length; 1090 1091 uint16_t seg_total_left; 1092 1093 /* Clear op status */ 1094 op->status = 0; 1095 1096 if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { 1097 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", 1098 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); 1099 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1100 return; 1101 } 1102 1103 if (m_in == NULL || m_out == NULL) { 1104 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1105 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1106 return; 1107 } 1108 1109 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || 1110 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) 1111 crc24_bits = 24; 1112 1113 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1114 c = enc->tb_params.c; 1115 r = enc->tb_params.r; 1116 } else { /* For Code Block mode */ 1117 c = 1; 1118 r = 0; 1119 } 1120 1121 while (mbuf_total_left > 0 && r < c) { 1122 1123 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1124 1125 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1126 e = (r < enc->tb_params.cab) ? 
1127 enc->tb_params.ea : enc->tb_params.eb; 1128 } else { 1129 e = enc->cb_params.e; 1130 } 1131 1132 process_ldpc_enc_cb(q, op, e, m_in, m_out_head, 1133 m_out, in_offset, out_offset, seg_total_left, 1134 queue_stats); 1135 /* Update total_left */ 1136 in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; 1137 in_length = ((in_length - crc24_bits - enc->n_filler) >> 3); 1138 mbuf_total_left -= in_length; 1139 /* Update offsets for next CBs (if exist) */ 1140 in_offset += in_length; 1141 out_offset += (e + 7) >> 3; 1142 1143 /* Update offsets */ 1144 if (seg_total_left == in_length) { 1145 /* Go to the next mbuf */ 1146 m_in = m_in->next; 1147 m_out = m_out->next; 1148 in_offset = 0; 1149 out_offset = 0; 1150 } 1151 r++; 1152 } 1153 1154 /* check if all input data was processed */ 1155 if (mbuf_total_left != 0) { 1156 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1157 rte_bbdev_log(ERR, 1158 "Mismatch between mbuf length and included CBs sizes %d", 1159 mbuf_total_left); 1160 } 1161 } 1162 1163 static inline uint16_t 1164 enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops, 1165 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1166 { 1167 uint16_t i; 1168 #ifdef RTE_BBDEV_OFFLOAD_COST 1169 queue_stats->acc_offload_cycles = 0; 1170 #endif 1171 1172 for (i = 0; i < nb_ops; ++i) 1173 enqueue_enc_one_op(q, ops[i], queue_stats); 1174 1175 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1176 NULL); 1177 } 1178 1179 static inline uint16_t 1180 enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q, 1181 struct rte_bbdev_enc_op **ops, 1182 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1183 { 1184 uint16_t i; 1185 #ifdef RTE_BBDEV_OFFLOAD_COST 1186 queue_stats->acc_offload_cycles = 0; 1187 #endif 1188 1189 for (i = 0; i < nb_ops; ++i) 1190 enqueue_ldpc_enc_one_op(q, ops[i], queue_stats); 1191 1192 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1193 NULL); 1194 } 1195 1196 #ifdef RTE_BBDEV_SDK_AVX2 1197 static inline void 1198 move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k, 1199 uint16_t ncb) 1200 { 1201 uint16_t d = k + 4; 1202 uint16_t kpi = ncb / 3; 1203 uint16_t nd = kpi - d; 1204 1205 rte_memcpy(&out[nd], in, d); 1206 rte_memcpy(&out[nd + kpi + 64], &in[kpi], d); 1207 rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d); 1208 } 1209 #endif 1210 1211 static inline void 1212 process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1213 uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in, 1214 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, 1215 uint16_t in_offset, uint16_t out_offset, bool check_crc_24b, 1216 uint16_t crc24_overlap, uint16_t in_length, 1217 struct rte_bbdev_stats *q_stats) 1218 { 1219 #ifdef RTE_BBDEV_SDK_AVX2 1220 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1221 int ret; 1222 #else 1223 RTE_SET_USED(in_length); 1224 #endif 1225 int32_t k_idx; 1226 int32_t iter_cnt; 1227 uint8_t *in, *out, *adapter_input; 1228 int32_t ncb, ncb_without_null; 1229 struct bblib_turbo_adapter_ul_response adapter_resp; 1230 struct bblib_turbo_adapter_ul_request adapter_req; 1231 struct bblib_turbo_decoder_request turbo_req; 1232 struct bblib_turbo_decoder_response turbo_resp; 1233 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec; 1234 #ifdef RTE_BBDEV_OFFLOAD_COST 1235 uint64_t start_time; 1236 #else 1237 RTE_SET_USED(q_stats); 1238 #endif 1239 1240 k_idx = compute_idx(k); 1241 1242 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1243 ret = is_dec_input_valid(k_idx, kw, in_length); 1244 if (ret != 0) { 1245 
op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1246 return; 1247 } 1248 #endif 1249 1250 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 1251 ncb = kw; 1252 ncb_without_null = (k + 4) * 3; 1253 1254 if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) { 1255 struct bblib_deinterleave_ul_request deint_req; 1256 struct bblib_deinterleave_ul_response deint_resp; 1257 1258 deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER; 1259 deint_req.pharqbuffer = in; 1260 deint_req.ncb = ncb; 1261 deint_resp.pinteleavebuffer = q->deint_output; 1262 1263 #ifdef RTE_BBDEV_OFFLOAD_COST 1264 start_time = rte_rdtsc_precise(); 1265 #endif 1266 /* Sub-block De-Interleaving */ 1267 bblib_deinterleave_ul(&deint_req, &deint_resp); 1268 #ifdef RTE_BBDEV_OFFLOAD_COST 1269 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1270 #endif 1271 } else 1272 move_padding_bytes(in, q->deint_output, k, ncb); 1273 1274 adapter_input = q->deint_output; 1275 1276 if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN) 1277 adapter_req.isinverted = 1; 1278 else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN) 1279 adapter_req.isinverted = 0; 1280 else { 1281 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 1282 rte_bbdev_log(ERR, "LLR format wasn't specified"); 1283 return; 1284 } 1285 1286 adapter_req.ncb = ncb_without_null; 1287 adapter_req.pinteleavebuffer = adapter_input; 1288 adapter_resp.pharqout = q->adapter_output; 1289 1290 #ifdef RTE_BBDEV_OFFLOAD_COST 1291 start_time = rte_rdtsc_precise(); 1292 #endif 1293 /* Turbo decode adaptation */ 1294 bblib_turbo_adapter_ul(&adapter_req, &adapter_resp); 1295 #ifdef RTE_BBDEV_OFFLOAD_COST 1296 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1297 #endif 1298 1299 out = (uint8_t *)mbuf_append(m_out_head, m_out, 1300 ((k - crc24_overlap) >> 3)); 1301 if (out == NULL) { 1302 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1303 rte_bbdev_log(ERR, "Too little space in output mbuf"); 1304 return; 1305 } 1306 /* rte_bbdev_op_data.offset can be different than the offset of the 1307 * appended bytes 1308 */ 1309 out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 1310 if (check_crc_24b) 1311 turbo_req.c = c + 1; 1312 else 1313 turbo_req.c = c; 1314 turbo_req.input = (int8_t *)q->adapter_output; 1315 turbo_req.k = k; 1316 turbo_req.k_idx = k_idx; 1317 turbo_req.max_iter_num = dec->iter_max; 1318 turbo_req.early_term_disable = !check_bit(dec->op_flags, 1319 RTE_BBDEV_TURBO_EARLY_TERMINATION); 1320 turbo_resp.ag_buf = q->ag; 1321 turbo_resp.cb_buf = q->code_block; 1322 turbo_resp.output = out; 1323 1324 #ifdef RTE_BBDEV_OFFLOAD_COST 1325 start_time = rte_rdtsc_precise(); 1326 #endif 1327 /* Turbo decode */ 1328 iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp); 1329 #ifdef RTE_BBDEV_OFFLOAD_COST 1330 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1331 #endif 1332 dec->hard_output.length += (k >> 3); 1333 1334 if (iter_cnt > 0) { 1335 /* Temporary solution for returned iter_count from SDK */ 1336 iter_cnt = (iter_cnt - 1) >> 1; 1337 dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count); 1338 } else { 1339 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1340 rte_bbdev_log(ERR, "Turbo Decoder failed"); 1341 return; 1342 } 1343 #else 1344 RTE_SET_USED(q); 1345 RTE_SET_USED(op); 1346 RTE_SET_USED(c); 1347 RTE_SET_USED(k); 1348 RTE_SET_USED(kw); 1349 RTE_SET_USED(m_in); 1350 RTE_SET_USED(m_out_head); 1351 RTE_SET_USED(m_out); 1352 RTE_SET_USED(in_offset); 1353 RTE_SET_USED(out_offset); 1354 RTE_SET_USED(check_crc_24b); 1355 
RTE_SET_USED(crc24_overlap); 1356 RTE_SET_USED(in_length); 1357 RTE_SET_USED(q_stats); 1358 #endif 1359 } 1360 1361 static inline void 1362 process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1363 uint8_t c, uint16_t out_length, uint32_t e, 1364 struct rte_mbuf *m_in, 1365 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, 1366 struct rte_mbuf *m_harq_in, 1367 struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out, 1368 uint16_t in_offset, uint16_t out_offset, 1369 uint16_t harq_in_offset, uint16_t harq_out_offset, 1370 bool check_crc_24b, 1371 uint16_t crc24_overlap, uint16_t in_length, 1372 struct rte_bbdev_stats *q_stats) 1373 { 1374 #ifdef RTE_BBDEV_SDK_AVX512 1375 RTE_SET_USED(in_length); 1376 RTE_SET_USED(c); 1377 uint8_t *in, *out, *harq_in, *harq_out, *adapter_input; 1378 struct bblib_rate_dematching_5gnr_request derm_req; 1379 struct bblib_rate_dematching_5gnr_response derm_resp; 1380 struct bblib_ldpc_decoder_5gnr_request dec_req; 1381 struct bblib_ldpc_decoder_5gnr_response dec_resp; 1382 struct bblib_crc_request crc_req; 1383 struct bblib_crc_response crc_resp; 1384 struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; 1385 uint16_t K, parity_offset, sys_cols, outLenWithCrc; 1386 int16_t deRmOutSize, numRows; 1387 1388 /* Compute some LDPC BG lengths */ 1389 outLenWithCrc = out_length + (crc24_overlap >> 3); 1390 sys_cols = (dec->basegraph == 1) ? 22 : 10; 1391 K = sys_cols * dec->z_c; 1392 parity_offset = K - 2 * dec->z_c; 1393 1394 #ifdef RTE_BBDEV_OFFLOAD_COST 1395 uint64_t start_time = rte_rdtsc_precise(); 1396 #else 1397 RTE_SET_USED(q_stats); 1398 #endif 1399 1400 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 1401 1402 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { 1403 /** 1404 * Single contiguous block from the first LLR of the 1405 * circular buffer. 1406 */ 1407 harq_in = NULL; 1408 if (m_harq_in != NULL) 1409 harq_in = rte_pktmbuf_mtod_offset(m_harq_in, 1410 uint8_t *, harq_in_offset); 1411 if (harq_in == NULL) { 1412 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1413 rte_bbdev_log(ERR, "No space in harq input mbuf"); 1414 return; 1415 } 1416 uint16_t harq_in_length = RTE_MIN( 1417 dec->harq_combined_input.length, 1418 (uint32_t) dec->n_cb); 1419 memset(q->ag + harq_in_length, 0, 1420 dec->n_cb - harq_in_length); 1421 rte_memcpy(q->ag, harq_in, harq_in_length); 1422 } 1423 1424 derm_req.p_in = (int8_t *) in; 1425 derm_req.p_harq = q->ag; /* This doesn't include the filler bits */ 1426 derm_req.base_graph = dec->basegraph; 1427 derm_req.zc = dec->z_c; 1428 derm_req.ncb = dec->n_cb; 1429 derm_req.e = e; 1430 derm_req.k0 = 0; /* Actual output from SDK */ 1431 derm_req.isretx = check_bit(dec->op_flags, 1432 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); 1433 derm_req.rvid = dec->rv_index; 1434 derm_req.modulation_order = dec->q_m; 1435 derm_req.start_null_index = parity_offset - dec->n_filler; 1436 derm_req.num_of_null = dec->n_filler; 1437 1438 bblib_rate_dematching_5gnr(&derm_req, &derm_resp); 1439 1440 /* Compute RM out size and number of rows */ 1441 deRmOutSize = RTE_MIN( 1442 derm_req.k0 + derm_req.e - 1443 ((derm_req.k0 < derm_req.start_null_index) ? 

	/* Compute RM out size and number of rows */
	deRmOutSize = RTE_MIN(
			derm_req.k0 + derm_req.e -
			((derm_req.k0 < derm_req.start_null_index) ?
					0 : dec->n_filler),
			dec->n_cb - dec->n_filler);
	if (m_harq_in != NULL)
		deRmOutSize = RTE_MAX(deRmOutSize,
				RTE_MIN(dec->n_cb - dec->n_filler,
						m_harq_in->data_len));
	numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c)
			- sys_cols + 2;
	numRows = RTE_MAX(4, numRows);

	/* get output data starting address */
	out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length);
	if (out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Too little space in LDPC decoder output mbuf");
		return;
	}

	/* rte_bbdev_op_data.offset can be different than the offset
	 * of the appended bytes
	 */
	out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
	adapter_input = q->enc_out;

	dec_req.Zc = dec->z_c;
	dec_req.baseGraph = dec->basegraph;
	dec_req.nRows = numRows;
	dec_req.numChannelLlrs = deRmOutSize;
	dec_req.varNodes = derm_req.p_harq;
	dec_req.numFillerBits = dec->n_filler;
	dec_req.maxIterations = dec->iter_max;
	dec_req.enableEarlyTermination = check_bit(dec->op_flags,
			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
	dec_resp.varNodes = (int16_t *) q->adapter_output;
	dec_resp.compactedMessageBytes = q->enc_out;

	bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp);

	dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination,
			dec->iter_count);
	if (!dec_resp.parityPassedAtTermination)
		op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;

	bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3);

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) ||
			check_bit(dec->op_flags,
					RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) {
		crc_req.data = adapter_input;
		crc_req.len = K - dec->n_filler - 24;
		crc_resp.check_passed = false;
		crc_resp.data = adapter_input;
		if (check_crc_24b)
			bblib_lte_crc24b_check(&crc_req, &crc_resp);
		else
			bblib_lte_crc24a_check(&crc_req, &crc_resp);
		if (!crc_resp.check_passed)
			op->status |= 1 << RTE_BBDEV_CRC_ERROR;
	} else if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) {
		crc_req.data = adapter_input;
		crc_req.len = K - dec->n_filler - 16;
		crc_resp.check_passed = false;
		crc_resp.data = adapter_input;
		bblib_lte_crc16_check(&crc_req, &crc_resp);
		if (!crc_resp.check_passed)
			op->status |= 1 << RTE_BBDEV_CRC_ERROR;
	}

#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
		harq_out = NULL;
		if (m_harq_out != NULL) {
			/* Initialize HARQ data length since we overwrite */
			m_harq_out->data_len = 0;
			/* Check there is enough space
			 * in the HARQ outbound buffer
			 */
			harq_out = (uint8_t *)mbuf_append(m_harq_out_head,
					m_harq_out, deRmOutSize);
		}
		if (harq_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR, "No space in HARQ output mbuf");
			return;
		}
		/* get output data starting address and overwrite the data */
		harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *,
				harq_out_offset);
		rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize);
		dec->harq_combined_output.length += deRmOutSize;
	}

	rte_memcpy(out, adapter_input, out_length);
	dec->hard_output.length += out_length;
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(out_length);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(m_harq_in);
	RTE_SET_USED(m_harq_out_head);
	RTE_SET_USED(m_harq_out);
	RTE_SET_USED(harq_in_offset);
	RTE_SET_USED(harq_out_offset);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}


static inline void
enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint16_t kw, k = 0;
	uint16_t crc24_overlap = 0;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = dec->tb_params.c;
	} else { /* For Code Block mode */
		k = dec->cb_params.k;
		c = 1;
	}

	if ((c > 1) && !check_bit(dec->op_flags,
		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
		crc24_overlap = 24;

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
			k = (r < dec->tb_params.c_neg) ?
				dec->tb_params.k_neg : dec->tb_params.k_pos;

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		/* Calculates circular buffer size (Kw).
		 * According to 3GPP TS 36.212 section 5.1.4.2
		 * Kw = 3 * Kpi,
		 * where:
		 * Kpi = nCol * nRow
		 * where nCol is 32 and nRow can be calculated from:
		 * D <= nCol * nRow
		 * where D is the size of each output from turbo encoder block
		 * (k + 4).
		 */
		kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3;
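
		/* Worked example (assuming RTE_BBDEV_TURBO_C_SUBBLOCK is the
		 * nCol = 32 sub-block width used above): k = 40 gives
		 * D = 44, rounded up to nCol * nRow = 64, so kw = 3 * 64
		 * = 192.
		 */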

		process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
				in_offset, out_offset, check_bit(dec->op_flags,
				RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
				seg_total_left, queue_stats);

		/* To keep CRC24 attached to end of Code block, use
		 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it is
		 * removed by default once verified.
		 */

		mbuf_total_left -= kw;

		/* Update offsets */
		if (seg_total_left == kw) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += kw;
			out_offset += ((k - crc24_overlap) >> 3);
		}
		r++;
	}
}

static inline void
enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint32_t e;
	uint16_t out_length, crc24_overlap = 0;
	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_harq_in = dec->harq_combined_input.data;
	struct rte_mbuf *m_harq_out = dec->harq_combined_output.data;
	struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t harq_in_offset = dec->harq_combined_input.offset;
	uint16_t harq_out_offset = dec->harq_combined_output.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = dec->tb_params.c;
		e = dec->tb_params.ea;
	} else { /* For Code Block mode */
		c = 1;
		e = dec->cb_params.e;
	}

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
		crc24_overlap = 24;

	out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */
	out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3);

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
			e = (r < dec->tb_params.cab) ?
				dec->tb_params.ea : dec->tb_params.eb;
		/* Special case handling when overusing mbuf */
		if (e < RTE_BBDEV_LDPC_E_MAX_MBUF)
			seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
		else
			seg_total_left = e;

		process_ldpc_dec_cb(q, op, c, out_length, e,
				m_in, m_out_head, m_out,
				m_harq_in, m_harq_out_head, m_harq_out,
				in_offset, out_offset, harq_in_offset,
				harq_out_offset,
				check_bit(dec->op_flags,
						RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK),
				crc24_overlap,
				seg_total_left, queue_stats);

		/* To keep CRC24 attached to end of Code block, use
		 * RTE_BBDEV_LDPC_DEC_TB_CRC_24B_KEEP flag as it is
		 * removed by default once verified.
		 */

		mbuf_total_left -= e;

		/* Update offsets */
		if (seg_total_left == e) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			if (m_harq_in != NULL)
				m_harq_in = m_harq_in->next;
			if (m_harq_out != NULL)
				m_harq_out = m_harq_out->next;
			in_offset = 0;
			out_offset = 0;
			harq_in_offset = 0;
			harq_out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += e;
			out_offset += out_length;
		}
		r++;
	}
}

static inline uint16_t
enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_dec_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

static inline uint16_t
enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_ldpc_dec_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

/* Enqueue burst */
static uint16_t
enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_ldpc_enc_all_ops(
			q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops,
			&q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Dequeue decode burst */
static uint16_t
dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *q = q_data->queue_private;
	uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
			(void **)ops, nb_ops, NULL);
	q_data->queue_stats.dequeued_count += nb_dequeued;

	return nb_dequeued;
}

/* Dequeue encode burst */
static uint16_t
dequeue_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *q = q_data->queue_private;
	uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
			(void **)ops, nb_ops, NULL);
	q_data->queue_stats.dequeued_count += nb_dequeued;

	return nb_dequeued;
}
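
/* Illustrative application-side loop for the burst handlers above (ops
 * prepared via rte_bbdev_enc_op_alloc_bulk(); error handling omitted;
 * "burst" is a hypothetical size):
 *
 *   uint16_t n = rte_bbdev_enqueue_enc_ops(dev_id, q_id, ops, burst);
 *   ...
 *   n = rte_bbdev_dequeue_enc_ops(dev_id, q_id, ops, burst);
 */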

/* Parse 16-bit integer from string argument */
static inline int
parse_u16_arg(const char *key, const char *value, void *extra_args)
{
	uint16_t *u16 = extra_args;
	unsigned long result;

	if ((value == NULL) || (extra_args == NULL))
		return -EINVAL;
	errno = 0;
	result = strtoul(value, NULL, 0);
	if ((result >= (1 << 16)) || (errno != 0)) {
		rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key);
		return -ERANGE;
	}
	*u16 = (uint16_t)result;
	return 0;
}

/* Parse parameters used to create device */
static int
parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;

	if (params == NULL)
		return -EINVAL;
	if (input_args) {
		kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params);
		if (kvlist == NULL)
			return -EFAULT;

		ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0],
				&parse_u16_arg, &params->queues_num);
		if (ret < 0)
			goto exit;

		ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1],
				&parse_u16_arg, &params->socket_id);
		if (ret < 0)
			goto exit;

		if (params->socket_id >= RTE_MAX_NUMA_NODES) {
			rte_bbdev_log(ERR, "Invalid socket, must be < %u",
					RTE_MAX_NUMA_NODES);
			ret = -EINVAL;
			goto exit;
		}
	}

exit:
	if (kvlist)
		rte_kvargs_free(kvlist);
	return ret;
}

/* Create device */
static int
turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
		struct turbo_sw_params *init_params)
{
	struct rte_bbdev *bbdev;
	const char *name = rte_vdev_device_name(vdev);

	bbdev = rte_bbdev_allocate(name);
	if (bbdev == NULL)
		return -ENODEV;

	bbdev->data->dev_private = rte_zmalloc_socket(name,
			sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE,
			init_params->socket_id);
	if (bbdev->data->dev_private == NULL) {
		rte_bbdev_release(bbdev);
		return -ENOMEM;
	}

	bbdev->dev_ops = &pmd_ops;
	bbdev->device = &vdev->device;
	bbdev->data->socket_id = init_params->socket_id;
	bbdev->intr_handle = NULL;

	/* register rx/tx burst functions for data path */
	bbdev->dequeue_enc_ops = dequeue_enc_ops;
	bbdev->dequeue_dec_ops = dequeue_dec_ops;
	bbdev->enqueue_enc_ops = enqueue_enc_ops;
	bbdev->enqueue_dec_ops = enqueue_dec_ops;
	bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops;
	bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops;
	bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops;
	bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops;
	((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
			init_params->queues_num;

	return 0;
}

/* Initialise device */
static int
turbo_sw_bbdev_probe(struct rte_vdev_device *vdev)
{
	struct turbo_sw_params init_params = {
		rte_socket_id(),
		RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
	};
	const char *name;
	const char *input_args;

	if (vdev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;
	input_args = rte_vdev_device_args(vdev);
	parse_turbo_sw_params(&init_params, input_args);

	rte_bbdev_log_debug(
			"Initialising %s on NUMA node %d with max queues: %d",
			name, init_params.socket_id, init_params.queues_num);

	return turbo_sw_bbdev_create(vdev, &init_params);
}

/* Uninitialise device */
static int
turbo_sw_bbdev_remove(struct rte_vdev_device *vdev)
{
	struct rte_bbdev *bbdev;
	const char *name;

	if (vdev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;

	bbdev = rte_bbdev_get_named_dev(name);
	if (bbdev == NULL)
		return -EINVAL;

	rte_free(bbdev->data->dev_private);

	return rte_bbdev_release(bbdev);
}

static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = {
	.probe = turbo_sw_bbdev_probe,
	.remove = turbo_sw_bbdev_remove
};

RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv);
RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
	TURBO_SW_MAX_NB_QUEUES_ARG"=<int> "
	TURBO_SW_SOCKET_ID_ARG"=<int>");
RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw);