/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdlib.h>
#include <string.h>

#include <rte_common.h>
#include <bus_vdev_driver.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_kvargs.h>
#include <rte_cycles.h>
#include <rte_errno.h>

#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>

#include <rte_hexdump.h>
#include <rte_log.h>

#ifdef RTE_BBDEV_SDK_AVX2
#include <ipp.h>
#include <ipps.h>
#include <phy_turbo.h>
#include <phy_crc.h>
#include <phy_rate_match.h>
#endif
#ifdef RTE_BBDEV_SDK_AVX512
#include <bit_reverse.h>
#include <phy_ldpc_encoder_5gnr.h>
#include <phy_ldpc_decoder_5gnr.h>
#include <phy_LDPC_ratematch_5gnr.h>
#include <phy_rate_dematching_5gnr.h>
#endif

#define DRIVER_NAME baseband_turbo_sw

RTE_LOG_REGISTER_DEFAULT(bbdev_turbo_sw_logtype, NOTICE);

/* Helper macro for logging */
#define rte_bbdev_log(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \
		##__VA_ARGS__)

#define rte_bbdev_log_debug(fmt, ...) \
	rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
		##__VA_ARGS__)

#define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48)
#define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6)
#define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48)

/* private data structure */
struct bbdev_private {
	unsigned int max_nb_queues;  /**< Max number of queues */
};

/* Initialisation params structure that can be used by Turbo SW driver */
struct turbo_sw_params {
	int socket_id;  /**< Turbo SW device socket */
	uint16_t queues_num;  /**< Turbo SW device queues number */
};

/* Acceptable params for Turbo SW devices */
#define TURBO_SW_MAX_NB_QUEUES_ARG  "max_nb_queues"
#define TURBO_SW_SOCKET_ID_ARG      "socket_id"

static const char * const turbo_sw_valid_params[] = {
	TURBO_SW_MAX_NB_QUEUES_ARG,
	TURBO_SW_SOCKET_ID_ARG
};
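/*
 * Illustrative usage (not part of this file): the PMD is typically
 * instantiated through an EAL --vdev argument carrying the kvargs above,
 * e.g.:
 *   --vdev=baseband_turbo_sw,max_nb_queues=8,socket_id=0
 * Both arguments are optional; defaults are filled in at probe time.
 */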
/* queue */
struct turbo_sw_queue {
	/* Ring for processed (encoded/decoded) operations which are ready to
	 * be dequeued.
	 */
	struct rte_ring *processed_pkts;
	/* Stores input for turbo encoder (used when CRC attachment is
	 * performed)
	 */
	uint8_t *enc_in;
	/* Stores output from turbo encoder */
	uint8_t *enc_out;
	/* Alpha gamma buf for bblib_turbo_decoder() function */
	int8_t *ag;
	/* Temp buf for bblib_turbo_decoder() function */
	uint16_t *code_block;
	/* Input buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_input;
	/* Output buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_output;
	/* Output buf for bblib_turbodec_adapter_lte() function */
	uint8_t *adapter_output;
	/* Operation type of this queue */
	enum rte_bbdev_op_type type;
} __rte_cache_aligned;


#ifdef RTE_BBDEV_SDK_AVX2
static inline char *
mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
{
	if (unlikely(len > rte_pktmbuf_tailroom(m)))
		return NULL;

	char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
	m->data_len = (uint16_t)(m->data_len + len);
	m_head->pkt_len = (m_head->pkt_len + len);
	return tail;
}

/* Calculate index based on Table 5.1.3-3 from 3GPP TS 36.212 */
static inline int32_t
compute_idx(uint16_t k)
{
	int32_t result = 0;

	if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE)
		return -1;

	if (k > 2048) {
		if ((k - 2048) % 64 != 0)
			return -1;

		result = 124 + (k - 2048) / 64;
	} else if (k <= 512) {
		if ((k - 40) % 8 != 0)
			return -1;

		result = (k - 40) / 8 + 1;
	} else if (k <= 1024) {
		if ((k - 512) % 16 != 0)
			return -1;

		result = 60 + (k - 512) / 16;
	} else { /* 1024 < k <= 2048 */
		if ((k - 1024) % 32 != 0)
			return -1;

		result = 92 + (k - 1024) / 32;
	}

	return result;
}
#endif
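/*
 * Worked examples for the mapping above (derived from the branches):
 *   k = 40   -> (40 - 40) / 8 + 1        = 1
 *   k = 512  -> (512 - 40) / 8 + 1       = 60
 *   k = 1024 -> 60 + (1024 - 512) / 16   = 92
 *   k = 6144 -> 124 + (6144 - 2048) / 64 = 188
 */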
/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}

/* Get device info */
static void
info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
{
	struct bbdev_private *internals = dev->data->dev_private;

	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
#ifdef RTE_BBDEV_SDK_AVX2
		{
			.type = RTE_BBDEV_OP_TURBO_DEC,
			.cap.turbo_dec = {
				.capability_flags =
					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
					RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
					RTE_BBDEV_TURBO_CRC_TYPE_24B |
					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
					RTE_BBDEV_TURBO_EARLY_TERMINATION,
				.max_llr_modulus = 16,
				.num_buffers_src =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_hard_out =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_soft_out = 0,
			}
		},
		{
			.type = RTE_BBDEV_OP_TURBO_ENC,
			.cap.turbo_enc = {
				.capability_flags =
					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
					RTE_BBDEV_TURBO_CRC_24A_ATTACH |
					RTE_BBDEV_TURBO_RATE_MATCH |
					RTE_BBDEV_TURBO_RV_INDEX_BYPASS,
				.num_buffers_src =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_dst =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
			}
		},
#endif
#ifdef RTE_BBDEV_SDK_AVX512
		{
			.type = RTE_BBDEV_OP_LDPC_ENC,
			.cap.ldpc_enc = {
				.capability_flags =
					RTE_BBDEV_LDPC_RATE_MATCH |
					RTE_BBDEV_LDPC_CRC_16_ATTACH |
					RTE_BBDEV_LDPC_CRC_24A_ATTACH |
					RTE_BBDEV_LDPC_CRC_24B_ATTACH,
				.num_buffers_src =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_dst =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
			}
		},
		{
			.type = RTE_BBDEV_OP_LDPC_DEC,
			.cap.ldpc_dec = {
				.capability_flags =
					RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
					RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE,
				.llr_size = 8,
				.llr_decimals = 4,
				.num_buffers_src =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_hard_out =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_soft_out = 0,
			}
		},
#endif
		RTE_BBDEV_END_OF_CAPABILITIES_LIST()
	};

	static struct rte_bbdev_queue_conf default_queue_conf = {
		.queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
	};
#ifdef RTE_BBDEV_SDK_AVX2
	static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
	dev_info->cpu_flag_reqs = &cpu_flag;
#else
	dev_info->cpu_flag_reqs = NULL;
#endif
	default_queue_conf.socket = dev->data->socket_id;

	dev_info->driver_name = RTE_STR(DRIVER_NAME);
	dev_info->max_num_queues = internals->max_nb_queues;
	dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT;
	dev_info->hardware_accelerated = false;
	dev_info->max_dl_queue_priority = 0;
	dev_info->max_ul_queue_priority = 0;
	dev_info->default_queue_conf = default_queue_conf;
	dev_info->capabilities = bbdev_capabilities;
	dev_info->min_alignment = 64;
	dev_info->harq_buffer_size = 0;
	dev_info->data_endianness = RTE_LITTLE_ENDIAN;

	rte_bbdev_log_debug("got device info from %u", dev->data->dev_id);
}

/* Release queue */
static int
q_release(struct rte_bbdev *dev, uint16_t q_id)
{
	struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private;

	if (q != NULL) {
		rte_ring_free(q->processed_pkts);
		rte_free(q->enc_out);
		rte_free(q->enc_in);
		rte_free(q->ag);
		rte_free(q->code_block);
		rte_free(q->deint_input);
		rte_free(q->deint_output);
		rte_free(q->adapter_output);
		rte_free(q);
		dev->data->queues[q_id].queue_private = NULL;
	}

	rte_bbdev_log_debug("released device queue %u:%u",
			dev->data->dev_id, q_id);
	return 0;
}
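/*
 * Queue setup below allocates, per queue: seven working buffers (enc_out,
 * enc_in, ag, code_block, deint_input, deint_output, adapter_output) and
 * one rte_ring that holds ops already processed at enqueue time. Any
 * allocation failure unwinds through the free_q label.
 */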
/* Setup a queue */
static int
q_setup(struct rte_bbdev *dev, uint16_t q_id,
		const struct rte_bbdev_queue_conf *queue_conf)
{
	int ret;
	struct turbo_sw_queue *q;
	char name[RTE_RING_NAMESIZE];

	/* Allocate the queue data structure. */
	q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q == NULL) {
		rte_bbdev_log(ERR, "Failed to allocate queue memory");
		return -ENOMEM;
	}

	/* Allocate memory for encoder output. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->enc_out = rte_zmalloc_socket(name,
			((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) *
			sizeof(*q->enc_out) * 3,
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->enc_out == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for rate matching output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id,
			q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->enc_in = rte_zmalloc_socket(name,
			(RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->enc_in == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Alpha Gamma temp buffer. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->ag = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->ag == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for code block temp buffer. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->code_block = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->code_block == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Deinterleaver input. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_de_i%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->deint_input = rte_zmalloc_socket(name,
			DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->deint_input == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Deinterleaver output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_de_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->deint_output = rte_zmalloc_socket(name,
			DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->deint_output == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Adapter output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_ada_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->adapter_output = rte_zmalloc_socket(name,
			ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->adapter_output == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Create ring for processed packets awaiting dequeue. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->processed_pkts = rte_ring_create(name, queue_conf->queue_size,
			queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
	if (q->processed_pkts == NULL) {
		rte_bbdev_log(ERR, "Failed to create ring for %s", name);
		ret = -rte_errno;
		goto free_q;
	}
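	/* Note: the ring is created single-producer/single-consumer
	 * (RING_F_SP_ENQ | RING_F_SC_DEQ), so a given queue must not be
	 * enqueued to or dequeued from by multiple lcores concurrently.
	 */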
	q->type = queue_conf->op_type;

	dev->data->queues[q_id].queue_private = q;
	rte_bbdev_log_debug("setup device queue %s", name);
	return 0;

free_q:
	rte_ring_free(q->processed_pkts);
	rte_free(q->enc_out);
	rte_free(q->enc_in);
	rte_free(q->ag);
	rte_free(q->code_block);
	rte_free(q->deint_input);
	rte_free(q->deint_output);
	rte_free(q->adapter_output);
	rte_free(q);
	return ret;
}

static const struct rte_bbdev_ops pmd_ops = {
	.info_get = info_get,
	.queue_setup = q_setup,
	.queue_release = q_release
};

#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
/* Checks if the encoder input buffer is correct.
 * Returns 0 if it's valid, -1 otherwise.
 */
static inline int
is_enc_input_valid(const uint16_t k, const int32_t k_idx,
		const uint16_t in_length)
{
	if (k_idx < 0) {
		rte_bbdev_log(ERR, "K Index is invalid");
		return -1;
	}

	if (in_length < (k >> 3)) {
		rte_bbdev_log(ERR,
				"Mismatch between input length (%u bytes) and K (%u bits)",
				in_length, k);
		return -1;
	}

	if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
		rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
				k, RTE_BBDEV_TURBO_MAX_CB_SIZE);
		return -1;
	}

	return 0;
}

/* Checks if the decoder input buffer is correct.
 * Returns 0 if it's valid, -1 otherwise.
 */
static inline int
is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
{
	if (k_idx < 0) {
		rte_bbdev_log(ERR, "K index is invalid");
		return -1;
	}

	if (in_length < kw) {
		rte_bbdev_log(ERR,
				"Mismatch between input length (%u) and kw (%u)",
				in_length, kw);
		return -1;
	}

	if (kw > RTE_BBDEV_TURBO_MAX_KW) {
		rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
				kw, RTE_BBDEV_TURBO_MAX_KW);
		return -1;
	}

	return 0;
}
#endif
#endif
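/*
 * Encode path below: per code block, optionally attach CRC24A/CRC24B, run
 * the SDK turbo encoder into three output streams, then either rate-match
 * to e bits or pack the three streams back-to-back when rate matching is
 * bypassed.
 */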
static inline void
process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t in_length, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	int ret;
#else
	RTE_SET_USED(in_length);
#endif
	int16_t k_idx;
	uint16_t m;
	uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
	uint64_t first_3_bytes = 0;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	struct bblib_turbo_encoder_request turbo_req;
	struct bblib_turbo_encoder_response turbo_resp;
	struct bblib_rate_match_dl_request rm_req;
	struct bblib_rate_match_dl_response rm_resp;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);
	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* CRC24A (for TB) */
	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
			(enc->code_block_mode == RTE_BBDEV_CODE_BLOCK)) {
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
#endif

		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is room for the CRC bits; if not, use
		 * the temporary buffer.
		 */
		if (mbuf_append(m_in, m_in, 3) == NULL) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else {
			/* Store the first 3 bytes of the next CB as they will
			 * be overwritten by the CRC bytes. If this is the last
			 * CB there is nothing to preserve and this branch is
			 * skipped.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24A generation */
		bblib_lte_crc24a_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
		/* CRC24B */
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
#endif

		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is room for the CRC bits if this is the last
		 * CB in the TB. If not, use the temporary buffer.
		 */
		if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else if (c - r > 1) {
			/* Store the first 3 bytes of the next CB as they will
			 * be overwritten by the CRC bytes. If this is the last
			 * CB there is nothing to preserve and this branch is
			 * skipped.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24B generation */
		bblib_lte_crc24b_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	}
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	else {
		ret = is_enc_input_valid(k, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
	}
#endif
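	/* Example of the stream sizing described below: for k = 6144 each of
	 * the three encoder streams is (6144 >> 3) + 1 = 769 bytes, and the
	 * bypass output reserves (6144 >> 3) * 3 + 2 = 2306 bytes.
	 */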
	/* Turbo encoder */

	/* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
	 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up.
	 * So dst_data's length should be 3*(k/8) + 3 bytes.
	 * In Rate-matching bypass case outputs pointers passed to encoder
	 * (out0, out1 and out2) can directly point to addresses of output from
	 * turbo_enc entity.
	 */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		out0 = q->enc_out;
		out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
		out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
	} else {
		out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
				(k >> 3) * 3 + 2);
		if (out0 == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		enc->output.length += (k >> 3) * 3 + 2;
		/* rte_bbdev_op_data.offset can be different than the
		 * offset of the appended bytes
		 */
		out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
		out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + (k >> 3) + 1);
		out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + 2 * ((k >> 3) + 1));
	}

	turbo_req.case_id = k_idx;
	turbo_req.input_win = in;
	turbo_req.length = k >> 3;
	turbo_resp.output_win_0 = out0;
	turbo_resp.output_win_1 = out1;
	turbo_resp.output_win_2 = out2;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo encoding */
	if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "Turbo Encoder failed");
		return;
	}
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	/* Restore the first 3 bytes of the next CB if they were overwritten
	 * by the CRC.
	 */
	if (first_3_bytes != 0)
		*((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;

	/* Rate-matching */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		uint8_t mask_id;
		/* Integer round up division by 8 */
		uint16_t out_len = (e + 7) >> 3;
		/* The mask array is indexed using E%8. E is an even number so
		 * there are only 4 possible values.
		 */
		const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};
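		/* Mask selection example: e is even, so e % 8 is one of
		 * {0, 2, 4, 6} and mask_id = (e & 7) >> 1 indexes the array:
		 * e % 8 == 0 keeps the whole byte (0xFF), 2 keeps the 2 MSBs
		 * (0xC0), 4 keeps 4 (0xF0) and 6 keeps 6 (0xFC).
		 */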
		/* get output data starting address */
		rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
		if (rm_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		/* rte_bbdev_op_data.offset can be different than the offset
		 * of the appended bytes
		 */
		rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

		/* index of current code block */
		rm_req.r = r;
		/* total number of code blocks */
		rm_req.C = c;
		/* For DL - 1, UL - 0 */
		rm_req.direction = 1;
		/* According to 3GPP TS 36.212 section 5.1.4.1.2, Nsoft, KMIMO
		 * and MDL_HARQ are used for Ncb calculation. As Ncb is already
		 * known we can adjust those parameters
		 */
		rm_req.Nsoft = ncb * rm_req.C;
		rm_req.KMIMO = 1;
		rm_req.MDL_HARQ = 1;
		/* According to 3GPP TS 36.212 section 5.1.4.1.2, Nl, Qm and G
		 * are used for E calculation. As E is already known we can
		 * adjust those parameters
		 */
		rm_req.NL = e;
		rm_req.Qm = 1;
		rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C;

		rm_req.rvidx = enc->rv_index;
		rm_req.Kidx = k_idx - 1;
		rm_req.nLen = k + 4;
		rm_req.tin0 = out0;
		rm_req.tin1 = out1;
		rm_req.tin2 = out2;
		rm_resp.output = rm_out;
		rm_resp.OutputLen = out_len;
		if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS)
			rm_req.bypass_rvidx = 1;
		else
			rm_req.bypass_rvidx = 0;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Rate-Matching */
		if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
			op->status |= 1 << RTE_BBDEV_DRV_ERROR;
			rte_bbdev_log(ERR, "Rate matching failed");
			return;
		}
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

		/* SW fills an entire last byte even if E%8 != 0. Clear the
		 * superfluous data bits for consistency with HW device.
		 */
		mask_id = (e & 7) >> 1;
		rm_out[out_len - 1] &= mask_out[mask_id];
		enc->output.length += rm_resp.OutputLen;
	} else {
		/* Rate matching is bypassed */
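		/* Packing sketch: each stream holds k + 4 valid bits, i.e.
		 * (k >> 3) bytes plus a half-byte. out1 is shifted left by
		 * 4 bits and out2 by 8 bits so the three streams become one
		 * contiguous bit string of 3 * (k + 4) bits in the mbuf.
		 */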
		/* Completing last byte of out0 (where 4 tail bits are stored)
		 * by moving first 4 bits from out1
		 */
		tmp_out = (uint8_t *) --out1;
		*tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4);
		tmp_out++;
		/* Shifting out1 data by 4 bits to the left */
		for (m = 0; m < k >> 3; ++m) {
			uint8_t *first = tmp_out;
			uint8_t second = *(tmp_out + 1);
			*first = (*first << 4) | ((second & 0xF0) >> 4);
			tmp_out++;
		}
		/* Shifting out2 data by 8 bits to the left */
		for (m = 0; m < (k >> 3) + 1; ++m) {
			*tmp_out = *(tmp_out + 1);
			tmp_out++;
		}
		*tmp_out = 0;
	}
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(r);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(ncb);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}


static inline void
process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t seg_total_left, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
	RTE_SET_USED(seg_total_left);
	uint8_t *in, *rm_out;
	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
	struct bblib_ldpc_encoder_5gnr_request ldpc_req;
	struct bblib_ldpc_encoder_5gnr_response ldpc_resp;
	struct bblib_LDPC_ratematch_5gnr_request rm_req;
	struct bblib_LDPC_ratematch_5gnr_response rm_resp;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	uint16_t msgLen, puntBits, parity_offset, out_len;
	uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
	uint16_t in_length_in_bits = K - enc->n_filler;
	uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3;
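	/* Example (derived from the formulas above): BG1 with Zc = 384 gives
	 * K = 22 * 384 = 8448 info bits; BG2 with Zc = 384 gives
	 * K = 10 * 384 = 3840.
	 */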
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time = rte_rdtsc_precise();
#else
	RTE_SET_USED(q_stats);
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* Masking the Filler bits explicitly */
	memset(q->enc_in + (in_length_in_bytes - 3), 0,
			((K + 7) >> 3) - (in_length_in_bytes - 3));
	/* CRC Generation */
	if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) {
		rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
		crc_req.data = in;
		crc_req.len = in_length_in_bits - 24;
		crc_resp.data = q->enc_in;
		bblib_lte_crc24a_gen(&crc_req, &crc_resp);
	} else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) {
		rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
		crc_req.data = in;
		crc_req.len = in_length_in_bits - 24;
		crc_resp.data = q->enc_in;
		bblib_lte_crc24b_gen(&crc_req, &crc_resp);
	} else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_16_ATTACH) {
		rte_memcpy(q->enc_in, in, in_length_in_bytes - 2);
		crc_req.data = in;
		crc_req.len = in_length_in_bits - 16;
		crc_resp.data = q->enc_in;
		bblib_lte_crc16_gen(&crc_req, &crc_resp);
	} else
		rte_memcpy(q->enc_in, in, in_length_in_bytes);

	/* LDPC Encoding */
	ldpc_req.Zc = enc->z_c;
	ldpc_req.baseGraph = enc->basegraph;
	/* Number of rows set to maximum */
	ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42;
	ldpc_req.numberCodeblocks = 1;
	ldpc_req.input[0] = (int8_t *) q->enc_in;
	ldpc_resp.output[0] = (int8_t *) q->enc_out;

	bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3);

	if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "LDPC Encoder failed");
		return;
	}

	/*
	 * Systematic + Parity : Recreating stream with filler bits, ideally
	 * the bit select could handle this in the RM SDK
	 */
	msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc;
	puntBits = 2 * ldpc_req.Zc;
	parity_offset = msgLen - puntBits;
	ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8),
			puntBits % 8, q->adapter_output, 0, parity_offset);
	ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8),
			parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc);

	out_len = (e + 7) >> 3;
	/* get output data starting address */
	rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
	if (rm_out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Too little space in output mbuf");
		return;
	}
	/*
	 * rte_bbdev_op_data.offset can be different than the offset
	 * of the appended bytes
	 */
	rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

	/* Rate-Matching */
	rm_req.E = e;
	rm_req.Ncb = enc->n_cb;
	rm_req.Qm = enc->q_m;
	rm_req.Zc = enc->z_c;
	rm_req.baseGraph = enc->basegraph;
	rm_req.input = q->adapter_output;
	rm_req.nLen = enc->n_filler;
	rm_req.nullIndex = parity_offset - enc->n_filler;
	rm_req.rvidx = enc->rv_index;
	rm_resp.output = q->deint_output;

	if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "Rate matching failed");
		return;
	}

	/* RM SDK may provide non zero bits on last byte */
	if ((e % 8) != 0)
		q->deint_output[out_len - 1] &= (1 << (e % 8)) - 1;

	bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3);

	rte_memcpy(rm_out, q->deint_output, out_len);
	enc->output.length += out_len;

#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(seg_total_left);
	RTE_SET_USED(q_stats);
#endif
}
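/*
 * Transport block walk used by the enqueue helpers below: r iterates the
 * CBs of a TB; k/ncb are picked from the k_neg/k_pos pair (c_neg threshold)
 * and e from ea/eb (cab threshold), per the 3GPP TS 36.212 TB segmentation
 * parameters carried in the op.
 */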
static inline void
enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r, crc24_bits = 0;
	uint16_t k, ncb;
	uint32_t e;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	uint16_t in_offset = enc->input.offset;
	uint16_t out_offset = enc->output.offset;
	struct rte_mbuf *m_in = enc->input.data;
	struct rte_mbuf *m_out = enc->output.data;
	struct rte_mbuf *m_out_head = enc->output.data;
	uint32_t in_length, mbuf_total_left = enc->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
		rte_bbdev_log(ERR, "TB size (%u bytes) is too big, max: %d bytes",
				mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3);
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
			(enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
		crc24_bits = 24;

	if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = enc->tb_params.c;
		r = enc->tb_params.r;
	} else { /* For Code Block mode */
		c = 1;
		r = 0;
	}

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
			k = (r < enc->tb_params.c_neg) ?
				enc->tb_params.k_neg : enc->tb_params.k_pos;
			ncb = (r < enc->tb_params.c_neg) ?
				enc->tb_params.ncb_neg : enc->tb_params.ncb_pos;
			e = (r < enc->tb_params.cab) ?
				enc->tb_params.ea : enc->tb_params.eb;
		} else {
			k = enc->cb_params.k;
			ncb = enc->cb_params.ncb;
			e = enc->cb_params.e;
		}

		process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
				m_out, in_offset, out_offset, seg_total_left,
				queue_stats);
		/* Update total_left */
		in_length = ((k - crc24_bits) >> 3);
		mbuf_total_left -= in_length;
		/* Update offsets for next CBs (if exist) */
		in_offset += (k - crc24_bits) >> 3;
		if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
			out_offset += e >> 3;
		else
			out_offset += (k >> 3) * 3 + 2;

		/* Update offsets */
		if (seg_total_left == in_length) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		}
		r++;
	}

	/* check if all input data was processed */
	if (mbuf_total_left != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included CBs sizes");
	}
}
static inline void
enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r, crc24_bits = 0;
	uint32_t e;
	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
	uint16_t in_offset = enc->input.offset;
	uint16_t out_offset = enc->output.offset;
	struct rte_mbuf *m_in = enc->input.data;
	struct rte_mbuf *m_out = enc->output.data;
	struct rte_mbuf *m_out_head = enc->output.data;
	uint32_t in_length, mbuf_total_left = enc->input.length;

	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
		rte_bbdev_log(ERR, "TB size (%u bytes) is too big, max: %d bytes",
				mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3);
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) ||
			(enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH))
		crc24_bits = 24;

	if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = enc->tb_params.c;
		r = enc->tb_params.r;
	} else { /* For Code Block mode */
		c = 1;
		r = 0;
	}

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
			e = (r < enc->tb_params.cab) ?
				enc->tb_params.ea : enc->tb_params.eb;
		} else {
			e = enc->cb_params.e;
		}

		process_ldpc_enc_cb(q, op, e, m_in, m_out_head,
				m_out, in_offset, out_offset, seg_total_left,
				queue_stats);
		/* Update total_left */
		in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
		in_length = ((in_length - crc24_bits - enc->n_filler) >> 3);
		mbuf_total_left -= in_length;
		/* Update offsets for next CBs (if exist) */
		in_offset += in_length;
		out_offset += (e + 7) >> 3;

		/* Update offsets */
		if (seg_total_left == in_length) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		}
		r++;
	}

	/* check if all input data was processed */
	if (mbuf_total_left != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included CBs sizes %d",
				mbuf_total_left);
	}
}

static inline uint16_t
enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_enc_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

static inline uint16_t
enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_enc_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_ldpc_enc_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

#ifdef RTE_BBDEV_SDK_AVX2
static inline void
move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
		uint16_t ncb)
{
	uint16_t d = k + 4;
	uint16_t kpi = ncb / 3;
	uint16_t nd = kpi - d;

	rte_memcpy(&out[nd], in, d);
	rte_memcpy(&out[nd + kpi + 64], &in[kpi], d);
	rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d);
}
#endif
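/*
 * Decode path below: sub-block de-interleave (or pad when deinterleaving is
 * bypassed), adapt LLRs for the decoder, then run the turbo decoder with
 * optional early termination. turbo_req.c is bumped by one when CRC24B
 * checking is requested.
 */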
static inline void
process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
		uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
		uint16_t crc24_overlap, uint16_t in_length,
		struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	int ret;
#else
	RTE_SET_USED(in_length);
#endif
	int32_t k_idx;
	int32_t iter_cnt;
	uint8_t *in, *out, *adapter_input;
	int32_t ncb, ncb_without_null;
	struct bblib_turbo_adapter_ul_response adapter_resp;
	struct bblib_turbo_adapter_ul_request adapter_req;
	struct bblib_turbo_decoder_request turbo_req;
	struct bblib_turbo_decoder_response turbo_resp;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	ret = is_dec_input_valid(k_idx, kw, in_length);
	if (ret != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
	ncb = kw;
	ncb_without_null = (k + 4) * 3;

	if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) {
		struct bblib_deinterleave_ul_request deint_req;
		struct bblib_deinterleave_ul_response deint_resp;

		deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER;
		deint_req.pharqbuffer = in;
		deint_req.ncb = ncb;
		deint_resp.pinteleavebuffer = q->deint_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Sub-block De-Interleaving */
		bblib_deinterleave_ul(&deint_req, &deint_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else
		move_padding_bytes(in, q->deint_output, k, ncb);

	adapter_input = q->deint_output;

	if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN)
		adapter_req.isinverted = 1;
	else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN)
		adapter_req.isinverted = 0;
	else {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "LLR format wasn't specified");
		return;
	}

	adapter_req.ncb = ncb_without_null;
	adapter_req.pinteleavebuffer = adapter_input;
	adapter_resp.pharqout = q->adapter_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode adaptation */
	bblib_turbo_adapter_ul(&adapter_req, &adapter_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	out = (uint8_t *)mbuf_append(m_out_head, m_out,
			((k - crc24_overlap) >> 3));
	if (out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Too little space in output mbuf");
		return;
	}
	/* rte_bbdev_op_data.offset can be different than the offset of the
	 * appended bytes
	 */
	out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
	if (check_crc_24b)
		turbo_req.c = c + 1;
	else
		turbo_req.c = c;
	turbo_req.input = (int8_t *)q->adapter_output;
	turbo_req.k = k;
	turbo_req.k_idx = k_idx;
	turbo_req.max_iter_num = dec->iter_max;
	turbo_req.early_term_disable = !check_bit(dec->op_flags,
			RTE_BBDEV_TURBO_EARLY_TERMINATION);
	turbo_resp.ag_buf = q->ag;
	turbo_resp.cb_buf = q->code_block;
	turbo_resp.output = out;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode */
	iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	dec->hard_output.length += (k >> 3);

	if (iter_cnt > 0) {
		/* Temporary solution for returned iter_count from SDK */
		iter_cnt = (iter_cnt - 1) >> 1;
		dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count);
	} else {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Turbo Decoder failed");
		return;
	}
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(kw);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}
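/*
 * LDPC decode path below: optionally seed the HARQ buffer with combined
 * input, rate-dematch into it, run the 5GNR LDPC decoder, then verify the
 * attached CRC (24A/24B/16) and optionally emit the updated HARQ buffer.
 */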
static inline void
process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		uint8_t c, uint16_t out_length, uint32_t e,
		struct rte_mbuf *m_in,
		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
		struct rte_mbuf *m_harq_in,
		struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out,
		uint16_t in_offset, uint16_t out_offset,
		uint16_t harq_in_offset, uint16_t harq_out_offset,
		bool check_crc_24b,
		uint16_t crc24_overlap, uint16_t in_length,
		struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
	RTE_SET_USED(in_length);
	RTE_SET_USED(c);
	uint8_t *in, *out, *harq_in, *harq_out, *adapter_input;
	struct bblib_rate_dematching_5gnr_request derm_req;
	struct bblib_rate_dematching_5gnr_response derm_resp;
	struct bblib_ldpc_decoder_5gnr_request dec_req;
	struct bblib_ldpc_decoder_5gnr_response dec_resp;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
	uint16_t K, parity_offset, sys_cols, outLenWithCrc;
	int16_t deRmOutSize, numRows;

	/* Compute some LDPC BG lengths */
	outLenWithCrc = out_length + (crc24_overlap >> 3);
	sys_cols = (dec->basegraph == 1) ? 22 : 10;
	K = sys_cols * dec->z_c;
	parity_offset = K - 2 * dec->z_c;

#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time = rte_rdtsc_precise();
#else
	RTE_SET_USED(q_stats);
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
		/**
		 * Single contiguous block from the first LLR of the
		 * circular buffer.
		 */
		harq_in = NULL;
		if (m_harq_in != NULL)
			harq_in = rte_pktmbuf_mtod_offset(m_harq_in,
					uint8_t *, harq_in_offset);
		if (harq_in == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR, "No space in harq input mbuf");
			return;
		}
		uint16_t harq_in_length = RTE_MIN(
				dec->harq_combined_input.length,
				(uint32_t) dec->n_cb);
		memset(q->ag + harq_in_length, 0,
				dec->n_cb - harq_in_length);
		rte_memcpy(q->ag, harq_in, harq_in_length);
	}

	derm_req.p_in = (int8_t *) in;
	derm_req.p_harq = q->ag; /* This doesn't include the filler bits */
	derm_req.base_graph = dec->basegraph;
	derm_req.zc = dec->z_c;
	derm_req.ncb = dec->n_cb;
	derm_req.e = e;
	derm_req.k0 = 0; /* Actual output from SDK */
	derm_req.isretx = check_bit(dec->op_flags,
			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
	derm_req.rvid = dec->rv_index;
	derm_req.modulation_order = dec->q_m;
	derm_req.start_null_index = parity_offset - dec->n_filler;
	derm_req.num_of_null = dec->n_filler;

	bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
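	/* The usable de-rate-matched size below is bounded both by what this
	 * retransmission delivered (k0 + e, less the filler bits when k0 lies
	 * beyond the null index) and by the circular buffer itself
	 * (n_cb - n_filler); combined HARQ input can only grow it.
	 */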
	/* Compute RM out size and number of rows */
	deRmOutSize = RTE_MIN(
			derm_req.k0 + derm_req.e -
			((derm_req.k0 < derm_req.start_null_index) ?
					0 : dec->n_filler),
			dec->n_cb - dec->n_filler);
	if (m_harq_in != NULL)
		deRmOutSize = RTE_MAX(deRmOutSize,
				RTE_MIN(dec->n_cb - dec->n_filler,
						m_harq_in->data_len));
	numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c)
			- sys_cols + 2;
	numRows = RTE_MAX(4, numRows);

	/* get output data starting address */
	out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length);
	if (out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Too little space in LDPC decoder output mbuf");
		return;
	}

	/* rte_bbdev_op_data.offset can be different than the offset
	 * of the appended bytes
	 */
	out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
	adapter_input = q->enc_out;

	dec_req.Zc = dec->z_c;
	dec_req.baseGraph = dec->basegraph;
	dec_req.nRows = numRows;
	dec_req.numChannelLlrs = deRmOutSize;
	dec_req.varNodes = derm_req.p_harq;
	dec_req.numFillerBits = dec->n_filler;
	dec_req.maxIterations = dec->iter_max;
	dec_req.enableEarlyTermination = check_bit(dec->op_flags,
			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
	dec_resp.varNodes = (int16_t *) q->adapter_output;
	dec_resp.compactedMessageBytes = q->enc_out;

	bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp);

	dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination,
			dec->iter_count);
	if (!dec_resp.parityPassedAtTermination)
		op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;

	bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3);

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) ||
			check_bit(dec->op_flags,
					RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) {
		crc_req.data = adapter_input;
		crc_req.len = K - dec->n_filler - 24;
		crc_resp.check_passed = false;
		crc_resp.data = adapter_input;
		if (check_crc_24b)
			bblib_lte_crc24b_check(&crc_req, &crc_resp);
		else
			bblib_lte_crc24a_check(&crc_req, &crc_resp);
		if (!crc_resp.check_passed)
			op->status |= 1 << RTE_BBDEV_CRC_ERROR;
	} else if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) {
		crc_req.data = adapter_input;
		crc_req.len = K - dec->n_filler - 16;
		crc_resp.check_passed = false;
		crc_resp.data = adapter_input;
		bblib_lte_crc16_check(&crc_req, &crc_resp);
		if (!crc_resp.check_passed)
			op->status |= 1 << RTE_BBDEV_CRC_ERROR;
	}

#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
		harq_out = NULL;
		if (m_harq_out != NULL) {
			/* Initialize HARQ data length since we overwrite */
			m_harq_out->data_len = 0;
			/* Check there is enough space
			 * in the HARQ outbound buffer
			 */
			harq_out = (uint8_t *)mbuf_append(m_harq_out_head,
					m_harq_out, deRmOutSize);
		}
		if (harq_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR, "No space in HARQ output mbuf");
			return;
		}
		/* get output data starting address and overwrite the data */
		harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *,
				harq_out_offset);
		rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize);
		dec->harq_combined_output.length += deRmOutSize;
	}

	rte_memcpy(out, adapter_input, out_length);
	dec->hard_output.length += out_length;
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(out_length);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(m_harq_in);
	RTE_SET_USED(m_harq_out_head);
	RTE_SET_USED(m_harq_out);
	RTE_SET_USED(harq_in_offset);
	RTE_SET_USED(harq_out_offset);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}


static inline void
enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint16_t kw, k = 0;
	uint16_t crc24_overlap = 0;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = dec->tb_params.c;
	} else { /* For Code Block mode */
		k = dec->cb_params.k;
		c = 1;
	}

	if ((c > 1) && !check_bit(dec->op_flags,
			RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
		crc24_overlap = 24;

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
			k = (r < dec->tb_params.c_neg) ?
				dec->tb_params.k_neg : dec->tb_params.k_pos;

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		/* Calculates circular buffer size (Kw).
		 * According to 3GPP TS 36.212 section 5.1.4.2
		 *   Kw = 3 * Kpi,
		 * where:
		 *   Kpi = nCol * nRow
		 * where nCol is 32 and nRow can be calculated from:
		 *   D <= nCol * nRow
		 * where D is the size of each output from turbo encoder block
		 * (k + 4).
		 */
		kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3;
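		/* Example: k = 6144 gives D = 6148, rounded up to the 32-wide
		 * sub-block columns -> 6176, so kw = 3 * 6176 = 18528.
		 */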
		process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
				in_offset, out_offset, check_bit(dec->op_flags,
				RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
				seg_total_left, queue_stats);

		/* To keep CRC24 attached to the end of the Code Block, use
		 * the RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag, as it is
		 * removed by default once verified.
		 */

		mbuf_total_left -= kw;

		/* Update offsets */
		if (seg_total_left == kw) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += kw;
			out_offset += ((k - crc24_overlap) >> 3);
		}
		r++;
	}
}
static inline void
enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint32_t e;
	uint16_t out_length, crc24_overlap = 0;
	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_harq_in = dec->harq_combined_input.data;
	struct rte_mbuf *m_harq_out = dec->harq_combined_output.data;
	struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t harq_in_offset = dec->harq_combined_input.offset;
	uint16_t harq_out_offset = dec->harq_combined_output.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = dec->tb_params.c;
		e = dec->tb_params.ea;
	} else { /* For Code Block mode */
		c = 1;
		e = dec->cb_params.e;
	}

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
		crc24_overlap = 24;

	out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */
	out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3);

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
			e = (r < dec->tb_params.cab) ?
				dec->tb_params.ea : dec->tb_params.eb;
		/* Special case handling when overusing mbuf */
		if (e < RTE_BBDEV_LDPC_E_MAX_MBUF)
			seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
		else
			seg_total_left = e;

		process_ldpc_dec_cb(q, op, c, out_length, e,
				m_in, m_out_head, m_out,
				m_harq_in, m_harq_out_head, m_harq_out,
				in_offset, out_offset, harq_in_offset,
				harq_out_offset,
				check_bit(dec->op_flags,
						RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK),
				crc24_overlap,
				seg_total_left, queue_stats);

		/* CRC24 is kept attached to the end of the Code Block unless
		 * the RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP flag requests that it
		 * be dropped once verified.
		 */

		mbuf_total_left -= e;

		/* Update offsets */
		if (seg_total_left == e) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			if (m_harq_in != NULL)
				m_harq_in = m_harq_in->next;
			if (m_harq_out != NULL)
				m_harq_out = m_harq_out->next;
			in_offset = 0;
			out_offset = 0;
			harq_in_offset = 0;
			harq_out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += e;
			out_offset += out_length;
		}
		r++;
	}
}

static inline uint16_t
enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_dec_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

static inline uint16_t
enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_ldpc_dec_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

/* Enqueue burst */
static uint16_t
enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_ldpc_enc_all_ops(
			q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops,
			&q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}
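/*
 * All processing in this PMD happens synchronously inside the enqueue
 * calls above; the dequeue functions below therefore only drain the
 * per-queue ring of already-completed ops.
 */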
/* Dequeue decode burst */
static uint16_t
dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *q = q_data->queue_private;
	uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
			(void **)ops, nb_ops, NULL);
	q_data->queue_stats.dequeued_count += nb_dequeued;

	return nb_dequeued;
}

/* Dequeue encode burst */
static uint16_t
dequeue_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *q = q_data->queue_private;
	uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
			(void **)ops, nb_ops, NULL);
	q_data->queue_stats.dequeued_count += nb_dequeued;

	return nb_dequeued;
}

/* Parse 16bit integer from string argument */
static inline int
parse_u16_arg(const char *key, const char *value, void *extra_args)
{
	uint16_t *u16 = extra_args;
	unsigned long result;

	if ((value == NULL) || (extra_args == NULL))
		return -EINVAL;
	errno = 0;
	result = strtoul(value, NULL, 0);
	if ((result >= (1 << 16)) || (errno != 0)) {
		rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key);
		return -ERANGE;
	}
	*u16 = (uint16_t)result;
	return 0;
}
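/*
 * Example: a device args string such as "max_nb_queues=8,socket_id=0" is
 * parsed below into queues_num = 8 and socket_id = 0; keys not supplied
 * keep the caller's defaults.
 */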
/* Parse parameters used to create device */
static int
parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;

	if (params == NULL)
		return -EINVAL;
	if (input_args) {
		kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params);
		if (kvlist == NULL)
			return -EFAULT;

		ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0],
				&parse_u16_arg, &params->queues_num);
		if (ret < 0)
			goto exit;

		ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1],
				&parse_u16_arg, &params->socket_id);
		if (ret < 0)
			goto exit;

		if (params->socket_id >= RTE_MAX_NUMA_NODES) {
			rte_bbdev_log(ERR, "Invalid socket, must be < %u",
					RTE_MAX_NUMA_NODES);
			ret = -EINVAL;
			goto exit;
		}
	}

exit:
	rte_kvargs_free(kvlist);
	return ret;
}

/* Create device */
static int
turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
		struct turbo_sw_params *init_params)
{
	struct rte_bbdev *bbdev;
	const char *name = rte_vdev_device_name(vdev);

	bbdev = rte_bbdev_allocate(name);
	if (bbdev == NULL)
		return -ENODEV;

	bbdev->data->dev_private = rte_zmalloc_socket(name,
			sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE,
			init_params->socket_id);
	if (bbdev->data->dev_private == NULL) {
		rte_bbdev_release(bbdev);
		return -ENOMEM;
	}

	bbdev->dev_ops = &pmd_ops;
	bbdev->device = &vdev->device;
	bbdev->data->socket_id = init_params->socket_id;
	bbdev->intr_handle = NULL;

	/* register rx/tx burst functions for data path */
	bbdev->dequeue_enc_ops = dequeue_enc_ops;
	bbdev->dequeue_dec_ops = dequeue_dec_ops;
	bbdev->enqueue_enc_ops = enqueue_enc_ops;
	bbdev->enqueue_dec_ops = enqueue_dec_ops;
	bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops;
	bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops;
	bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops;
	bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops;
	((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
			init_params->queues_num;

	return 0;
}

/* Initialise device */
static int
turbo_sw_bbdev_probe(struct rte_vdev_device *vdev)
{
	struct turbo_sw_params init_params = {
		rte_socket_id(),
		RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
	};
	const char *name;
	const char *input_args;

	if (vdev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;
	input_args = rte_vdev_device_args(vdev);
	parse_turbo_sw_params(&init_params, input_args);

	rte_bbdev_log_debug(
			"Initialising %s on NUMA node %d with max queues: %d",
			name, init_params.socket_id, init_params.queues_num);

	return turbo_sw_bbdev_create(vdev, &init_params);
}

/* Uninitialise device */
static int
turbo_sw_bbdev_remove(struct rte_vdev_device *vdev)
{
	struct rte_bbdev *bbdev;
	const char *name;

	if (vdev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;

	bbdev = rte_bbdev_get_named_dev(name);
	if (bbdev == NULL)
		return -EINVAL;

	rte_free(bbdev->data->dev_private);

	return rte_bbdev_release(bbdev);
}

static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = {
	.probe = turbo_sw_bbdev_probe,
	.remove = turbo_sw_bbdev_remove
};

RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv);
RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
	TURBO_SW_MAX_NB_QUEUES_ARG"=<int> "
	TURBO_SW_SOCKET_ID_ARG"=<int>");
RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw);