1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #include <string.h> 6 7 #include <rte_common.h> 8 #include <rte_bus_vdev.h> 9 #include <rte_malloc.h> 10 #include <rte_ring.h> 11 #include <rte_kvargs.h> 12 #include <rte_cycles.h> 13 #include <rte_errno.h> 14 15 #include <rte_bbdev.h> 16 #include <rte_bbdev_pmd.h> 17 18 #include <rte_hexdump.h> 19 #include <rte_log.h> 20 21 #ifdef RTE_BBDEV_SDK_AVX2 22 #include <ipp.h> 23 #include <ipps.h> 24 #include <phy_turbo.h> 25 #include <phy_crc.h> 26 #include <phy_rate_match.h> 27 #endif 28 #ifdef RTE_BBDEV_SDK_AVX512 29 #include <bit_reverse.h> 30 #include <phy_ldpc_encoder_5gnr.h> 31 #include <phy_ldpc_decoder_5gnr.h> 32 #include <phy_LDPC_ratematch_5gnr.h> 33 #include <phy_rate_dematching_5gnr.h> 34 #endif 35 36 #define DRIVER_NAME baseband_turbo_sw 37 38 RTE_LOG_REGISTER_DEFAULT(bbdev_turbo_sw_logtype, NOTICE); 39 40 /* Helper macro for logging */ 41 #define rte_bbdev_log(level, fmt, ...) \ 42 rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \ 43 ##__VA_ARGS__) 44 45 #define rte_bbdev_log_debug(fmt, ...) \ 46 rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \ 47 ##__VA_ARGS__) 48 49 #define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48) 50 #define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6) 51 #define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48) 52 53 /* private data structure */ 54 struct bbdev_private { 55 unsigned int max_nb_queues; /**< Max number of queues */ 56 }; 57 58 /* Initialisation params structure that can be used by Turbo SW driver */ 59 struct turbo_sw_params { 60 int socket_id; /*< Turbo SW device socket */ 61 uint16_t queues_num; /*< Turbo SW device queues number */ 62 }; 63 64 /* Accecptable params for Turbo SW devices */ 65 #define TURBO_SW_MAX_NB_QUEUES_ARG "max_nb_queues" 66 #define TURBO_SW_SOCKET_ID_ARG "socket_id" 67 68 static const char * const turbo_sw_valid_params[] = { 69 TURBO_SW_MAX_NB_QUEUES_ARG, 70 TURBO_SW_SOCKET_ID_ARG 71 }; 72 73 /* queue */ 74 struct turbo_sw_queue { 75 /* Ring for processed (encoded/decoded) operations which are ready to 76 * be dequeued. 77 */ 78 struct rte_ring *processed_pkts; 79 /* Stores input for turbo encoder (used when CRC attachment is 80 * performed 81 */ 82 uint8_t *enc_in; 83 /* Stores output from turbo encoder */ 84 uint8_t *enc_out; 85 /* Alpha gamma buf for bblib_turbo_decoder() function */ 86 int8_t *ag; 87 /* Temp buf for bblib_turbo_decoder() function */ 88 uint16_t *code_block; 89 /* Input buf for bblib_rate_dematching_lte() function */ 90 uint8_t *deint_input; 91 /* Output buf for bblib_rate_dematching_lte() function */ 92 uint8_t *deint_output; 93 /* Output buf for bblib_turbodec_adapter_lte() function */ 94 uint8_t *adapter_output; 95 /* Operation type of this queue */ 96 enum rte_bbdev_op_type type; 97 } __rte_cache_aligned; 98 99 100 #ifdef RTE_BBDEV_SDK_AVX2 101 static inline char * 102 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len) 103 { 104 if (unlikely(len > rte_pktmbuf_tailroom(m))) 105 return NULL; 106 107 char *tail = (char *)m->buf_addr + m->data_off + m->data_len; 108 m->data_len = (uint16_t)(m->data_len + len); 109 m_head->pkt_len = (m_head->pkt_len + len); 110 return tail; 111 } 112 113 /* Calculate index based on Table 5.1.3-3 from TS34.212 */ 114 static inline int32_t 115 compute_idx(uint16_t k) 116 { 117 int32_t result = 0; 118 119 if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE) 120 return -1; 121 122 if (k > 2048) { 123 if ((k - 2048) % 64 != 0) 124 result = -1; 125 126 result = 124 + (k - 2048) / 64; 127 } else if (k <= 512) { 128 if ((k - 40) % 8 != 0) 129 result = -1; 130 131 result = (k - 40) / 8 + 1; 132 } else if (k <= 1024) { 133 if ((k - 512) % 16 != 0) 134 result = -1; 135 136 result = 60 + (k - 512) / 16; 137 } else { /* 1024 < k <= 2048 */ 138 if ((k - 1024) % 32 != 0) 139 result = -1; 140 141 result = 92 + (k - 1024) / 32; 142 } 143 144 return result; 145 } 146 #endif 147 148 /* Read flag value 0/1 from bitmap */ 149 static inline bool 150 check_bit(uint32_t bitmap, uint32_t bitmask) 151 { 152 return bitmap & bitmask; 153 } 154 155 /* Get device info */ 156 static void 157 info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) 158 { 159 struct bbdev_private *internals = dev->data->dev_private; 160 161 static const struct rte_bbdev_op_cap bbdev_capabilities[] = { 162 #ifdef RTE_BBDEV_SDK_AVX2 163 { 164 .type = RTE_BBDEV_OP_TURBO_DEC, 165 .cap.turbo_dec = { 166 .capability_flags = 167 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | 168 RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN | 169 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | 170 RTE_BBDEV_TURBO_CRC_TYPE_24B | 171 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP | 172 RTE_BBDEV_TURBO_EARLY_TERMINATION, 173 .max_llr_modulus = 16, 174 .num_buffers_src = 175 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 176 .num_buffers_hard_out = 177 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 178 .num_buffers_soft_out = 0, 179 } 180 }, 181 { 182 .type = RTE_BBDEV_OP_TURBO_ENC, 183 .cap.turbo_enc = { 184 .capability_flags = 185 RTE_BBDEV_TURBO_CRC_24B_ATTACH | 186 RTE_BBDEV_TURBO_CRC_24A_ATTACH | 187 RTE_BBDEV_TURBO_RATE_MATCH | 188 RTE_BBDEV_TURBO_RV_INDEX_BYPASS, 189 .num_buffers_src = 190 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 191 .num_buffers_dst = 192 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 193 } 194 }, 195 #endif 196 #ifdef RTE_BBDEV_SDK_AVX512 197 { 198 .type = RTE_BBDEV_OP_LDPC_ENC, 199 .cap.ldpc_enc = { 200 .capability_flags = 201 RTE_BBDEV_LDPC_RATE_MATCH | 202 RTE_BBDEV_LDPC_CRC_16_ATTACH | 203 RTE_BBDEV_LDPC_CRC_24A_ATTACH | 204 RTE_BBDEV_LDPC_CRC_24B_ATTACH, 205 .num_buffers_src = 206 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 207 .num_buffers_dst = 208 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 209 } 210 }, 211 { 212 .type = RTE_BBDEV_OP_LDPC_DEC, 213 .cap.ldpc_dec = { 214 .capability_flags = 215 RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK | 216 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | 217 RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | 218 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | 219 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 220 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 221 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE, 222 .llr_size = 8, 223 .llr_decimals = 4, 224 .num_buffers_src = 225 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 226 .num_buffers_hard_out = 227 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 228 .num_buffers_soft_out = 0, 229 } 230 }, 231 #endif 232 RTE_BBDEV_END_OF_CAPABILITIES_LIST() 233 }; 234 235 static struct rte_bbdev_queue_conf default_queue_conf = { 236 .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT, 237 }; 238 #ifdef RTE_BBDEV_SDK_AVX2 239 static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2; 240 dev_info->cpu_flag_reqs = &cpu_flag; 241 #else 242 dev_info->cpu_flag_reqs = NULL; 243 #endif 244 default_queue_conf.socket = dev->data->socket_id; 245 246 dev_info->driver_name = RTE_STR(DRIVER_NAME); 247 dev_info->max_num_queues = internals->max_nb_queues; 248 dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT; 249 dev_info->hardware_accelerated = false; 250 dev_info->max_dl_queue_priority = 0; 251 dev_info->max_ul_queue_priority = 0; 252 dev_info->default_queue_conf = default_queue_conf; 253 dev_info->capabilities = bbdev_capabilities; 254 dev_info->min_alignment = 64; 255 dev_info->harq_buffer_size = 0; 256 257 rte_bbdev_log_debug("got device info from %u\n", dev->data->dev_id); 258 } 259 260 /* Release queue */ 261 static int 262 q_release(struct rte_bbdev *dev, uint16_t q_id) 263 { 264 struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private; 265 266 if (q != NULL) { 267 rte_ring_free(q->processed_pkts); 268 rte_free(q->enc_out); 269 rte_free(q->enc_in); 270 rte_free(q->ag); 271 rte_free(q->code_block); 272 rte_free(q->deint_input); 273 rte_free(q->deint_output); 274 rte_free(q->adapter_output); 275 rte_free(q); 276 dev->data->queues[q_id].queue_private = NULL; 277 } 278 279 rte_bbdev_log_debug("released device queue %u:%u", 280 dev->data->dev_id, q_id); 281 return 0; 282 } 283 284 /* Setup a queue */ 285 static int 286 q_setup(struct rte_bbdev *dev, uint16_t q_id, 287 const struct rte_bbdev_queue_conf *queue_conf) 288 { 289 int ret; 290 struct turbo_sw_queue *q; 291 char name[RTE_RING_NAMESIZE]; 292 293 /* Allocate the queue data structure. */ 294 q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q), 295 RTE_CACHE_LINE_SIZE, queue_conf->socket); 296 if (q == NULL) { 297 rte_bbdev_log(ERR, "Failed to allocate queue memory"); 298 return -ENOMEM; 299 } 300 301 /* Allocate memory for encoder output. */ 302 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u", 303 dev->data->dev_id, q_id); 304 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 305 rte_bbdev_log(ERR, 306 "Creating queue name for device %u queue %u failed", 307 dev->data->dev_id, q_id); 308 ret = -ENAMETOOLONG; 309 goto free_q; 310 } 311 q->enc_out = rte_zmalloc_socket(name, 312 ((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) * 313 sizeof(*q->enc_out) * 3, 314 RTE_CACHE_LINE_SIZE, queue_conf->socket); 315 if (q->enc_out == NULL) { 316 rte_bbdev_log(ERR, 317 "Failed to allocate queue memory for %s", name); 318 ret = -ENOMEM; 319 goto free_q; 320 } 321 322 /* Allocate memory for rate matching output. */ 323 ret = snprintf(name, RTE_RING_NAMESIZE, 324 RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id, 325 q_id); 326 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 327 rte_bbdev_log(ERR, 328 "Creating queue name for device %u queue %u failed", 329 dev->data->dev_id, q_id); 330 ret = -ENAMETOOLONG; 331 goto free_q; 332 } 333 q->enc_in = rte_zmalloc_socket(name, 334 (RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in), 335 RTE_CACHE_LINE_SIZE, queue_conf->socket); 336 if (q->enc_in == NULL) { 337 rte_bbdev_log(ERR, 338 "Failed to allocate queue memory for %s", name); 339 ret = -ENOMEM; 340 goto free_q; 341 } 342 343 /* Allocate memory for Alpha Gamma temp buffer. */ 344 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u", 345 dev->data->dev_id, q_id); 346 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 347 rte_bbdev_log(ERR, 348 "Creating queue name for device %u queue %u failed", 349 dev->data->dev_id, q_id); 350 ret = -ENAMETOOLONG; 351 goto free_q; 352 } 353 q->ag = rte_zmalloc_socket(name, 354 RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag), 355 RTE_CACHE_LINE_SIZE, queue_conf->socket); 356 if (q->ag == NULL) { 357 rte_bbdev_log(ERR, 358 "Failed to allocate queue memory for %s", name); 359 ret = -ENOMEM; 360 goto free_q; 361 } 362 363 /* Allocate memory for code block temp buffer. */ 364 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u", 365 dev->data->dev_id, q_id); 366 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 367 rte_bbdev_log(ERR, 368 "Creating queue name for device %u queue %u failed", 369 dev->data->dev_id, q_id); 370 ret = -ENAMETOOLONG; 371 goto free_q; 372 } 373 q->code_block = rte_zmalloc_socket(name, 374 RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block), 375 RTE_CACHE_LINE_SIZE, queue_conf->socket); 376 if (q->code_block == NULL) { 377 rte_bbdev_log(ERR, 378 "Failed to allocate queue memory for %s", name); 379 ret = -ENOMEM; 380 goto free_q; 381 } 382 383 /* Allocate memory for Deinterleaver input. */ 384 ret = snprintf(name, RTE_RING_NAMESIZE, 385 RTE_STR(DRIVER_NAME)"_de_i%u:%u", 386 dev->data->dev_id, q_id); 387 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 388 rte_bbdev_log(ERR, 389 "Creating queue name for device %u queue %u failed", 390 dev->data->dev_id, q_id); 391 ret = -ENAMETOOLONG; 392 goto free_q; 393 } 394 q->deint_input = rte_zmalloc_socket(name, 395 DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input), 396 RTE_CACHE_LINE_SIZE, queue_conf->socket); 397 if (q->deint_input == NULL) { 398 rte_bbdev_log(ERR, 399 "Failed to allocate queue memory for %s", name); 400 ret = -ENOMEM; 401 goto free_q; 402 } 403 404 /* Allocate memory for Deinterleaver output. */ 405 ret = snprintf(name, RTE_RING_NAMESIZE, 406 RTE_STR(DRIVER_NAME)"_de_o%u:%u", 407 dev->data->dev_id, q_id); 408 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 409 rte_bbdev_log(ERR, 410 "Creating queue name for device %u queue %u failed", 411 dev->data->dev_id, q_id); 412 ret = -ENAMETOOLONG; 413 goto free_q; 414 } 415 q->deint_output = rte_zmalloc_socket(NULL, 416 DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output), 417 RTE_CACHE_LINE_SIZE, queue_conf->socket); 418 if (q->deint_output == NULL) { 419 rte_bbdev_log(ERR, 420 "Failed to allocate queue memory for %s", name); 421 ret = -ENOMEM; 422 goto free_q; 423 } 424 425 /* Allocate memory for Adapter output. */ 426 ret = snprintf(name, RTE_RING_NAMESIZE, 427 RTE_STR(DRIVER_NAME)"_ada_o%u:%u", 428 dev->data->dev_id, q_id); 429 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 430 rte_bbdev_log(ERR, 431 "Creating queue name for device %u queue %u failed", 432 dev->data->dev_id, q_id); 433 ret = -ENAMETOOLONG; 434 goto free_q; 435 } 436 q->adapter_output = rte_zmalloc_socket(NULL, 437 ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output), 438 RTE_CACHE_LINE_SIZE, queue_conf->socket); 439 if (q->adapter_output == NULL) { 440 rte_bbdev_log(ERR, 441 "Failed to allocate queue memory for %s", name); 442 ret = -ENOMEM; 443 goto free_q; 444 } 445 446 /* Create ring for packets awaiting to be dequeued. */ 447 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u", 448 dev->data->dev_id, q_id); 449 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 450 rte_bbdev_log(ERR, 451 "Creating queue name for device %u queue %u failed", 452 dev->data->dev_id, q_id); 453 ret = -ENAMETOOLONG; 454 goto free_q; 455 } 456 q->processed_pkts = rte_ring_create(name, queue_conf->queue_size, 457 queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ); 458 if (q->processed_pkts == NULL) { 459 rte_bbdev_log(ERR, "Failed to create ring for %s", name); 460 ret = -rte_errno; 461 goto free_q; 462 } 463 464 q->type = queue_conf->op_type; 465 466 dev->data->queues[q_id].queue_private = q; 467 rte_bbdev_log_debug("setup device queue %s", name); 468 return 0; 469 470 free_q: 471 rte_ring_free(q->processed_pkts); 472 rte_free(q->enc_out); 473 rte_free(q->enc_in); 474 rte_free(q->ag); 475 rte_free(q->code_block); 476 rte_free(q->deint_input); 477 rte_free(q->deint_output); 478 rte_free(q->adapter_output); 479 rte_free(q); 480 return ret; 481 } 482 483 static const struct rte_bbdev_ops pmd_ops = { 484 .info_get = info_get, 485 .queue_setup = q_setup, 486 .queue_release = q_release 487 }; 488 489 #ifdef RTE_BBDEV_SDK_AVX2 490 #ifdef RTE_LIBRTE_BBDEV_DEBUG 491 /* Checks if the encoder input buffer is correct. 492 * Returns 0 if it's valid, -1 otherwise. 493 */ 494 static inline int 495 is_enc_input_valid(const uint16_t k, const int32_t k_idx, 496 const uint16_t in_length) 497 { 498 if (k_idx < 0) { 499 rte_bbdev_log(ERR, "K Index is invalid"); 500 return -1; 501 } 502 503 if (in_length - (k >> 3) < 0) { 504 rte_bbdev_log(ERR, 505 "Mismatch between input length (%u bytes) and K (%u bits)", 506 in_length, k); 507 return -1; 508 } 509 510 if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) { 511 rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d", 512 k, RTE_BBDEV_TURBO_MAX_CB_SIZE); 513 return -1; 514 } 515 516 return 0; 517 } 518 519 /* Checks if the decoder input buffer is correct. 520 * Returns 0 if it's valid, -1 otherwise. 521 */ 522 static inline int 523 is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length) 524 { 525 if (k_idx < 0) { 526 rte_bbdev_log(ERR, "K index is invalid"); 527 return -1; 528 } 529 530 if (in_length < kw) { 531 rte_bbdev_log(ERR, 532 "Mismatch between input length (%u) and kw (%u)", 533 in_length, kw); 534 return -1; 535 } 536 537 if (kw > RTE_BBDEV_TURBO_MAX_KW) { 538 rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d", 539 kw, RTE_BBDEV_TURBO_MAX_KW); 540 return -1; 541 } 542 543 return 0; 544 } 545 #endif 546 #endif 547 548 static inline void 549 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 550 uint8_t r, uint8_t c, uint16_t k, uint16_t ncb, 551 uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, 552 struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, 553 uint16_t in_length, struct rte_bbdev_stats *q_stats) 554 { 555 #ifdef RTE_BBDEV_SDK_AVX2 556 #ifdef RTE_LIBRTE_BBDEV_DEBUG 557 int ret; 558 #else 559 RTE_SET_USED(in_length); 560 #endif 561 int16_t k_idx; 562 uint16_t m; 563 uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out; 564 uint64_t first_3_bytes = 0; 565 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc; 566 struct bblib_crc_request crc_req; 567 struct bblib_crc_response crc_resp; 568 struct bblib_turbo_encoder_request turbo_req; 569 struct bblib_turbo_encoder_response turbo_resp; 570 struct bblib_rate_match_dl_request rm_req; 571 struct bblib_rate_match_dl_response rm_resp; 572 #ifdef RTE_BBDEV_OFFLOAD_COST 573 uint64_t start_time; 574 #else 575 RTE_SET_USED(q_stats); 576 #endif 577 578 k_idx = compute_idx(k); 579 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 580 581 /* CRC24A (for TB) */ 582 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) && 583 (enc->code_block_mode == RTE_BBDEV_CODE_BLOCK)) { 584 #ifdef RTE_LIBRTE_BBDEV_DEBUG 585 ret = is_enc_input_valid(k - 24, k_idx, in_length); 586 if (ret != 0) { 587 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 588 return; 589 } 590 #endif 591 592 crc_req.data = in; 593 crc_req.len = k - 24; 594 /* Check if there is a room for CRC bits if not use 595 * the temporary buffer. 596 */ 597 if (mbuf_append(m_in, m_in, 3) == NULL) { 598 rte_memcpy(q->enc_in, in, (k - 24) >> 3); 599 in = q->enc_in; 600 } else { 601 /* Store 3 first bytes of next CB as they will be 602 * overwritten by CRC bytes. If it is the last CB then 603 * there is no point to store 3 next bytes and this 604 * if..else branch will be omitted. 605 */ 606 first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]); 607 } 608 609 crc_resp.data = in; 610 #ifdef RTE_BBDEV_OFFLOAD_COST 611 start_time = rte_rdtsc_precise(); 612 #endif 613 /* CRC24A generation */ 614 bblib_lte_crc24a_gen(&crc_req, &crc_resp); 615 #ifdef RTE_BBDEV_OFFLOAD_COST 616 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 617 #endif 618 } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) { 619 /* CRC24B */ 620 #ifdef RTE_LIBRTE_BBDEV_DEBUG 621 ret = is_enc_input_valid(k - 24, k_idx, in_length); 622 if (ret != 0) { 623 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 624 return; 625 } 626 #endif 627 628 crc_req.data = in; 629 crc_req.len = k - 24; 630 /* Check if there is a room for CRC bits if this is the last 631 * CB in TB. If not use temporary buffer. 632 */ 633 if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) { 634 rte_memcpy(q->enc_in, in, (k - 24) >> 3); 635 in = q->enc_in; 636 } else if (c - r > 1) { 637 /* Store 3 first bytes of next CB as they will be 638 * overwritten by CRC bytes. If it is the last CB then 639 * there is no point to store 3 next bytes and this 640 * if..else branch will be omitted. 641 */ 642 first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]); 643 } 644 645 crc_resp.data = in; 646 #ifdef RTE_BBDEV_OFFLOAD_COST 647 start_time = rte_rdtsc_precise(); 648 #endif 649 /* CRC24B generation */ 650 bblib_lte_crc24b_gen(&crc_req, &crc_resp); 651 #ifdef RTE_BBDEV_OFFLOAD_COST 652 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 653 #endif 654 } 655 #ifdef RTE_LIBRTE_BBDEV_DEBUG 656 else { 657 ret = is_enc_input_valid(k, k_idx, in_length); 658 if (ret != 0) { 659 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 660 return; 661 } 662 } 663 #endif 664 665 /* Turbo encoder */ 666 667 /* Each bit layer output from turbo encoder is (k+4) bits long, i.e. 668 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up. 669 * So dst_data's length should be 3*(k/8) + 3 bytes. 670 * In Rate-matching bypass case outputs pointers passed to encoder 671 * (out0, out1 and out2) can directly point to addresses of output from 672 * turbo_enc entity. 673 */ 674 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) { 675 out0 = q->enc_out; 676 out1 = RTE_PTR_ADD(out0, (k >> 3) + 1); 677 out2 = RTE_PTR_ADD(out1, (k >> 3) + 1); 678 } else { 679 out0 = (uint8_t *)mbuf_append(m_out_head, m_out, 680 (k >> 3) * 3 + 2); 681 if (out0 == NULL) { 682 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 683 rte_bbdev_log(ERR, 684 "Too little space in output mbuf"); 685 return; 686 } 687 enc->output.length += (k >> 3) * 3 + 2; 688 /* rte_bbdev_op_data.offset can be different than the 689 * offset of the appended bytes 690 */ 691 out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 692 out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, 693 out_offset + (k >> 3) + 1); 694 out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, 695 out_offset + 2 * ((k >> 3) + 1)); 696 } 697 698 turbo_req.case_id = k_idx; 699 turbo_req.input_win = in; 700 turbo_req.length = k >> 3; 701 turbo_resp.output_win_0 = out0; 702 turbo_resp.output_win_1 = out1; 703 turbo_resp.output_win_2 = out2; 704 705 #ifdef RTE_BBDEV_OFFLOAD_COST 706 start_time = rte_rdtsc_precise(); 707 #endif 708 /* Turbo encoding */ 709 if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) { 710 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 711 rte_bbdev_log(ERR, "Turbo Encoder failed"); 712 return; 713 } 714 #ifdef RTE_BBDEV_OFFLOAD_COST 715 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 716 #endif 717 718 /* Restore 3 first bytes of next CB if they were overwritten by CRC*/ 719 if (first_3_bytes != 0) 720 *((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes; 721 722 /* Rate-matching */ 723 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) { 724 uint8_t mask_id; 725 /* Integer round up division by 8 */ 726 uint16_t out_len = (e + 7) >> 3; 727 /* The mask array is indexed using E%8. E is an even number so 728 * there are only 4 possible values. 729 */ 730 const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC}; 731 732 /* get output data starting address */ 733 rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len); 734 if (rm_out == NULL) { 735 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 736 rte_bbdev_log(ERR, 737 "Too little space in output mbuf"); 738 return; 739 } 740 /* rte_bbdev_op_data.offset can be different than the offset 741 * of the appended bytes 742 */ 743 rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 744 745 /* index of current code block */ 746 rm_req.r = r; 747 /* total number of code block */ 748 rm_req.C = c; 749 /* For DL - 1, UL - 0 */ 750 rm_req.direction = 1; 751 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nsoft, KMIMO 752 * and MDL_HARQ are used for Ncb calculation. As Ncb is already 753 * known we can adjust those parameters 754 */ 755 rm_req.Nsoft = ncb * rm_req.C; 756 rm_req.KMIMO = 1; 757 rm_req.MDL_HARQ = 1; 758 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nl, Qm and G 759 * are used for E calculation. As E is already known we can 760 * adjust those parameters 761 */ 762 rm_req.NL = e; 763 rm_req.Qm = 1; 764 rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C; 765 766 rm_req.rvidx = enc->rv_index; 767 rm_req.Kidx = k_idx - 1; 768 rm_req.nLen = k + 4; 769 rm_req.tin0 = out0; 770 rm_req.tin1 = out1; 771 rm_req.tin2 = out2; 772 rm_resp.output = rm_out; 773 rm_resp.OutputLen = out_len; 774 if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS) 775 rm_req.bypass_rvidx = 1; 776 else 777 rm_req.bypass_rvidx = 0; 778 779 #ifdef RTE_BBDEV_OFFLOAD_COST 780 start_time = rte_rdtsc_precise(); 781 #endif 782 /* Rate-Matching */ 783 if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) { 784 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 785 rte_bbdev_log(ERR, "Rate matching failed"); 786 return; 787 } 788 #ifdef RTE_BBDEV_OFFLOAD_COST 789 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 790 #endif 791 792 /* SW fills an entire last byte even if E%8 != 0. Clear the 793 * superfluous data bits for consistency with HW device. 794 */ 795 mask_id = (e & 7) >> 1; 796 rm_out[out_len - 1] &= mask_out[mask_id]; 797 enc->output.length += rm_resp.OutputLen; 798 } else { 799 /* Rate matching is bypassed */ 800 801 /* Completing last byte of out0 (where 4 tail bits are stored) 802 * by moving first 4 bits from out1 803 */ 804 tmp_out = (uint8_t *) --out1; 805 *tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4); 806 tmp_out++; 807 /* Shifting out1 data by 4 bits to the left */ 808 for (m = 0; m < k >> 3; ++m) { 809 uint8_t *first = tmp_out; 810 uint8_t second = *(tmp_out + 1); 811 *first = (*first << 4) | ((second & 0xF0) >> 4); 812 tmp_out++; 813 } 814 /* Shifting out2 data by 8 bits to the left */ 815 for (m = 0; m < (k >> 3) + 1; ++m) { 816 *tmp_out = *(tmp_out + 1); 817 tmp_out++; 818 } 819 *tmp_out = 0; 820 } 821 #else 822 RTE_SET_USED(q); 823 RTE_SET_USED(op); 824 RTE_SET_USED(r); 825 RTE_SET_USED(c); 826 RTE_SET_USED(k); 827 RTE_SET_USED(ncb); 828 RTE_SET_USED(e); 829 RTE_SET_USED(m_in); 830 RTE_SET_USED(m_out_head); 831 RTE_SET_USED(m_out); 832 RTE_SET_USED(in_offset); 833 RTE_SET_USED(out_offset); 834 RTE_SET_USED(in_length); 835 RTE_SET_USED(q_stats); 836 #endif 837 } 838 839 840 static inline void 841 process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 842 uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, 843 struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, 844 uint16_t seg_total_left, struct rte_bbdev_stats *q_stats) 845 { 846 #ifdef RTE_BBDEV_SDK_AVX512 847 RTE_SET_USED(seg_total_left); 848 uint8_t *in, *rm_out; 849 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 850 struct bblib_ldpc_encoder_5gnr_request ldpc_req; 851 struct bblib_ldpc_encoder_5gnr_response ldpc_resp; 852 struct bblib_LDPC_ratematch_5gnr_request rm_req; 853 struct bblib_LDPC_ratematch_5gnr_response rm_resp; 854 struct bblib_crc_request crc_req; 855 struct bblib_crc_response crc_resp; 856 uint16_t msgLen, puntBits, parity_offset, out_len; 857 uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; 858 uint16_t in_length_in_bits = K - enc->n_filler; 859 uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3; 860 861 #ifdef RTE_BBDEV_OFFLOAD_COST 862 uint64_t start_time = rte_rdtsc_precise(); 863 #else 864 RTE_SET_USED(q_stats); 865 #endif 866 867 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 868 869 /* Masking the Filler bits explicitly */ 870 memset(q->enc_in + (in_length_in_bytes - 3), 0, 871 ((K + 7) >> 3) - (in_length_in_bytes - 3)); 872 /* CRC Generation */ 873 if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) { 874 rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); 875 crc_req.data = in; 876 crc_req.len = in_length_in_bits - 24; 877 crc_resp.data = q->enc_in; 878 bblib_lte_crc24a_gen(&crc_req, &crc_resp); 879 } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) { 880 rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); 881 crc_req.data = in; 882 crc_req.len = in_length_in_bits - 24; 883 crc_resp.data = q->enc_in; 884 bblib_lte_crc24b_gen(&crc_req, &crc_resp); 885 } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_16_ATTACH) { 886 rte_memcpy(q->enc_in, in, in_length_in_bytes - 2); 887 crc_req.data = in; 888 crc_req.len = in_length_in_bits - 16; 889 crc_resp.data = q->enc_in; 890 bblib_lte_crc16_gen(&crc_req, &crc_resp); 891 } else 892 rte_memcpy(q->enc_in, in, in_length_in_bytes); 893 894 /* LDPC Encoding */ 895 ldpc_req.Zc = enc->z_c; 896 ldpc_req.baseGraph = enc->basegraph; 897 /* Number of rows set to maximum */ 898 ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42; 899 ldpc_req.numberCodeblocks = 1; 900 ldpc_req.input[0] = (int8_t *) q->enc_in; 901 ldpc_resp.output[0] = (int8_t *) q->enc_out; 902 903 bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3); 904 905 if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) { 906 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 907 rte_bbdev_log(ERR, "LDPC Encoder failed"); 908 return; 909 } 910 911 /* 912 * Systematic + Parity : Recreating stream with filler bits, ideally 913 * the bit select could handle this in the RM SDK 914 */ 915 msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc; 916 puntBits = 2 * ldpc_req.Zc; 917 parity_offset = msgLen - puntBits; 918 ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8), 919 puntBits%8, q->adapter_output, 0, parity_offset); 920 ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8), 921 parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc); 922 923 out_len = (e + 7) >> 3; 924 /* get output data starting address */ 925 rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len); 926 if (rm_out == NULL) { 927 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 928 rte_bbdev_log(ERR, 929 "Too little space in output mbuf"); 930 return; 931 } 932 /* 933 * rte_bbdev_op_data.offset can be different than the offset 934 * of the appended bytes 935 */ 936 rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 937 938 /* Rate-Matching */ 939 rm_req.E = e; 940 rm_req.Ncb = enc->n_cb; 941 rm_req.Qm = enc->q_m; 942 rm_req.Zc = enc->z_c; 943 rm_req.baseGraph = enc->basegraph; 944 rm_req.input = q->adapter_output; 945 rm_req.nLen = enc->n_filler; 946 rm_req.nullIndex = parity_offset - enc->n_filler; 947 rm_req.rvidx = enc->rv_index; 948 rm_resp.output = q->deint_output; 949 950 if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) { 951 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 952 rte_bbdev_log(ERR, "Rate matching failed"); 953 return; 954 } 955 956 /* RM SDK may provide non zero bits on last byte */ 957 if ((e % 8) != 0) 958 q->deint_output[out_len-1] &= (1 << (e % 8)) - 1; 959 960 bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3); 961 962 rte_memcpy(rm_out, q->deint_output, out_len); 963 enc->output.length += out_len; 964 965 #ifdef RTE_BBDEV_OFFLOAD_COST 966 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 967 #endif 968 #else 969 RTE_SET_USED(q); 970 RTE_SET_USED(op); 971 RTE_SET_USED(e); 972 RTE_SET_USED(m_in); 973 RTE_SET_USED(m_out_head); 974 RTE_SET_USED(m_out); 975 RTE_SET_USED(in_offset); 976 RTE_SET_USED(out_offset); 977 RTE_SET_USED(seg_total_left); 978 RTE_SET_USED(q_stats); 979 #endif 980 } 981 982 static inline void 983 enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 984 struct rte_bbdev_stats *queue_stats) 985 { 986 uint8_t c, r, crc24_bits = 0; 987 uint16_t k, ncb; 988 uint32_t e; 989 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc; 990 uint16_t in_offset = enc->input.offset; 991 uint16_t out_offset = enc->output.offset; 992 struct rte_mbuf *m_in = enc->input.data; 993 struct rte_mbuf *m_out = enc->output.data; 994 struct rte_mbuf *m_out_head = enc->output.data; 995 uint32_t in_length, mbuf_total_left = enc->input.length; 996 uint16_t seg_total_left; 997 998 /* Clear op status */ 999 op->status = 0; 1000 1001 if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { 1002 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", 1003 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); 1004 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1005 return; 1006 } 1007 1008 if (m_in == NULL || m_out == NULL) { 1009 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1010 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1011 return; 1012 } 1013 1014 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || 1015 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) 1016 crc24_bits = 24; 1017 1018 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1019 c = enc->tb_params.c; 1020 r = enc->tb_params.r; 1021 } else {/* For Code Block mode */ 1022 c = 1; 1023 r = 0; 1024 } 1025 1026 while (mbuf_total_left > 0 && r < c) { 1027 1028 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1029 1030 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1031 k = (r < enc->tb_params.c_neg) ? 1032 enc->tb_params.k_neg : enc->tb_params.k_pos; 1033 ncb = (r < enc->tb_params.c_neg) ? 1034 enc->tb_params.ncb_neg : enc->tb_params.ncb_pos; 1035 e = (r < enc->tb_params.cab) ? 1036 enc->tb_params.ea : enc->tb_params.eb; 1037 } else { 1038 k = enc->cb_params.k; 1039 ncb = enc->cb_params.ncb; 1040 e = enc->cb_params.e; 1041 } 1042 1043 process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head, 1044 m_out, in_offset, out_offset, seg_total_left, 1045 queue_stats); 1046 /* Update total_left */ 1047 in_length = ((k - crc24_bits) >> 3); 1048 mbuf_total_left -= in_length; 1049 /* Update offsets for next CBs (if exist) */ 1050 in_offset += (k - crc24_bits) >> 3; 1051 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) 1052 out_offset += e >> 3; 1053 else 1054 out_offset += (k >> 3) * 3 + 2; 1055 1056 /* Update offsets */ 1057 if (seg_total_left == in_length) { 1058 /* Go to the next mbuf */ 1059 m_in = m_in->next; 1060 m_out = m_out->next; 1061 in_offset = 0; 1062 out_offset = 0; 1063 } 1064 r++; 1065 } 1066 1067 /* check if all input data was processed */ 1068 if (mbuf_total_left != 0) { 1069 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1070 rte_bbdev_log(ERR, 1071 "Mismatch between mbuf length and included CBs sizes"); 1072 } 1073 } 1074 1075 1076 static inline void 1077 enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 1078 struct rte_bbdev_stats *queue_stats) 1079 { 1080 uint8_t c, r, crc24_bits = 0; 1081 uint32_t e; 1082 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 1083 uint16_t in_offset = enc->input.offset; 1084 uint16_t out_offset = enc->output.offset; 1085 struct rte_mbuf *m_in = enc->input.data; 1086 struct rte_mbuf *m_out = enc->output.data; 1087 struct rte_mbuf *m_out_head = enc->output.data; 1088 uint32_t in_length, mbuf_total_left = enc->input.length; 1089 1090 uint16_t seg_total_left; 1091 1092 /* Clear op status */ 1093 op->status = 0; 1094 1095 if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { 1096 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", 1097 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); 1098 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1099 return; 1100 } 1101 1102 if (m_in == NULL || m_out == NULL) { 1103 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1104 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1105 return; 1106 } 1107 1108 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || 1109 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) 1110 crc24_bits = 24; 1111 1112 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1113 c = enc->tb_params.c; 1114 r = enc->tb_params.r; 1115 } else { /* For Code Block mode */ 1116 c = 1; 1117 r = 0; 1118 } 1119 1120 while (mbuf_total_left > 0 && r < c) { 1121 1122 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1123 1124 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1125 e = (r < enc->tb_params.cab) ? 1126 enc->tb_params.ea : enc->tb_params.eb; 1127 } else { 1128 e = enc->cb_params.e; 1129 } 1130 1131 process_ldpc_enc_cb(q, op, e, m_in, m_out_head, 1132 m_out, in_offset, out_offset, seg_total_left, 1133 queue_stats); 1134 /* Update total_left */ 1135 in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; 1136 in_length = ((in_length - crc24_bits - enc->n_filler) >> 3); 1137 mbuf_total_left -= in_length; 1138 /* Update offsets for next CBs (if exist) */ 1139 in_offset += in_length; 1140 out_offset += (e + 7) >> 3; 1141 1142 /* Update offsets */ 1143 if (seg_total_left == in_length) { 1144 /* Go to the next mbuf */ 1145 m_in = m_in->next; 1146 m_out = m_out->next; 1147 in_offset = 0; 1148 out_offset = 0; 1149 } 1150 r++; 1151 } 1152 1153 /* check if all input data was processed */ 1154 if (mbuf_total_left != 0) { 1155 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1156 rte_bbdev_log(ERR, 1157 "Mismatch between mbuf length and included CBs sizes %d", 1158 mbuf_total_left); 1159 } 1160 } 1161 1162 static inline uint16_t 1163 enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops, 1164 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1165 { 1166 uint16_t i; 1167 #ifdef RTE_BBDEV_OFFLOAD_COST 1168 queue_stats->acc_offload_cycles = 0; 1169 #endif 1170 1171 for (i = 0; i < nb_ops; ++i) 1172 enqueue_enc_one_op(q, ops[i], queue_stats); 1173 1174 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1175 NULL); 1176 } 1177 1178 static inline uint16_t 1179 enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q, 1180 struct rte_bbdev_enc_op **ops, 1181 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1182 { 1183 uint16_t i; 1184 #ifdef RTE_BBDEV_OFFLOAD_COST 1185 queue_stats->acc_offload_cycles = 0; 1186 #endif 1187 1188 for (i = 0; i < nb_ops; ++i) 1189 enqueue_ldpc_enc_one_op(q, ops[i], queue_stats); 1190 1191 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1192 NULL); 1193 } 1194 1195 #ifdef RTE_BBDEV_SDK_AVX2 1196 static inline void 1197 move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k, 1198 uint16_t ncb) 1199 { 1200 uint16_t d = k + 4; 1201 uint16_t kpi = ncb / 3; 1202 uint16_t nd = kpi - d; 1203 1204 rte_memcpy(&out[nd], in, d); 1205 rte_memcpy(&out[nd + kpi + 64], &in[kpi], d); 1206 rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d); 1207 } 1208 #endif 1209 1210 static inline void 1211 process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1212 uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in, 1213 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, 1214 uint16_t in_offset, uint16_t out_offset, bool check_crc_24b, 1215 uint16_t crc24_overlap, uint16_t in_length, 1216 struct rte_bbdev_stats *q_stats) 1217 { 1218 #ifdef RTE_BBDEV_SDK_AVX2 1219 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1220 int ret; 1221 #else 1222 RTE_SET_USED(in_length); 1223 #endif 1224 int32_t k_idx; 1225 int32_t iter_cnt; 1226 uint8_t *in, *out, *adapter_input; 1227 int32_t ncb, ncb_without_null; 1228 struct bblib_turbo_adapter_ul_response adapter_resp; 1229 struct bblib_turbo_adapter_ul_request adapter_req; 1230 struct bblib_turbo_decoder_request turbo_req; 1231 struct bblib_turbo_decoder_response turbo_resp; 1232 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec; 1233 #ifdef RTE_BBDEV_OFFLOAD_COST 1234 uint64_t start_time; 1235 #else 1236 RTE_SET_USED(q_stats); 1237 #endif 1238 1239 k_idx = compute_idx(k); 1240 1241 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1242 ret = is_dec_input_valid(k_idx, kw, in_length); 1243 if (ret != 0) { 1244 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1245 return; 1246 } 1247 #endif 1248 1249 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 1250 ncb = kw; 1251 ncb_without_null = (k + 4) * 3; 1252 1253 if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) { 1254 struct bblib_deinterleave_ul_request deint_req; 1255 struct bblib_deinterleave_ul_response deint_resp; 1256 1257 deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER; 1258 deint_req.pharqbuffer = in; 1259 deint_req.ncb = ncb; 1260 deint_resp.pinteleavebuffer = q->deint_output; 1261 1262 #ifdef RTE_BBDEV_OFFLOAD_COST 1263 start_time = rte_rdtsc_precise(); 1264 #endif 1265 /* Sub-block De-Interleaving */ 1266 bblib_deinterleave_ul(&deint_req, &deint_resp); 1267 #ifdef RTE_BBDEV_OFFLOAD_COST 1268 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1269 #endif 1270 } else 1271 move_padding_bytes(in, q->deint_output, k, ncb); 1272 1273 adapter_input = q->deint_output; 1274 1275 if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN) 1276 adapter_req.isinverted = 1; 1277 else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN) 1278 adapter_req.isinverted = 0; 1279 else { 1280 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 1281 rte_bbdev_log(ERR, "LLR format wasn't specified"); 1282 return; 1283 } 1284 1285 adapter_req.ncb = ncb_without_null; 1286 adapter_req.pinteleavebuffer = adapter_input; 1287 adapter_resp.pharqout = q->adapter_output; 1288 1289 #ifdef RTE_BBDEV_OFFLOAD_COST 1290 start_time = rte_rdtsc_precise(); 1291 #endif 1292 /* Turbo decode adaptation */ 1293 bblib_turbo_adapter_ul(&adapter_req, &adapter_resp); 1294 #ifdef RTE_BBDEV_OFFLOAD_COST 1295 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1296 #endif 1297 1298 out = (uint8_t *)mbuf_append(m_out_head, m_out, 1299 ((k - crc24_overlap) >> 3)); 1300 if (out == NULL) { 1301 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1302 rte_bbdev_log(ERR, "Too little space in output mbuf"); 1303 return; 1304 } 1305 /* rte_bbdev_op_data.offset can be different than the offset of the 1306 * appended bytes 1307 */ 1308 out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 1309 if (check_crc_24b) 1310 turbo_req.c = c + 1; 1311 else 1312 turbo_req.c = c; 1313 turbo_req.input = (int8_t *)q->adapter_output; 1314 turbo_req.k = k; 1315 turbo_req.k_idx = k_idx; 1316 turbo_req.max_iter_num = dec->iter_max; 1317 turbo_req.early_term_disable = !check_bit(dec->op_flags, 1318 RTE_BBDEV_TURBO_EARLY_TERMINATION); 1319 turbo_resp.ag_buf = q->ag; 1320 turbo_resp.cb_buf = q->code_block; 1321 turbo_resp.output = out; 1322 1323 #ifdef RTE_BBDEV_OFFLOAD_COST 1324 start_time = rte_rdtsc_precise(); 1325 #endif 1326 /* Turbo decode */ 1327 iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp); 1328 #ifdef RTE_BBDEV_OFFLOAD_COST 1329 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1330 #endif 1331 dec->hard_output.length += (k >> 3); 1332 1333 if (iter_cnt > 0) { 1334 /* Temporary solution for returned iter_count from SDK */ 1335 iter_cnt = (iter_cnt - 1) >> 1; 1336 dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count); 1337 } else { 1338 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1339 rte_bbdev_log(ERR, "Turbo Decoder failed"); 1340 return; 1341 } 1342 #else 1343 RTE_SET_USED(q); 1344 RTE_SET_USED(op); 1345 RTE_SET_USED(c); 1346 RTE_SET_USED(k); 1347 RTE_SET_USED(kw); 1348 RTE_SET_USED(m_in); 1349 RTE_SET_USED(m_out_head); 1350 RTE_SET_USED(m_out); 1351 RTE_SET_USED(in_offset); 1352 RTE_SET_USED(out_offset); 1353 RTE_SET_USED(check_crc_24b); 1354 RTE_SET_USED(crc24_overlap); 1355 RTE_SET_USED(in_length); 1356 RTE_SET_USED(q_stats); 1357 #endif 1358 } 1359 1360 static inline void 1361 process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1362 uint8_t c, uint16_t out_length, uint32_t e, 1363 struct rte_mbuf *m_in, 1364 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, 1365 struct rte_mbuf *m_harq_in, 1366 struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out, 1367 uint16_t in_offset, uint16_t out_offset, 1368 uint16_t harq_in_offset, uint16_t harq_out_offset, 1369 bool check_crc_24b, 1370 uint16_t crc24_overlap, uint16_t in_length, 1371 struct rte_bbdev_stats *q_stats) 1372 { 1373 #ifdef RTE_BBDEV_SDK_AVX512 1374 RTE_SET_USED(in_length); 1375 RTE_SET_USED(c); 1376 uint8_t *in, *out, *harq_in, *harq_out, *adapter_input; 1377 struct bblib_rate_dematching_5gnr_request derm_req; 1378 struct bblib_rate_dematching_5gnr_response derm_resp; 1379 struct bblib_ldpc_decoder_5gnr_request dec_req; 1380 struct bblib_ldpc_decoder_5gnr_response dec_resp; 1381 struct bblib_crc_request crc_req; 1382 struct bblib_crc_response crc_resp; 1383 struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; 1384 uint16_t K, parity_offset, sys_cols, outLenWithCrc; 1385 int16_t deRmOutSize, numRows; 1386 1387 /* Compute some LDPC BG lengths */ 1388 outLenWithCrc = out_length + (crc24_overlap >> 3); 1389 sys_cols = (dec->basegraph == 1) ? 22 : 10; 1390 K = sys_cols * dec->z_c; 1391 parity_offset = K - 2 * dec->z_c; 1392 1393 #ifdef RTE_BBDEV_OFFLOAD_COST 1394 uint64_t start_time = rte_rdtsc_precise(); 1395 #else 1396 RTE_SET_USED(q_stats); 1397 #endif 1398 1399 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 1400 1401 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { 1402 /** 1403 * Single contiguous block from the first LLR of the 1404 * circular buffer. 1405 */ 1406 harq_in = NULL; 1407 if (m_harq_in != NULL) 1408 harq_in = rte_pktmbuf_mtod_offset(m_harq_in, 1409 uint8_t *, harq_in_offset); 1410 if (harq_in == NULL) { 1411 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1412 rte_bbdev_log(ERR, "No space in harq input mbuf"); 1413 return; 1414 } 1415 uint16_t harq_in_length = RTE_MIN( 1416 dec->harq_combined_input.length, 1417 (uint32_t) dec->n_cb); 1418 memset(q->ag + harq_in_length, 0, 1419 dec->n_cb - harq_in_length); 1420 rte_memcpy(q->ag, harq_in, harq_in_length); 1421 } 1422 1423 derm_req.p_in = (int8_t *) in; 1424 derm_req.p_harq = q->ag; /* This doesn't include the filler bits */ 1425 derm_req.base_graph = dec->basegraph; 1426 derm_req.zc = dec->z_c; 1427 derm_req.ncb = dec->n_cb; 1428 derm_req.e = e; 1429 derm_req.k0 = 0; /* Actual output from SDK */ 1430 derm_req.isretx = check_bit(dec->op_flags, 1431 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); 1432 derm_req.rvid = dec->rv_index; 1433 derm_req.modulation_order = dec->q_m; 1434 derm_req.start_null_index = parity_offset - dec->n_filler; 1435 derm_req.num_of_null = dec->n_filler; 1436 1437 bblib_rate_dematching_5gnr(&derm_req, &derm_resp); 1438 1439 /* Compute RM out size and number of rows */ 1440 deRmOutSize = RTE_MIN( 1441 derm_req.k0 + derm_req.e - 1442 ((derm_req.k0 < derm_req.start_null_index) ? 1443 0 : dec->n_filler), 1444 dec->n_cb - dec->n_filler); 1445 if (m_harq_in != NULL) 1446 deRmOutSize = RTE_MAX(deRmOutSize, 1447 RTE_MIN(dec->n_cb - dec->n_filler, 1448 m_harq_in->data_len)); 1449 numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c) 1450 - sys_cols + 2; 1451 numRows = RTE_MAX(4, numRows); 1452 1453 /* get output data starting address */ 1454 out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length); 1455 if (out == NULL) { 1456 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1457 rte_bbdev_log(ERR, 1458 "Too little space in LDPC decoder output mbuf"); 1459 return; 1460 } 1461 1462 /* rte_bbdev_op_data.offset can be different than the offset 1463 * of the appended bytes 1464 */ 1465 out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 1466 adapter_input = q->enc_out; 1467 1468 dec_req.Zc = dec->z_c; 1469 dec_req.baseGraph = dec->basegraph; 1470 dec_req.nRows = numRows; 1471 dec_req.numChannelLlrs = deRmOutSize; 1472 dec_req.varNodes = derm_req.p_harq; 1473 dec_req.numFillerBits = dec->n_filler; 1474 dec_req.maxIterations = dec->iter_max; 1475 dec_req.enableEarlyTermination = check_bit(dec->op_flags, 1476 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE); 1477 dec_resp.varNodes = (int16_t *) q->adapter_output; 1478 dec_resp.compactedMessageBytes = q->enc_out; 1479 1480 bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp); 1481 1482 dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination, 1483 dec->iter_count); 1484 if (!dec_resp.parityPassedAtTermination) 1485 op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR; 1486 1487 bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3); 1488 1489 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) || 1490 check_bit(dec->op_flags, 1491 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) { 1492 crc_req.data = adapter_input; 1493 crc_req.len = K - dec->n_filler - 24; 1494 crc_resp.check_passed = false; 1495 crc_resp.data = adapter_input; 1496 if (check_crc_24b) 1497 bblib_lte_crc24b_check(&crc_req, &crc_resp); 1498 else 1499 bblib_lte_crc24a_check(&crc_req, &crc_resp); 1500 if (!crc_resp.check_passed) 1501 op->status |= 1 << RTE_BBDEV_CRC_ERROR; 1502 } else if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) { 1503 crc_req.data = adapter_input; 1504 crc_req.len = K - dec->n_filler - 16; 1505 crc_resp.check_passed = false; 1506 crc_resp.data = adapter_input; 1507 bblib_lte_crc16_check(&crc_req, &crc_resp); 1508 if (!crc_resp.check_passed) 1509 op->status |= 1 << RTE_BBDEV_CRC_ERROR; 1510 } 1511 1512 #ifdef RTE_BBDEV_OFFLOAD_COST 1513 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1514 #endif 1515 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) { 1516 harq_out = NULL; 1517 if (m_harq_out != NULL) { 1518 /* Initialize HARQ data length since we overwrite */ 1519 m_harq_out->data_len = 0; 1520 /* Check there is enough space 1521 * in the HARQ outbound buffer 1522 */ 1523 harq_out = (uint8_t *)mbuf_append(m_harq_out_head, 1524 m_harq_out, deRmOutSize); 1525 } 1526 if (harq_out == NULL) { 1527 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1528 rte_bbdev_log(ERR, "No space in HARQ output mbuf"); 1529 return; 1530 } 1531 /* get output data starting address and overwrite the data */ 1532 harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *, 1533 harq_out_offset); 1534 rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize); 1535 dec->harq_combined_output.length += deRmOutSize; 1536 } 1537 1538 rte_memcpy(out, adapter_input, out_length); 1539 dec->hard_output.length += out_length; 1540 #else 1541 RTE_SET_USED(q); 1542 RTE_SET_USED(op); 1543 RTE_SET_USED(c); 1544 RTE_SET_USED(out_length); 1545 RTE_SET_USED(e); 1546 RTE_SET_USED(m_in); 1547 RTE_SET_USED(m_out_head); 1548 RTE_SET_USED(m_out); 1549 RTE_SET_USED(m_harq_in); 1550 RTE_SET_USED(m_harq_out_head); 1551 RTE_SET_USED(m_harq_out); 1552 RTE_SET_USED(harq_in_offset); 1553 RTE_SET_USED(harq_out_offset); 1554 RTE_SET_USED(in_offset); 1555 RTE_SET_USED(out_offset); 1556 RTE_SET_USED(check_crc_24b); 1557 RTE_SET_USED(crc24_overlap); 1558 RTE_SET_USED(in_length); 1559 RTE_SET_USED(q_stats); 1560 #endif 1561 } 1562 1563 1564 static inline void 1565 enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1566 struct rte_bbdev_stats *queue_stats) 1567 { 1568 uint8_t c, r = 0; 1569 uint16_t kw, k = 0; 1570 uint16_t crc24_overlap = 0; 1571 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec; 1572 struct rte_mbuf *m_in = dec->input.data; 1573 struct rte_mbuf *m_out = dec->hard_output.data; 1574 struct rte_mbuf *m_out_head = dec->hard_output.data; 1575 uint16_t in_offset = dec->input.offset; 1576 uint16_t out_offset = dec->hard_output.offset; 1577 uint32_t mbuf_total_left = dec->input.length; 1578 uint16_t seg_total_left; 1579 1580 /* Clear op status */ 1581 op->status = 0; 1582 1583 if (m_in == NULL || m_out == NULL) { 1584 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1585 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1586 return; 1587 } 1588 1589 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1590 c = dec->tb_params.c; 1591 } else { /* For Code Block mode */ 1592 k = dec->cb_params.k; 1593 c = 1; 1594 } 1595 1596 if ((c > 1) && !check_bit(dec->op_flags, 1597 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP)) 1598 crc24_overlap = 24; 1599 1600 while (mbuf_total_left > 0) { 1601 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 1602 k = (r < dec->tb_params.c_neg) ? 1603 dec->tb_params.k_neg : dec->tb_params.k_pos; 1604 1605 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1606 1607 /* Calculates circular buffer size (Kw). 1608 * According to 3gpp 36.212 section 5.1.4.2 1609 * Kw = 3 * Kpi, 1610 * where: 1611 * Kpi = nCol * nRow 1612 * where nCol is 32 and nRow can be calculated from: 1613 * D =< nCol * nRow 1614 * where D is the size of each output from turbo encoder block 1615 * (k + 4). 1616 */ 1617 kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3; 1618 1619 process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out, 1620 in_offset, out_offset, check_bit(dec->op_flags, 1621 RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap, 1622 seg_total_left, queue_stats); 1623 1624 /* To keep CRC24 attached to end of Code block, use 1625 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it 1626 * removed by default once verified. 1627 */ 1628 1629 mbuf_total_left -= kw; 1630 1631 /* Update offsets */ 1632 if (seg_total_left == kw) { 1633 /* Go to the next mbuf */ 1634 m_in = m_in->next; 1635 m_out = m_out->next; 1636 in_offset = 0; 1637 out_offset = 0; 1638 } else { 1639 /* Update offsets for next CBs (if exist) */ 1640 in_offset += kw; 1641 out_offset += ((k - crc24_overlap) >> 3); 1642 } 1643 r++; 1644 } 1645 } 1646 1647 static inline void 1648 enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1649 struct rte_bbdev_stats *queue_stats) 1650 { 1651 uint8_t c, r = 0; 1652 uint32_t e; 1653 uint16_t out_length, crc24_overlap = 0; 1654 struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; 1655 struct rte_mbuf *m_in = dec->input.data; 1656 struct rte_mbuf *m_harq_in = dec->harq_combined_input.data; 1657 struct rte_mbuf *m_harq_out = dec->harq_combined_output.data; 1658 struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data; 1659 struct rte_mbuf *m_out = dec->hard_output.data; 1660 struct rte_mbuf *m_out_head = dec->hard_output.data; 1661 uint16_t in_offset = dec->input.offset; 1662 uint16_t harq_in_offset = dec->harq_combined_input.offset; 1663 uint16_t harq_out_offset = dec->harq_combined_output.offset; 1664 uint16_t out_offset = dec->hard_output.offset; 1665 uint32_t mbuf_total_left = dec->input.length; 1666 uint16_t seg_total_left; 1667 1668 /* Clear op status */ 1669 op->status = 0; 1670 1671 if (m_in == NULL || m_out == NULL) { 1672 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1673 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1674 return; 1675 } 1676 1677 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1678 c = dec->tb_params.c; 1679 e = dec->tb_params.ea; 1680 } else { /* For Code Block mode */ 1681 c = 1; 1682 e = dec->cb_params.e; 1683 } 1684 1685 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP)) 1686 crc24_overlap = 24; 1687 1688 out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */ 1689 out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3); 1690 1691 while (mbuf_total_left > 0) { 1692 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 1693 e = (r < dec->tb_params.cab) ? 1694 dec->tb_params.ea : dec->tb_params.eb; 1695 /* Special case handling when overusing mbuf */ 1696 if (e < RTE_BBDEV_LDPC_E_MAX_MBUF) 1697 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1698 else 1699 seg_total_left = e; 1700 1701 process_ldpc_dec_cb(q, op, c, out_length, e, 1702 m_in, m_out_head, m_out, 1703 m_harq_in, m_harq_out_head, m_harq_out, 1704 in_offset, out_offset, harq_in_offset, 1705 harq_out_offset, 1706 check_bit(dec->op_flags, 1707 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK), 1708 crc24_overlap, 1709 seg_total_left, queue_stats); 1710 1711 /* To keep CRC24 attached to end of Code block, use 1712 * RTE_BBDEV_LDPC_DEC_TB_CRC_24B_KEEP flag as it 1713 * removed by default once verified. 1714 */ 1715 1716 mbuf_total_left -= e; 1717 1718 /* Update offsets */ 1719 if (seg_total_left == e) { 1720 /* Go to the next mbuf */ 1721 m_in = m_in->next; 1722 m_out = m_out->next; 1723 if (m_harq_in != NULL) 1724 m_harq_in = m_harq_in->next; 1725 if (m_harq_out != NULL) 1726 m_harq_out = m_harq_out->next; 1727 in_offset = 0; 1728 out_offset = 0; 1729 harq_in_offset = 0; 1730 harq_out_offset = 0; 1731 } else { 1732 /* Update offsets for next CBs (if exist) */ 1733 in_offset += e; 1734 out_offset += out_length; 1735 } 1736 r++; 1737 } 1738 } 1739 1740 static inline uint16_t 1741 enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops, 1742 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1743 { 1744 uint16_t i; 1745 #ifdef RTE_BBDEV_OFFLOAD_COST 1746 queue_stats->acc_offload_cycles = 0; 1747 #endif 1748 1749 for (i = 0; i < nb_ops; ++i) 1750 enqueue_dec_one_op(q, ops[i], queue_stats); 1751 1752 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1753 NULL); 1754 } 1755 1756 static inline uint16_t 1757 enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q, 1758 struct rte_bbdev_dec_op **ops, 1759 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1760 { 1761 uint16_t i; 1762 #ifdef RTE_BBDEV_OFFLOAD_COST 1763 queue_stats->acc_offload_cycles = 0; 1764 #endif 1765 1766 for (i = 0; i < nb_ops; ++i) 1767 enqueue_ldpc_dec_one_op(q, ops[i], queue_stats); 1768 1769 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1770 NULL); 1771 } 1772 1773 /* Enqueue burst */ 1774 static uint16_t 1775 enqueue_enc_ops(struct rte_bbdev_queue_data *q_data, 1776 struct rte_bbdev_enc_op **ops, uint16_t nb_ops) 1777 { 1778 void *queue = q_data->queue_private; 1779 struct turbo_sw_queue *q = queue; 1780 uint16_t nb_enqueued = 0; 1781 1782 nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats); 1783 1784 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; 1785 q_data->queue_stats.enqueued_count += nb_enqueued; 1786 1787 return nb_enqueued; 1788 } 1789 1790 /* Enqueue burst */ 1791 static uint16_t 1792 enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data, 1793 struct rte_bbdev_enc_op **ops, uint16_t nb_ops) 1794 { 1795 void *queue = q_data->queue_private; 1796 struct turbo_sw_queue *q = queue; 1797 uint16_t nb_enqueued = 0; 1798 1799 nb_enqueued = enqueue_ldpc_enc_all_ops( 1800 q, ops, nb_ops, &q_data->queue_stats); 1801 1802 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; 1803 q_data->queue_stats.enqueued_count += nb_enqueued; 1804 1805 return nb_enqueued; 1806 } 1807 1808 /* Enqueue burst */ 1809 static uint16_t 1810 enqueue_dec_ops(struct rte_bbdev_queue_data *q_data, 1811 struct rte_bbdev_dec_op **ops, uint16_t nb_ops) 1812 { 1813 void *queue = q_data->queue_private; 1814 struct turbo_sw_queue *q = queue; 1815 uint16_t nb_enqueued = 0; 1816 1817 nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats); 1818 1819 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; 1820 q_data->queue_stats.enqueued_count += nb_enqueued; 1821 1822 return nb_enqueued; 1823 } 1824 1825 /* Enqueue burst */ 1826 static uint16_t 1827 enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data, 1828 struct rte_bbdev_dec_op **ops, uint16_t nb_ops) 1829 { 1830 void *queue = q_data->queue_private; 1831 struct turbo_sw_queue *q = queue; 1832 uint16_t nb_enqueued = 0; 1833 1834 nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops, 1835 &q_data->queue_stats); 1836 1837 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; 1838 q_data->queue_stats.enqueued_count += nb_enqueued; 1839 1840 return nb_enqueued; 1841 } 1842 1843 /* Dequeue decode burst */ 1844 static uint16_t 1845 dequeue_dec_ops(struct rte_bbdev_queue_data *q_data, 1846 struct rte_bbdev_dec_op **ops, uint16_t nb_ops) 1847 { 1848 struct turbo_sw_queue *q = q_data->queue_private; 1849 uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts, 1850 (void **)ops, nb_ops, NULL); 1851 q_data->queue_stats.dequeued_count += nb_dequeued; 1852 1853 return nb_dequeued; 1854 } 1855 1856 /* Dequeue encode burst */ 1857 static uint16_t 1858 dequeue_enc_ops(struct rte_bbdev_queue_data *q_data, 1859 struct rte_bbdev_enc_op **ops, uint16_t nb_ops) 1860 { 1861 struct turbo_sw_queue *q = q_data->queue_private; 1862 uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts, 1863 (void **)ops, nb_ops, NULL); 1864 q_data->queue_stats.dequeued_count += nb_dequeued; 1865 1866 return nb_dequeued; 1867 } 1868 1869 /* Parse 16bit integer from string argument */ 1870 static inline int 1871 parse_u16_arg(const char *key, const char *value, void *extra_args) 1872 { 1873 uint16_t *u16 = extra_args; 1874 unsigned int long result; 1875 1876 if ((value == NULL) || (extra_args == NULL)) 1877 return -EINVAL; 1878 errno = 0; 1879 result = strtoul(value, NULL, 0); 1880 if ((result >= (1 << 16)) || (errno != 0)) { 1881 rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key); 1882 return -ERANGE; 1883 } 1884 *u16 = (uint16_t)result; 1885 return 0; 1886 } 1887 1888 /* Parse parameters used to create device */ 1889 static int 1890 parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args) 1891 { 1892 struct rte_kvargs *kvlist = NULL; 1893 int ret = 0; 1894 1895 if (params == NULL) 1896 return -EINVAL; 1897 if (input_args) { 1898 kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params); 1899 if (kvlist == NULL) 1900 return -EFAULT; 1901 1902 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0], 1903 &parse_u16_arg, ¶ms->queues_num); 1904 if (ret < 0) 1905 goto exit; 1906 1907 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1], 1908 &parse_u16_arg, ¶ms->socket_id); 1909 if (ret < 0) 1910 goto exit; 1911 1912 if (params->socket_id >= RTE_MAX_NUMA_NODES) { 1913 rte_bbdev_log(ERR, "Invalid socket, must be < %u", 1914 RTE_MAX_NUMA_NODES); 1915 goto exit; 1916 } 1917 } 1918 1919 exit: 1920 if (kvlist) 1921 rte_kvargs_free(kvlist); 1922 return ret; 1923 } 1924 1925 /* Create device */ 1926 static int 1927 turbo_sw_bbdev_create(struct rte_vdev_device *vdev, 1928 struct turbo_sw_params *init_params) 1929 { 1930 struct rte_bbdev *bbdev; 1931 const char *name = rte_vdev_device_name(vdev); 1932 1933 bbdev = rte_bbdev_allocate(name); 1934 if (bbdev == NULL) 1935 return -ENODEV; 1936 1937 bbdev->data->dev_private = rte_zmalloc_socket(name, 1938 sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE, 1939 init_params->socket_id); 1940 if (bbdev->data->dev_private == NULL) { 1941 rte_bbdev_release(bbdev); 1942 return -ENOMEM; 1943 } 1944 1945 bbdev->dev_ops = &pmd_ops; 1946 bbdev->device = &vdev->device; 1947 bbdev->data->socket_id = init_params->socket_id; 1948 bbdev->intr_handle = NULL; 1949 1950 /* register rx/tx burst functions for data path */ 1951 bbdev->dequeue_enc_ops = dequeue_enc_ops; 1952 bbdev->dequeue_dec_ops = dequeue_dec_ops; 1953 bbdev->enqueue_enc_ops = enqueue_enc_ops; 1954 bbdev->enqueue_dec_ops = enqueue_dec_ops; 1955 bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops; 1956 bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops; 1957 bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops; 1958 bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops; 1959 ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues = 1960 init_params->queues_num; 1961 1962 return 0; 1963 } 1964 1965 /* Initialise device */ 1966 static int 1967 turbo_sw_bbdev_probe(struct rte_vdev_device *vdev) 1968 { 1969 struct turbo_sw_params init_params = { 1970 rte_socket_id(), 1971 RTE_BBDEV_DEFAULT_MAX_NB_QUEUES 1972 }; 1973 const char *name; 1974 const char *input_args; 1975 1976 if (vdev == NULL) 1977 return -EINVAL; 1978 1979 name = rte_vdev_device_name(vdev); 1980 if (name == NULL) 1981 return -EINVAL; 1982 input_args = rte_vdev_device_args(vdev); 1983 parse_turbo_sw_params(&init_params, input_args); 1984 1985 rte_bbdev_log_debug( 1986 "Initialising %s on NUMA node %d with max queues: %d\n", 1987 name, init_params.socket_id, init_params.queues_num); 1988 1989 return turbo_sw_bbdev_create(vdev, &init_params); 1990 } 1991 1992 /* Uninitialise device */ 1993 static int 1994 turbo_sw_bbdev_remove(struct rte_vdev_device *vdev) 1995 { 1996 struct rte_bbdev *bbdev; 1997 const char *name; 1998 1999 if (vdev == NULL) 2000 return -EINVAL; 2001 2002 name = rte_vdev_device_name(vdev); 2003 if (name == NULL) 2004 return -EINVAL; 2005 2006 bbdev = rte_bbdev_get_named_dev(name); 2007 if (bbdev == NULL) 2008 return -EINVAL; 2009 2010 rte_free(bbdev->data->dev_private); 2011 2012 return rte_bbdev_release(bbdev); 2013 } 2014 2015 static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = { 2016 .probe = turbo_sw_bbdev_probe, 2017 .remove = turbo_sw_bbdev_remove 2018 }; 2019 2020 RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv); 2021 RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME, 2022 TURBO_SW_MAX_NB_QUEUES_ARG"=<int> " 2023 TURBO_SW_SOCKET_ID_ARG"=<int>"); 2024 RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw); 2025