1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #include <stdlib.h> 6 #include <string.h> 7 8 #include <rte_common.h> 9 #include <bus_vdev_driver.h> 10 #include <rte_malloc.h> 11 #include <rte_ring.h> 12 #include <rte_kvargs.h> 13 #include <rte_cycles.h> 14 #include <rte_errno.h> 15 16 #include <rte_bbdev.h> 17 #include <rte_bbdev_pmd.h> 18 19 #include <rte_hexdump.h> 20 #include <rte_log.h> 21 22 #ifdef RTE_BBDEV_SDK_AVX2 23 #include <ipp.h> 24 #include <ipps.h> 25 #include <phy_turbo.h> 26 #include <phy_crc.h> 27 #include <phy_rate_match.h> 28 #endif 29 #ifdef RTE_BBDEV_SDK_AVX512 30 #include <bit_reverse.h> 31 #include <phy_ldpc_encoder_5gnr.h> 32 #include <phy_ldpc_decoder_5gnr.h> 33 #include <phy_LDPC_ratematch_5gnr.h> 34 #include <phy_rate_dematching_5gnr.h> 35 #endif 36 37 #define DRIVER_NAME baseband_turbo_sw 38 39 RTE_LOG_REGISTER_DEFAULT(bbdev_turbo_sw_logtype, NOTICE); 40 #define RTE_LOGTYPE_BBDEV_TURBO_SW bbdev_turbo_sw_logtype 41 42 /* Helper macro for logging */ 43 #define rte_bbdev_log(level, ...) \ 44 RTE_LOG_LINE(level, BBDEV_TURBO_SW, __VA_ARGS__) 45 46 #define rte_bbdev_log_debug(fmt, ...) \ 47 rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \ 48 ##__VA_ARGS__) 49 50 #define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48) 51 #define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6) 52 #define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48) 53 54 /* private data structure */ 55 struct bbdev_private { 56 unsigned int max_nb_queues; /**< Max number of queues */ 57 }; 58 59 /* Initialisation params structure that can be used by Turbo SW driver */ 60 struct turbo_sw_params { 61 int socket_id; /*< Turbo SW device socket */ 62 uint16_t queues_num; /*< Turbo SW device queues number */ 63 }; 64 65 /* Acceptable params for Turbo SW devices */ 66 #define TURBO_SW_MAX_NB_QUEUES_ARG "max_nb_queues" 67 #define TURBO_SW_SOCKET_ID_ARG "socket_id" 68 69 static const char * const turbo_sw_valid_params[] = { 70 TURBO_SW_MAX_NB_QUEUES_ARG, 71 TURBO_SW_SOCKET_ID_ARG 72 }; 73 74 /* queue */ 75 struct __rte_cache_aligned turbo_sw_queue { 76 /* Ring for processed (encoded/decoded) operations which are ready to 77 * be dequeued. 78 */ 79 struct rte_ring *processed_pkts; 80 /* Stores input for turbo encoder (used when CRC attachment is 81 * performed 82 */ 83 uint8_t *enc_in; 84 /* Stores output from turbo encoder */ 85 uint8_t *enc_out; 86 /* Alpha gamma buf for bblib_turbo_decoder() function */ 87 int8_t *ag; 88 /* Temp buf for bblib_turbo_decoder() function */ 89 uint16_t *code_block; 90 /* Input buf for bblib_rate_dematching_lte() function */ 91 uint8_t *deint_input; 92 /* Output buf for bblib_rate_dematching_lte() function */ 93 uint8_t *deint_output; 94 /* Output buf for bblib_turbodec_adapter_lte() function */ 95 uint8_t *adapter_output; 96 /* Operation type of this queue */ 97 enum rte_bbdev_op_type type; 98 }; 99 100 101 #ifdef RTE_BBDEV_SDK_AVX2 102 static inline char * 103 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len) 104 { 105 if (unlikely(len > rte_pktmbuf_tailroom(m))) 106 return NULL; 107 108 char *tail = (char *)m->buf_addr + m->data_off + m->data_len; 109 m->data_len = (uint16_t)(m->data_len + len); 110 m_head->pkt_len = (m_head->pkt_len + len); 111 return tail; 112 } 113 114 /* Calculate index based on Table 5.1.3-3 from TS34.212 */ 115 static inline int32_t 116 compute_idx(uint16_t k) 117 { 118 int32_t result = 0; 119 120 if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE) 121 return -1; 122 123 if (k > 2048) { 124 if ((k - 2048) % 64 != 0) 125 result = -1; 126 127 result = 124 + (k - 2048) / 64; 128 } else if (k <= 512) { 129 if ((k - 40) % 8 != 0) 130 result = -1; 131 132 result = (k - 40) / 8 + 1; 133 } else if (k <= 1024) { 134 if ((k - 512) % 16 != 0) 135 result = -1; 136 137 result = 60 + (k - 512) / 16; 138 } else { /* 1024 < k <= 2048 */ 139 if ((k - 1024) % 32 != 0) 140 result = -1; 141 142 result = 92 + (k - 1024) / 32; 143 } 144 145 return result; 146 } 147 #endif 148 149 /* Read flag value 0/1 from bitmap */ 150 static inline bool 151 check_bit(uint32_t bitmap, uint32_t bitmask) 152 { 153 return bitmap & bitmask; 154 } 155 156 /* Get device info */ 157 static void 158 info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) 159 { 160 struct bbdev_private *internals = dev->data->dev_private; 161 const struct rte_bbdev_op_cap *op_cap; 162 int num_op_type = 0; 163 164 static const struct rte_bbdev_op_cap bbdev_capabilities[] = { 165 #ifdef RTE_BBDEV_SDK_AVX2 166 { 167 .type = RTE_BBDEV_OP_TURBO_DEC, 168 .cap.turbo_dec = { 169 .capability_flags = 170 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | 171 RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN | 172 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | 173 RTE_BBDEV_TURBO_CRC_TYPE_24B | 174 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP | 175 RTE_BBDEV_TURBO_EARLY_TERMINATION, 176 .max_llr_modulus = 16, 177 .num_buffers_src = 178 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 179 .num_buffers_hard_out = 180 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 181 .num_buffers_soft_out = 0, 182 } 183 }, 184 { 185 .type = RTE_BBDEV_OP_TURBO_ENC, 186 .cap.turbo_enc = { 187 .capability_flags = 188 RTE_BBDEV_TURBO_CRC_24B_ATTACH | 189 RTE_BBDEV_TURBO_CRC_24A_ATTACH | 190 RTE_BBDEV_TURBO_RATE_MATCH | 191 RTE_BBDEV_TURBO_RV_INDEX_BYPASS, 192 .num_buffers_src = 193 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 194 .num_buffers_dst = 195 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 196 } 197 }, 198 #endif 199 #ifdef RTE_BBDEV_SDK_AVX512 200 { 201 .type = RTE_BBDEV_OP_LDPC_ENC, 202 .cap.ldpc_enc = { 203 .capability_flags = 204 RTE_BBDEV_LDPC_RATE_MATCH | 205 RTE_BBDEV_LDPC_CRC_16_ATTACH | 206 RTE_BBDEV_LDPC_CRC_24A_ATTACH | 207 RTE_BBDEV_LDPC_CRC_24B_ATTACH, 208 .num_buffers_src = 209 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 210 .num_buffers_dst = 211 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 212 } 213 }, 214 { 215 .type = RTE_BBDEV_OP_LDPC_DEC, 216 .cap.ldpc_dec = { 217 .capability_flags = 218 RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK | 219 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | 220 RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | 221 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | 222 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 223 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 224 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE, 225 .llr_size = 8, 226 .llr_decimals = 4, 227 .num_buffers_src = 228 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 229 .num_buffers_hard_out = 230 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 231 .num_buffers_soft_out = 0, 232 } 233 }, 234 #endif 235 RTE_BBDEV_END_OF_CAPABILITIES_LIST() 236 }; 237 238 static struct rte_bbdev_queue_conf default_queue_conf = { 239 .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT, 240 }; 241 #ifdef RTE_BBDEV_SDK_AVX2 242 static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2; 243 dev_info->cpu_flag_reqs = &cpu_flag; 244 #else 245 dev_info->cpu_flag_reqs = NULL; 246 #endif 247 default_queue_conf.socket = dev->data->socket_id; 248 249 dev_info->driver_name = RTE_STR(DRIVER_NAME); 250 dev_info->max_num_queues = internals->max_nb_queues; 251 dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT; 252 dev_info->hardware_accelerated = false; 253 dev_info->max_dl_queue_priority = 0; 254 dev_info->max_ul_queue_priority = 0; 255 dev_info->default_queue_conf = default_queue_conf; 256 dev_info->capabilities = bbdev_capabilities; 257 dev_info->min_alignment = 64; 258 dev_info->harq_buffer_size = 0; 259 dev_info->data_endianness = RTE_LITTLE_ENDIAN; 260 dev_info->device_status = RTE_BBDEV_DEV_NOT_SUPPORTED; 261 262 op_cap = bbdev_capabilities; 263 for (; op_cap->type != RTE_BBDEV_OP_NONE; ++op_cap) 264 num_op_type++; 265 op_cap = bbdev_capabilities; 266 if (num_op_type > 0) { 267 int num_queue_per_type = dev_info->max_num_queues / num_op_type; 268 for (; op_cap->type != RTE_BBDEV_OP_NONE; ++op_cap) 269 dev_info->num_queues[op_cap->type] = num_queue_per_type; 270 } 271 272 rte_bbdev_log_debug("got device info from %u", dev->data->dev_id); 273 } 274 275 /* Release queue */ 276 static int 277 q_release(struct rte_bbdev *dev, uint16_t q_id) 278 { 279 struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private; 280 281 if (q != NULL) { 282 rte_ring_free(q->processed_pkts); 283 rte_free(q->enc_out); 284 rte_free(q->enc_in); 285 rte_free(q->ag); 286 rte_free(q->code_block); 287 rte_free(q->deint_input); 288 rte_free(q->deint_output); 289 rte_free(q->adapter_output); 290 rte_free(q); 291 dev->data->queues[q_id].queue_private = NULL; 292 } 293 294 rte_bbdev_log_debug("released device queue %u:%u", 295 dev->data->dev_id, q_id); 296 return 0; 297 } 298 299 /* Setup a queue */ 300 static int 301 q_setup(struct rte_bbdev *dev, uint16_t q_id, 302 const struct rte_bbdev_queue_conf *queue_conf) 303 { 304 int ret; 305 struct turbo_sw_queue *q; 306 char name[RTE_RING_NAMESIZE]; 307 308 /* Allocate the queue data structure. */ 309 q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q), 310 RTE_CACHE_LINE_SIZE, queue_conf->socket); 311 if (q == NULL) { 312 rte_bbdev_log(ERR, "Failed to allocate queue memory"); 313 return -ENOMEM; 314 } 315 316 /* Allocate memory for encoder output. */ 317 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u", 318 dev->data->dev_id, q_id); 319 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 320 rte_bbdev_log(ERR, 321 "Creating queue name for device %u queue %u failed", 322 dev->data->dev_id, q_id); 323 ret = -ENAMETOOLONG; 324 goto free_q; 325 } 326 q->enc_out = rte_zmalloc_socket(name, 327 ((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) * 328 sizeof(*q->enc_out) * 3, 329 RTE_CACHE_LINE_SIZE, queue_conf->socket); 330 if (q->enc_out == NULL) { 331 rte_bbdev_log(ERR, 332 "Failed to allocate queue memory for %s", name); 333 ret = -ENOMEM; 334 goto free_q; 335 } 336 337 /* Allocate memory for rate matching output. */ 338 ret = snprintf(name, RTE_RING_NAMESIZE, 339 RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id, 340 q_id); 341 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 342 rte_bbdev_log(ERR, 343 "Creating queue name for device %u queue %u failed", 344 dev->data->dev_id, q_id); 345 ret = -ENAMETOOLONG; 346 goto free_q; 347 } 348 q->enc_in = rte_zmalloc_socket(name, 349 (RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in), 350 RTE_CACHE_LINE_SIZE, queue_conf->socket); 351 if (q->enc_in == NULL) { 352 rte_bbdev_log(ERR, 353 "Failed to allocate queue memory for %s", name); 354 ret = -ENOMEM; 355 goto free_q; 356 } 357 358 /* Allocate memory for Alpha Gamma temp buffer. */ 359 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u", 360 dev->data->dev_id, q_id); 361 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 362 rte_bbdev_log(ERR, 363 "Creating queue name for device %u queue %u failed", 364 dev->data->dev_id, q_id); 365 ret = -ENAMETOOLONG; 366 goto free_q; 367 } 368 q->ag = rte_zmalloc_socket(name, 369 RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag), 370 RTE_CACHE_LINE_SIZE, queue_conf->socket); 371 if (q->ag == NULL) { 372 rte_bbdev_log(ERR, 373 "Failed to allocate queue memory for %s", name); 374 ret = -ENOMEM; 375 goto free_q; 376 } 377 378 /* Allocate memory for code block temp buffer. */ 379 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u", 380 dev->data->dev_id, q_id); 381 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 382 rte_bbdev_log(ERR, 383 "Creating queue name for device %u queue %u failed", 384 dev->data->dev_id, q_id); 385 ret = -ENAMETOOLONG; 386 goto free_q; 387 } 388 q->code_block = rte_zmalloc_socket(name, 389 RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block), 390 RTE_CACHE_LINE_SIZE, queue_conf->socket); 391 if (q->code_block == NULL) { 392 rte_bbdev_log(ERR, 393 "Failed to allocate queue memory for %s", name); 394 ret = -ENOMEM; 395 goto free_q; 396 } 397 398 /* Allocate memory for Deinterleaver input. */ 399 ret = snprintf(name, RTE_RING_NAMESIZE, 400 RTE_STR(DRIVER_NAME)"_de_i%u:%u", 401 dev->data->dev_id, q_id); 402 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 403 rte_bbdev_log(ERR, 404 "Creating queue name for device %u queue %u failed", 405 dev->data->dev_id, q_id); 406 ret = -ENAMETOOLONG; 407 goto free_q; 408 } 409 q->deint_input = rte_zmalloc_socket(name, 410 DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input), 411 RTE_CACHE_LINE_SIZE, queue_conf->socket); 412 if (q->deint_input == NULL) { 413 rte_bbdev_log(ERR, 414 "Failed to allocate queue memory for %s", name); 415 ret = -ENOMEM; 416 goto free_q; 417 } 418 419 /* Allocate memory for Deinterleaver output. */ 420 ret = snprintf(name, RTE_RING_NAMESIZE, 421 RTE_STR(DRIVER_NAME)"_de_o%u:%u", 422 dev->data->dev_id, q_id); 423 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 424 rte_bbdev_log(ERR, 425 "Creating queue name for device %u queue %u failed", 426 dev->data->dev_id, q_id); 427 ret = -ENAMETOOLONG; 428 goto free_q; 429 } 430 q->deint_output = rte_zmalloc_socket(NULL, 431 DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output), 432 RTE_CACHE_LINE_SIZE, queue_conf->socket); 433 if (q->deint_output == NULL) { 434 rte_bbdev_log(ERR, 435 "Failed to allocate queue memory for %s", name); 436 ret = -ENOMEM; 437 goto free_q; 438 } 439 440 /* Allocate memory for Adapter output. */ 441 ret = snprintf(name, RTE_RING_NAMESIZE, 442 RTE_STR(DRIVER_NAME)"_ada_o%u:%u", 443 dev->data->dev_id, q_id); 444 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 445 rte_bbdev_log(ERR, 446 "Creating queue name for device %u queue %u failed", 447 dev->data->dev_id, q_id); 448 ret = -ENAMETOOLONG; 449 goto free_q; 450 } 451 q->adapter_output = rte_zmalloc_socket(NULL, 452 ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output), 453 RTE_CACHE_LINE_SIZE, queue_conf->socket); 454 if (q->adapter_output == NULL) { 455 rte_bbdev_log(ERR, 456 "Failed to allocate queue memory for %s", name); 457 ret = -ENOMEM; 458 goto free_q; 459 } 460 461 /* Create ring for packets awaiting to be dequeued. */ 462 ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u", 463 dev->data->dev_id, q_id); 464 if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { 465 rte_bbdev_log(ERR, 466 "Creating queue name for device %u queue %u failed", 467 dev->data->dev_id, q_id); 468 ret = -ENAMETOOLONG; 469 goto free_q; 470 } 471 q->processed_pkts = rte_ring_create(name, queue_conf->queue_size, 472 queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ); 473 if (q->processed_pkts == NULL) { 474 rte_bbdev_log(ERR, "Failed to create ring for %s", name); 475 ret = -rte_errno; 476 goto free_q; 477 } 478 479 q->type = queue_conf->op_type; 480 481 dev->data->queues[q_id].queue_private = q; 482 rte_bbdev_log_debug("setup device queue %s", name); 483 return 0; 484 485 free_q: 486 rte_ring_free(q->processed_pkts); 487 rte_free(q->enc_out); 488 rte_free(q->enc_in); 489 rte_free(q->ag); 490 rte_free(q->code_block); 491 rte_free(q->deint_input); 492 rte_free(q->deint_output); 493 rte_free(q->adapter_output); 494 rte_free(q); 495 return ret; 496 } 497 498 static const struct rte_bbdev_ops pmd_ops = { 499 .info_get = info_get, 500 .queue_setup = q_setup, 501 .queue_release = q_release 502 }; 503 504 #ifdef RTE_BBDEV_SDK_AVX2 505 #ifdef RTE_LIBRTE_BBDEV_DEBUG 506 /* Checks if the encoder input buffer is correct. 507 * Returns 0 if it's valid, -1 otherwise. 508 */ 509 static inline int 510 is_enc_input_valid(const uint16_t k, const int32_t k_idx, 511 const uint16_t in_length) 512 { 513 if (k_idx < 0) { 514 rte_bbdev_log(ERR, "K Index is invalid"); 515 return -1; 516 } 517 518 if (in_length - (k >> 3) < 0) { 519 rte_bbdev_log(ERR, 520 "Mismatch between input length (%u bytes) and K (%u bits)", 521 in_length, k); 522 return -1; 523 } 524 525 if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) { 526 rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d", 527 k, RTE_BBDEV_TURBO_MAX_CB_SIZE); 528 return -1; 529 } 530 531 return 0; 532 } 533 534 /* Checks if the decoder input buffer is correct. 535 * Returns 0 if it's valid, -1 otherwise. 536 */ 537 static inline int 538 is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length) 539 { 540 if (k_idx < 0) { 541 rte_bbdev_log(ERR, "K index is invalid"); 542 return -1; 543 } 544 545 if (in_length < kw) { 546 rte_bbdev_log(ERR, 547 "Mismatch between input length (%u) and kw (%u)", 548 in_length, kw); 549 return -1; 550 } 551 552 if (kw > RTE_BBDEV_TURBO_MAX_KW) { 553 rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d", 554 kw, RTE_BBDEV_TURBO_MAX_KW); 555 return -1; 556 } 557 558 return 0; 559 } 560 #endif 561 #endif 562 563 static inline void 564 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 565 uint8_t r, uint8_t c, uint16_t k, uint16_t ncb, 566 uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, 567 struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, 568 uint16_t in_length, struct rte_bbdev_stats *q_stats) 569 { 570 #ifdef RTE_BBDEV_SDK_AVX2 571 #ifdef RTE_LIBRTE_BBDEV_DEBUG 572 int ret; 573 #else 574 RTE_SET_USED(in_length); 575 #endif 576 int16_t k_idx; 577 uint16_t m; 578 uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out; 579 uint64_t first_3_bytes = 0; 580 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc; 581 struct bblib_crc_request crc_req; 582 struct bblib_crc_response crc_resp; 583 struct bblib_turbo_encoder_request turbo_req; 584 struct bblib_turbo_encoder_response turbo_resp; 585 struct bblib_rate_match_dl_request rm_req; 586 struct bblib_rate_match_dl_response rm_resp; 587 uint64_t start_time; 588 589 k_idx = compute_idx(k); 590 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 591 592 /* CRC24A (for TB) */ 593 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) && 594 (enc->code_block_mode == RTE_BBDEV_CODE_BLOCK)) { 595 #ifdef RTE_LIBRTE_BBDEV_DEBUG 596 ret = is_enc_input_valid(k - 24, k_idx, in_length); 597 if (ret != 0) { 598 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 599 return; 600 } 601 #endif 602 603 crc_req.data = in; 604 crc_req.len = k - 24; 605 /* Check if there is a room for CRC bits if not use 606 * the temporary buffer. 607 */ 608 if (mbuf_append(m_in, m_in, 3) == NULL) { 609 rte_memcpy(q->enc_in, in, (k - 24) >> 3); 610 in = q->enc_in; 611 } else { 612 /* Store 3 first bytes of next CB as they will be 613 * overwritten by CRC bytes. If it is the last CB then 614 * there is no point to store 3 next bytes and this 615 * if..else branch will be omitted. 616 */ 617 first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]); 618 } 619 620 crc_resp.data = in; 621 start_time = rte_rdtsc_precise(); 622 /* CRC24A generation */ 623 bblib_lte_crc24a_gen(&crc_req, &crc_resp); 624 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 625 } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) { 626 /* CRC24B */ 627 #ifdef RTE_LIBRTE_BBDEV_DEBUG 628 ret = is_enc_input_valid(k - 24, k_idx, in_length); 629 if (ret != 0) { 630 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 631 return; 632 } 633 #endif 634 635 crc_req.data = in; 636 crc_req.len = k - 24; 637 /* Check if there is a room for CRC bits if this is the last 638 * CB in TB. If not use temporary buffer. 639 */ 640 if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) { 641 rte_memcpy(q->enc_in, in, (k - 24) >> 3); 642 in = q->enc_in; 643 } else if (c - r > 1) { 644 /* Store 3 first bytes of next CB as they will be 645 * overwritten by CRC bytes. If it is the last CB then 646 * there is no point to store 3 next bytes and this 647 * if..else branch will be omitted. 648 */ 649 first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]); 650 } 651 652 crc_resp.data = in; 653 start_time = rte_rdtsc_precise(); 654 /* CRC24B generation */ 655 bblib_lte_crc24b_gen(&crc_req, &crc_resp); 656 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 657 } 658 #ifdef RTE_LIBRTE_BBDEV_DEBUG 659 else { 660 ret = is_enc_input_valid(k, k_idx, in_length); 661 if (ret != 0) { 662 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 663 return; 664 } 665 } 666 #endif 667 668 /* Turbo encoder */ 669 670 /* Each bit layer output from turbo encoder is (k+4) bits long, i.e. 671 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up. 672 * So dst_data's length should be 3*(k/8) + 3 bytes. 673 * In Rate-matching bypass case outputs pointers passed to encoder 674 * (out0, out1 and out2) can directly point to addresses of output from 675 * turbo_enc entity. 676 */ 677 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) { 678 out0 = q->enc_out; 679 out1 = RTE_PTR_ADD(out0, (k >> 3) + 1); 680 out2 = RTE_PTR_ADD(out1, (k >> 3) + 1); 681 } else { 682 out0 = (uint8_t *)mbuf_append(m_out_head, m_out, 683 (k >> 3) * 3 + 2); 684 if (out0 == NULL) { 685 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 686 rte_bbdev_log(ERR, 687 "Too little space in output mbuf"); 688 return; 689 } 690 enc->output.length += (k >> 3) * 3 + 2; 691 /* rte_bbdev_op_data.offset can be different than the 692 * offset of the appended bytes 693 */ 694 out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 695 out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, 696 out_offset + (k >> 3) + 1); 697 out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, 698 out_offset + 2 * ((k >> 3) + 1)); 699 } 700 701 turbo_req.case_id = k_idx; 702 turbo_req.input_win = in; 703 turbo_req.length = k >> 3; 704 turbo_resp.output_win_0 = out0; 705 turbo_resp.output_win_1 = out1; 706 turbo_resp.output_win_2 = out2; 707 708 start_time = rte_rdtsc_precise(); 709 710 /* Turbo encoding */ 711 if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) { 712 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 713 rte_bbdev_log(ERR, "Turbo Encoder failed"); 714 return; 715 } 716 717 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 718 719 /* Restore 3 first bytes of next CB if they were overwritten by CRC*/ 720 if (first_3_bytes != 0) 721 *((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes; 722 723 /* Rate-matching */ 724 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) { 725 uint8_t mask_id; 726 /* Integer round up division by 8 */ 727 uint16_t out_len = (e + 7) >> 3; 728 /* The mask array is indexed using E%8. E is an even number so 729 * there are only 4 possible values. 730 */ 731 const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC}; 732 733 /* get output data starting address */ 734 rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len); 735 if (rm_out == NULL) { 736 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 737 rte_bbdev_log(ERR, 738 "Too little space in output mbuf"); 739 return; 740 } 741 /* rte_bbdev_op_data.offset can be different than the offset 742 * of the appended bytes 743 */ 744 rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 745 746 /* index of current code block */ 747 rm_req.r = r; 748 /* total number of code block */ 749 rm_req.C = c; 750 /* For DL - 1, UL - 0 */ 751 rm_req.direction = 1; 752 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nsoft, KMIMO 753 * and MDL_HARQ are used for Ncb calculation. As Ncb is already 754 * known we can adjust those parameters 755 */ 756 rm_req.Nsoft = ncb * rm_req.C; 757 rm_req.KMIMO = 1; 758 rm_req.MDL_HARQ = 1; 759 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nl, Qm and G 760 * are used for E calculation. As E is already known we can 761 * adjust those parameters 762 */ 763 rm_req.NL = e; 764 rm_req.Qm = 1; 765 rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C; 766 767 rm_req.rvidx = enc->rv_index; 768 rm_req.Kidx = k_idx - 1; 769 rm_req.nLen = k + 4; 770 rm_req.tin0 = out0; 771 rm_req.tin1 = out1; 772 rm_req.tin2 = out2; 773 rm_resp.output = rm_out; 774 rm_resp.OutputLen = out_len; 775 if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS) 776 rm_req.bypass_rvidx = 1; 777 else 778 rm_req.bypass_rvidx = 0; 779 780 start_time = rte_rdtsc_precise(); 781 782 /* Rate-Matching */ 783 if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) { 784 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 785 rte_bbdev_log(ERR, "Rate matching failed"); 786 return; 787 } 788 789 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 790 791 /* SW fills an entire last byte even if E%8 != 0. Clear the 792 * superfluous data bits for consistency with HW device. 793 */ 794 mask_id = (e & 7) >> 1; 795 rm_out[out_len - 1] &= mask_out[mask_id]; 796 enc->output.length += rm_resp.OutputLen; 797 } else { 798 /* Rate matching is bypassed */ 799 800 /* Completing last byte of out0 (where 4 tail bits are stored) 801 * by moving first 4 bits from out1 802 */ 803 tmp_out = (uint8_t *) --out1; 804 *tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4); 805 tmp_out++; 806 /* Shifting out1 data by 4 bits to the left */ 807 for (m = 0; m < k >> 3; ++m) { 808 uint8_t *first = tmp_out; 809 uint8_t second = *(tmp_out + 1); 810 *first = (*first << 4) | ((second & 0xF0) >> 4); 811 tmp_out++; 812 } 813 /* Shifting out2 data by 8 bits to the left */ 814 for (m = 0; m < (k >> 3) + 1; ++m) { 815 *tmp_out = *(tmp_out + 1); 816 tmp_out++; 817 } 818 *tmp_out = 0; 819 } 820 #else 821 RTE_SET_USED(q); 822 RTE_SET_USED(op); 823 RTE_SET_USED(r); 824 RTE_SET_USED(c); 825 RTE_SET_USED(k); 826 RTE_SET_USED(ncb); 827 RTE_SET_USED(e); 828 RTE_SET_USED(m_in); 829 RTE_SET_USED(m_out_head); 830 RTE_SET_USED(m_out); 831 RTE_SET_USED(in_offset); 832 RTE_SET_USED(out_offset); 833 RTE_SET_USED(in_length); 834 RTE_SET_USED(q_stats); 835 #endif 836 } 837 838 839 static inline void 840 process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 841 uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, 842 struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, 843 uint16_t seg_total_left, struct rte_bbdev_stats *q_stats) 844 { 845 #ifdef RTE_BBDEV_SDK_AVX512 846 RTE_SET_USED(seg_total_left); 847 uint8_t *in, *rm_out; 848 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 849 struct bblib_ldpc_encoder_5gnr_request ldpc_req; 850 struct bblib_ldpc_encoder_5gnr_response ldpc_resp; 851 struct bblib_LDPC_ratematch_5gnr_request rm_req; 852 struct bblib_LDPC_ratematch_5gnr_response rm_resp; 853 struct bblib_crc_request crc_req; 854 struct bblib_crc_response crc_resp; 855 uint16_t msgLen, puntBits, parity_offset, out_len; 856 uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; 857 uint16_t in_length_in_bits = K - enc->n_filler; 858 uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3; 859 860 uint64_t start_time = rte_rdtsc_precise(); 861 862 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 863 864 /* Masking the Filler bits explicitly */ 865 memset(q->enc_in + (in_length_in_bytes - 3), 0, 866 ((K + 7) >> 3) - (in_length_in_bytes - 3)); 867 /* CRC Generation */ 868 if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) { 869 rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); 870 crc_req.data = in; 871 crc_req.len = in_length_in_bits - 24; 872 crc_resp.data = q->enc_in; 873 bblib_lte_crc24a_gen(&crc_req, &crc_resp); 874 } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) { 875 rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); 876 crc_req.data = in; 877 crc_req.len = in_length_in_bits - 24; 878 crc_resp.data = q->enc_in; 879 bblib_lte_crc24b_gen(&crc_req, &crc_resp); 880 } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_16_ATTACH) { 881 rte_memcpy(q->enc_in, in, in_length_in_bytes - 2); 882 crc_req.data = in; 883 crc_req.len = in_length_in_bits - 16; 884 crc_resp.data = q->enc_in; 885 bblib_lte_crc16_gen(&crc_req, &crc_resp); 886 } else 887 rte_memcpy(q->enc_in, in, in_length_in_bytes); 888 889 /* LDPC Encoding */ 890 ldpc_req.Zc = enc->z_c; 891 ldpc_req.baseGraph = enc->basegraph; 892 /* Number of rows set to maximum */ 893 ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42; 894 ldpc_req.numberCodeblocks = 1; 895 ldpc_req.input[0] = (int8_t *) q->enc_in; 896 ldpc_resp.output[0] = (int8_t *) q->enc_out; 897 898 bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3); 899 900 if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) { 901 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 902 rte_bbdev_log(ERR, "LDPC Encoder failed"); 903 return; 904 } 905 906 /* 907 * Systematic + Parity : Recreating stream with filler bits, ideally 908 * the bit select could handle this in the RM SDK 909 */ 910 msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc; 911 puntBits = 2 * ldpc_req.Zc; 912 parity_offset = msgLen - puntBits; 913 ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8), 914 puntBits%8, q->adapter_output, 0, parity_offset); 915 ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8), 916 parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc); 917 918 out_len = (e + 7) >> 3; 919 /* get output data starting address */ 920 rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len); 921 if (rm_out == NULL) { 922 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 923 rte_bbdev_log(ERR, 924 "Too little space in output mbuf"); 925 return; 926 } 927 /* 928 * rte_bbdev_op_data.offset can be different than the offset 929 * of the appended bytes 930 */ 931 rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 932 933 /* Rate-Matching */ 934 rm_req.E = e; 935 rm_req.Ncb = enc->n_cb; 936 rm_req.Qm = enc->q_m; 937 rm_req.Zc = enc->z_c; 938 rm_req.baseGraph = enc->basegraph; 939 rm_req.input = q->adapter_output; 940 rm_req.nLen = enc->n_filler; 941 rm_req.nullIndex = parity_offset - enc->n_filler; 942 rm_req.rvidx = enc->rv_index; 943 rm_resp.output = q->deint_output; 944 945 if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) { 946 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 947 rte_bbdev_log(ERR, "Rate matching failed"); 948 return; 949 } 950 951 /* RM SDK may provide non zero bits on last byte */ 952 if ((e % 8) != 0) 953 q->deint_output[out_len-1] &= (1 << (e % 8)) - 1; 954 955 bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3); 956 957 rte_memcpy(rm_out, q->deint_output, out_len); 958 enc->output.length += out_len; 959 960 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 961 #else 962 RTE_SET_USED(q); 963 RTE_SET_USED(op); 964 RTE_SET_USED(e); 965 RTE_SET_USED(m_in); 966 RTE_SET_USED(m_out_head); 967 RTE_SET_USED(m_out); 968 RTE_SET_USED(in_offset); 969 RTE_SET_USED(out_offset); 970 RTE_SET_USED(seg_total_left); 971 RTE_SET_USED(q_stats); 972 #endif 973 } 974 975 static inline void 976 enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 977 struct rte_bbdev_stats *queue_stats) 978 { 979 uint8_t c, r, crc24_bits = 0; 980 uint16_t k, ncb; 981 uint32_t e; 982 struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc; 983 uint16_t in_offset = enc->input.offset; 984 uint16_t out_offset = enc->output.offset; 985 struct rte_mbuf *m_in = enc->input.data; 986 struct rte_mbuf *m_out = enc->output.data; 987 struct rte_mbuf *m_out_head = enc->output.data; 988 uint32_t in_length, mbuf_total_left = enc->input.length; 989 uint16_t seg_total_left; 990 991 /* Clear op status */ 992 op->status = 0; 993 994 if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { 995 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", 996 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); 997 op->status = 1 << RTE_BBDEV_DATA_ERROR; 998 return; 999 } 1000 1001 if (m_in == NULL || m_out == NULL) { 1002 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1003 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1004 return; 1005 } 1006 1007 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || 1008 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) 1009 crc24_bits = 24; 1010 1011 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1012 c = enc->tb_params.c; 1013 r = enc->tb_params.r; 1014 } else {/* For Code Block mode */ 1015 c = 1; 1016 r = 0; 1017 } 1018 1019 while (mbuf_total_left > 0 && r < c) { 1020 1021 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1022 1023 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1024 k = (r < enc->tb_params.c_neg) ? 1025 enc->tb_params.k_neg : enc->tb_params.k_pos; 1026 ncb = (r < enc->tb_params.c_neg) ? 1027 enc->tb_params.ncb_neg : enc->tb_params.ncb_pos; 1028 e = (r < enc->tb_params.cab) ? 1029 enc->tb_params.ea : enc->tb_params.eb; 1030 } else { 1031 k = enc->cb_params.k; 1032 ncb = enc->cb_params.ncb; 1033 e = enc->cb_params.e; 1034 } 1035 1036 process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head, 1037 m_out, in_offset, out_offset, seg_total_left, 1038 queue_stats); 1039 /* Update total_left */ 1040 in_length = ((k - crc24_bits) >> 3); 1041 mbuf_total_left -= in_length; 1042 /* Update offsets for next CBs (if exist) */ 1043 in_offset += (k - crc24_bits) >> 3; 1044 if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) 1045 out_offset += e >> 3; 1046 else 1047 out_offset += (k >> 3) * 3 + 2; 1048 1049 /* Update offsets */ 1050 if (seg_total_left == in_length) { 1051 /* Go to the next mbuf */ 1052 m_in = m_in->next; 1053 m_out = m_out->next; 1054 in_offset = 0; 1055 out_offset = 0; 1056 } 1057 r++; 1058 } 1059 1060 /* check if all input data was processed */ 1061 if (mbuf_total_left != 0) { 1062 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1063 rte_bbdev_log(ERR, 1064 "Mismatch between mbuf length and included CBs sizes"); 1065 } 1066 } 1067 1068 1069 static inline void 1070 enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, 1071 struct rte_bbdev_stats *queue_stats) 1072 { 1073 uint8_t c, r, crc24_bits = 0; 1074 uint32_t e; 1075 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 1076 uint16_t in_offset = enc->input.offset; 1077 uint16_t out_offset = enc->output.offset; 1078 struct rte_mbuf *m_in = enc->input.data; 1079 struct rte_mbuf *m_out = enc->output.data; 1080 struct rte_mbuf *m_out_head = enc->output.data; 1081 uint32_t in_length, mbuf_total_left = enc->input.length; 1082 1083 uint16_t seg_total_left; 1084 1085 /* Clear op status */ 1086 op->status = 0; 1087 1088 if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { 1089 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", 1090 mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); 1091 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1092 return; 1093 } 1094 1095 if (m_in == NULL || m_out == NULL) { 1096 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1097 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1098 return; 1099 } 1100 1101 if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || 1102 (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) 1103 crc24_bits = 24; 1104 1105 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1106 c = enc->tb_params.c; 1107 r = enc->tb_params.r; 1108 } else { /* For Code Block mode */ 1109 c = 1; 1110 r = 0; 1111 } 1112 1113 while (mbuf_total_left > 0 && r < c) { 1114 1115 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1116 1117 if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1118 e = (r < enc->tb_params.cab) ? 1119 enc->tb_params.ea : enc->tb_params.eb; 1120 } else { 1121 e = enc->cb_params.e; 1122 } 1123 1124 process_ldpc_enc_cb(q, op, e, m_in, m_out_head, 1125 m_out, in_offset, out_offset, seg_total_left, 1126 queue_stats); 1127 /* Update total_left */ 1128 in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; 1129 in_length = ((in_length - crc24_bits - enc->n_filler) >> 3); 1130 mbuf_total_left -= in_length; 1131 /* Update offsets for next CBs (if exist) */ 1132 in_offset += in_length; 1133 out_offset += (e + 7) >> 3; 1134 1135 /* Update offsets */ 1136 if (seg_total_left == in_length) { 1137 /* Go to the next mbuf */ 1138 m_in = m_in->next; 1139 m_out = m_out->next; 1140 in_offset = 0; 1141 out_offset = 0; 1142 } 1143 r++; 1144 } 1145 1146 /* check if all input data was processed */ 1147 if (mbuf_total_left != 0) { 1148 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1149 rte_bbdev_log(ERR, 1150 "Mismatch between mbuf length and included CBs sizes %d", 1151 mbuf_total_left); 1152 } 1153 } 1154 1155 static inline uint16_t 1156 enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops, 1157 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1158 { 1159 uint16_t i; 1160 queue_stats->acc_offload_cycles = 0; 1161 1162 for (i = 0; i < nb_ops; ++i) 1163 enqueue_enc_one_op(q, ops[i], queue_stats); 1164 1165 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1166 NULL); 1167 } 1168 1169 static inline uint16_t 1170 enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q, 1171 struct rte_bbdev_enc_op **ops, 1172 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1173 { 1174 uint16_t i; 1175 queue_stats->acc_offload_cycles = 0; 1176 1177 for (i = 0; i < nb_ops; ++i) 1178 enqueue_ldpc_enc_one_op(q, ops[i], queue_stats); 1179 1180 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1181 NULL); 1182 } 1183 1184 #ifdef RTE_BBDEV_SDK_AVX2 1185 static inline void 1186 move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k, 1187 uint16_t ncb) 1188 { 1189 uint16_t d = k + 4; 1190 uint16_t kpi = ncb / 3; 1191 uint16_t nd = kpi - d; 1192 1193 rte_memcpy(&out[nd], in, d); 1194 rte_memcpy(&out[nd + kpi + 64], &in[kpi], d); 1195 rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d); 1196 } 1197 #endif 1198 1199 static inline void 1200 process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1201 uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in, 1202 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, 1203 uint16_t in_offset, uint16_t out_offset, bool check_crc_24b, 1204 uint16_t crc24_overlap, uint16_t in_length, 1205 struct rte_bbdev_stats *q_stats) 1206 { 1207 #ifdef RTE_BBDEV_SDK_AVX2 1208 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1209 int ret; 1210 #else 1211 RTE_SET_USED(in_length); 1212 #endif 1213 int32_t k_idx; 1214 int32_t iter_cnt; 1215 uint8_t *in, *out, *adapter_input; 1216 int32_t ncb, ncb_without_null; 1217 struct bblib_turbo_adapter_ul_response adapter_resp; 1218 struct bblib_turbo_adapter_ul_request adapter_req; 1219 struct bblib_turbo_decoder_request turbo_req; 1220 struct bblib_turbo_decoder_response turbo_resp; 1221 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec; 1222 uint64_t start_time; 1223 1224 k_idx = compute_idx(k); 1225 1226 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1227 ret = is_dec_input_valid(k_idx, kw, in_length); 1228 if (ret != 0) { 1229 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1230 return; 1231 } 1232 #endif 1233 1234 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 1235 ncb = kw; 1236 ncb_without_null = (k + 4) * 3; 1237 1238 if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) { 1239 struct bblib_deinterleave_ul_request deint_req; 1240 struct bblib_deinterleave_ul_response deint_resp; 1241 1242 deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER; 1243 deint_req.pharqbuffer = in; 1244 deint_req.ncb = ncb; 1245 deint_resp.pinteleavebuffer = q->deint_output; 1246 1247 start_time = rte_rdtsc_precise(); 1248 /* Sub-block De-Interleaving */ 1249 bblib_deinterleave_ul(&deint_req, &deint_resp); 1250 1251 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1252 } else 1253 move_padding_bytes(in, q->deint_output, k, ncb); 1254 1255 adapter_input = q->deint_output; 1256 1257 if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN) 1258 adapter_req.isinverted = 1; 1259 else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN) 1260 adapter_req.isinverted = 0; 1261 else { 1262 op->status |= 1 << RTE_BBDEV_DRV_ERROR; 1263 rte_bbdev_log(ERR, "LLR format wasn't specified"); 1264 return; 1265 } 1266 1267 adapter_req.ncb = ncb_without_null; 1268 adapter_req.pinteleavebuffer = adapter_input; 1269 adapter_resp.pharqout = q->adapter_output; 1270 1271 start_time = rte_rdtsc_precise(); 1272 1273 /* Turbo decode adaptation */ 1274 bblib_turbo_adapter_ul(&adapter_req, &adapter_resp); 1275 1276 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1277 1278 out = (uint8_t *)mbuf_append(m_out_head, m_out, 1279 ((k - crc24_overlap) >> 3)); 1280 if (out == NULL) { 1281 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1282 rte_bbdev_log(ERR, "Too little space in output mbuf"); 1283 return; 1284 } 1285 /* rte_bbdev_op_data.offset can be different than the offset of the 1286 * appended bytes 1287 */ 1288 out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 1289 if (check_crc_24b) 1290 turbo_req.c = c + 1; 1291 else 1292 turbo_req.c = c; 1293 turbo_req.input = (int8_t *)q->adapter_output; 1294 turbo_req.k = k; 1295 turbo_req.k_idx = k_idx; 1296 turbo_req.max_iter_num = dec->iter_max; 1297 turbo_req.early_term_disable = !check_bit(dec->op_flags, 1298 RTE_BBDEV_TURBO_EARLY_TERMINATION); 1299 turbo_resp.ag_buf = q->ag; 1300 turbo_resp.cb_buf = q->code_block; 1301 turbo_resp.output = out; 1302 1303 start_time = rte_rdtsc_precise(); 1304 1305 /* Turbo decode */ 1306 iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp); 1307 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1308 1309 dec->hard_output.length += (k >> 3); 1310 1311 if (iter_cnt > 0) { 1312 /* Temporary solution for returned iter_count from SDK */ 1313 iter_cnt = (iter_cnt - 1) >> 1; 1314 dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count); 1315 } else { 1316 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1317 rte_bbdev_log(ERR, "Turbo Decoder failed"); 1318 return; 1319 } 1320 #else 1321 RTE_SET_USED(q); 1322 RTE_SET_USED(op); 1323 RTE_SET_USED(c); 1324 RTE_SET_USED(k); 1325 RTE_SET_USED(kw); 1326 RTE_SET_USED(m_in); 1327 RTE_SET_USED(m_out_head); 1328 RTE_SET_USED(m_out); 1329 RTE_SET_USED(in_offset); 1330 RTE_SET_USED(out_offset); 1331 RTE_SET_USED(check_crc_24b); 1332 RTE_SET_USED(crc24_overlap); 1333 RTE_SET_USED(in_length); 1334 RTE_SET_USED(q_stats); 1335 #endif 1336 } 1337 1338 static inline void 1339 process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1340 uint8_t c, uint16_t out_length, uint32_t e, 1341 struct rte_mbuf *m_in, 1342 struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, 1343 struct rte_mbuf *m_harq_in, 1344 struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out, 1345 uint16_t in_offset, uint16_t out_offset, 1346 uint16_t harq_in_offset, uint16_t harq_out_offset, 1347 bool check_crc_24b, 1348 uint16_t crc24_overlap, uint16_t in_length, 1349 struct rte_bbdev_stats *q_stats) 1350 { 1351 #ifdef RTE_BBDEV_SDK_AVX512 1352 RTE_SET_USED(in_length); 1353 RTE_SET_USED(c); 1354 uint8_t *in, *out, *harq_in, *harq_out, *adapter_input; 1355 struct bblib_rate_dematching_5gnr_request derm_req; 1356 struct bblib_rate_dematching_5gnr_response derm_resp; 1357 struct bblib_ldpc_decoder_5gnr_request dec_req; 1358 struct bblib_ldpc_decoder_5gnr_response dec_resp; 1359 struct bblib_crc_request crc_req; 1360 struct bblib_crc_response crc_resp; 1361 struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; 1362 uint16_t K, parity_offset, sys_cols, outLenWithCrc; 1363 int16_t deRmOutSize, numRows; 1364 1365 /* Compute some LDPC BG lengths */ 1366 outLenWithCrc = out_length + (crc24_overlap >> 3); 1367 sys_cols = (dec->basegraph == 1) ? 22 : 10; 1368 K = sys_cols * dec->z_c; 1369 parity_offset = K - 2 * dec->z_c; 1370 1371 uint64_t start_time = rte_rdtsc_precise(); 1372 1373 in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); 1374 1375 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { 1376 /** 1377 * Single contiguous block from the first LLR of the 1378 * circular buffer. 1379 */ 1380 harq_in = NULL; 1381 if (m_harq_in != NULL) 1382 harq_in = rte_pktmbuf_mtod_offset(m_harq_in, 1383 uint8_t *, harq_in_offset); 1384 if (harq_in == NULL) { 1385 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1386 rte_bbdev_log(ERR, "No space in harq input mbuf"); 1387 return; 1388 } 1389 uint16_t harq_in_length = RTE_MIN( 1390 dec->harq_combined_input.length, 1391 (uint32_t) dec->n_cb); 1392 memset(q->ag + harq_in_length, 0, 1393 dec->n_cb - harq_in_length); 1394 rte_memcpy(q->ag, harq_in, harq_in_length); 1395 } 1396 1397 derm_req.p_in = (int8_t *) in; 1398 derm_req.p_harq = q->ag; /* This doesn't include the filler bits */ 1399 derm_req.base_graph = dec->basegraph; 1400 derm_req.zc = dec->z_c; 1401 derm_req.ncb = dec->n_cb; 1402 derm_req.e = e; 1403 derm_req.k0 = 0; /* Actual output from SDK */ 1404 derm_req.isretx = check_bit(dec->op_flags, 1405 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); 1406 derm_req.rvid = dec->rv_index; 1407 derm_req.modulation_order = dec->q_m; 1408 derm_req.start_null_index = parity_offset - dec->n_filler; 1409 derm_req.num_of_null = dec->n_filler; 1410 1411 bblib_rate_dematching_5gnr(&derm_req, &derm_resp); 1412 1413 /* Compute RM out size and number of rows */ 1414 deRmOutSize = RTE_MIN( 1415 derm_req.k0 + derm_req.e - 1416 ((derm_req.k0 < derm_req.start_null_index) ? 1417 0 : dec->n_filler), 1418 dec->n_cb - dec->n_filler); 1419 if (m_harq_in != NULL) 1420 deRmOutSize = RTE_MAX(deRmOutSize, 1421 RTE_MIN(dec->n_cb - dec->n_filler, 1422 m_harq_in->data_len)); 1423 numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c) 1424 - sys_cols + 2; 1425 numRows = RTE_MAX(4, numRows); 1426 1427 /* get output data starting address */ 1428 out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length); 1429 if (out == NULL) { 1430 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1431 rte_bbdev_log(ERR, 1432 "Too little space in LDPC decoder output mbuf"); 1433 return; 1434 } 1435 1436 /* rte_bbdev_op_data.offset can be different than the offset 1437 * of the appended bytes 1438 */ 1439 out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); 1440 adapter_input = q->enc_out; 1441 1442 dec_req.Zc = dec->z_c; 1443 dec_req.baseGraph = dec->basegraph; 1444 dec_req.nRows = numRows; 1445 dec_req.numChannelLlrs = deRmOutSize; 1446 dec_req.varNodes = derm_req.p_harq; 1447 dec_req.numFillerBits = dec->n_filler; 1448 dec_req.maxIterations = dec->iter_max; 1449 dec_req.enableEarlyTermination = check_bit(dec->op_flags, 1450 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE); 1451 dec_resp.varNodes = (int16_t *) q->adapter_output; 1452 dec_resp.compactedMessageBytes = q->enc_out; 1453 1454 bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp); 1455 1456 dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination, 1457 dec->iter_count); 1458 if (!dec_resp.parityPassedAtTermination) 1459 op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR; 1460 1461 bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3); 1462 1463 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) || 1464 check_bit(dec->op_flags, 1465 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) { 1466 crc_req.data = adapter_input; 1467 crc_req.len = K - dec->n_filler - 24; 1468 crc_resp.check_passed = false; 1469 crc_resp.data = adapter_input; 1470 if (check_crc_24b) 1471 bblib_lte_crc24b_check(&crc_req, &crc_resp); 1472 else 1473 bblib_lte_crc24a_check(&crc_req, &crc_resp); 1474 if (!crc_resp.check_passed) 1475 op->status |= 1 << RTE_BBDEV_CRC_ERROR; 1476 } else if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) { 1477 crc_req.data = adapter_input; 1478 crc_req.len = K - dec->n_filler - 16; 1479 crc_resp.check_passed = false; 1480 crc_resp.data = adapter_input; 1481 bblib_lte_crc16_check(&crc_req, &crc_resp); 1482 if (!crc_resp.check_passed) 1483 op->status |= 1 << RTE_BBDEV_CRC_ERROR; 1484 } 1485 1486 q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; 1487 1488 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) { 1489 harq_out = NULL; 1490 if (m_harq_out != NULL) { 1491 /* Initialize HARQ data length since we overwrite */ 1492 m_harq_out->data_len = 0; 1493 /* Check there is enough space 1494 * in the HARQ outbound buffer 1495 */ 1496 harq_out = (uint8_t *)mbuf_append(m_harq_out_head, 1497 m_harq_out, deRmOutSize); 1498 } 1499 if (harq_out == NULL) { 1500 op->status |= 1 << RTE_BBDEV_DATA_ERROR; 1501 rte_bbdev_log(ERR, "No space in HARQ output mbuf"); 1502 return; 1503 } 1504 /* get output data starting address and overwrite the data */ 1505 harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *, 1506 harq_out_offset); 1507 rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize); 1508 dec->harq_combined_output.length += deRmOutSize; 1509 } 1510 1511 rte_memcpy(out, adapter_input, out_length); 1512 dec->hard_output.length += out_length; 1513 #else 1514 RTE_SET_USED(q); 1515 RTE_SET_USED(op); 1516 RTE_SET_USED(c); 1517 RTE_SET_USED(out_length); 1518 RTE_SET_USED(e); 1519 RTE_SET_USED(m_in); 1520 RTE_SET_USED(m_out_head); 1521 RTE_SET_USED(m_out); 1522 RTE_SET_USED(m_harq_in); 1523 RTE_SET_USED(m_harq_out_head); 1524 RTE_SET_USED(m_harq_out); 1525 RTE_SET_USED(harq_in_offset); 1526 RTE_SET_USED(harq_out_offset); 1527 RTE_SET_USED(in_offset); 1528 RTE_SET_USED(out_offset); 1529 RTE_SET_USED(check_crc_24b); 1530 RTE_SET_USED(crc24_overlap); 1531 RTE_SET_USED(in_length); 1532 RTE_SET_USED(q_stats); 1533 #endif 1534 } 1535 1536 1537 static inline void 1538 enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1539 struct rte_bbdev_stats *queue_stats) 1540 { 1541 uint8_t c, r = 0; 1542 uint16_t kw, k = 0; 1543 uint16_t crc24_overlap = 0; 1544 struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec; 1545 struct rte_mbuf *m_in = dec->input.data; 1546 struct rte_mbuf *m_out = dec->hard_output.data; 1547 struct rte_mbuf *m_out_head = dec->hard_output.data; 1548 uint16_t in_offset = dec->input.offset; 1549 uint16_t out_offset = dec->hard_output.offset; 1550 uint32_t mbuf_total_left = dec->input.length; 1551 uint16_t seg_total_left; 1552 1553 /* Clear op status */ 1554 op->status = 0; 1555 1556 if (m_in == NULL || m_out == NULL) { 1557 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1558 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1559 return; 1560 } 1561 1562 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1563 c = dec->tb_params.c; 1564 } else { /* For Code Block mode */ 1565 k = dec->cb_params.k; 1566 c = 1; 1567 } 1568 1569 if ((c > 1) && !check_bit(dec->op_flags, 1570 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP)) 1571 crc24_overlap = 24; 1572 1573 while (mbuf_total_left > 0) { 1574 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 1575 k = (r < dec->tb_params.c_neg) ? 1576 dec->tb_params.k_neg : dec->tb_params.k_pos; 1577 1578 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1579 1580 /* Calculates circular buffer size (Kw). 1581 * According to 3gpp 36.212 section 5.1.4.2 1582 * Kw = 3 * Kpi, 1583 * where: 1584 * Kpi = nCol * nRow 1585 * where nCol is 32 and nRow can be calculated from: 1586 * D =< nCol * nRow 1587 * where D is the size of each output from turbo encoder block 1588 * (k + 4). 1589 */ 1590 kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3; 1591 1592 process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out, 1593 in_offset, out_offset, check_bit(dec->op_flags, 1594 RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap, 1595 seg_total_left, queue_stats); 1596 1597 /* To keep CRC24 attached to end of Code block, use 1598 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it 1599 * removed by default once verified. 1600 */ 1601 1602 mbuf_total_left -= kw; 1603 1604 /* Update offsets */ 1605 if (seg_total_left == kw) { 1606 /* Go to the next mbuf */ 1607 m_in = m_in->next; 1608 m_out = m_out->next; 1609 in_offset = 0; 1610 out_offset = 0; 1611 } else { 1612 /* Update offsets for next CBs (if exist) */ 1613 in_offset += kw; 1614 out_offset += ((k - crc24_overlap) >> 3); 1615 } 1616 r++; 1617 } 1618 } 1619 1620 static inline void 1621 enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, 1622 struct rte_bbdev_stats *queue_stats) 1623 { 1624 uint8_t c, r = 0; 1625 uint32_t e; 1626 uint16_t out_length, crc24_overlap = 0; 1627 struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; 1628 struct rte_mbuf *m_in = dec->input.data; 1629 struct rte_mbuf *m_harq_in = dec->harq_combined_input.data; 1630 struct rte_mbuf *m_harq_out = dec->harq_combined_output.data; 1631 struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data; 1632 struct rte_mbuf *m_out = dec->hard_output.data; 1633 struct rte_mbuf *m_out_head = dec->hard_output.data; 1634 uint16_t in_offset = dec->input.offset; 1635 uint16_t harq_in_offset = dec->harq_combined_input.offset; 1636 uint16_t harq_out_offset = dec->harq_combined_output.offset; 1637 uint16_t out_offset = dec->hard_output.offset; 1638 uint32_t mbuf_total_left = dec->input.length; 1639 uint16_t seg_total_left; 1640 1641 /* Clear op status */ 1642 op->status = 0; 1643 1644 if (m_in == NULL || m_out == NULL) { 1645 rte_bbdev_log(ERR, "Invalid mbuf pointer"); 1646 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1647 return; 1648 } 1649 1650 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1651 c = dec->tb_params.c; 1652 e = dec->tb_params.ea; 1653 } else { /* For Code Block mode */ 1654 c = 1; 1655 e = dec->cb_params.e; 1656 } 1657 1658 if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP)) 1659 crc24_overlap = 24; 1660 1661 out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */ 1662 out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3); 1663 1664 while (mbuf_total_left > 0) { 1665 if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 1666 e = (r < dec->tb_params.cab) ? 1667 dec->tb_params.ea : dec->tb_params.eb; 1668 /* Special case handling when overusing mbuf */ 1669 if (e < RTE_BBDEV_LDPC_E_MAX_MBUF) 1670 seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; 1671 else 1672 seg_total_left = e; 1673 1674 process_ldpc_dec_cb(q, op, c, out_length, e, 1675 m_in, m_out_head, m_out, 1676 m_harq_in, m_harq_out_head, m_harq_out, 1677 in_offset, out_offset, harq_in_offset, 1678 harq_out_offset, 1679 check_bit(dec->op_flags, 1680 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK), 1681 crc24_overlap, 1682 seg_total_left, queue_stats); 1683 1684 /* To keep CRC24 attached to end of Code block, use 1685 * RTE_BBDEV_LDPC_DEC_TB_CRC_24B_KEEP flag as it 1686 * removed by default once verified. 1687 */ 1688 1689 mbuf_total_left -= e; 1690 1691 /* Update offsets */ 1692 if (seg_total_left == e) { 1693 /* Go to the next mbuf */ 1694 m_in = m_in->next; 1695 m_out = m_out->next; 1696 if (m_harq_in != NULL) 1697 m_harq_in = m_harq_in->next; 1698 if (m_harq_out != NULL) 1699 m_harq_out = m_harq_out->next; 1700 in_offset = 0; 1701 out_offset = 0; 1702 harq_in_offset = 0; 1703 harq_out_offset = 0; 1704 } else { 1705 /* Update offsets for next CBs (if exist) */ 1706 in_offset += e; 1707 out_offset += out_length; 1708 } 1709 r++; 1710 } 1711 } 1712 1713 static inline uint16_t 1714 enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops, 1715 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1716 { 1717 uint16_t i; 1718 queue_stats->acc_offload_cycles = 0; 1719 1720 for (i = 0; i < nb_ops; ++i) 1721 enqueue_dec_one_op(q, ops[i], queue_stats); 1722 1723 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1724 NULL); 1725 } 1726 1727 static inline uint16_t 1728 enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q, 1729 struct rte_bbdev_dec_op **ops, 1730 uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) 1731 { 1732 uint16_t i; 1733 queue_stats->acc_offload_cycles = 0; 1734 1735 for (i = 0; i < nb_ops; ++i) 1736 enqueue_ldpc_dec_one_op(q, ops[i], queue_stats); 1737 1738 return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, 1739 NULL); 1740 } 1741 1742 /* Enqueue burst */ 1743 static uint16_t 1744 enqueue_enc_ops(struct rte_bbdev_queue_data *q_data, 1745 struct rte_bbdev_enc_op **ops, uint16_t nb_ops) 1746 { 1747 void *queue = q_data->queue_private; 1748 struct turbo_sw_queue *q = queue; 1749 uint16_t nb_enqueued = 0; 1750 1751 nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats); 1752 1753 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; 1754 q_data->queue_stats.enqueued_count += nb_enqueued; 1755 1756 return nb_enqueued; 1757 } 1758 1759 /* Enqueue burst */ 1760 static uint16_t 1761 enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data, 1762 struct rte_bbdev_enc_op **ops, uint16_t nb_ops) 1763 { 1764 void *queue = q_data->queue_private; 1765 struct turbo_sw_queue *q = queue; 1766 uint16_t nb_enqueued = 0; 1767 1768 nb_enqueued = enqueue_ldpc_enc_all_ops( 1769 q, ops, nb_ops, &q_data->queue_stats); 1770 1771 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; 1772 q_data->queue_stats.enqueued_count += nb_enqueued; 1773 1774 return nb_enqueued; 1775 } 1776 1777 /* Enqueue burst */ 1778 static uint16_t 1779 enqueue_dec_ops(struct rte_bbdev_queue_data *q_data, 1780 struct rte_bbdev_dec_op **ops, uint16_t nb_ops) 1781 { 1782 void *queue = q_data->queue_private; 1783 struct turbo_sw_queue *q = queue; 1784 uint16_t nb_enqueued = 0; 1785 1786 nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats); 1787 1788 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; 1789 q_data->queue_stats.enqueued_count += nb_enqueued; 1790 1791 return nb_enqueued; 1792 } 1793 1794 /* Enqueue burst */ 1795 static uint16_t 1796 enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data, 1797 struct rte_bbdev_dec_op **ops, uint16_t nb_ops) 1798 { 1799 void *queue = q_data->queue_private; 1800 struct turbo_sw_queue *q = queue; 1801 uint16_t nb_enqueued = 0; 1802 1803 nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops, 1804 &q_data->queue_stats); 1805 1806 q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; 1807 q_data->queue_stats.enqueued_count += nb_enqueued; 1808 1809 return nb_enqueued; 1810 } 1811 1812 /* Dequeue decode burst */ 1813 static uint16_t 1814 dequeue_dec_ops(struct rte_bbdev_queue_data *q_data, 1815 struct rte_bbdev_dec_op **ops, uint16_t nb_ops) 1816 { 1817 struct turbo_sw_queue *q = q_data->queue_private; 1818 uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts, 1819 (void **)ops, nb_ops, NULL); 1820 q_data->queue_stats.dequeued_count += nb_dequeued; 1821 1822 return nb_dequeued; 1823 } 1824 1825 /* Dequeue encode burst */ 1826 static uint16_t 1827 dequeue_enc_ops(struct rte_bbdev_queue_data *q_data, 1828 struct rte_bbdev_enc_op **ops, uint16_t nb_ops) 1829 { 1830 struct turbo_sw_queue *q = q_data->queue_private; 1831 uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts, 1832 (void **)ops, nb_ops, NULL); 1833 q_data->queue_stats.dequeued_count += nb_dequeued; 1834 1835 return nb_dequeued; 1836 } 1837 1838 /* Parse 16bit integer from string argument */ 1839 static inline int 1840 parse_u16_arg(const char *key, const char *value, void *extra_args) 1841 { 1842 uint16_t *u16 = extra_args; 1843 unsigned int long result; 1844 1845 if ((value == NULL) || (extra_args == NULL)) 1846 return -EINVAL; 1847 errno = 0; 1848 result = strtoul(value, NULL, 0); 1849 if ((result >= (1 << 16)) || (errno != 0)) { 1850 rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key); 1851 return -ERANGE; 1852 } 1853 *u16 = (uint16_t)result; 1854 return 0; 1855 } 1856 1857 /* Parse parameters used to create device */ 1858 static int 1859 parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args) 1860 { 1861 struct rte_kvargs *kvlist = NULL; 1862 int ret = 0; 1863 1864 if (params == NULL) 1865 return -EINVAL; 1866 if (input_args) { 1867 kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params); 1868 if (kvlist == NULL) 1869 return -EFAULT; 1870 1871 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0], 1872 &parse_u16_arg, ¶ms->queues_num); 1873 if (ret < 0) 1874 goto exit; 1875 1876 ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1], 1877 &parse_u16_arg, ¶ms->socket_id); 1878 if (ret < 0) 1879 goto exit; 1880 1881 if (params->socket_id >= RTE_MAX_NUMA_NODES) { 1882 rte_bbdev_log(ERR, "Invalid socket, must be < %u", 1883 RTE_MAX_NUMA_NODES); 1884 goto exit; 1885 } 1886 } 1887 1888 exit: 1889 rte_kvargs_free(kvlist); 1890 return ret; 1891 } 1892 1893 /* Create device */ 1894 static int 1895 turbo_sw_bbdev_create(struct rte_vdev_device *vdev, 1896 struct turbo_sw_params *init_params) 1897 { 1898 struct rte_bbdev *bbdev; 1899 const char *name = rte_vdev_device_name(vdev); 1900 1901 bbdev = rte_bbdev_allocate(name); 1902 if (bbdev == NULL) 1903 return -ENODEV; 1904 1905 bbdev->data->dev_private = rte_zmalloc_socket(name, 1906 sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE, 1907 init_params->socket_id); 1908 if (bbdev->data->dev_private == NULL) { 1909 rte_bbdev_release(bbdev); 1910 return -ENOMEM; 1911 } 1912 1913 bbdev->dev_ops = &pmd_ops; 1914 bbdev->device = &vdev->device; 1915 bbdev->data->socket_id = init_params->socket_id; 1916 bbdev->intr_handle = NULL; 1917 1918 /* register rx/tx burst functions for data path */ 1919 bbdev->dequeue_enc_ops = dequeue_enc_ops; 1920 bbdev->dequeue_dec_ops = dequeue_dec_ops; 1921 bbdev->enqueue_enc_ops = enqueue_enc_ops; 1922 bbdev->enqueue_dec_ops = enqueue_dec_ops; 1923 bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops; 1924 bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops; 1925 bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops; 1926 bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops; 1927 ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues = 1928 init_params->queues_num; 1929 1930 return 0; 1931 } 1932 1933 /* Initialise device */ 1934 static int 1935 turbo_sw_bbdev_probe(struct rte_vdev_device *vdev) 1936 { 1937 struct turbo_sw_params init_params = { 1938 rte_socket_id(), 1939 RTE_BBDEV_DEFAULT_MAX_NB_QUEUES 1940 }; 1941 const char *name; 1942 const char *input_args; 1943 1944 if (vdev == NULL) 1945 return -EINVAL; 1946 1947 name = rte_vdev_device_name(vdev); 1948 if (name == NULL) 1949 return -EINVAL; 1950 input_args = rte_vdev_device_args(vdev); 1951 parse_turbo_sw_params(&init_params, input_args); 1952 1953 rte_bbdev_log_debug( 1954 "Initialising %s on NUMA node %d with max queues: %d", 1955 name, init_params.socket_id, init_params.queues_num); 1956 1957 return turbo_sw_bbdev_create(vdev, &init_params); 1958 } 1959 1960 /* Uninitialise device */ 1961 static int 1962 turbo_sw_bbdev_remove(struct rte_vdev_device *vdev) 1963 { 1964 struct rte_bbdev *bbdev; 1965 const char *name; 1966 1967 if (vdev == NULL) 1968 return -EINVAL; 1969 1970 name = rte_vdev_device_name(vdev); 1971 if (name == NULL) 1972 return -EINVAL; 1973 1974 bbdev = rte_bbdev_get_named_dev(name); 1975 if (bbdev == NULL) 1976 return -EINVAL; 1977 1978 rte_free(bbdev->data->dev_private); 1979 1980 return rte_bbdev_release(bbdev); 1981 } 1982 1983 static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = { 1984 .probe = turbo_sw_bbdev_probe, 1985 .remove = turbo_sw_bbdev_remove 1986 }; 1987 1988 RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv); 1989 RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME, 1990 TURBO_SW_MAX_NB_QUEUES_ARG"=<int> " 1991 TURBO_SW_SOCKET_ID_ARG"=<int>"); 1992 RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw); 1993