/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdlib.h>
#include <string.h>

#include <rte_common.h>
#include <bus_vdev_driver.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_kvargs.h>
#include <rte_cycles.h>
#include <rte_errno.h>

#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>

#include <rte_hexdump.h>
#include <rte_log.h>

#ifdef RTE_BBDEV_SDK_AVX2
#include <ipp.h>
#include <ipps.h>
#include <phy_turbo.h>
#include <phy_crc.h>
#include <phy_rate_match.h>
#endif
#ifdef RTE_BBDEV_SDK_AVX512
#include <bit_reverse.h>
#include <phy_ldpc_encoder_5gnr.h>
#include <phy_ldpc_decoder_5gnr.h>
#include <phy_LDPC_ratematch_5gnr.h>
#include <phy_rate_dematching_5gnr.h>
#endif

#define DRIVER_NAME baseband_turbo_sw

RTE_LOG_REGISTER_DEFAULT(bbdev_turbo_sw_logtype, NOTICE);

/* Helper macro for logging */
#define rte_bbdev_log(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \
		##__VA_ARGS__)

#define rte_bbdev_log_debug(fmt, ...) \
	rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
		##__VA_ARGS__)

#define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48)
#define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6)
#define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48)

/* private data structure */
struct bbdev_private {
	unsigned int max_nb_queues;  /**< Max number of queues */
};

/* Initialisation params structure that can be used by Turbo SW driver */
struct turbo_sw_params {
	int socket_id;  /**< Turbo SW device socket */
	uint16_t queues_num;  /**< Turbo SW device queues number */
};

/* Acceptable params for Turbo SW devices */
#define TURBO_SW_MAX_NB_QUEUES_ARG  "max_nb_queues"
#define TURBO_SW_SOCKET_ID_ARG      "socket_id"

static const char * const turbo_sw_valid_params[] = {
	TURBO_SW_MAX_NB_QUEUES_ARG,
	TURBO_SW_SOCKET_ID_ARG
};

/* queue */
struct turbo_sw_queue {
	/* Ring for processed (encoded/decoded) operations which are ready to
	 * be dequeued.
	 */
	struct rte_ring *processed_pkts;
	/* Stores input for turbo encoder (used when CRC attachment is
	 * performed)
	 */
	uint8_t *enc_in;
	/* Stores output from turbo encoder */
	uint8_t *enc_out;
	/* Alpha gamma buf for bblib_turbo_decoder() function */
	int8_t *ag;
	/* Temp buf for bblib_turbo_decoder() function */
	uint16_t *code_block;
	/* Input buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_input;
	/* Output buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_output;
	/* Output buf for bblib_turbodec_adapter_lte() function */
	uint8_t *adapter_output;
	/* Operation type of this queue */
	enum rte_bbdev_op_type type;
} __rte_cache_aligned;


#ifdef RTE_BBDEV_SDK_AVX2
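/* Append len bytes to the tail of segment 'm' and account for them in the
 * packet length of the head segment 'm_head'. Unlike rte_pktmbuf_append(),
 * this checks the tailroom of (and writes to) an arbitrary segment of a
 * chained mbuf. Returns a pointer to the appended area, or NULL when 'm'
 * has insufficient tailroom.
 */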
static inline char *
mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
{
	if (unlikely(len > rte_pktmbuf_tailroom(m)))
		return NULL;

	char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
	m->data_len = (uint16_t)(m->data_len + len);
	m_head->pkt_len = (m_head->pkt_len + len);
	return tail;
}

/* Calculate index based on Table 5.1.3-3 from TS 36.212 */
static inline int32_t
compute_idx(uint16_t k)
{
	int32_t result = 0;

	if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE)
		return -1;

	if (k > 2048) {
		if ((k - 2048) % 64 != 0)
			return -1;

		result = 124 + (k - 2048) / 64;
	} else if (k <= 512) {
		if ((k - 40) % 8 != 0)
			return -1;

		result = (k - 40) / 8 + 1;
	} else if (k <= 1024) {
		if ((k - 512) % 16 != 0)
			return -1;

		result = 60 + (k - 512) / 16;
	} else { /* 1024 < k <= 2048 */
		if ((k - 1024) % 32 != 0)
			return -1;

		result = 92 + (k - 1024) / 32;
	}

	return result;
}
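
/* Example mapping (derived from the range/step checks above):
 * k = 40 -> 1, k = 512 -> 60, k = 1024 -> 92, k = 2048 -> 124,
 * k = 6144 -> 188; any k off the allowed step (8, 16, 32 or 64 depending
 * on the range) yields -1.
 */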
#endif

/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}

/* Get device info */
static void
info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
{
	struct bbdev_private *internals = dev->data->dev_private;
	const struct rte_bbdev_op_cap *op_cap;
	int num_op_type = 0;

	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
#ifdef RTE_BBDEV_SDK_AVX2
		{
			.type = RTE_BBDEV_OP_TURBO_DEC,
			.cap.turbo_dec = {
				.capability_flags =
					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
					RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
					RTE_BBDEV_TURBO_CRC_TYPE_24B |
					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
					RTE_BBDEV_TURBO_EARLY_TERMINATION,
				.max_llr_modulus = 16,
				.num_buffers_src =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_hard_out =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_soft_out = 0,
			}
		},
		{
			.type = RTE_BBDEV_OP_TURBO_ENC,
			.cap.turbo_enc = {
				.capability_flags =
						RTE_BBDEV_TURBO_CRC_24B_ATTACH |
						RTE_BBDEV_TURBO_CRC_24A_ATTACH |
						RTE_BBDEV_TURBO_RATE_MATCH |
						RTE_BBDEV_TURBO_RV_INDEX_BYPASS,
				.num_buffers_src =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_dst =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
			}
		},
#endif
#ifdef RTE_BBDEV_SDK_AVX512
		{
			.type = RTE_BBDEV_OP_LDPC_ENC,
			.cap.ldpc_enc = {
				.capability_flags =
						RTE_BBDEV_LDPC_RATE_MATCH |
						RTE_BBDEV_LDPC_CRC_16_ATTACH |
						RTE_BBDEV_LDPC_CRC_24A_ATTACH |
						RTE_BBDEV_LDPC_CRC_24B_ATTACH,
				.num_buffers_src =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_dst =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
			}
		},
		{
			.type = RTE_BBDEV_OP_LDPC_DEC,
			.cap.ldpc_dec = {
				.capability_flags =
					RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
					RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE,
				.llr_size = 8,
				.llr_decimals = 4,
				.num_buffers_src =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_hard_out =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_soft_out = 0,
			}
		},
#endif
		RTE_BBDEV_END_OF_CAPABILITIES_LIST()
	};

	static struct rte_bbdev_queue_conf default_queue_conf = {
		.queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
	};
#ifdef RTE_BBDEV_SDK_AVX2
	static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
	dev_info->cpu_flag_reqs = &cpu_flag;
#else
	dev_info->cpu_flag_reqs = NULL;
#endif
	default_queue_conf.socket = dev->data->socket_id;

	dev_info->driver_name = RTE_STR(DRIVER_NAME);
	dev_info->max_num_queues = internals->max_nb_queues;
	dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT;
	dev_info->hardware_accelerated = false;
	dev_info->max_dl_queue_priority = 0;
	dev_info->max_ul_queue_priority = 0;
	dev_info->default_queue_conf = default_queue_conf;
	dev_info->capabilities = bbdev_capabilities;
	dev_info->min_alignment = 64;
	dev_info->harq_buffer_size = 0;
	dev_info->data_endianness = RTE_LITTLE_ENDIAN;
	dev_info->device_status = RTE_BBDEV_DEV_NOT_SUPPORTED;
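
	/* Split the available queues evenly across the op types compiled in;
	 * e.g. with both SDK variants enabled (4 op types) and
	 * max_nb_queues=16 this reports 4 queues per type.
	 */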
	op_cap = bbdev_capabilities;
	for (; op_cap->type != RTE_BBDEV_OP_NONE; ++op_cap)
		num_op_type++;
	op_cap = bbdev_capabilities;
	if (num_op_type > 0) {
		int num_queue_per_type = dev_info->max_num_queues /
				num_op_type;
		for (; op_cap->type != RTE_BBDEV_OP_NONE; ++op_cap)
			dev_info->num_queues[op_cap->type] =
					num_queue_per_type;
	}

	rte_bbdev_log_debug("got device info from %u", dev->data->dev_id);
}

/* Release queue */
static int
q_release(struct rte_bbdev *dev, uint16_t q_id)
{
	struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private;

	if (q != NULL) {
		rte_ring_free(q->processed_pkts);
		rte_free(q->enc_out);
		rte_free(q->enc_in);
		rte_free(q->ag);
		rte_free(q->code_block);
		rte_free(q->deint_input);
		rte_free(q->deint_output);
		rte_free(q->adapter_output);
		rte_free(q);
		dev->data->queues[q_id].queue_private = NULL;
	}

	rte_bbdev_log_debug("released device queue %u:%u",
			dev->data->dev_id, q_id);
	return 0;
}

/* Setup a queue */
static int
q_setup(struct rte_bbdev *dev, uint16_t q_id,
		const struct rte_bbdev_queue_conf *queue_conf)
{
	int ret;
	struct turbo_sw_queue *q;
	char name[RTE_RING_NAMESIZE];

	/* Allocate the queue data structure. */
	q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q == NULL) {
		rte_bbdev_log(ERR, "Failed to allocate queue memory");
		return -ENOMEM;
	}

	/* Allocate memory for encoder output. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->enc_out = rte_zmalloc_socket(name,
			((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) *
			sizeof(*q->enc_out) * 3,
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->enc_out == NULL) {
		rte_bbdev_log(ERR,
			"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for rate matching output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id,
			q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->enc_in = rte_zmalloc_socket(name,
			(RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->enc_in == NULL) {
		rte_bbdev_log(ERR,
			"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Alpha Gamma temp buffer. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->ag = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->ag == NULL) {
		rte_bbdev_log(ERR,
			"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for code block temp buffer. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->code_block = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->code_block == NULL) {
		rte_bbdev_log(ERR,
			"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Deinterleaver input. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_de_i%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->deint_input = rte_zmalloc_socket(name,
			DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->deint_input == NULL) {
		rte_bbdev_log(ERR,
			"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Deinterleaver output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_de_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->deint_output = rte_zmalloc_socket(name,
			DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->deint_output == NULL) {
		rte_bbdev_log(ERR,
			"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Allocate memory for Adapter output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_ada_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->adapter_output = rte_zmalloc_socket(name,
			ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->adapter_output == NULL) {
		rte_bbdev_log(ERR,
			"Failed to allocate queue memory for %s", name);
		ret = -ENOMEM;
		goto free_q;
	}

	/* Create ring for packets awaiting to be dequeued. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->processed_pkts = rte_ring_create(name, queue_conf->queue_size,
			queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
	if (q->processed_pkts == NULL) {
		rte_bbdev_log(ERR, "Failed to create ring for %s", name);
		ret = -rte_errno;
		goto free_q;
	}

	q->type = queue_conf->op_type;

	dev->data->queues[q_id].queue_private = q;
	rte_bbdev_log_debug("setup device queue %s", name);
	return 0;

free_q:
	rte_ring_free(q->processed_pkts);
	rte_free(q->enc_out);
	rte_free(q->enc_in);
	rte_free(q->ag);
	rte_free(q->code_block);
	rte_free(q->deint_input);
	rte_free(q->deint_output);
	rte_free(q->adapter_output);
	rte_free(q);
	return ret;
}

static const struct rte_bbdev_ops pmd_ops = {
	.info_get = info_get,
	.queue_setup = q_setup,
	.queue_release = q_release
};

#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
/* Checks if the encoder input buffer is correct.
 * Returns 0 if it's valid, -1 otherwise.
 */
static inline int
is_enc_input_valid(const uint16_t k, const int32_t k_idx,
		const uint16_t in_length)
{
	if (k_idx < 0) {
		rte_bbdev_log(ERR, "K Index is invalid");
		return -1;
	}

	if (in_length < (k >> 3)) {
		rte_bbdev_log(ERR,
				"Mismatch between input length (%u bytes) and K (%u bits)",
				in_length, k);
		return -1;
	}

	if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
		rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
				k, RTE_BBDEV_TURBO_MAX_CB_SIZE);
		return -1;
	}

	return 0;
}

/* Checks if the decoder input buffer is correct.
 * Returns 0 if it's valid, -1 otherwise.
 */
static inline int
is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
{
	if (k_idx < 0) {
		rte_bbdev_log(ERR, "K index is invalid");
		return -1;
	}

	if (in_length < kw) {
		rte_bbdev_log(ERR,
				"Mismatch between input length (%u) and kw (%u)",
				in_length, kw);
		return -1;
	}

	if (kw > RTE_BBDEV_TURBO_MAX_KW) {
		rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
				kw, RTE_BBDEV_TURBO_MAX_KW);
		return -1;
	}

	return 0;
}
#endif
#endif

static inline void
process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t in_length, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	int ret;
#else
	RTE_SET_USED(in_length);
#endif
	int16_t k_idx;
	uint16_t m;
	uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
	uint64_t first_3_bytes = 0;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	struct bblib_turbo_encoder_request turbo_req;
	struct bblib_turbo_encoder_response turbo_resp;
	struct bblib_rate_match_dl_request rm_req;
	struct bblib_rate_match_dl_response rm_resp;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);
	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* CRC24A (for TB) */
	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
			(enc->code_block_mode == RTE_BBDEV_CODE_BLOCK)) {
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
#endif

		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is room for the CRC bits; if not, use the
		 * temporary buffer.
		 */
		if (mbuf_append(m_in, m_in, 3) == NULL) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else {
			/* Store the first 3 bytes of the next CB as they will
			 * be overwritten by the CRC bytes. If this is the last
			 * CB there is nothing to save, and this branch is not
			 * taken.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24A generation */
		bblib_lte_crc24a_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
		/* CRC24B */
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
#endif

		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is room for the CRC bits if this is the last
		 * CB in the TB. If not, use the temporary buffer.
		 */
		if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else if (c - r > 1) {
			/* Store the first 3 bytes of the next CB as they will
			 * be overwritten by the CRC bytes.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24B generation */
		bblib_lte_crc24b_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	}
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	else {
		ret = is_enc_input_valid(k, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
	}
#endif

	/* Turbo encoder */

	/* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
	 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding
	 * up, so dst_data's length should be 3*(k/8) + 3 bytes.
	 * In the rate-matching bypass case the output pointers passed to the
	 * encoder (out0, out1 and out2) can directly point to addresses of
	 * the output from the turbo_enc entity.
	 */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		out0 = q->enc_out;
		out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
		out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
	} else {
		out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
				(k >> 3) * 3 + 2);
		if (out0 == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		enc->output.length += (k >> 3) * 3 + 2;
		/* rte_bbdev_op_data.offset can be different than the
		 * offset of the appended bytes
		 */
		out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
		out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + (k >> 3) + 1);
		out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + 2 * ((k >> 3) + 1));
	}

	turbo_req.case_id = k_idx;
	turbo_req.input_win = in;
	turbo_req.length = k >> 3;
	turbo_resp.output_win_0 = out0;
	turbo_resp.output_win_1 = out1;
	turbo_resp.output_win_2 = out2;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo encoding */
	if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "Turbo Encoder failed");
		return;
	}
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	/* Restore the first 3 bytes of the next CB if they were overwritten
	 * by the CRC
	 */
	if (first_3_bytes != 0)
		*((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;

	/* Rate-matching */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		uint8_t mask_id;
		/* Integer round up division by 8 */
		uint16_t out_len = (e + 7) >> 3;
		/* The mask array is indexed using (E%8) >> 1. E is always an
		 * even number, so there are only 4 possible values.
		 */
		const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};

		/* get output data starting address */
		rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
		if (rm_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		/* rte_bbdev_op_data.offset can be different than the offset
		 * of the appended bytes
		 */
		rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

		/* index of current code block */
		rm_req.r = r;
		/* total number of code blocks */
		rm_req.C = c;
		/* For DL - 1, UL - 0 */
		rm_req.direction = 1;
		/* According to 3GPP TS 36.212 section 5.1.4.1.2, Nsoft, KMIMO
		 * and MDL_HARQ are used for Ncb calculation. As Ncb is already
		 * known we can adjust those parameters
		 */
		rm_req.Nsoft = ncb * rm_req.C;
		rm_req.KMIMO = 1;
		rm_req.MDL_HARQ = 1;
		/* According to 3GPP TS 36.212 section 5.1.4.1.2, Nl, Qm and G
		 * are used for E calculation. As E is already known we can
		 * adjust those parameters
		 */
		rm_req.NL = e;
		rm_req.Qm = 1;
		rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C;

		rm_req.rvidx = enc->rv_index;
		rm_req.Kidx = k_idx - 1;
		rm_req.nLen = k + 4;
		rm_req.tin0 = out0;
		rm_req.tin1 = out1;
		rm_req.tin2 = out2;
		rm_resp.output = rm_out;
		rm_resp.OutputLen = out_len;
		if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS)
			rm_req.bypass_rvidx = 1;
		else
			rm_req.bypass_rvidx = 0;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Rate-Matching */
		if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
			op->status |= 1 << RTE_BBDEV_DRV_ERROR;
			rte_bbdev_log(ERR, "Rate matching failed");
			return;
		}
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

		/* SW fills the entire last byte even if E%8 != 0. Clear the
		 * superfluous data bits for consistency with HW devices.
		 */
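		/* Example: for E%8 == 2 the last byte carries 2 valid
		 * (MSB-first) bits, mask_id = (2 >> 1) = 1 and mask 0xC0
		 * keeps only the top two bits.
		 */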
		mask_id = (e & 7) >> 1;
		rm_out[out_len - 1] &= mask_out[mask_id];
		enc->output.length += rm_resp.OutputLen;
	} else {
		/* Rate matching is bypassed */

		/* Completing last byte of out0 (where 4 tail bits are stored)
		 * by moving first 4 bits from out1
		 */
		tmp_out = (uint8_t *) --out1;
		*tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4);
		tmp_out++;
		/* Shifting out1 data by 4 bits to the left */
		for (m = 0; m < k >> 3; ++m) {
			uint8_t *first = tmp_out;
			uint8_t second = *(tmp_out + 1);
			*first = (*first << 4) | ((second & 0xF0) >> 4);
			tmp_out++;
		}
		/* Shifting out2 data by 8 bits to the left */
		for (m = 0; m < (k >> 3) + 1; ++m) {
			*tmp_out = *(tmp_out + 1);
			tmp_out++;
		}
		*tmp_out = 0;
	}
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(r);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(ncb);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}


static inline void
process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t seg_total_left, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
	RTE_SET_USED(seg_total_left);
	uint8_t *in, *rm_out;
	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
	struct bblib_ldpc_encoder_5gnr_request ldpc_req;
	struct bblib_ldpc_encoder_5gnr_response ldpc_resp;
	struct bblib_LDPC_ratematch_5gnr_request rm_req;
	struct bblib_LDPC_ratematch_5gnr_response rm_resp;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	uint16_t msgLen, puntBits, parity_offset, out_len;
	uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
	uint16_t in_length_in_bits = K - enc->n_filler;
	uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3;
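
	/* Example (values derived from the formulas above): base graph 1
	 * with Zc = 384 gives K = 22 * 384 = 8448 info bits; with
	 * n_filler = 0 that is 1056 input bytes.
	 */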

#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time = rte_rdtsc_precise();
#else
	RTE_SET_USED(q_stats);
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* Masking the Filler bits explicitly */
	memset(q->enc_in + (in_length_in_bytes - 3), 0,
			((K + 7) >> 3) - (in_length_in_bytes - 3));
	/* CRC Generation */
	if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) {
		rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
		crc_req.data = in;
		crc_req.len = in_length_in_bits - 24;
		crc_resp.data = q->enc_in;
		bblib_lte_crc24a_gen(&crc_req, &crc_resp);
	} else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) {
		rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
		crc_req.data = in;
		crc_req.len = in_length_in_bits - 24;
		crc_resp.data = q->enc_in;
		bblib_lte_crc24b_gen(&crc_req, &crc_resp);
	} else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_16_ATTACH) {
		rte_memcpy(q->enc_in, in, in_length_in_bytes - 2);
		crc_req.data = in;
		crc_req.len = in_length_in_bits - 16;
		crc_resp.data = q->enc_in;
		bblib_lte_crc16_gen(&crc_req, &crc_resp);
	} else
		rte_memcpy(q->enc_in, in, in_length_in_bytes);

	/* LDPC Encoding */
	ldpc_req.Zc = enc->z_c;
	ldpc_req.baseGraph = enc->basegraph;
	/* Number of rows set to maximum */
	ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42;
	ldpc_req.numberCodeblocks = 1;
	ldpc_req.input[0] = (int8_t *) q->enc_in;
	ldpc_resp.output[0] = (int8_t *) q->enc_out;

	bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3);

	if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "LDPC Encoder failed");
		return;
	}

	/*
	 * Systematic + Parity : Recreating stream with filler bits, ideally
	 * the bit select could handle this in the RM SDK
	 */
	msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc;
	puntBits = 2 * ldpc_req.Zc;
	parity_offset = msgLen - puntBits;
	ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8),
			puntBits % 8, q->adapter_output, 0, parity_offset);
	ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8),
			parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc);

	out_len = (e + 7) >> 3;
	/* get output data starting address */
	rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
	if (rm_out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Too little space in output mbuf");
		return;
	}
	/*
	 * rte_bbdev_op_data.offset can be different than the offset
	 * of the appended bytes
	 */
	rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

	/* Rate-Matching */
	rm_req.E = e;
	rm_req.Ncb = enc->n_cb;
	rm_req.Qm = enc->q_m;
	rm_req.Zc = enc->z_c;
	rm_req.baseGraph = enc->basegraph;
	rm_req.input = q->adapter_output;
	rm_req.nLen = enc->n_filler;
	rm_req.nullIndex = parity_offset - enc->n_filler;
	rm_req.rvidx = enc->rv_index;
	rm_resp.output = q->deint_output;

	if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "Rate matching failed");
		return;
	}

	/* RM SDK may provide non-zero bits on last byte */
	if ((e % 8) != 0)
		q->deint_output[out_len - 1] &= (1 << (e % 8)) - 1;

	bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3);

	rte_memcpy(rm_out, q->deint_output, out_len);
	enc->output.length += out_len;

#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(seg_total_left);
	RTE_SET_USED(q_stats);
#endif
}
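
/* enqueue_enc_one_op() below walks a transport block CB by CB: k, ncb and e
 * are picked per CB from tb_params (the k_neg/k_pos split per 3GPP TS
 * 36.212), and the input/output offsets advance through the mbuf chain as
 * each CB is consumed.
 */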

static inline void
enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r, crc24_bits = 0;
	uint16_t k, ncb;
	uint32_t e;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	uint16_t in_offset = enc->input.offset;
	uint16_t out_offset = enc->output.offset;
	struct rte_mbuf *m_in = enc->input.data;
	struct rte_mbuf *m_out = enc->output.data;
	struct rte_mbuf *m_out_head = enc->output.data;
	uint32_t in_length, mbuf_total_left = enc->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
		rte_bbdev_log(ERR, "TB size (%u bytes) is too big, max: %d bits",
				mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
		(enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
		crc24_bits = 24;

	if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = enc->tb_params.c;
		r = enc->tb_params.r;
	} else { /* For Code Block mode */
		c = 1;
		r = 0;
	}

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
			k = (r < enc->tb_params.c_neg) ?
				enc->tb_params.k_neg : enc->tb_params.k_pos;
			ncb = (r < enc->tb_params.c_neg) ?
				enc->tb_params.ncb_neg : enc->tb_params.ncb_pos;
			e = (r < enc->tb_params.cab) ?
				enc->tb_params.ea : enc->tb_params.eb;
		} else {
			k = enc->cb_params.k;
			ncb = enc->cb_params.ncb;
			e = enc->cb_params.e;
		}

		process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
				m_out, in_offset, out_offset, seg_total_left,
				queue_stats);
		/* Update total_left */
		in_length = ((k - crc24_bits) >> 3);
		mbuf_total_left -= in_length;
		/* Update offsets for next CBs (if exist) */
		in_offset += (k - crc24_bits) >> 3;
		if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
			out_offset += e >> 3;
		else
			out_offset += (k >> 3) * 3 + 2;

		/* Update offsets */
		if (seg_total_left == in_length) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		}
		r++;
	}

	/* check if all input data was processed */
	if (mbuf_total_left != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included CBs sizes");
	}
}


static inline void
enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r, crc24_bits = 0;
	uint32_t e;
	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
	uint16_t in_offset = enc->input.offset;
	uint16_t out_offset = enc->output.offset;
	struct rte_mbuf *m_in = enc->input.data;
	struct rte_mbuf *m_out = enc->output.data;
	struct rte_mbuf *m_out_head = enc->output.data;
	uint32_t in_length, mbuf_total_left = enc->input.length;

	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
		rte_bbdev_log(ERR, "TB size (%u bytes) is too big, max: %d bits",
				mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) ||
		(enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH))
		crc24_bits = 24;

	if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = enc->tb_params.c;
		r = enc->tb_params.r;
	} else { /* For Code Block mode */
		c = 1;
		r = 0;
	}

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		if (enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
			e = (r < enc->tb_params.cab) ?
				enc->tb_params.ea : enc->tb_params.eb;
		} else {
			e = enc->cb_params.e;
		}

		process_ldpc_enc_cb(q, op, e, m_in, m_out_head,
				m_out, in_offset, out_offset, seg_total_left,
				queue_stats);
		/* Update total_left */
		in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
		in_length = ((in_length - crc24_bits - enc->n_filler) >> 3);
		mbuf_total_left -= in_length;
		/* Update offsets for next CBs (if exist) */
		in_offset += in_length;
		out_offset += (e + 7) >> 3;

		/* Update offsets */
		if (seg_total_left == in_length) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		}
		r++;
	}

	/* check if all input data was processed */
	if (mbuf_total_left != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included CBs sizes %d",
				mbuf_total_left);
	}
}

static inline uint16_t
enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_enc_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

static inline uint16_t
enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_enc_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_ldpc_enc_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

#ifdef RTE_BBDEV_SDK_AVX2
static inline void
move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
		uint16_t ncb)
{
	uint16_t d = k + 4;
	uint16_t kpi = ncb / 3;
	uint16_t nd = kpi - d;

	rte_memcpy(&out[nd], in, d);
	rte_memcpy(&out[nd + kpi + 64], &in[kpi], d);
	rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d);
}
#endif

static inline void
process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
		uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
		uint16_t crc24_overlap, uint16_t in_length,
		struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	int ret;
#else
	RTE_SET_USED(in_length);
#endif
	int32_t k_idx;
	int32_t iter_cnt;
	uint8_t *in, *out, *adapter_input;
	int32_t ncb, ncb_without_null;
	struct bblib_turbo_adapter_ul_response adapter_resp;
	struct bblib_turbo_adapter_ul_request adapter_req;
	struct bblib_turbo_decoder_request turbo_req;
	struct bblib_turbo_decoder_response turbo_resp;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	ret = is_dec_input_valid(k_idx, kw, in_length);
	if (ret != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
	ncb = kw;
	ncb_without_null = (k + 4) * 3;

	if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) {
		struct bblib_deinterleave_ul_request deint_req;
		struct bblib_deinterleave_ul_response deint_resp;

		deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER;
		deint_req.pharqbuffer = in;
		deint_req.ncb = ncb;
		deint_resp.pinteleavebuffer = q->deint_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Sub-block De-Interleaving */
		bblib_deinterleave_ul(&deint_req, &deint_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else
		move_padding_bytes(in, q->deint_output, k, ncb);

	adapter_input = q->deint_output;

	if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN)
		adapter_req.isinverted = 1;
	else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN)
		adapter_req.isinverted = 0;
	else {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "LLR format wasn't specified");
		return;
	}

	adapter_req.ncb = ncb_without_null;
	adapter_req.pinteleavebuffer = adapter_input;
	adapter_resp.pharqout = q->adapter_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode adaptation */
	bblib_turbo_adapter_ul(&adapter_req, &adapter_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	out = (uint8_t *)mbuf_append(m_out_head, m_out,
			((k - crc24_overlap) >> 3));
	if (out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Too little space in output mbuf");
		return;
	}
	/* rte_bbdev_op_data.offset can be different than the offset of the
	 * appended bytes
	 */
	out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
	if (check_crc_24b)
		turbo_req.c = c + 1;
	else
		turbo_req.c = c;
	turbo_req.input = (int8_t *)q->adapter_output;
	turbo_req.k = k;
	turbo_req.k_idx = k_idx;
	turbo_req.max_iter_num = dec->iter_max;
	turbo_req.early_term_disable = !check_bit(dec->op_flags,
			RTE_BBDEV_TURBO_EARLY_TERMINATION);
	turbo_resp.ag_buf = q->ag;
	turbo_resp.cb_buf = q->code_block;
	turbo_resp.output = out;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode */
	iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	dec->hard_output.length += (k >> 3);

	if (iter_cnt > 0) {
		/* Temporary solution for returned iter_count from SDK */
		iter_cnt = (iter_cnt - 1) >> 1;
		dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count);
	} else {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Turbo Decoder failed");
		return;
	}
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(kw);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}

static inline void
process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		uint8_t c, uint16_t out_length, uint32_t e,
		struct rte_mbuf *m_in,
		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
		struct rte_mbuf *m_harq_in,
		struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out,
		uint16_t in_offset, uint16_t out_offset,
		uint16_t harq_in_offset, uint16_t harq_out_offset,
		bool check_crc_24b,
		uint16_t crc24_overlap, uint16_t in_length,
		struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
	RTE_SET_USED(in_length);
	RTE_SET_USED(c);
	uint8_t *in, *out, *harq_in, *harq_out, *adapter_input;
	struct bblib_rate_dematching_5gnr_request derm_req;
	struct bblib_rate_dematching_5gnr_response derm_resp;
	struct bblib_ldpc_decoder_5gnr_request dec_req;
	struct bblib_ldpc_decoder_5gnr_response dec_resp;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
	uint16_t K, parity_offset, sys_cols, outLenWithCrc;
	int16_t deRmOutSize, numRows;

	/* Compute some LDPC BG lengths */
	outLenWithCrc = out_length + (crc24_overlap >> 3);
	sys_cols = (dec->basegraph == 1) ? 22 : 10;
	K = sys_cols * dec->z_c;
	parity_offset = K - 2 * dec->z_c;

#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time = rte_rdtsc_precise();
#else
	RTE_SET_USED(q_stats);
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
		/**
		 * Single contiguous block from the first LLR of the
		 * circular buffer.
		 */
		harq_in = NULL;
		if (m_harq_in != NULL)
			harq_in = rte_pktmbuf_mtod_offset(m_harq_in,
					uint8_t *, harq_in_offset);
		if (harq_in == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR, "No space in harq input mbuf");
			return;
		}
		uint16_t harq_in_length = RTE_MIN(
				dec->harq_combined_input.length,
				(uint32_t) dec->n_cb);
		memset(q->ag + harq_in_length, 0,
				dec->n_cb - harq_in_length);
		rte_memcpy(q->ag, harq_in, harq_in_length);
	}

	derm_req.p_in = (int8_t *) in;
	derm_req.p_harq = q->ag; /* This doesn't include the filler bits */
	derm_req.base_graph = dec->basegraph;
	derm_req.zc = dec->z_c;
	derm_req.ncb = dec->n_cb;
	derm_req.e = e;
	derm_req.k0 = 0; /* Actual output from SDK */
	derm_req.isretx = check_bit(dec->op_flags,
			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
	derm_req.rvid = dec->rv_index;
	derm_req.modulation_order = dec->q_m;
	derm_req.start_null_index = parity_offset - dec->n_filler;
	derm_req.num_of_null = dec->n_filler;

	bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
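
	/* Worked example of the sizes computed below (derived from the
	 * formulas, not from the SDK): BG1, Zc = 384, n_cb = 25344,
	 * n_filler = 0, k0 = 0 and e = 25344 give deRmOutSize = 25344 and
	 * numRows = ceil(25344 / 384) - 22 + 2 = 46, the BG1 maximum.
	 */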
	/* Compute RM out size and number of rows */
	deRmOutSize = RTE_MIN(
			derm_req.k0 + derm_req.e -
			((derm_req.k0 < derm_req.start_null_index) ?
					0 : dec->n_filler),
			dec->n_cb - dec->n_filler);
	if (m_harq_in != NULL)
		deRmOutSize = RTE_MAX(deRmOutSize,
				RTE_MIN(dec->n_cb - dec->n_filler,
						m_harq_in->data_len));
	numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c)
			- sys_cols + 2;
	numRows = RTE_MAX(4, numRows);

	/* get output data starting address */
	out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length);
	if (out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Too little space in LDPC decoder output mbuf");
		return;
	}

	/* rte_bbdev_op_data.offset can be different than the offset
	 * of the appended bytes
	 */
	out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
	adapter_input = q->enc_out;

	dec_req.Zc = dec->z_c;
	dec_req.baseGraph = dec->basegraph;
	dec_req.nRows = numRows;
	dec_req.numChannelLlrs = deRmOutSize;
	dec_req.varNodes = derm_req.p_harq;
	dec_req.numFillerBits = dec->n_filler;
	dec_req.maxIterations = dec->iter_max;
	dec_req.enableEarlyTermination = check_bit(dec->op_flags,
			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
	dec_resp.varNodes = (int16_t *) q->adapter_output;
	dec_resp.compactedMessageBytes = q->enc_out;

	bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp);

	dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination,
			dec->iter_count);
	if (!dec_resp.parityPassedAtTermination)
		op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;

	bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3);

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) ||
			check_bit(dec->op_flags,
					RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) {
		crc_req.data = adapter_input;
		crc_req.len = K - dec->n_filler - 24;
		crc_resp.check_passed = false;
		crc_resp.data = adapter_input;
		if (check_crc_24b)
			bblib_lte_crc24b_check(&crc_req, &crc_resp);
		else
			bblib_lte_crc24a_check(&crc_req, &crc_resp);
		if (!crc_resp.check_passed)
			op->status |= 1 << RTE_BBDEV_CRC_ERROR;
	} else if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) {
		crc_req.data = adapter_input;
		crc_req.len = K - dec->n_filler - 16;
		crc_resp.check_passed = false;
		crc_resp.data = adapter_input;
		bblib_lte_crc16_check(&crc_req, &crc_resp);
		if (!crc_resp.check_passed)
			op->status |= 1 << RTE_BBDEV_CRC_ERROR;
	}

#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
		harq_out = NULL;
		if (m_harq_out != NULL) {
			/* Initialize HARQ data length since we overwrite */
			m_harq_out->data_len = 0;
			/* Check there is enough space
			 * in the HARQ outbound buffer
			 */
			harq_out = (uint8_t *)mbuf_append(m_harq_out_head,
					m_harq_out, deRmOutSize);
		}
		if (harq_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR, "No space in HARQ output mbuf");
			return;
		}
		/* get output data starting address and overwrite the data */
		harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *,
				harq_out_offset);
		rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize);
		dec->harq_combined_output.length += deRmOutSize;
	}

	rte_memcpy(out, adapter_input, out_length);
	dec->hard_output.length += out_length;
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(out_length);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(m_harq_in);
	RTE_SET_USED(m_harq_out_head);
	RTE_SET_USED(m_harq_out);
	RTE_SET_USED(harq_in_offset);
	RTE_SET_USED(harq_out_offset);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}


static inline void
enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint16_t kw, k = 0;
	uint16_t crc24_overlap = 0;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = dec->tb_params.c;
	} else { /* For Code Block mode */
		k = dec->cb_params.k;
		c = 1;
	}

	if ((c > 1) && !check_bit(dec->op_flags,
		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
		crc24_overlap = 24;

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
			k = (r < dec->tb_params.c_neg) ?
				dec->tb_params.k_neg : dec->tb_params.k_pos;

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		/* Calculates circular buffer size (Kw).
		 * According to 3GPP TS 36.212 section 5.1.4.2
		 *   Kw = 3 * Kpi,
		 * where:
		 *   Kpi = nCol * nRow
		 * where nCol is 32 and nRow can be calculated from:
		 *   D <= nCol * nRow
		 * where D is the size of each output from turbo encoder block
		 * (k + 4).
		 */
		kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3;
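
		/* e.g. for the largest CB (k = 6144): D = 6148, rounded up
		 * to a multiple of 32 gives 6176, so Kw = 3 * 6176 = 18528.
		 */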

		process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
				in_offset, out_offset, check_bit(dec->op_flags,
				RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
				seg_total_left, queue_stats);

		/* To keep CRC24 attached to the end of a Code Block, use the
		 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag, as the CRC is
		 * removed by default once verified.
		 */

		mbuf_total_left -= kw;

		/* Update offsets */
		if (seg_total_left == kw) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += kw;
			out_offset += ((k - crc24_overlap) >> 3);
		}
		r++;
	}
}

static inline void
enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint32_t e;
	uint16_t out_length, crc24_overlap = 0;
	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_harq_in = dec->harq_combined_input.data;
	struct rte_mbuf *m_harq_out = dec->harq_combined_output.data;
	struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t harq_in_offset = dec->harq_combined_input.offset;
	uint16_t harq_out_offset = dec->harq_combined_output.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
		c = dec->tb_params.c;
		e = dec->tb_params.ea;
	} else { /* For Code Block mode */
		c = 1;
		e = dec->cb_params.e;
	}

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
		crc24_overlap = 24;

	out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */
	out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3);
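
	/* e.g. BG1 with Zc = 384, no filler bits and CRC24B drop enabled:
	 * out_length = (8448 - 24 - 0) / 8 = 1053 bytes per CB.
	 */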

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
			e = (r < dec->tb_params.cab) ?
				dec->tb_params.ea : dec->tb_params.eb;
		/* Special case handling when overusing mbuf */
		if (e < RTE_BBDEV_LDPC_E_MAX_MBUF)
			seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
		else
			seg_total_left = e;

		process_ldpc_dec_cb(q, op, c, out_length, e,
				m_in, m_out_head, m_out,
				m_harq_in, m_harq_out_head, m_harq_out,
				in_offset, out_offset, harq_in_offset,
				harq_out_offset,
				check_bit(dec->op_flags,
						RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK),
				crc24_overlap,
				seg_total_left, queue_stats);

		/* To keep CRC24 attached to the end of a Code Block, do not
		 * set the RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP flag; when set,
		 * the CRC is removed once verified.
		 */

		mbuf_total_left -= e;

		/* Update offsets */
		if (seg_total_left == e) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			if (m_harq_in != NULL)
				m_harq_in = m_harq_in->next;
			if (m_harq_out != NULL)
				m_harq_out = m_harq_out->next;
			in_offset = 0;
			out_offset = 0;
			harq_in_offset = 0;
			harq_out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += e;
			out_offset += out_length;
		}
		r++;
	}
}

static inline uint16_t
enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_dec_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

static inline uint16_t
enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_ldpc_dec_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

/* Enqueue burst */
static uint16_t
enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_ldpc_enc_all_ops(
			q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops,
			&q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Dequeue decode burst */
static uint16_t
dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *q = q_data->queue_private;
	uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
			(void **)ops, nb_ops, NULL);
	q_data->queue_stats.dequeued_count += nb_dequeued;

	return nb_dequeued;
}

/* Dequeue encode burst */
static uint16_t
dequeue_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *q = q_data->queue_private;
	uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
			(void **)ops, nb_ops, NULL);
	q_data->queue_stats.dequeued_count += nb_dequeued;

	return nb_dequeued;
}

/* Parse 16bit integer from string argument */
static inline int
parse_u16_arg(const char *key, const char *value, void *extra_args)
{
	uint16_t *u16 = extra_args;
	unsigned long result;

	if ((value == NULL) || (extra_args == NULL))
		return -EINVAL;
	errno = 0;
	result = strtoul(value, NULL, 0);
	if ((result >= (1 << 16)) || (errno != 0)) {
		rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key);
		return -ERANGE;
	}
	*u16 = (uint16_t)result;
	return 0;
}

/* Parse parameters used to create device */
static int
parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;

	if (params == NULL)
		return -EINVAL;
	if (input_args) {
		kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params);
		if (kvlist == NULL)
			return -EFAULT;

		ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0],
				&parse_u16_arg, &params->queues_num);
		if (ret < 0)
			goto exit;

		ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1],
				&parse_u16_arg, &params->socket_id);
		if (ret < 0)
			goto exit;

		if (params->socket_id >= RTE_MAX_NUMA_NODES) {
			rte_bbdev_log(ERR, "Invalid socket, must be < %u",
					RTE_MAX_NUMA_NODES);
			ret = -EINVAL;
			goto exit;
		}
	}

exit:
	rte_kvargs_free(kvlist);
	return ret;
}

/* Create device */
static int
turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
		struct turbo_sw_params *init_params)
{
	struct rte_bbdev *bbdev;
	const char *name = rte_vdev_device_name(vdev);

	bbdev = rte_bbdev_allocate(name);
	if (bbdev == NULL)
		return -ENODEV;

	bbdev->data->dev_private = rte_zmalloc_socket(name,
			sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE,
			init_params->socket_id);
	if (bbdev->data->dev_private == NULL) {
		rte_bbdev_release(bbdev);
		return -ENOMEM;
	}

	bbdev->dev_ops = &pmd_ops;
	bbdev->device = &vdev->device;
	bbdev->data->socket_id = init_params->socket_id;
	bbdev->intr_handle = NULL;

	/* register rx/tx burst functions for data path */
	bbdev->dequeue_enc_ops = dequeue_enc_ops;
	bbdev->dequeue_dec_ops = dequeue_dec_ops;
	bbdev->enqueue_enc_ops = enqueue_enc_ops;
	bbdev->enqueue_dec_ops = enqueue_dec_ops;
	bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops;
	bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops;
	bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops;
	bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops;
	((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
			init_params->queues_num;

	return 0;
}

/* Initialise device */
static int
turbo_sw_bbdev_probe(struct rte_vdev_device *vdev)
{
	struct turbo_sw_params init_params = {
		rte_socket_id(),
		RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
	};
	const char *name;
	const char *input_args;

	if (vdev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;
	input_args = rte_vdev_device_args(vdev);
	parse_turbo_sw_params(&init_params, input_args);

	rte_bbdev_log_debug(
			"Initialising %s on NUMA node %d with max queues: %d",
			name, init_params.socket_id, init_params.queues_num);

	return turbo_sw_bbdev_create(vdev, &init_params);
}

/* Uninitialise device */
static int
turbo_sw_bbdev_remove(struct rte_vdev_device *vdev)
{
	struct rte_bbdev *bbdev;
	const char *name;

	if (vdev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;

	bbdev = rte_bbdev_get_named_dev(name);
	if (bbdev == NULL)
		return -EINVAL;

	rte_free(bbdev->data->dev_private);

	return rte_bbdev_release(bbdev);
}

static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = {
	.probe = turbo_sw_bbdev_probe,
	.remove = turbo_sw_bbdev_remove
};

RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv);
RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
	TURBO_SW_MAX_NB_QUEUES_ARG"=<int> "
	TURBO_SW_SOCKET_ID_ARG"=<int>");
RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw);
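
/* Example EAL invocation (illustrative application name; the parameters
 * match the registered parameter string above):
 *   ./app --vdev="baseband_turbo_sw,max_nb_queues=8,socket_id=0"
 * instantiates this PMD with 8 queues on NUMA node 0.
 */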