/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <string.h>

#include <rte_common.h>
#include <rte_bus_vdev.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_kvargs.h>
#include <rte_cycles.h>

#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>

#include <rte_hexdump.h>
#include <rte_log.h>

#ifdef RTE_BBDEV_SDK_AVX2
#include <ipp.h>
#include <ipps.h>
#include <phy_turbo.h>
#include <phy_crc.h>
#include <phy_rate_match.h>
#endif
#ifdef RTE_BBDEV_SDK_AVX512
#include <bit_reverse.h>
#include <phy_ldpc_encoder_5gnr.h>
#include <phy_ldpc_decoder_5gnr.h>
#include <phy_LDPC_ratematch_5gnr.h>
#include <phy_rate_dematching_5gnr.h>
#endif

#define DRIVER_NAME baseband_turbo_sw

/* Turbo SW PMD logging ID */
static int bbdev_turbo_sw_logtype;

/* Helper macro for logging */
#define rte_bbdev_log(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \
		##__VA_ARGS__)

#define rte_bbdev_log_debug(fmt, ...) \
	rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
		##__VA_ARGS__)

#define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_TURBO_MAX_CB_SIZE >> 3) + 1) * 48)
#define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6)
#define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_TURBO_MAX_CB_SIZE + 4) * 48)

/* private data structure */
struct bbdev_private {
	unsigned int max_nb_queues;  /**< Max number of queues */
};

/* Initialisation params structure that can be used by Turbo SW driver */
struct turbo_sw_params {
	int socket_id;  /**< Turbo SW device socket */
	uint16_t queues_num;  /**< Turbo SW device queues number */
};

/* Acceptable params for Turbo SW devices */
#define TURBO_SW_MAX_NB_QUEUES_ARG  "max_nb_queues"
#define TURBO_SW_SOCKET_ID_ARG      "socket_id"

static const char * const turbo_sw_valid_params[] = {
	TURBO_SW_MAX_NB_QUEUES_ARG,
	TURBO_SW_SOCKET_ID_ARG
};
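/*
 * Illustrative EAL usage of the parameters above (not part of the driver):
 * the vdev can be created at startup with, for example,
 *   --vdev="baseband_turbo_sw,max_nb_queues=8,socket_id=0"
 */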
/* queue */
struct turbo_sw_queue {
	/* Ring for processed (encoded/decoded) operations which are ready to
	 * be dequeued.
	 */
	struct rte_ring *processed_pkts;
	/* Stores input for turbo encoder (used when CRC attachment is
	 * performed)
	 */
	uint8_t *enc_in;
	/* Stores output from turbo encoder */
	uint8_t *enc_out;
	/* Alpha gamma buf for bblib_turbo_decoder() function */
	int8_t *ag;
	/* Temp buf for bblib_turbo_decoder() function */
	uint16_t *code_block;
	/* Input buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_input;
	/* Output buf for bblib_rate_dematching_lte() function */
	uint8_t *deint_output;
	/* Output buf for bblib_turbodec_adapter_lte() function */
	uint8_t *adapter_output;
	/* Operation type of this queue */
	enum rte_bbdev_op_type type;
} __rte_cache_aligned;


#ifdef RTE_BBDEV_SDK_AVX2
static inline char *
mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
{
	if (unlikely(len > rte_pktmbuf_tailroom(m)))
		return NULL;

	char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
	m->data_len = (uint16_t)(m->data_len + len);
	m_head->pkt_len = (m_head->pkt_len + len);
	return tail;
}

/* Calculate index based on Table 5.1.3-3 from 3GPP TS 36.212 */
static inline int32_t
compute_idx(uint16_t k)
{
	int32_t result = 0;

	if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE)
		return -1;

	if (k > 2048) {
		if ((k - 2048) % 64 != 0)
			return -1;

		result = 124 + (k - 2048) / 64;
	} else if (k <= 512) {
		if ((k - 40) % 8 != 0)
			return -1;

		result = (k - 40) / 8 + 1;
	} else if (k <= 1024) {
		if ((k - 512) % 16 != 0)
			return -1;

		result = 60 + (k - 512) / 16;
	} else { /* 1024 < k <= 2048 */
		if ((k - 1024) % 32 != 0)
			return -1;

		result = 92 + (k - 1024) / 32;
	}

	return result;
}
#endif
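/*
 * Worked examples for the mapping above (3GPP TS 36.212 Table 5.1.3-3):
 *   k = 40   -> (40 - 40) / 8 + 1        = 1   (first index)
 *   k = 512  -> (512 - 40) / 8 + 1       = 60
 *   k = 1024 -> 60 + (1024 - 512) / 16   = 92
 *   k = 2048 -> 92 + (2048 - 1024) / 32  = 124
 *   k = 6144 -> 124 + (6144 - 2048) / 64 = 188 (last index)
 */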
/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}

/* Get device info */
static void
info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
{
	struct bbdev_private *internals = dev->data->dev_private;

	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
#ifdef RTE_BBDEV_SDK_AVX2
		{
			.type = RTE_BBDEV_OP_TURBO_DEC,
			.cap.turbo_dec = {
				.capability_flags =
					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
					RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN |
					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
					RTE_BBDEV_TURBO_CRC_TYPE_24B |
					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
					RTE_BBDEV_TURBO_EARLY_TERMINATION,
				.max_llr_modulus = 16,
				.num_buffers_src =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_hard_out =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_soft_out = 0,
			}
		},
		{
			.type = RTE_BBDEV_OP_TURBO_ENC,
			.cap.turbo_enc = {
				.capability_flags =
						RTE_BBDEV_TURBO_CRC_24B_ATTACH |
						RTE_BBDEV_TURBO_CRC_24A_ATTACH |
						RTE_BBDEV_TURBO_RATE_MATCH |
						RTE_BBDEV_TURBO_RV_INDEX_BYPASS,
				.num_buffers_src =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
				.num_buffers_dst =
						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
			}
		},
#endif
#ifdef RTE_BBDEV_SDK_AVX512
		{
			.type = RTE_BBDEV_OP_LDPC_ENC,
			.cap.ldpc_enc = {
				.capability_flags =
						RTE_BBDEV_LDPC_RATE_MATCH |
						RTE_BBDEV_LDPC_CRC_24A_ATTACH |
						RTE_BBDEV_LDPC_CRC_24B_ATTACH,
				.num_buffers_src =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_dst =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
			}
		},
		{
			.type = RTE_BBDEV_OP_LDPC_DEC,
			.cap.ldpc_dec = {
				.capability_flags =
					RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK |
					RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
					RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
					RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
					RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE,
				.llr_size = 8,
				.llr_decimals = 4,
				.num_buffers_src =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_hard_out =
						RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
				.num_buffers_soft_out = 0,
			}
		},
#endif
		RTE_BBDEV_END_OF_CAPABILITIES_LIST()
	};

	static struct rte_bbdev_queue_conf default_queue_conf = {
		.queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT,
	};
#ifdef RTE_BBDEV_SDK_AVX2
	static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2;
	dev_info->cpu_flag_reqs = &cpu_flag;
#else
	dev_info->cpu_flag_reqs = NULL;
#endif
	default_queue_conf.socket = dev->data->socket_id;

	dev_info->driver_name = RTE_STR(DRIVER_NAME);
	dev_info->max_num_queues = internals->max_nb_queues;
	dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT;
	dev_info->hardware_accelerated = false;
	dev_info->max_dl_queue_priority = 0;
	dev_info->max_ul_queue_priority = 0;
	dev_info->default_queue_conf = default_queue_conf;
	dev_info->capabilities = bbdev_capabilities;
	dev_info->min_alignment = 64;
	dev_info->harq_buffer_size = 0;

	rte_bbdev_log_debug("got device info from %u", dev->data->dev_id);
}
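/*
 * Illustrative application-side view of the data filled in above (sketch,
 * not part of the driver): an application typically queries it with
 *
 *	struct rte_bbdev_info info;
 *	rte_bbdev_info_get(dev_id, &info);
 *	// info.drv.capabilities is the per-op-type list built above,
 *	// terminated by RTE_BBDEV_END_OF_CAPABILITIES_LIST().
 */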
/* Release queue */
static int
q_release(struct rte_bbdev *dev, uint16_t q_id)
{
	struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private;

	if (q != NULL) {
		rte_ring_free(q->processed_pkts);
		rte_free(q->enc_out);
		rte_free(q->enc_in);
		rte_free(q->ag);
		rte_free(q->code_block);
		rte_free(q->deint_input);
		rte_free(q->deint_output);
		rte_free(q->adapter_output);
		rte_free(q);
		dev->data->queues[q_id].queue_private = NULL;
	}

	rte_bbdev_log_debug("released device queue %u:%u",
			dev->data->dev_id, q_id);
	return 0;
}

/* Setup a queue */
static int
q_setup(struct rte_bbdev *dev, uint16_t q_id,
		const struct rte_bbdev_queue_conf *queue_conf)
{
	int ret;
	struct turbo_sw_queue *q;
	char name[RTE_RING_NAMESIZE];

	/* Allocate the queue data structure. */
	q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q == NULL) {
		rte_bbdev_log(ERR, "Failed to allocate queue memory");
		return -ENOMEM;
	}

	/* Allocate memory for encoder output. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->enc_out = rte_zmalloc_socket(name,
			((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) *
			sizeof(*q->enc_out) * 3,
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->enc_out == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -EFAULT;
		goto free_q;
	}

	/* Allocate memory for rate matching output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id,
			q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->enc_in = rte_zmalloc_socket(name,
			(RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->enc_in == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -EFAULT;
		goto free_q;
	}

	/* Allocate memory for Alpha Gamma temp buffer. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->ag = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->ag == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -EFAULT;
		goto free_q;
	}

	/* Allocate memory for code block temp buffer. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->code_block = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->code_block == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -EFAULT;
		goto free_q;
	}

	/* Allocate memory for Deinterleaver input. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_de_i%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->deint_input = rte_zmalloc_socket(name,
			DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->deint_input == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -EFAULT;
		goto free_q;
	}

	/* Allocate memory for Deinterleaver output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_de_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->deint_output = rte_zmalloc_socket(NULL,
			DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->deint_output == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -EFAULT;
		goto free_q;
	}
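	/*
	 * Size note (illustrative, assuming the rte_bbdev_op.h value
	 * RTE_BBDEV_TURBO_MAX_CB_SIZE = 6144 bits): the scratch areas above
	 * evaluate to ag = 6144 * 10 = 61440 B, code_block = 6144 *
	 * sizeof(uint16_t) = 12288 B, deint_input = ((6144 >> 3) + 1) * 48 =
	 * 36912 B and deint_output = 6 * DEINT_INPUT_BUF_SIZE = 221472 B.
	 */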
	/* Allocate memory for Adapter output. */
	ret = snprintf(name, RTE_RING_NAMESIZE,
			RTE_STR(DRIVER_NAME)"_ada_o%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->adapter_output = rte_zmalloc_socket(NULL,
			ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output),
			RTE_CACHE_LINE_SIZE, queue_conf->socket);
	if (q->adapter_output == NULL) {
		rte_bbdev_log(ERR,
				"Failed to allocate queue memory for %s", name);
		ret = -EFAULT;
		goto free_q;
	}

	/* Create ring for packets awaiting to be dequeued. */
	ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u",
			dev->data->dev_id, q_id);
	if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) {
		rte_bbdev_log(ERR,
				"Creating queue name for device %u queue %u failed",
				dev->data->dev_id, q_id);
		ret = -ENAMETOOLONG;
		goto free_q;
	}
	q->processed_pkts = rte_ring_create(name, queue_conf->queue_size,
			queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
	if (q->processed_pkts == NULL) {
		rte_bbdev_log(ERR, "Failed to create ring for %s", name);
		ret = -EFAULT;
		goto free_q;
	}

	q->type = queue_conf->op_type;

	dev->data->queues[q_id].queue_private = q;
	rte_bbdev_log_debug("setup device queue %s", name);
	return 0;

free_q:
	rte_ring_free(q->processed_pkts);
	rte_free(q->enc_out);
	rte_free(q->enc_in);
	rte_free(q->ag);
	rte_free(q->code_block);
	rte_free(q->deint_input);
	rte_free(q->deint_output);
	rte_free(q->adapter_output);
	rte_free(q);
	return ret;
}
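/*
 * Illustrative application-side call that lands in q_setup() above (sketch,
 * not part of the driver):
 *
 *	struct rte_bbdev_queue_conf conf = {
 *		.socket = rte_socket_id(),
 *		.queue_size = 128,
 *		.op_type = RTE_BBDEV_OP_TURBO_DEC,
 *	};
 *	rte_bbdev_queue_configure(dev_id, queue_id, &conf);
 */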
static const struct rte_bbdev_ops pmd_ops = {
	.info_get = info_get,
	.queue_setup = q_setup,
	.queue_release = q_release
};

#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
/* Checks if the encoder input buffer is correct.
 * Returns 0 if it's valid, -1 otherwise.
 */
static inline int
is_enc_input_valid(const uint16_t k, const int32_t k_idx,
		const uint16_t in_length)
{
	if (k_idx < 0) {
		rte_bbdev_log(ERR, "K Index is invalid");
		return -1;
	}

	if (in_length < (k >> 3)) {
		rte_bbdev_log(ERR,
				"Mismatch between input length (%u bytes) and K (%u bits)",
				in_length, k);
		return -1;
	}

	if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
		rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
				k, RTE_BBDEV_TURBO_MAX_CB_SIZE);
		return -1;
	}

	return 0;
}

/* Checks if the decoder input buffer is correct.
 * Returns 0 if it's valid, -1 otherwise.
 */
static inline int
is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length)
{
	if (k_idx < 0) {
		rte_bbdev_log(ERR, "K index is invalid");
		return -1;
	}

	if (in_length < kw) {
		rte_bbdev_log(ERR,
				"Mismatch between input length (%u) and kw (%u)",
				in_length, kw);
		return -1;
	}

	if (kw > RTE_BBDEV_TURBO_MAX_KW) {
		rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
				kw, RTE_BBDEV_TURBO_MAX_KW);
		return -1;
	}

	return 0;
}
#endif
#endif
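/*
 * Per-code-block encode pipeline implemented below: optional CRC24A/CRC24B
 * attachment (CRC24A when the op carries the whole TB as one code block,
 * CRC24B per CB of a segmented TB), turbo encoding into three (k + 4)-bit
 * streams, then either LTE rate matching or, when rate matching is
 * bypassed, bit-packing of the three streams into one contiguous output.
 */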
static inline void
process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t in_length, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	int ret;
#else
	RTE_SET_USED(in_length);
#endif
	int16_t k_idx;
	uint16_t m;
	uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
	uint64_t first_3_bytes = 0;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	struct bblib_turbo_encoder_request turbo_req;
	struct bblib_turbo_encoder_response turbo_resp;
	struct bblib_rate_match_dl_request rm_req;
	struct bblib_rate_match_dl_response rm_resp;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);
	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* CRC24A (for TB) */
	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
		(enc->code_block_mode == 1)) {
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
#endif

		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is room for the CRC bits; if not, use the
		 * temporary buffer.
		 */
		if (mbuf_append(m_in, m_in, 3) == NULL) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else {
			/* Store the first 3 bytes of the next CB, as they
			 * will be overwritten by the CRC bytes. If this is
			 * the last CB there is nothing to preserve, so this
			 * branch is skipped.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24A generation */
		bblib_lte_crc24a_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
		/* CRC24B */
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		ret = is_enc_input_valid(k - 24, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
#endif

		crc_req.data = in;
		crc_req.len = k - 24;
		/* Check if there is room for the CRC bits if this is the last
		 * CB in the TB. If not, use the temporary buffer.
		 */
		if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) {
			rte_memcpy(q->enc_in, in, (k - 24) >> 3);
			in = q->enc_in;
		} else if (c - r > 1) {
			/* Store the first 3 bytes of the next CB, as they
			 * will be overwritten by the CRC bytes. If this is
			 * the last CB there is nothing to preserve, so this
			 * branch is skipped.
			 */
			first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
		}

		crc_resp.data = in;
#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* CRC24B generation */
		bblib_lte_crc24b_gen(&crc_req, &crc_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	}
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	else {
		ret = is_enc_input_valid(k, k_idx, in_length);
		if (ret != 0) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			return;
		}
	}
#endif

	/* Turbo encoder */

	/* Each bit layer output from the turbo encoder is (k + 4) bits long,
	 * i.e. input length + 4 tail bits. That is (k / 8) + 1 bytes after
	 * rounding up, so dst_data's length should be 3 * (k / 8) + 3 bytes.
	 * In the rate-matching bypass case the output pointers passed to the
	 * encoder (out0, out1 and out2) can point directly at the addresses
	 * of the output from the turbo_enc entity.
	 */
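	/*
	 * Worked example (illustrative): for the largest LTE CB, k = 6144,
	 * each stream is 6148 bits -> 6144 / 8 + 1 = 769 bytes, so the three
	 * streams need 3 * 769 = 2307 bytes when kept separate. The packed
	 * bypass output below is 3 * (k + 4) = 18444 bits -> (k / 8) * 3 + 2 =
	 * 2306 bytes, since valid k values are always multiples of 8.
	 */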
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		out0 = q->enc_out;
		out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
		out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
	} else {
		out0 = (uint8_t *)mbuf_append(m_out_head, m_out,
				(k >> 3) * 3 + 2);
		if (out0 == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		enc->output.length += (k >> 3) * 3 + 2;
		/* rte_bbdev_op_data.offset can be different than the
		 * offset of the appended bytes
		 */
		out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
		out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + (k >> 3) + 1);
		out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
				out_offset + 2 * ((k >> 3) + 1));
	}

	turbo_req.case_id = k_idx;
	turbo_req.input_win = in;
	turbo_req.length = k >> 3;
	turbo_resp.output_win_0 = out0;
	turbo_resp.output_win_1 = out1;
	turbo_resp.output_win_2 = out2;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo encoding */
	if (bblib_turbo_encoder(&turbo_req, &turbo_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "Turbo Encoder failed");
		return;
	}
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	/* Restore the first 3 bytes of the next CB if they were overwritten
	 * by the CRC
	 */
	if (first_3_bytes != 0)
		*((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;

	/* Rate-matching */
	if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
		uint8_t mask_id;
		/* Integer round up division by 8 */
		uint16_t out_len = (e + 7) >> 3;
		/* The mask array is indexed using E % 8. E is an even number,
		 * so only 4 values are possible.
		 */
		const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC};

		/* get output data starting address */
		rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
		if (rm_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR,
					"Too little space in output mbuf");
			return;
		}
		/* rte_bbdev_op_data.offset can be different than the offset
		 * of the appended bytes
		 */
		rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

		/* index of current code block */
		rm_req.r = r;
		/* total number of code blocks */
		rm_req.C = c;
		/* For DL - 1, UL - 0 */
		rm_req.direction = 1;
		/* According to 3GPP TS 36.212 section 5.1.4.1.2, Nsoft, KMIMO
		 * and MDL_HARQ are used for the Ncb calculation. As Ncb is
		 * already known, we can adjust those parameters.
		 */
		rm_req.Nsoft = ncb * rm_req.C;
		rm_req.KMIMO = 1;
		rm_req.MDL_HARQ = 1;
		/* According to 3GPP TS 36.212 section 5.1.4.1.2, NL, Qm and G
		 * are used for the E calculation. As E is already known, we
		 * can adjust those parameters.
		 */
		rm_req.NL = e;
		rm_req.Qm = 1;
		rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C;
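		/*
		 * Why this works (illustrative): per TS 36.212 the SDK derives
		 * N_IR = Nsoft / (KMIMO * MDL_HARQ) and Ncb = min(N_IR / C, Kw),
		 * so with Nsoft = ncb * C, KMIMO = 1 and MDL_HARQ = 1 it
		 * evaluates to Ncb = min(ncb, Kw) = ncb. Likewise G' =
		 * G / (NL * Qm) = C, which yields E = NL * Qm * (G' / C) = e
		 * for every code block.
		 */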
		rm_req.rvidx = enc->rv_index;
		rm_req.Kidx = k_idx - 1;
		rm_req.nLen = k + 4;
		rm_req.tin0 = out0;
		rm_req.tin1 = out1;
		rm_req.tin2 = out2;
		rm_resp.output = rm_out;
		rm_resp.OutputLen = out_len;
		if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS)
			rm_req.bypass_rvidx = 1;
		else
			rm_req.bypass_rvidx = 0;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Rate-Matching */
		if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) {
			op->status |= 1 << RTE_BBDEV_DRV_ERROR;
			rte_bbdev_log(ERR, "Rate matching failed");
			return;
		}
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

		/* SW fills an entire last byte even if E % 8 != 0. Clear the
		 * superfluous data bits for consistency with HW devices.
		 */
		mask_id = (e & 7) >> 1;
		rm_out[out_len - 1] &= mask_out[mask_id];
		enc->output.length += rm_resp.OutputLen;
	} else {
		/* Rate matching is bypassed */
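		/*
		 * Packing layout (illustrative): the three encoder streams
		 * are each (k + 4) bits, so out0 ends with 4 spare bits in
		 * its last byte. The code below fills those 4 bits with the
		 * first 4 bits of out1, left-shifts the rest of out1 by 4
		 * bits, and then moves out2 up by a whole byte, producing one
		 * contiguous 3 * (k + 4)-bit stream.
		 */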
		/* Complete the last byte of out0 (where the 4 tail bits are
		 * stored) by moving the first 4 bits from out1
		 */
		tmp_out = (uint8_t *) --out1;
		*tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4);
		tmp_out++;
		/* Shifting out1 data by 4 bits to the left */
		for (m = 0; m < k >> 3; ++m) {
			uint8_t *first = tmp_out;
			uint8_t second = *(tmp_out + 1);
			*first = (*first << 4) | ((second & 0xF0) >> 4);
			tmp_out++;
		}
		/* Shifting out2 data by 8 bits to the left */
		for (m = 0; m < (k >> 3) + 1; ++m) {
			*tmp_out = *(tmp_out + 1);
			tmp_out++;
		}
		*tmp_out = 0;
	}
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(r);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(ncb);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}


static inline void
process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head,
		struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
		uint16_t seg_total_left, struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
	RTE_SET_USED(seg_total_left);
	uint8_t *in, *rm_out;
	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
	struct bblib_ldpc_encoder_5gnr_request ldpc_req;
	struct bblib_ldpc_encoder_5gnr_response ldpc_resp;
	struct bblib_LDPC_ratematch_5gnr_request rm_req;
	struct bblib_LDPC_ratematch_5gnr_response rm_resp;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	uint16_t msgLen, puntBits, parity_offset, out_len;
	uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
	uint16_t in_length_in_bits = K - enc->n_filler;
	uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3;

#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time = rte_rdtsc_precise();
#else
	RTE_SET_USED(q_stats);
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	/* Masking the filler bits explicitly */
	memset(q->enc_in + (in_length_in_bytes - 3), 0,
			((K + 7) >> 3) - (in_length_in_bytes - 3));
	/* CRC Generation */
	if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) {
		rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
		crc_req.data = in;
		crc_req.len = in_length_in_bits - 24;
		crc_resp.data = q->enc_in;
		bblib_lte_crc24a_gen(&crc_req, &crc_resp);
	} else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) {
		rte_memcpy(q->enc_in, in, in_length_in_bytes - 3);
		crc_req.data = in;
		crc_req.len = in_length_in_bits - 24;
		crc_resp.data = q->enc_in;
		bblib_lte_crc24b_gen(&crc_req, &crc_resp);
	} else
		rte_memcpy(q->enc_in, in, in_length_in_bytes);

	/* LDPC Encoding */
	ldpc_req.Zc = enc->z_c;
	ldpc_req.baseGraph = enc->basegraph;
	/* Number of rows set to maximum */
	ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42;
	ldpc_req.numberCodeblocks = 1;
	ldpc_req.input[0] = (int8_t *) q->enc_in;
	ldpc_resp.output[0] = (int8_t *) q->enc_out;

	bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3);

	if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "LDPC Encoder failed");
		return;
	}

	/*
	 * Systematic + Parity : Recreating stream with filler bits; ideally
	 * the bit select could handle this in the RM SDK
	 */
	msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc;
	puntBits = 2 * ldpc_req.Zc;
	parity_offset = msgLen - puntBits;
	ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8),
			puntBits % 8, q->adapter_output, 0, parity_offset);
	ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8),
			parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc);
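	/*
	 * Worked sizes (illustrative): for base graph 1 with the largest
	 * lifting size Zc = 384, K = 22 * 384 = 8448 bits, the punctured
	 * systematic part is (22 - 2) * 384 = 7680 bits and the parity part
	 * is 46 * 384 = 17664 bits, giving a 66 * Zc = 25344-bit circular
	 * buffer to rate-match from.
	 */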
	out_len = (e + 7) >> 3;
	/* get output data starting address */
	rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len);
	if (rm_out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Too little space in output mbuf");
		return;
	}
	/*
	 * rte_bbdev_op_data.offset can be different than the offset
	 * of the appended bytes
	 */
	rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);

	/* Rate-Matching */
	rm_req.E = e;
	rm_req.Ncb = enc->n_cb;
	rm_req.Qm = enc->q_m;
	rm_req.Zc = enc->z_c;
	rm_req.baseGraph = enc->basegraph;
	rm_req.input = q->adapter_output;
	rm_req.nLen = enc->n_filler;
	rm_req.nullIndex = parity_offset - enc->n_filler;
	rm_req.rvidx = enc->rv_index;
	rm_resp.output = q->deint_output;

	if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "Rate matching failed");
		return;
	}

	/* RM SDK may provide non-zero bits on last byte */
	if ((e % 8) != 0)
		q->deint_output[out_len - 1] &= (1 << (e % 8)) - 1;

	bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3);

	rte_memcpy(rm_out, q->deint_output, out_len);
	enc->output.length += out_len;

#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(seg_total_left);
	RTE_SET_USED(q_stats);
#endif
}

static inline void
enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r, crc24_bits = 0;
	uint16_t k, ncb;
	uint32_t e;
	struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
	uint16_t in_offset = enc->input.offset;
	uint16_t out_offset = enc->output.offset;
	struct rte_mbuf *m_in = enc->input.data;
	struct rte_mbuf *m_out = enc->output.data;
	struct rte_mbuf *m_out_head = enc->output.data;
	uint32_t in_length, mbuf_total_left = enc->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
		rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
				mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) ||
		(enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH))
		crc24_bits = 24;

	if (enc->code_block_mode == 0) { /* For Transport Block mode */
		c = enc->tb_params.c;
		r = enc->tb_params.r;
	} else { /* For Code Block mode */
		c = 1;
		r = 0;
	}

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		if (enc->code_block_mode == 0) {
			k = (r < enc->tb_params.c_neg) ?
				enc->tb_params.k_neg : enc->tb_params.k_pos;
			ncb = (r < enc->tb_params.c_neg) ?
				enc->tb_params.ncb_neg : enc->tb_params.ncb_pos;
			e = (r < enc->tb_params.cab) ?
				enc->tb_params.ea : enc->tb_params.eb;
		} else {
			k = enc->cb_params.k;
			ncb = enc->cb_params.ncb;
			e = enc->cb_params.e;
		}

		process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head,
				m_out, in_offset, out_offset, seg_total_left,
				queue_stats);
		/* Update total_left */
		in_length = ((k - crc24_bits) >> 3);
		mbuf_total_left -= in_length;
		/* Update offsets for next CBs (if exist) */
		in_offset += (k - crc24_bits) >> 3;
		if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH)
			out_offset += e >> 3;
		else
			out_offset += (k >> 3) * 3 + 2;

		/* Update offsets */
		if (seg_total_left == in_length) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		}
		r++;
	}

	/* check if all input data was processed */
	if (mbuf_total_left != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included CBs sizes");
	}
}

static inline void
enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r, crc24_bits = 0;
	uint32_t e;
	struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
	uint16_t in_offset = enc->input.offset;
	uint16_t out_offset = enc->output.offset;
	struct rte_mbuf *m_in = enc->input.data;
	struct rte_mbuf *m_out = enc->output.data;
	struct rte_mbuf *m_out_head = enc->output.data;
	uint32_t in_length, mbuf_total_left = enc->input.length;

	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) {
		rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
				mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE);
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) ||
		(enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH))
		crc24_bits = 24;

	if (enc->code_block_mode == 0) { /* For Transport Block mode */
		c = enc->tb_params.c;
		r = enc->tb_params.r;
	} else { /* For Code Block mode */
		c = 1;
		r = 0;
	}

	while (mbuf_total_left > 0 && r < c) {

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		if (enc->code_block_mode == 0) {
			e = (r < enc->tb_params.cab) ?
				enc->tb_params.ea : enc->tb_params.eb;
		} else {
			e = enc->cb_params.e;
		}

		process_ldpc_enc_cb(q, op, e, m_in, m_out_head,
				m_out, in_offset, out_offset, seg_total_left,
				queue_stats);
		/* Update total_left */
		in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
		in_length = ((in_length - crc24_bits - enc->n_filler) >> 3);
		mbuf_total_left -= in_length;
		/* Update offsets for next CBs (if exist) */
		in_offset += in_length;
		out_offset += (e + 7) >> 3;

		/* Update offsets */
		if (seg_total_left == in_length) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		}
		r++;
	}

	/* check if all input data was processed */
	if (mbuf_total_left != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included CBs sizes %u",
				mbuf_total_left);
	}
}

static inline uint16_t
enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_enc_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

static inline uint16_t
enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_enc_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_ldpc_enc_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

#ifdef RTE_BBDEV_SDK_AVX2
static inline void
move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
		uint16_t ncb)
{
	uint16_t d = k + 4;
	uint16_t kpi = ncb / 3;
	uint16_t nd = kpi - d;

	rte_memcpy(&out[nd], in, d);
	rte_memcpy(&out[nd + kpi + 64], &in[kpi], d);
	rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d);
}
#endif
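/*
 * Layout note for move_padding_bytes() above (illustrative): when sub-block
 * deinterleaving is skipped, each of the three d = (k + 4)-byte LLR streams
 * is copied into its own (kpi + 64)-byte window of the output, preceded by
 * nd = kpi - d padding bytes, mirroring the null padding the deinterleaver
 * would otherwise produce.
 */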
static inline void
process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in,
		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
		uint16_t in_offset, uint16_t out_offset, bool check_crc_24b,
		uint16_t crc24_overlap, uint16_t in_length,
		struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX2
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	int ret;
#else
	RTE_SET_USED(in_length);
#endif
	int32_t k_idx;
	int32_t iter_cnt;
	uint8_t *in, *out, *adapter_input;
	int32_t ncb, ncb_without_null;
	struct bblib_turbo_adapter_ul_response adapter_resp;
	struct bblib_turbo_adapter_ul_request adapter_req;
	struct bblib_turbo_decoder_request turbo_req;
	struct bblib_turbo_decoder_response turbo_resp;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time;
#else
	RTE_SET_USED(q_stats);
#endif

	k_idx = compute_idx(k);

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	ret = is_dec_input_valid(k_idx, kw, in_length);
	if (ret != 0) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
	ncb = kw;
	ncb_without_null = (k + 4) * 3;

	if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) {
		struct bblib_deinterleave_ul_request deint_req;
		struct bblib_deinterleave_ul_response deint_resp;

		deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER;
		deint_req.pharqbuffer = in;
		deint_req.ncb = ncb;
		deint_resp.pinteleavebuffer = q->deint_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
		start_time = rte_rdtsc_precise();
#endif
		/* Sub-block De-Interleaving */
		bblib_deinterleave_ul(&deint_req, &deint_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
		q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	} else
		move_padding_bytes(in, q->deint_output, k, ncb);

	adapter_input = q->deint_output;

	if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN)
		adapter_req.isinverted = 1;
	else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN)
		adapter_req.isinverted = 0;
	else {
		op->status |= 1 << RTE_BBDEV_DRV_ERROR;
		rte_bbdev_log(ERR, "LLR format wasn't specified");
		return;
	}

	adapter_req.ncb = ncb_without_null;
	adapter_req.pinteleavebuffer = adapter_input;
	adapter_resp.pharqout = q->adapter_output;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode adaptation */
	bblib_turbo_adapter_ul(&adapter_req, &adapter_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif

	out = (uint8_t *)mbuf_append(m_out_head, m_out,
			((k - crc24_overlap) >> 3));
	if (out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Too little space in output mbuf");
		return;
	}
	/* rte_bbdev_op_data.offset can be different than the offset of the
	 * appended bytes
	 */
	out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
	if (check_crc_24b)
		turbo_req.c = c + 1;
	else
		turbo_req.c = c;
	turbo_req.input = (int8_t *)q->adapter_output;
	turbo_req.k = k;
	turbo_req.k_idx = k_idx;
	turbo_req.max_iter_num = dec->iter_max;
	turbo_req.early_term_disable = !check_bit(dec->op_flags,
			RTE_BBDEV_TURBO_EARLY_TERMINATION);
	turbo_resp.ag_buf = q->ag;
	turbo_resp.cb_buf = q->code_block;
	turbo_resp.output = out;

#ifdef RTE_BBDEV_OFFLOAD_COST
	start_time = rte_rdtsc_precise();
#endif
	/* Turbo decode */
	iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp);
#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	dec->hard_output.length += (k >> 3);

	if (iter_cnt > 0) {
		/* Temporary solution for returned iter_count from SDK */
		iter_cnt = (iter_cnt - 1) >> 1;
		dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count);
	} else {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR, "Turbo Decoder failed");
		return;
	}
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(k);
	RTE_SET_USED(kw);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}
static inline void
process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		uint8_t c, uint16_t out_length, uint32_t e,
		struct rte_mbuf *m_in,
		struct rte_mbuf *m_out_head, struct rte_mbuf *m_out,
		struct rte_mbuf *m_harq_in,
		struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out,
		uint16_t in_offset, uint16_t out_offset,
		uint16_t harq_in_offset, uint16_t harq_out_offset,
		bool check_crc_24b,
		uint16_t crc24_overlap, uint16_t in_length,
		struct rte_bbdev_stats *q_stats)
{
#ifdef RTE_BBDEV_SDK_AVX512
	RTE_SET_USED(in_length);
	RTE_SET_USED(c);
	uint8_t *in, *out, *harq_in, *harq_out, *adapter_input;
	struct bblib_rate_dematching_5gnr_request derm_req;
	struct bblib_rate_dematching_5gnr_response derm_resp;
	struct bblib_ldpc_decoder_5gnr_request dec_req;
	struct bblib_ldpc_decoder_5gnr_response dec_resp;
	struct bblib_crc_request crc_req;
	struct bblib_crc_response crc_resp;
	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
	uint16_t K, parity_offset, sys_cols, outLenWithCrc;
	int16_t deRmOutSize, numRows;

	/* Compute some LDPC BG lengths */
	outLenWithCrc = out_length + (crc24_overlap >> 3);
	sys_cols = (dec->basegraph == 1) ? 22 : 10;
	K = sys_cols * dec->z_c;
	parity_offset = K - 2 * dec->z_c;

#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time = rte_rdtsc_precise();
#else
	RTE_SET_USED(q_stats);
#endif

	in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
		/* Single contiguous block from the first LLR of the
		 * circular buffer.
		 */
		harq_in = NULL;
		if (m_harq_in != NULL)
			harq_in = rte_pktmbuf_mtod_offset(m_harq_in,
					uint8_t *, harq_in_offset);
		if (harq_in == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR, "No space in harq input mbuf");
			return;
		}
		uint16_t harq_in_length = RTE_MIN(
				dec->harq_combined_input.length,
				(uint32_t) dec->n_cb);
		memset(q->ag + harq_in_length, 0,
				dec->n_cb - harq_in_length);
		rte_memcpy(q->ag, harq_in, harq_in_length);
	}

	derm_req.p_in = (int8_t *) in;
	derm_req.p_harq = q->ag; /* This doesn't include the filler bits */
	derm_req.base_graph = dec->basegraph;
	derm_req.zc = dec->z_c;
	derm_req.ncb = dec->n_cb;
	derm_req.e = e;
	derm_req.k0 = 0; /* Actual output from SDK */
	derm_req.isretx = check_bit(dec->op_flags,
			RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
	derm_req.rvid = dec->rv_index;
	derm_req.modulation_order = dec->q_m;
	derm_req.start_null_index = parity_offset - dec->n_filler;
	derm_req.num_of_null = dec->n_filler;

	bblib_rate_dematching_5gnr(&derm_req, &derm_resp);

	/* Compute RM out size and number of rows */
	deRmOutSize = RTE_MIN(
			derm_req.k0 + derm_req.e -
			((derm_req.k0 < derm_req.start_null_index) ?
					0 : dec->n_filler),
			dec->n_cb - dec->n_filler);
	if (m_harq_in != NULL)
		deRmOutSize = RTE_MAX(deRmOutSize,
				RTE_MIN(dec->n_cb - dec->n_filler,
						m_harq_in->data_len));
	numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c)
			- sys_cols + 2;
	numRows = RTE_MAX(4, numRows);
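	/*
	 * Worked example (illustrative): for base graph 1, Zc = 384 and
	 * n_filler = 0, a full circular buffer gives deRmOutSize = n_cb =
	 * 66 * 384 = 25344 LLRs, so numRows = ceil(25344 / 384) - 22 + 2 =
	 * 66 - 22 + 2 = 46, the maximum parity-row count for BG1.
	 */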
	/* get output data starting address */
	out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length);
	if (out == NULL) {
		op->status |= 1 << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log(ERR,
				"Too little space in LDPC decoder output mbuf");
		return;
	}

	/* rte_bbdev_op_data.offset can be different than the offset
	 * of the appended bytes
	 */
	out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
	adapter_input = q->enc_out;

	dec_req.Zc = dec->z_c;
	dec_req.baseGraph = dec->basegraph;
	dec_req.nRows = numRows;
	dec_req.numChannelLlrs = deRmOutSize;
	dec_req.varNodes = derm_req.p_harq;
	dec_req.numFillerBits = dec->n_filler;
	dec_req.maxIterations = dec->iter_max;
	dec_req.enableEarlyTermination = check_bit(dec->op_flags,
			RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
	dec_resp.varNodes = (int16_t *) q->adapter_output;
	dec_resp.compactedMessageBytes = q->enc_out;

	bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp);
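	/*
	 * As used here (illustrative reading of the SDK response):
	 * dec_resp.compactedMessageBytes (q->enc_out) receives the
	 * hard-decision systematic bits, which are bit-reversed and
	 * CRC-checked below, while the soft values for the HARQ output are
	 * taken from derm_req.p_harq (q->ag).
	 */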
	dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination,
			dec->iter_count);
	if (!dec_resp.parityPassedAtTermination)
		op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;

	bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3);

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) ||
			check_bit(dec->op_flags,
					RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) {
		crc_req.data = adapter_input;
		crc_req.len = K - dec->n_filler - 24;
		crc_resp.check_passed = false;
		crc_resp.data = adapter_input;
		if (check_crc_24b)
			bblib_lte_crc24b_check(&crc_req, &crc_resp);
		else
			bblib_lte_crc24a_check(&crc_req, &crc_resp);
		if (!crc_resp.check_passed)
			op->status |= 1 << RTE_BBDEV_CRC_ERROR;
	}

#ifdef RTE_BBDEV_OFFLOAD_COST
	q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
		harq_out = NULL;
		if (m_harq_out != NULL) {
			/* Initialize HARQ data length since we overwrite */
			m_harq_out->data_len = 0;
			/* Check there is enough space
			 * in the HARQ outbound buffer
			 */
			harq_out = (uint8_t *)mbuf_append(m_harq_out_head,
					m_harq_out, deRmOutSize);
		}
		if (harq_out == NULL) {
			op->status |= 1 << RTE_BBDEV_DATA_ERROR;
			rte_bbdev_log(ERR, "No space in HARQ output mbuf");
			return;
		}
		/* get output data starting address and overwrite the data */
		harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *,
				harq_out_offset);
		rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize);
		dec->harq_combined_output.length += deRmOutSize;
	}

	rte_memcpy(out, adapter_input, out_length);
	dec->hard_output.length += out_length;
#else
	RTE_SET_USED(q);
	RTE_SET_USED(op);
	RTE_SET_USED(c);
	RTE_SET_USED(out_length);
	RTE_SET_USED(e);
	RTE_SET_USED(m_in);
	RTE_SET_USED(m_out_head);
	RTE_SET_USED(m_out);
	RTE_SET_USED(m_harq_in);
	RTE_SET_USED(m_harq_out_head);
	RTE_SET_USED(m_harq_out);
	RTE_SET_USED(harq_in_offset);
	RTE_SET_USED(harq_out_offset);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(out_offset);
	RTE_SET_USED(check_crc_24b);
	RTE_SET_USED(crc24_overlap);
	RTE_SET_USED(in_length);
	RTE_SET_USED(q_stats);
#endif
}


static inline void
enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint16_t kw, k = 0;
	uint16_t crc24_overlap = 0;
	struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == 0) { /* For Transport Block mode */
		c = dec->tb_params.c;
	} else { /* For Code Block mode */
		k = dec->cb_params.k;
		c = 1;
	}

	if ((c > 1) && !check_bit(dec->op_flags,
		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
		crc24_overlap = 24;

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == 0)
			k = (r < dec->tb_params.c_neg) ?
				dec->tb_params.k_neg : dec->tb_params.k_pos;

		seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;

		/* Calculates circular buffer size (Kw).
		 * According to 3GPP TS 36.212 section 5.1.4.2:
		 * Kw = 3 * Kpi,
		 * where:
		 * Kpi = nCol * nRow
		 * where nCol is 32 and nRow can be calculated from:
		 * D <= nCol * nRow
		 * where D is the size of each output from the turbo encoder
		 * block (k + 4).
		 */
		kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3;
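		/*
		 * Worked example (illustrative): for the largest CB,
		 * k = 6144, D = 6148 bits, which nCol * nRow rounds up to
		 * 6176, so Kw = 3 * 6176 = 18528, matching
		 * RTE_BBDEV_TURBO_MAX_KW.
		 */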
		process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out,
				in_offset, out_offset, check_bit(dec->op_flags,
				RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap,
				seg_total_left, queue_stats);

		/* To keep CRC24 attached to the end of the Code block, use
		 * the RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag, as the CRC is
		 * removed by default once verified.
		 */

		mbuf_total_left -= kw;

		/* Update offsets */
		if (seg_total_left == kw) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			in_offset = 0;
			out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += kw;
			out_offset += ((k - crc24_overlap) >> 3);
		}
		r++;
	}
}

static inline void
enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op,
		struct rte_bbdev_stats *queue_stats)
{
	uint8_t c, r = 0;
	uint32_t e;
	uint16_t out_length, crc24_overlap = 0;
	struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
	struct rte_mbuf *m_in = dec->input.data;
	struct rte_mbuf *m_harq_in = dec->harq_combined_input.data;
	struct rte_mbuf *m_harq_out = dec->harq_combined_output.data;
	struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data;
	struct rte_mbuf *m_out = dec->hard_output.data;
	struct rte_mbuf *m_out_head = dec->hard_output.data;
	uint16_t in_offset = dec->input.offset;
	uint16_t harq_in_offset = dec->harq_combined_input.offset;
	uint16_t harq_out_offset = dec->harq_combined_output.offset;
	uint16_t out_offset = dec->hard_output.offset;
	uint32_t mbuf_total_left = dec->input.length;
	uint16_t seg_total_left;

	/* Clear op status */
	op->status = 0;

	if (m_in == NULL || m_out == NULL) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		op->status = 1 << RTE_BBDEV_DATA_ERROR;
		return;
	}

	if (dec->code_block_mode == 0) { /* For Transport Block mode */
		c = dec->tb_params.c;
		e = dec->tb_params.ea;
	} else { /* For Code Block mode */
		c = 1;
		e = dec->cb_params.e;
	}

	if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
		crc24_overlap = 24;

	out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */
	out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3);

	while (mbuf_total_left > 0) {
		if (dec->code_block_mode == 0)
			e = (r < dec->tb_params.cab) ?
				dec->tb_params.ea : dec->tb_params.eb;
		/* Special case handling when overusing mbuf */
		if (e < RTE_BBDEV_LDPC_E_MAX_MBUF)
			seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset;
		else
			seg_total_left = e;
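		/*
		 * Note (illustrative reading): when e is at least
		 * RTE_BBDEV_LDPC_E_MAX_MBUF the CB is assumed to occupy its
		 * own mbuf, so seg_total_left is forced to e and the
		 * segment-advance check after process_ldpc_dec_cb() below
		 * moves straight to the next mbuf.
		 */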
		process_ldpc_dec_cb(q, op, c, out_length, e,
				m_in, m_out_head, m_out,
				m_harq_in, m_harq_out_head, m_harq_out,
				in_offset, out_offset, harq_in_offset,
				harq_out_offset,
				check_bit(dec->op_flags,
						RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK),
				crc24_overlap,
				seg_total_left, queue_stats);

		/* The CRC24B is dropped from the output only when the
		 * RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP flag is set; otherwise it
		 * stays attached to the end of the Code block.
		 */

		mbuf_total_left -= e;

		/* Update offsets */
		if (seg_total_left == e) {
			/* Go to the next mbuf */
			m_in = m_in->next;
			m_out = m_out->next;
			if (m_harq_in != NULL)
				m_harq_in = m_harq_in->next;
			if (m_harq_out != NULL)
				m_harq_out = m_harq_out->next;
			in_offset = 0;
			out_offset = 0;
			harq_in_offset = 0;
			harq_out_offset = 0;
		} else {
			/* Update offsets for next CBs (if exist) */
			in_offset += e;
			out_offset += out_length;
		}
		r++;
	}
}

static inline uint16_t
enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_dec_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

static inline uint16_t
enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q,
		struct rte_bbdev_dec_op **ops,
		uint16_t nb_ops, struct rte_bbdev_stats *queue_stats)
{
	uint16_t i;
#ifdef RTE_BBDEV_OFFLOAD_COST
	queue_stats->acc_offload_cycles = 0;
#endif

	for (i = 0; i < nb_ops; ++i)
		enqueue_ldpc_dec_one_op(q, ops[i], queue_stats);

	return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops,
			NULL);
}

/* Enqueue burst */
static uint16_t
enqueue_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_ldpc_enc_all_ops(
			q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}

/* Enqueue burst */
static uint16_t
enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	void *queue = q_data->queue_private;
	struct turbo_sw_queue *q = queue;
	uint16_t nb_enqueued = 0;

	nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops,
			&q_data->queue_stats);

	q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued;
	q_data->queue_stats.enqueued_count += nb_enqueued;

	return nb_enqueued;
}
/* Dequeue decode burst */
static uint16_t
dequeue_dec_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *q = q_data->queue_private;
	uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
			(void **)ops, nb_ops, NULL);
	q_data->queue_stats.dequeued_count += nb_dequeued;

	return nb_dequeued;
}

/* Dequeue encode burst */
static uint16_t
dequeue_enc_ops(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t nb_ops)
{
	struct turbo_sw_queue *q = q_data->queue_private;
	uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts,
			(void **)ops, nb_ops, NULL);
	q_data->queue_stats.dequeued_count += nb_dequeued;

	return nb_dequeued;
}

/* Parse 16-bit integer from string argument */
static inline int
parse_u16_arg(const char *key, const char *value, void *extra_args)
{
	uint16_t *u16 = extra_args;
	unsigned long result;

	if ((value == NULL) || (extra_args == NULL))
		return -EINVAL;
	errno = 0;
	result = strtoul(value, NULL, 0);
	if ((result >= (1 << 16)) || (errno != 0)) {
		rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key);
		return -ERANGE;
	}
	*u16 = (uint16_t)result;
	return 0;
}
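/*
 * Note (illustrative): strtoul() is called with base 0, so decimal, hex and
 * octal spellings are all accepted, e.g. "max_nb_queues=8" and
 * "max_nb_queues=0x8" parse to the same value.
 */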
/* Parse parameters used to create device */
static int
parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;

	if (params == NULL)
		return -EINVAL;
	if (input_args) {
		kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params);
		if (kvlist == NULL)
			return -EFAULT;

		ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0],
				&parse_u16_arg, &params->queues_num);
		if (ret < 0)
			goto exit;

		ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1],
				&parse_u16_arg, &params->socket_id);
		if (ret < 0)
			goto exit;

		if (params->socket_id >= RTE_MAX_NUMA_NODES) {
			rte_bbdev_log(ERR, "Invalid socket, must be < %u",
					RTE_MAX_NUMA_NODES);
			ret = -EINVAL;
			goto exit;
		}
	}

exit:
	if (kvlist)
		rte_kvargs_free(kvlist);
	return ret;
}

/* Create device */
static int
turbo_sw_bbdev_create(struct rte_vdev_device *vdev,
		struct turbo_sw_params *init_params)
{
	struct rte_bbdev *bbdev;
	const char *name = rte_vdev_device_name(vdev);

	bbdev = rte_bbdev_allocate(name);
	if (bbdev == NULL)
		return -ENODEV;

	bbdev->data->dev_private = rte_zmalloc_socket(name,
			sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE,
			init_params->socket_id);
	if (bbdev->data->dev_private == NULL) {
		rte_bbdev_release(bbdev);
		return -ENOMEM;
	}

	bbdev->dev_ops = &pmd_ops;
	bbdev->device = &vdev->device;
	bbdev->data->socket_id = init_params->socket_id;
	bbdev->intr_handle = NULL;

	/* register enqueue/dequeue burst functions for the data path */
	bbdev->dequeue_enc_ops = dequeue_enc_ops;
	bbdev->dequeue_dec_ops = dequeue_dec_ops;
	bbdev->enqueue_enc_ops = enqueue_enc_ops;
	bbdev->enqueue_dec_ops = enqueue_dec_ops;
	bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops;
	bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops;
	bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops;
	bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops;
	((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues =
			init_params->queues_num;

	return 0;
}

/* Initialise device */
static int
turbo_sw_bbdev_probe(struct rte_vdev_device *vdev)
{
	struct turbo_sw_params init_params = {
		rte_socket_id(),
		RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
	};
	const char *name;
	const char *input_args;

	if (vdev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;
	input_args = rte_vdev_device_args(vdev);
	parse_turbo_sw_params(&init_params, input_args);

	rte_bbdev_log_debug(
			"Initialising %s on NUMA node %d with max queues: %d",
			name, init_params.socket_id, init_params.queues_num);

	return turbo_sw_bbdev_create(vdev, &init_params);
}

/* Uninitialise device */
static int
turbo_sw_bbdev_remove(struct rte_vdev_device *vdev)
{
	struct rte_bbdev *bbdev;
	const char *name;

	if (vdev == NULL)
		return -EINVAL;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;

	bbdev = rte_bbdev_get_named_dev(name);
	if (bbdev == NULL)
		return -EINVAL;

	rte_free(bbdev->data->dev_private);

	return rte_bbdev_release(bbdev);
}

static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = {
	.probe = turbo_sw_bbdev_probe,
	.remove = turbo_sw_bbdev_remove
};

RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv);
RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME,
	TURBO_SW_MAX_NB_QUEUES_ARG"=<int> "
	TURBO_SW_SOCKET_ID_ARG"=<int>");
RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw);

RTE_INIT(turbo_sw_bbdev_init_log)
{
	bbdev_turbo_sw_logtype = rte_log_register("pmd.bb.turbo_sw");
	if (bbdev_turbo_sw_logtype >= 0)
		rte_log_set_level(bbdev_turbo_sw_logtype, RTE_LOG_NOTICE);
}