1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2019 Intel Corporation 3 */ 4 5 #include <unistd.h> 6 7 #include <rte_common.h> 8 #include <rte_log.h> 9 #include <dev_driver.h> 10 #include <rte_malloc.h> 11 #include <rte_mempool.h> 12 #include <rte_errno.h> 13 #include <rte_pci.h> 14 #include <bus_pci_driver.h> 15 #include <rte_byteorder.h> 16 #ifdef RTE_BBDEV_OFFLOAD_COST 17 #include <rte_cycles.h> 18 #endif 19 20 #include <rte_bbdev.h> 21 #include <rte_bbdev_pmd.h> 22 23 #include "fpga_lte_fec.h" 24 25 #ifdef RTE_LIBRTE_BBDEV_DEBUG 26 RTE_LOG_REGISTER_DEFAULT(fpga_lte_fec_logtype, DEBUG); 27 #else 28 RTE_LOG_REGISTER_DEFAULT(fpga_lte_fec_logtype, NOTICE); 29 #endif 30 31 /* Helper macro for logging */ 32 #define rte_bbdev_log(level, fmt, ...) \ 33 rte_log(RTE_LOG_ ## level, fpga_lte_fec_logtype, fmt "\n", \ 34 ##__VA_ARGS__) 35 36 #ifdef RTE_LIBRTE_BBDEV_DEBUG 37 #define rte_bbdev_log_debug(fmt, ...) \ 38 rte_bbdev_log(DEBUG, "fpga_lte_fec: " fmt, \ 39 ##__VA_ARGS__) 40 #else 41 #define rte_bbdev_log_debug(fmt, ...) 42 #endif 43 44 /* FPGA LTE FEC driver names */ 45 #define FPGA_LTE_FEC_PF_DRIVER_NAME intel_fpga_lte_fec_pf 46 #define FPGA_LTE_FEC_VF_DRIVER_NAME intel_fpga_lte_fec_vf 47 48 /* FPGA LTE FEC PCI vendor & device IDs */ 49 #define FPGA_LTE_FEC_VENDOR_ID (0x1172) 50 #define FPGA_LTE_FEC_PF_DEVICE_ID (0x5052) 51 #define FPGA_LTE_FEC_VF_DEVICE_ID (0x5050) 52 53 /* Align DMA descriptors to 256 bytes - cache-aligned */ 54 #define FPGA_RING_DESC_ENTRY_LENGTH (8) 55 /* Ring size is in 256 bits (32 bytes) units */ 56 #define FPGA_RING_DESC_LEN_UNIT_BYTES (32) 57 /* Maximum size of queue */ 58 #define FPGA_RING_MAX_SIZE (1024) 59 #define FPGA_FLR_TIMEOUT_UNIT (16.384) 60 61 #define FPGA_NUM_UL_QUEUES (32) 62 #define FPGA_NUM_DL_QUEUES (32) 63 #define FPGA_TOTAL_NUM_QUEUES (FPGA_NUM_UL_QUEUES + FPGA_NUM_DL_QUEUES) 64 #define FPGA_NUM_INTR_VEC (FPGA_TOTAL_NUM_QUEUES - RTE_INTR_VEC_RXTX_OFFSET) 65 66 #define FPGA_INVALID_HW_QUEUE_ID (0xFFFFFFFF) 67 68 #define FPGA_QUEUE_FLUSH_TIMEOUT_US (1000) 69 #define FPGA_TIMEOUT_CHECK_INTERVAL (5) 70 71 /* FPGA LTE FEC Register mapping on BAR0 */ 72 enum { 73 FPGA_LTE_FEC_VERSION_ID = 0x00000000, /* len: 4B */ 74 FPGA_LTE_FEC_CONFIGURATION = 0x00000004, /* len: 2B */ 75 FPGA_LTE_FEC_QUEUE_PF_VF_MAP_DONE = 0x00000008, /* len: 1B */ 76 FPGA_LTE_FEC_LOAD_BALANCE_FACTOR = 0x0000000a, /* len: 2B */ 77 FPGA_LTE_FEC_RING_DESC_LEN = 0x0000000c, /* len: 2B */ 78 FPGA_LTE_FEC_FLR_TIME_OUT = 0x0000000e, /* len: 2B */ 79 FPGA_LTE_FEC_VFQ_FLUSH_STATUS_LW = 0x00000018, /* len: 4B */ 80 FPGA_LTE_FEC_VFQ_FLUSH_STATUS_HI = 0x0000001c, /* len: 4B */ 81 FPGA_LTE_FEC_VF0_DEBUG = 0x00000020, /* len: 4B */ 82 FPGA_LTE_FEC_VF1_DEBUG = 0x00000024, /* len: 4B */ 83 FPGA_LTE_FEC_VF2_DEBUG = 0x00000028, /* len: 4B */ 84 FPGA_LTE_FEC_VF3_DEBUG = 0x0000002c, /* len: 4B */ 85 FPGA_LTE_FEC_VF4_DEBUG = 0x00000030, /* len: 4B */ 86 FPGA_LTE_FEC_VF5_DEBUG = 0x00000034, /* len: 4B */ 87 FPGA_LTE_FEC_VF6_DEBUG = 0x00000038, /* len: 4B */ 88 FPGA_LTE_FEC_VF7_DEBUG = 0x0000003c, /* len: 4B */ 89 FPGA_LTE_FEC_QUEUE_MAP = 0x00000040, /* len: 256B */ 90 FPGA_LTE_FEC_RING_CTRL_REGS = 0x00000200 /* len: 2048B */ 91 }; 92 93 /* FPGA LTE FEC Ring Control Registers */ 94 enum { 95 FPGA_LTE_FEC_RING_HEAD_ADDR = 0x00000008, 96 FPGA_LTE_FEC_RING_SIZE = 0x00000010, 97 FPGA_LTE_FEC_RING_MISC = 0x00000014, 98 FPGA_LTE_FEC_RING_ENABLE = 0x00000015, 99 FPGA_LTE_FEC_RING_FLUSH_QUEUE_EN = 0x00000016, 100 FPGA_LTE_FEC_RING_SHADOW_TAIL = 0x00000018, 101 FPGA_LTE_FEC_RING_HEAD_POINT = 0x0000001C 
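	/* Note: these per-queue offsets are applied on top of a queue's ring
	 * control block, located at FPGA_LTE_FEC_RING_CTRL_REGS +
	 * queue_index * sizeof(struct fpga_ring_ctrl_reg); the 64-bit ring
	 * base address is written at offset 0 of that block
	 * (see fpga_ring_reg_write() and fpga_queue_setup()).
	 */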
102 }; 103 104 /* FPGA LTE FEC DESCRIPTOR ERROR */ 105 enum { 106 DESC_ERR_NO_ERR = 0x0, 107 DESC_ERR_K_OUT_OF_RANGE = 0x1, 108 DESC_ERR_K_NOT_NORMAL = 0x2, 109 DESC_ERR_KPAI_NOT_NORMAL = 0x3, 110 DESC_ERR_DESC_OFFSET_ERR = 0x4, 111 DESC_ERR_DESC_READ_FAIL = 0x8, 112 DESC_ERR_DESC_READ_TIMEOUT = 0x9, 113 DESC_ERR_DESC_READ_TLP_POISONED = 0xA, 114 DESC_ERR_CB_READ_FAIL = 0xC, 115 DESC_ERR_CB_READ_TIMEOUT = 0xD, 116 DESC_ERR_CB_READ_TLP_POISONED = 0xE 117 }; 118 119 /* FPGA LTE FEC DMA Encoding Request Descriptor */ 120 struct __rte_packed fpga_dma_enc_desc { 121 uint32_t done:1, 122 rsrvd0:11, 123 error:4, 124 rsrvd1:16; 125 uint32_t ncb:16, 126 rsrvd2:14, 127 rv:2; 128 uint32_t bypass_rm:1, 129 irq_en:1, 130 crc_en:1, 131 rsrvd3:13, 132 offset:10, 133 rsrvd4:6; 134 uint16_t e; 135 uint16_t k; 136 uint32_t out_addr_lw; 137 uint32_t out_addr_hi; 138 uint32_t in_addr_lw; 139 uint32_t in_addr_hi; 140 141 union { 142 struct { 143 /* Virtual addresses used to retrieve SW context info */ 144 void *op_addr; 145 /* Stores information about total number of Code Blocks 146 * in currently processed Transport Block 147 */ 148 uint64_t cbs_in_op; 149 }; 150 151 uint8_t sw_ctxt[FPGA_RING_DESC_LEN_UNIT_BYTES * 152 (FPGA_RING_DESC_ENTRY_LENGTH - 1)]; 153 }; 154 }; 155 156 /* FPGA LTE FEC DMA Decoding Request Descriptor */ 157 struct __rte_packed fpga_dma_dec_desc { 158 uint32_t done:1, 159 iter:5, 160 rsrvd0:2, 161 crc_pass:1, 162 rsrvd1:3, 163 error:4, 164 crc_type:1, 165 rsrvd2:7, 166 max_iter:5, 167 rsrvd3:3; 168 uint32_t rsrvd4; 169 uint32_t bypass_rm:1, 170 irq_en:1, 171 drop_crc:1, 172 rsrvd5:13, 173 offset:10, 174 rsrvd6:6; 175 uint16_t k; 176 uint16_t in_len; 177 uint32_t out_addr_lw; 178 uint32_t out_addr_hi; 179 uint32_t in_addr_lw; 180 uint32_t in_addr_hi; 181 182 union { 183 struct { 184 /* Virtual addresses used to retrieve SW context info */ 185 void *op_addr; 186 /* Stores information about total number of Code Blocks 187 * in currently processed Transport Block 188 */ 189 uint8_t cbs_in_op; 190 }; 191 192 uint32_t sw_ctxt[8 * (FPGA_RING_DESC_ENTRY_LENGTH - 1)]; 193 }; 194 }; 195 196 /* FPGA LTE DMA Descriptor */ 197 union fpga_dma_desc { 198 struct fpga_dma_enc_desc enc_req; 199 struct fpga_dma_dec_desc dec_req; 200 }; 201 202 /* FPGA LTE FEC Ring Control Register */ 203 struct __rte_packed fpga_ring_ctrl_reg { 204 uint64_t ring_base_addr; 205 uint64_t ring_head_addr; 206 uint16_t ring_size:11; 207 uint16_t rsrvd0; 208 union { /* Miscellaneous register */ 209 uint8_t misc; 210 uint8_t max_ul_dec:5, 211 max_ul_dec_en:1, 212 rsrvd1:2; 213 }; 214 uint8_t enable; 215 uint8_t flush_queue_en; 216 uint8_t rsrvd2; 217 uint16_t shadow_tail; 218 uint16_t rsrvd3; 219 uint16_t head_point; 220 uint16_t rsrvd4; 221 222 }; 223 224 /* Private data structure for each FPGA FEC device */ 225 struct fpga_lte_fec_device { 226 /** Base address of MMIO registers (BAR0) */ 227 void *mmio_base; 228 /** Base address of memory for sw rings */ 229 void *sw_rings; 230 /** Physical address of sw_rings */ 231 rte_iova_t sw_rings_phys; 232 /** Number of bytes available for each queue in device. 
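	 * Set in fpga_setup_queues() to FPGA_RING_MAX_SIZE *
	 * sizeof(struct fpga_dma_dec_desc), i.e. one ring's worth of
	 * descriptors.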
*/ 233 uint32_t sw_ring_size; 234 /** Max number of entries available for each queue in device */ 235 uint32_t sw_ring_max_depth; 236 /** Base address of response tail pointer buffer */ 237 uint32_t *tail_ptrs; 238 /** Physical address of tail pointers */ 239 rte_iova_t tail_ptr_phys; 240 /** Queues flush completion flag */ 241 uint64_t *flush_queue_status; 242 /* Bitmap capturing which Queues are bound to the PF/VF */ 243 uint64_t q_bound_bit_map; 244 /* Bitmap capturing which Queues have already been assigned */ 245 uint64_t q_assigned_bit_map; 246 /** True if this is a PF FPGA FEC device */ 247 bool pf_device; 248 }; 249 250 /* Structure associated with each queue. */ 251 struct __rte_cache_aligned fpga_queue { 252 struct fpga_ring_ctrl_reg ring_ctrl_reg; /* Ring Control Register */ 253 union fpga_dma_desc *ring_addr; /* Virtual address of software ring */ 254 uint64_t *ring_head_addr; /* Virtual address of completion_head */ 255 uint64_t shadow_completion_head; /* Shadow completion head value */ 256 uint16_t head_free_desc; /* Ring head */ 257 uint16_t tail; /* Ring tail */ 258 /* Mask used to wrap enqueued descriptors on the sw ring */ 259 uint32_t sw_ring_wrap_mask; 260 uint32_t irq_enable; /* Enable ops dequeue interrupts if set to 1 */ 261 uint8_t q_idx; /* Queue index */ 262 struct fpga_lte_fec_device *d; 263 /* MMIO register of shadow_tail used to enqueue descriptors */ 264 void *shadow_tail_addr; 265 }; 266 267 /* Write to 16 bit MMIO register address */ 268 static inline void 269 mmio_write_16(void *addr, uint16_t value) 270 { 271 *((volatile uint16_t *)(addr)) = rte_cpu_to_le_16(value); 272 } 273 274 /* Write to 32 bit MMIO register address */ 275 static inline void 276 mmio_write_32(void *addr, uint32_t value) 277 { 278 *((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value); 279 } 280 281 /* Write to 64 bit MMIO register address */ 282 static inline void 283 mmio_write_64(void *addr, uint64_t value) 284 { 285 *((volatile uint64_t *)(addr)) = rte_cpu_to_le_64(value); 286 } 287 288 /* Write a 8 bit register of a FPGA LTE FEC device */ 289 static inline void 290 fpga_reg_write_8(void *mmio_base, uint32_t offset, uint8_t payload) 291 { 292 void *reg_addr = RTE_PTR_ADD(mmio_base, offset); 293 *((volatile uint8_t *)(reg_addr)) = payload; 294 } 295 296 /* Write a 16 bit register of a FPGA LTE FEC device */ 297 static inline void 298 fpga_reg_write_16(void *mmio_base, uint32_t offset, uint16_t payload) 299 { 300 void *reg_addr = RTE_PTR_ADD(mmio_base, offset); 301 mmio_write_16(reg_addr, payload); 302 } 303 304 /* Write a 32 bit register of a FPGA LTE FEC device */ 305 static inline void 306 fpga_reg_write_32(void *mmio_base, uint32_t offset, uint32_t payload) 307 { 308 void *reg_addr = RTE_PTR_ADD(mmio_base, offset); 309 mmio_write_32(reg_addr, payload); 310 } 311 312 /* Write a 64 bit register of a FPGA LTE FEC device */ 313 static inline void 314 fpga_reg_write_64(void *mmio_base, uint32_t offset, uint64_t payload) 315 { 316 void *reg_addr = RTE_PTR_ADD(mmio_base, offset); 317 mmio_write_64(reg_addr, payload); 318 } 319 320 /* Write a ring control register of a FPGA LTE FEC device */ 321 static inline void 322 fpga_ring_reg_write(void *mmio_base, uint32_t offset, 323 struct fpga_ring_ctrl_reg payload) 324 { 325 fpga_reg_write_64(mmio_base, offset, payload.ring_base_addr); 326 fpga_reg_write_64(mmio_base, offset + FPGA_LTE_FEC_RING_HEAD_ADDR, 327 payload.ring_head_addr); 328 fpga_reg_write_16(mmio_base, offset + FPGA_LTE_FEC_RING_SIZE, 329 payload.ring_size); 330 
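	/* The remaining control fields below are narrower registers and use
	 * width-matched 8/16-bit MMIO writes; the enable flag is written
	 * last, once all other ring parameters have been programmed.
	 */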
fpga_reg_write_16(mmio_base, offset + FPGA_LTE_FEC_RING_HEAD_POINT, 331 payload.head_point); 332 fpga_reg_write_8(mmio_base, offset + FPGA_LTE_FEC_RING_FLUSH_QUEUE_EN, 333 payload.flush_queue_en); 334 fpga_reg_write_16(mmio_base, offset + FPGA_LTE_FEC_RING_SHADOW_TAIL, 335 payload.shadow_tail); 336 fpga_reg_write_8(mmio_base, offset + FPGA_LTE_FEC_RING_MISC, 337 payload.misc); 338 fpga_reg_write_8(mmio_base, offset + FPGA_LTE_FEC_RING_ENABLE, 339 payload.enable); 340 } 341 342 /* Read a register of FPGA LTE FEC device */ 343 static uint32_t 344 fpga_reg_read_32(void *mmio_base, uint32_t offset) 345 { 346 void *reg_addr = RTE_PTR_ADD(mmio_base, offset); 347 uint32_t ret = *((volatile uint32_t *)(reg_addr)); 348 return rte_le_to_cpu_32(ret); 349 } 350 351 #ifdef RTE_LIBRTE_BBDEV_DEBUG 352 /* Read a register of FPGA LTE FEC device */ 353 static uint8_t 354 fpga_reg_read_8(void *mmio_base, uint32_t offset) 355 { 356 void *reg_addr = RTE_PTR_ADD(mmio_base, offset); 357 return *((volatile uint8_t *)(reg_addr)); 358 } 359 360 /* Read a register of FPGA LTE FEC device */ 361 static uint16_t 362 fpga_reg_read_16(void *mmio_base, uint32_t offset) 363 { 364 void *reg_addr = RTE_PTR_ADD(mmio_base, offset); 365 uint16_t ret = *((volatile uint16_t *)(reg_addr)); 366 return rte_le_to_cpu_16(ret); 367 } 368 369 /* Read a register of FPGA LTE FEC device */ 370 static uint64_t 371 fpga_reg_read_64(void *mmio_base, uint32_t offset) 372 { 373 void *reg_addr = RTE_PTR_ADD(mmio_base, offset); 374 uint64_t ret = *((volatile uint64_t *)(reg_addr)); 375 return rte_le_to_cpu_64(ret); 376 } 377 378 /* Read Ring Control Register of FPGA LTE FEC device */ 379 static inline void 380 print_ring_reg_debug_info(void *mmio_base, uint32_t offset) 381 { 382 rte_bbdev_log_debug( 383 "FPGA MMIO base address @ %p | Ring Control Register @ offset = 0x%08" 384 PRIx32, mmio_base, offset); 385 rte_bbdev_log_debug( 386 "RING_BASE_ADDR = 0x%016"PRIx64, 387 fpga_reg_read_64(mmio_base, offset)); 388 rte_bbdev_log_debug( 389 "RING_HEAD_ADDR = 0x%016"PRIx64, 390 fpga_reg_read_64(mmio_base, offset + 391 FPGA_LTE_FEC_RING_HEAD_ADDR)); 392 rte_bbdev_log_debug( 393 "RING_SIZE = 0x%04"PRIx16, 394 fpga_reg_read_16(mmio_base, offset + 395 FPGA_LTE_FEC_RING_SIZE)); 396 rte_bbdev_log_debug( 397 "RING_MISC = 0x%02"PRIx8, 398 fpga_reg_read_8(mmio_base, offset + 399 FPGA_LTE_FEC_RING_MISC)); 400 rte_bbdev_log_debug( 401 "RING_ENABLE = 0x%02"PRIx8, 402 fpga_reg_read_8(mmio_base, offset + 403 FPGA_LTE_FEC_RING_ENABLE)); 404 rte_bbdev_log_debug( 405 "RING_FLUSH_QUEUE_EN = 0x%02"PRIx8, 406 fpga_reg_read_8(mmio_base, offset + 407 FPGA_LTE_FEC_RING_FLUSH_QUEUE_EN)); 408 rte_bbdev_log_debug( 409 "RING_SHADOW_TAIL = 0x%04"PRIx16, 410 fpga_reg_read_16(mmio_base, offset + 411 FPGA_LTE_FEC_RING_SHADOW_TAIL)); 412 rte_bbdev_log_debug( 413 "RING_HEAD_POINT = 0x%04"PRIx16, 414 fpga_reg_read_16(mmio_base, offset + 415 FPGA_LTE_FEC_RING_HEAD_POINT)); 416 } 417 418 /* Read Static Register of FPGA LTE FEC device */ 419 static inline void 420 print_static_reg_debug_info(void *mmio_base) 421 { 422 uint16_t config = fpga_reg_read_16(mmio_base, 423 FPGA_LTE_FEC_CONFIGURATION); 424 uint8_t qmap_done = fpga_reg_read_8(mmio_base, 425 FPGA_LTE_FEC_QUEUE_PF_VF_MAP_DONE); 426 uint16_t lb_factor = fpga_reg_read_16(mmio_base, 427 FPGA_LTE_FEC_LOAD_BALANCE_FACTOR); 428 uint16_t ring_desc_len = fpga_reg_read_16(mmio_base, 429 FPGA_LTE_FEC_RING_DESC_LEN); 430 uint16_t flr_time_out = fpga_reg_read_16(mmio_base, 431 FPGA_LTE_FEC_FLR_TIME_OUT); 432 433 rte_bbdev_log_debug("UL.DL 
Weights = %u.%u", 434 ((uint8_t)config), ((uint8_t)(config >> 8))); 435 rte_bbdev_log_debug("UL.DL Load Balance = %u.%u", 436 ((uint8_t)lb_factor), ((uint8_t)(lb_factor >> 8))); 437 rte_bbdev_log_debug("Queue-PF/VF Mapping Table = %s", 438 (qmap_done > 0) ? "READY" : "NOT-READY"); 439 rte_bbdev_log_debug("Ring Descriptor Size = %u bytes", 440 ring_desc_len*FPGA_RING_DESC_LEN_UNIT_BYTES); 441 rte_bbdev_log_debug("FLR Timeout = %f usec", 442 (float)flr_time_out*FPGA_FLR_TIMEOUT_UNIT); 443 } 444 445 /* Print decode DMA Descriptor of FPGA LTE FEC device */ 446 static void 447 print_dma_dec_desc_debug_info(union fpga_dma_desc *desc) 448 { 449 rte_bbdev_log_debug("DMA response desc %p\n" 450 "\t-- done(%"PRIu32") | iter(%"PRIu32") | crc_pass(%"PRIu32")" 451 " | error (%"PRIu32") | crc_type(%"PRIu32")\n" 452 "\t-- max_iter(%"PRIu32") | bypass_rm(%"PRIu32") | " 453 "irq_en (%"PRIu32") | drop_crc(%"PRIu32") | offset(%"PRIu32")\n" 454 "\t-- k(%"PRIu32") | in_len (%"PRIu16") | op_add(%p)\n" 455 "\t-- cbs_in_op(%"PRIu32") | in_add (0x%08"PRIx32"%08"PRIx32") | " 456 "out_add (0x%08"PRIx32"%08"PRIx32")", 457 desc, 458 (uint32_t)desc->dec_req.done, 459 (uint32_t)desc->dec_req.iter, 460 (uint32_t)desc->dec_req.crc_pass, 461 (uint32_t)desc->dec_req.error, 462 (uint32_t)desc->dec_req.crc_type, 463 (uint32_t)desc->dec_req.max_iter, 464 (uint32_t)desc->dec_req.bypass_rm, 465 (uint32_t)desc->dec_req.irq_en, 466 (uint32_t)desc->dec_req.drop_crc, 467 (uint32_t)desc->dec_req.offset, 468 (uint32_t)desc->dec_req.k, 469 (uint16_t)desc->dec_req.in_len, 470 desc->dec_req.op_addr, 471 (uint32_t)desc->dec_req.cbs_in_op, 472 (uint32_t)desc->dec_req.in_addr_hi, 473 (uint32_t)desc->dec_req.in_addr_lw, 474 (uint32_t)desc->dec_req.out_addr_hi, 475 (uint32_t)desc->dec_req.out_addr_lw); 476 } 477 #endif 478 479 static int 480 fpga_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id) 481 { 482 /* Number of queues bound to a PF/VF */ 483 uint32_t hw_q_num = 0; 484 uint32_t ring_size, payload, address, q_id, offset; 485 rte_iova_t phys_addr; 486 struct fpga_ring_ctrl_reg ring_reg; 487 struct fpga_lte_fec_device *fpga_dev = dev->data->dev_private; 488 489 address = FPGA_LTE_FEC_QUEUE_PF_VF_MAP_DONE; 490 if (!(fpga_reg_read_32(fpga_dev->mmio_base, address) & 0x1)) { 491 rte_bbdev_log(ERR, 492 "Queue-PF/VF mapping is not set! Was PF configured for device (%s) ?", 493 dev->data->name); 494 return -EPERM; 495 } 496 497 /* Clear queue registers structure */ 498 memset(&ring_reg, 0, sizeof(struct fpga_ring_ctrl_reg)); 499 500 /* Scan queue map. 501 * If a queue is valid and mapped to a calling PF/VF the read value is 502 * replaced with a queue ID and if it's not then 503 * FPGA_INVALID_HW_QUEUE_ID is returned. 504 */ 505 for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { 506 uint32_t hw_q_id = fpga_reg_read_32(fpga_dev->mmio_base, 507 FPGA_LTE_FEC_QUEUE_MAP + (q_id << 2)); 508 509 rte_bbdev_log_debug("%s: queue ID: %u, registry queue ID: %u", 510 dev->device->name, q_id, hw_q_id); 511 512 if (hw_q_id != FPGA_INVALID_HW_QUEUE_ID) { 513 fpga_dev->q_bound_bit_map |= (1ULL << q_id); 514 /* Clear queue register of found queue */ 515 offset = FPGA_LTE_FEC_RING_CTRL_REGS + 516 (sizeof(struct fpga_ring_ctrl_reg) * q_id); 517 fpga_ring_reg_write(fpga_dev->mmio_base, 518 offset, ring_reg); 519 ++hw_q_num; 520 } 521 } 522 if (hw_q_num == 0) { 523 rte_bbdev_log(ERR, 524 "No HW queues assigned to this device. Probably this is a VF configured for PF mode. 
Check device configuration!"); 525 return -ENODEV; 526 } 527 528 if (num_queues > hw_q_num) { 529 rte_bbdev_log(ERR, 530 "Not enough queues for device %s! Requested: %u, available: %u", 531 dev->device->name, num_queues, hw_q_num); 532 return -EINVAL; 533 } 534 535 ring_size = FPGA_RING_MAX_SIZE * sizeof(struct fpga_dma_dec_desc); 536 537 /* Enforce 32 byte alignment */ 538 RTE_BUILD_BUG_ON((RTE_CACHE_LINE_SIZE % 32) != 0); 539 540 /* Allocate memory for SW descriptor rings */ 541 fpga_dev->sw_rings = rte_zmalloc_socket(dev->device->driver->name, 542 num_queues * ring_size, RTE_CACHE_LINE_SIZE, 543 socket_id); 544 if (fpga_dev->sw_rings == NULL) { 545 rte_bbdev_log(ERR, 546 "Failed to allocate memory for %s:%u sw_rings", 547 dev->device->driver->name, dev->data->dev_id); 548 return -ENOMEM; 549 } 550 551 fpga_dev->sw_rings_phys = rte_malloc_virt2iova(fpga_dev->sw_rings); 552 fpga_dev->sw_ring_size = ring_size; 553 fpga_dev->sw_ring_max_depth = FPGA_RING_MAX_SIZE; 554 555 /* Allocate memory for ring flush status */ 556 fpga_dev->flush_queue_status = rte_zmalloc_socket(NULL, 557 sizeof(uint64_t), RTE_CACHE_LINE_SIZE, socket_id); 558 if (fpga_dev->flush_queue_status == NULL) { 559 rte_bbdev_log(ERR, 560 "Failed to allocate memory for %s:%u flush_queue_status", 561 dev->device->driver->name, dev->data->dev_id); 562 return -ENOMEM; 563 } 564 565 /* Set the flush status address registers */ 566 phys_addr = rte_malloc_virt2iova(fpga_dev->flush_queue_status); 567 568 address = FPGA_LTE_FEC_VFQ_FLUSH_STATUS_LW; 569 payload = (uint32_t)(phys_addr); 570 fpga_reg_write_32(fpga_dev->mmio_base, address, payload); 571 572 address = FPGA_LTE_FEC_VFQ_FLUSH_STATUS_HI; 573 payload = (uint32_t)(phys_addr >> 32); 574 fpga_reg_write_32(fpga_dev->mmio_base, address, payload); 575 576 return 0; 577 } 578 579 static int 580 fpga_dev_close(struct rte_bbdev *dev) 581 { 582 struct fpga_lte_fec_device *fpga_dev = dev->data->dev_private; 583 584 rte_free(fpga_dev->sw_rings); 585 rte_free(fpga_dev->flush_queue_status); 586 587 return 0; 588 } 589 590 static void 591 fpga_dev_info_get(struct rte_bbdev *dev, 592 struct rte_bbdev_driver_info *dev_info) 593 { 594 struct fpga_lte_fec_device *d = dev->data->dev_private; 595 uint32_t q_id = 0; 596 597 /* TODO RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN and numbers of buffers are set 598 * to temporary values as they are required by test application while 599 * validation phase. 
600 */ 601 static const struct rte_bbdev_op_cap bbdev_capabilities[] = { 602 { 603 .type = RTE_BBDEV_OP_TURBO_DEC, 604 .cap.turbo_dec = { 605 .capability_flags = 606 RTE_BBDEV_TURBO_CRC_TYPE_24B | 607 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | 608 RTE_BBDEV_TURBO_DEC_INTERRUPTS | 609 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | 610 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP, 611 .max_llr_modulus = INT8_MAX, 612 .num_buffers_src = 613 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 614 .num_buffers_hard_out = 615 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 616 .num_buffers_soft_out = 0 617 } 618 }, 619 { 620 .type = RTE_BBDEV_OP_TURBO_ENC, 621 .cap.turbo_enc = { 622 .capability_flags = 623 RTE_BBDEV_TURBO_CRC_24B_ATTACH | 624 RTE_BBDEV_TURBO_RATE_MATCH | 625 RTE_BBDEV_TURBO_ENC_INTERRUPTS, 626 .num_buffers_src = 627 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 628 .num_buffers_dst = 629 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS 630 } 631 }, 632 RTE_BBDEV_END_OF_CAPABILITIES_LIST() 633 }; 634 635 static struct rte_bbdev_queue_conf default_queue_conf; 636 default_queue_conf.socket = dev->data->socket_id; 637 default_queue_conf.queue_size = FPGA_RING_MAX_SIZE; 638 639 640 dev_info->driver_name = dev->device->driver->name; 641 dev_info->queue_size_lim = FPGA_RING_MAX_SIZE; 642 dev_info->hardware_accelerated = true; 643 dev_info->min_alignment = 64; 644 dev_info->default_queue_conf = default_queue_conf; 645 dev_info->capabilities = bbdev_capabilities; 646 dev_info->cpu_flag_reqs = NULL; 647 dev_info->data_endianness = RTE_LITTLE_ENDIAN; 648 dev_info->device_status = RTE_BBDEV_DEV_NOT_SUPPORTED; 649 650 /* Calculates number of queues assigned to device */ 651 dev_info->max_num_queues = 0; 652 for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { 653 uint32_t hw_q_id = fpga_reg_read_32(d->mmio_base, 654 FPGA_LTE_FEC_QUEUE_MAP + (q_id << 2)); 655 if (hw_q_id != FPGA_INVALID_HW_QUEUE_ID) 656 dev_info->max_num_queues++; 657 } 658 /* Expose number of queue per operation type */ 659 dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0; 660 dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = dev_info->max_num_queues / 2; 661 dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = dev_info->max_num_queues / 2; 662 dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = 0; 663 dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = 0; 664 dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = 1; 665 dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = 1; 666 } 667 668 /** 669 * Find index of queue bound to current PF/VF which is unassigned. 
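 * Decode (UL) operations are matched against queue indices
 * 0..FPGA_TOTAL_NUM_QUEUES / 2 - 1 and encode (DL) operations against
 * FPGA_NUM_DL_QUEUES..FPGA_TOTAL_NUM_QUEUES - 1, as selected from
 * conf->op_type below.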
Return -1 670 * when there is no available queue 671 */ 672 static int 673 fpga_find_free_queue_idx(struct rte_bbdev *dev, 674 const struct rte_bbdev_queue_conf *conf) 675 { 676 struct fpga_lte_fec_device *d = dev->data->dev_private; 677 uint64_t q_idx; 678 uint8_t i = 0; 679 uint8_t range = FPGA_TOTAL_NUM_QUEUES >> 1; 680 681 if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC) { 682 i = FPGA_NUM_DL_QUEUES; 683 range = FPGA_TOTAL_NUM_QUEUES; 684 } 685 686 for (; i < range; ++i) { 687 q_idx = 1ULL << i; 688 /* Check if index of queue is bound to current PF/VF */ 689 if (d->q_bound_bit_map & q_idx) 690 /* Check if found queue was not already assigned */ 691 if (!(d->q_assigned_bit_map & q_idx)) { 692 d->q_assigned_bit_map |= q_idx; 693 return i; 694 } 695 } 696 697 rte_bbdev_log(INFO, "Failed to find free queue on %s", dev->data->name); 698 699 return -1; 700 } 701 702 static int 703 fpga_queue_setup(struct rte_bbdev *dev, uint16_t queue_id, 704 const struct rte_bbdev_queue_conf *conf) 705 { 706 uint32_t address, ring_offset; 707 struct fpga_lte_fec_device *d = dev->data->dev_private; 708 struct fpga_queue *q; 709 int8_t q_idx; 710 711 /* Check if there is a free queue to assign */ 712 q_idx = fpga_find_free_queue_idx(dev, conf); 713 if (q_idx == -1) 714 return -1; 715 716 /* Allocate the queue data structure. */ 717 q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q), 718 RTE_CACHE_LINE_SIZE, conf->socket); 719 if (q == NULL) { 720 /* Mark queue as un-assigned */ 721 d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); 722 rte_bbdev_log(ERR, "Failed to allocate queue memory"); 723 return -ENOMEM; 724 } 725 726 q->d = d; 727 q->q_idx = q_idx; 728 729 /* Set ring_base_addr */ 730 q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id)); 731 q->ring_ctrl_reg.ring_base_addr = d->sw_rings_phys + 732 (d->sw_ring_size * queue_id); 733 734 /* Allocate memory for Completion Head variable*/ 735 q->ring_head_addr = rte_zmalloc_socket(dev->device->driver->name, 736 sizeof(uint64_t), RTE_CACHE_LINE_SIZE, conf->socket); 737 if (q->ring_head_addr == NULL) { 738 /* Mark queue as un-assigned */ 739 d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); 740 rte_free(q); 741 rte_bbdev_log(ERR, 742 "Failed to allocate memory for %s:%u completion_head", 743 dev->device->driver->name, dev->data->dev_id); 744 return -ENOMEM; 745 } 746 /* Set ring_head_addr */ 747 q->ring_ctrl_reg.ring_head_addr = 748 rte_malloc_virt2iova(q->ring_head_addr); 749 750 /* Clear shadow_completion_head */ 751 q->shadow_completion_head = 0; 752 753 /* Set ring_size */ 754 if (conf->queue_size > FPGA_RING_MAX_SIZE) { 755 /* Mark queue as un-assigned */ 756 d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); 757 rte_free(q->ring_head_addr); 758 rte_free(q); 759 rte_bbdev_log(ERR, 760 "Size of queue is too big %d (MAX: %d ) for %s:%u", 761 conf->queue_size, FPGA_RING_MAX_SIZE, 762 dev->device->driver->name, dev->data->dev_id); 763 return -EINVAL; 764 } 765 q->ring_ctrl_reg.ring_size = conf->queue_size; 766 767 /* Set Miscellaneous FPGA register*/ 768 /* Max iteration number for TTI mitigation - todo */ 769 q->ring_ctrl_reg.max_ul_dec = 0; 770 /* Enable max iteration number for TTI - todo */ 771 q->ring_ctrl_reg.max_ul_dec_en = 0; 772 773 /* Enable the ring */ 774 q->ring_ctrl_reg.enable = 1; 775 776 /* Set FPGA head_point and tail registers */ 777 q->ring_ctrl_reg.head_point = q->tail = 0; 778 779 /* Set FPGA shadow_tail register */ 780 q->ring_ctrl_reg.shadow_tail = q->tail; 781 782 /* Calculates the ring offset 
for found queue */ 783 ring_offset = FPGA_LTE_FEC_RING_CTRL_REGS + 784 (sizeof(struct fpga_ring_ctrl_reg) * q_idx); 785 786 /* Set FPGA Ring Control Registers */ 787 fpga_ring_reg_write(d->mmio_base, ring_offset, q->ring_ctrl_reg); 788 789 /* Store MMIO register of shadow_tail */ 790 address = ring_offset + FPGA_LTE_FEC_RING_SHADOW_TAIL; 791 q->shadow_tail_addr = RTE_PTR_ADD(d->mmio_base, address); 792 793 q->head_free_desc = q->tail; 794 795 /* Set wrap mask */ 796 q->sw_ring_wrap_mask = conf->queue_size - 1; 797 798 rte_bbdev_log_debug("Setup dev%u q%u: queue_idx=%u", 799 dev->data->dev_id, queue_id, q->q_idx); 800 801 dev->data->queues[queue_id].queue_private = q; 802 803 rte_bbdev_log_debug("BBDEV queue[%d] set up for FPGA queue[%d]", 804 queue_id, q_idx); 805 806 #ifdef RTE_LIBRTE_BBDEV_DEBUG 807 /* Read FPGA Ring Control Registers after configuration*/ 808 print_ring_reg_debug_info(d->mmio_base, ring_offset); 809 #endif 810 return 0; 811 } 812 813 static int 814 fpga_queue_release(struct rte_bbdev *dev, uint16_t queue_id) 815 { 816 struct fpga_lte_fec_device *d = dev->data->dev_private; 817 struct fpga_queue *q = dev->data->queues[queue_id].queue_private; 818 struct fpga_ring_ctrl_reg ring_reg; 819 uint32_t offset; 820 821 rte_bbdev_log_debug("FPGA Queue[%d] released", queue_id); 822 823 if (q != NULL) { 824 memset(&ring_reg, 0, sizeof(struct fpga_ring_ctrl_reg)); 825 offset = FPGA_LTE_FEC_RING_CTRL_REGS + 826 (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); 827 /* Disable queue */ 828 fpga_reg_write_8(d->mmio_base, 829 offset + FPGA_LTE_FEC_RING_ENABLE, 0x00); 830 /* Clear queue registers */ 831 fpga_ring_reg_write(d->mmio_base, offset, ring_reg); 832 833 /* Mark the Queue as un-assigned */ 834 d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q->q_idx)); 835 rte_free(q->ring_head_addr); 836 rte_free(q); 837 dev->data->queues[queue_id].queue_private = NULL; 838 } 839 840 return 0; 841 } 842 843 /* Function starts a device queue. */ 844 static int 845 fpga_queue_start(struct rte_bbdev *dev, uint16_t queue_id) 846 { 847 struct fpga_lte_fec_device *d = dev->data->dev_private; 848 #ifdef RTE_LIBRTE_BBDEV_DEBUG 849 if (d == NULL) { 850 rte_bbdev_log(ERR, "Invalid device pointer"); 851 return -1; 852 } 853 #endif 854 struct fpga_queue *q = dev->data->queues[queue_id].queue_private; 855 uint32_t offset = FPGA_LTE_FEC_RING_CTRL_REGS + 856 (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); 857 uint8_t enable = 0x01; 858 uint16_t zero = 0x0000; 859 860 /* Clear queue head and tail variables */ 861 q->tail = q->head_free_desc = 0; 862 863 /* Clear FPGA head_point and tail registers */ 864 fpga_reg_write_16(d->mmio_base, offset + FPGA_LTE_FEC_RING_HEAD_POINT, 865 zero); 866 fpga_reg_write_16(d->mmio_base, offset + FPGA_LTE_FEC_RING_SHADOW_TAIL, 867 zero); 868 869 /* Enable queue */ 870 fpga_reg_write_8(d->mmio_base, offset + FPGA_LTE_FEC_RING_ENABLE, 871 enable); 872 873 rte_bbdev_log_debug("FPGA Queue[%d] started", queue_id); 874 return 0; 875 } 876 877 /* Function stops a device queue. 
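 * It requests a flush of any outstanding descriptors through the queue's
 * FLUSH_QUEUE_EN register and polls the flush status buffer for up to
 * FPGA_QUEUE_FLUSH_TIMEOUT_US before disabling the ring.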
*/ 878 static int 879 fpga_queue_stop(struct rte_bbdev *dev, uint16_t queue_id) 880 { 881 struct fpga_lte_fec_device *d = dev->data->dev_private; 882 #ifdef RTE_LIBRTE_BBDEV_DEBUG 883 if (d == NULL) { 884 rte_bbdev_log(ERR, "Invalid device pointer"); 885 return -1; 886 } 887 #endif 888 struct fpga_queue *q = dev->data->queues[queue_id].queue_private; 889 uint32_t offset = FPGA_LTE_FEC_RING_CTRL_REGS + 890 (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); 891 uint8_t payload = 0x01; 892 uint8_t counter = 0; 893 uint8_t timeout = FPGA_QUEUE_FLUSH_TIMEOUT_US / 894 FPGA_TIMEOUT_CHECK_INTERVAL; 895 896 /* Set flush_queue_en bit to trigger queue flushing */ 897 fpga_reg_write_8(d->mmio_base, 898 offset + FPGA_LTE_FEC_RING_FLUSH_QUEUE_EN, payload); 899 900 /** Check if queue flush is completed. 901 * FPGA will update the completion flag after queue flushing is 902 * completed. If completion flag is not updated within 1ms it is 903 * considered as a failure. 904 */ 905 while (!(*((volatile uint8_t *)d->flush_queue_status + q->q_idx) & payload)) { 906 if (counter > timeout) { 907 rte_bbdev_log(ERR, "FPGA Queue Flush failed for queue %d", 908 queue_id); 909 return -1; 910 } 911 usleep(FPGA_TIMEOUT_CHECK_INTERVAL); 912 counter++; 913 } 914 915 /* Disable queue */ 916 payload = 0x00; 917 fpga_reg_write_8(d->mmio_base, offset + FPGA_LTE_FEC_RING_ENABLE, 918 payload); 919 920 rte_bbdev_log_debug("FPGA Queue[%d] stopped", queue_id); 921 return 0; 922 } 923 924 static inline uint16_t 925 get_queue_id(struct rte_bbdev_data *data, uint8_t q_idx) 926 { 927 uint16_t queue_id; 928 929 for (queue_id = 0; queue_id < data->num_queues; ++queue_id) { 930 struct fpga_queue *q = data->queues[queue_id].queue_private; 931 if (q != NULL && q->q_idx == q_idx) 932 return queue_id; 933 } 934 935 return -1; 936 } 937 938 /* Interrupt handler triggered by FPGA dev for handling specific interrupt */ 939 static void 940 fpga_dev_interrupt_handler(void *cb_arg) 941 { 942 struct rte_bbdev *dev = cb_arg; 943 struct fpga_lte_fec_device *fpga_dev = dev->data->dev_private; 944 struct fpga_queue *q; 945 uint64_t ring_head; 946 uint64_t q_idx; 947 uint16_t queue_id; 948 uint8_t i; 949 950 /* Scan queue assigned to this device */ 951 for (i = 0; i < FPGA_TOTAL_NUM_QUEUES; ++i) { 952 q_idx = 1ULL << i; 953 if (fpga_dev->q_bound_bit_map & q_idx) { 954 queue_id = get_queue_id(dev->data, i); 955 if (queue_id == (uint16_t) -1) 956 continue; 957 958 /* Check if completion head was changed */ 959 q = dev->data->queues[queue_id].queue_private; 960 ring_head = *q->ring_head_addr; 961 if (q->shadow_completion_head != ring_head && 962 q->irq_enable == 1) { 963 q->shadow_completion_head = ring_head; 964 rte_bbdev_pmd_callback_process( 965 dev, 966 RTE_BBDEV_EVENT_DEQUEUE, 967 &queue_id); 968 } 969 } 970 } 971 } 972 973 static int 974 fpga_queue_intr_enable(struct rte_bbdev *dev, uint16_t queue_id) 975 { 976 struct fpga_queue *q = dev->data->queues[queue_id].queue_private; 977 978 if (!rte_intr_cap_multiple(dev->intr_handle)) 979 return -ENOTSUP; 980 981 q->irq_enable = 1; 982 983 return 0; 984 } 985 986 static int 987 fpga_queue_intr_disable(struct rte_bbdev *dev, uint16_t queue_id) 988 { 989 struct fpga_queue *q = dev->data->queues[queue_id].queue_private; 990 q->irq_enable = 0; 991 992 return 0; 993 } 994 995 static int 996 fpga_intr_enable(struct rte_bbdev *dev) 997 { 998 int ret; 999 uint8_t i; 1000 1001 if (!rte_intr_cap_multiple(dev->intr_handle)) { 1002 rte_bbdev_log(ERR, "Multiple intr vector is not supported by FPGA (%s)", 1003 
				dev->data->name);
		return -ENOTSUP;
	}

	/* Create event file descriptors for each of the 64 queues. Event fds
	 * will be mapped to FPGA IRQs in rte_intr_enable(). This is a 1:1
	 * mapping where the IRQ number is a direct translation to the queue
	 * number.
	 *
	 * Only 63 (FPGA_NUM_INTR_VEC) event fds are created here, as
	 * rte_intr_enable() maps the first IRQ to the already created
	 * interrupt event file descriptor (intr_handle->fd).
	 */
	if (rte_intr_efd_enable(dev->intr_handle, FPGA_NUM_INTR_VEC)) {
		rte_bbdev_log(ERR, "Failed to create fds for %u queues",
				dev->data->num_queues);
		return -1;
	}

	/* TODO Each event file descriptor is overwritten by the interrupt
	 * event file descriptor. That descriptor is added to the epoll
	 * observed list. It ensures that the callback function assigned to
	 * that descriptor will be invoked when any FPGA queue issues an
	 * interrupt.
	 */
	for (i = 0; i < FPGA_NUM_INTR_VEC; ++i) {
		if (rte_intr_efds_index_set(dev->intr_handle, i,
				rte_intr_fd_get(dev->intr_handle)))
			return -rte_errno;
	}

	if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec",
			dev->data->num_queues)) {
		rte_bbdev_log(ERR, "Failed to allocate %u vectors",
				dev->data->num_queues);
		return -ENOMEM;
	}

	ret = rte_intr_enable(dev->intr_handle);
	if (ret < 0) {
		rte_bbdev_log(ERR,
				"Couldn't enable interrupts for device: %s",
				dev->data->name);
		return ret;
	}

	ret = rte_intr_callback_register(dev->intr_handle,
			fpga_dev_interrupt_handler, dev);
	if (ret < 0) {
		rte_bbdev_log(ERR,
				"Couldn't register interrupt callback for device: %s",
				dev->data->name);
		return ret;
	}

	return 0;
}

static const struct rte_bbdev_ops fpga_ops = {
	.setup_queues = fpga_setup_queues,
	.intr_enable = fpga_intr_enable,
	.close = fpga_dev_close,
	.info_get = fpga_dev_info_get,
	.queue_setup = fpga_queue_setup,
	.queue_stop = fpga_queue_stop,
	.queue_start = fpga_queue_start,
	.queue_release = fpga_queue_release,
	.queue_intr_enable = fpga_queue_intr_enable,
	.queue_intr_disable = fpga_queue_intr_disable
};

static inline void
fpga_dma_enqueue(struct fpga_queue *q, uint16_t num_desc,
		struct rte_bbdev_stats *queue_stats)
{
#ifdef RTE_BBDEV_OFFLOAD_COST
	uint64_t start_time = 0;
	queue_stats->acc_offload_cycles = 0;
#else
	RTE_SET_USED(queue_stats);
#endif

	/* Update tail and shadow_tail register */
	q->tail = (q->tail + num_desc) & q->sw_ring_wrap_mask;

	rte_wmb();

#ifdef RTE_BBDEV_OFFLOAD_COST
	/* Start time measurement for enqueue function offload. */
	start_time = rte_rdtsc_precise();
#endif
	mmio_write_16(q->shadow_tail_addr, q->tail);

#ifdef RTE_BBDEV_OFFLOAD_COST
	rte_wmb();
	queue_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time;
#endif
}

/* Calculates the number of CBs in a processed encoder TB based on 'r' and
 * input length.
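 * Each CB accounts for (k - 24) / 8 bytes of the input (the 24 CRC bits
 * appended per CB are not counted); e.g. with k = 6144 each CB consumes
 * 765 input bytes.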
1102 */ 1103 static inline uint8_t 1104 get_num_cbs_in_op_enc(struct rte_bbdev_op_turbo_enc *turbo_enc) 1105 { 1106 uint8_t c, c_neg, r, crc24_bits = 0; 1107 uint16_t k, k_neg, k_pos; 1108 uint8_t cbs_in_op = 0; 1109 int32_t length; 1110 1111 length = turbo_enc->input.length; 1112 r = turbo_enc->tb_params.r; 1113 c = turbo_enc->tb_params.c; 1114 c_neg = turbo_enc->tb_params.c_neg; 1115 k_neg = turbo_enc->tb_params.k_neg; 1116 k_pos = turbo_enc->tb_params.k_pos; 1117 crc24_bits = 24; 1118 while (length > 0 && r < c) { 1119 k = (r < c_neg) ? k_neg : k_pos; 1120 length -= (k - crc24_bits) >> 3; 1121 r++; 1122 cbs_in_op++; 1123 } 1124 1125 return cbs_in_op; 1126 } 1127 1128 /* Calculates number of CBs in processed decoder TB based on 'r' and input 1129 * length. 1130 */ 1131 static inline uint16_t 1132 get_num_cbs_in_op_dec(struct rte_bbdev_op_turbo_dec *turbo_dec) 1133 { 1134 uint8_t c, c_neg, r = 0; 1135 uint16_t kw, k, k_neg, k_pos, cbs_in_op = 0; 1136 int32_t length; 1137 1138 length = turbo_dec->input.length; 1139 r = turbo_dec->tb_params.r; 1140 c = turbo_dec->tb_params.c; 1141 c_neg = turbo_dec->tb_params.c_neg; 1142 k_neg = turbo_dec->tb_params.k_neg; 1143 k_pos = turbo_dec->tb_params.k_pos; 1144 while (length > 0 && r < c) { 1145 k = (r < c_neg) ? k_neg : k_pos; 1146 kw = RTE_ALIGN_CEIL(k + 4, 32) * 3; 1147 length -= kw; 1148 r++; 1149 cbs_in_op++; 1150 } 1151 1152 return cbs_in_op; 1153 } 1154 1155 /* Read flag value 0/1/ from bitmap */ 1156 static inline bool 1157 check_bit(uint32_t bitmap, uint32_t bitmask) 1158 { 1159 return bitmap & bitmask; 1160 } 1161 1162 /* Print an error if a descriptor error has occurred. 1163 * Return 0 on success, 1 on failure 1164 */ 1165 static inline int 1166 check_desc_error(uint32_t error_code) { 1167 switch (error_code) { 1168 case DESC_ERR_NO_ERR: 1169 return 0; 1170 case DESC_ERR_K_OUT_OF_RANGE: 1171 rte_bbdev_log(ERR, "Block_size_k is out of range (k<40 or k>6144)"); 1172 break; 1173 case DESC_ERR_K_NOT_NORMAL: 1174 rte_bbdev_log(ERR, "Block_size_k is not a normal value within normal range"); 1175 break; 1176 case DESC_ERR_KPAI_NOT_NORMAL: 1177 rte_bbdev_log(ERR, "Three_kpai is not a normal value for UL only"); 1178 break; 1179 case DESC_ERR_DESC_OFFSET_ERR: 1180 rte_bbdev_log(ERR, "Queue offset does not meet the expectation in the FPGA"); 1181 break; 1182 case (DESC_ERR_K_OUT_OF_RANGE | DESC_ERR_DESC_OFFSET_ERR): 1183 rte_bbdev_log(ERR, "Block_size_k is out of range (k<40 or k>6144) and queue offset error"); 1184 break; 1185 case (DESC_ERR_K_NOT_NORMAL | DESC_ERR_DESC_OFFSET_ERR): 1186 rte_bbdev_log(ERR, "Block_size_k is not a normal value within normal range and queue offset error"); 1187 break; 1188 case (DESC_ERR_KPAI_NOT_NORMAL | DESC_ERR_DESC_OFFSET_ERR): 1189 rte_bbdev_log(ERR, "Three_kpai is not a normal value for UL only and queue offset error"); 1190 break; 1191 case DESC_ERR_DESC_READ_FAIL: 1192 rte_bbdev_log(ERR, "Unsuccessful completion for descriptor read"); 1193 break; 1194 case DESC_ERR_DESC_READ_TIMEOUT: 1195 rte_bbdev_log(ERR, "Descriptor read time-out"); 1196 break; 1197 case DESC_ERR_DESC_READ_TLP_POISONED: 1198 rte_bbdev_log(ERR, "Descriptor read TLP poisoned"); 1199 break; 1200 case DESC_ERR_CB_READ_FAIL: 1201 rte_bbdev_log(ERR, "Unsuccessful completion for code block"); 1202 break; 1203 case DESC_ERR_CB_READ_TIMEOUT: 1204 rte_bbdev_log(ERR, "Code block read time-out"); 1205 break; 1206 case DESC_ERR_CB_READ_TLP_POISONED: 1207 rte_bbdev_log(ERR, "Code block read TLP poisoned"); 1208 break; 1209 default: 1210 
rte_bbdev_log(ERR, "Descriptor error unknown error code %u", 1211 error_code); 1212 break; 1213 } 1214 return 1; 1215 } 1216 1217 /** 1218 * Set DMA descriptor for encode operation (1 Code Block) 1219 * 1220 * @param op 1221 * Pointer to a single encode operation. 1222 * @param desc 1223 * Pointer to DMA descriptor. 1224 * @param input 1225 * Pointer to pointer to input data which will be decoded. 1226 * @param k 1227 * K value (length of input in bits). 1228 * @param e 1229 * E value (length of output in bits). 1230 * @param ncb 1231 * Ncb value (size of the soft buffer). 1232 * @param out_length 1233 * Length of output buffer 1234 * @param in_offset 1235 * Input offset in rte_mbuf structure. It is used for calculating the point 1236 * where data is starting. 1237 * @param out_offset 1238 * Output offset in rte_mbuf structure. It is used for calculating the point 1239 * where hard output data will be stored. 1240 * @param cbs_in_op 1241 * Number of CBs contained in one operation. 1242 */ 1243 static inline int 1244 fpga_dma_desc_te_fill(struct rte_bbdev_enc_op *op, 1245 struct fpga_dma_enc_desc *desc, struct rte_mbuf *input, 1246 struct rte_mbuf *output, uint16_t k, uint16_t e, uint16_t ncb, 1247 uint32_t in_offset, uint32_t out_offset, uint16_t desc_offset, 1248 uint8_t cbs_in_op) 1249 1250 { 1251 /* reset */ 1252 desc->done = 0; 1253 desc->crc_en = check_bit(op->turbo_enc.op_flags, 1254 RTE_BBDEV_TURBO_CRC_24B_ATTACH); 1255 desc->bypass_rm = !check_bit(op->turbo_enc.op_flags, 1256 RTE_BBDEV_TURBO_RATE_MATCH); 1257 desc->k = k; 1258 desc->e = e; 1259 desc->ncb = ncb; 1260 desc->rv = op->turbo_enc.rv_index; 1261 desc->offset = desc_offset; 1262 /* Set inbound data buffer address */ 1263 desc->in_addr_hi = (uint32_t)( 1264 rte_pktmbuf_iova_offset(input, in_offset) >> 32); 1265 desc->in_addr_lw = (uint32_t)( 1266 rte_pktmbuf_iova_offset(input, in_offset)); 1267 1268 desc->out_addr_hi = (uint32_t)( 1269 rte_pktmbuf_iova_offset(output, out_offset) >> 32); 1270 desc->out_addr_lw = (uint32_t)( 1271 rte_pktmbuf_iova_offset(output, out_offset)); 1272 1273 /* Save software context needed for dequeue */ 1274 desc->op_addr = op; 1275 1276 /* Set total number of CBs in an op */ 1277 desc->cbs_in_op = cbs_in_op; 1278 1279 return 0; 1280 } 1281 1282 /** 1283 * Set DMA descriptor for encode operation (1 Code Block) 1284 * 1285 * @param op 1286 * Pointer to a single encode operation. 1287 * @param desc 1288 * Pointer to DMA descriptor. 1289 * @param input 1290 * Pointer to pointer to input data which will be decoded. 1291 * @param in_length 1292 * Length of an input. 1293 * @param k 1294 * K value (length of an output in bits). 1295 * @param in_offset 1296 * Input offset in rte_mbuf structure. It is used for calculating the point 1297 * where data is starting. 1298 * @param out_offset 1299 * Output offset in rte_mbuf structure. It is used for calculating the point 1300 * where hard output data will be stored. 1301 * @param cbs_in_op 1302 * Number of CBs contained in one operation. 
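 *
 * Note: the descriptor's max_iter field is programmed as
 * 2 * op->turbo_dec.iter_max.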
1303 */ 1304 static inline int 1305 fpga_dma_desc_td_fill(struct rte_bbdev_dec_op *op, 1306 struct fpga_dma_dec_desc *desc, struct rte_mbuf *input, 1307 struct rte_mbuf *output, uint16_t in_length, uint16_t k, 1308 uint32_t in_offset, uint32_t out_offset, uint16_t desc_offset, 1309 uint8_t cbs_in_op) 1310 { 1311 /* reset */ 1312 desc->done = 0; 1313 /* Set inbound data buffer address */ 1314 desc->in_addr_hi = (uint32_t)( 1315 rte_pktmbuf_iova_offset(input, in_offset) >> 32); 1316 desc->in_addr_lw = (uint32_t)( 1317 rte_pktmbuf_iova_offset(input, in_offset)); 1318 desc->in_len = in_length; 1319 desc->k = k; 1320 desc->crc_type = !check_bit(op->turbo_dec.op_flags, 1321 RTE_BBDEV_TURBO_CRC_TYPE_24B); 1322 if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 1323 && !check_bit(op->turbo_dec.op_flags, 1324 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP)) 1325 desc->drop_crc = 1; 1326 desc->max_iter = op->turbo_dec.iter_max * 2; 1327 desc->offset = desc_offset; 1328 desc->out_addr_hi = (uint32_t)( 1329 rte_pktmbuf_iova_offset(output, out_offset) >> 32); 1330 desc->out_addr_lw = (uint32_t)( 1331 rte_pktmbuf_iova_offset(output, out_offset)); 1332 1333 /* Save software context needed for dequeue */ 1334 desc->op_addr = op; 1335 1336 /* Set total number of CBs in an op */ 1337 desc->cbs_in_op = cbs_in_op; 1338 1339 return 0; 1340 } 1341 1342 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1343 /* Validates turbo encoder parameters */ 1344 static int 1345 validate_enc_op(struct rte_bbdev_enc_op *op) 1346 { 1347 struct rte_bbdev_op_turbo_enc *turbo_enc = &op->turbo_enc; 1348 struct rte_bbdev_op_enc_turbo_cb_params *cb = NULL; 1349 struct rte_bbdev_op_enc_turbo_tb_params *tb = NULL; 1350 uint16_t kw, kw_neg, kw_pos; 1351 1352 if (turbo_enc->input.length > 1353 RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { 1354 rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", 1355 turbo_enc->input.length, 1356 RTE_BBDEV_TURBO_MAX_TB_SIZE); 1357 op->status = 1 << RTE_BBDEV_DATA_ERROR; 1358 return -1; 1359 } 1360 1361 if (op->mempool == NULL) { 1362 rte_bbdev_log(ERR, "Invalid mempool pointer"); 1363 return -1; 1364 } 1365 if (turbo_enc->input.data == NULL) { 1366 rte_bbdev_log(ERR, "Invalid input pointer"); 1367 return -1; 1368 } 1369 if (turbo_enc->output.data == NULL) { 1370 rte_bbdev_log(ERR, "Invalid output pointer"); 1371 return -1; 1372 } 1373 if (turbo_enc->rv_index > 3) { 1374 rte_bbdev_log(ERR, 1375 "rv_index (%u) is out of range 0 <= value <= 3", 1376 turbo_enc->rv_index); 1377 return -1; 1378 } 1379 if (turbo_enc->code_block_mode != RTE_BBDEV_TRANSPORT_BLOCK && 1380 turbo_enc->code_block_mode != RTE_BBDEV_CODE_BLOCK) { 1381 rte_bbdev_log(ERR, 1382 "code_block_mode (%u) is out of range 0 <= value <= 1", 1383 turbo_enc->code_block_mode); 1384 return -1; 1385 } 1386 1387 if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1388 tb = &turbo_enc->tb_params; 1389 if ((tb->k_neg < RTE_BBDEV_TURBO_MIN_CB_SIZE 1390 || tb->k_neg > RTE_BBDEV_TURBO_MAX_CB_SIZE) 1391 && tb->c_neg > 0) { 1392 rte_bbdev_log(ERR, 1393 "k_neg (%u) is out of range %u <= value <= %u", 1394 tb->k_neg, RTE_BBDEV_TURBO_MIN_CB_SIZE, 1395 RTE_BBDEV_TURBO_MAX_CB_SIZE); 1396 return -1; 1397 } 1398 if (tb->k_pos < RTE_BBDEV_TURBO_MIN_CB_SIZE 1399 || tb->k_pos > RTE_BBDEV_TURBO_MAX_CB_SIZE) { 1400 rte_bbdev_log(ERR, 1401 "k_pos (%u) is out of range %u <= value <= %u", 1402 tb->k_pos, RTE_BBDEV_TURBO_MIN_CB_SIZE, 1403 RTE_BBDEV_TURBO_MAX_CB_SIZE); 1404 return -1; 1405 } 1406 if (tb->c_neg > (RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1)) 1407 rte_bbdev_log(ERR, 1408 "c_neg 
(%u) is out of range 0 <= value <= %u", 1409 tb->c_neg, 1410 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1); 1411 if (tb->c < 1 || tb->c > RTE_BBDEV_TURBO_MAX_CODE_BLOCKS) { 1412 rte_bbdev_log(ERR, 1413 "c (%u) is out of range 1 <= value <= %u", 1414 tb->c, RTE_BBDEV_TURBO_MAX_CODE_BLOCKS); 1415 return -1; 1416 } 1417 if (tb->cab > tb->c) { 1418 rte_bbdev_log(ERR, 1419 "cab (%u) is greater than c (%u)", 1420 tb->cab, tb->c); 1421 return -1; 1422 } 1423 if ((tb->ea < RTE_BBDEV_TURBO_MIN_CB_SIZE || (tb->ea % 2)) 1424 && tb->r < tb->cab) { 1425 rte_bbdev_log(ERR, 1426 "ea (%u) is less than %u or it is not even", 1427 tb->ea, RTE_BBDEV_TURBO_MIN_CB_SIZE); 1428 return -1; 1429 } 1430 if ((tb->eb < RTE_BBDEV_TURBO_MIN_CB_SIZE || (tb->eb % 2)) 1431 && tb->c > tb->cab) { 1432 rte_bbdev_log(ERR, 1433 "eb (%u) is less than %u or it is not even", 1434 tb->eb, RTE_BBDEV_TURBO_MIN_CB_SIZE); 1435 return -1; 1436 } 1437 1438 kw_neg = 3 * RTE_ALIGN_CEIL(tb->k_neg + 4, 1439 RTE_BBDEV_TURBO_C_SUBBLOCK); 1440 if (tb->ncb_neg < tb->k_neg || tb->ncb_neg > kw_neg) { 1441 rte_bbdev_log(ERR, 1442 "ncb_neg (%u) is out of range (%u) k_neg <= value <= (%u) kw_neg", 1443 tb->ncb_neg, tb->k_neg, kw_neg); 1444 return -1; 1445 } 1446 1447 kw_pos = 3 * RTE_ALIGN_CEIL(tb->k_pos + 4, 1448 RTE_BBDEV_TURBO_C_SUBBLOCK); 1449 if (tb->ncb_pos < tb->k_pos || tb->ncb_pos > kw_pos) { 1450 rte_bbdev_log(ERR, 1451 "ncb_pos (%u) is out of range (%u) k_pos <= value <= (%u) kw_pos", 1452 tb->ncb_pos, tb->k_pos, kw_pos); 1453 return -1; 1454 } 1455 if (tb->r > (tb->c - 1)) { 1456 rte_bbdev_log(ERR, 1457 "r (%u) is greater than c - 1 (%u)", 1458 tb->r, tb->c - 1); 1459 return -1; 1460 } 1461 } else { 1462 cb = &turbo_enc->cb_params; 1463 if (cb->k < RTE_BBDEV_TURBO_MIN_CB_SIZE 1464 || cb->k > RTE_BBDEV_TURBO_MAX_CB_SIZE) { 1465 rte_bbdev_log(ERR, 1466 "k (%u) is out of range %u <= value <= %u", 1467 cb->k, RTE_BBDEV_TURBO_MIN_CB_SIZE, 1468 RTE_BBDEV_TURBO_MAX_CB_SIZE); 1469 return -1; 1470 } 1471 1472 if (cb->e < RTE_BBDEV_TURBO_MIN_CB_SIZE || (cb->e % 2)) { 1473 rte_bbdev_log(ERR, 1474 "e (%u) is less than %u or it is not even", 1475 cb->e, RTE_BBDEV_TURBO_MIN_CB_SIZE); 1476 return -1; 1477 } 1478 1479 kw = RTE_ALIGN_CEIL(cb->k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3; 1480 if (cb->ncb < cb->k || cb->ncb > kw) { 1481 rte_bbdev_log(ERR, 1482 "ncb (%u) is out of range (%u) k <= value <= (%u) kw", 1483 cb->ncb, cb->k, kw); 1484 return -1; 1485 } 1486 } 1487 1488 return 0; 1489 } 1490 #endif 1491 1492 static inline char * 1493 mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len) 1494 { 1495 if (unlikely(len > rte_pktmbuf_tailroom(m))) 1496 return NULL; 1497 1498 char *tail = (char *)m->buf_addr + m->data_off + m->data_len; 1499 m->data_len = (uint16_t)(m->data_len + len); 1500 m_head->pkt_len = (m_head->pkt_len + len); 1501 return tail; 1502 } 1503 1504 static inline int 1505 enqueue_enc_one_op_cb(struct fpga_queue *q, struct rte_bbdev_enc_op *op, 1506 uint16_t desc_offset) 1507 { 1508 union fpga_dma_desc *desc; 1509 struct rte_mbuf *input; 1510 struct rte_mbuf *output; 1511 int ret; 1512 uint16_t k, e, ncb, ring_offset; 1513 uint32_t total_left, in_length, out_length, in_offset, out_offset; 1514 1515 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1516 /* Validate op structure */ 1517 if (validate_enc_op(op) == -1) { 1518 rte_bbdev_log(ERR, "Turbo encoder validation failed"); 1519 return -EINVAL; 1520 } 1521 #endif 1522 1523 input = op->turbo_enc.input.data; 1524 output = op->turbo_enc.output.data; 1525 in_offset = op->turbo_enc.input.offset; 1526 
out_offset = op->turbo_enc.output.offset; 1527 total_left = op->turbo_enc.input.length; 1528 k = op->turbo_enc.cb_params.k; 1529 e = op->turbo_enc.cb_params.e; 1530 ncb = op->turbo_enc.cb_params.ncb; 1531 1532 if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH)) 1533 in_length = ((k - 24) >> 3); 1534 else 1535 in_length = k >> 3; 1536 1537 if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH)) 1538 out_length = (e + 7) >> 3; 1539 else 1540 out_length = (k >> 3) * 3 + 2; 1541 1542 mbuf_append(output, output, out_length); 1543 1544 /* Offset into the ring */ 1545 ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); 1546 /* Setup DMA Descriptor */ 1547 desc = q->ring_addr + ring_offset; 1548 1549 ret = fpga_dma_desc_te_fill(op, &desc->enc_req, input, output, k, e, 1550 ncb, in_offset, out_offset, ring_offset, 1); 1551 if (unlikely(ret < 0)) 1552 return ret; 1553 1554 /* Update lengths */ 1555 total_left -= in_length; 1556 op->turbo_enc.output.length += out_length; 1557 1558 if (total_left > 0) { 1559 rte_bbdev_log(ERR, 1560 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u", 1561 total_left, in_length); 1562 return -1; 1563 } 1564 1565 return 1; 1566 } 1567 1568 static inline int 1569 enqueue_enc_one_op_tb(struct fpga_queue *q, struct rte_bbdev_enc_op *op, 1570 uint16_t desc_offset, uint8_t cbs_in_op) 1571 { 1572 union fpga_dma_desc *desc; 1573 struct rte_mbuf *input, *output_head, *output; 1574 int ret; 1575 uint8_t r, c, crc24_bits = 0; 1576 uint16_t k, e, ncb, ring_offset; 1577 uint32_t mbuf_total_left, in_length, out_length, in_offset, out_offset; 1578 uint32_t seg_total_left; 1579 uint16_t current_enqueued_cbs = 0; 1580 1581 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1582 /* Validate op structure */ 1583 if (validate_enc_op(op) == -1) { 1584 rte_bbdev_log(ERR, "Turbo encoder validation failed"); 1585 return -EINVAL; 1586 } 1587 #endif 1588 1589 input = op->turbo_enc.input.data; 1590 output_head = output = op->turbo_enc.output.data; 1591 in_offset = op->turbo_enc.input.offset; 1592 out_offset = op->turbo_enc.output.offset; 1593 mbuf_total_left = op->turbo_enc.input.length; 1594 1595 c = op->turbo_enc.tb_params.c; 1596 r = op->turbo_enc.tb_params.r; 1597 1598 if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH)) 1599 crc24_bits = 24; 1600 1601 while (mbuf_total_left > 0 && r < c && input != NULL) { 1602 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 1603 1604 e = (r < op->turbo_enc.tb_params.cab) ? 1605 op->turbo_enc.tb_params.ea : 1606 op->turbo_enc.tb_params.eb; 1607 k = (r < op->turbo_enc.tb_params.c_neg) ? 1608 op->turbo_enc.tb_params.k_neg : 1609 op->turbo_enc.tb_params.k_pos; 1610 ncb = (r < op->turbo_enc.tb_params.c_neg) ? 
1611 op->turbo_enc.tb_params.ncb_neg : 1612 op->turbo_enc.tb_params.ncb_pos; 1613 1614 in_length = ((k - crc24_bits) >> 3); 1615 1616 if (check_bit(op->turbo_enc.op_flags, 1617 RTE_BBDEV_TURBO_RATE_MATCH)) 1618 out_length = (e + 7) >> 3; 1619 else 1620 out_length = (k >> 3) * 3 + 2; 1621 1622 mbuf_append(output_head, output, out_length); 1623 1624 /* Setup DMA Descriptor */ 1625 ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); 1626 desc = q->ring_addr + ring_offset; 1627 ret = fpga_dma_desc_te_fill(op, &desc->enc_req, input, output, 1628 k, e, ncb, in_offset, out_offset, ring_offset, 1629 cbs_in_op); 1630 if (unlikely(ret < 0)) 1631 return ret; 1632 1633 rte_bbdev_log_debug("DMA request desc %p", desc); 1634 1635 /* Update lengths */ 1636 op->turbo_enc.output.length += out_length; 1637 mbuf_total_left -= in_length; 1638 1639 /* Update offsets */ 1640 if (seg_total_left == in_length) { 1641 /* Go to the next mbuf */ 1642 input = input->next; 1643 output = output->next; 1644 in_offset = 0; 1645 out_offset = 0; 1646 } else { 1647 in_offset += in_length; 1648 out_offset += out_length; 1649 } 1650 1651 r++; 1652 desc_offset++; 1653 current_enqueued_cbs++; 1654 } 1655 1656 if (mbuf_total_left > 0) { 1657 rte_bbdev_log(ERR, 1658 "Some date still left for processing: mbuf_total_left = %u", 1659 mbuf_total_left); 1660 return -1; 1661 } 1662 1663 return current_enqueued_cbs; 1664 } 1665 1666 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1667 /* Validates turbo decoder parameters */ 1668 static int 1669 validate_dec_op(struct rte_bbdev_dec_op *op) 1670 { 1671 struct rte_bbdev_op_turbo_dec *turbo_dec = &op->turbo_dec; 1672 struct rte_bbdev_op_dec_turbo_cb_params *cb = NULL; 1673 struct rte_bbdev_op_dec_turbo_tb_params *tb = NULL; 1674 1675 if (op->mempool == NULL) { 1676 rte_bbdev_log(ERR, "Invalid mempool pointer"); 1677 return -1; 1678 } 1679 if (turbo_dec->input.data == NULL) { 1680 rte_bbdev_log(ERR, "Invalid input pointer"); 1681 return -1; 1682 } 1683 if (turbo_dec->hard_output.data == NULL) { 1684 rte_bbdev_log(ERR, "Invalid hard_output pointer"); 1685 return -1; 1686 } 1687 if (turbo_dec->rv_index > 3) { 1688 rte_bbdev_log(ERR, 1689 "rv_index (%u) is out of range 0 <= value <= 3", 1690 turbo_dec->rv_index); 1691 return -1; 1692 } 1693 if (turbo_dec->iter_min < 1) { 1694 rte_bbdev_log(ERR, 1695 "iter_min (%u) is less than 1", 1696 turbo_dec->iter_min); 1697 return -1; 1698 } 1699 if (turbo_dec->iter_max <= 2) { 1700 rte_bbdev_log(ERR, 1701 "iter_max (%u) is less than or equal to 2", 1702 turbo_dec->iter_max); 1703 return -1; 1704 } 1705 if (turbo_dec->iter_min > turbo_dec->iter_max) { 1706 rte_bbdev_log(ERR, 1707 "iter_min (%u) is greater than iter_max (%u)", 1708 turbo_dec->iter_min, turbo_dec->iter_max); 1709 return -1; 1710 } 1711 if (turbo_dec->code_block_mode != RTE_BBDEV_TRANSPORT_BLOCK && 1712 turbo_dec->code_block_mode != RTE_BBDEV_CODE_BLOCK) { 1713 rte_bbdev_log(ERR, 1714 "code_block_mode (%u) is out of range 0 <= value <= 1", 1715 turbo_dec->code_block_mode); 1716 return -1; 1717 } 1718 1719 if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1720 1721 if ((turbo_dec->op_flags & 1722 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP) && 1723 !(turbo_dec->op_flags & RTE_BBDEV_TURBO_CRC_TYPE_24B)) { 1724 rte_bbdev_log(ERR, 1725 "RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP should accompany RTE_BBDEV_TURBO_CRC_TYPE_24B"); 1726 return -1; 1727 } 1728 1729 tb = &turbo_dec->tb_params; 1730 if ((tb->k_neg < RTE_BBDEV_TURBO_MIN_CB_SIZE 1731 || tb->k_neg > RTE_BBDEV_TURBO_MAX_CB_SIZE) 1732 && tb->c_neg > 
0) { 1733 rte_bbdev_log(ERR, 1734 "k_neg (%u) is out of range %u <= value <= %u", 1735 tb->k_neg, RTE_BBDEV_TURBO_MIN_CB_SIZE, 1736 RTE_BBDEV_TURBO_MAX_CB_SIZE); 1737 return -1; 1738 } 1739 if ((tb->k_pos < RTE_BBDEV_TURBO_MIN_CB_SIZE 1740 || tb->k_pos > RTE_BBDEV_TURBO_MAX_CB_SIZE) 1741 && tb->c > tb->c_neg) { 1742 rte_bbdev_log(ERR, 1743 "k_pos (%u) is out of range %u <= value <= %u", 1744 tb->k_pos, RTE_BBDEV_TURBO_MIN_CB_SIZE, 1745 RTE_BBDEV_TURBO_MAX_CB_SIZE); 1746 return -1; 1747 } 1748 if (tb->c_neg > (RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1)) 1749 rte_bbdev_log(ERR, 1750 "c_neg (%u) is out of range 0 <= value <= %u", 1751 tb->c_neg, 1752 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1); 1753 if (tb->c < 1 || tb->c > RTE_BBDEV_TURBO_MAX_CODE_BLOCKS) { 1754 rte_bbdev_log(ERR, 1755 "c (%u) is out of range 1 <= value <= %u", 1756 tb->c, RTE_BBDEV_TURBO_MAX_CODE_BLOCKS); 1757 return -1; 1758 } 1759 if (tb->cab > tb->c) { 1760 rte_bbdev_log(ERR, 1761 "cab (%u) is greater than c (%u)", 1762 tb->cab, tb->c); 1763 return -1; 1764 } 1765 } else { 1766 1767 if (turbo_dec->op_flags & RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP) { 1768 rte_bbdev_log(ERR, 1769 "RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP is invalid in CB-mode"); 1770 return -1; 1771 } 1772 1773 cb = &turbo_dec->cb_params; 1774 if (cb->k < RTE_BBDEV_TURBO_MIN_CB_SIZE 1775 || cb->k > RTE_BBDEV_TURBO_MAX_CB_SIZE) { 1776 rte_bbdev_log(ERR, 1777 "k (%u) is out of range %u <= value <= %u", 1778 cb->k, RTE_BBDEV_TURBO_MIN_CB_SIZE, 1779 RTE_BBDEV_TURBO_MAX_CB_SIZE); 1780 return -1; 1781 } 1782 } 1783 1784 return 0; 1785 } 1786 #endif 1787 1788 static inline int 1789 enqueue_dec_one_op_cb(struct fpga_queue *q, struct rte_bbdev_dec_op *op, 1790 uint16_t desc_offset) 1791 { 1792 union fpga_dma_desc *desc; 1793 struct rte_mbuf *input; 1794 struct rte_mbuf *output; 1795 int ret; 1796 uint16_t k, kw, ring_offset; 1797 uint32_t total_left, in_length, out_length, in_offset, out_offset; 1798 1799 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1800 /* Validate op structure */ 1801 if (validate_dec_op(op) == -1) { 1802 rte_bbdev_log(ERR, "Turbo decoder validation failed"); 1803 return -EINVAL; 1804 } 1805 #endif 1806 1807 input = op->turbo_dec.input.data; 1808 output = op->turbo_dec.hard_output.data; 1809 total_left = op->turbo_dec.input.length; 1810 in_offset = op->turbo_dec.input.offset; 1811 out_offset = op->turbo_dec.hard_output.offset; 1812 1813 k = op->turbo_dec.cb_params.k; 1814 kw = RTE_ALIGN_CEIL(k + 4, 32) * 3; 1815 in_length = kw; 1816 out_length = k >> 3; 1817 1818 mbuf_append(output, output, out_length); 1819 1820 /* Setup DMA Descriptor */ 1821 ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); 1822 desc = q->ring_addr + ring_offset; 1823 ret = fpga_dma_desc_td_fill(op, &desc->dec_req, input, output, 1824 in_length, k, in_offset, out_offset, ring_offset, 1); 1825 if (unlikely(ret < 0)) 1826 return ret; 1827 1828 #ifdef RTE_LIBRTE_BBDEV_DEBUG 1829 print_dma_dec_desc_debug_info(desc); 1830 #endif 1831 1832 /* Update lengths */ 1833 total_left -= in_length; 1834 op->turbo_dec.hard_output.length += out_length; 1835 1836 if (total_left > 0) { 1837 rte_bbdev_log(ERR, 1838 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u", 1839 total_left, in_length); 1840 return -1; 1841 } 1842 1843 return 1; 1844 } 1845 1846 1847 static inline int 1848 enqueue_dec_one_op_tb(struct fpga_queue *q, struct rte_bbdev_dec_op *op, 1849 uint16_t desc_offset, uint8_t cbs_in_op) 1850 { 1851 union fpga_dma_desc *desc; 1852 struct rte_mbuf *input, *output_head, *output; 1853 
static inline int
enqueue_dec_one_op_tb(struct fpga_queue *q, struct rte_bbdev_dec_op *op,
		uint16_t desc_offset, uint8_t cbs_in_op)
{
	union fpga_dma_desc *desc;
	struct rte_mbuf *input, *output_head, *output;
	int ret;
	uint8_t r, c;
	uint16_t k, kw, in_length, out_length, ring_offset;
	uint32_t mbuf_total_left, seg_total_left, in_offset, out_offset;
	uint16_t current_enqueued_cbs = 0;
	uint16_t crc24_overlap = 0;

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	/* Validate op structure */
	if (validate_dec_op(op) == -1) {
		rte_bbdev_log(ERR, "Turbo decoder validation failed");
		return -EINVAL;
	}
#endif

	input = op->turbo_dec.input.data;
	output_head = output = op->turbo_dec.hard_output.data;
	mbuf_total_left = op->turbo_dec.input.length;
	in_offset = op->turbo_dec.input.offset;
	out_offset = op->turbo_dec.hard_output.offset;

	if (!check_bit(op->turbo_dec.op_flags,
			RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
		crc24_overlap = 24;

	c = op->turbo_dec.tb_params.c;
	r = op->turbo_dec.tb_params.r;

	while (mbuf_total_left > 0 && r < c && input != NULL) {
		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
		k = (r < op->turbo_dec.tb_params.c_neg) ?
				op->turbo_dec.tb_params.k_neg :
				op->turbo_dec.tb_params.k_pos;
		kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;

		in_length = kw;
		out_length = (k - crc24_overlap) >> 3;

		mbuf_append(output_head, output, out_length);

		if (seg_total_left < in_length) {
			rte_bbdev_log(ERR,
					"Partial CB found in a TB. FPGA Driver doesn't support scatter-gather operations!");
			return -1;
		}

		/* Setup DMA Descriptor */
		ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask);
		desc = q->ring_addr + ring_offset;
		ret = fpga_dma_desc_td_fill(op, &desc->dec_req, input, output,
				in_length, k, in_offset, out_offset,
				ring_offset, cbs_in_op);
		if (unlikely(ret < 0))
			return ret;

		/* Update lengths */
		ret = rte_pktmbuf_trim(op->turbo_dec.hard_output.data,
				(crc24_overlap >> 3));
#ifdef RTE_LIBRTE_BBDEV_DEBUG
		if (ret < 0) {
			rte_bbdev_log(ERR,
					"The length to remove is greater than the length of the last segment");
			return -EINVAL;
		}
#endif
		op->turbo_dec.hard_output.length += out_length;
		mbuf_total_left -= in_length;

		/* Update offsets */
		if (seg_total_left == in_length) {
			/* Go to the next mbuf */
			input = input->next;
			output = output->next;
			in_offset = 0;
			out_offset = 0;
		} else {
			in_offset += in_length;
			out_offset += out_length;
		}

		r++;
		desc_offset++;
		current_enqueued_cbs++;
	}

	if (mbuf_total_left > 0) {
		rte_bbdev_log(ERR,
				"Some data still left for processing: mbuf_total_left = %u",
				mbuf_total_left);
		return -1;
	}

	return current_enqueued_cbs;
}
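
/*
 * Illustrative sketch, not used by the driver: the enqueue functions below
 * compute the number of free ring descriptors with wrap-around handling,
 * keeping one slot unused so that a full ring can be told apart from an
 * empty one. The helper name and its standalone form are assumptions made
 * for illustration only; the driver performs the same arithmetic inline on
 * q->head_free_desc, q->tail and q->ring_ctrl_reg.ring_size.
 */
static __rte_unused inline int32_t
fpga_ring_free_descs_sketch(uint16_t head_free_desc, uint16_t tail,
		uint16_t ring_size)
{
	if (head_free_desc > tail)
		return (int32_t)head_free_desc - tail - 1;
	return (int32_t)ring_size + head_free_desc - tail - 1;
}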
static uint16_t
fpga_enqueue_enc(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	uint8_t cbs_in_op;
	uint16_t i, total_enqueued_cbs = 0;
	int32_t avail;
	int enqueued_cbs;
	struct fpga_queue *q = q_data->queue_private;
	union fpga_dma_desc *desc;

	/* Check if queue is not full */
	if (unlikely(((q->tail + 1) & q->sw_ring_wrap_mask) ==
			q->head_free_desc))
		return 0;

	/* Calculate available space */
	avail = (q->head_free_desc > q->tail) ?
			q->head_free_desc - q->tail - 1 :
			q->ring_ctrl_reg.ring_size + q->head_free_desc - q->tail - 1;

	for (i = 0; i < num; ++i) {
		if (ops[i]->turbo_enc.code_block_mode ==
				RTE_BBDEV_TRANSPORT_BLOCK) {
			cbs_in_op = get_num_cbs_in_op_enc(&ops[i]->turbo_enc);
			/* Check if there is available space for further
			 * processing
			 */
			if (unlikely(avail - cbs_in_op < 0))
				break;
			avail -= cbs_in_op;
			enqueued_cbs = enqueue_enc_one_op_tb(q, ops[i],
					total_enqueued_cbs, cbs_in_op);
		} else {
			/* Check if there is available space for further
			 * processing
			 */
			if (unlikely(avail - 1 < 0))
				break;
			avail -= 1;
			enqueued_cbs = enqueue_enc_one_op_cb(q, ops[i],
					total_enqueued_cbs);
		}

		if (enqueued_cbs < 0)
			break;

		total_enqueued_cbs += enqueued_cbs;

		rte_bbdev_log_debug("enqueuing enc ops [%d/%d] | head %d | tail %d",
				total_enqueued_cbs, num,
				q->head_free_desc, q->tail);
	}

	/* Set interrupt bit for last CB in enqueued ops. FPGA issues interrupt
	 * only when all previous CBs were already processed.
	 */
	desc = q->ring_addr + ((q->tail + total_enqueued_cbs - 1)
			& q->sw_ring_wrap_mask);
	desc->enc_req.irq_en = q->irq_enable;

	fpga_dma_enqueue(q, total_enqueued_cbs, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;

	return i;
}

static uint16_t
fpga_enqueue_dec(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	uint8_t cbs_in_op;
	uint16_t i, total_enqueued_cbs = 0;
	int32_t avail;
	int enqueued_cbs;
	struct fpga_queue *q = q_data->queue_private;
	union fpga_dma_desc *desc;

	/* Check if queue is not full */
	if (unlikely(((q->tail + 1) & q->sw_ring_wrap_mask) ==
			q->head_free_desc))
		return 0;

	/* Calculate available space */
	avail = (q->head_free_desc > q->tail) ?
			q->head_free_desc - q->tail - 1 :
			q->ring_ctrl_reg.ring_size + q->head_free_desc - q->tail - 1;

	for (i = 0; i < num; ++i) {
		if (ops[i]->turbo_dec.code_block_mode ==
				RTE_BBDEV_TRANSPORT_BLOCK) {
			cbs_in_op = get_num_cbs_in_op_dec(&ops[i]->turbo_dec);
			/* Check if there is available space for further
			 * processing
			 */
			if (unlikely(avail - cbs_in_op < 0))
				break;
			avail -= cbs_in_op;
			enqueued_cbs = enqueue_dec_one_op_tb(q, ops[i],
					total_enqueued_cbs, cbs_in_op);
		} else {
			/* Check if there is available space for further
			 * processing
			 */
			if (unlikely(avail - 1 < 0))
				break;
			avail -= 1;
			enqueued_cbs = enqueue_dec_one_op_cb(q, ops[i],
					total_enqueued_cbs);
		}

		if (enqueued_cbs < 0)
			break;

		total_enqueued_cbs += enqueued_cbs;

		rte_bbdev_log_debug("enqueuing dec ops [%d/%d] | head %d | tail %d",
				total_enqueued_cbs, num,
				q->head_free_desc, q->tail);
	}

	/* Set interrupt bit for last CB in enqueued ops. FPGA issues interrupt
	 * only when all previous CBs were already processed.
	 */
	desc = q->ring_addr + ((q->tail + total_enqueued_cbs - 1)
			& q->sw_ring_wrap_mask);
	desc->dec_req.irq_en = q->irq_enable;

	fpga_dma_enqueue(q, total_enqueued_cbs, &q_data->queue_stats);

	/* Update stats */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;

	return i;
}

static inline int
dequeue_enc_one_op_cb(struct fpga_queue *q, struct rte_bbdev_enc_op **op,
		uint16_t desc_offset)
{
	union fpga_dma_desc *desc;
	int desc_error = 0;

	/* Set current desc */
	desc = q->ring_addr + ((q->head_free_desc + desc_offset)
			& q->sw_ring_wrap_mask);

	/* Check if done */
	if (desc->enc_req.done == 0)
		return -1;

	/* Make sure the response is read atomically */
	rte_smp_rmb();

	rte_bbdev_log_debug("DMA response desc %p", desc);

	*op = desc->enc_req.op_addr;
	/* Check the descriptor error field, return 1 on error */
	desc_error = check_desc_error(desc->enc_req.error);
	(*op)->status = desc_error << RTE_BBDEV_DATA_ERROR;

	return 1;
}

static inline int
dequeue_enc_one_op_tb(struct fpga_queue *q, struct rte_bbdev_enc_op **op,
		uint16_t desc_offset)
{
	union fpga_dma_desc *desc;
	uint8_t cbs_in_op, cb_idx;
	int desc_error = 0;
	int status = 0;

	/* Set descriptor */
	desc = q->ring_addr + ((q->head_free_desc + desc_offset)
			& q->sw_ring_wrap_mask);

	/* Verify if done bit is set */
	if (desc->enc_req.done == 0)
		return -1;

	/* Make sure the response is read atomically */
	rte_smp_rmb();

	/* Verify if done bit in all CBs is set */
	cbs_in_op = desc->enc_req.cbs_in_op;
	for (cb_idx = 1; cb_idx < cbs_in_op; ++cb_idx) {
		desc = q->ring_addr + ((q->head_free_desc + desc_offset +
				cb_idx) & q->sw_ring_wrap_mask);
		if (desc->enc_req.done == 0)
			return -1;
	}

	/* Make sure the response is read atomically */
	rte_smp_rmb();

	for (cb_idx = 0; cb_idx < cbs_in_op; ++cb_idx) {
		desc = q->ring_addr + ((q->head_free_desc + desc_offset +
				cb_idx) & q->sw_ring_wrap_mask);
		/* Check the descriptor error field, return 1 on error */
		desc_error = check_desc_error(desc->enc_req.error);
		status |= desc_error << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log_debug("DMA response desc %p", desc);
	}

	*op = desc->enc_req.op_addr;
	(*op)->status = status;
	return cbs_in_op;
}
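
/*
 * Note on the iteration counters read back below: the descriptor iter field
 * is reported in half-iterations and (iter + 2) >> 1 converts it to a full
 * iteration count rounded up (assuming the hardware count starts from zero).
 * As an example of the arithmetic only, reported values 4 and 5 both map to
 * 3 full iterations.
 */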
static inline int
dequeue_dec_one_op_cb(struct fpga_queue *q, struct rte_bbdev_dec_op **op,
		uint16_t desc_offset)
{
	union fpga_dma_desc *desc;
	int desc_error = 0;

	/* Set descriptor */
	desc = q->ring_addr + ((q->head_free_desc + desc_offset)
			& q->sw_ring_wrap_mask);

	/* Verify done bit is set */
	if (desc->dec_req.done == 0)
		return -1;

	/* Make sure the response is read atomically */
	rte_smp_rmb();

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	print_dma_dec_desc_debug_info(desc);
#endif

	*op = desc->dec_req.op_addr;
	/* FPGA reports in half-iterations, get ceiling */
	(*op)->turbo_dec.iter_count = (desc->dec_req.iter + 2) >> 1;
	/* crc_pass = 0 when decoder fails */
	(*op)->status = !(desc->dec_req.crc_pass) << RTE_BBDEV_CRC_ERROR;
	/* Check the descriptor error field, return 1 on error */
	desc_error = check_desc_error(desc->enc_req.error);
	(*op)->status |= desc_error << RTE_BBDEV_DATA_ERROR;
	return 1;
}

static inline int
dequeue_dec_one_op_tb(struct fpga_queue *q, struct rte_bbdev_dec_op **op,
		uint16_t desc_offset)
{
	union fpga_dma_desc *desc;
	uint8_t cbs_in_op, cb_idx, iter_count = 0;
	int status = 0;
	int desc_error = 0;

	/* Set descriptor */
	desc = q->ring_addr + ((q->head_free_desc + desc_offset)
			& q->sw_ring_wrap_mask);

	/* Verify if done bit is set */
	if (desc->dec_req.done == 0)
		return -1;

	/* Make sure the response is read atomically */
	rte_smp_rmb();

	/* Verify if done bit in all CBs is set */
	cbs_in_op = desc->dec_req.cbs_in_op;
	for (cb_idx = 1; cb_idx < cbs_in_op; ++cb_idx) {
		desc = q->ring_addr + ((q->head_free_desc + desc_offset +
				cb_idx) & q->sw_ring_wrap_mask);
		if (desc->dec_req.done == 0)
			return -1;
	}

	/* Make sure the response is read atomically */
	rte_smp_rmb();

	for (cb_idx = 0; cb_idx < cbs_in_op; ++cb_idx) {
		desc = q->ring_addr + ((q->head_free_desc + desc_offset +
				cb_idx) & q->sw_ring_wrap_mask);
		/* Get max iter_count for all CBs in op */
		iter_count = RTE_MAX(iter_count, (uint8_t) desc->dec_req.iter);
		/* crc_pass = 0 when decoder fails, one fails all */
		status |= !(desc->dec_req.crc_pass) << RTE_BBDEV_CRC_ERROR;
		/* Check the descriptor error field, return 1 on error */
		desc_error = check_desc_error(desc->enc_req.error);
		status |= desc_error << RTE_BBDEV_DATA_ERROR;
		rte_bbdev_log_debug("DMA response desc %p", desc);
	}

	*op = desc->dec_req.op_addr;

	/* FPGA reports in half-iterations, get ceiling */
	(*op)->turbo_dec.iter_count = (iter_count + 2) >> 1;
	(*op)->status = status;
	return cbs_in_op;
}

static uint16_t
fpga_dequeue_enc(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_enc_op **ops, uint16_t num)
{
	struct fpga_queue *q = q_data->queue_private;
	uint32_t avail = (q->tail - q->head_free_desc) & q->sw_ring_wrap_mask;
	uint16_t i;
	uint16_t dequeued_cbs = 0;
	struct rte_bbdev_enc_op *op;
	int ret;

	for (i = 0; (i < num) && (dequeued_cbs < avail); ++i) {
		op = (q->ring_addr + ((q->head_free_desc + dequeued_cbs)
				& q->sw_ring_wrap_mask))->enc_req.op_addr;
		if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
			ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs);
		else
			ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs);

		if (ret < 0)
			break;

		dequeued_cbs += ret;

		rte_bbdev_log_debug("dequeuing enc ops [%d/%d] | head %d | tail %d",
				dequeued_cbs, num, q->head_free_desc, q->tail);
	}

	/* Update head */
	q->head_free_desc = (q->head_free_desc + dequeued_cbs) &
			q->sw_ring_wrap_mask;

	/* Update stats */
	q_data->queue_stats.dequeued_count += i;

	return i;
}
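
/*
 * On the dequeue side, avail below counts the descriptors that the enqueue
 * path has submitted (between head_free_desc and tail) and that have not yet
 * been handed back to the application; head_free_desc only advances once an
 * op has been fully dequeued.
 */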
static uint16_t
fpga_dequeue_dec(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_dec_op **ops, uint16_t num)
{
	struct fpga_queue *q = q_data->queue_private;
	uint32_t avail = (q->tail - q->head_free_desc) & q->sw_ring_wrap_mask;
	uint16_t i;
	uint16_t dequeued_cbs = 0;
	struct rte_bbdev_dec_op *op;
	int ret;

	for (i = 0; (i < num) && (dequeued_cbs < avail); ++i) {
		op = (q->ring_addr + ((q->head_free_desc + dequeued_cbs)
				& q->sw_ring_wrap_mask))->dec_req.op_addr;
		if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs);
		else
			ret = dequeue_dec_one_op_cb(q, &ops[i], dequeued_cbs);

		if (ret < 0)
			break;

		dequeued_cbs += ret;

		rte_bbdev_log_debug("dequeuing dec ops [%d/%d] | head %d | tail %d",
				dequeued_cbs, num, q->head_free_desc, q->tail);
	}

	/* Update head */
	q->head_free_desc = (q->head_free_desc + dequeued_cbs) &
			q->sw_ring_wrap_mask;

	/* Update stats */
	q_data->queue_stats.dequeued_count += i;

	return i;
}

/* Initialization Function */
static void
fpga_lte_fec_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
{
	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);

	dev->dev_ops = &fpga_ops;
	dev->enqueue_enc_ops = fpga_enqueue_enc;
	dev->enqueue_dec_ops = fpga_enqueue_dec;
	dev->dequeue_enc_ops = fpga_dequeue_enc;
	dev->dequeue_dec_ops = fpga_dequeue_dec;

	((struct fpga_lte_fec_device *) dev->data->dev_private)->pf_device =
			!strcmp(drv->driver.name,
					RTE_STR(FPGA_LTE_FEC_PF_DRIVER_NAME));
	((struct fpga_lte_fec_device *) dev->data->dev_private)->mmio_base =
			pci_dev->mem_resource[0].addr;

	rte_bbdev_log_debug(
			"Init device %s [%s] @ virtaddr %p phyaddr %#"PRIx64,
			drv->driver.name, dev->data->name,
			(void *)pci_dev->mem_resource[0].addr,
			pci_dev->mem_resource[0].phys_addr);
}

static int
fpga_lte_fec_probe(struct rte_pci_driver *pci_drv,
		struct rte_pci_device *pci_dev)
{
	struct rte_bbdev *bbdev = NULL;
	char dev_name[RTE_BBDEV_NAME_MAX_LEN];

	if (pci_dev == NULL) {
		rte_bbdev_log(ERR, "NULL PCI device");
		return -EINVAL;
	}

	rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name));

	/* Allocate memory to be used privately by drivers */
	bbdev = rte_bbdev_allocate(pci_dev->device.name);
	if (bbdev == NULL)
		return -ENODEV;

	/* allocate device private memory */
	bbdev->data->dev_private = rte_zmalloc_socket(dev_name,
			sizeof(struct fpga_lte_fec_device), RTE_CACHE_LINE_SIZE,
			pci_dev->device.numa_node);

	if (bbdev->data->dev_private == NULL) {
		rte_bbdev_log(CRIT,
				"Allocation of %zu bytes for device \"%s\" failed",
				sizeof(struct fpga_lte_fec_device), dev_name);
		rte_bbdev_release(bbdev);
		return -ENOMEM;
	}

	/* Fill HW specific part of device structure */
	bbdev->device = &pci_dev->device;
	bbdev->intr_handle = pci_dev->intr_handle;
	bbdev->data->socket_id = pci_dev->device.numa_node;

	/* Invoke FEC FPGA device initialization function */
	fpga_lte_fec_init(bbdev, pci_drv);

	rte_bbdev_log_debug("bbdev id = %u [%s]",
			bbdev->data->dev_id, dev_name);

	struct fpga_lte_fec_device *d = bbdev->data->dev_private;
	uint32_t version_id = fpga_reg_read_32(d->mmio_base,
			FPGA_LTE_FEC_VERSION_ID);
	rte_bbdev_log(INFO, "FEC FPGA RTL v%u.%u",
			((uint16_t)(version_id >> 16)), ((uint16_t)version_id));
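
	/*
	 * The 32-bit version register is decoded above as two 16-bit halves:
	 * the upper half is printed before the dot and the lower half after.
	 */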
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	if (!strcmp(pci_drv->driver.name,
			RTE_STR(FPGA_LTE_FEC_PF_DRIVER_NAME)))
		print_static_reg_debug_info(d->mmio_base);
#endif
	return 0;
}

static int
fpga_lte_fec_remove(struct rte_pci_device *pci_dev)
{
	struct rte_bbdev *bbdev;
	int ret;
	uint8_t dev_id;

	if (pci_dev == NULL)
		return -EINVAL;

	/* Find device */
	bbdev = rte_bbdev_get_named_dev(pci_dev->device.name);
	if (bbdev == NULL) {
		rte_bbdev_log(CRIT,
				"Couldn't find HW dev \"%s\" to uninitialise it",
				pci_dev->device.name);
		return -ENODEV;
	}
	dev_id = bbdev->data->dev_id;

	/* free device private memory before close */
	rte_free(bbdev->data->dev_private);

	/* Close device */
	ret = rte_bbdev_close(dev_id);
	if (ret < 0)
		rte_bbdev_log(ERR,
				"Device %i failed to close during uninit: %i",
				dev_id, ret);

	/* release bbdev from library */
	ret = rte_bbdev_release(bbdev);
	if (ret)
		rte_bbdev_log(ERR, "Device %i failed to uninit: %i", dev_id,
				ret);

	rte_bbdev_log_debug("Destroyed bbdev = %u", dev_id);

	return 0;
}

static inline void
set_default_fpga_conf(struct rte_fpga_lte_fec_conf *def_conf)
{
	/* clear default configuration before initialization */
	memset(def_conf, 0, sizeof(struct rte_fpga_lte_fec_conf));
	/* Set pf mode to true */
	def_conf->pf_mode_en = true;

	/* Set ratio between UL and DL to 1:1 (unit of weight is 3 CBs) */
	def_conf->ul_bandwidth = 3;
	def_conf->dl_bandwidth = 3;

	/* Set Load Balance Factor to 64 */
	def_conf->dl_load_balance = 64;
	def_conf->ul_load_balance = 64;
}

/* Initial configuration of FPGA LTE FEC device */
int
rte_fpga_lte_fec_configure(const char *dev_name,
		const struct rte_fpga_lte_fec_conf *conf)
{
	uint32_t payload_32, address;
	uint16_t payload_16;
	uint8_t payload_8;
	uint16_t q_id, vf_id, total_q_id, total_ul_q_id, total_dl_q_id;
	struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
	struct rte_fpga_lte_fec_conf def_conf;

	if (bbdev == NULL) {
		rte_bbdev_log(ERR,
				"Invalid dev_name (%s), or device is not yet initialised",
				dev_name);
		return -ENODEV;
	}

	struct fpga_lte_fec_device *d = bbdev->data->dev_private;

	if (conf == NULL) {
		rte_bbdev_log(ERR,
				"FPGA Configuration was not provided. Default configuration will be loaded.");
		set_default_fpga_conf(&def_conf);
		conf = &def_conf;
	}

	/*
	 * Configure UL:DL ratio.
	 * [7:0]: UL weight
	 * [15:8]: DL weight
	 */
	payload_16 = (conf->dl_bandwidth << 8) | conf->ul_bandwidth;
	address = FPGA_LTE_FEC_CONFIGURATION;
	fpga_reg_write_16(d->mmio_base, address, payload_16);

	/* Clear all queues registers */
	payload_32 = FPGA_INVALID_HW_QUEUE_ID;
	for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) {
		address = (q_id << 2) + FPGA_LTE_FEC_QUEUE_MAP;
		fpga_reg_write_32(d->mmio_base, address, payload_32);
	}

	/*
	 * If PF mode is enabled allocate all queues for PF only.
	 *
	 * For VF mode each VF can have different number of UL and DL queues.
	 * Total number of queues to configure cannot exceed FPGA
	 * capabilities - 64 queues - 32 queues for UL and 32 queues for DL.
	 * Queues mapping is done according to configuration:
	 *
	 * UL queues:
	 * |                Q_ID              | VF_ID |
	 * |                 0                |   0   |
	 * |                ...               |   0   |
	 * | conf->vf_ul_queues_number[0] - 1 |   0   |
	 * | conf->vf_ul_queues_number[0]     |   1   |
	 * |                ...               |   1   |
	 * | conf->vf_ul_queues_number[1] - 1 |   1   |
	 * |                ...               |  ...  |
	 * | conf->vf_ul_queues_number[7] - 1 |   7   |
	 *
	 * DL queues:
	 * |                Q_ID              | VF_ID |
	 * |                 32               |   0   |
	 * |                ...               |   0   |
	 * | conf->vf_dl_queues_number[0] - 1 |   0   |
	 * | conf->vf_dl_queues_number[0]     |   1   |
	 * |                ...               |   1   |
	 * | conf->vf_dl_queues_number[1] - 1 |   1   |
	 * |                ...               |  ...  |
	 * | conf->vf_dl_queues_number[7] - 1 |   7   |
	 *
	 * Example of configuration:
	 * conf->vf_ul_queues_number[0] = 4;  -> 4 UL queues for VF0
	 * conf->vf_dl_queues_number[0] = 4;  -> 4 DL queues for VF0
	 * conf->vf_ul_queues_number[1] = 2;  -> 2 UL queues for VF1
	 * conf->vf_dl_queues_number[1] = 2;  -> 2 DL queues for VF1
	 *
	 * UL:
	 * | Q_ID | VF_ID |
	 * |   0  |   0   |
	 * |   1  |   0   |
	 * |   2  |   0   |
	 * |   3  |   0   |
	 * |   4  |   1   |
	 * |   5  |   1   |
	 *
	 * DL:
	 * | Q_ID | VF_ID |
	 * |  32  |   0   |
	 * |  33  |   0   |
	 * |  34  |   0   |
	 * |  35  |   0   |
	 * |  36  |   1   |
	 * |  37  |   1   |
	 */
	if (conf->pf_mode_en) {
		payload_32 = 0x1;
		for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) {
			address = (q_id << 2) + FPGA_LTE_FEC_QUEUE_MAP;
			fpga_reg_write_32(d->mmio_base, address, payload_32);
		}
	} else {
		/* Calculate total number of UL and DL queues to configure */
		total_ul_q_id = total_dl_q_id = 0;
		for (vf_id = 0; vf_id < FPGA_LTE_FEC_NUM_VFS; ++vf_id) {
			total_ul_q_id += conf->vf_ul_queues_number[vf_id];
			total_dl_q_id += conf->vf_dl_queues_number[vf_id];
		}
		total_q_id = total_dl_q_id + total_ul_q_id;
		/*
		 * Check if total number of queues to configure does not exceed
		 * FPGA capabilities (64 queues - 32 UL and 32 DL queues)
		 */
		if ((total_ul_q_id > FPGA_NUM_UL_QUEUES) ||
				(total_dl_q_id > FPGA_NUM_DL_QUEUES) ||
				(total_q_id > FPGA_TOTAL_NUM_QUEUES)) {
			rte_bbdev_log(ERR,
					"FPGA Configuration failed. Too many queues to configure: UL_Q %u, DL_Q %u, FPGA_Q %u",
					total_ul_q_id, total_dl_q_id,
					FPGA_TOTAL_NUM_QUEUES);
			return -EINVAL;
		}
		total_ul_q_id = 0;
		for (vf_id = 0; vf_id < FPGA_LTE_FEC_NUM_VFS; ++vf_id) {
			for (q_id = 0; q_id < conf->vf_ul_queues_number[vf_id];
					++q_id, ++total_ul_q_id) {
				address = (total_ul_q_id << 2) +
						FPGA_LTE_FEC_QUEUE_MAP;
				payload_32 = ((0x80 + vf_id) << 16) | 0x1;
				fpga_reg_write_32(d->mmio_base, address,
						payload_32);
			}
		}
		total_dl_q_id = 0;
		for (vf_id = 0; vf_id < FPGA_LTE_FEC_NUM_VFS; ++vf_id) {
			for (q_id = 0; q_id < conf->vf_dl_queues_number[vf_id];
					++q_id, ++total_dl_q_id) {
				address = ((total_dl_q_id + FPGA_NUM_UL_QUEUES)
						<< 2) + FPGA_LTE_FEC_QUEUE_MAP;
				payload_32 = ((0x80 + vf_id) << 16) | 0x1;
				fpga_reg_write_32(d->mmio_base, address,
						payload_32);
			}
		}
	}

	/* Setting Load Balance Factor */
	payload_16 = (conf->dl_load_balance << 8) | (conf->ul_load_balance);
	address = FPGA_LTE_FEC_LOAD_BALANCE_FACTOR;
	fpga_reg_write_16(d->mmio_base, address, payload_16);

	/* Setting length of ring descriptor entry */
	payload_16 = FPGA_RING_DESC_ENTRY_LENGTH;
	address = FPGA_LTE_FEC_RING_DESC_LEN;
	fpga_reg_write_16(d->mmio_base, address, payload_16);

	/* Setting FLR timeout value */
	payload_16 = conf->flr_time_out;
	address = FPGA_LTE_FEC_FLR_TIME_OUT;
	fpga_reg_write_16(d->mmio_base, address, payload_16);

	/* Queue PF/VF mapping table is ready */
	payload_8 = 0x1;
	address = FPGA_LTE_FEC_QUEUE_PF_VF_MAP_DONE;
	fpga_reg_write_8(d->mmio_base, address, payload_8);

	rte_bbdev_log_debug("PF FPGA LTE FEC configuration complete for %s",
			dev_name);

#ifdef RTE_LIBRTE_BBDEV_DEBUG
	print_static_reg_debug_info(d->mmio_base);
#endif
	return 0;
}

/* FPGA LTE FEC PCI PF address map */
static struct rte_pci_id pci_id_fpga_lte_fec_pf_map[] = {
	{
		RTE_PCI_DEVICE(FPGA_LTE_FEC_VENDOR_ID,
				FPGA_LTE_FEC_PF_DEVICE_ID)
	},
	{.device_id = 0},
};

static struct rte_pci_driver fpga_lte_fec_pci_pf_driver = {
	.probe = fpga_lte_fec_probe,
	.remove = fpga_lte_fec_remove,
	.id_table = pci_id_fpga_lte_fec_pf_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING
};

/* FPGA LTE FEC PCI VF address map */
static struct rte_pci_id pci_id_fpga_lte_fec_vf_map[] = {
	{
		RTE_PCI_DEVICE(FPGA_LTE_FEC_VENDOR_ID,
				FPGA_LTE_FEC_VF_DEVICE_ID)
	},
	{.device_id = 0},
};

static struct rte_pci_driver fpga_lte_fec_pci_vf_driver = {
	.probe = fpga_lte_fec_probe,
	.remove = fpga_lte_fec_remove,
	.id_table = pci_id_fpga_lte_fec_vf_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING
};

RTE_PMD_REGISTER_PCI(FPGA_LTE_FEC_PF_DRIVER_NAME, fpga_lte_fec_pci_pf_driver);
RTE_PMD_REGISTER_PCI_TABLE(FPGA_LTE_FEC_PF_DRIVER_NAME,
		pci_id_fpga_lte_fec_pf_map);
RTE_PMD_REGISTER_PCI(FPGA_LTE_FEC_VF_DRIVER_NAME, fpga_lte_fec_pci_vf_driver);
RTE_PMD_REGISTER_PCI_TABLE(FPGA_LTE_FEC_VF_DRIVER_NAME,
		pci_id_fpga_lte_fec_vf_map);
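
/*
 * Illustrative usage sketch (an assumption added for documentation, not part
 * of the driver): it programs the queue split shown in the example inside
 * rte_fpga_lte_fec_configure() above - 4 UL and 4 DL queues for VF0, 2 UL
 * and 2 DL queues for VF1. The function name and the "0000:1c:00.0" device
 * name are placeholders only.
 */
static __rte_unused int
fpga_lte_fec_configure_example(void)
{
	struct rte_fpga_lte_fec_conf conf;

	memset(&conf, 0, sizeof(conf));
	conf.pf_mode_en = false;		/* Split the queues between VFs */
	conf.vf_ul_queues_number[0] = 4;	/* UL queues 0..3   -> VF0 */
	conf.vf_dl_queues_number[0] = 4;	/* DL queues 32..35 -> VF0 */
	conf.vf_ul_queues_number[1] = 2;	/* UL queues 4..5   -> VF1 */
	conf.vf_dl_queues_number[1] = 2;	/* DL queues 36..37 -> VF1 */
	conf.ul_bandwidth = 3;			/* Same 1:1 ratio as the defaults */
	conf.dl_bandwidth = 3;
	conf.ul_load_balance = 64;
	conf.dl_load_balance = 64;

	return rte_fpga_lte_fec_configure("0000:1c:00.0", &conf);
}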