/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2022 Intel Corporation
 */

#include <unistd.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_dev.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_branch_prediction.h>
#include <rte_hexdump.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_cycles.h>

#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>
#include "vrb_pmd.h"

#ifdef RTE_LIBRTE_BBDEV_DEBUG
RTE_LOG_REGISTER_SUFFIX(vrb_logtype, vrb, DEBUG);
#else
RTE_LOG_REGISTER_SUFFIX(vrb_logtype, vrb, NOTICE);
#endif
#define RTE_LOGTYPE_VRB vrb_logtype

/* Calculate the offset of the enqueue register. */
static inline uint32_t
vrb1_queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id, uint16_t aq_id)
{
	if (pf_device)
		return ((vf_id << 12) + (qgrp_id << 7) + (aq_id << 3) + VRB1_PfQmgrIngressAq);
	else
		return ((qgrp_id << 7) + (aq_id << 3) + VRB1_VfQmgrIngressAq);
}

static inline uint32_t
vrb2_queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id, uint16_t aq_id)
{
	if (pf_device)
		return ((vf_id << 14) + (qgrp_id << 9) + (aq_id << 3) + VRB2_PfQmgrIngressAq);
	else
		return ((qgrp_id << 9) + (aq_id << 3) + VRB2_VfQmgrIngressAq);
}

enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, FFT, MLD, NUM_ACC};

/* Return the accelerator enum for a Queue Group Index. */
static inline int
accFromQgid(int qg_idx, const struct rte_acc_conf *acc_conf)
{
	int accQg[VRB_MAX_QGRPS];
	int NumQGroupsPerFn[NUM_ACC];
	int acc, qgIdx, qgIndex = 0;
	for (qgIdx = 0; qgIdx < VRB_MAX_QGRPS; qgIdx++)
		accQg[qgIdx] = 0;
	NumQGroupsPerFn[UL_4G] = acc_conf->q_ul_4g.num_qgroups;
	NumQGroupsPerFn[UL_5G] = acc_conf->q_ul_5g.num_qgroups;
	NumQGroupsPerFn[DL_4G] = acc_conf->q_dl_4g.num_qgroups;
	NumQGroupsPerFn[DL_5G] = acc_conf->q_dl_5g.num_qgroups;
	NumQGroupsPerFn[FFT] = acc_conf->q_fft.num_qgroups;
	NumQGroupsPerFn[MLD] = acc_conf->q_mld.num_qgroups;
	for (acc = UL_4G; acc < NUM_ACC; acc++)
		for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
			accQg[qgIndex++] = acc;
	acc = accQg[qg_idx];
	return acc;
}

/* Return the queue topology for a Queue Group Index. */
static inline void
qtopFromAcc(struct rte_acc_queue_topology **qtop, int acc_enum, struct rte_acc_conf *acc_conf)
{
	struct rte_acc_queue_topology *p_qtop;
	p_qtop = NULL;

	switch (acc_enum) {
	case UL_4G:
		p_qtop = &(acc_conf->q_ul_4g);
		break;
	case UL_5G:
		p_qtop = &(acc_conf->q_ul_5g);
		break;
	case DL_4G:
		p_qtop = &(acc_conf->q_dl_4g);
		break;
	case DL_5G:
		p_qtop = &(acc_conf->q_dl_5g);
		break;
	case FFT:
		p_qtop = &(acc_conf->q_fft);
		break;
	case MLD:
		p_qtop = &(acc_conf->q_mld);
		break;
	default:
		/* NOTREACHED. */
		rte_bbdev_log(ERR, "Unexpected error evaluating %s using %d", __func__, acc_enum);
		break;
	}
	*qtop = p_qtop;
}

/* Return the AQ depth for a Queue Group Index. */
static inline int
aqDepth(int qg_idx, struct rte_acc_conf *acc_conf)
{
	struct rte_acc_queue_topology *q_top = NULL;

	int acc_enum = accFromQgid(qg_idx, acc_conf);
	qtopFromAcc(&q_top, acc_enum, acc_conf);

	if (unlikely(q_top == NULL))
		return 1;

	return RTE_MAX(1, q_top->aq_depth_log2);
}
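
/*
 * Illustration of the qg_idx mapping used by the helpers above and below,
 * with hypothetical numbers: if q_ul_4g.num_qgroups = 2, q_ul_5g.num_qgroups = 0
 * and q_dl_4g.num_qgroups = 2, then accFromQgid() maps qg_idx 0-1 to UL_4G and
 * qg_idx 2-3 to DL_4G, so aqDepth()/aqNum() pick the matching
 * rte_acc_queue_topology entry for that queue group.
 */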
/* Return the number of AQs for a Queue Group Index. */
static inline int
aqNum(int qg_idx, struct rte_acc_conf *acc_conf)
{
	struct rte_acc_queue_topology *q_top = NULL;

	int acc_enum = accFromQgid(qg_idx, acc_conf);
	qtopFromAcc(&q_top, acc_enum, acc_conf);

	if (unlikely(q_top == NULL))
		return 0;

	return q_top->num_aqs_per_groups;
}

static void
initQTop(struct rte_acc_conf *acc_conf)
{
	acc_conf->q_ul_4g.num_aqs_per_groups = 0;
	acc_conf->q_ul_4g.num_qgroups = 0;
	acc_conf->q_ul_4g.first_qgroup_index = -1;
	acc_conf->q_ul_5g.num_aqs_per_groups = 0;
	acc_conf->q_ul_5g.num_qgroups = 0;
	acc_conf->q_ul_5g.first_qgroup_index = -1;
	acc_conf->q_dl_4g.num_aqs_per_groups = 0;
	acc_conf->q_dl_4g.num_qgroups = 0;
	acc_conf->q_dl_4g.first_qgroup_index = -1;
	acc_conf->q_dl_5g.num_aqs_per_groups = 0;
	acc_conf->q_dl_5g.num_qgroups = 0;
	acc_conf->q_dl_5g.first_qgroup_index = -1;
	acc_conf->q_fft.num_aqs_per_groups = 0;
	acc_conf->q_fft.num_qgroups = 0;
	acc_conf->q_fft.first_qgroup_index = -1;
	acc_conf->q_mld.num_aqs_per_groups = 0;
	acc_conf->q_mld.num_qgroups = 0;
	acc_conf->q_mld.first_qgroup_index = -1;
}

static inline void
updateQtop(uint8_t acc, uint8_t qg, struct rte_acc_conf *acc_conf, struct acc_device *d) {
	uint32_t reg;
	struct rte_acc_queue_topology *q_top = NULL;
	uint16_t aq;

	qtopFromAcc(&q_top, acc, acc_conf);
	if (unlikely(q_top == NULL))
		return;
	q_top->num_qgroups++;
	if (q_top->first_qgroup_index == -1) {
		q_top->first_qgroup_index = qg;
		/* Can be optimized to assume all are enabled by default. */
		reg = acc_reg_read(d, d->queue_offset(d->pf_device, 0, qg, d->num_aqs - 1));
		if (reg & ACC_QUEUE_ENABLE) {
			q_top->num_aqs_per_groups = d->num_aqs;
			return;
		}
		q_top->num_aqs_per_groups = 0;
		for (aq = 0; aq < d->num_aqs; aq++) {
			reg = acc_reg_read(d, d->queue_offset(d->pf_device, 0, qg, aq));
			if (reg & ACC_QUEUE_ENABLE)
				q_top->num_aqs_per_groups++;
		}
	}
}

/* Check that the device Qmgr is enabled, for protection. */
static inline bool
vrb_check_device_enable(struct rte_bbdev *dev)
{
	uint32_t reg_aq, qg;
	struct acc_device *d = dev->data->dev_private;

	for (qg = 0; qg < d->num_qgroups; qg++) {
		reg_aq = acc_reg_read(d, d->queue_offset(d->pf_device, 0, qg, 0));
		if (reg_aq & ACC_QUEUE_ENABLE)
			return true;
	}
	return false;
}

static inline void
vrb_vf2pf(struct acc_device *d, unsigned int payload)
{
	acc_reg_write(d, d->reg_addr->vf2pf_doorbell, payload);
}

/* Request device FFT windowing information. */
static inline void
vrb_device_fft_win(struct rte_bbdev *dev)
{
	struct acc_device *d = dev->data->dev_private;
	uint32_t reg, time_out = 0, win;

	if (d->pf_device)
		return;

	/* Check from the device the first time. */
	if (d->fft_window_width[0] == 0) {
		for (win = 0; win < ACC_MAX_FFT_WIN; win++) {
			vrb_vf2pf(d, ACC_VF2PF_FFT_WIN_REQUEST | win);
			reg = acc_reg_read(d, d->reg_addr->pf2vf_doorbell);
			while ((time_out < ACC_STATUS_TO) && (reg == RTE_BBDEV_DEV_NOSTATUS)) {
				usleep(ACC_STATUS_WAIT); /* Wait for VF->PF->VF comms. */
				reg = acc_reg_read(d, d->reg_addr->pf2vf_doorbell);
				time_out++;
			}
			d->fft_window_width[win] = reg;
		}
	}
}
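
/*
 * Sketch of how fetch_acc_config() below rebuilds the topology, using
 * hypothetical register values: each queue group is assigned a nibble in the
 * qman_group_func registers; if the nibble for queue group 2 reads 0x1, that
 * group is attributed to qman_func_id[1] and updateQtop() credits it to the
 * corresponding operation type in rte_acc_conf.
 */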
/* Fetch the configuration enabled for the PF/VF using MMIO read (slow). */
static inline void
fetch_acc_config(struct rte_bbdev *dev)
{
	struct acc_device *d = dev->data->dev_private;
	struct rte_acc_conf *acc_conf = &d->acc_conf;
	uint8_t acc, qg;
	uint32_t reg_aq, reg_len0, reg_len1, reg_len2, reg_len3, reg0, reg1, reg2, reg3;
	uint32_t reg_mode, idx;
	struct rte_acc_queue_topology *q_top = NULL;
	int qman_func_id[VRB_NUM_ACCS] = {ACC_ACCMAP_0, ACC_ACCMAP_1,
			ACC_ACCMAP_2, ACC_ACCMAP_3, ACC_ACCMAP_4, ACC_ACCMAP_5};

	/* No need to retrieve the configuration if it is already done. */
	if (d->configured)
		return;

	if (!vrb_check_device_enable(dev)) {
		rte_bbdev_log(NOTICE, "%s has no queue enabled and can't be used.",
				dev->data->name);
		return;
	}

	vrb_device_fft_win(dev);

	d->ddr_size = 0;

	/* Single VF Bundle by VF. */
	acc_conf->num_vf_bundles = 1;
	initQTop(acc_conf);

	if (d->device_variant == VRB1_VARIANT) {
		reg0 = acc_reg_read(d, d->reg_addr->qman_group_func);
		reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4);
		for (qg = 0; qg < d->num_qgroups; qg++) {
			reg_aq = acc_reg_read(d, d->queue_offset(d->pf_device, 0, qg, 0));
			if (reg_aq & ACC_QUEUE_ENABLE) {
				if (qg < ACC_NUM_QGRPS_PER_WORD)
					idx = (reg0 >> (qg * 4)) & 0x7;
				else
					idx = (reg1 >> ((qg - ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
				if (idx < VRB1_NUM_ACCS) {
					acc = qman_func_id[idx];
					updateQtop(acc, qg, acc_conf, d);
				}
			}
		}

		/* Check the depth of the AQs. */
		reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset);
		reg_len1 = acc_reg_read(d, d->reg_addr->depth_log1_offset);
		for (acc = 0; acc < VRB1_NUM_ACCS; acc++) {
			qtopFromAcc(&q_top, acc, acc_conf);
			if (q_top->first_qgroup_index < ACC_NUM_QGRPS_PER_WORD)
				q_top->aq_depth_log2 =
						(reg_len0 >> (q_top->first_qgroup_index * 4)) & 0xF;
			else
				q_top->aq_depth_log2 = (reg_len1 >> ((q_top->first_qgroup_index -
						ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
		}
	} else if (d->device_variant == VRB2_VARIANT) {
		reg0 = acc_reg_read(d, d->reg_addr->qman_group_func);
		reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4);
		reg2 = acc_reg_read(d, d->reg_addr->qman_group_func + 8);
		reg3 = acc_reg_read(d, d->reg_addr->qman_group_func + 12);
		/* printf("Debug Function %08x %08x %08x %08x\n", reg0, reg1, reg2, reg3); */
		for (qg = 0; qg < VRB2_NUM_QGRPS; qg++) {
			reg_aq = acc_reg_read(d, vrb2_queue_offset(d->pf_device, 0, qg, 0));
			if (reg_aq & ACC_QUEUE_ENABLE) {
				/* printf("Qg enabled %d %x\n", qg, reg_aq); */
				if (qg / ACC_NUM_QGRPS_PER_WORD == 0)
					idx = (reg0 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
				else if (qg / ACC_NUM_QGRPS_PER_WORD == 1)
					idx = (reg1 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
				else if (qg / ACC_NUM_QGRPS_PER_WORD == 2)
					idx = (reg2 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
				else
					idx = (reg3 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
				if (idx < VRB2_NUM_ACCS) {
					acc = qman_func_id[idx];
					updateQtop(acc, qg, acc_conf, d);
				}
			}
		}

		/* Check the depth of the AQs. */
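		/*
		 * Each queue group also owns a 4-bit log2(AQ depth) field, packed
		 * ACC_NUM_QGRPS_PER_WORD fields per 32-bit register, which is why
		 * depth_log0_offset is read as four consecutive words below.
		 */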
		reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset);
		reg_len1 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 4);
		reg_len2 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 8);
		reg_len3 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 12);

		for (acc = 0; acc < VRB2_NUM_ACCS; acc++) {
			qtopFromAcc(&q_top, acc, acc_conf);
			if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 0)
				q_top->aq_depth_log2 = (reg_len0 >> ((q_top->first_qgroup_index %
						ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
			else if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 1)
				q_top->aq_depth_log2 = (reg_len1 >> ((q_top->first_qgroup_index %
						ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
			else if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 2)
				q_top->aq_depth_log2 = (reg_len2 >> ((q_top->first_qgroup_index %
						ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
			else
				q_top->aq_depth_log2 = (reg_len3 >> ((q_top->first_qgroup_index %
						ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
		}
	}

	/* Read PF mode. */
	if (d->pf_device) {
		reg_mode = acc_reg_read(d, d->reg_addr->pf_mode);
		acc_conf->pf_mode_en = (reg_mode == ACC_PF_VAL) ? 1 : 0;
	} else {
		reg_mode = acc_reg_read(d, d->reg_addr->hi_mode);
		acc_conf->pf_mode_en = reg_mode & 1;
	}

	rte_bbdev_log_debug(
			"%s Config LLR SIGN IN/OUT %s %s QG %u %u %u %u %u %u AQ %u %u %u %u %u %u Len %u %u %u %u %u %u",
			(d->pf_device) ? "PF" : "VF",
			(acc_conf->input_pos_llr_1_bit) ? "POS" : "NEG",
			(acc_conf->output_pos_llr_1_bit) ? "POS" : "NEG",
			acc_conf->q_ul_4g.num_qgroups,
			acc_conf->q_dl_4g.num_qgroups,
			acc_conf->q_ul_5g.num_qgroups,
			acc_conf->q_dl_5g.num_qgroups,
			acc_conf->q_fft.num_qgroups,
			acc_conf->q_mld.num_qgroups,
			acc_conf->q_ul_4g.num_aqs_per_groups,
			acc_conf->q_dl_4g.num_aqs_per_groups,
			acc_conf->q_ul_5g.num_aqs_per_groups,
			acc_conf->q_dl_5g.num_aqs_per_groups,
			acc_conf->q_fft.num_aqs_per_groups,
			acc_conf->q_mld.num_aqs_per_groups,
			acc_conf->q_ul_4g.aq_depth_log2,
			acc_conf->q_dl_4g.aq_depth_log2,
			acc_conf->q_ul_5g.aq_depth_log2,
			acc_conf->q_dl_5g.aq_depth_log2,
			acc_conf->q_fft.aq_depth_log2,
			acc_conf->q_mld.aq_depth_log2);
}

/* Request device status information. */
static inline uint32_t
vrb_device_status(struct rte_bbdev *dev)
{
	struct acc_device *d = dev->data->dev_private;
	uint32_t reg, time_out = 0;

	if (d->pf_device)
		return RTE_BBDEV_DEV_NOT_SUPPORTED;

	vrb_vf2pf(d, ACC_VF2PF_STATUS_REQUEST);
	reg = acc_reg_read(d, d->reg_addr->pf2vf_doorbell);
	while ((time_out < ACC_STATUS_TO) && (reg == RTE_BBDEV_DEV_NOSTATUS)) {
		usleep(ACC_STATUS_WAIT); /* Wait for VF->PF->VF comms. */
		reg = acc_reg_read(d, d->reg_addr->pf2vf_doorbell);
		time_out++;
	}

	return reg;
}

/* Check the PF Info Ring to find the interrupt cause and handle it accordingly.
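 * Entries whose interrupt number falls outside the DMA descriptor-done range
 * are logged as warnings and cleared; note that only a local copy of
 * info_ring_head is advanced here, the stored head pointer is left untouched.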
*/ 398 static inline void 399 vrb_check_ir(struct acc_device *acc_dev) 400 { 401 volatile union acc_info_ring_data *ring_data; 402 uint16_t info_ring_head = acc_dev->info_ring_head, int_nb; 403 if (unlikely(acc_dev->info_ring == NULL)) 404 return; 405 406 ring_data = acc_dev->info_ring + (acc_dev->info_ring_head & ACC_INFO_RING_MASK); 407 408 while (ring_data->valid) { 409 int_nb = int_from_ring(*ring_data, acc_dev->device_variant); 410 if ((int_nb < ACC_PF_INT_DMA_DL_DESC_IRQ) || ( 411 int_nb > ACC_PF_INT_DMA_MLD_DESC_IRQ)) { 412 rte_bbdev_log(WARNING, "InfoRing: ITR:%d Info:0x%x", 413 int_nb, ring_data->detailed_info); 414 /* Initialize Info Ring entry and move forward. */ 415 ring_data->valid = 0; 416 } 417 info_ring_head++; 418 ring_data = acc_dev->info_ring + (info_ring_head & ACC_INFO_RING_MASK); 419 } 420 } 421 422 /* Interrupt handler triggered by dev for handling specific interrupt. */ 423 static void 424 vrb_dev_interrupt_handler(void *cb_arg) 425 { 426 struct rte_bbdev *dev = cb_arg; 427 struct acc_device *acc_dev = dev->data->dev_private; 428 volatile union acc_info_ring_data *ring_data; 429 struct acc_deq_intr_details deq_intr_det; 430 uint16_t vf_id, aq_id, qg_id, int_nb; 431 432 ring_data = acc_dev->info_ring + (acc_dev->info_ring_head & ACC_INFO_RING_MASK); 433 434 while (ring_data->valid) { 435 vf_id = vf_from_ring(*ring_data, acc_dev->device_variant); 436 aq_id = aq_from_ring(*ring_data, acc_dev->device_variant); 437 qg_id = qg_from_ring(*ring_data, acc_dev->device_variant); 438 int_nb = int_from_ring(*ring_data, acc_dev->device_variant); 439 if (acc_dev->pf_device) { 440 rte_bbdev_log_debug( 441 "PF Interrupt received, Info Ring data: 0x%x -> %d", 442 ring_data->val, int_nb); 443 444 switch (int_nb) { 445 case ACC_PF_INT_DMA_DL_DESC_IRQ: 446 case ACC_PF_INT_DMA_UL_DESC_IRQ: 447 case ACC_PF_INT_DMA_FFT_DESC_IRQ: 448 case ACC_PF_INT_DMA_UL5G_DESC_IRQ: 449 case ACC_PF_INT_DMA_DL5G_DESC_IRQ: 450 case ACC_PF_INT_DMA_MLD_DESC_IRQ: 451 deq_intr_det.queue_id = get_queue_id_from_ring_info( 452 dev->data, *ring_data); 453 if (deq_intr_det.queue_id == UINT16_MAX) { 454 rte_bbdev_log(ERR, 455 "Couldn't find queue: aq_id: %u, qg_id: %u, vf_id: %u", 456 aq_id, qg_id, vf_id); 457 return; 458 } 459 rte_bbdev_pmd_callback_process(dev, 460 RTE_BBDEV_EVENT_DEQUEUE, &deq_intr_det); 461 break; 462 default: 463 rte_bbdev_pmd_callback_process(dev, RTE_BBDEV_EVENT_ERROR, NULL); 464 break; 465 } 466 } else { 467 rte_bbdev_log_debug( 468 "VRB VF Interrupt received, Info Ring data: 0x%x", 469 ring_data->val); 470 switch (int_nb) { 471 case ACC_VF_INT_DMA_DL_DESC_IRQ: 472 case ACC_VF_INT_DMA_UL_DESC_IRQ: 473 case ACC_VF_INT_DMA_FFT_DESC_IRQ: 474 case ACC_VF_INT_DMA_UL5G_DESC_IRQ: 475 case ACC_VF_INT_DMA_DL5G_DESC_IRQ: 476 case ACC_VF_INT_DMA_MLD_DESC_IRQ: 477 /* VFs are not aware of their vf_id - it's set to 0. */ 478 set_vf_in_ring(ring_data, acc_dev->device_variant, 0); 479 deq_intr_det.queue_id = get_queue_id_from_ring_info( 480 dev->data, *ring_data); 481 if (deq_intr_det.queue_id == UINT16_MAX) { 482 rte_bbdev_log(ERR, 483 "Couldn't find queue: aq_id: %u, qg_id: %u", 484 aq_id, qg_id); 485 return; 486 } 487 rte_bbdev_pmd_callback_process(dev, 488 RTE_BBDEV_EVENT_DEQUEUE, &deq_intr_det); 489 break; 490 default: 491 rte_bbdev_pmd_callback_process(dev, RTE_BBDEV_EVENT_ERROR, NULL); 492 break; 493 } 494 } 495 496 /* Initialize Info Ring entry and move forward. 
*/ 497 ring_data->val = 0; 498 ++acc_dev->info_ring_head; 499 ring_data = acc_dev->info_ring + (acc_dev->info_ring_head & ACC_INFO_RING_MASK); 500 } 501 } 502 503 /* Allocate and setup inforing. */ 504 static int 505 allocate_info_ring(struct rte_bbdev *dev) 506 { 507 struct acc_device *d = dev->data->dev_private; 508 rte_iova_t info_ring_iova; 509 uint32_t phys_low, phys_high; 510 511 if (d->info_ring != NULL) 512 return 0; /* Already configured. */ 513 514 /* Allocate InfoRing */ 515 d->info_ring = rte_zmalloc_socket("Info Ring", ACC_INFO_RING_NUM_ENTRIES * 516 sizeof(*d->info_ring), RTE_CACHE_LINE_SIZE, dev->data->socket_id); 517 if (d->info_ring == NULL) { 518 rte_bbdev_log(ERR, 519 "Failed to allocate Info Ring for %s:%u", 520 dev->device->driver->name, 521 dev->data->dev_id); 522 return -ENOMEM; 523 } 524 info_ring_iova = rte_malloc_virt2iova(d->info_ring); 525 526 /* Setup Info Ring. */ 527 phys_high = (uint32_t)(info_ring_iova >> 32); 528 phys_low = (uint32_t)(info_ring_iova); 529 acc_reg_write(d, d->reg_addr->info_ring_hi, phys_high); 530 acc_reg_write(d, d->reg_addr->info_ring_lo, phys_low); 531 if (d->device_variant == VRB1_VARIANT) 532 acc_reg_write(d, d->reg_addr->info_ring_en, VRB1_REG_IRQ_EN_ALL); 533 else 534 acc_reg_write(d, d->reg_addr->info_ring_en, VRB2_REG_IRQ_EN_ALL); 535 d->info_ring_head = (acc_reg_read(d, d->reg_addr->info_ring_ptr) & 536 0xFFF) / sizeof(union acc_info_ring_data); 537 return 0; 538 } 539 540 541 /* Allocate 64MB memory used for all software rings. */ 542 static int 543 vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id) 544 { 545 uint32_t phys_low, phys_high, value; 546 struct acc_device *d = dev->data->dev_private; 547 uint16_t queues_per_op, i; 548 int ret, max_queues = 0; 549 550 if (d->pf_device && !d->acc_conf.pf_mode_en) { 551 rte_bbdev_log(NOTICE, 552 "%s has PF mode disabled. This PF can't be used.", 553 dev->data->name); 554 return -ENODEV; 555 } 556 if (!d->pf_device && d->acc_conf.pf_mode_en) { 557 rte_bbdev_log(NOTICE, 558 "%s has PF mode enabled. This VF can't be used.", 559 dev->data->name); 560 return -ENODEV; 561 } 562 563 if (!vrb_check_device_enable(dev)) { 564 rte_bbdev_log(NOTICE, "%s has no queue enabled and can't be used.", 565 dev->data->name); 566 return -ENODEV; 567 } 568 569 if (d->device_variant == VRB1_VARIANT) { 570 alloc_sw_rings_min_mem(dev, d, num_queues, socket_id); 571 572 /* If minimal memory space approach failed, then allocate 573 * the 2 * 64MB block for the sw rings. 574 */ 575 if (d->sw_rings == NULL) 576 alloc_2x64mb_sw_rings_mem(dev, d, socket_id); 577 578 if (d->sw_rings == NULL) { 579 rte_bbdev_log(NOTICE, "Failure allocating sw_rings memory"); 580 return -ENOMEM; 581 } 582 } else if (d->device_variant == VRB2_VARIANT) { 583 queues_per_op = RTE_MIN(VRB2_MAX_Q_PER_OP, num_queues); 584 for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) { 585 alloc_sw_rings_min_mem(dev, d, queues_per_op, socket_id); 586 if (d->sw_rings == NULL) { 587 rte_bbdev_log(NOTICE, "Failure allocating sw_rings memory %d", i); 588 return -ENOMEM; 589 } 590 /* Moves the pointer to the relevant array. */ 591 d->sw_rings_array[i] = d->sw_rings; 592 d->sw_rings_iova_array[i] = d->sw_rings_iova; 593 d->sw_rings = NULL; 594 d->sw_rings_base = NULL; 595 d->sw_rings_iova = 0; 596 d->queue_index[i] = 0; 597 } 598 } 599 600 /* Read the populated cfg from device registers. 
*/ 601 fetch_acc_config(dev); 602 603 /* Start Pmon */ 604 for (value = 0; value <= 2; value++) { 605 acc_reg_write(d, d->reg_addr->pmon_ctrl_a, value); 606 acc_reg_write(d, d->reg_addr->pmon_ctrl_b, value); 607 acc_reg_write(d, d->reg_addr->pmon_ctrl_c, value); 608 } 609 610 /* Release AXI from PF. */ 611 if (d->pf_device) 612 acc_reg_write(d, VRB1_PfDmaAxiControl, 1); 613 614 if (d->device_variant == VRB1_VARIANT) { 615 /* Configure device with the base address for DMA descriptor rings. 616 * Same descriptor rings used for UL and DL DMA Engines. 617 * Note : Assuming only VF0 bundle is used for PF mode. 618 */ 619 phys_high = (uint32_t)(d->sw_rings_iova >> 32); 620 phys_low = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1)); 621 acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high); 622 acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low); 623 acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high); 624 acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low); 625 acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high); 626 acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low); 627 acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high); 628 acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low); 629 acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high); 630 acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low); 631 } else if (d->device_variant == VRB2_VARIANT) { 632 /* Configure device with the base address for DMA descriptor rings. 633 * Different ring buffer used for each operation type. 634 * Note : Assuming only VF0 bundle is used for PF mode. 635 */ 636 acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, 637 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC] >> 32)); 638 acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, 639 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC] 640 & ~(ACC_SIZE_64MBYTE - 1))); 641 acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, 642 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC] >> 32)); 643 acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, 644 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC] 645 & ~(ACC_SIZE_64MBYTE - 1))); 646 acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, 647 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC] >> 32)); 648 acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, 649 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC] 650 & ~(ACC_SIZE_64MBYTE - 1))); 651 acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, 652 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC] >> 32)); 653 acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, 654 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC] 655 & ~(ACC_SIZE_64MBYTE - 1))); 656 acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, 657 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT] >> 32)); 658 acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, 659 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT] 660 & ~(ACC_SIZE_64MBYTE - 1))); 661 acc_reg_write(d, d->reg_addr->dma_ring_mld_hi, 662 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS] >> 32)); 663 acc_reg_write(d, d->reg_addr->dma_ring_mld_lo, 664 (uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS] 665 & ~(ACC_SIZE_64MBYTE - 1))); 666 } 667 668 /* 669 * Configure Ring Size to the max queue ring size 670 * (used for wrapping purpose). 
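	 * As a worked example with hypothetical sizes: a 32 KB sw_ring_size with a
	 * 64 B ACC_RING_SIZE_GRANULARITY gives log2_basic(512) = 9 as the value
	 * written to the ring_size register below.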
671 */ 672 value = log2_basic(d->sw_ring_size / ACC_RING_SIZE_GRANULARITY); 673 acc_reg_write(d, d->reg_addr->ring_size, value); 674 675 if (d->device_variant == VRB1_VARIANT) 676 max_queues = VRB1_NUM_QGRPS * VRB1_NUM_AQS; 677 else if (d->device_variant == VRB2_VARIANT) 678 max_queues = VRB2_NUM_QGRPS * VRB2_NUM_AQS; 679 680 /* Configure tail pointer for use when SDONE enabled. */ 681 if (d->tail_ptrs == NULL) 682 d->tail_ptrs = rte_zmalloc_socket(dev->device->driver->name, 683 max_queues * sizeof(uint32_t), 684 RTE_CACHE_LINE_SIZE, socket_id); 685 if (d->tail_ptrs == NULL) { 686 rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u", 687 dev->device->driver->name, 688 dev->data->dev_id); 689 ret = -ENOMEM; 690 goto free_sw_rings; 691 } 692 d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs); 693 694 phys_high = (uint32_t)(d->tail_ptr_iova >> 32); 695 phys_low = (uint32_t)(d->tail_ptr_iova); 696 { 697 acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high); 698 acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low); 699 acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high); 700 acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low); 701 acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high); 702 acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low); 703 acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high); 704 acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low); 705 acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high); 706 acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low); 707 if (d->device_variant == VRB2_VARIANT) { 708 acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high); 709 acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low); 710 } 711 } 712 713 ret = allocate_info_ring(dev); 714 if (ret < 0) { 715 rte_bbdev_log(ERR, "Failed to allocate info_ring for %s:%u", 716 dev->device->driver->name, 717 dev->data->dev_id); 718 /* Continue */ 719 } 720 721 if (d->harq_layout == NULL) 722 d->harq_layout = rte_zmalloc_socket("HARQ Layout", 723 ACC_HARQ_LAYOUT * sizeof(*d->harq_layout), 724 RTE_CACHE_LINE_SIZE, dev->data->socket_id); 725 if (d->harq_layout == NULL) { 726 rte_bbdev_log(ERR, "Failed to allocate harq_layout for %s:%u", 727 dev->device->driver->name, 728 dev->data->dev_id); 729 ret = -ENOMEM; 730 goto free_tail_ptrs; 731 } 732 733 /* Mark as configured properly */ 734 d->configured = true; 735 vrb_vf2pf(d, ACC_VF2PF_USING_VF); 736 737 rte_bbdev_log_debug( 738 "Device (%s) configured sw_rings = %p, sw_rings_iova = %#" 739 PRIx64, dev->data->name, d->sw_rings, d->sw_rings_iova); 740 return 0; 741 742 free_tail_ptrs: 743 rte_free(d->tail_ptrs); 744 d->tail_ptrs = NULL; 745 free_sw_rings: 746 if (d->device_variant == VRB1_VARIANT) { 747 rte_free(d->sw_rings_base); 748 d->sw_rings_base = NULL; 749 d->sw_rings = NULL; 750 } else if (d->device_variant == VRB2_VARIANT) { 751 for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) { 752 rte_free(d->sw_rings_array[i]); 753 d->sw_rings_array[i] = 0; 754 } 755 } 756 757 return ret; 758 } 759 760 static int 761 vrb_intr_enable(struct rte_bbdev *dev) 762 { 763 int ret; 764 struct acc_device *d = dev->data->dev_private; 765 766 if (d->device_variant == VRB1_VARIANT) { 767 /* On VRB1: cannot enable MSI/IR to avoid potential back-pressure corner case. */ 768 rte_bbdev_log(ERR, "VRB1 (%s) doesn't support any MSI/MSI-X interrupt", 769 dev->data->name); 770 return -ENOTSUP; 771 } 772 773 /* 774 * MSI/MSI-X are supported. 775 * Option controlled by vfio-intr through EAL parameter. 
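	 * For instance, launching the application with the EAL option
	 * --vfio-intr=msix selects the RTE_INTR_HANDLE_VFIO_MSIX branch below
	 * (illustrative usage; legacy, msi or msix can be requested).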
776 */ 777 if (rte_intr_type_get(dev->intr_handle) == RTE_INTR_HANDLE_VFIO_MSI) { 778 779 ret = allocate_info_ring(dev); 780 if (ret < 0) { 781 rte_bbdev_log(ERR, 782 "Couldn't allocate info ring for device: %s", 783 dev->data->name); 784 return ret; 785 } 786 ret = rte_intr_enable(dev->intr_handle); 787 if (ret < 0) { 788 rte_bbdev_log(ERR, 789 "Couldn't enable interrupts for device: %s", 790 dev->data->name); 791 rte_free(d->info_ring); 792 d->info_ring = NULL; 793 return ret; 794 } 795 ret = rte_intr_callback_register(dev->intr_handle, 796 vrb_dev_interrupt_handler, dev); 797 if (ret < 0) { 798 rte_bbdev_log(ERR, 799 "Couldn't register interrupt callback for device: %s", 800 dev->data->name); 801 rte_free(d->info_ring); 802 d->info_ring = NULL; 803 return ret; 804 } 805 806 return 0; 807 } else if (rte_intr_type_get(dev->intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX) { 808 int i, max_queues; 809 struct acc_device *acc_dev = dev->data->dev_private; 810 811 ret = allocate_info_ring(dev); 812 if (ret < 0) { 813 rte_bbdev_log(ERR, 814 "Couldn't allocate info ring for device: %s", 815 dev->data->name); 816 return ret; 817 } 818 819 if (d->device_variant == VRB1_VARIANT) { 820 if (acc_dev->pf_device) 821 max_queues = VRB1_MAX_PF_MSIX; 822 else 823 max_queues = VRB1_MAX_VF_MSIX; 824 } else { 825 if (acc_dev->pf_device) 826 max_queues = VRB2_MAX_PF_MSIX; 827 else 828 max_queues = VRB2_MAX_VF_MSIX; 829 } 830 831 if (rte_intr_efd_enable(dev->intr_handle, max_queues)) { 832 rte_bbdev_log(ERR, "Failed to create fds for %u queues", 833 dev->data->num_queues); 834 return -1; 835 } 836 837 for (i = 0; i < max_queues; ++i) { 838 if (rte_intr_efds_index_set(dev->intr_handle, i, 839 rte_intr_fd_get(dev->intr_handle))) 840 return -rte_errno; 841 } 842 843 if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec", 844 dev->data->num_queues)) { 845 rte_bbdev_log(ERR, "Failed to allocate %u vectors", 846 dev->data->num_queues); 847 return -ENOMEM; 848 } 849 850 ret = rte_intr_enable(dev->intr_handle); 851 852 if (ret < 0) { 853 rte_bbdev_log(ERR, 854 "Couldn't enable interrupts for device: %s", 855 dev->data->name); 856 rte_free(d->info_ring); 857 d->info_ring = NULL; 858 return ret; 859 } 860 ret = rte_intr_callback_register(dev->intr_handle, 861 vrb_dev_interrupt_handler, dev); 862 if (ret < 0) { 863 rte_bbdev_log(ERR, 864 "Couldn't register interrupt callback for device: %s", 865 dev->data->name); 866 rte_free(d->info_ring); 867 d->info_ring = NULL; 868 return ret; 869 } 870 871 return 0; 872 } 873 874 rte_bbdev_log(ERR, "Device (%s) supports only VFIO MSI/MSI-X interrupts", 875 dev->data->name); 876 return -ENOTSUP; 877 } 878 879 /* Free memory used for software rings. */ 880 static int 881 vrb_dev_close(struct rte_bbdev *dev) 882 { 883 int i; 884 struct acc_device *d = dev->data->dev_private; 885 886 vrb_check_ir(d); 887 if (d->device_variant == VRB1_VARIANT) { 888 rte_free(d->tail_ptrs); 889 rte_free(d->info_ring); 890 rte_free(d->sw_rings_base); 891 rte_free(d->harq_layout); 892 d->tail_ptrs = NULL; 893 d->info_ring = NULL; 894 d->sw_rings_base = NULL; 895 d->sw_rings = NULL; 896 d->harq_layout = NULL; 897 } else if (d->device_variant == VRB2_VARIANT) { 898 rte_free(d->tail_ptrs); 899 rte_free(d->info_ring); 900 rte_free(d->harq_layout); 901 d->tail_ptrs = NULL; 902 d->info_ring = NULL; 903 d->harq_layout = NULL; 904 for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) { 905 rte_free(d->sw_rings_array[i]); 906 d->sw_rings_array[i] = NULL; 907 } 908 } 909 /* Ensure all in flight HW transactions are completed. 
*/ 910 usleep(ACC_LONG_WAIT); 911 return 0; 912 } 913 914 /** 915 * Report a queue index which is free. 916 * Return 0 to 16k for a valid queue_idx or -1 when no queue is available. 917 * Note : Only supporting VF0 Bundle for PF mode. 918 */ 919 static int 920 vrb_find_free_queue_idx(struct rte_bbdev *dev, 921 const struct rte_bbdev_queue_conf *conf) 922 { 923 struct acc_device *d = dev->data->dev_private; 924 int op_2_acc[7] = {0, UL_4G, DL_4G, UL_5G, DL_5G, FFT, MLD}; 925 int acc = op_2_acc[conf->op_type]; 926 struct rte_acc_queue_topology *qtop = NULL; 927 uint16_t group_idx; 928 uint64_t aq_idx; 929 930 qtopFromAcc(&qtop, acc, &(d->acc_conf)); 931 if (qtop == NULL) 932 return -1; 933 /* Identify matching QGroup Index which are sorted in priority order. */ 934 group_idx = qtop->first_qgroup_index + conf->priority; 935 if (group_idx >= d->num_qgroups || 936 conf->priority >= qtop->num_qgroups) { 937 rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u", 938 dev->data->name, conf->priority); 939 return -1; 940 } 941 /* Find a free AQ_idx. */ 942 for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) { 943 if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) == 0) { 944 /* Mark the Queue as assigned. */ 945 d->q_assigned_bit_map[group_idx] |= (1ULL << aq_idx); 946 /* Report the AQ Index. */ 947 return queue_index(group_idx, aq_idx, d->device_variant); 948 } 949 } 950 rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u", 951 dev->data->name, conf->priority); 952 return -1; 953 } 954 955 /* Setup device queue. */ 956 static int 957 vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id, 958 const struct rte_bbdev_queue_conf *conf) 959 { 960 struct acc_device *d = dev->data->dev_private; 961 struct acc_queue *q; 962 int32_t q_idx; 963 int ret; 964 union acc_dma_desc *desc = NULL; 965 unsigned int desc_idx, b_idx; 966 int fcw_len; 967 968 if (d == NULL) { 969 rte_bbdev_log(ERR, "Undefined device"); 970 return -ENODEV; 971 } 972 /* Allocate the queue data structure. */ 973 q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q), 974 RTE_CACHE_LINE_SIZE, conf->socket); 975 if (q == NULL) { 976 rte_bbdev_log(ERR, "Failed to allocate queue memory"); 977 return -ENOMEM; 978 } 979 980 q->d = d; 981 if (d->device_variant == VRB1_VARIANT) { 982 q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id)); 983 q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id); 984 } else if (d->device_variant == VRB2_VARIANT) { 985 q->ring_addr = RTE_PTR_ADD(d->sw_rings_array[conf->op_type], 986 (d->sw_ring_size * d->queue_index[conf->op_type])); 987 q->ring_addr_iova = d->sw_rings_iova_array[conf->op_type] + 988 (d->sw_ring_size * d->queue_index[conf->op_type]); 989 d->queue_index[conf->op_type]++; 990 } 991 992 /* Prepare the Ring with default descriptor format. */ 993 switch (conf->op_type) { 994 case RTE_BBDEV_OP_LDPC_ENC: 995 fcw_len = ACC_FCW_LE_BLEN; 996 break; 997 case RTE_BBDEV_OP_LDPC_DEC: 998 fcw_len = ACC_FCW_LD_BLEN; 999 break; 1000 case RTE_BBDEV_OP_TURBO_DEC: 1001 fcw_len = ACC_FCW_TD_BLEN; 1002 break; 1003 case RTE_BBDEV_OP_TURBO_ENC: 1004 fcw_len = ACC_FCW_TE_BLEN; 1005 break; 1006 case RTE_BBDEV_OP_FFT: 1007 fcw_len = ACC_FCW_FFT_BLEN; 1008 if (q->d->device_variant == VRB2_VARIANT) 1009 fcw_len = ACC_FCW_FFT_BLEN_VRB2; 1010 break; 1011 case RTE_BBDEV_OP_MLDTS: 1012 fcw_len = ACC_FCW_MLDTS_BLEN; 1013 break; 1014 default: 1015 /* NOT REACHED. 
*/ 1016 fcw_len = 0; 1017 rte_bbdev_log(ERR, "Unexpected error in %s using type %d", __func__, conf->op_type); 1018 break; 1019 } 1020 1021 for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) { 1022 desc = q->ring_addr + desc_idx; 1023 desc->req.word0 = ACC_DMA_DESC_TYPE; 1024 desc->req.word1 = 0; /**< Timestamp. */ 1025 desc->req.word2 = 0; 1026 desc->req.word3 = 0; 1027 uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 1028 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset; 1029 desc->req.data_ptrs[0].blen = fcw_len; 1030 desc->req.data_ptrs[0].blkid = ACC_DMA_BLKID_FCW; 1031 desc->req.data_ptrs[0].last = 0; 1032 desc->req.data_ptrs[0].dma_ext = 0; 1033 for (b_idx = 1; b_idx < ACC_DMA_MAX_NUM_POINTERS - 1; b_idx++) { 1034 desc->req.data_ptrs[b_idx].blkid = ACC_DMA_BLKID_IN; 1035 desc->req.data_ptrs[b_idx].last = 1; 1036 desc->req.data_ptrs[b_idx].dma_ext = 0; 1037 b_idx++; 1038 desc->req.data_ptrs[b_idx].blkid = 1039 ACC_DMA_BLKID_OUT_ENC; 1040 desc->req.data_ptrs[b_idx].last = 1; 1041 desc->req.data_ptrs[b_idx].dma_ext = 0; 1042 } 1043 /* Preset some fields of LDPC FCW. */ 1044 desc->req.fcw_ld.FCWversion = ACC_FCW_VER; 1045 desc->req.fcw_ld.gain_i = 1; 1046 desc->req.fcw_ld.gain_h = 1; 1047 } 1048 1049 q->lb_in = rte_zmalloc_socket(dev->device->driver->name, 1050 RTE_CACHE_LINE_SIZE, 1051 RTE_CACHE_LINE_SIZE, conf->socket); 1052 if (q->lb_in == NULL) { 1053 rte_bbdev_log(ERR, "Failed to allocate lb_in memory"); 1054 ret = -ENOMEM; 1055 goto free_q; 1056 } 1057 q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in); 1058 q->lb_out = rte_zmalloc_socket(dev->device->driver->name, 1059 RTE_CACHE_LINE_SIZE, 1060 RTE_CACHE_LINE_SIZE, conf->socket); 1061 if (q->lb_out == NULL) { 1062 rte_bbdev_log(ERR, "Failed to allocate lb_out memory"); 1063 ret = -ENOMEM; 1064 goto free_lb_in; 1065 } 1066 q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out); 1067 q->companion_ring_addr = rte_zmalloc_socket(dev->device->driver->name, 1068 d->sw_ring_max_depth * sizeof(*q->companion_ring_addr), 1069 RTE_CACHE_LINE_SIZE, conf->socket); 1070 if (q->companion_ring_addr == NULL) { 1071 rte_bbdev_log(ERR, "Failed to allocate companion_ring memory"); 1072 ret = -ENOMEM; 1073 goto free_lb_out; 1074 } 1075 1076 /* 1077 * Software queue ring wraps synchronously with the HW when it reaches 1078 * the boundary of the maximum allocated queue size, no matter what the 1079 * sw queue size is. This wrapping is guarded by setting the wrap_mask 1080 * to represent the maximum queue size as allocated at the time when 1081 * the device has been setup (in configure()). 1082 * 1083 * The queue depth is set to the queue size value (conf->queue_size). 1084 * This limits the occupancy of the queue at any point of time, so that 1085 * the queue does not get swamped with enqueue requests. 
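	 * As a numeric illustration with hypothetical values: with
	 * d->sw_ring_max_depth = 1024 and conf->queue_size = 128, the wrap mask
	 * below is 1023 while at most 128 operations may be outstanding at once.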
1086 */ 1087 q->sw_ring_depth = conf->queue_size; 1088 q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1; 1089 1090 q->op_type = conf->op_type; 1091 1092 q_idx = vrb_find_free_queue_idx(dev, conf); 1093 if (q_idx == -1) { 1094 ret = -EINVAL; 1095 goto free_companion_ring_addr; 1096 } 1097 1098 q->fcw_ring = rte_zmalloc_socket(dev->device->driver->name, 1099 ACC_MAX_FCW_SIZE * d->sw_ring_max_depth, 1100 RTE_CACHE_LINE_SIZE, conf->socket); 1101 if (q->fcw_ring == NULL) { 1102 rte_bbdev_log(ERR, "Failed to allocate fcw_ring memory"); 1103 ret = -ENOMEM; 1104 goto free_companion_ring_addr; 1105 } 1106 q->fcw_ring_addr_iova = rte_malloc_virt2iova(q->fcw_ring); 1107 1108 /* For FFT we need to store the FCW separately */ 1109 if (conf->op_type == RTE_BBDEV_OP_FFT) { 1110 for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) { 1111 desc = q->ring_addr + desc_idx; 1112 desc->req.data_ptrs[0].address = q->fcw_ring_addr_iova + 1113 desc_idx * ACC_MAX_FCW_SIZE; 1114 } 1115 } 1116 1117 q->qgrp_id = qg_from_q(q_idx, d->device_variant); 1118 q->vf_id = vf_from_q(q_idx, d->device_variant); 1119 q->aq_id = aq_from_q(q_idx, d->device_variant); 1120 1121 q->aq_depth = 0; 1122 if (conf->op_type == RTE_BBDEV_OP_TURBO_DEC) 1123 q->aq_depth = (1 << d->acc_conf.q_ul_4g.aq_depth_log2); 1124 else if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC) 1125 q->aq_depth = (1 << d->acc_conf.q_dl_4g.aq_depth_log2); 1126 else if (conf->op_type == RTE_BBDEV_OP_LDPC_DEC) 1127 q->aq_depth = (1 << d->acc_conf.q_ul_5g.aq_depth_log2); 1128 else if (conf->op_type == RTE_BBDEV_OP_LDPC_ENC) 1129 q->aq_depth = (1 << d->acc_conf.q_dl_5g.aq_depth_log2); 1130 else if (conf->op_type == RTE_BBDEV_OP_FFT) 1131 q->aq_depth = (1 << d->acc_conf.q_fft.aq_depth_log2); 1132 else if (conf->op_type == RTE_BBDEV_OP_MLDTS) 1133 q->aq_depth = (1 << d->acc_conf.q_mld.aq_depth_log2); 1134 1135 q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base, 1136 d->queue_offset(d->pf_device, q->vf_id, q->qgrp_id, q->aq_id)); 1137 1138 rte_bbdev_log_debug( 1139 "Setup dev%u q%u: qgrp_id=%u, vf_id=%u, aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p base %p", 1140 dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id, 1141 q->aq_id, q->aq_depth, q->mmio_reg_enqueue, 1142 d->mmio_base); 1143 1144 dev->data->queues[queue_id].queue_private = q; 1145 return 0; 1146 1147 free_companion_ring_addr: 1148 rte_free(q->companion_ring_addr); 1149 q->companion_ring_addr = NULL; 1150 free_lb_out: 1151 rte_free(q->lb_out); 1152 q->lb_out = NULL; 1153 free_lb_in: 1154 rte_free(q->lb_in); 1155 q->lb_in = NULL; 1156 free_q: 1157 rte_free(q); 1158 q = NULL; 1159 1160 return ret; 1161 } 1162 1163 /* Stop queue and clear counters. 
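 * In-flight operations are not drained: the software tail is simply moved up
 * to the head and all queue statistics are reset to zero.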
*/ 1164 static int 1165 vrb_queue_stop(struct rte_bbdev *dev, uint16_t queue_id) 1166 { 1167 struct acc_queue *q; 1168 1169 q = dev->data->queues[queue_id].queue_private; 1170 rte_bbdev_log(INFO, "Queue Stop %d H/T/D %d %d %x OpType %d", 1171 queue_id, q->sw_ring_head, q->sw_ring_tail, 1172 q->sw_ring_depth, q->op_type); 1173 /* ignore all operations in flight and clear counters */ 1174 q->sw_ring_tail = q->sw_ring_head; 1175 q->aq_enqueued = 0; 1176 q->aq_dequeued = 0; 1177 dev->data->queues[queue_id].queue_stats.enqueued_count = 0; 1178 dev->data->queues[queue_id].queue_stats.dequeued_count = 0; 1179 dev->data->queues[queue_id].queue_stats.enqueue_err_count = 0; 1180 dev->data->queues[queue_id].queue_stats.dequeue_err_count = 0; 1181 dev->data->queues[queue_id].queue_stats.enqueue_warn_count = 0; 1182 dev->data->queues[queue_id].queue_stats.dequeue_warn_count = 0; 1183 dev->data->queues[queue_id].queue_stats.enqueue_depth_avail = 0; 1184 return 0; 1185 } 1186 1187 /* Release queue. */ 1188 static int 1189 vrb_queue_release(struct rte_bbdev *dev, uint16_t q_id) 1190 { 1191 struct acc_device *d = dev->data->dev_private; 1192 struct acc_queue *q = dev->data->queues[q_id].queue_private; 1193 1194 if (q != NULL) { 1195 /* Mark the Queue as un-assigned. */ 1196 d->q_assigned_bit_map[q->qgrp_id] &= (~0ULL - (1 << (uint64_t) q->aq_id)); 1197 rte_free(q->fcw_ring); 1198 rte_free(q->companion_ring_addr); 1199 rte_free(q->lb_in); 1200 rte_free(q->lb_out); 1201 rte_free(q); 1202 dev->data->queues[q_id].queue_private = NULL; 1203 } 1204 1205 return 0; 1206 } 1207 1208 /* Get device info. */ 1209 static void 1210 vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) 1211 { 1212 struct acc_device *d = dev->data->dev_private; 1213 int i; 1214 static const struct rte_bbdev_op_cap vrb1_bbdev_capabilities[] = { 1215 { 1216 .type = RTE_BBDEV_OP_TURBO_DEC, 1217 .cap.turbo_dec = { 1218 .capability_flags = 1219 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | 1220 RTE_BBDEV_TURBO_CRC_TYPE_24B | 1221 RTE_BBDEV_TURBO_DEC_CRC_24B_DROP | 1222 RTE_BBDEV_TURBO_HALF_ITERATION_EVEN | 1223 RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH | 1224 RTE_BBDEV_TURBO_EARLY_TERMINATION | 1225 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | 1226 RTE_BBDEV_TURBO_MAP_DEC | 1227 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP | 1228 RTE_BBDEV_TURBO_DEC_SCATTER_GATHER, 1229 .max_llr_modulus = INT8_MAX, 1230 .num_buffers_src = 1231 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1232 .num_buffers_hard_out = 1233 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1234 .num_buffers_soft_out = 1235 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1236 } 1237 }, 1238 { 1239 .type = RTE_BBDEV_OP_TURBO_ENC, 1240 .cap.turbo_enc = { 1241 .capability_flags = 1242 RTE_BBDEV_TURBO_CRC_24B_ATTACH | 1243 RTE_BBDEV_TURBO_RV_INDEX_BYPASS | 1244 RTE_BBDEV_TURBO_RATE_MATCH | 1245 RTE_BBDEV_TURBO_ENC_SCATTER_GATHER, 1246 .num_buffers_src = 1247 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1248 .num_buffers_dst = 1249 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1250 } 1251 }, 1252 { 1253 .type = RTE_BBDEV_OP_LDPC_ENC, 1254 .cap.ldpc_enc = { 1255 .capability_flags = 1256 RTE_BBDEV_LDPC_RATE_MATCH | 1257 RTE_BBDEV_LDPC_CRC_24B_ATTACH | 1258 RTE_BBDEV_LDPC_INTERLEAVER_BYPASS, 1259 .num_buffers_src = 1260 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1261 .num_buffers_dst = 1262 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1263 } 1264 }, 1265 { 1266 .type = RTE_BBDEV_OP_LDPC_DEC, 1267 .cap.ldpc_dec = { 1268 .capability_flags = 1269 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | 1270 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | 1271 RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | 1272 
RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK | 1273 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 1274 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 1275 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE | 1276 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS | 1277 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER | 1278 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION | 1279 RTE_BBDEV_LDPC_LLR_COMPRESSION, 1280 .llr_size = 8, 1281 .llr_decimals = 1, 1282 .num_buffers_src = 1283 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1284 .num_buffers_hard_out = 1285 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1286 .num_buffers_soft_out = 0, 1287 } 1288 }, 1289 { 1290 .type = RTE_BBDEV_OP_FFT, 1291 .cap.fft = { 1292 .capability_flags = 1293 RTE_BBDEV_FFT_WINDOWING | 1294 RTE_BBDEV_FFT_CS_ADJUSTMENT | 1295 RTE_BBDEV_FFT_DFT_BYPASS | 1296 RTE_BBDEV_FFT_IDFT_BYPASS | 1297 RTE_BBDEV_FFT_WINDOWING_BYPASS, 1298 .num_buffers_src = 1, 1299 .num_buffers_dst = 1, 1300 .fft_windows_num = ACC_MAX_FFT_WIN, 1301 } 1302 }, 1303 RTE_BBDEV_END_OF_CAPABILITIES_LIST() 1304 }; 1305 1306 static const struct rte_bbdev_op_cap vrb2_bbdev_capabilities[] = { 1307 { 1308 .type = RTE_BBDEV_OP_TURBO_DEC, 1309 .cap.turbo_dec = { 1310 .capability_flags = 1311 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | 1312 RTE_BBDEV_TURBO_CRC_TYPE_24B | 1313 RTE_BBDEV_TURBO_DEC_CRC_24B_DROP | 1314 RTE_BBDEV_TURBO_EQUALIZER | 1315 RTE_BBDEV_TURBO_SOFT_OUT_SATURATE | 1316 RTE_BBDEV_TURBO_HALF_ITERATION_EVEN | 1317 RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH | 1318 RTE_BBDEV_TURBO_SOFT_OUTPUT | 1319 RTE_BBDEV_TURBO_EARLY_TERMINATION | 1320 RTE_BBDEV_TURBO_DEC_INTERRUPTS | 1321 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | 1322 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT | 1323 RTE_BBDEV_TURBO_MAP_DEC | 1324 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP | 1325 RTE_BBDEV_TURBO_DEC_SCATTER_GATHER, 1326 .max_llr_modulus = INT8_MAX, 1327 .num_buffers_src = 1328 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1329 .num_buffers_hard_out = 1330 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1331 .num_buffers_soft_out = 1332 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1333 } 1334 }, 1335 { 1336 .type = RTE_BBDEV_OP_TURBO_ENC, 1337 .cap.turbo_enc = { 1338 .capability_flags = 1339 RTE_BBDEV_TURBO_CRC_24B_ATTACH | 1340 RTE_BBDEV_TURBO_RV_INDEX_BYPASS | 1341 RTE_BBDEV_TURBO_RATE_MATCH | 1342 RTE_BBDEV_TURBO_ENC_INTERRUPTS | 1343 RTE_BBDEV_TURBO_ENC_SCATTER_GATHER, 1344 .num_buffers_src = 1345 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1346 .num_buffers_dst = 1347 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1348 } 1349 }, 1350 { 1351 .type = RTE_BBDEV_OP_LDPC_ENC, 1352 .cap.ldpc_enc = { 1353 .capability_flags = 1354 RTE_BBDEV_LDPC_RATE_MATCH | 1355 RTE_BBDEV_LDPC_CRC_24B_ATTACH | 1356 RTE_BBDEV_LDPC_INTERLEAVER_BYPASS | 1357 RTE_BBDEV_LDPC_ENC_INTERRUPTS | 1358 RTE_BBDEV_LDPC_ENC_SCATTER_GATHER | 1359 RTE_BBDEV_LDPC_ENC_CONCATENATION, 1360 .num_buffers_src = 1361 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1362 .num_buffers_dst = 1363 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1364 } 1365 }, 1366 { 1367 .type = RTE_BBDEV_OP_LDPC_DEC, 1368 .cap.ldpc_dec = { 1369 .capability_flags = 1370 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | 1371 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | 1372 RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | 1373 RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK | 1374 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 1375 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 1376 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE | 1377 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS | 1378 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER | 1379 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION | 1380 RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION | 1381 RTE_BBDEV_LDPC_LLR_COMPRESSION | 1382 RTE_BBDEV_LDPC_SOFT_OUT_ENABLE | 1383 RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS | 1384 
RTE_BBDEV_LDPC_DEC_INTERRUPTS, 1385 .llr_size = 8, 1386 .llr_decimals = 1, 1387 .num_buffers_src = 1388 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1389 .num_buffers_hard_out = 1390 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1391 .num_buffers_soft_out = 0, 1392 } 1393 }, 1394 { 1395 .type = RTE_BBDEV_OP_FFT, 1396 .cap.fft = { 1397 .capability_flags = 1398 RTE_BBDEV_FFT_WINDOWING | 1399 RTE_BBDEV_FFT_CS_ADJUSTMENT | 1400 RTE_BBDEV_FFT_DFT_BYPASS | 1401 RTE_BBDEV_FFT_IDFT_BYPASS | 1402 RTE_BBDEV_FFT_FP16_INPUT | 1403 RTE_BBDEV_FFT_FP16_OUTPUT | 1404 RTE_BBDEV_FFT_POWER_MEAS | 1405 RTE_BBDEV_FFT_WINDOWING_BYPASS | 1406 RTE_BBDEV_FFT_TIMING_OFFSET_PER_CS | 1407 RTE_BBDEV_FFT_TIMING_ERROR | 1408 RTE_BBDEV_FFT_DEWINDOWING | 1409 RTE_BBDEV_FFT_FREQ_RESAMPLING, 1410 .num_buffers_src = 1, 1411 .num_buffers_dst = 1, 1412 .fft_windows_num = ACC_MAX_FFT_WIN, 1413 } 1414 }, 1415 { 1416 .type = RTE_BBDEV_OP_MLDTS, 1417 .cap.mld = { 1418 .capability_flags = 1419 RTE_BBDEV_MLDTS_REP, 1420 .num_buffers_src = 1421 1, 1422 .num_buffers_dst = 1423 1, 1424 } 1425 }, 1426 RTE_BBDEV_END_OF_CAPABILITIES_LIST() 1427 }; 1428 1429 static struct rte_bbdev_queue_conf default_queue_conf; 1430 default_queue_conf.socket = dev->data->socket_id; 1431 default_queue_conf.queue_size = ACC_MAX_QUEUE_DEPTH; 1432 1433 dev_info->driver_name = dev->device->driver->name; 1434 1435 /* Read and save the populated config from registers. */ 1436 fetch_acc_config(dev); 1437 /* Check the status of device. */ 1438 dev_info->device_status = vrb_device_status(dev); 1439 dev_info->fft_window_width = d->fft_window_width; 1440 1441 /* Exposed number of queues. */ 1442 dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0; 1443 dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_aqs_per_groups * 1444 d->acc_conf.q_ul_4g.num_qgroups; 1445 dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_aqs_per_groups * 1446 d->acc_conf.q_dl_4g.num_qgroups; 1447 dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_aqs_per_groups * 1448 d->acc_conf.q_ul_5g.num_qgroups; 1449 dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_aqs_per_groups * 1450 d->acc_conf.q_dl_5g.num_qgroups; 1451 dev_info->num_queues[RTE_BBDEV_OP_FFT] = d->acc_conf.q_fft.num_aqs_per_groups * 1452 d->acc_conf.q_fft.num_qgroups; 1453 dev_info->num_queues[RTE_BBDEV_OP_MLDTS] = d->acc_conf.q_mld.num_aqs_per_groups * 1454 d->acc_conf.q_mld.num_qgroups; 1455 dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_qgroups; 1456 dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_qgroups; 1457 dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_qgroups; 1458 dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_qgroups; 1459 dev_info->queue_priority[RTE_BBDEV_OP_FFT] = d->acc_conf.q_fft.num_qgroups; 1460 dev_info->queue_priority[RTE_BBDEV_OP_MLDTS] = d->acc_conf.q_mld.num_qgroups; 1461 dev_info->max_num_queues = 0; 1462 for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++) { 1463 if (unlikely(dev_info->num_queues[i] > VRB2_MAX_Q_PER_OP)) { 1464 rte_bbdev_log(ERR, "Unexpected number of queues %d exposed for op %d", 1465 dev_info->num_queues[i], i); 1466 dev_info->num_queues[i] = VRB2_MAX_Q_PER_OP; 1467 } 1468 dev_info->max_num_queues += dev_info->num_queues[i]; 1469 } 1470 dev_info->queue_size_lim = ACC_MAX_QUEUE_DEPTH; 1471 dev_info->hardware_accelerated = true; 1472 dev_info->max_dl_queue_priority = 1473 d->acc_conf.q_dl_4g.num_qgroups - 1; 1474 
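	/* Queue priority levels map onto the configured 4G queue groups
	 * (see vrb_find_free_queue_idx()), hence num_qgroups - 1 as the maximum. */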
dev_info->max_ul_queue_priority = 1475 d->acc_conf.q_ul_4g.num_qgroups - 1; 1476 dev_info->default_queue_conf = default_queue_conf; 1477 dev_info->cpu_flag_reqs = NULL; 1478 dev_info->min_alignment = 1; 1479 if (d->device_variant == VRB1_VARIANT) 1480 dev_info->capabilities = vrb1_bbdev_capabilities; 1481 else 1482 dev_info->capabilities = vrb2_bbdev_capabilities; 1483 dev_info->harq_buffer_size = 0; 1484 1485 vrb_check_ir(d); 1486 } 1487 1488 static int 1489 vrb_queue_intr_enable(struct rte_bbdev *dev, uint16_t queue_id) 1490 { 1491 struct acc_queue *q = dev->data->queues[queue_id].queue_private; 1492 1493 if (rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSI && 1494 rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSIX) 1495 return -ENOTSUP; 1496 1497 q->irq_enable = 1; 1498 return 0; 1499 } 1500 1501 static int 1502 vrb_queue_intr_disable(struct rte_bbdev *dev, uint16_t queue_id) 1503 { 1504 struct acc_queue *q = dev->data->queues[queue_id].queue_private; 1505 1506 if (rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSI && 1507 rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSIX) 1508 return -ENOTSUP; 1509 1510 q->irq_enable = 0; 1511 return 0; 1512 } 1513 1514 static int 1515 vrb_queue_ops_dump(struct rte_bbdev *dev, uint16_t queue_id, FILE *f) 1516 { 1517 struct acc_queue *q = dev->data->queues[queue_id].queue_private; 1518 struct rte_bbdev_dec_op *op; 1519 uint16_t i, int_nb; 1520 volatile union acc_info_ring_data *ring_data; 1521 uint16_t info_ring_head = q->d->info_ring_head; 1522 static char str[1024]; 1523 1524 if (f == NULL) { 1525 rte_bbdev_log(ERR, "Invalid File input"); 1526 return -EINVAL; 1527 } 1528 1529 /** Print generic information on queue status. */ 1530 fprintf(f, "Dump of operations %s on Queue %d by %s\n", 1531 rte_bbdev_op_type_str(q->op_type), queue_id, dev->device->driver->name); 1532 fprintf(f, " AQ Enqueued %d Dequeued %d Depth %d - Available Enq %d Deq %d\n", 1533 q->aq_enqueued, q->aq_dequeued, q->aq_depth, 1534 acc_ring_avail_enq(q), acc_ring_avail_deq(q)); 1535 1536 /** Print information captured in the info ring. */ 1537 if (q->d->info_ring != NULL) { 1538 fprintf(f, "Info Ring Buffer - Head %d\n", q->d->info_ring_head); 1539 ring_data = q->d->info_ring + (q->d->info_ring_head & ACC_INFO_RING_MASK); 1540 while (ring_data->valid) { 1541 int_nb = int_from_ring(*ring_data, q->d->device_variant); 1542 if ((int_nb < ACC_PF_INT_DMA_DL_DESC_IRQ) || ( 1543 int_nb > ACC_PF_INT_DMA_MLD_DESC_IRQ)) { 1544 fprintf(f, " InfoRing: ITR:%d Info:0x%x", 1545 int_nb, ring_data->detailed_info); 1546 /* Initialize Info Ring entry and move forward. */ 1547 ring_data->valid = 0; 1548 } 1549 info_ring_head++; 1550 ring_data = q->d->info_ring + (info_ring_head & ACC_INFO_RING_MASK); 1551 } 1552 } 1553 1554 fprintf(f, "Ring Content - Head %d Tail %d Depth %d\n", 1555 q->sw_ring_head, q->sw_ring_tail, q->sw_ring_depth); 1556 /** Print information about each operation in the software ring. 
*/ 1557 for (i = 0; i < q->sw_ring_depth; ++i) { 1558 op = (q->ring_addr + i)->req.op_addr; 1559 if (op != NULL) 1560 fprintf(f, " %d\tn %d %s", i, (q->ring_addr + i)->req.numCBs, 1561 rte_bbdev_ops_param_string(op, q->op_type, 1562 str, sizeof(str))); 1563 } 1564 1565 fprintf(f, "== End of File ==\n"); 1566 1567 return 0; 1568 } 1569 1570 static const struct rte_bbdev_ops vrb_bbdev_ops = { 1571 .setup_queues = vrb_setup_queues, 1572 .intr_enable = vrb_intr_enable, 1573 .close = vrb_dev_close, 1574 .info_get = vrb_dev_info_get, 1575 .queue_setup = vrb_queue_setup, 1576 .queue_release = vrb_queue_release, 1577 .queue_stop = vrb_queue_stop, 1578 .queue_intr_enable = vrb_queue_intr_enable, 1579 .queue_intr_disable = vrb_queue_intr_disable, 1580 .queue_ops_dump = vrb_queue_ops_dump 1581 }; 1582 1583 /* PCI PF address map. */ 1584 static struct rte_pci_id pci_id_vrb_pf_map[] = { 1585 { 1586 RTE_PCI_DEVICE(RTE_VRB1_VENDOR_ID, RTE_VRB1_PF_DEVICE_ID) 1587 }, 1588 { 1589 RTE_PCI_DEVICE(RTE_VRB2_VENDOR_ID, RTE_VRB2_PF_DEVICE_ID) 1590 }, 1591 {.device_id = 0}, 1592 }; 1593 1594 /* PCI VF address map. */ 1595 static struct rte_pci_id pci_id_vrb_vf_map[] = { 1596 { 1597 RTE_PCI_DEVICE(RTE_VRB1_VENDOR_ID, RTE_VRB1_VF_DEVICE_ID) 1598 }, 1599 { 1600 RTE_PCI_DEVICE(RTE_VRB2_VENDOR_ID, RTE_VRB2_VF_DEVICE_ID) 1601 }, 1602 {.device_id = 0}, 1603 }; 1604 1605 /* Fill in a frame control word for turbo decoding. */ 1606 static inline void 1607 vrb_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc_fcw_td *fcw) 1608 { 1609 fcw->fcw_ver = 1; 1610 fcw->num_maps = ACC_FCW_TD_AUTOMAP; 1611 fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags, 1612 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE); 1613 if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1614 fcw->c = op->turbo_dec.tb_params.c; 1615 fcw->k_pos = op->turbo_dec.tb_params.k_pos; 1616 } else { 1617 fcw->c = 1; 1618 fcw->k_pos = op->turbo_dec.cb_params.k; 1619 } 1620 if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 1621 fcw->soft_output_en = 1; 1622 fcw->sw_soft_out_dis = 0; 1623 fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags, 1624 RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH); 1625 fcw->sw_soft_out_saturation = check_bit(op->turbo_dec.op_flags, 1626 RTE_BBDEV_TURBO_SOFT_OUT_SATURATE); 1627 if (check_bit(op->turbo_dec.op_flags, 1628 RTE_BBDEV_TURBO_EQUALIZER)) { 1629 fcw->bypass_teq = 0; 1630 if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1631 fcw->cab = op->turbo_dec.tb_params.cab; 1632 fcw->ea = op->turbo_dec.tb_params.ea; 1633 fcw->eb = op->turbo_dec.tb_params.eb; 1634 } else { 1635 fcw->ea = op->turbo_dec.cb_params.e; 1636 fcw->eb = op->turbo_dec.cb_params.e; 1637 } 1638 1639 if (op->turbo_dec.rv_index == 0) 1640 fcw->k0_start_col = ACC_FCW_TD_RVIDX_0; 1641 else if (op->turbo_dec.rv_index == 1) 1642 fcw->k0_start_col = ACC_FCW_TD_RVIDX_1; 1643 else if (op->turbo_dec.rv_index == 2) 1644 fcw->k0_start_col = ACC_FCW_TD_RVIDX_2; 1645 else 1646 fcw->k0_start_col = ACC_FCW_TD_RVIDX_3; 1647 } else { 1648 fcw->bypass_teq = 1; 1649 fcw->eb = 64; /* avoid undefined value */ 1650 } 1651 } else { 1652 fcw->soft_output_en = 0; 1653 fcw->sw_soft_out_dis = 1; 1654 fcw->bypass_teq = 0; 1655 } 1656 1657 fcw->code_block_mode = 1; 1658 fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags, 1659 RTE_BBDEV_TURBO_CRC_TYPE_24B); 1660 1661 fcw->ext_td_cold_reg_en = 1; 1662 fcw->raw_decoder_input_on = 0; 1663 fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2); 1664 fcw->min_iter = 2; 1665 fcw->half_iter_on = 
check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_HALF_ITERATION_EVEN); 1666 1667 fcw->early_stop_en = check_bit(op->turbo_dec.op_flags, 1668 RTE_BBDEV_TURBO_EARLY_TERMINATION) & !fcw->soft_output_en; 1669 fcw->ext_scale = 0xF; 1670 } 1671 1672 /* Fill in a frame control word for LDPC decoding. */ 1673 static inline void 1674 vrb_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw, 1675 union acc_harq_layout_data *harq_layout, uint16_t device_variant) 1676 { 1677 uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset; 1678 uint32_t harq_index; 1679 uint32_t l; 1680 1681 fcw->qm = op->ldpc_dec.q_m; 1682 fcw->nfiller = op->ldpc_dec.n_filler; 1683 fcw->BG = (op->ldpc_dec.basegraph - 1); 1684 fcw->Zc = op->ldpc_dec.z_c; 1685 fcw->ncb = op->ldpc_dec.n_cb; 1686 fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph, 1687 op->ldpc_dec.rv_index, op->ldpc_dec.k0); 1688 if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) 1689 fcw->rm_e = op->ldpc_dec.cb_params.e; 1690 else 1691 fcw->rm_e = (op->ldpc_dec.tb_params.r < 1692 op->ldpc_dec.tb_params.cab) ? 1693 op->ldpc_dec.tb_params.ea : 1694 op->ldpc_dec.tb_params.eb; 1695 1696 if (unlikely(check_bit(op->ldpc_dec.op_flags, 1697 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) && 1698 (op->ldpc_dec.harq_combined_input.length == 0))) { 1699 rte_bbdev_log(WARNING, "Null HARQ input size provided"); 1700 /* Disable HARQ input in that case to carry forward. */ 1701 op->ldpc_dec.op_flags ^= RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 1702 } 1703 if (unlikely(fcw->rm_e == 0)) { 1704 rte_bbdev_log(WARNING, "Null E input provided"); 1705 fcw->rm_e = 2; 1706 } 1707 1708 fcw->hcin_en = check_bit(op->ldpc_dec.op_flags, 1709 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); 1710 fcw->hcout_en = check_bit(op->ldpc_dec.op_flags, 1711 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 1712 fcw->crc_select = check_bit(op->ldpc_dec.op_flags, 1713 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK); 1714 fcw->bypass_dec = 0; 1715 fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags, 1716 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS); 1717 if (op->ldpc_dec.q_m == 1) { 1718 fcw->bypass_intlv = 1; 1719 fcw->qm = 2; 1720 } 1721 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)) { 1722 fcw->hcin_decomp_mode = 1; 1723 fcw->hcout_comp_mode = 1; 1724 } else if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION)) { 1725 fcw->hcin_decomp_mode = 4; 1726 fcw->hcout_comp_mode = 4; 1727 } else { 1728 fcw->hcin_decomp_mode = 0; 1729 fcw->hcout_comp_mode = 0; 1730 } 1731 1732 fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags, 1733 RTE_BBDEV_LDPC_LLR_COMPRESSION); 1734 harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset); 1735 if (fcw->hcin_en > 0) { 1736 harq_in_length = op->ldpc_dec.harq_combined_input.length; 1737 if (fcw->hcin_decomp_mode == 1) 1738 harq_in_length = harq_in_length * 8 / 6; 1739 else if (fcw->hcin_decomp_mode == 4) 1740 harq_in_length = harq_in_length * 2; 1741 harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb 1742 - op->ldpc_dec.n_filler); 1743 harq_in_length = RTE_ALIGN_CEIL(harq_in_length, 64); 1744 fcw->hcin_size0 = harq_in_length; 1745 fcw->hcin_offset = 0; 1746 fcw->hcin_size1 = 0; 1747 } else { 1748 fcw->hcin_size0 = 0; 1749 fcw->hcin_offset = 0; 1750 fcw->hcin_size1 = 0; 1751 } 1752 1753 fcw->itmax = op->ldpc_dec.iter_max; 1754 fcw->itstop = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE); 1755 fcw->cnu_algo = ACC_ALGO_MSA; 1756 fcw->synd_precoder = fcw->itstop; 1757 1758 if (device_variant != 
VRB1_VARIANT) { 1759 fcw->so_it = op->ldpc_dec.iter_max; 1760 fcw->so_en = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE); 1761 fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags, 1762 RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS); 1763 fcw->so_bypass_rm = 0; 1764 fcw->minsum_offset = 0; 1765 fcw->dec_llrclip = 0; 1766 } 1767 1768 /* 1769 * These are all implicitly set: 1770 * fcw->synd_post = 0; 1771 * fcw->dec_convllr = 0; 1772 * fcw->hcout_convllr = 0; 1773 * fcw->hcout_size1 = 0; 1774 * fcw->so_it = 0; 1775 * fcw->hcout_offset = 0; 1776 * fcw->negstop_th = 0; 1777 * fcw->negstop_it = 0; 1778 * fcw->negstop_en = 0; 1779 * fcw->gain_i = 1; 1780 * fcw->gain_h = 1; 1781 */ 1782 if (fcw->hcout_en > 0) { 1783 parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8) 1784 * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; 1785 k0_p = (fcw->k0 > parity_offset) ? fcw->k0 - op->ldpc_dec.n_filler : fcw->k0; 1786 ncb_p = fcw->ncb - op->ldpc_dec.n_filler; 1787 l = k0_p + fcw->rm_e; 1788 harq_out_length = (uint16_t) fcw->hcin_size0; 1789 harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l), ncb_p); 1790 harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64); 1791 fcw->hcout_size0 = harq_out_length; 1792 fcw->hcout_size1 = 0; 1793 fcw->hcout_offset = 0; 1794 harq_layout[harq_index].offset = fcw->hcout_offset; 1795 harq_layout[harq_index].size0 = fcw->hcout_size0; 1796 } else { 1797 fcw->hcout_size0 = 0; 1798 fcw->hcout_size1 = 0; 1799 fcw->hcout_offset = 0; 1800 } 1801 1802 /* Force saturation to 6 bits LLR. */ 1803 fcw->saturate_input = 1; 1804 1805 fcw->tb_crc_select = 0; 1806 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK)) 1807 fcw->tb_crc_select = 2; 1808 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) 1809 fcw->tb_crc_select = 1; 1810 } 1811 1812 static inline int 1813 vrb_dma_desc_td_fill(struct rte_bbdev_dec_op *op, 1814 struct acc_dma_req_desc *desc, struct rte_mbuf **input, 1815 struct rte_mbuf *h_output, struct rte_mbuf *s_output, 1816 uint32_t *in_offset, uint32_t *h_out_offset, 1817 uint32_t *s_out_offset, uint32_t *h_out_length, 1818 uint32_t *s_out_length, uint32_t *mbuf_total_left, 1819 uint32_t *seg_total_left, uint8_t r) 1820 { 1821 int next_triplet = 1; /* FCW already done. */ 1822 uint16_t k; 1823 uint16_t crc24_overlap = 0; 1824 uint32_t e, kw; 1825 1826 desc->word0 = ACC_DMA_DESC_TYPE; 1827 desc->word1 = 0; /**< Timestamp could be disabled. */ 1828 desc->word2 = 0; 1829 desc->word3 = 0; 1830 desc->numCBs = 1; 1831 1832 if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1833 k = op->turbo_dec.tb_params.k_pos; 1834 e = (r < op->turbo_dec.tb_params.cab) 1835 ? op->turbo_dec.tb_params.ea 1836 : op->turbo_dec.tb_params.eb; 1837 } else { 1838 k = op->turbo_dec.cb_params.k; 1839 e = op->turbo_dec.cb_params.e; 1840 } 1841 1842 if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 1843 && !check_bit(op->turbo_dec.op_flags, 1844 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP)) 1845 crc24_overlap = 24; 1846 if ((op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) 1847 && check_bit(op->turbo_dec.op_flags, 1848 RTE_BBDEV_TURBO_DEC_CRC_24B_DROP)) 1849 crc24_overlap = 24; 1850 1851 /* Calculates circular buffer size. 1852 * According to 3gpp 36.212 section 5.1.4.2 1853 * Kw = 3 * Kpi, 1854 * where: 1855 * Kpi = nCol * nRow 1856 * where nCol is 32 and nRow can be calculated from: 1857 * D =< nCol * nRow 1858 * where D is the size of each output from turbo encoder block (k + 4). 
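 * The statement below folds this into Kw = 3 * 32 * ceil((k + 4) / 32),
 * which is what RTE_ALIGN_CEIL(k + 4, 32) * 3 computes.
 * Worked example (illustrative only): for the largest LTE code block
 * k = 6144, D = k + 4 = 6148, nRow = ceil(6148 / 32) = 193, hence
 * Kw = 3 * 32 * 193 = 18528 LLRs expected in the input data.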
1859 */ 1860 kw = RTE_ALIGN_CEIL(k + 4, 32) * 3; 1861 1862 if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) { 1863 rte_bbdev_log(ERR, 1864 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u", 1865 *mbuf_total_left, kw); 1866 return -1; 1867 } 1868 1869 next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw, 1870 seg_total_left, next_triplet, 1871 check_bit(op->turbo_dec.op_flags, 1872 RTE_BBDEV_TURBO_DEC_SCATTER_GATHER)); 1873 if (unlikely(next_triplet < 0)) { 1874 rte_bbdev_log(ERR, 1875 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 1876 op); 1877 return -1; 1878 } 1879 desc->data_ptrs[next_triplet - 1].last = 1; 1880 desc->m2dlen = next_triplet; 1881 *mbuf_total_left -= kw; 1882 *h_out_length = ((k - crc24_overlap) >> 3); 1883 next_triplet = acc_dma_fill_blk_type( 1884 desc, h_output, *h_out_offset, 1885 *h_out_length, next_triplet, ACC_DMA_BLKID_OUT_HARD); 1886 if (unlikely(next_triplet < 0)) { 1887 rte_bbdev_log(ERR, 1888 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 1889 op); 1890 return -1; 1891 } 1892 1893 op->turbo_dec.hard_output.length += *h_out_length; 1894 *h_out_offset += *h_out_length; 1895 1896 /* Soft output. */ 1897 if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 1898 if (op->turbo_dec.soft_output.data == 0) { 1899 rte_bbdev_log(ERR, "Soft output is not defined"); 1900 return -1; 1901 } 1902 if (check_bit(op->turbo_dec.op_flags, 1903 RTE_BBDEV_TURBO_EQUALIZER)) 1904 *s_out_length = e; 1905 else 1906 *s_out_length = (k * 3) + 12; 1907 1908 next_triplet = acc_dma_fill_blk_type(desc, s_output, 1909 *s_out_offset, *s_out_length, next_triplet, 1910 ACC_DMA_BLKID_OUT_SOFT); 1911 if (unlikely(next_triplet < 0)) { 1912 rte_bbdev_log(ERR, 1913 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 1914 op); 1915 return -1; 1916 } 1917 1918 op->turbo_dec.soft_output.length += *s_out_length; 1919 *s_out_offset += *s_out_length; 1920 } 1921 1922 desc->data_ptrs[next_triplet - 1].last = 1; 1923 desc->d2mlen = next_triplet - desc->m2dlen; 1924 1925 desc->op_addr = op; 1926 1927 return 0; 1928 } 1929 1930 static inline int 1931 vrb_dma_desc_ld_fill(struct rte_bbdev_dec_op *op, 1932 struct acc_dma_req_desc *desc, 1933 struct rte_mbuf **input, struct rte_mbuf *h_output, 1934 uint32_t *in_offset, uint32_t *h_out_offset, 1935 uint32_t *h_out_length, uint32_t *mbuf_total_left, 1936 uint32_t *seg_total_left, struct acc_fcw_ld *fcw, uint16_t device_variant) 1937 { 1938 struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; 1939 int next_triplet = 1; /* FCW already done. */ 1940 uint32_t input_length; 1941 uint16_t output_length, crc24_overlap = 0; 1942 uint16_t sys_cols, K, h_p_size, h_np_size; 1943 1944 if (device_variant == VRB1_VARIANT) { 1945 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION) || 1946 check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) { 1947 rte_bbdev_log(ERR, 1948 "VRB1 does not support the requested capabilities %x", 1949 op->ldpc_dec.op_flags); 1950 return -1; 1951 } 1952 } 1953 1954 acc_header_init(desc); 1955 1956 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP)) 1957 crc24_overlap = 24; 1958 1959 /* Compute some LDPC BG lengths. */ 1960 input_length = fcw->rm_e; 1961 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_LLR_COMPRESSION)) 1962 input_length = (input_length * 3 + 3) / 4; 1963 sys_cols = (dec->basegraph == 1) ? 
22 : 10; 1964 K = sys_cols * dec->z_c; 1965 output_length = K - dec->n_filler - crc24_overlap; 1966 1967 if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < input_length))) { 1968 rte_bbdev_log(ERR, 1969 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u", 1970 *mbuf_total_left, input_length); 1971 return -1; 1972 } 1973 1974 next_triplet = acc_dma_fill_blk_type_in(desc, input, 1975 in_offset, input_length, 1976 seg_total_left, next_triplet, 1977 check_bit(op->ldpc_dec.op_flags, 1978 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER)); 1979 1980 if (unlikely(next_triplet < 0)) { 1981 rte_bbdev_log(ERR, 1982 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 1983 op); 1984 return -1; 1985 } 1986 1987 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { 1988 if (op->ldpc_dec.harq_combined_input.data == 0) { 1989 rte_bbdev_log(ERR, "HARQ input is not defined"); 1990 return -1; 1991 } 1992 h_p_size = fcw->hcin_size0 + fcw->hcin_size1; 1993 if (fcw->hcin_decomp_mode == 1) 1994 h_p_size = (h_p_size * 3 + 3) / 4; 1995 else if (fcw->hcin_decomp_mode == 4) 1996 h_p_size = h_p_size / 2; 1997 if (op->ldpc_dec.harq_combined_input.data == 0) { 1998 rte_bbdev_log(ERR, "HARQ input is not defined"); 1999 return -1; 2000 } 2001 acc_dma_fill_blk_type( 2002 desc, 2003 op->ldpc_dec.harq_combined_input.data, 2004 op->ldpc_dec.harq_combined_input.offset, 2005 h_p_size, 2006 next_triplet, 2007 ACC_DMA_BLKID_IN_HARQ); 2008 next_triplet++; 2009 } 2010 2011 desc->data_ptrs[next_triplet - 1].last = 1; 2012 desc->m2dlen = next_triplet; 2013 *mbuf_total_left -= input_length; 2014 2015 next_triplet = acc_dma_fill_blk_type(desc, h_output, 2016 *h_out_offset, output_length >> 3, next_triplet, 2017 ACC_DMA_BLKID_OUT_HARD); 2018 2019 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) { 2020 if (op->ldpc_dec.soft_output.data == 0) { 2021 rte_bbdev_log(ERR, "Soft output is not defined"); 2022 return -1; 2023 } 2024 dec->soft_output.length = fcw->rm_e; 2025 acc_dma_fill_blk_type(desc, dec->soft_output.data, dec->soft_output.offset, 2026 fcw->rm_e, next_triplet, ACC_DMA_BLKID_OUT_SOFT); 2027 next_triplet++; 2028 } 2029 2030 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) { 2031 if (op->ldpc_dec.harq_combined_output.data == 0) { 2032 rte_bbdev_log(ERR, "HARQ output is not defined"); 2033 return -1; 2034 } 2035 2036 /* Pruned size of the HARQ. */ 2037 h_p_size = fcw->hcout_size0 + fcw->hcout_size1; 2038 /* Non-Pruned size of the HARQ. */ 2039 h_np_size = fcw->hcout_offset > 0 ? 
2040 fcw->hcout_offset + fcw->hcout_size1 : 2041 h_p_size; 2042 if (fcw->hcin_decomp_mode == 1) { 2043 h_np_size = (h_np_size * 3 + 3) / 4; 2044 h_p_size = (h_p_size * 3 + 3) / 4; 2045 } else if (fcw->hcin_decomp_mode == 4) { 2046 h_np_size = h_np_size / 2; 2047 h_p_size = h_p_size / 2; 2048 } 2049 dec->harq_combined_output.length = h_np_size; 2050 acc_dma_fill_blk_type( 2051 desc, 2052 dec->harq_combined_output.data, 2053 dec->harq_combined_output.offset, 2054 h_p_size, 2055 next_triplet, 2056 ACC_DMA_BLKID_OUT_HARQ); 2057 2058 next_triplet++; 2059 } 2060 2061 *h_out_length = output_length >> 3; 2062 dec->hard_output.length += *h_out_length; 2063 *h_out_offset += *h_out_length; 2064 desc->data_ptrs[next_triplet - 1].last = 1; 2065 desc->d2mlen = next_triplet - desc->m2dlen; 2066 2067 desc->op_addr = op; 2068 2069 return 0; 2070 } 2071 2072 static inline void 2073 vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op, 2074 struct acc_dma_req_desc *desc, 2075 struct rte_mbuf *input, struct rte_mbuf *h_output, 2076 uint32_t *in_offset, uint32_t *h_out_offset, 2077 uint32_t *h_out_length, 2078 union acc_harq_layout_data *harq_layout) 2079 { 2080 int next_triplet = 1; /* FCW already done. */ 2081 desc->data_ptrs[next_triplet].address = rte_pktmbuf_iova_offset(input, *in_offset); 2082 next_triplet++; 2083 2084 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { 2085 struct rte_bbdev_op_data hi = op->ldpc_dec.harq_combined_input; 2086 desc->data_ptrs[next_triplet].address = 2087 rte_pktmbuf_iova_offset(hi.data, hi.offset); 2088 next_triplet++; 2089 } 2090 2091 desc->data_ptrs[next_triplet].address = 2092 rte_pktmbuf_iova_offset(h_output, *h_out_offset); 2093 *h_out_length = desc->data_ptrs[next_triplet].blen; 2094 next_triplet++; 2095 2096 if (check_bit(op->ldpc_dec.op_flags, 2097 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) { 2098 /* Adjust based on previous operation. */ 2099 struct rte_bbdev_dec_op *prev_op = desc->op_addr; 2100 op->ldpc_dec.harq_combined_output.length = 2101 prev_op->ldpc_dec.harq_combined_output.length; 2102 uint32_t harq_idx = hq_index(op->ldpc_dec.harq_combined_output.offset); 2103 uint32_t prev_harq_idx = hq_index(prev_op->ldpc_dec.harq_combined_output.offset); 2104 harq_layout[harq_idx].val = harq_layout[prev_harq_idx].val; 2105 struct rte_bbdev_op_data ho = op->ldpc_dec.harq_combined_output; 2106 desc->data_ptrs[next_triplet].address = 2107 rte_pktmbuf_iova_offset(ho.data, ho.offset); 2108 next_triplet++; 2109 } 2110 2111 op->ldpc_dec.hard_output.length += *h_out_length; 2112 desc->op_addr = op; 2113 } 2114 2115 /* Enqueue one encode operations for device in CB mode. 
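 * One op maps onto a single DMA descriptor: the turbo-encode frame control
 * word is written directly into the descriptor, the DMA pointer triplets
 * are filled from the op mbufs, and the output mbuf is grown by the
 * rate-matched output length. With RTE_LIBRTE_BBDEV_DEBUG enabled, the FCW
 * and the request descriptor are hex-dumped to stderr for inspection.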
*/ 2116 static inline int 2117 enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2118 uint16_t total_enqueued_cbs) 2119 { 2120 union acc_dma_desc *desc = NULL; 2121 int ret; 2122 uint32_t in_offset, out_offset, out_length, mbuf_total_left, seg_total_left; 2123 struct rte_mbuf *input, *output_head, *output; 2124 2125 desc = acc_desc(q, total_enqueued_cbs); 2126 acc_fcw_te_fill(op, &desc->req.fcw_te); 2127 2128 input = op->turbo_enc.input.data; 2129 output_head = output = op->turbo_enc.output.data; 2130 in_offset = op->turbo_enc.input.offset; 2131 out_offset = op->turbo_enc.output.offset; 2132 out_length = 0; 2133 mbuf_total_left = op->turbo_enc.input.length; 2134 seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data) - in_offset; 2135 2136 ret = acc_dma_desc_te_fill(op, &desc->req, &input, output, 2137 &in_offset, &out_offset, &out_length, &mbuf_total_left, 2138 &seg_total_left, 0); 2139 2140 if (unlikely(ret < 0)) 2141 return ret; 2142 2143 mbuf_append(output_head, output, out_length); 2144 2145 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2146 rte_memdump(stderr, "FCW", &desc->req.fcw_te, 2147 sizeof(desc->req.fcw_te) - 8); 2148 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2149 #endif 2150 /* One CB (one op) was successfully prepared to enqueue */ 2151 return 1; 2152 } 2153 2154 /* Enqueue one encode operations for device in CB mode 2155 * multiplexed on the same descriptor. 2156 */ 2157 static inline int 2158 enqueue_ldpc_enc_n_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op **ops, 2159 uint16_t total_enqueued_descs, int16_t num) 2160 { 2161 union acc_dma_desc *desc = NULL; 2162 uint32_t out_length; 2163 struct rte_mbuf *output_head, *output; 2164 int i, next_triplet; 2165 uint16_t in_length_in_bytes; 2166 struct rte_bbdev_op_ldpc_enc *enc = &ops[0]->ldpc_enc; 2167 struct acc_ptrs *context_ptrs; 2168 2169 desc = acc_desc(q, total_enqueued_descs); 2170 acc_fcw_le_fill(ops[0], &desc->req.fcw_le, num, 0); 2171 2172 /** This could be done at polling. */ 2173 acc_header_init(&desc->req); 2174 desc->req.numCBs = num; 2175 desc->req.dltb = 0; 2176 2177 in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len; 2178 out_length = (enc->cb_params.e + 7) >> 3; 2179 desc->req.m2dlen = 1 + num; 2180 desc->req.d2mlen = num; 2181 next_triplet = 1; 2182 2183 for (i = 0; i < num; i++) { 2184 desc->req.data_ptrs[next_triplet].address = 2185 rte_pktmbuf_iova_offset(ops[i]->ldpc_enc.input.data, 0); 2186 desc->req.data_ptrs[next_triplet].blen = in_length_in_bytes; 2187 next_triplet++; 2188 desc->req.data_ptrs[next_triplet].address = rte_pktmbuf_iova_offset( 2189 ops[i]->ldpc_enc.output.data, 0); 2190 desc->req.data_ptrs[next_triplet].blen = out_length; 2191 next_triplet++; 2192 ops[i]->ldpc_enc.output.length = out_length; 2193 output_head = output = ops[i]->ldpc_enc.output.data; 2194 mbuf_append(output_head, output, out_length); 2195 output->data_len = out_length; 2196 } 2197 2198 desc->req.op_addr = ops[0]; 2199 /* Keep track of pointers even when multiplexed in single descriptor. */ 2200 context_ptrs = q->companion_ring_addr + acc_desc_idx(q, total_enqueued_descs); 2201 for (i = 0; i < num; i++) 2202 context_ptrs->ptr[i].op_addr = ops[i]; 2203 2204 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2205 rte_memdump(stderr, "FCW", &desc->req.fcw_le, 2206 sizeof(desc->req.fcw_le) - 8); 2207 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2208 #endif 2209 2210 /* Number of compatible CBs/ops successfully prepared to enqueue. 
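 * All of the CBs above share this one descriptor: a single FCW plus one
 * input pointer per CB gives m2dlen = 1 + num, and one output pointer per
 * CB gives d2mlen = num. Each output buffer receives (e + 7) / 8 bytes,
 * e.g. with an illustrative e = 900 bits that is 113 bytes. The per-op
 * pointers are mirrored into the companion ring so that dequeue can hand
 * back every op even though the hardware reports only the shared
 * descriptor.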
*/ 2211 return num; 2212 } 2213 2214 /* Enqueue one encode operations for VRB1 device for a partial TB 2215 * all codes blocks have same configuration multiplexed on the same descriptor. 2216 */ 2217 static inline void 2218 vrb1_enqueue_ldpc_enc_part_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2219 uint16_t total_enqueued_descs, int16_t num_cbs, uint32_t e, 2220 uint16_t in_len_B, uint32_t out_len_B, uint32_t *in_offset, 2221 uint32_t *out_offset) 2222 { 2223 2224 union acc_dma_desc *desc = NULL; 2225 struct rte_mbuf *output_head, *output; 2226 int i, next_triplet; 2227 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 2228 2229 desc = acc_desc(q, total_enqueued_descs); 2230 acc_fcw_le_fill(op, &desc->req.fcw_le, num_cbs, e); 2231 2232 /** This could be done at polling. */ 2233 acc_header_init(&desc->req); 2234 desc->req.numCBs = num_cbs; 2235 2236 desc->req.m2dlen = 1 + num_cbs; 2237 desc->req.d2mlen = num_cbs; 2238 next_triplet = 1; 2239 2240 for (i = 0; i < num_cbs; i++) { 2241 desc->req.data_ptrs[next_triplet].address = rte_pktmbuf_iova_offset( 2242 enc->input.data, *in_offset); 2243 *in_offset += in_len_B; 2244 desc->req.data_ptrs[next_triplet].blen = in_len_B; 2245 next_triplet++; 2246 desc->req.data_ptrs[next_triplet].address = rte_pktmbuf_iova_offset( 2247 enc->output.data, *out_offset); 2248 *out_offset += out_len_B; 2249 desc->req.data_ptrs[next_triplet].blen = out_len_B; 2250 next_triplet++; 2251 enc->output.length += out_len_B; 2252 output_head = output = enc->output.data; 2253 mbuf_append(output_head, output, out_len_B); 2254 } 2255 2256 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2257 rte_memdump(stderr, "FCW", &desc->req.fcw_le, 2258 sizeof(desc->req.fcw_le) - 8); 2259 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2260 #endif 2261 2262 } 2263 2264 /* Enqueue one encode operations for device in TB mode. 
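 * The transport block is walked one code block at a time, from index r up
 * to c - 1, and each CB consumes its own descriptor whose first pointer
 * triplet references the FCW stored in the ring at fcw_offset. The caller
 * is expected to have reserved cbs_in_tb descriptors of ring headroom, and
 * SDone is raised only on the last CB so the whole TB completes as one
 * unit at dequeue time.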
*/ 2265 static inline int 2266 enqueue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2267 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) 2268 { 2269 union acc_dma_desc *desc = NULL; 2270 int ret; 2271 uint8_t r, c; 2272 uint32_t in_offset, out_offset, out_length, mbuf_total_left, 2273 seg_total_left; 2274 struct rte_mbuf *input, *output_head, *output; 2275 uint16_t desc_idx, current_enqueued_cbs = 0; 2276 uint64_t fcw_offset; 2277 2278 desc_idx = acc_desc_idx(q, total_enqueued_cbs); 2279 desc = q->ring_addr + desc_idx; 2280 fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 2281 acc_fcw_te_fill(op, &desc->req.fcw_te); 2282 2283 input = op->turbo_enc.input.data; 2284 output_head = output = op->turbo_enc.output.data; 2285 in_offset = op->turbo_enc.input.offset; 2286 out_offset = op->turbo_enc.output.offset; 2287 out_length = 0; 2288 mbuf_total_left = op->turbo_enc.input.length; 2289 2290 c = op->turbo_enc.tb_params.c; 2291 r = op->turbo_enc.tb_params.r; 2292 2293 while (mbuf_total_left > 0 && r < c) { 2294 if (unlikely((input == NULL) || (output == NULL))) 2295 return -1; 2296 2297 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2298 /* Set up DMA descriptor */ 2299 desc = acc_desc(q, total_enqueued_cbs); 2300 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset; 2301 desc->req.data_ptrs[0].blen = ACC_FCW_TE_BLEN; 2302 2303 ret = acc_dma_desc_te_fill(op, &desc->req, &input, output, 2304 &in_offset, &out_offset, &out_length, 2305 &mbuf_total_left, &seg_total_left, r); 2306 if (unlikely(ret < 0)) 2307 return ret; 2308 mbuf_append(output_head, output, out_length); 2309 2310 /* Set total number of CBs in TB */ 2311 desc->req.cbs_in_tb = cbs_in_tb; 2312 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2313 rte_memdump(stderr, "FCW", &desc->req.fcw_te, 2314 sizeof(desc->req.fcw_te) - 8); 2315 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2316 #endif 2317 2318 if (seg_total_left == 0) { 2319 /* Go to the next mbuf */ 2320 input = input->next; 2321 in_offset = 0; 2322 output = output->next; 2323 out_offset = 0; 2324 } 2325 2326 total_enqueued_cbs++; 2327 current_enqueued_cbs++; 2328 r++; 2329 } 2330 2331 /* In case the number of CB doesn't match, the configuration was invalid. */ 2332 if (unlikely(current_enqueued_cbs != cbs_in_tb)) 2333 return -1; 2334 2335 /* Set SDone on last CB descriptor for TB mode. */ 2336 desc->req.sdone_enable = 1; 2337 2338 return current_enqueued_cbs; 2339 } 2340 2341 /* Enqueue one encode operations for device in TB mode. 2342 * returns the number of descs used. 2343 */ 2344 static inline int 2345 vrb1_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2346 uint16_t enq_descs, uint8_t cbs_in_tb) 2347 { 2348 uint8_t num_a, num_b; 2349 uint16_t input_len_B, return_descs; 2350 uint8_t r = op->ldpc_enc.tb_params.r; 2351 uint8_t cab = op->ldpc_enc.tb_params.cab; 2352 union acc_dma_desc *desc; 2353 uint16_t init_enq_descs = enq_descs; 2354 uint32_t in_offset = 0, out_offset = 0; 2355 2356 input_len_B = ((op->ldpc_enc.basegraph == 1 ? 
22 : 10) * op->ldpc_enc.z_c 2357 - op->ldpc_enc.n_filler) >> 3; 2358 2359 if (check_bit(op->ldpc_enc.op_flags, RTE_BBDEV_LDPC_CRC_24B_ATTACH)) 2360 input_len_B -= 3; 2361 2362 if (r < cab) { 2363 num_a = cab - r; 2364 num_b = cbs_in_tb - cab; 2365 } else { 2366 num_a = 0; 2367 num_b = cbs_in_tb - r; 2368 } 2369 2370 while (num_a > 0) { 2371 uint32_t e = op->ldpc_enc.tb_params.ea; 2372 uint32_t out_len_B = (e + 7) >> 3; 2373 uint8_t enq = RTE_MIN(num_a, ACC_MUX_5GDL_DESC); 2374 num_a -= enq; 2375 vrb1_enqueue_ldpc_enc_part_tb(q, op, enq_descs, enq, e, input_len_B, 2376 out_len_B, &in_offset, &out_offset); 2377 enq_descs++; 2378 } 2379 while (num_b > 0) { 2380 uint32_t e = op->ldpc_enc.tb_params.eb; 2381 uint32_t out_len_B = (e + 7) >> 3; 2382 uint8_t enq = RTE_MIN(num_b, ACC_MUX_5GDL_DESC); 2383 num_b -= enq; 2384 vrb1_enqueue_ldpc_enc_part_tb(q, op, enq_descs, enq, e, input_len_B, 2385 out_len_B, &in_offset, &out_offset); 2386 enq_descs++; 2387 } 2388 2389 return_descs = enq_descs - init_enq_descs; 2390 /* Keep total number of CBs in first TB. */ 2391 desc = acc_desc(q, init_enq_descs); 2392 desc->req.cbs_in_tb = return_descs; /** Actual number of descriptors. */ 2393 desc->req.op_addr = op; 2394 2395 /* Set SDone on last CB descriptor for TB mode. */ 2396 desc = acc_desc(q, enq_descs - 1); 2397 desc->req.sdone_enable = 1; 2398 desc->req.op_addr = op; 2399 return return_descs; 2400 } 2401 2402 /* Fill in a frame control word for LDPC encoding. */ 2403 static inline void 2404 vrb2_fcw_letb_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_le *fcw) 2405 { 2406 fcw->qm = op->ldpc_enc.q_m; 2407 fcw->nfiller = op->ldpc_enc.n_filler; 2408 fcw->BG = (op->ldpc_enc.basegraph - 1); 2409 fcw->Zc = op->ldpc_enc.z_c; 2410 fcw->ncb = op->ldpc_enc.n_cb; 2411 fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph, 2412 op->ldpc_enc.rv_index, 0); 2413 fcw->rm_e = op->ldpc_enc.tb_params.ea; 2414 fcw->rm_e_b = op->ldpc_enc.tb_params.eb; 2415 fcw->crc_select = check_bit(op->ldpc_enc.op_flags, 2416 RTE_BBDEV_LDPC_CRC_24B_ATTACH); 2417 fcw->bypass_intlv = 0; 2418 if (op->ldpc_enc.tb_params.c > 1) { 2419 fcw->mcb_count = 0; 2420 fcw->C = op->ldpc_enc.tb_params.c; 2421 fcw->Cab = op->ldpc_enc.tb_params.cab; 2422 } else { 2423 fcw->mcb_count = 1; 2424 fcw->C = 0; 2425 } 2426 } 2427 2428 /* Enqueue one encode operations for device in TB mode. 2429 * returns the number of descs used. 2430 */ 2431 static inline int 2432 vrb2_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2433 uint16_t enq_descs) 2434 { 2435 union acc_dma_desc *desc = NULL; 2436 uint32_t in_offset, out_offset, out_length, seg_total_left; 2437 struct rte_mbuf *input, *output_head, *output; 2438 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 2439 int next_triplet = 1; /* FCW already done. */ 2440 uint32_t in_length_in_bytes; 2441 uint16_t K, in_length_in_bits; 2442 2443 desc = acc_desc(q, enq_descs); 2444 vrb2_fcw_letb_fill(op, &desc->req.fcw_le); 2445 2446 input = enc->input.data; 2447 output_head = output = enc->output.data; 2448 in_offset = enc->input.offset; 2449 out_offset = enc->output.offset; 2450 seg_total_left = rte_pktmbuf_data_len(enc->input.data) - in_offset; 2451 2452 acc_header_init(&desc->req); 2453 K = (enc->basegraph == 1 ? 
22 : 10) * enc->z_c; 2454 in_length_in_bits = K - enc->n_filler; 2455 if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) || 2456 (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH)) 2457 in_length_in_bits -= 24; 2458 in_length_in_bytes = (in_length_in_bits >> 3) * enc->tb_params.c; 2459 2460 next_triplet = acc_dma_fill_blk_type_in(&desc->req, &input, &in_offset, 2461 in_length_in_bytes, &seg_total_left, next_triplet, 2462 check_bit(enc->op_flags, RTE_BBDEV_LDPC_ENC_SCATTER_GATHER)); 2463 if (unlikely(next_triplet < 0)) { 2464 rte_bbdev_log(ERR, 2465 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 2466 op); 2467 return -1; 2468 } 2469 desc->req.data_ptrs[next_triplet - 1].last = 1; 2470 desc->req.m2dlen = next_triplet; 2471 2472 /* Set output length */ 2473 /* Integer round up division by 8 */ 2474 out_length = (enc->tb_params.ea * enc->tb_params.cab + 2475 enc->tb_params.eb * (enc->tb_params.c - enc->tb_params.cab) + 7) >> 3; 2476 2477 next_triplet = acc_dma_fill_blk_type(&desc->req, output, out_offset, 2478 out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC); 2479 enc->output.length = out_length; 2480 out_offset += out_length; 2481 desc->req.data_ptrs[next_triplet - 1].last = 1; 2482 desc->req.data_ptrs[next_triplet - 1].dma_ext = 0; 2483 desc->req.d2mlen = next_triplet - desc->req.m2dlen; 2484 desc->req.numCBs = enc->tb_params.c; 2485 if (desc->req.numCBs > 1) 2486 desc->req.dltb = 1; 2487 desc->req.op_addr = op; 2488 2489 if (out_length < ACC_MAX_E_MBUF) 2490 mbuf_append(output_head, output, out_length); 2491 2492 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2493 rte_memdump(stderr, "FCW", &desc->req.fcw_le, sizeof(desc->req.fcw_le)); 2494 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2495 #endif 2496 /* One CB (one op) was successfully prepared to enqueue */ 2497 return 1; 2498 } 2499 2500 /** Enqueue one decode operations for device in CB mode. */ 2501 static inline int 2502 enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op, 2503 uint16_t total_enqueued_cbs) 2504 { 2505 union acc_dma_desc *desc = NULL; 2506 int ret; 2507 uint32_t in_offset, h_out_offset, s_out_offset, s_out_length, 2508 h_out_length, mbuf_total_left, seg_total_left; 2509 struct rte_mbuf *input, *h_output_head, *h_output, 2510 *s_output_head, *s_output; 2511 2512 if ((q->d->device_variant == VRB1_VARIANT) && 2513 (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))) { 2514 /* SO not supported for VRB1. 
*/ 2515 return -EPERM; 2516 } 2517 2518 desc = acc_desc(q, total_enqueued_cbs); 2519 vrb_fcw_td_fill(op, &desc->req.fcw_td); 2520 2521 input = op->turbo_dec.input.data; 2522 h_output_head = h_output = op->turbo_dec.hard_output.data; 2523 s_output_head = s_output = op->turbo_dec.soft_output.data; 2524 in_offset = op->turbo_dec.input.offset; 2525 h_out_offset = op->turbo_dec.hard_output.offset; 2526 s_out_offset = op->turbo_dec.soft_output.offset; 2527 h_out_length = s_out_length = 0; 2528 mbuf_total_left = op->turbo_dec.input.length; 2529 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2530 2531 /* Set up DMA descriptor */ 2532 desc = acc_desc(q, total_enqueued_cbs); 2533 2534 ret = vrb_dma_desc_td_fill(op, &desc->req, &input, h_output, 2535 s_output, &in_offset, &h_out_offset, &s_out_offset, 2536 &h_out_length, &s_out_length, &mbuf_total_left, 2537 &seg_total_left, 0); 2538 2539 if (unlikely(ret < 0)) 2540 return ret; 2541 2542 /* Hard output */ 2543 mbuf_append(h_output_head, h_output, h_out_length); 2544 2545 /* Soft output */ 2546 if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) 2547 mbuf_append(s_output_head, s_output, s_out_length); 2548 2549 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2550 rte_memdump(stderr, "FCW", &desc->req.fcw_td, 2551 sizeof(desc->req.fcw_td)); 2552 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2553 #endif 2554 2555 /* One CB (one op) was successfully prepared to enqueue */ 2556 return 1; 2557 } 2558 2559 /** Enqueue one decode operations for device in CB mode. */ 2560 static inline int 2561 vrb_enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op, 2562 uint16_t total_enqueued_cbs, bool same_op) 2563 { 2564 int ret, hq_len; 2565 union acc_dma_desc *desc; 2566 struct rte_mbuf *input, *h_output_head, *h_output; 2567 uint32_t in_offset, h_out_offset, mbuf_total_left, h_out_length = 0; 2568 union acc_harq_layout_data *harq_layout; 2569 2570 if (op->ldpc_dec.cb_params.e == 0) 2571 return -EINVAL; 2572 2573 desc = acc_desc(q, total_enqueued_cbs); 2574 2575 input = op->ldpc_dec.input.data; 2576 h_output_head = h_output = op->ldpc_dec.hard_output.data; 2577 in_offset = op->ldpc_dec.input.offset; 2578 h_out_offset = op->ldpc_dec.hard_output.offset; 2579 mbuf_total_left = op->ldpc_dec.input.length; 2580 harq_layout = q->d->harq_layout; 2581 2582 if (same_op) { 2583 union acc_dma_desc *prev_desc; 2584 prev_desc = acc_desc(q, total_enqueued_cbs - 1); 2585 uint8_t *prev_ptr = (uint8_t *) prev_desc; 2586 uint8_t *new_ptr = (uint8_t *) desc; 2587 /* Copy first 4 words and BDESCs. */ 2588 rte_memcpy(new_ptr, prev_ptr, ACC_5GUL_SIZE_0); 2589 rte_memcpy(new_ptr + ACC_5GUL_OFFSET_0, 2590 prev_ptr + ACC_5GUL_OFFSET_0, 2591 ACC_5GUL_SIZE_1); 2592 desc->req.op_addr = prev_desc->req.op_addr; 2593 /* Copy FCW. */ 2594 rte_memcpy(new_ptr + ACC_DESC_FCW_OFFSET, 2595 prev_ptr + ACC_DESC_FCW_OFFSET, 2596 ACC_FCW_LD_BLEN); 2597 vrb_dma_desc_ld_update(op, &desc->req, input, h_output, 2598 &in_offset, &h_out_offset, 2599 &h_out_length, harq_layout); 2600 } else { 2601 struct acc_fcw_ld *fcw; 2602 uint32_t seg_total_left; 2603 fcw = &desc->req.fcw_ld; 2604 vrb_fcw_ld_fill(op, fcw, harq_layout, q->d->device_variant); 2605 2606 /* Special handling when using mbuf or not. 
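 * When scatter-gather is requested the transfer is bounded by the bytes
 * remaining in the current mbuf segment; otherwise the rate-matched size E
 * already written into the FCW (fcw->rm_e) is used as the contiguous input
 * length, so the mbuf segment length does not need to be consulted.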
*/ 2607 if (check_bit(op->ldpc_dec.op_flags, 2608 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER)) 2609 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2610 else 2611 seg_total_left = fcw->rm_e; 2612 ret = vrb_dma_desc_ld_fill(op, &desc->req, &input, h_output, 2613 &in_offset, &h_out_offset, 2614 &h_out_length, &mbuf_total_left, 2615 &seg_total_left, fcw, q->d->device_variant); 2616 if (unlikely(ret < 0)) 2617 return ret; 2618 } 2619 2620 /* Hard output. */ 2621 mbuf_append(h_output_head, h_output, h_out_length); 2622 if (op->ldpc_dec.harq_combined_output.length > 0) { 2623 /* Push the HARQ output into host memory overwriting existing data. */ 2624 struct rte_mbuf *hq_output_head, *hq_output; 2625 op->ldpc_dec.harq_combined_output.data->data_len = 0; 2626 hq_output_head = op->ldpc_dec.harq_combined_output.data; 2627 hq_output = op->ldpc_dec.harq_combined_output.data; 2628 hq_len = op->ldpc_dec.harq_combined_output.length; 2629 if (unlikely(!mbuf_append(hq_output_head, hq_output, hq_len))) { 2630 rte_bbdev_log(ERR, "HARQ output mbuf issue %d %d", 2631 hq_output->buf_len, 2632 hq_len); 2633 return -1; 2634 } 2635 } 2636 2637 if (op->ldpc_dec.soft_output.length > 0) 2638 mbuf_append(op->ldpc_dec.soft_output.data, op->ldpc_dec.soft_output.data, 2639 op->ldpc_dec.soft_output.length); 2640 2641 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2642 rte_memdump(stderr, "FCW", &desc->req.fcw_ld, 2643 sizeof(desc->req.fcw_ld) - 8); 2644 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2645 #endif 2646 2647 /* One CB (one op) was successfully prepared to enqueue. */ 2648 return 1; 2649 } 2650 2651 2652 /* Enqueue one decode operations for device in TB mode. */ 2653 static inline int 2654 vrb_enqueue_ldpc_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op, 2655 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) 2656 { 2657 union acc_dma_desc *desc = NULL; 2658 union acc_dma_desc *desc_first = NULL; 2659 int ret; 2660 uint8_t r, c; 2661 uint32_t in_offset, h_out_offset, h_out_length, mbuf_total_left, seg_total_left; 2662 struct rte_mbuf *input, *h_output_head, *h_output; 2663 uint16_t current_enqueued_cbs = 0; 2664 uint16_t desc_idx, sys_cols, trail_len = 0; 2665 uint64_t fcw_offset; 2666 union acc_harq_layout_data *harq_layout; 2667 2668 desc_idx = acc_desc_idx(q, total_enqueued_cbs); 2669 desc = q->ring_addr + desc_idx; 2670 desc_first = desc; 2671 fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 2672 harq_layout = q->d->harq_layout; 2673 vrb_fcw_ld_fill(op, &desc->req.fcw_ld, harq_layout, q->d->device_variant); 2674 2675 input = op->ldpc_dec.input.data; 2676 h_output_head = h_output = op->ldpc_dec.hard_output.data; 2677 in_offset = op->ldpc_dec.input.offset; 2678 h_out_offset = op->ldpc_dec.hard_output.offset; 2679 h_out_length = 0; 2680 mbuf_total_left = op->ldpc_dec.input.length; 2681 c = op->ldpc_dec.tb_params.c; 2682 r = op->ldpc_dec.tb_params.r; 2683 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK)) { 2684 sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 2685 trail_len = sys_cols * op->ldpc_dec.z_c - 2686 op->ldpc_dec.n_filler - 24; 2687 } 2688 2689 while (mbuf_total_left > 0 && r < c) { 2690 if (unlikely((input == NULL) || (h_output == NULL))) 2691 return -1; 2692 2693 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_DEC_SCATTER_GATHER)) 2694 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2695 else 2696 seg_total_left = op->ldpc_dec.input.length; 2697 /* Set up DMA descriptor. 
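 * Each CB of the TB gets its own descriptor. The LDPC-decode FCW prepared
 * for the first descriptor is copied verbatim into the current one, and
 * only tb_trailer_size is recomputed as (c - r - 1) * trail_len, i.e.
 * proportional to the number of CBs still to follow when a TB-level
 * CRC24A check was requested.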
*/ 2698 desc_idx = acc_desc_idx(q, total_enqueued_cbs); 2699 desc = q->ring_addr + desc_idx; 2700 fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 2701 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset; 2702 desc->req.data_ptrs[0].blen = ACC_FCW_LD_BLEN; 2703 rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld, ACC_FCW_LD_BLEN); 2704 desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len; 2705 ret = vrb_dma_desc_ld_fill(op, &desc->req, &input, 2706 h_output, &in_offset, &h_out_offset, 2707 &h_out_length, 2708 &mbuf_total_left, &seg_total_left, 2709 &desc->req.fcw_ld, q->d->device_variant); 2710 2711 if (unlikely(ret < 0)) 2712 return ret; 2713 2714 /* Hard output. */ 2715 mbuf_append(h_output_head, h_output, h_out_length); 2716 2717 /* Set total number of CBs in TB. */ 2718 desc->req.cbs_in_tb = cbs_in_tb; 2719 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2720 rte_memdump(stderr, "FCW", &desc->req.fcw_td, 2721 sizeof(desc->req.fcw_td) - 8); 2722 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2723 #endif 2724 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_DEC_SCATTER_GATHER) 2725 && (seg_total_left == 0)) { 2726 /* Go to the next mbuf. */ 2727 input = input->next; 2728 in_offset = 0; 2729 h_output = h_output->next; 2730 h_out_offset = 0; 2731 } 2732 total_enqueued_cbs++; 2733 current_enqueued_cbs++; 2734 r++; 2735 } 2736 2737 /* In case the number of CB doesn't match, the configuration was invalid. */ 2738 if (unlikely(current_enqueued_cbs != cbs_in_tb)) 2739 return -1; 2740 2741 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2742 if (check_mbuf_total_left(mbuf_total_left) != 0) 2743 return -EINVAL; 2744 #endif 2745 /* Set SDone on last CB descriptor for TB mode. */ 2746 desc->req.sdone_enable = 1; 2747 2748 return current_enqueued_cbs; 2749 } 2750 2751 /* Enqueue one decode operations for device in TB mode. 
*/ 2752 static inline int 2753 enqueue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op, 2754 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) 2755 { 2756 union acc_dma_desc *desc = NULL; 2757 int ret; 2758 uint8_t r, c; 2759 uint32_t in_offset, h_out_offset, s_out_offset, s_out_length, 2760 h_out_length, mbuf_total_left, seg_total_left; 2761 struct rte_mbuf *input, *h_output_head, *h_output, 2762 *s_output_head, *s_output; 2763 uint16_t desc_idx, current_enqueued_cbs = 0; 2764 uint64_t fcw_offset; 2765 2766 desc_idx = acc_desc_idx(q, total_enqueued_cbs); 2767 desc = q->ring_addr + desc_idx; 2768 fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 2769 vrb_fcw_td_fill(op, &desc->req.fcw_td); 2770 2771 input = op->turbo_dec.input.data; 2772 h_output_head = h_output = op->turbo_dec.hard_output.data; 2773 s_output_head = s_output = op->turbo_dec.soft_output.data; 2774 in_offset = op->turbo_dec.input.offset; 2775 h_out_offset = op->turbo_dec.hard_output.offset; 2776 s_out_offset = op->turbo_dec.soft_output.offset; 2777 h_out_length = s_out_length = 0; 2778 mbuf_total_left = op->turbo_dec.input.length; 2779 c = op->turbo_dec.tb_params.c; 2780 r = op->turbo_dec.tb_params.r; 2781 2782 while (mbuf_total_left > 0 && r < c) { 2783 if (unlikely((input == NULL) || (h_output == NULL))) 2784 return -1; 2785 2786 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2787 2788 /* Set up DMA descriptor */ 2789 desc = acc_desc(q, total_enqueued_cbs); 2790 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset; 2791 desc->req.data_ptrs[0].blen = ACC_FCW_TD_BLEN; 2792 ret = vrb_dma_desc_td_fill(op, &desc->req, &input, 2793 h_output, s_output, &in_offset, &h_out_offset, 2794 &s_out_offset, &h_out_length, &s_out_length, 2795 &mbuf_total_left, &seg_total_left, r); 2796 2797 if (unlikely(ret < 0)) 2798 return ret; 2799 2800 /* Hard output */ 2801 mbuf_append(h_output_head, h_output, h_out_length); 2802 2803 /* Soft output */ 2804 if (check_bit(op->turbo_dec.op_flags, 2805 RTE_BBDEV_TURBO_SOFT_OUTPUT)) 2806 mbuf_append(s_output_head, s_output, s_out_length); 2807 2808 /* Set total number of CBs in TB */ 2809 desc->req.cbs_in_tb = cbs_in_tb; 2810 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2811 rte_memdump(stderr, "FCW", &desc->req.fcw_td, 2812 sizeof(desc->req.fcw_td) - 8); 2813 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2814 #endif 2815 2816 if (seg_total_left == 0) { 2817 /* Go to the next mbuf */ 2818 input = input->next; 2819 in_offset = 0; 2820 h_output = h_output->next; 2821 h_out_offset = 0; 2822 2823 if (check_bit(op->turbo_dec.op_flags, 2824 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 2825 s_output = s_output->next; 2826 s_out_offset = 0; 2827 } 2828 } 2829 2830 total_enqueued_cbs++; 2831 current_enqueued_cbs++; 2832 r++; 2833 } 2834 2835 /* In case the number of CB doesn't match, the configuration was invalid. */ 2836 if (unlikely(current_enqueued_cbs != cbs_in_tb)) 2837 return -1; 2838 2839 /* Set SDone on last CB descriptor for TB mode */ 2840 desc->req.sdone_enable = 1; 2841 2842 return current_enqueued_cbs; 2843 } 2844 2845 /* Enqueue encode operations for device in CB mode. 
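 * Burst front-end for the single-op helper above: ring headroom is checked
 * per op, one descriptor is filled per op, and a single acc_dma_enqueue()
 * call then pushes the whole burst to the hardware. A hedged
 * application-side sketch through the public bbdev API (dev_id, q_id and
 * the burst size of 32 are placeholders, ops[] is assumed to be allocated
 * and populated, error handling is elided):
 *
 *	struct rte_bbdev_enc_op *ops[32];
 *	uint16_t n_enq = rte_bbdev_enqueue_enc_ops(dev_id, q_id, ops, 32);
 *	uint16_t n_deq = 0;
 *	while (n_deq < n_enq)
 *		n_deq += rte_bbdev_dequeue_enc_ops(dev_id, q_id,
 *				&ops[n_deq], n_enq - n_deq);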
*/ 2846 static uint16_t 2847 vrb_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data, 2848 struct rte_bbdev_enc_op **ops, uint16_t num) 2849 { 2850 struct acc_queue *q = q_data->queue_private; 2851 int32_t avail = acc_ring_avail_enq(q); 2852 uint16_t i; 2853 int ret; 2854 2855 for (i = 0; i < num; ++i) { 2856 /* Check if there are available space for further processing */ 2857 if (unlikely(avail - 1 < 0)) { 2858 acc_enqueue_ring_full(q_data); 2859 break; 2860 } 2861 avail -= 1; 2862 2863 ret = enqueue_enc_one_op_cb(q, ops[i], i); 2864 if (ret < 0) { 2865 acc_enqueue_invalid(q_data); 2866 break; 2867 } 2868 } 2869 2870 if (unlikely(i == 0)) 2871 return 0; /* Nothing to enqueue */ 2872 2873 acc_dma_enqueue(q, i, &q_data->queue_stats); 2874 2875 acc_update_qstat_enqueue(q_data, i, num - i); 2876 return i; 2877 } 2878 2879 /** Enqueue encode operations for device in CB mode. */ 2880 static inline uint16_t 2881 vrb_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data, 2882 struct rte_bbdev_enc_op **ops, uint16_t num) 2883 { 2884 struct acc_queue *q = q_data->queue_private; 2885 int32_t avail = acc_ring_avail_enq(q); 2886 uint16_t i = 0; 2887 int ret, desc_idx = 0; 2888 int16_t enq, left = num; 2889 2890 while (left > 0) { 2891 if (unlikely(avail < 1)) { 2892 acc_enqueue_ring_full(q_data); 2893 break; 2894 } 2895 avail--; 2896 enq = RTE_MIN(left, ACC_MUX_5GDL_DESC); 2897 enq = check_mux(&ops[i], enq); 2898 ret = enqueue_ldpc_enc_n_op_cb(q, &ops[i], desc_idx, enq); 2899 if (ret < 0) { 2900 acc_enqueue_invalid(q_data); 2901 break; 2902 } 2903 i += enq; 2904 desc_idx++; 2905 left = num - i; 2906 } 2907 2908 if (unlikely(i == 0)) 2909 return 0; /* Nothing to enqueue. */ 2910 2911 acc_dma_enqueue(q, desc_idx, &q_data->queue_stats); 2912 2913 acc_update_qstat_enqueue(q_data, i, num - i); 2914 2915 return i; 2916 } 2917 2918 /* Enqueue encode operations for device in TB mode. */ 2919 static uint16_t 2920 vrb_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data, 2921 struct rte_bbdev_enc_op **ops, uint16_t num) 2922 { 2923 struct acc_queue *q = q_data->queue_private; 2924 int32_t avail = acc_ring_avail_enq(q); 2925 uint16_t i, enqueued_cbs = 0; 2926 uint8_t cbs_in_tb; 2927 int ret; 2928 2929 for (i = 0; i < num; ++i) { 2930 cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc); 2931 /* Check if there are available space for further processing */ 2932 if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) { 2933 acc_enqueue_ring_full(q_data); 2934 break; 2935 } 2936 avail -= cbs_in_tb; 2937 2938 ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb); 2939 if (ret <= 0) { 2940 acc_enqueue_invalid(q_data); 2941 break; 2942 } 2943 enqueued_cbs += ret; 2944 } 2945 if (unlikely(enqueued_cbs == 0)) 2946 return 0; /* Nothing to enqueue */ 2947 2948 acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats); 2949 2950 acc_update_qstat_enqueue(q_data, i, num - i); 2951 2952 return i; 2953 } 2954 2955 /* Enqueue LDPC encode operations for device in TB mode. */ 2956 static uint16_t 2957 vrb_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data, 2958 struct rte_bbdev_enc_op **ops, uint16_t num) 2959 { 2960 struct acc_queue *q = q_data->queue_private; 2961 int32_t avail = acc_ring_avail_enq(q); 2962 uint16_t i, enqueued_descs = 0; 2963 uint8_t cbs_in_tb; 2964 int descs_used; 2965 2966 for (i = 0; i < num; ++i) { 2967 if (q->d->device_variant == VRB1_VARIANT) { 2968 cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc); 2969 /* Check if there are available space for further processing. 
*/ 2970 if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) { 2971 acc_enqueue_ring_full(q_data); 2972 break; 2973 } 2974 descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i], 2975 enqueued_descs, cbs_in_tb); 2976 } else { 2977 if (unlikely(avail < 1)) { 2978 acc_enqueue_ring_full(q_data); 2979 break; 2980 } 2981 descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q, ops[i], enqueued_descs); 2982 } 2983 if (descs_used < 0) { 2984 acc_enqueue_invalid(q_data); 2985 break; 2986 } 2987 enqueued_descs += descs_used; 2988 avail -= descs_used; 2989 } 2990 if (unlikely(enqueued_descs == 0)) 2991 return 0; /* Nothing to enqueue. */ 2992 2993 acc_dma_enqueue(q, enqueued_descs, &q_data->queue_stats); 2994 2995 acc_update_qstat_enqueue(q_data, i, num - i); 2996 2997 return i; 2998 } 2999 3000 /* Enqueue encode operations for device. */ 3001 static uint16_t 3002 vrb_enqueue_enc(struct rte_bbdev_queue_data *q_data, 3003 struct rte_bbdev_enc_op **ops, uint16_t num) 3004 { 3005 int32_t aq_avail = acc_aq_avail(q_data, num); 3006 if (unlikely((aq_avail <= 0) || (num == 0))) 3007 return 0; 3008 if (ops[0]->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3009 return vrb_enqueue_enc_tb(q_data, ops, num); 3010 else 3011 return vrb_enqueue_enc_cb(q_data, ops, num); 3012 } 3013 3014 /* Enqueue encode operations for device. */ 3015 static uint16_t 3016 vrb_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data, 3017 struct rte_bbdev_enc_op **ops, uint16_t num) 3018 { 3019 int32_t aq_avail = acc_aq_avail(q_data, num); 3020 if (unlikely((aq_avail <= 0) || (num == 0))) 3021 return 0; 3022 if (ops[0]->ldpc_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3023 return vrb_enqueue_ldpc_enc_tb(q_data, ops, num); 3024 else 3025 return vrb_enqueue_ldpc_enc_cb(q_data, ops, num); 3026 } 3027 3028 3029 /* Enqueue decode operations for device in CB mode. */ 3030 static uint16_t 3031 vrb_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data, 3032 struct rte_bbdev_dec_op **ops, uint16_t num) 3033 { 3034 struct acc_queue *q = q_data->queue_private; 3035 int32_t avail = acc_ring_avail_enq(q); 3036 uint16_t i; 3037 int ret; 3038 3039 for (i = 0; i < num; ++i) { 3040 /* Check if there are available space for further processing. */ 3041 if (unlikely(avail - 1 < 0)) 3042 break; 3043 avail -= 1; 3044 3045 ret = enqueue_dec_one_op_cb(q, ops[i], i); 3046 if (ret < 0) 3047 break; 3048 } 3049 3050 if (unlikely(i == 0)) 3051 return 0; /* Nothing to enqueue. */ 3052 3053 acc_dma_enqueue(q, i, &q_data->queue_stats); 3054 3055 acc_update_qstat_enqueue(q_data, i, num - i); 3056 3057 return i; 3058 } 3059 3060 /* Enqueue decode operations for device in TB mode. */ 3061 static uint16_t 3062 vrb_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data, 3063 struct rte_bbdev_dec_op **ops, uint16_t num) 3064 { 3065 struct acc_queue *q = q_data->queue_private; 3066 int32_t avail = acc_ring_avail_enq(q); 3067 uint16_t i, enqueued_cbs = 0; 3068 uint8_t cbs_in_tb; 3069 int ret; 3070 3071 for (i = 0; i < num; ++i) { 3072 cbs_in_tb = get_num_cbs_in_tb_ldpc_dec(&ops[i]->ldpc_dec); 3073 /* Check if there are available space for further processing. 
*/ 3074 if (unlikely((avail - cbs_in_tb < 0) || 3075 (cbs_in_tb == 0))) 3076 break; 3077 avail -= cbs_in_tb; 3078 3079 ret = vrb_enqueue_ldpc_dec_one_op_tb(q, ops[i], 3080 enqueued_cbs, cbs_in_tb); 3081 if (ret <= 0) 3082 break; 3083 enqueued_cbs += ret; 3084 } 3085 3086 acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats); 3087 3088 acc_update_qstat_enqueue(q_data, i, num - i); 3089 return i; 3090 } 3091 3092 /* Enqueue decode operations for device in CB mode. */ 3093 static uint16_t 3094 vrb_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data, 3095 struct rte_bbdev_dec_op **ops, uint16_t num) 3096 { 3097 struct acc_queue *q = q_data->queue_private; 3098 int32_t avail = acc_ring_avail_enq(q); 3099 uint16_t i; 3100 int ret; 3101 bool same_op = false; 3102 3103 for (i = 0; i < num; ++i) { 3104 /* Check if there are available space for further processing. */ 3105 if (unlikely(avail < 1)) { 3106 acc_enqueue_ring_full(q_data); 3107 break; 3108 } 3109 avail -= 1; 3110 rte_bbdev_log(INFO, "Op %d %d %d %d %d %d %d %d %d %d %d %d", 3111 i, ops[i]->ldpc_dec.op_flags, ops[i]->ldpc_dec.rv_index, 3112 ops[i]->ldpc_dec.iter_max, ops[i]->ldpc_dec.iter_count, 3113 ops[i]->ldpc_dec.basegraph, ops[i]->ldpc_dec.z_c, 3114 ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m, 3115 ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e, 3116 same_op); 3117 ret = vrb_enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op); 3118 if (ret < 0) { 3119 acc_enqueue_invalid(q_data); 3120 break; 3121 } 3122 } 3123 3124 if (unlikely(i == 0)) 3125 return 0; /* Nothing to enqueue. */ 3126 3127 acc_dma_enqueue(q, i, &q_data->queue_stats); 3128 3129 acc_update_qstat_enqueue(q_data, i, num - i); 3130 return i; 3131 } 3132 3133 3134 /* Enqueue decode operations for device in TB mode. */ 3135 static uint16_t 3136 vrb_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data, 3137 struct rte_bbdev_dec_op **ops, uint16_t num) 3138 { 3139 struct acc_queue *q = q_data->queue_private; 3140 int32_t avail = acc_ring_avail_enq(q); 3141 uint16_t i, enqueued_cbs = 0; 3142 uint8_t cbs_in_tb; 3143 int ret; 3144 3145 for (i = 0; i < num; ++i) { 3146 cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec); 3147 /* Check if there are available space for further processing */ 3148 if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) { 3149 acc_enqueue_ring_full(q_data); 3150 break; 3151 } 3152 avail -= cbs_in_tb; 3153 3154 ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb); 3155 if (ret <= 0) { 3156 acc_enqueue_invalid(q_data); 3157 break; 3158 } 3159 enqueued_cbs += ret; 3160 } 3161 3162 acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats); 3163 3164 acc_update_qstat_enqueue(q_data, i, num - i); 3165 3166 return i; 3167 } 3168 3169 /* Enqueue decode operations for device. */ 3170 static uint16_t 3171 vrb_enqueue_dec(struct rte_bbdev_queue_data *q_data, 3172 struct rte_bbdev_dec_op **ops, uint16_t num) 3173 { 3174 int32_t aq_avail = acc_aq_avail(q_data, num); 3175 if (unlikely((aq_avail <= 0) || (num == 0))) 3176 return 0; 3177 if (ops[0]->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3178 return vrb_enqueue_dec_tb(q_data, ops, num); 3179 else 3180 return vrb_enqueue_dec_cb(q_data, ops, num); 3181 } 3182 3183 /* Enqueue decode operations for device. 
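 * LDPC decode dispatcher: the code-block mode of the first op in the burst
 * selects either the TB or the CB enqueue path for the whole burst, so a
 * burst is expected to contain ops of a single mode. An empty burst or an
 * exhausted atomic queue returns 0 without touching the ring.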
*/ 3184 static uint16_t 3185 vrb_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data, 3186 struct rte_bbdev_dec_op **ops, uint16_t num) 3187 { 3188 int32_t aq_avail = acc_aq_avail(q_data, num); 3189 if (unlikely((aq_avail <= 0) || (num == 0))) 3190 return 0; 3191 if (ops[0]->ldpc_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3192 return vrb_enqueue_ldpc_dec_tb(q_data, ops, num); 3193 else 3194 return vrb_enqueue_ldpc_dec_cb(q_data, ops, num); 3195 } 3196 3197 /* Update the operation status when dequeuing for any operation type. */ 3198 static inline void 3199 vrb_update_dequeued_operation(union acc_dma_desc *desc, union acc_dma_rsp_desc rsp, int *op_status, 3200 uint32_t *aq_dequeued, bool clear_rsp, bool clear_opstatus) 3201 { 3202 rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val); 3203 3204 /* Set status based on DMA response. */ 3205 if (clear_opstatus) 3206 *op_status = 0; 3207 *op_status |= ((rsp.input_err) ? (1 << RTE_BBDEV_DATA_ERROR) : 0); 3208 *op_status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0); 3209 *op_status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0); 3210 *op_status |= ((rsp.engine_hung) ? (1 << RTE_BBDEV_ENGINE_ERROR) : 0); 3211 3212 if (desc->req.last_desc_in_batch) { 3213 (*aq_dequeued)++; 3214 desc->req.last_desc_in_batch = 0; 3215 } 3216 3217 if (clear_rsp) { 3218 /* Clear response explicitly. */ 3219 desc->rsp.val = ACC_DMA_DESC_TYPE; 3220 desc->rsp.add_info_0 = 0; /* Reserved bits. */ 3221 desc->rsp.add_info_1 = 0; /* Reserved bits. */ 3222 } 3223 } 3224 3225 /* Dequeue one encode operations from device in CB mode. */ 3226 static inline int 3227 vrb_dequeue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op, 3228 uint16_t *dequeued_ops, uint32_t *aq_dequeued, uint16_t *dequeued_descs, 3229 uint16_t max_requested_ops) 3230 { 3231 union acc_dma_desc *desc, atom_desc; 3232 union acc_dma_rsp_desc rsp; 3233 struct rte_bbdev_enc_op *op; 3234 int i; 3235 struct acc_ptrs *context_ptrs; 3236 uint16_t desc_idx; 3237 3238 desc_idx = acc_desc_idx_tail(q, *dequeued_descs); 3239 desc = q->ring_addr + desc_idx; 3240 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3241 rte_memory_order_relaxed); 3242 3243 if (*dequeued_ops + desc->req.numCBs > max_requested_ops) 3244 return -1; 3245 3246 /* Check fdone bit. */ 3247 if (!(atom_desc.rsp.val & ACC_FDONE)) 3248 return -1; 3249 3250 rsp.val = atom_desc.rsp.val; 3251 3252 /* Dequeue. */ 3253 op = desc->req.op_addr; 3254 3255 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, true); 3256 3257 ref_op[0] = op; 3258 context_ptrs = q->companion_ring_addr + desc_idx; 3259 for (i = 1 ; i < desc->req.numCBs; i++) 3260 ref_op[i] = context_ptrs->ptr[i].op_addr; 3261 3262 /* One op was successfully dequeued. */ 3263 (*dequeued_descs)++; 3264 *dequeued_ops += desc->req.numCBs; 3265 return desc->req.numCBs; 3266 } 3267 3268 /* Dequeue one LDPC encode operations from VRB2 device in TB mode. */ 3269 static inline int 3270 vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op, 3271 uint16_t *dequeued_ops, uint32_t *aq_dequeued, 3272 uint16_t *dequeued_descs) 3273 { 3274 union acc_dma_desc *desc, atom_desc; 3275 union acc_dma_rsp_desc rsp; 3276 struct rte_bbdev_enc_op *op; 3277 3278 desc = acc_desc_tail(q, *dequeued_descs); 3279 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3280 rte_memory_order_relaxed); 3281 3282 /* Check fdone bit. 
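 * FDONE set by the hardware in the response word means this descriptor has
 * completed, otherwise the dequeue backs off and the caller simply retries
 * on a later poll. TB paths spanning several descriptors additionally wait
 * for SDONE on the last descriptor before releasing the op.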
*/ 3283 if (!(atom_desc.rsp.val & ACC_FDONE)) 3284 return -1; 3285 3286 rsp.val = atom_desc.rsp.val; 3287 3288 /* Dequeue. */ 3289 op = desc->req.op_addr; 3290 3291 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, true); 3292 3293 /* One op was successfully dequeued */ 3294 ref_op[0] = op; 3295 (*dequeued_descs)++; 3296 (*dequeued_ops)++; 3297 return 1; 3298 } 3299 3300 /* Dequeue one encode operations from device in TB mode. 3301 * That operation may cover multiple descriptors. 3302 */ 3303 static inline int 3304 vrb_dequeue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op, 3305 uint16_t *dequeued_ops, uint32_t *aq_dequeued, 3306 uint16_t *dequeued_descs, uint16_t max_requested_ops) 3307 { 3308 union acc_dma_desc *desc, *last_desc, atom_desc; 3309 union acc_dma_rsp_desc rsp; 3310 struct rte_bbdev_enc_op *op; 3311 uint8_t i = 0; 3312 uint16_t current_dequeued_descs = 0, descs_in_tb; 3313 3314 desc = acc_desc_tail(q, *dequeued_descs); 3315 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3316 rte_memory_order_relaxed); 3317 3318 if (*dequeued_ops + 1 > max_requested_ops) 3319 return -1; 3320 3321 /* Check fdone bit. */ 3322 if (!(atom_desc.rsp.val & ACC_FDONE)) 3323 return -1; 3324 3325 /* Get number of CBs in dequeued TB. */ 3326 descs_in_tb = desc->req.cbs_in_tb; 3327 /* Get last CB */ 3328 last_desc = acc_desc_tail(q, *dequeued_descs + descs_in_tb - 1); 3329 /* Check if last CB in TB is ready to dequeue (and thus 3330 * the whole TB) - checking sdone bit. If not return. 3331 */ 3332 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)last_desc, 3333 rte_memory_order_relaxed); 3334 if (!(atom_desc.rsp.val & ACC_SDONE)) 3335 return -1; 3336 3337 /* Dequeue. */ 3338 op = desc->req.op_addr; 3339 3340 /* Clearing status, it will be set based on response. */ 3341 op->status = 0; 3342 3343 while (i < descs_in_tb) { 3344 desc = acc_desc_tail(q, *dequeued_descs); 3345 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3346 rte_memory_order_relaxed); 3347 rsp.val = atom_desc.rsp.val; 3348 3349 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, false); 3350 3351 (*dequeued_descs)++; 3352 current_dequeued_descs++; 3353 i++; 3354 } 3355 3356 *ref_op = op; 3357 (*dequeued_ops)++; 3358 return current_dequeued_descs; 3359 } 3360 3361 /* Dequeue one decode operation from device in CB mode. */ 3362 static inline int 3363 vrb_dequeue_dec_one_op_cb(struct rte_bbdev_queue_data *q_data, 3364 struct acc_queue *q, struct rte_bbdev_dec_op **ref_op, 3365 uint16_t dequeued_cbs, uint32_t *aq_dequeued) 3366 { 3367 union acc_dma_desc *desc, atom_desc; 3368 union acc_dma_rsp_desc rsp; 3369 struct rte_bbdev_dec_op *op; 3370 3371 desc = acc_desc_tail(q, dequeued_cbs); 3372 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3373 rte_memory_order_relaxed); 3374 3375 /* Check fdone bit. */ 3376 if (!(atom_desc.rsp.val & ACC_FDONE)) 3377 return -1; 3378 3379 rsp.val = atom_desc.rsp.val; 3380 3381 /* Dequeue. */ 3382 op = desc->req.op_addr; 3383 3384 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, false, true); 3385 3386 if (op->status != 0) { 3387 /* These errors are not expected. */ 3388 q_data->queue_stats.dequeue_err_count++; 3389 vrb_check_ir(q->d); 3390 } 3391 3392 /* CRC invalid if error exists. 
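 * More precisely, the CRC status reported by the engine is only folded
 * into op->status when no data/driver error has been flagged already, so a
 * transport-level failure is not misreported as a plain CRC error; the
 * iteration count from the response is then handed back to the
 * application.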
*/ 3393 if (!op->status) 3394 op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR; 3395 op->turbo_dec.iter_count = (uint8_t) rsp.iter_cnt; 3396 3397 desc->rsp.val = ACC_DMA_DESC_TYPE; 3398 desc->rsp.add_info_0 = 0; 3399 desc->rsp.add_info_1 = 0; 3400 *ref_op = op; 3401 3402 /* One CB (op) was successfully dequeued. */ 3403 return 1; 3404 } 3405 3406 /* Dequeue one decode operations from device in CB mode. */ 3407 static inline int 3408 vrb_dequeue_ldpc_dec_one_op_cb(struct rte_bbdev_queue_data *q_data, 3409 struct acc_queue *q, struct rte_bbdev_dec_op **ref_op, 3410 uint16_t dequeued_cbs, uint32_t *aq_dequeued) 3411 { 3412 union acc_dma_desc *desc, atom_desc; 3413 union acc_dma_rsp_desc rsp; 3414 struct rte_bbdev_dec_op *op; 3415 3416 desc = acc_desc_tail(q, dequeued_cbs); 3417 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3418 rte_memory_order_relaxed); 3419 3420 /* Check fdone bit. */ 3421 if (!(atom_desc.rsp.val & ACC_FDONE)) 3422 return -1; 3423 3424 rsp.val = atom_desc.rsp.val; 3425 rte_bbdev_log_debug("Resp. desc %p: %x %x %x", desc, rsp.val, desc->rsp.add_info_0, 3426 desc->rsp.add_info_1); 3427 3428 /* Dequeue. */ 3429 op = desc->req.op_addr; 3430 3431 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, false, true); 3432 3433 /* Additional op status update for LDPC Decoder. */ 3434 if (op->status != 0) 3435 q_data->queue_stats.dequeue_err_count++; 3436 3437 op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR; 3438 if (op->ldpc_dec.hard_output.length > 0 && !rsp.synd_ok) 3439 op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR; 3440 3441 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) || 3442 check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) { 3443 if (desc->rsp.add_info_1 != 0) 3444 op->status |= 1 << RTE_BBDEV_CRC_ERROR; 3445 } 3446 3447 op->ldpc_dec.iter_count = (uint8_t) rsp.iter_cnt; 3448 3449 if (op->status & (1 << RTE_BBDEV_DRV_ERROR)) 3450 vrb_check_ir(q->d); 3451 3452 desc->rsp.val = ACC_DMA_DESC_TYPE; 3453 desc->rsp.add_info_0 = 0; 3454 desc->rsp.add_info_1 = 0; 3455 3456 *ref_op = op; 3457 3458 /* One CB (op) was successfully dequeued. */ 3459 return 1; 3460 } 3461 3462 /* Dequeue one decode operations from device in TB mode for 4G or 5G. */ 3463 static inline int 3464 vrb_dequeue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op **ref_op, 3465 uint16_t dequeued_cbs, uint32_t *aq_dequeued) 3466 { 3467 union acc_dma_desc *desc, *last_desc, atom_desc; 3468 union acc_dma_rsp_desc rsp; 3469 struct rte_bbdev_dec_op *op; 3470 uint8_t cbs_in_tb = 1, cb_idx = 0; 3471 uint32_t tb_crc_check = 0; 3472 3473 desc = acc_desc_tail(q, dequeued_cbs); 3474 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3475 rte_memory_order_relaxed); 3476 3477 /* Check fdone bit. */ 3478 if (!(atom_desc.rsp.val & ACC_FDONE)) 3479 return -1; 3480 3481 /* Dequeue. */ 3482 op = desc->req.op_addr; 3483 3484 /* Get number of CBs in dequeued TB. */ 3485 cbs_in_tb = desc->req.cbs_in_tb; 3486 /* Get last CB. */ 3487 last_desc = acc_desc_tail(q, dequeued_cbs + cbs_in_tb - 1); 3488 /* Check if last CB in TB is ready to dequeue (and thus the whole TB) - checking sdone bit. 3489 * If not return. 3490 */ 3491 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)last_desc, 3492 rte_memory_order_relaxed); 3493 if (!(atom_desc.rsp.val & ACC_SDONE)) 3494 return -1; 3495 3496 /* Clearing status, it will be set based on response. 
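 * The TB-level status is then accumulated over every CB response: the
 * error bits are OR-ed together, the iteration count keeps the maximum
 * observed across CBs, and when a TB CRC24A check was requested the per-CB
 * add_info_1 words are XOR-ed so that any non-zero residue marks the TB
 * with a CRC error.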
*/ 3497 op->status = 0; 3498 3499 /* Read remaining CBs if exists. */ 3500 while (cb_idx < cbs_in_tb) { 3501 desc = acc_desc_tail(q, dequeued_cbs); 3502 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3503 rte_memory_order_relaxed); 3504 rsp.val = atom_desc.rsp.val; 3505 rte_bbdev_log_debug("Resp. desc %p: %x %x %x", desc, 3506 rsp.val, desc->rsp.add_info_0, 3507 desc->rsp.add_info_1); 3508 3509 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, false, false); 3510 3511 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK)) 3512 tb_crc_check ^= desc->rsp.add_info_1; 3513 3514 /* CRC invalid if error exists. */ 3515 if (!op->status) 3516 op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR; 3517 if (q->op_type == RTE_BBDEV_OP_LDPC_DEC) 3518 op->ldpc_dec.iter_count = RTE_MAX((uint8_t) rsp.iter_cnt, 3519 op->ldpc_dec.iter_count); 3520 else 3521 op->turbo_dec.iter_count = RTE_MAX((uint8_t) rsp.iter_cnt, 3522 op->turbo_dec.iter_count); 3523 3524 desc->rsp.val = ACC_DMA_DESC_TYPE; 3525 desc->rsp.add_info_0 = 0; 3526 desc->rsp.add_info_1 = 0; 3527 dequeued_cbs++; 3528 cb_idx++; 3529 } 3530 3531 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK)) { 3532 rte_bbdev_log_debug("TB-CRC Check %x", tb_crc_check); 3533 if (tb_crc_check > 0) 3534 op->status |= 1 << RTE_BBDEV_CRC_ERROR; 3535 } 3536 3537 *ref_op = op; 3538 3539 return cb_idx; 3540 } 3541 3542 /* Dequeue encode operations from device. */ 3543 static uint16_t 3544 vrb_dequeue_enc(struct rte_bbdev_queue_data *q_data, 3545 struct rte_bbdev_enc_op **ops, uint16_t num) 3546 { 3547 struct acc_queue *q = q_data->queue_private; 3548 uint32_t avail = acc_ring_avail_deq(q); 3549 uint32_t aq_dequeued = 0; 3550 uint16_t i, dequeued_ops = 0, dequeued_descs = 0; 3551 int ret, cbm; 3552 struct rte_bbdev_enc_op *op; 3553 if (avail == 0) 3554 return 0; 3555 op = acc_op_tail(q, 0); 3556 cbm = op->turbo_enc.code_block_mode; 3557 3558 for (i = 0; i < avail; i++) { 3559 if (cbm == RTE_BBDEV_TRANSPORT_BLOCK) 3560 ret = vrb_dequeue_enc_one_op_tb(q, &ops[dequeued_ops], 3561 &dequeued_ops, &aq_dequeued, 3562 &dequeued_descs, num); 3563 else 3564 ret = vrb_dequeue_enc_one_op_cb(q, &ops[dequeued_ops], 3565 &dequeued_ops, &aq_dequeued, 3566 &dequeued_descs, num); 3567 if (ret < 0) 3568 break; 3569 } 3570 3571 q->aq_dequeued += aq_dequeued; 3572 q->sw_ring_tail += dequeued_descs; 3573 3574 acc_update_qstat_dequeue(q_data, dequeued_ops); 3575 3576 return dequeued_ops; 3577 } 3578 3579 /* Dequeue LDPC encode operations from device. 
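 *
 * For illustration only, this burst callback is normally reached through the
 * public bbdev API; a minimal caller-side sketch (dev_id and q_id are
 * placeholders, not defined in this file):
 *
 *   struct rte_bbdev_enc_op *deq_ops[32];
 *   uint16_t nb = rte_bbdev_dequeue_ldpc_enc_ops(dev_id, q_id, deq_ops, 32);
 *
 * The loop below walks the software ring in order and stops at the first
 * descriptor whose FDONE bit is not yet set, or once 'num' operations have
 * been collected.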
*/ 3580 static uint16_t 3581 vrb_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data, 3582 struct rte_bbdev_enc_op **ops, uint16_t num) 3583 { 3584 struct acc_queue *q = q_data->queue_private; 3585 uint32_t avail = acc_ring_avail_deq(q); 3586 uint32_t aq_dequeued = 0; 3587 uint16_t i, dequeued_ops = 0, dequeued_descs = 0; 3588 int ret, cbm; 3589 struct rte_bbdev_enc_op *op; 3590 if (avail == 0) 3591 return 0; 3592 op = acc_op_tail(q, 0); 3593 cbm = op->ldpc_enc.code_block_mode; 3594 3595 for (i = 0; i < avail; i++) { 3596 if (cbm == RTE_BBDEV_TRANSPORT_BLOCK) 3597 if (q->d->device_variant == VRB1_VARIANT) 3598 ret = vrb_dequeue_enc_one_op_tb(q, &ops[dequeued_ops], 3599 &dequeued_ops, &aq_dequeued, 3600 &dequeued_descs, num); 3601 else 3602 ret = vrb2_dequeue_ldpc_enc_one_op_tb(q, &ops[dequeued_ops], 3603 &dequeued_ops, &aq_dequeued, 3604 &dequeued_descs); 3605 else 3606 ret = vrb_dequeue_enc_one_op_cb(q, &ops[dequeued_ops], 3607 &dequeued_ops, &aq_dequeued, 3608 &dequeued_descs, num); 3609 if (ret < 0) 3610 break; 3611 } 3612 3613 q->aq_dequeued += aq_dequeued; 3614 q->sw_ring_tail += dequeued_descs; 3615 3616 acc_update_qstat_dequeue(q_data, dequeued_ops); 3617 3618 return dequeued_ops; 3619 } 3620 3621 /* Dequeue decode operations from device. */ 3622 static uint16_t 3623 vrb_dequeue_dec(struct rte_bbdev_queue_data *q_data, 3624 struct rte_bbdev_dec_op **ops, uint16_t num) 3625 { 3626 struct acc_queue *q = q_data->queue_private; 3627 uint16_t dequeue_num; 3628 uint32_t avail = acc_ring_avail_deq(q); 3629 uint32_t aq_dequeued = 0; 3630 uint16_t i; 3631 uint16_t dequeued_cbs = 0; 3632 struct rte_bbdev_dec_op *op; 3633 int ret; 3634 3635 dequeue_num = (avail < num) ? avail : num; 3636 3637 for (i = 0; i < dequeue_num; ++i) { 3638 op = acc_op_tail(q, dequeued_cbs); 3639 if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3640 ret = vrb_dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs, 3641 &aq_dequeued); 3642 else 3643 ret = vrb_dequeue_dec_one_op_cb(q_data, q, &ops[i], 3644 dequeued_cbs, &aq_dequeued); 3645 3646 if (ret <= 0) 3647 break; 3648 dequeued_cbs += ret; 3649 } 3650 3651 q->aq_dequeued += aq_dequeued; 3652 q->sw_ring_tail += dequeued_cbs; 3653 3654 acc_update_qstat_dequeue(q_data, i); 3655 3656 return i; 3657 } 3658 3659 /* Dequeue decode operations from device. */ 3660 static uint16_t 3661 vrb_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data, 3662 struct rte_bbdev_dec_op **ops, uint16_t num) 3663 { 3664 struct acc_queue *q = q_data->queue_private; 3665 uint16_t dequeue_num; 3666 uint32_t avail = acc_ring_avail_deq(q); 3667 uint32_t aq_dequeued = 0; 3668 uint16_t i; 3669 uint16_t dequeued_cbs = 0; 3670 struct rte_bbdev_dec_op *op; 3671 int ret; 3672 3673 dequeue_num = RTE_MIN(avail, num); 3674 3675 for (i = 0; i < dequeue_num; ++i) { 3676 op = acc_op_tail(q, dequeued_cbs); 3677 if (op->ldpc_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3678 ret = vrb_dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs, 3679 &aq_dequeued); 3680 else 3681 ret = vrb_dequeue_ldpc_dec_one_op_cb( 3682 q_data, q, &ops[i], dequeued_cbs, 3683 &aq_dequeued); 3684 3685 if (ret <= 0) 3686 break; 3687 dequeued_cbs += ret; 3688 } 3689 3690 q->aq_dequeued += aq_dequeued; 3691 q->sw_ring_tail += dequeued_cbs; 3692 3693 acc_update_qstat_dequeue(q_data, i); 3694 3695 return i; 3696 } 3697 3698 /* Fill in a frame control word for FFT processing. 
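 *
 * The window indexes are byte-packed into the FCW: window_index[0..3] land in
 * cs_window_sel (index 0 in the least significant byte) and window_index[4..5]
 * in cs_window_sel2. For example, window_index[] = {1, 2, 3, 4, 5, 6} gives
 * cs_window_sel = 0x04030201 and cs_window_sel2 = 0x0605.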
*/ 3699 static inline void 3700 vrb1_fcw_fft_fill(struct rte_bbdev_fft_op *op, struct acc_fcw_fft *fcw) 3701 { 3702 fcw->in_frame_size = op->fft.input_sequence_size; 3703 fcw->leading_pad_size = op->fft.input_leading_padding; 3704 fcw->out_frame_size = op->fft.output_sequence_size; 3705 fcw->leading_depad_size = op->fft.output_leading_depadding; 3706 fcw->cs_window_sel = op->fft.window_index[0] + 3707 (op->fft.window_index[1] << 8) + 3708 (op->fft.window_index[2] << 16) + 3709 (op->fft.window_index[3] << 24); 3710 fcw->cs_window_sel2 = op->fft.window_index[4] + 3711 (op->fft.window_index[5] << 8); 3712 fcw->cs_enable_bmap = op->fft.cs_bitmap; 3713 fcw->num_antennas = op->fft.num_antennas_log2; 3714 fcw->idft_size = op->fft.idft_log2; 3715 fcw->dft_size = op->fft.dft_log2; 3716 fcw->cs_offset = op->fft.cs_time_adjustment; 3717 fcw->idft_shift = op->fft.idft_shift; 3718 fcw->dft_shift = op->fft.dft_shift; 3719 fcw->cs_multiplier = op->fft.ncs_reciprocal; 3720 if (check_bit(op->fft.op_flags, RTE_BBDEV_FFT_IDFT_BYPASS)) { 3721 if (check_bit(op->fft.op_flags, RTE_BBDEV_FFT_WINDOWING_BYPASS)) 3722 fcw->bypass = 2; 3723 else 3724 fcw->bypass = 1; 3725 } else if (check_bit(op->fft.op_flags, RTE_BBDEV_FFT_DFT_BYPASS)) 3726 fcw->bypass = 3; 3727 else 3728 fcw->bypass = 0; 3729 } 3730 3731 /* Fill in a frame control word for FFT processing. */ 3732 static inline void 3733 vrb2_fcw_fft_fill(struct rte_bbdev_fft_op *op, struct acc_fcw_fft_3 *fcw) 3734 { 3735 uint8_t cs; 3736 3737 fcw->in_frame_size = op->fft.input_sequence_size; 3738 fcw->leading_pad_size = op->fft.input_leading_padding; 3739 fcw->out_frame_size = op->fft.output_sequence_size; 3740 fcw->leading_depad_size = op->fft.output_leading_depadding; 3741 fcw->cs_window_sel = op->fft.window_index[0] + 3742 (op->fft.window_index[1] << 8) + 3743 (op->fft.window_index[2] << 16) + 3744 (op->fft.window_index[3] << 24); 3745 fcw->cs_window_sel2 = op->fft.window_index[4] + 3746 (op->fft.window_index[5] << 8); 3747 fcw->cs_enable_bmap = op->fft.cs_bitmap; 3748 fcw->num_antennas = op->fft.num_antennas_log2; 3749 fcw->idft_size = op->fft.idft_log2; 3750 fcw->dft_size = op->fft.dft_log2; 3751 fcw->cs_offset = op->fft.cs_time_adjustment; 3752 fcw->idft_shift = op->fft.idft_shift; 3753 fcw->dft_shift = op->fft.dft_shift; 3754 fcw->cs_multiplier = op->fft.ncs_reciprocal; 3755 fcw->power_shift = op->fft.power_shift; 3756 fcw->exp_adj = op->fft.fp16_exp_adjust; 3757 fcw->fp16_in = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_FP16_INPUT); 3758 fcw->fp16_out = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_FP16_OUTPUT); 3759 fcw->power_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS); 3760 if (check_bit(op->fft.op_flags, 3761 RTE_BBDEV_FFT_IDFT_BYPASS)) { 3762 if (check_bit(op->fft.op_flags, 3763 RTE_BBDEV_FFT_WINDOWING_BYPASS)) 3764 fcw->bypass = 2; 3765 else 3766 fcw->bypass = 1; 3767 } else if (check_bit(op->fft.op_flags, 3768 RTE_BBDEV_FFT_DFT_BYPASS)) 3769 fcw->bypass = 3; 3770 else 3771 fcw->bypass = 0; 3772 3773 fcw->enable_dewin = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_DEWINDOWING); 3774 fcw->freq_resample_mode = op->fft.freq_resample_mode; 3775 fcw->depad_output_size = fcw->freq_resample_mode == 0 ? 
3776 op->fft.output_sequence_size : op->fft.output_depadded_size; 3777 for (cs = 0; cs < RTE_BBDEV_MAX_CS; cs++) { 3778 fcw->cs_theta_0[cs] = op->fft.cs_theta_0[cs]; 3779 fcw->cs_theta_d[cs] = op->fft.cs_theta_d[cs]; 3780 fcw->cs_time_offset[cs] = op->fft.time_offset[cs]; 3781 } 3782 } 3783 3784 static inline int 3785 vrb_dma_desc_fft_fill(struct rte_bbdev_fft_op *op, 3786 struct acc_dma_req_desc *desc, 3787 struct rte_mbuf *input, struct rte_mbuf *output, struct rte_mbuf *win_input, 3788 struct rte_mbuf *pwr, uint32_t *in_offset, uint32_t *out_offset, 3789 uint32_t *win_offset, uint32_t *pwr_offset, uint16_t device_variant) 3790 { 3791 bool pwr_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS); 3792 bool win_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_DEWINDOWING); 3793 int num_cs = 0, i, bd_idx = 1; 3794 3795 if (device_variant == VRB1_VARIANT) { 3796 /* Force unsupported descriptor format out. */ 3797 pwr_en = 0; 3798 win_en = 0; 3799 } 3800 3801 /* FCW already done */ 3802 acc_header_init(desc); 3803 3804 if (win_en && win_input) { 3805 desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(win_input, *win_offset); 3806 desc->data_ptrs[bd_idx].blen = op->fft.output_depadded_size * 2; 3807 desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_DEWIN_IN; 3808 desc->data_ptrs[bd_idx].last = 0; 3809 desc->data_ptrs[bd_idx].dma_ext = 0; 3810 bd_idx++; 3811 } 3812 3813 desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(input, *in_offset); 3814 desc->data_ptrs[bd_idx].blen = op->fft.input_sequence_size * ACC_IQ_SIZE; 3815 desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_IN; 3816 desc->data_ptrs[bd_idx].last = 1; 3817 desc->data_ptrs[bd_idx].dma_ext = 0; 3818 bd_idx++; 3819 3820 desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(output, *out_offset); 3821 desc->data_ptrs[bd_idx].blen = op->fft.output_sequence_size * ACC_IQ_SIZE; 3822 desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_OUT_HARD; 3823 desc->data_ptrs[bd_idx].last = pwr_en ? 0 : 1; 3824 desc->data_ptrs[bd_idx].dma_ext = 0; 3825 desc->m2dlen = win_en ? 3 : 2; 3826 desc->d2mlen = pwr_en ? 2 : 1; 3827 desc->ib_ant_offset = op->fft.input_sequence_size; 3828 desc->num_ant = op->fft.num_antennas_log2 - 3; 3829 3830 for (i = 0; i < RTE_BBDEV_MAX_CS; i++) 3831 if (check_bit(op->fft.cs_bitmap, 1 << i)) 3832 num_cs++; 3833 desc->num_cs = num_cs; 3834 3835 if (pwr_en && pwr) { 3836 bd_idx++; 3837 desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(pwr, *pwr_offset); 3838 desc->data_ptrs[bd_idx].blen = num_cs * (1 << op->fft.num_antennas_log2) * 4; 3839 desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_OUT_SOFT; 3840 desc->data_ptrs[bd_idx].last = 1; 3841 desc->data_ptrs[bd_idx].dma_ext = 0; 3842 } 3843 desc->ob_cyc_offset = op->fft.output_sequence_size; 3844 desc->ob_ant_offset = op->fft.output_sequence_size * num_cs; 3845 desc->op_addr = op; 3846 return 0; 3847 } 3848 3849 /** Enqueue one FFT operation for device. 
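 *
 * Each FFT operation consumes a single descriptor; the FCW is written to the
 * queue FCW ring at the matching slot and its layout (acc_fcw_fft for VRB1,
 * acc_fcw_fft_3 otherwise) is selected from the device variant. Callers
 * normally reach this path through the public burst API, e.g. (placeholder
 * dev_id/q_id, sketch only):
 *
 *   uint16_t nb = rte_bbdev_enqueue_fft_ops(dev_id, q_id, ops, num_ops);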
*/ 3850 static inline int 3851 vrb_enqueue_fft_one_op(struct acc_queue *q, struct rte_bbdev_fft_op *op, 3852 uint16_t total_enqueued_cbs) 3853 { 3854 union acc_dma_desc *desc; 3855 struct rte_mbuf *input, *output, *pwr, *win; 3856 uint32_t in_offset, out_offset, pwr_offset, win_offset; 3857 struct acc_fcw_fft *fcw; 3858 3859 desc = acc_desc(q, total_enqueued_cbs); 3860 input = op->fft.base_input.data; 3861 output = op->fft.base_output.data; 3862 pwr = op->fft.power_meas_output.data; 3863 win = op->fft.dewindowing_input.data; 3864 in_offset = op->fft.base_input.offset; 3865 out_offset = op->fft.base_output.offset; 3866 pwr_offset = op->fft.power_meas_output.offset; 3867 win_offset = op->fft.dewindowing_input.offset; 3868 3869 fcw = (struct acc_fcw_fft *) (q->fcw_ring + 3870 ((q->sw_ring_head + total_enqueued_cbs) & q->sw_ring_wrap_mask) 3871 * ACC_MAX_FCW_SIZE); 3872 3873 if (q->d->device_variant == VRB1_VARIANT) 3874 vrb1_fcw_fft_fill(op, fcw); 3875 else 3876 vrb2_fcw_fft_fill(op, (struct acc_fcw_fft_3 *) fcw); 3877 vrb_dma_desc_fft_fill(op, &desc->req, input, output, win, pwr, 3878 &in_offset, &out_offset, &win_offset, &pwr_offset, q->d->device_variant); 3879 #ifdef RTE_LIBRTE_BBDEV_DEBUG 3880 rte_memdump(stderr, "FCW", fcw, 128); 3881 rte_memdump(stderr, "Req Desc.", desc, 128); 3882 #endif 3883 return 1; 3884 } 3885 3886 /* Enqueue decode operations for device. */ 3887 static uint16_t 3888 vrb_enqueue_fft(struct rte_bbdev_queue_data *q_data, 3889 struct rte_bbdev_fft_op **ops, uint16_t num) 3890 { 3891 struct acc_queue *q; 3892 int32_t aq_avail, avail; 3893 uint16_t i; 3894 int ret; 3895 3896 aq_avail = acc_aq_avail(q_data, num); 3897 if (unlikely((aq_avail <= 0) || (num == 0))) 3898 return 0; 3899 q = q_data->queue_private; 3900 avail = acc_ring_avail_enq(q); 3901 3902 for (i = 0; i < num; ++i) { 3903 /* Check if there are available space for further processing. */ 3904 if (unlikely(avail < 1)) 3905 break; 3906 avail -= 1; 3907 ret = vrb_enqueue_fft_one_op(q, ops[i], i); 3908 if (ret < 0) 3909 break; 3910 } 3911 3912 if (unlikely(i == 0)) 3913 return 0; /* Nothing to enqueue. */ 3914 3915 acc_dma_enqueue(q, i, &q_data->queue_stats); 3916 3917 acc_update_qstat_enqueue(q_data, i, num - i); 3918 return i; 3919 } 3920 3921 3922 /* Dequeue one FFT operations from device. */ 3923 static inline int 3924 vrb_dequeue_fft_one_op(struct rte_bbdev_queue_data *q_data, 3925 struct acc_queue *q, struct rte_bbdev_fft_op **ref_op, 3926 uint16_t dequeued_cbs, uint32_t *aq_dequeued) 3927 { 3928 union acc_dma_desc *desc, atom_desc; 3929 union acc_dma_rsp_desc rsp; 3930 struct rte_bbdev_fft_op *op; 3931 3932 desc = acc_desc_tail(q, dequeued_cbs); 3933 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 3934 rte_memory_order_relaxed); 3935 3936 /* Check fdone bit */ 3937 if (!(atom_desc.rsp.val & ACC_FDONE)) 3938 return -1; 3939 3940 rsp.val = atom_desc.rsp.val; 3941 #ifdef RTE_LIBRTE_BBDEV_DEBUG 3942 rte_memdump(stderr, "Resp", &desc->rsp.val, 3943 sizeof(desc->rsp.val)); 3944 #endif 3945 /* Dequeue. */ 3946 op = desc->req.op_addr; 3947 3948 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, true); 3949 3950 if (op->status != 0) 3951 q_data->queue_stats.dequeue_err_count++; 3952 3953 if (op->status & (1 << RTE_BBDEV_DRV_ERROR)) 3954 vrb_check_ir(q->d); 3955 3956 *ref_op = op; 3957 /* One CB (op) was successfully dequeued. */ 3958 return 1; 3959 } 3960 3961 3962 /* Dequeue FFT operations from device. 
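 *
 * Caller-side counterpart of the FFT enqueue path (placeholder names, public
 * API assumed available):
 *
 *   struct rte_bbdev_fft_op *deq[16];
 *   uint16_t nb = rte_bbdev_dequeue_fft_ops(dev_id, q_id, deq, 16);
 *
 * At most RTE_MIN(ring occupancy, num) operations are returned and the loop
 * stops early on the first descriptor without FDONE set.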
*/ 3963 static uint16_t 3964 vrb_dequeue_fft(struct rte_bbdev_queue_data *q_data, 3965 struct rte_bbdev_fft_op **ops, uint16_t num) 3966 { 3967 struct acc_queue *q = q_data->queue_private; 3968 uint16_t dequeue_num, i, dequeued_cbs = 0; 3969 uint32_t avail = acc_ring_avail_deq(q); 3970 uint32_t aq_dequeued = 0; 3971 int ret; 3972 3973 dequeue_num = RTE_MIN(avail, num); 3974 3975 for (i = 0; i < dequeue_num; ++i) { 3976 ret = vrb_dequeue_fft_one_op(q_data, q, &ops[i], dequeued_cbs, &aq_dequeued); 3977 if (ret <= 0) 3978 break; 3979 dequeued_cbs += ret; 3980 } 3981 3982 q->aq_dequeued += aq_dequeued; 3983 q->sw_ring_tail += dequeued_cbs; 3984 acc_update_qstat_dequeue(q_data, i); 3985 return i; 3986 } 3987 3988 /* Fill in a frame control word for MLD-TS processing. */ 3989 static inline void 3990 vrb2_fcw_mldts_fill(struct rte_bbdev_mldts_op *op, struct acc_fcw_mldts *fcw) 3991 { 3992 fcw->nrb = op->mldts.num_rbs; 3993 fcw->NLayers = op->mldts.num_layers - 1; 3994 fcw->Qmod0 = (op->mldts.q_m[0] >> 1) - 1; 3995 fcw->Qmod1 = (op->mldts.q_m[1] >> 1) - 1; 3996 fcw->Qmod2 = (op->mldts.q_m[2] >> 1) - 1; 3997 fcw->Qmod3 = (op->mldts.q_m[3] >> 1) - 1; 3998 /* Mark some layers as disabled */ 3999 if (op->mldts.num_layers == 2) { 4000 fcw->Qmod2 = 3; 4001 fcw->Qmod3 = 3; 4002 } 4003 if (op->mldts.num_layers == 3) 4004 fcw->Qmod3 = 3; 4005 fcw->Rrep = op->mldts.r_rep; 4006 fcw->Crep = op->mldts.c_rep; 4007 } 4008 4009 /* Fill in descriptor for one MLD-TS processing operation. */ 4010 static inline int 4011 vrb2_dma_desc_mldts_fill(struct rte_bbdev_mldts_op *op, 4012 struct acc_dma_req_desc *desc, 4013 struct rte_mbuf *input_q, struct rte_mbuf *input_r, 4014 struct rte_mbuf *output, 4015 uint32_t *in_offset, uint32_t *out_offset) 4016 { 4017 uint16_t qsize_per_re[VRB2_MLD_LAY_SIZE] = {8, 12, 16}; /* Layer 2 to 4. */ 4018 uint16_t rsize_per_re[VRB2_MLD_LAY_SIZE] = {14, 26, 42}; 4019 uint16_t sc_factor_per_rrep[VRB2_MLD_RREP_SIZE] = {12, 6, 4, 3, 0, 2}; 4020 uint16_t i, outsize_per_re = 0; 4021 uint32_t sc_num, r_num, q_size, r_size, out_size; 4022 4023 /* Prevent out of range access. */ 4024 if (op->mldts.r_rep > 5) 4025 op->mldts.r_rep = 5; 4026 if (op->mldts.num_layers < 2) 4027 op->mldts.num_layers = 2; 4028 if (op->mldts.num_layers > 4) 4029 op->mldts.num_layers = 4; 4030 for (i = 0; i < op->mldts.num_layers; i++) 4031 outsize_per_re += op->mldts.q_m[i]; 4032 sc_num = op->mldts.num_rbs * RTE_BBDEV_SCPERRB * (op->mldts.c_rep + 1); 4033 r_num = op->mldts.num_rbs * sc_factor_per_rrep[op->mldts.r_rep]; 4034 q_size = qsize_per_re[op->mldts.num_layers - 2] * sc_num; 4035 r_size = rsize_per_re[op->mldts.num_layers - 2] * r_num; 4036 out_size = sc_num * outsize_per_re; 4037 4038 /* FCW already done. 
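 * As a worked example of the sizing above (assuming RTE_BBDEV_SCPERRB is 12):
 * num_rbs = 4, num_layers = 2, q_m = {4, 4}, c_rep = 0, r_rep = 0 gives
 * sc_num = 4 * 12 * 1 = 48, r_num = 4 * 12 = 48, hence q_size = 8 * 48 = 384,
 * r_size = 14 * 48 = 672 and out_size = 48 * (4 + 4) = 384 bytes.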
*/ 4039 acc_header_init(desc); 4040 desc->data_ptrs[1].address = rte_pktmbuf_iova_offset(input_q, *in_offset); 4041 desc->data_ptrs[1].blen = q_size; 4042 desc->data_ptrs[1].blkid = ACC_DMA_BLKID_IN; 4043 desc->data_ptrs[1].last = 0; 4044 desc->data_ptrs[1].dma_ext = 0; 4045 desc->data_ptrs[2].address = rte_pktmbuf_iova_offset(input_r, *in_offset); 4046 desc->data_ptrs[2].blen = r_size; 4047 desc->data_ptrs[2].blkid = ACC_DMA_BLKID_IN_MLD_R; 4048 desc->data_ptrs[2].last = 1; 4049 desc->data_ptrs[2].dma_ext = 0; 4050 desc->data_ptrs[3].address = rte_pktmbuf_iova_offset(output, *out_offset); 4051 desc->data_ptrs[3].blen = out_size; 4052 desc->data_ptrs[3].blkid = ACC_DMA_BLKID_OUT_HARD; 4053 desc->data_ptrs[3].last = 1; 4054 desc->data_ptrs[3].dma_ext = 0; 4055 desc->m2dlen = 3; 4056 desc->d2mlen = 1; 4057 desc->op_addr = op; 4058 desc->cbs_in_tb = 1; 4059 4060 return 0; 4061 } 4062 4063 /* Check whether the MLD operation can be processed as a single operation. */ 4064 static inline bool 4065 vrb2_check_mld_r_constraint(struct rte_bbdev_mldts_op *op) { 4066 uint8_t layer_idx, rrep_idx; 4067 uint16_t max_rb[VRB2_MLD_LAY_SIZE][VRB2_MLD_RREP_SIZE] = { 4068 {188, 275, 275, 275, 0, 275}, 4069 {101, 202, 275, 275, 0, 275}, 4070 {62, 124, 186, 248, 0, 275} }; 4071 4072 if (op->mldts.c_rep == 0) 4073 return true; 4074 4075 layer_idx = RTE_MIN(op->mldts.num_layers - VRB2_MLD_MIN_LAYER, 4076 VRB2_MLD_MAX_LAYER - VRB2_MLD_MIN_LAYER); 4077 rrep_idx = RTE_MIN(op->mldts.r_rep, VRB2_MLD_MAX_RREP); 4078 rte_bbdev_log_debug("RB %d index %d %d max %d", op->mldts.num_rbs, layer_idx, rrep_idx, 4079 max_rb[layer_idx][rrep_idx]); 4080 4081 return (op->mldts.num_rbs <= max_rb[layer_idx][rrep_idx]); 4082 } 4083 4084 /** Enqueue MLDTS operation split across symbols. */ 4085 static inline int 4086 enqueue_mldts_split_op(struct acc_queue *q, struct rte_bbdev_mldts_op *op, 4087 uint16_t total_enqueued_descs) 4088 { 4089 uint16_t qsize_per_re[VRB2_MLD_LAY_SIZE] = {8, 12, 16}; /* Layer 2 to 4. */ 4090 uint16_t rsize_per_re[VRB2_MLD_LAY_SIZE] = {14, 26, 42}; 4091 uint16_t sc_factor_per_rrep[VRB2_MLD_RREP_SIZE] = {12, 6, 4, 3, 0, 2}; 4092 uint32_t i, outsize_per_re = 0, sc_num, r_num, q_size, r_size, out_size, num_syms; 4093 union acc_dma_desc *desc, *first_desc; 4094 uint16_t desc_idx, symb; 4095 struct rte_mbuf *input_q, *input_r, *output; 4096 uint32_t in_offset, out_offset; 4097 struct acc_fcw_mldts *fcw; 4098 4099 desc_idx = acc_desc_idx(q, total_enqueued_descs); 4100 first_desc = q->ring_addr + desc_idx; 4101 input_q = op->mldts.qhy_input.data; 4102 input_r = op->mldts.r_input.data; 4103 output = op->mldts.output.data; 4104 in_offset = op->mldts.qhy_input.offset; 4105 out_offset = op->mldts.output.offset; 4106 num_syms = op->mldts.c_rep + 1; 4107 fcw = &first_desc->req.fcw_mldts; 4108 vrb2_fcw_mldts_fill(op, fcw); 4109 fcw->Crep = 0; /* C rep forced to zero. */ 4110 4111 /* Prevent out of range access. */ 4112 if (op->mldts.r_rep > 5) 4113 op->mldts.r_rep = 5; 4114 if (op->mldts.num_layers < 2) 4115 op->mldts.num_layers = 2; 4116 if (op->mldts.num_layers > 4) 4117 op->mldts.num_layers = 4; 4118 4119 for (i = 0; i < op->mldts.num_layers; i++) 4120 outsize_per_re += op->mldts.q_m[i]; 4121 sc_num = op->mldts.num_rbs * RTE_BBDEV_SCPERRB; /* C rep forced to zero. 
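 * Each of the c_rep + 1 symbols below is enqueued as its own descriptor: the
 * Qhy input and output offsets advance by q_size and out_size per symbol,
 * while the full R input is re-attached from offset 0 for every descriptor.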
*/ 4122 r_num = op->mldts.num_rbs * sc_factor_per_rrep[op->mldts.r_rep]; 4123 q_size = qsize_per_re[op->mldts.num_layers - 2] * sc_num; 4124 r_size = rsize_per_re[op->mldts.num_layers - 2] * r_num; 4125 out_size = sc_num * outsize_per_re; 4126 4127 for (symb = 0; symb < num_syms; symb++) { 4128 desc_idx = ((q->sw_ring_head + total_enqueued_descs + symb) & q->sw_ring_wrap_mask); 4129 desc = q->ring_addr + desc_idx; 4130 acc_header_init(&desc->req); 4131 if (symb == 0) 4132 desc->req.cbs_in_tb = num_syms; 4133 else 4134 rte_memcpy(&desc->req.fcw_mldts, fcw, ACC_FCW_MLDTS_BLEN); 4135 desc->req.data_ptrs[1].address = rte_pktmbuf_iova_offset(input_q, in_offset); 4136 desc->req.data_ptrs[1].blen = q_size; 4137 in_offset += q_size; 4138 desc->req.data_ptrs[1].blkid = ACC_DMA_BLKID_IN; 4139 desc->req.data_ptrs[1].last = 0; 4140 desc->req.data_ptrs[1].dma_ext = 0; 4141 desc->req.data_ptrs[2].address = rte_pktmbuf_iova_offset(input_r, 0); 4142 desc->req.data_ptrs[2].blen = r_size; 4143 desc->req.data_ptrs[2].blkid = ACC_DMA_BLKID_IN_MLD_R; 4144 desc->req.data_ptrs[2].last = 1; 4145 desc->req.data_ptrs[2].dma_ext = 0; 4146 desc->req.data_ptrs[3].address = rte_pktmbuf_iova_offset(output, out_offset); 4147 desc->req.data_ptrs[3].blen = out_size; 4148 out_offset += out_size; 4149 desc->req.data_ptrs[3].blkid = ACC_DMA_BLKID_OUT_HARD; 4150 desc->req.data_ptrs[3].last = 1; 4151 desc->req.data_ptrs[3].dma_ext = 0; 4152 desc->req.m2dlen = VRB2_MLD_M2DLEN; 4153 desc->req.d2mlen = 1; 4154 desc->req.op_addr = op; 4155 4156 #ifdef RTE_LIBRTE_BBDEV_DEBUG 4157 rte_memdump(stderr, "FCW", &desc->req.fcw_mldts, sizeof(desc->req.fcw_mldts)); 4158 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 4159 #endif 4160 } 4161 desc->req.sdone_enable = 0; 4162 4163 return num_syms; 4164 } 4165 4166 /** Enqueue one MLDTS operation. */ 4167 static inline int 4168 enqueue_mldts_one_op(struct acc_queue *q, struct rte_bbdev_mldts_op *op, 4169 uint16_t total_enqueued_descs) 4170 { 4171 union acc_dma_desc *desc; 4172 struct rte_mbuf *input_q, *input_r, *output; 4173 uint32_t in_offset, out_offset; 4174 struct acc_fcw_mldts *fcw; 4175 4176 desc = acc_desc(q, total_enqueued_descs); 4177 input_q = op->mldts.qhy_input.data; 4178 input_r = op->mldts.r_input.data; 4179 output = op->mldts.output.data; 4180 in_offset = op->mldts.qhy_input.offset; 4181 out_offset = op->mldts.output.offset; 4182 fcw = &desc->req.fcw_mldts; 4183 vrb2_fcw_mldts_fill(op, fcw); 4184 vrb2_dma_desc_mldts_fill(op, &desc->req, input_q, input_r, output, 4185 &in_offset, &out_offset); 4186 #ifdef RTE_LIBRTE_BBDEV_DEBUG 4187 rte_memdump(stderr, "FCW", &desc->req.fcw_mldts, sizeof(desc->req.fcw_mldts)); 4188 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 4189 #endif 4190 return 1; 4191 } 4192 4193 /* Enqueue MLDTS operations. */ 4194 static uint16_t 4195 vrb2_enqueue_mldts(struct rte_bbdev_queue_data *q_data, 4196 struct rte_bbdev_mldts_op **ops, uint16_t num) 4197 { 4198 int32_t aq_avail, avail; 4199 struct acc_queue *q = q_data->queue_private; 4200 uint16_t i, enqueued_descs = 0, descs_in_op; 4201 int ret; 4202 bool as_one_op; 4203 4204 aq_avail = acc_aq_avail(q_data, num); 4205 if (unlikely((aq_avail <= 0) || (num == 0))) 4206 return 0; 4207 avail = acc_ring_avail_enq(q); 4208 4209 for (i = 0; i < num; ++i) { 4210 as_one_op = vrb2_check_mld_r_constraint(ops[i]); 4211 descs_in_op = as_one_op ? 1 : ops[i]->mldts.c_rep + 1; 4212 4213 /* Check if there are available space for further processing. 
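 * A split operation needs c_rep + 1 descriptors, an unsplit one needs a
 * single descriptor, so the ring budget is decremented by descs_in_op.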
*/ 4214 if (unlikely(avail < descs_in_op)) { 4215 acc_enqueue_ring_full(q_data); 4216 break; 4217 } 4218 avail -= descs_in_op; 4219 4220 if (as_one_op) 4221 ret = enqueue_mldts_one_op(q, ops[i], enqueued_descs); 4222 else 4223 ret = enqueue_mldts_split_op(q, ops[i], enqueued_descs); 4224 4225 if (ret < 0) { 4226 acc_enqueue_invalid(q_data); 4227 break; 4228 } 4229 4230 enqueued_descs += ret; 4231 } 4232 4233 if (unlikely(i == 0)) 4234 return 0; /* Nothing to enqueue. */ 4235 4236 acc_dma_enqueue(q, enqueued_descs, &q_data->queue_stats); 4237 4238 acc_update_qstat_enqueue(q_data, i, num - i); 4239 return i; 4240 } 4241 4242 /* 4243 * Dequeue one MLDTS operation. 4244 * This may have been split over multiple descriptors. 4245 */ 4246 static inline int 4247 dequeue_mldts_one_op(struct rte_bbdev_queue_data *q_data, 4248 struct acc_queue *q, struct rte_bbdev_mldts_op **ref_op, 4249 uint16_t dequeued_ops, uint32_t *aq_dequeued) 4250 { 4251 union acc_dma_desc *desc, atom_desc, *last_desc; 4252 union acc_dma_rsp_desc rsp; 4253 struct rte_bbdev_mldts_op *op; 4254 uint8_t descs_in_op, i; 4255 4256 desc = acc_desc_tail(q, dequeued_ops); 4257 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 4258 rte_memory_order_relaxed); 4259 4260 /* Check fdone bit. */ 4261 if (!(atom_desc.rsp.val & ACC_FDONE)) 4262 return -1; 4263 4264 descs_in_op = desc->req.cbs_in_tb; 4265 if (descs_in_op > 1) { 4266 /* Get last CB. */ 4267 last_desc = acc_desc_tail(q, dequeued_ops + descs_in_op - 1); 4268 /* Check if last op is ready to dequeue by checking fdone bit. If not exit. */ 4269 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)last_desc, 4270 rte_memory_order_relaxed); 4271 if (!(atom_desc.rsp.val & ACC_FDONE)) 4272 return -1; 4273 #ifdef RTE_LIBRTE_BBDEV_DEBUG 4274 rte_memdump(stderr, "Last Resp", &last_desc->rsp.val, sizeof(desc->rsp.val)); 4275 #endif 4276 /* Check each operation iteratively using fdone. */ 4277 for (i = 1; i < descs_in_op - 1; i++) { 4278 last_desc = q->ring_addr + ((q->sw_ring_tail + dequeued_ops + i) 4279 & q->sw_ring_wrap_mask); 4280 atom_desc.atom_hdr = rte_atomic_load_explicit( 4281 (uint64_t __rte_atomic *)last_desc, 4282 rte_memory_order_relaxed); 4283 if (!(atom_desc.rsp.val & ACC_FDONE)) 4284 return -1; 4285 } 4286 } 4287 #ifdef RTE_LIBRTE_BBDEV_DEBUG 4288 rte_memdump(stderr, "Resp", &desc->rsp.val, sizeof(desc->rsp.val)); 4289 #endif 4290 /* Dequeue. */ 4291 op = desc->req.op_addr; 4292 4293 /* Clearing status, it will be set based on response. */ 4294 op->status = 0; 4295 4296 for (i = 0; i < descs_in_op; i++) { 4297 desc = q->ring_addr + ((q->sw_ring_tail + dequeued_ops + i) & q->sw_ring_wrap_mask); 4298 atom_desc.atom_hdr = rte_atomic_load_explicit((uint64_t __rte_atomic *)desc, 4299 rte_memory_order_relaxed); 4300 rsp.val = atom_desc.rsp.val; 4301 4302 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, false); 4303 } 4304 4305 if (op->status != 0) 4306 q_data->queue_stats.dequeue_err_count++; 4307 if (op->status & (1 << RTE_BBDEV_DRV_ERROR)) 4308 vrb_check_ir(q->d); 4309 4310 *ref_op = op; 4311 4312 return descs_in_op; 4313 } 4314 4315 /* Dequeue MLDTS operations from VRB2 device. 
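 *
 * A split operation is only reported once every one of its descriptors has
 * FDONE set, so the return value counts whole operations. Caller-side sketch
 * (placeholder names, public API assumed available):
 *
 *   struct rte_bbdev_mldts_op *deq[8];
 *   uint16_t nb = rte_bbdev_dequeue_mldts_ops(dev_id, q_id, deq, 8);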
*/ 4316 static uint16_t 4317 vrb2_dequeue_mldts(struct rte_bbdev_queue_data *q_data, 4318 struct rte_bbdev_mldts_op **ops, uint16_t num) 4319 { 4320 struct acc_queue *q = q_data->queue_private; 4321 uint16_t dequeue_num, i, dequeued_cbs = 0; 4322 uint32_t avail = acc_ring_avail_deq(q); 4323 uint32_t aq_dequeued = 0; 4324 int ret; 4325 4326 dequeue_num = RTE_MIN(avail, num); 4327 4328 for (i = 0; i < dequeue_num; ++i) { 4329 ret = dequeue_mldts_one_op(q_data, q, &ops[i], dequeued_cbs, &aq_dequeued); 4330 if (ret <= 0) 4331 break; 4332 dequeued_cbs += ret; 4333 } 4334 4335 q->aq_dequeued += aq_dequeued; 4336 q->sw_ring_tail += dequeued_cbs; 4337 4338 acc_update_qstat_dequeue(q_data, i); 4339 4340 return i; 4341 } 4342 4343 /* Initialization Function */ 4344 static void 4345 vrb_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv) 4346 { 4347 struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); 4348 struct acc_device *d = dev->data->dev_private; 4349 4350 dev->dev_ops = &vrb_bbdev_ops; 4351 dev->enqueue_enc_ops = vrb_enqueue_enc; 4352 dev->enqueue_dec_ops = vrb_enqueue_dec; 4353 dev->dequeue_enc_ops = vrb_dequeue_enc; 4354 dev->dequeue_dec_ops = vrb_dequeue_dec; 4355 dev->enqueue_ldpc_enc_ops = vrb_enqueue_ldpc_enc; 4356 dev->enqueue_ldpc_dec_ops = vrb_enqueue_ldpc_dec; 4357 dev->dequeue_ldpc_enc_ops = vrb_dequeue_ldpc_enc; 4358 dev->dequeue_ldpc_dec_ops = vrb_dequeue_ldpc_dec; 4359 dev->enqueue_fft_ops = vrb_enqueue_fft; 4360 dev->dequeue_fft_ops = vrb_dequeue_fft; 4361 dev->enqueue_mldts_ops = vrb2_enqueue_mldts; 4362 dev->dequeue_mldts_ops = vrb2_dequeue_mldts; 4363 4364 d->pf_device = !strcmp(drv->driver.name, RTE_STR(VRB_PF_DRIVER_NAME)); 4365 d->mmio_base = pci_dev->mem_resource[0].addr; 4366 4367 /* Device variant specific handling. */ 4368 if ((pci_dev->id.device_id == RTE_VRB1_PF_DEVICE_ID) || 4369 (pci_dev->id.device_id == RTE_VRB1_VF_DEVICE_ID)) { 4370 d->device_variant = VRB1_VARIANT; 4371 d->queue_offset = vrb1_queue_offset; 4372 d->num_qgroups = VRB1_NUM_QGRPS; 4373 d->num_aqs = VRB1_NUM_AQS; 4374 if (d->pf_device) 4375 d->reg_addr = &vrb1_pf_reg_addr; 4376 else 4377 d->reg_addr = &vrb1_vf_reg_addr; 4378 } else if ((pci_dev->id.device_id == RTE_VRB2_PF_DEVICE_ID) || 4379 (pci_dev->id.device_id == RTE_VRB2_VF_DEVICE_ID)) { 4380 d->device_variant = VRB2_VARIANT; 4381 d->queue_offset = vrb2_queue_offset; 4382 d->num_qgroups = VRB2_NUM_QGRPS; 4383 d->num_aqs = VRB2_NUM_AQS; 4384 if (d->pf_device) 4385 d->reg_addr = &vrb2_pf_reg_addr; 4386 else 4387 d->reg_addr = &vrb2_vf_reg_addr; 4388 } 4389 4390 rte_bbdev_log_debug("Init device %s [%s] @ vaddr %p paddr %#"PRIx64"", 4391 drv->driver.name, dev->data->name, 4392 (void *)pci_dev->mem_resource[0].addr, 4393 pci_dev->mem_resource[0].phys_addr); 4394 } 4395 4396 static int vrb_pci_probe(struct rte_pci_driver *pci_drv, 4397 struct rte_pci_device *pci_dev) 4398 { 4399 struct rte_bbdev *bbdev = NULL; 4400 char dev_name[RTE_BBDEV_NAME_MAX_LEN]; 4401 4402 if (pci_dev == NULL) { 4403 rte_bbdev_log(ERR, "NULL PCI device"); 4404 return -EINVAL; 4405 } 4406 4407 rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name)); 4408 4409 /* Allocate memory to be used privately by drivers. */ 4410 bbdev = rte_bbdev_allocate(pci_dev->device.name); 4411 if (bbdev == NULL) 4412 return -ENODEV; 4413 4414 /* allocate device private memory. 
*/ 4415 bbdev->data->dev_private = rte_zmalloc_socket(dev_name, 4416 sizeof(struct acc_device), RTE_CACHE_LINE_SIZE, 4417 pci_dev->device.numa_node); 4418 4419 if (bbdev->data->dev_private == NULL) { 4420 rte_bbdev_log(CRIT, 4421 "Allocate of %zu bytes for device \"%s\" failed", 4422 sizeof(struct acc_device), dev_name); 4423 rte_bbdev_release(bbdev); 4424 return -ENOMEM; 4425 } 4426 4427 /* Fill HW specific part of device structure. */ 4428 bbdev->device = &pci_dev->device; 4429 bbdev->intr_handle = pci_dev->intr_handle; 4430 bbdev->data->socket_id = pci_dev->device.numa_node; 4431 4432 /* Invoke device initialization function. */ 4433 vrb_bbdev_init(bbdev, pci_drv); 4434 4435 rte_bbdev_log_debug("Initialised bbdev %s (id = %u)", 4436 dev_name, bbdev->data->dev_id); 4437 return 0; 4438 } 4439 4440 static struct rte_pci_driver vrb_pci_pf_driver = { 4441 .probe = vrb_pci_probe, 4442 .remove = acc_pci_remove, 4443 .id_table = pci_id_vrb_pf_map, 4444 .drv_flags = RTE_PCI_DRV_NEED_MAPPING 4445 }; 4446 4447 static struct rte_pci_driver vrb_pci_vf_driver = { 4448 .probe = vrb_pci_probe, 4449 .remove = acc_pci_remove, 4450 .id_table = pci_id_vrb_vf_map, 4451 .drv_flags = RTE_PCI_DRV_NEED_MAPPING 4452 }; 4453 4454 RTE_PMD_REGISTER_PCI(VRB_PF_DRIVER_NAME, vrb_pci_pf_driver); 4455 RTE_PMD_REGISTER_PCI_TABLE(VRB_PF_DRIVER_NAME, pci_id_vrb_pf_map); 4456 RTE_PMD_REGISTER_PCI(VRB_VF_DRIVER_NAME, vrb_pci_vf_driver); 4457 RTE_PMD_REGISTER_PCI_TABLE(VRB_VF_DRIVER_NAME, pci_id_vrb_vf_map); 4458 4459 /* Initial configuration of a VRB1 device prior to running configure(). */ 4460 int 4461 vrb1_configure(const char *dev_name, struct rte_acc_conf *conf) 4462 { 4463 rte_bbdev_log(INFO, "vrb1_configure"); 4464 uint32_t value, address, status; 4465 int qg_idx, template_idx, vf_idx, acc, i, rlim, alen, timestamp, totalQgs, numEngines; 4466 int numQgs, numQqsAcc; 4467 struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name); 4468 4469 /* Compile time checks. */ 4470 RTE_BUILD_BUG_ON(sizeof(struct acc_dma_req_desc) != 256); 4471 RTE_BUILD_BUG_ON(sizeof(union acc_dma_desc) != 256); 4472 RTE_BUILD_BUG_ON(sizeof(struct acc_fcw_td) != 24); 4473 RTE_BUILD_BUG_ON(sizeof(struct acc_fcw_te) != 32); 4474 4475 if (bbdev == NULL) { 4476 rte_bbdev_log(ERR, 4477 "Invalid dev_name (%s), or device is not yet initialised", 4478 dev_name); 4479 return -ENODEV; 4480 } 4481 struct acc_device *d = bbdev->data->dev_private; 4482 4483 /* Store configuration. */ 4484 rte_memcpy(&d->acc_conf, conf, sizeof(d->acc_conf)); 4485 4486 /* Check we are already out of PG. */ 4487 status = acc_reg_read(d, VRB1_PfHiSectionPowerGatingAck); 4488 if (status > 0) { 4489 if (status != VRB1_PG_MASK_0) { 4490 rte_bbdev_log(ERR, "Unexpected status %x %x", 4491 status, VRB1_PG_MASK_0); 4492 return -ENODEV; 4493 } 4494 /* Clock gate sections that will be un-PG. */ 4495 acc_reg_write(d, VRB1_PfHiClkGateHystReg, VRB1_CLK_DIS); 4496 /* Un-PG required sections. 
*/ 4497 acc_reg_write(d, VRB1_PfHiSectionPowerGatingReq, 4498 VRB1_PG_MASK_1); 4499 status = acc_reg_read(d, VRB1_PfHiSectionPowerGatingAck); 4500 if (status != VRB1_PG_MASK_1) { 4501 rte_bbdev_log(ERR, "Unexpected status %x %x", 4502 status, VRB1_PG_MASK_1); 4503 return -ENODEV; 4504 } 4505 acc_reg_write(d, VRB1_PfHiSectionPowerGatingReq, 4506 VRB1_PG_MASK_2); 4507 status = acc_reg_read(d, VRB1_PfHiSectionPowerGatingAck); 4508 if (status != VRB1_PG_MASK_2) { 4509 rte_bbdev_log(ERR, "Unexpected status %x %x", 4510 status, VRB1_PG_MASK_2); 4511 return -ENODEV; 4512 } 4513 acc_reg_write(d, VRB1_PfHiSectionPowerGatingReq, 4514 VRB1_PG_MASK_3); 4515 status = acc_reg_read(d, VRB1_PfHiSectionPowerGatingAck); 4516 if (status != VRB1_PG_MASK_3) { 4517 rte_bbdev_log(ERR, "Unexpected status %x %x", 4518 status, VRB1_PG_MASK_3); 4519 return -ENODEV; 4520 } 4521 /* Enable clocks for all sections. */ 4522 acc_reg_write(d, VRB1_PfHiClkGateHystReg, VRB1_CLK_EN); 4523 } 4524 4525 /* Explicitly releasing AXI as this may be stopped after PF FLR/BME. */ 4526 address = VRB1_PfDmaAxiControl; 4527 value = 1; 4528 acc_reg_write(d, address, value); 4529 4530 /* Set the fabric mode. */ 4531 address = VRB1_PfFabricM2iBufferReg; 4532 value = VRB1_FABRIC_MODE; 4533 acc_reg_write(d, address, value); 4534 4535 /* Set default descriptor signature. */ 4536 address = VRB1_PfDmaDescriptorSignatuture; 4537 value = 0; 4538 acc_reg_write(d, address, value); 4539 4540 /* Enable the Error Detection in DMA. */ 4541 value = VRB1_CFG_DMA_ERROR; 4542 address = VRB1_PfDmaErrorDetectionEn; 4543 acc_reg_write(d, address, value); 4544 4545 /* AXI Cache configuration. */ 4546 value = VRB1_CFG_AXI_CACHE; 4547 address = VRB1_PfDmaAxcacheReg; 4548 acc_reg_write(d, address, value); 4549 4550 /* AXI Response configuration. */ 4551 acc_reg_write(d, VRB1_PfDmaCfgRrespBresp, 0x0); 4552 4553 /* Default DMA Configuration (Qmgr Enabled). */ 4554 address = VRB1_PfDmaConfig0Reg; 4555 value = 0; 4556 acc_reg_write(d, address, value); 4557 address = VRB1_PfDmaQmanen; 4558 value = 0; 4559 acc_reg_write(d, address, value); 4560 4561 /* Default RLIM/ALEN configuration. */ 4562 rlim = 0; 4563 alen = 1; 4564 timestamp = 0; 4565 address = VRB1_PfDmaConfig1Reg; 4566 value = (1 << 31) + (rlim << 8) + (timestamp << 6) + alen; 4567 acc_reg_write(d, address, value); 4568 4569 /* Default FFT configuration. */ 4570 address = VRB1_PfFftConfig0; 4571 value = VRB1_FFT_CFG_0; 4572 acc_reg_write(d, address, value); 4573 4574 /* Configure DMA Qmanager addresses. */ 4575 address = VRB1_PfDmaQmgrAddrReg; 4576 value = VRB1_PfQmgrEgressQueuesTemplate; 4577 acc_reg_write(d, address, value); 4578 4579 /* ===== Qmgr Configuration ===== */ 4580 /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL. */ 4581 totalQgs = conf->q_ul_4g.num_qgroups + 4582 conf->q_ul_5g.num_qgroups + 4583 conf->q_dl_4g.num_qgroups + 4584 conf->q_dl_5g.num_qgroups + 4585 conf->q_fft.num_qgroups; 4586 for (qg_idx = 0; qg_idx < VRB1_NUM_QGRPS; qg_idx++) { 4587 address = VRB1_PfQmgrDepthLog2Grp + ACC_BYTES_IN_WORD * qg_idx; 4588 value = aqDepth(qg_idx, conf); 4589 acc_reg_write(d, address, value); 4590 address = VRB1_PfQmgrTholdGrp + ACC_BYTES_IN_WORD * qg_idx; 4591 value = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1)); 4592 acc_reg_write(d, address, value); 4593 } 4594 4595 /* Template Priority in incremental order. 
*/ 4596 for (template_idx = 0; template_idx < ACC_NUM_TMPL; 4597 template_idx++) { 4598 address = VRB1_PfQmgrGrpTmplateReg0Indx + ACC_BYTES_IN_WORD * template_idx; 4599 value = ACC_TMPL_PRI_0; 4600 acc_reg_write(d, address, value); 4601 address = VRB1_PfQmgrGrpTmplateReg1Indx + ACC_BYTES_IN_WORD * template_idx; 4602 value = ACC_TMPL_PRI_1; 4603 acc_reg_write(d, address, value); 4604 address = VRB1_PfQmgrGrpTmplateReg2indx + ACC_BYTES_IN_WORD * template_idx; 4605 value = ACC_TMPL_PRI_2; 4606 acc_reg_write(d, address, value); 4607 address = VRB1_PfQmgrGrpTmplateReg3Indx + ACC_BYTES_IN_WORD * template_idx; 4608 value = ACC_TMPL_PRI_3; 4609 acc_reg_write(d, address, value); 4610 } 4611 4612 address = VRB1_PfQmgrGrpPriority; 4613 value = VRB1_CFG_QMGR_HI_P; 4614 acc_reg_write(d, address, value); 4615 4616 /* Template Configuration. */ 4617 for (template_idx = 0; template_idx < ACC_NUM_TMPL; 4618 template_idx++) { 4619 value = 0; 4620 address = VRB1_PfQmgrGrpTmplateReg4Indx 4621 + ACC_BYTES_IN_WORD * template_idx; 4622 acc_reg_write(d, address, value); 4623 } 4624 /* 4GUL */ 4625 numQgs = conf->q_ul_4g.num_qgroups; 4626 numQqsAcc = 0; 4627 value = 0; 4628 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4629 value |= (1 << qg_idx); 4630 for (template_idx = VRB1_SIG_UL_4G; 4631 template_idx <= VRB1_SIG_UL_4G_LAST; 4632 template_idx++) { 4633 address = VRB1_PfQmgrGrpTmplateReg4Indx 4634 + ACC_BYTES_IN_WORD * template_idx; 4635 acc_reg_write(d, address, value); 4636 } 4637 /* 5GUL */ 4638 numQqsAcc += numQgs; 4639 numQgs = conf->q_ul_5g.num_qgroups; 4640 value = 0; 4641 numEngines = 0; 4642 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4643 value |= (1 << qg_idx); 4644 for (template_idx = VRB1_SIG_UL_5G; 4645 template_idx <= VRB1_SIG_UL_5G_LAST; 4646 template_idx++) { 4647 /* Check engine power-on status */ 4648 address = VRB1_PfFecUl5gIbDebugReg + ACC_ENGINE_OFFSET * template_idx; 4649 status = (acc_reg_read(d, address) >> 4) & 0x7; 4650 address = VRB1_PfQmgrGrpTmplateReg4Indx 4651 + ACC_BYTES_IN_WORD * template_idx; 4652 if (status == 1) { 4653 acc_reg_write(d, address, value); 4654 numEngines++; 4655 } else 4656 acc_reg_write(d, address, 0); 4657 } 4658 rte_bbdev_log(INFO, "Number of 5GUL engines %d", numEngines); 4659 /* 4GDL */ 4660 numQqsAcc += numQgs; 4661 numQgs = conf->q_dl_4g.num_qgroups; 4662 value = 0; 4663 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4664 value |= (1 << qg_idx); 4665 for (template_idx = VRB1_SIG_DL_4G; 4666 template_idx <= VRB1_SIG_DL_4G_LAST; 4667 template_idx++) { 4668 address = VRB1_PfQmgrGrpTmplateReg4Indx 4669 + ACC_BYTES_IN_WORD * template_idx; 4670 acc_reg_write(d, address, value); 4671 } 4672 /* 5GDL */ 4673 numQqsAcc += numQgs; 4674 numQgs = conf->q_dl_5g.num_qgroups; 4675 value = 0; 4676 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4677 value |= (1 << qg_idx); 4678 for (template_idx = VRB1_SIG_DL_5G; 4679 template_idx <= VRB1_SIG_DL_5G_LAST; 4680 template_idx++) { 4681 address = VRB1_PfQmgrGrpTmplateReg4Indx 4682 + ACC_BYTES_IN_WORD * template_idx; 4683 acc_reg_write(d, address, value); 4684 } 4685 /* FFT */ 4686 numQqsAcc += numQgs; 4687 numQgs = conf->q_fft.num_qgroups; 4688 value = 0; 4689 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4690 value |= (1 << qg_idx); 4691 for (template_idx = VRB1_SIG_FFT; 4692 template_idx <= VRB1_SIG_FFT_LAST; 4693 template_idx++) { 4694 address = VRB1_PfQmgrGrpTmplateReg4Indx 4695 + ACC_BYTES_IN_WORD * template_idx; 4696 
acc_reg_write(d, address, value); 4697 } 4698 4699 /* Queue Group Function mapping. */ 4700 int qman_func_id[8] = {0, 2, 1, 3, 4, 0, 0, 0}; 4701 value = 0; 4702 for (qg_idx = 0; qg_idx < ACC_NUM_QGRPS_PER_WORD; qg_idx++) { 4703 acc = accFromQgid(qg_idx, conf); 4704 value |= qman_func_id[acc] << (qg_idx * 4); 4705 } 4706 acc_reg_write(d, VRB1_PfQmgrGrpFunction0, value); 4707 value = 0; 4708 for (qg_idx = 0; qg_idx < ACC_NUM_QGRPS_PER_WORD; qg_idx++) { 4709 acc = accFromQgid(qg_idx + ACC_NUM_QGRPS_PER_WORD, conf); 4710 value |= qman_func_id[acc] << (qg_idx * 4); 4711 } 4712 acc_reg_write(d, VRB1_PfQmgrGrpFunction1, value); 4713 4714 /* Configuration of the Arbitration QGroup depth to 1. */ 4715 for (qg_idx = 0; qg_idx < VRB1_NUM_QGRPS; qg_idx++) { 4716 address = VRB1_PfQmgrArbQDepthGrp + 4717 ACC_BYTES_IN_WORD * qg_idx; 4718 value = 0; 4719 acc_reg_write(d, address, value); 4720 } 4721 4722 /* This pointer to ARAM (256kB) is shifted by 2 (4B per register). */ 4723 uint32_t aram_address = 0; 4724 for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) { 4725 for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) { 4726 address = VRB1_PfQmgrVfBaseAddr + vf_idx 4727 * ACC_BYTES_IN_WORD + qg_idx 4728 * ACC_BYTES_IN_WORD * 64; 4729 value = aram_address; 4730 acc_reg_write(d, address, value); 4731 /* Offset ARAM Address for next memory bank - increment of 4B. */ 4732 aram_address += aqNum(qg_idx, conf) * 4733 (1 << aqDepth(qg_idx, conf)); 4734 } 4735 } 4736 4737 if (aram_address > VRB1_WORDS_IN_ARAM_SIZE) { 4738 rte_bbdev_log(ERR, "ARAM Configuration not fitting %d %d", 4739 aram_address, VRB1_WORDS_IN_ARAM_SIZE); 4740 return -EINVAL; 4741 } 4742 4743 /* Performance tuning. */ 4744 acc_reg_write(d, VRB1_PfFabricI2Mdma_weight, 0x0FFF); 4745 acc_reg_write(d, VRB1_PfDma4gdlIbThld, 0x1f10); 4746 4747 /* ==== HI Configuration ==== */ 4748 4749 /* No Info Ring/MSI by default. */ 4750 address = VRB1_PfHiInfoRingIntWrEnRegPf; 4751 value = 0; 4752 acc_reg_write(d, address, value); 4753 address = VRB1_PfHiCfgMsiIntWrEnRegPf; 4754 value = 0xFFFFFFFF; 4755 acc_reg_write(d, address, value); 4756 /* Prevent Block on Transmit Error. */ 4757 address = VRB1_PfHiBlockTransmitOnErrorEn; 4758 value = 0; 4759 acc_reg_write(d, address, value); 4760 /* Prevents to drop MSI. */ 4761 address = VRB1_PfHiMsiDropEnableReg; 4762 value = 0; 4763 acc_reg_write(d, address, value); 4764 /* Set the PF Mode register. */ 4765 address = VRB1_PfHiPfMode; 4766 value = (conf->pf_mode_en) ? ACC_PF_VAL : 0; 4767 acc_reg_write(d, address, value); 4768 4769 /* QoS overflow init. */ 4770 value = 1; 4771 address = VRB1_PfQosmonAEvalOverflow0; 4772 acc_reg_write(d, address, value); 4773 address = VRB1_PfQosmonBEvalOverflow0; 4774 acc_reg_write(d, address, value); 4775 4776 /* Configure the FFT RAM LUT. 
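 * The VRB1_FFT_RAM_SIZE coefficients below are written one 32-bit word at a
 * time: the RAM page is opened through VRB1_PfFftRamPageAccess with
 * VRB1_FFT_RAM_EN + 64, entry i is written to VRB1_PfFftRamOff + i * 4, and
 * the page is closed again with VRB1_FFT_RAM_DIS.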
*/ 4777 uint32_t fft_lut[VRB1_FFT_RAM_SIZE] = { 4778 0x1FFFF, 0x1FFFF, 0x1FFFE, 0x1FFFA, 0x1FFF6, 0x1FFF1, 0x1FFEA, 0x1FFE2, 4779 0x1FFD9, 0x1FFCE, 0x1FFC2, 0x1FFB5, 0x1FFA7, 0x1FF98, 0x1FF87, 0x1FF75, 4780 0x1FF62, 0x1FF4E, 0x1FF38, 0x1FF21, 0x1FF09, 0x1FEF0, 0x1FED6, 0x1FEBA, 4781 0x1FE9D, 0x1FE7F, 0x1FE5F, 0x1FE3F, 0x1FE1D, 0x1FDFA, 0x1FDD5, 0x1FDB0, 4782 0x1FD89, 0x1FD61, 0x1FD38, 0x1FD0D, 0x1FCE1, 0x1FCB4, 0x1FC86, 0x1FC57, 4783 0x1FC26, 0x1FBF4, 0x1FBC1, 0x1FB8D, 0x1FB58, 0x1FB21, 0x1FAE9, 0x1FAB0, 4784 0x1FA75, 0x1FA3A, 0x1F9FD, 0x1F9BF, 0x1F980, 0x1F93F, 0x1F8FD, 0x1F8BA, 4785 0x1F876, 0x1F831, 0x1F7EA, 0x1F7A3, 0x1F75A, 0x1F70F, 0x1F6C4, 0x1F677, 4786 0x1F629, 0x1F5DA, 0x1F58A, 0x1F539, 0x1F4E6, 0x1F492, 0x1F43D, 0x1F3E7, 4787 0x1F38F, 0x1F337, 0x1F2DD, 0x1F281, 0x1F225, 0x1F1C8, 0x1F169, 0x1F109, 4788 0x1F0A8, 0x1F046, 0x1EFE2, 0x1EF7D, 0x1EF18, 0x1EEB0, 0x1EE48, 0x1EDDF, 4789 0x1ED74, 0x1ED08, 0x1EC9B, 0x1EC2D, 0x1EBBE, 0x1EB4D, 0x1EADB, 0x1EA68, 4790 0x1E9F4, 0x1E97F, 0x1E908, 0x1E891, 0x1E818, 0x1E79E, 0x1E722, 0x1E6A6, 4791 0x1E629, 0x1E5AA, 0x1E52A, 0x1E4A9, 0x1E427, 0x1E3A3, 0x1E31F, 0x1E299, 4792 0x1E212, 0x1E18A, 0x1E101, 0x1E076, 0x1DFEB, 0x1DF5E, 0x1DED0, 0x1DE41, 4793 0x1DDB1, 0x1DD20, 0x1DC8D, 0x1DBFA, 0x1DB65, 0x1DACF, 0x1DA38, 0x1D9A0, 4794 0x1D907, 0x1D86C, 0x1D7D1, 0x1D734, 0x1D696, 0x1D5F7, 0x1D557, 0x1D4B6, 4795 0x1D413, 0x1D370, 0x1D2CB, 0x1D225, 0x1D17E, 0x1D0D6, 0x1D02D, 0x1CF83, 4796 0x1CED8, 0x1CE2B, 0x1CD7E, 0x1CCCF, 0x1CC1F, 0x1CB6E, 0x1CABC, 0x1CA09, 4797 0x1C955, 0x1C89F, 0x1C7E9, 0x1C731, 0x1C679, 0x1C5BF, 0x1C504, 0x1C448, 4798 0x1C38B, 0x1C2CD, 0x1C20E, 0x1C14E, 0x1C08C, 0x1BFCA, 0x1BF06, 0x1BE42, 4799 0x1BD7C, 0x1BCB5, 0x1BBED, 0x1BB25, 0x1BA5B, 0x1B990, 0x1B8C4, 0x1B7F6, 4800 0x1B728, 0x1B659, 0x1B589, 0x1B4B7, 0x1B3E5, 0x1B311, 0x1B23D, 0x1B167, 4801 0x1B091, 0x1AFB9, 0x1AEE0, 0x1AE07, 0x1AD2C, 0x1AC50, 0x1AB73, 0x1AA95, 4802 0x1A9B6, 0x1A8D6, 0x1A7F6, 0x1A714, 0x1A631, 0x1A54D, 0x1A468, 0x1A382, 4803 0x1A29A, 0x1A1B2, 0x1A0C9, 0x19FDF, 0x19EF4, 0x19E08, 0x19D1B, 0x19C2D, 4804 0x19B3E, 0x19A4E, 0x1995D, 0x1986B, 0x19778, 0x19684, 0x1958F, 0x19499, 4805 0x193A2, 0x192AA, 0x191B1, 0x190B8, 0x18FBD, 0x18EC1, 0x18DC4, 0x18CC7, 4806 0x18BC8, 0x18AC8, 0x189C8, 0x188C6, 0x187C4, 0x186C1, 0x185BC, 0x184B7, 4807 0x183B1, 0x182AA, 0x181A2, 0x18099, 0x17F8F, 0x17E84, 0x17D78, 0x17C6C, 4808 0x17B5E, 0x17A4F, 0x17940, 0x17830, 0x1771E, 0x1760C, 0x174F9, 0x173E5, 4809 0x172D1, 0x171BB, 0x170A4, 0x16F8D, 0x16E74, 0x16D5B, 0x16C41, 0x16B26, 4810 0x16A0A, 0x168ED, 0x167CF, 0x166B1, 0x16592, 0x16471, 0x16350, 0x1622E, 4811 0x1610B, 0x15FE8, 0x15EC3, 0x15D9E, 0x15C78, 0x15B51, 0x15A29, 0x15900, 4812 0x157D7, 0x156AC, 0x15581, 0x15455, 0x15328, 0x151FB, 0x150CC, 0x14F9D, 4813 0x14E6D, 0x14D3C, 0x14C0A, 0x14AD8, 0x149A4, 0x14870, 0x1473B, 0x14606, 4814 0x144CF, 0x14398, 0x14260, 0x14127, 0x13FEE, 0x13EB3, 0x13D78, 0x13C3C, 4815 0x13B00, 0x139C2, 0x13884, 0x13745, 0x13606, 0x134C5, 0x13384, 0x13242, 4816 0x130FF, 0x12FBC, 0x12E78, 0x12D33, 0x12BEE, 0x12AA7, 0x12960, 0x12819, 4817 0x126D0, 0x12587, 0x1243D, 0x122F3, 0x121A8, 0x1205C, 0x11F0F, 0x11DC2, 4818 0x11C74, 0x11B25, 0x119D6, 0x11886, 0x11735, 0x115E3, 0x11491, 0x1133F, 4819 0x111EB, 0x11097, 0x10F42, 0x10DED, 0x10C97, 0x10B40, 0x109E9, 0x10891, 4820 0x10738, 0x105DF, 0x10485, 0x1032B, 0x101D0, 0x10074, 0x0FF18, 0x0FDBB, 4821 0x0FC5D, 0x0FAFF, 0x0F9A0, 0x0F841, 0x0F6E1, 0x0F580, 0x0F41F, 0x0F2BD, 4822 0x0F15B, 0x0EFF8, 0x0EE94, 0x0ED30, 0x0EBCC, 0x0EA67, 0x0E901, 0x0E79A, 4823 0x0E633, 0x0E4CC, 0x0E364, 0x0E1FB, 
0x0E092, 0x0DF29, 0x0DDBE, 0x0DC54, 4824 0x0DAE9, 0x0D97D, 0x0D810, 0x0D6A4, 0x0D536, 0x0D3C8, 0x0D25A, 0x0D0EB, 4825 0x0CF7C, 0x0CE0C, 0x0CC9C, 0x0CB2B, 0x0C9B9, 0x0C847, 0x0C6D5, 0x0C562, 4826 0x0C3EF, 0x0C27B, 0x0C107, 0x0BF92, 0x0BE1D, 0x0BCA8, 0x0BB32, 0x0B9BB, 4827 0x0B844, 0x0B6CD, 0x0B555, 0x0B3DD, 0x0B264, 0x0B0EB, 0x0AF71, 0x0ADF7, 4828 0x0AC7D, 0x0AB02, 0x0A987, 0x0A80B, 0x0A68F, 0x0A513, 0x0A396, 0x0A219, 4829 0x0A09B, 0x09F1D, 0x09D9E, 0x09C20, 0x09AA1, 0x09921, 0x097A1, 0x09621, 4830 0x094A0, 0x0931F, 0x0919E, 0x0901C, 0x08E9A, 0x08D18, 0x08B95, 0x08A12, 4831 0x0888F, 0x0870B, 0x08587, 0x08402, 0x0827E, 0x080F9, 0x07F73, 0x07DEE, 4832 0x07C68, 0x07AE2, 0x0795B, 0x077D4, 0x0764D, 0x074C6, 0x0733E, 0x071B6, 4833 0x0702E, 0x06EA6, 0x06D1D, 0x06B94, 0x06A0B, 0x06881, 0x066F7, 0x0656D, 4834 0x063E3, 0x06258, 0x060CE, 0x05F43, 0x05DB7, 0x05C2C, 0x05AA0, 0x05914, 4835 0x05788, 0x055FC, 0x0546F, 0x052E3, 0x05156, 0x04FC9, 0x04E3B, 0x04CAE, 4836 0x04B20, 0x04992, 0x04804, 0x04676, 0x044E8, 0x04359, 0x041CB, 0x0403C, 4837 0x03EAD, 0x03D1D, 0x03B8E, 0x039FF, 0x0386F, 0x036DF, 0x0354F, 0x033BF, 4838 0x0322F, 0x0309F, 0x02F0F, 0x02D7E, 0x02BEE, 0x02A5D, 0x028CC, 0x0273B, 4839 0x025AA, 0x02419, 0x02288, 0x020F7, 0x01F65, 0x01DD4, 0x01C43, 0x01AB1, 4840 0x0191F, 0x0178E, 0x015FC, 0x0146A, 0x012D8, 0x01147, 0x00FB5, 0x00E23, 4841 0x00C91, 0x00AFF, 0x0096D, 0x007DB, 0x00648, 0x004B6, 0x00324, 0x00192}; 4842 4843 acc_reg_write(d, VRB1_PfFftRamPageAccess, VRB1_FFT_RAM_EN + 64); 4844 for (i = 0; i < VRB1_FFT_RAM_SIZE; i++) 4845 acc_reg_write(d, VRB1_PfFftRamOff + i * 4, fft_lut[i]); 4846 acc_reg_write(d, VRB1_PfFftRamPageAccess, VRB1_FFT_RAM_DIS); 4847 4848 /* Enabling AQueues through the Queue hierarchy. */ 4849 for (vf_idx = 0; vf_idx < VRB1_NUM_VFS; vf_idx++) { 4850 for (qg_idx = 0; qg_idx < VRB1_NUM_QGRPS; qg_idx++) { 4851 value = 0; 4852 if (vf_idx < conf->num_vf_bundles && qg_idx < totalQgs) 4853 value = (1 << aqNum(qg_idx, conf)) - 1; 4854 address = VRB1_PfQmgrAqEnableVf + vf_idx * ACC_BYTES_IN_WORD; 4855 value += (qg_idx << 16); 4856 acc_reg_write(d, address, value); 4857 } 4858 } 4859 4860 rte_bbdev_log_debug("PF Tip configuration complete for %s", dev_name); 4861 return 0; 4862 } 4863 4864 /* Initial configuration of a VRB2 device prior to running configure(). */ 4865 int 4866 vrb2_configure(const char *dev_name, struct rte_acc_conf *conf) 4867 { 4868 rte_bbdev_log(INFO, "vrb2_configure"); 4869 uint32_t value, address, status; 4870 int qg_idx, template_idx, vf_idx, acc, i, aq_reg, static_allocation, numEngines; 4871 int numQgs, numQqsAcc, totalQgs; 4872 int qman_func_id[8] = {0, 2, 1, 3, 4, 5, 0, 0}; 4873 struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name); 4874 int rlim, alen, timestamp; 4875 4876 /* Compile time checks. */ 4877 RTE_BUILD_BUG_ON(sizeof(struct acc_dma_req_desc) != 256); 4878 RTE_BUILD_BUG_ON(sizeof(union acc_dma_desc) != 256); 4879 RTE_BUILD_BUG_ON(sizeof(struct acc_fcw_td) != 24); 4880 RTE_BUILD_BUG_ON(sizeof(struct acc_fcw_te) != 32); 4881 4882 if (bbdev == NULL) { 4883 rte_bbdev_log(ERR, 4884 "Invalid dev_name (%s), or device is not yet initialised", 4885 dev_name); 4886 return -ENODEV; 4887 } 4888 struct acc_device *d = bbdev->data->dev_private; 4889 4890 /* Store configuration. */ 4891 rte_memcpy(&d->acc_conf, conf, sizeof(d->acc_conf)); 4892 4893 /* Explicitly releasing AXI as this may be stopped after PF FLR/BME. */ 4894 address = VRB2_PfDmaAxiControl; 4895 value = 1; 4896 acc_reg_write(d, address, value); 4897 4898 /* Set the fabric mode. 
*/ 4899 address = VRB2_PfFabricM2iBufferReg; 4900 value = VRB2_FABRIC_MODE; 4901 acc_reg_write(d, address, value); 4902 4903 /* Set default descriptor signature. */ 4904 address = VRB2_PfDmaDescriptorSignature; 4905 value = 0; 4906 acc_reg_write(d, address, value); 4907 4908 /* Enable the Error Detection in DMA. */ 4909 value = VRB2_CFG_DMA_ERROR; 4910 address = VRB2_PfDmaErrorDetectionEn; 4911 acc_reg_write(d, address, value); 4912 4913 /* AXI Cache configuration. */ 4914 value = VRB2_CFG_AXI_CACHE; 4915 address = VRB2_PfDmaAxcacheReg; 4916 acc_reg_write(d, address, value); 4917 4918 /* AXI Response configuration. */ 4919 acc_reg_write(d, VRB2_PfDmaCfgRrespBresp, 0x0); 4920 4921 /* Default DMA Configuration (Qmgr Enabled) */ 4922 acc_reg_write(d, VRB2_PfDmaConfig0Reg, 0); 4923 acc_reg_write(d, VRB2_PfDmaQmanenSelect, 0xFFFFFFFF); 4924 acc_reg_write(d, VRB2_PfDmaQmanen, 0); 4925 4926 /* Default RLIM/ALEN configuration. */ 4927 rlim = 0; 4928 alen = 3; 4929 timestamp = 0; 4930 address = VRB2_PfDmaConfig1Reg; 4931 value = (1 << 31) + (rlim << 8) + (timestamp << 6) + alen; 4932 acc_reg_write(d, address, value); 4933 4934 /* Default FFT configuration. */ 4935 for (template_idx = 0; template_idx < VRB2_FFT_NUM; template_idx++) { 4936 acc_reg_write(d, VRB2_PfFftConfig0 + template_idx * 0x1000, VRB2_FFT_CFG_0); 4937 acc_reg_write(d, VRB2_PfFftParityMask8 + template_idx * 0x1000, VRB2_FFT_ECC); 4938 } 4939 4940 /* Configure DMA Qmanager addresses. */ 4941 address = VRB2_PfDmaQmgrAddrReg; 4942 value = VRB2_PfQmgrEgressQueuesTemplate; 4943 acc_reg_write(d, address, value); 4944 4945 /* ===== Qmgr Configuration ===== */ 4946 /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL. */ 4947 totalQgs = conf->q_ul_4g.num_qgroups + conf->q_ul_5g.num_qgroups + 4948 conf->q_dl_4g.num_qgroups + conf->q_dl_5g.num_qgroups + 4949 conf->q_fft.num_qgroups + conf->q_mld.num_qgroups; 4950 for (qg_idx = 0; qg_idx < VRB2_NUM_QGRPS; qg_idx++) { 4951 address = VRB2_PfQmgrDepthLog2Grp + ACC_BYTES_IN_WORD * qg_idx; 4952 value = aqDepth(qg_idx, conf); 4953 acc_reg_write(d, address, value); 4954 address = VRB2_PfQmgrTholdGrp + ACC_BYTES_IN_WORD * qg_idx; 4955 value = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1)); 4956 acc_reg_write(d, address, value); 4957 } 4958 4959 /* Template Priority in incremental order. 
*/ 4960 for (template_idx = 0; template_idx < ACC_NUM_TMPL; template_idx++) { 4961 address = VRB2_PfQmgrGrpTmplateReg0Indx + ACC_BYTES_IN_WORD * template_idx; 4962 value = ACC_TMPL_PRI_0; 4963 acc_reg_write(d, address, value); 4964 address = VRB2_PfQmgrGrpTmplateReg1Indx + ACC_BYTES_IN_WORD * template_idx; 4965 value = ACC_TMPL_PRI_1; 4966 acc_reg_write(d, address, value); 4967 address = VRB2_PfQmgrGrpTmplateReg2Indx + ACC_BYTES_IN_WORD * template_idx; 4968 value = ACC_TMPL_PRI_2; 4969 acc_reg_write(d, address, value); 4970 address = VRB2_PfQmgrGrpTmplateReg3Indx + ACC_BYTES_IN_WORD * template_idx; 4971 value = ACC_TMPL_PRI_3; 4972 acc_reg_write(d, address, value); 4973 address = VRB2_PfQmgrGrpTmplateReg4Indx + ACC_BYTES_IN_WORD * template_idx; 4974 value = ACC_TMPL_PRI_4; 4975 acc_reg_write(d, address, value); 4976 address = VRB2_PfQmgrGrpTmplateReg5Indx + ACC_BYTES_IN_WORD * template_idx; 4977 value = ACC_TMPL_PRI_5; 4978 acc_reg_write(d, address, value); 4979 address = VRB2_PfQmgrGrpTmplateReg6Indx + ACC_BYTES_IN_WORD * template_idx; 4980 value = ACC_TMPL_PRI_6; 4981 acc_reg_write(d, address, value); 4982 address = VRB2_PfQmgrGrpTmplateReg7Indx + ACC_BYTES_IN_WORD * template_idx; 4983 value = ACC_TMPL_PRI_7; 4984 acc_reg_write(d, address, value); 4985 } 4986 4987 address = VRB2_PfQmgrGrpPriority; 4988 value = VRB2_CFG_QMGR_HI_P; 4989 acc_reg_write(d, address, value); 4990 4991 /* Template Configuration. */ 4992 for (template_idx = 0; template_idx < ACC_NUM_TMPL; template_idx++) { 4993 value = 0; 4994 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 4995 acc_reg_write(d, address, value); 4996 } 4997 /* 4GUL */ 4998 numQgs = conf->q_ul_4g.num_qgroups; 4999 numQqsAcc = 0; 5000 value = 0; 5001 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 5002 value |= (1 << qg_idx); 5003 for (template_idx = VRB2_SIG_UL_4G; template_idx <= VRB2_SIG_UL_4G_LAST; 5004 template_idx++) { 5005 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 5006 acc_reg_write(d, address, value); 5007 } 5008 /* 5GUL */ 5009 numQqsAcc += numQgs; 5010 numQgs = conf->q_ul_5g.num_qgroups; 5011 value = 0; 5012 numEngines = 0; 5013 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 5014 value |= (1 << qg_idx); 5015 for (template_idx = VRB2_SIG_UL_5G; template_idx <= VRB2_SIG_UL_5G_LAST; 5016 template_idx++) { 5017 /* Check engine power-on status. 
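 * Bits 6:4 of the per-engine debug register are read; only engines reporting
 * a status of 1 keep their template mapped to the 5G UL queue groups, the
 * others have their template enable register written as 0.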
*/ 5018 address = VRB2_PfFecUl5gIbDebug0Reg + ACC_ENGINE_OFFSET * template_idx; 5019 status = (acc_reg_read(d, address) >> 4) & 0x7; 5020 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 5021 if (status == 1) { 5022 acc_reg_write(d, address, value); 5023 numEngines++; 5024 } else 5025 acc_reg_write(d, address, 0); 5026 } 5027 rte_bbdev_log(INFO, "Number of 5GUL engines %d", numEngines); 5028 /* 4GDL */ 5029 numQqsAcc += numQgs; 5030 numQgs = conf->q_dl_4g.num_qgroups; 5031 value = 0; 5032 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 5033 value |= (1 << qg_idx); 5034 for (template_idx = VRB2_SIG_DL_4G; template_idx <= VRB2_SIG_DL_4G_LAST; 5035 template_idx++) { 5036 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 5037 acc_reg_write(d, address, value); 5038 } 5039 /* 5GDL */ 5040 numQqsAcc += numQgs; 5041 numQgs = conf->q_dl_5g.num_qgroups; 5042 value = 0; 5043 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 5044 value |= (1 << qg_idx); 5045 for (template_idx = VRB2_SIG_DL_5G; template_idx <= VRB2_SIG_DL_5G_LAST; 5046 template_idx++) { 5047 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 5048 acc_reg_write(d, address, value); 5049 } 5050 /* FFT */ 5051 numQqsAcc += numQgs; 5052 numQgs = conf->q_fft.num_qgroups; 5053 value = 0; 5054 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 5055 value |= (1 << qg_idx); 5056 for (template_idx = VRB2_SIG_FFT; template_idx <= VRB2_SIG_FFT_LAST; 5057 template_idx++) { 5058 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 5059 acc_reg_write(d, address, value); 5060 } 5061 /* MLD */ 5062 numQqsAcc += numQgs; 5063 numQgs = conf->q_mld.num_qgroups; 5064 value = 0; 5065 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 5066 value |= (1 << qg_idx); 5067 for (template_idx = VRB2_SIG_MLD; template_idx <= VRB2_SIG_MLD_LAST; 5068 template_idx++) { 5069 address = VRB2_PfQmgrGrpTmplateEnRegIndx 5070 + ACC_BYTES_IN_WORD * template_idx; 5071 acc_reg_write(d, address, value); 5072 } 5073 5074 /* Queue Group Function mapping. */ 5075 for (i = 0; i < 4; i++) { 5076 value = 0; 5077 for (qg_idx = 0; qg_idx < ACC_NUM_QGRPS_PER_WORD; qg_idx++) { 5078 acc = accFromQgid(qg_idx + i * ACC_NUM_QGRPS_PER_WORD, conf); 5079 value |= qman_func_id[acc] << (qg_idx * 4); 5080 } 5081 acc_reg_write(d, VRB2_PfQmgrGrpFunction0 + i * ACC_BYTES_IN_WORD, value); 5082 } 5083 5084 /* Configuration of the Arbitration QGroup depth to 1. */ 5085 for (qg_idx = 0; qg_idx < VRB2_NUM_QGRPS; qg_idx++) { 5086 address = VRB2_PfQmgrArbQDepthGrp + ACC_BYTES_IN_WORD * qg_idx; 5087 value = 0; 5088 acc_reg_write(d, address, value); 5089 } 5090 5091 static_allocation = 1; 5092 if (static_allocation == 1) { 5093 /* This pointer to ARAM (512kB) is shifted by 2 (4B per register). */ 5094 uint32_t aram_address = 0; 5095 for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) { 5096 for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) { 5097 address = VRB2_PfQmgrVfBaseAddr + vf_idx 5098 * ACC_BYTES_IN_WORD + qg_idx 5099 * ACC_BYTES_IN_WORD * 64; 5100 value = aram_address; 5101 acc_reg_fast_write(d, address, value); 5102 /* Offset ARAM Address for next memory bank - increment of 4B. 
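 * For example, a queue group exposing 16 AQs with aqDepth() == 4 advances
 * aram_address by 16 * (1 << 4) = 256 words per VF bundle; the accumulated
 * total is checked against VRB2_WORDS_IN_ARAM_SIZE right after this loop.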
*/ 5103 aram_address += aqNum(qg_idx, conf) * 5104 (1 << aqDepth(qg_idx, conf)); 5105 } 5106 } 5107 if (aram_address > VRB2_WORDS_IN_ARAM_SIZE) { 5108 rte_bbdev_log(ERR, "ARAM Configuration not fitting %d %d", 5109 aram_address, VRB2_WORDS_IN_ARAM_SIZE); 5110 return -EINVAL; 5111 } 5112 } else { 5113 /* Dynamic Qmgr allocation. */ 5114 acc_reg_write(d, VRB2_PfQmgrAramAllocEn, 1); 5115 acc_reg_write(d, VRB2_PfQmgrAramAllocSetupN0, 0x1000); 5116 acc_reg_write(d, VRB2_PfQmgrAramAllocSetupN1, 0); 5117 acc_reg_write(d, VRB2_PfQmgrAramAllocSetupN2, 0); 5118 acc_reg_write(d, VRB2_PfQmgrAramAllocSetupN3, 0); 5119 acc_reg_write(d, VRB2_PfQmgrSoftReset, 1); 5120 acc_reg_write(d, VRB2_PfQmgrSoftReset, 0); 5121 } 5122 5123 /* ==== HI Configuration ==== */ 5124 5125 /* No Info Ring/MSI by default. */ 5126 address = VRB2_PfHiInfoRingIntWrEnRegPf; 5127 value = 0; 5128 acc_reg_write(d, address, value); 5129 address = VRB2_PfHiCfgMsiIntWrEnRegPf; 5130 value = 0xFFFFFFFF; 5131 acc_reg_write(d, address, value); 5132 /* Prevent Block on Transmit Error. */ 5133 address = VRB2_PfHiBlockTransmitOnErrorEn; 5134 value = 0; 5135 acc_reg_write(d, address, value); 5136 /* Prevents to drop MSI */ 5137 address = VRB2_PfHiMsiDropEnableReg; 5138 value = 0; 5139 acc_reg_write(d, address, value); 5140 /* Set the PF Mode register */ 5141 address = VRB2_PfHiPfMode; 5142 value = ((conf->pf_mode_en) ? ACC_PF_VAL : 0) | 0x1F07F0; 5143 acc_reg_write(d, address, value); 5144 /* Explicitly releasing AXI after PF Mode. */ 5145 acc_reg_write(d, VRB2_PfDmaAxiControl, 1); 5146 5147 /* QoS overflow init. */ 5148 value = 1; 5149 address = VRB2_PfQosmonAEvalOverflow0; 5150 acc_reg_write(d, address, value); 5151 address = VRB2_PfQosmonBEvalOverflow0; 5152 acc_reg_write(d, address, value); 5153 5154 /* Enabling AQueues through the Queue hierarchy. */ 5155 unsigned int en_bitmask[VRB2_AQ_REG_NUM]; 5156 for (vf_idx = 0; vf_idx < VRB2_NUM_VFS; vf_idx++) { 5157 for (qg_idx = 0; qg_idx < VRB2_NUM_QGRPS; qg_idx++) { 5158 for (aq_reg = 0; aq_reg < VRB2_AQ_REG_NUM; aq_reg++) 5159 en_bitmask[aq_reg] = 0; 5160 if (vf_idx < conf->num_vf_bundles && qg_idx < totalQgs) { 5161 for (aq_reg = 0; aq_reg < VRB2_AQ_REG_NUM; aq_reg++) { 5162 if (aqNum(qg_idx, conf) >= 16 * (aq_reg + 1)) 5163 en_bitmask[aq_reg] = 0xFFFF; 5164 else if (aqNum(qg_idx, conf) <= 16 * aq_reg) 5165 en_bitmask[aq_reg] = 0x0; 5166 else 5167 en_bitmask[aq_reg] = (1 << (aqNum(qg_idx, 5168 conf) - aq_reg * 16)) - 1; 5169 } 5170 } 5171 for (aq_reg = 0; aq_reg < VRB2_AQ_REG_NUM; aq_reg++) { 5172 address = VRB2_PfQmgrAqEnableVf + vf_idx * 16 + aq_reg * 4; 5173 value = (qg_idx << 16) + en_bitmask[aq_reg]; 5174 acc_reg_fast_write(d, address, value); 5175 } 5176 } 5177 } 5178 5179 rte_bbdev_log(INFO, 5180 "VRB2 basic config complete for %s - pf_bb_config should ideally be used instead", 5181 dev_name); 5182 return 0; 5183 } 5184
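
/*
 * Companion usage sketch (not compiled as part of this driver): how a
 * management application might drive the one-time PF setup above before
 * starting VFs. All topology values and the PCI address are illustrative
 * placeholders; pf_bb_config remains the recommended configuration path.
 *
 *   struct rte_acc_conf conf = {0};
 *
 *   conf.pf_mode_en = 0;
 *   conf.num_vf_bundles = 2;
 *   conf.q_ul_5g.num_qgroups = 2;
 *   conf.q_ul_5g.num_aqs_per_groups = 16;
 *   conf.q_ul_5g.aq_depth_log2 = 4;
 *   conf.q_dl_5g.num_qgroups = 2;
 *   conf.q_dl_5g.num_aqs_per_groups = 16;
 *   conf.q_dl_5g.aq_depth_log2 = 4;
 *
 *   ret = vrb2_configure("0000:f7:00.0", &conf);
 *   if (ret != 0)
 *       rte_bbdev_log(ERR, "vrb2_configure failed: %d", ret);
 */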