1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2022 Intel Corporation 3 */ 4 5 #include <unistd.h> 6 7 #include <rte_common.h> 8 #include <rte_log.h> 9 #include <rte_dev.h> 10 #include <rte_malloc.h> 11 #include <rte_mempool.h> 12 #include <rte_byteorder.h> 13 #include <rte_errno.h> 14 #include <rte_branch_prediction.h> 15 #include <rte_hexdump.h> 16 #include <rte_pci.h> 17 #include <rte_bus_pci.h> 18 #include <rte_cycles.h> 19 20 #include <rte_bbdev.h> 21 #include <rte_bbdev_pmd.h> 22 #include "vrb_pmd.h" 23 24 #ifdef RTE_LIBRTE_BBDEV_DEBUG 25 RTE_LOG_REGISTER_SUFFIX(vrb_logtype, vrb, DEBUG); 26 #else 27 RTE_LOG_REGISTER_SUFFIX(vrb_logtype, vrb, NOTICE); 28 #endif 29 30 /* Calculate the offset of the enqueue register. */ 31 static inline uint32_t 32 vrb1_queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id, uint16_t aq_id) 33 { 34 if (pf_device) 35 return ((vf_id << 12) + (qgrp_id << 7) + (aq_id << 3) + VRB1_PfQmgrIngressAq); 36 else 37 return ((qgrp_id << 7) + (aq_id << 3) + VRB1_VfQmgrIngressAq); 38 } 39 40 static inline uint32_t 41 vrb2_queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id, uint16_t aq_id) 42 { 43 if (pf_device) 44 return ((vf_id << 14) + (qgrp_id << 9) + (aq_id << 3) + VRB2_PfQmgrIngressAq); 45 else 46 return ((qgrp_id << 9) + (aq_id << 3) + VRB2_VfQmgrIngressAq); 47 } 48 49 enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, FFT, MLD, NUM_ACC}; 50 51 /* Return the accelerator enum for a Queue Group Index. */ 52 static inline int 53 accFromQgid(int qg_idx, const struct rte_acc_conf *acc_conf) 54 { 55 int accQg[VRB_MAX_QGRPS]; 56 int NumQGroupsPerFn[NUM_ACC]; 57 int acc, qgIdx, qgIndex = 0; 58 for (qgIdx = 0; qgIdx < VRB_MAX_QGRPS; qgIdx++) 59 accQg[qgIdx] = 0; 60 NumQGroupsPerFn[UL_4G] = acc_conf->q_ul_4g.num_qgroups; 61 NumQGroupsPerFn[UL_5G] = acc_conf->q_ul_5g.num_qgroups; 62 NumQGroupsPerFn[DL_4G] = acc_conf->q_dl_4g.num_qgroups; 63 NumQGroupsPerFn[DL_5G] = acc_conf->q_dl_5g.num_qgroups; 64 NumQGroupsPerFn[FFT] = acc_conf->q_fft.num_qgroups; 65 NumQGroupsPerFn[MLD] = acc_conf->q_mld.num_qgroups; 66 for (acc = UL_4G; acc < NUM_ACC; acc++) 67 for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++) 68 accQg[qgIndex++] = acc; 69 acc = accQg[qg_idx]; 70 return acc; 71 } 72 73 /* Return the queue topology for a Queue Group Index. */ 74 static inline void 75 qtopFromAcc(struct rte_acc_queue_topology **qtop, int acc_enum, struct rte_acc_conf *acc_conf) 76 { 77 struct rte_acc_queue_topology *p_qtop; 78 p_qtop = NULL; 79 80 switch (acc_enum) { 81 case UL_4G: 82 p_qtop = &(acc_conf->q_ul_4g); 83 break; 84 case UL_5G: 85 p_qtop = &(acc_conf->q_ul_5g); 86 break; 87 case DL_4G: 88 p_qtop = &(acc_conf->q_dl_4g); 89 break; 90 case DL_5G: 91 p_qtop = &(acc_conf->q_dl_5g); 92 break; 93 case FFT: 94 p_qtop = &(acc_conf->q_fft); 95 break; 96 case MLD: 97 p_qtop = &(acc_conf->q_mld); 98 break; 99 default: 100 /* NOTREACHED. */ 101 rte_bbdev_log(ERR, "Unexpected error evaluating %s using %d", __func__, acc_enum); 102 break; 103 } 104 *qtop = p_qtop; 105 } 106 107 /* Return the AQ depth for a Queue Group Index. */ 108 static inline int 109 aqDepth(int qg_idx, struct rte_acc_conf *acc_conf) 110 { 111 struct rte_acc_queue_topology *q_top = NULL; 112 113 int acc_enum = accFromQgid(qg_idx, acc_conf); 114 qtopFromAcc(&q_top, acc_enum, acc_conf); 115 116 if (unlikely(q_top == NULL)) 117 return 1; 118 119 return RTE_MAX(1, q_top->aq_depth_log2); 120 } 121 122 /* Return the AQ depth for a Queue Group Index. 
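 * Note: unlike aqDepth() above, this helper reports the number of atomic
 * queues configured per group (num_aqs_per_groups) rather than the AQ depth.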
*/ 123 static inline int 124 aqNum(int qg_idx, struct rte_acc_conf *acc_conf) 125 { 126 struct rte_acc_queue_topology *q_top = NULL; 127 128 int acc_enum = accFromQgid(qg_idx, acc_conf); 129 qtopFromAcc(&q_top, acc_enum, acc_conf); 130 131 if (unlikely(q_top == NULL)) 132 return 0; 133 134 return q_top->num_aqs_per_groups; 135 } 136 137 static void 138 initQTop(struct rte_acc_conf *acc_conf) 139 { 140 acc_conf->q_ul_4g.num_aqs_per_groups = 0; 141 acc_conf->q_ul_4g.num_qgroups = 0; 142 acc_conf->q_ul_4g.first_qgroup_index = -1; 143 acc_conf->q_ul_5g.num_aqs_per_groups = 0; 144 acc_conf->q_ul_5g.num_qgroups = 0; 145 acc_conf->q_ul_5g.first_qgroup_index = -1; 146 acc_conf->q_dl_4g.num_aqs_per_groups = 0; 147 acc_conf->q_dl_4g.num_qgroups = 0; 148 acc_conf->q_dl_4g.first_qgroup_index = -1; 149 acc_conf->q_dl_5g.num_aqs_per_groups = 0; 150 acc_conf->q_dl_5g.num_qgroups = 0; 151 acc_conf->q_dl_5g.first_qgroup_index = -1; 152 acc_conf->q_fft.num_aqs_per_groups = 0; 153 acc_conf->q_fft.num_qgroups = 0; 154 acc_conf->q_fft.first_qgroup_index = -1; 155 acc_conf->q_mld.num_aqs_per_groups = 0; 156 acc_conf->q_mld.num_qgroups = 0; 157 acc_conf->q_mld.first_qgroup_index = -1; 158 } 159 160 static inline void 161 updateQtop(uint8_t acc, uint8_t qg, struct rte_acc_conf *acc_conf, struct acc_device *d) { 162 uint32_t reg; 163 struct rte_acc_queue_topology *q_top = NULL; 164 uint16_t aq; 165 166 qtopFromAcc(&q_top, acc, acc_conf); 167 if (unlikely(q_top == NULL)) 168 return; 169 q_top->num_qgroups++; 170 if (q_top->first_qgroup_index == -1) { 171 q_top->first_qgroup_index = qg; 172 /* Can be optimized to assume all are enabled by default. */ 173 reg = acc_reg_read(d, d->queue_offset(d->pf_device, 0, qg, d->num_aqs - 1)); 174 if (reg & ACC_QUEUE_ENABLE) { 175 q_top->num_aqs_per_groups = d->num_aqs; 176 return; 177 } 178 q_top->num_aqs_per_groups = 0; 179 for (aq = 0; aq < d->num_aqs; aq++) { 180 reg = acc_reg_read(d, d->queue_offset(d->pf_device, 0, qg, aq)); 181 if (reg & ACC_QUEUE_ENABLE) 182 q_top->num_aqs_per_groups++; 183 } 184 } 185 } 186 187 /* Check device Qmgr is enabled for protection */ 188 static inline bool 189 vrb_check_device_enable(struct rte_bbdev *dev) 190 { 191 uint32_t reg_aq, qg; 192 struct acc_device *d = dev->data->dev_private; 193 194 for (qg = 0; qg < d->num_qgroups; qg++) { 195 reg_aq = acc_reg_read(d, d->queue_offset(d->pf_device, 0, qg, 0)); 196 if (reg_aq & ACC_QUEUE_ENABLE) 197 return true; 198 } 199 return false; 200 } 201 202 static inline void 203 vrb_vf2pf(struct acc_device *d, unsigned int payload) 204 { 205 acc_reg_write(d, d->reg_addr->vf2pf_doorbell, payload); 206 } 207 208 /* Request device FFT windowing information. */ 209 static inline void 210 vrb_device_fft_win(struct rte_bbdev *dev) 211 { 212 struct acc_device *d = dev->data->dev_private; 213 uint32_t reg, time_out = 0, win; 214 215 if (d->pf_device) 216 return; 217 218 /* Check from the device the first time. */ 219 if (d->fft_window_width[0] == 0) { 220 for (win = 0; win < ACC_MAX_FFT_WIN; win++) { 221 vrb_vf2pf(d, ACC_VF2PF_FFT_WIN_REQUEST | win); 222 reg = acc_reg_read(d, d->reg_addr->pf2vf_doorbell); 223 while ((time_out < ACC_STATUS_TO) && (reg == RTE_BBDEV_DEV_NOSTATUS)) { 224 usleep(ACC_STATUS_WAIT); /*< Wait or VF->PF->VF Comms. */ 225 reg = acc_reg_read(d, d->reg_addr->pf2vf_doorbell); 226 time_out++; 227 } 228 d->fft_window_width[win] = reg; 229 } 230 } 231 } 232 233 /* Fetch configuration enabled for the PF/VF using MMIO Read (slow). 
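 * The queue topology (queue groups per accelerator engine, AQs per group and
 * AQ depth) is rebuilt from the Qmgr registers and cached in acc_conf, so the
 * read is only performed once per device (guarded by d->configured below).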
*/ 234 static inline void 235 fetch_acc_config(struct rte_bbdev *dev) 236 { 237 struct acc_device *d = dev->data->dev_private; 238 struct rte_acc_conf *acc_conf = &d->acc_conf; 239 uint8_t acc, qg; 240 uint32_t reg_aq, reg_len0, reg_len1, reg_len2, reg_len3, reg0, reg1, reg2, reg3; 241 uint32_t reg_mode, idx; 242 struct rte_acc_queue_topology *q_top = NULL; 243 int qman_func_id[VRB_NUM_ACCS] = {ACC_ACCMAP_0, ACC_ACCMAP_1, 244 ACC_ACCMAP_2, ACC_ACCMAP_3, ACC_ACCMAP_4, ACC_ACCMAP_5}; 245 246 /* No need to retrieve the configuration is already done. */ 247 if (d->configured) 248 return; 249 250 if (!vrb_check_device_enable(dev)) { 251 rte_bbdev_log(NOTICE, "%s has no queue enabled and can't be used.", 252 dev->data->name); 253 return; 254 } 255 256 vrb_device_fft_win(dev); 257 258 d->ddr_size = 0; 259 260 /* Single VF Bundle by VF. */ 261 acc_conf->num_vf_bundles = 1; 262 initQTop(acc_conf); 263 264 if (d->device_variant == VRB1_VARIANT) { 265 reg0 = acc_reg_read(d, d->reg_addr->qman_group_func); 266 reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4); 267 for (qg = 0; qg < d->num_qgroups; qg++) { 268 reg_aq = acc_reg_read(d, d->queue_offset(d->pf_device, 0, qg, 0)); 269 if (reg_aq & ACC_QUEUE_ENABLE) { 270 if (qg < ACC_NUM_QGRPS_PER_WORD) 271 idx = (reg0 >> (qg * 4)) & 0x7; 272 else 273 idx = (reg1 >> ((qg - ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7; 274 if (idx < VRB1_NUM_ACCS) { 275 acc = qman_func_id[idx]; 276 updateQtop(acc, qg, acc_conf, d); 277 } 278 } 279 } 280 281 /* Check the depth of the AQs. */ 282 reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset); 283 reg_len1 = acc_reg_read(d, d->reg_addr->depth_log1_offset); 284 for (acc = 0; acc < NUM_ACC; acc++) { 285 qtopFromAcc(&q_top, acc, acc_conf); 286 if (q_top->first_qgroup_index < ACC_NUM_QGRPS_PER_WORD) 287 q_top->aq_depth_log2 = 288 (reg_len0 >> (q_top->first_qgroup_index * 4)) & 0xF; 289 else 290 q_top->aq_depth_log2 = (reg_len1 >> ((q_top->first_qgroup_index - 291 ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF; 292 } 293 } else { 294 reg0 = acc_reg_read(d, d->reg_addr->qman_group_func); 295 reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4); 296 reg2 = acc_reg_read(d, d->reg_addr->qman_group_func + 8); 297 reg3 = acc_reg_read(d, d->reg_addr->qman_group_func + 12); 298 /* printf("Debug Function %08x %08x %08x %08x\n", reg0, reg1, reg2, reg3);*/ 299 for (qg = 0; qg < VRB2_NUM_QGRPS; qg++) { 300 reg_aq = acc_reg_read(d, vrb2_queue_offset(d->pf_device, 0, qg, 0)); 301 if (reg_aq & ACC_QUEUE_ENABLE) { 302 /* printf("Qg enabled %d %x\n", qg, reg_aq);*/ 303 if (qg / ACC_NUM_QGRPS_PER_WORD == 0) 304 idx = (reg0 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7; 305 else if (qg / ACC_NUM_QGRPS_PER_WORD == 1) 306 idx = (reg1 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7; 307 else if (qg / ACC_NUM_QGRPS_PER_WORD == 2) 308 idx = (reg2 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7; 309 else 310 idx = (reg3 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7; 311 if (idx < VRB_NUM_ACCS) { 312 acc = qman_func_id[idx]; 313 updateQtop(acc, qg, acc_conf, d); 314 } 315 } 316 } 317 318 /* Check the depth of the AQs. 
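 * On this variant the 4-bit depth field of each queue group is spread across
 * four consecutive 32-bit registers read below.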
*/ 319 reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset); 320 reg_len1 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 4); 321 reg_len2 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 8); 322 reg_len3 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 12); 323 324 for (acc = 0; acc < NUM_ACC; acc++) { 325 qtopFromAcc(&q_top, acc, acc_conf); 326 if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 0) 327 q_top->aq_depth_log2 = (reg_len0 >> ((q_top->first_qgroup_index % 328 ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF; 329 else if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 1) 330 q_top->aq_depth_log2 = (reg_len1 >> ((q_top->first_qgroup_index % 331 ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF; 332 else if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 2) 333 q_top->aq_depth_log2 = (reg_len2 >> ((q_top->first_qgroup_index % 334 ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF; 335 else 336 q_top->aq_depth_log2 = (reg_len3 >> ((q_top->first_qgroup_index % 337 ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF; 338 } 339 } 340 341 /* Read PF mode. */ 342 if (d->pf_device) { 343 reg_mode = acc_reg_read(d, d->reg_addr->pf_mode); 344 acc_conf->pf_mode_en = (reg_mode == ACC_PF_VAL) ? 1 : 0; 345 } else { 346 reg_mode = acc_reg_read(d, d->reg_addr->hi_mode); 347 acc_conf->pf_mode_en = reg_mode & 1; 348 } 349 350 rte_bbdev_log_debug( 351 "%s Config LLR SIGN IN/OUT %s %s QG %u %u %u %u %u %u AQ %u %u %u %u %u %u Len %u %u %u %u %u %u\n", 352 (d->pf_device) ? "PF" : "VF", 353 (acc_conf->input_pos_llr_1_bit) ? "POS" : "NEG", 354 (acc_conf->output_pos_llr_1_bit) ? "POS" : "NEG", 355 acc_conf->q_ul_4g.num_qgroups, 356 acc_conf->q_dl_4g.num_qgroups, 357 acc_conf->q_ul_5g.num_qgroups, 358 acc_conf->q_dl_5g.num_qgroups, 359 acc_conf->q_fft.num_qgroups, 360 acc_conf->q_mld.num_qgroups, 361 acc_conf->q_ul_4g.num_aqs_per_groups, 362 acc_conf->q_dl_4g.num_aqs_per_groups, 363 acc_conf->q_ul_5g.num_aqs_per_groups, 364 acc_conf->q_dl_5g.num_aqs_per_groups, 365 acc_conf->q_fft.num_aqs_per_groups, 366 acc_conf->q_mld.num_aqs_per_groups, 367 acc_conf->q_ul_4g.aq_depth_log2, 368 acc_conf->q_dl_4g.aq_depth_log2, 369 acc_conf->q_ul_5g.aq_depth_log2, 370 acc_conf->q_dl_5g.aq_depth_log2, 371 acc_conf->q_fft.aq_depth_log2, 372 acc_conf->q_mld.aq_depth_log2); 373 } 374 375 /* Request device status information. */ 376 static inline uint32_t 377 vrb_device_status(struct rte_bbdev *dev) 378 { 379 struct acc_device *d = dev->data->dev_private; 380 uint32_t reg, time_out = 0; 381 382 if (d->pf_device) 383 return RTE_BBDEV_DEV_NOT_SUPPORTED; 384 385 vrb_vf2pf(d, ACC_VF2PF_STATUS_REQUEST); 386 reg = acc_reg_read(d, d->reg_addr->pf2vf_doorbell); 387 while ((time_out < ACC_STATUS_TO) && (reg == RTE_BBDEV_DEV_NOSTATUS)) { 388 usleep(ACC_STATUS_WAIT); /*< Wait or VF->PF->VF Comms */ 389 reg = acc_reg_read(d, d->reg_addr->pf2vf_doorbell); 390 time_out++; 391 } 392 393 return reg; 394 } 395 396 /* Checks PF Info Ring to find the interrupt cause and handles it accordingly. 
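 * Causes outside the DMA completion range are logged as warnings and those
 * entries cleared before the scan moves on.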
*/ 397 static inline void 398 vrb_check_ir(struct acc_device *acc_dev) 399 { 400 volatile union acc_info_ring_data *ring_data; 401 uint16_t info_ring_head = acc_dev->info_ring_head, int_nb; 402 if (unlikely(acc_dev->info_ring == NULL)) 403 return; 404 405 ring_data = acc_dev->info_ring + (acc_dev->info_ring_head & ACC_INFO_RING_MASK); 406 407 while (ring_data->valid) { 408 int_nb = int_from_ring(*ring_data, acc_dev->device_variant); 409 if ((int_nb < ACC_PF_INT_DMA_DL_DESC_IRQ) || ( 410 int_nb > ACC_PF_INT_DMA_MLD_DESC_IRQ)) { 411 rte_bbdev_log(WARNING, "InfoRing: ITR:%d Info:0x%x", 412 int_nb, ring_data->detailed_info); 413 /* Initialize Info Ring entry and move forward. */ 414 ring_data->val = 0; 415 } 416 info_ring_head++; 417 ring_data = acc_dev->info_ring + (info_ring_head & ACC_INFO_RING_MASK); 418 } 419 } 420 421 /* Interrupt handler triggered by dev for handling specific interrupt. */ 422 static void 423 vrb_dev_interrupt_handler(void *cb_arg) 424 { 425 struct rte_bbdev *dev = cb_arg; 426 struct acc_device *acc_dev = dev->data->dev_private; 427 volatile union acc_info_ring_data *ring_data; 428 struct acc_deq_intr_details deq_intr_det; 429 uint16_t vf_id, aq_id, qg_id, int_nb; 430 431 ring_data = acc_dev->info_ring + (acc_dev->info_ring_head & ACC_INFO_RING_MASK); 432 433 while (ring_data->valid) { 434 vf_id = vf_from_ring(*ring_data, acc_dev->device_variant); 435 aq_id = aq_from_ring(*ring_data, acc_dev->device_variant); 436 qg_id = qg_from_ring(*ring_data, acc_dev->device_variant); 437 int_nb = int_from_ring(*ring_data, acc_dev->device_variant); 438 if (acc_dev->pf_device) { 439 rte_bbdev_log_debug( 440 "PF Interrupt received, Info Ring data: 0x%x -> %d", 441 ring_data->val, int_nb); 442 443 switch (int_nb) { 444 case ACC_PF_INT_DMA_DL_DESC_IRQ: 445 case ACC_PF_INT_DMA_UL_DESC_IRQ: 446 case ACC_PF_INT_DMA_FFT_DESC_IRQ: 447 case ACC_PF_INT_DMA_UL5G_DESC_IRQ: 448 case ACC_PF_INT_DMA_DL5G_DESC_IRQ: 449 case ACC_PF_INT_DMA_MLD_DESC_IRQ: 450 deq_intr_det.queue_id = get_queue_id_from_ring_info( 451 dev->data, *ring_data); 452 if (deq_intr_det.queue_id == UINT16_MAX) { 453 rte_bbdev_log(ERR, 454 "Couldn't find queue: aq_id: %u, qg_id: %u, vf_id: %u", 455 aq_id, qg_id, vf_id); 456 return; 457 } 458 rte_bbdev_pmd_callback_process(dev, 459 RTE_BBDEV_EVENT_DEQUEUE, &deq_intr_det); 460 break; 461 default: 462 rte_bbdev_pmd_callback_process(dev, RTE_BBDEV_EVENT_ERROR, NULL); 463 break; 464 } 465 } else { 466 rte_bbdev_log_debug( 467 "VRB VF Interrupt received, Info Ring data: 0x%x\n", 468 ring_data->val); 469 switch (int_nb) { 470 case ACC_VF_INT_DMA_DL_DESC_IRQ: 471 case ACC_VF_INT_DMA_UL_DESC_IRQ: 472 case ACC_VF_INT_DMA_FFT_DESC_IRQ: 473 case ACC_VF_INT_DMA_UL5G_DESC_IRQ: 474 case ACC_VF_INT_DMA_DL5G_DESC_IRQ: 475 case ACC_VF_INT_DMA_MLD_DESC_IRQ: 476 /* VFs are not aware of their vf_id - it's set to 0. */ 477 set_vf_in_ring(ring_data, acc_dev->device_variant, 0); 478 deq_intr_det.queue_id = get_queue_id_from_ring_info( 479 dev->data, *ring_data); 480 if (deq_intr_det.queue_id == UINT16_MAX) { 481 rte_bbdev_log(ERR, 482 "Couldn't find queue: aq_id: %u, qg_id: %u", 483 aq_id, qg_id); 484 return; 485 } 486 rte_bbdev_pmd_callback_process(dev, 487 RTE_BBDEV_EVENT_DEQUEUE, &deq_intr_det); 488 break; 489 default: 490 rte_bbdev_pmd_callback_process(dev, RTE_BBDEV_EVENT_ERROR, NULL); 491 break; 492 } 493 } 494 495 /* Initialize Info Ring entry and move forward. 
*/ 496 ring_data->val = 0; 497 ++acc_dev->info_ring_head; 498 ring_data = acc_dev->info_ring + (acc_dev->info_ring_head & ACC_INFO_RING_MASK); 499 } 500 } 501 502 /* Allocate and setup inforing. */ 503 static int 504 allocate_info_ring(struct rte_bbdev *dev) 505 { 506 struct acc_device *d = dev->data->dev_private; 507 rte_iova_t info_ring_iova; 508 uint32_t phys_low, phys_high; 509 510 if (d->info_ring != NULL) 511 return 0; /* Already configured. */ 512 513 /* Allocate InfoRing */ 514 d->info_ring = rte_zmalloc_socket("Info Ring", ACC_INFO_RING_NUM_ENTRIES * 515 sizeof(*d->info_ring), RTE_CACHE_LINE_SIZE, dev->data->socket_id); 516 if (d->info_ring == NULL) { 517 rte_bbdev_log(ERR, 518 "Failed to allocate Info Ring for %s:%u", 519 dev->device->driver->name, 520 dev->data->dev_id); 521 return -ENOMEM; 522 } 523 info_ring_iova = rte_malloc_virt2iova(d->info_ring); 524 525 /* Setup Info Ring. */ 526 phys_high = (uint32_t)(info_ring_iova >> 32); 527 phys_low = (uint32_t)(info_ring_iova); 528 acc_reg_write(d, d->reg_addr->info_ring_hi, phys_high); 529 acc_reg_write(d, d->reg_addr->info_ring_lo, phys_low); 530 if (d->device_variant == VRB1_VARIANT) 531 acc_reg_write(d, d->reg_addr->info_ring_en, VRB1_REG_IRQ_EN_ALL); 532 else 533 acc_reg_write(d, d->reg_addr->info_ring_en, VRB2_REG_IRQ_EN_ALL); 534 d->info_ring_head = (acc_reg_read(d, d->reg_addr->info_ring_ptr) & 535 0xFFF) / sizeof(union acc_info_ring_data); 536 return 0; 537 } 538 539 540 /* Allocate 64MB memory used for all software rings. */ 541 static int 542 vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id) 543 { 544 uint32_t phys_low, phys_high, value; 545 struct acc_device *d = dev->data->dev_private; 546 int ret; 547 548 if (d->pf_device && !d->acc_conf.pf_mode_en) { 549 rte_bbdev_log(NOTICE, 550 "%s has PF mode disabled. This PF can't be used.", 551 dev->data->name); 552 return -ENODEV; 553 } 554 if (!d->pf_device && d->acc_conf.pf_mode_en) { 555 rte_bbdev_log(NOTICE, 556 "%s has PF mode enabled. This VF can't be used.", 557 dev->data->name); 558 return -ENODEV; 559 } 560 561 if (!vrb_check_device_enable(dev)) { 562 rte_bbdev_log(NOTICE, "%s has no queue enabled and can't be used.", 563 dev->data->name); 564 return -ENODEV; 565 } 566 567 alloc_sw_rings_min_mem(dev, d, num_queues, socket_id); 568 569 /* If minimal memory space approach failed, then allocate 570 * the 2 * 64MB block for the sw rings. 571 */ 572 if (d->sw_rings == NULL) 573 alloc_2x64mb_sw_rings_mem(dev, d, socket_id); 574 575 if (d->sw_rings == NULL) { 576 rte_bbdev_log(NOTICE, 577 "Failure allocating sw_rings memory"); 578 return -ENOMEM; 579 } 580 581 /* Configure device with the base address for DMA descriptor rings. 582 * Same descriptor rings used for UL and DL DMA Engines. 583 * Note : Assuming only VF0 bundle is used for PF mode. 584 */ 585 phys_high = (uint32_t)(d->sw_rings_iova >> 32); 586 phys_low = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1)); 587 588 /* Read the populated cfg from device registers. */ 589 fetch_acc_config(dev); 590 591 /* Start Pmon */ 592 for (value = 0; value <= 2; value++) { 593 acc_reg_write(d, d->reg_addr->pmon_ctrl_a, value); 594 acc_reg_write(d, d->reg_addr->pmon_ctrl_b, value); 595 acc_reg_write(d, d->reg_addr->pmon_ctrl_c, value); 596 } 597 598 /* Release AXI from PF. 
*/ 599 if (d->pf_device) 600 acc_reg_write(d, VRB1_PfDmaAxiControl, 1); 601 602 acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high); 603 acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low); 604 acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high); 605 acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low); 606 acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high); 607 acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low); 608 acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high); 609 acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low); 610 acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high); 611 acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low); 612 if (d->device_variant == VRB2_VARIANT) { 613 acc_reg_write(d, d->reg_addr->dma_ring_mld_hi, phys_high); 614 acc_reg_write(d, d->reg_addr->dma_ring_mld_lo, phys_low); 615 } 616 /* 617 * Configure Ring Size to the max queue ring size 618 * (used for wrapping purpose). 619 */ 620 value = log2_basic(d->sw_ring_size / ACC_RING_SIZE_GRANULARITY); 621 acc_reg_write(d, d->reg_addr->ring_size, value); 622 623 /* Configure tail pointer for use when SDONE enabled. */ 624 if (d->tail_ptrs == NULL) 625 d->tail_ptrs = rte_zmalloc_socket(dev->device->driver->name, 626 VRB_MAX_QGRPS * VRB_MAX_AQS * sizeof(uint32_t), 627 RTE_CACHE_LINE_SIZE, socket_id); 628 if (d->tail_ptrs == NULL) { 629 rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u", 630 dev->device->driver->name, 631 dev->data->dev_id); 632 ret = -ENOMEM; 633 goto free_sw_rings; 634 } 635 d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs); 636 637 phys_high = (uint32_t)(d->tail_ptr_iova >> 32); 638 phys_low = (uint32_t)(d->tail_ptr_iova); 639 acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high); 640 acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low); 641 acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high); 642 acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low); 643 acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high); 644 acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low); 645 acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high); 646 acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low); 647 acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high); 648 acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low); 649 if (d->device_variant == VRB2_VARIANT) { 650 acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high); 651 acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low); 652 } 653 654 ret = allocate_info_ring(dev); 655 if (ret < 0) { 656 rte_bbdev_log(ERR, "Failed to allocate info_ring for %s:%u", 657 dev->device->driver->name, 658 dev->data->dev_id); 659 /* Continue */ 660 } 661 662 if (d->harq_layout == NULL) 663 d->harq_layout = rte_zmalloc_socket("HARQ Layout", 664 ACC_HARQ_LAYOUT * sizeof(*d->harq_layout), 665 RTE_CACHE_LINE_SIZE, dev->data->socket_id); 666 if (d->harq_layout == NULL) { 667 rte_bbdev_log(ERR, "Failed to allocate harq_layout for %s:%u", 668 dev->device->driver->name, 669 dev->data->dev_id); 670 ret = -ENOMEM; 671 goto free_tail_ptrs; 672 } 673 674 /* Mark as configured properly */ 675 d->configured = true; 676 vrb_vf2pf(d, ACC_VF2PF_USING_VF); 677 678 rte_bbdev_log_debug( 679 "Device (%s) configured sw_rings = %p, sw_rings_iova = %#" 680 PRIx64, dev->data->name, d->sw_rings, d->sw_rings_iova); 681 return 0; 682 683 free_tail_ptrs: 684 rte_free(d->tail_ptrs); 685 d->tail_ptrs = NULL; 686 free_sw_rings: 687 rte_free(d->sw_rings_base); 688 d->sw_rings = 
NULL; 689 690 return ret; 691 } 692 693 static int 694 vrb_intr_enable(struct rte_bbdev *dev) 695 { 696 int ret; 697 struct acc_device *d = dev->data->dev_private; 698 699 if (d->device_variant == VRB1_VARIANT) { 700 /* On VRB1: cannot enable MSI/IR to avoid potential back-pressure corner case. */ 701 rte_bbdev_log(ERR, "VRB1 (%s) doesn't support any MSI/MSI-X interrupt\n", 702 dev->data->name); 703 return -ENOTSUP; 704 } 705 706 /* 707 * MSI/MSI-X are supported. 708 * Option controlled by vfio-intr through EAL parameter. 709 */ 710 if (rte_intr_type_get(dev->intr_handle) == RTE_INTR_HANDLE_VFIO_MSI) { 711 712 ret = allocate_info_ring(dev); 713 if (ret < 0) { 714 rte_bbdev_log(ERR, 715 "Couldn't allocate info ring for device: %s", 716 dev->data->name); 717 return ret; 718 } 719 ret = rte_intr_enable(dev->intr_handle); 720 if (ret < 0) { 721 rte_bbdev_log(ERR, 722 "Couldn't enable interrupts for device: %s", 723 dev->data->name); 724 rte_free(d->info_ring); 725 return ret; 726 } 727 ret = rte_intr_callback_register(dev->intr_handle, 728 vrb_dev_interrupt_handler, dev); 729 if (ret < 0) { 730 rte_bbdev_log(ERR, 731 "Couldn't register interrupt callback for device: %s", 732 dev->data->name); 733 rte_free(d->info_ring); 734 return ret; 735 } 736 737 return 0; 738 } else if (rte_intr_type_get(dev->intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX) { 739 int i, max_queues; 740 struct acc_device *acc_dev = dev->data->dev_private; 741 742 ret = allocate_info_ring(dev); 743 if (ret < 0) { 744 rte_bbdev_log(ERR, 745 "Couldn't allocate info ring for device: %s", 746 dev->data->name); 747 return ret; 748 } 749 750 if (d->device_variant == VRB1_VARIANT) { 751 if (acc_dev->pf_device) 752 max_queues = VRB1_MAX_PF_MSIX; 753 else 754 max_queues = VRB1_MAX_VF_MSIX; 755 } else { 756 if (acc_dev->pf_device) 757 max_queues = VRB2_MAX_PF_MSIX; 758 else 759 max_queues = VRB2_MAX_VF_MSIX; 760 } 761 762 if (rte_intr_efd_enable(dev->intr_handle, max_queues)) { 763 rte_bbdev_log(ERR, "Failed to create fds for %u queues", 764 dev->data->num_queues); 765 return -1; 766 } 767 768 for (i = 0; i < max_queues; ++i) { 769 if (rte_intr_efds_index_set(dev->intr_handle, i, 770 rte_intr_fd_get(dev->intr_handle))) 771 return -rte_errno; 772 } 773 774 if (rte_intr_vec_list_alloc(dev->intr_handle, "intr_vec", 775 dev->data->num_queues)) { 776 rte_bbdev_log(ERR, "Failed to allocate %u vectors", 777 dev->data->num_queues); 778 return -ENOMEM; 779 } 780 781 ret = rte_intr_enable(dev->intr_handle); 782 783 if (ret < 0) { 784 rte_bbdev_log(ERR, 785 "Couldn't enable interrupts for device: %s", 786 dev->data->name); 787 rte_free(d->info_ring); 788 return ret; 789 } 790 ret = rte_intr_callback_register(dev->intr_handle, 791 vrb_dev_interrupt_handler, dev); 792 if (ret < 0) { 793 rte_bbdev_log(ERR, 794 "Couldn't register interrupt callback for device: %s", 795 dev->data->name); 796 rte_free(d->info_ring); 797 return ret; 798 } 799 800 return 0; 801 } 802 803 rte_bbdev_log(ERR, "Device (%s) supports only VFIO MSI/MSI-X interrupts\n", 804 dev->data->name); 805 return -ENOTSUP; 806 } 807 808 /* Free memory used for software rings. 
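 * The tail pointers, info ring and HARQ layout allocated during setup are
 * released together with the software ring memory.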
*/ 809 static int 810 vrb_dev_close(struct rte_bbdev *dev) 811 { 812 struct acc_device *d = dev->data->dev_private; 813 vrb_check_ir(d); 814 if (d->sw_rings_base != NULL) { 815 rte_free(d->tail_ptrs); 816 rte_free(d->info_ring); 817 rte_free(d->sw_rings_base); 818 rte_free(d->harq_layout); 819 d->tail_ptrs = NULL; 820 d->info_ring = NULL; 821 d->sw_rings_base = NULL; 822 d->harq_layout = NULL; 823 } 824 /* Ensure all in flight HW transactions are completed. */ 825 usleep(ACC_LONG_WAIT); 826 return 0; 827 } 828 829 /** 830 * Report a queue index which is free. 831 * Return 0 to 16k for a valid queue_idx or -1 when no queue is available. 832 * Note : Only supporting VF0 Bundle for PF mode. 833 */ 834 static int 835 vrb_find_free_queue_idx(struct rte_bbdev *dev, 836 const struct rte_bbdev_queue_conf *conf) 837 { 838 struct acc_device *d = dev->data->dev_private; 839 int op_2_acc[7] = {0, UL_4G, DL_4G, UL_5G, DL_5G, FFT, MLD}; 840 int acc = op_2_acc[conf->op_type]; 841 struct rte_acc_queue_topology *qtop = NULL; 842 uint16_t group_idx; 843 uint64_t aq_idx; 844 845 qtopFromAcc(&qtop, acc, &(d->acc_conf)); 846 if (qtop == NULL) 847 return -1; 848 /* Identify matching QGroup Index which are sorted in priority order. */ 849 group_idx = qtop->first_qgroup_index + conf->priority; 850 if (group_idx >= d->num_qgroups || 851 conf->priority >= qtop->num_qgroups) { 852 rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u", 853 dev->data->name, conf->priority); 854 return -1; 855 } 856 /* Find a free AQ_idx. */ 857 for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) { 858 if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) == 0) { 859 /* Mark the Queue as assigned. */ 860 d->q_assigned_bit_map[group_idx] |= (1ULL << aq_idx); 861 /* Report the AQ Index. */ 862 return queue_index(group_idx, aq_idx, d->device_variant); 863 } 864 } 865 rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u", 866 dev->data->name, conf->priority); 867 return -1; 868 } 869 870 /* Setup device queue. */ 871 static int 872 vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id, 873 const struct rte_bbdev_queue_conf *conf) 874 { 875 struct acc_device *d = dev->data->dev_private; 876 struct acc_queue *q; 877 int32_t q_idx; 878 int ret; 879 880 if (d == NULL) { 881 rte_bbdev_log(ERR, "Undefined device"); 882 return -ENODEV; 883 } 884 /* Allocate the queue data structure. */ 885 q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q), 886 RTE_CACHE_LINE_SIZE, conf->socket); 887 if (q == NULL) { 888 rte_bbdev_log(ERR, "Failed to allocate queue memory"); 889 return -ENOMEM; 890 } 891 892 q->d = d; 893 q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id)); 894 q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id); 895 896 /* Prepare the Ring with default descriptor format. */ 897 union acc_dma_desc *desc = NULL; 898 unsigned int desc_idx, b_idx; 899 int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ? 900 ACC_FCW_LE_BLEN : (conf->op_type == RTE_BBDEV_OP_TURBO_DEC ? 901 ACC_FCW_TD_BLEN : (conf->op_type == RTE_BBDEV_OP_LDPC_DEC ? 902 ACC_FCW_LD_BLEN : (conf->op_type == RTE_BBDEV_OP_FFT ? 903 ACC_FCW_FFT_BLEN : ACC_FCW_MLDTS_BLEN)))); 904 905 if ((q->d->device_variant == VRB2_VARIANT) && (conf->op_type == RTE_BBDEV_OP_FFT)) 906 fcw_len = ACC_FCW_FFT_BLEN_3; 907 908 for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) { 909 desc = q->ring_addr + desc_idx; 910 desc->req.word0 = ACC_DMA_DESC_TYPE; 911 desc->req.word1 = 0; /**< Timestamp. 
*/ 912 desc->req.word2 = 0; 913 desc->req.word3 = 0; 914 uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 915 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset; 916 desc->req.data_ptrs[0].blen = fcw_len; 917 desc->req.data_ptrs[0].blkid = ACC_DMA_BLKID_FCW; 918 desc->req.data_ptrs[0].last = 0; 919 desc->req.data_ptrs[0].dma_ext = 0; 920 for (b_idx = 1; b_idx < ACC_DMA_MAX_NUM_POINTERS - 1; b_idx++) { 921 desc->req.data_ptrs[b_idx].blkid = ACC_DMA_BLKID_IN; 922 desc->req.data_ptrs[b_idx].last = 1; 923 desc->req.data_ptrs[b_idx].dma_ext = 0; 924 b_idx++; 925 desc->req.data_ptrs[b_idx].blkid = 926 ACC_DMA_BLKID_OUT_ENC; 927 desc->req.data_ptrs[b_idx].last = 1; 928 desc->req.data_ptrs[b_idx].dma_ext = 0; 929 } 930 /* Preset some fields of LDPC FCW. */ 931 desc->req.fcw_ld.FCWversion = ACC_FCW_VER; 932 desc->req.fcw_ld.gain_i = 1; 933 desc->req.fcw_ld.gain_h = 1; 934 } 935 936 q->lb_in = rte_zmalloc_socket(dev->device->driver->name, 937 RTE_CACHE_LINE_SIZE, 938 RTE_CACHE_LINE_SIZE, conf->socket); 939 if (q->lb_in == NULL) { 940 rte_bbdev_log(ERR, "Failed to allocate lb_in memory"); 941 ret = -ENOMEM; 942 goto free_q; 943 } 944 q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in); 945 q->lb_out = rte_zmalloc_socket(dev->device->driver->name, 946 RTE_CACHE_LINE_SIZE, 947 RTE_CACHE_LINE_SIZE, conf->socket); 948 if (q->lb_out == NULL) { 949 rte_bbdev_log(ERR, "Failed to allocate lb_out memory"); 950 ret = -ENOMEM; 951 goto free_lb_in; 952 } 953 q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out); 954 q->companion_ring_addr = rte_zmalloc_socket(dev->device->driver->name, 955 d->sw_ring_max_depth * sizeof(*q->companion_ring_addr), 956 RTE_CACHE_LINE_SIZE, conf->socket); 957 if (q->companion_ring_addr == NULL) { 958 rte_bbdev_log(ERR, "Failed to allocate companion_ring memory"); 959 ret = -ENOMEM; 960 goto free_lb_out; 961 } 962 963 /* 964 * Software queue ring wraps synchronously with the HW when it reaches 965 * the boundary of the maximum allocated queue size, no matter what the 966 * sw queue size is. This wrapping is guarded by setting the wrap_mask 967 * to represent the maximum queue size as allocated at the time when 968 * the device has been setup (in configure()). 969 * 970 * The queue depth is set to the queue size value (conf->queue_size). 971 * This limits the occupancy of the queue at any point of time, so that 972 * the queue does not get swamped with enqueue requests. 
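 * For example, assuming sw_ring_max_depth is 1024 while conf->queue_size is
 * 128, sw_ring_wrap_mask below is 0x3FF and sw_ring_depth is 128.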
973 */ 974 q->sw_ring_depth = conf->queue_size; 975 q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1; 976 977 q->op_type = conf->op_type; 978 979 q_idx = vrb_find_free_queue_idx(dev, conf); 980 if (q_idx == -1) { 981 ret = -EINVAL; 982 goto free_companion_ring_addr; 983 } 984 985 q->fcw_ring = rte_zmalloc_socket(dev->device->driver->name, 986 ACC_MAX_FCW_SIZE * d->sw_ring_max_depth, 987 RTE_CACHE_LINE_SIZE, conf->socket); 988 if (q->fcw_ring == NULL) { 989 rte_bbdev_log(ERR, "Failed to allocate fcw_ring memory"); 990 ret = -ENOMEM; 991 goto free_companion_ring_addr; 992 } 993 q->fcw_ring_addr_iova = rte_malloc_virt2iova(q->fcw_ring); 994 995 /* For FFT we need to store the FCW separately */ 996 if (conf->op_type == RTE_BBDEV_OP_FFT) { 997 for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) { 998 desc = q->ring_addr + desc_idx; 999 desc->req.data_ptrs[0].address = q->fcw_ring_addr_iova + 1000 desc_idx * ACC_MAX_FCW_SIZE; 1001 } 1002 } 1003 1004 q->qgrp_id = qg_from_q(q_idx, d->device_variant); 1005 q->vf_id = vf_from_q(q_idx, d->device_variant); 1006 q->aq_id = aq_from_q(q_idx, d->device_variant); 1007 1008 q->aq_depth = 0; 1009 if (conf->op_type == RTE_BBDEV_OP_TURBO_DEC) 1010 q->aq_depth = (1 << d->acc_conf.q_ul_4g.aq_depth_log2); 1011 else if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC) 1012 q->aq_depth = (1 << d->acc_conf.q_dl_4g.aq_depth_log2); 1013 else if (conf->op_type == RTE_BBDEV_OP_LDPC_DEC) 1014 q->aq_depth = (1 << d->acc_conf.q_ul_5g.aq_depth_log2); 1015 else if (conf->op_type == RTE_BBDEV_OP_LDPC_ENC) 1016 q->aq_depth = (1 << d->acc_conf.q_dl_5g.aq_depth_log2); 1017 else if (conf->op_type == RTE_BBDEV_OP_FFT) 1018 q->aq_depth = (1 << d->acc_conf.q_fft.aq_depth_log2); 1019 else if (conf->op_type == RTE_BBDEV_OP_MLDTS) 1020 q->aq_depth = (1 << d->acc_conf.q_mld.aq_depth_log2); 1021 1022 q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base, 1023 d->queue_offset(d->pf_device, q->vf_id, q->qgrp_id, q->aq_id)); 1024 1025 rte_bbdev_log_debug( 1026 "Setup dev%u q%u: qgrp_id=%u, vf_id=%u, aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p base %p\n", 1027 dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id, 1028 q->aq_id, q->aq_depth, q->mmio_reg_enqueue, 1029 d->mmio_base); 1030 1031 dev->data->queues[queue_id].queue_private = q; 1032 return 0; 1033 1034 free_companion_ring_addr: 1035 rte_free(q->companion_ring_addr); 1036 q->companion_ring_addr = NULL; 1037 free_lb_out: 1038 rte_free(q->lb_out); 1039 q->lb_out = NULL; 1040 free_lb_in: 1041 rte_free(q->lb_in); 1042 q->lb_in = NULL; 1043 free_q: 1044 rte_free(q); 1045 q = NULL; 1046 1047 return ret; 1048 } 1049 1050 static inline void 1051 vrb_print_op(struct rte_bbdev_dec_op *op, enum rte_bbdev_op_type op_type, 1052 uint16_t index) 1053 { 1054 if (op == NULL) 1055 return; 1056 if (op_type == RTE_BBDEV_OP_LDPC_DEC) 1057 rte_bbdev_log(INFO, 1058 " Op 5GUL %d %d %d %d %d %d %d %d %d %d %d %d", 1059 index, 1060 op->ldpc_dec.basegraph, op->ldpc_dec.z_c, 1061 op->ldpc_dec.n_cb, op->ldpc_dec.q_m, 1062 op->ldpc_dec.n_filler, op->ldpc_dec.cb_params.e, 1063 op->ldpc_dec.op_flags, op->ldpc_dec.rv_index, 1064 op->ldpc_dec.iter_max, op->ldpc_dec.iter_count, 1065 op->ldpc_dec.harq_combined_input.length 1066 ); 1067 else if (op_type == RTE_BBDEV_OP_LDPC_ENC) { 1068 struct rte_bbdev_enc_op *op_dl = (struct rte_bbdev_enc_op *) op; 1069 rte_bbdev_log(INFO, 1070 " Op 5GDL %d %d %d %d %d %d %d %d %d", 1071 index, 1072 op_dl->ldpc_enc.basegraph, op_dl->ldpc_enc.z_c, 1073 op_dl->ldpc_enc.n_cb, op_dl->ldpc_enc.q_m, 1074 op_dl->ldpc_enc.n_filler, 
op_dl->ldpc_enc.cb_params.e,
				op_dl->ldpc_enc.op_flags, op_dl->ldpc_enc.rv_index
				);
	} else if (op_type == RTE_BBDEV_OP_MLDTS) {
		struct rte_bbdev_mldts_op *op_mldts = (struct rte_bbdev_mldts_op *) op;
		rte_bbdev_log(INFO, " Op MLD %d RBs %d NL %d Rp %d %d %x",
				index,
				op_mldts->mldts.num_rbs, op_mldts->mldts.num_layers,
				op_mldts->mldts.r_rep,
				op_mldts->mldts.c_rep, op_mldts->mldts.op_flags);
	}
}

/* Stop queue and clear counters. */
static int
vrb_queue_stop(struct rte_bbdev *dev, uint16_t queue_id)
{
	struct acc_queue *q;
	struct rte_bbdev_dec_op *op;
	uint16_t i;
	q = dev->data->queues[queue_id].queue_private;
	rte_bbdev_log(INFO, "Queue Stop %d H/T/D %d %d %x OpType %d",
			queue_id, q->sw_ring_head, q->sw_ring_tail,
			q->sw_ring_depth, q->op_type);
	for (i = 0; i < q->sw_ring_depth; ++i) {
		op = (q->ring_addr + i)->req.op_addr;
		vrb_print_op(op, q->op_type, i);
	}
	/* ignore all operations in flight and clear counters */
	q->sw_ring_tail = q->sw_ring_head;
	q->aq_enqueued = 0;
	q->aq_dequeued = 0;
	dev->data->queues[queue_id].queue_stats.enqueued_count = 0;
	dev->data->queues[queue_id].queue_stats.dequeued_count = 0;
	dev->data->queues[queue_id].queue_stats.enqueue_err_count = 0;
	dev->data->queues[queue_id].queue_stats.dequeue_err_count = 0;
	dev->data->queues[queue_id].queue_stats.enqueue_warn_count = 0;
	dev->data->queues[queue_id].queue_stats.dequeue_warn_count = 0;
	return 0;
}

/* Release queue. */
static int
vrb_queue_release(struct rte_bbdev *dev, uint16_t q_id)
{
	struct acc_device *d = dev->data->dev_private;
	struct acc_queue *q = dev->data->queues[q_id].queue_private;

	if (q != NULL) {
		/* Mark the Queue as un-assigned. */
		d->q_assigned_bit_map[q->qgrp_id] &= ~(1ULL << q->aq_id);
		rte_free(q->fcw_ring);
		rte_free(q->companion_ring_addr);
		rte_free(q->lb_in);
		rte_free(q->lb_out);
		rte_free(q);
		dev->data->queues[q_id].queue_private = NULL;
	}

	return 0;
}

/* Get device info.
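 * Reports the static capability list for the detected variant together with
 * the per-operation queue counts derived from the fetched configuration.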
*/ 1137 static void 1138 vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) 1139 { 1140 struct acc_device *d = dev->data->dev_private; 1141 int i; 1142 static const struct rte_bbdev_op_cap vrb1_bbdev_capabilities[] = { 1143 { 1144 .type = RTE_BBDEV_OP_TURBO_DEC, 1145 .cap.turbo_dec = { 1146 .capability_flags = 1147 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | 1148 RTE_BBDEV_TURBO_CRC_TYPE_24B | 1149 RTE_BBDEV_TURBO_DEC_CRC_24B_DROP | 1150 RTE_BBDEV_TURBO_HALF_ITERATION_EVEN | 1151 RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH | 1152 RTE_BBDEV_TURBO_EARLY_TERMINATION | 1153 RTE_BBDEV_TURBO_DEC_INTERRUPTS | 1154 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | 1155 RTE_BBDEV_TURBO_MAP_DEC | 1156 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP | 1157 RTE_BBDEV_TURBO_DEC_SCATTER_GATHER, 1158 .max_llr_modulus = INT8_MAX, 1159 .num_buffers_src = 1160 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1161 .num_buffers_hard_out = 1162 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1163 .num_buffers_soft_out = 1164 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1165 } 1166 }, 1167 { 1168 .type = RTE_BBDEV_OP_TURBO_ENC, 1169 .cap.turbo_enc = { 1170 .capability_flags = 1171 RTE_BBDEV_TURBO_CRC_24B_ATTACH | 1172 RTE_BBDEV_TURBO_RV_INDEX_BYPASS | 1173 RTE_BBDEV_TURBO_RATE_MATCH | 1174 RTE_BBDEV_TURBO_ENC_INTERRUPTS | 1175 RTE_BBDEV_TURBO_ENC_SCATTER_GATHER, 1176 .num_buffers_src = 1177 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1178 .num_buffers_dst = 1179 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1180 } 1181 }, 1182 { 1183 .type = RTE_BBDEV_OP_LDPC_ENC, 1184 .cap.ldpc_enc = { 1185 .capability_flags = 1186 RTE_BBDEV_LDPC_RATE_MATCH | 1187 RTE_BBDEV_LDPC_CRC_24B_ATTACH | 1188 RTE_BBDEV_LDPC_INTERLEAVER_BYPASS | 1189 RTE_BBDEV_LDPC_ENC_INTERRUPTS, 1190 .num_buffers_src = 1191 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1192 .num_buffers_dst = 1193 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1194 } 1195 }, 1196 { 1197 .type = RTE_BBDEV_OP_LDPC_DEC, 1198 .cap.ldpc_dec = { 1199 .capability_flags = 1200 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | 1201 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | 1202 RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | 1203 RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK | 1204 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 1205 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 1206 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE | 1207 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS | 1208 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER | 1209 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION | 1210 RTE_BBDEV_LDPC_LLR_COMPRESSION | 1211 RTE_BBDEV_LDPC_DEC_INTERRUPTS, 1212 .llr_size = 8, 1213 .llr_decimals = 1, 1214 .num_buffers_src = 1215 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1216 .num_buffers_hard_out = 1217 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1218 .num_buffers_soft_out = 0, 1219 } 1220 }, 1221 { 1222 .type = RTE_BBDEV_OP_FFT, 1223 .cap.fft = { 1224 .capability_flags = 1225 RTE_BBDEV_FFT_WINDOWING | 1226 RTE_BBDEV_FFT_CS_ADJUSTMENT | 1227 RTE_BBDEV_FFT_DFT_BYPASS | 1228 RTE_BBDEV_FFT_IDFT_BYPASS | 1229 RTE_BBDEV_FFT_WINDOWING_BYPASS, 1230 .num_buffers_src = 1, 1231 .num_buffers_dst = 1, 1232 .fft_windows_num = ACC_MAX_FFT_WIN, 1233 } 1234 }, 1235 RTE_BBDEV_END_OF_CAPABILITIES_LIST() 1236 }; 1237 1238 static const struct rte_bbdev_op_cap vrb2_bbdev_capabilities[] = { 1239 { 1240 .type = RTE_BBDEV_OP_TURBO_DEC, 1241 .cap.turbo_dec = { 1242 .capability_flags = 1243 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | 1244 RTE_BBDEV_TURBO_CRC_TYPE_24B | 1245 RTE_BBDEV_TURBO_DEC_CRC_24B_DROP | 1246 RTE_BBDEV_TURBO_EQUALIZER | 1247 RTE_BBDEV_TURBO_SOFT_OUT_SATURATE | 1248 RTE_BBDEV_TURBO_HALF_ITERATION_EVEN | 1249 RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH | 1250 RTE_BBDEV_TURBO_SOFT_OUTPUT | 1251 
RTE_BBDEV_TURBO_EARLY_TERMINATION | 1252 RTE_BBDEV_TURBO_DEC_INTERRUPTS | 1253 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | 1254 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT | 1255 RTE_BBDEV_TURBO_MAP_DEC | 1256 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP | 1257 RTE_BBDEV_TURBO_DEC_SCATTER_GATHER, 1258 .max_llr_modulus = INT8_MAX, 1259 .num_buffers_src = 1260 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1261 .num_buffers_hard_out = 1262 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1263 .num_buffers_soft_out = 1264 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1265 } 1266 }, 1267 { 1268 .type = RTE_BBDEV_OP_TURBO_ENC, 1269 .cap.turbo_enc = { 1270 .capability_flags = 1271 RTE_BBDEV_TURBO_CRC_24B_ATTACH | 1272 RTE_BBDEV_TURBO_RV_INDEX_BYPASS | 1273 RTE_BBDEV_TURBO_RATE_MATCH | 1274 RTE_BBDEV_TURBO_ENC_INTERRUPTS | 1275 RTE_BBDEV_TURBO_ENC_SCATTER_GATHER, 1276 .num_buffers_src = 1277 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1278 .num_buffers_dst = 1279 RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, 1280 } 1281 }, 1282 { 1283 .type = RTE_BBDEV_OP_LDPC_ENC, 1284 .cap.ldpc_enc = { 1285 .capability_flags = 1286 RTE_BBDEV_LDPC_RATE_MATCH | 1287 RTE_BBDEV_LDPC_CRC_24B_ATTACH | 1288 RTE_BBDEV_LDPC_INTERLEAVER_BYPASS | 1289 RTE_BBDEV_LDPC_ENC_INTERRUPTS | 1290 RTE_BBDEV_LDPC_ENC_SCATTER_GATHER | 1291 RTE_BBDEV_LDPC_ENC_CONCATENATION, 1292 .num_buffers_src = 1293 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1294 .num_buffers_dst = 1295 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1296 } 1297 }, 1298 { 1299 .type = RTE_BBDEV_OP_LDPC_DEC, 1300 .cap.ldpc_dec = { 1301 .capability_flags = 1302 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | 1303 RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | 1304 RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | 1305 RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK | 1306 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | 1307 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | 1308 RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE | 1309 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS | 1310 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER | 1311 RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION | 1312 RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION | 1313 RTE_BBDEV_LDPC_LLR_COMPRESSION | 1314 RTE_BBDEV_LDPC_SOFT_OUT_ENABLE | 1315 RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS | 1316 RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS | 1317 RTE_BBDEV_LDPC_DEC_INTERRUPTS, 1318 .llr_size = 8, 1319 .llr_decimals = 2, 1320 .num_buffers_src = 1321 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1322 .num_buffers_hard_out = 1323 RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, 1324 .num_buffers_soft_out = 0, 1325 } 1326 }, 1327 { 1328 .type = RTE_BBDEV_OP_FFT, 1329 .cap.fft = { 1330 .capability_flags = 1331 RTE_BBDEV_FFT_WINDOWING | 1332 RTE_BBDEV_FFT_CS_ADJUSTMENT | 1333 RTE_BBDEV_FFT_DFT_BYPASS | 1334 RTE_BBDEV_FFT_IDFT_BYPASS | 1335 RTE_BBDEV_FFT_FP16_INPUT | 1336 RTE_BBDEV_FFT_FP16_OUTPUT | 1337 RTE_BBDEV_FFT_POWER_MEAS | 1338 RTE_BBDEV_FFT_WINDOWING_BYPASS, 1339 .num_buffers_src = 1, 1340 .num_buffers_dst = 1, 1341 .fft_windows_num = ACC_MAX_FFT_WIN, 1342 } 1343 }, 1344 { 1345 .type = RTE_BBDEV_OP_MLDTS, 1346 .cap.mld = { 1347 .capability_flags = 1348 RTE_BBDEV_MLDTS_REP, 1349 .num_buffers_src = 1350 1, 1351 .num_buffers_dst = 1352 1, 1353 } 1354 }, 1355 RTE_BBDEV_END_OF_CAPABILITIES_LIST() 1356 }; 1357 1358 static struct rte_bbdev_queue_conf default_queue_conf; 1359 default_queue_conf.socket = dev->data->socket_id; 1360 default_queue_conf.queue_size = ACC_MAX_QUEUE_DEPTH; 1361 1362 dev_info->driver_name = dev->device->driver->name; 1363 1364 /* Read and save the populated config from registers. */ 1365 fetch_acc_config(dev); 1366 /* Check the status of device. 
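 * For a VF this is requested over the VF2PF doorbell; a PF reports
 * RTE_BBDEV_DEV_NOT_SUPPORTED (see vrb_device_status() above).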
*/ 1367 dev_info->device_status = vrb_device_status(dev); 1368 dev_info->fft_window_width = d->fft_window_width; 1369 1370 /* Exposed number of queues. */ 1371 dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0; 1372 dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_aqs_per_groups * 1373 d->acc_conf.q_ul_4g.num_qgroups; 1374 dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_aqs_per_groups * 1375 d->acc_conf.q_dl_4g.num_qgroups; 1376 dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_aqs_per_groups * 1377 d->acc_conf.q_ul_5g.num_qgroups; 1378 dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_aqs_per_groups * 1379 d->acc_conf.q_dl_5g.num_qgroups; 1380 dev_info->num_queues[RTE_BBDEV_OP_FFT] = d->acc_conf.q_fft.num_aqs_per_groups * 1381 d->acc_conf.q_fft.num_qgroups; 1382 dev_info->num_queues[RTE_BBDEV_OP_MLDTS] = d->acc_conf.q_mld.num_aqs_per_groups * 1383 d->acc_conf.q_mld.num_qgroups; 1384 dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_qgroups; 1385 dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_qgroups; 1386 dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_qgroups; 1387 dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_qgroups; 1388 dev_info->queue_priority[RTE_BBDEV_OP_FFT] = d->acc_conf.q_fft.num_qgroups; 1389 dev_info->queue_priority[RTE_BBDEV_OP_MLDTS] = d->acc_conf.q_mld.num_qgroups; 1390 dev_info->max_num_queues = 0; 1391 for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++) 1392 dev_info->max_num_queues += dev_info->num_queues[i]; 1393 dev_info->queue_size_lim = ACC_MAX_QUEUE_DEPTH; 1394 dev_info->hardware_accelerated = true; 1395 dev_info->max_dl_queue_priority = 1396 d->acc_conf.q_dl_4g.num_qgroups - 1; 1397 dev_info->max_ul_queue_priority = 1398 d->acc_conf.q_ul_4g.num_qgroups - 1; 1399 dev_info->default_queue_conf = default_queue_conf; 1400 dev_info->cpu_flag_reqs = NULL; 1401 dev_info->min_alignment = 1; 1402 if (d->device_variant == VRB1_VARIANT) 1403 dev_info->capabilities = vrb1_bbdev_capabilities; 1404 else 1405 dev_info->capabilities = vrb2_bbdev_capabilities; 1406 dev_info->harq_buffer_size = 0; 1407 1408 vrb_check_ir(d); 1409 } 1410 1411 static int 1412 vrb_queue_intr_enable(struct rte_bbdev *dev, uint16_t queue_id) 1413 { 1414 struct acc_queue *q = dev->data->queues[queue_id].queue_private; 1415 1416 if (rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSI && 1417 rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSIX) 1418 return -ENOTSUP; 1419 1420 q->irq_enable = 1; 1421 return 0; 1422 } 1423 1424 static int 1425 vrb_queue_intr_disable(struct rte_bbdev *dev, uint16_t queue_id) 1426 { 1427 struct acc_queue *q = dev->data->queues[queue_id].queue_private; 1428 1429 if (rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSI && 1430 rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSIX) 1431 return -ENOTSUP; 1432 1433 q->irq_enable = 0; 1434 return 0; 1435 } 1436 1437 static const struct rte_bbdev_ops vrb_bbdev_ops = { 1438 .setup_queues = vrb_setup_queues, 1439 .intr_enable = vrb_intr_enable, 1440 .close = vrb_dev_close, 1441 .info_get = vrb_dev_info_get, 1442 .queue_setup = vrb_queue_setup, 1443 .queue_release = vrb_queue_release, 1444 .queue_stop = vrb_queue_stop, 1445 .queue_intr_enable = vrb_queue_intr_enable, 1446 .queue_intr_disable = vrb_queue_intr_disable 1447 }; 1448 1449 /* PCI PF address map. 
*/ 1450 static struct rte_pci_id pci_id_vrb_pf_map[] = { 1451 { 1452 RTE_PCI_DEVICE(RTE_VRB1_VENDOR_ID, RTE_VRB1_PF_DEVICE_ID) 1453 }, 1454 { 1455 RTE_PCI_DEVICE(RTE_VRB2_VENDOR_ID, RTE_VRB2_PF_DEVICE_ID) 1456 }, 1457 {.device_id = 0}, 1458 }; 1459 1460 /* PCI VF address map. */ 1461 static struct rte_pci_id pci_id_vrb_vf_map[] = { 1462 { 1463 RTE_PCI_DEVICE(RTE_VRB1_VENDOR_ID, RTE_VRB1_VF_DEVICE_ID) 1464 }, 1465 { 1466 RTE_PCI_DEVICE(RTE_VRB2_VENDOR_ID, RTE_VRB2_VF_DEVICE_ID) 1467 }, 1468 {.device_id = 0}, 1469 }; 1470 1471 /* Fill in a frame control word for turbo decoding. */ 1472 static inline void 1473 vrb_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc_fcw_td *fcw) 1474 { 1475 fcw->fcw_ver = 1; 1476 fcw->num_maps = ACC_FCW_TD_AUTOMAP; 1477 fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags, 1478 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE); 1479 if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1480 fcw->c = op->turbo_dec.tb_params.c; 1481 fcw->k_pos = op->turbo_dec.tb_params.k_pos; 1482 } else { 1483 fcw->c = 1; 1484 fcw->k_pos = op->turbo_dec.cb_params.k; 1485 } 1486 if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 1487 fcw->soft_output_en = 1; 1488 fcw->sw_soft_out_dis = 0; 1489 fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags, 1490 RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH); 1491 fcw->sw_soft_out_saturation = check_bit(op->turbo_dec.op_flags, 1492 RTE_BBDEV_TURBO_SOFT_OUT_SATURATE); 1493 if (check_bit(op->turbo_dec.op_flags, 1494 RTE_BBDEV_TURBO_EQUALIZER)) { 1495 fcw->bypass_teq = 0; 1496 if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1497 fcw->cab = op->turbo_dec.tb_params.cab; 1498 fcw->ea = op->turbo_dec.tb_params.ea; 1499 fcw->eb = op->turbo_dec.tb_params.eb; 1500 } else { 1501 fcw->ea = op->turbo_dec.cb_params.e; 1502 fcw->eb = op->turbo_dec.cb_params.e; 1503 } 1504 1505 if (op->turbo_dec.rv_index == 0) 1506 fcw->k0_start_col = ACC_FCW_TD_RVIDX_0; 1507 else if (op->turbo_dec.rv_index == 1) 1508 fcw->k0_start_col = ACC_FCW_TD_RVIDX_1; 1509 else if (op->turbo_dec.rv_index == 2) 1510 fcw->k0_start_col = ACC_FCW_TD_RVIDX_2; 1511 else 1512 fcw->k0_start_col = ACC_FCW_TD_RVIDX_3; 1513 } else { 1514 fcw->bypass_teq = 1; 1515 fcw->eb = 64; /* avoid undefined value */ 1516 } 1517 } else { 1518 fcw->soft_output_en = 0; 1519 fcw->sw_soft_out_dis = 1; 1520 fcw->bypass_teq = 0; 1521 } 1522 1523 fcw->code_block_mode = 1; 1524 fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags, 1525 RTE_BBDEV_TURBO_CRC_TYPE_24B); 1526 1527 fcw->ext_td_cold_reg_en = 1; 1528 fcw->raw_decoder_input_on = 0; 1529 fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2); 1530 fcw->min_iter = 2; 1531 fcw->half_iter_on = check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_HALF_ITERATION_EVEN); 1532 1533 fcw->early_stop_en = check_bit(op->turbo_dec.op_flags, 1534 RTE_BBDEV_TURBO_EARLY_TERMINATION) & !fcw->soft_output_en; 1535 fcw->ext_scale = 0xF; 1536 } 1537 1538 /* Fill in a frame control word for LDPC decoding. 
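 * The FCW carries the per-operation LDPC parameters (base graph, Zc, Ncb, k0,
 * rate-matching E) and the HARQ combine input/output sizes computed below.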
*/ 1539 static inline void 1540 vrb_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw, 1541 union acc_harq_layout_data *harq_layout, uint16_t device_variant) 1542 { 1543 uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset; 1544 uint32_t harq_index; 1545 uint32_t l; 1546 1547 fcw->qm = op->ldpc_dec.q_m; 1548 fcw->nfiller = op->ldpc_dec.n_filler; 1549 fcw->BG = (op->ldpc_dec.basegraph - 1); 1550 fcw->Zc = op->ldpc_dec.z_c; 1551 fcw->ncb = op->ldpc_dec.n_cb; 1552 fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph, 1553 op->ldpc_dec.rv_index); 1554 if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) 1555 fcw->rm_e = op->ldpc_dec.cb_params.e; 1556 else 1557 fcw->rm_e = (op->ldpc_dec.tb_params.r < 1558 op->ldpc_dec.tb_params.cab) ? 1559 op->ldpc_dec.tb_params.ea : 1560 op->ldpc_dec.tb_params.eb; 1561 1562 if (unlikely(check_bit(op->ldpc_dec.op_flags, 1563 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) && 1564 (op->ldpc_dec.harq_combined_input.length == 0))) { 1565 rte_bbdev_log(WARNING, "Null HARQ input size provided"); 1566 /* Disable HARQ input in that case to carry forward. */ 1567 op->ldpc_dec.op_flags ^= RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; 1568 } 1569 if (unlikely(fcw->rm_e == 0)) { 1570 rte_bbdev_log(WARNING, "Null E input provided"); 1571 fcw->rm_e = 2; 1572 } 1573 1574 fcw->hcin_en = check_bit(op->ldpc_dec.op_flags, 1575 RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); 1576 fcw->hcout_en = check_bit(op->ldpc_dec.op_flags, 1577 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE); 1578 fcw->crc_select = check_bit(op->ldpc_dec.op_flags, 1579 RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK); 1580 fcw->bypass_dec = 0; 1581 fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags, 1582 RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS); 1583 if (op->ldpc_dec.q_m == 1) { 1584 fcw->bypass_intlv = 1; 1585 fcw->qm = 2; 1586 } 1587 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION)) { 1588 fcw->hcin_decomp_mode = 1; 1589 fcw->hcout_comp_mode = 1; 1590 } else if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION)) { 1591 fcw->hcin_decomp_mode = 4; 1592 fcw->hcout_comp_mode = 4; 1593 } else { 1594 fcw->hcin_decomp_mode = 0; 1595 fcw->hcout_comp_mode = 0; 1596 } 1597 1598 fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags, 1599 RTE_BBDEV_LDPC_LLR_COMPRESSION); 1600 harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset); 1601 if (fcw->hcin_en > 0) { 1602 harq_in_length = op->ldpc_dec.harq_combined_input.length; 1603 if (fcw->hcin_decomp_mode == 1) 1604 harq_in_length = harq_in_length * 8 / 6; 1605 else if (fcw->hcin_decomp_mode == 4) 1606 harq_in_length = harq_in_length * 2; 1607 harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb 1608 - op->ldpc_dec.n_filler); 1609 harq_in_length = RTE_ALIGN_CEIL(harq_in_length, 64); 1610 fcw->hcin_size0 = harq_in_length; 1611 fcw->hcin_offset = 0; 1612 fcw->hcin_size1 = 0; 1613 } else { 1614 fcw->hcin_size0 = 0; 1615 fcw->hcin_offset = 0; 1616 fcw->hcin_size1 = 0; 1617 } 1618 1619 fcw->itmax = op->ldpc_dec.iter_max; 1620 fcw->itstop = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE); 1621 fcw->cnu_algo = ACC_ALGO_MSA; 1622 fcw->synd_precoder = fcw->itstop; 1623 1624 if (device_variant != VRB1_VARIANT) { 1625 fcw->so_it = op->ldpc_dec.iter_max; 1626 fcw->so_en = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE); 1627 fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags, 1628 RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS); 1629 fcw->so_bypass_rm = check_bit(op->ldpc_dec.op_flags, 1630 
RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS); 1631 fcw->minsum_offset = 1; 1632 fcw->dec_llrclip = 2; 1633 } 1634 1635 /* 1636 * These are all implicitly set 1637 * fcw->synd_post = 0; 1638 * fcw->dec_convllr = 0; 1639 * fcw->hcout_convllr = 0; 1640 * fcw->hcout_size1 = 0; 1641 * fcw->hcout_offset = 0; 1642 * fcw->negstop_th = 0; 1643 * fcw->negstop_it = 0; 1644 * fcw->negstop_en = 0; 1645 * fcw->gain_i = 1; 1646 * fcw->gain_h = 1; 1647 */ 1648 if (fcw->hcout_en > 0) { 1649 parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8) 1650 * op->ldpc_dec.z_c - op->ldpc_dec.n_filler; 1651 k0_p = (fcw->k0 > parity_offset) ? 1652 fcw->k0 - op->ldpc_dec.n_filler : fcw->k0; 1653 ncb_p = fcw->ncb - op->ldpc_dec.n_filler; 1654 l = k0_p + fcw->rm_e; 1655 harq_out_length = (uint16_t) fcw->hcin_size0; 1656 harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l), ncb_p); 1657 harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64); 1658 fcw->hcout_size0 = harq_out_length; 1659 fcw->hcout_size1 = 0; 1660 fcw->hcout_offset = 0; 1661 harq_layout[harq_index].offset = fcw->hcout_offset; 1662 harq_layout[harq_index].size0 = fcw->hcout_size0; 1663 } else { 1664 fcw->hcout_size0 = 0; 1665 fcw->hcout_size1 = 0; 1666 fcw->hcout_offset = 0; 1667 } 1668 1669 fcw->tb_crc_select = 0; 1670 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK)) 1671 fcw->tb_crc_select = 2; 1672 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) 1673 fcw->tb_crc_select = 1; 1674 } 1675 1676 static inline int 1677 vrb_dma_desc_td_fill(struct rte_bbdev_dec_op *op, 1678 struct acc_dma_req_desc *desc, struct rte_mbuf **input, 1679 struct rte_mbuf *h_output, struct rte_mbuf *s_output, 1680 uint32_t *in_offset, uint32_t *h_out_offset, 1681 uint32_t *s_out_offset, uint32_t *h_out_length, 1682 uint32_t *s_out_length, uint32_t *mbuf_total_left, 1683 uint32_t *seg_total_left, uint8_t r) 1684 { 1685 int next_triplet = 1; /* FCW already done. */ 1686 uint16_t k; 1687 uint16_t crc24_overlap = 0; 1688 uint32_t e, kw; 1689 1690 desc->word0 = ACC_DMA_DESC_TYPE; 1691 desc->word1 = 0; /**< Timestamp could be disabled. */ 1692 desc->word2 = 0; 1693 desc->word3 = 0; 1694 desc->numCBs = 1; 1695 1696 if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) { 1697 k = op->turbo_dec.tb_params.k_pos; 1698 e = (r < op->turbo_dec.tb_params.cab) 1699 ? op->turbo_dec.tb_params.ea 1700 : op->turbo_dec.tb_params.eb; 1701 } else { 1702 k = op->turbo_dec.cb_params.k; 1703 e = op->turbo_dec.cb_params.e; 1704 } 1705 1706 if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 1707 && !check_bit(op->turbo_dec.op_flags, 1708 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP)) 1709 crc24_overlap = 24; 1710 if ((op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK) 1711 && check_bit(op->turbo_dec.op_flags, 1712 RTE_BBDEV_TURBO_DEC_CRC_24B_DROP)) 1713 crc24_overlap = 24; 1714 1715 /* Calculates circular buffer size. 1716 * According to 3gpp 36.212 section 5.1.4.2 1717 * Kw = 3 * Kpi, 1718 * where: 1719 * Kpi = nCol * nRow 1720 * where nCol is 32 and nRow can be calculated from: 1721 * D =< nCol * nRow 1722 * where D is the size of each output from turbo encoder block (k + 4). 
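 * For example, with k = 6144: D = 6148, nRow = 193, Kpi = 32 * 193 = 6176 and
 * Kw = 3 * 6176 = 18528, which matches RTE_ALIGN_CEIL(k + 4, 32) * 3 below.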
1723 */ 1724 kw = RTE_ALIGN_CEIL(k + 4, 32) * 3; 1725 1726 if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) { 1727 rte_bbdev_log(ERR, 1728 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u", 1729 *mbuf_total_left, kw); 1730 return -1; 1731 } 1732 1733 next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw, 1734 seg_total_left, next_triplet, 1735 check_bit(op->turbo_dec.op_flags, 1736 RTE_BBDEV_TURBO_DEC_SCATTER_GATHER)); 1737 if (unlikely(next_triplet < 0)) { 1738 rte_bbdev_log(ERR, 1739 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 1740 op); 1741 return -1; 1742 } 1743 desc->data_ptrs[next_triplet - 1].last = 1; 1744 desc->m2dlen = next_triplet; 1745 *mbuf_total_left -= kw; 1746 *h_out_length = ((k - crc24_overlap) >> 3); 1747 next_triplet = acc_dma_fill_blk_type( 1748 desc, h_output, *h_out_offset, 1749 *h_out_length, next_triplet, ACC_DMA_BLKID_OUT_HARD); 1750 if (unlikely(next_triplet < 0)) { 1751 rte_bbdev_log(ERR, 1752 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 1753 op); 1754 return -1; 1755 } 1756 1757 op->turbo_dec.hard_output.length += *h_out_length; 1758 *h_out_offset += *h_out_length; 1759 1760 /* Soft output. */ 1761 if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 1762 if (op->turbo_dec.soft_output.data == 0) { 1763 rte_bbdev_log(ERR, "Soft output is not defined"); 1764 return -1; 1765 } 1766 if (check_bit(op->turbo_dec.op_flags, 1767 RTE_BBDEV_TURBO_EQUALIZER)) 1768 *s_out_length = e; 1769 else 1770 *s_out_length = (k * 3) + 12; 1771 1772 next_triplet = acc_dma_fill_blk_type(desc, s_output, 1773 *s_out_offset, *s_out_length, next_triplet, 1774 ACC_DMA_BLKID_OUT_SOFT); 1775 if (unlikely(next_triplet < 0)) { 1776 rte_bbdev_log(ERR, 1777 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 1778 op); 1779 return -1; 1780 } 1781 1782 op->turbo_dec.soft_output.length += *s_out_length; 1783 *s_out_offset += *s_out_length; 1784 } 1785 1786 desc->data_ptrs[next_triplet - 1].last = 1; 1787 desc->d2mlen = next_triplet - desc->m2dlen; 1788 1789 desc->op_addr = op; 1790 1791 return 0; 1792 } 1793 1794 static inline int 1795 vrb_dma_desc_ld_fill(struct rte_bbdev_dec_op *op, 1796 struct acc_dma_req_desc *desc, 1797 struct rte_mbuf **input, struct rte_mbuf *h_output, 1798 uint32_t *in_offset, uint32_t *h_out_offset, 1799 uint32_t *h_out_length, uint32_t *mbuf_total_left, 1800 uint32_t *seg_total_left, struct acc_fcw_ld *fcw, uint16_t device_variant) 1801 { 1802 struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; 1803 int next_triplet = 1; /* FCW already done. */ 1804 uint32_t input_length; 1805 uint16_t output_length, crc24_overlap = 0; 1806 uint16_t sys_cols, K, h_p_size, h_np_size; 1807 1808 if (device_variant == VRB1_VARIANT) { 1809 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION) || 1810 check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) { 1811 rte_bbdev_log(ERR, 1812 "VRB1 does not support the requested capabilities %x", 1813 op->ldpc_dec.op_flags); 1814 return -1; 1815 } 1816 } 1817 1818 acc_header_init(desc); 1819 1820 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP)) 1821 crc24_overlap = 24; 1822 1823 /* Compute some LDPC BG lengths. */ 1824 input_length = fcw->rm_e; 1825 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_LLR_COMPRESSION)) 1826 input_length = (input_length * 3 + 3) / 4; 1827 sys_cols = (dec->basegraph == 1) ? 
22 : 10;
1828 K = sys_cols * dec->z_c;
1829 output_length = K - dec->n_filler - crc24_overlap;
1830
1831 if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < input_length))) {
1832 rte_bbdev_log(ERR,
1833 "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1834 *mbuf_total_left, input_length);
1835 return -1;
1836 }
1837
1838 next_triplet = acc_dma_fill_blk_type_in(desc, input,
1839 in_offset, input_length,
1840 seg_total_left, next_triplet,
1841 check_bit(op->ldpc_dec.op_flags,
1842 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER));
1843
1844 if (unlikely(next_triplet < 0)) {
1845 rte_bbdev_log(ERR,
1846 "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1847 op);
1848 return -1;
1849 }
1850
1851 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
1852 if (op->ldpc_dec.harq_combined_input.data == 0) {
1853 rte_bbdev_log(ERR, "HARQ input is not defined");
1854 return -1;
1855 }
1856 h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
1857 if (fcw->hcin_decomp_mode == 1)
1858 h_p_size = (h_p_size * 3 + 3) / 4;
1859 else if (fcw->hcin_decomp_mode == 4)
1860 h_p_size = h_p_size / 2;
1865 acc_dma_fill_blk_type(
1866 desc,
1867 op->ldpc_dec.harq_combined_input.data,
1868 op->ldpc_dec.harq_combined_input.offset,
1869 h_p_size,
1870 next_triplet,
1871 ACC_DMA_BLKID_IN_HARQ);
1872 next_triplet++;
1873 }
1874
1875 desc->data_ptrs[next_triplet - 1].last = 1;
1876 desc->m2dlen = next_triplet;
1877 *mbuf_total_left -= input_length;
1878
1879 next_triplet = acc_dma_fill_blk_type(desc, h_output,
1880 *h_out_offset, output_length >> 3, next_triplet,
1881 ACC_DMA_BLKID_OUT_HARD);
1882
1883 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE)) {
1884 if (op->ldpc_dec.soft_output.data == 0) {
1885 rte_bbdev_log(ERR, "Soft output is not defined");
1886 return -1;
1887 }
1888 dec->soft_output.length = fcw->rm_e;
1889 acc_dma_fill_blk_type(desc, dec->soft_output.data, dec->soft_output.offset,
1890 fcw->rm_e, next_triplet, ACC_DMA_BLKID_OUT_SOFT);
1891 next_triplet++;
1892 }
1893
1894 if (check_bit(op->ldpc_dec.op_flags,
1895 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
1896 if (op->ldpc_dec.harq_combined_output.data == 0) {
1897 rte_bbdev_log(ERR, "HARQ output is not defined");
1898 return -1;
1899 }
1900
1901 /* Pruned size of the HARQ */
1902 h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
1903 /* Non-Pruned size of the HARQ */
1904 h_np_size = fcw->hcout_offset > 0 ?
1905 fcw->hcout_offset + fcw->hcout_size1 : 1906 h_p_size; 1907 if (fcw->hcin_decomp_mode == 1) { 1908 h_np_size = (h_np_size * 3 + 3) / 4; 1909 h_p_size = (h_p_size * 3 + 3) / 4; 1910 } else if (fcw->hcin_decomp_mode == 4) { 1911 h_np_size = h_np_size / 2; 1912 h_p_size = h_p_size / 2; 1913 } 1914 dec->harq_combined_output.length = h_np_size; 1915 acc_dma_fill_blk_type( 1916 desc, 1917 dec->harq_combined_output.data, 1918 dec->harq_combined_output.offset, 1919 h_p_size, 1920 next_triplet, 1921 ACC_DMA_BLKID_OUT_HARQ); 1922 1923 next_triplet++; 1924 } 1925 1926 *h_out_length = output_length >> 3; 1927 dec->hard_output.length += *h_out_length; 1928 *h_out_offset += *h_out_length; 1929 desc->data_ptrs[next_triplet - 1].last = 1; 1930 desc->d2mlen = next_triplet - desc->m2dlen; 1931 1932 desc->op_addr = op; 1933 1934 return 0; 1935 } 1936 1937 static inline void 1938 vrb_dma_desc_ld_update(struct rte_bbdev_dec_op *op, 1939 struct acc_dma_req_desc *desc, 1940 struct rte_mbuf *input, struct rte_mbuf *h_output, 1941 uint32_t *in_offset, uint32_t *h_out_offset, 1942 uint32_t *h_out_length, 1943 union acc_harq_layout_data *harq_layout) 1944 { 1945 int next_triplet = 1; /* FCW already done. */ 1946 desc->data_ptrs[next_triplet].address = rte_pktmbuf_iova_offset(input, *in_offset); 1947 next_triplet++; 1948 1949 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { 1950 struct rte_bbdev_op_data hi = op->ldpc_dec.harq_combined_input; 1951 desc->data_ptrs[next_triplet].address = 1952 rte_pktmbuf_iova_offset(hi.data, hi.offset); 1953 next_triplet++; 1954 } 1955 1956 desc->data_ptrs[next_triplet].address = 1957 rte_pktmbuf_iova_offset(h_output, *h_out_offset); 1958 *h_out_length = desc->data_ptrs[next_triplet].blen; 1959 next_triplet++; 1960 1961 if (check_bit(op->ldpc_dec.op_flags, 1962 RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) { 1963 /* Adjust based on previous operation. */ 1964 struct rte_bbdev_dec_op *prev_op = desc->op_addr; 1965 op->ldpc_dec.harq_combined_output.length = 1966 prev_op->ldpc_dec.harq_combined_output.length; 1967 uint32_t harq_idx = hq_index(op->ldpc_dec.harq_combined_output.offset); 1968 uint32_t prev_harq_idx = hq_index(prev_op->ldpc_dec.harq_combined_output.offset); 1969 harq_layout[harq_idx].val = harq_layout[prev_harq_idx].val; 1970 struct rte_bbdev_op_data ho = op->ldpc_dec.harq_combined_output; 1971 desc->data_ptrs[next_triplet].address = 1972 rte_pktmbuf_iova_offset(ho.data, ho.offset); 1973 next_triplet++; 1974 } 1975 1976 op->ldpc_dec.hard_output.length += *h_out_length; 1977 desc->op_addr = op; 1978 } 1979 1980 /* Enqueue one encode operations for device in CB mode. 
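 * A single descriptor is consumed per op: the Turbo encode FCW is filled
 * first, then acc_dma_desc_te_fill() populates the input/output pointer
 * triplets from the op mbufs. Returns 1 on success or a negative value
 * when the mbuf lengths do not match the requested CB size.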
*/ 1981 static inline int 1982 enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 1983 uint16_t total_enqueued_cbs) 1984 { 1985 union acc_dma_desc *desc = NULL; 1986 int ret; 1987 uint32_t in_offset, out_offset, out_length, mbuf_total_left, seg_total_left; 1988 struct rte_mbuf *input, *output_head, *output; 1989 1990 desc = acc_desc(q, total_enqueued_cbs); 1991 acc_fcw_te_fill(op, &desc->req.fcw_te); 1992 1993 input = op->turbo_enc.input.data; 1994 output_head = output = op->turbo_enc.output.data; 1995 in_offset = op->turbo_enc.input.offset; 1996 out_offset = op->turbo_enc.output.offset; 1997 out_length = 0; 1998 mbuf_total_left = op->turbo_enc.input.length; 1999 seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data) - in_offset; 2000 2001 ret = acc_dma_desc_te_fill(op, &desc->req, &input, output, 2002 &in_offset, &out_offset, &out_length, &mbuf_total_left, 2003 &seg_total_left, 0); 2004 2005 if (unlikely(ret < 0)) 2006 return ret; 2007 2008 mbuf_append(output_head, output, out_length); 2009 2010 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2011 rte_memdump(stderr, "FCW", &desc->req.fcw_te, 2012 sizeof(desc->req.fcw_te) - 8); 2013 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2014 #endif 2015 /* One CB (one op) was successfully prepared to enqueue */ 2016 return 1; 2017 } 2018 2019 /* Enqueue one encode operations for device in CB mode 2020 * multiplexed on the same descriptor. 2021 */ 2022 static inline int 2023 enqueue_ldpc_enc_n_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op **ops, 2024 uint16_t total_enqueued_descs, int16_t num) 2025 { 2026 union acc_dma_desc *desc = NULL; 2027 uint32_t out_length; 2028 struct rte_mbuf *output_head, *output; 2029 int i, next_triplet; 2030 uint16_t in_length_in_bytes; 2031 struct rte_bbdev_op_ldpc_enc *enc = &ops[0]->ldpc_enc; 2032 struct acc_ptrs *context_ptrs; 2033 2034 desc = acc_desc(q, total_enqueued_descs); 2035 acc_fcw_le_fill(ops[0], &desc->req.fcw_le, num, 0); 2036 2037 /** This could be done at polling. */ 2038 acc_header_init(&desc->req); 2039 desc->req.numCBs = num; 2040 desc->req.dltb = 0; 2041 2042 in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len; 2043 out_length = (enc->cb_params.e + 7) >> 3; 2044 desc->req.m2dlen = 1 + num; 2045 desc->req.d2mlen = num; 2046 next_triplet = 1; 2047 2048 for (i = 0; i < num; i++) { 2049 desc->req.data_ptrs[next_triplet].address = 2050 rte_pktmbuf_iova_offset(ops[i]->ldpc_enc.input.data, 0); 2051 desc->req.data_ptrs[next_triplet].blen = in_length_in_bytes; 2052 next_triplet++; 2053 desc->req.data_ptrs[next_triplet].address = rte_pktmbuf_iova_offset( 2054 ops[i]->ldpc_enc.output.data, 0); 2055 desc->req.data_ptrs[next_triplet].blen = out_length; 2056 next_triplet++; 2057 ops[i]->ldpc_enc.output.length = out_length; 2058 output_head = output = ops[i]->ldpc_enc.output.data; 2059 mbuf_append(output_head, output, out_length); 2060 output->data_len = out_length; 2061 } 2062 2063 desc->req.op_addr = ops[0]; 2064 /* Keep track of pointers even when multiplexed in single descriptor. */ 2065 context_ptrs = q->companion_ring_addr + acc_desc_idx(q, total_enqueued_descs); 2066 for (i = 0; i < num; i++) 2067 context_ptrs->ptr[i].op_addr = ops[i]; 2068 2069 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2070 rte_memdump(stderr, "FCW", &desc->req.fcw_le, 2071 sizeof(desc->req.fcw_le) - 8); 2072 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2073 #endif 2074 2075 /* Number of compatible CBs/ops successfully prepared to enqueue. 
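 * For instance (illustrative), with num = 4 multiplexed CBs the descriptor
 * carries 1 FCW pointer plus 4 input pointers (m2dlen = 5) and 4 output
 * pointers (d2mlen = 4), while the companion ring keeps one op pointer per CB.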
*/ 2076 return num; 2077 } 2078 2079 /* Enqueue one encode operations for VRB1 device for a partial TB 2080 * all codes blocks have same configuration multiplexed on the same descriptor. 2081 */ 2082 static inline void 2083 vrb1_enqueue_ldpc_enc_part_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2084 uint16_t total_enqueued_descs, int16_t num_cbs, uint32_t e, 2085 uint16_t in_len_B, uint32_t out_len_B, uint32_t *in_offset, 2086 uint32_t *out_offset) 2087 { 2088 2089 union acc_dma_desc *desc = NULL; 2090 struct rte_mbuf *output_head, *output; 2091 int i, next_triplet; 2092 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 2093 2094 desc = acc_desc(q, total_enqueued_descs); 2095 acc_fcw_le_fill(op, &desc->req.fcw_le, num_cbs, e); 2096 2097 /** This could be done at polling. */ 2098 acc_header_init(&desc->req); 2099 desc->req.numCBs = num_cbs; 2100 2101 desc->req.m2dlen = 1 + num_cbs; 2102 desc->req.d2mlen = num_cbs; 2103 next_triplet = 1; 2104 2105 for (i = 0; i < num_cbs; i++) { 2106 desc->req.data_ptrs[next_triplet].address = rte_pktmbuf_iova_offset( 2107 enc->input.data, *in_offset); 2108 *in_offset += in_len_B; 2109 desc->req.data_ptrs[next_triplet].blen = in_len_B; 2110 next_triplet++; 2111 desc->req.data_ptrs[next_triplet].address = rte_pktmbuf_iova_offset( 2112 enc->output.data, *out_offset); 2113 *out_offset += out_len_B; 2114 desc->req.data_ptrs[next_triplet].blen = out_len_B; 2115 next_triplet++; 2116 enc->output.length += out_len_B; 2117 output_head = output = enc->output.data; 2118 mbuf_append(output_head, output, out_len_B); 2119 } 2120 2121 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2122 rte_memdump(stderr, "FCW", &desc->req.fcw_le, 2123 sizeof(desc->req.fcw_le) - 8); 2124 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2125 #endif 2126 2127 } 2128 2129 /* Enqueue one encode operations for device in TB mode. 
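 * One descriptor is used per CB of the TB and every descriptor points back
 * to the FCW of the first one; input and output mbufs are walked segment by
 * segment and SDone is raised only on the last CB descriptor. Returns the
 * number of CBs enqueued, or a negative value on error.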
*/ 2130 static inline int 2131 enqueue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2132 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) 2133 { 2134 union acc_dma_desc *desc = NULL; 2135 int ret; 2136 uint8_t r, c; 2137 uint32_t in_offset, out_offset, out_length, mbuf_total_left, 2138 seg_total_left; 2139 struct rte_mbuf *input, *output_head, *output; 2140 uint16_t desc_idx, current_enqueued_cbs = 0; 2141 uint64_t fcw_offset; 2142 2143 desc_idx = acc_desc_idx(q, total_enqueued_cbs); 2144 desc = q->ring_addr + desc_idx; 2145 fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 2146 acc_fcw_te_fill(op, &desc->req.fcw_te); 2147 2148 input = op->turbo_enc.input.data; 2149 output_head = output = op->turbo_enc.output.data; 2150 in_offset = op->turbo_enc.input.offset; 2151 out_offset = op->turbo_enc.output.offset; 2152 out_length = 0; 2153 mbuf_total_left = op->turbo_enc.input.length; 2154 2155 c = op->turbo_enc.tb_params.c; 2156 r = op->turbo_enc.tb_params.r; 2157 2158 while (mbuf_total_left > 0 && r < c) { 2159 if (unlikely((input == NULL) || (output == NULL))) 2160 return -1; 2161 2162 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2163 /* Set up DMA descriptor */ 2164 desc = acc_desc(q, total_enqueued_cbs); 2165 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset; 2166 desc->req.data_ptrs[0].blen = ACC_FCW_TE_BLEN; 2167 2168 ret = acc_dma_desc_te_fill(op, &desc->req, &input, output, 2169 &in_offset, &out_offset, &out_length, 2170 &mbuf_total_left, &seg_total_left, r); 2171 if (unlikely(ret < 0)) 2172 return ret; 2173 mbuf_append(output_head, output, out_length); 2174 2175 /* Set total number of CBs in TB */ 2176 desc->req.cbs_in_tb = cbs_in_tb; 2177 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2178 rte_memdump(stderr, "FCW", &desc->req.fcw_te, 2179 sizeof(desc->req.fcw_te) - 8); 2180 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2181 #endif 2182 2183 if (seg_total_left == 0) { 2184 /* Go to the next mbuf */ 2185 input = input->next; 2186 in_offset = 0; 2187 output = output->next; 2188 out_offset = 0; 2189 } 2190 2191 total_enqueued_cbs++; 2192 current_enqueued_cbs++; 2193 r++; 2194 } 2195 2196 /* In case the number of CB doesn't match, the configuration was invalid. */ 2197 if (unlikely(current_enqueued_cbs != cbs_in_tb)) 2198 return -1; 2199 2200 /* Set SDone on last CB descriptor for TB mode. */ 2201 desc->req.sdone_enable = 1; 2202 2203 return current_enqueued_cbs; 2204 } 2205 2206 /* Enqueue one encode operations for device in TB mode. 2207 * returns the number of descs used. 2208 */ 2209 static inline int 2210 vrb1_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2211 uint16_t enq_descs, uint8_t cbs_in_tb) 2212 { 2213 uint8_t num_a, num_b; 2214 uint16_t input_len_B, return_descs; 2215 uint8_t r = op->ldpc_enc.tb_params.r; 2216 uint8_t cab = op->ldpc_enc.tb_params.cab; 2217 union acc_dma_desc *desc; 2218 uint16_t init_enq_descs = enq_descs; 2219 uint32_t in_offset = 0, out_offset = 0; 2220 2221 input_len_B = ((op->ldpc_enc.basegraph == 1 ? 
22 : 10) * op->ldpc_enc.z_c 2222 - op->ldpc_enc.n_filler) >> 3; 2223 2224 if (check_bit(op->ldpc_enc.op_flags, RTE_BBDEV_LDPC_CRC_24B_ATTACH)) 2225 input_len_B -= 3; 2226 2227 if (r < cab) { 2228 num_a = cab - r; 2229 num_b = cbs_in_tb - cab; 2230 } else { 2231 num_a = 0; 2232 num_b = cbs_in_tb - r; 2233 } 2234 2235 while (num_a > 0) { 2236 uint32_t e = op->ldpc_enc.tb_params.ea; 2237 uint32_t out_len_B = (e + 7) >> 3; 2238 uint8_t enq = RTE_MIN(num_a, ACC_MUX_5GDL_DESC); 2239 num_a -= enq; 2240 vrb1_enqueue_ldpc_enc_part_tb(q, op, enq_descs, enq, e, input_len_B, 2241 out_len_B, &in_offset, &out_offset); 2242 enq_descs++; 2243 } 2244 while (num_b > 0) { 2245 uint32_t e = op->ldpc_enc.tb_params.eb; 2246 uint32_t out_len_B = (e + 7) >> 3; 2247 uint8_t enq = RTE_MIN(num_b, ACC_MUX_5GDL_DESC); 2248 num_b -= enq; 2249 vrb1_enqueue_ldpc_enc_part_tb(q, op, enq_descs, enq, e, input_len_B, 2250 out_len_B, &in_offset, &out_offset); 2251 enq_descs++; 2252 } 2253 2254 return_descs = enq_descs - init_enq_descs; 2255 /* Keep total number of CBs in first TB. */ 2256 desc = acc_desc(q, init_enq_descs); 2257 desc->req.cbs_in_tb = return_descs; /** Actual number of descriptors. */ 2258 desc->req.op_addr = op; 2259 2260 /* Set SDone on last CB descriptor for TB mode. */ 2261 desc = acc_desc(q, enq_descs - 1); 2262 desc->req.sdone_enable = 1; 2263 desc->req.op_addr = op; 2264 return return_descs; 2265 } 2266 2267 /* Fill in a frame control word for LDPC encoding. */ 2268 static inline void 2269 vrb2_fcw_letb_fill(const struct rte_bbdev_enc_op *op, struct acc_fcw_le *fcw) 2270 { 2271 fcw->qm = op->ldpc_enc.q_m; 2272 fcw->nfiller = op->ldpc_enc.n_filler; 2273 fcw->BG = (op->ldpc_enc.basegraph - 1); 2274 fcw->Zc = op->ldpc_enc.z_c; 2275 fcw->ncb = op->ldpc_enc.n_cb; 2276 fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph, 2277 op->ldpc_enc.rv_index); 2278 fcw->rm_e = op->ldpc_enc.tb_params.ea; 2279 fcw->rm_e_b = op->ldpc_enc.tb_params.eb; 2280 fcw->crc_select = check_bit(op->ldpc_enc.op_flags, 2281 RTE_BBDEV_LDPC_CRC_24B_ATTACH); 2282 fcw->bypass_intlv = 0; 2283 if (op->ldpc_enc.tb_params.c > 1) { 2284 fcw->mcb_count = 0; 2285 fcw->C = op->ldpc_enc.tb_params.c; 2286 fcw->Cab = op->ldpc_enc.tb_params.cab; 2287 } else { 2288 fcw->mcb_count = 1; 2289 fcw->C = 0; 2290 } 2291 } 2292 2293 /* Enqueue one encode operations for device in TB mode. 2294 * returns the number of descs used. 2295 */ 2296 static inline int 2297 vrb2_enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op, 2298 uint16_t enq_descs) 2299 { 2300 union acc_dma_desc *desc = NULL; 2301 uint32_t in_offset, out_offset, out_length, seg_total_left; 2302 struct rte_mbuf *input, *output_head, *output; 2303 struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; 2304 int next_triplet = 1; /* FCW already done. */ 2305 uint32_t in_length_in_bytes; 2306 uint16_t K, in_length_in_bits; 2307 2308 desc = acc_desc(q, enq_descs); 2309 vrb2_fcw_letb_fill(op, &desc->req.fcw_le); 2310 2311 input = enc->input.data; 2312 output_head = output = enc->output.data; 2313 in_offset = enc->input.offset; 2314 out_offset = enc->output.offset; 2315 seg_total_left = rte_pktmbuf_data_len(enc->input.data) - in_offset; 2316 2317 acc_header_init(&desc->req); 2318 K = (enc->basegraph == 1 ? 
22 : 10) * enc->z_c; 2319 in_length_in_bits = K - enc->n_filler; 2320 if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) || 2321 (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH)) 2322 in_length_in_bits -= 24; 2323 in_length_in_bytes = (in_length_in_bits >> 3) * enc->tb_params.c; 2324 2325 next_triplet = acc_dma_fill_blk_type_in(&desc->req, &input, &in_offset, 2326 in_length_in_bytes, &seg_total_left, next_triplet, 2327 check_bit(enc->op_flags, RTE_BBDEV_LDPC_ENC_SCATTER_GATHER)); 2328 if (unlikely(next_triplet < 0)) { 2329 rte_bbdev_log(ERR, 2330 "Mismatch between data to process and mbuf data length in bbdev_op: %p", 2331 op); 2332 return -1; 2333 } 2334 desc->req.data_ptrs[next_triplet - 1].last = 1; 2335 desc->req.m2dlen = next_triplet; 2336 2337 /* Set output length */ 2338 /* Integer round up division by 8 */ 2339 out_length = (enc->tb_params.ea * enc->tb_params.cab + 2340 enc->tb_params.eb * (enc->tb_params.c - enc->tb_params.cab) + 7) >> 3; 2341 2342 next_triplet = acc_dma_fill_blk_type(&desc->req, output, out_offset, 2343 out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC); 2344 enc->output.length = out_length; 2345 out_offset += out_length; 2346 desc->req.data_ptrs[next_triplet - 1].last = 1; 2347 desc->req.data_ptrs[next_triplet - 1].dma_ext = 0; 2348 desc->req.d2mlen = next_triplet - desc->req.m2dlen; 2349 desc->req.numCBs = enc->tb_params.c; 2350 if (desc->req.numCBs > 1) 2351 desc->req.dltb = 1; 2352 desc->req.op_addr = op; 2353 2354 if (out_length < ACC_MAX_E_MBUF) 2355 mbuf_append(output_head, output, out_length); 2356 2357 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2358 rte_memdump(stderr, "FCW", &desc->req.fcw_le, sizeof(desc->req.fcw_le)); 2359 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2360 #endif 2361 /* One CB (one op) was successfully prepared to enqueue */ 2362 return 1; 2363 } 2364 2365 /** Enqueue one decode operations for device in CB mode. */ 2366 static inline int 2367 enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op, 2368 uint16_t total_enqueued_cbs) 2369 { 2370 union acc_dma_desc *desc = NULL; 2371 int ret; 2372 uint32_t in_offset, h_out_offset, s_out_offset, s_out_length, 2373 h_out_length, mbuf_total_left, seg_total_left; 2374 struct rte_mbuf *input, *h_output_head, *h_output, 2375 *s_output_head, *s_output; 2376 2377 if ((q->d->device_variant == VRB1_VARIANT) && 2378 (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))) { 2379 /* SO not supported for VRB1. 
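 * Turbo decoder soft output is not available on VRB1, so the op is
 * rejected here before any ring descriptor is written.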
*/ 2380 return -EPERM; 2381 } 2382 2383 desc = acc_desc(q, total_enqueued_cbs); 2384 vrb_fcw_td_fill(op, &desc->req.fcw_td); 2385 2386 input = op->turbo_dec.input.data; 2387 h_output_head = h_output = op->turbo_dec.hard_output.data; 2388 s_output_head = s_output = op->turbo_dec.soft_output.data; 2389 in_offset = op->turbo_dec.input.offset; 2390 h_out_offset = op->turbo_dec.hard_output.offset; 2391 s_out_offset = op->turbo_dec.soft_output.offset; 2392 h_out_length = s_out_length = 0; 2393 mbuf_total_left = op->turbo_dec.input.length; 2394 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2395 2396 /* Set up DMA descriptor */ 2397 desc = acc_desc(q, total_enqueued_cbs); 2398 2399 ret = vrb_dma_desc_td_fill(op, &desc->req, &input, h_output, 2400 s_output, &in_offset, &h_out_offset, &s_out_offset, 2401 &h_out_length, &s_out_length, &mbuf_total_left, 2402 &seg_total_left, 0); 2403 2404 if (unlikely(ret < 0)) 2405 return ret; 2406 2407 /* Hard output */ 2408 mbuf_append(h_output_head, h_output, h_out_length); 2409 2410 /* Soft output */ 2411 if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) 2412 mbuf_append(s_output_head, s_output, s_out_length); 2413 2414 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2415 rte_memdump(stderr, "FCW", &desc->req.fcw_td, 2416 sizeof(desc->req.fcw_td)); 2417 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2418 #endif 2419 2420 /* One CB (one op) was successfully prepared to enqueue */ 2421 return 1; 2422 } 2423 2424 /** Enqueue one decode operations for device in CB mode. */ 2425 static inline int 2426 vrb_enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op, 2427 uint16_t total_enqueued_cbs, bool same_op) 2428 { 2429 int ret, hq_len; 2430 union acc_dma_desc *desc; 2431 struct rte_mbuf *input, *h_output_head, *h_output; 2432 uint32_t in_offset, h_out_offset, mbuf_total_left, h_out_length = 0; 2433 union acc_harq_layout_data *harq_layout; 2434 2435 if (op->ldpc_dec.cb_params.e == 0) 2436 return -EINVAL; 2437 2438 desc = acc_desc(q, total_enqueued_cbs); 2439 2440 input = op->ldpc_dec.input.data; 2441 h_output_head = h_output = op->ldpc_dec.hard_output.data; 2442 in_offset = op->ldpc_dec.input.offset; 2443 h_out_offset = op->ldpc_dec.hard_output.offset; 2444 mbuf_total_left = op->ldpc_dec.input.length; 2445 harq_layout = q->d->harq_layout; 2446 2447 if (same_op) { 2448 union acc_dma_desc *prev_desc; 2449 prev_desc = acc_desc(q, total_enqueued_cbs - 1); 2450 uint8_t *prev_ptr = (uint8_t *) prev_desc; 2451 uint8_t *new_ptr = (uint8_t *) desc; 2452 /* Copy first 4 words and BDESCs. */ 2453 rte_memcpy(new_ptr, prev_ptr, ACC_5GUL_SIZE_0); 2454 rte_memcpy(new_ptr + ACC_5GUL_OFFSET_0, 2455 prev_ptr + ACC_5GUL_OFFSET_0, 2456 ACC_5GUL_SIZE_1); 2457 desc->req.op_addr = prev_desc->req.op_addr; 2458 /* Copy FCW. */ 2459 rte_memcpy(new_ptr + ACC_DESC_FCW_OFFSET, 2460 prev_ptr + ACC_DESC_FCW_OFFSET, 2461 ACC_FCW_LD_BLEN); 2462 vrb_dma_desc_ld_update(op, &desc->req, input, h_output, 2463 &in_offset, &h_out_offset, 2464 &h_out_length, harq_layout); 2465 } else { 2466 struct acc_fcw_ld *fcw; 2467 uint32_t seg_total_left; 2468 fcw = &desc->req.fcw_ld; 2469 vrb_fcw_ld_fill(op, fcw, harq_layout, q->d->device_variant); 2470 2471 /* Special handling when using mbuf or not. 
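 * With RTE_BBDEV_LDPC_DEC_SCATTER_GATHER the remaining segment length is
 * taken from the input mbuf; otherwise the rate-matched size E (fcw->rm_e)
 * is used directly as the contiguous input length.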
*/ 2472 if (check_bit(op->ldpc_dec.op_flags, 2473 RTE_BBDEV_LDPC_DEC_SCATTER_GATHER)) 2474 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2475 else 2476 seg_total_left = fcw->rm_e; 2477 2478 ret = vrb_dma_desc_ld_fill(op, &desc->req, &input, h_output, 2479 &in_offset, &h_out_offset, 2480 &h_out_length, &mbuf_total_left, 2481 &seg_total_left, fcw, q->d->device_variant); 2482 if (unlikely(ret < 0)) 2483 return ret; 2484 } 2485 2486 /* Hard output. */ 2487 mbuf_append(h_output_head, h_output, h_out_length); 2488 if (op->ldpc_dec.harq_combined_output.length > 0) { 2489 /* Push the HARQ output into host memory. */ 2490 struct rte_mbuf *hq_output_head, *hq_output; 2491 hq_output_head = op->ldpc_dec.harq_combined_output.data; 2492 hq_output = op->ldpc_dec.harq_combined_output.data; 2493 hq_len = op->ldpc_dec.harq_combined_output.length; 2494 if (unlikely(!mbuf_append(hq_output_head, hq_output, hq_len))) { 2495 rte_bbdev_log(ERR, "HARQ output mbuf issue %d %d\n", 2496 hq_output->buf_len, 2497 hq_len); 2498 return -1; 2499 } 2500 } 2501 2502 if (op->ldpc_dec.soft_output.length > 0) 2503 mbuf_append(op->ldpc_dec.soft_output.data, op->ldpc_dec.soft_output.data, 2504 op->ldpc_dec.soft_output.length); 2505 2506 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2507 rte_memdump(stderr, "FCW", &desc->req.fcw_ld, 2508 sizeof(desc->req.fcw_ld) - 8); 2509 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2510 #endif 2511 2512 /* One CB (one op) was successfully prepared to enqueue. */ 2513 return 1; 2514 } 2515 2516 2517 /* Enqueue one decode operations for device in TB mode. */ 2518 static inline int 2519 vrb_enqueue_ldpc_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op, 2520 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) 2521 { 2522 union acc_dma_desc *desc = NULL; 2523 union acc_dma_desc *desc_first = NULL; 2524 int ret; 2525 uint8_t r, c; 2526 uint32_t in_offset, h_out_offset, h_out_length, mbuf_total_left, seg_total_left; 2527 struct rte_mbuf *input, *h_output_head, *h_output; 2528 uint16_t current_enqueued_cbs = 0; 2529 uint16_t desc_idx, sys_cols, trail_len = 0; 2530 uint64_t fcw_offset; 2531 union acc_harq_layout_data *harq_layout; 2532 2533 desc_idx = acc_desc_idx(q, total_enqueued_cbs); 2534 desc = q->ring_addr + desc_idx; 2535 desc_first = desc; 2536 fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 2537 harq_layout = q->d->harq_layout; 2538 2539 vrb_fcw_ld_fill(op, &desc->req.fcw_ld, harq_layout, q->d->device_variant); 2540 2541 input = op->ldpc_dec.input.data; 2542 h_output_head = h_output = op->ldpc_dec.hard_output.data; 2543 in_offset = op->ldpc_dec.input.offset; 2544 h_out_offset = op->ldpc_dec.hard_output.offset; 2545 h_out_length = 0; 2546 mbuf_total_left = op->ldpc_dec.input.length; 2547 c = op->ldpc_dec.tb_params.c; 2548 r = op->ldpc_dec.tb_params.r; 2549 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK)) { 2550 sys_cols = (op->ldpc_dec.basegraph == 1) ? 22 : 10; 2551 trail_len = sys_cols * op->ldpc_dec.z_c - 2552 op->ldpc_dec.n_filler - 24; 2553 } 2554 2555 while (mbuf_total_left > 0 && r < c) { 2556 if (unlikely((input == NULL) || (h_output == NULL))) 2557 return -1; 2558 2559 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_DEC_SCATTER_GATHER)) 2560 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2561 else 2562 seg_total_left = op->ldpc_dec.input.length; 2563 /* Set up DMA descriptor. 
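 * Each CB gets its own descriptor: the LDPC FCW is copied from the first
 * descriptor and referenced by data_ptrs[0], and tb_trailer_size reflects
 * the CRC24A trailer carried by the CBs still to come.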
*/ 2564 desc_idx = acc_desc_idx(q, total_enqueued_cbs); 2565 desc = q->ring_addr + desc_idx; 2566 fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 2567 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset; 2568 desc->req.data_ptrs[0].blen = ACC_FCW_LD_BLEN; 2569 rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld, ACC_FCW_LD_BLEN); 2570 desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len; 2571 ret = vrb_dma_desc_ld_fill(op, &desc->req, &input, 2572 h_output, &in_offset, &h_out_offset, 2573 &h_out_length, 2574 &mbuf_total_left, &seg_total_left, 2575 &desc->req.fcw_ld, q->d->device_variant); 2576 2577 if (unlikely(ret < 0)) 2578 return ret; 2579 2580 /* Hard output. */ 2581 mbuf_append(h_output_head, h_output, h_out_length); 2582 2583 /* Set total number of CBs in TB. */ 2584 desc->req.cbs_in_tb = cbs_in_tb; 2585 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2586 rte_memdump(stderr, "FCW", &desc->req.fcw_td, 2587 sizeof(desc->req.fcw_td) - 8); 2588 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2589 #endif 2590 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_DEC_SCATTER_GATHER) 2591 && (seg_total_left == 0)) { 2592 /* Go to the next mbuf. */ 2593 input = input->next; 2594 in_offset = 0; 2595 h_output = h_output->next; 2596 h_out_offset = 0; 2597 } 2598 total_enqueued_cbs++; 2599 current_enqueued_cbs++; 2600 r++; 2601 } 2602 2603 /* In case the number of CB doesn't match, the configuration was invalid. */ 2604 if (unlikely(current_enqueued_cbs != cbs_in_tb)) 2605 return -1; 2606 2607 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2608 if (check_mbuf_total_left(mbuf_total_left) != 0) 2609 return -EINVAL; 2610 #endif 2611 /* Set SDone on last CB descriptor for TB mode. */ 2612 desc->req.sdone_enable = 1; 2613 2614 return current_enqueued_cbs; 2615 } 2616 2617 /* Enqueue one decode operations for device in TB mode. 
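 * Loops over the r..c CBs of the TB with one descriptor per CB; hard and,
 * when requested, soft outputs are appended per CB and SDone is set on the
 * last descriptor only. Returns the number of CBs enqueued or -1 on error.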
*/ 2618 static inline int 2619 enqueue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op, 2620 uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) 2621 { 2622 union acc_dma_desc *desc = NULL; 2623 int ret; 2624 uint8_t r, c; 2625 uint32_t in_offset, h_out_offset, s_out_offset, s_out_length, 2626 h_out_length, mbuf_total_left, seg_total_left; 2627 struct rte_mbuf *input, *h_output_head, *h_output, 2628 *s_output_head, *s_output; 2629 uint16_t desc_idx, current_enqueued_cbs = 0; 2630 uint64_t fcw_offset; 2631 2632 desc_idx = acc_desc_idx(q, total_enqueued_cbs); 2633 desc = q->ring_addr + desc_idx; 2634 fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; 2635 vrb_fcw_td_fill(op, &desc->req.fcw_td); 2636 2637 input = op->turbo_dec.input.data; 2638 h_output_head = h_output = op->turbo_dec.hard_output.data; 2639 s_output_head = s_output = op->turbo_dec.soft_output.data; 2640 in_offset = op->turbo_dec.input.offset; 2641 h_out_offset = op->turbo_dec.hard_output.offset; 2642 s_out_offset = op->turbo_dec.soft_output.offset; 2643 h_out_length = s_out_length = 0; 2644 mbuf_total_left = op->turbo_dec.input.length; 2645 c = op->turbo_dec.tb_params.c; 2646 r = op->turbo_dec.tb_params.r; 2647 2648 while (mbuf_total_left > 0 && r < c) { 2649 if (unlikely((input == NULL) || (h_output == NULL))) 2650 return -1; 2651 2652 seg_total_left = rte_pktmbuf_data_len(input) - in_offset; 2653 2654 /* Set up DMA descriptor */ 2655 desc = acc_desc(q, total_enqueued_cbs); 2656 desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset; 2657 desc->req.data_ptrs[0].blen = ACC_FCW_TD_BLEN; 2658 ret = vrb_dma_desc_td_fill(op, &desc->req, &input, 2659 h_output, s_output, &in_offset, &h_out_offset, 2660 &s_out_offset, &h_out_length, &s_out_length, 2661 &mbuf_total_left, &seg_total_left, r); 2662 2663 if (unlikely(ret < 0)) 2664 return ret; 2665 2666 /* Hard output */ 2667 mbuf_append(h_output_head, h_output, h_out_length); 2668 2669 /* Soft output */ 2670 if (check_bit(op->turbo_dec.op_flags, 2671 RTE_BBDEV_TURBO_SOFT_OUTPUT)) 2672 mbuf_append(s_output_head, s_output, s_out_length); 2673 2674 /* Set total number of CBs in TB */ 2675 desc->req.cbs_in_tb = cbs_in_tb; 2676 #ifdef RTE_LIBRTE_BBDEV_DEBUG 2677 rte_memdump(stderr, "FCW", &desc->req.fcw_td, 2678 sizeof(desc->req.fcw_td) - 8); 2679 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 2680 #endif 2681 2682 if (seg_total_left == 0) { 2683 /* Go to the next mbuf */ 2684 input = input->next; 2685 in_offset = 0; 2686 h_output = h_output->next; 2687 h_out_offset = 0; 2688 2689 if (check_bit(op->turbo_dec.op_flags, 2690 RTE_BBDEV_TURBO_SOFT_OUTPUT)) { 2691 s_output = s_output->next; 2692 s_out_offset = 0; 2693 } 2694 } 2695 2696 total_enqueued_cbs++; 2697 current_enqueued_cbs++; 2698 r++; 2699 } 2700 2701 /* In case the number of CB doesn't match, the configuration was invalid. */ 2702 if (unlikely(current_enqueued_cbs != cbs_in_tb)) 2703 return -1; 2704 2705 /* Set SDone on last CB descriptor for TB mode */ 2706 desc->req.sdone_enable = 1; 2707 2708 return current_enqueued_cbs; 2709 } 2710 2711 /* Enqueue encode operations for device in CB mode. 
*/ 2712 static uint16_t 2713 vrb_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data, 2714 struct rte_bbdev_enc_op **ops, uint16_t num) 2715 { 2716 struct acc_queue *q = q_data->queue_private; 2717 int32_t avail = acc_ring_avail_enq(q); 2718 uint16_t i; 2719 int ret; 2720 2721 for (i = 0; i < num; ++i) { 2722 /* Check if there are available space for further processing */ 2723 if (unlikely(avail - 1 < 0)) { 2724 acc_enqueue_ring_full(q_data); 2725 break; 2726 } 2727 avail -= 1; 2728 2729 ret = enqueue_enc_one_op_cb(q, ops[i], i); 2730 if (ret < 0) { 2731 acc_enqueue_invalid(q_data); 2732 break; 2733 } 2734 } 2735 2736 if (unlikely(i == 0)) 2737 return 0; /* Nothing to enqueue */ 2738 2739 acc_dma_enqueue(q, i, &q_data->queue_stats); 2740 2741 /* Update stats */ 2742 q_data->queue_stats.enqueued_count += i; 2743 q_data->queue_stats.enqueue_err_count += num - i; 2744 return i; 2745 } 2746 2747 /** Enqueue encode operations for device in CB mode. */ 2748 static inline uint16_t 2749 vrb_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data, 2750 struct rte_bbdev_enc_op **ops, uint16_t num) 2751 { 2752 struct acc_queue *q = q_data->queue_private; 2753 int32_t avail = acc_ring_avail_enq(q); 2754 uint16_t i = 0; 2755 int ret, desc_idx = 0; 2756 int16_t enq, left = num; 2757 2758 while (left > 0) { 2759 if (unlikely(avail < 1)) { 2760 acc_enqueue_ring_full(q_data); 2761 break; 2762 } 2763 avail--; 2764 enq = RTE_MIN(left, ACC_MUX_5GDL_DESC); 2765 enq = check_mux(&ops[i], enq); 2766 ret = enqueue_ldpc_enc_n_op_cb(q, &ops[i], desc_idx, enq); 2767 if (ret < 0) { 2768 acc_enqueue_invalid(q_data); 2769 break; 2770 } 2771 i += enq; 2772 desc_idx++; 2773 left = num - i; 2774 } 2775 2776 if (unlikely(i == 0)) 2777 return 0; /* Nothing to enqueue. */ 2778 2779 acc_dma_enqueue(q, desc_idx, &q_data->queue_stats); 2780 2781 /* Update stats. */ 2782 q_data->queue_stats.enqueued_count += i; 2783 q_data->queue_stats.enqueue_err_count += num - i; 2784 2785 return i; 2786 } 2787 2788 /* Enqueue encode operations for device in TB mode. */ 2789 static uint16_t 2790 vrb_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data, 2791 struct rte_bbdev_enc_op **ops, uint16_t num) 2792 { 2793 struct acc_queue *q = q_data->queue_private; 2794 int32_t avail = acc_ring_avail_enq(q); 2795 uint16_t i, enqueued_cbs = 0; 2796 uint8_t cbs_in_tb; 2797 int ret; 2798 2799 for (i = 0; i < num; ++i) { 2800 cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc); 2801 /* Check if there are available space for further processing */ 2802 if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) { 2803 acc_enqueue_ring_full(q_data); 2804 break; 2805 } 2806 avail -= cbs_in_tb; 2807 2808 ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb); 2809 if (ret <= 0) { 2810 acc_enqueue_invalid(q_data); 2811 break; 2812 } 2813 enqueued_cbs += ret; 2814 } 2815 if (unlikely(enqueued_cbs == 0)) 2816 return 0; /* Nothing to enqueue */ 2817 2818 acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats); 2819 2820 /* Update stats */ 2821 q_data->queue_stats.enqueued_count += i; 2822 q_data->queue_stats.enqueue_err_count += num - i; 2823 2824 return i; 2825 } 2826 2827 /* Enqueue LDPC encode operations for device in TB mode. 
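 * VRB1 splits the TB into groups of up to ACC_MUX_5GDL_DESC CBs per
 * descriptor (vrb1_enqueue_ldpc_enc_one_op_tb), whereas other variants use
 * a single descriptor for the whole TB (vrb2_enqueue_ldpc_enc_one_op_tb).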
*/ 2828 static uint16_t 2829 vrb_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data, 2830 struct rte_bbdev_enc_op **ops, uint16_t num) 2831 { 2832 struct acc_queue *q = q_data->queue_private; 2833 int32_t avail = acc_ring_avail_enq(q); 2834 uint16_t i, enqueued_descs = 0; 2835 uint8_t cbs_in_tb; 2836 int descs_used; 2837 2838 for (i = 0; i < num; ++i) { 2839 if (q->d->device_variant == VRB1_VARIANT) { 2840 cbs_in_tb = get_num_cbs_in_tb_ldpc_enc(&ops[i]->ldpc_enc); 2841 /* Check if there are available space for further processing. */ 2842 if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) { 2843 acc_enqueue_ring_full(q_data); 2844 break; 2845 } 2846 descs_used = vrb1_enqueue_ldpc_enc_one_op_tb(q, ops[i], 2847 enqueued_descs, cbs_in_tb); 2848 } else { 2849 if (unlikely(avail < 1)) { 2850 acc_enqueue_ring_full(q_data); 2851 break; 2852 } 2853 descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q, ops[i], enqueued_descs); 2854 } 2855 if (descs_used < 0) { 2856 acc_enqueue_invalid(q_data); 2857 break; 2858 } 2859 enqueued_descs += descs_used; 2860 avail -= descs_used; 2861 } 2862 if (unlikely(enqueued_descs == 0)) 2863 return 0; /* Nothing to enqueue. */ 2864 2865 acc_dma_enqueue(q, enqueued_descs, &q_data->queue_stats); 2866 2867 /* Update stats. */ 2868 q_data->queue_stats.enqueued_count += i; 2869 q_data->queue_stats.enqueue_err_count += num - i; 2870 2871 return i; 2872 } 2873 2874 /* Enqueue encode operations for device. */ 2875 static uint16_t 2876 vrb_enqueue_enc(struct rte_bbdev_queue_data *q_data, 2877 struct rte_bbdev_enc_op **ops, uint16_t num) 2878 { 2879 int32_t aq_avail = acc_aq_avail(q_data, num); 2880 if (unlikely((aq_avail <= 0) || (num == 0))) 2881 return 0; 2882 if (ops[0]->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 2883 return vrb_enqueue_enc_tb(q_data, ops, num); 2884 else 2885 return vrb_enqueue_enc_cb(q_data, ops, num); 2886 } 2887 2888 /* Enqueue encode operations for device. */ 2889 static uint16_t 2890 vrb_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data, 2891 struct rte_bbdev_enc_op **ops, uint16_t num) 2892 { 2893 int32_t aq_avail = acc_aq_avail(q_data, num); 2894 if (unlikely((aq_avail <= 0) || (num == 0))) 2895 return 0; 2896 if (ops[0]->ldpc_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 2897 return vrb_enqueue_ldpc_enc_tb(q_data, ops, num); 2898 else 2899 return vrb_enqueue_ldpc_enc_cb(q_data, ops, num); 2900 } 2901 2902 2903 /* Enqueue decode operations for device in CB mode. */ 2904 static uint16_t 2905 vrb_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data, 2906 struct rte_bbdev_dec_op **ops, uint16_t num) 2907 { 2908 struct acc_queue *q = q_data->queue_private; 2909 int32_t avail = acc_ring_avail_enq(q); 2910 uint16_t i; 2911 int ret; 2912 2913 for (i = 0; i < num; ++i) { 2914 /* Check if there are available space for further processing. */ 2915 if (unlikely(avail - 1 < 0)) 2916 break; 2917 avail -= 1; 2918 2919 ret = enqueue_dec_one_op_cb(q, ops[i], i); 2920 if (ret < 0) 2921 break; 2922 } 2923 2924 if (unlikely(i == 0)) 2925 return 0; /* Nothing to enqueue. */ 2926 2927 acc_dma_enqueue(q, i, &q_data->queue_stats); 2928 2929 /* Update stats. */ 2930 q_data->queue_stats.enqueued_count += i; 2931 q_data->queue_stats.enqueue_err_count += num - i; 2932 2933 return i; 2934 } 2935 2936 /* Enqueue decode operations for device in TB mode. 
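 * Ring space for all CBs of a TB is reserved up front based on
 * get_num_cbs_in_tb_ldpc_dec(); a TB that cannot be fully described stops
 * the loop and is not counted in the enqueued statistics.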
*/ 2937 static uint16_t 2938 vrb_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data, 2939 struct rte_bbdev_dec_op **ops, uint16_t num) 2940 { 2941 struct acc_queue *q = q_data->queue_private; 2942 int32_t avail = acc_ring_avail_enq(q); 2943 uint16_t i, enqueued_cbs = 0; 2944 uint8_t cbs_in_tb; 2945 int ret; 2946 2947 for (i = 0; i < num; ++i) { 2948 cbs_in_tb = get_num_cbs_in_tb_ldpc_dec(&ops[i]->ldpc_dec); 2949 /* Check if there are available space for further processing. */ 2950 if (unlikely((avail - cbs_in_tb < 0) || 2951 (cbs_in_tb == 0))) 2952 break; 2953 avail -= cbs_in_tb; 2954 2955 ret = vrb_enqueue_ldpc_dec_one_op_tb(q, ops[i], 2956 enqueued_cbs, cbs_in_tb); 2957 if (ret <= 0) 2958 break; 2959 enqueued_cbs += ret; 2960 } 2961 2962 acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats); 2963 2964 /* Update stats. */ 2965 q_data->queue_stats.enqueued_count += i; 2966 q_data->queue_stats.enqueue_err_count += num - i; 2967 return i; 2968 } 2969 2970 /* Enqueue decode operations for device in CB mode. */ 2971 static uint16_t 2972 vrb_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data, 2973 struct rte_bbdev_dec_op **ops, uint16_t num) 2974 { 2975 struct acc_queue *q = q_data->queue_private; 2976 int32_t avail = acc_ring_avail_enq(q); 2977 uint16_t i; 2978 int ret; 2979 bool same_op = false; 2980 2981 for (i = 0; i < num; ++i) { 2982 /* Check if there are available space for further processing. */ 2983 if (unlikely(avail < 1)) { 2984 acc_enqueue_ring_full(q_data); 2985 break; 2986 } 2987 avail -= 1; 2988 rte_bbdev_log(INFO, "Op %d %d %d %d %d %d %d %d %d %d %d %d\n", 2989 i, ops[i]->ldpc_dec.op_flags, ops[i]->ldpc_dec.rv_index, 2990 ops[i]->ldpc_dec.iter_max, ops[i]->ldpc_dec.iter_count, 2991 ops[i]->ldpc_dec.basegraph, ops[i]->ldpc_dec.z_c, 2992 ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m, 2993 ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e, 2994 same_op); 2995 ret = vrb_enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op); 2996 if (ret < 0) { 2997 acc_enqueue_invalid(q_data); 2998 break; 2999 } 3000 } 3001 3002 if (unlikely(i == 0)) 3003 return 0; /* Nothing to enqueue. */ 3004 3005 acc_dma_enqueue(q, i, &q_data->queue_stats); 3006 3007 /* Update stats. */ 3008 q_data->queue_stats.enqueued_count += i; 3009 q_data->queue_stats.enqueue_err_count += num - i; 3010 return i; 3011 } 3012 3013 3014 /* Enqueue decode operations for device in TB mode. */ 3015 static uint16_t 3016 vrb_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data, 3017 struct rte_bbdev_dec_op **ops, uint16_t num) 3018 { 3019 struct acc_queue *q = q_data->queue_private; 3020 int32_t avail = acc_ring_avail_enq(q); 3021 uint16_t i, enqueued_cbs = 0; 3022 uint8_t cbs_in_tb; 3023 int ret; 3024 3025 for (i = 0; i < num; ++i) { 3026 cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec); 3027 /* Check if there are available space for further processing */ 3028 if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) { 3029 acc_enqueue_ring_full(q_data); 3030 break; 3031 } 3032 avail -= cbs_in_tb; 3033 3034 ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb); 3035 if (ret <= 0) { 3036 acc_enqueue_invalid(q_data); 3037 break; 3038 } 3039 enqueued_cbs += ret; 3040 } 3041 3042 acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats); 3043 3044 /* Update stats */ 3045 q_data->queue_stats.enqueued_count += i; 3046 q_data->queue_stats.enqueue_err_count += num - i; 3047 3048 return i; 3049 } 3050 3051 /* Enqueue decode operations for device. 
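 * Dispatches on the code block mode of the first op: TB ops go through
 * vrb_enqueue_dec_tb(), CB ops through vrb_enqueue_dec_cb(). Typical caller
 * flow through the generic bbdev API (illustrative sketch only):
 *   enq = rte_bbdev_enqueue_dec_ops(dev_id, queue_id, ops, num);
 *   ... later ...
 *   deq = rte_bbdev_dequeue_dec_ops(dev_id, queue_id, ops, enq);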
*/ 3052 static uint16_t 3053 vrb_enqueue_dec(struct rte_bbdev_queue_data *q_data, 3054 struct rte_bbdev_dec_op **ops, uint16_t num) 3055 { 3056 int32_t aq_avail = acc_aq_avail(q_data, num); 3057 if (unlikely((aq_avail <= 0) || (num == 0))) 3058 return 0; 3059 if (ops[0]->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3060 return vrb_enqueue_dec_tb(q_data, ops, num); 3061 else 3062 return vrb_enqueue_dec_cb(q_data, ops, num); 3063 } 3064 3065 /* Enqueue decode operations for device. */ 3066 static uint16_t 3067 vrb_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data, 3068 struct rte_bbdev_dec_op **ops, uint16_t num) 3069 { 3070 int32_t aq_avail = acc_aq_avail(q_data, num); 3071 if (unlikely((aq_avail <= 0) || (num == 0))) 3072 return 0; 3073 if (ops[0]->ldpc_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3074 return vrb_enqueue_ldpc_dec_tb(q_data, ops, num); 3075 else 3076 return vrb_enqueue_ldpc_dec_cb(q_data, ops, num); 3077 } 3078 3079 /* Update the operation status when dequeuing for any operation type. */ 3080 static inline void 3081 vrb_update_dequeued_operation(union acc_dma_desc *desc, union acc_dma_rsp_desc rsp, int *op_status, 3082 uint32_t *aq_dequeued, bool clear_rsp, bool clear_opstatus) 3083 { 3084 rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val); 3085 3086 /* Set status based on DMA response. */ 3087 if (clear_opstatus) 3088 *op_status = 0; 3089 *op_status |= ((rsp.input_err) ? (1 << RTE_BBDEV_DATA_ERROR) : 0); 3090 *op_status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0); 3091 *op_status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0); 3092 *op_status |= ((rsp.engine_hung) ? (1 << RTE_BBDEV_ENGINE_ERROR) : 0); 3093 3094 if (desc->req.last_desc_in_batch) { 3095 (*aq_dequeued)++; 3096 desc->req.last_desc_in_batch = 0; 3097 } 3098 3099 if (clear_rsp) { 3100 /* Clear response explicitly. */ 3101 desc->rsp.val = ACC_DMA_DESC_TYPE; 3102 desc->rsp.add_info_0 = 0; /* Reserved bits. */ 3103 desc->rsp.add_info_1 = 0; /* Reserved bits. */ 3104 } 3105 } 3106 3107 /* Dequeue one encode operations from device in CB mode. */ 3108 static inline int 3109 vrb_dequeue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op, 3110 uint16_t *dequeued_ops, uint32_t *aq_dequeued, uint16_t *dequeued_descs, 3111 uint16_t max_requested_ops) 3112 { 3113 union acc_dma_desc *desc, atom_desc; 3114 union acc_dma_rsp_desc rsp; 3115 struct rte_bbdev_enc_op *op; 3116 int i; 3117 struct acc_ptrs *context_ptrs; 3118 uint16_t desc_idx; 3119 3120 desc_idx = acc_desc_idx_tail(q, *dequeued_descs); 3121 desc = q->ring_addr + desc_idx; 3122 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 3123 3124 if (*dequeued_ops + desc->req.numCBs > max_requested_ops) 3125 return -1; 3126 3127 /* Check fdone bit. */ 3128 if (!(atom_desc.rsp.val & ACC_FDONE)) 3129 return -1; 3130 3131 rsp.val = atom_desc.rsp.val; 3132 3133 /* Dequeue. */ 3134 op = desc->req.op_addr; 3135 3136 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, true); 3137 3138 ref_op[0] = op; 3139 context_ptrs = q->companion_ring_addr + desc_idx; 3140 for (i = 1 ; i < desc->req.numCBs; i++) 3141 ref_op[i] = context_ptrs->ptr[i].op_addr; 3142 3143 /* One op was successfully dequeued. */ 3144 (*dequeued_descs)++; 3145 *dequeued_ops += desc->req.numCBs; 3146 return desc->req.numCBs; 3147 } 3148 3149 /* Dequeue one LDPC encode operations from VRB2 device in TB mode. 
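 * On VRB2 a TB encode op maps to a single descriptor, so checking the FDONE
 * bit of that descriptor is sufficient; there is no per-CB walk as in
 * vrb_dequeue_enc_one_op_tb().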
*/ 3150 static inline int 3151 vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op, 3152 uint16_t *dequeued_ops, uint32_t *aq_dequeued, 3153 uint16_t *dequeued_descs) 3154 { 3155 union acc_dma_desc *desc, atom_desc; 3156 union acc_dma_rsp_desc rsp; 3157 struct rte_bbdev_enc_op *op; 3158 3159 desc = acc_desc_tail(q, *dequeued_descs); 3160 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 3161 3162 /* Check fdone bit. */ 3163 if (!(atom_desc.rsp.val & ACC_FDONE)) 3164 return -1; 3165 3166 rsp.val = atom_desc.rsp.val; 3167 3168 /* Dequeue. */ 3169 op = desc->req.op_addr; 3170 3171 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, true); 3172 3173 /* One op was successfully dequeued */ 3174 ref_op[0] = op; 3175 (*dequeued_descs)++; 3176 (*dequeued_ops)++; 3177 return 1; 3178 } 3179 3180 /* Dequeue one LDPC encode operations from device in TB mode. 3181 * That operation may cover multiple descriptors. 3182 */ 3183 static inline int 3184 vrb_dequeue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op **ref_op, 3185 uint16_t *dequeued_ops, uint32_t *aq_dequeued, 3186 uint16_t *dequeued_descs, uint16_t max_requested_ops) 3187 { 3188 union acc_dma_desc *desc, *last_desc, atom_desc; 3189 union acc_dma_rsp_desc rsp; 3190 struct rte_bbdev_enc_op *op; 3191 uint8_t i = 0; 3192 uint16_t current_dequeued_descs = 0, descs_in_tb; 3193 3194 desc = acc_desc_tail(q, *dequeued_descs); 3195 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 3196 3197 if (*dequeued_ops + 1 > max_requested_ops) 3198 return -1; 3199 3200 /* Check fdone bit. */ 3201 if (!(atom_desc.rsp.val & ACC_FDONE)) 3202 return -1; 3203 3204 /* Get number of CBs in dequeued TB. */ 3205 descs_in_tb = desc->req.cbs_in_tb; 3206 /* Get last CB */ 3207 last_desc = acc_desc_tail(q, *dequeued_descs + descs_in_tb - 1); 3208 /* Check if last CB in TB is ready to dequeue (and thus 3209 * the whole TB) - checking sdone bit. If not return. 3210 */ 3211 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc, __ATOMIC_RELAXED); 3212 if (!(atom_desc.rsp.val & ACC_SDONE)) 3213 return -1; 3214 3215 /* Dequeue. */ 3216 op = desc->req.op_addr; 3217 3218 /* Clearing status, it will be set based on response. */ 3219 op->status = 0; 3220 3221 while (i < descs_in_tb) { 3222 desc = acc_desc_tail(q, *dequeued_descs); 3223 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 3224 rsp.val = atom_desc.rsp.val; 3225 3226 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, false); 3227 3228 (*dequeued_descs)++; 3229 current_dequeued_descs++; 3230 i++; 3231 } 3232 3233 *ref_op = op; 3234 (*dequeued_ops)++; 3235 return current_dequeued_descs; 3236 } 3237 3238 /* Dequeue one decode operation from device in CB mode. */ 3239 static inline int 3240 vrb_dequeue_dec_one_op_cb(struct rte_bbdev_queue_data *q_data, 3241 struct acc_queue *q, struct rte_bbdev_dec_op **ref_op, 3242 uint16_t dequeued_cbs, uint32_t *aq_dequeued) 3243 { 3244 union acc_dma_desc *desc, atom_desc; 3245 union acc_dma_rsp_desc rsp; 3246 struct rte_bbdev_dec_op *op; 3247 3248 desc = acc_desc_tail(q, dequeued_cbs); 3249 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 3250 3251 /* Check fdone bit. */ 3252 if (!(atom_desc.rsp.val & ACC_FDONE)) 3253 return -1; 3254 3255 rsp.val = atom_desc.rsp.val; 3256 3257 /* Dequeue. 
*/ 3258 op = desc->req.op_addr; 3259 3260 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, false, true); 3261 3262 if (op->status != 0) { 3263 /* These errors are not expected. */ 3264 q_data->queue_stats.dequeue_err_count++; 3265 vrb_check_ir(q->d); 3266 } 3267 3268 /* CRC invalid if error exists. */ 3269 if (!op->status) 3270 op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR; 3271 op->turbo_dec.iter_count = (uint8_t) rsp.iter_cnt; 3272 3273 desc->rsp.val = ACC_DMA_DESC_TYPE; 3274 desc->rsp.add_info_0 = 0; 3275 desc->rsp.add_info_1 = 0; 3276 *ref_op = op; 3277 3278 /* One CB (op) was successfully dequeued. */ 3279 return 1; 3280 } 3281 3282 /* Dequeue one decode operations from device in CB mode. */ 3283 static inline int 3284 vrb_dequeue_ldpc_dec_one_op_cb(struct rte_bbdev_queue_data *q_data, 3285 struct acc_queue *q, struct rte_bbdev_dec_op **ref_op, 3286 uint16_t dequeued_cbs, uint32_t *aq_dequeued) 3287 { 3288 union acc_dma_desc *desc, atom_desc; 3289 union acc_dma_rsp_desc rsp; 3290 struct rte_bbdev_dec_op *op; 3291 3292 desc = acc_desc_tail(q, dequeued_cbs); 3293 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 3294 3295 /* Check fdone bit. */ 3296 if (!(atom_desc.rsp.val & ACC_FDONE)) 3297 return -1; 3298 3299 rsp.val = atom_desc.rsp.val; 3300 rte_bbdev_log_debug("Resp. desc %p: %x %x %x\n", desc, rsp.val, desc->rsp.add_info_0, 3301 desc->rsp.add_info_1); 3302 3303 /* Dequeue. */ 3304 op = desc->req.op_addr; 3305 3306 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, false, true); 3307 3308 /* Additional op status update for LDPC Decoder. */ 3309 if (op->status != 0) 3310 q_data->queue_stats.dequeue_err_count++; 3311 3312 op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR; 3313 if (op->ldpc_dec.hard_output.length > 0 && !rsp.synd_ok) 3314 op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR; 3315 3316 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) || 3317 check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_16_CHECK)) { 3318 if (desc->rsp.add_info_1 != 0) 3319 op->status |= 1 << RTE_BBDEV_CRC_ERROR; 3320 } 3321 3322 op->ldpc_dec.iter_count = (uint8_t) rsp.iter_cnt; 3323 3324 if (op->status & (1 << RTE_BBDEV_DRV_ERROR)) 3325 vrb_check_ir(q->d); 3326 3327 desc->rsp.val = ACC_DMA_DESC_TYPE; 3328 desc->rsp.add_info_0 = 0; 3329 desc->rsp.add_info_1 = 0; 3330 3331 *ref_op = op; 3332 3333 /* One CB (op) was successfully dequeued. */ 3334 return 1; 3335 } 3336 3337 /* Dequeue one decode operations from device in TB mode for 4G or 5G. */ 3338 static inline int 3339 vrb_dequeue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op **ref_op, 3340 uint16_t dequeued_cbs, uint32_t *aq_dequeued) 3341 { 3342 union acc_dma_desc *desc, *last_desc, atom_desc; 3343 union acc_dma_rsp_desc rsp; 3344 struct rte_bbdev_dec_op *op; 3345 uint8_t cbs_in_tb = 1, cb_idx = 0; 3346 uint32_t tb_crc_check = 0; 3347 3348 desc = acc_desc_tail(q, dequeued_cbs); 3349 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 3350 3351 /* Check fdone bit. */ 3352 if (!(atom_desc.rsp.val & ACC_FDONE)) 3353 return -1; 3354 3355 /* Dequeue. */ 3356 op = desc->req.op_addr; 3357 3358 /* Get number of CBs in dequeued TB. */ 3359 cbs_in_tb = desc->req.cbs_in_tb; 3360 /* Get last CB. */ 3361 last_desc = acc_desc_tail(q, dequeued_cbs + cbs_in_tb - 1); 3362 /* Check if last CB in TB is ready to dequeue (and thus the whole TB) - checking sdone bit. 3363 * If not return. 
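 * The first descriptor's FDONE only shows that this CB completed; SDONE is
 * requested on the last CB descriptor at enqueue time, so it flags
 * completion of the whole TB and both must be observed before the op can be
 * returned to the application.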
3364 */ 3365 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc, __ATOMIC_RELAXED); 3366 if (!(atom_desc.rsp.val & ACC_SDONE)) 3367 return -1; 3368 3369 /* Clearing status, it will be set based on response. */ 3370 op->status = 0; 3371 3372 /* Read remaining CBs if exists. */ 3373 while (cb_idx < cbs_in_tb) { 3374 desc = acc_desc_tail(q, dequeued_cbs); 3375 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 3376 rsp.val = atom_desc.rsp.val; 3377 rte_bbdev_log_debug("Resp. desc %p: %x %x %x", desc, 3378 rsp.val, desc->rsp.add_info_0, 3379 desc->rsp.add_info_1); 3380 3381 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, false, false); 3382 3383 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK)) 3384 tb_crc_check ^= desc->rsp.add_info_1; 3385 3386 /* CRC invalid if error exists. */ 3387 if (!op->status) 3388 op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR; 3389 if (q->op_type == RTE_BBDEV_OP_LDPC_DEC) 3390 op->ldpc_dec.iter_count = RTE_MAX((uint8_t) rsp.iter_cnt, 3391 op->ldpc_dec.iter_count); 3392 else 3393 op->turbo_dec.iter_count = RTE_MAX((uint8_t) rsp.iter_cnt, 3394 op->turbo_dec.iter_count); 3395 3396 desc->rsp.val = ACC_DMA_DESC_TYPE; 3397 desc->rsp.add_info_0 = 0; 3398 desc->rsp.add_info_1 = 0; 3399 dequeued_cbs++; 3400 cb_idx++; 3401 } 3402 3403 if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK)) { 3404 rte_bbdev_log_debug("TB-CRC Check %x\n", tb_crc_check); 3405 if (tb_crc_check > 0) 3406 op->status |= 1 << RTE_BBDEV_CRC_ERROR; 3407 } 3408 3409 *ref_op = op; 3410 3411 return cb_idx; 3412 } 3413 3414 /* Dequeue encode operations from device. */ 3415 static uint16_t 3416 vrb_dequeue_enc(struct rte_bbdev_queue_data *q_data, 3417 struct rte_bbdev_enc_op **ops, uint16_t num) 3418 { 3419 struct acc_queue *q = q_data->queue_private; 3420 uint32_t avail = acc_ring_avail_deq(q); 3421 uint32_t aq_dequeued = 0; 3422 uint16_t i, dequeued_ops = 0, dequeued_descs = 0; 3423 int ret, cbm; 3424 struct rte_bbdev_enc_op *op; 3425 if (avail == 0) 3426 return 0; 3427 op = acc_op_tail(q, 0); 3428 cbm = op->turbo_enc.code_block_mode; 3429 3430 for (i = 0; i < avail; i++) { 3431 if (cbm == RTE_BBDEV_TRANSPORT_BLOCK) 3432 ret = vrb_dequeue_enc_one_op_tb(q, &ops[dequeued_ops], 3433 &dequeued_ops, &aq_dequeued, 3434 &dequeued_descs, num); 3435 else 3436 ret = vrb_dequeue_enc_one_op_cb(q, &ops[dequeued_ops], 3437 &dequeued_ops, &aq_dequeued, 3438 &dequeued_descs, num); 3439 if (ret < 0) 3440 break; 3441 } 3442 3443 q->aq_dequeued += aq_dequeued; 3444 q->sw_ring_tail += dequeued_descs; 3445 3446 /* Update enqueue stats. */ 3447 q_data->queue_stats.dequeued_count += dequeued_ops; 3448 3449 return dequeued_ops; 3450 } 3451 3452 /* Dequeue LDPC encode operations from device. 
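 * TB ops on VRB1 may span several descriptors and are harvested through
 * vrb_dequeue_enc_one_op_tb(), while VRB2 TB ops and all CB ops use one
 * descriptor each, possibly carrying several multiplexed CBs.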
*/ 3453 static uint16_t 3454 vrb_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data, 3455 struct rte_bbdev_enc_op **ops, uint16_t num) 3456 { 3457 struct acc_queue *q = q_data->queue_private; 3458 uint32_t avail = acc_ring_avail_deq(q); 3459 uint32_t aq_dequeued = 0; 3460 uint16_t i, dequeued_ops = 0, dequeued_descs = 0; 3461 int ret, cbm; 3462 struct rte_bbdev_enc_op *op; 3463 if (avail == 0) 3464 return 0; 3465 op = acc_op_tail(q, 0); 3466 cbm = op->ldpc_enc.code_block_mode; 3467 3468 for (i = 0; i < avail; i++) { 3469 if (cbm == RTE_BBDEV_TRANSPORT_BLOCK) 3470 if (q->d->device_variant == VRB1_VARIANT) 3471 ret = vrb_dequeue_enc_one_op_tb(q, &ops[dequeued_ops], 3472 &dequeued_ops, &aq_dequeued, 3473 &dequeued_descs, num); 3474 else 3475 ret = vrb2_dequeue_ldpc_enc_one_op_tb(q, &ops[dequeued_ops], 3476 &dequeued_ops, &aq_dequeued, 3477 &dequeued_descs); 3478 else 3479 ret = vrb_dequeue_enc_one_op_cb(q, &ops[dequeued_ops], 3480 &dequeued_ops, &aq_dequeued, 3481 &dequeued_descs, num); 3482 if (ret < 0) 3483 break; 3484 } 3485 3486 q->aq_dequeued += aq_dequeued; 3487 q->sw_ring_tail += dequeued_descs; 3488 3489 /* Update enqueue stats. */ 3490 q_data->queue_stats.dequeued_count += dequeued_ops; 3491 3492 return dequeued_ops; 3493 } 3494 3495 /* Dequeue decode operations from device. */ 3496 static uint16_t 3497 vrb_dequeue_dec(struct rte_bbdev_queue_data *q_data, 3498 struct rte_bbdev_dec_op **ops, uint16_t num) 3499 { 3500 struct acc_queue *q = q_data->queue_private; 3501 uint16_t dequeue_num; 3502 uint32_t avail = acc_ring_avail_deq(q); 3503 uint32_t aq_dequeued = 0; 3504 uint16_t i; 3505 uint16_t dequeued_cbs = 0; 3506 struct rte_bbdev_dec_op *op; 3507 int ret; 3508 3509 dequeue_num = (avail < num) ? avail : num; 3510 3511 for (i = 0; i < dequeue_num; ++i) { 3512 op = acc_op_tail(q, dequeued_cbs); 3513 if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3514 ret = vrb_dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs, 3515 &aq_dequeued); 3516 else 3517 ret = vrb_dequeue_dec_one_op_cb(q_data, q, &ops[i], 3518 dequeued_cbs, &aq_dequeued); 3519 3520 if (ret <= 0) 3521 break; 3522 dequeued_cbs += ret; 3523 } 3524 3525 q->aq_dequeued += aq_dequeued; 3526 q->sw_ring_tail += dequeued_cbs; 3527 3528 /* Update enqueue stats */ 3529 q_data->queue_stats.dequeued_count += i; 3530 3531 return i; 3532 } 3533 3534 /* Dequeue decode operations from device. */ 3535 static uint16_t 3536 vrb_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data, 3537 struct rte_bbdev_dec_op **ops, uint16_t num) 3538 { 3539 struct acc_queue *q = q_data->queue_private; 3540 uint16_t dequeue_num; 3541 uint32_t avail = acc_ring_avail_deq(q); 3542 uint32_t aq_dequeued = 0; 3543 uint16_t i; 3544 uint16_t dequeued_cbs = 0; 3545 struct rte_bbdev_dec_op *op; 3546 int ret; 3547 3548 dequeue_num = RTE_MIN(avail, num); 3549 3550 for (i = 0; i < dequeue_num; ++i) { 3551 op = acc_op_tail(q, dequeued_cbs); 3552 if (op->ldpc_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) 3553 ret = vrb_dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs, 3554 &aq_dequeued); 3555 else 3556 ret = vrb_dequeue_ldpc_dec_one_op_cb( 3557 q_data, q, &ops[i], dequeued_cbs, 3558 &aq_dequeued); 3559 3560 if (ret <= 0) 3561 break; 3562 dequeued_cbs += ret; 3563 } 3564 3565 q->aq_dequeued += aq_dequeued; 3566 q->sw_ring_tail += dequeued_cbs; 3567 3568 /* Update enqueue stats. */ 3569 q_data->queue_stats.dequeued_count += i; 3570 3571 return i; 3572 } 3573 3574 /* Fill in a frame control word for FFT processing. 
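 * VRB1 variant: programs the base acc_fcw_fft layout. The VRB2 variant
 * below additionally fills the FP16 and power-measurement related fields.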
*/ 3575 static inline void 3576 vrb1_fcw_fft_fill(struct rte_bbdev_fft_op *op, struct acc_fcw_fft *fcw) 3577 { 3578 fcw->in_frame_size = op->fft.input_sequence_size; 3579 fcw->leading_pad_size = op->fft.input_leading_padding; 3580 fcw->out_frame_size = op->fft.output_sequence_size; 3581 fcw->leading_depad_size = op->fft.output_leading_depadding; 3582 fcw->cs_window_sel = op->fft.window_index[0] + 3583 (op->fft.window_index[1] << 8) + 3584 (op->fft.window_index[2] << 16) + 3585 (op->fft.window_index[3] << 24); 3586 fcw->cs_window_sel2 = op->fft.window_index[4] + 3587 (op->fft.window_index[5] << 8); 3588 fcw->cs_enable_bmap = op->fft.cs_bitmap; 3589 fcw->num_antennas = op->fft.num_antennas_log2; 3590 fcw->idft_size = op->fft.idft_log2; 3591 fcw->dft_size = op->fft.dft_log2; 3592 fcw->cs_offset = op->fft.cs_time_adjustment; 3593 fcw->idft_shift = op->fft.idft_shift; 3594 fcw->dft_shift = op->fft.dft_shift; 3595 fcw->cs_multiplier = op->fft.ncs_reciprocal; 3596 if (check_bit(op->fft.op_flags, RTE_BBDEV_FFT_IDFT_BYPASS)) { 3597 if (check_bit(op->fft.op_flags, RTE_BBDEV_FFT_WINDOWING_BYPASS)) 3598 fcw->bypass = 2; 3599 else 3600 fcw->bypass = 1; 3601 } else if (check_bit(op->fft.op_flags, RTE_BBDEV_FFT_DFT_BYPASS)) 3602 fcw->bypass = 3; 3603 else 3604 fcw->bypass = 0; 3605 } 3606 3607 /* Fill in a frame control word for FFT processing. */ 3608 static inline void 3609 vrb2_fcw_fft_fill(struct rte_bbdev_fft_op *op, struct acc_fcw_fft_3 *fcw) 3610 { 3611 fcw->in_frame_size = op->fft.input_sequence_size; 3612 fcw->leading_pad_size = op->fft.input_leading_padding; 3613 fcw->out_frame_size = op->fft.output_sequence_size; 3614 fcw->leading_depad_size = op->fft.output_leading_depadding; 3615 fcw->cs_window_sel = op->fft.window_index[0] + 3616 (op->fft.window_index[1] << 8) + 3617 (op->fft.window_index[2] << 16) + 3618 (op->fft.window_index[3] << 24); 3619 fcw->cs_window_sel2 = op->fft.window_index[4] + 3620 (op->fft.window_index[5] << 8); 3621 fcw->cs_enable_bmap = op->fft.cs_bitmap; 3622 fcw->num_antennas = op->fft.num_antennas_log2; 3623 fcw->idft_size = op->fft.idft_log2; 3624 fcw->dft_size = op->fft.dft_log2; 3625 fcw->cs_offset = op->fft.cs_time_adjustment; 3626 fcw->idft_shift = op->fft.idft_shift; 3627 fcw->dft_shift = op->fft.dft_shift; 3628 fcw->cs_multiplier = op->fft.ncs_reciprocal; 3629 fcw->power_shift = op->fft.power_shift; 3630 fcw->exp_adj = op->fft.fp16_exp_adjust; 3631 fcw->fp16_in = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_FP16_INPUT); 3632 fcw->fp16_out = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_FP16_OUTPUT); 3633 fcw->power_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS); 3634 if (check_bit(op->fft.op_flags, 3635 RTE_BBDEV_FFT_IDFT_BYPASS)) { 3636 if (check_bit(op->fft.op_flags, 3637 RTE_BBDEV_FFT_WINDOWING_BYPASS)) 3638 fcw->bypass = 2; 3639 else 3640 fcw->bypass = 1; 3641 } else if (check_bit(op->fft.op_flags, 3642 RTE_BBDEV_FFT_DFT_BYPASS)) 3643 fcw->bypass = 3; 3644 else 3645 fcw->bypass = 0; 3646 } 3647 3648 static inline int 3649 vrb_dma_desc_fft_fill(struct rte_bbdev_fft_op *op, 3650 struct acc_dma_req_desc *desc, 3651 struct rte_mbuf *input, struct rte_mbuf *output, struct rte_mbuf *win_input, 3652 struct rte_mbuf *pwr, uint32_t *in_offset, uint32_t *out_offset, 3653 uint32_t *win_offset, uint32_t *pwr_offset, uint16_t device_variant) 3654 { 3655 bool pwr_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_POWER_MEAS); 3656 bool win_en = check_bit(op->fft.op_flags, RTE_BBDEV_FFT_DEWINDOWING); 3657 int num_cs = 0, i, bd_idx = 1; 3658 3659 if (device_variant == 
VRB1_VARIANT) {
		/* Force unsupported descriptor format out. */
		pwr_en = 0;
		win_en = 0;
	}

	/* FCW already done. */
	acc_header_init(desc);

	RTE_SET_USED(win_input);
	RTE_SET_USED(win_offset);

	desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(input, *in_offset);
	desc->data_ptrs[bd_idx].blen = op->fft.input_sequence_size * ACC_IQ_SIZE;
	desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_IN;
	desc->data_ptrs[bd_idx].last = 1;
	desc->data_ptrs[bd_idx].dma_ext = 0;
	bd_idx++;

	desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(output, *out_offset);
	desc->data_ptrs[bd_idx].blen = op->fft.output_sequence_size * ACC_IQ_SIZE;
	desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_OUT_HARD;
	desc->data_ptrs[bd_idx].last = pwr_en ? 0 : 1;
	desc->data_ptrs[bd_idx].dma_ext = 0;
	desc->m2dlen = win_en ? 3 : 2;
	desc->d2mlen = pwr_en ? 2 : 1;
	desc->ib_ant_offset = op->fft.input_sequence_size;
	desc->num_ant = op->fft.num_antennas_log2 - 3;

	for (i = 0; i < RTE_BBDEV_MAX_CS; i++)
		if (check_bit(op->fft.cs_bitmap, 1 << i))
			num_cs++;
	desc->num_cs = num_cs;

	if (pwr_en && pwr) {
		bd_idx++;
		desc->data_ptrs[bd_idx].address = rte_pktmbuf_iova_offset(pwr, *pwr_offset);
		desc->data_ptrs[bd_idx].blen = num_cs * (1 << op->fft.num_antennas_log2) * 4;
		desc->data_ptrs[bd_idx].blkid = ACC_DMA_BLKID_OUT_SOFT;
		desc->data_ptrs[bd_idx].last = 1;
		desc->data_ptrs[bd_idx].dma_ext = 0;
	}
	desc->ob_cyc_offset = op->fft.output_sequence_size;
	desc->ob_ant_offset = op->fft.output_sequence_size * num_cs;
	desc->op_addr = op;
	return 0;
}

/** Enqueue one FFT operation for device. */
static inline int
vrb_enqueue_fft_one_op(struct acc_queue *q, struct rte_bbdev_fft_op *op,
		uint16_t total_enqueued_cbs)
{
	union acc_dma_desc *desc;
	struct rte_mbuf *input, *output, *pwr, *win;
	uint32_t in_offset, out_offset, pwr_offset, win_offset;
	struct acc_fcw_fft *fcw;

	desc = acc_desc(q, total_enqueued_cbs);
	input = op->fft.base_input.data;
	output = op->fft.base_output.data;
	pwr = op->fft.power_meas_output.data;
	win = op->fft.dewindowing_input.data;
	in_offset = op->fft.base_input.offset;
	out_offset = op->fft.base_output.offset;
	pwr_offset = op->fft.power_meas_output.offset;
	win_offset = op->fft.dewindowing_input.offset;

	fcw = (struct acc_fcw_fft *) (q->fcw_ring +
			((q->sw_ring_head + total_enqueued_cbs) & q->sw_ring_wrap_mask)
			* ACC_MAX_FCW_SIZE);

	if (q->d->device_variant == VRB1_VARIANT)
		vrb1_fcw_fft_fill(op, fcw);
	else
		vrb2_fcw_fft_fill(op, (struct acc_fcw_fft_3 *) fcw);
	vrb_dma_desc_fft_fill(op, &desc->req, input, output, win, pwr,
			&in_offset, &out_offset, &win_offset, &pwr_offset, q->d->device_variant);
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	rte_memdump(stderr, "FCW", fcw, 128);
	rte_memdump(stderr, "Req Desc.", desc, 128);
#endif
	return 1;
}

/* Enqueue FFT operations for device.
 */
static uint16_t
vrb_enqueue_fft(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_fft_op **ops, uint16_t num)
{
	struct acc_queue *q;
	int32_t aq_avail, avail;
	uint16_t i;
	int ret;

	aq_avail = acc_aq_avail(q_data, num);
	if (unlikely((aq_avail <= 0) || (num == 0)))
		return 0;
	q = q_data->queue_private;
	avail = acc_ring_avail_enq(q);

	for (i = 0; i < num; ++i) {
		/* Check if there is space available for further processing. */
		if (unlikely(avail < 1))
			break;
		avail -= 1;
		ret = vrb_enqueue_fft_one_op(q, ops[i], i);
		if (ret < 0)
			break;
	}

	if (unlikely(i == 0))
		return 0; /* Nothing to enqueue. */

	acc_dma_enqueue(q, i, &q_data->queue_stats);

	/* Update stats. */
	q_data->queue_stats.enqueued_count += i;
	q_data->queue_stats.enqueue_err_count += num - i;
	return i;
}


/* Dequeue one FFT operation from device. */
static inline int
vrb_dequeue_fft_one_op(struct rte_bbdev_queue_data *q_data,
		struct acc_queue *q, struct rte_bbdev_fft_op **ref_op,
		uint16_t dequeued_cbs, uint32_t *aq_dequeued)
{
	union acc_dma_desc *desc, atom_desc;
	union acc_dma_rsp_desc rsp;
	struct rte_bbdev_fft_op *op;

	desc = acc_desc_tail(q, dequeued_cbs);
	atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED);

	/* Check fdone bit. */
	if (!(atom_desc.rsp.val & ACC_FDONE))
		return -1;

	rsp.val = atom_desc.rsp.val;
#ifdef RTE_LIBRTE_BBDEV_DEBUG
	rte_memdump(stderr, "Resp", &desc->rsp.val,
			sizeof(desc->rsp.val));
#endif
	/* Dequeue. */
	op = desc->req.op_addr;

	vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, true);

	if (op->status != 0)
		q_data->queue_stats.dequeue_err_count++;

	if (op->status & (1 << RTE_BBDEV_DRV_ERROR))
		vrb_check_ir(q->d);

	*ref_op = op;
	/* One CB (op) was successfully dequeued. */
	return 1;
}


/* Dequeue FFT operations from device. */
static uint16_t
vrb_dequeue_fft(struct rte_bbdev_queue_data *q_data,
		struct rte_bbdev_fft_op **ops, uint16_t num)
{
	struct acc_queue *q = q_data->queue_private;
	uint16_t dequeue_num, i, dequeued_cbs = 0;
	uint32_t avail = acc_ring_avail_deq(q);
	uint32_t aq_dequeued = 0;
	int ret;

	dequeue_num = RTE_MIN(avail, num);

	for (i = 0; i < dequeue_num; ++i) {
		ret = vrb_dequeue_fft_one_op(q_data, q, &ops[i], dequeued_cbs, &aq_dequeued);
		if (ret <= 0)
			break;
		dequeued_cbs += ret;
	}

	q->aq_dequeued += aq_dequeued;
	q->sw_ring_tail += dequeued_cbs;
	/* Update dequeue stats. */
	q_data->queue_stats.dequeued_count += i;
	return i;
}

/* Fill in a frame control word for MLD-TS processing.
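 * Assuming q_m[] carries the modulation order per layer (2/4/6/8), the
 * (q_m >> 1) - 1 mapping packs it into the 2-bit Qmod fields; layers beyond
 * num_layers are marked as disabled by writing the value 3.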
*/ 3849 static inline void 3850 vrb2_fcw_mldts_fill(struct rte_bbdev_mldts_op *op, struct acc_fcw_mldts *fcw) 3851 { 3852 fcw->nrb = op->mldts.num_rbs; 3853 fcw->NLayers = op->mldts.num_layers - 1; 3854 fcw->Qmod0 = (op->mldts.q_m[0] >> 1) - 1; 3855 fcw->Qmod1 = (op->mldts.q_m[1] >> 1) - 1; 3856 fcw->Qmod2 = (op->mldts.q_m[2] >> 1) - 1; 3857 fcw->Qmod3 = (op->mldts.q_m[3] >> 1) - 1; 3858 /* Mark some layers as disabled */ 3859 if (op->mldts.num_layers == 2) { 3860 fcw->Qmod2 = 3; 3861 fcw->Qmod3 = 3; 3862 } 3863 if (op->mldts.num_layers == 3) 3864 fcw->Qmod3 = 3; 3865 fcw->Rrep = op->mldts.r_rep; 3866 fcw->Crep = op->mldts.c_rep; 3867 } 3868 3869 /* Fill in descriptor for one MLD-TS processing operation. */ 3870 static inline int 3871 vrb2_dma_desc_mldts_fill(struct rte_bbdev_mldts_op *op, 3872 struct acc_dma_req_desc *desc, 3873 struct rte_mbuf *input_q, struct rte_mbuf *input_r, 3874 struct rte_mbuf *output, 3875 uint32_t *in_offset, uint32_t *out_offset) 3876 { 3877 uint16_t qsize_per_re[VRB2_MLD_LAY_SIZE] = {8, 12, 16}; /* Layer 2 to 4. */ 3878 uint16_t rsize_per_re[VRB2_MLD_LAY_SIZE] = {14, 26, 42}; 3879 uint16_t sc_factor_per_rrep[VRB2_MLD_RREP_SIZE] = {12, 6, 4, 3, 0, 2}; 3880 uint16_t i, outsize_per_re = 0; 3881 uint32_t sc_num, r_num, q_size, r_size, out_size; 3882 3883 /* Prevent out of range access. */ 3884 if (op->mldts.r_rep > 5) 3885 op->mldts.r_rep = 5; 3886 if (op->mldts.num_layers < 2) 3887 op->mldts.num_layers = 2; 3888 if (op->mldts.num_layers > 4) 3889 op->mldts.num_layers = 4; 3890 for (i = 0; i < op->mldts.num_layers; i++) 3891 outsize_per_re += op->mldts.q_m[i]; 3892 sc_num = op->mldts.num_rbs * RTE_BBDEV_SCPERRB * (op->mldts.c_rep + 1); 3893 r_num = op->mldts.num_rbs * sc_factor_per_rrep[op->mldts.r_rep]; 3894 q_size = qsize_per_re[op->mldts.num_layers - 2] * sc_num; 3895 r_size = rsize_per_re[op->mldts.num_layers - 2] * r_num; 3896 out_size = sc_num * outsize_per_re; 3897 3898 /* FCW already done. */ 3899 acc_header_init(desc); 3900 desc->data_ptrs[1].address = rte_pktmbuf_iova_offset(input_q, *in_offset); 3901 desc->data_ptrs[1].blen = q_size; 3902 desc->data_ptrs[1].blkid = ACC_DMA_BLKID_IN; 3903 desc->data_ptrs[1].last = 0; 3904 desc->data_ptrs[1].dma_ext = 0; 3905 desc->data_ptrs[2].address = rte_pktmbuf_iova_offset(input_r, *in_offset); 3906 desc->data_ptrs[2].blen = r_size; 3907 desc->data_ptrs[2].blkid = ACC_DMA_BLKID_IN_MLD_R; 3908 desc->data_ptrs[2].last = 1; 3909 desc->data_ptrs[2].dma_ext = 0; 3910 desc->data_ptrs[3].address = rte_pktmbuf_iova_offset(output, *out_offset); 3911 desc->data_ptrs[3].blen = out_size; 3912 desc->data_ptrs[3].blkid = ACC_DMA_BLKID_OUT_HARD; 3913 desc->data_ptrs[3].last = 1; 3914 desc->data_ptrs[3].dma_ext = 0; 3915 desc->m2dlen = 3; 3916 desc->d2mlen = 1; 3917 desc->op_addr = op; 3918 desc->cbs_in_tb = 1; 3919 3920 return 0; 3921 } 3922 3923 /* Check whether the MLD operation can be processed as a single operation. 
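 * With c_rep == 0 a single descriptor always suffices. Otherwise the operation
 * can only be kept as one descriptor up to a number of RBs that depends on the
 * layer count and the R repetition factor (max_rb table below); larger
 * operations go through the split, one-descriptor-per-symbol path instead.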
*/ 3924 static inline bool 3925 vrb2_check_mld_r_constraint(struct rte_bbdev_mldts_op *op) { 3926 uint8_t layer_idx, rrep_idx; 3927 uint16_t max_rb[VRB2_MLD_LAY_SIZE][VRB2_MLD_RREP_SIZE] = { 3928 {188, 275, 275, 275, 0, 275}, 3929 {101, 202, 275, 275, 0, 275}, 3930 {62, 124, 186, 248, 0, 275} }; 3931 3932 if (op->mldts.c_rep == 0) 3933 return true; 3934 3935 layer_idx = RTE_MIN(op->mldts.num_layers - VRB2_MLD_MIN_LAYER, 3936 VRB2_MLD_MAX_LAYER - VRB2_MLD_MIN_LAYER); 3937 rrep_idx = RTE_MIN(op->mldts.r_rep, VRB2_MLD_MAX_RREP); 3938 rte_bbdev_log_debug("RB %d index %d %d max %d\n", op->mldts.num_rbs, layer_idx, rrep_idx, 3939 max_rb[layer_idx][rrep_idx]); 3940 3941 return (op->mldts.num_rbs <= max_rb[layer_idx][rrep_idx]); 3942 } 3943 3944 /** Enqueue MLDTS operation split across symbols. */ 3945 static inline int 3946 enqueue_mldts_split_op(struct acc_queue *q, struct rte_bbdev_mldts_op *op, 3947 uint16_t total_enqueued_descs) 3948 { 3949 uint16_t qsize_per_re[VRB2_MLD_LAY_SIZE] = {8, 12, 16}; /* Layer 2 to 4. */ 3950 uint16_t rsize_per_re[VRB2_MLD_LAY_SIZE] = {14, 26, 42}; 3951 uint16_t sc_factor_per_rrep[VRB2_MLD_RREP_SIZE] = {12, 6, 4, 3, 0, 2}; 3952 uint32_t i, outsize_per_re = 0, sc_num, r_num, q_size, r_size, out_size, num_syms; 3953 union acc_dma_desc *desc, *first_desc; 3954 uint16_t desc_idx, symb; 3955 struct rte_mbuf *input_q, *input_r, *output; 3956 uint32_t in_offset, out_offset; 3957 struct acc_fcw_mldts *fcw; 3958 3959 desc_idx = acc_desc_idx(q, total_enqueued_descs); 3960 first_desc = q->ring_addr + desc_idx; 3961 input_q = op->mldts.qhy_input.data; 3962 input_r = op->mldts.r_input.data; 3963 output = op->mldts.output.data; 3964 in_offset = op->mldts.qhy_input.offset; 3965 out_offset = op->mldts.output.offset; 3966 num_syms = op->mldts.c_rep + 1; 3967 fcw = &first_desc->req.fcw_mldts; 3968 vrb2_fcw_mldts_fill(op, fcw); 3969 fcw->Crep = 0; /* C rep forced to zero. */ 3970 3971 /* Prevent out of range access. */ 3972 if (op->mldts.r_rep > 5) 3973 op->mldts.r_rep = 5; 3974 if (op->mldts.num_layers < 2) 3975 op->mldts.num_layers = 2; 3976 if (op->mldts.num_layers > 4) 3977 op->mldts.num_layers = 4; 3978 3979 for (i = 0; i < op->mldts.num_layers; i++) 3980 outsize_per_re += op->mldts.q_m[i]; 3981 sc_num = op->mldts.num_rbs * RTE_BBDEV_SCPERRB; /* C rep forced to zero. 
*/ 3982 r_num = op->mldts.num_rbs * sc_factor_per_rrep[op->mldts.r_rep]; 3983 q_size = qsize_per_re[op->mldts.num_layers - 2] * sc_num; 3984 r_size = rsize_per_re[op->mldts.num_layers - 2] * r_num; 3985 out_size = sc_num * outsize_per_re; 3986 3987 for (symb = 0; symb < num_syms; symb++) { 3988 desc_idx = ((q->sw_ring_head + total_enqueued_descs + symb) & q->sw_ring_wrap_mask); 3989 desc = q->ring_addr + desc_idx; 3990 acc_header_init(&desc->req); 3991 if (symb == 0) 3992 desc->req.cbs_in_tb = num_syms; 3993 else 3994 rte_memcpy(&desc->req.fcw_mldts, fcw, ACC_FCW_MLDTS_BLEN); 3995 desc->req.data_ptrs[1].address = rte_pktmbuf_iova_offset(input_q, in_offset); 3996 desc->req.data_ptrs[1].blen = q_size; 3997 in_offset += q_size; 3998 desc->req.data_ptrs[1].blkid = ACC_DMA_BLKID_IN; 3999 desc->req.data_ptrs[1].last = 0; 4000 desc->req.data_ptrs[1].dma_ext = 0; 4001 desc->req.data_ptrs[2].address = rte_pktmbuf_iova_offset(input_r, 0); 4002 desc->req.data_ptrs[2].blen = r_size; 4003 desc->req.data_ptrs[2].blkid = ACC_DMA_BLKID_IN_MLD_R; 4004 desc->req.data_ptrs[2].last = 1; 4005 desc->req.data_ptrs[2].dma_ext = 0; 4006 desc->req.data_ptrs[3].address = rte_pktmbuf_iova_offset(output, out_offset); 4007 desc->req.data_ptrs[3].blen = out_size; 4008 out_offset += out_size; 4009 desc->req.data_ptrs[3].blkid = ACC_DMA_BLKID_OUT_HARD; 4010 desc->req.data_ptrs[3].last = 1; 4011 desc->req.data_ptrs[3].dma_ext = 0; 4012 desc->req.m2dlen = VRB2_MLD_M2DLEN; 4013 desc->req.d2mlen = 1; 4014 desc->req.op_addr = op; 4015 4016 #ifdef RTE_LIBRTE_BBDEV_DEBUG 4017 rte_memdump(stderr, "FCW", &desc->req.fcw_mldts, sizeof(desc->req.fcw_mldts)); 4018 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 4019 #endif 4020 } 4021 desc->req.sdone_enable = 0; 4022 4023 return num_syms; 4024 } 4025 4026 /** Enqueue one MLDTS operation. */ 4027 static inline int 4028 enqueue_mldts_one_op(struct acc_queue *q, struct rte_bbdev_mldts_op *op, 4029 uint16_t total_enqueued_descs) 4030 { 4031 union acc_dma_desc *desc; 4032 struct rte_mbuf *input_q, *input_r, *output; 4033 uint32_t in_offset, out_offset; 4034 struct acc_fcw_mldts *fcw; 4035 4036 desc = acc_desc(q, total_enqueued_descs); 4037 input_q = op->mldts.qhy_input.data; 4038 input_r = op->mldts.r_input.data; 4039 output = op->mldts.output.data; 4040 in_offset = op->mldts.qhy_input.offset; 4041 out_offset = op->mldts.output.offset; 4042 fcw = &desc->req.fcw_mldts; 4043 vrb2_fcw_mldts_fill(op, fcw); 4044 vrb2_dma_desc_mldts_fill(op, &desc->req, input_q, input_r, output, 4045 &in_offset, &out_offset); 4046 #ifdef RTE_LIBRTE_BBDEV_DEBUG 4047 rte_memdump(stderr, "FCW", &desc->req.fcw_mldts, sizeof(desc->req.fcw_mldts)); 4048 rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); 4049 #endif 4050 return 1; 4051 } 4052 4053 /* Enqueue MLDTS operations. */ 4054 static uint16_t 4055 vrb2_enqueue_mldts(struct rte_bbdev_queue_data *q_data, 4056 struct rte_bbdev_mldts_op **ops, uint16_t num) 4057 { 4058 int32_t aq_avail, avail; 4059 struct acc_queue *q = q_data->queue_private; 4060 uint16_t i, enqueued_descs = 0, descs_in_op; 4061 int ret; 4062 bool as_one_op; 4063 4064 aq_avail = acc_aq_avail(q_data, num); 4065 if (unlikely((aq_avail <= 0) || (num == 0))) 4066 return 0; 4067 avail = acc_ring_avail_enq(q); 4068 4069 for (i = 0; i < num; ++i) { 4070 as_one_op = vrb2_check_mld_r_constraint(ops[i]); 4071 descs_in_op = as_one_op ? 1 : ops[i]->mldts.c_rep + 1; 4072 4073 /* Check if there are available space for further processing. 
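 * A split operation consumes (c_rep + 1) descriptors, so availability is
 * checked against descs_in_op rather than against a single slot.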
*/ 4074 if (unlikely(avail < descs_in_op)) { 4075 acc_enqueue_ring_full(q_data); 4076 break; 4077 } 4078 avail -= descs_in_op; 4079 4080 if (as_one_op) 4081 ret = enqueue_mldts_one_op(q, ops[i], enqueued_descs); 4082 else 4083 ret = enqueue_mldts_split_op(q, ops[i], enqueued_descs); 4084 4085 if (ret < 0) { 4086 acc_enqueue_invalid(q_data); 4087 break; 4088 } 4089 4090 enqueued_descs += ret; 4091 } 4092 4093 if (unlikely(i == 0)) 4094 return 0; /* Nothing to enqueue. */ 4095 4096 acc_dma_enqueue(q, enqueued_descs, &q_data->queue_stats); 4097 4098 /* Update stats. */ 4099 q_data->queue_stats.enqueued_count += i; 4100 q_data->queue_stats.enqueue_err_count += num - i; 4101 return i; 4102 } 4103 4104 /* 4105 * Dequeue one MLDTS operation. 4106 * This may have been split over multiple descriptors. 4107 */ 4108 static inline int 4109 dequeue_mldts_one_op(struct rte_bbdev_queue_data *q_data, 4110 struct acc_queue *q, struct rte_bbdev_mldts_op **ref_op, 4111 uint16_t dequeued_ops, uint32_t *aq_dequeued) 4112 { 4113 union acc_dma_desc *desc, atom_desc, *last_desc; 4114 union acc_dma_rsp_desc rsp; 4115 struct rte_bbdev_mldts_op *op; 4116 uint8_t descs_in_op, i; 4117 4118 desc = acc_desc_tail(q, dequeued_ops); 4119 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 4120 4121 /* Check fdone bit. */ 4122 if (!(atom_desc.rsp.val & ACC_FDONE)) 4123 return -1; 4124 4125 descs_in_op = desc->req.cbs_in_tb; 4126 if (descs_in_op > 1) { 4127 /* Get last CB. */ 4128 last_desc = acc_desc_tail(q, dequeued_ops + descs_in_op - 1); 4129 /* Check if last op is ready to dequeue by checking fdone bit. If not exit. */ 4130 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc, __ATOMIC_RELAXED); 4131 if (!(atom_desc.rsp.val & ACC_FDONE)) 4132 return -1; 4133 #ifdef RTE_LIBRTE_BBDEV_DEBUG 4134 rte_memdump(stderr, "Last Resp", &last_desc->rsp.val, sizeof(desc->rsp.val)); 4135 #endif 4136 /* Check each operation iteratively using fdone. */ 4137 for (i = 1; i < descs_in_op - 1; i++) { 4138 last_desc = q->ring_addr + ((q->sw_ring_tail + dequeued_ops + i) 4139 & q->sw_ring_wrap_mask); 4140 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc, 4141 __ATOMIC_RELAXED); 4142 if (!(atom_desc.rsp.val & ACC_FDONE)) 4143 return -1; 4144 } 4145 } 4146 #ifdef RTE_LIBRTE_BBDEV_DEBUG 4147 rte_memdump(stderr, "Resp", &desc->rsp.val, sizeof(desc->rsp.val)); 4148 #endif 4149 /* Dequeue. */ 4150 op = desc->req.op_addr; 4151 4152 /* Clearing status, it will be set based on response. */ 4153 op->status = 0; 4154 4155 for (i = 0; i < descs_in_op; i++) { 4156 desc = q->ring_addr + ((q->sw_ring_tail + dequeued_ops + i) & q->sw_ring_wrap_mask); 4157 atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc, __ATOMIC_RELAXED); 4158 rsp.val = atom_desc.rsp.val; 4159 4160 vrb_update_dequeued_operation(desc, rsp, &op->status, aq_dequeued, true, false); 4161 } 4162 4163 if (op->status != 0) 4164 q_data->queue_stats.dequeue_err_count++; 4165 if (op->status & (1 << RTE_BBDEV_DRV_ERROR)) 4166 vrb_check_ir(q->d); 4167 4168 *ref_op = op; 4169 4170 return descs_in_op; 4171 } 4172 4173 /* Dequeue MLDTS operations from VRB2 device. 
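 * Note that dequeue_mldts_one_op() returns a descriptor count, so the software
 * ring tail advances by descriptors while the stats count completed operations.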
*/ 4174 static uint16_t 4175 vrb2_dequeue_mldts(struct rte_bbdev_queue_data *q_data, 4176 struct rte_bbdev_mldts_op **ops, uint16_t num) 4177 { 4178 struct acc_queue *q = q_data->queue_private; 4179 uint16_t dequeue_num, i, dequeued_cbs = 0; 4180 uint32_t avail = acc_ring_avail_deq(q); 4181 uint32_t aq_dequeued = 0; 4182 int ret; 4183 4184 dequeue_num = RTE_MIN(avail, num); 4185 4186 for (i = 0; i < dequeue_num; ++i) { 4187 ret = dequeue_mldts_one_op(q_data, q, &ops[i], dequeued_cbs, &aq_dequeued); 4188 if (ret <= 0) 4189 break; 4190 dequeued_cbs += ret; 4191 } 4192 4193 q->aq_dequeued += aq_dequeued; 4194 q->sw_ring_tail += dequeued_cbs; 4195 /* Update enqueue stats. */ 4196 q_data->queue_stats.dequeued_count += i; 4197 return i; 4198 } 4199 4200 /* Initialization Function */ 4201 static void 4202 vrb_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv) 4203 { 4204 struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); 4205 struct acc_device *d = dev->data->dev_private; 4206 4207 dev->dev_ops = &vrb_bbdev_ops; 4208 dev->enqueue_enc_ops = vrb_enqueue_enc; 4209 dev->enqueue_dec_ops = vrb_enqueue_dec; 4210 dev->dequeue_enc_ops = vrb_dequeue_enc; 4211 dev->dequeue_dec_ops = vrb_dequeue_dec; 4212 dev->enqueue_ldpc_enc_ops = vrb_enqueue_ldpc_enc; 4213 dev->enqueue_ldpc_dec_ops = vrb_enqueue_ldpc_dec; 4214 dev->dequeue_ldpc_enc_ops = vrb_dequeue_ldpc_enc; 4215 dev->dequeue_ldpc_dec_ops = vrb_dequeue_ldpc_dec; 4216 dev->enqueue_fft_ops = vrb_enqueue_fft; 4217 dev->dequeue_fft_ops = vrb_dequeue_fft; 4218 dev->enqueue_mldts_ops = vrb2_enqueue_mldts; 4219 dev->dequeue_mldts_ops = vrb2_dequeue_mldts; 4220 4221 d->pf_device = !strcmp(drv->driver.name, RTE_STR(VRB_PF_DRIVER_NAME)); 4222 d->mmio_base = pci_dev->mem_resource[0].addr; 4223 4224 /* Device variant specific handling. */ 4225 if ((pci_dev->id.device_id == RTE_VRB1_PF_DEVICE_ID) || 4226 (pci_dev->id.device_id == RTE_VRB1_VF_DEVICE_ID)) { 4227 d->device_variant = VRB1_VARIANT; 4228 d->queue_offset = vrb1_queue_offset; 4229 d->num_qgroups = VRB1_NUM_QGRPS; 4230 d->num_aqs = VRB1_NUM_AQS; 4231 if (d->pf_device) 4232 d->reg_addr = &vrb1_pf_reg_addr; 4233 else 4234 d->reg_addr = &vrb1_vf_reg_addr; 4235 } else { 4236 d->device_variant = VRB2_VARIANT; 4237 d->queue_offset = vrb2_queue_offset; 4238 d->num_qgroups = VRB2_NUM_QGRPS; 4239 d->num_aqs = VRB2_NUM_AQS; 4240 if (d->pf_device) 4241 d->reg_addr = &vrb2_pf_reg_addr; 4242 else 4243 d->reg_addr = &vrb2_vf_reg_addr; 4244 } 4245 4246 rte_bbdev_log_debug("Init device %s [%s] @ vaddr %p paddr %#"PRIx64"", 4247 drv->driver.name, dev->data->name, 4248 (void *)pci_dev->mem_resource[0].addr, 4249 pci_dev->mem_resource[0].phys_addr); 4250 } 4251 4252 static int vrb_pci_probe(struct rte_pci_driver *pci_drv, 4253 struct rte_pci_device *pci_dev) 4254 { 4255 struct rte_bbdev *bbdev = NULL; 4256 char dev_name[RTE_BBDEV_NAME_MAX_LEN]; 4257 4258 if (pci_dev == NULL) { 4259 rte_bbdev_log(ERR, "NULL PCI device"); 4260 return -EINVAL; 4261 } 4262 4263 rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name)); 4264 4265 /* Allocate memory to be used privately by drivers. */ 4266 bbdev = rte_bbdev_allocate(pci_dev->device.name); 4267 if (bbdev == NULL) 4268 return -ENODEV; 4269 4270 /* allocate device private memory. 
*/ 4271 bbdev->data->dev_private = rte_zmalloc_socket(dev_name, 4272 sizeof(struct acc_device), RTE_CACHE_LINE_SIZE, 4273 pci_dev->device.numa_node); 4274 4275 if (bbdev->data->dev_private == NULL) { 4276 rte_bbdev_log(CRIT, 4277 "Allocate of %zu bytes for device \"%s\" failed", 4278 sizeof(struct acc_device), dev_name); 4279 rte_bbdev_release(bbdev); 4280 return -ENOMEM; 4281 } 4282 4283 /* Fill HW specific part of device structure. */ 4284 bbdev->device = &pci_dev->device; 4285 bbdev->intr_handle = pci_dev->intr_handle; 4286 bbdev->data->socket_id = pci_dev->device.numa_node; 4287 4288 /* Invoke device initialization function. */ 4289 vrb_bbdev_init(bbdev, pci_drv); 4290 4291 rte_bbdev_log_debug("Initialised bbdev %s (id = %u)", 4292 dev_name, bbdev->data->dev_id); 4293 return 0; 4294 } 4295 4296 static struct rte_pci_driver vrb_pci_pf_driver = { 4297 .probe = vrb_pci_probe, 4298 .remove = acc_pci_remove, 4299 .id_table = pci_id_vrb_pf_map, 4300 .drv_flags = RTE_PCI_DRV_NEED_MAPPING 4301 }; 4302 4303 static struct rte_pci_driver vrb_pci_vf_driver = { 4304 .probe = vrb_pci_probe, 4305 .remove = acc_pci_remove, 4306 .id_table = pci_id_vrb_vf_map, 4307 .drv_flags = RTE_PCI_DRV_NEED_MAPPING 4308 }; 4309 4310 RTE_PMD_REGISTER_PCI(VRB_PF_DRIVER_NAME, vrb_pci_pf_driver); 4311 RTE_PMD_REGISTER_PCI_TABLE(VRB_PF_DRIVER_NAME, pci_id_vrb_pf_map); 4312 RTE_PMD_REGISTER_PCI(VRB_VF_DRIVER_NAME, vrb_pci_vf_driver); 4313 RTE_PMD_REGISTER_PCI_TABLE(VRB_VF_DRIVER_NAME, pci_id_vrb_vf_map); 4314 4315 /* Initial configuration of a VRB1 device prior to running configure(). */ 4316 int 4317 vrb1_configure(const char *dev_name, struct rte_acc_conf *conf) 4318 { 4319 rte_bbdev_log(INFO, "vrb1_configure"); 4320 uint32_t value, address, status; 4321 int qg_idx, template_idx, vf_idx, acc, i, rlim, alen, timestamp, totalQgs, numEngines; 4322 int numQgs, numQqsAcc; 4323 struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name); 4324 4325 /* Compile time checks. */ 4326 RTE_BUILD_BUG_ON(sizeof(struct acc_dma_req_desc) != 256); 4327 RTE_BUILD_BUG_ON(sizeof(union acc_dma_desc) != 256); 4328 RTE_BUILD_BUG_ON(sizeof(struct acc_fcw_td) != 24); 4329 RTE_BUILD_BUG_ON(sizeof(struct acc_fcw_te) != 32); 4330 4331 if (bbdev == NULL) { 4332 rte_bbdev_log(ERR, 4333 "Invalid dev_name (%s), or device is not yet initialised", 4334 dev_name); 4335 return -ENODEV; 4336 } 4337 struct acc_device *d = bbdev->data->dev_private; 4338 4339 /* Store configuration. */ 4340 rte_memcpy(&d->acc_conf, conf, sizeof(d->acc_conf)); 4341 4342 /* Check we are already out of PG. */ 4343 status = acc_reg_read(d, VRB1_PfHiSectionPowerGatingAck); 4344 if (status > 0) { 4345 if (status != VRB1_PG_MASK_0) { 4346 rte_bbdev_log(ERR, "Unexpected status %x %x", 4347 status, VRB1_PG_MASK_0); 4348 return -ENODEV; 4349 } 4350 /* Clock gate sections that will be un-PG. */ 4351 acc_reg_write(d, VRB1_PfHiClkGateHystReg, VRB1_CLK_DIS); 4352 /* Un-PG required sections. 
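 * The sections are un-power-gated in three steps (VRB1_PG_MASK_1/2/3), with the
 * acknowledge register checked after each step before clocks are re-enabled.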
*/ 4353 acc_reg_write(d, VRB1_PfHiSectionPowerGatingReq, 4354 VRB1_PG_MASK_1); 4355 status = acc_reg_read(d, VRB1_PfHiSectionPowerGatingAck); 4356 if (status != VRB1_PG_MASK_1) { 4357 rte_bbdev_log(ERR, "Unexpected status %x %x", 4358 status, VRB1_PG_MASK_1); 4359 return -ENODEV; 4360 } 4361 acc_reg_write(d, VRB1_PfHiSectionPowerGatingReq, 4362 VRB1_PG_MASK_2); 4363 status = acc_reg_read(d, VRB1_PfHiSectionPowerGatingAck); 4364 if (status != VRB1_PG_MASK_2) { 4365 rte_bbdev_log(ERR, "Unexpected status %x %x", 4366 status, VRB1_PG_MASK_2); 4367 return -ENODEV; 4368 } 4369 acc_reg_write(d, VRB1_PfHiSectionPowerGatingReq, 4370 VRB1_PG_MASK_3); 4371 status = acc_reg_read(d, VRB1_PfHiSectionPowerGatingAck); 4372 if (status != VRB1_PG_MASK_3) { 4373 rte_bbdev_log(ERR, "Unexpected status %x %x", 4374 status, VRB1_PG_MASK_3); 4375 return -ENODEV; 4376 } 4377 /* Enable clocks for all sections. */ 4378 acc_reg_write(d, VRB1_PfHiClkGateHystReg, VRB1_CLK_EN); 4379 } 4380 4381 /* Explicitly releasing AXI as this may be stopped after PF FLR/BME. */ 4382 address = VRB1_PfDmaAxiControl; 4383 value = 1; 4384 acc_reg_write(d, address, value); 4385 4386 /* Set the fabric mode. */ 4387 address = VRB1_PfFabricM2iBufferReg; 4388 value = VRB1_FABRIC_MODE; 4389 acc_reg_write(d, address, value); 4390 4391 /* Set default descriptor signature. */ 4392 address = VRB1_PfDmaDescriptorSignatuture; 4393 value = 0; 4394 acc_reg_write(d, address, value); 4395 4396 /* Enable the Error Detection in DMA. */ 4397 value = VRB1_CFG_DMA_ERROR; 4398 address = VRB1_PfDmaErrorDetectionEn; 4399 acc_reg_write(d, address, value); 4400 4401 /* AXI Cache configuration. */ 4402 value = VRB1_CFG_AXI_CACHE; 4403 address = VRB1_PfDmaAxcacheReg; 4404 acc_reg_write(d, address, value); 4405 4406 /* AXI Response configuration. */ 4407 acc_reg_write(d, VRB1_PfDmaCfgRrespBresp, 0x0); 4408 4409 /* Default DMA Configuration (Qmgr Enabled). */ 4410 address = VRB1_PfDmaConfig0Reg; 4411 value = 0; 4412 acc_reg_write(d, address, value); 4413 address = VRB1_PfDmaQmanen; 4414 value = 0; 4415 acc_reg_write(d, address, value); 4416 4417 /* Default RLIM/ALEN configuration. */ 4418 rlim = 0; 4419 alen = 1; 4420 timestamp = 0; 4421 address = VRB1_PfDmaConfig1Reg; 4422 value = (1 << 31) + (rlim << 8) + (timestamp << 6) + alen; 4423 acc_reg_write(d, address, value); 4424 4425 /* Default FFT configuration. */ 4426 address = VRB1_PfFftConfig0; 4427 value = VRB1_FFT_CFG_0; 4428 acc_reg_write(d, address, value); 4429 4430 /* Configure DMA Qmanager addresses. */ 4431 address = VRB1_PfDmaQmgrAddrReg; 4432 value = VRB1_PfQmgrEgressQueuesTemplate; 4433 acc_reg_write(d, address, value); 4434 4435 /* ===== Qmgr Configuration ===== */ 4436 /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL. */ 4437 totalQgs = conf->q_ul_4g.num_qgroups + 4438 conf->q_ul_5g.num_qgroups + 4439 conf->q_dl_4g.num_qgroups + 4440 conf->q_dl_5g.num_qgroups + 4441 conf->q_fft.num_qgroups; 4442 for (qg_idx = 0; qg_idx < VRB1_NUM_QGRPS; qg_idx++) { 4443 address = VRB1_PfQmgrDepthLog2Grp + ACC_BYTES_IN_WORD * qg_idx; 4444 value = aqDepth(qg_idx, conf); 4445 acc_reg_write(d, address, value); 4446 address = VRB1_PfQmgrTholdGrp + ACC_BYTES_IN_WORD * qg_idx; 4447 value = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1)); 4448 acc_reg_write(d, address, value); 4449 } 4450 4451 /* Template Priority in incremental order. 
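 * Each template receives the same four priority words, one register per
 * priority level (ACC_TMPL_PRI_0 to ACC_TMPL_PRI_3).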
*/ 4452 for (template_idx = 0; template_idx < ACC_NUM_TMPL; 4453 template_idx++) { 4454 address = VRB1_PfQmgrGrpTmplateReg0Indx + ACC_BYTES_IN_WORD * template_idx; 4455 value = ACC_TMPL_PRI_0; 4456 acc_reg_write(d, address, value); 4457 address = VRB1_PfQmgrGrpTmplateReg1Indx + ACC_BYTES_IN_WORD * template_idx; 4458 value = ACC_TMPL_PRI_1; 4459 acc_reg_write(d, address, value); 4460 address = VRB1_PfQmgrGrpTmplateReg2indx + ACC_BYTES_IN_WORD * template_idx; 4461 value = ACC_TMPL_PRI_2; 4462 acc_reg_write(d, address, value); 4463 address = VRB1_PfQmgrGrpTmplateReg3Indx + ACC_BYTES_IN_WORD * template_idx; 4464 value = ACC_TMPL_PRI_3; 4465 acc_reg_write(d, address, value); 4466 } 4467 4468 address = VRB1_PfQmgrGrpPriority; 4469 value = VRB1_CFG_QMGR_HI_P; 4470 acc_reg_write(d, address, value); 4471 4472 /* Template Configuration. */ 4473 for (template_idx = 0; template_idx < ACC_NUM_TMPL; 4474 template_idx++) { 4475 value = 0; 4476 address = VRB1_PfQmgrGrpTmplateReg4Indx 4477 + ACC_BYTES_IN_WORD * template_idx; 4478 acc_reg_write(d, address, value); 4479 } 4480 /* 4GUL */ 4481 numQgs = conf->q_ul_4g.num_qgroups; 4482 numQqsAcc = 0; 4483 value = 0; 4484 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4485 value |= (1 << qg_idx); 4486 for (template_idx = VRB1_SIG_UL_4G; 4487 template_idx <= VRB1_SIG_UL_4G_LAST; 4488 template_idx++) { 4489 address = VRB1_PfQmgrGrpTmplateReg4Indx 4490 + ACC_BYTES_IN_WORD * template_idx; 4491 acc_reg_write(d, address, value); 4492 } 4493 /* 5GUL */ 4494 numQqsAcc += numQgs; 4495 numQgs = conf->q_ul_5g.num_qgroups; 4496 value = 0; 4497 numEngines = 0; 4498 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4499 value |= (1 << qg_idx); 4500 for (template_idx = VRB1_SIG_UL_5G; 4501 template_idx <= VRB1_SIG_UL_5G_LAST; 4502 template_idx++) { 4503 /* Check engine power-on status */ 4504 address = VRB1_PfFecUl5gIbDebugReg + ACC_ENGINE_OFFSET * template_idx; 4505 status = (acc_reg_read(d, address) >> 4) & 0x7; 4506 address = VRB1_PfQmgrGrpTmplateReg4Indx 4507 + ACC_BYTES_IN_WORD * template_idx; 4508 if (status == 1) { 4509 acc_reg_write(d, address, value); 4510 numEngines++; 4511 } else 4512 acc_reg_write(d, address, 0); 4513 } 4514 rte_bbdev_log(INFO, "Number of 5GUL engines %d", numEngines); 4515 /* 4GDL */ 4516 numQqsAcc += numQgs; 4517 numQgs = conf->q_dl_4g.num_qgroups; 4518 value = 0; 4519 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4520 value |= (1 << qg_idx); 4521 for (template_idx = VRB1_SIG_DL_4G; 4522 template_idx <= VRB1_SIG_DL_4G_LAST; 4523 template_idx++) { 4524 address = VRB1_PfQmgrGrpTmplateReg4Indx 4525 + ACC_BYTES_IN_WORD * template_idx; 4526 acc_reg_write(d, address, value); 4527 } 4528 /* 5GDL */ 4529 numQqsAcc += numQgs; 4530 numQgs = conf->q_dl_5g.num_qgroups; 4531 value = 0; 4532 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4533 value |= (1 << qg_idx); 4534 for (template_idx = VRB1_SIG_DL_5G; 4535 template_idx <= VRB1_SIG_DL_5G_LAST; 4536 template_idx++) { 4537 address = VRB1_PfQmgrGrpTmplateReg4Indx 4538 + ACC_BYTES_IN_WORD * template_idx; 4539 acc_reg_write(d, address, value); 4540 } 4541 /* FFT */ 4542 numQqsAcc += numQgs; 4543 numQgs = conf->q_fft.num_qgroups; 4544 value = 0; 4545 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4546 value |= (1 << qg_idx); 4547 for (template_idx = VRB1_SIG_FFT; 4548 template_idx <= VRB1_SIG_FFT_LAST; 4549 template_idx++) { 4550 address = VRB1_PfQmgrGrpTmplateReg4Indx 4551 + ACC_BYTES_IN_WORD * template_idx; 4552 
acc_reg_write(d, address, value); 4553 } 4554 4555 /* Queue Group Function mapping. */ 4556 int qman_func_id[8] = {0, 2, 1, 3, 4, 0, 0, 0}; 4557 value = 0; 4558 for (qg_idx = 0; qg_idx < ACC_NUM_QGRPS_PER_WORD; qg_idx++) { 4559 acc = accFromQgid(qg_idx, conf); 4560 value |= qman_func_id[acc] << (qg_idx * 4); 4561 } 4562 acc_reg_write(d, VRB1_PfQmgrGrpFunction0, value); 4563 value = 0; 4564 for (qg_idx = 0; qg_idx < ACC_NUM_QGRPS_PER_WORD; qg_idx++) { 4565 acc = accFromQgid(qg_idx + ACC_NUM_QGRPS_PER_WORD, conf); 4566 value |= qman_func_id[acc] << (qg_idx * 4); 4567 } 4568 acc_reg_write(d, VRB1_PfQmgrGrpFunction1, value); 4569 4570 /* Configuration of the Arbitration QGroup depth to 1. */ 4571 for (qg_idx = 0; qg_idx < VRB1_NUM_QGRPS; qg_idx++) { 4572 address = VRB1_PfQmgrArbQDepthGrp + 4573 ACC_BYTES_IN_WORD * qg_idx; 4574 value = 0; 4575 acc_reg_write(d, address, value); 4576 } 4577 4578 /* This pointer to ARAM (256kB) is shifted by 2 (4B per register). */ 4579 uint32_t aram_address = 0; 4580 for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) { 4581 for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) { 4582 address = VRB1_PfQmgrVfBaseAddr + vf_idx 4583 * ACC_BYTES_IN_WORD + qg_idx 4584 * ACC_BYTES_IN_WORD * 64; 4585 value = aram_address; 4586 acc_reg_write(d, address, value); 4587 /* Offset ARAM Address for next memory bank - increment of 4B. */ 4588 aram_address += aqNum(qg_idx, conf) * 4589 (1 << aqDepth(qg_idx, conf)); 4590 } 4591 } 4592 4593 if (aram_address > VRB1_WORDS_IN_ARAM_SIZE) { 4594 rte_bbdev_log(ERR, "ARAM Configuration not fitting %d %d\n", 4595 aram_address, VRB1_WORDS_IN_ARAM_SIZE); 4596 return -EINVAL; 4597 } 4598 4599 /* Performance tuning. */ 4600 acc_reg_write(d, VRB1_PfFabricI2Mdma_weight, 0x0FFF); 4601 acc_reg_write(d, VRB1_PfDma4gdlIbThld, 0x1f10); 4602 4603 /* ==== HI Configuration ==== */ 4604 4605 /* No Info Ring/MSI by default. */ 4606 address = VRB1_PfHiInfoRingIntWrEnRegPf; 4607 value = 0; 4608 acc_reg_write(d, address, value); 4609 address = VRB1_PfHiCfgMsiIntWrEnRegPf; 4610 value = 0xFFFFFFFF; 4611 acc_reg_write(d, address, value); 4612 /* Prevent Block on Transmit Error. */ 4613 address = VRB1_PfHiBlockTransmitOnErrorEn; 4614 value = 0; 4615 acc_reg_write(d, address, value); 4616 /* Prevents to drop MSI. */ 4617 address = VRB1_PfHiMsiDropEnableReg; 4618 value = 0; 4619 acc_reg_write(d, address, value); 4620 /* Set the PF Mode register. */ 4621 address = VRB1_PfHiPfMode; 4622 value = (conf->pf_mode_en) ? ACC_PF_VAL : 0; 4623 acc_reg_write(d, address, value); 4624 4625 /* QoS overflow init. */ 4626 value = 1; 4627 address = VRB1_PfQosmonAEvalOverflow0; 4628 acc_reg_write(d, address, value); 4629 address = VRB1_PfQosmonBEvalOverflow0; 4630 acc_reg_write(d, address, value); 4631 4632 /* Configure the FFT RAM LUT. 
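 * The LUT below (VRB1_FFT_RAM_SIZE entries, each value fitting in 17 bits) is
 * loaded once through the FFT RAM page-access window, which is closed again
 * once the table has been written.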
*/ 4633 uint32_t fft_lut[VRB1_FFT_RAM_SIZE] = { 4634 0x1FFFF, 0x1FFFF, 0x1FFFE, 0x1FFFA, 0x1FFF6, 0x1FFF1, 0x1FFEA, 0x1FFE2, 4635 0x1FFD9, 0x1FFCE, 0x1FFC2, 0x1FFB5, 0x1FFA7, 0x1FF98, 0x1FF87, 0x1FF75, 4636 0x1FF62, 0x1FF4E, 0x1FF38, 0x1FF21, 0x1FF09, 0x1FEF0, 0x1FED6, 0x1FEBA, 4637 0x1FE9D, 0x1FE7F, 0x1FE5F, 0x1FE3F, 0x1FE1D, 0x1FDFA, 0x1FDD5, 0x1FDB0, 4638 0x1FD89, 0x1FD61, 0x1FD38, 0x1FD0D, 0x1FCE1, 0x1FCB4, 0x1FC86, 0x1FC57, 4639 0x1FC26, 0x1FBF4, 0x1FBC1, 0x1FB8D, 0x1FB58, 0x1FB21, 0x1FAE9, 0x1FAB0, 4640 0x1FA75, 0x1FA3A, 0x1F9FD, 0x1F9BF, 0x1F980, 0x1F93F, 0x1F8FD, 0x1F8BA, 4641 0x1F876, 0x1F831, 0x1F7EA, 0x1F7A3, 0x1F75A, 0x1F70F, 0x1F6C4, 0x1F677, 4642 0x1F629, 0x1F5DA, 0x1F58A, 0x1F539, 0x1F4E6, 0x1F492, 0x1F43D, 0x1F3E7, 4643 0x1F38F, 0x1F337, 0x1F2DD, 0x1F281, 0x1F225, 0x1F1C8, 0x1F169, 0x1F109, 4644 0x1F0A8, 0x1F046, 0x1EFE2, 0x1EF7D, 0x1EF18, 0x1EEB0, 0x1EE48, 0x1EDDF, 4645 0x1ED74, 0x1ED08, 0x1EC9B, 0x1EC2D, 0x1EBBE, 0x1EB4D, 0x1EADB, 0x1EA68, 4646 0x1E9F4, 0x1E97F, 0x1E908, 0x1E891, 0x1E818, 0x1E79E, 0x1E722, 0x1E6A6, 4647 0x1E629, 0x1E5AA, 0x1E52A, 0x1E4A9, 0x1E427, 0x1E3A3, 0x1E31F, 0x1E299, 4648 0x1E212, 0x1E18A, 0x1E101, 0x1E076, 0x1DFEB, 0x1DF5E, 0x1DED0, 0x1DE41, 4649 0x1DDB1, 0x1DD20, 0x1DC8D, 0x1DBFA, 0x1DB65, 0x1DACF, 0x1DA38, 0x1D9A0, 4650 0x1D907, 0x1D86C, 0x1D7D1, 0x1D734, 0x1D696, 0x1D5F7, 0x1D557, 0x1D4B6, 4651 0x1D413, 0x1D370, 0x1D2CB, 0x1D225, 0x1D17E, 0x1D0D6, 0x1D02D, 0x1CF83, 4652 0x1CED8, 0x1CE2B, 0x1CD7E, 0x1CCCF, 0x1CC1F, 0x1CB6E, 0x1CABC, 0x1CA09, 4653 0x1C955, 0x1C89F, 0x1C7E9, 0x1C731, 0x1C679, 0x1C5BF, 0x1C504, 0x1C448, 4654 0x1C38B, 0x1C2CD, 0x1C20E, 0x1C14E, 0x1C08C, 0x1BFCA, 0x1BF06, 0x1BE42, 4655 0x1BD7C, 0x1BCB5, 0x1BBED, 0x1BB25, 0x1BA5B, 0x1B990, 0x1B8C4, 0x1B7F6, 4656 0x1B728, 0x1B659, 0x1B589, 0x1B4B7, 0x1B3E5, 0x1B311, 0x1B23D, 0x1B167, 4657 0x1B091, 0x1AFB9, 0x1AEE0, 0x1AE07, 0x1AD2C, 0x1AC50, 0x1AB73, 0x1AA95, 4658 0x1A9B6, 0x1A8D6, 0x1A7F6, 0x1A714, 0x1A631, 0x1A54D, 0x1A468, 0x1A382, 4659 0x1A29A, 0x1A1B2, 0x1A0C9, 0x19FDF, 0x19EF4, 0x19E08, 0x19D1B, 0x19C2D, 4660 0x19B3E, 0x19A4E, 0x1995D, 0x1986B, 0x19778, 0x19684, 0x1958F, 0x19499, 4661 0x193A2, 0x192AA, 0x191B1, 0x190B8, 0x18FBD, 0x18EC1, 0x18DC4, 0x18CC7, 4662 0x18BC8, 0x18AC8, 0x189C8, 0x188C6, 0x187C4, 0x186C1, 0x185BC, 0x184B7, 4663 0x183B1, 0x182AA, 0x181A2, 0x18099, 0x17F8F, 0x17E84, 0x17D78, 0x17C6C, 4664 0x17B5E, 0x17A4F, 0x17940, 0x17830, 0x1771E, 0x1760C, 0x174F9, 0x173E5, 4665 0x172D1, 0x171BB, 0x170A4, 0x16F8D, 0x16E74, 0x16D5B, 0x16C41, 0x16B26, 4666 0x16A0A, 0x168ED, 0x167CF, 0x166B1, 0x16592, 0x16471, 0x16350, 0x1622E, 4667 0x1610B, 0x15FE8, 0x15EC3, 0x15D9E, 0x15C78, 0x15B51, 0x15A29, 0x15900, 4668 0x157D7, 0x156AC, 0x15581, 0x15455, 0x15328, 0x151FB, 0x150CC, 0x14F9D, 4669 0x14E6D, 0x14D3C, 0x14C0A, 0x14AD8, 0x149A4, 0x14870, 0x1473B, 0x14606, 4670 0x144CF, 0x14398, 0x14260, 0x14127, 0x13FEE, 0x13EB3, 0x13D78, 0x13C3C, 4671 0x13B00, 0x139C2, 0x13884, 0x13745, 0x13606, 0x134C5, 0x13384, 0x13242, 4672 0x130FF, 0x12FBC, 0x12E78, 0x12D33, 0x12BEE, 0x12AA7, 0x12960, 0x12819, 4673 0x126D0, 0x12587, 0x1243D, 0x122F3, 0x121A8, 0x1205C, 0x11F0F, 0x11DC2, 4674 0x11C74, 0x11B25, 0x119D6, 0x11886, 0x11735, 0x115E3, 0x11491, 0x1133F, 4675 0x111EB, 0x11097, 0x10F42, 0x10DED, 0x10C97, 0x10B40, 0x109E9, 0x10891, 4676 0x10738, 0x105DF, 0x10485, 0x1032B, 0x101D0, 0x10074, 0x0FF18, 0x0FDBB, 4677 0x0FC5D, 0x0FAFF, 0x0F9A0, 0x0F841, 0x0F6E1, 0x0F580, 0x0F41F, 0x0F2BD, 4678 0x0F15B, 0x0EFF8, 0x0EE94, 0x0ED30, 0x0EBCC, 0x0EA67, 0x0E901, 0x0E79A, 4679 0x0E633, 0x0E4CC, 0x0E364, 0x0E1FB, 
0x0E092, 0x0DF29, 0x0DDBE, 0x0DC54, 4680 0x0DAE9, 0x0D97D, 0x0D810, 0x0D6A4, 0x0D536, 0x0D3C8, 0x0D25A, 0x0D0EB, 4681 0x0CF7C, 0x0CE0C, 0x0CC9C, 0x0CB2B, 0x0C9B9, 0x0C847, 0x0C6D5, 0x0C562, 4682 0x0C3EF, 0x0C27B, 0x0C107, 0x0BF92, 0x0BE1D, 0x0BCA8, 0x0BB32, 0x0B9BB, 4683 0x0B844, 0x0B6CD, 0x0B555, 0x0B3DD, 0x0B264, 0x0B0EB, 0x0AF71, 0x0ADF7, 4684 0x0AC7D, 0x0AB02, 0x0A987, 0x0A80B, 0x0A68F, 0x0A513, 0x0A396, 0x0A219, 4685 0x0A09B, 0x09F1D, 0x09D9E, 0x09C20, 0x09AA1, 0x09921, 0x097A1, 0x09621, 4686 0x094A0, 0x0931F, 0x0919E, 0x0901C, 0x08E9A, 0x08D18, 0x08B95, 0x08A12, 4687 0x0888F, 0x0870B, 0x08587, 0x08402, 0x0827E, 0x080F9, 0x07F73, 0x07DEE, 4688 0x07C68, 0x07AE2, 0x0795B, 0x077D4, 0x0764D, 0x074C6, 0x0733E, 0x071B6, 4689 0x0702E, 0x06EA6, 0x06D1D, 0x06B94, 0x06A0B, 0x06881, 0x066F7, 0x0656D, 4690 0x063E3, 0x06258, 0x060CE, 0x05F43, 0x05DB7, 0x05C2C, 0x05AA0, 0x05914, 4691 0x05788, 0x055FC, 0x0546F, 0x052E3, 0x05156, 0x04FC9, 0x04E3B, 0x04CAE, 4692 0x04B20, 0x04992, 0x04804, 0x04676, 0x044E8, 0x04359, 0x041CB, 0x0403C, 4693 0x03EAD, 0x03D1D, 0x03B8E, 0x039FF, 0x0386F, 0x036DF, 0x0354F, 0x033BF, 4694 0x0322F, 0x0309F, 0x02F0F, 0x02D7E, 0x02BEE, 0x02A5D, 0x028CC, 0x0273B, 4695 0x025AA, 0x02419, 0x02288, 0x020F7, 0x01F65, 0x01DD4, 0x01C43, 0x01AB1, 4696 0x0191F, 0x0178E, 0x015FC, 0x0146A, 0x012D8, 0x01147, 0x00FB5, 0x00E23, 4697 0x00C91, 0x00AFF, 0x0096D, 0x007DB, 0x00648, 0x004B6, 0x00324, 0x00192}; 4698 4699 acc_reg_write(d, VRB1_PfFftRamPageAccess, VRB1_FFT_RAM_EN + 64); 4700 for (i = 0; i < VRB1_FFT_RAM_SIZE; i++) 4701 acc_reg_write(d, VRB1_PfFftRamOff + i * 4, fft_lut[i]); 4702 acc_reg_write(d, VRB1_PfFftRamPageAccess, VRB1_FFT_RAM_DIS); 4703 4704 /* Enabling AQueues through the Queue hierarchy. */ 4705 for (vf_idx = 0; vf_idx < VRB1_NUM_VFS; vf_idx++) { 4706 for (qg_idx = 0; qg_idx < VRB1_NUM_QGRPS; qg_idx++) { 4707 value = 0; 4708 if (vf_idx < conf->num_vf_bundles && qg_idx < totalQgs) 4709 value = (1 << aqNum(qg_idx, conf)) - 1; 4710 address = VRB1_PfQmgrAqEnableVf + vf_idx * ACC_BYTES_IN_WORD; 4711 value += (qg_idx << 16); 4712 acc_reg_write(d, address, value); 4713 } 4714 } 4715 4716 rte_bbdev_log_debug("PF Tip configuration complete for %s", dev_name); 4717 return 0; 4718 } 4719 4720 /* Initial configuration of a VRB2 device prior to running configure(). */ 4721 int 4722 vrb2_configure(const char *dev_name, struct rte_acc_conf *conf) 4723 { 4724 rte_bbdev_log(INFO, "vrb2_configure"); 4725 uint32_t value, address, status; 4726 int qg_idx, template_idx, vf_idx, acc, i, aq_reg, static_allocation, numEngines; 4727 int numQgs, numQqsAcc, totalQgs; 4728 int qman_func_id[8] = {0, 2, 1, 3, 4, 5, 0, 0}; 4729 struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name); 4730 int rlim, alen, timestamp; 4731 4732 /* Compile time checks. */ 4733 RTE_BUILD_BUG_ON(sizeof(struct acc_dma_req_desc) != 256); 4734 RTE_BUILD_BUG_ON(sizeof(union acc_dma_desc) != 256); 4735 RTE_BUILD_BUG_ON(sizeof(struct acc_fcw_td) != 24); 4736 RTE_BUILD_BUG_ON(sizeof(struct acc_fcw_te) != 32); 4737 4738 if (bbdev == NULL) { 4739 rte_bbdev_log(ERR, 4740 "Invalid dev_name (%s), or device is not yet initialised", 4741 dev_name); 4742 return -ENODEV; 4743 } 4744 struct acc_device *d = bbdev->data->dev_private; 4745 4746 /* Store configuration. */ 4747 rte_memcpy(&d->acc_conf, conf, sizeof(d->acc_conf)); 4748 4749 /* Explicitly releasing AXI as this may be stopped after PF FLR/BME. */ 4750 address = VRB2_PfDmaAxiControl; 4751 value = 1; 4752 acc_reg_write(d, address, value); 4753 4754 /* Set the fabric mode. 
*/ 4755 address = VRB2_PfFabricM2iBufferReg; 4756 value = VRB2_FABRIC_MODE; 4757 acc_reg_write(d, address, value); 4758 4759 /* Set default descriptor signature. */ 4760 address = VRB2_PfDmaDescriptorSignature; 4761 value = 0; 4762 acc_reg_write(d, address, value); 4763 4764 /* Enable the Error Detection in DMA. */ 4765 value = VRB2_CFG_DMA_ERROR; 4766 address = VRB2_PfDmaErrorDetectionEn; 4767 acc_reg_write(d, address, value); 4768 4769 /* AXI Cache configuration. */ 4770 value = VRB2_CFG_AXI_CACHE; 4771 address = VRB2_PfDmaAxcacheReg; 4772 acc_reg_write(d, address, value); 4773 4774 /* AXI Response configuration. */ 4775 acc_reg_write(d, VRB2_PfDmaCfgRrespBresp, 0x0); 4776 4777 /* Default DMA Configuration (Qmgr Enabled) */ 4778 acc_reg_write(d, VRB2_PfDmaConfig0Reg, 0); 4779 acc_reg_write(d, VRB2_PfDmaQmanenSelect, 0xFFFFFFFF); 4780 acc_reg_write(d, VRB2_PfDmaQmanen, 0); 4781 4782 /* Default RLIM/ALEN configuration. */ 4783 rlim = 0; 4784 alen = 3; 4785 timestamp = 0; 4786 address = VRB2_PfDmaConfig1Reg; 4787 value = (1 << 31) + (rlim << 8) + (timestamp << 6) + alen; 4788 acc_reg_write(d, address, value); 4789 4790 /* Default FFT configuration. */ 4791 for (template_idx = 0; template_idx < VRB2_FFT_NUM; template_idx++) { 4792 acc_reg_write(d, VRB2_PfFftConfig0 + template_idx * 0x1000, VRB2_FFT_CFG_0); 4793 acc_reg_write(d, VRB2_PfFftParityMask8 + template_idx * 0x1000, VRB2_FFT_ECC); 4794 } 4795 4796 /* Configure DMA Qmanager addresses. */ 4797 address = VRB2_PfDmaQmgrAddrReg; 4798 value = VRB2_PfQmgrEgressQueuesTemplate; 4799 acc_reg_write(d, address, value); 4800 4801 /* ===== Qmgr Configuration ===== */ 4802 /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL. */ 4803 totalQgs = conf->q_ul_4g.num_qgroups + conf->q_ul_5g.num_qgroups + 4804 conf->q_dl_4g.num_qgroups + conf->q_dl_5g.num_qgroups + 4805 conf->q_fft.num_qgroups + conf->q_mld.num_qgroups; 4806 for (qg_idx = 0; qg_idx < VRB2_NUM_QGRPS; qg_idx++) { 4807 address = VRB2_PfQmgrDepthLog2Grp + ACC_BYTES_IN_WORD * qg_idx; 4808 value = aqDepth(qg_idx, conf); 4809 acc_reg_write(d, address, value); 4810 address = VRB2_PfQmgrTholdGrp + ACC_BYTES_IN_WORD * qg_idx; 4811 value = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1)); 4812 acc_reg_write(d, address, value); 4813 } 4814 4815 /* Template Priority in incremental order. 
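 * VRB2 exposes eight priority words per template (ACC_TMPL_PRI_0 to
 * ACC_TMPL_PRI_7) instead of the four used on VRB1.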
*/ 4816 for (template_idx = 0; template_idx < ACC_NUM_TMPL; template_idx++) { 4817 address = VRB2_PfQmgrGrpTmplateReg0Indx + ACC_BYTES_IN_WORD * template_idx; 4818 value = ACC_TMPL_PRI_0; 4819 acc_reg_write(d, address, value); 4820 address = VRB2_PfQmgrGrpTmplateReg1Indx + ACC_BYTES_IN_WORD * template_idx; 4821 value = ACC_TMPL_PRI_1; 4822 acc_reg_write(d, address, value); 4823 address = VRB2_PfQmgrGrpTmplateReg2Indx + ACC_BYTES_IN_WORD * template_idx; 4824 value = ACC_TMPL_PRI_2; 4825 acc_reg_write(d, address, value); 4826 address = VRB2_PfQmgrGrpTmplateReg3Indx + ACC_BYTES_IN_WORD * template_idx; 4827 value = ACC_TMPL_PRI_3; 4828 acc_reg_write(d, address, value); 4829 address = VRB2_PfQmgrGrpTmplateReg4Indx + ACC_BYTES_IN_WORD * template_idx; 4830 value = ACC_TMPL_PRI_4; 4831 acc_reg_write(d, address, value); 4832 address = VRB2_PfQmgrGrpTmplateReg5Indx + ACC_BYTES_IN_WORD * template_idx; 4833 value = ACC_TMPL_PRI_5; 4834 acc_reg_write(d, address, value); 4835 address = VRB2_PfQmgrGrpTmplateReg6Indx + ACC_BYTES_IN_WORD * template_idx; 4836 value = ACC_TMPL_PRI_6; 4837 acc_reg_write(d, address, value); 4838 address = VRB2_PfQmgrGrpTmplateReg7Indx + ACC_BYTES_IN_WORD * template_idx; 4839 value = ACC_TMPL_PRI_7; 4840 acc_reg_write(d, address, value); 4841 } 4842 4843 address = VRB2_PfQmgrGrpPriority; 4844 value = VRB2_CFG_QMGR_HI_P; 4845 acc_reg_write(d, address, value); 4846 4847 /* Template Configuration. */ 4848 for (template_idx = 0; template_idx < ACC_NUM_TMPL; template_idx++) { 4849 value = 0; 4850 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 4851 acc_reg_write(d, address, value); 4852 } 4853 /* 4GUL */ 4854 numQgs = conf->q_ul_4g.num_qgroups; 4855 numQqsAcc = 0; 4856 value = 0; 4857 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4858 value |= (1 << qg_idx); 4859 for (template_idx = VRB2_SIG_UL_4G; template_idx <= VRB2_SIG_UL_4G_LAST; 4860 template_idx++) { 4861 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 4862 acc_reg_write(d, address, value); 4863 } 4864 /* 5GUL */ 4865 numQqsAcc += numQgs; 4866 numQgs = conf->q_ul_5g.num_qgroups; 4867 value = 0; 4868 numEngines = 0; 4869 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4870 value |= (1 << qg_idx); 4871 for (template_idx = VRB2_SIG_UL_5G; template_idx <= VRB2_SIG_UL_5G_LAST; 4872 template_idx++) { 4873 /* Check engine power-on status. 
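 * Only 5GUL templates whose engine reports a power-on status of 1 are mapped
 * to the queue-group bitmap; powered-down engines are left unmapped.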
*/ 4874 address = VRB2_PfFecUl5gIbDebug0Reg + ACC_ENGINE_OFFSET * template_idx; 4875 status = (acc_reg_read(d, address) >> 4) & 0x7; 4876 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 4877 if (status == 1) { 4878 acc_reg_write(d, address, value); 4879 numEngines++; 4880 } else 4881 acc_reg_write(d, address, 0); 4882 } 4883 rte_bbdev_log(INFO, "Number of 5GUL engines %d", numEngines); 4884 /* 4GDL */ 4885 numQqsAcc += numQgs; 4886 numQgs = conf->q_dl_4g.num_qgroups; 4887 value = 0; 4888 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4889 value |= (1 << qg_idx); 4890 for (template_idx = VRB2_SIG_DL_4G; template_idx <= VRB2_SIG_DL_4G_LAST; 4891 template_idx++) { 4892 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 4893 acc_reg_write(d, address, value); 4894 } 4895 /* 5GDL */ 4896 numQqsAcc += numQgs; 4897 numQgs = conf->q_dl_5g.num_qgroups; 4898 value = 0; 4899 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4900 value |= (1 << qg_idx); 4901 for (template_idx = VRB2_SIG_DL_5G; template_idx <= VRB2_SIG_DL_5G_LAST; 4902 template_idx++) { 4903 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 4904 acc_reg_write(d, address, value); 4905 } 4906 /* FFT */ 4907 numQqsAcc += numQgs; 4908 numQgs = conf->q_fft.num_qgroups; 4909 value = 0; 4910 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4911 value |= (1 << qg_idx); 4912 for (template_idx = VRB2_SIG_FFT; template_idx <= VRB2_SIG_FFT_LAST; 4913 template_idx++) { 4914 address = VRB2_PfQmgrGrpTmplateEnRegIndx + ACC_BYTES_IN_WORD * template_idx; 4915 acc_reg_write(d, address, value); 4916 } 4917 /* MLD */ 4918 numQqsAcc += numQgs; 4919 numQgs = conf->q_mld.num_qgroups; 4920 value = 0; 4921 for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++) 4922 value |= (1 << qg_idx); 4923 for (template_idx = VRB2_SIG_MLD; template_idx <= VRB2_SIG_MLD_LAST; 4924 template_idx++) { 4925 address = VRB2_PfQmgrGrpTmplateEnRegIndx 4926 + ACC_BYTES_IN_WORD * template_idx; 4927 acc_reg_write(d, address, value); 4928 } 4929 4930 /* Queue Group Function mapping. */ 4931 for (i = 0; i < 4; i++) { 4932 value = 0; 4933 for (qg_idx = 0; qg_idx < ACC_NUM_QGRPS_PER_WORD; qg_idx++) { 4934 acc = accFromQgid(qg_idx + i * ACC_NUM_QGRPS_PER_WORD, conf); 4935 value |= qman_func_id[acc] << (qg_idx * 4); 4936 } 4937 acc_reg_write(d, VRB2_PfQmgrGrpFunction0 + i * ACC_BYTES_IN_WORD, value); 4938 } 4939 4940 /* Configuration of the Arbitration QGroup depth to 1. */ 4941 for (qg_idx = 0; qg_idx < VRB2_NUM_QGRPS; qg_idx++) { 4942 address = VRB2_PfQmgrArbQDepthGrp + ACC_BYTES_IN_WORD * qg_idx; 4943 value = 0; 4944 acc_reg_write(d, address, value); 4945 } 4946 4947 static_allocation = 1; 4948 if (static_allocation == 1) { 4949 /* This pointer to ARAM (512kB) is shifted by 2 (4B per register). */ 4950 uint32_t aram_address = 0; 4951 for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) { 4952 for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) { 4953 address = VRB2_PfQmgrVfBaseAddr + vf_idx 4954 * ACC_BYTES_IN_WORD + qg_idx 4955 * ACC_BYTES_IN_WORD * 64; 4956 value = aram_address; 4957 acc_reg_fast_write(d, address, value); 4958 /* Offset ARAM Address for next memory bank - increment of 4B. 
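 * i.e. each (queue group, VF bundle) slice reserves aqNum * 2^aqDepth words,
 * and the running total is validated against VRB2_WORDS_IN_ARAM_SIZE below.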
*/ 4959 aram_address += aqNum(qg_idx, conf) * 4960 (1 << aqDepth(qg_idx, conf)); 4961 } 4962 } 4963 if (aram_address > VRB2_WORDS_IN_ARAM_SIZE) { 4964 rte_bbdev_log(ERR, "ARAM Configuration not fitting %d %d\n", 4965 aram_address, VRB2_WORDS_IN_ARAM_SIZE); 4966 return -EINVAL; 4967 } 4968 } else { 4969 /* Dynamic Qmgr allocation. */ 4970 acc_reg_write(d, VRB2_PfQmgrAramAllocEn, 1); 4971 acc_reg_write(d, VRB2_PfQmgrAramAllocSetupN0, 0x1000); 4972 acc_reg_write(d, VRB2_PfQmgrAramAllocSetupN1, 0); 4973 acc_reg_write(d, VRB2_PfQmgrAramAllocSetupN2, 0); 4974 acc_reg_write(d, VRB2_PfQmgrAramAllocSetupN3, 0); 4975 acc_reg_write(d, VRB2_PfQmgrSoftReset, 1); 4976 acc_reg_write(d, VRB2_PfQmgrSoftReset, 0); 4977 } 4978 4979 /* ==== HI Configuration ==== */ 4980 4981 /* No Info Ring/MSI by default. */ 4982 address = VRB2_PfHiInfoRingIntWrEnRegPf; 4983 value = 0; 4984 acc_reg_write(d, address, value); 4985 address = VRB2_PfHiCfgMsiIntWrEnRegPf; 4986 value = 0xFFFFFFFF; 4987 acc_reg_write(d, address, value); 4988 /* Prevent Block on Transmit Error. */ 4989 address = VRB2_PfHiBlockTransmitOnErrorEn; 4990 value = 0; 4991 acc_reg_write(d, address, value); 4992 /* Prevents to drop MSI */ 4993 address = VRB2_PfHiMsiDropEnableReg; 4994 value = 0; 4995 acc_reg_write(d, address, value); 4996 /* Set the PF Mode register */ 4997 address = VRB2_PfHiPfMode; 4998 value = ((conf->pf_mode_en) ? ACC_PF_VAL : 0) | 0x1F07F0; 4999 acc_reg_write(d, address, value); 5000 /* Explicitly releasing AXI after PF Mode. */ 5001 acc_reg_write(d, VRB2_PfDmaAxiControl, 1); 5002 5003 /* QoS overflow init. */ 5004 value = 1; 5005 address = VRB2_PfQosmonAEvalOverflow0; 5006 acc_reg_write(d, address, value); 5007 address = VRB2_PfQosmonBEvalOverflow0; 5008 acc_reg_write(d, address, value); 5009 5010 /* Enabling AQueues through the Queue hierarchy. */ 5011 unsigned int en_bitmask[VRB2_AQ_REG_NUM]; 5012 for (vf_idx = 0; vf_idx < VRB2_NUM_VFS; vf_idx++) { 5013 for (qg_idx = 0; qg_idx < VRB2_NUM_QGRPS; qg_idx++) { 5014 for (aq_reg = 0; aq_reg < VRB2_AQ_REG_NUM; aq_reg++) 5015 en_bitmask[aq_reg] = 0; 5016 if (vf_idx < conf->num_vf_bundles && qg_idx < totalQgs) { 5017 for (aq_reg = 0; aq_reg < VRB2_AQ_REG_NUM; aq_reg++) { 5018 if (aqNum(qg_idx, conf) >= 16 * (aq_reg + 1)) 5019 en_bitmask[aq_reg] = 0xFFFF; 5020 else if (aqNum(qg_idx, conf) <= 16 * aq_reg) 5021 en_bitmask[aq_reg] = 0x0; 5022 else 5023 en_bitmask[aq_reg] = (1 << (aqNum(qg_idx, 5024 conf) - aq_reg * 16)) - 1; 5025 } 5026 } 5027 for (aq_reg = 0; aq_reg < VRB2_AQ_REG_NUM; aq_reg++) { 5028 address = VRB2_PfQmgrAqEnableVf + vf_idx * 16 + aq_reg * 4; 5029 value = (qg_idx << 16) + en_bitmask[aq_reg]; 5030 acc_reg_fast_write(d, address, value); 5031 } 5032 } 5033 } 5034 5035 rte_bbdev_log(INFO, 5036 "VRB2 basic config complete for %s - pf_bb_config should ideally be used instead", 5037 dev_name); 5038 return 0; 5039 } 5040
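
/*
 * Illustrative sketch only (not part of the driver): how a PF application
 * could call vrb2_configure() with a minimal static queue split. The queue
 * counts and the "0000:f7:00.0" device name below are made-up example values;
 * production deployments should normally rely on pf_bb_config instead.
 *
 *	struct rte_acc_conf conf = {0};
 *
 *	conf.pf_mode_en = 1;
 *	conf.num_vf_bundles = 1;
 *	conf.q_ul_5g.num_qgroups = 4;
 *	conf.q_ul_5g.num_aqs_per_groups = 16;
 *	conf.q_ul_5g.aq_depth_log2 = 4;
 *	conf.q_dl_5g.num_qgroups = 4;
 *	conf.q_dl_5g.num_aqs_per_groups = 16;
 *	conf.q_dl_5g.aq_depth_log2 = 4;
 *
 *	if (vrb2_configure("0000:f7:00.0", &conf) != 0)
 *		rte_bbdev_log(ERR, "vrb2_configure failed");
 */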