/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2016 - 2018 Cavium Inc.
 * All rights reserved.
 * www.cavium.com
 */

#include <rte_net.h>
#include "qede_rxtx.h"

static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
{
	struct rte_mbuf *new_mb = NULL;
	struct eth_rx_bd *rx_bd;
	dma_addr_t mapping;
	uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);

	new_mb = rte_mbuf_raw_alloc(rxq->mb_pool);
	if (unlikely(!new_mb)) {
		PMD_RX_LOG(ERR, rxq,
			   "Failed to allocate rx buffer "
			   "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u",
			   idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq),
			   rte_mempool_avail_count(rxq->mb_pool),
			   rte_mempool_in_use_count(rxq->mb_pool));
		return -ENOMEM;
	}
	rxq->sw_rx_ring[idx].mbuf = new_mb;
	rxq->sw_rx_ring[idx].page_offset = 0;
	mapping = rte_mbuf_data_iova_default(new_mb);
	/* Advance PROD and get BD pointer */
	rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring);
	rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
	rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
	rxq->sw_rx_prod++;
	return 0;
}

#define QEDE_MAX_BULK_ALLOC_COUNT 512

static inline int qede_alloc_rx_bulk_mbufs(struct qede_rx_queue *rxq, int count)
{
	void *obj_p[QEDE_MAX_BULK_ALLOC_COUNT] __rte_cache_aligned;
	struct rte_mbuf *mbuf = NULL;
	struct eth_rx_bd *rx_bd;
	dma_addr_t mapping;
	int i, ret = 0;
	uint16_t idx;

	idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);

	if (count > QEDE_MAX_BULK_ALLOC_COUNT)
		count = QEDE_MAX_BULK_ALLOC_COUNT;

	ret = rte_mempool_get_bulk(rxq->mb_pool, obj_p, count);
	if (unlikely(ret)) {
		PMD_RX_LOG(ERR, rxq,
			   "Failed to allocate %d rx buffers "
			   "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u",
			   count, idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq),
			   rte_mempool_avail_count(rxq->mb_pool),
			   rte_mempool_in_use_count(rxq->mb_pool));
		return -ENOMEM;
	}

	for (i = 0; i < count; i++) {
		mbuf = obj_p[i];
		if (likely(i < count - 1))
			rte_prefetch0(obj_p[i + 1]);

		idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
		rxq->sw_rx_ring[idx].mbuf = mbuf;
		rxq->sw_rx_ring[idx].page_offset = 0;
		mapping = rte_mbuf_data_iova_default(mbuf);
		rx_bd = (struct eth_rx_bd *)
			ecore_chain_produce(&rxq->rx_bd_ring);
		rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
		rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
		rxq->sw_rx_prod++;
	}

	return 0;
}

/* Criteria for calculating Rx buffer size -
 * 1) rx_buf_size should not exceed the size of mbuf
 * 2) In scattered_rx mode - minimum rx_buf_size should be
 *    (MTU + Maximum L2 Header Size + 2) / ETH_RX_MAX_BUFF_PER_PKT
 * 3) In regular mode - minimum rx_buf_size should be
 *    (MTU + Maximum L2 Header Size + 2)
 *    In the above cases +2 corresponds to 2 bytes of padding in front of the
 *    L2 header.
 * 4) rx_buf_size should be cacheline-size aligned. So considering
 *    criteria 1, we need to adjust the size to floor instead of ceil,
 *    so that rx_buf_size does not exceed the mbuf size after alignment.
 */
int
qede_calc_rx_buf_size(struct rte_eth_dev *dev, uint16_t mbufsz,
		      uint16_t max_frame_size)
{
	struct qede_dev *qdev = QEDE_INIT_QDEV(dev);
	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
	int rx_buf_size;

	if (dev->data->scattered_rx) {
		/* per HW limitation, only ETH_RX_MAX_BUFF_PER_PKT number of
		 * buffers can be used for a single packet. So need to make
		 * sure the mbuf size is sufficient for this.
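		 *
		 * A worked example with assumed illustrative values (the real
		 * ETH_RX_MAX_BUFF_PER_PKT and QEDE_ETH_OVERHEAD definitions
		 * live in the headers): take ETH_RX_MAX_BUFF_PER_PKT = 5,
		 * QEDE_ETH_OVERHEAD = 24, mbufsz = 2048, max_frame_size = 9600.
		 * The check below passes since 2048 * 5 = 10240 is not less
		 * than 9600 + 24 = 9624, and the scattered-rx buffer size is
		 *
		 *   rx_buf_size = RTE_MAX(2048, 9624 / 5)
		 *               = RTE_MAX(2048, 1924) = 2048
		 *
		 * which QEDE_FLOOR_TO_CACHE_LINE_SIZE() then leaves at 2048,
		 * already a multiple of the cache-line size.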
		 */
		if ((mbufsz * ETH_RX_MAX_BUFF_PER_PKT) <
		    (max_frame_size + QEDE_ETH_OVERHEAD)) {
			DP_ERR(edev, "mbuf %d size is not enough to hold max fragments (%d) for max rx packet length (%d)\n",
			       mbufsz, ETH_RX_MAX_BUFF_PER_PKT, max_frame_size);
			return -EINVAL;
		}

		rx_buf_size = RTE_MAX(mbufsz,
				      (max_frame_size + QEDE_ETH_OVERHEAD) /
				      ETH_RX_MAX_BUFF_PER_PKT);
	} else {
		rx_buf_size = max_frame_size + QEDE_ETH_OVERHEAD;
	}

	/* Align to cache-line size if needed */
	return QEDE_FLOOR_TO_CACHE_LINE_SIZE(rx_buf_size);
}

int
qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
		    uint16_t nb_desc, unsigned int socket_id,
		    __rte_unused const struct rte_eth_rxconf *rx_conf,
		    struct rte_mempool *mp)
{
	struct qede_dev *qdev = QEDE_INIT_QDEV(dev);
	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
	struct qede_rx_queue *rxq;
	uint16_t max_rx_pkt_len;
	uint16_t bufsz;
	size_t size;
	int rc;

	PMD_INIT_FUNC_TRACE(edev);

	/* Note: Ring size/align is controlled by struct rte_eth_desc_lim */
	if (!rte_is_power_of_2(nb_desc)) {
		DP_ERR(edev, "Ring size %u is not power of 2\n",
		       nb_desc);
		return -EINVAL;
	}

	/* Free memory prior to re-allocation if needed... */
	if (dev->data->rx_queues[queue_idx] != NULL) {
		qede_rx_queue_release(dev->data->rx_queues[queue_idx]);
		dev->data->rx_queues[queue_idx] = NULL;
	}

	/* First allocate the rx queue data structure */
	rxq = rte_zmalloc_socket("qede_rx_queue", sizeof(struct qede_rx_queue),
				 RTE_CACHE_LINE_SIZE, socket_id);

	if (!rxq) {
		DP_ERR(edev, "Unable to allocate memory for rxq on socket %u",
		       socket_id);
		return -ENOMEM;
	}

	rxq->qdev = qdev;
	rxq->mb_pool = mp;
	rxq->nb_rx_desc = nb_desc;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;

	max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len;

	/* Fix up RX buffer size */
	bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
	/* cache align the mbuf size to simplify rx_buf_size calculation */
	bufsz = QEDE_FLOOR_TO_CACHE_LINE_SIZE(bufsz);
	if ((rxmode->offloads & DEV_RX_OFFLOAD_SCATTER) ||
	    (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) {
		if (!dev->data->scattered_rx) {
			DP_INFO(edev, "Forcing scatter-gather mode\n");
			dev->data->scattered_rx = 1;
		}
	}

	rc = qede_calc_rx_buf_size(dev, bufsz, max_rx_pkt_len);
	if (rc < 0) {
		rte_free(rxq);
		return rc;
	}

	rxq->rx_buf_size = rc;

	DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n",
		qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx);

	/* Allocate the parallel driver ring for Rx buffers */
	size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc;
	rxq->sw_rx_ring = rte_zmalloc_socket("sw_rx_ring", size,
					     RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->sw_rx_ring) {
		DP_ERR(edev, "Memory allocation fails for sw_rx_ring on"
		       " socket %u\n", socket_id);
		rte_free(rxq);
		return -ENOMEM;
	}

	/* Allocate FW Rx ring */
	rc = qdev->ops->common->chain_alloc(edev,
					    ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
					    ECORE_CHAIN_MODE_NEXT_PTR,
					    ECORE_CHAIN_CNT_TYPE_U16,
					    rxq->nb_rx_desc,
					    sizeof(struct eth_rx_bd),
					    &rxq->rx_bd_ring,
					    NULL);

	if (rc != ECORE_SUCCESS) {
		DP_ERR(edev, "Memory allocation fails for RX BD ring"
" on socket %u\n", socket_id); 222 rte_free(rxq->sw_rx_ring); 223 rte_free(rxq); 224 return -ENOMEM; 225 } 226 227 /* Allocate FW completion ring */ 228 rc = qdev->ops->common->chain_alloc(edev, 229 ECORE_CHAIN_USE_TO_CONSUME, 230 ECORE_CHAIN_MODE_PBL, 231 ECORE_CHAIN_CNT_TYPE_U16, 232 rxq->nb_rx_desc, 233 sizeof(union eth_rx_cqe), 234 &rxq->rx_comp_ring, 235 NULL); 236 237 if (rc != ECORE_SUCCESS) { 238 DP_ERR(edev, "Memory allocation fails for RX CQE ring" 239 " on socket %u\n", socket_id); 240 qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring); 241 rte_free(rxq->sw_rx_ring); 242 rte_free(rxq); 243 return -ENOMEM; 244 } 245 246 dev->data->rx_queues[queue_idx] = rxq; 247 qdev->fp_array[queue_idx].rxq = rxq; 248 249 DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n", 250 queue_idx, nb_desc, rxq->rx_buf_size, socket_id); 251 252 return 0; 253 } 254 255 static void 256 qede_rx_queue_reset(__rte_unused struct qede_dev *qdev, 257 struct qede_rx_queue *rxq) 258 { 259 DP_INFO(&qdev->edev, "Reset RX queue %u\n", rxq->queue_id); 260 ecore_chain_reset(&rxq->rx_bd_ring); 261 ecore_chain_reset(&rxq->rx_comp_ring); 262 rxq->sw_rx_prod = 0; 263 rxq->sw_rx_cons = 0; 264 *rxq->hw_cons_ptr = 0; 265 } 266 267 static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq) 268 { 269 uint16_t i; 270 271 if (rxq->sw_rx_ring) { 272 for (i = 0; i < rxq->nb_rx_desc; i++) { 273 if (rxq->sw_rx_ring[i].mbuf) { 274 rte_pktmbuf_free(rxq->sw_rx_ring[i].mbuf); 275 rxq->sw_rx_ring[i].mbuf = NULL; 276 } 277 } 278 } 279 } 280 281 void qede_rx_queue_release(void *rx_queue) 282 { 283 struct qede_rx_queue *rxq = rx_queue; 284 struct qede_dev *qdev; 285 struct ecore_dev *edev; 286 287 if (rxq) { 288 qdev = rxq->qdev; 289 edev = QEDE_INIT_EDEV(qdev); 290 PMD_INIT_FUNC_TRACE(edev); 291 qede_rx_queue_release_mbufs(rxq); 292 qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring); 293 qdev->ops->common->chain_free(edev, &rxq->rx_comp_ring); 294 rte_free(rxq->sw_rx_ring); 295 rte_free(rxq); 296 } 297 } 298 299 /* Stops a given RX queue in the HW */ 300 static int qede_rx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) 301 { 302 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 303 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 304 struct ecore_hwfn *p_hwfn; 305 struct qede_rx_queue *rxq; 306 int hwfn_index; 307 int rc; 308 309 if (rx_queue_id < eth_dev->data->nb_rx_queues) { 310 rxq = eth_dev->data->rx_queues[rx_queue_id]; 311 hwfn_index = rx_queue_id % edev->num_hwfns; 312 p_hwfn = &edev->hwfns[hwfn_index]; 313 rc = ecore_eth_rx_queue_stop(p_hwfn, rxq->handle, 314 true, false); 315 if (rc != ECORE_SUCCESS) { 316 DP_ERR(edev, "RX queue %u stop fails\n", rx_queue_id); 317 return -1; 318 } 319 qede_rx_queue_release_mbufs(rxq); 320 qede_rx_queue_reset(qdev, rxq); 321 eth_dev->data->rx_queue_state[rx_queue_id] = 322 RTE_ETH_QUEUE_STATE_STOPPED; 323 DP_INFO(edev, "RX queue %u stopped\n", rx_queue_id); 324 } else { 325 DP_ERR(edev, "RX queue %u is not in range\n", rx_queue_id); 326 rc = -EINVAL; 327 } 328 329 return rc; 330 } 331 332 int 333 qede_tx_queue_setup(struct rte_eth_dev *dev, 334 uint16_t queue_idx, 335 uint16_t nb_desc, 336 unsigned int socket_id, 337 const struct rte_eth_txconf *tx_conf) 338 { 339 struct qede_dev *qdev = dev->data->dev_private; 340 struct ecore_dev *edev = &qdev->edev; 341 struct qede_tx_queue *txq; 342 int rc; 343 344 PMD_INIT_FUNC_TRACE(edev); 345 346 if (!rte_is_power_of_2(nb_desc)) { 347 DP_ERR(edev, "Ring size %u is not power of 2\n", 348 nb_desc); 349 return -EINVAL; 350 } 
351 352 /* Free memory prior to re-allocation if needed... */ 353 if (dev->data->tx_queues[queue_idx] != NULL) { 354 qede_tx_queue_release(dev->data->tx_queues[queue_idx]); 355 dev->data->tx_queues[queue_idx] = NULL; 356 } 357 358 txq = rte_zmalloc_socket("qede_tx_queue", sizeof(struct qede_tx_queue), 359 RTE_CACHE_LINE_SIZE, socket_id); 360 361 if (txq == NULL) { 362 DP_ERR(edev, 363 "Unable to allocate memory for txq on socket %u", 364 socket_id); 365 return -ENOMEM; 366 } 367 368 txq->nb_tx_desc = nb_desc; 369 txq->qdev = qdev; 370 txq->port_id = dev->data->port_id; 371 372 rc = qdev->ops->common->chain_alloc(edev, 373 ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, 374 ECORE_CHAIN_MODE_PBL, 375 ECORE_CHAIN_CNT_TYPE_U16, 376 txq->nb_tx_desc, 377 sizeof(union eth_tx_bd_types), 378 &txq->tx_pbl, 379 NULL); 380 if (rc != ECORE_SUCCESS) { 381 DP_ERR(edev, 382 "Unable to allocate memory for txbd ring on socket %u", 383 socket_id); 384 qede_tx_queue_release(txq); 385 return -ENOMEM; 386 } 387 388 /* Allocate software ring */ 389 txq->sw_tx_ring = rte_zmalloc_socket("txq->sw_tx_ring", 390 (sizeof(struct qede_tx_entry) * 391 txq->nb_tx_desc), 392 RTE_CACHE_LINE_SIZE, socket_id); 393 394 if (!txq->sw_tx_ring) { 395 DP_ERR(edev, 396 "Unable to allocate memory for txbd ring on socket %u", 397 socket_id); 398 qdev->ops->common->chain_free(edev, &txq->tx_pbl); 399 qede_tx_queue_release(txq); 400 return -ENOMEM; 401 } 402 403 txq->queue_id = queue_idx; 404 405 txq->nb_tx_avail = txq->nb_tx_desc; 406 407 txq->tx_free_thresh = 408 tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh : 409 (txq->nb_tx_desc - QEDE_DEFAULT_TX_FREE_THRESH); 410 411 dev->data->tx_queues[queue_idx] = txq; 412 qdev->fp_array[queue_idx].txq = txq; 413 414 DP_INFO(edev, 415 "txq %u num_desc %u tx_free_thresh %u socket %u\n", 416 queue_idx, nb_desc, txq->tx_free_thresh, socket_id); 417 418 return 0; 419 } 420 421 static void 422 qede_tx_queue_reset(__rte_unused struct qede_dev *qdev, 423 struct qede_tx_queue *txq) 424 { 425 DP_INFO(&qdev->edev, "Reset TX queue %u\n", txq->queue_id); 426 ecore_chain_reset(&txq->tx_pbl); 427 txq->sw_tx_cons = 0; 428 txq->sw_tx_prod = 0; 429 *txq->hw_cons_ptr = 0; 430 } 431 432 static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq) 433 { 434 uint16_t i; 435 436 if (txq->sw_tx_ring) { 437 for (i = 0; i < txq->nb_tx_desc; i++) { 438 if (txq->sw_tx_ring[i].mbuf) { 439 rte_pktmbuf_free(txq->sw_tx_ring[i].mbuf); 440 txq->sw_tx_ring[i].mbuf = NULL; 441 } 442 } 443 } 444 } 445 446 void qede_tx_queue_release(void *tx_queue) 447 { 448 struct qede_tx_queue *txq = tx_queue; 449 struct qede_dev *qdev; 450 struct ecore_dev *edev; 451 452 if (txq) { 453 qdev = txq->qdev; 454 edev = QEDE_INIT_EDEV(qdev); 455 PMD_INIT_FUNC_TRACE(edev); 456 qede_tx_queue_release_mbufs(txq); 457 qdev->ops->common->chain_free(edev, &txq->tx_pbl); 458 rte_free(txq->sw_tx_ring); 459 rte_free(txq); 460 } 461 } 462 463 /* This function allocates fast-path status block memory */ 464 static int 465 qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info, 466 uint16_t sb_id) 467 { 468 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 469 struct status_block_e4 *sb_virt; 470 dma_addr_t sb_phys; 471 int rc; 472 473 sb_virt = OSAL_DMA_ALLOC_COHERENT(edev, &sb_phys, 474 sizeof(struct status_block_e4)); 475 if (!sb_virt) { 476 DP_ERR(edev, "Status block allocation failed\n"); 477 return -ENOMEM; 478 } 479 rc = qdev->ops->common->sb_init(edev, sb_info, sb_virt, 480 sb_phys, sb_id); 481 if (rc) { 482 DP_ERR(edev, "Status block 
initialization failed\n"); 483 OSAL_DMA_FREE_COHERENT(edev, sb_virt, sb_phys, 484 sizeof(struct status_block_e4)); 485 return rc; 486 } 487 488 return 0; 489 } 490 491 int qede_alloc_fp_resc(struct qede_dev *qdev) 492 { 493 struct ecore_dev *edev = &qdev->edev; 494 struct qede_fastpath *fp; 495 uint32_t num_sbs; 496 uint16_t sb_idx; 497 498 if (IS_VF(edev)) 499 ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs); 500 else 501 num_sbs = ecore_cxt_get_proto_cid_count 502 (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL); 503 504 if (num_sbs == 0) { 505 DP_ERR(edev, "No status blocks available\n"); 506 return -EINVAL; 507 } 508 509 qdev->fp_array = rte_calloc("fp", QEDE_RXTX_MAX(qdev), 510 sizeof(*qdev->fp_array), RTE_CACHE_LINE_SIZE); 511 512 if (!qdev->fp_array) { 513 DP_ERR(edev, "fp array allocation failed\n"); 514 return -ENOMEM; 515 } 516 517 memset((void *)qdev->fp_array, 0, QEDE_RXTX_MAX(qdev) * 518 sizeof(*qdev->fp_array)); 519 520 for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) { 521 fp = &qdev->fp_array[sb_idx]; 522 if (!fp) 523 continue; 524 fp->sb_info = rte_calloc("sb", 1, sizeof(struct ecore_sb_info), 525 RTE_CACHE_LINE_SIZE); 526 if (!fp->sb_info) { 527 DP_ERR(edev, "FP sb_info allocation fails\n"); 528 return -1; 529 } 530 if (qede_alloc_mem_sb(qdev, fp->sb_info, sb_idx)) { 531 DP_ERR(edev, "FP status block allocation fails\n"); 532 return -1; 533 } 534 DP_INFO(edev, "sb_info idx 0x%x initialized\n", 535 fp->sb_info->igu_sb_id); 536 } 537 538 return 0; 539 } 540 541 void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev) 542 { 543 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 544 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 545 struct qede_fastpath *fp; 546 uint16_t sb_idx; 547 uint8_t i; 548 549 PMD_INIT_FUNC_TRACE(edev); 550 551 for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) { 552 fp = &qdev->fp_array[sb_idx]; 553 if (!fp) 554 continue; 555 DP_INFO(edev, "Free sb_info index 0x%x\n", 556 fp->sb_info->igu_sb_id); 557 if (fp->sb_info) { 558 OSAL_DMA_FREE_COHERENT(edev, fp->sb_info->sb_virt, 559 fp->sb_info->sb_phys, 560 sizeof(struct status_block_e4)); 561 rte_free(fp->sb_info); 562 fp->sb_info = NULL; 563 } 564 } 565 566 /* Free packet buffers and ring memories */ 567 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { 568 if (eth_dev->data->rx_queues[i]) { 569 qede_rx_queue_release(eth_dev->data->rx_queues[i]); 570 eth_dev->data->rx_queues[i] = NULL; 571 } 572 } 573 574 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { 575 if (eth_dev->data->tx_queues[i]) { 576 qede_tx_queue_release(eth_dev->data->tx_queues[i]); 577 eth_dev->data->tx_queues[i] = NULL; 578 } 579 } 580 581 if (qdev->fp_array) 582 rte_free(qdev->fp_array); 583 qdev->fp_array = NULL; 584 } 585 586 static inline void 587 qede_update_rx_prod(__rte_unused struct qede_dev *edev, 588 struct qede_rx_queue *rxq) 589 { 590 uint16_t bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring); 591 uint16_t cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring); 592 struct eth_rx_prod_data rx_prods = { 0 }; 593 594 /* Update producers */ 595 rx_prods.bd_prod = rte_cpu_to_le_16(bd_prod); 596 rx_prods.cqe_prod = rte_cpu_to_le_16(cqe_prod); 597 598 /* Make sure that the BD and SGE data is updated before updating the 599 * producers since FW might read the BD/SGE right after the producer 600 * is updated. 
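	 *
	 * Minimal sketch of the required ordering (the barrier and producer
	 * write appear just below; the BD writes happen in
	 * qede_alloc_rx_buffer() and qede_alloc_rx_bulk_mbufs()):
	 *
	 *   rx_bd->addr.hi = ...;     (1) BD/SGE writes
	 *   rx_bd->addr.lo = ...;
	 *   rte_wmb();                (2) order (1) before (3)
	 *   internal_ram_wr(...);     (3) producer update visible to FW
	 *
	 * Without the barrier the FW could observe the new producer while
	 * the BD address words are still pending in a store buffer.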
601 */ 602 rte_wmb(); 603 604 internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods), 605 (uint32_t *)&rx_prods); 606 607 /* mmiowb is needed to synchronize doorbell writes from more than one 608 * processor. It guarantees that the write arrives to the device before 609 * the napi lock is released and another qede_poll is called (possibly 610 * on another CPU). Without this barrier, the next doorbell can bypass 611 * this doorbell. This is applicable to IA64/Altix systems. 612 */ 613 rte_wmb(); 614 615 PMD_RX_LOG(DEBUG, rxq, "bd_prod %u cqe_prod %u", bd_prod, cqe_prod); 616 } 617 618 /* Starts a given RX queue in HW */ 619 static int 620 qede_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) 621 { 622 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 623 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 624 struct ecore_queue_start_common_params params; 625 struct ecore_rxq_start_ret_params ret_params; 626 struct qede_rx_queue *rxq; 627 struct qede_fastpath *fp; 628 struct ecore_hwfn *p_hwfn; 629 dma_addr_t p_phys_table; 630 uint16_t page_cnt; 631 uint16_t j; 632 int hwfn_index; 633 int rc; 634 635 if (rx_queue_id < eth_dev->data->nb_rx_queues) { 636 fp = &qdev->fp_array[rx_queue_id]; 637 rxq = eth_dev->data->rx_queues[rx_queue_id]; 638 /* Allocate buffers for the Rx ring */ 639 for (j = 0; j < rxq->nb_rx_desc; j++) { 640 rc = qede_alloc_rx_buffer(rxq); 641 if (rc) { 642 DP_ERR(edev, "RX buffer allocation failed" 643 " for rxq = %u\n", rx_queue_id); 644 return -ENOMEM; 645 } 646 } 647 /* disable interrupts */ 648 ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0); 649 /* Prepare ramrod */ 650 memset(¶ms, 0, sizeof(params)); 651 params.queue_id = rx_queue_id / edev->num_hwfns; 652 params.vport_id = 0; 653 params.stats_id = params.vport_id; 654 params.p_sb = fp->sb_info; 655 DP_INFO(edev, "rxq %u igu_sb_id 0x%x\n", 656 fp->rxq->queue_id, fp->sb_info->igu_sb_id); 657 params.sb_idx = RX_PI; 658 hwfn_index = rx_queue_id % edev->num_hwfns; 659 p_hwfn = &edev->hwfns[hwfn_index]; 660 p_phys_table = ecore_chain_get_pbl_phys(&fp->rxq->rx_comp_ring); 661 page_cnt = ecore_chain_get_page_cnt(&fp->rxq->rx_comp_ring); 662 memset(&ret_params, 0, sizeof(ret_params)); 663 rc = ecore_eth_rx_queue_start(p_hwfn, 664 p_hwfn->hw_info.opaque_fid, 665 ¶ms, fp->rxq->rx_buf_size, 666 fp->rxq->rx_bd_ring.p_phys_addr, 667 p_phys_table, page_cnt, 668 &ret_params); 669 if (rc) { 670 DP_ERR(edev, "RX queue %u could not be started, rc = %d\n", 671 rx_queue_id, rc); 672 return -1; 673 } 674 /* Update with the returned parameters */ 675 fp->rxq->hw_rxq_prod_addr = ret_params.p_prod; 676 fp->rxq->handle = ret_params.p_handle; 677 678 fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI]; 679 qede_update_rx_prod(qdev, fp->rxq); 680 eth_dev->data->rx_queue_state[rx_queue_id] = 681 RTE_ETH_QUEUE_STATE_STARTED; 682 DP_INFO(edev, "RX queue %u started\n", rx_queue_id); 683 } else { 684 DP_ERR(edev, "RX queue %u is not in range\n", rx_queue_id); 685 rc = -EINVAL; 686 } 687 688 return rc; 689 } 690 691 static int 692 qede_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id) 693 { 694 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 695 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 696 struct ecore_queue_start_common_params params; 697 struct ecore_txq_start_ret_params ret_params; 698 struct ecore_hwfn *p_hwfn; 699 dma_addr_t p_phys_table; 700 struct qede_tx_queue *txq; 701 struct qede_fastpath *fp; 702 uint16_t page_cnt; 703 int hwfn_index; 704 int rc; 705 706 if (tx_queue_id < 
eth_dev->data->nb_tx_queues) { 707 txq = eth_dev->data->tx_queues[tx_queue_id]; 708 fp = &qdev->fp_array[tx_queue_id]; 709 memset(¶ms, 0, sizeof(params)); 710 params.queue_id = tx_queue_id / edev->num_hwfns; 711 params.vport_id = 0; 712 params.stats_id = params.vport_id; 713 params.p_sb = fp->sb_info; 714 DP_INFO(edev, "txq %u igu_sb_id 0x%x\n", 715 fp->txq->queue_id, fp->sb_info->igu_sb_id); 716 params.sb_idx = TX_PI(0); /* tc = 0 */ 717 p_phys_table = ecore_chain_get_pbl_phys(&txq->tx_pbl); 718 page_cnt = ecore_chain_get_page_cnt(&txq->tx_pbl); 719 hwfn_index = tx_queue_id % edev->num_hwfns; 720 p_hwfn = &edev->hwfns[hwfn_index]; 721 if (qdev->dev_info.is_legacy) 722 fp->txq->is_legacy = true; 723 rc = ecore_eth_tx_queue_start(p_hwfn, 724 p_hwfn->hw_info.opaque_fid, 725 ¶ms, 0 /* tc */, 726 p_phys_table, page_cnt, 727 &ret_params); 728 if (rc != ECORE_SUCCESS) { 729 DP_ERR(edev, "TX queue %u couldn't be started, rc=%d\n", 730 tx_queue_id, rc); 731 return -1; 732 } 733 txq->doorbell_addr = ret_params.p_doorbell; 734 txq->handle = ret_params.p_handle; 735 736 txq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[TX_PI(0)]; 737 SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST, 738 DB_DEST_XCM); 739 SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD, 740 DB_AGG_CMD_SET); 741 SET_FIELD(txq->tx_db.data.params, 742 ETH_DB_DATA_AGG_VAL_SEL, 743 DQ_XCM_ETH_TX_BD_PROD_CMD); 744 txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD; 745 eth_dev->data->tx_queue_state[tx_queue_id] = 746 RTE_ETH_QUEUE_STATE_STARTED; 747 DP_INFO(edev, "TX queue %u started\n", tx_queue_id); 748 } else { 749 DP_ERR(edev, "TX queue %u is not in range\n", tx_queue_id); 750 rc = -EINVAL; 751 } 752 753 return rc; 754 } 755 756 static inline void 757 qede_free_tx_pkt(struct qede_tx_queue *txq) 758 { 759 struct rte_mbuf *mbuf; 760 uint16_t nb_segs; 761 uint16_t idx; 762 763 idx = TX_CONS(txq); 764 mbuf = txq->sw_tx_ring[idx].mbuf; 765 if (mbuf) { 766 nb_segs = mbuf->nb_segs; 767 PMD_TX_LOG(DEBUG, txq, "nb_segs to free %u\n", nb_segs); 768 while (nb_segs) { 769 /* It's like consuming rxbuf in recv() */ 770 ecore_chain_consume(&txq->tx_pbl); 771 txq->nb_tx_avail++; 772 nb_segs--; 773 } 774 rte_pktmbuf_free(mbuf); 775 txq->sw_tx_ring[idx].mbuf = NULL; 776 txq->sw_tx_cons++; 777 PMD_TX_LOG(DEBUG, txq, "Freed tx packet\n"); 778 } else { 779 ecore_chain_consume(&txq->tx_pbl); 780 txq->nb_tx_avail++; 781 } 782 } 783 784 static inline void 785 qede_process_tx_compl(__rte_unused struct ecore_dev *edev, 786 struct qede_tx_queue *txq) 787 { 788 uint16_t hw_bd_cons; 789 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 790 uint16_t sw_tx_cons; 791 #endif 792 793 rte_compiler_barrier(); 794 hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr); 795 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 796 sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl); 797 PMD_TX_LOG(DEBUG, txq, "Tx Completions = %u\n", 798 abs(hw_bd_cons - sw_tx_cons)); 799 #endif 800 while (hw_bd_cons != ecore_chain_get_cons_idx(&txq->tx_pbl)) 801 qede_free_tx_pkt(txq); 802 } 803 804 static int qede_drain_txq(struct qede_dev *qdev, 805 struct qede_tx_queue *txq, bool allow_drain) 806 { 807 struct ecore_dev *edev = &qdev->edev; 808 int rc, cnt = 1000; 809 810 while (txq->sw_tx_cons != txq->sw_tx_prod) { 811 qede_process_tx_compl(edev, txq); 812 if (!cnt) { 813 if (allow_drain) { 814 DP_ERR(edev, "Tx queue[%u] is stuck," 815 "requesting MCP to drain\n", 816 txq->queue_id); 817 rc = qdev->ops->common->drain(edev); 818 if (rc) 819 return rc; 820 return qede_drain_txq(qdev, txq, false); 821 } 822 DP_ERR(edev, "Timeout 
waiting for tx queue[%d]:" 823 "PROD=%d, CONS=%d\n", 824 txq->queue_id, txq->sw_tx_prod, 825 txq->sw_tx_cons); 826 return -1; 827 } 828 cnt--; 829 DELAY(1000); 830 rte_compiler_barrier(); 831 } 832 833 /* FW finished processing, wait for HW to transmit all tx packets */ 834 DELAY(2000); 835 836 return 0; 837 } 838 839 /* Stops a given TX queue in the HW */ 840 static int qede_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id) 841 { 842 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 843 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 844 struct ecore_hwfn *p_hwfn; 845 struct qede_tx_queue *txq; 846 int hwfn_index; 847 int rc; 848 849 if (tx_queue_id < eth_dev->data->nb_tx_queues) { 850 txq = eth_dev->data->tx_queues[tx_queue_id]; 851 /* Drain txq */ 852 if (qede_drain_txq(qdev, txq, true)) 853 return -1; /* For the lack of retcodes */ 854 /* Stop txq */ 855 hwfn_index = tx_queue_id % edev->num_hwfns; 856 p_hwfn = &edev->hwfns[hwfn_index]; 857 rc = ecore_eth_tx_queue_stop(p_hwfn, txq->handle); 858 if (rc != ECORE_SUCCESS) { 859 DP_ERR(edev, "TX queue %u stop fails\n", tx_queue_id); 860 return -1; 861 } 862 qede_tx_queue_release_mbufs(txq); 863 qede_tx_queue_reset(qdev, txq); 864 eth_dev->data->tx_queue_state[tx_queue_id] = 865 RTE_ETH_QUEUE_STATE_STOPPED; 866 DP_INFO(edev, "TX queue %u stopped\n", tx_queue_id); 867 } else { 868 DP_ERR(edev, "TX queue %u is not in range\n", tx_queue_id); 869 rc = -EINVAL; 870 } 871 872 return rc; 873 } 874 875 int qede_start_queues(struct rte_eth_dev *eth_dev) 876 { 877 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 878 uint8_t id; 879 int rc = -1; 880 881 for_each_rss(id) { 882 rc = qede_rx_queue_start(eth_dev, id); 883 if (rc != ECORE_SUCCESS) 884 return -1; 885 } 886 887 for_each_tss(id) { 888 rc = qede_tx_queue_start(eth_dev, id); 889 if (rc != ECORE_SUCCESS) 890 return -1; 891 } 892 893 return rc; 894 } 895 896 void qede_stop_queues(struct rte_eth_dev *eth_dev) 897 { 898 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 899 uint8_t id; 900 901 /* Stopping RX/TX queues */ 902 for_each_tss(id) { 903 qede_tx_queue_stop(eth_dev, id); 904 } 905 906 for_each_rss(id) { 907 qede_rx_queue_stop(eth_dev, id); 908 } 909 } 910 911 static inline bool qede_tunn_exist(uint16_t flag) 912 { 913 return !!((PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK << 914 PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT) & flag); 915 } 916 917 static inline uint8_t qede_check_tunn_csum_l3(uint16_t flag) 918 { 919 return !!((PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK << 920 PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT) & flag); 921 } 922 923 /* 924 * qede_check_tunn_csum_l4: 925 * Returns: 926 * 1 : If L4 csum is enabled AND if the validation has failed. 
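 *     In that case the Rx path marks the packet as bad, e.g. (sketch
 *     mirroring the caller in qede_recv_pkts() below):
 *
 *       if (unlikely(qede_check_tunn_csum_l4(parse_flag)))
 *               ol_flags |= PKT_RX_L4_CKSUM_BAD;
 *       else
 *               ol_flags |= PKT_RX_L4_CKSUM_GOOD;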
927 * 0 : Otherwise 928 */ 929 static inline uint8_t qede_check_tunn_csum_l4(uint16_t flag) 930 { 931 if ((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK << 932 PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT) & flag) 933 return !!((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK << 934 PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT) & flag); 935 936 return 0; 937 } 938 939 static inline uint8_t qede_check_notunn_csum_l4(uint16_t flag) 940 { 941 if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK << 942 PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag) 943 return !!((PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK << 944 PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT) & flag); 945 946 return 0; 947 } 948 949 /* Returns outer L2, L3 and L4 packet_type for tunneled packets */ 950 static inline uint32_t qede_rx_cqe_to_pkt_type_outer(struct rte_mbuf *m) 951 { 952 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 953 struct ether_hdr *eth_hdr; 954 struct ipv4_hdr *ipv4_hdr; 955 struct ipv6_hdr *ipv6_hdr; 956 struct vlan_hdr *vlan_hdr; 957 uint16_t ethertype; 958 bool vlan_tagged = 0; 959 uint16_t len; 960 961 eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 962 len = sizeof(struct ether_hdr); 963 ethertype = rte_cpu_to_be_16(eth_hdr->ether_type); 964 965 /* Note: Valid only if VLAN stripping is disabled */ 966 if (ethertype == ETHER_TYPE_VLAN) { 967 vlan_tagged = 1; 968 vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); 969 len += sizeof(struct vlan_hdr); 970 ethertype = rte_cpu_to_be_16(vlan_hdr->eth_proto); 971 } 972 973 if (ethertype == ETHER_TYPE_IPv4) { 974 packet_type |= RTE_PTYPE_L3_IPV4; 975 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, len); 976 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) 977 packet_type |= RTE_PTYPE_L4_TCP; 978 else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) 979 packet_type |= RTE_PTYPE_L4_UDP; 980 } else if (ethertype == ETHER_TYPE_IPv6) { 981 packet_type |= RTE_PTYPE_L3_IPV6; 982 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, len); 983 if (ipv6_hdr->proto == IPPROTO_TCP) 984 packet_type |= RTE_PTYPE_L4_TCP; 985 else if (ipv6_hdr->proto == IPPROTO_UDP) 986 packet_type |= RTE_PTYPE_L4_UDP; 987 } 988 989 if (vlan_tagged) 990 packet_type |= RTE_PTYPE_L2_ETHER_VLAN; 991 else 992 packet_type |= RTE_PTYPE_L2_ETHER; 993 994 return packet_type; 995 } 996 997 static inline uint32_t qede_rx_cqe_to_pkt_type_inner(uint16_t flags) 998 { 999 uint16_t val; 1000 1001 /* Lookup table */ 1002 static const uint32_t 1003 ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = { 1004 [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_INNER_L3_IPV4 | 1005 RTE_PTYPE_INNER_L2_ETHER, 1006 [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_INNER_L3_IPV6 | 1007 RTE_PTYPE_INNER_L2_ETHER, 1008 [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_INNER_L3_IPV4 | 1009 RTE_PTYPE_INNER_L4_TCP | 1010 RTE_PTYPE_INNER_L2_ETHER, 1011 [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_INNER_L3_IPV6 | 1012 RTE_PTYPE_INNER_L4_TCP | 1013 RTE_PTYPE_INNER_L2_ETHER, 1014 [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_INNER_L3_IPV4 | 1015 RTE_PTYPE_INNER_L4_UDP | 1016 RTE_PTYPE_INNER_L2_ETHER, 1017 [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_INNER_L3_IPV6 | 1018 RTE_PTYPE_INNER_L4_UDP | 1019 RTE_PTYPE_INNER_L2_ETHER, 1020 /* Frags with no VLAN */ 1021 [QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_INNER_L3_IPV4 | 1022 RTE_PTYPE_INNER_L4_FRAG | 1023 RTE_PTYPE_INNER_L2_ETHER, 1024 [QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_INNER_L3_IPV6 | 1025 RTE_PTYPE_INNER_L4_FRAG | 1026 RTE_PTYPE_INNER_L2_ETHER, 1027 /* VLANs */ 1028 [QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_INNER_L3_IPV4 | 1029 
RTE_PTYPE_INNER_L2_ETHER_VLAN, 1030 [QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_INNER_L3_IPV6 | 1031 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1032 [QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV4 | 1033 RTE_PTYPE_INNER_L4_TCP | 1034 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1035 [QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV6 | 1036 RTE_PTYPE_INNER_L4_TCP | 1037 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1038 [QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV4 | 1039 RTE_PTYPE_INNER_L4_UDP | 1040 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1041 [QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV6 | 1042 RTE_PTYPE_INNER_L4_UDP | 1043 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1044 /* Frags with VLAN */ 1045 [QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV4 | 1046 RTE_PTYPE_INNER_L4_FRAG | 1047 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1048 [QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV6 | 1049 RTE_PTYPE_INNER_L4_FRAG | 1050 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1051 }; 1052 1053 /* Bits (0..3) provides L3/L4 protocol type */ 1054 /* Bits (4,5) provides frag and VLAN info */ 1055 val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK << 1056 PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) | 1057 (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK << 1058 PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) | 1059 (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK << 1060 PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) | 1061 (PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK << 1062 PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags; 1063 1064 if (val < QEDE_PKT_TYPE_MAX) 1065 return ptype_lkup_tbl[val]; 1066 1067 return RTE_PTYPE_UNKNOWN; 1068 } 1069 1070 static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags) 1071 { 1072 uint16_t val; 1073 1074 /* Lookup table */ 1075 static const uint32_t 1076 ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = { 1077 [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER, 1078 [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER, 1079 [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_L3_IPV4 | 1080 RTE_PTYPE_L4_TCP | 1081 RTE_PTYPE_L2_ETHER, 1082 [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_L3_IPV6 | 1083 RTE_PTYPE_L4_TCP | 1084 RTE_PTYPE_L2_ETHER, 1085 [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_L3_IPV4 | 1086 RTE_PTYPE_L4_UDP | 1087 RTE_PTYPE_L2_ETHER, 1088 [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_L3_IPV6 | 1089 RTE_PTYPE_L4_UDP | 1090 RTE_PTYPE_L2_ETHER, 1091 /* Frags with no VLAN */ 1092 [QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_L3_IPV4 | 1093 RTE_PTYPE_L4_FRAG | 1094 RTE_PTYPE_L2_ETHER, 1095 [QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_L3_IPV6 | 1096 RTE_PTYPE_L4_FRAG | 1097 RTE_PTYPE_L2_ETHER, 1098 /* VLANs */ 1099 [QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_L3_IPV4 | 1100 RTE_PTYPE_L2_ETHER_VLAN, 1101 [QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_L3_IPV6 | 1102 RTE_PTYPE_L2_ETHER_VLAN, 1103 [QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_L3_IPV4 | 1104 RTE_PTYPE_L4_TCP | 1105 RTE_PTYPE_L2_ETHER_VLAN, 1106 [QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_L3_IPV6 | 1107 RTE_PTYPE_L4_TCP | 1108 RTE_PTYPE_L2_ETHER_VLAN, 1109 [QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_L3_IPV4 | 1110 RTE_PTYPE_L4_UDP | 1111 RTE_PTYPE_L2_ETHER_VLAN, 1112 [QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_L3_IPV6 | 1113 RTE_PTYPE_L4_UDP | 1114 RTE_PTYPE_L2_ETHER_VLAN, 1115 /* Frags with VLAN */ 1116 [QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_L3_IPV4 | 1117 RTE_PTYPE_L4_FRAG | 1118 RTE_PTYPE_L2_ETHER_VLAN, 1119 [QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_L3_IPV6 | 1120 RTE_PTYPE_L4_FRAG | 1121 RTE_PTYPE_L2_ETHER_VLAN, 1122 }; 1123 1124 /* Bits (0..3) provides L3/L4 protocol type */ 1125 /* Bits (4,5) provides frag and VLAN info */ 1126 val = 
((PARSING_AND_ERR_FLAGS_L3TYPE_MASK << 1127 PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) | 1128 (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK << 1129 PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) | 1130 (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK << 1131 PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) | 1132 (PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK << 1133 PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags; 1134 1135 if (val < QEDE_PKT_TYPE_MAX) 1136 return ptype_lkup_tbl[val]; 1137 1138 return RTE_PTYPE_UNKNOWN; 1139 } 1140 1141 static inline uint8_t 1142 qede_check_notunn_csum_l3(struct rte_mbuf *m, uint16_t flag) 1143 { 1144 struct ipv4_hdr *ip; 1145 uint16_t pkt_csum; 1146 uint16_t calc_csum; 1147 uint16_t val; 1148 1149 val = ((PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK << 1150 PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT) & flag); 1151 1152 if (unlikely(val)) { 1153 m->packet_type = qede_rx_cqe_to_pkt_type(flag); 1154 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 1155 ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, 1156 sizeof(struct ether_hdr)); 1157 pkt_csum = ip->hdr_checksum; 1158 ip->hdr_checksum = 0; 1159 calc_csum = rte_ipv4_cksum(ip); 1160 ip->hdr_checksum = pkt_csum; 1161 return (calc_csum != pkt_csum); 1162 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 1163 return 1; 1164 } 1165 } 1166 return 0; 1167 } 1168 1169 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq) 1170 { 1171 ecore_chain_consume(&rxq->rx_bd_ring); 1172 rxq->sw_rx_cons++; 1173 } 1174 1175 static inline void 1176 qede_reuse_page(__rte_unused struct qede_dev *qdev, 1177 struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons) 1178 { 1179 struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring); 1180 uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); 1181 struct qede_rx_entry *curr_prod; 1182 dma_addr_t new_mapping; 1183 1184 curr_prod = &rxq->sw_rx_ring[idx]; 1185 *curr_prod = *curr_cons; 1186 1187 new_mapping = rte_mbuf_data_iova_default(curr_prod->mbuf) + 1188 curr_prod->page_offset; 1189 1190 rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping)); 1191 rx_bd_prod->addr.lo = rte_cpu_to_le_32(U64_LO(new_mapping)); 1192 1193 rxq->sw_rx_prod++; 1194 } 1195 1196 static inline void 1197 qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, 1198 struct qede_dev *qdev, uint8_t count) 1199 { 1200 struct qede_rx_entry *curr_cons; 1201 1202 for (; count > 0; count--) { 1203 curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)]; 1204 qede_reuse_page(qdev, rxq, curr_cons); 1205 qede_rx_bd_ring_consume(rxq); 1206 } 1207 } 1208 1209 static inline void 1210 qede_rx_process_tpa_cmn_cont_end_cqe(__rte_unused struct qede_dev *qdev, 1211 struct qede_rx_queue *rxq, 1212 uint8_t agg_index, uint16_t len) 1213 { 1214 struct qede_agg_info *tpa_info; 1215 struct rte_mbuf *curr_frag; /* Pointer to currently filled TPA seg */ 1216 uint16_t cons_idx; 1217 1218 /* Under certain conditions it is possible that FW may not consume 1219 * additional or new BD. So decision to consume the BD must be made 1220 * based on len_list[0]. 
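	 *
	 * Summarising the branch below:
	 *
	 *   len_list[0] == 0 -> FW used no new buffer; leave the BD ring
	 *                       and sw_rx_ring untouched.
	 *   len_list[0] != 0 -> that many bytes landed in the next BD;
	 *                       append its mbuf to tpa_tail, consume the
	 *                       BD and post a replacement buffer.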
1221 */ 1222 if (rte_le_to_cpu_16(len)) { 1223 tpa_info = &rxq->tpa_info[agg_index]; 1224 cons_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq); 1225 curr_frag = rxq->sw_rx_ring[cons_idx].mbuf; 1226 assert(curr_frag); 1227 curr_frag->nb_segs = 1; 1228 curr_frag->pkt_len = rte_le_to_cpu_16(len); 1229 curr_frag->data_len = curr_frag->pkt_len; 1230 tpa_info->tpa_tail->next = curr_frag; 1231 tpa_info->tpa_tail = curr_frag; 1232 qede_rx_bd_ring_consume(rxq); 1233 if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) { 1234 PMD_RX_LOG(ERR, rxq, "mbuf allocation fails\n"); 1235 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; 1236 rxq->rx_alloc_errors++; 1237 } 1238 } 1239 } 1240 1241 static inline void 1242 qede_rx_process_tpa_cont_cqe(struct qede_dev *qdev, 1243 struct qede_rx_queue *rxq, 1244 struct eth_fast_path_rx_tpa_cont_cqe *cqe) 1245 { 1246 PMD_RX_LOG(INFO, rxq, "TPA cont[%d] - len [%d]\n", 1247 cqe->tpa_agg_index, rte_le_to_cpu_16(cqe->len_list[0])); 1248 /* only len_list[0] will have value */ 1249 qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index, 1250 cqe->len_list[0]); 1251 } 1252 1253 static inline void 1254 qede_rx_process_tpa_end_cqe(struct qede_dev *qdev, 1255 struct qede_rx_queue *rxq, 1256 struct eth_fast_path_rx_tpa_end_cqe *cqe) 1257 { 1258 struct rte_mbuf *rx_mb; /* Pointer to head of the chained agg */ 1259 1260 qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index, 1261 cqe->len_list[0]); 1262 /* Update total length and frags based on end TPA */ 1263 rx_mb = rxq->tpa_info[cqe->tpa_agg_index].tpa_head; 1264 /* TODO: Add Sanity Checks */ 1265 rx_mb->nb_segs = cqe->num_of_bds; 1266 rx_mb->pkt_len = cqe->total_packet_len; 1267 1268 PMD_RX_LOG(INFO, rxq, "TPA End[%d] reason %d cqe_len %d nb_segs %d" 1269 " pkt_len %d\n", cqe->tpa_agg_index, cqe->end_reason, 1270 rte_le_to_cpu_16(cqe->len_list[0]), rx_mb->nb_segs, 1271 rx_mb->pkt_len); 1272 } 1273 1274 static inline uint32_t qede_rx_cqe_to_tunn_pkt_type(uint16_t flags) 1275 { 1276 uint32_t val; 1277 1278 /* Lookup table */ 1279 static const uint32_t 1280 ptype_tunn_lkup_tbl[QEDE_PKT_TYPE_TUNN_MAX_TYPE] __rte_cache_aligned = { 1281 [QEDE_PKT_TYPE_UNKNOWN] = RTE_PTYPE_UNKNOWN, 1282 [QEDE_PKT_TYPE_TUNN_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE, 1283 [QEDE_PKT_TYPE_TUNN_GRE] = RTE_PTYPE_TUNNEL_GRE, 1284 [QEDE_PKT_TYPE_TUNN_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN, 1285 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GENEVE] = 1286 RTE_PTYPE_TUNNEL_GENEVE, 1287 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GRE] = 1288 RTE_PTYPE_TUNNEL_GRE, 1289 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_VXLAN] = 1290 RTE_PTYPE_TUNNEL_VXLAN, 1291 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GENEVE] = 1292 RTE_PTYPE_TUNNEL_GENEVE, 1293 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GRE] = 1294 RTE_PTYPE_TUNNEL_GRE, 1295 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_VXLAN] = 1296 RTE_PTYPE_TUNNEL_VXLAN, 1297 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GENEVE] = 1298 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4, 1299 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GRE] = 1300 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4, 1301 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_VXLAN] = 1302 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4, 1303 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GENEVE] = 1304 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4, 1305 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GRE] = 1306 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4, 1307 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_VXLAN] = 1308 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4, 1309 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GENEVE] = 1310 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6, 1311 
[QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GRE] = 1312 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6, 1313 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_VXLAN] = 1314 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6, 1315 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GENEVE] = 1316 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6, 1317 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GRE] = 1318 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6, 1319 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_VXLAN] = 1320 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6, 1321 }; 1322 1323 /* Cover bits[4-0] to include tunn_type and next protocol */ 1324 val = ((ETH_TUNNEL_PARSING_FLAGS_TYPE_MASK << 1325 ETH_TUNNEL_PARSING_FLAGS_TYPE_SHIFT) | 1326 (ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK << 1327 ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT)) & flags; 1328 1329 if (val < QEDE_PKT_TYPE_TUNN_MAX_TYPE) 1330 return ptype_tunn_lkup_tbl[val]; 1331 else 1332 return RTE_PTYPE_UNKNOWN; 1333 } 1334 1335 static inline int 1336 qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb, 1337 uint8_t num_segs, uint16_t pkt_len) 1338 { 1339 struct qede_rx_queue *rxq = p_rxq; 1340 struct qede_dev *qdev = rxq->qdev; 1341 register struct rte_mbuf *seg1 = NULL; 1342 register struct rte_mbuf *seg2 = NULL; 1343 uint16_t sw_rx_index; 1344 uint16_t cur_size; 1345 1346 seg1 = rx_mb; 1347 while (num_segs) { 1348 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size : 1349 pkt_len; 1350 if (unlikely(!cur_size)) { 1351 PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs" 1352 " left for mapping jumbo\n", num_segs); 1353 qede_recycle_rx_bd_ring(rxq, qdev, num_segs); 1354 return -EINVAL; 1355 } 1356 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq); 1357 seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf; 1358 qede_rx_bd_ring_consume(rxq); 1359 pkt_len -= cur_size; 1360 seg2->data_len = cur_size; 1361 seg1->next = seg2; 1362 seg1 = seg1->next; 1363 num_segs--; 1364 rxq->rx_segs++; 1365 } 1366 1367 return 0; 1368 } 1369 1370 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1371 static inline void 1372 print_rx_bd_info(struct rte_mbuf *m, struct qede_rx_queue *rxq, 1373 uint8_t bitfield) 1374 { 1375 PMD_RX_LOG(INFO, rxq, 1376 "len 0x%04x bf 0x%04x hash_val 0x%x" 1377 " ol_flags 0x%04lx l2=%s l3=%s l4=%s tunn=%s" 1378 " inner_l2=%s inner_l3=%s inner_l4=%s\n", 1379 m->data_len, bitfield, m->hash.rss, 1380 (unsigned long)m->ol_flags, 1381 rte_get_ptype_l2_name(m->packet_type), 1382 rte_get_ptype_l3_name(m->packet_type), 1383 rte_get_ptype_l4_name(m->packet_type), 1384 rte_get_ptype_tunnel_name(m->packet_type), 1385 rte_get_ptype_inner_l2_name(m->packet_type), 1386 rte_get_ptype_inner_l3_name(m->packet_type), 1387 rte_get_ptype_inner_l4_name(m->packet_type)); 1388 } 1389 #endif 1390 1391 uint16_t 1392 qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 1393 { 1394 struct qede_rx_queue *rxq = p_rxq; 1395 struct qede_dev *qdev = rxq->qdev; 1396 struct ecore_dev *edev = &qdev->edev; 1397 uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index; 1398 uint16_t rx_pkt = 0; 1399 union eth_rx_cqe *cqe; 1400 struct eth_fast_path_rx_reg_cqe *fp_cqe = NULL; 1401 register struct rte_mbuf *rx_mb = NULL; 1402 register struct rte_mbuf *seg1 = NULL; 1403 enum eth_rx_cqe_type cqe_type; 1404 uint16_t pkt_len = 0; /* Sum of all BD segments */ 1405 uint16_t len; /* Length of first BD */ 1406 uint8_t num_segs = 1; 1407 uint16_t preload_idx; 1408 uint16_t parse_flag; 1409 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1410 uint8_t bitfield_val; 1411 #endif 1412 uint8_t tunn_parse_flag; 1413 struct eth_fast_path_rx_tpa_start_cqe *cqe_start_tpa; 1414 uint64_t 
ol_flags; 1415 uint32_t packet_type; 1416 uint16_t vlan_tci; 1417 bool tpa_start_flg; 1418 uint8_t offset, tpa_agg_idx, flags; 1419 struct qede_agg_info *tpa_info = NULL; 1420 uint32_t rss_hash; 1421 int rx_alloc_count = 0; 1422 1423 hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr); 1424 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); 1425 1426 rte_rmb(); 1427 1428 if (hw_comp_cons == sw_comp_cons) 1429 return 0; 1430 1431 /* Allocate buffers that we used in previous loop */ 1432 if (rxq->rx_alloc_count) { 1433 if (unlikely(qede_alloc_rx_bulk_mbufs(rxq, 1434 rxq->rx_alloc_count))) { 1435 struct rte_eth_dev *dev; 1436 1437 PMD_RX_LOG(ERR, rxq, 1438 "New buffer allocation failed," 1439 "dropping incoming packetn"); 1440 dev = &rte_eth_devices[rxq->port_id]; 1441 dev->data->rx_mbuf_alloc_failed += 1442 rxq->rx_alloc_count; 1443 rxq->rx_alloc_errors += rxq->rx_alloc_count; 1444 return 0; 1445 } 1446 qede_update_rx_prod(qdev, rxq); 1447 rxq->rx_alloc_count = 0; 1448 } 1449 1450 while (sw_comp_cons != hw_comp_cons) { 1451 ol_flags = 0; 1452 packet_type = RTE_PTYPE_UNKNOWN; 1453 vlan_tci = 0; 1454 tpa_start_flg = false; 1455 rss_hash = 0; 1456 1457 /* Get the CQE from the completion ring */ 1458 cqe = 1459 (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring); 1460 cqe_type = cqe->fast_path_regular.type; 1461 PMD_RX_LOG(INFO, rxq, "Rx CQE type %d\n", cqe_type); 1462 1463 switch (cqe_type) { 1464 case ETH_RX_CQE_TYPE_REGULAR: 1465 fp_cqe = &cqe->fast_path_regular; 1466 break; 1467 case ETH_RX_CQE_TYPE_TPA_START: 1468 cqe_start_tpa = &cqe->fast_path_tpa_start; 1469 tpa_info = &rxq->tpa_info[cqe_start_tpa->tpa_agg_index]; 1470 tpa_start_flg = true; 1471 /* Mark it as LRO packet */ 1472 ol_flags |= PKT_RX_LRO; 1473 /* In split mode, seg_len is same as len_on_first_bd 1474 * and ext_bd_len_list will be empty since there are 1475 * no additional buffers 1476 */ 1477 PMD_RX_LOG(INFO, rxq, 1478 "TPA start[%d] - len_on_first_bd %d header %d" 1479 " [bd_list[0] %d], [seg_len %d]\n", 1480 cqe_start_tpa->tpa_agg_index, 1481 rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd), 1482 cqe_start_tpa->header_len, 1483 rte_le_to_cpu_16(cqe_start_tpa->ext_bd_len_list[0]), 1484 rte_le_to_cpu_16(cqe_start_tpa->seg_len)); 1485 1486 break; 1487 case ETH_RX_CQE_TYPE_TPA_CONT: 1488 qede_rx_process_tpa_cont_cqe(qdev, rxq, 1489 &cqe->fast_path_tpa_cont); 1490 goto next_cqe; 1491 case ETH_RX_CQE_TYPE_TPA_END: 1492 qede_rx_process_tpa_end_cqe(qdev, rxq, 1493 &cqe->fast_path_tpa_end); 1494 tpa_agg_idx = cqe->fast_path_tpa_end.tpa_agg_index; 1495 tpa_info = &rxq->tpa_info[tpa_agg_idx]; 1496 rx_mb = rxq->tpa_info[tpa_agg_idx].tpa_head; 1497 goto tpa_end; 1498 case ETH_RX_CQE_TYPE_SLOW_PATH: 1499 PMD_RX_LOG(INFO, rxq, "Got unexpected slowpath CQE\n"); 1500 ecore_eth_cqe_completion( 1501 &edev->hwfns[rxq->queue_id % edev->num_hwfns], 1502 (struct eth_slow_path_rx_cqe *)cqe); 1503 /* fall-thru */ 1504 default: 1505 goto next_cqe; 1506 } 1507 1508 /* Get the data from the SW ring */ 1509 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq); 1510 rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf; 1511 assert(rx_mb != NULL); 1512 1513 /* Handle regular CQE or TPA start CQE */ 1514 if (!tpa_start_flg) { 1515 parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags); 1516 offset = fp_cqe->placement_offset; 1517 len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd); 1518 pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len); 1519 vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag); 1520 rss_hash = rte_le_to_cpu_32(fp_cqe->rss_hash); 1521 #ifdef 
RTE_LIBRTE_QEDE_DEBUG_RX 1522 bitfield_val = fp_cqe->bitfields; 1523 #endif 1524 } else { 1525 parse_flag = 1526 rte_le_to_cpu_16(cqe_start_tpa->pars_flags.flags); 1527 offset = cqe_start_tpa->placement_offset; 1528 /* seg_len = len_on_first_bd */ 1529 len = rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd); 1530 vlan_tci = rte_le_to_cpu_16(cqe_start_tpa->vlan_tag); 1531 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1532 bitfield_val = cqe_start_tpa->bitfields; 1533 #endif 1534 rss_hash = rte_le_to_cpu_32(cqe_start_tpa->rss_hash); 1535 } 1536 if (qede_tunn_exist(parse_flag)) { 1537 PMD_RX_LOG(INFO, rxq, "Rx tunneled packet\n"); 1538 if (unlikely(qede_check_tunn_csum_l4(parse_flag))) { 1539 PMD_RX_LOG(ERR, rxq, 1540 "L4 csum failed, flags = 0x%x\n", 1541 parse_flag); 1542 rxq->rx_hw_errors++; 1543 ol_flags |= PKT_RX_L4_CKSUM_BAD; 1544 } else { 1545 ol_flags |= PKT_RX_L4_CKSUM_GOOD; 1546 } 1547 1548 if (unlikely(qede_check_tunn_csum_l3(parse_flag))) { 1549 PMD_RX_LOG(ERR, rxq, 1550 "Outer L3 csum failed, flags = 0x%x\n", 1551 parse_flag); 1552 rxq->rx_hw_errors++; 1553 ol_flags |= PKT_RX_EIP_CKSUM_BAD; 1554 } else { 1555 ol_flags |= PKT_RX_IP_CKSUM_GOOD; 1556 } 1557 1558 if (tpa_start_flg) 1559 flags = cqe_start_tpa->tunnel_pars_flags.flags; 1560 else 1561 flags = fp_cqe->tunnel_pars_flags.flags; 1562 tunn_parse_flag = flags; 1563 1564 /* Tunnel_type */ 1565 packet_type = 1566 qede_rx_cqe_to_tunn_pkt_type(tunn_parse_flag); 1567 1568 /* Inner header */ 1569 packet_type |= 1570 qede_rx_cqe_to_pkt_type_inner(parse_flag); 1571 1572 /* Outer L3/L4 types is not available in CQE */ 1573 packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb); 1574 1575 /* Outer L3/L4 types is not available in CQE. 1576 * Need to add offset to parse correctly, 1577 */ 1578 rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM; 1579 packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb); 1580 } else { 1581 packet_type |= qede_rx_cqe_to_pkt_type(parse_flag); 1582 } 1583 1584 /* Common handling for non-tunnel packets and for inner 1585 * headers in the case of tunnel. 
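		 *
		 * Net effect of the checks that follow on the mbuf flags:
		 *
		 *   L4 csum reported bad  -> PKT_RX_L4_CKSUM_BAD, rx_hw_errors++
		 *   L4 csum ok/not calc.  -> PKT_RX_L4_CKSUM_GOOD
		 *   IP csum reported bad  -> PKT_RX_IP_CKSUM_BAD, rx_hw_errors++
		 *   IP csum ok            -> PKT_RX_IP_CKSUM_GOOD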
1586 */ 1587 if (unlikely(qede_check_notunn_csum_l4(parse_flag))) { 1588 PMD_RX_LOG(ERR, rxq, 1589 "L4 csum failed, flags = 0x%x\n", 1590 parse_flag); 1591 rxq->rx_hw_errors++; 1592 ol_flags |= PKT_RX_L4_CKSUM_BAD; 1593 } else { 1594 ol_flags |= PKT_RX_L4_CKSUM_GOOD; 1595 } 1596 if (unlikely(qede_check_notunn_csum_l3(rx_mb, parse_flag))) { 1597 PMD_RX_LOG(ERR, rxq, "IP csum failed, flags = 0x%x\n", 1598 parse_flag); 1599 rxq->rx_hw_errors++; 1600 ol_flags |= PKT_RX_IP_CKSUM_BAD; 1601 } else { 1602 ol_flags |= PKT_RX_IP_CKSUM_GOOD; 1603 } 1604 1605 if (CQE_HAS_VLAN(parse_flag) || 1606 CQE_HAS_OUTER_VLAN(parse_flag)) { 1607 /* Note: FW doesn't indicate Q-in-Q packet */ 1608 ol_flags |= PKT_RX_VLAN; 1609 if (qdev->vlan_strip_flg) { 1610 ol_flags |= PKT_RX_VLAN_STRIPPED; 1611 rx_mb->vlan_tci = vlan_tci; 1612 } 1613 } 1614 1615 /* RSS Hash */ 1616 if (qdev->rss_enable) { 1617 ol_flags |= PKT_RX_RSS_HASH; 1618 rx_mb->hash.rss = rss_hash; 1619 } 1620 1621 rx_alloc_count++; 1622 qede_rx_bd_ring_consume(rxq); 1623 1624 if (!tpa_start_flg && fp_cqe->bd_num > 1) { 1625 PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs" 1626 " len on first: %04x Total Len: %04x", 1627 fp_cqe->bd_num, len, pkt_len); 1628 num_segs = fp_cqe->bd_num - 1; 1629 seg1 = rx_mb; 1630 if (qede_process_sg_pkts(p_rxq, seg1, num_segs, 1631 pkt_len - len)) 1632 goto next_cqe; 1633 1634 rx_alloc_count += num_segs; 1635 rxq->rx_segs += num_segs; 1636 } 1637 rxq->rx_segs++; /* for the first segment */ 1638 1639 /* Prefetch next mbuf while processing current one. */ 1640 preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq); 1641 rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf); 1642 1643 /* Update rest of the MBUF fields */ 1644 rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM; 1645 rx_mb->port = rxq->port_id; 1646 rx_mb->ol_flags = ol_flags; 1647 rx_mb->data_len = len; 1648 rx_mb->packet_type = packet_type; 1649 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1650 print_rx_bd_info(rx_mb, rxq, bitfield_val); 1651 #endif 1652 if (!tpa_start_flg) { 1653 rx_mb->nb_segs = fp_cqe->bd_num; 1654 rx_mb->pkt_len = pkt_len; 1655 } else { 1656 /* store ref to the updated mbuf */ 1657 tpa_info->tpa_head = rx_mb; 1658 tpa_info->tpa_tail = tpa_info->tpa_head; 1659 } 1660 rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *)); 1661 tpa_end: 1662 if (!tpa_start_flg) { 1663 rx_pkts[rx_pkt] = rx_mb; 1664 rx_pkt++; 1665 } 1666 next_cqe: 1667 ecore_chain_recycle_consumed(&rxq->rx_comp_ring); 1668 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); 1669 if (rx_pkt == nb_pkts) { 1670 PMD_RX_LOG(DEBUG, rxq, 1671 "Budget reached nb_pkts=%u received=%u", 1672 rx_pkt, nb_pkts); 1673 break; 1674 } 1675 } 1676 1677 /* Request number of bufferes to be allocated in next loop */ 1678 rxq->rx_alloc_count = rx_alloc_count; 1679 1680 rxq->rcv_pkts += rx_pkt; 1681 1682 PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d", rx_pkt, rte_lcore_id()); 1683 1684 return rx_pkt; 1685 } 1686 1687 1688 /* Populate scatter gather buffer descriptor fields */ 1689 static inline uint16_t 1690 qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg, 1691 struct eth_tx_2nd_bd **bd2, struct eth_tx_3rd_bd **bd3, 1692 uint16_t start_seg) 1693 { 1694 struct qede_tx_queue *txq = p_txq; 1695 struct eth_tx_bd *tx_bd = NULL; 1696 dma_addr_t mapping; 1697 uint16_t nb_segs = 0; 1698 1699 /* Check for scattered buffers */ 1700 while (m_seg) { 1701 if (start_seg == 0) { 1702 if (!*bd2) { 1703 *bd2 = (struct eth_tx_2nd_bd *) 1704 ecore_chain_produce(&txq->tx_pbl); 1705 memset(*bd2, 0, sizeof(struct 
eth_tx_2nd_bd)); 1706 nb_segs++; 1707 } 1708 mapping = rte_mbuf_data_iova(m_seg); 1709 QEDE_BD_SET_ADDR_LEN(*bd2, mapping, m_seg->data_len); 1710 PMD_TX_LOG(DEBUG, txq, "BD2 len %04x", m_seg->data_len); 1711 } else if (start_seg == 1) { 1712 if (!*bd3) { 1713 *bd3 = (struct eth_tx_3rd_bd *) 1714 ecore_chain_produce(&txq->tx_pbl); 1715 memset(*bd3, 0, sizeof(struct eth_tx_3rd_bd)); 1716 nb_segs++; 1717 } 1718 mapping = rte_mbuf_data_iova(m_seg); 1719 QEDE_BD_SET_ADDR_LEN(*bd3, mapping, m_seg->data_len); 1720 PMD_TX_LOG(DEBUG, txq, "BD3 len %04x", m_seg->data_len); 1721 } else { 1722 tx_bd = (struct eth_tx_bd *) 1723 ecore_chain_produce(&txq->tx_pbl); 1724 memset(tx_bd, 0, sizeof(*tx_bd)); 1725 nb_segs++; 1726 mapping = rte_mbuf_data_iova(m_seg); 1727 QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len); 1728 PMD_TX_LOG(DEBUG, txq, "BD len %04x", m_seg->data_len); 1729 } 1730 start_seg++; 1731 m_seg = m_seg->next; 1732 } 1733 1734 /* Return total scattered buffers */ 1735 return nb_segs; 1736 } 1737 1738 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1739 static inline void 1740 print_tx_bd_info(struct qede_tx_queue *txq, 1741 struct eth_tx_1st_bd *bd1, 1742 struct eth_tx_2nd_bd *bd2, 1743 struct eth_tx_3rd_bd *bd3, 1744 uint64_t tx_ol_flags) 1745 { 1746 char ol_buf[256] = { 0 }; /* for verbose prints */ 1747 1748 if (bd1) 1749 PMD_TX_LOG(INFO, txq, 1750 "BD1: nbytes=0x%04x nbds=0x%04x bd_flags=0x%04x bf=0x%04x", 1751 rte_cpu_to_le_16(bd1->nbytes), bd1->data.nbds, 1752 bd1->data.bd_flags.bitfields, 1753 rte_cpu_to_le_16(bd1->data.bitfields)); 1754 if (bd2) 1755 PMD_TX_LOG(INFO, txq, 1756 "BD2: nbytes=0x%04x bf1=0x%04x bf2=0x%04x tunn_ip=0x%04x\n", 1757 rte_cpu_to_le_16(bd2->nbytes), bd2->data.bitfields1, 1758 bd2->data.bitfields2, bd2->data.tunn_ip_size); 1759 if (bd3) 1760 PMD_TX_LOG(INFO, txq, 1761 "BD3: nbytes=0x%04x bf=0x%04x MSS=0x%04x " 1762 "tunn_l4_hdr_start_offset_w=0x%04x tunn_hdr_size=0x%04x\n", 1763 rte_cpu_to_le_16(bd3->nbytes), 1764 rte_cpu_to_le_16(bd3->data.bitfields), 1765 rte_cpu_to_le_16(bd3->data.lso_mss), 1766 bd3->data.tunn_l4_hdr_start_offset_w, 1767 bd3->data.tunn_hdr_size_w); 1768 1769 rte_get_tx_ol_flag_list(tx_ol_flags, ol_buf, sizeof(ol_buf)); 1770 PMD_TX_LOG(INFO, txq, "TX offloads = %s\n", ol_buf); 1771 } 1772 #endif 1773 1774 /* TX prepare to check packets meets TX conditions */ 1775 uint16_t 1776 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1777 qede_xmit_prep_pkts(void *p_txq, struct rte_mbuf **tx_pkts, 1778 uint16_t nb_pkts) 1779 { 1780 struct qede_tx_queue *txq = p_txq; 1781 #else 1782 qede_xmit_prep_pkts(__rte_unused void *p_txq, struct rte_mbuf **tx_pkts, 1783 uint16_t nb_pkts) 1784 { 1785 #endif 1786 uint64_t ol_flags; 1787 struct rte_mbuf *m; 1788 uint16_t i; 1789 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 1790 int ret; 1791 #endif 1792 1793 for (i = 0; i < nb_pkts; i++) { 1794 m = tx_pkts[i]; 1795 ol_flags = m->ol_flags; 1796 if (ol_flags & PKT_TX_TCP_SEG) { 1797 if (m->nb_segs >= ETH_TX_MAX_BDS_PER_LSO_PACKET) { 1798 rte_errno = -EINVAL; 1799 break; 1800 } 1801 /* TBD: confirm its ~9700B for both ? 
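			 *
			 * For reference, a typical application-side TSO setup
			 * that passes this check (illustrative values, not a
			 * requirement specific to this driver):
			 *
			 *   m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 |
			 *                  PKT_TX_IP_CKSUM;
			 *   m->l2_len    = sizeof(struct ether_hdr);
			 *   m->l3_len    = sizeof(struct ipv4_hdr);
			 *   m->l4_len    = sizeof(struct tcp_hdr);
			 *   m->tso_segsz = 1448;
			 *
			 * 1448 is comfortably below the ~9700B limit noted
			 * above, so the mbuf is not rejected here.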
*/ 1802 if (m->tso_segsz > ETH_TX_MAX_NON_LSO_PKT_LEN) { 1803 rte_errno = -EINVAL; 1804 break; 1805 } 1806 } else { 1807 if (m->nb_segs >= ETH_TX_MAX_BDS_PER_NON_LSO_PACKET) { 1808 rte_errno = -EINVAL; 1809 break; 1810 } 1811 } 1812 if (ol_flags & QEDE_TX_OFFLOAD_NOTSUP_MASK) { 1813 /* We support only limited tunnel protocols */ 1814 if (ol_flags & PKT_TX_TUNNEL_MASK) { 1815 uint64_t temp; 1816 1817 temp = ol_flags & PKT_TX_TUNNEL_MASK; 1818 if (temp == PKT_TX_TUNNEL_VXLAN || 1819 temp == PKT_TX_TUNNEL_GENEVE || 1820 temp == PKT_TX_TUNNEL_MPLSINUDP || 1821 temp == PKT_TX_TUNNEL_GRE) 1822 break; 1823 } 1824 1825 rte_errno = -ENOTSUP; 1826 break; 1827 } 1828 1829 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 1830 ret = rte_validate_tx_offload(m); 1831 if (ret != 0) { 1832 rte_errno = ret; 1833 break; 1834 } 1835 #endif 1836 } 1837 1838 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1839 if (unlikely(i != nb_pkts)) 1840 PMD_TX_LOG(ERR, txq, "TX prepare failed for %u\n", 1841 nb_pkts - i); 1842 #endif 1843 return i; 1844 } 1845 1846 #define MPLSINUDP_HDR_SIZE (12) 1847 1848 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1849 static inline void 1850 qede_mpls_tunn_tx_sanity_check(struct rte_mbuf *mbuf, 1851 struct qede_tx_queue *txq) 1852 { 1853 if (((mbuf->outer_l2_len + mbuf->outer_l3_len) / 2) > 0xff) 1854 PMD_TX_LOG(ERR, txq, "tunn_l4_hdr_start_offset overflow\n"); 1855 if (((mbuf->outer_l2_len + mbuf->outer_l3_len + 1856 MPLSINUDP_HDR_SIZE) / 2) > 0xff) 1857 PMD_TX_LOG(ERR, txq, "tunn_hdr_size overflow\n"); 1858 if (((mbuf->l2_len - MPLSINUDP_HDR_SIZE) / 2) > 1859 ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK) 1860 PMD_TX_LOG(ERR, txq, "inner_l2_hdr_size overflow\n"); 1861 if (((mbuf->l2_len - MPLSINUDP_HDR_SIZE + mbuf->l3_len) / 2) > 1862 ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) 1863 PMD_TX_LOG(ERR, txq, "inner_l2_hdr_size overflow\n"); 1864 } 1865 #endif 1866 1867 uint16_t 1868 qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 1869 { 1870 struct qede_tx_queue *txq = p_txq; 1871 struct qede_dev *qdev = txq->qdev; 1872 struct ecore_dev *edev = &qdev->edev; 1873 struct rte_mbuf *mbuf; 1874 struct rte_mbuf *m_seg = NULL; 1875 uint16_t nb_tx_pkts; 1876 uint16_t bd_prod; 1877 uint16_t idx; 1878 uint16_t nb_frags; 1879 uint16_t nb_pkt_sent = 0; 1880 uint8_t nbds; 1881 bool lso_flg; 1882 bool mplsoudp_flg; 1883 __rte_unused bool tunn_flg; 1884 bool tunn_ipv6_ext_flg; 1885 struct eth_tx_1st_bd *bd1; 1886 struct eth_tx_2nd_bd *bd2; 1887 struct eth_tx_3rd_bd *bd3; 1888 uint64_t tx_ol_flags; 1889 uint16_t hdr_size; 1890 /* BD1 */ 1891 uint16_t bd1_bf; 1892 uint8_t bd1_bd_flags_bf; 1893 uint16_t vlan; 1894 /* BD2 */ 1895 uint16_t bd2_bf1; 1896 uint16_t bd2_bf2; 1897 /* BD3 */ 1898 uint16_t mss; 1899 uint16_t bd3_bf; 1900 1901 uint8_t tunn_l4_hdr_start_offset; 1902 uint8_t tunn_hdr_size; 1903 uint8_t inner_l2_hdr_size; 1904 uint16_t inner_l4_hdr_offset; 1905 1906 if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) { 1907 PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u", 1908 nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh); 1909 qede_process_tx_compl(edev, txq); 1910 } 1911 1912 nb_tx_pkts = nb_pkts; 1913 bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl)); 1914 while (nb_tx_pkts--) { 1915 /* Init flags/values */ 1916 tunn_flg = false; 1917 lso_flg = false; 1918 nbds = 0; 1919 vlan = 0; 1920 bd1 = NULL; 1921 bd2 = NULL; 1922 bd3 = NULL; 1923 hdr_size = 0; 1924 bd1_bf = 0; 1925 bd1_bd_flags_bf = 0; 1926 bd2_bf1 = 0; 1927 bd2_bf2 = 0; 1928 mss = 0; 1929 bd3_bf = 0; 1930 mplsoudp_flg = false; 
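		/* The scratch values initialised in this block are staged
		 * contents for up to three Tx descriptors and are written
		 * into the chain later in the loop (sketch of the intended
		 * mapping, based on how the fields are used below):
		 *
		 *   bd1_bf, bd1_bd_flags_bf, vlan -> 1st BD: start/csum/LSO
		 *                                    flags, VLAN insertion
		 *   bd2_bf1, bd2_bf2              -> 2nd BD: tunnel inner
		 *                                    header size, L4 offsets
		 *   mss, bd3_bf                   -> 3rd BD: LSO MSS, header
		 *                                    BD count
		 */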
uint16_t
qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct qede_tx_queue *txq = p_txq;
	struct qede_dev *qdev = txq->qdev;
	struct ecore_dev *edev = &qdev->edev;
	struct rte_mbuf *mbuf;
	struct rte_mbuf *m_seg = NULL;
	uint16_t nb_tx_pkts;
	uint16_t bd_prod;
	uint16_t idx;
	uint16_t nb_frags;
	uint16_t nb_pkt_sent = 0;
	uint8_t nbds;
	bool lso_flg;
	bool mplsoudp_flg;
	__rte_unused bool tunn_flg;
	bool tunn_ipv6_ext_flg;
	struct eth_tx_1st_bd *bd1;
	struct eth_tx_2nd_bd *bd2;
	struct eth_tx_3rd_bd *bd3;
	uint64_t tx_ol_flags;
	uint16_t hdr_size;
	/* BD1 */
	uint16_t bd1_bf;
	uint8_t bd1_bd_flags_bf;
	uint16_t vlan;
	/* BD2 */
	uint16_t bd2_bf1;
	uint16_t bd2_bf2;
	/* BD3 */
	uint16_t mss;
	uint16_t bd3_bf;

	uint8_t tunn_l4_hdr_start_offset;
	uint8_t tunn_hdr_size;
	uint8_t inner_l2_hdr_size;
	uint16_t inner_l4_hdr_offset;

	if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
		PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u",
			   nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
		qede_process_tx_compl(edev, txq);
	}

	nb_tx_pkts = nb_pkts;
	bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
	while (nb_tx_pkts--) {
		/* Init flags/values */
		tunn_flg = false;
		lso_flg = false;
		nbds = 0;
		vlan = 0;
		bd1 = NULL;
		bd2 = NULL;
		bd3 = NULL;
		hdr_size = 0;
		bd1_bf = 0;
		bd1_bd_flags_bf = 0;
		bd2_bf1 = 0;
		bd2_bf2 = 0;
		mss = 0;
		bd3_bf = 0;
		mplsoudp_flg = false;
		tunn_ipv6_ext_flg = false;
		tunn_hdr_size = 0;
		tunn_l4_hdr_start_offset = 0;

		mbuf = *tx_pkts++;
		assert(mbuf);

		/* Check that enough TX BDs are available for all segments of
		 * this mbuf.
		 */
		if (unlikely(txq->nb_tx_avail < mbuf->nb_segs))
			break;

		tx_ol_flags = mbuf->ol_flags;
		bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;

		/* TX prepare would have already checked supported tunnel Tx
		 * offloads. Don't rely on pkt_type marked by Rx, instead use
		 * tx_ol_flags to decide.
		 */
		tunn_flg = !!(tx_ol_flags & PKT_TX_TUNNEL_MASK);

		if (tunn_flg) {
			/* Check against max which is Tunnel IPv6 + ext */
			if (unlikely(txq->nb_tx_avail <
				     ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT))
				break;

			/* First indicate it's a tunnel pkt */
			bd1_bf |= ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK <<
				  ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
			/* Legacy FW had flipped behavior with regard to this
			 * bit, i.e. it needed to be set to prevent FW from
			 * touching encapsulated packets when it didn't need to.
			 */
			if (unlikely(txq->is_legacy)) {
				bd1_bf ^= 1 <<
					  ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
			}

			/* Outer IP checksum offload */
			if (tx_ol_flags & (PKT_TX_OUTER_IP_CKSUM |
					   PKT_TX_OUTER_IPV4)) {
				bd1_bd_flags_bf |=
					ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK <<
					ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT;
			}

			/**
			 * Currently, only inner checksum offload in MPLS-in-UDP
			 * tunnel with one MPLS label is supported. Both outer
			 * and inner layers lengths need to be provided in
			 * mbuf.
			 */
			if ((tx_ol_flags & PKT_TX_TUNNEL_MASK) ==
			    PKT_TX_TUNNEL_MPLSINUDP) {
				mplsoudp_flg = true;
#ifdef RTE_LIBRTE_QEDE_DEBUG_TX
				qede_mpls_tunn_tx_sanity_check(mbuf, txq);
#endif
				/* Outer L4 offset in two byte words */
				tunn_l4_hdr_start_offset =
				  (mbuf->outer_l2_len + mbuf->outer_l3_len) / 2;
				/* Tunnel header size in two byte words */
				tunn_hdr_size = (mbuf->outer_l2_len +
						 mbuf->outer_l3_len +
						 MPLSINUDP_HDR_SIZE) / 2;
				/* Inner L2 header size in two byte words */
				inner_l2_hdr_size = (mbuf->l2_len -
						     MPLSINUDP_HDR_SIZE) / 2;
				/* Inner L4 header offset from the beginning
				 * of inner packet in two byte words
				 */
				inner_l4_hdr_offset = (mbuf->l2_len -
					MPLSINUDP_HDR_SIZE + mbuf->l3_len) / 2;

				/* Inner L2 size and address type */
				bd2_bf1 |= (inner_l2_hdr_size &
					ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK) <<
					ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT;
				bd2_bf1 |= (UNICAST_ADDRESS &
					ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK) <<
					ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT;
				/* Treated as IPv6+Ext */
				bd2_bf1 |=
				    1 << ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT;

				/* Mark inner IPv6 if present */
				if (tx_ol_flags & PKT_TX_IPV6)
					bd2_bf1 |=
					    1 << ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT;

				/* Inner L4 offsets */
				if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
				    (tx_ol_flags & (PKT_TX_UDP_CKSUM |
						    PKT_TX_TCP_CKSUM))) {
					/* Determines if BD3 is needed */
					tunn_ipv6_ext_flg = true;
					if ((tx_ol_flags & PKT_TX_L4_MASK) ==
					    PKT_TX_UDP_CKSUM) {
						bd2_bf1 |=
						    1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT;
					}

					/* TODO other pseudo checksum modes are
					 * not supported
					 */
					bd2_bf1 |=
					    ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH <<
					    ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT;
					bd2_bf2 |= (inner_l4_hdr_offset &
					    ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) <<
					    ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT;
				}
			} /* End MPLSoUDP */
		} /* End Tunnel handling */
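		/* A worked example of the LSO split performed below (lengths
		 * are hypothetical): for a non-tunneled TSO mbuf with
		 * l2_len = 14, l3_len = 20 and l4_len = 20, hdr_size = 54.
		 * BD1 then maps only the 54 header bytes, BD2 maps the rest
		 * of the first segment (data_len - 54), and BD3 carries the
		 * MSS plus a header-BD count of 1; any additional mbuf
		 * segments are appended later via qede_encode_sg_bd().
		 */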
		if (tx_ol_flags & PKT_TX_TCP_SEG) {
			lso_flg = true;
			if (unlikely(txq->nb_tx_avail <
				     ETH_TX_MIN_BDS_PER_LSO_PKT))
				break;
			/* For LSO, packet header and payload must reside on
			 * buffers pointed by different BDs. Using BD1 for HDR
			 * and BD2 onwards for data.
			 */
			hdr_size = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
			if (tunn_flg)
				hdr_size += mbuf->outer_l2_len +
					    mbuf->outer_l3_len;

			bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT;
			bd1_bd_flags_bf |=
					1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
			/* PKT_TX_TCP_SEG implies PKT_TX_TCP_CKSUM */
			bd1_bd_flags_bf |=
					1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
			mss = rte_cpu_to_le_16(mbuf->tso_segsz);
			/* Using one header BD */
			bd3_bf |= rte_cpu_to_le_16(1 <<
					ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT);
		} else {
			if (unlikely(txq->nb_tx_avail <
				     ETH_TX_MIN_BDS_PER_NON_LSO_PKT))
				break;
			bd1_bf |=
			       (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
				<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
		}

		/* Descriptor based VLAN insertion */
		if (tx_ol_flags & PKT_TX_VLAN_PKT) {
			vlan = rte_cpu_to_le_16(mbuf->vlan_tci);
			bd1_bd_flags_bf |=
			    1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT;
		}

		/* Offload the IP checksum in the hardware */
		if (tx_ol_flags & PKT_TX_IP_CKSUM) {
			bd1_bd_flags_bf |=
				1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
			/* There's no DPDK flag to request outer-L4 csum
			 * offload. But for tunnel packets, if inner L3 or L4
			 * csum offload is requested, we also need to force
			 * recalculation of the tunnel L4 header csum.
			 */
			if (tunn_flg && ((tx_ol_flags & PKT_TX_TUNNEL_MASK) !=
					 PKT_TX_TUNNEL_GRE)) {
				bd1_bd_flags_bf |=
					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK <<
					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
			}
		}

		/* L4 checksum offload (tcp or udp) */
		if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
		    (tx_ol_flags & (PKT_TX_UDP_CKSUM | PKT_TX_TCP_CKSUM))) {
			bd1_bd_flags_bf |=
				1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
			/* There's no DPDK flag to request outer-L4 csum
			 * offload. But for tunnel packets, if inner L3 or L4
			 * csum offload is requested, we also need to force
			 * recalculation of the tunnel L4 header csum.
			 */
			if (tunn_flg) {
				bd1_bd_flags_bf |=
					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK <<
					ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
			}
		}
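		/* Note on BD accounting below: nbds counts the BDs produced
		 * for the first mbuf segment (BD1 always; BD2/BD3 only for
		 * LSO or MPLSoUDP), qede_encode_sg_bd() returns the extra BDs
		 * used for any remaining segments, and the sum is written to
		 * bd1->data.nbds and subtracted from txq->nb_tx_avail. A
		 * hypothetical single-segment packet with plain L3/L4
		 * checksum offload therefore consumes a single BD.
		 */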
		/* Fill the entry in the SW ring and the BDs in the FW ring */
		idx = TX_PROD(txq);
		txq->sw_tx_ring[idx].mbuf = mbuf;

		/* BD1 */
		bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
		memset(bd1, 0, sizeof(struct eth_tx_1st_bd));
		nbds++;

		/* Map MBUF linear data for DMA and set in the BD1 */
		QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
				     mbuf->data_len);
		bd1->data.bitfields = rte_cpu_to_le_16(bd1_bf);
		bd1->data.bd_flags.bitfields = bd1_bd_flags_bf;
		bd1->data.vlan = vlan;

		if (lso_flg || mplsoudp_flg) {
			bd2 = (struct eth_tx_2nd_bd *)ecore_chain_produce
							(&txq->tx_pbl);
			memset(bd2, 0, sizeof(struct eth_tx_2nd_bd));
			nbds++;

			/* BD1 */
			QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
					     hdr_size);
			/* BD2 */
			QEDE_BD_SET_ADDR_LEN(bd2, (hdr_size +
					     rte_mbuf_data_iova(mbuf)),
					     mbuf->data_len - hdr_size);
			bd2->data.bitfields1 = rte_cpu_to_le_16(bd2_bf1);
			if (mplsoudp_flg) {
				bd2->data.bitfields2 =
					rte_cpu_to_le_16(bd2_bf2);
				/* Outer L3 size */
				bd2->data.tunn_ip_size =
					rte_cpu_to_le_16(mbuf->outer_l3_len);
			}
			/* BD3 */
			if (lso_flg || (mplsoudp_flg && tunn_ipv6_ext_flg)) {
				bd3 = (struct eth_tx_3rd_bd *)
					ecore_chain_produce(&txq->tx_pbl);
				memset(bd3, 0, sizeof(struct eth_tx_3rd_bd));
				nbds++;
				bd3->data.bitfields = rte_cpu_to_le_16(bd3_bf);
				if (lso_flg)
					bd3->data.lso_mss = mss;
				if (mplsoudp_flg) {
					bd3->data.tunn_l4_hdr_start_offset_w =
						tunn_l4_hdr_start_offset;
					bd3->data.tunn_hdr_size_w =
						tunn_hdr_size;
				}
			}
		}

		/* Handle fragmented MBUF */
		m_seg = mbuf->next;

		/* Encode scatter gather buffer descriptors if required */
		nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3, nbds - 1);
		bd1->data.nbds = nbds + nb_frags;

		txq->nb_tx_avail -= bd1->data.nbds;
		txq->sw_tx_prod++;
		bd_prod =
		    rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
#ifdef RTE_LIBRTE_QEDE_DEBUG_TX
		print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags);
#endif
		nb_pkt_sent++;
		txq->xmit_pkts++;
	}

	/* Write value of prod idx into bd_prod */
	txq->tx_db.data.bd_prod = bd_prod;
	rte_wmb();
	rte_compiler_barrier();
	DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
	rte_wmb();

	/* Check again for Tx completions */
	qede_process_tx_compl(edev, txq);

	PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d",
		   nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id());

	return nb_pkt_sent;
}
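/* Minimal usage sketch (not part of the driver): an application normally
 * reaches qede_xmit_pkts() through the generic ethdev burst API, validating
 * offload flags with Tx prepare first. The names port_id, txq_id, pkts, nb,
 * handle_bad_pkt() and free_unsent() below are hypothetical.
 *
 *	uint16_t nb_ok, nb_sent;
 *
 *	nb_ok = rte_eth_tx_prepare(port_id, txq_id, pkts, nb);
 *	if (nb_ok < nb)
 *		handle_bad_pkt(pkts[nb_ok], rte_errno);
 *	nb_sent = rte_eth_tx_burst(port_id, txq_id, pkts, nb_ok);
 *	free_unsent(pkts + nb_sent, nb_ok - nb_sent);
 */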
uint16_t
qede_rxtx_pkts_dummy(__rte_unused void *p_rxq,
		     __rte_unused struct rte_mbuf **pkts,
		     __rte_unused uint16_t nb_pkts)
{
	return 0;
}

/* This function does a fake walk through the completion queue to calculate
 * the number of BDs used by HW. At the end, it restores the state of the
 * completion queue.
 */
static uint16_t
qede_parse_fp_cqe(struct qede_rx_queue *rxq)
{
	uint16_t hw_comp_cons, sw_comp_cons, bd_count = 0;
	union eth_rx_cqe *cqe, *orig_cqe = NULL;

	hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
	sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);

	if (hw_comp_cons == sw_comp_cons)
		return 0;

	/* Get the CQE from the completion ring */
	cqe = (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
	orig_cqe = cqe;

	while (sw_comp_cons != hw_comp_cons) {
		switch (cqe->fast_path_regular.type) {
		case ETH_RX_CQE_TYPE_REGULAR:
			bd_count += cqe->fast_path_regular.bd_num;
			break;
		case ETH_RX_CQE_TYPE_TPA_END:
			bd_count += cqe->fast_path_tpa_end.num_of_bds;
			break;
		default:
			break;
		}

		cqe =
		(union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
		sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
	}

	/* revert comp_ring to original state */
	ecore_chain_set_cons(&rxq->rx_comp_ring, sw_comp_cons, orig_cqe);

	return bd_count;
}

int
qede_rx_descriptor_status(void *p_rxq, uint16_t offset)
{
	uint16_t hw_bd_cons, sw_bd_cons, sw_bd_prod;
	uint16_t produced, consumed;
	struct qede_rx_queue *rxq = p_rxq;

	if (offset > rxq->nb_rx_desc)
		return -EINVAL;

	sw_bd_cons = ecore_chain_get_cons_idx(&rxq->rx_bd_ring);
	sw_bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);

	/* find BDs used by HW from completion queue elements */
	hw_bd_cons = sw_bd_cons + qede_parse_fp_cqe(rxq);

	if (hw_bd_cons < sw_bd_cons)
		/* wraparound case */
		consumed = (0xffff - sw_bd_cons) + hw_bd_cons;
	else
		consumed = hw_bd_cons - sw_bd_cons;

	if (offset <= consumed)
		return RTE_ETH_RX_DESC_DONE;

	if (sw_bd_prod < sw_bd_cons)
		/* wraparound case */
		produced = (0xffff - sw_bd_cons) + sw_bd_prod;
	else
		produced = sw_bd_prod - sw_bd_cons;

	if (offset <= produced)
		return RTE_ETH_RX_DESC_AVAIL;

	return RTE_ETH_RX_DESC_UNAVAIL;
}
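/* Worked example for qede_rx_descriptor_status() (all numbers hypothetical):
 * with sw_bd_cons = 100, hw_bd_cons = 105 and sw_bd_prod = 120, the function
 * computes consumed = 5 and produced = 20. An offset of 3 then reports
 * RTE_ETH_RX_DESC_DONE (descriptor already filled by HW), an offset of 10
 * reports RTE_ETH_RX_DESC_AVAIL (posted to HW but not yet used), and an
 * offset of 30 reports RTE_ETH_RX_DESC_UNAVAIL.
 */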