1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2016 - 2018 Cavium Inc. 3 * All rights reserved. 4 * www.cavium.com 5 */ 6 7 #include <rte_net.h> 8 #include "qede_rxtx.h" 9 10 static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq) 11 { 12 struct rte_mbuf *new_mb = NULL; 13 struct eth_rx_bd *rx_bd; 14 dma_addr_t mapping; 15 uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); 16 17 new_mb = rte_mbuf_raw_alloc(rxq->mb_pool); 18 if (unlikely(!new_mb)) { 19 PMD_RX_LOG(ERR, rxq, 20 "Failed to allocate rx buffer " 21 "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u", 22 idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq), 23 rte_mempool_avail_count(rxq->mb_pool), 24 rte_mempool_in_use_count(rxq->mb_pool)); 25 return -ENOMEM; 26 } 27 rxq->sw_rx_ring[idx].mbuf = new_mb; 28 rxq->sw_rx_ring[idx].page_offset = 0; 29 mapping = rte_mbuf_data_iova_default(new_mb); 30 /* Advance PROD and get BD pointer */ 31 rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring); 32 rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping)); 33 rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping)); 34 rxq->sw_rx_prod++; 35 return 0; 36 } 37 38 #define QEDE_MAX_BULK_ALLOC_COUNT 512 39 40 static inline int qede_alloc_rx_bulk_mbufs(struct qede_rx_queue *rxq, int count) 41 { 42 void *obj_p[QEDE_MAX_BULK_ALLOC_COUNT] __rte_cache_aligned; 43 struct rte_mbuf *mbuf = NULL; 44 struct eth_rx_bd *rx_bd; 45 dma_addr_t mapping; 46 int i, ret = 0; 47 uint16_t idx; 48 49 idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); 50 51 if (count > QEDE_MAX_BULK_ALLOC_COUNT) 52 count = QEDE_MAX_BULK_ALLOC_COUNT; 53 54 ret = rte_mempool_get_bulk(rxq->mb_pool, obj_p, count); 55 if (unlikely(ret)) { 56 PMD_RX_LOG(ERR, rxq, 57 "Failed to allocate %d rx buffers " 58 "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u", 59 count, idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq), 60 rte_mempool_avail_count(rxq->mb_pool), 61 rte_mempool_in_use_count(rxq->mb_pool)); 62 return -ENOMEM; 63 } 64 65 for (i = 0; i < count; i++) { 66 mbuf = obj_p[i]; 67 if (likely(i < count - 1)) 68 rte_prefetch0(obj_p[i + 1]); 69 70 idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); 71 rxq->sw_rx_ring[idx].mbuf = mbuf; 72 rxq->sw_rx_ring[idx].page_offset = 0; 73 mapping = rte_mbuf_data_iova_default(mbuf); 74 rx_bd = (struct eth_rx_bd *) 75 ecore_chain_produce(&rxq->rx_bd_ring); 76 rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping)); 77 rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping)); 78 rxq->sw_rx_prod++; 79 } 80 81 return 0; 82 } 83 84 /* Criterias for calculating Rx buffer size - 85 * 1) rx_buf_size should not exceed the size of mbuf 86 * 2) In scattered_rx mode - minimum rx_buf_size should be 87 * (MTU + Maximum L2 Header Size + 2) / ETH_RX_MAX_BUFF_PER_PKT 88 * 3) In regular mode - minimum rx_buf_size should be 89 * (MTU + Maximum L2 Header Size + 2) 90 * In above cases +2 corrosponds to 2 bytes padding in front of L2 91 * header. 92 * 4) rx_buf_size should be cacheline-size aligned. So considering 93 * criteria 1, we need to adjust the size to floor instead of ceil, 94 * so that we don't exceed mbuf size while ceiling rx_buf_size. 95 */ 96 int 97 qede_calc_rx_buf_size(struct rte_eth_dev *dev, uint16_t mbufsz, 98 uint16_t max_frame_size) 99 { 100 struct qede_dev *qdev = QEDE_INIT_QDEV(dev); 101 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 102 int rx_buf_size; 103 104 if (dev->data->scattered_rx) { 105 /* per HW limitation, only ETH_RX_MAX_BUFF_PER_PKT number of 106 * bufferes can be used for single packet. So need to make sure 107 * mbuf size is sufficient enough for this. 108 */ 109 if ((mbufsz * ETH_RX_MAX_BUFF_PER_PKT) < 110 (max_frame_size + QEDE_ETH_OVERHEAD)) { 111 DP_ERR(edev, "mbuf %d size is not enough to hold max fragments (%d) for max rx packet length (%d)\n", 112 mbufsz, ETH_RX_MAX_BUFF_PER_PKT, max_frame_size); 113 return -EINVAL; 114 } 115 116 rx_buf_size = RTE_MAX(mbufsz, 117 (max_frame_size + QEDE_ETH_OVERHEAD) / 118 ETH_RX_MAX_BUFF_PER_PKT); 119 } else { 120 rx_buf_size = max_frame_size + QEDE_ETH_OVERHEAD; 121 } 122 123 /* Align to cache-line size if needed */ 124 return QEDE_FLOOR_TO_CACHE_LINE_SIZE(rx_buf_size); 125 } 126 127 int 128 qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 129 uint16_t nb_desc, unsigned int socket_id, 130 __rte_unused const struct rte_eth_rxconf *rx_conf, 131 struct rte_mempool *mp) 132 { 133 struct qede_dev *qdev = QEDE_INIT_QDEV(dev); 134 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 135 struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; 136 struct qede_rx_queue *rxq; 137 uint16_t max_rx_pkt_len; 138 uint16_t bufsz; 139 size_t size; 140 int rc; 141 142 PMD_INIT_FUNC_TRACE(edev); 143 144 /* Note: Ring size/align is controlled by struct rte_eth_desc_lim */ 145 if (!rte_is_power_of_2(nb_desc)) { 146 DP_ERR(edev, "Ring size %u is not power of 2\n", 147 nb_desc); 148 return -EINVAL; 149 } 150 151 /* Free memory prior to re-allocation if needed... */ 152 if (dev->data->rx_queues[queue_idx] != NULL) { 153 qede_rx_queue_release(dev->data->rx_queues[queue_idx]); 154 dev->data->rx_queues[queue_idx] = NULL; 155 } 156 157 /* First allocate the rx queue data structure */ 158 rxq = rte_zmalloc_socket("qede_rx_queue", sizeof(struct qede_rx_queue), 159 RTE_CACHE_LINE_SIZE, socket_id); 160 161 if (!rxq) { 162 DP_ERR(edev, "Unable to allocate memory for rxq on socket %u", 163 socket_id); 164 return -ENOMEM; 165 } 166 167 rxq->qdev = qdev; 168 rxq->mb_pool = mp; 169 rxq->nb_rx_desc = nb_desc; 170 rxq->queue_id = queue_idx; 171 rxq->port_id = dev->data->port_id; 172 173 max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len; 174 175 /* Fix up RX buffer size */ 176 bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; 177 /* cache align the mbuf size to simplfy rx_buf_size calculation */ 178 bufsz = QEDE_FLOOR_TO_CACHE_LINE_SIZE(bufsz); 179 if ((rxmode->offloads & DEV_RX_OFFLOAD_SCATTER) || 180 (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) { 181 if (!dev->data->scattered_rx) { 182 DP_INFO(edev, "Forcing scatter-gather mode\n"); 183 dev->data->scattered_rx = 1; 184 } 185 } 186 187 rc = qede_calc_rx_buf_size(dev, bufsz, max_rx_pkt_len); 188 if (rc < 0) { 189 rte_free(rxq); 190 return rc; 191 } 192 193 rxq->rx_buf_size = rc; 194 195 DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n", 196 qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx); 197 198 /* Allocate the parallel driver ring for Rx buffers */ 199 size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc; 200 rxq->sw_rx_ring = rte_zmalloc_socket("sw_rx_ring", size, 201 RTE_CACHE_LINE_SIZE, socket_id); 202 if (!rxq->sw_rx_ring) { 203 DP_ERR(edev, "Memory allocation fails for sw_rx_ring on" 204 " socket %u\n", socket_id); 205 rte_free(rxq); 206 return -ENOMEM; 207 } 208 209 /* Allocate FW Rx ring */ 210 rc = qdev->ops->common->chain_alloc(edev, 211 ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, 212 ECORE_CHAIN_MODE_NEXT_PTR, 213 ECORE_CHAIN_CNT_TYPE_U16, 214 rxq->nb_rx_desc, 215 sizeof(struct eth_rx_bd), 216 &rxq->rx_bd_ring, 217 NULL); 218 219 if (rc != ECORE_SUCCESS) { 220 DP_ERR(edev, "Memory allocation fails for RX BD ring" 221 " on socket %u\n", socket_id); 222 rte_free(rxq->sw_rx_ring); 223 rte_free(rxq); 224 return -ENOMEM; 225 } 226 227 /* Allocate FW completion ring */ 228 rc = qdev->ops->common->chain_alloc(edev, 229 ECORE_CHAIN_USE_TO_CONSUME, 230 ECORE_CHAIN_MODE_PBL, 231 ECORE_CHAIN_CNT_TYPE_U16, 232 rxq->nb_rx_desc, 233 sizeof(union eth_rx_cqe), 234 &rxq->rx_comp_ring, 235 NULL); 236 237 if (rc != ECORE_SUCCESS) { 238 DP_ERR(edev, "Memory allocation fails for RX CQE ring" 239 " on socket %u\n", socket_id); 240 qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring); 241 rte_free(rxq->sw_rx_ring); 242 rte_free(rxq); 243 return -ENOMEM; 244 } 245 246 dev->data->rx_queues[queue_idx] = rxq; 247 qdev->fp_array[queue_idx].rxq = rxq; 248 249 DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n", 250 queue_idx, nb_desc, rxq->rx_buf_size, socket_id); 251 252 return 0; 253 } 254 255 static void 256 qede_rx_queue_reset(__rte_unused struct qede_dev *qdev, 257 struct qede_rx_queue *rxq) 258 { 259 DP_INFO(&qdev->edev, "Reset RX queue %u\n", rxq->queue_id); 260 ecore_chain_reset(&rxq->rx_bd_ring); 261 ecore_chain_reset(&rxq->rx_comp_ring); 262 rxq->sw_rx_prod = 0; 263 rxq->sw_rx_cons = 0; 264 *rxq->hw_cons_ptr = 0; 265 } 266 267 static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq) 268 { 269 uint16_t i; 270 271 if (rxq->sw_rx_ring) { 272 for (i = 0; i < rxq->nb_rx_desc; i++) { 273 if (rxq->sw_rx_ring[i].mbuf) { 274 rte_pktmbuf_free(rxq->sw_rx_ring[i].mbuf); 275 rxq->sw_rx_ring[i].mbuf = NULL; 276 } 277 } 278 } 279 } 280 281 void qede_rx_queue_release(void *rx_queue) 282 { 283 struct qede_rx_queue *rxq = rx_queue; 284 struct qede_dev *qdev; 285 struct ecore_dev *edev; 286 287 if (rxq) { 288 qdev = rxq->qdev; 289 edev = QEDE_INIT_EDEV(qdev); 290 PMD_INIT_FUNC_TRACE(edev); 291 qede_rx_queue_release_mbufs(rxq); 292 qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring); 293 qdev->ops->common->chain_free(edev, &rxq->rx_comp_ring); 294 rte_free(rxq->sw_rx_ring); 295 rte_free(rxq); 296 } 297 } 298 299 /* Stops a given RX queue in the HW */ 300 static int qede_rx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) 301 { 302 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 303 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 304 struct ecore_hwfn *p_hwfn; 305 struct qede_rx_queue *rxq; 306 int hwfn_index; 307 int rc; 308 309 if (rx_queue_id < eth_dev->data->nb_rx_queues) { 310 rxq = eth_dev->data->rx_queues[rx_queue_id]; 311 hwfn_index = rx_queue_id % edev->num_hwfns; 312 p_hwfn = &edev->hwfns[hwfn_index]; 313 rc = ecore_eth_rx_queue_stop(p_hwfn, rxq->handle, 314 true, false); 315 if (rc != ECORE_SUCCESS) { 316 DP_ERR(edev, "RX queue %u stop fails\n", rx_queue_id); 317 return -1; 318 } 319 qede_rx_queue_release_mbufs(rxq); 320 qede_rx_queue_reset(qdev, rxq); 321 eth_dev->data->rx_queue_state[rx_queue_id] = 322 RTE_ETH_QUEUE_STATE_STOPPED; 323 DP_INFO(edev, "RX queue %u stopped\n", rx_queue_id); 324 } else { 325 DP_ERR(edev, "RX queue %u is not in range\n", rx_queue_id); 326 rc = -EINVAL; 327 } 328 329 return rc; 330 } 331 332 int 333 qede_tx_queue_setup(struct rte_eth_dev *dev, 334 uint16_t queue_idx, 335 uint16_t nb_desc, 336 unsigned int socket_id, 337 const struct rte_eth_txconf *tx_conf) 338 { 339 struct qede_dev *qdev = dev->data->dev_private; 340 struct ecore_dev *edev = &qdev->edev; 341 struct qede_tx_queue *txq; 342 int rc; 343 344 PMD_INIT_FUNC_TRACE(edev); 345 346 if (!rte_is_power_of_2(nb_desc)) { 347 DP_ERR(edev, "Ring size %u is not power of 2\n", 348 nb_desc); 349 return -EINVAL; 350 } 351 352 /* Free memory prior to re-allocation if needed... */ 353 if (dev->data->tx_queues[queue_idx] != NULL) { 354 qede_tx_queue_release(dev->data->tx_queues[queue_idx]); 355 dev->data->tx_queues[queue_idx] = NULL; 356 } 357 358 txq = rte_zmalloc_socket("qede_tx_queue", sizeof(struct qede_tx_queue), 359 RTE_CACHE_LINE_SIZE, socket_id); 360 361 if (txq == NULL) { 362 DP_ERR(edev, 363 "Unable to allocate memory for txq on socket %u", 364 socket_id); 365 return -ENOMEM; 366 } 367 368 txq->nb_tx_desc = nb_desc; 369 txq->qdev = qdev; 370 txq->port_id = dev->data->port_id; 371 372 rc = qdev->ops->common->chain_alloc(edev, 373 ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, 374 ECORE_CHAIN_MODE_PBL, 375 ECORE_CHAIN_CNT_TYPE_U16, 376 txq->nb_tx_desc, 377 sizeof(union eth_tx_bd_types), 378 &txq->tx_pbl, 379 NULL); 380 if (rc != ECORE_SUCCESS) { 381 DP_ERR(edev, 382 "Unable to allocate memory for txbd ring on socket %u", 383 socket_id); 384 qede_tx_queue_release(txq); 385 return -ENOMEM; 386 } 387 388 /* Allocate software ring */ 389 txq->sw_tx_ring = rte_zmalloc_socket("txq->sw_tx_ring", 390 (sizeof(struct qede_tx_entry) * 391 txq->nb_tx_desc), 392 RTE_CACHE_LINE_SIZE, socket_id); 393 394 if (!txq->sw_tx_ring) { 395 DP_ERR(edev, 396 "Unable to allocate memory for txbd ring on socket %u", 397 socket_id); 398 qdev->ops->common->chain_free(edev, &txq->tx_pbl); 399 qede_tx_queue_release(txq); 400 return -ENOMEM; 401 } 402 403 txq->queue_id = queue_idx; 404 405 txq->nb_tx_avail = txq->nb_tx_desc; 406 407 txq->tx_free_thresh = 408 tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh : 409 (txq->nb_tx_desc - QEDE_DEFAULT_TX_FREE_THRESH); 410 411 dev->data->tx_queues[queue_idx] = txq; 412 qdev->fp_array[queue_idx].txq = txq; 413 414 DP_INFO(edev, 415 "txq %u num_desc %u tx_free_thresh %u socket %u\n", 416 queue_idx, nb_desc, txq->tx_free_thresh, socket_id); 417 418 return 0; 419 } 420 421 static void 422 qede_tx_queue_reset(__rte_unused struct qede_dev *qdev, 423 struct qede_tx_queue *txq) 424 { 425 DP_INFO(&qdev->edev, "Reset TX queue %u\n", txq->queue_id); 426 ecore_chain_reset(&txq->tx_pbl); 427 txq->sw_tx_cons = 0; 428 txq->sw_tx_prod = 0; 429 *txq->hw_cons_ptr = 0; 430 } 431 432 static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq) 433 { 434 uint16_t i; 435 436 if (txq->sw_tx_ring) { 437 for (i = 0; i < txq->nb_tx_desc; i++) { 438 if (txq->sw_tx_ring[i].mbuf) { 439 rte_pktmbuf_free(txq->sw_tx_ring[i].mbuf); 440 txq->sw_tx_ring[i].mbuf = NULL; 441 } 442 } 443 } 444 } 445 446 void qede_tx_queue_release(void *tx_queue) 447 { 448 struct qede_tx_queue *txq = tx_queue; 449 struct qede_dev *qdev; 450 struct ecore_dev *edev; 451 452 if (txq) { 453 qdev = txq->qdev; 454 edev = QEDE_INIT_EDEV(qdev); 455 PMD_INIT_FUNC_TRACE(edev); 456 qede_tx_queue_release_mbufs(txq); 457 qdev->ops->common->chain_free(edev, &txq->tx_pbl); 458 rte_free(txq->sw_tx_ring); 459 rte_free(txq); 460 } 461 } 462 463 /* This function allocates fast-path status block memory */ 464 static int 465 qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info, 466 uint16_t sb_id) 467 { 468 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 469 struct status_block_e4 *sb_virt; 470 dma_addr_t sb_phys; 471 int rc; 472 473 sb_virt = OSAL_DMA_ALLOC_COHERENT(edev, &sb_phys, 474 sizeof(struct status_block_e4)); 475 if (!sb_virt) { 476 DP_ERR(edev, "Status block allocation failed\n"); 477 return -ENOMEM; 478 } 479 rc = qdev->ops->common->sb_init(edev, sb_info, sb_virt, 480 sb_phys, sb_id); 481 if (rc) { 482 DP_ERR(edev, "Status block initialization failed\n"); 483 OSAL_DMA_FREE_COHERENT(edev, sb_virt, sb_phys, 484 sizeof(struct status_block_e4)); 485 return rc; 486 } 487 488 return 0; 489 } 490 491 int qede_alloc_fp_resc(struct qede_dev *qdev) 492 { 493 struct ecore_dev *edev = &qdev->edev; 494 struct qede_fastpath *fp; 495 uint32_t num_sbs; 496 uint16_t sb_idx; 497 498 if (IS_VF(edev)) 499 ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs); 500 else 501 num_sbs = ecore_cxt_get_proto_cid_count 502 (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL); 503 504 if (num_sbs == 0) { 505 DP_ERR(edev, "No status blocks available\n"); 506 return -EINVAL; 507 } 508 509 qdev->fp_array = rte_calloc("fp", QEDE_RXTX_MAX(qdev), 510 sizeof(*qdev->fp_array), RTE_CACHE_LINE_SIZE); 511 512 if (!qdev->fp_array) { 513 DP_ERR(edev, "fp array allocation failed\n"); 514 return -ENOMEM; 515 } 516 517 memset((void *)qdev->fp_array, 0, QEDE_RXTX_MAX(qdev) * 518 sizeof(*qdev->fp_array)); 519 520 for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) { 521 fp = &qdev->fp_array[sb_idx]; 522 if (!fp) 523 continue; 524 fp->sb_info = rte_calloc("sb", 1, sizeof(struct ecore_sb_info), 525 RTE_CACHE_LINE_SIZE); 526 if (!fp->sb_info) { 527 DP_ERR(edev, "FP sb_info allocation fails\n"); 528 return -1; 529 } 530 if (qede_alloc_mem_sb(qdev, fp->sb_info, sb_idx)) { 531 DP_ERR(edev, "FP status block allocation fails\n"); 532 return -1; 533 } 534 DP_INFO(edev, "sb_info idx 0x%x initialized\n", 535 fp->sb_info->igu_sb_id); 536 } 537 538 return 0; 539 } 540 541 void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev) 542 { 543 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 544 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 545 struct qede_fastpath *fp; 546 uint16_t sb_idx; 547 uint8_t i; 548 549 PMD_INIT_FUNC_TRACE(edev); 550 551 for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) { 552 fp = &qdev->fp_array[sb_idx]; 553 if (!fp) 554 continue; 555 DP_INFO(edev, "Free sb_info index 0x%x\n", 556 fp->sb_info->igu_sb_id); 557 if (fp->sb_info) { 558 OSAL_DMA_FREE_COHERENT(edev, fp->sb_info->sb_virt, 559 fp->sb_info->sb_phys, 560 sizeof(struct status_block_e4)); 561 rte_free(fp->sb_info); 562 fp->sb_info = NULL; 563 } 564 } 565 566 /* Free packet buffers and ring memories */ 567 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { 568 if (eth_dev->data->rx_queues[i]) { 569 qede_rx_queue_release(eth_dev->data->rx_queues[i]); 570 eth_dev->data->rx_queues[i] = NULL; 571 } 572 } 573 574 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { 575 if (eth_dev->data->tx_queues[i]) { 576 qede_tx_queue_release(eth_dev->data->tx_queues[i]); 577 eth_dev->data->tx_queues[i] = NULL; 578 } 579 } 580 581 if (qdev->fp_array) 582 rte_free(qdev->fp_array); 583 qdev->fp_array = NULL; 584 } 585 586 static inline void 587 qede_update_rx_prod(__rte_unused struct qede_dev *edev, 588 struct qede_rx_queue *rxq) 589 { 590 uint16_t bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring); 591 uint16_t cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring); 592 struct eth_rx_prod_data rx_prods = { 0 }; 593 594 /* Update producers */ 595 rx_prods.bd_prod = rte_cpu_to_le_16(bd_prod); 596 rx_prods.cqe_prod = rte_cpu_to_le_16(cqe_prod); 597 598 /* Make sure that the BD and SGE data is updated before updating the 599 * producers since FW might read the BD/SGE right after the producer 600 * is updated. 601 */ 602 rte_wmb(); 603 604 internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods), 605 (uint32_t *)&rx_prods); 606 607 /* mmiowb is needed to synchronize doorbell writes from more than one 608 * processor. It guarantees that the write arrives to the device before 609 * the napi lock is released and another qede_poll is called (possibly 610 * on another CPU). Without this barrier, the next doorbell can bypass 611 * this doorbell. This is applicable to IA64/Altix systems. 612 */ 613 rte_wmb(); 614 615 PMD_RX_LOG(DEBUG, rxq, "bd_prod %u cqe_prod %u", bd_prod, cqe_prod); 616 } 617 618 /* Starts a given RX queue in HW */ 619 static int 620 qede_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) 621 { 622 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 623 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 624 struct ecore_queue_start_common_params params; 625 struct ecore_rxq_start_ret_params ret_params; 626 struct qede_rx_queue *rxq; 627 struct qede_fastpath *fp; 628 struct ecore_hwfn *p_hwfn; 629 dma_addr_t p_phys_table; 630 uint16_t page_cnt; 631 uint16_t j; 632 int hwfn_index; 633 int rc; 634 635 if (rx_queue_id < eth_dev->data->nb_rx_queues) { 636 fp = &qdev->fp_array[rx_queue_id]; 637 rxq = eth_dev->data->rx_queues[rx_queue_id]; 638 /* Allocate buffers for the Rx ring */ 639 for (j = 0; j < rxq->nb_rx_desc; j++) { 640 rc = qede_alloc_rx_buffer(rxq); 641 if (rc) { 642 DP_ERR(edev, "RX buffer allocation failed" 643 " for rxq = %u\n", rx_queue_id); 644 return -ENOMEM; 645 } 646 } 647 /* disable interrupts */ 648 ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0); 649 /* Prepare ramrod */ 650 memset(¶ms, 0, sizeof(params)); 651 params.queue_id = rx_queue_id / edev->num_hwfns; 652 params.vport_id = 0; 653 params.stats_id = params.vport_id; 654 params.p_sb = fp->sb_info; 655 DP_INFO(edev, "rxq %u igu_sb_id 0x%x\n", 656 fp->rxq->queue_id, fp->sb_info->igu_sb_id); 657 params.sb_idx = RX_PI; 658 hwfn_index = rx_queue_id % edev->num_hwfns; 659 p_hwfn = &edev->hwfns[hwfn_index]; 660 p_phys_table = ecore_chain_get_pbl_phys(&fp->rxq->rx_comp_ring); 661 page_cnt = ecore_chain_get_page_cnt(&fp->rxq->rx_comp_ring); 662 memset(&ret_params, 0, sizeof(ret_params)); 663 rc = ecore_eth_rx_queue_start(p_hwfn, 664 p_hwfn->hw_info.opaque_fid, 665 ¶ms, fp->rxq->rx_buf_size, 666 fp->rxq->rx_bd_ring.p_phys_addr, 667 p_phys_table, page_cnt, 668 &ret_params); 669 if (rc) { 670 DP_ERR(edev, "RX queue %u could not be started, rc = %d\n", 671 rx_queue_id, rc); 672 return -1; 673 } 674 /* Update with the returned parameters */ 675 fp->rxq->hw_rxq_prod_addr = ret_params.p_prod; 676 fp->rxq->handle = ret_params.p_handle; 677 678 fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI]; 679 qede_update_rx_prod(qdev, fp->rxq); 680 eth_dev->data->rx_queue_state[rx_queue_id] = 681 RTE_ETH_QUEUE_STATE_STARTED; 682 DP_INFO(edev, "RX queue %u started\n", rx_queue_id); 683 } else { 684 DP_ERR(edev, "RX queue %u is not in range\n", rx_queue_id); 685 rc = -EINVAL; 686 } 687 688 return rc; 689 } 690 691 static int 692 qede_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id) 693 { 694 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 695 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 696 struct ecore_queue_start_common_params params; 697 struct ecore_txq_start_ret_params ret_params; 698 struct ecore_hwfn *p_hwfn; 699 dma_addr_t p_phys_table; 700 struct qede_tx_queue *txq; 701 struct qede_fastpath *fp; 702 uint16_t page_cnt; 703 int hwfn_index; 704 int rc; 705 706 if (tx_queue_id < eth_dev->data->nb_tx_queues) { 707 txq = eth_dev->data->tx_queues[tx_queue_id]; 708 fp = &qdev->fp_array[tx_queue_id]; 709 memset(¶ms, 0, sizeof(params)); 710 params.queue_id = tx_queue_id / edev->num_hwfns; 711 params.vport_id = 0; 712 params.stats_id = params.vport_id; 713 params.p_sb = fp->sb_info; 714 DP_INFO(edev, "txq %u igu_sb_id 0x%x\n", 715 fp->txq->queue_id, fp->sb_info->igu_sb_id); 716 params.sb_idx = TX_PI(0); /* tc = 0 */ 717 p_phys_table = ecore_chain_get_pbl_phys(&txq->tx_pbl); 718 page_cnt = ecore_chain_get_page_cnt(&txq->tx_pbl); 719 hwfn_index = tx_queue_id % edev->num_hwfns; 720 p_hwfn = &edev->hwfns[hwfn_index]; 721 if (qdev->dev_info.is_legacy) 722 fp->txq->is_legacy = true; 723 rc = ecore_eth_tx_queue_start(p_hwfn, 724 p_hwfn->hw_info.opaque_fid, 725 ¶ms, 0 /* tc */, 726 p_phys_table, page_cnt, 727 &ret_params); 728 if (rc != ECORE_SUCCESS) { 729 DP_ERR(edev, "TX queue %u couldn't be started, rc=%d\n", 730 tx_queue_id, rc); 731 return -1; 732 } 733 txq->doorbell_addr = ret_params.p_doorbell; 734 txq->handle = ret_params.p_handle; 735 736 txq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[TX_PI(0)]; 737 SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST, 738 DB_DEST_XCM); 739 SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD, 740 DB_AGG_CMD_SET); 741 SET_FIELD(txq->tx_db.data.params, 742 ETH_DB_DATA_AGG_VAL_SEL, 743 DQ_XCM_ETH_TX_BD_PROD_CMD); 744 txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD; 745 eth_dev->data->tx_queue_state[tx_queue_id] = 746 RTE_ETH_QUEUE_STATE_STARTED; 747 DP_INFO(edev, "TX queue %u started\n", tx_queue_id); 748 } else { 749 DP_ERR(edev, "TX queue %u is not in range\n", tx_queue_id); 750 rc = -EINVAL; 751 } 752 753 return rc; 754 } 755 756 static inline void 757 qede_free_tx_pkt(struct qede_tx_queue *txq) 758 { 759 struct rte_mbuf *mbuf; 760 uint16_t nb_segs; 761 uint16_t idx; 762 763 idx = TX_CONS(txq); 764 mbuf = txq->sw_tx_ring[idx].mbuf; 765 if (mbuf) { 766 nb_segs = mbuf->nb_segs; 767 PMD_TX_LOG(DEBUG, txq, "nb_segs to free %u\n", nb_segs); 768 while (nb_segs) { 769 /* It's like consuming rxbuf in recv() */ 770 ecore_chain_consume(&txq->tx_pbl); 771 txq->nb_tx_avail++; 772 nb_segs--; 773 } 774 rte_pktmbuf_free(mbuf); 775 txq->sw_tx_ring[idx].mbuf = NULL; 776 txq->sw_tx_cons++; 777 PMD_TX_LOG(DEBUG, txq, "Freed tx packet\n"); 778 } else { 779 ecore_chain_consume(&txq->tx_pbl); 780 txq->nb_tx_avail++; 781 } 782 } 783 784 static inline void 785 qede_process_tx_compl(__rte_unused struct ecore_dev *edev, 786 struct qede_tx_queue *txq) 787 { 788 uint16_t hw_bd_cons; 789 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 790 uint16_t sw_tx_cons; 791 #endif 792 793 rte_compiler_barrier(); 794 hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr); 795 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 796 sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl); 797 PMD_TX_LOG(DEBUG, txq, "Tx Completions = %u\n", 798 abs(hw_bd_cons - sw_tx_cons)); 799 #endif 800 while (hw_bd_cons != ecore_chain_get_cons_idx(&txq->tx_pbl)) 801 qede_free_tx_pkt(txq); 802 } 803 804 static int qede_drain_txq(struct qede_dev *qdev, 805 struct qede_tx_queue *txq, bool allow_drain) 806 { 807 struct ecore_dev *edev = &qdev->edev; 808 int rc, cnt = 1000; 809 810 while (txq->sw_tx_cons != txq->sw_tx_prod) { 811 qede_process_tx_compl(edev, txq); 812 if (!cnt) { 813 if (allow_drain) { 814 DP_ERR(edev, "Tx queue[%u] is stuck," 815 "requesting MCP to drain\n", 816 txq->queue_id); 817 rc = qdev->ops->common->drain(edev); 818 if (rc) 819 return rc; 820 return qede_drain_txq(qdev, txq, false); 821 } 822 DP_ERR(edev, "Timeout waiting for tx queue[%d]:" 823 "PROD=%d, CONS=%d\n", 824 txq->queue_id, txq->sw_tx_prod, 825 txq->sw_tx_cons); 826 return -1; 827 } 828 cnt--; 829 DELAY(1000); 830 rte_compiler_barrier(); 831 } 832 833 /* FW finished processing, wait for HW to transmit all tx packets */ 834 DELAY(2000); 835 836 return 0; 837 } 838 839 /* Stops a given TX queue in the HW */ 840 static int qede_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id) 841 { 842 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 843 struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); 844 struct ecore_hwfn *p_hwfn; 845 struct qede_tx_queue *txq; 846 int hwfn_index; 847 int rc; 848 849 if (tx_queue_id < eth_dev->data->nb_tx_queues) { 850 txq = eth_dev->data->tx_queues[tx_queue_id]; 851 /* Drain txq */ 852 if (qede_drain_txq(qdev, txq, true)) 853 return -1; /* For the lack of retcodes */ 854 /* Stop txq */ 855 hwfn_index = tx_queue_id % edev->num_hwfns; 856 p_hwfn = &edev->hwfns[hwfn_index]; 857 rc = ecore_eth_tx_queue_stop(p_hwfn, txq->handle); 858 if (rc != ECORE_SUCCESS) { 859 DP_ERR(edev, "TX queue %u stop fails\n", tx_queue_id); 860 return -1; 861 } 862 qede_tx_queue_release_mbufs(txq); 863 qede_tx_queue_reset(qdev, txq); 864 eth_dev->data->tx_queue_state[tx_queue_id] = 865 RTE_ETH_QUEUE_STATE_STOPPED; 866 DP_INFO(edev, "TX queue %u stopped\n", tx_queue_id); 867 } else { 868 DP_ERR(edev, "TX queue %u is not in range\n", tx_queue_id); 869 rc = -EINVAL; 870 } 871 872 return rc; 873 } 874 875 int qede_start_queues(struct rte_eth_dev *eth_dev) 876 { 877 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 878 uint8_t id; 879 int rc = -1; 880 881 for_each_rss(id) { 882 rc = qede_rx_queue_start(eth_dev, id); 883 if (rc != ECORE_SUCCESS) 884 return -1; 885 } 886 887 for_each_tss(id) { 888 rc = qede_tx_queue_start(eth_dev, id); 889 if (rc != ECORE_SUCCESS) 890 return -1; 891 } 892 893 return rc; 894 } 895 896 void qede_stop_queues(struct rte_eth_dev *eth_dev) 897 { 898 struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); 899 uint8_t id; 900 901 /* Stopping RX/TX queues */ 902 for_each_tss(id) { 903 qede_tx_queue_stop(eth_dev, id); 904 } 905 906 for_each_rss(id) { 907 qede_rx_queue_stop(eth_dev, id); 908 } 909 } 910 911 static inline bool qede_tunn_exist(uint16_t flag) 912 { 913 return !!((PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK << 914 PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT) & flag); 915 } 916 917 static inline uint8_t qede_check_tunn_csum_l3(uint16_t flag) 918 { 919 return !!((PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK << 920 PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT) & flag); 921 } 922 923 /* 924 * qede_check_tunn_csum_l4: 925 * Returns: 926 * 1 : If L4 csum is enabled AND if the validation has failed. 927 * 0 : Otherwise 928 */ 929 static inline uint8_t qede_check_tunn_csum_l4(uint16_t flag) 930 { 931 if ((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK << 932 PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT) & flag) 933 return !!((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK << 934 PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT) & flag); 935 936 return 0; 937 } 938 939 static inline uint8_t qede_check_notunn_csum_l4(uint16_t flag) 940 { 941 if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK << 942 PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag) 943 return !!((PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK << 944 PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT) & flag); 945 946 return 0; 947 } 948 949 /* Returns outer L2, L3 and L4 packet_type for tunneled packets */ 950 static inline uint32_t qede_rx_cqe_to_pkt_type_outer(struct rte_mbuf *m) 951 { 952 uint32_t packet_type = RTE_PTYPE_UNKNOWN; 953 struct rte_ether_hdr *eth_hdr; 954 struct rte_ipv4_hdr *ipv4_hdr; 955 struct rte_ipv6_hdr *ipv6_hdr; 956 struct rte_vlan_hdr *vlan_hdr; 957 uint16_t ethertype; 958 bool vlan_tagged = 0; 959 uint16_t len; 960 961 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 962 len = sizeof(struct rte_ether_hdr); 963 ethertype = rte_cpu_to_be_16(eth_hdr->ether_type); 964 965 /* Note: Valid only if VLAN stripping is disabled */ 966 if (ethertype == RTE_ETHER_TYPE_VLAN) { 967 vlan_tagged = 1; 968 vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1); 969 len += sizeof(struct rte_vlan_hdr); 970 ethertype = rte_cpu_to_be_16(vlan_hdr->eth_proto); 971 } 972 973 if (ethertype == RTE_ETHER_TYPE_IPv4) { 974 packet_type |= RTE_PTYPE_L3_IPV4; 975 ipv4_hdr = rte_pktmbuf_mtod_offset(m, 976 struct rte_ipv4_hdr *, len); 977 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) 978 packet_type |= RTE_PTYPE_L4_TCP; 979 else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) 980 packet_type |= RTE_PTYPE_L4_UDP; 981 } else if (ethertype == RTE_ETHER_TYPE_IPv6) { 982 packet_type |= RTE_PTYPE_L3_IPV6; 983 ipv6_hdr = rte_pktmbuf_mtod_offset(m, 984 struct rte_ipv6_hdr *, len); 985 if (ipv6_hdr->proto == IPPROTO_TCP) 986 packet_type |= RTE_PTYPE_L4_TCP; 987 else if (ipv6_hdr->proto == IPPROTO_UDP) 988 packet_type |= RTE_PTYPE_L4_UDP; 989 } 990 991 if (vlan_tagged) 992 packet_type |= RTE_PTYPE_L2_ETHER_VLAN; 993 else 994 packet_type |= RTE_PTYPE_L2_ETHER; 995 996 return packet_type; 997 } 998 999 static inline uint32_t qede_rx_cqe_to_pkt_type_inner(uint16_t flags) 1000 { 1001 uint16_t val; 1002 1003 /* Lookup table */ 1004 static const uint32_t 1005 ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = { 1006 [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_INNER_L3_IPV4 | 1007 RTE_PTYPE_INNER_L2_ETHER, 1008 [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_INNER_L3_IPV6 | 1009 RTE_PTYPE_INNER_L2_ETHER, 1010 [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_INNER_L3_IPV4 | 1011 RTE_PTYPE_INNER_L4_TCP | 1012 RTE_PTYPE_INNER_L2_ETHER, 1013 [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_INNER_L3_IPV6 | 1014 RTE_PTYPE_INNER_L4_TCP | 1015 RTE_PTYPE_INNER_L2_ETHER, 1016 [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_INNER_L3_IPV4 | 1017 RTE_PTYPE_INNER_L4_UDP | 1018 RTE_PTYPE_INNER_L2_ETHER, 1019 [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_INNER_L3_IPV6 | 1020 RTE_PTYPE_INNER_L4_UDP | 1021 RTE_PTYPE_INNER_L2_ETHER, 1022 /* Frags with no VLAN */ 1023 [QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_INNER_L3_IPV4 | 1024 RTE_PTYPE_INNER_L4_FRAG | 1025 RTE_PTYPE_INNER_L2_ETHER, 1026 [QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_INNER_L3_IPV6 | 1027 RTE_PTYPE_INNER_L4_FRAG | 1028 RTE_PTYPE_INNER_L2_ETHER, 1029 /* VLANs */ 1030 [QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_INNER_L3_IPV4 | 1031 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1032 [QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_INNER_L3_IPV6 | 1033 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1034 [QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV4 | 1035 RTE_PTYPE_INNER_L4_TCP | 1036 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1037 [QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV6 | 1038 RTE_PTYPE_INNER_L4_TCP | 1039 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1040 [QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV4 | 1041 RTE_PTYPE_INNER_L4_UDP | 1042 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1043 [QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV6 | 1044 RTE_PTYPE_INNER_L4_UDP | 1045 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1046 /* Frags with VLAN */ 1047 [QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV4 | 1048 RTE_PTYPE_INNER_L4_FRAG | 1049 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1050 [QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV6 | 1051 RTE_PTYPE_INNER_L4_FRAG | 1052 RTE_PTYPE_INNER_L2_ETHER_VLAN, 1053 }; 1054 1055 /* Bits (0..3) provides L3/L4 protocol type */ 1056 /* Bits (4,5) provides frag and VLAN info */ 1057 val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK << 1058 PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) | 1059 (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK << 1060 PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) | 1061 (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK << 1062 PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) | 1063 (PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK << 1064 PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags; 1065 1066 if (val < QEDE_PKT_TYPE_MAX) 1067 return ptype_lkup_tbl[val]; 1068 1069 return RTE_PTYPE_UNKNOWN; 1070 } 1071 1072 static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags) 1073 { 1074 uint16_t val; 1075 1076 /* Lookup table */ 1077 static const uint32_t 1078 ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = { 1079 [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER, 1080 [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER, 1081 [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_L3_IPV4 | 1082 RTE_PTYPE_L4_TCP | 1083 RTE_PTYPE_L2_ETHER, 1084 [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_L3_IPV6 | 1085 RTE_PTYPE_L4_TCP | 1086 RTE_PTYPE_L2_ETHER, 1087 [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_L3_IPV4 | 1088 RTE_PTYPE_L4_UDP | 1089 RTE_PTYPE_L2_ETHER, 1090 [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_L3_IPV6 | 1091 RTE_PTYPE_L4_UDP | 1092 RTE_PTYPE_L2_ETHER, 1093 /* Frags with no VLAN */ 1094 [QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_L3_IPV4 | 1095 RTE_PTYPE_L4_FRAG | 1096 RTE_PTYPE_L2_ETHER, 1097 [QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_L3_IPV6 | 1098 RTE_PTYPE_L4_FRAG | 1099 RTE_PTYPE_L2_ETHER, 1100 /* VLANs */ 1101 [QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_L3_IPV4 | 1102 RTE_PTYPE_L2_ETHER_VLAN, 1103 [QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_L3_IPV6 | 1104 RTE_PTYPE_L2_ETHER_VLAN, 1105 [QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_L3_IPV4 | 1106 RTE_PTYPE_L4_TCP | 1107 RTE_PTYPE_L2_ETHER_VLAN, 1108 [QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_L3_IPV6 | 1109 RTE_PTYPE_L4_TCP | 1110 RTE_PTYPE_L2_ETHER_VLAN, 1111 [QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_L3_IPV4 | 1112 RTE_PTYPE_L4_UDP | 1113 RTE_PTYPE_L2_ETHER_VLAN, 1114 [QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_L3_IPV6 | 1115 RTE_PTYPE_L4_UDP | 1116 RTE_PTYPE_L2_ETHER_VLAN, 1117 /* Frags with VLAN */ 1118 [QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_L3_IPV4 | 1119 RTE_PTYPE_L4_FRAG | 1120 RTE_PTYPE_L2_ETHER_VLAN, 1121 [QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_L3_IPV6 | 1122 RTE_PTYPE_L4_FRAG | 1123 RTE_PTYPE_L2_ETHER_VLAN, 1124 }; 1125 1126 /* Bits (0..3) provides L3/L4 protocol type */ 1127 /* Bits (4,5) provides frag and VLAN info */ 1128 val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK << 1129 PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) | 1130 (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK << 1131 PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) | 1132 (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK << 1133 PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) | 1134 (PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK << 1135 PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags; 1136 1137 if (val < QEDE_PKT_TYPE_MAX) 1138 return ptype_lkup_tbl[val]; 1139 1140 return RTE_PTYPE_UNKNOWN; 1141 } 1142 1143 static inline uint8_t 1144 qede_check_notunn_csum_l3(struct rte_mbuf *m, uint16_t flag) 1145 { 1146 struct rte_ipv4_hdr *ip; 1147 uint16_t pkt_csum; 1148 uint16_t calc_csum; 1149 uint16_t val; 1150 1151 val = ((PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK << 1152 PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT) & flag); 1153 1154 if (unlikely(val)) { 1155 m->packet_type = qede_rx_cqe_to_pkt_type(flag); 1156 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { 1157 ip = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, 1158 sizeof(struct rte_ether_hdr)); 1159 pkt_csum = ip->hdr_checksum; 1160 ip->hdr_checksum = 0; 1161 calc_csum = rte_ipv4_cksum(ip); 1162 ip->hdr_checksum = pkt_csum; 1163 return (calc_csum != pkt_csum); 1164 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { 1165 return 1; 1166 } 1167 } 1168 return 0; 1169 } 1170 1171 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq) 1172 { 1173 ecore_chain_consume(&rxq->rx_bd_ring); 1174 rxq->sw_rx_cons++; 1175 } 1176 1177 static inline void 1178 qede_reuse_page(__rte_unused struct qede_dev *qdev, 1179 struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons) 1180 { 1181 struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring); 1182 uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); 1183 struct qede_rx_entry *curr_prod; 1184 dma_addr_t new_mapping; 1185 1186 curr_prod = &rxq->sw_rx_ring[idx]; 1187 *curr_prod = *curr_cons; 1188 1189 new_mapping = rte_mbuf_data_iova_default(curr_prod->mbuf) + 1190 curr_prod->page_offset; 1191 1192 rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping)); 1193 rx_bd_prod->addr.lo = rte_cpu_to_le_32(U64_LO(new_mapping)); 1194 1195 rxq->sw_rx_prod++; 1196 } 1197 1198 static inline void 1199 qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, 1200 struct qede_dev *qdev, uint8_t count) 1201 { 1202 struct qede_rx_entry *curr_cons; 1203 1204 for (; count > 0; count--) { 1205 curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)]; 1206 qede_reuse_page(qdev, rxq, curr_cons); 1207 qede_rx_bd_ring_consume(rxq); 1208 } 1209 } 1210 1211 static inline void 1212 qede_rx_process_tpa_cmn_cont_end_cqe(__rte_unused struct qede_dev *qdev, 1213 struct qede_rx_queue *rxq, 1214 uint8_t agg_index, uint16_t len) 1215 { 1216 struct qede_agg_info *tpa_info; 1217 struct rte_mbuf *curr_frag; /* Pointer to currently filled TPA seg */ 1218 uint16_t cons_idx; 1219 1220 /* Under certain conditions it is possible that FW may not consume 1221 * additional or new BD. So decision to consume the BD must be made 1222 * based on len_list[0]. 1223 */ 1224 if (rte_le_to_cpu_16(len)) { 1225 tpa_info = &rxq->tpa_info[agg_index]; 1226 cons_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq); 1227 curr_frag = rxq->sw_rx_ring[cons_idx].mbuf; 1228 assert(curr_frag); 1229 curr_frag->nb_segs = 1; 1230 curr_frag->pkt_len = rte_le_to_cpu_16(len); 1231 curr_frag->data_len = curr_frag->pkt_len; 1232 tpa_info->tpa_tail->next = curr_frag; 1233 tpa_info->tpa_tail = curr_frag; 1234 qede_rx_bd_ring_consume(rxq); 1235 if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) { 1236 PMD_RX_LOG(ERR, rxq, "mbuf allocation fails\n"); 1237 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; 1238 rxq->rx_alloc_errors++; 1239 } 1240 } 1241 } 1242 1243 static inline void 1244 qede_rx_process_tpa_cont_cqe(struct qede_dev *qdev, 1245 struct qede_rx_queue *rxq, 1246 struct eth_fast_path_rx_tpa_cont_cqe *cqe) 1247 { 1248 PMD_RX_LOG(INFO, rxq, "TPA cont[%d] - len [%d]\n", 1249 cqe->tpa_agg_index, rte_le_to_cpu_16(cqe->len_list[0])); 1250 /* only len_list[0] will have value */ 1251 qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index, 1252 cqe->len_list[0]); 1253 } 1254 1255 static inline void 1256 qede_rx_process_tpa_end_cqe(struct qede_dev *qdev, 1257 struct qede_rx_queue *rxq, 1258 struct eth_fast_path_rx_tpa_end_cqe *cqe) 1259 { 1260 struct rte_mbuf *rx_mb; /* Pointer to head of the chained agg */ 1261 1262 qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index, 1263 cqe->len_list[0]); 1264 /* Update total length and frags based on end TPA */ 1265 rx_mb = rxq->tpa_info[cqe->tpa_agg_index].tpa_head; 1266 /* TODO: Add Sanity Checks */ 1267 rx_mb->nb_segs = cqe->num_of_bds; 1268 rx_mb->pkt_len = cqe->total_packet_len; 1269 1270 PMD_RX_LOG(INFO, rxq, "TPA End[%d] reason %d cqe_len %d nb_segs %d" 1271 " pkt_len %d\n", cqe->tpa_agg_index, cqe->end_reason, 1272 rte_le_to_cpu_16(cqe->len_list[0]), rx_mb->nb_segs, 1273 rx_mb->pkt_len); 1274 } 1275 1276 static inline uint32_t qede_rx_cqe_to_tunn_pkt_type(uint16_t flags) 1277 { 1278 uint32_t val; 1279 1280 /* Lookup table */ 1281 static const uint32_t 1282 ptype_tunn_lkup_tbl[QEDE_PKT_TYPE_TUNN_MAX_TYPE] __rte_cache_aligned = { 1283 [QEDE_PKT_TYPE_UNKNOWN] = RTE_PTYPE_UNKNOWN, 1284 [QEDE_PKT_TYPE_TUNN_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE, 1285 [QEDE_PKT_TYPE_TUNN_GRE] = RTE_PTYPE_TUNNEL_GRE, 1286 [QEDE_PKT_TYPE_TUNN_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN, 1287 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GENEVE] = 1288 RTE_PTYPE_TUNNEL_GENEVE, 1289 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GRE] = 1290 RTE_PTYPE_TUNNEL_GRE, 1291 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_VXLAN] = 1292 RTE_PTYPE_TUNNEL_VXLAN, 1293 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GENEVE] = 1294 RTE_PTYPE_TUNNEL_GENEVE, 1295 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GRE] = 1296 RTE_PTYPE_TUNNEL_GRE, 1297 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_VXLAN] = 1298 RTE_PTYPE_TUNNEL_VXLAN, 1299 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GENEVE] = 1300 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4, 1301 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GRE] = 1302 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4, 1303 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_VXLAN] = 1304 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4, 1305 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GENEVE] = 1306 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4, 1307 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GRE] = 1308 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4, 1309 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_VXLAN] = 1310 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4, 1311 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GENEVE] = 1312 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6, 1313 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GRE] = 1314 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6, 1315 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_VXLAN] = 1316 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6, 1317 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GENEVE] = 1318 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6, 1319 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GRE] = 1320 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6, 1321 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_VXLAN] = 1322 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6, 1323 }; 1324 1325 /* Cover bits[4-0] to include tunn_type and next protocol */ 1326 val = ((ETH_TUNNEL_PARSING_FLAGS_TYPE_MASK << 1327 ETH_TUNNEL_PARSING_FLAGS_TYPE_SHIFT) | 1328 (ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK << 1329 ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT)) & flags; 1330 1331 if (val < QEDE_PKT_TYPE_TUNN_MAX_TYPE) 1332 return ptype_tunn_lkup_tbl[val]; 1333 else 1334 return RTE_PTYPE_UNKNOWN; 1335 } 1336 1337 static inline int 1338 qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb, 1339 uint8_t num_segs, uint16_t pkt_len) 1340 { 1341 struct qede_rx_queue *rxq = p_rxq; 1342 struct qede_dev *qdev = rxq->qdev; 1343 register struct rte_mbuf *seg1 = NULL; 1344 register struct rte_mbuf *seg2 = NULL; 1345 uint16_t sw_rx_index; 1346 uint16_t cur_size; 1347 1348 seg1 = rx_mb; 1349 while (num_segs) { 1350 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size : 1351 pkt_len; 1352 if (unlikely(!cur_size)) { 1353 PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs" 1354 " left for mapping jumbo\n", num_segs); 1355 qede_recycle_rx_bd_ring(rxq, qdev, num_segs); 1356 return -EINVAL; 1357 } 1358 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq); 1359 seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf; 1360 qede_rx_bd_ring_consume(rxq); 1361 pkt_len -= cur_size; 1362 seg2->data_len = cur_size; 1363 seg1->next = seg2; 1364 seg1 = seg1->next; 1365 num_segs--; 1366 rxq->rx_segs++; 1367 } 1368 1369 return 0; 1370 } 1371 1372 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1373 static inline void 1374 print_rx_bd_info(struct rte_mbuf *m, struct qede_rx_queue *rxq, 1375 uint8_t bitfield) 1376 { 1377 PMD_RX_LOG(INFO, rxq, 1378 "len 0x%04x bf 0x%04x hash_val 0x%x" 1379 " ol_flags 0x%04lx l2=%s l3=%s l4=%s tunn=%s" 1380 " inner_l2=%s inner_l3=%s inner_l4=%s\n", 1381 m->data_len, bitfield, m->hash.rss, 1382 (unsigned long)m->ol_flags, 1383 rte_get_ptype_l2_name(m->packet_type), 1384 rte_get_ptype_l3_name(m->packet_type), 1385 rte_get_ptype_l4_name(m->packet_type), 1386 rte_get_ptype_tunnel_name(m->packet_type), 1387 rte_get_ptype_inner_l2_name(m->packet_type), 1388 rte_get_ptype_inner_l3_name(m->packet_type), 1389 rte_get_ptype_inner_l4_name(m->packet_type)); 1390 } 1391 #endif 1392 1393 uint16_t 1394 qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 1395 { 1396 struct qede_rx_queue *rxq = p_rxq; 1397 struct qede_dev *qdev = rxq->qdev; 1398 struct ecore_dev *edev = &qdev->edev; 1399 uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index; 1400 uint16_t rx_pkt = 0; 1401 union eth_rx_cqe *cqe; 1402 struct eth_fast_path_rx_reg_cqe *fp_cqe = NULL; 1403 register struct rte_mbuf *rx_mb = NULL; 1404 register struct rte_mbuf *seg1 = NULL; 1405 enum eth_rx_cqe_type cqe_type; 1406 uint16_t pkt_len = 0; /* Sum of all BD segments */ 1407 uint16_t len; /* Length of first BD */ 1408 uint8_t num_segs = 1; 1409 uint16_t preload_idx; 1410 uint16_t parse_flag; 1411 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1412 uint8_t bitfield_val; 1413 #endif 1414 uint8_t tunn_parse_flag; 1415 struct eth_fast_path_rx_tpa_start_cqe *cqe_start_tpa; 1416 uint64_t ol_flags; 1417 uint32_t packet_type; 1418 uint16_t vlan_tci; 1419 bool tpa_start_flg; 1420 uint8_t offset, tpa_agg_idx, flags; 1421 struct qede_agg_info *tpa_info = NULL; 1422 uint32_t rss_hash; 1423 int rx_alloc_count = 0; 1424 1425 1426 /* Allocate buffers that we used in previous loop */ 1427 if (rxq->rx_alloc_count) { 1428 if (unlikely(qede_alloc_rx_bulk_mbufs(rxq, 1429 rxq->rx_alloc_count))) { 1430 struct rte_eth_dev *dev; 1431 1432 PMD_RX_LOG(ERR, rxq, 1433 "New buffer allocation failed," 1434 "dropping incoming packetn"); 1435 dev = &rte_eth_devices[rxq->port_id]; 1436 dev->data->rx_mbuf_alloc_failed += 1437 rxq->rx_alloc_count; 1438 rxq->rx_alloc_errors += rxq->rx_alloc_count; 1439 return 0; 1440 } 1441 qede_update_rx_prod(qdev, rxq); 1442 rxq->rx_alloc_count = 0; 1443 } 1444 1445 hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr); 1446 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); 1447 1448 rte_rmb(); 1449 1450 if (hw_comp_cons == sw_comp_cons) 1451 return 0; 1452 1453 while (sw_comp_cons != hw_comp_cons) { 1454 ol_flags = 0; 1455 packet_type = RTE_PTYPE_UNKNOWN; 1456 vlan_tci = 0; 1457 tpa_start_flg = false; 1458 rss_hash = 0; 1459 1460 /* Get the CQE from the completion ring */ 1461 cqe = 1462 (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring); 1463 cqe_type = cqe->fast_path_regular.type; 1464 PMD_RX_LOG(INFO, rxq, "Rx CQE type %d\n", cqe_type); 1465 1466 switch (cqe_type) { 1467 case ETH_RX_CQE_TYPE_REGULAR: 1468 fp_cqe = &cqe->fast_path_regular; 1469 break; 1470 case ETH_RX_CQE_TYPE_TPA_START: 1471 cqe_start_tpa = &cqe->fast_path_tpa_start; 1472 tpa_info = &rxq->tpa_info[cqe_start_tpa->tpa_agg_index]; 1473 tpa_start_flg = true; 1474 /* Mark it as LRO packet */ 1475 ol_flags |= PKT_RX_LRO; 1476 /* In split mode, seg_len is same as len_on_first_bd 1477 * and ext_bd_len_list will be empty since there are 1478 * no additional buffers 1479 */ 1480 PMD_RX_LOG(INFO, rxq, 1481 "TPA start[%d] - len_on_first_bd %d header %d" 1482 " [bd_list[0] %d], [seg_len %d]\n", 1483 cqe_start_tpa->tpa_agg_index, 1484 rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd), 1485 cqe_start_tpa->header_len, 1486 rte_le_to_cpu_16(cqe_start_tpa->ext_bd_len_list[0]), 1487 rte_le_to_cpu_16(cqe_start_tpa->seg_len)); 1488 1489 break; 1490 case ETH_RX_CQE_TYPE_TPA_CONT: 1491 qede_rx_process_tpa_cont_cqe(qdev, rxq, 1492 &cqe->fast_path_tpa_cont); 1493 goto next_cqe; 1494 case ETH_RX_CQE_TYPE_TPA_END: 1495 qede_rx_process_tpa_end_cqe(qdev, rxq, 1496 &cqe->fast_path_tpa_end); 1497 tpa_agg_idx = cqe->fast_path_tpa_end.tpa_agg_index; 1498 tpa_info = &rxq->tpa_info[tpa_agg_idx]; 1499 rx_mb = rxq->tpa_info[tpa_agg_idx].tpa_head; 1500 goto tpa_end; 1501 case ETH_RX_CQE_TYPE_SLOW_PATH: 1502 PMD_RX_LOG(INFO, rxq, "Got unexpected slowpath CQE\n"); 1503 ecore_eth_cqe_completion( 1504 &edev->hwfns[rxq->queue_id % edev->num_hwfns], 1505 (struct eth_slow_path_rx_cqe *)cqe); 1506 /* fall-thru */ 1507 default: 1508 goto next_cqe; 1509 } 1510 1511 /* Get the data from the SW ring */ 1512 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq); 1513 rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf; 1514 assert(rx_mb != NULL); 1515 1516 /* Handle regular CQE or TPA start CQE */ 1517 if (!tpa_start_flg) { 1518 parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags); 1519 offset = fp_cqe->placement_offset; 1520 len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd); 1521 pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len); 1522 vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag); 1523 rss_hash = rte_le_to_cpu_32(fp_cqe->rss_hash); 1524 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1525 bitfield_val = fp_cqe->bitfields; 1526 #endif 1527 } else { 1528 parse_flag = 1529 rte_le_to_cpu_16(cqe_start_tpa->pars_flags.flags); 1530 offset = cqe_start_tpa->placement_offset; 1531 /* seg_len = len_on_first_bd */ 1532 len = rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd); 1533 vlan_tci = rte_le_to_cpu_16(cqe_start_tpa->vlan_tag); 1534 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1535 bitfield_val = cqe_start_tpa->bitfields; 1536 #endif 1537 rss_hash = rte_le_to_cpu_32(cqe_start_tpa->rss_hash); 1538 } 1539 if (qede_tunn_exist(parse_flag)) { 1540 PMD_RX_LOG(INFO, rxq, "Rx tunneled packet\n"); 1541 if (unlikely(qede_check_tunn_csum_l4(parse_flag))) { 1542 PMD_RX_LOG(ERR, rxq, 1543 "L4 csum failed, flags = 0x%x\n", 1544 parse_flag); 1545 rxq->rx_hw_errors++; 1546 ol_flags |= PKT_RX_L4_CKSUM_BAD; 1547 } else { 1548 ol_flags |= PKT_RX_L4_CKSUM_GOOD; 1549 } 1550 1551 if (unlikely(qede_check_tunn_csum_l3(parse_flag))) { 1552 PMD_RX_LOG(ERR, rxq, 1553 "Outer L3 csum failed, flags = 0x%x\n", 1554 parse_flag); 1555 rxq->rx_hw_errors++; 1556 ol_flags |= PKT_RX_EIP_CKSUM_BAD; 1557 } else { 1558 ol_flags |= PKT_RX_IP_CKSUM_GOOD; 1559 } 1560 1561 if (tpa_start_flg) 1562 flags = cqe_start_tpa->tunnel_pars_flags.flags; 1563 else 1564 flags = fp_cqe->tunnel_pars_flags.flags; 1565 tunn_parse_flag = flags; 1566 1567 /* Tunnel_type */ 1568 packet_type = 1569 qede_rx_cqe_to_tunn_pkt_type(tunn_parse_flag); 1570 1571 /* Inner header */ 1572 packet_type |= 1573 qede_rx_cqe_to_pkt_type_inner(parse_flag); 1574 1575 /* Outer L3/L4 types is not available in CQE */ 1576 packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb); 1577 1578 /* Outer L3/L4 types is not available in CQE. 1579 * Need to add offset to parse correctly, 1580 */ 1581 rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM; 1582 packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb); 1583 } else { 1584 packet_type |= qede_rx_cqe_to_pkt_type(parse_flag); 1585 } 1586 1587 /* Common handling for non-tunnel packets and for inner 1588 * headers in the case of tunnel. 1589 */ 1590 if (unlikely(qede_check_notunn_csum_l4(parse_flag))) { 1591 PMD_RX_LOG(ERR, rxq, 1592 "L4 csum failed, flags = 0x%x\n", 1593 parse_flag); 1594 rxq->rx_hw_errors++; 1595 ol_flags |= PKT_RX_L4_CKSUM_BAD; 1596 } else { 1597 ol_flags |= PKT_RX_L4_CKSUM_GOOD; 1598 } 1599 if (unlikely(qede_check_notunn_csum_l3(rx_mb, parse_flag))) { 1600 PMD_RX_LOG(ERR, rxq, "IP csum failed, flags = 0x%x\n", 1601 parse_flag); 1602 rxq->rx_hw_errors++; 1603 ol_flags |= PKT_RX_IP_CKSUM_BAD; 1604 } else { 1605 ol_flags |= PKT_RX_IP_CKSUM_GOOD; 1606 } 1607 1608 if (CQE_HAS_VLAN(parse_flag) || 1609 CQE_HAS_OUTER_VLAN(parse_flag)) { 1610 /* Note: FW doesn't indicate Q-in-Q packet */ 1611 ol_flags |= PKT_RX_VLAN; 1612 if (qdev->vlan_strip_flg) { 1613 ol_flags |= PKT_RX_VLAN_STRIPPED; 1614 rx_mb->vlan_tci = vlan_tci; 1615 } 1616 } 1617 1618 /* RSS Hash */ 1619 if (qdev->rss_enable) { 1620 ol_flags |= PKT_RX_RSS_HASH; 1621 rx_mb->hash.rss = rss_hash; 1622 } 1623 1624 rx_alloc_count++; 1625 qede_rx_bd_ring_consume(rxq); 1626 1627 if (!tpa_start_flg && fp_cqe->bd_num > 1) { 1628 PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs" 1629 " len on first: %04x Total Len: %04x", 1630 fp_cqe->bd_num, len, pkt_len); 1631 num_segs = fp_cqe->bd_num - 1; 1632 seg1 = rx_mb; 1633 if (qede_process_sg_pkts(p_rxq, seg1, num_segs, 1634 pkt_len - len)) 1635 goto next_cqe; 1636 1637 rx_alloc_count += num_segs; 1638 rxq->rx_segs += num_segs; 1639 } 1640 rxq->rx_segs++; /* for the first segment */ 1641 1642 /* Prefetch next mbuf while processing current one. */ 1643 preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq); 1644 rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf); 1645 1646 /* Update rest of the MBUF fields */ 1647 rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM; 1648 rx_mb->port = rxq->port_id; 1649 rx_mb->ol_flags = ol_flags; 1650 rx_mb->data_len = len; 1651 rx_mb->packet_type = packet_type; 1652 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX 1653 print_rx_bd_info(rx_mb, rxq, bitfield_val); 1654 #endif 1655 if (!tpa_start_flg) { 1656 rx_mb->nb_segs = fp_cqe->bd_num; 1657 rx_mb->pkt_len = pkt_len; 1658 } else { 1659 /* store ref to the updated mbuf */ 1660 tpa_info->tpa_head = rx_mb; 1661 tpa_info->tpa_tail = tpa_info->tpa_head; 1662 } 1663 rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *)); 1664 tpa_end: 1665 if (!tpa_start_flg) { 1666 rx_pkts[rx_pkt] = rx_mb; 1667 rx_pkt++; 1668 } 1669 next_cqe: 1670 ecore_chain_recycle_consumed(&rxq->rx_comp_ring); 1671 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); 1672 if (rx_pkt == nb_pkts) { 1673 PMD_RX_LOG(DEBUG, rxq, 1674 "Budget reached nb_pkts=%u received=%u", 1675 rx_pkt, nb_pkts); 1676 break; 1677 } 1678 } 1679 1680 /* Request number of bufferes to be allocated in next loop */ 1681 rxq->rx_alloc_count = rx_alloc_count; 1682 1683 rxq->rcv_pkts += rx_pkt; 1684 1685 PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d", rx_pkt, rte_lcore_id()); 1686 1687 return rx_pkt; 1688 } 1689 1690 1691 /* Populate scatter gather buffer descriptor fields */ 1692 static inline uint16_t 1693 qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg, 1694 struct eth_tx_2nd_bd **bd2, struct eth_tx_3rd_bd **bd3, 1695 uint16_t start_seg) 1696 { 1697 struct qede_tx_queue *txq = p_txq; 1698 struct eth_tx_bd *tx_bd = NULL; 1699 dma_addr_t mapping; 1700 uint16_t nb_segs = 0; 1701 1702 /* Check for scattered buffers */ 1703 while (m_seg) { 1704 if (start_seg == 0) { 1705 if (!*bd2) { 1706 *bd2 = (struct eth_tx_2nd_bd *) 1707 ecore_chain_produce(&txq->tx_pbl); 1708 memset(*bd2, 0, sizeof(struct eth_tx_2nd_bd)); 1709 nb_segs++; 1710 } 1711 mapping = rte_mbuf_data_iova(m_seg); 1712 QEDE_BD_SET_ADDR_LEN(*bd2, mapping, m_seg->data_len); 1713 PMD_TX_LOG(DEBUG, txq, "BD2 len %04x", m_seg->data_len); 1714 } else if (start_seg == 1) { 1715 if (!*bd3) { 1716 *bd3 = (struct eth_tx_3rd_bd *) 1717 ecore_chain_produce(&txq->tx_pbl); 1718 memset(*bd3, 0, sizeof(struct eth_tx_3rd_bd)); 1719 nb_segs++; 1720 } 1721 mapping = rte_mbuf_data_iova(m_seg); 1722 QEDE_BD_SET_ADDR_LEN(*bd3, mapping, m_seg->data_len); 1723 PMD_TX_LOG(DEBUG, txq, "BD3 len %04x", m_seg->data_len); 1724 } else { 1725 tx_bd = (struct eth_tx_bd *) 1726 ecore_chain_produce(&txq->tx_pbl); 1727 memset(tx_bd, 0, sizeof(*tx_bd)); 1728 nb_segs++; 1729 mapping = rte_mbuf_data_iova(m_seg); 1730 QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len); 1731 PMD_TX_LOG(DEBUG, txq, "BD len %04x", m_seg->data_len); 1732 } 1733 start_seg++; 1734 m_seg = m_seg->next; 1735 } 1736 1737 /* Return total scattered buffers */ 1738 return nb_segs; 1739 } 1740 1741 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1742 static inline void 1743 print_tx_bd_info(struct qede_tx_queue *txq, 1744 struct eth_tx_1st_bd *bd1, 1745 struct eth_tx_2nd_bd *bd2, 1746 struct eth_tx_3rd_bd *bd3, 1747 uint64_t tx_ol_flags) 1748 { 1749 char ol_buf[256] = { 0 }; /* for verbose prints */ 1750 1751 if (bd1) 1752 PMD_TX_LOG(INFO, txq, 1753 "BD1: nbytes=0x%04x nbds=0x%04x bd_flags=0x%04x bf=0x%04x", 1754 rte_cpu_to_le_16(bd1->nbytes), bd1->data.nbds, 1755 bd1->data.bd_flags.bitfields, 1756 rte_cpu_to_le_16(bd1->data.bitfields)); 1757 if (bd2) 1758 PMD_TX_LOG(INFO, txq, 1759 "BD2: nbytes=0x%04x bf1=0x%04x bf2=0x%04x tunn_ip=0x%04x\n", 1760 rte_cpu_to_le_16(bd2->nbytes), bd2->data.bitfields1, 1761 bd2->data.bitfields2, bd2->data.tunn_ip_size); 1762 if (bd3) 1763 PMD_TX_LOG(INFO, txq, 1764 "BD3: nbytes=0x%04x bf=0x%04x MSS=0x%04x " 1765 "tunn_l4_hdr_start_offset_w=0x%04x tunn_hdr_size=0x%04x\n", 1766 rte_cpu_to_le_16(bd3->nbytes), 1767 rte_cpu_to_le_16(bd3->data.bitfields), 1768 rte_cpu_to_le_16(bd3->data.lso_mss), 1769 bd3->data.tunn_l4_hdr_start_offset_w, 1770 bd3->data.tunn_hdr_size_w); 1771 1772 rte_get_tx_ol_flag_list(tx_ol_flags, ol_buf, sizeof(ol_buf)); 1773 PMD_TX_LOG(INFO, txq, "TX offloads = %s\n", ol_buf); 1774 } 1775 #endif 1776 1777 /* TX prepare to check packets meets TX conditions */ 1778 uint16_t 1779 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1780 qede_xmit_prep_pkts(void *p_txq, struct rte_mbuf **tx_pkts, 1781 uint16_t nb_pkts) 1782 { 1783 struct qede_tx_queue *txq = p_txq; 1784 #else 1785 qede_xmit_prep_pkts(__rte_unused void *p_txq, struct rte_mbuf **tx_pkts, 1786 uint16_t nb_pkts) 1787 { 1788 #endif 1789 uint64_t ol_flags; 1790 struct rte_mbuf *m; 1791 uint16_t i; 1792 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 1793 int ret; 1794 #endif 1795 1796 for (i = 0; i < nb_pkts; i++) { 1797 m = tx_pkts[i]; 1798 ol_flags = m->ol_flags; 1799 if (ol_flags & PKT_TX_TCP_SEG) { 1800 if (m->nb_segs >= ETH_TX_MAX_BDS_PER_LSO_PACKET) { 1801 rte_errno = -EINVAL; 1802 break; 1803 } 1804 /* TBD: confirm its ~9700B for both ? */ 1805 if (m->tso_segsz > ETH_TX_MAX_NON_LSO_PKT_LEN) { 1806 rte_errno = -EINVAL; 1807 break; 1808 } 1809 } else { 1810 if (m->nb_segs >= ETH_TX_MAX_BDS_PER_NON_LSO_PACKET) { 1811 rte_errno = -EINVAL; 1812 break; 1813 } 1814 } 1815 if (ol_flags & QEDE_TX_OFFLOAD_NOTSUP_MASK) { 1816 /* We support only limited tunnel protocols */ 1817 if (ol_flags & PKT_TX_TUNNEL_MASK) { 1818 uint64_t temp; 1819 1820 temp = ol_flags & PKT_TX_TUNNEL_MASK; 1821 if (temp == PKT_TX_TUNNEL_VXLAN || 1822 temp == PKT_TX_TUNNEL_GENEVE || 1823 temp == PKT_TX_TUNNEL_MPLSINUDP || 1824 temp == PKT_TX_TUNNEL_GRE) 1825 continue; 1826 } 1827 1828 rte_errno = -ENOTSUP; 1829 break; 1830 } 1831 1832 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 1833 ret = rte_validate_tx_offload(m); 1834 if (ret != 0) { 1835 rte_errno = ret; 1836 break; 1837 } 1838 #endif 1839 } 1840 1841 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1842 if (unlikely(i != nb_pkts)) 1843 PMD_TX_LOG(ERR, txq, "TX prepare failed for %u\n", 1844 nb_pkts - i); 1845 #endif 1846 return i; 1847 } 1848 1849 #define MPLSINUDP_HDR_SIZE (12) 1850 1851 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1852 static inline void 1853 qede_mpls_tunn_tx_sanity_check(struct rte_mbuf *mbuf, 1854 struct qede_tx_queue *txq) 1855 { 1856 if (((mbuf->outer_l2_len + mbuf->outer_l3_len) / 2) > 0xff) 1857 PMD_TX_LOG(ERR, txq, "tunn_l4_hdr_start_offset overflow\n"); 1858 if (((mbuf->outer_l2_len + mbuf->outer_l3_len + 1859 MPLSINUDP_HDR_SIZE) / 2) > 0xff) 1860 PMD_TX_LOG(ERR, txq, "tunn_hdr_size overflow\n"); 1861 if (((mbuf->l2_len - MPLSINUDP_HDR_SIZE) / 2) > 1862 ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK) 1863 PMD_TX_LOG(ERR, txq, "inner_l2_hdr_size overflow\n"); 1864 if (((mbuf->l2_len - MPLSINUDP_HDR_SIZE + mbuf->l3_len) / 2) > 1865 ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) 1866 PMD_TX_LOG(ERR, txq, "inner_l2_hdr_size overflow\n"); 1867 } 1868 #endif 1869 1870 uint16_t 1871 qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 1872 { 1873 struct qede_tx_queue *txq = p_txq; 1874 struct qede_dev *qdev = txq->qdev; 1875 struct ecore_dev *edev = &qdev->edev; 1876 struct rte_mbuf *mbuf; 1877 struct rte_mbuf *m_seg = NULL; 1878 uint16_t nb_tx_pkts; 1879 uint16_t bd_prod; 1880 uint16_t idx; 1881 uint16_t nb_frags; 1882 uint16_t nb_pkt_sent = 0; 1883 uint8_t nbds; 1884 bool lso_flg; 1885 bool mplsoudp_flg; 1886 __rte_unused bool tunn_flg; 1887 bool tunn_ipv6_ext_flg; 1888 struct eth_tx_1st_bd *bd1; 1889 struct eth_tx_2nd_bd *bd2; 1890 struct eth_tx_3rd_bd *bd3; 1891 uint64_t tx_ol_flags; 1892 uint16_t hdr_size; 1893 /* BD1 */ 1894 uint16_t bd1_bf; 1895 uint8_t bd1_bd_flags_bf; 1896 uint16_t vlan; 1897 /* BD2 */ 1898 uint16_t bd2_bf1; 1899 uint16_t bd2_bf2; 1900 /* BD3 */ 1901 uint16_t mss; 1902 uint16_t bd3_bf; 1903 1904 uint8_t tunn_l4_hdr_start_offset; 1905 uint8_t tunn_hdr_size; 1906 uint8_t inner_l2_hdr_size; 1907 uint16_t inner_l4_hdr_offset; 1908 1909 if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) { 1910 PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u", 1911 nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh); 1912 qede_process_tx_compl(edev, txq); 1913 } 1914 1915 nb_tx_pkts = nb_pkts; 1916 bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl)); 1917 while (nb_tx_pkts--) { 1918 /* Init flags/values */ 1919 tunn_flg = false; 1920 lso_flg = false; 1921 nbds = 0; 1922 vlan = 0; 1923 bd1 = NULL; 1924 bd2 = NULL; 1925 bd3 = NULL; 1926 hdr_size = 0; 1927 bd1_bf = 0; 1928 bd1_bd_flags_bf = 0; 1929 bd2_bf1 = 0; 1930 bd2_bf2 = 0; 1931 mss = 0; 1932 bd3_bf = 0; 1933 mplsoudp_flg = false; 1934 tunn_ipv6_ext_flg = false; 1935 tunn_hdr_size = 0; 1936 tunn_l4_hdr_start_offset = 0; 1937 1938 mbuf = *tx_pkts++; 1939 assert(mbuf); 1940 1941 /* Check minimum TX BDS availability against available BDs */ 1942 if (unlikely(txq->nb_tx_avail < mbuf->nb_segs)) 1943 break; 1944 1945 tx_ol_flags = mbuf->ol_flags; 1946 bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT; 1947 1948 /* TX prepare would have already checked supported tunnel Tx 1949 * offloads. Don't rely on pkt_type marked by Rx, instead use 1950 * tx_ol_flags to decide. 1951 */ 1952 tunn_flg = !!(tx_ol_flags & PKT_TX_TUNNEL_MASK); 1953 1954 if (tunn_flg) { 1955 /* Check against max which is Tunnel IPv6 + ext */ 1956 if (unlikely(txq->nb_tx_avail < 1957 ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT)) 1958 break; 1959 1960 /* First indicate its a tunnel pkt */ 1961 bd1_bf |= ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK << 1962 ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT; 1963 /* Legacy FW had flipped behavior in regard to this bit 1964 * i.e. it needed to set to prevent FW from touching 1965 * encapsulated packets when it didn't need to. 1966 */ 1967 if (unlikely(txq->is_legacy)) { 1968 bd1_bf ^= 1 << 1969 ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT; 1970 } 1971 1972 /* Outer IP checksum offload */ 1973 if (tx_ol_flags & (PKT_TX_OUTER_IP_CKSUM | 1974 PKT_TX_OUTER_IPV4)) { 1975 bd1_bd_flags_bf |= 1976 ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK << 1977 ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT; 1978 } 1979 1980 /** 1981 * Currently, only inner checksum offload in MPLS-in-UDP 1982 * tunnel with one MPLS label is supported. Both outer 1983 * and inner layers lengths need to be provided in 1984 * mbuf. 1985 */ 1986 if ((tx_ol_flags & PKT_TX_TUNNEL_MASK) == 1987 PKT_TX_TUNNEL_MPLSINUDP) { 1988 mplsoudp_flg = true; 1989 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 1990 qede_mpls_tunn_tx_sanity_check(mbuf, txq); 1991 #endif 1992 /* Outer L4 offset in two byte words */ 1993 tunn_l4_hdr_start_offset = 1994 (mbuf->outer_l2_len + mbuf->outer_l3_len) / 2; 1995 /* Tunnel header size in two byte words */ 1996 tunn_hdr_size = (mbuf->outer_l2_len + 1997 mbuf->outer_l3_len + 1998 MPLSINUDP_HDR_SIZE) / 2; 1999 /* Inner L2 header size in two byte words */ 2000 inner_l2_hdr_size = (mbuf->l2_len - 2001 MPLSINUDP_HDR_SIZE) / 2; 2002 /* Inner L4 header offset from the beggining 2003 * of inner packet in two byte words 2004 */ 2005 inner_l4_hdr_offset = (mbuf->l2_len - 2006 MPLSINUDP_HDR_SIZE + mbuf->l3_len) / 2; 2007 2008 /* Inner L2 size and address type */ 2009 bd2_bf1 |= (inner_l2_hdr_size & 2010 ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK) << 2011 ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT; 2012 bd2_bf1 |= (UNICAST_ADDRESS & 2013 ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK) << 2014 ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT; 2015 /* Treated as IPv6+Ext */ 2016 bd2_bf1 |= 2017 1 << ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT; 2018 2019 /* Mark inner IPv6 if present */ 2020 if (tx_ol_flags & PKT_TX_IPV6) 2021 bd2_bf1 |= 2022 1 << ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT; 2023 2024 /* Inner L4 offsets */ 2025 if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) && 2026 (tx_ol_flags & (PKT_TX_UDP_CKSUM | 2027 PKT_TX_TCP_CKSUM))) { 2028 /* Determines if BD3 is needed */ 2029 tunn_ipv6_ext_flg = true; 2030 if ((tx_ol_flags & PKT_TX_L4_MASK) == 2031 PKT_TX_UDP_CKSUM) { 2032 bd2_bf1 |= 2033 1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT; 2034 } 2035 2036 /* TODO other pseudo checksum modes are 2037 * not supported 2038 */ 2039 bd2_bf1 |= 2040 ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH << 2041 ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT; 2042 bd2_bf2 |= (inner_l4_hdr_offset & 2043 ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) << 2044 ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT; 2045 } 2046 } /* End MPLSoUDP */ 2047 } /* End Tunnel handling */ 2048 2049 if (tx_ol_flags & PKT_TX_TCP_SEG) { 2050 lso_flg = true; 2051 if (unlikely(txq->nb_tx_avail < 2052 ETH_TX_MIN_BDS_PER_LSO_PKT)) 2053 break; 2054 /* For LSO, packet header and payload must reside on 2055 * buffers pointed by different BDs. Using BD1 for HDR 2056 * and BD2 onwards for data. 2057 */ 2058 hdr_size = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; 2059 if (tunn_flg) 2060 hdr_size += mbuf->outer_l2_len + 2061 mbuf->outer_l3_len; 2062 2063 bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT; 2064 bd1_bd_flags_bf |= 2065 1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT; 2066 /* PKT_TX_TCP_SEG implies PKT_TX_TCP_CKSUM */ 2067 bd1_bd_flags_bf |= 2068 1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT; 2069 mss = rte_cpu_to_le_16(mbuf->tso_segsz); 2070 /* Using one header BD */ 2071 bd3_bf |= rte_cpu_to_le_16(1 << 2072 ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT); 2073 } else { 2074 if (unlikely(txq->nb_tx_avail < 2075 ETH_TX_MIN_BDS_PER_NON_LSO_PKT)) 2076 break; 2077 bd1_bf |= 2078 (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) 2079 << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT; 2080 } 2081 2082 /* Descriptor based VLAN insertion */ 2083 if (tx_ol_flags & PKT_TX_VLAN_PKT) { 2084 vlan = rte_cpu_to_le_16(mbuf->vlan_tci); 2085 bd1_bd_flags_bf |= 2086 1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT; 2087 } 2088 2089 /* Offload the IP checksum in the hardware */ 2090 if (tx_ol_flags & PKT_TX_IP_CKSUM) { 2091 bd1_bd_flags_bf |= 2092 1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT; 2093 /* There's no DPDK flag to request outer-L4 csum 2094 * offload. But in the case of tunnel if inner L3 or L4 2095 * csum offload is requested then we need to force 2096 * recalculation of L4 tunnel header csum also. 2097 */ 2098 if (tunn_flg && ((tx_ol_flags & PKT_TX_TUNNEL_MASK) != 2099 PKT_TX_TUNNEL_GRE)) { 2100 bd1_bd_flags_bf |= 2101 ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK << 2102 ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT; 2103 } 2104 } 2105 2106 /* L4 checksum offload (tcp or udp) */ 2107 if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) && 2108 (tx_ol_flags & (PKT_TX_UDP_CKSUM | PKT_TX_TCP_CKSUM))) { 2109 bd1_bd_flags_bf |= 2110 1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT; 2111 /* There's no DPDK flag to request outer-L4 csum 2112 * offload. But in the case of tunnel if inner L3 or L4 2113 * csum offload is requested then we need to force 2114 * recalculation of L4 tunnel header csum also. 2115 */ 2116 if (tunn_flg) { 2117 bd1_bd_flags_bf |= 2118 ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK << 2119 ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT; 2120 } 2121 } 2122 2123 /* Fill the entry in the SW ring and the BDs in the FW ring */ 2124 idx = TX_PROD(txq); 2125 txq->sw_tx_ring[idx].mbuf = mbuf; 2126 2127 /* BD1 */ 2128 bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl); 2129 memset(bd1, 0, sizeof(struct eth_tx_1st_bd)); 2130 nbds++; 2131 2132 /* Map MBUF linear data for DMA and set in the BD1 */ 2133 QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf), 2134 mbuf->data_len); 2135 bd1->data.bitfields = rte_cpu_to_le_16(bd1_bf); 2136 bd1->data.bd_flags.bitfields = bd1_bd_flags_bf; 2137 bd1->data.vlan = vlan; 2138 2139 if (lso_flg || mplsoudp_flg) { 2140 bd2 = (struct eth_tx_2nd_bd *)ecore_chain_produce 2141 (&txq->tx_pbl); 2142 memset(bd2, 0, sizeof(struct eth_tx_2nd_bd)); 2143 nbds++; 2144 2145 /* BD1 */ 2146 QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf), 2147 hdr_size); 2148 /* BD2 */ 2149 QEDE_BD_SET_ADDR_LEN(bd2, (hdr_size + 2150 rte_mbuf_data_iova(mbuf)), 2151 mbuf->data_len - hdr_size); 2152 bd2->data.bitfields1 = rte_cpu_to_le_16(bd2_bf1); 2153 if (mplsoudp_flg) { 2154 bd2->data.bitfields2 = 2155 rte_cpu_to_le_16(bd2_bf2); 2156 /* Outer L3 size */ 2157 bd2->data.tunn_ip_size = 2158 rte_cpu_to_le_16(mbuf->outer_l3_len); 2159 } 2160 /* BD3 */ 2161 if (lso_flg || (mplsoudp_flg && tunn_ipv6_ext_flg)) { 2162 bd3 = (struct eth_tx_3rd_bd *) 2163 ecore_chain_produce(&txq->tx_pbl); 2164 memset(bd3, 0, sizeof(struct eth_tx_3rd_bd)); 2165 nbds++; 2166 bd3->data.bitfields = rte_cpu_to_le_16(bd3_bf); 2167 if (lso_flg) 2168 bd3->data.lso_mss = mss; 2169 if (mplsoudp_flg) { 2170 bd3->data.tunn_l4_hdr_start_offset_w = 2171 tunn_l4_hdr_start_offset; 2172 bd3->data.tunn_hdr_size_w = 2173 tunn_hdr_size; 2174 } 2175 } 2176 } 2177 2178 /* Handle fragmented MBUF */ 2179 m_seg = mbuf->next; 2180 2181 /* Encode scatter gather buffer descriptors if required */ 2182 nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3, nbds - 1); 2183 bd1->data.nbds = nbds + nb_frags; 2184 2185 txq->nb_tx_avail -= bd1->data.nbds; 2186 txq->sw_tx_prod++; 2187 bd_prod = 2188 rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl)); 2189 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX 2190 print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags); 2191 #endif 2192 nb_pkt_sent++; 2193 txq->xmit_pkts++; 2194 } 2195 2196 /* Write value of prod idx into bd_prod */ 2197 txq->tx_db.data.bd_prod = bd_prod; 2198 rte_wmb(); 2199 rte_compiler_barrier(); 2200 DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw); 2201 rte_wmb(); 2202 2203 /* Check again for Tx completions */ 2204 qede_process_tx_compl(edev, txq); 2205 2206 PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d", 2207 nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id()); 2208 2209 return nb_pkt_sent; 2210 } 2211 2212 uint16_t 2213 qede_rxtx_pkts_dummy(__rte_unused void *p_rxq, 2214 __rte_unused struct rte_mbuf **pkts, 2215 __rte_unused uint16_t nb_pkts) 2216 { 2217 return 0; 2218 } 2219 2220 2221 /* this function does a fake walk through over completion queue 2222 * to calculate number of BDs used by HW. 2223 * At the end, it restores the state of completion queue. 2224 */ 2225 static uint16_t 2226 qede_parse_fp_cqe(struct qede_rx_queue *rxq) 2227 { 2228 uint16_t hw_comp_cons, sw_comp_cons, bd_count = 0; 2229 union eth_rx_cqe *cqe, *orig_cqe = NULL; 2230 2231 hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr); 2232 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); 2233 2234 if (hw_comp_cons == sw_comp_cons) 2235 return 0; 2236 2237 /* Get the CQE from the completion ring */ 2238 cqe = (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring); 2239 orig_cqe = cqe; 2240 2241 while (sw_comp_cons != hw_comp_cons) { 2242 switch (cqe->fast_path_regular.type) { 2243 case ETH_RX_CQE_TYPE_REGULAR: 2244 bd_count += cqe->fast_path_regular.bd_num; 2245 break; 2246 case ETH_RX_CQE_TYPE_TPA_END: 2247 bd_count += cqe->fast_path_tpa_end.num_of_bds; 2248 break; 2249 default: 2250 break; 2251 } 2252 2253 cqe = 2254 (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring); 2255 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); 2256 } 2257 2258 /* revert comp_ring to original state */ 2259 ecore_chain_set_cons(&rxq->rx_comp_ring, sw_comp_cons, orig_cqe); 2260 2261 return bd_count; 2262 } 2263 2264 int 2265 qede_rx_descriptor_status(void *p_rxq, uint16_t offset) 2266 { 2267 uint16_t hw_bd_cons, sw_bd_cons, sw_bd_prod; 2268 uint16_t produced, consumed; 2269 struct qede_rx_queue *rxq = p_rxq; 2270 2271 if (offset > rxq->nb_rx_desc) 2272 return -EINVAL; 2273 2274 sw_bd_cons = ecore_chain_get_cons_idx(&rxq->rx_bd_ring); 2275 sw_bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring); 2276 2277 /* find BDs used by HW from completion queue elements */ 2278 hw_bd_cons = sw_bd_cons + qede_parse_fp_cqe(rxq); 2279 2280 if (hw_bd_cons < sw_bd_cons) 2281 /* wraparound case */ 2282 consumed = (0xffff - sw_bd_cons) + hw_bd_cons; 2283 else 2284 consumed = hw_bd_cons - sw_bd_cons; 2285 2286 if (offset <= consumed) 2287 return RTE_ETH_RX_DESC_DONE; 2288 2289 if (sw_bd_prod < sw_bd_cons) 2290 /* wraparound case */ 2291 produced = (0xffff - sw_bd_cons) + sw_bd_prod; 2292 else 2293 produced = sw_bd_prod - sw_bd_cons; 2294 2295 if (offset <= produced) 2296 return RTE_ETH_RX_DESC_AVAIL; 2297 2298 return RTE_ETH_RX_DESC_UNAVAIL; 2299 } 2300