1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2014-2021 Netronome Systems, Inc. 3 * All rights reserved. 4 * 5 * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation. 6 */ 7 8 /* 9 * vim:shiftwidth=8:noexpandtab 10 * 11 * @file dpdk/pmd/nfp_rxtx.c 12 * 13 * Netronome vNIC DPDK Poll-Mode Driver: Rx/Tx functions 14 */ 15 16 #include <ethdev_driver.h> 17 #include <ethdev_pci.h> 18 19 #include "nfp_common.h" 20 #include "nfp_ctrl.h" 21 #include "nfp_rxtx.h" 22 #include "nfp_logs.h" 23 #include "nfpcore/nfp_mip.h" 24 #include "nfpcore/nfp_rtsym.h" 25 #include "nfpcore/nfp-common/nfp_platform.h" 26 27 static int 28 nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq) 29 { 30 struct nfp_net_rx_buff *rxe = rxq->rxbufs; 31 uint64_t dma_addr; 32 unsigned int i; 33 34 PMD_RX_LOG(DEBUG, "Fill Rx Freelist for %u descriptors", 35 rxq->rx_count); 36 37 for (i = 0; i < rxq->rx_count; i++) { 38 struct nfp_net_rx_desc *rxd; 39 struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool); 40 41 if (mbuf == NULL) { 42 PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%u", 43 (unsigned int)rxq->qidx); 44 return -ENOMEM; 45 } 46 47 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(mbuf)); 48 49 rxd = &rxq->rxds[i]; 50 rxd->fld.dd = 0; 51 rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xff; 52 rxd->fld.dma_addr_lo = dma_addr & 0xffffffff; 53 rxe[i].mbuf = mbuf; 54 PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64, i, dma_addr); 55 } 56 57 /* Make sure all writes are flushed before telling the hardware */ 58 rte_wmb(); 59 60 /* Not advertising the whole ring as the firmware gets confused if so */ 61 PMD_RX_LOG(DEBUG, "Increment FL write pointer in %u", 62 rxq->rx_count - 1); 63 64 nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1); 65 66 return 0; 67 } 68 69 int 70 nfp_net_rx_freelist_setup(struct rte_eth_dev *dev) 71 { 72 int i; 73 74 for (i = 0; i < dev->data->nb_rx_queues; i++) { 75 if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) < 0) 76 return -1; 77 } 78 return 0; 79 } 80 81 uint32_t 82 nfp_net_rx_queue_count(void *rx_queue) 83 { 84 struct nfp_net_rxq *rxq; 85 struct nfp_net_rx_desc *rxds; 86 uint32_t idx; 87 uint32_t count; 88 89 rxq = rx_queue; 90 91 idx = rxq->rd_p; 92 93 count = 0; 94 95 /* 96 * Other PMDs are just checking the DD bit in intervals of 4 97 * descriptors and counting all four if the first has the DD 98 * bit on. Of course, this is not accurate but can be good for 99 * performance. But ideally that should be done in descriptors 100 * chunks belonging to the same cache line 101 */ 102 103 while (count < rxq->rx_count) { 104 rxds = &rxq->rxds[idx]; 105 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) 106 break; 107 108 count++; 109 idx++; 110 111 /* Wrapping? 
*/ 112 if ((idx) == rxq->rx_count) 113 idx = 0; 114 } 115 116 return count; 117 } 118 119 /* nfp_net_parse_meta() - Parse the metadata from packet */ 120 static void 121 nfp_net_parse_meta(struct nfp_meta_parsed *meta, 122 struct nfp_net_rx_desc *rxd, 123 struct nfp_net_rxq *rxq, 124 struct rte_mbuf *mbuf) 125 { 126 uint32_t meta_info; 127 uint32_t vlan_info; 128 uint8_t *meta_offset; 129 struct nfp_net_hw *hw = rxq->hw; 130 131 if (unlikely((NFD_CFG_MAJOR_VERSION_of(hw->ver) < 2) || 132 NFP_DESC_META_LEN(rxd) == 0)) 133 return; 134 135 meta_offset = rte_pktmbuf_mtod(mbuf, uint8_t *); 136 meta_offset -= NFP_DESC_META_LEN(rxd); 137 meta_info = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset); 138 meta_offset += 4; 139 140 for (; meta_info != 0; meta_info >>= NFP_NET_META_FIELD_SIZE, meta_offset += 4) { 141 switch (meta_info & NFP_NET_META_FIELD_MASK) { 142 case NFP_NET_META_HASH: 143 /* Next field type is about the hash type */ 144 meta_info >>= NFP_NET_META_FIELD_SIZE; 145 /* Hash value is in the data field */ 146 meta->hash = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset); 147 meta->hash_type = meta_info & NFP_NET_META_FIELD_MASK; 148 break; 149 case NFP_NET_META_VLAN: 150 vlan_info = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset); 151 meta->vlan[meta->vlan_layer].offload = 152 vlan_info >> NFP_NET_META_VLAN_OFFLOAD; 153 meta->vlan[meta->vlan_layer].tci = 154 vlan_info & NFP_NET_META_VLAN_MASK; 155 meta->vlan[meta->vlan_layer].tpid = NFP_NET_META_TPID(vlan_info); 156 ++meta->vlan_layer; 157 break; 158 default: 159 /* Unsupported metadata can be a performance issue */ 160 return; 161 } 162 } 163 } 164 165 /* 166 * nfp_net_parse_meta_hash() - Set mbuf hash data based on the metadata info 167 * 168 * The RSS hash and hash-type are prepended to the packet data. 169 * Extract and decode it and set the mbuf fields. 170 */ 171 static void 172 nfp_net_parse_meta_hash(const struct nfp_meta_parsed *meta, 173 struct nfp_net_rx_desc *rxd, 174 struct nfp_net_rxq *rxq, 175 struct rte_mbuf *mbuf) 176 { 177 uint32_t hash; 178 uint32_t hash_type; 179 struct nfp_net_hw *hw = rxq->hw; 180 181 if ((hw->ctrl & NFP_NET_CFG_CTRL_RSS_ANY) == 0) 182 return; 183 184 if (likely((hw->cap & NFP_NET_CFG_CTRL_RSS_ANY) != 0 && 185 NFP_DESC_META_LEN(rxd) != 0)) { 186 hash = meta->hash; 187 hash_type = meta->hash_type; 188 } else { 189 if ((rxd->rxd.flags & PCIE_DESC_RX_RSS) == 0) 190 return; 191 192 hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET); 193 hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET); 194 } 195 196 mbuf->hash.rss = hash; 197 mbuf->ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 198 199 switch (hash_type) { 200 case NFP_NET_RSS_IPV4: 201 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV4; 202 break; 203 case NFP_NET_RSS_IPV6: 204 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6; 205 break; 206 case NFP_NET_RSS_IPV6_EX: 207 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; 208 break; 209 case NFP_NET_RSS_IPV4_TCP: 210 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; 211 break; 212 case NFP_NET_RSS_IPV6_TCP: 213 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; 214 break; 215 case NFP_NET_RSS_IPV4_UDP: 216 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; 217 break; 218 case NFP_NET_RSS_IPV6_UDP: 219 mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT; 220 break; 221 default: 222 mbuf->packet_type |= RTE_PTYPE_INNER_L4_MASK; 223 } 224 } 225 226 /* 227 * nfp_net_parse_meta_vlan() - Set mbuf vlan_strip data based on metadata info 228 * 229 * The VLAN info TPID and TCI are prepended to the packet data. 
 * Extract and decode it and set the mbuf fields.
 */
static void
nfp_net_parse_meta_vlan(const struct nfp_meta_parsed *meta,
		struct nfp_net_rx_desc *rxd,
		struct nfp_net_rxq *rxq,
		struct rte_mbuf *mb)
{
	struct nfp_net_hw *hw = rxq->hw;

	/* Skip if the hardware doesn't support stripping the VLAN tag. */
	if ((hw->ctrl & (NFP_NET_CFG_CTRL_RXVLAN | NFP_NET_CFG_CTRL_RXVLAN_V2)) == 0)
		return;

	/*
	 * The NIC can send the VLAN info in two ways:
	 * 1. Through the metadata, when NFP_NET_CFG_CTRL_RXVLAN_V2 is set,
	 * 2. Through the descriptor, when NFP_NET_CFG_CTRL_RXVLAN is set.
	 *
	 * If the NIC doesn't send the VLAN info, there is nothing to do.
	 */
	if ((hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN_V2) != 0) {
		if (meta->vlan_layer >= 1 && meta->vlan[0].offload != 0) {
			mb->vlan_tci = rte_cpu_to_le_32(meta->vlan[0].tci);
			mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
		}
	} else if ((hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN) != 0) {
		if ((rxd->rxd.flags & PCIE_DESC_RX_VLAN) != 0) {
			mb->vlan_tci = rte_cpu_to_le_32(rxd->rxd.vlan);
			mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
		}
	}
}

/*
 * nfp_net_parse_meta_qinq() - Set mbuf qinq_strip data based on metadata info
 *
 * The outer VLAN TCI is prepended to the packet data.
 * Extract and decode it and set the mbuf fields.
 *
 * If both RTE_MBUF_F_RX_VLAN and NFP_NET_CFG_CTRL_RXQINQ are set, the 2 VLANs
 * have been stripped by the hardware and their TCIs are saved in
 * mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
 * If NFP_NET_CFG_CTRL_RXQINQ is set and RTE_MBUF_F_RX_VLAN is unset, only the
 * outer VLAN is removed from packet data, but both TCIs are saved in
 * mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
 *
 * qinq set & vlan set : meta->vlan_layer>=2, meta->vlan[0].offload=1, meta->vlan[1].offload=1
 * qinq set & vlan not set: meta->vlan_layer>=2, meta->vlan[1].offload=1, meta->vlan[0].offload=0
 * qinq not set & vlan set: meta->vlan_layer=1, meta->vlan[0].offload=1
 * qinq not set & vlan not set: meta->vlan_layer=0
 */
static void
nfp_net_parse_meta_qinq(const struct nfp_meta_parsed *meta,
		struct nfp_net_rxq *rxq,
		struct rte_mbuf *mb)
{
	struct nfp_net_hw *hw = rxq->hw;

	if ((hw->ctrl & NFP_NET_CFG_CTRL_RXQINQ) == 0 ||
			(hw->cap & NFP_NET_CFG_CTRL_RXQINQ) == 0)
		return;

	if (meta->vlan_layer < NFP_META_MAX_VLANS)
		return;

	if (meta->vlan[0].offload == 0)
		mb->vlan_tci = rte_cpu_to_le_16(meta->vlan[0].tci);
	mb->vlan_tci_outer = rte_cpu_to_le_16(meta->vlan[1].tci);
	PMD_RX_LOG(DEBUG, "Received outer vlan is %u inner vlan is %u",
			mb->vlan_tci_outer, mb->vlan_tci);
	mb->ol_flags |= RTE_MBUF_F_RX_QINQ | RTE_MBUF_F_RX_QINQ_STRIPPED;
}

/*
 * RX path design:
 *
 * There are some decisions to take:
 * 1) How to check the DD bit of the RX descriptors
 * 2) How and when to allocate new mbufs
 *
 * The current implementation checks just one DD bit per loop iteration. As each
 * descriptor is 8 bytes, it is likely a good idea to check descriptors in
 * a single cache line instead. Tests with this change have not shown any
 * performance improvement but it requires further investigation.
For example, 318 * depending on which descriptor is next, the number of descriptors could be 319 * less than 8 for just checking those in the same cache line. This implies 320 * extra work which could be counterproductive by itself. Indeed, last firmware 321 * changes are just doing this: writing several descriptors with the DD bit 322 * for saving PCIe bandwidth and DMA operations from the NFP. 323 * 324 * Mbuf allocation is done when a new packet is received. Then the descriptor 325 * is automatically linked with the new mbuf and the old one is given to the 326 * user. The main drawback with this design is mbuf allocation is heavier than 327 * using bulk allocations allowed by DPDK with rte_mempool_get_bulk. From the 328 * cache point of view it does not seem allocating the mbuf early on as we are 329 * doing now have any benefit at all. Again, tests with this change have not 330 * shown any improvement. Also, rte_mempool_get_bulk returns all or nothing 331 * so looking at the implications of this type of allocation should be studied 332 * deeply 333 */ 334 335 uint16_t 336 nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 337 { 338 struct nfp_net_rxq *rxq; 339 struct nfp_net_rx_desc *rxds; 340 struct nfp_net_rx_buff *rxb; 341 struct nfp_net_hw *hw; 342 struct rte_mbuf *mb; 343 struct rte_mbuf *new_mb; 344 struct nfp_meta_parsed meta; 345 uint16_t nb_hold; 346 uint64_t dma_addr; 347 uint16_t avail; 348 349 avail = 0; 350 rxq = rx_queue; 351 if (unlikely(rxq == NULL)) { 352 /* 353 * DPDK just checks the queue is lower than max queues 354 * enabled. But the queue needs to be configured 355 */ 356 RTE_LOG_DP(ERR, PMD, "RX Bad queue\n"); 357 return avail; 358 } 359 360 hw = rxq->hw; 361 nb_hold = 0; 362 363 while (avail < nb_pkts) { 364 rxb = &rxq->rxbufs[rxq->rd_p]; 365 if (unlikely(rxb == NULL)) { 366 RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n"); 367 break; 368 } 369 370 rxds = &rxq->rxds[rxq->rd_p]; 371 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) 372 break; 373 374 /* 375 * Memory barrier to ensure that we won't do other 376 * reads before the DD bit. 377 */ 378 rte_rmb(); 379 380 /* 381 * We got a packet. Let's alloc a new mbuf for refilling the 382 * free descriptor ring as soon as possible 383 */ 384 new_mb = rte_pktmbuf_alloc(rxq->mem_pool); 385 if (unlikely(new_mb == NULL)) { 386 RTE_LOG_DP(DEBUG, PMD, 387 "RX mbuf alloc failed port_id=%u queue_id=%u\n", 388 rxq->port_id, (unsigned int)rxq->qidx); 389 nfp_net_mbuf_alloc_failed(rxq); 390 break; 391 } 392 393 /* 394 * Grab the mbuf and refill the descriptor with the 395 * previously allocated mbuf 396 */ 397 mb = rxb->mbuf; 398 rxb->mbuf = new_mb; 399 400 PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u", 401 rxds->rxd.data_len, rxq->mbuf_size); 402 403 /* Size of this segment */ 404 mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); 405 /* Size of the whole packet. We just support 1 segment */ 406 mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); 407 408 if (unlikely((mb->data_len + hw->rx_offset) > 409 rxq->mbuf_size)) { 410 /* 411 * This should not happen and the user has the 412 * responsibility of avoiding it. 
But we have 413 * to give some info about the error 414 */ 415 RTE_LOG_DP(ERR, PMD, 416 "mbuf overflow likely due to the RX offset.\n" 417 "\t\tYour mbuf size should have extra space for" 418 " RX offset=%u bytes.\n" 419 "\t\tCurrently you just have %u bytes available" 420 " but the received packet is %u bytes long", 421 hw->rx_offset, 422 rxq->mbuf_size - hw->rx_offset, 423 mb->data_len); 424 rte_pktmbuf_free(mb); 425 break; 426 } 427 428 /* Filling the received mbuf with packet info */ 429 if (hw->rx_offset) 430 mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset; 431 else 432 mb->data_off = RTE_PKTMBUF_HEADROOM + 433 NFP_DESC_META_LEN(rxds); 434 435 /* No scatter mode supported */ 436 mb->nb_segs = 1; 437 mb->next = NULL; 438 mb->port = rxq->port_id; 439 440 memset(&meta, 0, sizeof(meta)); 441 nfp_net_parse_meta(&meta, rxds, rxq, mb); 442 nfp_net_parse_meta_hash(&meta, rxds, rxq, mb); 443 nfp_net_parse_meta_vlan(&meta, rxds, rxq, mb); 444 nfp_net_parse_meta_qinq(&meta, rxq, mb); 445 446 /* Checking the checksum flag */ 447 nfp_net_rx_cksum(rxq, rxds, mb); 448 449 /* Adding the mbuf to the mbuf array passed by the app */ 450 rx_pkts[avail++] = mb; 451 452 /* Now resetting and updating the descriptor */ 453 rxds->vals[0] = 0; 454 rxds->vals[1] = 0; 455 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(new_mb)); 456 rxds->fld.dd = 0; 457 rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xff; 458 rxds->fld.dma_addr_lo = dma_addr & 0xffffffff; 459 nb_hold++; 460 461 rxq->rd_p++; 462 if (unlikely(rxq->rd_p == rxq->rx_count)) /* wrapping?*/ 463 rxq->rd_p = 0; 464 } 465 466 if (nb_hold == 0) 467 return nb_hold; 468 469 PMD_RX_LOG(DEBUG, "RX port_id=%u queue_id=%u, %d packets received", 470 rxq->port_id, (unsigned int)rxq->qidx, nb_hold); 471 472 nb_hold += rxq->nb_rx_hold; 473 474 /* 475 * FL descriptors needs to be written before incrementing the 476 * FL queue WR pointer 477 */ 478 rte_wmb(); 479 if (nb_hold > rxq->rx_free_thresh) { 480 PMD_RX_LOG(DEBUG, "port=%u queue=%u nb_hold=%u avail=%u", 481 rxq->port_id, (unsigned int)rxq->qidx, 482 (unsigned int)nb_hold, (unsigned int)avail); 483 nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold); 484 nb_hold = 0; 485 } 486 rxq->nb_rx_hold = nb_hold; 487 488 return avail; 489 } 490 491 static void 492 nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq) 493 { 494 unsigned int i; 495 496 if (rxq->rxbufs == NULL) 497 return; 498 499 for (i = 0; i < rxq->rx_count; i++) { 500 if (rxq->rxbufs[i].mbuf) { 501 rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf); 502 rxq->rxbufs[i].mbuf = NULL; 503 } 504 } 505 } 506 507 void 508 nfp_net_rx_queue_release(struct rte_eth_dev *dev, uint16_t queue_idx) 509 { 510 struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_idx]; 511 512 if (rxq) { 513 nfp_net_rx_queue_release_mbufs(rxq); 514 rte_eth_dma_zone_free(dev, "rx_ring", queue_idx); 515 rte_free(rxq->rxbufs); 516 rte_free(rxq); 517 } 518 } 519 520 void 521 nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq) 522 { 523 nfp_net_rx_queue_release_mbufs(rxq); 524 rxq->rd_p = 0; 525 rxq->nb_rx_hold = 0; 526 } 527 528 int 529 nfp_net_rx_queue_setup(struct rte_eth_dev *dev, 530 uint16_t queue_idx, uint16_t nb_desc, 531 unsigned int socket_id, 532 const struct rte_eth_rxconf *rx_conf, 533 struct rte_mempool *mp) 534 { 535 int ret; 536 uint16_t min_rx_desc; 537 uint16_t max_rx_desc; 538 const struct rte_memzone *tz; 539 struct nfp_net_rxq *rxq; 540 struct nfp_net_hw *hw; 541 uint32_t rx_desc_sz; 542 543 hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); 544 545 
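	/*
	 * Note on the descriptor-count validation performed just below
	 * (a worked example, assuming NFP_ALIGN_RING_DESC is a power of two
	 * such as 128): each RX descriptor is 8 bytes, so the check that
	 * nb_desc * sizeof(struct nfp_net_rx_desc) is a multiple of
	 * NFP_ALIGN_RING_DESC is satisfied by any nb_desc that is a multiple
	 * of 16, provided it also falls within [min_rx_desc, max_rx_desc].
	 */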
PMD_INIT_FUNC_TRACE(); 546 547 ret = nfp_net_rx_desc_limits(hw, &min_rx_desc, &max_rx_desc); 548 if (ret != 0) 549 return ret; 550 551 /* Validating number of descriptors */ 552 rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc); 553 if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 || 554 nb_desc > max_rx_desc || nb_desc < min_rx_desc) { 555 PMD_DRV_LOG(ERR, "Wrong nb_desc value"); 556 return -EINVAL; 557 } 558 559 /* 560 * Free memory prior to re-allocation if needed. This is the case after 561 * calling nfp_net_stop 562 */ 563 if (dev->data->rx_queues[queue_idx]) { 564 nfp_net_rx_queue_release(dev, queue_idx); 565 dev->data->rx_queues[queue_idx] = NULL; 566 } 567 568 /* Allocating rx queue data structure */ 569 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq), 570 RTE_CACHE_LINE_SIZE, socket_id); 571 if (rxq == NULL) 572 return -ENOMEM; 573 574 dev->data->rx_queues[queue_idx] = rxq; 575 576 /* Hw queues mapping based on firmware configuration */ 577 rxq->qidx = queue_idx; 578 rxq->fl_qcidx = queue_idx * hw->stride_rx; 579 rxq->rx_qcidx = rxq->fl_qcidx + (hw->stride_rx - 1); 580 rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx); 581 rxq->qcp_rx = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->rx_qcidx); 582 583 /* 584 * Tracking mbuf size for detecting a potential mbuf overflow due to 585 * RX offset 586 */ 587 rxq->mem_pool = mp; 588 rxq->mbuf_size = rxq->mem_pool->elt_size; 589 rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM); 590 hw->flbufsz = rxq->mbuf_size; 591 592 rxq->rx_count = nb_desc; 593 rxq->port_id = dev->data->port_id; 594 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 595 rxq->drop_en = rx_conf->rx_drop_en; 596 597 /* 598 * Allocate RX ring hardware descriptors. A memzone large enough to 599 * handle the maximum ring size is allocated in order to allow for 600 * resizing in later calls to the queue setup function. 601 */ 602 tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, 603 sizeof(struct nfp_net_rx_desc) * 604 max_rx_desc, NFP_MEMZONE_ALIGN, 605 socket_id); 606 607 if (tz == NULL) { 608 PMD_DRV_LOG(ERR, "Error allocating rx dma"); 609 nfp_net_rx_queue_release(dev, queue_idx); 610 dev->data->rx_queues[queue_idx] = NULL; 611 return -ENOMEM; 612 } 613 614 /* Saving physical and virtual addresses for the RX ring */ 615 rxq->dma = (uint64_t)tz->iova; 616 rxq->rxds = (struct nfp_net_rx_desc *)tz->addr; 617 618 /* mbuf pointers array for referencing mbufs linked to RX descriptors */ 619 rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs", 620 sizeof(*rxq->rxbufs) * nb_desc, 621 RTE_CACHE_LINE_SIZE, socket_id); 622 if (rxq->rxbufs == NULL) { 623 nfp_net_rx_queue_release(dev, queue_idx); 624 dev->data->rx_queues[queue_idx] = NULL; 625 return -ENOMEM; 626 } 627 628 PMD_RX_LOG(DEBUG, "rxbufs=%p hw_ring=%p dma_addr=0x%" PRIx64, 629 rxq->rxbufs, rxq->rxds, (unsigned long)rxq->dma); 630 631 nfp_net_reset_rx_queue(rxq); 632 633 rxq->hw = hw; 634 635 /* 636 * Telling the HW about the physical address of the RX ring and number 637 * of descriptors in log2 format 638 */ 639 nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma); 640 nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc)); 641 642 return 0; 643 } 644 645 /* 646 * nfp_net_tx_free_bufs - Check for descriptors with a complete 647 * status 648 * @txq: TX queue to work with 649 * Returns number of descriptors freed 650 */ 651 int 652 nfp_net_tx_free_bufs(struct nfp_net_txq *txq) 653 { 654 uint32_t qcp_rd_p; 655 int todo; 656 657 PMD_TX_LOG(DEBUG, "queue %u. 
Check for descriptor with a complete" 658 " status", txq->qidx); 659 660 /* Work out how many packets have been sent */ 661 qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR); 662 663 if (qcp_rd_p == txq->rd_p) { 664 PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending " 665 "packets (%u, %u)", txq->qidx, 666 qcp_rd_p, txq->rd_p); 667 return 0; 668 } 669 670 if (qcp_rd_p > txq->rd_p) 671 todo = qcp_rd_p - txq->rd_p; 672 else 673 todo = qcp_rd_p + txq->tx_count - txq->rd_p; 674 675 PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u", 676 qcp_rd_p, txq->rd_p, txq->rd_p); 677 678 if (todo == 0) 679 return todo; 680 681 txq->rd_p += todo; 682 if (unlikely(txq->rd_p >= txq->tx_count)) 683 txq->rd_p -= txq->tx_count; 684 685 return todo; 686 } 687 688 static void 689 nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq) 690 { 691 unsigned int i; 692 693 if (txq->txbufs == NULL) 694 return; 695 696 for (i = 0; i < txq->tx_count; i++) { 697 if (txq->txbufs[i].mbuf) { 698 rte_pktmbuf_free_seg(txq->txbufs[i].mbuf); 699 txq->txbufs[i].mbuf = NULL; 700 } 701 } 702 } 703 704 void 705 nfp_net_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_idx) 706 { 707 struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx]; 708 709 if (txq) { 710 nfp_net_tx_queue_release_mbufs(txq); 711 rte_eth_dma_zone_free(dev, "tx_ring", queue_idx); 712 rte_free(txq->txbufs); 713 rte_free(txq); 714 } 715 } 716 717 void 718 nfp_net_reset_tx_queue(struct nfp_net_txq *txq) 719 { 720 nfp_net_tx_queue_release_mbufs(txq); 721 txq->wr_p = 0; 722 txq->rd_p = 0; 723 } 724 725 static int 726 nfp_net_nfd3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 727 uint16_t nb_desc, unsigned int socket_id, 728 const struct rte_eth_txconf *tx_conf) 729 { 730 int ret; 731 uint16_t min_tx_desc; 732 uint16_t max_tx_desc; 733 const struct rte_memzone *tz; 734 struct nfp_net_txq *txq; 735 uint16_t tx_free_thresh; 736 struct nfp_net_hw *hw; 737 uint32_t tx_desc_sz; 738 739 hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); 740 741 PMD_INIT_FUNC_TRACE(); 742 743 ret = nfp_net_tx_desc_limits(hw, &min_tx_desc, &max_tx_desc); 744 if (ret != 0) 745 return ret; 746 747 /* Validating number of descriptors */ 748 tx_desc_sz = nb_desc * sizeof(struct nfp_net_nfd3_tx_desc); 749 if ((NFD3_TX_DESC_PER_SIMPLE_PKT * tx_desc_sz) % NFP_ALIGN_RING_DESC != 0 || 750 nb_desc > max_tx_desc || nb_desc < min_tx_desc) { 751 PMD_DRV_LOG(ERR, "Wrong nb_desc value"); 752 return -EINVAL; 753 } 754 755 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ? 756 tx_conf->tx_free_thresh : 757 DEFAULT_TX_FREE_THRESH); 758 759 if (tx_free_thresh > (nb_desc)) { 760 PMD_DRV_LOG(ERR, 761 "tx_free_thresh must be less than the number of TX " 762 "descriptors. (tx_free_thresh=%u port=%d " 763 "queue=%d)", (unsigned int)tx_free_thresh, 764 dev->data->port_id, (int)queue_idx); 765 return -(EINVAL); 766 } 767 768 /* 769 * Free memory prior to re-allocation if needed. 
This is the case after 770 * calling nfp_net_stop 771 */ 772 if (dev->data->tx_queues[queue_idx]) { 773 PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d", 774 queue_idx); 775 nfp_net_tx_queue_release(dev, queue_idx); 776 dev->data->tx_queues[queue_idx] = NULL; 777 } 778 779 /* Allocating tx queue data structure */ 780 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq), 781 RTE_CACHE_LINE_SIZE, socket_id); 782 if (txq == NULL) { 783 PMD_DRV_LOG(ERR, "Error allocating tx dma"); 784 return -ENOMEM; 785 } 786 787 dev->data->tx_queues[queue_idx] = txq; 788 789 /* 790 * Allocate TX ring hardware descriptors. A memzone large enough to 791 * handle the maximum ring size is allocated in order to allow for 792 * resizing in later calls to the queue setup function. 793 */ 794 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, 795 sizeof(struct nfp_net_nfd3_tx_desc) * 796 NFD3_TX_DESC_PER_SIMPLE_PKT * 797 max_tx_desc, NFP_MEMZONE_ALIGN, 798 socket_id); 799 if (tz == NULL) { 800 PMD_DRV_LOG(ERR, "Error allocating tx dma"); 801 nfp_net_tx_queue_release(dev, queue_idx); 802 dev->data->tx_queues[queue_idx] = NULL; 803 return -ENOMEM; 804 } 805 806 txq->tx_count = nb_desc * NFD3_TX_DESC_PER_SIMPLE_PKT; 807 txq->tx_free_thresh = tx_free_thresh; 808 txq->tx_pthresh = tx_conf->tx_thresh.pthresh; 809 txq->tx_hthresh = tx_conf->tx_thresh.hthresh; 810 txq->tx_wthresh = tx_conf->tx_thresh.wthresh; 811 812 /* queue mapping based on firmware configuration */ 813 txq->qidx = queue_idx; 814 txq->tx_qcidx = queue_idx * hw->stride_tx; 815 txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx); 816 817 txq->port_id = dev->data->port_id; 818 819 /* Saving physical and virtual addresses for the TX ring */ 820 txq->dma = (uint64_t)tz->iova; 821 txq->txds = (struct nfp_net_nfd3_tx_desc *)tz->addr; 822 823 /* mbuf pointers array for referencing mbufs linked to TX descriptors */ 824 txq->txbufs = rte_zmalloc_socket("txq->txbufs", 825 sizeof(*txq->txbufs) * txq->tx_count, 826 RTE_CACHE_LINE_SIZE, socket_id); 827 if (txq->txbufs == NULL) { 828 nfp_net_tx_queue_release(dev, queue_idx); 829 dev->data->tx_queues[queue_idx] = NULL; 830 return -ENOMEM; 831 } 832 PMD_TX_LOG(DEBUG, "txbufs=%p hw_ring=%p dma_addr=0x%" PRIx64, 833 txq->txbufs, txq->txds, (unsigned long)txq->dma); 834 835 nfp_net_reset_tx_queue(txq); 836 837 txq->hw = hw; 838 839 /* 840 * Telling the HW about the physical address of the TX ring and number 841 * of descriptors in log2 format 842 */ 843 nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(queue_idx), txq->dma); 844 nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(queue_idx), rte_log2_u32(txq->tx_count)); 845 846 return 0; 847 } 848 849 /* 850 * nfp_net_nfd3_tx_vlan() - Set vlan info in the nfd3 tx desc 851 * 852 * If enable NFP_NET_CFG_CTRL_TXVLAN_V2 853 * Vlan_info is stored in the meta and 854 * is handled in the nfp_net_nfd3_set_meta_vlan 855 * else if enable NFP_NET_CFG_CTRL_TXVLAN 856 * Vlan_info is stored in the tx_desc and 857 * is handled in the nfp_net_nfd3_tx_vlan 858 */ 859 static void 860 nfp_net_nfd3_tx_vlan(struct nfp_net_txq *txq, 861 struct nfp_net_nfd3_tx_desc *txd, 862 struct rte_mbuf *mb) 863 { 864 struct nfp_net_hw *hw = txq->hw; 865 866 if ((hw->cap & NFP_NET_CFG_CTRL_TXVLAN_V2) != 0 || 867 (hw->cap & NFP_NET_CFG_CTRL_TXVLAN) == 0) 868 return; 869 870 if ((mb->ol_flags & RTE_MBUF_F_TX_VLAN) != 0) { 871 txd->flags |= PCIE_DESC_TX_VLAN; 872 txd->vlan = mb->vlan_tci; 873 } 874 } 875 876 static void 877 nfp_net_set_meta_vlan(struct nfp_net_meta_raw *meta_data, 878 struct 
rte_mbuf *pkt, 879 uint8_t layer) 880 { 881 uint16_t vlan_tci; 882 uint16_t tpid; 883 884 tpid = RTE_ETHER_TYPE_VLAN; 885 vlan_tci = pkt->vlan_tci; 886 887 meta_data->data[layer] = rte_cpu_to_be_32(tpid << 16 | vlan_tci); 888 } 889 890 static void 891 nfp_net_nfd3_set_meta_data(struct nfp_net_meta_raw *meta_data, 892 struct nfp_net_txq *txq, 893 struct rte_mbuf *pkt) 894 { 895 uint8_t vlan_layer = 0; 896 struct nfp_net_hw *hw; 897 uint32_t meta_info; 898 uint8_t layer = 0; 899 char *meta; 900 901 hw = txq->hw; 902 903 if ((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) != 0 && 904 (hw->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2) != 0) { 905 if (meta_data->length == 0) 906 meta_data->length = NFP_NET_META_HEADER_SIZE; 907 meta_data->length += NFP_NET_META_FIELD_SIZE; 908 meta_data->header |= NFP_NET_META_VLAN; 909 } 910 911 if (meta_data->length == 0) 912 return; 913 914 meta_info = meta_data->header; 915 meta_data->header = rte_cpu_to_be_32(meta_data->header); 916 meta = rte_pktmbuf_prepend(pkt, meta_data->length); 917 memcpy(meta, &meta_data->header, sizeof(meta_data->header)); 918 meta += NFP_NET_META_HEADER_SIZE; 919 920 for (; meta_info != 0; meta_info >>= NFP_NET_META_FIELD_SIZE, layer++, 921 meta += NFP_NET_META_FIELD_SIZE) { 922 switch (meta_info & NFP_NET_META_FIELD_MASK) { 923 case NFP_NET_META_VLAN: 924 if (vlan_layer > 0) { 925 PMD_DRV_LOG(ERR, "At most 1 layers of vlan is supported"); 926 return; 927 } 928 nfp_net_set_meta_vlan(meta_data, pkt, layer); 929 vlan_layer++; 930 break; 931 default: 932 PMD_DRV_LOG(ERR, "The metadata type not supported"); 933 return; 934 } 935 936 memcpy(meta, &meta_data->data[layer], sizeof(meta_data->data[layer])); 937 } 938 } 939 940 uint16_t 941 nfp_net_nfd3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 942 { 943 struct nfp_net_txq *txq; 944 struct nfp_net_hw *hw; 945 struct nfp_net_nfd3_tx_desc *txds, txd; 946 struct nfp_net_meta_raw meta_data; 947 struct rte_mbuf *pkt; 948 uint64_t dma_addr; 949 int pkt_size, dma_size; 950 uint16_t free_descs, issued_descs; 951 struct rte_mbuf **lmbuf; 952 int i; 953 954 txq = tx_queue; 955 hw = txq->hw; 956 txds = &txq->txds[txq->wr_p]; 957 958 PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets", 959 txq->qidx, txq->wr_p, nb_pkts); 960 961 if (nfp_net_nfd3_free_tx_desc(txq) < NFD3_TX_DESC_PER_SIMPLE_PKT * nb_pkts || 962 nfp_net_nfd3_txq_full(txq)) 963 nfp_net_tx_free_bufs(txq); 964 965 free_descs = (uint16_t)nfp_net_nfd3_free_tx_desc(txq); 966 if (unlikely(free_descs == 0)) 967 return 0; 968 969 pkt = *tx_pkts; 970 971 issued_descs = 0; 972 PMD_TX_LOG(DEBUG, "queue: %u. Sending %u packets", 973 txq->qidx, nb_pkts); 974 /* Sending packets */ 975 for (i = 0; i < nb_pkts && free_descs > 0; i++) { 976 memset(&meta_data, 0, sizeof(meta_data)); 977 /* Grabbing the mbuf linked to the current descriptor */ 978 lmbuf = &txq->txbufs[txq->wr_p].mbuf; 979 /* Warming the cache for releasing the mbuf later on */ 980 RTE_MBUF_PREFETCH_TO_FREE(*lmbuf); 981 982 pkt = *(tx_pkts + i); 983 984 nfp_net_nfd3_set_meta_data(&meta_data, txq, pkt); 985 986 if (unlikely(pkt->nb_segs > 1 && 987 !(hw->cap & NFP_NET_CFG_CTRL_GATHER))) { 988 PMD_INIT_LOG(ERR, "Multisegment packet not supported"); 989 goto xmit_end; 990 } 991 992 /* Checking if we have enough descriptors */ 993 if (unlikely(pkt->nb_segs > free_descs)) 994 goto xmit_end; 995 996 /* 997 * Checksum and VLAN flags just in the first descriptor for a 998 * multisegment packet, but TSO info needs to be in all of them. 
999 */ 1000 txd.data_len = pkt->pkt_len; 1001 nfp_net_nfd3_tx_tso(txq, &txd, pkt); 1002 nfp_net_nfd3_tx_cksum(txq, &txd, pkt); 1003 nfp_net_nfd3_tx_vlan(txq, &txd, pkt); 1004 1005 /* 1006 * mbuf data_len is the data in one segment and pkt_len data 1007 * in the whole packet. When the packet is just one segment, 1008 * then data_len = pkt_len 1009 */ 1010 pkt_size = pkt->pkt_len; 1011 1012 while (pkt != NULL && free_descs > 0) { 1013 /* Copying TSO, VLAN and cksum info */ 1014 *txds = txd; 1015 1016 /* Releasing mbuf used by this descriptor previously*/ 1017 if (*lmbuf) 1018 rte_pktmbuf_free_seg(*lmbuf); 1019 1020 /* 1021 * Linking mbuf with descriptor for being released 1022 * next time descriptor is used 1023 */ 1024 *lmbuf = pkt; 1025 1026 dma_size = pkt->data_len; 1027 dma_addr = rte_mbuf_data_iova(pkt); 1028 PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:" 1029 "%" PRIx64 "", dma_addr); 1030 1031 /* Filling descriptors fields */ 1032 txds->dma_len = dma_size; 1033 txds->data_len = txd.data_len; 1034 txds->dma_addr_hi = (dma_addr >> 32) & 0xff; 1035 txds->dma_addr_lo = (dma_addr & 0xffffffff); 1036 free_descs--; 1037 1038 txq->wr_p++; 1039 if (unlikely(txq->wr_p == txq->tx_count)) /* wrapping?*/ 1040 txq->wr_p = 0; 1041 1042 pkt_size -= dma_size; 1043 1044 /* 1045 * Making the EOP, packets with just one segment 1046 * the priority 1047 */ 1048 if (likely(pkt_size == 0)) 1049 txds->offset_eop = PCIE_DESC_TX_EOP; 1050 else 1051 txds->offset_eop = 0; 1052 1053 /* Set the meta_len */ 1054 txds->offset_eop |= meta_data.length; 1055 1056 pkt = pkt->next; 1057 /* Referencing next free TX descriptor */ 1058 txds = &txq->txds[txq->wr_p]; 1059 lmbuf = &txq->txbufs[txq->wr_p].mbuf; 1060 issued_descs++; 1061 } 1062 } 1063 1064 xmit_end: 1065 /* Increment write pointers. 
Force memory write before we let HW know */ 1066 rte_wmb(); 1067 nfp_qcp_ptr_add(txq->qcp_q, NFP_QCP_WRITE_PTR, issued_descs); 1068 1069 return i; 1070 } 1071 1072 static void 1073 nfp_net_nfdk_set_meta_data(struct rte_mbuf *pkt, 1074 struct nfp_net_txq *txq, 1075 uint64_t *metadata) 1076 { 1077 char *meta; 1078 uint8_t layer = 0; 1079 uint32_t meta_type; 1080 struct nfp_net_hw *hw; 1081 uint32_t header_offset; 1082 uint8_t vlan_layer = 0; 1083 struct nfp_net_meta_raw meta_data; 1084 1085 memset(&meta_data, 0, sizeof(meta_data)); 1086 hw = txq->hw; 1087 1088 if ((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) != 0 && 1089 (hw->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2) != 0) { 1090 if (meta_data.length == 0) 1091 meta_data.length = NFP_NET_META_HEADER_SIZE; 1092 meta_data.length += NFP_NET_META_FIELD_SIZE; 1093 meta_data.header |= NFP_NET_META_VLAN; 1094 } 1095 1096 if (meta_data.length == 0) 1097 return; 1098 1099 meta_type = meta_data.header; 1100 header_offset = meta_type << NFP_NET_META_NFDK_LENGTH; 1101 meta_data.header = header_offset | meta_data.length; 1102 meta_data.header = rte_cpu_to_be_32(meta_data.header); 1103 meta = rte_pktmbuf_prepend(pkt, meta_data.length); 1104 memcpy(meta, &meta_data.header, sizeof(meta_data.header)); 1105 meta += NFP_NET_META_HEADER_SIZE; 1106 1107 for (; meta_type != 0; meta_type >>= NFP_NET_META_FIELD_SIZE, layer++, 1108 meta += NFP_NET_META_FIELD_SIZE) { 1109 switch (meta_type & NFP_NET_META_FIELD_MASK) { 1110 case NFP_NET_META_VLAN: 1111 if (vlan_layer > 0) { 1112 PMD_DRV_LOG(ERR, "At most 1 layers of vlan is supported"); 1113 return; 1114 } 1115 nfp_net_set_meta_vlan(&meta_data, pkt, layer); 1116 vlan_layer++; 1117 break; 1118 default: 1119 PMD_DRV_LOG(ERR, "The metadata type not supported"); 1120 return; 1121 } 1122 1123 memcpy(meta, &meta_data.data[layer], sizeof(meta_data.data[layer])); 1124 } 1125 1126 *metadata = NFDK_DESC_TX_CHAIN_META; 1127 } 1128 1129 static int 1130 nfp_net_nfdk_tx_queue_setup(struct rte_eth_dev *dev, 1131 uint16_t queue_idx, 1132 uint16_t nb_desc, 1133 unsigned int socket_id, 1134 const struct rte_eth_txconf *tx_conf) 1135 { 1136 int ret; 1137 uint16_t min_tx_desc; 1138 uint16_t max_tx_desc; 1139 const struct rte_memzone *tz; 1140 struct nfp_net_txq *txq; 1141 uint16_t tx_free_thresh; 1142 struct nfp_net_hw *hw; 1143 uint32_t tx_desc_sz; 1144 1145 hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); 1146 1147 PMD_INIT_FUNC_TRACE(); 1148 1149 ret = nfp_net_tx_desc_limits(hw, &min_tx_desc, &max_tx_desc); 1150 if (ret != 0) 1151 return ret; 1152 1153 /* Validating number of descriptors */ 1154 tx_desc_sz = nb_desc * sizeof(struct nfp_net_nfdk_tx_desc); 1155 if ((NFDK_TX_DESC_PER_SIMPLE_PKT * tx_desc_sz) % NFP_ALIGN_RING_DESC != 0 || 1156 (NFDK_TX_DESC_PER_SIMPLE_PKT * nb_desc) % NFDK_TX_DESC_BLOCK_CNT != 0 || 1157 nb_desc > max_tx_desc || nb_desc < min_tx_desc) { 1158 PMD_DRV_LOG(ERR, "Wrong nb_desc value"); 1159 return -EINVAL; 1160 } 1161 1162 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ? 1163 tx_conf->tx_free_thresh : 1164 DEFAULT_TX_FREE_THRESH); 1165 1166 if (tx_free_thresh > (nb_desc)) { 1167 PMD_DRV_LOG(ERR, 1168 "tx_free_thresh must be less than the number of TX " 1169 "descriptors. (tx_free_thresh=%u port=%d " 1170 "queue=%d)", (unsigned int)tx_free_thresh, 1171 dev->data->port_id, (int)queue_idx); 1172 return -(EINVAL); 1173 } 1174 1175 /* 1176 * Free memory prior to re-allocation if needed. 
This is the case after 1177 * calling nfp_net_stop 1178 */ 1179 if (dev->data->tx_queues[queue_idx]) { 1180 PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d", 1181 queue_idx); 1182 nfp_net_tx_queue_release(dev, queue_idx); 1183 dev->data->tx_queues[queue_idx] = NULL; 1184 } 1185 1186 /* Allocating tx queue data structure */ 1187 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq), 1188 RTE_CACHE_LINE_SIZE, socket_id); 1189 if (txq == NULL) { 1190 PMD_DRV_LOG(ERR, "Error allocating tx dma"); 1191 return -ENOMEM; 1192 } 1193 1194 /* 1195 * Allocate TX ring hardware descriptors. A memzone large enough to 1196 * handle the maximum ring size is allocated in order to allow for 1197 * resizing in later calls to the queue setup function. 1198 */ 1199 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, 1200 sizeof(struct nfp_net_nfdk_tx_desc) * 1201 NFDK_TX_DESC_PER_SIMPLE_PKT * 1202 max_tx_desc, NFP_MEMZONE_ALIGN, 1203 socket_id); 1204 if (tz == NULL) { 1205 PMD_DRV_LOG(ERR, "Error allocating tx dma"); 1206 nfp_net_tx_queue_release(dev, queue_idx); 1207 return -ENOMEM; 1208 } 1209 1210 txq->tx_count = nb_desc * NFDK_TX_DESC_PER_SIMPLE_PKT; 1211 txq->tx_free_thresh = tx_free_thresh; 1212 txq->tx_pthresh = tx_conf->tx_thresh.pthresh; 1213 txq->tx_hthresh = tx_conf->tx_thresh.hthresh; 1214 txq->tx_wthresh = tx_conf->tx_thresh.wthresh; 1215 1216 /* queue mapping based on firmware configuration */ 1217 txq->qidx = queue_idx; 1218 txq->tx_qcidx = queue_idx * hw->stride_tx; 1219 txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx); 1220 1221 txq->port_id = dev->data->port_id; 1222 1223 /* Saving physical and virtual addresses for the TX ring */ 1224 txq->dma = (uint64_t)tz->iova; 1225 txq->ktxds = (struct nfp_net_nfdk_tx_desc *)tz->addr; 1226 1227 /* mbuf pointers array for referencing mbufs linked to TX descriptors */ 1228 txq->txbufs = rte_zmalloc_socket("txq->txbufs", 1229 sizeof(*txq->txbufs) * txq->tx_count, 1230 RTE_CACHE_LINE_SIZE, socket_id); 1231 1232 if (txq->txbufs == NULL) { 1233 nfp_net_tx_queue_release(dev, queue_idx); 1234 return -ENOMEM; 1235 } 1236 PMD_TX_LOG(DEBUG, "txbufs=%p hw_ring=%p dma_addr=0x%" PRIx64, 1237 txq->txbufs, txq->ktxds, (unsigned long)txq->dma); 1238 1239 nfp_net_reset_tx_queue(txq); 1240 1241 dev->data->tx_queues[queue_idx] = txq; 1242 txq->hw = hw; 1243 /* 1244 * Telling the HW about the physical address of the TX ring and number 1245 * of descriptors in log2 format 1246 */ 1247 nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(queue_idx), txq->dma); 1248 nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(queue_idx), rte_log2_u32(txq->tx_count)); 1249 1250 return 0; 1251 } 1252 1253 int 1254 nfp_net_tx_queue_setup(struct rte_eth_dev *dev, 1255 uint16_t queue_idx, 1256 uint16_t nb_desc, 1257 unsigned int socket_id, 1258 const struct rte_eth_txconf *tx_conf) 1259 { 1260 struct nfp_net_hw *hw; 1261 1262 hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); 1263 1264 switch (NFD_CFG_CLASS_VER_of(hw->ver)) { 1265 case NFP_NET_CFG_VERSION_DP_NFD3: 1266 return nfp_net_nfd3_tx_queue_setup(dev, queue_idx, 1267 nb_desc, socket_id, tx_conf); 1268 case NFP_NET_CFG_VERSION_DP_NFDK: 1269 if (NFD_CFG_MAJOR_VERSION_of(hw->ver) < 5) { 1270 PMD_DRV_LOG(ERR, "NFDK must use ABI 5 or newer, found: %d", 1271 NFD_CFG_MAJOR_VERSION_of(hw->ver)); 1272 return -EINVAL; 1273 } 1274 return nfp_net_nfdk_tx_queue_setup(dev, queue_idx, 1275 nb_desc, socket_id, tx_conf); 1276 default: 1277 PMD_DRV_LOG(ERR, "The version of firmware is not correct."); 1278 return -EINVAL; 1279 } 1280 
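	/*
	 * Whichever datapath is selected above, the setup helper ends by
	 * programming NFP_NET_CFG_TXR_ADDR with the ring DMA address and
	 * NFP_NET_CFG_TXR_SZ with the ring size in log2 format, e.g.
	 * rte_log2_u32(1024) == 10 for a 1024-entry ring.
	 */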
} 1281 1282 static inline uint32_t 1283 nfp_net_nfdk_free_tx_desc(struct nfp_net_txq *txq) 1284 { 1285 uint32_t free_desc; 1286 1287 if (txq->wr_p >= txq->rd_p) 1288 free_desc = txq->tx_count - (txq->wr_p - txq->rd_p); 1289 else 1290 free_desc = txq->rd_p - txq->wr_p; 1291 1292 return (free_desc > NFDK_TX_DESC_STOP_CNT) ? 1293 (free_desc - NFDK_TX_DESC_STOP_CNT) : 0; 1294 } 1295 1296 static inline uint32_t 1297 nfp_net_nfdk_txq_full(struct nfp_net_txq *txq) 1298 { 1299 return (nfp_net_nfdk_free_tx_desc(txq) < txq->tx_free_thresh); 1300 } 1301 1302 static inline int 1303 nfp_net_nfdk_headlen_to_segs(unsigned int headlen) 1304 { 1305 return DIV_ROUND_UP(headlen + 1306 NFDK_TX_MAX_DATA_PER_DESC - 1307 NFDK_TX_MAX_DATA_PER_HEAD, 1308 NFDK_TX_MAX_DATA_PER_DESC); 1309 } 1310 1311 static int 1312 nfp_net_nfdk_tx_maybe_close_block(struct nfp_net_txq *txq, struct rte_mbuf *pkt) 1313 { 1314 unsigned int n_descs, wr_p, i, nop_slots; 1315 struct rte_mbuf *pkt_temp; 1316 1317 pkt_temp = pkt; 1318 n_descs = nfp_net_nfdk_headlen_to_segs(pkt_temp->data_len); 1319 while (pkt_temp->next) { 1320 pkt_temp = pkt_temp->next; 1321 n_descs += DIV_ROUND_UP(pkt_temp->data_len, NFDK_TX_MAX_DATA_PER_DESC); 1322 } 1323 1324 if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) 1325 return -EINVAL; 1326 1327 /* Under count by 1 (don't count meta) for the round down to work out */ 1328 n_descs += !!(pkt->ol_flags & RTE_MBUF_F_TX_TCP_SEG); 1329 1330 if (round_down(txq->wr_p, NFDK_TX_DESC_BLOCK_CNT) != 1331 round_down(txq->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) 1332 goto close_block; 1333 1334 if ((uint32_t)txq->data_pending + pkt->pkt_len > NFDK_TX_MAX_DATA_PER_BLOCK) 1335 goto close_block; 1336 1337 return 0; 1338 1339 close_block: 1340 wr_p = txq->wr_p; 1341 nop_slots = D_BLOCK_CPL(wr_p); 1342 1343 memset(&txq->ktxds[wr_p], 0, nop_slots * sizeof(struct nfp_net_nfdk_tx_desc)); 1344 for (i = wr_p; i < nop_slots + wr_p; i++) { 1345 if (txq->txbufs[i].mbuf) { 1346 rte_pktmbuf_free_seg(txq->txbufs[i].mbuf); 1347 txq->txbufs[i].mbuf = NULL; 1348 } 1349 } 1350 txq->data_pending = 0; 1351 txq->wr_p = D_IDX(txq, txq->wr_p + nop_slots); 1352 1353 return nop_slots; 1354 } 1355 1356 static inline uint64_t 1357 nfp_net_nfdk_tx_cksum(struct nfp_net_txq *txq, struct rte_mbuf *mb, 1358 uint64_t flags) 1359 { 1360 uint64_t ol_flags; 1361 struct nfp_net_hw *hw = txq->hw; 1362 1363 if ((hw->cap & NFP_NET_CFG_CTRL_TXCSUM) == 0) 1364 return flags; 1365 1366 ol_flags = mb->ol_flags; 1367 1368 /* IPv6 does not need checksum */ 1369 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) 1370 flags |= NFDK_DESC_TX_L3_CSUM; 1371 1372 if (ol_flags & RTE_MBUF_F_TX_L4_MASK) 1373 flags |= NFDK_DESC_TX_L4_CSUM; 1374 1375 return flags; 1376 } 1377 1378 static inline uint64_t 1379 nfp_net_nfdk_tx_tso(struct nfp_net_txq *txq, struct rte_mbuf *mb) 1380 { 1381 uint64_t ol_flags; 1382 struct nfp_net_nfdk_tx_desc txd; 1383 struct nfp_net_hw *hw = txq->hw; 1384 1385 if ((hw->cap & NFP_NET_CFG_CTRL_LSO_ANY) == 0) 1386 goto clean_txd; 1387 1388 ol_flags = mb->ol_flags; 1389 1390 if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) 1391 goto clean_txd; 1392 1393 txd.l3_offset = mb->l2_len; 1394 txd.l4_offset = mb->l2_len + mb->l3_len; 1395 txd.lso_meta_res = 0; 1396 txd.mss = rte_cpu_to_le_16(mb->tso_segsz); 1397 txd.lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len; 1398 txd.lso_totsegs = (mb->pkt_len + mb->tso_segsz) / mb->tso_segsz; 1399 1400 return txd.raw; 1401 1402 clean_txd: 1403 txd.l3_offset = 0; 1404 txd.l4_offset = 0; 1405 txd.lso_hdrlen = 0; 1406 txd.mss = 0; 1407 
txd.lso_totsegs = 0; 1408 txd.lso_meta_res = 0; 1409 1410 return txd.raw; 1411 } 1412 1413 uint16_t 1414 nfp_net_nfdk_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 1415 { 1416 uint32_t buf_idx; 1417 uint64_t dma_addr; 1418 uint16_t free_descs; 1419 uint32_t npkts = 0; 1420 uint64_t metadata = 0; 1421 uint16_t issued_descs = 0; 1422 struct nfp_net_txq *txq; 1423 struct nfp_net_hw *hw; 1424 struct nfp_net_nfdk_tx_desc *ktxds; 1425 struct rte_mbuf *pkt, *temp_pkt; 1426 struct rte_mbuf **lmbuf; 1427 1428 txq = tx_queue; 1429 hw = txq->hw; 1430 1431 PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets", 1432 txq->qidx, txq->wr_p, nb_pkts); 1433 1434 if ((nfp_net_nfdk_free_tx_desc(txq) < NFDK_TX_DESC_PER_SIMPLE_PKT * 1435 nb_pkts) || (nfp_net_nfdk_txq_full(txq))) 1436 nfp_net_tx_free_bufs(txq); 1437 1438 free_descs = (uint16_t)nfp_net_nfdk_free_tx_desc(txq); 1439 if (unlikely(free_descs == 0)) 1440 return 0; 1441 1442 PMD_TX_LOG(DEBUG, "queue: %u. Sending %u packets", txq->qidx, nb_pkts); 1443 /* Sending packets */ 1444 while ((npkts < nb_pkts) && free_descs) { 1445 uint32_t type, dma_len, dlen_type, tmp_dlen; 1446 int nop_descs, used_descs; 1447 1448 pkt = *(tx_pkts + npkts); 1449 nop_descs = nfp_net_nfdk_tx_maybe_close_block(txq, pkt); 1450 if (nop_descs < 0) 1451 goto xmit_end; 1452 1453 issued_descs += nop_descs; 1454 ktxds = &txq->ktxds[txq->wr_p]; 1455 /* Grabbing the mbuf linked to the current descriptor */ 1456 buf_idx = txq->wr_p; 1457 lmbuf = &txq->txbufs[buf_idx++].mbuf; 1458 /* Warming the cache for releasing the mbuf later on */ 1459 RTE_MBUF_PREFETCH_TO_FREE(*lmbuf); 1460 1461 temp_pkt = pkt; 1462 nfp_net_nfdk_set_meta_data(pkt, txq, &metadata); 1463 1464 if (unlikely(pkt->nb_segs > 1 && 1465 !(hw->cap & NFP_NET_CFG_CTRL_GATHER))) { 1466 PMD_INIT_LOG(ERR, "Multisegment packet not supported"); 1467 goto xmit_end; 1468 } 1469 1470 /* 1471 * Checksum and VLAN flags just in the first descriptor for a 1472 * multisegment packet, but TSO info needs to be in all of them. 1473 */ 1474 1475 dma_len = pkt->data_len; 1476 if ((hw->cap & NFP_NET_CFG_CTRL_LSO_ANY) && 1477 (pkt->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 1478 type = NFDK_DESC_TX_TYPE_TSO; 1479 } else if (pkt->next == NULL && dma_len < NFDK_TX_MAX_DATA_PER_HEAD) { 1480 type = NFDK_DESC_TX_TYPE_SIMPLE; 1481 } else { 1482 type = NFDK_DESC_TX_TYPE_GATHER; 1483 } 1484 1485 /* Implicitly truncates to chunk in below logic */ 1486 dma_len -= 1; 1487 1488 /* 1489 * We will do our best to pass as much data as we can in descriptor 1490 * and we need to make sure the first descriptor includes whole 1491 * head since there is limitation in firmware side. Sometimes the 1492 * value of 'dma_len & NFDK_DESC_TX_DMA_LEN_HEAD' will be less 1493 * than packet head len. 1494 */ 1495 dlen_type = (dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? 1496 NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | 1497 (NFDK_DESC_TX_TYPE_HEAD & (type << 12)); 1498 ktxds->dma_len_type = rte_cpu_to_le_16(dlen_type); 1499 dma_addr = rte_mbuf_data_iova(pkt); 1500 PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:" 1501 "%" PRIx64 "", dma_addr); 1502 ktxds->dma_addr_hi = rte_cpu_to_le_16(dma_addr >> 32); 1503 ktxds->dma_addr_lo = rte_cpu_to_le_32(dma_addr & 0xffffffff); 1504 ktxds++; 1505 1506 /* 1507 * Preserve the original dlen_type, this way below the EOP logic 1508 * can use dlen_type. 
1509 */ 1510 tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; 1511 dma_len -= tmp_dlen; 1512 dma_addr += tmp_dlen + 1; 1513 1514 /* 1515 * The rest of the data (if any) will be in larger DMA descriptors 1516 * and is handled with the dma_len loop. 1517 */ 1518 while (pkt) { 1519 if (*lmbuf) 1520 rte_pktmbuf_free_seg(*lmbuf); 1521 *lmbuf = pkt; 1522 while (dma_len > 0) { 1523 dma_len -= 1; 1524 dlen_type = NFDK_DESC_TX_DMA_LEN & dma_len; 1525 1526 ktxds->dma_len_type = rte_cpu_to_le_16(dlen_type); 1527 ktxds->dma_addr_hi = rte_cpu_to_le_16(dma_addr >> 32); 1528 ktxds->dma_addr_lo = rte_cpu_to_le_32(dma_addr & 0xffffffff); 1529 ktxds++; 1530 1531 dma_len -= dlen_type; 1532 dma_addr += dlen_type + 1; 1533 } 1534 1535 if (pkt->next == NULL) 1536 break; 1537 1538 pkt = pkt->next; 1539 dma_len = pkt->data_len; 1540 dma_addr = rte_mbuf_data_iova(pkt); 1541 PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:" 1542 "%" PRIx64 "", dma_addr); 1543 1544 lmbuf = &txq->txbufs[buf_idx++].mbuf; 1545 } 1546 1547 (ktxds - 1)->dma_len_type = rte_cpu_to_le_16(dlen_type | NFDK_DESC_TX_EOP); 1548 1549 ktxds->raw = rte_cpu_to_le_64(nfp_net_nfdk_tx_cksum(txq, temp_pkt, metadata)); 1550 ktxds++; 1551 1552 if ((hw->cap & NFP_NET_CFG_CTRL_LSO_ANY) && 1553 (temp_pkt->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 1554 ktxds->raw = rte_cpu_to_le_64(nfp_net_nfdk_tx_tso(txq, temp_pkt)); 1555 ktxds++; 1556 } 1557 1558 used_descs = ktxds - txq->ktxds - txq->wr_p; 1559 if (round_down(txq->wr_p, NFDK_TX_DESC_BLOCK_CNT) != 1560 round_down(txq->wr_p + used_descs - 1, NFDK_TX_DESC_BLOCK_CNT)) { 1561 PMD_INIT_LOG(INFO, "Used descs cross block boundary"); 1562 goto xmit_end; 1563 } 1564 1565 txq->wr_p = D_IDX(txq, txq->wr_p + used_descs); 1566 if (txq->wr_p % NFDK_TX_DESC_BLOCK_CNT) 1567 txq->data_pending += temp_pkt->pkt_len; 1568 else 1569 txq->data_pending = 0; 1570 1571 issued_descs += used_descs; 1572 npkts++; 1573 free_descs = (uint16_t)nfp_net_nfdk_free_tx_desc(txq); 1574 } 1575 1576 xmit_end: 1577 /* Increment write pointers. Force memory write before we let HW know */ 1578 rte_wmb(); 1579 nfp_qcp_ptr_add(txq->qcp_q, NFP_QCP_WRITE_PTR, issued_descs); 1580 1581 return npkts; 1582 } 1583
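
/*
 * Illustrative sketch (not used by the datapath above): the read/write
 * pointer bookkeeping in nfp_net_tx_free_bufs() and
 * nfp_net_nfdk_free_tx_desc() reduces to the classic ring arithmetic below
 * for a ring of 'count' slots.
 */
static __rte_unused uint32_t
nfp_ring_free_slots_example(uint32_t wr_p, uint32_t rd_p, uint32_t count)
{
	if (wr_p >= rd_p)
		return count - (wr_p - rd_p);

	return rd_p - wr_p;
}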
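
/*
 * Illustrative sketch of the TX VLAN metadata layout built by
 * nfp_net_set_meta_vlan(): the TPID occupies the upper 16 bits and the TCI
 * the lower 16 bits of a 32-bit big-endian metadata word.
 */
static __rte_unused rte_be32_t
nfp_vlan_meta_word_example(uint16_t tpid, uint16_t vlan_tci)
{
	return rte_cpu_to_be_32((uint32_t)tpid << 16 | vlan_tci);
}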
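
/*
 * Illustrative sketch of the RX metadata header walked by
 * nfp_net_parse_meta(): the 32-bit header holds one field type per
 * NFP_NET_META_FIELD_SIZE bits, and each field type is followed by a 32-bit
 * big-endian data word in the metadata area. This helper only counts how
 * many fields the parser would visit.
 */
static __rte_unused uint32_t
nfp_meta_field_count_example(uint32_t meta_header)
{
	uint32_t count = 0;

	for (; meta_header != 0; meta_header >>= NFP_NET_META_FIELD_SIZE)
		count++;

	return count;
}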
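
/*
 * Illustrative sketch of the overflow check in nfp_net_recv_pkts(): a
 * received frame fits in the mbuf data room only when its length plus the
 * configured RX offset does not exceed the per-queue mbuf_size computed in
 * nfp_net_rx_queue_setup().
 */
static __rte_unused int
nfp_rx_pkt_fits_example(uint16_t data_len, uint16_t rx_offset, uint16_t mbuf_size)
{
	return (uint32_t)data_len + rx_offset <= mbuf_size;
}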